{ "best_metric": 0.8810526315789474, "best_model_checkpoint": "videomae-finetuned-nba-5-class-4-batch-8000-vid-multiclass-4/checkpoint-40000", "epoch": 24.04, "eval_steps": 500, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.0000000000000004e-08, "loss": 1.6289, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-08, "loss": 1.6849, "step": 20 }, { "epoch": 0.0, "learning_rate": 9e-08, "loss": 1.7074, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-07, "loss": 1.657, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.5000000000000002e-07, "loss": 1.6217, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.8e-07, "loss": 1.6704, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.1e-07, "loss": 1.6519, "step": 70 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-07, "loss": 1.7045, "step": 80 }, { "epoch": 0.0, "learning_rate": 2.7e-07, "loss": 1.6178, "step": 90 }, { "epoch": 0.0, "learning_rate": 3.0000000000000004e-07, "loss": 1.6117, "step": 100 }, { "epoch": 0.0, "learning_rate": 3.2999999999999996e-07, "loss": 1.614, "step": 110 }, { "epoch": 0.0, "learning_rate": 3.6e-07, "loss": 1.6564, "step": 120 }, { "epoch": 0.0, "learning_rate": 3.8999999999999997e-07, "loss": 1.6711, "step": 130 }, { "epoch": 0.0, "learning_rate": 4.2e-07, "loss": 1.6176, "step": 140 }, { "epoch": 0.0, "learning_rate": 4.5e-07, "loss": 1.5739, "step": 150 }, { "epoch": 0.0, "learning_rate": 4.800000000000001e-07, "loss": 1.6065, "step": 160 }, { "epoch": 0.0, "learning_rate": 5.100000000000001e-07, "loss": 1.611, "step": 170 }, { "epoch": 0.0, "learning_rate": 5.4e-07, "loss": 1.6273, "step": 180 }, { "epoch": 0.0, "learning_rate": 5.7e-07, "loss": 1.6327, "step": 190 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 1.5965, "step": 200 }, { "epoch": 0.0, "learning_rate": 6.3e-07, "loss": 1.5956, "step": 210 }, { "epoch": 0.0, "learning_rate": 6.599999999999999e-07, "loss": 1.611, "step": 220 }, { "epoch": 0.0, "learning_rate": 6.9e-07, "loss": 1.6605, "step": 230 }, { "epoch": 0.0, "learning_rate": 7.2e-07, "loss": 1.6057, "step": 240 }, { "epoch": 0.01, "learning_rate": 7.5e-07, "loss": 1.6235, "step": 250 }, { "epoch": 0.01, "learning_rate": 7.799999999999999e-07, "loss": 1.6241, "step": 260 }, { "epoch": 0.01, "learning_rate": 8.1e-07, "loss": 1.5955, "step": 270 }, { "epoch": 0.01, "learning_rate": 8.4e-07, "loss": 1.5911, "step": 280 }, { "epoch": 0.01, "learning_rate": 8.7e-07, "loss": 1.628, "step": 290 }, { "epoch": 0.01, "learning_rate": 9e-07, "loss": 1.6224, "step": 300 }, { "epoch": 0.01, "learning_rate": 9.3e-07, "loss": 1.6257, "step": 310 }, { "epoch": 0.01, "learning_rate": 9.600000000000001e-07, "loss": 1.6915, "step": 320 }, { "epoch": 0.01, "learning_rate": 9.9e-07, "loss": 1.63, "step": 330 }, { "epoch": 0.01, "learning_rate": 1.0200000000000002e-06, "loss": 1.6068, "step": 340 }, { "epoch": 0.01, "learning_rate": 1.0500000000000001e-06, "loss": 1.6432, "step": 350 }, { "epoch": 0.01, "learning_rate": 1.08e-06, "loss": 1.636, "step": 360 }, { "epoch": 0.01, "learning_rate": 1.11e-06, "loss": 1.6221, "step": 370 }, { "epoch": 0.01, "learning_rate": 1.14e-06, "loss": 1.6367, "step": 380 }, { "epoch": 0.01, "learning_rate": 1.17e-06, "loss": 1.6197, "step": 390 }, { "epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 1.6448, "step": 400 }, { "epoch": 0.01, "learning_rate": 1.23e-06, "loss": 1.6023, "step": 410 }, { "epoch": 0.01, "learning_rate": 1.26e-06, "loss": 1.5916, "step": 420 }, { "epoch": 0.01, "learning_rate": 1.29e-06, "loss": 1.6068, "step": 430 }, { "epoch": 0.01, "learning_rate": 1.3199999999999999e-06, "loss": 1.6125, "step": 440 }, { "epoch": 0.01, "learning_rate": 1.35e-06, "loss": 1.5929, "step": 450 }, { "epoch": 0.01, "learning_rate": 1.38e-06, "loss": 1.5875, "step": 460 }, { "epoch": 0.01, "learning_rate": 1.41e-06, "loss": 1.6399, "step": 470 }, { "epoch": 0.01, "learning_rate": 1.44e-06, "loss": 1.6119, "step": 480 }, { "epoch": 0.01, "learning_rate": 1.4700000000000001e-06, "loss": 1.6221, "step": 490 }, { "epoch": 0.01, "learning_rate": 1.5e-06, "loss": 1.5873, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.53e-06, "loss": 1.6285, "step": 510 }, { "epoch": 0.01, "learning_rate": 1.5599999999999999e-06, "loss": 1.6254, "step": 520 }, { "epoch": 0.01, "learning_rate": 1.59e-06, "loss": 1.6067, "step": 530 }, { "epoch": 0.01, "learning_rate": 1.62e-06, "loss": 1.573, "step": 540 }, { "epoch": 0.01, "learning_rate": 1.65e-06, "loss": 1.6053, "step": 550 }, { "epoch": 0.01, "learning_rate": 1.68e-06, "loss": 1.5985, "step": 560 }, { "epoch": 0.01, "learning_rate": 1.7100000000000001e-06, "loss": 1.5918, "step": 570 }, { "epoch": 0.01, "learning_rate": 1.74e-06, "loss": 1.5986, "step": 580 }, { "epoch": 0.01, "learning_rate": 1.77e-06, "loss": 1.613, "step": 590 }, { "epoch": 0.01, "learning_rate": 1.8e-06, "loss": 1.6021, "step": 600 }, { "epoch": 0.01, "learning_rate": 1.83e-06, "loss": 1.5799, "step": 610 }, { "epoch": 0.01, "learning_rate": 1.86e-06, "loss": 1.59, "step": 620 }, { "epoch": 0.01, "learning_rate": 1.8900000000000001e-06, "loss": 1.6096, "step": 630 }, { "epoch": 0.01, "learning_rate": 1.9200000000000003e-06, "loss": 1.5695, "step": 640 }, { "epoch": 0.01, "learning_rate": 1.95e-06, "loss": 1.5877, "step": 650 }, { "epoch": 0.01, "learning_rate": 1.98e-06, "loss": 1.6079, "step": 660 }, { "epoch": 0.01, "learning_rate": 2.0100000000000002e-06, "loss": 1.5655, "step": 670 }, { "epoch": 0.01, "learning_rate": 2.0400000000000004e-06, "loss": 1.6314, "step": 680 }, { "epoch": 0.01, "learning_rate": 2.07e-06, "loss": 1.5892, "step": 690 }, { "epoch": 0.01, "learning_rate": 2.1000000000000002e-06, "loss": 1.6097, "step": 700 }, { "epoch": 0.01, "learning_rate": 2.13e-06, "loss": 1.6052, "step": 710 }, { "epoch": 0.01, "learning_rate": 2.16e-06, "loss": 1.6101, "step": 720 }, { "epoch": 0.01, "learning_rate": 2.1899999999999998e-06, "loss": 1.5883, "step": 730 }, { "epoch": 0.01, "learning_rate": 2.22e-06, "loss": 1.6043, "step": 740 }, { "epoch": 0.01, "learning_rate": 2.25e-06, "loss": 1.6058, "step": 750 }, { "epoch": 0.02, "learning_rate": 2.28e-06, "loss": 1.6058, "step": 760 }, { "epoch": 0.02, "learning_rate": 2.31e-06, "loss": 1.583, "step": 770 }, { "epoch": 0.02, "learning_rate": 2.34e-06, "loss": 1.5749, "step": 780 }, { "epoch": 0.02, "learning_rate": 2.37e-06, "loss": 1.5734, "step": 790 }, { "epoch": 0.02, "learning_rate": 2.4000000000000003e-06, "loss": 1.5619, "step": 800 }, { "epoch": 0.02, "learning_rate": 2.43e-06, "loss": 1.5821, "step": 810 }, { "epoch": 0.02, "learning_rate": 2.46e-06, "loss": 1.5731, "step": 820 }, { "epoch": 0.02, "learning_rate": 2.4900000000000003e-06, "loss": 1.5712, "step": 830 }, { "epoch": 0.02, "learning_rate": 2.52e-06, "loss": 1.5662, "step": 840 }, { "epoch": 0.02, "learning_rate": 2.55e-06, "loss": 1.5786, "step": 850 }, { "epoch": 0.02, "learning_rate": 2.58e-06, "loss": 1.5821, "step": 860 }, { "epoch": 0.02, "learning_rate": 2.61e-06, "loss": 1.5779, "step": 870 }, { "epoch": 0.02, "learning_rate": 2.6399999999999997e-06, "loss": 1.568, "step": 880 }, { "epoch": 0.02, "learning_rate": 2.67e-06, "loss": 1.5414, "step": 890 }, { "epoch": 0.02, "learning_rate": 2.7e-06, "loss": 1.5311, "step": 900 }, { "epoch": 0.02, "learning_rate": 2.73e-06, "loss": 1.5791, "step": 910 }, { "epoch": 0.02, "learning_rate": 2.76e-06, "loss": 1.5277, "step": 920 }, { "epoch": 0.02, "learning_rate": 2.79e-06, "loss": 1.5659, "step": 930 }, { "epoch": 0.02, "learning_rate": 2.82e-06, "loss": 1.5636, "step": 940 }, { "epoch": 0.02, "learning_rate": 2.8500000000000002e-06, "loss": 1.5084, "step": 950 }, { "epoch": 0.02, "learning_rate": 2.88e-06, "loss": 1.5494, "step": 960 }, { "epoch": 0.02, "learning_rate": 2.91e-06, "loss": 1.5576, "step": 970 }, { "epoch": 0.02, "learning_rate": 2.9400000000000002e-06, "loss": 1.5472, "step": 980 }, { "epoch": 0.02, "learning_rate": 2.9700000000000004e-06, "loss": 1.5201, "step": 990 }, { "epoch": 0.02, "learning_rate": 3e-06, "loss": 1.5265, "step": 1000 }, { "epoch": 0.02, "learning_rate": 3.0300000000000002e-06, "loss": 1.5452, "step": 1010 }, { "epoch": 0.02, "learning_rate": 3.06e-06, "loss": 1.5058, "step": 1020 }, { "epoch": 0.02, "learning_rate": 3.09e-06, "loss": 1.5153, "step": 1030 }, { "epoch": 0.02, "learning_rate": 3.1199999999999998e-06, "loss": 1.5483, "step": 1040 }, { "epoch": 0.02, "learning_rate": 3.15e-06, "loss": 1.5126, "step": 1050 }, { "epoch": 0.02, "learning_rate": 3.18e-06, "loss": 1.4588, "step": 1060 }, { "epoch": 0.02, "learning_rate": 3.21e-06, "loss": 1.4918, "step": 1070 }, { "epoch": 0.02, "learning_rate": 3.24e-06, "loss": 1.4994, "step": 1080 }, { "epoch": 0.02, "learning_rate": 3.27e-06, "loss": 1.4209, "step": 1090 }, { "epoch": 0.02, "learning_rate": 3.3e-06, "loss": 1.5078, "step": 1100 }, { "epoch": 0.02, "learning_rate": 3.3300000000000003e-06, "loss": 1.5444, "step": 1110 }, { "epoch": 0.02, "learning_rate": 3.36e-06, "loss": 1.5468, "step": 1120 }, { "epoch": 0.02, "learning_rate": 3.39e-06, "loss": 1.4958, "step": 1130 }, { "epoch": 0.02, "learning_rate": 3.4200000000000003e-06, "loss": 1.4897, "step": 1140 }, { "epoch": 0.02, "learning_rate": 3.4500000000000004e-06, "loss": 1.4517, "step": 1150 }, { "epoch": 0.02, "learning_rate": 3.48e-06, "loss": 1.3506, "step": 1160 }, { "epoch": 0.02, "learning_rate": 3.5100000000000003e-06, "loss": 1.4706, "step": 1170 }, { "epoch": 0.02, "learning_rate": 3.54e-06, "loss": 1.4046, "step": 1180 }, { "epoch": 0.02, "learning_rate": 3.57e-06, "loss": 1.498, "step": 1190 }, { "epoch": 0.02, "learning_rate": 3.6e-06, "loss": 1.4703, "step": 1200 }, { "epoch": 0.02, "learning_rate": 3.63e-06, "loss": 1.4965, "step": 1210 }, { "epoch": 0.02, "learning_rate": 3.66e-06, "loss": 1.3704, "step": 1220 }, { "epoch": 0.02, "learning_rate": 3.6900000000000002e-06, "loss": 1.3918, "step": 1230 }, { "epoch": 0.02, "learning_rate": 3.72e-06, "loss": 1.3952, "step": 1240 }, { "epoch": 0.03, "learning_rate": 3.75e-06, "loss": 1.3897, "step": 1250 }, { "epoch": 0.03, "learning_rate": 3.7800000000000002e-06, "loss": 1.3657, "step": 1260 }, { "epoch": 0.03, "learning_rate": 3.81e-06, "loss": 1.3527, "step": 1270 }, { "epoch": 0.03, "learning_rate": 3.8400000000000005e-06, "loss": 1.39, "step": 1280 }, { "epoch": 0.03, "learning_rate": 3.87e-06, "loss": 1.3892, "step": 1290 }, { "epoch": 0.03, "learning_rate": 3.9e-06, "loss": 1.4569, "step": 1300 }, { "epoch": 0.03, "learning_rate": 3.9300000000000005e-06, "loss": 1.3253, "step": 1310 }, { "epoch": 0.03, "learning_rate": 3.96e-06, "loss": 1.5201, "step": 1320 }, { "epoch": 0.03, "learning_rate": 3.99e-06, "loss": 1.4022, "step": 1330 }, { "epoch": 0.03, "learning_rate": 4.0200000000000005e-06, "loss": 1.3751, "step": 1340 }, { "epoch": 0.03, "learning_rate": 4.05e-06, "loss": 1.3118, "step": 1350 }, { "epoch": 0.03, "learning_rate": 4.080000000000001e-06, "loss": 1.3506, "step": 1360 }, { "epoch": 0.03, "learning_rate": 4.1100000000000005e-06, "loss": 1.522, "step": 1370 }, { "epoch": 0.03, "learning_rate": 4.14e-06, "loss": 1.4244, "step": 1380 }, { "epoch": 0.03, "learning_rate": 4.170000000000001e-06, "loss": 1.4063, "step": 1390 }, { "epoch": 0.03, "learning_rate": 4.2000000000000004e-06, "loss": 1.48, "step": 1400 }, { "epoch": 0.03, "learning_rate": 4.229999999999999e-06, "loss": 1.359, "step": 1410 }, { "epoch": 0.03, "learning_rate": 4.26e-06, "loss": 1.323, "step": 1420 }, { "epoch": 0.03, "learning_rate": 4.29e-06, "loss": 1.4078, "step": 1430 }, { "epoch": 0.03, "learning_rate": 4.32e-06, "loss": 1.4117, "step": 1440 }, { "epoch": 0.03, "learning_rate": 4.35e-06, "loss": 1.3358, "step": 1450 }, { "epoch": 0.03, "learning_rate": 4.3799999999999996e-06, "loss": 1.3316, "step": 1460 }, { "epoch": 0.03, "learning_rate": 4.41e-06, "loss": 1.2612, "step": 1470 }, { "epoch": 0.03, "learning_rate": 4.44e-06, "loss": 1.3363, "step": 1480 }, { "epoch": 0.03, "learning_rate": 4.4699999999999996e-06, "loss": 1.4093, "step": 1490 }, { "epoch": 0.03, "learning_rate": 4.5e-06, "loss": 1.2774, "step": 1500 }, { "epoch": 0.03, "learning_rate": 4.53e-06, "loss": 1.2604, "step": 1510 }, { "epoch": 0.03, "learning_rate": 4.56e-06, "loss": 1.2288, "step": 1520 }, { "epoch": 0.03, "learning_rate": 4.59e-06, "loss": 1.3362, "step": 1530 }, { "epoch": 0.03, "learning_rate": 4.62e-06, "loss": 1.4124, "step": 1540 }, { "epoch": 0.03, "learning_rate": 4.65e-06, "loss": 1.2563, "step": 1550 }, { "epoch": 0.03, "learning_rate": 4.68e-06, "loss": 1.2945, "step": 1560 }, { "epoch": 0.03, "learning_rate": 4.71e-06, "loss": 1.4438, "step": 1570 }, { "epoch": 0.03, "learning_rate": 4.74e-06, "loss": 1.2614, "step": 1580 }, { "epoch": 0.03, "learning_rate": 4.77e-06, "loss": 1.313, "step": 1590 }, { "epoch": 0.03, "learning_rate": 4.800000000000001e-06, "loss": 1.0942, "step": 1600 }, { "epoch": 0.03, "learning_rate": 4.83e-06, "loss": 1.2324, "step": 1610 }, { "epoch": 0.03, "learning_rate": 4.86e-06, "loss": 1.5338, "step": 1620 }, { "epoch": 0.03, "learning_rate": 4.890000000000001e-06, "loss": 1.4801, "step": 1630 }, { "epoch": 0.03, "learning_rate": 4.92e-06, "loss": 1.2899, "step": 1640 }, { "epoch": 0.03, "learning_rate": 4.95e-06, "loss": 1.3281, "step": 1650 }, { "epoch": 0.03, "learning_rate": 4.980000000000001e-06, "loss": 1.2799, "step": 1660 }, { "epoch": 0.03, "learning_rate": 5.01e-06, "loss": 1.2776, "step": 1670 }, { "epoch": 0.03, "learning_rate": 5.04e-06, "loss": 1.1707, "step": 1680 }, { "epoch": 0.03, "learning_rate": 5.070000000000001e-06, "loss": 1.2293, "step": 1690 }, { "epoch": 0.03, "learning_rate": 5.1e-06, "loss": 1.2953, "step": 1700 }, { "epoch": 0.03, "learning_rate": 5.130000000000001e-06, "loss": 1.2206, "step": 1710 }, { "epoch": 0.03, "learning_rate": 5.16e-06, "loss": 1.2309, "step": 1720 }, { "epoch": 0.03, "learning_rate": 5.1899999999999994e-06, "loss": 1.0757, "step": 1730 }, { "epoch": 0.03, "learning_rate": 5.22e-06, "loss": 1.4371, "step": 1740 }, { "epoch": 0.04, "learning_rate": 5.25e-06, "loss": 1.0968, "step": 1750 }, { "epoch": 0.04, "learning_rate": 5.279999999999999e-06, "loss": 1.2251, "step": 1760 }, { "epoch": 0.04, "learning_rate": 5.31e-06, "loss": 1.3717, "step": 1770 }, { "epoch": 0.04, "learning_rate": 5.34e-06, "loss": 1.2957, "step": 1780 }, { "epoch": 0.04, "learning_rate": 5.37e-06, "loss": 1.2934, "step": 1790 }, { "epoch": 0.04, "learning_rate": 5.4e-06, "loss": 1.2453, "step": 1800 }, { "epoch": 0.04, "learning_rate": 5.43e-06, "loss": 1.1924, "step": 1810 }, { "epoch": 0.04, "learning_rate": 5.46e-06, "loss": 1.1958, "step": 1820 }, { "epoch": 0.04, "learning_rate": 5.49e-06, "loss": 1.1375, "step": 1830 }, { "epoch": 0.04, "learning_rate": 5.52e-06, "loss": 1.2275, "step": 1840 }, { "epoch": 0.04, "learning_rate": 5.55e-06, "loss": 1.2237, "step": 1850 }, { "epoch": 0.04, "learning_rate": 5.58e-06, "loss": 1.3711, "step": 1860 }, { "epoch": 0.04, "learning_rate": 5.6100000000000005e-06, "loss": 1.1037, "step": 1870 }, { "epoch": 0.04, "learning_rate": 5.64e-06, "loss": 1.2853, "step": 1880 }, { "epoch": 0.04, "learning_rate": 5.67e-06, "loss": 1.2894, "step": 1890 }, { "epoch": 0.04, "learning_rate": 5.7000000000000005e-06, "loss": 1.0479, "step": 1900 }, { "epoch": 0.04, "learning_rate": 5.73e-06, "loss": 1.3781, "step": 1910 }, { "epoch": 0.04, "learning_rate": 5.76e-06, "loss": 1.313, "step": 1920 }, { "epoch": 0.04, "learning_rate": 5.7900000000000005e-06, "loss": 1.2074, "step": 1930 }, { "epoch": 0.04, "learning_rate": 5.82e-06, "loss": 1.1394, "step": 1940 }, { "epoch": 0.04, "learning_rate": 5.850000000000001e-06, "loss": 1.0924, "step": 1950 }, { "epoch": 0.04, "learning_rate": 5.8800000000000005e-06, "loss": 1.1553, "step": 1960 }, { "epoch": 0.04, "learning_rate": 5.91e-06, "loss": 1.3285, "step": 1970 }, { "epoch": 0.04, "learning_rate": 5.940000000000001e-06, "loss": 1.4713, "step": 1980 }, { "epoch": 0.04, "learning_rate": 5.9700000000000004e-06, "loss": 0.9996, "step": 1990 }, { "epoch": 0.04, "learning_rate": 6e-06, "loss": 1.3802, "step": 2000 }, { "epoch": 0.04, "eval_accuracy": 0.52, "eval_f1": 0.52, "eval_loss": 1.23806631565094, "eval_runtime": 786.4493, "eval_samples_per_second": 6.04, "eval_steps_per_second": 1.511, "step": 2000 }, { "epoch": 1.0, "learning_rate": 6.030000000000001e-06, "loss": 1.4493, "step": 2010 }, { "epoch": 1.0, "learning_rate": 6.0600000000000004e-06, "loss": 1.3382, "step": 2020 }, { "epoch": 1.0, "learning_rate": 6.090000000000001e-06, "loss": 1.2994, "step": 2030 }, { "epoch": 1.0, "learning_rate": 6.12e-06, "loss": 1.1561, "step": 2040 }, { "epoch": 1.0, "learning_rate": 6.1499999999999996e-06, "loss": 1.2175, "step": 2050 }, { "epoch": 1.0, "learning_rate": 6.18e-06, "loss": 1.2732, "step": 2060 }, { "epoch": 1.0, "learning_rate": 6.21e-06, "loss": 1.1406, "step": 2070 }, { "epoch": 1.0, "learning_rate": 6.2399999999999995e-06, "loss": 1.2093, "step": 2080 }, { "epoch": 1.0, "learning_rate": 6.27e-06, "loss": 1.0776, "step": 2090 }, { "epoch": 1.0, "learning_rate": 6.3e-06, "loss": 1.3075, "step": 2100 }, { "epoch": 1.0, "learning_rate": 6.3299999999999995e-06, "loss": 1.2189, "step": 2110 }, { "epoch": 1.0, "learning_rate": 6.36e-06, "loss": 1.1371, "step": 2120 }, { "epoch": 1.0, "learning_rate": 6.39e-06, "loss": 1.2426, "step": 2130 }, { "epoch": 1.0, "learning_rate": 6.42e-06, "loss": 1.1542, "step": 2140 }, { "epoch": 1.0, "learning_rate": 6.45e-06, "loss": 1.2668, "step": 2150 }, { "epoch": 1.0, "learning_rate": 6.48e-06, "loss": 1.1043, "step": 2160 }, { "epoch": 1.0, "learning_rate": 6.51e-06, "loss": 1.3451, "step": 2170 }, { "epoch": 1.0, "learning_rate": 6.54e-06, "loss": 1.1112, "step": 2180 }, { "epoch": 1.0, "learning_rate": 6.57e-06, "loss": 0.9727, "step": 2190 }, { "epoch": 1.0, "learning_rate": 6.6e-06, "loss": 1.1381, "step": 2200 }, { "epoch": 1.0, "learning_rate": 6.63e-06, "loss": 1.1848, "step": 2210 }, { "epoch": 1.0, "learning_rate": 6.660000000000001e-06, "loss": 1.2371, "step": 2220 }, { "epoch": 1.0, "learning_rate": 6.69e-06, "loss": 1.4057, "step": 2230 }, { "epoch": 1.0, "learning_rate": 6.72e-06, "loss": 1.2255, "step": 2240 }, { "epoch": 1.0, "learning_rate": 6.750000000000001e-06, "loss": 1.2957, "step": 2250 }, { "epoch": 1.01, "learning_rate": 6.78e-06, "loss": 1.1859, "step": 2260 }, { "epoch": 1.01, "learning_rate": 6.81e-06, "loss": 1.3443, "step": 2270 }, { "epoch": 1.01, "learning_rate": 6.840000000000001e-06, "loss": 1.2337, "step": 2280 }, { "epoch": 1.01, "learning_rate": 6.87e-06, "loss": 1.0896, "step": 2290 }, { "epoch": 1.01, "learning_rate": 6.900000000000001e-06, "loss": 1.1986, "step": 2300 }, { "epoch": 1.01, "learning_rate": 6.9300000000000006e-06, "loss": 1.2643, "step": 2310 }, { "epoch": 1.01, "learning_rate": 6.96e-06, "loss": 1.0731, "step": 2320 }, { "epoch": 1.01, "learning_rate": 6.990000000000001e-06, "loss": 1.0705, "step": 2330 }, { "epoch": 1.01, "learning_rate": 7.0200000000000006e-06, "loss": 1.141, "step": 2340 }, { "epoch": 1.01, "learning_rate": 7.049999999999999e-06, "loss": 0.9496, "step": 2350 }, { "epoch": 1.01, "learning_rate": 7.08e-06, "loss": 1.2506, "step": 2360 }, { "epoch": 1.01, "learning_rate": 7.11e-06, "loss": 1.3169, "step": 2370 }, { "epoch": 1.01, "learning_rate": 7.14e-06, "loss": 1.2174, "step": 2380 }, { "epoch": 1.01, "learning_rate": 7.17e-06, "loss": 1.3699, "step": 2390 }, { "epoch": 1.01, "learning_rate": 7.2e-06, "loss": 1.2294, "step": 2400 }, { "epoch": 1.01, "learning_rate": 7.23e-06, "loss": 1.0979, "step": 2410 }, { "epoch": 1.01, "learning_rate": 7.26e-06, "loss": 1.1005, "step": 2420 }, { "epoch": 1.01, "learning_rate": 7.29e-06, "loss": 1.0107, "step": 2430 }, { "epoch": 1.01, "learning_rate": 7.32e-06, "loss": 1.187, "step": 2440 }, { "epoch": 1.01, "learning_rate": 7.35e-06, "loss": 0.9485, "step": 2450 }, { "epoch": 1.01, "learning_rate": 7.3800000000000005e-06, "loss": 1.2392, "step": 2460 }, { "epoch": 1.01, "learning_rate": 7.41e-06, "loss": 1.1229, "step": 2470 }, { "epoch": 1.01, "learning_rate": 7.44e-06, "loss": 1.0925, "step": 2480 }, { "epoch": 1.01, "learning_rate": 7.4700000000000005e-06, "loss": 1.1106, "step": 2490 }, { "epoch": 1.01, "learning_rate": 7.5e-06, "loss": 1.2757, "step": 2500 }, { "epoch": 1.01, "learning_rate": 7.53e-06, "loss": 1.0142, "step": 2510 }, { "epoch": 1.01, "learning_rate": 7.5600000000000005e-06, "loss": 1.3197, "step": 2520 }, { "epoch": 1.01, "learning_rate": 7.59e-06, "loss": 1.1721, "step": 2530 }, { "epoch": 1.01, "learning_rate": 7.62e-06, "loss": 1.2866, "step": 2540 }, { "epoch": 1.01, "learning_rate": 7.65e-06, "loss": 1.2752, "step": 2550 }, { "epoch": 1.01, "learning_rate": 7.680000000000001e-06, "loss": 1.1496, "step": 2560 }, { "epoch": 1.01, "learning_rate": 7.71e-06, "loss": 1.0845, "step": 2570 }, { "epoch": 1.01, "learning_rate": 7.74e-06, "loss": 1.0634, "step": 2580 }, { "epoch": 1.01, "learning_rate": 7.77e-06, "loss": 1.179, "step": 2590 }, { "epoch": 1.01, "learning_rate": 7.8e-06, "loss": 1.2051, "step": 2600 }, { "epoch": 1.01, "learning_rate": 7.830000000000001e-06, "loss": 1.062, "step": 2610 }, { "epoch": 1.01, "learning_rate": 7.860000000000001e-06, "loss": 1.2288, "step": 2620 }, { "epoch": 1.01, "learning_rate": 7.89e-06, "loss": 1.4461, "step": 2630 }, { "epoch": 1.01, "learning_rate": 7.92e-06, "loss": 1.0729, "step": 2640 }, { "epoch": 1.01, "learning_rate": 7.95e-06, "loss": 1.1626, "step": 2650 }, { "epoch": 1.01, "learning_rate": 7.98e-06, "loss": 1.1844, "step": 2660 }, { "epoch": 1.01, "learning_rate": 8.010000000000001e-06, "loss": 1.2165, "step": 2670 }, { "epoch": 1.01, "learning_rate": 8.040000000000001e-06, "loss": 1.0876, "step": 2680 }, { "epoch": 1.01, "learning_rate": 8.07e-06, "loss": 1.2056, "step": 2690 }, { "epoch": 1.01, "learning_rate": 8.1e-06, "loss": 1.3396, "step": 2700 }, { "epoch": 1.01, "learning_rate": 8.13e-06, "loss": 1.165, "step": 2710 }, { "epoch": 1.01, "learning_rate": 8.160000000000001e-06, "loss": 0.9852, "step": 2720 }, { "epoch": 1.01, "learning_rate": 8.190000000000001e-06, "loss": 0.7788, "step": 2730 }, { "epoch": 1.01, "learning_rate": 8.220000000000001e-06, "loss": 1.1942, "step": 2740 }, { "epoch": 1.01, "learning_rate": 8.25e-06, "loss": 1.2522, "step": 2750 }, { "epoch": 1.02, "learning_rate": 8.28e-06, "loss": 1.2626, "step": 2760 }, { "epoch": 1.02, "learning_rate": 8.310000000000002e-06, "loss": 1.1067, "step": 2770 }, { "epoch": 1.02, "learning_rate": 8.340000000000001e-06, "loss": 1.0825, "step": 2780 }, { "epoch": 1.02, "learning_rate": 8.370000000000001e-06, "loss": 1.1901, "step": 2790 }, { "epoch": 1.02, "learning_rate": 8.400000000000001e-06, "loss": 1.2596, "step": 2800 }, { "epoch": 1.02, "learning_rate": 8.43e-06, "loss": 1.2528, "step": 2810 }, { "epoch": 1.02, "learning_rate": 8.459999999999999e-06, "loss": 1.1476, "step": 2820 }, { "epoch": 1.02, "learning_rate": 8.49e-06, "loss": 1.2297, "step": 2830 }, { "epoch": 1.02, "learning_rate": 8.52e-06, "loss": 1.0016, "step": 2840 }, { "epoch": 1.02, "learning_rate": 8.55e-06, "loss": 1.2551, "step": 2850 }, { "epoch": 1.02, "learning_rate": 8.58e-06, "loss": 0.9679, "step": 2860 }, { "epoch": 1.02, "learning_rate": 8.609999999999999e-06, "loss": 1.2398, "step": 2870 }, { "epoch": 1.02, "learning_rate": 8.64e-06, "loss": 1.0966, "step": 2880 }, { "epoch": 1.02, "learning_rate": 8.67e-06, "loss": 0.9264, "step": 2890 }, { "epoch": 1.02, "learning_rate": 8.7e-06, "loss": 1.1826, "step": 2900 }, { "epoch": 1.02, "learning_rate": 8.73e-06, "loss": 1.0171, "step": 2910 }, { "epoch": 1.02, "learning_rate": 8.759999999999999e-06, "loss": 1.0463, "step": 2920 }, { "epoch": 1.02, "learning_rate": 8.79e-06, "loss": 1.2805, "step": 2930 }, { "epoch": 1.02, "learning_rate": 8.82e-06, "loss": 1.1559, "step": 2940 }, { "epoch": 1.02, "learning_rate": 8.85e-06, "loss": 1.1426, "step": 2950 }, { "epoch": 1.02, "learning_rate": 8.88e-06, "loss": 1.3127, "step": 2960 }, { "epoch": 1.02, "learning_rate": 8.91e-06, "loss": 1.2136, "step": 2970 }, { "epoch": 1.02, "learning_rate": 8.939999999999999e-06, "loss": 1.2175, "step": 2980 }, { "epoch": 1.02, "learning_rate": 8.97e-06, "loss": 1.3208, "step": 2990 }, { "epoch": 1.02, "learning_rate": 9e-06, "loss": 1.2648, "step": 3000 }, { "epoch": 1.02, "learning_rate": 9.03e-06, "loss": 1.0219, "step": 3010 }, { "epoch": 1.02, "learning_rate": 9.06e-06, "loss": 1.1467, "step": 3020 }, { "epoch": 1.02, "learning_rate": 9.09e-06, "loss": 0.9967, "step": 3030 }, { "epoch": 1.02, "learning_rate": 9.12e-06, "loss": 0.9672, "step": 3040 }, { "epoch": 1.02, "learning_rate": 9.15e-06, "loss": 0.9971, "step": 3050 }, { "epoch": 1.02, "learning_rate": 9.18e-06, "loss": 1.2074, "step": 3060 }, { "epoch": 1.02, "learning_rate": 9.21e-06, "loss": 1.2956, "step": 3070 }, { "epoch": 1.02, "learning_rate": 9.24e-06, "loss": 1.0696, "step": 3080 }, { "epoch": 1.02, "learning_rate": 9.27e-06, "loss": 1.1306, "step": 3090 }, { "epoch": 1.02, "learning_rate": 9.3e-06, "loss": 0.9367, "step": 3100 }, { "epoch": 1.02, "learning_rate": 9.33e-06, "loss": 0.9672, "step": 3110 }, { "epoch": 1.02, "learning_rate": 9.36e-06, "loss": 1.0834, "step": 3120 }, { "epoch": 1.02, "learning_rate": 9.39e-06, "loss": 1.0347, "step": 3130 }, { "epoch": 1.02, "learning_rate": 9.42e-06, "loss": 0.881, "step": 3140 }, { "epoch": 1.02, "learning_rate": 9.450000000000001e-06, "loss": 0.8191, "step": 3150 }, { "epoch": 1.02, "learning_rate": 9.48e-06, "loss": 1.0785, "step": 3160 }, { "epoch": 1.02, "learning_rate": 9.51e-06, "loss": 1.1429, "step": 3170 }, { "epoch": 1.02, "learning_rate": 9.54e-06, "loss": 0.9583, "step": 3180 }, { "epoch": 1.02, "learning_rate": 9.57e-06, "loss": 1.3133, "step": 3190 }, { "epoch": 1.02, "learning_rate": 9.600000000000001e-06, "loss": 1.1462, "step": 3200 }, { "epoch": 1.02, "learning_rate": 9.630000000000001e-06, "loss": 1.1374, "step": 3210 }, { "epoch": 1.02, "learning_rate": 9.66e-06, "loss": 1.0824, "step": 3220 }, { "epoch": 1.02, "learning_rate": 9.69e-06, "loss": 1.2885, "step": 3230 }, { "epoch": 1.02, "learning_rate": 9.72e-06, "loss": 1.2692, "step": 3240 }, { "epoch": 1.02, "learning_rate": 9.75e-06, "loss": 1.4844, "step": 3250 }, { "epoch": 1.03, "learning_rate": 9.780000000000001e-06, "loss": 1.2084, "step": 3260 }, { "epoch": 1.03, "learning_rate": 9.810000000000001e-06, "loss": 1.1797, "step": 3270 }, { "epoch": 1.03, "learning_rate": 9.84e-06, "loss": 1.0209, "step": 3280 }, { "epoch": 1.03, "learning_rate": 9.87e-06, "loss": 1.1393, "step": 3290 }, { "epoch": 1.03, "learning_rate": 9.9e-06, "loss": 1.023, "step": 3300 }, { "epoch": 1.03, "learning_rate": 9.930000000000001e-06, "loss": 1.0185, "step": 3310 }, { "epoch": 1.03, "learning_rate": 9.960000000000001e-06, "loss": 1.0343, "step": 3320 }, { "epoch": 1.03, "learning_rate": 9.990000000000001e-06, "loss": 1.0599, "step": 3330 }, { "epoch": 1.03, "learning_rate": 1.002e-05, "loss": 0.9796, "step": 3340 }, { "epoch": 1.03, "learning_rate": 1.005e-05, "loss": 1.016, "step": 3350 }, { "epoch": 1.03, "learning_rate": 1.008e-05, "loss": 1.3834, "step": 3360 }, { "epoch": 1.03, "learning_rate": 1.0110000000000001e-05, "loss": 1.0219, "step": 3370 }, { "epoch": 1.03, "learning_rate": 1.0140000000000001e-05, "loss": 0.7479, "step": 3380 }, { "epoch": 1.03, "learning_rate": 1.0170000000000001e-05, "loss": 1.2832, "step": 3390 }, { "epoch": 1.03, "learning_rate": 1.02e-05, "loss": 1.2133, "step": 3400 }, { "epoch": 1.03, "learning_rate": 1.023e-05, "loss": 1.3182, "step": 3410 }, { "epoch": 1.03, "learning_rate": 1.0260000000000002e-05, "loss": 0.9505, "step": 3420 }, { "epoch": 1.03, "learning_rate": 1.0290000000000001e-05, "loss": 0.8651, "step": 3430 }, { "epoch": 1.03, "learning_rate": 1.032e-05, "loss": 0.9899, "step": 3440 }, { "epoch": 1.03, "learning_rate": 1.035e-05, "loss": 1.1628, "step": 3450 }, { "epoch": 1.03, "learning_rate": 1.0379999999999999e-05, "loss": 1.2309, "step": 3460 }, { "epoch": 1.03, "learning_rate": 1.041e-05, "loss": 1.0505, "step": 3470 }, { "epoch": 1.03, "learning_rate": 1.044e-05, "loss": 1.042, "step": 3480 }, { "epoch": 1.03, "learning_rate": 1.047e-05, "loss": 1.1551, "step": 3490 }, { "epoch": 1.03, "learning_rate": 1.05e-05, "loss": 1.0644, "step": 3500 }, { "epoch": 1.03, "learning_rate": 1.0529999999999999e-05, "loss": 1.0462, "step": 3510 }, { "epoch": 1.03, "learning_rate": 1.0559999999999999e-05, "loss": 1.0319, "step": 3520 }, { "epoch": 1.03, "learning_rate": 1.059e-05, "loss": 1.0456, "step": 3530 }, { "epoch": 1.03, "learning_rate": 1.062e-05, "loss": 0.9852, "step": 3540 }, { "epoch": 1.03, "learning_rate": 1.065e-05, "loss": 1.0286, "step": 3550 }, { "epoch": 1.03, "learning_rate": 1.068e-05, "loss": 1.0501, "step": 3560 }, { "epoch": 1.03, "learning_rate": 1.0709999999999999e-05, "loss": 1.0495, "step": 3570 }, { "epoch": 1.03, "learning_rate": 1.074e-05, "loss": 0.947, "step": 3580 }, { "epoch": 1.03, "learning_rate": 1.077e-05, "loss": 0.9354, "step": 3590 }, { "epoch": 1.03, "learning_rate": 1.08e-05, "loss": 0.9716, "step": 3600 }, { "epoch": 1.03, "learning_rate": 1.083e-05, "loss": 1.0102, "step": 3610 }, { "epoch": 1.03, "learning_rate": 1.086e-05, "loss": 1.2956, "step": 3620 }, { "epoch": 1.03, "learning_rate": 1.089e-05, "loss": 1.311, "step": 3630 }, { "epoch": 1.03, "learning_rate": 1.092e-05, "loss": 1.2648, "step": 3640 }, { "epoch": 1.03, "learning_rate": 1.095e-05, "loss": 1.1586, "step": 3650 }, { "epoch": 1.03, "learning_rate": 1.098e-05, "loss": 1.0152, "step": 3660 }, { "epoch": 1.03, "learning_rate": 1.101e-05, "loss": 1.0581, "step": 3670 }, { "epoch": 1.03, "learning_rate": 1.104e-05, "loss": 1.0796, "step": 3680 }, { "epoch": 1.03, "learning_rate": 1.107e-05, "loss": 1.078, "step": 3690 }, { "epoch": 1.03, "learning_rate": 1.11e-05, "loss": 1.2068, "step": 3700 }, { "epoch": 1.03, "learning_rate": 1.113e-05, "loss": 1.0106, "step": 3710 }, { "epoch": 1.03, "learning_rate": 1.116e-05, "loss": 1.1514, "step": 3720 }, { "epoch": 1.03, "learning_rate": 1.119e-05, "loss": 0.9577, "step": 3730 }, { "epoch": 1.03, "learning_rate": 1.1220000000000001e-05, "loss": 1.2864, "step": 3740 }, { "epoch": 1.03, "learning_rate": 1.125e-05, "loss": 1.1648, "step": 3750 }, { "epoch": 1.04, "learning_rate": 1.128e-05, "loss": 1.1744, "step": 3760 }, { "epoch": 1.04, "learning_rate": 1.131e-05, "loss": 1.0227, "step": 3770 }, { "epoch": 1.04, "learning_rate": 1.134e-05, "loss": 0.8843, "step": 3780 }, { "epoch": 1.04, "learning_rate": 1.137e-05, "loss": 0.9374, "step": 3790 }, { "epoch": 1.04, "learning_rate": 1.1400000000000001e-05, "loss": 1.045, "step": 3800 }, { "epoch": 1.04, "learning_rate": 1.143e-05, "loss": 1.1581, "step": 3810 }, { "epoch": 1.04, "learning_rate": 1.146e-05, "loss": 1.0742, "step": 3820 }, { "epoch": 1.04, "learning_rate": 1.149e-05, "loss": 1.2959, "step": 3830 }, { "epoch": 1.04, "learning_rate": 1.152e-05, "loss": 1.1807, "step": 3840 }, { "epoch": 1.04, "learning_rate": 1.1550000000000001e-05, "loss": 1.0489, "step": 3850 }, { "epoch": 1.04, "learning_rate": 1.1580000000000001e-05, "loss": 1.1499, "step": 3860 }, { "epoch": 1.04, "learning_rate": 1.161e-05, "loss": 1.1995, "step": 3870 }, { "epoch": 1.04, "learning_rate": 1.164e-05, "loss": 0.9318, "step": 3880 }, { "epoch": 1.04, "learning_rate": 1.167e-05, "loss": 1.0526, "step": 3890 }, { "epoch": 1.04, "learning_rate": 1.1700000000000001e-05, "loss": 1.1047, "step": 3900 }, { "epoch": 1.04, "learning_rate": 1.1730000000000001e-05, "loss": 1.0165, "step": 3910 }, { "epoch": 1.04, "learning_rate": 1.1760000000000001e-05, "loss": 1.188, "step": 3920 }, { "epoch": 1.04, "learning_rate": 1.179e-05, "loss": 1.137, "step": 3930 }, { "epoch": 1.04, "learning_rate": 1.182e-05, "loss": 1.1563, "step": 3940 }, { "epoch": 1.04, "learning_rate": 1.185e-05, "loss": 1.1343, "step": 3950 }, { "epoch": 1.04, "learning_rate": 1.1880000000000001e-05, "loss": 1.1096, "step": 3960 }, { "epoch": 1.04, "learning_rate": 1.1910000000000001e-05, "loss": 1.1273, "step": 3970 }, { "epoch": 1.04, "learning_rate": 1.1940000000000001e-05, "loss": 0.976, "step": 3980 }, { "epoch": 1.04, "learning_rate": 1.197e-05, "loss": 0.9634, "step": 3990 }, { "epoch": 1.04, "learning_rate": 1.2e-05, "loss": 1.0115, "step": 4000 }, { "epoch": 1.04, "eval_accuracy": 0.6684210526315789, "eval_f1": 0.6684210526315789, "eval_loss": 1.0522350072860718, "eval_runtime": 771.0264, "eval_samples_per_second": 6.161, "eval_steps_per_second": 1.541, "step": 4000 }, { "epoch": 2.0, "learning_rate": 1.2030000000000002e-05, "loss": 1.1145, "step": 4010 }, { "epoch": 2.0, "learning_rate": 1.2060000000000001e-05, "loss": 1.378, "step": 4020 }, { "epoch": 2.0, "learning_rate": 1.2090000000000001e-05, "loss": 1.1337, "step": 4030 }, { "epoch": 2.0, "learning_rate": 1.2120000000000001e-05, "loss": 0.8445, "step": 4040 }, { "epoch": 2.0, "learning_rate": 1.215e-05, "loss": 0.8296, "step": 4050 }, { "epoch": 2.0, "learning_rate": 1.2180000000000002e-05, "loss": 1.2409, "step": 4060 }, { "epoch": 2.0, "learning_rate": 1.221e-05, "loss": 1.0398, "step": 4070 }, { "epoch": 2.0, "learning_rate": 1.224e-05, "loss": 0.9204, "step": 4080 }, { "epoch": 2.0, "learning_rate": 1.227e-05, "loss": 0.9676, "step": 4090 }, { "epoch": 2.0, "learning_rate": 1.2299999999999999e-05, "loss": 1.0203, "step": 4100 }, { "epoch": 2.0, "learning_rate": 1.2329999999999999e-05, "loss": 1.1571, "step": 4110 }, { "epoch": 2.0, "learning_rate": 1.236e-05, "loss": 1.2317, "step": 4120 }, { "epoch": 2.0, "learning_rate": 1.239e-05, "loss": 1.004, "step": 4130 }, { "epoch": 2.0, "learning_rate": 1.242e-05, "loss": 1.3242, "step": 4140 }, { "epoch": 2.0, "learning_rate": 1.245e-05, "loss": 1.0648, "step": 4150 }, { "epoch": 2.0, "learning_rate": 1.2479999999999999e-05, "loss": 0.8635, "step": 4160 }, { "epoch": 2.0, "learning_rate": 1.251e-05, "loss": 0.9419, "step": 4170 }, { "epoch": 2.0, "learning_rate": 1.254e-05, "loss": 1.0629, "step": 4180 }, { "epoch": 2.0, "learning_rate": 1.257e-05, "loss": 1.0586, "step": 4190 }, { "epoch": 2.0, "learning_rate": 1.26e-05, "loss": 0.9666, "step": 4200 }, { "epoch": 2.0, "learning_rate": 1.263e-05, "loss": 0.8782, "step": 4210 }, { "epoch": 2.0, "learning_rate": 1.2659999999999999e-05, "loss": 1.1148, "step": 4220 }, { "epoch": 2.0, "learning_rate": 1.269e-05, "loss": 0.9179, "step": 4230 }, { "epoch": 2.0, "learning_rate": 1.272e-05, "loss": 1.1387, "step": 4240 }, { "epoch": 2.0, "learning_rate": 1.275e-05, "loss": 1.1456, "step": 4250 }, { "epoch": 2.01, "learning_rate": 1.278e-05, "loss": 1.1761, "step": 4260 }, { "epoch": 2.01, "learning_rate": 1.281e-05, "loss": 0.7777, "step": 4270 }, { "epoch": 2.01, "learning_rate": 1.284e-05, "loss": 0.9239, "step": 4280 }, { "epoch": 2.01, "learning_rate": 1.287e-05, "loss": 0.8541, "step": 4290 }, { "epoch": 2.01, "learning_rate": 1.29e-05, "loss": 1.1384, "step": 4300 }, { "epoch": 2.01, "learning_rate": 1.293e-05, "loss": 0.9394, "step": 4310 }, { "epoch": 2.01, "learning_rate": 1.296e-05, "loss": 0.9221, "step": 4320 }, { "epoch": 2.01, "learning_rate": 1.2990000000000001e-05, "loss": 1.0862, "step": 4330 }, { "epoch": 2.01, "learning_rate": 1.302e-05, "loss": 1.1334, "step": 4340 }, { "epoch": 2.01, "learning_rate": 1.305e-05, "loss": 1.1778, "step": 4350 }, { "epoch": 2.01, "learning_rate": 1.308e-05, "loss": 0.9096, "step": 4360 }, { "epoch": 2.01, "learning_rate": 1.311e-05, "loss": 1.006, "step": 4370 }, { "epoch": 2.01, "learning_rate": 1.314e-05, "loss": 1.0986, "step": 4380 }, { "epoch": 2.01, "learning_rate": 1.3170000000000001e-05, "loss": 1.0341, "step": 4390 }, { "epoch": 2.01, "learning_rate": 1.32e-05, "loss": 1.1357, "step": 4400 }, { "epoch": 2.01, "learning_rate": 1.323e-05, "loss": 0.9352, "step": 4410 }, { "epoch": 2.01, "learning_rate": 1.326e-05, "loss": 1.1474, "step": 4420 }, { "epoch": 2.01, "learning_rate": 1.329e-05, "loss": 0.8776, "step": 4430 }, { "epoch": 2.01, "learning_rate": 1.3320000000000001e-05, "loss": 0.945, "step": 4440 }, { "epoch": 2.01, "learning_rate": 1.3350000000000001e-05, "loss": 1.067, "step": 4450 }, { "epoch": 2.01, "learning_rate": 1.338e-05, "loss": 0.6447, "step": 4460 }, { "epoch": 2.01, "learning_rate": 1.341e-05, "loss": 1.123, "step": 4470 }, { "epoch": 2.01, "learning_rate": 1.344e-05, "loss": 0.9915, "step": 4480 }, { "epoch": 2.01, "learning_rate": 1.3470000000000001e-05, "loss": 1.0084, "step": 4490 }, { "epoch": 2.01, "learning_rate": 1.3500000000000001e-05, "loss": 1.0558, "step": 4500 }, { "epoch": 2.01, "learning_rate": 1.3530000000000001e-05, "loss": 0.9298, "step": 4510 }, { "epoch": 2.01, "learning_rate": 1.356e-05, "loss": 0.948, "step": 4520 }, { "epoch": 2.01, "learning_rate": 1.359e-05, "loss": 1.0212, "step": 4530 }, { "epoch": 2.01, "learning_rate": 1.362e-05, "loss": 1.1543, "step": 4540 }, { "epoch": 2.01, "learning_rate": 1.3650000000000001e-05, "loss": 1.1628, "step": 4550 }, { "epoch": 2.01, "learning_rate": 1.3680000000000001e-05, "loss": 1.231, "step": 4560 }, { "epoch": 2.01, "learning_rate": 1.3710000000000001e-05, "loss": 1.251, "step": 4570 }, { "epoch": 2.01, "learning_rate": 1.374e-05, "loss": 0.821, "step": 4580 }, { "epoch": 2.01, "learning_rate": 1.377e-05, "loss": 0.786, "step": 4590 }, { "epoch": 2.01, "learning_rate": 1.3800000000000002e-05, "loss": 1.1307, "step": 4600 }, { "epoch": 2.01, "learning_rate": 1.3830000000000001e-05, "loss": 1.1182, "step": 4610 }, { "epoch": 2.01, "learning_rate": 1.3860000000000001e-05, "loss": 1.0388, "step": 4620 }, { "epoch": 2.01, "learning_rate": 1.389e-05, "loss": 1.0585, "step": 4630 }, { "epoch": 2.01, "learning_rate": 1.392e-05, "loss": 0.7796, "step": 4640 }, { "epoch": 2.01, "learning_rate": 1.395e-05, "loss": 1.1808, "step": 4650 }, { "epoch": 2.01, "learning_rate": 1.3980000000000002e-05, "loss": 0.9879, "step": 4660 }, { "epoch": 2.01, "learning_rate": 1.4010000000000001e-05, "loss": 1.0262, "step": 4670 }, { "epoch": 2.01, "learning_rate": 1.4040000000000001e-05, "loss": 0.9567, "step": 4680 }, { "epoch": 2.01, "learning_rate": 1.4069999999999999e-05, "loss": 1.2993, "step": 4690 }, { "epoch": 2.01, "learning_rate": 1.4099999999999999e-05, "loss": 1.024, "step": 4700 }, { "epoch": 2.01, "learning_rate": 1.413e-05, "loss": 1.0662, "step": 4710 }, { "epoch": 2.01, "learning_rate": 1.416e-05, "loss": 1.2413, "step": 4720 }, { "epoch": 2.01, "learning_rate": 1.419e-05, "loss": 1.0022, "step": 4730 }, { "epoch": 2.01, "learning_rate": 1.422e-05, "loss": 0.807, "step": 4740 }, { "epoch": 2.02, "learning_rate": 1.4249999999999999e-05, "loss": 1.2506, "step": 4750 }, { "epoch": 2.02, "learning_rate": 1.428e-05, "loss": 1.1809, "step": 4760 }, { "epoch": 2.02, "learning_rate": 1.431e-05, "loss": 1.0797, "step": 4770 }, { "epoch": 2.02, "learning_rate": 1.434e-05, "loss": 0.9592, "step": 4780 }, { "epoch": 2.02, "learning_rate": 1.437e-05, "loss": 1.0962, "step": 4790 }, { "epoch": 2.02, "learning_rate": 1.44e-05, "loss": 1.1471, "step": 4800 }, { "epoch": 2.02, "learning_rate": 1.4429999999999999e-05, "loss": 0.9697, "step": 4810 }, { "epoch": 2.02, "learning_rate": 1.446e-05, "loss": 1.0342, "step": 4820 }, { "epoch": 2.02, "learning_rate": 1.449e-05, "loss": 0.8653, "step": 4830 }, { "epoch": 2.02, "learning_rate": 1.452e-05, "loss": 1.1813, "step": 4840 }, { "epoch": 2.02, "learning_rate": 1.455e-05, "loss": 1.097, "step": 4850 }, { "epoch": 2.02, "learning_rate": 1.458e-05, "loss": 1.3181, "step": 4860 }, { "epoch": 2.02, "learning_rate": 1.461e-05, "loss": 1.0778, "step": 4870 }, { "epoch": 2.02, "learning_rate": 1.464e-05, "loss": 1.096, "step": 4880 }, { "epoch": 2.02, "learning_rate": 1.467e-05, "loss": 0.8033, "step": 4890 }, { "epoch": 2.02, "learning_rate": 1.47e-05, "loss": 1.0194, "step": 4900 }, { "epoch": 2.02, "learning_rate": 1.473e-05, "loss": 1.0501, "step": 4910 }, { "epoch": 2.02, "learning_rate": 1.4760000000000001e-05, "loss": 0.7855, "step": 4920 }, { "epoch": 2.02, "learning_rate": 1.479e-05, "loss": 1.151, "step": 4930 }, { "epoch": 2.02, "learning_rate": 1.482e-05, "loss": 1.072, "step": 4940 }, { "epoch": 2.02, "learning_rate": 1.485e-05, "loss": 1.0244, "step": 4950 }, { "epoch": 2.02, "learning_rate": 1.488e-05, "loss": 0.9692, "step": 4960 }, { "epoch": 2.02, "learning_rate": 1.491e-05, "loss": 1.0249, "step": 4970 }, { "epoch": 2.02, "learning_rate": 1.4940000000000001e-05, "loss": 0.964, "step": 4980 }, { "epoch": 2.02, "learning_rate": 1.497e-05, "loss": 0.9706, "step": 4990 }, { "epoch": 2.02, "learning_rate": 1.5e-05, "loss": 1.2112, "step": 5000 }, { "epoch": 2.02, "learning_rate": 1.4999998172295556e-05, "loss": 0.8264, "step": 5010 }, { "epoch": 2.02, "learning_rate": 1.4999992689183113e-05, "loss": 0.952, "step": 5020 }, { "epoch": 2.02, "learning_rate": 1.4999983550665345e-05, "loss": 1.3463, "step": 5030 }, { "epoch": 2.02, "learning_rate": 1.4999970756746704e-05, "loss": 0.9856, "step": 5040 }, { "epoch": 2.02, "learning_rate": 1.4999954307433428e-05, "loss": 0.904, "step": 5050 }, { "epoch": 2.02, "learning_rate": 1.4999934202733533e-05, "loss": 0.9741, "step": 5060 }, { "epoch": 2.02, "learning_rate": 1.4999910442656817e-05, "loss": 0.8908, "step": 5070 }, { "epoch": 2.02, "learning_rate": 1.4999883027214862e-05, "loss": 1.2765, "step": 5080 }, { "epoch": 2.02, "learning_rate": 1.4999851956421028e-05, "loss": 0.9875, "step": 5090 }, { "epoch": 2.02, "learning_rate": 1.499981723029046e-05, "loss": 0.7411, "step": 5100 }, { "epoch": 2.02, "learning_rate": 1.4999778848840085e-05, "loss": 0.9148, "step": 5110 }, { "epoch": 2.02, "learning_rate": 1.4999736812088606e-05, "loss": 0.7769, "step": 5120 }, { "epoch": 2.02, "learning_rate": 1.4999691120056512e-05, "loss": 0.8871, "step": 5130 }, { "epoch": 2.02, "learning_rate": 1.4999641772766074e-05, "loss": 0.9924, "step": 5140 }, { "epoch": 2.02, "learning_rate": 1.4999588770241342e-05, "loss": 1.062, "step": 5150 }, { "epoch": 2.02, "learning_rate": 1.499953211250815e-05, "loss": 1.0356, "step": 5160 }, { "epoch": 2.02, "learning_rate": 1.499947179959411e-05, "loss": 0.9518, "step": 5170 }, { "epoch": 2.02, "learning_rate": 1.4999407831528622e-05, "loss": 1.1531, "step": 5180 }, { "epoch": 2.02, "learning_rate": 1.4999340208342858e-05, "loss": 0.9666, "step": 5190 }, { "epoch": 2.02, "learning_rate": 1.4999268930069782e-05, "loss": 1.3574, "step": 5200 }, { "epoch": 2.02, "learning_rate": 1.4999193996744131e-05, "loss": 0.9965, "step": 5210 }, { "epoch": 2.02, "learning_rate": 1.4999115408402427e-05, "loss": 1.0885, "step": 5220 }, { "epoch": 2.02, "learning_rate": 1.4999033165082974e-05, "loss": 0.9979, "step": 5230 }, { "epoch": 2.02, "learning_rate": 1.4998947266825853e-05, "loss": 1.0737, "step": 5240 }, { "epoch": 2.02, "learning_rate": 1.4998857713672935e-05, "loss": 0.9925, "step": 5250 }, { "epoch": 2.03, "learning_rate": 1.4998764505667862e-05, "loss": 1.1046, "step": 5260 }, { "epoch": 2.03, "learning_rate": 1.4998667642856068e-05, "loss": 0.9812, "step": 5270 }, { "epoch": 2.03, "learning_rate": 1.4998567125284757e-05, "loss": 1.0785, "step": 5280 }, { "epoch": 2.03, "learning_rate": 1.4998462953002925e-05, "loss": 1.0161, "step": 5290 }, { "epoch": 2.03, "learning_rate": 1.4998355126061342e-05, "loss": 0.8528, "step": 5300 }, { "epoch": 2.03, "learning_rate": 1.499824364451256e-05, "loss": 1.0053, "step": 5310 }, { "epoch": 2.03, "learning_rate": 1.4998128508410916e-05, "loss": 1.0668, "step": 5320 }, { "epoch": 2.03, "learning_rate": 1.4998009717812525e-05, "loss": 0.9532, "step": 5330 }, { "epoch": 2.03, "learning_rate": 1.4997887272775285e-05, "loss": 0.8229, "step": 5340 }, { "epoch": 2.03, "learning_rate": 1.4997761173358875e-05, "loss": 1.0487, "step": 5350 }, { "epoch": 2.03, "learning_rate": 1.499763141962475e-05, "loss": 0.8786, "step": 5360 }, { "epoch": 2.03, "learning_rate": 1.4997498011636154e-05, "loss": 0.9382, "step": 5370 }, { "epoch": 2.03, "learning_rate": 1.499736094945811e-05, "loss": 1.2307, "step": 5380 }, { "epoch": 2.03, "learning_rate": 1.4997220233157415e-05, "loss": 1.0116, "step": 5390 }, { "epoch": 2.03, "learning_rate": 1.4997075862802657e-05, "loss": 1.0605, "step": 5400 }, { "epoch": 2.03, "learning_rate": 1.49969278384642e-05, "loss": 0.9716, "step": 5410 }, { "epoch": 2.03, "learning_rate": 1.4996776160214188e-05, "loss": 1.1633, "step": 5420 }, { "epoch": 2.03, "learning_rate": 1.4996620828126546e-05, "loss": 0.7762, "step": 5430 }, { "epoch": 2.03, "learning_rate": 1.4996461842276982e-05, "loss": 1.1011, "step": 5440 }, { "epoch": 2.03, "learning_rate": 1.4996299202742987e-05, "loss": 1.0375, "step": 5450 }, { "epoch": 2.03, "learning_rate": 1.4996132909603825e-05, "loss": 1.0724, "step": 5460 }, { "epoch": 2.03, "learning_rate": 1.4995962962940547e-05, "loss": 1.5755, "step": 5470 }, { "epoch": 2.03, "learning_rate": 1.4995789362835983e-05, "loss": 0.9854, "step": 5480 }, { "epoch": 2.03, "learning_rate": 1.4995612109374742e-05, "loss": 0.8795, "step": 5490 }, { "epoch": 2.03, "learning_rate": 1.4995431202643219e-05, "loss": 0.8795, "step": 5500 }, { "epoch": 2.03, "learning_rate": 1.499524664272958e-05, "loss": 0.9471, "step": 5510 }, { "epoch": 2.03, "learning_rate": 1.4995058429723783e-05, "loss": 1.116, "step": 5520 }, { "epoch": 2.03, "learning_rate": 1.499486656371756e-05, "loss": 1.0457, "step": 5530 }, { "epoch": 2.03, "learning_rate": 1.4994671044804419e-05, "loss": 1.1661, "step": 5540 }, { "epoch": 2.03, "learning_rate": 1.4994471873079658e-05, "loss": 0.9537, "step": 5550 }, { "epoch": 2.03, "learning_rate": 1.499426904864035e-05, "loss": 0.9942, "step": 5560 }, { "epoch": 2.03, "learning_rate": 1.4994062571585351e-05, "loss": 1.101, "step": 5570 }, { "epoch": 2.03, "learning_rate": 1.4993852442015293e-05, "loss": 1.3061, "step": 5580 }, { "epoch": 2.03, "learning_rate": 1.499363866003259e-05, "loss": 1.0937, "step": 5590 }, { "epoch": 2.03, "learning_rate": 1.4993421225741438e-05, "loss": 0.8348, "step": 5600 }, { "epoch": 2.03, "learning_rate": 1.4993200139247813e-05, "loss": 1.1291, "step": 5610 }, { "epoch": 2.03, "learning_rate": 1.4992975400659466e-05, "loss": 0.9377, "step": 5620 }, { "epoch": 2.03, "learning_rate": 1.4992747010085936e-05, "loss": 0.9313, "step": 5630 }, { "epoch": 2.03, "learning_rate": 1.4992514967638537e-05, "loss": 1.1957, "step": 5640 }, { "epoch": 2.03, "learning_rate": 1.4992279273430361e-05, "loss": 0.9746, "step": 5650 }, { "epoch": 2.03, "learning_rate": 1.4992039927576285e-05, "loss": 1.1093, "step": 5660 }, { "epoch": 2.03, "learning_rate": 1.4991796930192962e-05, "loss": 0.9546, "step": 5670 }, { "epoch": 2.03, "learning_rate": 1.4991550281398828e-05, "loss": 0.9618, "step": 5680 }, { "epoch": 2.03, "learning_rate": 1.4991299981314094e-05, "loss": 0.9739, "step": 5690 }, { "epoch": 2.03, "learning_rate": 1.4991046030060756e-05, "loss": 0.9386, "step": 5700 }, { "epoch": 2.03, "learning_rate": 1.4990788427762585e-05, "loss": 1.2641, "step": 5710 }, { "epoch": 2.03, "learning_rate": 1.4990527174545132e-05, "loss": 1.0383, "step": 5720 }, { "epoch": 2.03, "learning_rate": 1.499026227053573e-05, "loss": 0.8838, "step": 5730 }, { "epoch": 2.03, "learning_rate": 1.498999371586349e-05, "loss": 1.0993, "step": 5740 }, { "epoch": 2.04, "learning_rate": 1.4989721510659303e-05, "loss": 1.2501, "step": 5750 }, { "epoch": 2.04, "learning_rate": 1.498944565505584e-05, "loss": 1.2304, "step": 5760 }, { "epoch": 2.04, "learning_rate": 1.4989166149187544e-05, "loss": 0.9101, "step": 5770 }, { "epoch": 2.04, "learning_rate": 1.498888299319065e-05, "loss": 1.1794, "step": 5780 }, { "epoch": 2.04, "learning_rate": 1.4988596187203158e-05, "loss": 0.9571, "step": 5790 }, { "epoch": 2.04, "learning_rate": 1.4988305731364858e-05, "loss": 1.2548, "step": 5800 }, { "epoch": 2.04, "learning_rate": 1.4988011625817314e-05, "loss": 1.0551, "step": 5810 }, { "epoch": 2.04, "learning_rate": 1.4987713870703869e-05, "loss": 1.1907, "step": 5820 }, { "epoch": 2.04, "learning_rate": 1.4987412466169642e-05, "loss": 1.0176, "step": 5830 }, { "epoch": 2.04, "learning_rate": 1.4987107412361541e-05, "loss": 1.0289, "step": 5840 }, { "epoch": 2.04, "learning_rate": 1.4986798709428242e-05, "loss": 1.3242, "step": 5850 }, { "epoch": 2.04, "learning_rate": 1.49864863575202e-05, "loss": 1.2081, "step": 5860 }, { "epoch": 2.04, "learning_rate": 1.4986170356789654e-05, "loss": 0.8864, "step": 5870 }, { "epoch": 2.04, "learning_rate": 1.4985850707390621e-05, "loss": 0.7922, "step": 5880 }, { "epoch": 2.04, "learning_rate": 1.4985527409478893e-05, "loss": 0.9615, "step": 5890 }, { "epoch": 2.04, "learning_rate": 1.4985200463212038e-05, "loss": 0.8607, "step": 5900 }, { "epoch": 2.04, "learning_rate": 1.498486986874941e-05, "loss": 0.8566, "step": 5910 }, { "epoch": 2.04, "learning_rate": 1.4984535626252133e-05, "loss": 1.2487, "step": 5920 }, { "epoch": 2.04, "learning_rate": 1.4984197735883119e-05, "loss": 1.1537, "step": 5930 }, { "epoch": 2.04, "learning_rate": 1.4983856197807045e-05, "loss": 1.1069, "step": 5940 }, { "epoch": 2.04, "learning_rate": 1.4983511012190374e-05, "loss": 0.6814, "step": 5950 }, { "epoch": 2.04, "learning_rate": 1.498316217920135e-05, "loss": 1.1646, "step": 5960 }, { "epoch": 2.04, "learning_rate": 1.4982809699009982e-05, "loss": 1.0384, "step": 5970 }, { "epoch": 2.04, "learning_rate": 1.4982453571788074e-05, "loss": 1.0221, "step": 5980 }, { "epoch": 2.04, "learning_rate": 1.4982093797709188e-05, "loss": 1.0354, "step": 5990 }, { "epoch": 2.04, "learning_rate": 1.4981730376948682e-05, "loss": 0.9749, "step": 6000 }, { "epoch": 2.04, "eval_accuracy": 0.7536842105263157, "eval_f1": 0.7536842105263157, "eval_loss": 0.9298247694969177, "eval_runtime": 753.1546, "eval_samples_per_second": 6.307, "eval_steps_per_second": 1.577, "step": 6000 }, { "epoch": 3.0, "learning_rate": 1.4981363309683678e-05, "loss": 1.0715, "step": 6010 }, { "epoch": 3.0, "learning_rate": 1.4980992596093081e-05, "loss": 0.7671, "step": 6020 }, { "epoch": 3.0, "learning_rate": 1.4980618236357574e-05, "loss": 1.0827, "step": 6030 }, { "epoch": 3.0, "learning_rate": 1.4980240230659615e-05, "loss": 0.8094, "step": 6040 }, { "epoch": 3.0, "learning_rate": 1.4979858579183435e-05, "loss": 1.0124, "step": 6050 }, { "epoch": 3.0, "learning_rate": 1.4979473282115054e-05, "loss": 1.0736, "step": 6060 }, { "epoch": 3.0, "learning_rate": 1.4979084339642255e-05, "loss": 0.9094, "step": 6070 }, { "epoch": 3.0, "learning_rate": 1.4978691751954603e-05, "loss": 0.8073, "step": 6080 }, { "epoch": 3.0, "learning_rate": 1.497829551924345e-05, "loss": 1.0181, "step": 6090 }, { "epoch": 3.0, "learning_rate": 1.4977895641701902e-05, "loss": 0.9183, "step": 6100 }, { "epoch": 3.0, "learning_rate": 1.4977492119524863e-05, "loss": 0.7493, "step": 6110 }, { "epoch": 3.0, "learning_rate": 1.4977084952909003e-05, "loss": 1.0122, "step": 6120 }, { "epoch": 3.0, "learning_rate": 1.4976674142052769e-05, "loss": 0.9298, "step": 6130 }, { "epoch": 3.0, "learning_rate": 1.4976259687156385e-05, "loss": 0.9499, "step": 6140 }, { "epoch": 3.0, "learning_rate": 1.4975841588421854e-05, "loss": 1.0176, "step": 6150 }, { "epoch": 3.0, "learning_rate": 1.497541984605295e-05, "loss": 1.1361, "step": 6160 }, { "epoch": 3.0, "learning_rate": 1.4974994460255223e-05, "loss": 0.7681, "step": 6170 }, { "epoch": 3.0, "learning_rate": 1.4974565431236006e-05, "loss": 0.8797, "step": 6180 }, { "epoch": 3.0, "learning_rate": 1.49741327592044e-05, "loss": 0.9948, "step": 6190 }, { "epoch": 3.0, "learning_rate": 1.4973696444371283e-05, "loss": 1.2721, "step": 6200 }, { "epoch": 3.0, "learning_rate": 1.497325648694931e-05, "loss": 0.9341, "step": 6210 }, { "epoch": 3.0, "learning_rate": 1.4972812887152913e-05, "loss": 0.9105, "step": 6220 }, { "epoch": 3.0, "learning_rate": 1.4972365645198294e-05, "loss": 1.1488, "step": 6230 }, { "epoch": 3.0, "learning_rate": 1.4971914761303436e-05, "loss": 1.1213, "step": 6240 }, { "epoch": 3.0, "learning_rate": 1.4971460235688093e-05, "loss": 0.9571, "step": 6250 }, { "epoch": 3.01, "learning_rate": 1.4971002068573793e-05, "loss": 0.8745, "step": 6260 }, { "epoch": 3.01, "learning_rate": 1.4970540260183847e-05, "loss": 1.1222, "step": 6270 }, { "epoch": 3.01, "learning_rate": 1.497007481074333e-05, "loss": 0.9804, "step": 6280 }, { "epoch": 3.01, "learning_rate": 1.4969605720479096e-05, "loss": 0.933, "step": 6290 }, { "epoch": 3.01, "learning_rate": 1.4969132989619776e-05, "loss": 1.0085, "step": 6300 }, { "epoch": 3.01, "learning_rate": 1.4968656618395776e-05, "loss": 0.8859, "step": 6310 }, { "epoch": 3.01, "learning_rate": 1.496817660703927e-05, "loss": 0.7726, "step": 6320 }, { "epoch": 3.01, "learning_rate": 1.4967692955784207e-05, "loss": 1.1581, "step": 6330 }, { "epoch": 3.01, "learning_rate": 1.4967205664866318e-05, "loss": 0.9762, "step": 6340 }, { "epoch": 3.01, "learning_rate": 1.4966714734523101e-05, "loss": 0.7109, "step": 6350 }, { "epoch": 3.01, "learning_rate": 1.4966220164993826e-05, "loss": 0.9304, "step": 6360 }, { "epoch": 3.01, "learning_rate": 1.4965721956519547e-05, "loss": 0.7953, "step": 6370 }, { "epoch": 3.01, "learning_rate": 1.496522010934308e-05, "loss": 1.2858, "step": 6380 }, { "epoch": 3.01, "learning_rate": 1.4964714623709019e-05, "loss": 1.1374, "step": 6390 }, { "epoch": 3.01, "learning_rate": 1.4964205499863734e-05, "loss": 1.2635, "step": 6400 }, { "epoch": 3.01, "learning_rate": 1.4963692738055364e-05, "loss": 1.2273, "step": 6410 }, { "epoch": 3.01, "learning_rate": 1.4963176338533823e-05, "loss": 0.9485, "step": 6420 }, { "epoch": 3.01, "learning_rate": 1.49626563015508e-05, "loss": 0.8397, "step": 6430 }, { "epoch": 3.01, "learning_rate": 1.4962132627359753e-05, "loss": 0.9709, "step": 6440 }, { "epoch": 3.01, "learning_rate": 1.4961605316215913e-05, "loss": 0.7586, "step": 6450 }, { "epoch": 3.01, "learning_rate": 1.4961074368376286e-05, "loss": 1.2284, "step": 6460 }, { "epoch": 3.01, "learning_rate": 1.496053978409965e-05, "loss": 1.1168, "step": 6470 }, { "epoch": 3.01, "learning_rate": 1.4960001563646557e-05, "loss": 0.9851, "step": 6480 }, { "epoch": 3.01, "learning_rate": 1.4959459707279325e-05, "loss": 1.106, "step": 6490 }, { "epoch": 3.01, "learning_rate": 1.495891421526205e-05, "loss": 1.0387, "step": 6500 }, { "epoch": 3.01, "learning_rate": 1.49583650878606e-05, "loss": 0.8843, "step": 6510 }, { "epoch": 3.01, "learning_rate": 1.495781232534261e-05, "loss": 1.2072, "step": 6520 }, { "epoch": 3.01, "learning_rate": 1.4957255927977493e-05, "loss": 0.7769, "step": 6530 }, { "epoch": 3.01, "learning_rate": 1.4956695896036427e-05, "loss": 0.7896, "step": 6540 }, { "epoch": 3.01, "learning_rate": 1.4956132229792366e-05, "loss": 0.9106, "step": 6550 }, { "epoch": 3.01, "learning_rate": 1.4955564929520036e-05, "loss": 1.1088, "step": 6560 }, { "epoch": 3.01, "learning_rate": 1.4954993995495928e-05, "loss": 1.0579, "step": 6570 }, { "epoch": 3.01, "learning_rate": 1.4954419427998312e-05, "loss": 1.0675, "step": 6580 }, { "epoch": 3.01, "learning_rate": 1.4953841227307225e-05, "loss": 1.046, "step": 6590 }, { "epoch": 3.01, "learning_rate": 1.4953259393704474e-05, "loss": 0.7583, "step": 6600 }, { "epoch": 3.01, "learning_rate": 1.4952673927473636e-05, "loss": 0.9493, "step": 6610 }, { "epoch": 3.01, "learning_rate": 1.4952084828900064e-05, "loss": 0.9263, "step": 6620 }, { "epoch": 3.01, "learning_rate": 1.4951492098270874e-05, "loss": 0.9671, "step": 6630 }, { "epoch": 3.01, "learning_rate": 1.4950895735874958e-05, "loss": 1.1237, "step": 6640 }, { "epoch": 3.01, "learning_rate": 1.4950295742002972e-05, "loss": 1.056, "step": 6650 }, { "epoch": 3.01, "learning_rate": 1.4949692116947354e-05, "loss": 1.439, "step": 6660 }, { "epoch": 3.01, "learning_rate": 1.4949084861002293e-05, "loss": 0.9316, "step": 6670 }, { "epoch": 3.01, "learning_rate": 1.4948473974463767e-05, "loss": 0.6299, "step": 6680 }, { "epoch": 3.01, "learning_rate": 1.4947859457629508e-05, "loss": 0.7169, "step": 6690 }, { "epoch": 3.01, "learning_rate": 1.4947241310799028e-05, "loss": 0.6922, "step": 6700 }, { "epoch": 3.01, "learning_rate": 1.4946619534273603e-05, "loss": 0.8926, "step": 6710 }, { "epoch": 3.01, "learning_rate": 1.494599412835628e-05, "loss": 1.0797, "step": 6720 }, { "epoch": 3.01, "learning_rate": 1.4945365093351874e-05, "loss": 0.8354, "step": 6730 }, { "epoch": 3.01, "learning_rate": 1.4944732429566967e-05, "loss": 0.7327, "step": 6740 }, { "epoch": 3.02, "learning_rate": 1.4944096137309916e-05, "loss": 0.8729, "step": 6750 }, { "epoch": 3.02, "learning_rate": 1.4943456216890838e-05, "loss": 1.0992, "step": 6760 }, { "epoch": 3.02, "learning_rate": 1.4942812668621623e-05, "loss": 0.6533, "step": 6770 }, { "epoch": 3.02, "learning_rate": 1.4942165492815934e-05, "loss": 1.1681, "step": 6780 }, { "epoch": 3.02, "learning_rate": 1.494151468978919e-05, "loss": 0.8496, "step": 6790 }, { "epoch": 3.02, "learning_rate": 1.4940860259858585e-05, "loss": 1.1699, "step": 6800 }, { "epoch": 3.02, "learning_rate": 1.4940202203343083e-05, "loss": 0.9825, "step": 6810 }, { "epoch": 3.02, "learning_rate": 1.4939540520563411e-05, "loss": 0.9348, "step": 6820 }, { "epoch": 3.02, "learning_rate": 1.4938875211842066e-05, "loss": 0.949, "step": 6830 }, { "epoch": 3.02, "learning_rate": 1.4938206277503313e-05, "loss": 1.1664, "step": 6840 }, { "epoch": 3.02, "learning_rate": 1.4937533717873178e-05, "loss": 0.961, "step": 6850 }, { "epoch": 3.02, "learning_rate": 1.4936857533279463e-05, "loss": 1.007, "step": 6860 }, { "epoch": 3.02, "learning_rate": 1.4936177724051729e-05, "loss": 0.9197, "step": 6870 }, { "epoch": 3.02, "learning_rate": 1.493549429052131e-05, "loss": 0.8877, "step": 6880 }, { "epoch": 3.02, "learning_rate": 1.49348072330213e-05, "loss": 0.9528, "step": 6890 }, { "epoch": 3.02, "learning_rate": 1.4934116551886563e-05, "loss": 0.8209, "step": 6900 }, { "epoch": 3.02, "learning_rate": 1.493342224745373e-05, "loss": 0.8172, "step": 6910 }, { "epoch": 3.02, "learning_rate": 1.4932724320061195e-05, "loss": 1.2846, "step": 6920 }, { "epoch": 3.02, "learning_rate": 1.493202277004912e-05, "loss": 1.0496, "step": 6930 }, { "epoch": 3.02, "learning_rate": 1.4931317597759435e-05, "loss": 1.0846, "step": 6940 }, { "epoch": 3.02, "learning_rate": 1.4930608803535828e-05, "loss": 1.0522, "step": 6950 }, { "epoch": 3.02, "learning_rate": 1.4929896387723756e-05, "loss": 0.9628, "step": 6960 }, { "epoch": 3.02, "learning_rate": 1.4929180350670445e-05, "loss": 1.494, "step": 6970 }, { "epoch": 3.02, "learning_rate": 1.4928460692724883e-05, "loss": 1.061, "step": 6980 }, { "epoch": 3.02, "learning_rate": 1.4927737414237823e-05, "loss": 0.6719, "step": 6990 }, { "epoch": 3.02, "learning_rate": 1.4927010515561777e-05, "loss": 1.0067, "step": 7000 }, { "epoch": 3.02, "learning_rate": 1.4926279997051033e-05, "loss": 0.9501, "step": 7010 }, { "epoch": 3.02, "learning_rate": 1.4925545859061631e-05, "loss": 0.8364, "step": 7020 }, { "epoch": 3.02, "learning_rate": 1.4924808101951386e-05, "loss": 0.792, "step": 7030 }, { "epoch": 3.02, "learning_rate": 1.4924066726079868e-05, "loss": 0.7958, "step": 7040 }, { "epoch": 3.02, "learning_rate": 1.4923321731808417e-05, "loss": 0.9504, "step": 7050 }, { "epoch": 3.02, "learning_rate": 1.4922573119500133e-05, "loss": 0.8696, "step": 7060 }, { "epoch": 3.02, "learning_rate": 1.492182088951988e-05, "loss": 1.0283, "step": 7070 }, { "epoch": 3.02, "learning_rate": 1.492106504223429e-05, "loss": 1.0415, "step": 7080 }, { "epoch": 3.02, "learning_rate": 1.492030557801175e-05, "loss": 0.7804, "step": 7090 }, { "epoch": 3.02, "learning_rate": 1.4919542497222414e-05, "loss": 1.0154, "step": 7100 }, { "epoch": 3.02, "learning_rate": 1.4918775800238197e-05, "loss": 1.1701, "step": 7110 }, { "epoch": 3.02, "learning_rate": 1.4918005487432779e-05, "loss": 0.9983, "step": 7120 }, { "epoch": 3.02, "learning_rate": 1.4917231559181602e-05, "loss": 0.8283, "step": 7130 }, { "epoch": 3.02, "learning_rate": 1.4916454015861869e-05, "loss": 0.8256, "step": 7140 }, { "epoch": 3.02, "learning_rate": 1.4915672857852545e-05, "loss": 0.9813, "step": 7150 }, { "epoch": 3.02, "learning_rate": 1.4914888085534355e-05, "loss": 0.9492, "step": 7160 }, { "epoch": 3.02, "learning_rate": 1.491409969928979e-05, "loss": 0.8943, "step": 7170 }, { "epoch": 3.02, "learning_rate": 1.49133076995031e-05, "loss": 0.9886, "step": 7180 }, { "epoch": 3.02, "learning_rate": 1.4912512086560295e-05, "loss": 1.0587, "step": 7190 }, { "epoch": 3.02, "learning_rate": 1.4911712860849147e-05, "loss": 0.9218, "step": 7200 }, { "epoch": 3.02, "learning_rate": 1.4910910022759188e-05, "loss": 0.9411, "step": 7210 }, { "epoch": 3.02, "learning_rate": 1.4910103572681715e-05, "loss": 0.633, "step": 7220 }, { "epoch": 3.02, "learning_rate": 1.4909293511009777e-05, "loss": 0.7656, "step": 7230 }, { "epoch": 3.02, "learning_rate": 1.4908479838138192e-05, "loss": 1.0236, "step": 7240 }, { "epoch": 3.02, "learning_rate": 1.4907662554463534e-05, "loss": 0.7882, "step": 7250 }, { "epoch": 3.03, "learning_rate": 1.4906841660384135e-05, "loss": 0.703, "step": 7260 }, { "epoch": 3.03, "learning_rate": 1.4906017156300092e-05, "loss": 0.9532, "step": 7270 }, { "epoch": 3.03, "learning_rate": 1.4905189042613253e-05, "loss": 0.8584, "step": 7280 }, { "epoch": 3.03, "learning_rate": 1.4904357319727236e-05, "loss": 0.8987, "step": 7290 }, { "epoch": 3.03, "learning_rate": 1.490352198804741e-05, "loss": 0.7435, "step": 7300 }, { "epoch": 3.03, "learning_rate": 1.4902683047980905e-05, "loss": 0.7606, "step": 7310 }, { "epoch": 3.03, "learning_rate": 1.4901840499936614e-05, "loss": 1.0078, "step": 7320 }, { "epoch": 3.03, "learning_rate": 1.490099434432518e-05, "loss": 1.0276, "step": 7330 }, { "epoch": 3.03, "learning_rate": 1.490014458155901e-05, "loss": 0.9141, "step": 7340 }, { "epoch": 3.03, "learning_rate": 1.4899291212052271e-05, "loss": 0.9084, "step": 7350 }, { "epoch": 3.03, "learning_rate": 1.4898434236220883e-05, "loss": 1.0924, "step": 7360 }, { "epoch": 3.03, "learning_rate": 1.4897573654482523e-05, "loss": 1.0189, "step": 7370 }, { "epoch": 3.03, "learning_rate": 1.4896709467256632e-05, "loss": 0.9006, "step": 7380 }, { "epoch": 3.03, "learning_rate": 1.4895841674964404e-05, "loss": 0.9912, "step": 7390 }, { "epoch": 3.03, "learning_rate": 1.4894970278028787e-05, "loss": 1.0447, "step": 7400 }, { "epoch": 3.03, "learning_rate": 1.4894095276874493e-05, "loss": 0.9935, "step": 7410 }, { "epoch": 3.03, "learning_rate": 1.4893216671927985e-05, "loss": 0.9729, "step": 7420 }, { "epoch": 3.03, "learning_rate": 1.4892334463617486e-05, "loss": 0.926, "step": 7430 }, { "epoch": 3.03, "learning_rate": 1.4891448652372972e-05, "loss": 0.9932, "step": 7440 }, { "epoch": 3.03, "learning_rate": 1.4890559238626177e-05, "loss": 0.8552, "step": 7450 }, { "epoch": 3.03, "learning_rate": 1.488966622281059e-05, "loss": 0.8495, "step": 7460 }, { "epoch": 3.03, "learning_rate": 1.4888769605361456e-05, "loss": 0.7872, "step": 7470 }, { "epoch": 3.03, "learning_rate": 1.4887869386715777e-05, "loss": 1.157, "step": 7480 }, { "epoch": 3.03, "learning_rate": 1.4886965567312308e-05, "loss": 1.1723, "step": 7490 }, { "epoch": 3.03, "learning_rate": 1.488605814759156e-05, "loss": 0.9442, "step": 7500 }, { "epoch": 3.03, "learning_rate": 1.4885147127995799e-05, "loss": 0.8961, "step": 7510 }, { "epoch": 3.03, "learning_rate": 1.4884232508969042e-05, "loss": 1.2466, "step": 7520 }, { "epoch": 3.03, "learning_rate": 1.4883314290957063e-05, "loss": 0.9751, "step": 7530 }, { "epoch": 3.03, "learning_rate": 1.4882392474407394e-05, "loss": 0.6522, "step": 7540 }, { "epoch": 3.03, "learning_rate": 1.4881467059769314e-05, "loss": 1.0721, "step": 7550 }, { "epoch": 3.03, "learning_rate": 1.4880538047493862e-05, "loss": 0.8575, "step": 7560 }, { "epoch": 3.03, "learning_rate": 1.4879605438033825e-05, "loss": 1.1339, "step": 7570 }, { "epoch": 3.03, "learning_rate": 1.4878669231843745e-05, "loss": 0.7888, "step": 7580 }, { "epoch": 3.03, "learning_rate": 1.4877729429379918e-05, "loss": 0.7422, "step": 7590 }, { "epoch": 3.03, "learning_rate": 1.4876786031100395e-05, "loss": 1.1973, "step": 7600 }, { "epoch": 3.03, "learning_rate": 1.4875839037464973e-05, "loss": 1.2434, "step": 7610 }, { "epoch": 3.03, "learning_rate": 1.4874888448935207e-05, "loss": 0.8379, "step": 7620 }, { "epoch": 3.03, "learning_rate": 1.4873934265974401e-05, "loss": 0.7247, "step": 7630 }, { "epoch": 3.03, "learning_rate": 1.4872976489047615e-05, "loss": 0.8807, "step": 7640 }, { "epoch": 3.03, "learning_rate": 1.4872015118621657e-05, "loss": 0.879, "step": 7650 }, { "epoch": 3.03, "learning_rate": 1.4871050155165086e-05, "loss": 1.0314, "step": 7660 }, { "epoch": 3.03, "learning_rate": 1.4870081599148213e-05, "loss": 0.8832, "step": 7670 }, { "epoch": 3.03, "learning_rate": 1.48691094510431e-05, "loss": 1.1437, "step": 7680 }, { "epoch": 3.03, "learning_rate": 1.4868133711323566e-05, "loss": 1.0185, "step": 7690 }, { "epoch": 3.03, "learning_rate": 1.4867154380465167e-05, "loss": 0.963, "step": 7700 }, { "epoch": 3.03, "learning_rate": 1.4866171458945218e-05, "loss": 0.7426, "step": 7710 }, { "epoch": 3.03, "learning_rate": 1.4865184947242789e-05, "loss": 0.733, "step": 7720 }, { "epoch": 3.03, "learning_rate": 1.486419484583869e-05, "loss": 1.0429, "step": 7730 }, { "epoch": 3.03, "learning_rate": 1.486320115521548e-05, "loss": 1.0294, "step": 7740 }, { "epoch": 3.04, "learning_rate": 1.486220387585748e-05, "loss": 0.8187, "step": 7750 }, { "epoch": 3.04, "learning_rate": 1.4861203008250747e-05, "loss": 1.0844, "step": 7760 }, { "epoch": 3.04, "learning_rate": 1.4860198552883092e-05, "loss": 0.6466, "step": 7770 }, { "epoch": 3.04, "learning_rate": 1.4859190510244076e-05, "loss": 1.0297, "step": 7780 }, { "epoch": 3.04, "learning_rate": 1.4858178880825006e-05, "loss": 1.0666, "step": 7790 }, { "epoch": 3.04, "learning_rate": 1.4857163665118937e-05, "loss": 0.8843, "step": 7800 }, { "epoch": 3.04, "learning_rate": 1.4856144863620673e-05, "loss": 0.9252, "step": 7810 }, { "epoch": 3.04, "learning_rate": 1.4855122476826767e-05, "loss": 0.6275, "step": 7820 }, { "epoch": 3.04, "learning_rate": 1.4854096505235517e-05, "loss": 0.797, "step": 7830 }, { "epoch": 3.04, "learning_rate": 1.4853066949346967e-05, "loss": 0.8827, "step": 7840 }, { "epoch": 3.04, "learning_rate": 1.4852033809662914e-05, "loss": 1.0379, "step": 7850 }, { "epoch": 3.04, "learning_rate": 1.4850997086686896e-05, "loss": 1.122, "step": 7860 }, { "epoch": 3.04, "learning_rate": 1.4849956780924197e-05, "loss": 0.8421, "step": 7870 }, { "epoch": 3.04, "learning_rate": 1.4848912892881852e-05, "loss": 0.7973, "step": 7880 }, { "epoch": 3.04, "learning_rate": 1.4847865423068639e-05, "loss": 1.1859, "step": 7890 }, { "epoch": 3.04, "learning_rate": 1.484681437199508e-05, "loss": 0.9623, "step": 7900 }, { "epoch": 3.04, "learning_rate": 1.4845759740173448e-05, "loss": 1.1052, "step": 7910 }, { "epoch": 3.04, "learning_rate": 1.4844701528117756e-05, "loss": 0.8889, "step": 7920 }, { "epoch": 3.04, "learning_rate": 1.4843639736343764e-05, "loss": 0.6552, "step": 7930 }, { "epoch": 3.04, "learning_rate": 1.4842574365368974e-05, "loss": 0.9871, "step": 7940 }, { "epoch": 3.04, "learning_rate": 1.4841505415712638e-05, "loss": 0.7968, "step": 7950 }, { "epoch": 3.04, "learning_rate": 1.4840432887895748e-05, "loss": 1.1069, "step": 7960 }, { "epoch": 3.04, "learning_rate": 1.483935678244104e-05, "loss": 1.0628, "step": 7970 }, { "epoch": 3.04, "learning_rate": 1.4838277099873e-05, "loss": 0.9716, "step": 7980 }, { "epoch": 3.04, "learning_rate": 1.4837193840717842e-05, "loss": 1.1399, "step": 7990 }, { "epoch": 3.04, "learning_rate": 1.4836107005503543e-05, "loss": 0.9048, "step": 8000 }, { "epoch": 3.04, "eval_accuracy": 0.7863157894736842, "eval_f1": 0.7863157894736842, "eval_loss": 0.8679137825965881, "eval_runtime": 746.5646, "eval_samples_per_second": 6.362, "eval_steps_per_second": 1.591, "step": 8000 }, { "epoch": 4.0, "learning_rate": 1.4835016594759808e-05, "loss": 0.9414, "step": 8010 }, { "epoch": 4.0, "learning_rate": 1.4833922609018092e-05, "loss": 0.7321, "step": 8020 }, { "epoch": 4.0, "learning_rate": 1.483282504881159e-05, "loss": 1.1217, "step": 8030 }, { "epoch": 4.0, "learning_rate": 1.483172391467524e-05, "loss": 0.9879, "step": 8040 }, { "epoch": 4.0, "learning_rate": 1.483061920714572e-05, "loss": 0.9518, "step": 8050 }, { "epoch": 4.0, "learning_rate": 1.4829510926761451e-05, "loss": 0.7889, "step": 8060 }, { "epoch": 4.0, "learning_rate": 1.4828399074062598e-05, "loss": 0.757, "step": 8070 }, { "epoch": 4.0, "learning_rate": 1.4827283649591061e-05, "loss": 0.9488, "step": 8080 }, { "epoch": 4.0, "learning_rate": 1.4826164653890486e-05, "loss": 0.7085, "step": 8090 }, { "epoch": 4.0, "learning_rate": 1.482504208750626e-05, "loss": 0.8293, "step": 8100 }, { "epoch": 4.0, "learning_rate": 1.4823915950985504e-05, "loss": 0.9005, "step": 8110 }, { "epoch": 4.0, "learning_rate": 1.4822786244877088e-05, "loss": 1.0684, "step": 8120 }, { "epoch": 4.0, "learning_rate": 1.4821652969731612e-05, "loss": 0.8249, "step": 8130 }, { "epoch": 4.0, "learning_rate": 1.4820516126101424e-05, "loss": 0.8295, "step": 8140 }, { "epoch": 4.0, "learning_rate": 1.4819375714540607e-05, "loss": 1.0305, "step": 8150 }, { "epoch": 4.0, "learning_rate": 1.4818231735604982e-05, "loss": 1.0169, "step": 8160 }, { "epoch": 4.0, "learning_rate": 1.4817084189852114e-05, "loss": 0.9314, "step": 8170 }, { "epoch": 4.0, "learning_rate": 1.48159330778413e-05, "loss": 1.0152, "step": 8180 }, { "epoch": 4.0, "learning_rate": 1.4814778400133578e-05, "loss": 0.8649, "step": 8190 }, { "epoch": 4.0, "learning_rate": 1.4813620157291724e-05, "loss": 0.8508, "step": 8200 }, { "epoch": 4.0, "learning_rate": 1.4812458349880255e-05, "loss": 0.8145, "step": 8210 }, { "epoch": 4.0, "learning_rate": 1.4811292978465416e-05, "loss": 1.0982, "step": 8220 }, { "epoch": 4.0, "learning_rate": 1.48101240436152e-05, "loss": 0.6951, "step": 8230 }, { "epoch": 4.0, "learning_rate": 1.480895154589933e-05, "loss": 0.9137, "step": 8240 }, { "epoch": 4.0, "learning_rate": 1.4807775485889265e-05, "loss": 1.2906, "step": 8250 }, { "epoch": 4.01, "learning_rate": 1.4806595864158203e-05, "loss": 0.8748, "step": 8260 }, { "epoch": 4.01, "learning_rate": 1.4805412681281081e-05, "loss": 1.1232, "step": 8270 }, { "epoch": 4.01, "learning_rate": 1.4804225937834564e-05, "loss": 0.8602, "step": 8280 }, { "epoch": 4.01, "learning_rate": 1.4803035634397058e-05, "loss": 0.8962, "step": 8290 }, { "epoch": 4.01, "learning_rate": 1.4801841771548703e-05, "loss": 0.61, "step": 8300 }, { "epoch": 4.01, "learning_rate": 1.4800644349871372e-05, "loss": 0.8235, "step": 8310 }, { "epoch": 4.01, "learning_rate": 1.479944336994867e-05, "loss": 0.9972, "step": 8320 }, { "epoch": 4.01, "learning_rate": 1.479823883236595e-05, "loss": 0.7722, "step": 8330 }, { "epoch": 4.01, "learning_rate": 1.479703073771028e-05, "loss": 0.9794, "step": 8340 }, { "epoch": 4.01, "learning_rate": 1.4795819086570476e-05, "loss": 1.2318, "step": 8350 }, { "epoch": 4.01, "learning_rate": 1.4794603879537076e-05, "loss": 0.6804, "step": 8360 }, { "epoch": 4.01, "learning_rate": 1.4793385117202365e-05, "loss": 0.7993, "step": 8370 }, { "epoch": 4.01, "learning_rate": 1.4792162800160346e-05, "loss": 0.9275, "step": 8380 }, { "epoch": 4.01, "learning_rate": 1.4790936929006766e-05, "loss": 0.9467, "step": 8390 }, { "epoch": 4.01, "learning_rate": 1.4789707504339098e-05, "loss": 0.8108, "step": 8400 }, { "epoch": 4.01, "learning_rate": 1.4788474526756547e-05, "loss": 0.9307, "step": 8410 }, { "epoch": 4.01, "learning_rate": 1.4787237996860056e-05, "loss": 0.7687, "step": 8420 }, { "epoch": 4.01, "learning_rate": 1.478599791525229e-05, "loss": 0.9544, "step": 8430 }, { "epoch": 4.01, "learning_rate": 1.478475428253765e-05, "loss": 0.968, "step": 8440 }, { "epoch": 4.01, "learning_rate": 1.4783507099322271e-05, "loss": 1.0598, "step": 8450 }, { "epoch": 4.01, "learning_rate": 1.4782256366214012e-05, "loss": 0.7067, "step": 8460 }, { "epoch": 4.01, "learning_rate": 1.4781002083822465e-05, "loss": 0.8091, "step": 8470 }, { "epoch": 4.01, "learning_rate": 1.4779744252758955e-05, "loss": 0.8498, "step": 8480 }, { "epoch": 4.01, "learning_rate": 1.4778482873636531e-05, "loss": 1.1503, "step": 8490 }, { "epoch": 4.01, "learning_rate": 1.4777217947069972e-05, "loss": 0.9158, "step": 8500 }, { "epoch": 4.01, "learning_rate": 1.4775949473675794e-05, "loss": 0.7677, "step": 8510 }, { "epoch": 4.01, "learning_rate": 1.477467745407223e-05, "loss": 0.5724, "step": 8520 }, { "epoch": 4.01, "learning_rate": 1.4773401888879247e-05, "loss": 0.919, "step": 8530 }, { "epoch": 4.01, "learning_rate": 1.4772122778718545e-05, "loss": 0.9545, "step": 8540 }, { "epoch": 4.01, "learning_rate": 1.4770840124213543e-05, "loss": 0.9738, "step": 8550 }, { "epoch": 4.01, "learning_rate": 1.476955392598939e-05, "loss": 0.9716, "step": 8560 }, { "epoch": 4.01, "learning_rate": 1.4768264184672965e-05, "loss": 1.2282, "step": 8570 }, { "epoch": 4.01, "learning_rate": 1.4766970900892873e-05, "loss": 0.8605, "step": 8580 }, { "epoch": 4.01, "learning_rate": 1.4765674075279444e-05, "loss": 0.9213, "step": 8590 }, { "epoch": 4.01, "learning_rate": 1.4764373708464733e-05, "loss": 0.7695, "step": 8600 }, { "epoch": 4.01, "learning_rate": 1.4763069801082527e-05, "loss": 0.6323, "step": 8610 }, { "epoch": 4.01, "learning_rate": 1.4761762353768333e-05, "loss": 0.9472, "step": 8620 }, { "epoch": 4.01, "learning_rate": 1.4760451367159385e-05, "loss": 0.7674, "step": 8630 }, { "epoch": 4.01, "learning_rate": 1.475913684189464e-05, "loss": 0.6949, "step": 8640 }, { "epoch": 4.01, "learning_rate": 1.4757818778614786e-05, "loss": 1.1363, "step": 8650 }, { "epoch": 4.01, "learning_rate": 1.4756497177962224e-05, "loss": 0.9669, "step": 8660 }, { "epoch": 4.01, "learning_rate": 1.4755172040581093e-05, "loss": 0.7092, "step": 8670 }, { "epoch": 4.01, "learning_rate": 1.4753843367117248e-05, "loss": 0.937, "step": 8680 }, { "epoch": 4.01, "learning_rate": 1.4752511158218263e-05, "loss": 1.281, "step": 8690 }, { "epoch": 4.01, "learning_rate": 1.4751175414533447e-05, "loss": 0.8582, "step": 8700 }, { "epoch": 4.01, "learning_rate": 1.4749836136713819e-05, "loss": 1.0818, "step": 8710 }, { "epoch": 4.01, "learning_rate": 1.4748493325412132e-05, "loss": 1.1468, "step": 8720 }, { "epoch": 4.01, "learning_rate": 1.4747146981282854e-05, "loss": 0.9654, "step": 8730 }, { "epoch": 4.01, "learning_rate": 1.4745797104982177e-05, "loss": 0.9186, "step": 8740 }, { "epoch": 4.01, "learning_rate": 1.4744443697168013e-05, "loss": 0.9825, "step": 8750 }, { "epoch": 4.02, "learning_rate": 1.4743086758499996e-05, "loss": 0.7791, "step": 8760 }, { "epoch": 4.02, "learning_rate": 1.4741726289639485e-05, "loss": 0.8682, "step": 8770 }, { "epoch": 4.02, "learning_rate": 1.4740362291249555e-05, "loss": 1.0464, "step": 8780 }, { "epoch": 4.02, "learning_rate": 1.4738994763995e-05, "loss": 0.7383, "step": 8790 }, { "epoch": 4.02, "learning_rate": 1.4737623708542336e-05, "loss": 0.9431, "step": 8800 }, { "epoch": 4.02, "learning_rate": 1.47362491255598e-05, "loss": 0.8399, "step": 8810 }, { "epoch": 4.02, "learning_rate": 1.473487101571735e-05, "loss": 0.7528, "step": 8820 }, { "epoch": 4.02, "learning_rate": 1.4733489379686654e-05, "loss": 1.0939, "step": 8830 }, { "epoch": 4.02, "learning_rate": 1.473210421814111e-05, "loss": 1.1854, "step": 8840 }, { "epoch": 4.02, "learning_rate": 1.4730715531755826e-05, "loss": 0.8147, "step": 8850 }, { "epoch": 4.02, "learning_rate": 1.472932332120763e-05, "loss": 0.9854, "step": 8860 }, { "epoch": 4.02, "learning_rate": 1.4727927587175074e-05, "loss": 0.9375, "step": 8870 }, { "epoch": 4.02, "learning_rate": 1.4726528330338416e-05, "loss": 1.1204, "step": 8880 }, { "epoch": 4.02, "learning_rate": 1.4725125551379637e-05, "loss": 0.7848, "step": 8890 }, { "epoch": 4.02, "learning_rate": 1.4723719250982437e-05, "loss": 0.937, "step": 8900 }, { "epoch": 4.02, "learning_rate": 1.4722309429832228e-05, "loss": 0.6796, "step": 8910 }, { "epoch": 4.02, "learning_rate": 1.4720896088616142e-05, "loss": 0.6637, "step": 8920 }, { "epoch": 4.02, "learning_rate": 1.4719479228023022e-05, "loss": 0.7359, "step": 8930 }, { "epoch": 4.02, "learning_rate": 1.471805884874343e-05, "loss": 0.8415, "step": 8940 }, { "epoch": 4.02, "learning_rate": 1.471663495146964e-05, "loss": 0.8642, "step": 8950 }, { "epoch": 4.02, "learning_rate": 1.4715207536895644e-05, "loss": 1.1598, "step": 8960 }, { "epoch": 4.02, "learning_rate": 1.4713776605717146e-05, "loss": 0.9631, "step": 8970 }, { "epoch": 4.02, "learning_rate": 1.4712342158631564e-05, "loss": 1.028, "step": 8980 }, { "epoch": 4.02, "learning_rate": 1.4710904196338032e-05, "loss": 0.9079, "step": 8990 }, { "epoch": 4.02, "learning_rate": 1.4709462719537392e-05, "loss": 0.7848, "step": 9000 }, { "epoch": 4.02, "learning_rate": 1.4708017728932204e-05, "loss": 0.7513, "step": 9010 }, { "epoch": 4.02, "learning_rate": 1.4706569225226741e-05, "loss": 1.0347, "step": 9020 }, { "epoch": 4.02, "learning_rate": 1.470511720912698e-05, "loss": 0.9487, "step": 9030 }, { "epoch": 4.02, "learning_rate": 1.4703661681340624e-05, "loss": 0.649, "step": 9040 }, { "epoch": 4.02, "learning_rate": 1.4702202642577073e-05, "loss": 0.8312, "step": 9050 }, { "epoch": 4.02, "learning_rate": 1.470074009354745e-05, "loss": 0.883, "step": 9060 }, { "epoch": 4.02, "learning_rate": 1.4699274034964577e-05, "loss": 0.635, "step": 9070 }, { "epoch": 4.02, "learning_rate": 1.4697804467543001e-05, "loss": 1.1506, "step": 9080 }, { "epoch": 4.02, "learning_rate": 1.4696331391998966e-05, "loss": 1.3274, "step": 9090 }, { "epoch": 4.02, "learning_rate": 1.4694854809050431e-05, "loss": 0.4753, "step": 9100 }, { "epoch": 4.02, "learning_rate": 1.4693374719417069e-05, "loss": 0.9106, "step": 9110 }, { "epoch": 4.02, "learning_rate": 1.4691891123820253e-05, "loss": 1.1933, "step": 9120 }, { "epoch": 4.02, "learning_rate": 1.469040402298307e-05, "loss": 1.4015, "step": 9130 }, { "epoch": 4.02, "learning_rate": 1.468891341763032e-05, "loss": 0.9833, "step": 9140 }, { "epoch": 4.02, "learning_rate": 1.46874193084885e-05, "loss": 1.1419, "step": 9150 }, { "epoch": 4.02, "learning_rate": 1.4685921696285823e-05, "loss": 0.7536, "step": 9160 }, { "epoch": 4.02, "learning_rate": 1.4684420581752207e-05, "loss": 0.9597, "step": 9170 }, { "epoch": 4.02, "learning_rate": 1.4682915965619275e-05, "loss": 1.0005, "step": 9180 }, { "epoch": 4.02, "learning_rate": 1.4681407848620362e-05, "loss": 1.1734, "step": 9190 }, { "epoch": 4.02, "learning_rate": 1.4679896231490503e-05, "loss": 0.9856, "step": 9200 }, { "epoch": 4.02, "learning_rate": 1.4678381114966447e-05, "loss": 1.2223, "step": 9210 }, { "epoch": 4.02, "learning_rate": 1.4676862499786637e-05, "loss": 0.8492, "step": 9220 }, { "epoch": 4.02, "learning_rate": 1.467534038669123e-05, "loss": 0.8826, "step": 9230 }, { "epoch": 4.02, "learning_rate": 1.4673814776422084e-05, "loss": 0.5519, "step": 9240 }, { "epoch": 4.03, "learning_rate": 1.4672285669722767e-05, "loss": 1.0553, "step": 9250 }, { "epoch": 4.03, "learning_rate": 1.4670753067338543e-05, "loss": 1.1525, "step": 9260 }, { "epoch": 4.03, "learning_rate": 1.4669216970016385e-05, "loss": 1.0257, "step": 9270 }, { "epoch": 4.03, "learning_rate": 1.466767737850497e-05, "loss": 0.8463, "step": 9280 }, { "epoch": 4.03, "learning_rate": 1.4666134293554673e-05, "loss": 0.6491, "step": 9290 }, { "epoch": 4.03, "learning_rate": 1.4664587715917576e-05, "loss": 0.7512, "step": 9300 }, { "epoch": 4.03, "learning_rate": 1.4663037646347467e-05, "loss": 0.7006, "step": 9310 }, { "epoch": 4.03, "learning_rate": 1.4661484085599823e-05, "loss": 0.9646, "step": 9320 }, { "epoch": 4.03, "learning_rate": 1.4659927034431834e-05, "loss": 0.8665, "step": 9330 }, { "epoch": 4.03, "learning_rate": 1.465836649360239e-05, "loss": 0.5898, "step": 9340 }, { "epoch": 4.03, "learning_rate": 1.4656802463872076e-05, "loss": 0.6902, "step": 9350 }, { "epoch": 4.03, "learning_rate": 1.4655234946003185e-05, "loss": 0.8633, "step": 9360 }, { "epoch": 4.03, "learning_rate": 1.4653663940759703e-05, "loss": 0.8003, "step": 9370 }, { "epoch": 4.03, "learning_rate": 1.465208944890732e-05, "loss": 0.6989, "step": 9380 }, { "epoch": 4.03, "learning_rate": 1.4650511471213424e-05, "loss": 1.0922, "step": 9390 }, { "epoch": 4.03, "learning_rate": 1.4648930008447102e-05, "loss": 0.9699, "step": 9400 }, { "epoch": 4.03, "learning_rate": 1.4647345061379142e-05, "loss": 0.894, "step": 9410 }, { "epoch": 4.03, "learning_rate": 1.4645756630782025e-05, "loss": 0.8662, "step": 9420 }, { "epoch": 4.03, "learning_rate": 1.4644164717429931e-05, "loss": 0.7388, "step": 9430 }, { "epoch": 4.03, "learning_rate": 1.4642569322098747e-05, "loss": 0.5905, "step": 9440 }, { "epoch": 4.03, "learning_rate": 1.464097044556604e-05, "loss": 0.8467, "step": 9450 }, { "epoch": 4.03, "learning_rate": 1.463936808861109e-05, "loss": 1.0525, "step": 9460 }, { "epoch": 4.03, "learning_rate": 1.4637762252014863e-05, "loss": 0.8764, "step": 9470 }, { "epoch": 4.03, "learning_rate": 1.4636152936560023e-05, "loss": 0.7288, "step": 9480 }, { "epoch": 4.03, "learning_rate": 1.4634540143030935e-05, "loss": 0.6504, "step": 9490 }, { "epoch": 4.03, "learning_rate": 1.4632923872213653e-05, "loss": 0.8992, "step": 9500 }, { "epoch": 4.03, "learning_rate": 1.4631304124895924e-05, "loss": 0.652, "step": 9510 }, { "epoch": 4.03, "learning_rate": 1.46296809018672e-05, "loss": 0.9174, "step": 9520 }, { "epoch": 4.03, "learning_rate": 1.4628054203918615e-05, "loss": 1.2839, "step": 9530 }, { "epoch": 4.03, "learning_rate": 1.4626424031843006e-05, "loss": 0.9751, "step": 9540 }, { "epoch": 4.03, "learning_rate": 1.4624790386434893e-05, "loss": 0.8526, "step": 9550 }, { "epoch": 4.03, "learning_rate": 1.4623153268490502e-05, "loss": 0.9823, "step": 9560 }, { "epoch": 4.03, "learning_rate": 1.4621512678807738e-05, "loss": 1.1091, "step": 9570 }, { "epoch": 4.03, "learning_rate": 1.461986861818621e-05, "loss": 0.8197, "step": 9580 }, { "epoch": 4.03, "learning_rate": 1.461822108742721e-05, "loss": 0.6894, "step": 9590 }, { "epoch": 4.03, "learning_rate": 1.4616570087333725e-05, "loss": 1.047, "step": 9600 }, { "epoch": 4.03, "learning_rate": 1.4614915618710431e-05, "loss": 0.8924, "step": 9610 }, { "epoch": 4.03, "learning_rate": 1.46132576823637e-05, "loss": 0.8171, "step": 9620 }, { "epoch": 4.03, "learning_rate": 1.4611596279101584e-05, "loss": 0.9181, "step": 9630 }, { "epoch": 4.03, "learning_rate": 1.4609931409733837e-05, "loss": 0.8818, "step": 9640 }, { "epoch": 4.03, "learning_rate": 1.4608263075071894e-05, "loss": 0.8118, "step": 9650 }, { "epoch": 4.03, "learning_rate": 1.4606591275928879e-05, "loss": 0.9907, "step": 9660 }, { "epoch": 4.03, "learning_rate": 1.4604916013119607e-05, "loss": 0.9498, "step": 9670 }, { "epoch": 4.03, "learning_rate": 1.4603237287460582e-05, "loss": 0.7819, "step": 9680 }, { "epoch": 4.03, "learning_rate": 1.4601555099769994e-05, "loss": 0.6889, "step": 9690 }, { "epoch": 4.03, "learning_rate": 1.4599869450867724e-05, "loss": 0.7433, "step": 9700 }, { "epoch": 4.03, "learning_rate": 1.4598180341575332e-05, "loss": 0.8066, "step": 9710 }, { "epoch": 4.03, "learning_rate": 1.459648777271607e-05, "loss": 0.7443, "step": 9720 }, { "epoch": 4.03, "learning_rate": 1.4594791745114878e-05, "loss": 0.6299, "step": 9730 }, { "epoch": 4.03, "learning_rate": 1.4593092259598375e-05, "loss": 1.0822, "step": 9740 }, { "epoch": 4.04, "learning_rate": 1.4591389316994878e-05, "loss": 0.8876, "step": 9750 }, { "epoch": 4.04, "learning_rate": 1.4589682918134371e-05, "loss": 1.0569, "step": 9760 }, { "epoch": 4.04, "learning_rate": 1.4587973063848537e-05, "loss": 0.8454, "step": 9770 }, { "epoch": 4.04, "learning_rate": 1.4586259754970738e-05, "loss": 0.687, "step": 9780 }, { "epoch": 4.04, "learning_rate": 1.4584542992336017e-05, "loss": 1.2345, "step": 9790 }, { "epoch": 4.04, "learning_rate": 1.4582822776781108e-05, "loss": 0.6698, "step": 9800 }, { "epoch": 4.04, "learning_rate": 1.4581099109144421e-05, "loss": 0.6466, "step": 9810 }, { "epoch": 4.04, "learning_rate": 1.457937199026605e-05, "loss": 1.2157, "step": 9820 }, { "epoch": 4.04, "learning_rate": 1.457764142098777e-05, "loss": 0.9521, "step": 9830 }, { "epoch": 4.04, "learning_rate": 1.4575907402153044e-05, "loss": 1.0293, "step": 9840 }, { "epoch": 4.04, "learning_rate": 1.4574169934607006e-05, "loss": 0.9327, "step": 9850 }, { "epoch": 4.04, "learning_rate": 1.4572429019196484e-05, "loss": 0.7715, "step": 9860 }, { "epoch": 4.04, "learning_rate": 1.4570684656769973e-05, "loss": 1.1282, "step": 9870 }, { "epoch": 4.04, "learning_rate": 1.4568936848177657e-05, "loss": 0.9545, "step": 9880 }, { "epoch": 4.04, "learning_rate": 1.4567185594271393e-05, "loss": 0.7993, "step": 9890 }, { "epoch": 4.04, "learning_rate": 1.4565430895904725e-05, "loss": 0.8394, "step": 9900 }, { "epoch": 4.04, "learning_rate": 1.456367275393287e-05, "loss": 0.6563, "step": 9910 }, { "epoch": 4.04, "learning_rate": 1.4561911169212726e-05, "loss": 0.7107, "step": 9920 }, { "epoch": 4.04, "learning_rate": 1.4560146142602868e-05, "loss": 1.2381, "step": 9930 }, { "epoch": 4.04, "learning_rate": 1.4558377674963545e-05, "loss": 0.6681, "step": 9940 }, { "epoch": 4.04, "learning_rate": 1.4556605767156693e-05, "loss": 1.149, "step": 9950 }, { "epoch": 4.04, "learning_rate": 1.455483042004591e-05, "loss": 0.9799, "step": 9960 }, { "epoch": 4.04, "learning_rate": 1.4553051634496486e-05, "loss": 0.9604, "step": 9970 }, { "epoch": 4.04, "learning_rate": 1.4551269411375377e-05, "loss": 0.681, "step": 9980 }, { "epoch": 4.04, "learning_rate": 1.4549483751551216e-05, "loss": 0.8368, "step": 9990 }, { "epoch": 4.04, "learning_rate": 1.4547694655894313e-05, "loss": 0.7977, "step": 10000 }, { "epoch": 4.04, "eval_accuracy": 0.7810526315789473, "eval_f1": 0.7810526315789474, "eval_loss": 0.8845712542533875, "eval_runtime": 758.7878, "eval_samples_per_second": 6.26, "eval_steps_per_second": 1.566, "step": 10000 }, { "epoch": 5.0, "learning_rate": 1.4545902125276652e-05, "loss": 1.0388, "step": 10010 }, { "epoch": 5.0, "learning_rate": 1.4544106160571887e-05, "loss": 0.9292, "step": 10020 }, { "epoch": 5.0, "learning_rate": 1.4542306762655355e-05, "loss": 0.897, "step": 10030 }, { "epoch": 5.0, "learning_rate": 1.4540503932404057e-05, "loss": 1.0358, "step": 10040 }, { "epoch": 5.0, "learning_rate": 1.453869767069667e-05, "loss": 1.0391, "step": 10050 }, { "epoch": 5.0, "learning_rate": 1.4536887978413547e-05, "loss": 0.7696, "step": 10060 }, { "epoch": 5.0, "learning_rate": 1.4535074856436707e-05, "loss": 0.6376, "step": 10070 }, { "epoch": 5.0, "learning_rate": 1.4533258305649845e-05, "loss": 0.8613, "step": 10080 }, { "epoch": 5.0, "learning_rate": 1.4531438326938328e-05, "loss": 0.8559, "step": 10090 }, { "epoch": 5.0, "learning_rate": 1.4529614921189187e-05, "loss": 0.6378, "step": 10100 }, { "epoch": 5.0, "learning_rate": 1.452778808929113e-05, "loss": 0.6774, "step": 10110 }, { "epoch": 5.0, "learning_rate": 1.4525957832134532e-05, "loss": 0.6826, "step": 10120 }, { "epoch": 5.0, "learning_rate": 1.4524124150611443e-05, "loss": 0.7114, "step": 10130 }, { "epoch": 5.0, "learning_rate": 1.452228704561557e-05, "loss": 0.8111, "step": 10140 }, { "epoch": 5.0, "learning_rate": 1.4520446518042301e-05, "loss": 0.8248, "step": 10150 }, { "epoch": 5.0, "learning_rate": 1.4518602568788683e-05, "loss": 0.8909, "step": 10160 }, { "epoch": 5.0, "learning_rate": 1.4516755198753435e-05, "loss": 1.0263, "step": 10170 }, { "epoch": 5.0, "learning_rate": 1.4514904408836944e-05, "loss": 0.7914, "step": 10180 }, { "epoch": 5.0, "learning_rate": 1.4513050199941261e-05, "loss": 0.9401, "step": 10190 }, { "epoch": 5.0, "learning_rate": 1.4511192572970108e-05, "loss": 0.7783, "step": 10200 }, { "epoch": 5.0, "learning_rate": 1.4509331528828868e-05, "loss": 0.6487, "step": 10210 }, { "epoch": 5.0, "learning_rate": 1.4507467068424591e-05, "loss": 0.7832, "step": 10220 }, { "epoch": 5.0, "learning_rate": 1.4505599192665993e-05, "loss": 0.7375, "step": 10230 }, { "epoch": 5.0, "learning_rate": 1.4503727902463451e-05, "loss": 0.8463, "step": 10240 }, { "epoch": 5.0, "learning_rate": 1.4501853198729012e-05, "loss": 0.7524, "step": 10250 }, { "epoch": 5.01, "learning_rate": 1.4499975082376387e-05, "loss": 0.5843, "step": 10260 }, { "epoch": 5.01, "learning_rate": 1.449809355432094e-05, "loss": 1.342, "step": 10270 }, { "epoch": 5.01, "learning_rate": 1.449620861547971e-05, "loss": 0.8964, "step": 10280 }, { "epoch": 5.01, "learning_rate": 1.4494320266771391e-05, "loss": 0.8378, "step": 10290 }, { "epoch": 5.01, "learning_rate": 1.4492428509116341e-05, "loss": 0.8489, "step": 10300 }, { "epoch": 5.01, "learning_rate": 1.4490533343436581e-05, "loss": 0.5495, "step": 10310 }, { "epoch": 5.01, "learning_rate": 1.4488634770655793e-05, "loss": 0.9422, "step": 10320 }, { "epoch": 5.01, "learning_rate": 1.4486732791699318e-05, "loss": 1.0297, "step": 10330 }, { "epoch": 5.01, "learning_rate": 1.4484827407494154e-05, "loss": 0.8256, "step": 10340 }, { "epoch": 5.01, "learning_rate": 1.4482918618968963e-05, "loss": 0.5982, "step": 10350 }, { "epoch": 5.01, "learning_rate": 1.4481006427054067e-05, "loss": 0.7247, "step": 10360 }, { "epoch": 5.01, "learning_rate": 1.4479090832681445e-05, "loss": 1.0779, "step": 10370 }, { "epoch": 5.01, "learning_rate": 1.4477171836784736e-05, "loss": 0.7428, "step": 10380 }, { "epoch": 5.01, "learning_rate": 1.4475249440299231e-05, "loss": 0.6429, "step": 10390 }, { "epoch": 5.01, "learning_rate": 1.4473323644161886e-05, "loss": 0.557, "step": 10400 }, { "epoch": 5.01, "learning_rate": 1.4471394449311309e-05, "loss": 0.7514, "step": 10410 }, { "epoch": 5.01, "learning_rate": 1.4469461856687769e-05, "loss": 1.0773, "step": 10420 }, { "epoch": 5.01, "learning_rate": 1.4467525867233184e-05, "loss": 1.1142, "step": 10430 }, { "epoch": 5.01, "learning_rate": 1.4465586481891134e-05, "loss": 0.8702, "step": 10440 }, { "epoch": 5.01, "learning_rate": 1.4463643701606852e-05, "loss": 1.0078, "step": 10450 }, { "epoch": 5.01, "learning_rate": 1.4461697527327225e-05, "loss": 0.6047, "step": 10460 }, { "epoch": 5.01, "learning_rate": 1.4459747960000795e-05, "loss": 0.9054, "step": 10470 }, { "epoch": 5.01, "learning_rate": 1.4457795000577756e-05, "loss": 0.8874, "step": 10480 }, { "epoch": 5.01, "learning_rate": 1.4455838650009962e-05, "loss": 1.1414, "step": 10490 }, { "epoch": 5.01, "learning_rate": 1.4453878909250906e-05, "loss": 0.8461, "step": 10500 }, { "epoch": 5.01, "learning_rate": 1.4451915779255748e-05, "loss": 1.2347, "step": 10510 }, { "epoch": 5.01, "learning_rate": 1.4449949260981291e-05, "loss": 0.9733, "step": 10520 }, { "epoch": 5.01, "learning_rate": 1.4447979355385994e-05, "loss": 0.7622, "step": 10530 }, { "epoch": 5.01, "learning_rate": 1.4446006063429966e-05, "loss": 1.0488, "step": 10540 }, { "epoch": 5.01, "learning_rate": 1.4444029386074961e-05, "loss": 0.6356, "step": 10550 }, { "epoch": 5.01, "learning_rate": 1.4442049324284393e-05, "loss": 0.9436, "step": 10560 }, { "epoch": 5.01, "learning_rate": 1.4440065879023313e-05, "loss": 0.9486, "step": 10570 }, { "epoch": 5.01, "learning_rate": 1.4438079051258435e-05, "loss": 1.1029, "step": 10580 }, { "epoch": 5.01, "learning_rate": 1.4436088841958113e-05, "loss": 1.1475, "step": 10590 }, { "epoch": 5.01, "learning_rate": 1.443409525209235e-05, "loss": 0.7903, "step": 10600 }, { "epoch": 5.01, "learning_rate": 1.4432098282632795e-05, "loss": 0.6607, "step": 10610 }, { "epoch": 5.01, "learning_rate": 1.4430097934552751e-05, "loss": 0.6229, "step": 10620 }, { "epoch": 5.01, "learning_rate": 1.4428094208827161e-05, "loss": 1.006, "step": 10630 }, { "epoch": 5.01, "learning_rate": 1.4426087106432617e-05, "loss": 0.7741, "step": 10640 }, { "epoch": 5.01, "learning_rate": 1.4424076628347357e-05, "loss": 0.5454, "step": 10650 }, { "epoch": 5.01, "learning_rate": 1.4422062775551262e-05, "loss": 0.7863, "step": 10660 }, { "epoch": 5.01, "learning_rate": 1.4420045549025862e-05, "loss": 0.977, "step": 10670 }, { "epoch": 5.01, "learning_rate": 1.4418024949754326e-05, "loss": 0.956, "step": 10680 }, { "epoch": 5.01, "learning_rate": 1.441600097872147e-05, "loss": 0.9236, "step": 10690 }, { "epoch": 5.01, "learning_rate": 1.4413973636913754e-05, "loss": 0.9959, "step": 10700 }, { "epoch": 5.01, "learning_rate": 1.441194292531928e-05, "loss": 1.2392, "step": 10710 }, { "epoch": 5.01, "learning_rate": 1.4409908844927792e-05, "loss": 0.6514, "step": 10720 }, { "epoch": 5.01, "learning_rate": 1.4407871396730672e-05, "loss": 0.9126, "step": 10730 }, { "epoch": 5.01, "learning_rate": 1.4405830581720953e-05, "loss": 0.9703, "step": 10740 }, { "epoch": 5.01, "learning_rate": 1.4403786400893304e-05, "loss": 0.6085, "step": 10750 }, { "epoch": 5.02, "learning_rate": 1.4401738855244029e-05, "loss": 0.7244, "step": 10760 }, { "epoch": 5.02, "learning_rate": 1.439968794577108e-05, "loss": 0.8882, "step": 10770 }, { "epoch": 5.02, "learning_rate": 1.4397633673474042e-05, "loss": 0.584, "step": 10780 }, { "epoch": 5.02, "learning_rate": 1.4395576039354148e-05, "loss": 1.2507, "step": 10790 }, { "epoch": 5.02, "learning_rate": 1.4393515044414259e-05, "loss": 0.8442, "step": 10800 }, { "epoch": 5.02, "learning_rate": 1.439145068965888e-05, "loss": 1.0842, "step": 10810 }, { "epoch": 5.02, "learning_rate": 1.4389382976094155e-05, "loss": 0.9413, "step": 10820 }, { "epoch": 5.02, "learning_rate": 1.438731190472786e-05, "loss": 0.655, "step": 10830 }, { "epoch": 5.02, "learning_rate": 1.438523747656941e-05, "loss": 0.8029, "step": 10840 }, { "epoch": 5.02, "learning_rate": 1.4383159692629858e-05, "loss": 0.5417, "step": 10850 }, { "epoch": 5.02, "learning_rate": 1.4381078553921888e-05, "loss": 1.0177, "step": 10860 }, { "epoch": 5.02, "learning_rate": 1.4378994061459826e-05, "loss": 0.9567, "step": 10870 }, { "epoch": 5.02, "learning_rate": 1.4376906216259623e-05, "loss": 0.7705, "step": 10880 }, { "epoch": 5.02, "learning_rate": 1.4374815019338873e-05, "loss": 0.6263, "step": 10890 }, { "epoch": 5.02, "learning_rate": 1.4372720471716797e-05, "loss": 0.5253, "step": 10900 }, { "epoch": 5.02, "learning_rate": 1.4370622574414254e-05, "loss": 0.8735, "step": 10910 }, { "epoch": 5.02, "learning_rate": 1.4368521328453736e-05, "loss": 0.916, "step": 10920 }, { "epoch": 5.02, "learning_rate": 1.4366416734859362e-05, "loss": 0.8634, "step": 10930 }, { "epoch": 5.02, "learning_rate": 1.4364308794656881e-05, "loss": 0.7812, "step": 10940 }, { "epoch": 5.02, "learning_rate": 1.4362197508873688e-05, "loss": 0.7478, "step": 10950 }, { "epoch": 5.02, "learning_rate": 1.4360082878538787e-05, "loss": 0.7418, "step": 10960 }, { "epoch": 5.02, "learning_rate": 1.4357964904682832e-05, "loss": 0.6585, "step": 10970 }, { "epoch": 5.02, "learning_rate": 1.4355843588338092e-05, "loss": 0.8206, "step": 10980 }, { "epoch": 5.02, "learning_rate": 1.4353718930538473e-05, "loss": 0.6967, "step": 10990 }, { "epoch": 5.02, "learning_rate": 1.4351590932319506e-05, "loss": 0.8266, "step": 11000 }, { "epoch": 5.02, "learning_rate": 1.4349459594718354e-05, "loss": 0.8005, "step": 11010 }, { "epoch": 5.02, "learning_rate": 1.4347324918773805e-05, "loss": 0.9922, "step": 11020 }, { "epoch": 5.02, "learning_rate": 1.4345186905526272e-05, "loss": 0.9101, "step": 11030 }, { "epoch": 5.02, "learning_rate": 1.4343045556017798e-05, "loss": 0.8269, "step": 11040 }, { "epoch": 5.02, "learning_rate": 1.4340900871292047e-05, "loss": 1.0536, "step": 11050 }, { "epoch": 5.02, "learning_rate": 1.433875285239432e-05, "loss": 0.8667, "step": 11060 }, { "epoch": 5.02, "learning_rate": 1.4336601500371527e-05, "loss": 0.8843, "step": 11070 }, { "epoch": 5.02, "learning_rate": 1.4334446816272218e-05, "loss": 0.7773, "step": 11080 }, { "epoch": 5.02, "learning_rate": 1.4332288801146554e-05, "loss": 1.0272, "step": 11090 }, { "epoch": 5.02, "learning_rate": 1.4330127456046328e-05, "loss": 0.739, "step": 11100 }, { "epoch": 5.02, "learning_rate": 1.4327962782024956e-05, "loss": 0.7335, "step": 11110 }, { "epoch": 5.02, "learning_rate": 1.4325794780137468e-05, "loss": 0.7412, "step": 11120 }, { "epoch": 5.02, "learning_rate": 1.4323623451440525e-05, "loss": 1.011, "step": 11130 }, { "epoch": 5.02, "learning_rate": 1.4321448796992409e-05, "loss": 0.9723, "step": 11140 }, { "epoch": 5.02, "learning_rate": 1.4319270817853014e-05, "loss": 1.0028, "step": 11150 }, { "epoch": 5.02, "learning_rate": 1.4317089515083866e-05, "loss": 0.7306, "step": 11160 }, { "epoch": 5.02, "learning_rate": 1.4314904889748102e-05, "loss": 0.9448, "step": 11170 }, { "epoch": 5.02, "learning_rate": 1.4312716942910483e-05, "loss": 0.617, "step": 11180 }, { "epoch": 5.02, "learning_rate": 1.4310525675637389e-05, "loss": 0.7878, "step": 11190 }, { "epoch": 5.02, "learning_rate": 1.4308331088996816e-05, "loss": 1.0331, "step": 11200 }, { "epoch": 5.02, "learning_rate": 1.4306133184058378e-05, "loss": 0.6705, "step": 11210 }, { "epoch": 5.02, "learning_rate": 1.4303931961893309e-05, "loss": 0.7288, "step": 11220 }, { "epoch": 5.02, "learning_rate": 1.4301727423574453e-05, "loss": 1.2383, "step": 11230 }, { "epoch": 5.02, "learning_rate": 1.4299519570176284e-05, "loss": 0.7154, "step": 11240 }, { "epoch": 5.03, "learning_rate": 1.4297308402774876e-05, "loss": 0.6404, "step": 11250 }, { "epoch": 5.03, "learning_rate": 1.4295093922447927e-05, "loss": 0.9904, "step": 11260 }, { "epoch": 5.03, "learning_rate": 1.4292876130274747e-05, "loss": 1.0343, "step": 11270 }, { "epoch": 5.03, "learning_rate": 1.4290655027336264e-05, "loss": 0.7563, "step": 11280 }, { "epoch": 5.03, "learning_rate": 1.428843061471501e-05, "loss": 1.0234, "step": 11290 }, { "epoch": 5.03, "learning_rate": 1.4286202893495147e-05, "loss": 0.6342, "step": 11300 }, { "epoch": 5.03, "learning_rate": 1.428397186476243e-05, "loss": 0.7474, "step": 11310 }, { "epoch": 5.03, "learning_rate": 1.428173752960424e-05, "loss": 0.7082, "step": 11320 }, { "epoch": 5.03, "learning_rate": 1.4279499889109563e-05, "loss": 0.9103, "step": 11330 }, { "epoch": 5.03, "learning_rate": 1.4277258944369001e-05, "loss": 0.6227, "step": 11340 }, { "epoch": 5.03, "learning_rate": 1.4275014696474758e-05, "loss": 0.5647, "step": 11350 }, { "epoch": 5.03, "learning_rate": 1.4272767146520655e-05, "loss": 0.9027, "step": 11360 }, { "epoch": 5.03, "learning_rate": 1.4270516295602122e-05, "loss": 0.7919, "step": 11370 }, { "epoch": 5.03, "learning_rate": 1.4268262144816196e-05, "loss": 0.831, "step": 11380 }, { "epoch": 5.03, "learning_rate": 1.426600469526152e-05, "loss": 0.7439, "step": 11390 }, { "epoch": 5.03, "learning_rate": 1.4263743948038355e-05, "loss": 0.9817, "step": 11400 }, { "epoch": 5.03, "learning_rate": 1.4261479904248552e-05, "loss": 0.6809, "step": 11410 }, { "epoch": 5.03, "learning_rate": 1.4259212564995586e-05, "loss": 0.852, "step": 11420 }, { "epoch": 5.03, "learning_rate": 1.4256941931384526e-05, "loss": 0.6747, "step": 11430 }, { "epoch": 5.03, "learning_rate": 1.4254668004522053e-05, "loss": 1.0233, "step": 11440 }, { "epoch": 5.03, "learning_rate": 1.4252390785516453e-05, "loss": 0.9007, "step": 11450 }, { "epoch": 5.03, "learning_rate": 1.4250110275477612e-05, "loss": 0.9747, "step": 11460 }, { "epoch": 5.03, "learning_rate": 1.4247826475517023e-05, "loss": 1.2008, "step": 11470 }, { "epoch": 5.03, "learning_rate": 1.4245539386747784e-05, "loss": 0.782, "step": 11480 }, { "epoch": 5.03, "learning_rate": 1.4243249010284593e-05, "loss": 0.6924, "step": 11490 }, { "epoch": 5.03, "learning_rate": 1.4240955347243754e-05, "loss": 0.8578, "step": 11500 }, { "epoch": 5.03, "learning_rate": 1.4238658398743167e-05, "loss": 0.6872, "step": 11510 }, { "epoch": 5.03, "learning_rate": 1.4236358165902338e-05, "loss": 0.5779, "step": 11520 }, { "epoch": 5.03, "learning_rate": 1.4234054649842377e-05, "loss": 0.8016, "step": 11530 }, { "epoch": 5.03, "learning_rate": 1.4231747851685982e-05, "loss": 0.7191, "step": 11540 }, { "epoch": 5.03, "learning_rate": 1.4229437772557463e-05, "loss": 0.8966, "step": 11550 }, { "epoch": 5.03, "learning_rate": 1.4227124413582726e-05, "loss": 0.8387, "step": 11560 }, { "epoch": 5.03, "learning_rate": 1.422480777588927e-05, "loss": 0.6938, "step": 11570 }, { "epoch": 5.03, "learning_rate": 1.4222487860606197e-05, "loss": 0.964, "step": 11580 }, { "epoch": 5.03, "learning_rate": 1.4220164668864207e-05, "loss": 0.831, "step": 11590 }, { "epoch": 5.03, "learning_rate": 1.4217838201795596e-05, "loss": 1.0617, "step": 11600 }, { "epoch": 5.03, "learning_rate": 1.4215508460534254e-05, "loss": 0.7678, "step": 11610 }, { "epoch": 5.03, "learning_rate": 1.4213175446215669e-05, "loss": 1.1968, "step": 11620 }, { "epoch": 5.03, "learning_rate": 1.4210839159976927e-05, "loss": 0.8451, "step": 11630 }, { "epoch": 5.03, "learning_rate": 1.4208499602956699e-05, "loss": 0.803, "step": 11640 }, { "epoch": 5.03, "learning_rate": 1.420615677629526e-05, "loss": 0.5882, "step": 11650 }, { "epoch": 5.03, "learning_rate": 1.4203810681134479e-05, "loss": 1.0378, "step": 11660 }, { "epoch": 5.03, "learning_rate": 1.4201461318617807e-05, "loss": 0.8273, "step": 11670 }, { "epoch": 5.03, "learning_rate": 1.4199108689890303e-05, "loss": 0.8917, "step": 11680 }, { "epoch": 5.03, "learning_rate": 1.4196752796098601e-05, "loss": 0.8185, "step": 11690 }, { "epoch": 5.03, "learning_rate": 1.4194393638390943e-05, "loss": 0.7411, "step": 11700 }, { "epoch": 5.03, "learning_rate": 1.4192031217917148e-05, "loss": 0.649, "step": 11710 }, { "epoch": 5.03, "learning_rate": 1.4189665535828631e-05, "loss": 1.0503, "step": 11720 }, { "epoch": 5.03, "learning_rate": 1.41872965932784e-05, "loss": 0.9669, "step": 11730 }, { "epoch": 5.03, "learning_rate": 1.418492439142105e-05, "loss": 0.7626, "step": 11740 }, { "epoch": 5.04, "learning_rate": 1.4182548931412758e-05, "loss": 0.7923, "step": 11750 }, { "epoch": 5.04, "learning_rate": 1.41801702144113e-05, "loss": 0.9244, "step": 11760 }, { "epoch": 5.04, "learning_rate": 1.417778824157603e-05, "loss": 1.2815, "step": 11770 }, { "epoch": 5.04, "learning_rate": 1.4175403014067892e-05, "loss": 1.0134, "step": 11780 }, { "epoch": 5.04, "learning_rate": 1.4173014533049422e-05, "loss": 0.9197, "step": 11790 }, { "epoch": 5.04, "learning_rate": 1.4170622799684732e-05, "loss": 0.9558, "step": 11800 }, { "epoch": 5.04, "learning_rate": 1.4168227815139526e-05, "loss": 0.9409, "step": 11810 }, { "epoch": 5.04, "learning_rate": 1.4165829580581085e-05, "loss": 0.861, "step": 11820 }, { "epoch": 5.04, "learning_rate": 1.416342809717829e-05, "loss": 0.7087, "step": 11830 }, { "epoch": 5.04, "learning_rate": 1.4161023366101585e-05, "loss": 1.0238, "step": 11840 }, { "epoch": 5.04, "learning_rate": 1.415861538852301e-05, "loss": 0.782, "step": 11850 }, { "epoch": 5.04, "learning_rate": 1.4156204165616188e-05, "loss": 0.6829, "step": 11860 }, { "epoch": 5.04, "learning_rate": 1.4153789698556311e-05, "loss": 0.7065, "step": 11870 }, { "epoch": 5.04, "learning_rate": 1.4151371988520169e-05, "loss": 0.7532, "step": 11880 }, { "epoch": 5.04, "learning_rate": 1.414895103668612e-05, "loss": 0.4863, "step": 11890 }, { "epoch": 5.04, "learning_rate": 1.414652684423411e-05, "loss": 1.1852, "step": 11900 }, { "epoch": 5.04, "learning_rate": 1.414409941234566e-05, "loss": 0.7895, "step": 11910 }, { "epoch": 5.04, "learning_rate": 1.4141668742203868e-05, "loss": 0.6872, "step": 11920 }, { "epoch": 5.04, "learning_rate": 1.4139234834993416e-05, "loss": 1.1803, "step": 11930 }, { "epoch": 5.04, "learning_rate": 1.4136797691900557e-05, "loss": 0.5786, "step": 11940 }, { "epoch": 5.04, "learning_rate": 1.413435731411313e-05, "loss": 0.8615, "step": 11950 }, { "epoch": 5.04, "learning_rate": 1.4131913702820543e-05, "loss": 0.7935, "step": 11960 }, { "epoch": 5.04, "learning_rate": 1.4129466859213782e-05, "loss": 0.5791, "step": 11970 }, { "epoch": 5.04, "learning_rate": 1.4127016784485411e-05, "loss": 1.2098, "step": 11980 }, { "epoch": 5.04, "learning_rate": 1.4124563479829562e-05, "loss": 1.1726, "step": 11990 }, { "epoch": 5.04, "learning_rate": 1.4122106946441953e-05, "loss": 0.9259, "step": 12000 }, { "epoch": 5.04, "eval_accuracy": 0.8263157894736842, "eval_f1": 0.8263157894736841, "eval_loss": 0.8018165230751038, "eval_runtime": 741.1315, "eval_samples_per_second": 6.409, "eval_steps_per_second": 1.603, "step": 12000 }, { "epoch": 6.0, "learning_rate": 1.4119647185519863e-05, "loss": 0.9292, "step": 12010 }, { "epoch": 6.0, "learning_rate": 1.4117184198262151e-05, "loss": 0.9956, "step": 12020 }, { "epoch": 6.0, "learning_rate": 1.4114717985869247e-05, "loss": 0.9384, "step": 12030 }, { "epoch": 6.0, "learning_rate": 1.4112248549543151e-05, "loss": 0.9028, "step": 12040 }, { "epoch": 6.0, "learning_rate": 1.410977589048744e-05, "loss": 0.8509, "step": 12050 }, { "epoch": 6.0, "learning_rate": 1.410730000990726e-05, "loss": 0.6837, "step": 12060 }, { "epoch": 6.0, "learning_rate": 1.4104820909009319e-05, "loss": 0.6524, "step": 12070 }, { "epoch": 6.0, "learning_rate": 1.4102338589001901e-05, "loss": 0.6301, "step": 12080 }, { "epoch": 6.0, "learning_rate": 1.4099853051094864e-05, "loss": 0.8283, "step": 12090 }, { "epoch": 6.0, "learning_rate": 1.4097364296499624e-05, "loss": 1.0106, "step": 12100 }, { "epoch": 6.0, "learning_rate": 1.409487232642917e-05, "loss": 0.8437, "step": 12110 }, { "epoch": 6.0, "learning_rate": 1.4092377142098065e-05, "loss": 0.7118, "step": 12120 }, { "epoch": 6.0, "learning_rate": 1.4089878744722421e-05, "loss": 1.087, "step": 12130 }, { "epoch": 6.0, "learning_rate": 1.4087377135519934e-05, "loss": 0.54, "step": 12140 }, { "epoch": 6.0, "learning_rate": 1.4084872315709853e-05, "loss": 0.785, "step": 12150 }, { "epoch": 6.0, "learning_rate": 1.4082364286513003e-05, "loss": 0.509, "step": 12160 }, { "epoch": 6.0, "learning_rate": 1.4079853049151762e-05, "loss": 0.8785, "step": 12170 }, { "epoch": 6.0, "learning_rate": 1.4077338604850075e-05, "loss": 0.9986, "step": 12180 }, { "epoch": 6.0, "learning_rate": 1.4074820954833457e-05, "loss": 0.8, "step": 12190 }, { "epoch": 6.0, "learning_rate": 1.4072300100328976e-05, "loss": 0.8324, "step": 12200 }, { "epoch": 6.0, "learning_rate": 1.4069776042565269e-05, "loss": 0.3989, "step": 12210 }, { "epoch": 6.0, "learning_rate": 1.406724878277253e-05, "loss": 0.9609, "step": 12220 }, { "epoch": 6.0, "learning_rate": 1.4064718322182512e-05, "loss": 0.9633, "step": 12230 }, { "epoch": 6.0, "learning_rate": 1.4062184662028534e-05, "loss": 0.8126, "step": 12240 }, { "epoch": 6.0, "learning_rate": 1.4059647803545468e-05, "loss": 0.5011, "step": 12250 }, { "epoch": 6.01, "learning_rate": 1.4057107747969753e-05, "loss": 0.5289, "step": 12260 }, { "epoch": 6.01, "learning_rate": 1.4054564496539378e-05, "loss": 1.0525, "step": 12270 }, { "epoch": 6.01, "learning_rate": 1.4052018050493892e-05, "loss": 0.7972, "step": 12280 }, { "epoch": 6.01, "learning_rate": 1.4049468411074402e-05, "loss": 0.7144, "step": 12290 }, { "epoch": 6.01, "learning_rate": 1.4046915579523573e-05, "loss": 0.6966, "step": 12300 }, { "epoch": 6.01, "learning_rate": 1.4044359557085624e-05, "loss": 0.9538, "step": 12310 }, { "epoch": 6.01, "learning_rate": 1.4041800345006328e-05, "loss": 0.7337, "step": 12320 }, { "epoch": 6.01, "learning_rate": 1.4039237944533015e-05, "loss": 0.8238, "step": 12330 }, { "epoch": 6.01, "learning_rate": 1.4036672356914567e-05, "loss": 0.7472, "step": 12340 }, { "epoch": 6.01, "learning_rate": 1.4034103583401422e-05, "loss": 0.7119, "step": 12350 }, { "epoch": 6.01, "learning_rate": 1.4031531625245567e-05, "loss": 0.7508, "step": 12360 }, { "epoch": 6.01, "learning_rate": 1.4028956483700542e-05, "loss": 0.8372, "step": 12370 }, { "epoch": 6.01, "learning_rate": 1.4026378160021442e-05, "loss": 0.8902, "step": 12380 }, { "epoch": 6.01, "learning_rate": 1.4023796655464912e-05, "loss": 0.666, "step": 12390 }, { "epoch": 6.01, "learning_rate": 1.4021211971289142e-05, "loss": 0.9766, "step": 12400 }, { "epoch": 6.01, "learning_rate": 1.401862410875388e-05, "loss": 0.9295, "step": 12410 }, { "epoch": 6.01, "learning_rate": 1.4016033069120414e-05, "loss": 0.8617, "step": 12420 }, { "epoch": 6.01, "learning_rate": 1.4013438853651591e-05, "loss": 0.6898, "step": 12430 }, { "epoch": 6.01, "learning_rate": 1.4010841463611795e-05, "loss": 0.6646, "step": 12440 }, { "epoch": 6.01, "learning_rate": 1.4008240900266964e-05, "loss": 0.4307, "step": 12450 }, { "epoch": 6.01, "learning_rate": 1.400563716488458e-05, "loss": 0.7693, "step": 12460 }, { "epoch": 6.01, "learning_rate": 1.4003030258733676e-05, "loss": 0.735, "step": 12470 }, { "epoch": 6.01, "learning_rate": 1.4000420183084823e-05, "loss": 0.818, "step": 12480 }, { "epoch": 6.01, "learning_rate": 1.3997806939210139e-05, "loss": 1.0814, "step": 12490 }, { "epoch": 6.01, "learning_rate": 1.3995190528383292e-05, "loss": 1.1466, "step": 12500 }, { "epoch": 6.01, "learning_rate": 1.3992570951879483e-05, "loss": 0.8816, "step": 12510 }, { "epoch": 6.01, "learning_rate": 1.3989948210975466e-05, "loss": 0.8825, "step": 12520 }, { "epoch": 6.01, "learning_rate": 1.3987322306949532e-05, "loss": 0.9807, "step": 12530 }, { "epoch": 6.01, "learning_rate": 1.3984693241081512e-05, "loss": 0.8425, "step": 12540 }, { "epoch": 6.01, "learning_rate": 1.3982061014652787e-05, "loss": 0.8364, "step": 12550 }, { "epoch": 6.01, "learning_rate": 1.3979425628946263e-05, "loss": 0.8375, "step": 12560 }, { "epoch": 6.01, "learning_rate": 1.3976787085246405e-05, "loss": 0.7715, "step": 12570 }, { "epoch": 6.01, "learning_rate": 1.3974145384839203e-05, "loss": 0.7157, "step": 12580 }, { "epoch": 6.01, "learning_rate": 1.3971500529012188e-05, "loss": 0.8325, "step": 12590 }, { "epoch": 6.01, "learning_rate": 1.396885251905443e-05, "loss": 0.5132, "step": 12600 }, { "epoch": 6.01, "learning_rate": 1.3966201356256543e-05, "loss": 1.0001, "step": 12610 }, { "epoch": 6.01, "learning_rate": 1.3963547041910663e-05, "loss": 0.8268, "step": 12620 }, { "epoch": 6.01, "learning_rate": 1.3960889577310476e-05, "loss": 0.7412, "step": 12630 }, { "epoch": 6.01, "learning_rate": 1.3958228963751197e-05, "loss": 0.5222, "step": 12640 }, { "epoch": 6.01, "learning_rate": 1.3955565202529577e-05, "loss": 0.6424, "step": 12650 }, { "epoch": 6.01, "learning_rate": 1.39528982949439e-05, "loss": 0.9083, "step": 12660 }, { "epoch": 6.01, "learning_rate": 1.3950228242293985e-05, "loss": 0.6788, "step": 12670 }, { "epoch": 6.01, "learning_rate": 1.3947555045881183e-05, "loss": 0.9128, "step": 12680 }, { "epoch": 6.01, "learning_rate": 1.3944878707008378e-05, "loss": 1.0018, "step": 12690 }, { "epoch": 6.01, "learning_rate": 1.3942199226979984e-05, "loss": 0.9426, "step": 12700 }, { "epoch": 6.01, "learning_rate": 1.3939516607101947e-05, "loss": 0.7346, "step": 12710 }, { "epoch": 6.01, "learning_rate": 1.3936830848681743e-05, "loss": 0.7292, "step": 12720 }, { "epoch": 6.01, "learning_rate": 1.393414195302838e-05, "loss": 0.9039, "step": 12730 }, { "epoch": 6.01, "learning_rate": 1.3931449921452392e-05, "loss": 0.4155, "step": 12740 }, { "epoch": 6.01, "learning_rate": 1.3928754755265844e-05, "loss": 0.5747, "step": 12750 }, { "epoch": 6.02, "learning_rate": 1.3926056455782322e-05, "loss": 0.7444, "step": 12760 }, { "epoch": 6.02, "learning_rate": 1.392335502431695e-05, "loss": 1.033, "step": 12770 }, { "epoch": 6.02, "learning_rate": 1.392065046218637e-05, "loss": 0.5894, "step": 12780 }, { "epoch": 6.02, "learning_rate": 1.3917942770708757e-05, "loss": 0.7544, "step": 12790 }, { "epoch": 6.02, "learning_rate": 1.39152319512038e-05, "loss": 0.6563, "step": 12800 }, { "epoch": 6.02, "learning_rate": 1.3912518004992724e-05, "loss": 0.7536, "step": 12810 }, { "epoch": 6.02, "learning_rate": 1.3909800933398273e-05, "loss": 0.577, "step": 12820 }, { "epoch": 6.02, "learning_rate": 1.3907080737744714e-05, "loss": 0.6801, "step": 12830 }, { "epoch": 6.02, "learning_rate": 1.3904357419357838e-05, "loss": 0.8534, "step": 12840 }, { "epoch": 6.02, "learning_rate": 1.3901630979564955e-05, "loss": 0.9727, "step": 12850 }, { "epoch": 6.02, "learning_rate": 1.3898901419694903e-05, "loss": 0.8376, "step": 12860 }, { "epoch": 6.02, "learning_rate": 1.3896168741078033e-05, "loss": 0.9828, "step": 12870 }, { "epoch": 6.02, "learning_rate": 1.3893432945046219e-05, "loss": 0.81, "step": 12880 }, { "epoch": 6.02, "learning_rate": 1.3890694032932857e-05, "loss": 0.9423, "step": 12890 }, { "epoch": 6.02, "learning_rate": 1.3887952006072857e-05, "loss": 0.9811, "step": 12900 }, { "epoch": 6.02, "learning_rate": 1.388520686580265e-05, "loss": 1.0064, "step": 12910 }, { "epoch": 6.02, "learning_rate": 1.388245861346019e-05, "loss": 1.0164, "step": 12920 }, { "epoch": 6.02, "learning_rate": 1.3879707250384934e-05, "loss": 0.6345, "step": 12930 }, { "epoch": 6.02, "learning_rate": 1.3876952777917864e-05, "loss": 0.8114, "step": 12940 }, { "epoch": 6.02, "learning_rate": 1.387419519740148e-05, "loss": 0.8133, "step": 12950 }, { "epoch": 6.02, "learning_rate": 1.3871434510179791e-05, "loss": 0.7562, "step": 12960 }, { "epoch": 6.02, "learning_rate": 1.3868670717598323e-05, "loss": 0.9805, "step": 12970 }, { "epoch": 6.02, "learning_rate": 1.3865903821004115e-05, "loss": 0.7079, "step": 12980 }, { "epoch": 6.02, "learning_rate": 1.3863133821745717e-05, "loss": 0.9933, "step": 12990 }, { "epoch": 6.02, "learning_rate": 1.3860360721173195e-05, "loss": 0.5244, "step": 13000 }, { "epoch": 6.02, "learning_rate": 1.3857584520638124e-05, "loss": 0.6868, "step": 13010 }, { "epoch": 6.02, "learning_rate": 1.3854805221493592e-05, "loss": 0.7727, "step": 13020 }, { "epoch": 6.02, "learning_rate": 1.3852022825094192e-05, "loss": 1.0346, "step": 13030 }, { "epoch": 6.02, "learning_rate": 1.3849237332796034e-05, "loss": 0.9681, "step": 13040 }, { "epoch": 6.02, "learning_rate": 1.384644874595673e-05, "loss": 0.824, "step": 13050 }, { "epoch": 6.02, "learning_rate": 1.3843657065935406e-05, "loss": 0.9007, "step": 13060 }, { "epoch": 6.02, "learning_rate": 1.3840862294092691e-05, "loss": 1.0339, "step": 13070 }, { "epoch": 6.02, "learning_rate": 1.3838064431790724e-05, "loss": 0.8334, "step": 13080 }, { "epoch": 6.02, "learning_rate": 1.3835263480393149e-05, "loss": 0.9044, "step": 13090 }, { "epoch": 6.02, "learning_rate": 1.3832459441265114e-05, "loss": 0.8958, "step": 13100 }, { "epoch": 6.02, "learning_rate": 1.3829652315773276e-05, "loss": 0.8666, "step": 13110 }, { "epoch": 6.02, "learning_rate": 1.3826842105285792e-05, "loss": 0.9741, "step": 13120 }, { "epoch": 6.02, "learning_rate": 1.3824028811172325e-05, "loss": 0.8652, "step": 13130 }, { "epoch": 6.02, "learning_rate": 1.3821212434804042e-05, "loss": 0.9392, "step": 13140 }, { "epoch": 6.02, "learning_rate": 1.3818392977553607e-05, "loss": 0.658, "step": 13150 }, { "epoch": 6.02, "learning_rate": 1.3815570440795194e-05, "loss": 0.636, "step": 13160 }, { "epoch": 6.02, "learning_rate": 1.3812744825904467e-05, "loss": 0.8125, "step": 13170 }, { "epoch": 6.02, "learning_rate": 1.3809916134258603e-05, "loss": 0.5889, "step": 13180 }, { "epoch": 6.02, "learning_rate": 1.3807084367236269e-05, "loss": 0.6374, "step": 13190 }, { "epoch": 6.02, "learning_rate": 1.3804249526217633e-05, "loss": 0.9499, "step": 13200 }, { "epoch": 6.02, "learning_rate": 1.3801411612584363e-05, "loss": 0.9158, "step": 13210 }, { "epoch": 6.02, "learning_rate": 1.3798570627719622e-05, "loss": 0.4447, "step": 13220 }, { "epoch": 6.02, "learning_rate": 1.3795726573008075e-05, "loss": 0.976, "step": 13230 }, { "epoch": 6.02, "learning_rate": 1.3792879449835875e-05, "loss": 0.7793, "step": 13240 }, { "epoch": 6.03, "learning_rate": 1.3790029259590681e-05, "loss": 0.9342, "step": 13250 }, { "epoch": 6.03, "learning_rate": 1.3787176003661635e-05, "loss": 0.6184, "step": 13260 }, { "epoch": 6.03, "learning_rate": 1.3784319683439385e-05, "loss": 0.7955, "step": 13270 }, { "epoch": 6.03, "learning_rate": 1.3781460300316064e-05, "loss": 0.906, "step": 13280 }, { "epoch": 6.03, "learning_rate": 1.37785978556853e-05, "loss": 0.9102, "step": 13290 }, { "epoch": 6.03, "learning_rate": 1.3775732350942213e-05, "loss": 0.9564, "step": 13300 }, { "epoch": 6.03, "learning_rate": 1.3772863787483418e-05, "loss": 0.734, "step": 13310 }, { "epoch": 6.03, "learning_rate": 1.3769992166707014e-05, "loss": 0.8327, "step": 13320 }, { "epoch": 6.03, "learning_rate": 1.3767117490012603e-05, "loss": 0.6923, "step": 13330 }, { "epoch": 6.03, "learning_rate": 1.3764239758801257e-05, "loss": 0.692, "step": 13340 }, { "epoch": 6.03, "learning_rate": 1.376135897447555e-05, "loss": 0.8945, "step": 13350 }, { "epoch": 6.03, "learning_rate": 1.3758475138439543e-05, "loss": 0.6951, "step": 13360 }, { "epoch": 6.03, "learning_rate": 1.3755588252098785e-05, "loss": 0.6342, "step": 13370 }, { "epoch": 6.03, "learning_rate": 1.3752698316860305e-05, "loss": 0.8719, "step": 13380 }, { "epoch": 6.03, "learning_rate": 1.3749805334132624e-05, "loss": 0.7696, "step": 13390 }, { "epoch": 6.03, "learning_rate": 1.3746909305325747e-05, "loss": 0.721, "step": 13400 }, { "epoch": 6.03, "learning_rate": 1.3744010231851161e-05, "loss": 0.5691, "step": 13410 }, { "epoch": 6.03, "learning_rate": 1.3741108115121844e-05, "loss": 1.035, "step": 13420 }, { "epoch": 6.03, "learning_rate": 1.373820295655225e-05, "loss": 0.7772, "step": 13430 }, { "epoch": 6.03, "learning_rate": 1.3735294757558315e-05, "loss": 0.9524, "step": 13440 }, { "epoch": 6.03, "learning_rate": 1.3732383519557461e-05, "loss": 1.0839, "step": 13450 }, { "epoch": 6.03, "learning_rate": 1.3729469243968596e-05, "loss": 0.4841, "step": 13460 }, { "epoch": 6.03, "learning_rate": 1.3726551932212094e-05, "loss": 0.7338, "step": 13470 }, { "epoch": 6.03, "learning_rate": 1.3723631585709822e-05, "loss": 1.0517, "step": 13480 }, { "epoch": 6.03, "learning_rate": 1.3720708205885125e-05, "loss": 0.9323, "step": 13490 }, { "epoch": 6.03, "learning_rate": 1.3717781794162813e-05, "loss": 0.6849, "step": 13500 }, { "epoch": 6.03, "learning_rate": 1.371485235196919e-05, "loss": 0.3872, "step": 13510 }, { "epoch": 6.03, "learning_rate": 1.3711919880732033e-05, "loss": 0.9672, "step": 13520 }, { "epoch": 6.03, "learning_rate": 1.3708984381880584e-05, "loss": 0.6098, "step": 13530 }, { "epoch": 6.03, "learning_rate": 1.3706045856845579e-05, "loss": 0.7626, "step": 13540 }, { "epoch": 6.03, "learning_rate": 1.3703104307059213e-05, "loss": 0.8963, "step": 13550 }, { "epoch": 6.03, "learning_rate": 1.3700159733955166e-05, "loss": 0.7506, "step": 13560 }, { "epoch": 6.03, "learning_rate": 1.3697212138968584e-05, "loss": 0.9264, "step": 13570 }, { "epoch": 6.03, "learning_rate": 1.369426152353609e-05, "loss": 0.787, "step": 13580 }, { "epoch": 6.03, "learning_rate": 1.3691307889095778e-05, "loss": 0.7793, "step": 13590 }, { "epoch": 6.03, "learning_rate": 1.3688351237087214e-05, "loss": 0.8481, "step": 13600 }, { "epoch": 6.03, "learning_rate": 1.3685391568951434e-05, "loss": 0.8792, "step": 13610 }, { "epoch": 6.03, "learning_rate": 1.3682428886130944e-05, "loss": 0.888, "step": 13620 }, { "epoch": 6.03, "learning_rate": 1.367946319006972e-05, "loss": 0.7437, "step": 13630 }, { "epoch": 6.03, "learning_rate": 1.3676494482213206e-05, "loss": 1.0551, "step": 13640 }, { "epoch": 6.03, "learning_rate": 1.3673522764008315e-05, "loss": 0.9655, "step": 13650 }, { "epoch": 6.03, "learning_rate": 1.3670548036903425e-05, "loss": 0.8299, "step": 13660 }, { "epoch": 6.03, "learning_rate": 1.3667570302348384e-05, "loss": 0.9885, "step": 13670 }, { "epoch": 6.03, "learning_rate": 1.3664589561794498e-05, "loss": 0.6566, "step": 13680 }, { "epoch": 6.03, "learning_rate": 1.3661605816694551e-05, "loss": 0.6017, "step": 13690 }, { "epoch": 6.03, "learning_rate": 1.365861906850278e-05, "loss": 0.6448, "step": 13700 }, { "epoch": 6.03, "learning_rate": 1.3655629318674892e-05, "loss": 0.8787, "step": 13710 }, { "epoch": 6.03, "learning_rate": 1.3652636568668053e-05, "loss": 0.8334, "step": 13720 }, { "epoch": 6.03, "learning_rate": 1.3649640819940891e-05, "loss": 1.1023, "step": 13730 }, { "epoch": 6.03, "learning_rate": 1.3646642073953505e-05, "loss": 0.5216, "step": 13740 }, { "epoch": 6.04, "learning_rate": 1.364364033216744e-05, "loss": 1.0398, "step": 13750 }, { "epoch": 6.04, "learning_rate": 1.3640635596045708e-05, "loss": 0.9455, "step": 13760 }, { "epoch": 6.04, "learning_rate": 1.3637627867052786e-05, "loss": 0.6643, "step": 13770 }, { "epoch": 6.04, "learning_rate": 1.3634617146654605e-05, "loss": 0.8005, "step": 13780 }, { "epoch": 6.04, "learning_rate": 1.3631603436318548e-05, "loss": 1.2044, "step": 13790 }, { "epoch": 6.04, "learning_rate": 1.3628586737513463e-05, "loss": 1.0157, "step": 13800 }, { "epoch": 6.04, "learning_rate": 1.3625567051709656e-05, "loss": 0.4879, "step": 13810 }, { "epoch": 6.04, "learning_rate": 1.362254438037888e-05, "loss": 0.6704, "step": 13820 }, { "epoch": 6.04, "learning_rate": 1.3619518724994351e-05, "loss": 0.8101, "step": 13830 }, { "epoch": 6.04, "learning_rate": 1.3616490087030738e-05, "loss": 0.6362, "step": 13840 }, { "epoch": 6.04, "learning_rate": 1.3613458467964156e-05, "loss": 0.9716, "step": 13850 }, { "epoch": 6.04, "learning_rate": 1.3610423869272188e-05, "loss": 0.6861, "step": 13860 }, { "epoch": 6.04, "learning_rate": 1.3607386292433854e-05, "loss": 1.2453, "step": 13870 }, { "epoch": 6.04, "learning_rate": 1.3604345738929636e-05, "loss": 0.7002, "step": 13880 }, { "epoch": 6.04, "learning_rate": 1.360130221024146e-05, "loss": 0.7556, "step": 13890 }, { "epoch": 6.04, "learning_rate": 1.3598255707852707e-05, "loss": 0.7298, "step": 13900 }, { "epoch": 6.04, "learning_rate": 1.3595206233248204e-05, "loss": 0.7313, "step": 13910 }, { "epoch": 6.04, "learning_rate": 1.3592153787914228e-05, "loss": 0.4009, "step": 13920 }, { "epoch": 6.04, "learning_rate": 1.3589098373338507e-05, "loss": 0.829, "step": 13930 }, { "epoch": 6.04, "learning_rate": 1.3586039991010208e-05, "loss": 0.6719, "step": 13940 }, { "epoch": 6.04, "learning_rate": 1.358297864241995e-05, "loss": 0.5607, "step": 13950 }, { "epoch": 6.04, "learning_rate": 1.35799143290598e-05, "loss": 0.6292, "step": 13960 }, { "epoch": 6.04, "learning_rate": 1.3576847052423266e-05, "loss": 0.9576, "step": 13970 }, { "epoch": 6.04, "learning_rate": 1.35737768140053e-05, "loss": 1.0345, "step": 13980 }, { "epoch": 6.04, "learning_rate": 1.35707036153023e-05, "loss": 0.4929, "step": 13990 }, { "epoch": 6.04, "learning_rate": 1.3567627457812107e-05, "loss": 0.6077, "step": 14000 }, { "epoch": 6.04, "eval_accuracy": 0.8189473684210526, "eval_f1": 0.8189473684210526, "eval_loss": 0.8212071061134338, "eval_runtime": 747.465, "eval_samples_per_second": 6.355, "eval_steps_per_second": 1.589, "step": 14000 }, { "epoch": 7.0, "learning_rate": 1.3564548343034e-05, "loss": 0.8925, "step": 14010 }, { "epoch": 7.0, "learning_rate": 1.3561466272468704e-05, "loss": 1.021, "step": 14020 }, { "epoch": 7.0, "learning_rate": 1.3558381247618381e-05, "loss": 0.8947, "step": 14030 }, { "epoch": 7.0, "learning_rate": 1.3555293269986639e-05, "loss": 0.7799, "step": 14040 }, { "epoch": 7.0, "learning_rate": 1.3552202341078515e-05, "loss": 0.7529, "step": 14050 }, { "epoch": 7.0, "learning_rate": 1.3549108462400494e-05, "loss": 0.8222, "step": 14060 }, { "epoch": 7.0, "learning_rate": 1.3546011635460494e-05, "loss": 1.1847, "step": 14070 }, { "epoch": 7.0, "learning_rate": 1.354291186176787e-05, "loss": 0.7235, "step": 14080 }, { "epoch": 7.0, "learning_rate": 1.3539809142833414e-05, "loss": 0.8463, "step": 14090 }, { "epoch": 7.0, "learning_rate": 1.3536703480169356e-05, "loss": 0.512, "step": 14100 }, { "epoch": 7.0, "learning_rate": 1.3533594875289357e-05, "loss": 0.8988, "step": 14110 }, { "epoch": 7.0, "learning_rate": 1.353048332970851e-05, "loss": 0.8714, "step": 14120 }, { "epoch": 7.0, "learning_rate": 1.3527368844943349e-05, "loss": 0.8919, "step": 14130 }, { "epoch": 7.0, "learning_rate": 1.3524251422511834e-05, "loss": 0.6822, "step": 14140 }, { "epoch": 7.0, "learning_rate": 1.3521131063933359e-05, "loss": 0.8376, "step": 14150 }, { "epoch": 7.0, "learning_rate": 1.351800777072875e-05, "loss": 0.6984, "step": 14160 }, { "epoch": 7.0, "learning_rate": 1.3514881544420259e-05, "loss": 0.7484, "step": 14170 }, { "epoch": 7.0, "learning_rate": 1.3511752386531575e-05, "loss": 1.064, "step": 14180 }, { "epoch": 7.0, "learning_rate": 1.3508620298587809e-05, "loss": 0.5924, "step": 14190 }, { "epoch": 7.0, "learning_rate": 1.3505485282115501e-05, "loss": 0.812, "step": 14200 }, { "epoch": 7.0, "learning_rate": 1.3502347338642623e-05, "loss": 0.7384, "step": 14210 }, { "epoch": 7.0, "learning_rate": 1.3499206469698571e-05, "loss": 0.8245, "step": 14220 }, { "epoch": 7.0, "learning_rate": 1.3496062676814165e-05, "loss": 0.8636, "step": 14230 }, { "epoch": 7.0, "learning_rate": 1.349291596152165e-05, "loss": 0.6606, "step": 14240 }, { "epoch": 7.0, "learning_rate": 1.3489766325354697e-05, "loss": 0.8448, "step": 14250 }, { "epoch": 7.01, "learning_rate": 1.3486613769848403e-05, "loss": 0.9852, "step": 14260 }, { "epoch": 7.01, "learning_rate": 1.3483458296539283e-05, "loss": 1.052, "step": 14270 }, { "epoch": 7.01, "learning_rate": 1.3480299906965276e-05, "loss": 0.6644, "step": 14280 }, { "epoch": 7.01, "learning_rate": 1.3477138602665747e-05, "loss": 0.8872, "step": 14290 }, { "epoch": 7.01, "learning_rate": 1.3473974385181472e-05, "loss": 1.069, "step": 14300 }, { "epoch": 7.01, "learning_rate": 1.3470807256054654e-05, "loss": 0.7077, "step": 14310 }, { "epoch": 7.01, "learning_rate": 1.3467637216828916e-05, "loss": 0.8647, "step": 14320 }, { "epoch": 7.01, "learning_rate": 1.3464464269049293e-05, "loss": 0.9642, "step": 14330 }, { "epoch": 7.01, "learning_rate": 1.3461288414262242e-05, "loss": 0.7707, "step": 14340 }, { "epoch": 7.01, "learning_rate": 1.3458109654015637e-05, "loss": 0.7427, "step": 14350 }, { "epoch": 7.01, "learning_rate": 1.3454927989858766e-05, "loss": 0.561, "step": 14360 }, { "epoch": 7.01, "learning_rate": 1.3451743423342333e-05, "loss": 1.0471, "step": 14370 }, { "epoch": 7.01, "learning_rate": 1.344855595601846e-05, "loss": 0.9729, "step": 14380 }, { "epoch": 7.01, "learning_rate": 1.3445365589440676e-05, "loss": 0.7116, "step": 14390 }, { "epoch": 7.01, "learning_rate": 1.344217232516393e-05, "loss": 0.8295, "step": 14400 }, { "epoch": 7.01, "learning_rate": 1.343897616474458e-05, "loss": 0.6606, "step": 14410 }, { "epoch": 7.01, "learning_rate": 1.3435777109740394e-05, "loss": 0.8007, "step": 14420 }, { "epoch": 7.01, "learning_rate": 1.3432575161710552e-05, "loss": 1.0616, "step": 14430 }, { "epoch": 7.01, "learning_rate": 1.3429370322215648e-05, "loss": 0.7153, "step": 14440 }, { "epoch": 7.01, "learning_rate": 1.3426162592817678e-05, "loss": 0.7569, "step": 14450 }, { "epoch": 7.01, "learning_rate": 1.3422951975080054e-05, "loss": 0.9862, "step": 14460 }, { "epoch": 7.01, "learning_rate": 1.3419738470567587e-05, "loss": 0.5145, "step": 14470 }, { "epoch": 7.01, "learning_rate": 1.3416522080846506e-05, "loss": 1.1163, "step": 14480 }, { "epoch": 7.01, "learning_rate": 1.3413302807484436e-05, "loss": 0.8015, "step": 14490 }, { "epoch": 7.01, "learning_rate": 1.3410080652050414e-05, "loss": 0.7657, "step": 14500 }, { "epoch": 7.01, "learning_rate": 1.340685561611488e-05, "loss": 0.534, "step": 14510 }, { "epoch": 7.01, "learning_rate": 1.3403627701249675e-05, "loss": 0.8209, "step": 14520 }, { "epoch": 7.01, "learning_rate": 1.3400396909028046e-05, "loss": 0.5046, "step": 14530 }, { "epoch": 7.01, "learning_rate": 1.3397163241024641e-05, "loss": 0.8202, "step": 14540 }, { "epoch": 7.01, "learning_rate": 1.3393926698815516e-05, "loss": 0.7993, "step": 14550 }, { "epoch": 7.01, "learning_rate": 1.3390687283978114e-05, "loss": 0.6522, "step": 14560 }, { "epoch": 7.01, "learning_rate": 1.3387444998091294e-05, "loss": 0.7665, "step": 14570 }, { "epoch": 7.01, "learning_rate": 1.33841998427353e-05, "loss": 0.5005, "step": 14580 }, { "epoch": 7.01, "learning_rate": 1.3380951819491785e-05, "loss": 0.8614, "step": 14590 }, { "epoch": 7.01, "learning_rate": 1.3377700929943799e-05, "loss": 0.4467, "step": 14600 }, { "epoch": 7.01, "learning_rate": 1.337444717567578e-05, "loss": 1.0509, "step": 14610 }, { "epoch": 7.01, "learning_rate": 1.3371190558273574e-05, "loss": 1.0283, "step": 14620 }, { "epoch": 7.01, "learning_rate": 1.336793107932441e-05, "loss": 0.9415, "step": 14630 }, { "epoch": 7.01, "learning_rate": 1.3364668740416927e-05, "loss": 0.6773, "step": 14640 }, { "epoch": 7.01, "learning_rate": 1.3361403543141141e-05, "loss": 0.7324, "step": 14650 }, { "epoch": 7.01, "learning_rate": 1.3358135489088473e-05, "loss": 0.9419, "step": 14660 }, { "epoch": 7.01, "learning_rate": 1.3354864579851737e-05, "loss": 0.4525, "step": 14670 }, { "epoch": 7.01, "learning_rate": 1.3351590817025127e-05, "loss": 0.5833, "step": 14680 }, { "epoch": 7.01, "learning_rate": 1.3348314202204241e-05, "loss": 0.8469, "step": 14690 }, { "epoch": 7.01, "learning_rate": 1.3345034736986057e-05, "loss": 0.8856, "step": 14700 }, { "epoch": 7.01, "learning_rate": 1.3341752422968948e-05, "loss": 0.5665, "step": 14710 }, { "epoch": 7.01, "learning_rate": 1.3338467261752677e-05, "loss": 0.9724, "step": 14720 }, { "epoch": 7.01, "learning_rate": 1.3335179254938387e-05, "loss": 0.6042, "step": 14730 }, { "epoch": 7.01, "learning_rate": 1.3331888404128615e-05, "loss": 0.9173, "step": 14740 }, { "epoch": 7.01, "learning_rate": 1.3328594710927282e-05, "loss": 1.3782, "step": 14750 }, { "epoch": 7.02, "learning_rate": 1.3325298176939694e-05, "loss": 0.876, "step": 14760 }, { "epoch": 7.02, "learning_rate": 1.332199880377254e-05, "loss": 0.876, "step": 14770 }, { "epoch": 7.02, "learning_rate": 1.3318696593033896e-05, "loss": 1.0023, "step": 14780 }, { "epoch": 7.02, "learning_rate": 1.3315391546333219e-05, "loss": 0.6538, "step": 14790 }, { "epoch": 7.02, "learning_rate": 1.3312083665281348e-05, "loss": 0.7284, "step": 14800 }, { "epoch": 7.02, "learning_rate": 1.3308772951490503e-05, "loss": 0.7376, "step": 14810 }, { "epoch": 7.02, "learning_rate": 1.330545940657429e-05, "loss": 0.5036, "step": 14820 }, { "epoch": 7.02, "learning_rate": 1.3302143032147687e-05, "loss": 0.7377, "step": 14830 }, { "epoch": 7.02, "learning_rate": 1.3298823829827055e-05, "loss": 1.1506, "step": 14840 }, { "epoch": 7.02, "learning_rate": 1.3295501801230133e-05, "loss": 0.6578, "step": 14850 }, { "epoch": 7.02, "learning_rate": 1.3292176947976038e-05, "loss": 0.7625, "step": 14860 }, { "epoch": 7.02, "learning_rate": 1.3288849271685263e-05, "loss": 0.64, "step": 14870 }, { "epoch": 7.02, "learning_rate": 1.3285518773979677e-05, "loss": 0.6243, "step": 14880 }, { "epoch": 7.02, "learning_rate": 1.3282185456482522e-05, "loss": 0.9075, "step": 14890 }, { "epoch": 7.02, "learning_rate": 1.327884932081842e-05, "loss": 0.9783, "step": 14900 }, { "epoch": 7.02, "learning_rate": 1.327551036861336e-05, "loss": 0.6165, "step": 14910 }, { "epoch": 7.02, "learning_rate": 1.327216860149471e-05, "loss": 0.9649, "step": 14920 }, { "epoch": 7.02, "learning_rate": 1.3268824021091203e-05, "loss": 0.7211, "step": 14930 }, { "epoch": 7.02, "learning_rate": 1.326547662903295e-05, "loss": 0.9264, "step": 14940 }, { "epoch": 7.02, "learning_rate": 1.3262126426951427e-05, "loss": 0.7827, "step": 14950 }, { "epoch": 7.02, "learning_rate": 1.3258773416479483e-05, "loss": 0.947, "step": 14960 }, { "epoch": 7.02, "learning_rate": 1.3255417599251331e-05, "loss": 0.5499, "step": 14970 }, { "epoch": 7.02, "learning_rate": 1.3252058976902563e-05, "loss": 0.6036, "step": 14980 }, { "epoch": 7.02, "learning_rate": 1.3248697551070124e-05, "loss": 0.8893, "step": 14990 }, { "epoch": 7.02, "learning_rate": 1.3245333323392335e-05, "loss": 0.6627, "step": 15000 }, { "epoch": 7.02, "learning_rate": 1.3241966295508879e-05, "loss": 0.7549, "step": 15010 }, { "epoch": 7.02, "learning_rate": 1.3238596469060808e-05, "loss": 0.7898, "step": 15020 }, { "epoch": 7.02, "learning_rate": 1.3235223845690528e-05, "loss": 0.7913, "step": 15030 }, { "epoch": 7.02, "learning_rate": 1.3231848427041817e-05, "loss": 1.0171, "step": 15040 }, { "epoch": 7.02, "learning_rate": 1.3228470214759818e-05, "loss": 0.9284, "step": 15050 }, { "epoch": 7.02, "learning_rate": 1.3225089210491024e-05, "loss": 1.0654, "step": 15060 }, { "epoch": 7.02, "learning_rate": 1.3221705415883297e-05, "loss": 0.762, "step": 15070 }, { "epoch": 7.02, "learning_rate": 1.321831883258586e-05, "loss": 0.656, "step": 15080 }, { "epoch": 7.02, "learning_rate": 1.321492946224929e-05, "loss": 0.5709, "step": 15090 }, { "epoch": 7.02, "learning_rate": 1.3211537306525526e-05, "loss": 0.8218, "step": 15100 }, { "epoch": 7.02, "learning_rate": 1.3208142367067865e-05, "loss": 0.5841, "step": 15110 }, { "epoch": 7.02, "learning_rate": 1.3204744645530956e-05, "loss": 0.6966, "step": 15120 }, { "epoch": 7.02, "learning_rate": 1.3201344143570806e-05, "loss": 0.9424, "step": 15130 }, { "epoch": 7.02, "learning_rate": 1.3197940862844786e-05, "loss": 0.6475, "step": 15140 }, { "epoch": 7.02, "learning_rate": 1.3194534805011606e-05, "loss": 0.91, "step": 15150 }, { "epoch": 7.02, "learning_rate": 1.3191125971731342e-05, "loss": 0.8327, "step": 15160 }, { "epoch": 7.02, "learning_rate": 1.3187714364665415e-05, "loss": 0.7005, "step": 15170 }, { "epoch": 7.02, "learning_rate": 1.31842999854766e-05, "loss": 0.9117, "step": 15180 }, { "epoch": 7.02, "learning_rate": 1.318088283582903e-05, "loss": 0.6846, "step": 15190 }, { "epoch": 7.02, "learning_rate": 1.3177462917388173e-05, "loss": 0.8622, "step": 15200 }, { "epoch": 7.02, "learning_rate": 1.3174040231820863e-05, "loss": 0.9937, "step": 15210 }, { "epoch": 7.02, "learning_rate": 1.3170614780795273e-05, "loss": 0.5033, "step": 15220 }, { "epoch": 7.02, "learning_rate": 1.3167186565980927e-05, "loss": 1.3306, "step": 15230 }, { "epoch": 7.02, "learning_rate": 1.3163755589048693e-05, "loss": 0.9008, "step": 15240 }, { "epoch": 7.03, "learning_rate": 1.316032185167079e-05, "loss": 0.8008, "step": 15250 }, { "epoch": 7.03, "learning_rate": 1.3156885355520778e-05, "loss": 0.6892, "step": 15260 }, { "epoch": 7.03, "learning_rate": 1.3153446102273566e-05, "loss": 0.7426, "step": 15270 }, { "epoch": 7.03, "learning_rate": 1.3150004093605403e-05, "loss": 0.7207, "step": 15280 }, { "epoch": 7.03, "learning_rate": 1.3146559331193879e-05, "loss": 0.9089, "step": 15290 }, { "epoch": 7.03, "learning_rate": 1.3143111816717933e-05, "loss": 0.8718, "step": 15300 }, { "epoch": 7.03, "learning_rate": 1.3139661551857842e-05, "loss": 0.8146, "step": 15310 }, { "epoch": 7.03, "learning_rate": 1.3136208538295221e-05, "loss": 0.7586, "step": 15320 }, { "epoch": 7.03, "learning_rate": 1.3132752777713027e-05, "loss": 0.6503, "step": 15330 }, { "epoch": 7.03, "learning_rate": 1.312929427179556e-05, "loss": 0.6057, "step": 15340 }, { "epoch": 7.03, "learning_rate": 1.3125833022228448e-05, "loss": 0.7167, "step": 15350 }, { "epoch": 7.03, "learning_rate": 1.3122369030698663e-05, "loss": 0.8433, "step": 15360 }, { "epoch": 7.03, "learning_rate": 1.3118902298894515e-05, "loss": 0.7265, "step": 15370 }, { "epoch": 7.03, "learning_rate": 1.3115432828505646e-05, "loss": 0.6624, "step": 15380 }, { "epoch": 7.03, "learning_rate": 1.3111960621223035e-05, "loss": 0.5974, "step": 15390 }, { "epoch": 7.03, "learning_rate": 1.310848567873899e-05, "loss": 0.9127, "step": 15400 }, { "epoch": 7.03, "learning_rate": 1.3105008002747158e-05, "loss": 0.8466, "step": 15410 }, { "epoch": 7.03, "learning_rate": 1.3101527594942517e-05, "loss": 0.5286, "step": 15420 }, { "epoch": 7.03, "learning_rate": 1.3098044457021373e-05, "loss": 0.6996, "step": 15430 }, { "epoch": 7.03, "learning_rate": 1.3094558590681367e-05, "loss": 0.6488, "step": 15440 }, { "epoch": 7.03, "learning_rate": 1.3091069997621466e-05, "loss": 0.5853, "step": 15450 }, { "epoch": 7.03, "learning_rate": 1.3087578679541972e-05, "loss": 0.7664, "step": 15460 }, { "epoch": 7.03, "learning_rate": 1.3084084638144506e-05, "loss": 0.8834, "step": 15470 }, { "epoch": 7.03, "learning_rate": 1.3080587875132024e-05, "loss": 1.1025, "step": 15480 }, { "epoch": 7.03, "learning_rate": 1.3077088392208807e-05, "loss": 0.82, "step": 15490 }, { "epoch": 7.03, "learning_rate": 1.3073586191080456e-05, "loss": 0.9271, "step": 15500 }, { "epoch": 7.03, "learning_rate": 1.3070081273453906e-05, "loss": 0.8683, "step": 15510 }, { "epoch": 7.03, "learning_rate": 1.3066573641037412e-05, "loss": 0.7035, "step": 15520 }, { "epoch": 7.03, "learning_rate": 1.3063063295540545e-05, "loss": 0.6797, "step": 15530 }, { "epoch": 7.03, "learning_rate": 1.3059550238674209e-05, "loss": 0.8762, "step": 15540 }, { "epoch": 7.03, "learning_rate": 1.3056034472150625e-05, "loss": 0.6074, "step": 15550 }, { "epoch": 7.03, "learning_rate": 1.3052515997683336e-05, "loss": 0.6325, "step": 15560 }, { "epoch": 7.03, "learning_rate": 1.3048994816987201e-05, "loss": 0.7776, "step": 15570 }, { "epoch": 7.03, "learning_rate": 1.3045470931778403e-05, "loss": 0.8261, "step": 15580 }, { "epoch": 7.03, "learning_rate": 1.304194434377444e-05, "loss": 0.6536, "step": 15590 }, { "epoch": 7.03, "learning_rate": 1.303841505469413e-05, "loss": 1.1411, "step": 15600 }, { "epoch": 7.03, "learning_rate": 1.3034883066257602e-05, "loss": 0.6888, "step": 15610 }, { "epoch": 7.03, "learning_rate": 1.3031348380186305e-05, "loss": 0.7335, "step": 15620 }, { "epoch": 7.03, "learning_rate": 1.3027810998203005e-05, "loss": 0.8149, "step": 15630 }, { "epoch": 7.03, "learning_rate": 1.3024270922031775e-05, "loss": 1.115, "step": 15640 }, { "epoch": 7.03, "learning_rate": 1.302072815339801e-05, "loss": 0.6974, "step": 15650 }, { "epoch": 7.03, "learning_rate": 1.3017182694028406e-05, "loss": 1.0305, "step": 15660 }, { "epoch": 7.03, "learning_rate": 1.3013634545650983e-05, "loss": 0.5599, "step": 15670 }, { "epoch": 7.03, "learning_rate": 1.3010083709995062e-05, "loss": 0.7644, "step": 15680 }, { "epoch": 7.03, "learning_rate": 1.3006530188791278e-05, "loss": 0.688, "step": 15690 }, { "epoch": 7.03, "learning_rate": 1.3002973983771572e-05, "loss": 0.8266, "step": 15700 }, { "epoch": 7.03, "learning_rate": 1.2999415096669201e-05, "loss": 0.9208, "step": 15710 }, { "epoch": 7.03, "learning_rate": 1.2995853529218718e-05, "loss": 0.6909, "step": 15720 }, { "epoch": 7.03, "learning_rate": 1.2992289283155988e-05, "loss": 0.9514, "step": 15730 }, { "epoch": 7.03, "learning_rate": 1.2988722360218184e-05, "loss": 0.5879, "step": 15740 }, { "epoch": 7.04, "learning_rate": 1.2985152762143779e-05, "loss": 0.7636, "step": 15750 }, { "epoch": 7.04, "learning_rate": 1.2981580490672554e-05, "loss": 0.6555, "step": 15760 }, { "epoch": 7.04, "learning_rate": 1.2978005547545586e-05, "loss": 0.4848, "step": 15770 }, { "epoch": 7.04, "learning_rate": 1.2974427934505262e-05, "loss": 0.6107, "step": 15780 }, { "epoch": 7.04, "learning_rate": 1.2970847653295267e-05, "loss": 1.2346, "step": 15790 }, { "epoch": 7.04, "learning_rate": 1.2967264705660587e-05, "loss": 0.6772, "step": 15800 }, { "epoch": 7.04, "learning_rate": 1.2963679093347507e-05, "loss": 0.7676, "step": 15810 }, { "epoch": 7.04, "learning_rate": 1.296009081810361e-05, "loss": 0.8187, "step": 15820 }, { "epoch": 7.04, "learning_rate": 1.2956499881677777e-05, "loss": 0.5784, "step": 15830 }, { "epoch": 7.04, "learning_rate": 1.295290628582019e-05, "loss": 0.5904, "step": 15840 }, { "epoch": 7.04, "learning_rate": 1.294931003228232e-05, "loss": 0.7243, "step": 15850 }, { "epoch": 7.04, "learning_rate": 1.2945711122816939e-05, "loss": 0.8472, "step": 15860 }, { "epoch": 7.04, "learning_rate": 1.2942109559178113e-05, "loss": 0.9001, "step": 15870 }, { "epoch": 7.04, "learning_rate": 1.2938505343121199e-05, "loss": 0.6855, "step": 15880 }, { "epoch": 7.04, "learning_rate": 1.2934898476402851e-05, "loss": 0.7127, "step": 15890 }, { "epoch": 7.04, "learning_rate": 1.2931288960781008e-05, "loss": 0.4177, "step": 15900 }, { "epoch": 7.04, "learning_rate": 1.2927676798014904e-05, "loss": 0.8813, "step": 15910 }, { "epoch": 7.04, "learning_rate": 1.2924061989865066e-05, "loss": 0.6631, "step": 15920 }, { "epoch": 7.04, "learning_rate": 1.2920444538093305e-05, "loss": 1.1157, "step": 15930 }, { "epoch": 7.04, "learning_rate": 1.2916824444462726e-05, "loss": 0.9802, "step": 15940 }, { "epoch": 7.04, "learning_rate": 1.2913201710737716e-05, "loss": 0.5958, "step": 15950 }, { "epoch": 7.04, "learning_rate": 1.2909576338683956e-05, "loss": 1.1829, "step": 15960 }, { "epoch": 7.04, "learning_rate": 1.2905948330068401e-05, "loss": 0.7153, "step": 15970 }, { "epoch": 7.04, "learning_rate": 1.2902317686659302e-05, "loss": 1.0184, "step": 15980 }, { "epoch": 7.04, "learning_rate": 1.2898684410226192e-05, "loss": 0.6815, "step": 15990 }, { "epoch": 7.04, "learning_rate": 1.2895048502539883e-05, "loss": 0.7102, "step": 16000 }, { "epoch": 7.04, "eval_accuracy": 0.8242105263157895, "eval_f1": 0.8242105263157895, "eval_loss": 0.7876191735267639, "eval_runtime": 761.4492, "eval_samples_per_second": 6.238, "eval_steps_per_second": 1.56, "step": 16000 }, { "epoch": 8.0, "learning_rate": 1.2891409965372476e-05, "loss": 0.903, "step": 16010 }, { "epoch": 8.0, "learning_rate": 1.2887768800497346e-05, "loss": 0.7573, "step": 16020 }, { "epoch": 8.0, "learning_rate": 1.2884125009689153e-05, "loss": 0.9222, "step": 16030 }, { "epoch": 8.0, "learning_rate": 1.2880478594723841e-05, "loss": 0.9862, "step": 16040 }, { "epoch": 8.0, "learning_rate": 1.287682955737862e-05, "loss": 0.7781, "step": 16050 }, { "epoch": 8.0, "learning_rate": 1.2873177899431994e-05, "loss": 0.7142, "step": 16060 }, { "epoch": 8.0, "learning_rate": 1.2869523622663734e-05, "loss": 0.9108, "step": 16070 }, { "epoch": 8.0, "learning_rate": 1.2865866728854889e-05, "loss": 0.572, "step": 16080 }, { "epoch": 8.0, "learning_rate": 1.2862207219787787e-05, "loss": 0.5146, "step": 16090 }, { "epoch": 8.0, "learning_rate": 1.2858545097246025e-05, "loss": 0.7091, "step": 16100 }, { "epoch": 8.0, "learning_rate": 1.2854880363014482e-05, "loss": 0.759, "step": 16110 }, { "epoch": 8.0, "learning_rate": 1.2851213018879298e-05, "loss": 0.5985, "step": 16120 }, { "epoch": 8.0, "learning_rate": 1.2847543066627897e-05, "loss": 0.8886, "step": 16130 }, { "epoch": 8.0, "learning_rate": 1.284387050804897e-05, "loss": 1.0709, "step": 16140 }, { "epoch": 8.0, "learning_rate": 1.2840195344932474e-05, "loss": 0.735, "step": 16150 }, { "epoch": 8.0, "learning_rate": 1.2836517579069642e-05, "loss": 0.8514, "step": 16160 }, { "epoch": 8.0, "learning_rate": 1.2832837212252966e-05, "loss": 0.6403, "step": 16170 }, { "epoch": 8.0, "learning_rate": 1.2829154246276222e-05, "loss": 0.6856, "step": 16180 }, { "epoch": 8.0, "learning_rate": 1.2825468682934436e-05, "loss": 0.7865, "step": 16190 }, { "epoch": 8.0, "learning_rate": 1.2821780524023907e-05, "loss": 0.6365, "step": 16200 }, { "epoch": 8.0, "learning_rate": 1.28180897713422e-05, "loss": 0.6259, "step": 16210 }, { "epoch": 8.0, "learning_rate": 1.2814396426688147e-05, "loss": 0.7151, "step": 16220 }, { "epoch": 8.0, "learning_rate": 1.2810700491861833e-05, "loss": 0.7079, "step": 16230 }, { "epoch": 8.0, "learning_rate": 1.2807001968664616e-05, "loss": 0.7594, "step": 16240 }, { "epoch": 8.01, "learning_rate": 1.2803300858899106e-05, "loss": 0.7652, "step": 16250 }, { "epoch": 8.01, "learning_rate": 1.2799597164369187e-05, "loss": 0.8973, "step": 16260 }, { "epoch": 8.01, "learning_rate": 1.2795890886879988e-05, "loss": 0.9693, "step": 16270 }, { "epoch": 8.01, "learning_rate": 1.2792182028237907e-05, "loss": 1.0535, "step": 16280 }, { "epoch": 8.01, "learning_rate": 1.2788470590250594e-05, "loss": 0.7048, "step": 16290 }, { "epoch": 8.01, "learning_rate": 1.278475657472696e-05, "loss": 0.6197, "step": 16300 }, { "epoch": 8.01, "learning_rate": 1.2781039983477172e-05, "loss": 0.6348, "step": 16310 }, { "epoch": 8.01, "learning_rate": 1.2777320818312652e-05, "loss": 0.8078, "step": 16320 }, { "epoch": 8.01, "learning_rate": 1.2773599081046076e-05, "loss": 1.0742, "step": 16330 }, { "epoch": 8.01, "learning_rate": 1.276987477349137e-05, "loss": 0.6463, "step": 16340 }, { "epoch": 8.01, "learning_rate": 1.2766147897463718e-05, "loss": 0.4302, "step": 16350 }, { "epoch": 8.01, "learning_rate": 1.2762418454779556e-05, "loss": 0.751, "step": 16360 }, { "epoch": 8.01, "learning_rate": 1.2758686447256568e-05, "loss": 0.9189, "step": 16370 }, { "epoch": 8.01, "learning_rate": 1.2754951876713688e-05, "loss": 0.8189, "step": 16380 }, { "epoch": 8.01, "learning_rate": 1.27512147449711e-05, "loss": 0.5518, "step": 16390 }, { "epoch": 8.01, "learning_rate": 1.2747475053850241e-05, "loss": 1.0893, "step": 16400 }, { "epoch": 8.01, "learning_rate": 1.2743732805173786e-05, "loss": 0.9392, "step": 16410 }, { "epoch": 8.01, "learning_rate": 1.2739988000765664e-05, "loss": 1.016, "step": 16420 }, { "epoch": 8.01, "learning_rate": 1.2736240642451046e-05, "loss": 0.3563, "step": 16430 }, { "epoch": 8.01, "learning_rate": 1.2732490732056348e-05, "loss": 0.9955, "step": 16440 }, { "epoch": 8.01, "learning_rate": 1.2728738271409236e-05, "loss": 0.5461, "step": 16450 }, { "epoch": 8.01, "learning_rate": 1.2724983262338605e-05, "loss": 0.5001, "step": 16460 }, { "epoch": 8.01, "learning_rate": 1.2721225706674609e-05, "loss": 0.6405, "step": 16470 }, { "epoch": 8.01, "learning_rate": 1.2717465606248632e-05, "loss": 0.7436, "step": 16480 }, { "epoch": 8.01, "learning_rate": 1.2713702962893299e-05, "loss": 0.8613, "step": 16490 }, { "epoch": 8.01, "learning_rate": 1.270993777844248e-05, "loss": 0.929, "step": 16500 }, { "epoch": 8.01, "learning_rate": 1.270617005473128e-05, "loss": 0.504, "step": 16510 }, { "epoch": 8.01, "learning_rate": 1.2702399793596037e-05, "loss": 0.4618, "step": 16520 }, { "epoch": 8.01, "learning_rate": 1.2698626996874338e-05, "loss": 0.6505, "step": 16530 }, { "epoch": 8.01, "learning_rate": 1.2694851666404991e-05, "loss": 0.8792, "step": 16540 }, { "epoch": 8.01, "learning_rate": 1.2691073804028052e-05, "loss": 0.7674, "step": 16550 }, { "epoch": 8.01, "learning_rate": 1.26872934115848e-05, "loss": 1.042, "step": 16560 }, { "epoch": 8.01, "learning_rate": 1.2683510490917759e-05, "loss": 0.9164, "step": 16570 }, { "epoch": 8.01, "learning_rate": 1.2679725043870672e-05, "loss": 0.6523, "step": 16580 }, { "epoch": 8.01, "learning_rate": 1.2675937072288526e-05, "loss": 0.5904, "step": 16590 }, { "epoch": 8.01, "learning_rate": 1.2672146578017524e-05, "loss": 0.8063, "step": 16600 }, { "epoch": 8.01, "learning_rate": 1.2668353562905114e-05, "loss": 0.4042, "step": 16610 }, { "epoch": 8.01, "learning_rate": 1.2664558028799963e-05, "loss": 0.6684, "step": 16620 }, { "epoch": 8.01, "learning_rate": 1.2660759977551972e-05, "loss": 0.6434, "step": 16630 }, { "epoch": 8.01, "learning_rate": 1.2656959411012255e-05, "loss": 0.9733, "step": 16640 }, { "epoch": 8.01, "learning_rate": 1.2653156331033174e-05, "loss": 0.5205, "step": 16650 }, { "epoch": 8.01, "learning_rate": 1.2649350739468295e-05, "loss": 0.4417, "step": 16660 }, { "epoch": 8.01, "learning_rate": 1.2645542638172423e-05, "loss": 0.8051, "step": 16670 }, { "epoch": 8.01, "learning_rate": 1.2641732029001577e-05, "loss": 0.7189, "step": 16680 }, { "epoch": 8.01, "learning_rate": 1.2637918913813002e-05, "loss": 0.7512, "step": 16690 }, { "epoch": 8.01, "learning_rate": 1.2634103294465166e-05, "loss": 1.0825, "step": 16700 }, { "epoch": 8.01, "learning_rate": 1.2630285172817751e-05, "loss": 1.1748, "step": 16710 }, { "epoch": 8.01, "learning_rate": 1.2626464550731671e-05, "loss": 1.0208, "step": 16720 }, { "epoch": 8.01, "learning_rate": 1.2622641430069041e-05, "loss": 0.5634, "step": 16730 }, { "epoch": 8.01, "learning_rate": 1.2618815812693213e-05, "loss": 0.4922, "step": 16740 }, { "epoch": 8.02, "learning_rate": 1.2614987700468739e-05, "loss": 0.8235, "step": 16750 }, { "epoch": 8.02, "learning_rate": 1.2611157095261399e-05, "loss": 0.6315, "step": 16760 }, { "epoch": 8.02, "learning_rate": 1.2607323998938181e-05, "loss": 0.8166, "step": 16770 }, { "epoch": 8.02, "learning_rate": 1.260348841336729e-05, "loss": 0.7631, "step": 16780 }, { "epoch": 8.02, "learning_rate": 1.2599650340418144e-05, "loss": 0.7756, "step": 16790 }, { "epoch": 8.02, "learning_rate": 1.2595809781961374e-05, "loss": 0.7076, "step": 16800 }, { "epoch": 8.02, "learning_rate": 1.2591966739868822e-05, "loss": 0.7914, "step": 16810 }, { "epoch": 8.02, "learning_rate": 1.2588121216013537e-05, "loss": 0.6832, "step": 16820 }, { "epoch": 8.02, "learning_rate": 1.2584273212269782e-05, "loss": 0.547, "step": 16830 }, { "epoch": 8.02, "learning_rate": 1.258042273051303e-05, "loss": 0.9669, "step": 16840 }, { "epoch": 8.02, "learning_rate": 1.2576569772619955e-05, "loss": 0.6243, "step": 16850 }, { "epoch": 8.02, "learning_rate": 1.2572714340468445e-05, "loss": 0.6971, "step": 16860 }, { "epoch": 8.02, "learning_rate": 1.256885643593759e-05, "loss": 0.5598, "step": 16870 }, { "epoch": 8.02, "learning_rate": 1.2564996060907683e-05, "loss": 0.604, "step": 16880 }, { "epoch": 8.02, "learning_rate": 1.2561133217260227e-05, "loss": 0.7139, "step": 16890 }, { "epoch": 8.02, "learning_rate": 1.2557267906877925e-05, "loss": 0.4249, "step": 16900 }, { "epoch": 8.02, "learning_rate": 1.255340013164468e-05, "loss": 0.4992, "step": 16910 }, { "epoch": 8.02, "learning_rate": 1.2549529893445603e-05, "loss": 1.0948, "step": 16920 }, { "epoch": 8.02, "learning_rate": 1.2545657194166997e-05, "loss": 0.6932, "step": 16930 }, { "epoch": 8.02, "learning_rate": 1.2541782035696372e-05, "loss": 0.5878, "step": 16940 }, { "epoch": 8.02, "learning_rate": 1.2537904419922428e-05, "loss": 0.6641, "step": 16950 }, { "epoch": 8.02, "learning_rate": 1.253402434873507e-05, "loss": 0.8987, "step": 16960 }, { "epoch": 8.02, "learning_rate": 1.2530141824025399e-05, "loss": 0.5498, "step": 16970 }, { "epoch": 8.02, "learning_rate": 1.2526256847685713e-05, "loss": 0.7692, "step": 16980 }, { "epoch": 8.02, "learning_rate": 1.2522369421609497e-05, "loss": 0.7997, "step": 16990 }, { "epoch": 8.02, "learning_rate": 1.2518479547691437e-05, "loss": 0.5458, "step": 17000 }, { "epoch": 8.02, "learning_rate": 1.2514587227827412e-05, "loss": 0.8942, "step": 17010 }, { "epoch": 8.02, "learning_rate": 1.2510692463914487e-05, "loss": 0.461, "step": 17020 }, { "epoch": 8.02, "learning_rate": 1.250679525785093e-05, "loss": 0.9453, "step": 17030 }, { "epoch": 8.02, "learning_rate": 1.2502895611536185e-05, "loss": 1.1806, "step": 17040 }, { "epoch": 8.02, "learning_rate": 1.2498993526870893e-05, "loss": 0.8235, "step": 17050 }, { "epoch": 8.02, "learning_rate": 1.2495089005756888e-05, "loss": 0.3497, "step": 17060 }, { "epoch": 8.02, "learning_rate": 1.249118205009718e-05, "loss": 0.9592, "step": 17070 }, { "epoch": 8.02, "learning_rate": 1.2487272661795976e-05, "loss": 0.5981, "step": 17080 }, { "epoch": 8.02, "learning_rate": 1.2483360842758662e-05, "loss": 0.7031, "step": 17090 }, { "epoch": 8.02, "learning_rate": 1.2479446594891812e-05, "loss": 0.8887, "step": 17100 }, { "epoch": 8.02, "learning_rate": 1.2475529920103183e-05, "loss": 0.5698, "step": 17110 }, { "epoch": 8.02, "learning_rate": 1.2471610820301714e-05, "loss": 0.9545, "step": 17120 }, { "epoch": 8.02, "learning_rate": 1.2467689297397526e-05, "loss": 0.5898, "step": 17130 }, { "epoch": 8.02, "learning_rate": 1.2463765353301925e-05, "loss": 0.4993, "step": 17140 }, { "epoch": 8.02, "learning_rate": 1.245983898992739e-05, "loss": 0.4658, "step": 17150 }, { "epoch": 8.02, "learning_rate": 1.2455910209187584e-05, "loss": 0.7944, "step": 17160 }, { "epoch": 8.02, "learning_rate": 1.2451979012997347e-05, "loss": 0.8215, "step": 17170 }, { "epoch": 8.02, "learning_rate": 1.2448045403272696e-05, "loss": 0.5273, "step": 17180 }, { "epoch": 8.02, "learning_rate": 1.2444109381930825e-05, "loss": 0.6735, "step": 17190 }, { "epoch": 8.02, "learning_rate": 1.2440170950890103e-05, "loss": 0.8846, "step": 17200 }, { "epoch": 8.02, "learning_rate": 1.2436230112070075e-05, "loss": 0.5238, "step": 17210 }, { "epoch": 8.02, "learning_rate": 1.2432286867391455e-05, "loss": 0.9863, "step": 17220 }, { "epoch": 8.02, "learning_rate": 1.2428341218776135e-05, "loss": 0.8749, "step": 17230 }, { "epoch": 8.02, "learning_rate": 1.2424393168147173e-05, "loss": 0.5667, "step": 17240 }, { "epoch": 8.03, "learning_rate": 1.2420442717428804e-05, "loss": 0.877, "step": 17250 }, { "epoch": 8.03, "learning_rate": 1.241648986854643e-05, "loss": 0.7394, "step": 17260 }, { "epoch": 8.03, "learning_rate": 1.241253462342662e-05, "loss": 0.6451, "step": 17270 }, { "epoch": 8.03, "learning_rate": 1.2408576983997112e-05, "loss": 0.8236, "step": 17280 }, { "epoch": 8.03, "learning_rate": 1.2404616952186811e-05, "loss": 0.8152, "step": 17290 }, { "epoch": 8.03, "learning_rate": 1.2400654529925793e-05, "loss": 0.8507, "step": 17300 }, { "epoch": 8.03, "learning_rate": 1.2396689719145288e-05, "loss": 0.7373, "step": 17310 }, { "epoch": 8.03, "learning_rate": 1.2392722521777699e-05, "loss": 0.4609, "step": 17320 }, { "epoch": 8.03, "learning_rate": 1.2388752939756591e-05, "loss": 0.7621, "step": 17330 }, { "epoch": 8.03, "learning_rate": 1.238478097501669e-05, "loss": 0.7369, "step": 17340 }, { "epoch": 8.03, "learning_rate": 1.2380806629493882e-05, "loss": 0.6674, "step": 17350 }, { "epoch": 8.03, "learning_rate": 1.2376829905125215e-05, "loss": 0.7949, "step": 17360 }, { "epoch": 8.03, "learning_rate": 1.23728508038489e-05, "loss": 0.9449, "step": 17370 }, { "epoch": 8.03, "learning_rate": 1.2368869327604297e-05, "loss": 0.7967, "step": 17380 }, { "epoch": 8.03, "learning_rate": 1.2364885478331933e-05, "loss": 0.9675, "step": 17390 }, { "epoch": 8.03, "learning_rate": 1.2360899257973482e-05, "loss": 1.2767, "step": 17400 }, { "epoch": 8.03, "learning_rate": 1.2356910668471788e-05, "loss": 0.816, "step": 17410 }, { "epoch": 8.03, "learning_rate": 1.2352919711770834e-05, "loss": 0.8011, "step": 17420 }, { "epoch": 8.03, "learning_rate": 1.2348926389815766e-05, "loss": 0.5407, "step": 17430 }, { "epoch": 8.03, "learning_rate": 1.2344930704552883e-05, "loss": 0.5253, "step": 17440 }, { "epoch": 8.03, "learning_rate": 1.234093265792963e-05, "loss": 0.6741, "step": 17450 }, { "epoch": 8.03, "learning_rate": 1.2336932251894608e-05, "loss": 0.8949, "step": 17460 }, { "epoch": 8.03, "learning_rate": 1.2332929488397567e-05, "loss": 1.0454, "step": 17470 }, { "epoch": 8.03, "learning_rate": 1.2328924369389404e-05, "loss": 0.4574, "step": 17480 }, { "epoch": 8.03, "learning_rate": 1.2324916896822166e-05, "loss": 0.9879, "step": 17490 }, { "epoch": 8.03, "learning_rate": 1.2320907072649045e-05, "loss": 0.9544, "step": 17500 }, { "epoch": 8.03, "learning_rate": 1.2316894898824383e-05, "loss": 1.1716, "step": 17510 }, { "epoch": 8.03, "learning_rate": 1.2312880377303664e-05, "loss": 0.9311, "step": 17520 }, { "epoch": 8.03, "learning_rate": 1.2308863510043517e-05, "loss": 0.5337, "step": 17530 }, { "epoch": 8.03, "learning_rate": 1.2304844299001712e-05, "loss": 0.8078, "step": 17540 }, { "epoch": 8.03, "learning_rate": 1.2300822746137167e-05, "loss": 0.8005, "step": 17550 }, { "epoch": 8.03, "learning_rate": 1.2296798853409937e-05, "loss": 0.8335, "step": 17560 }, { "epoch": 8.03, "learning_rate": 1.2292772622781215e-05, "loss": 0.6197, "step": 17570 }, { "epoch": 8.03, "learning_rate": 1.2288744056213343e-05, "loss": 0.7248, "step": 17580 }, { "epoch": 8.03, "learning_rate": 1.228471315566979e-05, "loss": 0.5651, "step": 17590 }, { "epoch": 8.03, "learning_rate": 1.2280679923115173e-05, "loss": 0.7526, "step": 17600 }, { "epoch": 8.03, "learning_rate": 1.2276644360515238e-05, "loss": 0.3586, "step": 17610 }, { "epoch": 8.03, "learning_rate": 1.2272606469836868e-05, "loss": 0.8768, "step": 17620 }, { "epoch": 8.03, "learning_rate": 1.2268566253048081e-05, "loss": 0.8052, "step": 17630 }, { "epoch": 8.03, "learning_rate": 1.2264523712118033e-05, "loss": 0.748, "step": 17640 }, { "epoch": 8.03, "learning_rate": 1.2260478849017007e-05, "loss": 0.7358, "step": 17650 }, { "epoch": 8.03, "learning_rate": 1.225643166571642e-05, "loss": 0.3771, "step": 17660 }, { "epoch": 8.03, "learning_rate": 1.2252382164188825e-05, "loss": 0.6613, "step": 17670 }, { "epoch": 8.03, "learning_rate": 1.2248330346407893e-05, "loss": 1.1527, "step": 17680 }, { "epoch": 8.03, "learning_rate": 1.2244276214348435e-05, "loss": 0.5922, "step": 17690 }, { "epoch": 8.03, "learning_rate": 1.2240219769986382e-05, "loss": 0.6686, "step": 17700 }, { "epoch": 8.03, "learning_rate": 1.2236161015298799e-05, "loss": 0.6159, "step": 17710 }, { "epoch": 8.03, "learning_rate": 1.2232099952263872e-05, "loss": 0.9876, "step": 17720 }, { "epoch": 8.03, "learning_rate": 1.2228036582860917e-05, "loss": 0.6581, "step": 17730 }, { "epoch": 8.03, "learning_rate": 1.2223970909070367e-05, "loss": 0.613, "step": 17740 }, { "epoch": 8.04, "learning_rate": 1.2219902932873782e-05, "loss": 0.5538, "step": 17750 }, { "epoch": 8.04, "learning_rate": 1.2215832656253846e-05, "loss": 0.7896, "step": 17760 }, { "epoch": 8.04, "learning_rate": 1.2211760081194362e-05, "loss": 0.9046, "step": 17770 }, { "epoch": 8.04, "learning_rate": 1.2207685209680254e-05, "loss": 0.8718, "step": 17780 }, { "epoch": 8.04, "learning_rate": 1.2203608043697564e-05, "loss": 0.6827, "step": 17790 }, { "epoch": 8.04, "learning_rate": 1.2199528585233455e-05, "loss": 0.5505, "step": 17800 }, { "epoch": 8.04, "learning_rate": 1.2195446836276202e-05, "loss": 0.5683, "step": 17810 }, { "epoch": 8.04, "learning_rate": 1.2191362798815203e-05, "loss": 0.946, "step": 17820 }, { "epoch": 8.04, "learning_rate": 1.2187276474840968e-05, "loss": 0.581, "step": 17830 }, { "epoch": 8.04, "learning_rate": 1.2183187866345117e-05, "loss": 0.9227, "step": 17840 }, { "epoch": 8.04, "learning_rate": 1.2179096975320398e-05, "loss": 0.5363, "step": 17850 }, { "epoch": 8.04, "learning_rate": 1.217500380376065e-05, "loss": 0.6947, "step": 17860 }, { "epoch": 8.04, "learning_rate": 1.2170908353660844e-05, "loss": 0.8875, "step": 17870 }, { "epoch": 8.04, "learning_rate": 1.216681062701705e-05, "loss": 0.4628, "step": 17880 }, { "epoch": 8.04, "learning_rate": 1.2162710625826446e-05, "loss": 0.8057, "step": 17890 }, { "epoch": 8.04, "learning_rate": 1.2158608352087328e-05, "loss": 0.9372, "step": 17900 }, { "epoch": 8.04, "learning_rate": 1.2154503807799093e-05, "loss": 0.649, "step": 17910 }, { "epoch": 8.04, "learning_rate": 1.2150396994962244e-05, "loss": 0.835, "step": 17920 }, { "epoch": 8.04, "learning_rate": 1.2146287915578394e-05, "loss": 0.4606, "step": 17930 }, { "epoch": 8.04, "learning_rate": 1.2142176571650259e-05, "loss": 0.7991, "step": 17940 }, { "epoch": 8.04, "learning_rate": 1.2138062965181656e-05, "loss": 0.7018, "step": 17950 }, { "epoch": 8.04, "learning_rate": 1.2133947098177508e-05, "loss": 1.0809, "step": 17960 }, { "epoch": 8.04, "learning_rate": 1.2129828972643837e-05, "loss": 0.4973, "step": 17970 }, { "epoch": 8.04, "learning_rate": 1.212570859058777e-05, "loss": 0.6203, "step": 17980 }, { "epoch": 8.04, "learning_rate": 1.2121585954017528e-05, "loss": 0.5908, "step": 17990 }, { "epoch": 8.04, "learning_rate": 1.2117461064942437e-05, "loss": 0.5726, "step": 18000 }, { "epoch": 8.04, "eval_accuracy": 0.8231578947368421, "eval_f1": 0.8231578947368422, "eval_loss": 0.8805130124092102, "eval_runtime": 767.3617, "eval_samples_per_second": 6.19, "eval_steps_per_second": 1.548, "step": 18000 }, { "epoch": 9.0, "learning_rate": 1.2113333925372919e-05, "loss": 1.2099, "step": 18010 }, { "epoch": 9.0, "learning_rate": 1.2109204537320483e-05, "loss": 0.937, "step": 18020 }, { "epoch": 9.0, "learning_rate": 1.2105072902797753e-05, "loss": 0.7613, "step": 18030 }, { "epoch": 9.0, "learning_rate": 1.2100939023818432e-05, "loss": 0.8872, "step": 18040 }, { "epoch": 9.0, "learning_rate": 1.2096802902397324e-05, "loss": 0.7832, "step": 18050 }, { "epoch": 9.0, "learning_rate": 1.2092664540550323e-05, "loss": 0.8112, "step": 18060 }, { "epoch": 9.0, "learning_rate": 1.2088523940294418e-05, "loss": 0.6844, "step": 18070 }, { "epoch": 9.0, "learning_rate": 1.2084381103647688e-05, "loss": 0.8863, "step": 18080 }, { "epoch": 9.0, "learning_rate": 1.2080236032629298e-05, "loss": 0.6259, "step": 18090 }, { "epoch": 9.0, "learning_rate": 1.207608872925951e-05, "loss": 0.5141, "step": 18100 }, { "epoch": 9.0, "learning_rate": 1.207193919555966e-05, "loss": 0.7785, "step": 18110 }, { "epoch": 9.0, "learning_rate": 1.2067787433552192e-05, "loss": 0.8503, "step": 18120 }, { "epoch": 9.0, "learning_rate": 1.2063633445260615e-05, "loss": 0.7436, "step": 18130 }, { "epoch": 9.0, "learning_rate": 1.2059477232709542e-05, "loss": 0.8031, "step": 18140 }, { "epoch": 9.0, "learning_rate": 1.205531879792465e-05, "loss": 1.2157, "step": 18150 }, { "epoch": 9.0, "learning_rate": 1.2051158142932718e-05, "loss": 0.8124, "step": 18160 }, { "epoch": 9.0, "learning_rate": 1.204699526976159e-05, "loss": 0.7364, "step": 18170 }, { "epoch": 9.0, "learning_rate": 1.2042830180440211e-05, "loss": 0.7025, "step": 18180 }, { "epoch": 9.0, "learning_rate": 1.2038662876998586e-05, "loss": 0.398, "step": 18190 }, { "epoch": 9.0, "learning_rate": 1.2034493361467813e-05, "loss": 0.9601, "step": 18200 }, { "epoch": 9.0, "learning_rate": 1.203032163588006e-05, "loss": 0.4517, "step": 18210 }, { "epoch": 9.0, "learning_rate": 1.2026147702268574e-05, "loss": 0.6078, "step": 18220 }, { "epoch": 9.0, "learning_rate": 1.2021971562667687e-05, "loss": 0.6127, "step": 18230 }, { "epoch": 9.0, "learning_rate": 1.201779321911279e-05, "loss": 0.9812, "step": 18240 }, { "epoch": 9.01, "learning_rate": 1.2013612673640364e-05, "loss": 0.7009, "step": 18250 }, { "epoch": 9.01, "learning_rate": 1.200942992828795e-05, "loss": 0.5552, "step": 18260 }, { "epoch": 9.01, "learning_rate": 1.2005244985094171e-05, "loss": 0.7145, "step": 18270 }, { "epoch": 9.01, "learning_rate": 1.2001057846098717e-05, "loss": 0.4111, "step": 18280 }, { "epoch": 9.01, "learning_rate": 1.1996868513342349e-05, "loss": 0.5976, "step": 18290 }, { "epoch": 9.01, "learning_rate": 1.1992676988866894e-05, "loss": 0.7484, "step": 18300 }, { "epoch": 9.01, "learning_rate": 1.1988483274715256e-05, "loss": 0.9863, "step": 18310 }, { "epoch": 9.01, "learning_rate": 1.1984287372931392e-05, "loss": 0.3628, "step": 18320 }, { "epoch": 9.01, "learning_rate": 1.1980089285560342e-05, "loss": 0.4903, "step": 18330 }, { "epoch": 9.01, "learning_rate": 1.1975889014648195e-05, "loss": 0.8426, "step": 18340 }, { "epoch": 9.01, "learning_rate": 1.197168656224212e-05, "loss": 0.7967, "step": 18350 }, { "epoch": 9.01, "learning_rate": 1.1967481930390335e-05, "loss": 0.4083, "step": 18360 }, { "epoch": 9.01, "learning_rate": 1.196327512114213e-05, "loss": 0.6116, "step": 18370 }, { "epoch": 9.01, "learning_rate": 1.1959066136547851e-05, "loss": 0.9358, "step": 18380 }, { "epoch": 9.01, "learning_rate": 1.1954854978658903e-05, "loss": 0.6193, "step": 18390 }, { "epoch": 9.01, "learning_rate": 1.1950641649527762e-05, "loss": 0.677, "step": 18400 }, { "epoch": 9.01, "learning_rate": 1.1946426151207945e-05, "loss": 0.6106, "step": 18410 }, { "epoch": 9.01, "learning_rate": 1.1942208485754038e-05, "loss": 0.6699, "step": 18420 }, { "epoch": 9.01, "learning_rate": 1.193798865522168e-05, "loss": 0.5351, "step": 18430 }, { "epoch": 9.01, "learning_rate": 1.1933766661667565e-05, "loss": 0.836, "step": 18440 }, { "epoch": 9.01, "learning_rate": 1.192954250714944e-05, "loss": 0.7118, "step": 18450 }, { "epoch": 9.01, "learning_rate": 1.192531619372611e-05, "loss": 1.0574, "step": 18460 }, { "epoch": 9.01, "learning_rate": 1.1921087723457425e-05, "loss": 0.7786, "step": 18470 }, { "epoch": 9.01, "learning_rate": 1.191685709840429e-05, "loss": 0.9358, "step": 18480 }, { "epoch": 9.01, "learning_rate": 1.1912624320628666e-05, "loss": 0.8795, "step": 18490 }, { "epoch": 9.01, "learning_rate": 1.1908389392193549e-05, "loss": 0.7275, "step": 18500 }, { "epoch": 9.01, "learning_rate": 1.1904152315162996e-05, "loss": 0.487, "step": 18510 }, { "epoch": 9.01, "learning_rate": 1.189991309160211e-05, "loss": 0.6892, "step": 18520 }, { "epoch": 9.01, "learning_rate": 1.1895671723577032e-05, "loss": 0.8501, "step": 18530 }, { "epoch": 9.01, "learning_rate": 1.1891428213154956e-05, "loss": 1.0048, "step": 18540 }, { "epoch": 9.01, "learning_rate": 1.1887182562404118e-05, "loss": 0.579, "step": 18550 }, { "epoch": 9.01, "learning_rate": 1.188293477339379e-05, "loss": 1.0681, "step": 18560 }, { "epoch": 9.01, "learning_rate": 1.1878684848194302e-05, "loss": 0.9328, "step": 18570 }, { "epoch": 9.01, "learning_rate": 1.187443278887701e-05, "loss": 0.8938, "step": 18580 }, { "epoch": 9.01, "learning_rate": 1.187017859751432e-05, "loss": 0.7139, "step": 18590 }, { "epoch": 9.01, "learning_rate": 1.1865922276179671e-05, "loss": 0.7831, "step": 18600 }, { "epoch": 9.01, "learning_rate": 1.186166382694754e-05, "loss": 0.6213, "step": 18610 }, { "epoch": 9.01, "learning_rate": 1.185740325189345e-05, "loss": 0.901, "step": 18620 }, { "epoch": 9.01, "learning_rate": 1.1853140553093945e-05, "loss": 0.5026, "step": 18630 }, { "epoch": 9.01, "learning_rate": 1.1848875732626619e-05, "loss": 0.5141, "step": 18640 }, { "epoch": 9.01, "learning_rate": 1.1844608792570091e-05, "loss": 0.7118, "step": 18650 }, { "epoch": 9.01, "learning_rate": 1.1840339735004018e-05, "loss": 1.1035, "step": 18660 }, { "epoch": 9.01, "learning_rate": 1.1836068562009084e-05, "loss": 0.4183, "step": 18670 }, { "epoch": 9.01, "learning_rate": 1.1831795275667007e-05, "loss": 0.6543, "step": 18680 }, { "epoch": 9.01, "learning_rate": 1.1827519878060537e-05, "loss": 0.5805, "step": 18690 }, { "epoch": 9.01, "learning_rate": 1.182324237127345e-05, "loss": 0.3918, "step": 18700 }, { "epoch": 9.01, "learning_rate": 1.1818962757390552e-05, "loss": 1.0159, "step": 18710 }, { "epoch": 9.01, "learning_rate": 1.1814681038497671e-05, "loss": 0.5038, "step": 18720 }, { "epoch": 9.01, "learning_rate": 1.1810397216681665e-05, "loss": 0.5818, "step": 18730 }, { "epoch": 9.01, "learning_rate": 1.1806111294030424e-05, "loss": 0.8919, "step": 18740 }, { "epoch": 9.02, "learning_rate": 1.1801823272632845e-05, "loss": 0.8671, "step": 18750 }, { "epoch": 9.02, "learning_rate": 1.1797533154578866e-05, "loss": 0.858, "step": 18760 }, { "epoch": 9.02, "learning_rate": 1.1793240941959434e-05, "loss": 0.5649, "step": 18770 }, { "epoch": 9.02, "learning_rate": 1.1788946636866518e-05, "loss": 0.7596, "step": 18780 }, { "epoch": 9.02, "learning_rate": 1.1784650241393117e-05, "loss": 0.8354, "step": 18790 }, { "epoch": 9.02, "learning_rate": 1.178035175763324e-05, "loss": 0.5007, "step": 18800 }, { "epoch": 9.02, "learning_rate": 1.1776051187681911e-05, "loss": 0.9104, "step": 18810 }, { "epoch": 9.02, "learning_rate": 1.177174853363518e-05, "loss": 0.639, "step": 18820 }, { "epoch": 9.02, "learning_rate": 1.176744379759011e-05, "loss": 0.6799, "step": 18830 }, { "epoch": 9.02, "learning_rate": 1.1763136981644773e-05, "loss": 0.9681, "step": 18840 }, { "epoch": 9.02, "learning_rate": 1.175882808789826e-05, "loss": 1.0896, "step": 18850 }, { "epoch": 9.02, "learning_rate": 1.1754517118450675e-05, "loss": 0.5733, "step": 18860 }, { "epoch": 9.02, "learning_rate": 1.1750204075403128e-05, "loss": 0.6242, "step": 18870 }, { "epoch": 9.02, "learning_rate": 1.1745888960857749e-05, "loss": 0.5115, "step": 18880 }, { "epoch": 9.02, "learning_rate": 1.1741571776917673e-05, "loss": 0.5443, "step": 18890 }, { "epoch": 9.02, "learning_rate": 1.1737252525687035e-05, "loss": 0.6259, "step": 18900 }, { "epoch": 9.02, "learning_rate": 1.1732931209270995e-05, "loss": 0.7084, "step": 18910 }, { "epoch": 9.02, "learning_rate": 1.17286078297757e-05, "loss": 0.8891, "step": 18920 }, { "epoch": 9.02, "learning_rate": 1.1724282389308324e-05, "loss": 0.8908, "step": 18930 }, { "epoch": 9.02, "learning_rate": 1.1719954889977027e-05, "loss": 0.8218, "step": 18940 }, { "epoch": 9.02, "learning_rate": 1.1715625333890979e-05, "loss": 0.7136, "step": 18950 }, { "epoch": 9.02, "learning_rate": 1.1711293723160359e-05, "loss": 0.587, "step": 18960 }, { "epoch": 9.02, "learning_rate": 1.1706960059896336e-05, "loss": 0.6956, "step": 18970 }, { "epoch": 9.02, "learning_rate": 1.1702624346211084e-05, "loss": 0.5694, "step": 18980 }, { "epoch": 9.02, "learning_rate": 1.1698286584217785e-05, "loss": 0.5655, "step": 18990 }, { "epoch": 9.02, "learning_rate": 1.1693946776030601e-05, "loss": 0.5769, "step": 19000 }, { "epoch": 9.02, "learning_rate": 1.168960492376471e-05, "loss": 0.7568, "step": 19010 }, { "epoch": 9.02, "learning_rate": 1.1685261029536276e-05, "loss": 0.6725, "step": 19020 }, { "epoch": 9.02, "learning_rate": 1.1680915095462456e-05, "loss": 0.6308, "step": 19030 }, { "epoch": 9.02, "learning_rate": 1.167656712366141e-05, "loss": 0.5472, "step": 19040 }, { "epoch": 9.02, "learning_rate": 1.1672217116252287e-05, "loss": 0.6917, "step": 19050 }, { "epoch": 9.02, "learning_rate": 1.1667865075355224e-05, "loss": 0.8717, "step": 19060 }, { "epoch": 9.02, "learning_rate": 1.1663511003091356e-05, "loss": 0.8956, "step": 19070 }, { "epoch": 9.02, "learning_rate": 1.1659154901582805e-05, "loss": 0.3889, "step": 19080 }, { "epoch": 9.02, "learning_rate": 1.165479677295268e-05, "loss": 0.494, "step": 19090 }, { "epoch": 9.02, "learning_rate": 1.1650436619325081e-05, "loss": 0.3606, "step": 19100 }, { "epoch": 9.02, "learning_rate": 1.1646074442825094e-05, "loss": 0.6802, "step": 19110 }, { "epoch": 9.02, "learning_rate": 1.164171024557879e-05, "loss": 0.6772, "step": 19120 }, { "epoch": 9.02, "learning_rate": 1.1637344029713228e-05, "loss": 0.9367, "step": 19130 }, { "epoch": 9.02, "learning_rate": 1.1632975797356445e-05, "loss": 0.5726, "step": 19140 }, { "epoch": 9.02, "learning_rate": 1.1628605550637467e-05, "loss": 0.5994, "step": 19150 }, { "epoch": 9.02, "learning_rate": 1.16242332916863e-05, "loss": 0.6001, "step": 19160 }, { "epoch": 9.02, "learning_rate": 1.1619859022633925e-05, "loss": 0.7313, "step": 19170 }, { "epoch": 9.02, "learning_rate": 1.1615482745612315e-05, "loss": 0.5976, "step": 19180 }, { "epoch": 9.02, "learning_rate": 1.1611104462754406e-05, "loss": 0.6038, "step": 19190 }, { "epoch": 9.02, "learning_rate": 1.1606724176194128e-05, "loss": 0.7814, "step": 19200 }, { "epoch": 9.02, "learning_rate": 1.1602341888066372e-05, "loss": 0.6553, "step": 19210 }, { "epoch": 9.02, "learning_rate": 1.1597957600507019e-05, "loss": 0.7844, "step": 19220 }, { "epoch": 9.02, "learning_rate": 1.1593571315652912e-05, "loss": 0.4785, "step": 19230 }, { "epoch": 9.02, "learning_rate": 1.1589183035641877e-05, "loss": 0.7918, "step": 19240 }, { "epoch": 9.03, "learning_rate": 1.1584792762612705e-05, "loss": 1.0776, "step": 19250 }, { "epoch": 9.03, "learning_rate": 1.1580400498705161e-05, "loss": 1.1064, "step": 19260 }, { "epoch": 9.03, "learning_rate": 1.1576006246059987e-05, "loss": 0.9584, "step": 19270 }, { "epoch": 9.03, "learning_rate": 1.1571610006818883e-05, "loss": 0.983, "step": 19280 }, { "epoch": 9.03, "learning_rate": 1.1567211783124523e-05, "loss": 0.5213, "step": 19290 }, { "epoch": 9.03, "learning_rate": 1.156281157712055e-05, "loss": 0.8329, "step": 19300 }, { "epoch": 9.03, "learning_rate": 1.155840939095157e-05, "loss": 0.9438, "step": 19310 }, { "epoch": 9.03, "learning_rate": 1.1554005226763153e-05, "loss": 0.5701, "step": 19320 }, { "epoch": 9.03, "learning_rate": 1.1549599086701841e-05, "loss": 0.698, "step": 19330 }, { "epoch": 9.03, "learning_rate": 1.1545190972915127e-05, "loss": 0.5665, "step": 19340 }, { "epoch": 9.03, "learning_rate": 1.1540780887551473e-05, "loss": 0.7165, "step": 19350 }, { "epoch": 9.03, "learning_rate": 1.1536368832760304e-05, "loss": 0.694, "step": 19360 }, { "epoch": 9.03, "learning_rate": 1.1531954810692e-05, "loss": 0.6035, "step": 19370 }, { "epoch": 9.03, "learning_rate": 1.1527538823497903e-05, "loss": 0.7644, "step": 19380 }, { "epoch": 9.03, "learning_rate": 1.1523120873330308e-05, "loss": 0.8886, "step": 19390 }, { "epoch": 9.03, "learning_rate": 1.1518700962342475e-05, "loss": 0.7071, "step": 19400 }, { "epoch": 9.03, "learning_rate": 1.1514279092688612e-05, "loss": 0.8012, "step": 19410 }, { "epoch": 9.03, "learning_rate": 1.1509855266523884e-05, "loss": 0.9929, "step": 19420 }, { "epoch": 9.03, "learning_rate": 1.1505429486004414e-05, "loss": 0.7935, "step": 19430 }, { "epoch": 9.03, "learning_rate": 1.150100175328727e-05, "loss": 0.6962, "step": 19440 }, { "epoch": 9.03, "learning_rate": 1.1496572070530475e-05, "loss": 0.8044, "step": 19450 }, { "epoch": 9.03, "learning_rate": 1.1492140439893006e-05, "loss": 0.7601, "step": 19460 }, { "epoch": 9.03, "learning_rate": 1.148770686353478e-05, "loss": 0.9783, "step": 19470 }, { "epoch": 9.03, "learning_rate": 1.1483271343616675e-05, "loss": 0.5717, "step": 19480 }, { "epoch": 9.03, "learning_rate": 1.1478833882300505e-05, "loss": 0.4403, "step": 19490 }, { "epoch": 9.03, "learning_rate": 1.1474394481749037e-05, "loss": 0.6769, "step": 19500 }, { "epoch": 9.03, "learning_rate": 1.1469953144125981e-05, "loss": 0.628, "step": 19510 }, { "epoch": 9.03, "learning_rate": 1.1465509871595986e-05, "loss": 0.2903, "step": 19520 }, { "epoch": 9.03, "learning_rate": 1.1461064666324659e-05, "loss": 1.0457, "step": 19530 }, { "epoch": 9.03, "learning_rate": 1.1456617530478528e-05, "loss": 0.9678, "step": 19540 }, { "epoch": 9.03, "learning_rate": 1.1452168466225084e-05, "loss": 0.7661, "step": 19550 }, { "epoch": 9.03, "learning_rate": 1.1447717475732735e-05, "loss": 0.7126, "step": 19560 }, { "epoch": 9.03, "learning_rate": 1.144326456117085e-05, "loss": 0.6659, "step": 19570 }, { "epoch": 9.03, "learning_rate": 1.1438809724709719e-05, "loss": 0.5846, "step": 19580 }, { "epoch": 9.03, "learning_rate": 1.1434352968520574e-05, "loss": 1.1135, "step": 19590 }, { "epoch": 9.03, "learning_rate": 1.1429894294775594e-05, "loss": 0.4112, "step": 19600 }, { "epoch": 9.03, "learning_rate": 1.1425433705647872e-05, "loss": 0.6958, "step": 19610 }, { "epoch": 9.03, "learning_rate": 1.142097120331145e-05, "loss": 0.5993, "step": 19620 }, { "epoch": 9.03, "learning_rate": 1.1416506789941295e-05, "loss": 0.9286, "step": 19630 }, { "epoch": 9.03, "learning_rate": 1.1412040467713309e-05, "loss": 1.0423, "step": 19640 }, { "epoch": 9.03, "learning_rate": 1.1407572238804325e-05, "loss": 0.6883, "step": 19650 }, { "epoch": 9.03, "learning_rate": 1.1403102105392098e-05, "loss": 0.5804, "step": 19660 }, { "epoch": 9.03, "learning_rate": 1.139863006965532e-05, "loss": 0.7858, "step": 19670 }, { "epoch": 9.03, "learning_rate": 1.139415613377361e-05, "loss": 0.7412, "step": 19680 }, { "epoch": 9.03, "learning_rate": 1.1389680299927506e-05, "loss": 0.8717, "step": 19690 }, { "epoch": 9.03, "learning_rate": 1.1385202570298477e-05, "loss": 0.6939, "step": 19700 }, { "epoch": 9.03, "learning_rate": 1.1380722947068912e-05, "loss": 0.7415, "step": 19710 }, { "epoch": 9.03, "learning_rate": 1.1376241432422127e-05, "loss": 0.9208, "step": 19720 }, { "epoch": 9.03, "learning_rate": 1.1371758028542356e-05, "loss": 0.9007, "step": 19730 }, { "epoch": 9.03, "learning_rate": 1.1367272737614758e-05, "loss": 0.6171, "step": 19740 }, { "epoch": 9.04, "learning_rate": 1.1362785561825407e-05, "loss": 0.7419, "step": 19750 }, { "epoch": 9.04, "learning_rate": 1.13582965033613e-05, "loss": 0.8097, "step": 19760 }, { "epoch": 9.04, "learning_rate": 1.1353805564410347e-05, "loss": 0.6628, "step": 19770 }, { "epoch": 9.04, "learning_rate": 1.1349312747161377e-05, "loss": 0.5436, "step": 19780 }, { "epoch": 9.04, "learning_rate": 1.1344818053804139e-05, "loss": 0.8017, "step": 19790 }, { "epoch": 9.04, "learning_rate": 1.1340321486529287e-05, "loss": 0.6905, "step": 19800 }, { "epoch": 9.04, "learning_rate": 1.1335823047528395e-05, "loss": 0.4657, "step": 19810 }, { "epoch": 9.04, "learning_rate": 1.1331322738993949e-05, "loss": 0.815, "step": 19820 }, { "epoch": 9.04, "learning_rate": 1.1326820563119344e-05, "loss": 0.6882, "step": 19830 }, { "epoch": 9.04, "learning_rate": 1.1322316522098883e-05, "loss": 0.748, "step": 19840 }, { "epoch": 9.04, "learning_rate": 1.1317810618127785e-05, "loss": 0.572, "step": 19850 }, { "epoch": 9.04, "learning_rate": 1.1313302853402172e-05, "loss": 0.8386, "step": 19860 }, { "epoch": 9.04, "learning_rate": 1.130879323011907e-05, "loss": 0.6839, "step": 19870 }, { "epoch": 9.04, "learning_rate": 1.1304281750476418e-05, "loss": 1.0472, "step": 19880 }, { "epoch": 9.04, "learning_rate": 1.1299768416673056e-05, "loss": 0.6152, "step": 19890 }, { "epoch": 9.04, "learning_rate": 1.1295253230908728e-05, "loss": 0.6518, "step": 19900 }, { "epoch": 9.04, "learning_rate": 1.1290736195384084e-05, "loss": 0.4062, "step": 19910 }, { "epoch": 9.04, "learning_rate": 1.1286217312300663e-05, "loss": 0.5654, "step": 19920 }, { "epoch": 9.04, "learning_rate": 1.1281696583860923e-05, "loss": 0.4624, "step": 19930 }, { "epoch": 9.04, "learning_rate": 1.1277174012268207e-05, "loss": 0.5182, "step": 19940 }, { "epoch": 9.04, "learning_rate": 1.1272649599726764e-05, "loss": 0.8319, "step": 19950 }, { "epoch": 9.04, "learning_rate": 1.1268123348441735e-05, "loss": 0.7917, "step": 19960 }, { "epoch": 9.04, "learning_rate": 1.1263595260619163e-05, "loss": 0.7896, "step": 19970 }, { "epoch": 9.04, "learning_rate": 1.1259065338465981e-05, "loss": 0.5326, "step": 19980 }, { "epoch": 9.04, "learning_rate": 1.125453358419002e-05, "loss": 0.7379, "step": 19990 }, { "epoch": 9.04, "learning_rate": 1.125e-05, "loss": 0.7768, "step": 20000 }, { "epoch": 9.04, "eval_accuracy": 0.8589473684210527, "eval_f1": 0.8589473684210527, "eval_loss": 0.7489694356918335, "eval_runtime": 770.3856, "eval_samples_per_second": 6.166, "eval_steps_per_second": 1.542, "step": 20000 }, { "epoch": 10.0, "learning_rate": 1.1245464588105537e-05, "loss": 0.4541, "step": 20010 }, { "epoch": 10.0, "learning_rate": 1.1240927350717133e-05, "loss": 0.6959, "step": 20020 }, { "epoch": 10.0, "learning_rate": 1.123638829004618e-05, "loss": 0.8044, "step": 20030 }, { "epoch": 10.0, "learning_rate": 1.123184740830497e-05, "loss": 0.754, "step": 20040 }, { "epoch": 10.0, "learning_rate": 1.1227304707706665e-05, "loss": 0.3744, "step": 20050 }, { "epoch": 10.0, "learning_rate": 1.1222760190465327e-05, "loss": 0.8936, "step": 20060 }, { "epoch": 10.0, "learning_rate": 1.1218213858795896e-05, "loss": 1.1047, "step": 20070 }, { "epoch": 10.0, "learning_rate": 1.1213665714914198e-05, "loss": 0.3212, "step": 20080 }, { "epoch": 10.0, "learning_rate": 1.1209115761036948e-05, "loss": 0.5225, "step": 20090 }, { "epoch": 10.0, "learning_rate": 1.1204563999381733e-05, "loss": 0.5067, "step": 20100 }, { "epoch": 10.0, "learning_rate": 1.1200010432167028e-05, "loss": 0.692, "step": 20110 }, { "epoch": 10.0, "learning_rate": 1.1195455061612187e-05, "loss": 0.4932, "step": 20120 }, { "epoch": 10.0, "learning_rate": 1.1190897889937441e-05, "loss": 0.528, "step": 20130 }, { "epoch": 10.0, "learning_rate": 1.1186338919363903e-05, "loss": 0.5525, "step": 20140 }, { "epoch": 10.0, "learning_rate": 1.1181778152113556e-05, "loss": 0.6876, "step": 20150 }, { "epoch": 10.0, "learning_rate": 1.1177215590409265e-05, "loss": 0.7504, "step": 20160 }, { "epoch": 10.0, "learning_rate": 1.1172651236474768e-05, "loss": 0.7971, "step": 20170 }, { "epoch": 10.0, "learning_rate": 1.1168085092534673e-05, "loss": 0.7278, "step": 20180 }, { "epoch": 10.0, "learning_rate": 1.1163517160814464e-05, "loss": 0.3767, "step": 20190 }, { "epoch": 10.0, "learning_rate": 1.1158947443540496e-05, "loss": 0.924, "step": 20200 }, { "epoch": 10.0, "learning_rate": 1.1154375942939992e-05, "loss": 0.6778, "step": 20210 }, { "epoch": 10.0, "learning_rate": 1.1149802661241051e-05, "loss": 0.7237, "step": 20220 }, { "epoch": 10.0, "learning_rate": 1.1145227600672627e-05, "loss": 0.7282, "step": 20230 }, { "epoch": 10.0, "learning_rate": 1.1140650763464555e-05, "loss": 0.5504, "step": 20240 }, { "epoch": 10.01, "learning_rate": 1.1136072151847529e-05, "loss": 0.7132, "step": 20250 }, { "epoch": 10.01, "learning_rate": 1.1131491768053105e-05, "loss": 0.6256, "step": 20260 }, { "epoch": 10.01, "learning_rate": 1.1126909614313711e-05, "loss": 0.3614, "step": 20270 }, { "epoch": 10.01, "learning_rate": 1.1122325692862631e-05, "loss": 0.3448, "step": 20280 }, { "epoch": 10.01, "learning_rate": 1.1117740005934013e-05, "loss": 0.6783, "step": 20290 }, { "epoch": 10.01, "learning_rate": 1.1113152555762865e-05, "loss": 0.8791, "step": 20300 }, { "epoch": 10.01, "learning_rate": 1.1108563344585056e-05, "loss": 0.7556, "step": 20310 }, { "epoch": 10.01, "learning_rate": 1.1103972374637305e-05, "loss": 0.6908, "step": 20320 }, { "epoch": 10.01, "learning_rate": 1.1099379648157206e-05, "loss": 0.9076, "step": 20330 }, { "epoch": 10.01, "learning_rate": 1.1094785167383189e-05, "loss": 0.6393, "step": 20340 }, { "epoch": 10.01, "learning_rate": 1.1090188934554552e-05, "loss": 0.4002, "step": 20350 }, { "epoch": 10.01, "learning_rate": 1.1085590951911442e-05, "loss": 0.5696, "step": 20360 }, { "epoch": 10.01, "learning_rate": 1.108099122169486e-05, "loss": 0.5739, "step": 20370 }, { "epoch": 10.01, "learning_rate": 1.1076389746146659e-05, "loss": 0.8006, "step": 20380 }, { "epoch": 10.01, "learning_rate": 1.1071786527509544e-05, "loss": 0.7159, "step": 20390 }, { "epoch": 10.01, "learning_rate": 1.1067181568027065e-05, "loss": 0.5583, "step": 20400 }, { "epoch": 10.01, "learning_rate": 1.1062574869943623e-05, "loss": 0.4653, "step": 20410 }, { "epoch": 10.01, "learning_rate": 1.1057966435504468e-05, "loss": 0.8138, "step": 20420 }, { "epoch": 10.01, "learning_rate": 1.1053356266955699e-05, "loss": 0.9701, "step": 20430 }, { "epoch": 10.01, "learning_rate": 1.1048744366544248e-05, "loss": 1.121, "step": 20440 }, { "epoch": 10.01, "learning_rate": 1.1044130736517906e-05, "loss": 0.6044, "step": 20450 }, { "epoch": 10.01, "learning_rate": 1.1039515379125297e-05, "loss": 0.7948, "step": 20460 }, { "epoch": 10.01, "learning_rate": 1.1034898296615888e-05, "loss": 0.7339, "step": 20470 }, { "epoch": 10.01, "learning_rate": 1.1030279491239996e-05, "loss": 0.6781, "step": 20480 }, { "epoch": 10.01, "learning_rate": 1.1025658965248762e-05, "loss": 0.5691, "step": 20490 }, { "epoch": 10.01, "learning_rate": 1.1021036720894182e-05, "loss": 0.7411, "step": 20500 }, { "epoch": 10.01, "learning_rate": 1.1016412760429078e-05, "loss": 0.8556, "step": 20510 }, { "epoch": 10.01, "learning_rate": 1.1011787086107109e-05, "loss": 0.8254, "step": 20520 }, { "epoch": 10.01, "learning_rate": 1.100715970018278e-05, "loss": 0.6314, "step": 20530 }, { "epoch": 10.01, "learning_rate": 1.1002530604911416e-05, "loss": 0.6799, "step": 20540 }, { "epoch": 10.01, "learning_rate": 1.0997899802549185e-05, "loss": 0.7688, "step": 20550 }, { "epoch": 10.01, "learning_rate": 1.0993267295353082e-05, "loss": 0.755, "step": 20560 }, { "epoch": 10.01, "learning_rate": 1.0988633085580938e-05, "loss": 0.6034, "step": 20570 }, { "epoch": 10.01, "learning_rate": 1.0983997175491409e-05, "loss": 0.6137, "step": 20580 }, { "epoch": 10.01, "learning_rate": 1.0979359567343977e-05, "loss": 0.5719, "step": 20590 }, { "epoch": 10.01, "learning_rate": 1.0974720263398964e-05, "loss": 0.6048, "step": 20600 }, { "epoch": 10.01, "learning_rate": 1.0970079265917503e-05, "loss": 0.7313, "step": 20610 }, { "epoch": 10.01, "learning_rate": 1.0965436577161566e-05, "loss": 0.5351, "step": 20620 }, { "epoch": 10.01, "learning_rate": 1.0960792199393936e-05, "loss": 0.71, "step": 20630 }, { "epoch": 10.01, "learning_rate": 1.0956146134878232e-05, "loss": 0.8728, "step": 20640 }, { "epoch": 10.01, "learning_rate": 1.0951498385878888e-05, "loss": 1.0162, "step": 20650 }, { "epoch": 10.01, "learning_rate": 1.0946848954661161e-05, "loss": 0.7644, "step": 20660 }, { "epoch": 10.01, "learning_rate": 1.0942197843491125e-05, "loss": 0.7666, "step": 20670 }, { "epoch": 10.01, "learning_rate": 1.0937545054635673e-05, "loss": 0.4105, "step": 20680 }, { "epoch": 10.01, "learning_rate": 1.0932890590362526e-05, "loss": 0.6279, "step": 20690 }, { "epoch": 10.01, "learning_rate": 1.0928234452940207e-05, "loss": 0.6136, "step": 20700 }, { "epoch": 10.01, "learning_rate": 1.0923576644638063e-05, "loss": 0.7239, "step": 20710 }, { "epoch": 10.01, "learning_rate": 1.0918917167726252e-05, "loss": 0.5398, "step": 20720 }, { "epoch": 10.01, "learning_rate": 1.0914256024475743e-05, "loss": 0.9341, "step": 20730 }, { "epoch": 10.01, "learning_rate": 1.090959321715833e-05, "loss": 0.899, "step": 20740 }, { "epoch": 10.02, "learning_rate": 1.0904928748046601e-05, "loss": 0.5511, "step": 20750 }, { "epoch": 10.02, "learning_rate": 1.0900262619413965e-05, "loss": 0.8734, "step": 20760 }, { "epoch": 10.02, "learning_rate": 1.0895594833534635e-05, "loss": 0.865, "step": 20770 }, { "epoch": 10.02, "learning_rate": 1.089092539268363e-05, "loss": 0.5086, "step": 20780 }, { "epoch": 10.02, "learning_rate": 1.0886254299136787e-05, "loss": 0.9701, "step": 20790 }, { "epoch": 10.02, "learning_rate": 1.088158155517073e-05, "loss": 0.9013, "step": 20800 }, { "epoch": 10.02, "learning_rate": 1.0876907163062907e-05, "loss": 0.8956, "step": 20810 }, { "epoch": 10.02, "learning_rate": 1.0872231125091554e-05, "loss": 0.6707, "step": 20820 }, { "epoch": 10.02, "learning_rate": 1.0867553443535718e-05, "loss": 0.7424, "step": 20830 }, { "epoch": 10.02, "learning_rate": 1.0862874120675244e-05, "loss": 0.7948, "step": 20840 }, { "epoch": 10.02, "learning_rate": 1.0858193158790773e-05, "loss": 0.895, "step": 20850 }, { "epoch": 10.02, "learning_rate": 1.0853510560163755e-05, "loss": 0.5475, "step": 20860 }, { "epoch": 10.02, "learning_rate": 1.0848826327076426e-05, "loss": 0.6243, "step": 20870 }, { "epoch": 10.02, "learning_rate": 1.0844140461811832e-05, "loss": 0.2264, "step": 20880 }, { "epoch": 10.02, "learning_rate": 1.0839452966653798e-05, "loss": 0.8413, "step": 20890 }, { "epoch": 10.02, "learning_rate": 1.0834763843886956e-05, "loss": 0.9677, "step": 20900 }, { "epoch": 10.02, "learning_rate": 1.083007309579673e-05, "loss": 0.4505, "step": 20910 }, { "epoch": 10.02, "learning_rate": 1.0825380724669328e-05, "loss": 0.8381, "step": 20920 }, { "epoch": 10.02, "learning_rate": 1.0820686732791763e-05, "loss": 0.3911, "step": 20930 }, { "epoch": 10.02, "learning_rate": 1.081599112245182e-05, "loss": 0.4558, "step": 20940 }, { "epoch": 10.02, "learning_rate": 1.081129389593809e-05, "loss": 0.7595, "step": 20950 }, { "epoch": 10.02, "learning_rate": 1.080659505553994e-05, "loss": 0.6722, "step": 20960 }, { "epoch": 10.02, "learning_rate": 1.0801894603547529e-05, "loss": 0.6681, "step": 20970 }, { "epoch": 10.02, "learning_rate": 1.07971925422518e-05, "loss": 0.9848, "step": 20980 }, { "epoch": 10.02, "learning_rate": 1.0792488873944481e-05, "loss": 0.8077, "step": 20990 }, { "epoch": 10.02, "learning_rate": 1.078778360091808e-05, "loss": 0.6053, "step": 21000 }, { "epoch": 10.02, "learning_rate": 1.0783076725465896e-05, "loss": 0.2675, "step": 21010 }, { "epoch": 10.02, "learning_rate": 1.0778368249881996e-05, "loss": 0.7754, "step": 21020 }, { "epoch": 10.02, "learning_rate": 1.0773658176461242e-05, "loss": 0.4894, "step": 21030 }, { "epoch": 10.02, "learning_rate": 1.0768946507499255e-05, "loss": 0.9299, "step": 21040 }, { "epoch": 10.02, "learning_rate": 1.0764233245292457e-05, "loss": 0.7061, "step": 21050 }, { "epoch": 10.02, "learning_rate": 1.0759518392138026e-05, "loss": 0.5992, "step": 21060 }, { "epoch": 10.02, "learning_rate": 1.0754801950333931e-05, "loss": 1.0293, "step": 21070 }, { "epoch": 10.02, "learning_rate": 1.0750083922178904e-05, "loss": 0.9621, "step": 21080 }, { "epoch": 10.02, "learning_rate": 1.0745364309972454e-05, "loss": 1.074, "step": 21090 }, { "epoch": 10.02, "learning_rate": 1.0740643116014868e-05, "loss": 1.0342, "step": 21100 }, { "epoch": 10.02, "learning_rate": 1.0735920342607193e-05, "loss": 0.5996, "step": 21110 }, { "epoch": 10.02, "learning_rate": 1.0731195992051254e-05, "loss": 0.4885, "step": 21120 }, { "epoch": 10.02, "learning_rate": 1.0726470066649639e-05, "loss": 0.8302, "step": 21130 }, { "epoch": 10.02, "learning_rate": 1.0721742568705713e-05, "loss": 0.8496, "step": 21140 }, { "epoch": 10.02, "learning_rate": 1.0717013500523595e-05, "loss": 0.6095, "step": 21150 }, { "epoch": 10.02, "learning_rate": 1.0712282864408178e-05, "loss": 0.7966, "step": 21160 }, { "epoch": 10.02, "learning_rate": 1.0707550662665117e-05, "loss": 0.7261, "step": 21170 }, { "epoch": 10.02, "learning_rate": 1.0702816897600825e-05, "loss": 0.5543, "step": 21180 }, { "epoch": 10.02, "learning_rate": 1.0698081571522491e-05, "loss": 0.4776, "step": 21190 }, { "epoch": 10.02, "learning_rate": 1.0693344686738045e-05, "loss": 0.7805, "step": 21200 }, { "epoch": 10.02, "learning_rate": 1.0688606245556196e-05, "loss": 0.4526, "step": 21210 }, { "epoch": 10.02, "learning_rate": 1.0683866250286394e-05, "loss": 0.6296, "step": 21220 }, { "epoch": 10.02, "learning_rate": 1.0679124703238862e-05, "loss": 0.7896, "step": 21230 }, { "epoch": 10.02, "learning_rate": 1.0674381606724573e-05, "loss": 0.75, "step": 21240 }, { "epoch": 10.03, "learning_rate": 1.0669636963055247e-05, "loss": 0.4908, "step": 21250 }, { "epoch": 10.03, "learning_rate": 1.0664890774543372e-05, "loss": 0.5771, "step": 21260 }, { "epoch": 10.03, "learning_rate": 1.0660143043502181e-05, "loss": 0.2833, "step": 21270 }, { "epoch": 10.03, "learning_rate": 1.0655393772245661e-05, "loss": 0.495, "step": 21280 }, { "epoch": 10.03, "learning_rate": 1.0650642963088549e-05, "loss": 0.6387, "step": 21290 }, { "epoch": 10.03, "learning_rate": 1.0645890618346329e-05, "loss": 0.718, "step": 21300 }, { "epoch": 10.03, "learning_rate": 1.0641136740335238e-05, "loss": 0.6408, "step": 21310 }, { "epoch": 10.03, "learning_rate": 1.063638133137226e-05, "loss": 0.7938, "step": 21320 }, { "epoch": 10.03, "learning_rate": 1.0631624393775125e-05, "loss": 0.4977, "step": 21330 }, { "epoch": 10.03, "learning_rate": 1.0626865929862303e-05, "loss": 0.7993, "step": 21340 }, { "epoch": 10.03, "learning_rate": 1.062210594195301e-05, "loss": 0.9612, "step": 21350 }, { "epoch": 10.03, "learning_rate": 1.0617344432367208e-05, "loss": 0.6478, "step": 21360 }, { "epoch": 10.03, "learning_rate": 1.0612581403425603e-05, "loss": 1.1047, "step": 21370 }, { "epoch": 10.03, "learning_rate": 1.0607816857449632e-05, "loss": 0.6164, "step": 21380 }, { "epoch": 10.03, "learning_rate": 1.060305079676148e-05, "loss": 0.5161, "step": 21390 }, { "epoch": 10.03, "learning_rate": 1.0598283223684064e-05, "loss": 0.7603, "step": 21400 }, { "epoch": 10.03, "learning_rate": 1.0593514140541044e-05, "loss": 0.827, "step": 21410 }, { "epoch": 10.03, "learning_rate": 1.0588743549656812e-05, "loss": 0.9353, "step": 21420 }, { "epoch": 10.03, "learning_rate": 1.0583971453356499e-05, "loss": 0.7165, "step": 21430 }, { "epoch": 10.03, "learning_rate": 1.057919785396596e-05, "loss": 0.5806, "step": 21440 }, { "epoch": 10.03, "learning_rate": 1.0574422753811796e-05, "loss": 0.8537, "step": 21450 }, { "epoch": 10.03, "learning_rate": 1.056964615522133e-05, "loss": 0.587, "step": 21460 }, { "epoch": 10.03, "learning_rate": 1.0564868060522619e-05, "loss": 0.6138, "step": 21470 }, { "epoch": 10.03, "learning_rate": 1.0560088472044448e-05, "loss": 0.6889, "step": 21480 }, { "epoch": 10.03, "learning_rate": 1.0555307392116327e-05, "loss": 1.3632, "step": 21490 }, { "epoch": 10.03, "learning_rate": 1.0550524823068504e-05, "loss": 0.5563, "step": 21500 }, { "epoch": 10.03, "learning_rate": 1.0545740767231936e-05, "loss": 0.6569, "step": 21510 }, { "epoch": 10.03, "learning_rate": 1.054095522693832e-05, "loss": 0.4933, "step": 21520 }, { "epoch": 10.03, "learning_rate": 1.0536168204520068e-05, "loss": 1.0152, "step": 21530 }, { "epoch": 10.03, "learning_rate": 1.0531379702310317e-05, "loss": 0.5184, "step": 21540 }, { "epoch": 10.03, "learning_rate": 1.0526589722642927e-05, "loss": 0.9484, "step": 21550 }, { "epoch": 10.03, "learning_rate": 1.0521798267852471e-05, "loss": 0.7081, "step": 21560 }, { "epoch": 10.03, "learning_rate": 1.051700534027425e-05, "loss": 0.6213, "step": 21570 }, { "epoch": 10.03, "learning_rate": 1.0512210942244275e-05, "loss": 0.6249, "step": 21580 }, { "epoch": 10.03, "learning_rate": 1.0507415076099281e-05, "loss": 0.657, "step": 21590 }, { "epoch": 10.03, "learning_rate": 1.0502617744176715e-05, "loss": 0.6751, "step": 21600 }, { "epoch": 10.03, "learning_rate": 1.0497818948814732e-05, "loss": 0.5777, "step": 21610 }, { "epoch": 10.03, "learning_rate": 1.0493018692352216e-05, "loss": 0.6597, "step": 21620 }, { "epoch": 10.03, "learning_rate": 1.0488216977128745e-05, "loss": 0.6365, "step": 21630 }, { "epoch": 10.03, "learning_rate": 1.0483413805484625e-05, "loss": 0.5279, "step": 21640 }, { "epoch": 10.03, "learning_rate": 1.0478609179760854e-05, "loss": 0.2789, "step": 21650 }, { "epoch": 10.03, "learning_rate": 1.0473803102299157e-05, "loss": 0.6284, "step": 21660 }, { "epoch": 10.03, "learning_rate": 1.0468995575441954e-05, "loss": 0.9454, "step": 21670 }, { "epoch": 10.03, "learning_rate": 1.0464186601532374e-05, "loss": 1.0541, "step": 21680 }, { "epoch": 10.03, "learning_rate": 1.0459376182914256e-05, "loss": 0.676, "step": 21690 }, { "epoch": 10.03, "learning_rate": 1.0454564321932134e-05, "loss": 1.0218, "step": 21700 }, { "epoch": 10.03, "learning_rate": 1.0449751020931255e-05, "loss": 0.6263, "step": 21710 }, { "epoch": 10.03, "learning_rate": 1.0444936282257564e-05, "loss": 0.6173, "step": 21720 }, { "epoch": 10.03, "learning_rate": 1.0440120108257702e-05, "loss": 1.0447, "step": 21730 }, { "epoch": 10.03, "learning_rate": 1.043530250127902e-05, "loss": 0.5234, "step": 21740 }, { "epoch": 10.04, "learning_rate": 1.0430483463669552e-05, "loss": 0.6918, "step": 21750 }, { "epoch": 10.04, "learning_rate": 1.0425662997778048e-05, "loss": 0.5981, "step": 21760 }, { "epoch": 10.04, "learning_rate": 1.042084110595394e-05, "loss": 0.6197, "step": 21770 }, { "epoch": 10.04, "learning_rate": 1.0416017790547357e-05, "loss": 0.4061, "step": 21780 }, { "epoch": 10.04, "learning_rate": 1.041119305390913e-05, "loss": 0.8991, "step": 21790 }, { "epoch": 10.04, "learning_rate": 1.0406366898390772e-05, "loss": 0.3839, "step": 21800 }, { "epoch": 10.04, "learning_rate": 1.0401539326344498e-05, "loss": 0.902, "step": 21810 }, { "epoch": 10.04, "learning_rate": 1.03967103401232e-05, "loss": 0.9921, "step": 21820 }, { "epoch": 10.04, "learning_rate": 1.0391879942080475e-05, "loss": 0.7106, "step": 21830 }, { "epoch": 10.04, "learning_rate": 1.0387048134570596e-05, "loss": 0.632, "step": 21840 }, { "epoch": 10.04, "learning_rate": 1.0382214919948527e-05, "loss": 0.6111, "step": 21850 }, { "epoch": 10.04, "learning_rate": 1.0377380300569923e-05, "loss": 0.3693, "step": 21860 }, { "epoch": 10.04, "learning_rate": 1.0372544278791114e-05, "loss": 0.6298, "step": 21870 }, { "epoch": 10.04, "learning_rate": 1.0367706856969119e-05, "loss": 0.474, "step": 21880 }, { "epoch": 10.04, "learning_rate": 1.0362868037461638e-05, "loss": 0.5683, "step": 21890 }, { "epoch": 10.04, "learning_rate": 1.0358027822627057e-05, "loss": 0.8299, "step": 21900 }, { "epoch": 10.04, "learning_rate": 1.0353186214824433e-05, "loss": 0.7203, "step": 21910 }, { "epoch": 10.04, "learning_rate": 1.034834321641351e-05, "loss": 0.3176, "step": 21920 }, { "epoch": 10.04, "learning_rate": 1.0343498829754703e-05, "loss": 0.5711, "step": 21930 }, { "epoch": 10.04, "learning_rate": 1.0338653057209111e-05, "loss": 0.5252, "step": 21940 }, { "epoch": 10.04, "learning_rate": 1.0333805901138504e-05, "loss": 0.5265, "step": 21950 }, { "epoch": 10.04, "learning_rate": 1.0328957363905325e-05, "loss": 0.7713, "step": 21960 }, { "epoch": 10.04, "learning_rate": 1.0324107447872695e-05, "loss": 0.5364, "step": 21970 }, { "epoch": 10.04, "learning_rate": 1.03192561554044e-05, "loss": 0.6878, "step": 21980 }, { "epoch": 10.04, "learning_rate": 1.0314403488864907e-05, "loss": 0.7172, "step": 21990 }, { "epoch": 10.04, "learning_rate": 1.0309549450619342e-05, "loss": 0.6793, "step": 22000 }, { "epoch": 10.04, "eval_accuracy": 0.8557894736842105, "eval_f1": 0.8557894736842105, "eval_loss": 0.7730118036270142, "eval_runtime": 761.7959, "eval_samples_per_second": 6.235, "eval_steps_per_second": 1.559, "step": 22000 }, { "epoch": 11.0, "learning_rate": 1.0304694043033502e-05, "loss": 0.9001, "step": 22010 }, { "epoch": 11.0, "learning_rate": 1.0299837268473863e-05, "loss": 0.7062, "step": 22020 }, { "epoch": 11.0, "learning_rate": 1.0294979129307548e-05, "loss": 0.4986, "step": 22030 }, { "epoch": 11.0, "learning_rate": 1.0290119627902361e-05, "loss": 0.563, "step": 22040 }, { "epoch": 11.0, "learning_rate": 1.0285258766626762e-05, "loss": 0.7271, "step": 22050 }, { "epoch": 11.0, "learning_rate": 1.0280396547849873e-05, "loss": 0.782, "step": 22060 }, { "epoch": 11.0, "learning_rate": 1.0275532973941487e-05, "loss": 0.686, "step": 22070 }, { "epoch": 11.0, "learning_rate": 1.0270668047272045e-05, "loss": 0.6969, "step": 22080 }, { "epoch": 11.0, "learning_rate": 1.0265801770212656e-05, "loss": 0.6656, "step": 22090 }, { "epoch": 11.0, "learning_rate": 1.0260934145135086e-05, "loss": 0.5161, "step": 22100 }, { "epoch": 11.0, "learning_rate": 1.025606517441175e-05, "loss": 0.9492, "step": 22110 }, { "epoch": 11.0, "learning_rate": 1.0251194860415732e-05, "loss": 0.7005, "step": 22120 }, { "epoch": 11.0, "learning_rate": 1.0246323205520757e-05, "loss": 1.0263, "step": 22130 }, { "epoch": 11.0, "learning_rate": 1.0241450212101217e-05, "loss": 0.4128, "step": 22140 }, { "epoch": 11.0, "learning_rate": 1.0236575882532147e-05, "loss": 0.601, "step": 22150 }, { "epoch": 11.0, "learning_rate": 1.0231700219189237e-05, "loss": 0.391, "step": 22160 }, { "epoch": 11.0, "learning_rate": 1.0226823224448825e-05, "loss": 0.623, "step": 22170 }, { "epoch": 11.0, "learning_rate": 1.0221944900687897e-05, "loss": 0.5038, "step": 22180 }, { "epoch": 11.0, "learning_rate": 1.0217065250284094e-05, "loss": 0.5027, "step": 22190 }, { "epoch": 11.0, "learning_rate": 1.0212184275615691e-05, "loss": 0.7453, "step": 22200 }, { "epoch": 11.0, "learning_rate": 1.0207301979061625e-05, "loss": 0.6545, "step": 22210 }, { "epoch": 11.0, "learning_rate": 1.0202418363001462e-05, "loss": 0.8928, "step": 22220 }, { "epoch": 11.0, "learning_rate": 1.0197533429815416e-05, "loss": 0.759, "step": 22230 }, { "epoch": 11.0, "learning_rate": 1.0192647181884347e-05, "loss": 0.6488, "step": 22240 }, { "epoch": 11.01, "learning_rate": 1.018775962158975e-05, "loss": 0.4284, "step": 22250 }, { "epoch": 11.01, "learning_rate": 1.018287075131377e-05, "loss": 0.7273, "step": 22260 }, { "epoch": 11.01, "learning_rate": 1.0177980573439176e-05, "loss": 0.8202, "step": 22270 }, { "epoch": 11.01, "learning_rate": 1.017308909034938e-05, "loss": 0.6795, "step": 22280 }, { "epoch": 11.01, "learning_rate": 1.0168196304428437e-05, "loss": 0.7264, "step": 22290 }, { "epoch": 11.01, "learning_rate": 1.0163302218061028e-05, "loss": 0.5813, "step": 22300 }, { "epoch": 11.01, "learning_rate": 1.0158406833632473e-05, "loss": 0.6163, "step": 22310 }, { "epoch": 11.01, "learning_rate": 1.015351015352872e-05, "loss": 0.4763, "step": 22320 }, { "epoch": 11.01, "learning_rate": 1.0148612180136355e-05, "loss": 0.5641, "step": 22330 }, { "epoch": 11.01, "learning_rate": 1.0143712915842589e-05, "loss": 0.704, "step": 22340 }, { "epoch": 11.01, "learning_rate": 1.0138812363035263e-05, "loss": 0.4373, "step": 22350 }, { "epoch": 11.01, "learning_rate": 1.013391052410285e-05, "loss": 0.8222, "step": 22360 }, { "epoch": 11.01, "learning_rate": 1.0129007401434443e-05, "loss": 0.7842, "step": 22370 }, { "epoch": 11.01, "learning_rate": 1.0124102997419767e-05, "loss": 0.3431, "step": 22380 }, { "epoch": 11.01, "learning_rate": 1.0119197314449169e-05, "loss": 0.7633, "step": 22390 }, { "epoch": 11.01, "learning_rate": 1.0114290354913615e-05, "loss": 0.5489, "step": 22400 }, { "epoch": 11.01, "learning_rate": 1.0109382121204702e-05, "loss": 0.5858, "step": 22410 }, { "epoch": 11.01, "learning_rate": 1.0104472615714642e-05, "loss": 0.5548, "step": 22420 }, { "epoch": 11.01, "learning_rate": 1.0099561840836272e-05, "loss": 0.6733, "step": 22430 }, { "epoch": 11.01, "learning_rate": 1.0094649798963037e-05, "loss": 0.8389, "step": 22440 }, { "epoch": 11.01, "learning_rate": 1.008973649248901e-05, "loss": 0.4196, "step": 22450 }, { "epoch": 11.01, "learning_rate": 1.0084821923808877e-05, "loss": 0.7719, "step": 22460 }, { "epoch": 11.01, "learning_rate": 1.007990609531794e-05, "loss": 0.4378, "step": 22470 }, { "epoch": 11.01, "learning_rate": 1.0074989009412116e-05, "loss": 0.3773, "step": 22480 }, { "epoch": 11.01, "learning_rate": 1.0070070668487926e-05, "loss": 0.7681, "step": 22490 }, { "epoch": 11.01, "learning_rate": 1.0065151074942516e-05, "loss": 0.4419, "step": 22500 }, { "epoch": 11.01, "learning_rate": 1.0060230231173632e-05, "loss": 0.7388, "step": 22510 }, { "epoch": 11.01, "learning_rate": 1.0055308139579639e-05, "loss": 0.678, "step": 22520 }, { "epoch": 11.01, "learning_rate": 1.0050384802559497e-05, "loss": 0.4424, "step": 22530 }, { "epoch": 11.01, "learning_rate": 1.0045460222512785e-05, "loss": 0.5644, "step": 22540 }, { "epoch": 11.01, "learning_rate": 1.0040534401839687e-05, "loss": 0.9504, "step": 22550 }, { "epoch": 11.01, "learning_rate": 1.003560734294098e-05, "loss": 0.5278, "step": 22560 }, { "epoch": 11.01, "learning_rate": 1.003067904821806e-05, "loss": 0.546, "step": 22570 }, { "epoch": 11.01, "learning_rate": 1.0025749520072912e-05, "loss": 1.0441, "step": 22580 }, { "epoch": 11.01, "learning_rate": 1.0020818760908133e-05, "loss": 0.8724, "step": 22590 }, { "epoch": 11.01, "learning_rate": 1.0015886773126914e-05, "loss": 0.9456, "step": 22600 }, { "epoch": 11.01, "learning_rate": 1.0010953559133043e-05, "loss": 0.662, "step": 22610 }, { "epoch": 11.01, "learning_rate": 1.0006019121330913e-05, "loss": 0.7724, "step": 22620 }, { "epoch": 11.01, "learning_rate": 1.0001083462125504e-05, "loss": 1.0125, "step": 22630 }, { "epoch": 11.01, "learning_rate": 9.996146583922401e-06, "loss": 0.5953, "step": 22640 }, { "epoch": 11.01, "learning_rate": 9.991208489127775e-06, "loss": 0.871, "step": 22650 }, { "epoch": 11.01, "learning_rate": 9.986269180148397e-06, "loss": 0.6984, "step": 22660 }, { "epoch": 11.01, "learning_rate": 9.981328659391623e-06, "loss": 0.6937, "step": 22670 }, { "epoch": 11.01, "learning_rate": 9.976386929265403e-06, "loss": 0.6753, "step": 22680 }, { "epoch": 11.01, "learning_rate": 9.971443992178277e-06, "loss": 0.6009, "step": 22690 }, { "epoch": 11.01, "learning_rate": 9.966499850539375e-06, "loss": 0.9312, "step": 22700 }, { "epoch": 11.01, "learning_rate": 9.961554506758408e-06, "loss": 0.5313, "step": 22710 }, { "epoch": 11.01, "learning_rate": 9.956607963245676e-06, "loss": 0.5486, "step": 22720 }, { "epoch": 11.01, "learning_rate": 9.951660222412069e-06, "loss": 0.6269, "step": 22730 }, { "epoch": 11.01, "learning_rate": 9.94671128666905e-06, "loss": 0.7164, "step": 22740 }, { "epoch": 11.02, "learning_rate": 9.941761158428675e-06, "loss": 0.4487, "step": 22750 }, { "epoch": 11.02, "learning_rate": 9.936809840103575e-06, "loss": 0.851, "step": 22760 }, { "epoch": 11.02, "learning_rate": 9.931857334106958e-06, "loss": 0.6416, "step": 22770 }, { "epoch": 11.02, "learning_rate": 9.926903642852621e-06, "loss": 0.9216, "step": 22780 }, { "epoch": 11.02, "learning_rate": 9.921948768754931e-06, "loss": 0.6046, "step": 22790 }, { "epoch": 11.02, "learning_rate": 9.916992714228835e-06, "loss": 0.6689, "step": 22800 }, { "epoch": 11.02, "learning_rate": 9.91203548168985e-06, "loss": 1.0292, "step": 22810 }, { "epoch": 11.02, "learning_rate": 9.90707707355407e-06, "loss": 0.7029, "step": 22820 }, { "epoch": 11.02, "learning_rate": 9.90211749223817e-06, "loss": 1.2995, "step": 22830 }, { "epoch": 11.02, "learning_rate": 9.897156740159384e-06, "loss": 0.5994, "step": 22840 }, { "epoch": 11.02, "learning_rate": 9.892194819735525e-06, "loss": 0.8809, "step": 22850 }, { "epoch": 11.02, "learning_rate": 9.887231733384972e-06, "loss": 0.8827, "step": 22860 }, { "epoch": 11.02, "learning_rate": 9.882267483526669e-06, "loss": 0.7032, "step": 22870 }, { "epoch": 11.02, "learning_rate": 9.877302072580139e-06, "loss": 0.4283, "step": 22880 }, { "epoch": 11.02, "learning_rate": 9.872335502965455e-06, "loss": 0.5838, "step": 22890 }, { "epoch": 11.02, "learning_rate": 9.867367777103269e-06, "loss": 0.5258, "step": 22900 }, { "epoch": 11.02, "learning_rate": 9.862398897414786e-06, "loss": 0.5202, "step": 22910 }, { "epoch": 11.02, "learning_rate": 9.857428866321781e-06, "loss": 0.5962, "step": 22920 }, { "epoch": 11.02, "learning_rate": 9.852457686246583e-06, "loss": 0.4088, "step": 22930 }, { "epoch": 11.02, "learning_rate": 9.847485359612086e-06, "loss": 0.9281, "step": 22940 }, { "epoch": 11.02, "learning_rate": 9.842511888841744e-06, "loss": 0.5964, "step": 22950 }, { "epoch": 11.02, "learning_rate": 9.837537276359564e-06, "loss": 0.6418, "step": 22960 }, { "epoch": 11.02, "learning_rate": 9.832561524590115e-06, "loss": 0.4186, "step": 22970 }, { "epoch": 11.02, "learning_rate": 9.82758463595851e-06, "loss": 0.6558, "step": 22980 }, { "epoch": 11.02, "learning_rate": 9.822606612890431e-06, "loss": 0.765, "step": 22990 }, { "epoch": 11.02, "learning_rate": 9.817627457812105e-06, "loss": 0.9792, "step": 23000 }, { "epoch": 11.02, "learning_rate": 9.812647173150313e-06, "loss": 0.9115, "step": 23010 }, { "epoch": 11.02, "learning_rate": 9.807665761332382e-06, "loss": 0.3789, "step": 23020 }, { "epoch": 11.02, "learning_rate": 9.80268322478619e-06, "loss": 0.4179, "step": 23030 }, { "epoch": 11.02, "learning_rate": 9.797699565940168e-06, "loss": 0.861, "step": 23040 }, { "epoch": 11.02, "learning_rate": 9.792714787223294e-06, "loss": 0.5827, "step": 23050 }, { "epoch": 11.02, "learning_rate": 9.787728891065084e-06, "loss": 0.6217, "step": 23060 }, { "epoch": 11.02, "learning_rate": 9.782741879895602e-06, "loss": 0.7407, "step": 23070 }, { "epoch": 11.02, "learning_rate": 9.777753756145458e-06, "loss": 0.5362, "step": 23080 }, { "epoch": 11.02, "learning_rate": 9.772764522245806e-06, "loss": 0.6387, "step": 23090 }, { "epoch": 11.02, "learning_rate": 9.767774180628335e-06, "loss": 0.4657, "step": 23100 }, { "epoch": 11.02, "learning_rate": 9.762782733725277e-06, "loss": 0.6733, "step": 23110 }, { "epoch": 11.02, "learning_rate": 9.757790183969404e-06, "loss": 0.6548, "step": 23120 }, { "epoch": 11.02, "learning_rate": 9.752796533794022e-06, "loss": 0.8012, "step": 23130 }, { "epoch": 11.02, "learning_rate": 9.747801785632979e-06, "loss": 0.7032, "step": 23140 }, { "epoch": 11.02, "learning_rate": 9.74280594192065e-06, "loss": 0.7914, "step": 23150 }, { "epoch": 11.02, "learning_rate": 9.737809005091952e-06, "loss": 0.7735, "step": 23160 }, { "epoch": 11.02, "learning_rate": 9.732810977582329e-06, "loss": 0.5257, "step": 23170 }, { "epoch": 11.02, "learning_rate": 9.727811861827762e-06, "loss": 0.7142, "step": 23180 }, { "epoch": 11.02, "learning_rate": 9.722811660264757e-06, "loss": 0.5854, "step": 23190 }, { "epoch": 11.02, "learning_rate": 9.717810375330351e-06, "loss": 1.0973, "step": 23200 }, { "epoch": 11.02, "learning_rate": 9.71280800946211e-06, "loss": 0.4392, "step": 23210 }, { "epoch": 11.02, "learning_rate": 9.707804565098127e-06, "loss": 0.5886, "step": 23220 }, { "epoch": 11.02, "learning_rate": 9.702800044677022e-06, "loss": 0.9332, "step": 23230 }, { "epoch": 11.02, "learning_rate": 9.69779445063793e-06, "loss": 0.639, "step": 23240 }, { "epoch": 11.03, "learning_rate": 9.692787785420525e-06, "loss": 0.4684, "step": 23250 }, { "epoch": 11.03, "learning_rate": 9.687780051464993e-06, "loss": 0.7447, "step": 23260 }, { "epoch": 11.03, "learning_rate": 9.682771251212038e-06, "loss": 0.5868, "step": 23270 }, { "epoch": 11.03, "learning_rate": 9.677761387102896e-06, "loss": 0.7026, "step": 23280 }, { "epoch": 11.03, "learning_rate": 9.672750461579305e-06, "loss": 0.4704, "step": 23290 }, { "epoch": 11.03, "learning_rate": 9.667738477083536e-06, "loss": 0.7248, "step": 23300 }, { "epoch": 11.03, "learning_rate": 9.66272543605837e-06, "loss": 0.7306, "step": 23310 }, { "epoch": 11.03, "learning_rate": 9.657711340947096e-06, "loss": 0.578, "step": 23320 }, { "epoch": 11.03, "learning_rate": 9.652696194193527e-06, "loss": 0.5586, "step": 23330 }, { "epoch": 11.03, "learning_rate": 9.647679998241984e-06, "loss": 0.7175, "step": 23340 }, { "epoch": 11.03, "learning_rate": 9.642662755537301e-06, "loss": 0.4754, "step": 23350 }, { "epoch": 11.03, "learning_rate": 9.63764446852482e-06, "loss": 0.4033, "step": 23360 }, { "epoch": 11.03, "learning_rate": 9.632625139650395e-06, "loss": 1.0351, "step": 23370 }, { "epoch": 11.03, "learning_rate": 9.627604771360382e-06, "loss": 0.5524, "step": 23380 }, { "epoch": 11.03, "learning_rate": 9.622583366101652e-06, "loss": 0.4591, "step": 23390 }, { "epoch": 11.03, "learning_rate": 9.617560926321575e-06, "loss": 0.9319, "step": 23400 }, { "epoch": 11.03, "learning_rate": 9.612537454468024e-06, "loss": 1.0524, "step": 23410 }, { "epoch": 11.03, "learning_rate": 9.607512952989385e-06, "loss": 0.6202, "step": 23420 }, { "epoch": 11.03, "learning_rate": 9.602487424334532e-06, "loss": 0.8657, "step": 23430 }, { "epoch": 11.03, "learning_rate": 9.597460870952849e-06, "loss": 0.8354, "step": 23440 }, { "epoch": 11.03, "learning_rate": 9.59243329529422e-06, "loss": 0.886, "step": 23450 }, { "epoch": 11.03, "learning_rate": 9.58740469980902e-06, "loss": 0.6174, "step": 23460 }, { "epoch": 11.03, "learning_rate": 9.58237508694813e-06, "loss": 0.7236, "step": 23470 }, { "epoch": 11.03, "learning_rate": 9.577344459162918e-06, "loss": 0.4743, "step": 23480 }, { "epoch": 11.03, "learning_rate": 9.572312818905252e-06, "loss": 0.6526, "step": 23490 }, { "epoch": 11.03, "learning_rate": 9.567280168627493e-06, "loss": 1.0537, "step": 23500 }, { "epoch": 11.03, "learning_rate": 9.562246510782496e-06, "loss": 0.1958, "step": 23510 }, { "epoch": 11.03, "learning_rate": 9.5572118478236e-06, "loss": 0.5649, "step": 23520 }, { "epoch": 11.03, "learning_rate": 9.55217618220464e-06, "loss": 0.8306, "step": 23530 }, { "epoch": 11.03, "learning_rate": 9.54713951637994e-06, "loss": 1.0081, "step": 23540 }, { "epoch": 11.03, "learning_rate": 9.542101852804307e-06, "loss": 0.7292, "step": 23550 }, { "epoch": 11.03, "learning_rate": 9.537063193933041e-06, "loss": 0.7931, "step": 23560 }, { "epoch": 11.03, "learning_rate": 9.53202354222192e-06, "loss": 0.4955, "step": 23570 }, { "epoch": 11.03, "learning_rate": 9.52698290012721e-06, "loss": 0.949, "step": 23580 }, { "epoch": 11.03, "learning_rate": 9.521941270105657e-06, "loss": 1.0758, "step": 23590 }, { "epoch": 11.03, "learning_rate": 9.516898654614492e-06, "loss": 1.0021, "step": 23600 }, { "epoch": 11.03, "learning_rate": 9.511855056111426e-06, "loss": 0.7936, "step": 23610 }, { "epoch": 11.03, "learning_rate": 9.506810477054645e-06, "loss": 0.7424, "step": 23620 }, { "epoch": 11.03, "learning_rate": 9.501764919902818e-06, "loss": 0.5233, "step": 23630 }, { "epoch": 11.03, "learning_rate": 9.496718387115085e-06, "loss": 0.5166, "step": 23640 }, { "epoch": 11.03, "learning_rate": 9.491670881151067e-06, "loss": 0.6646, "step": 23650 }, { "epoch": 11.03, "learning_rate": 9.486622404470855e-06, "loss": 0.6662, "step": 23660 }, { "epoch": 11.03, "learning_rate": 9.481572959535019e-06, "loss": 0.618, "step": 23670 }, { "epoch": 11.03, "learning_rate": 9.476522548804596e-06, "loss": 0.6063, "step": 23680 }, { "epoch": 11.03, "learning_rate": 9.47147117474109e-06, "loss": 0.3129, "step": 23690 }, { "epoch": 11.03, "learning_rate": 9.466418839806486e-06, "loss": 0.9168, "step": 23700 }, { "epoch": 11.03, "learning_rate": 9.461365546463226e-06, "loss": 0.7685, "step": 23710 }, { "epoch": 11.03, "learning_rate": 9.456311297174228e-06, "loss": 0.3527, "step": 23720 }, { "epoch": 11.03, "learning_rate": 9.451256094402867e-06, "loss": 0.5965, "step": 23730 }, { "epoch": 11.03, "learning_rate": 9.44619994061299e-06, "loss": 0.9973, "step": 23740 }, { "epoch": 11.04, "learning_rate": 9.441142838268906e-06, "loss": 0.8503, "step": 23750 }, { "epoch": 11.04, "learning_rate": 9.436084789835383e-06, "loss": 0.8314, "step": 23760 }, { "epoch": 11.04, "learning_rate": 9.431025797777654e-06, "loss": 0.4643, "step": 23770 }, { "epoch": 11.04, "learning_rate": 9.425965864561408e-06, "loss": 0.9074, "step": 23780 }, { "epoch": 11.04, "learning_rate": 9.420904992652797e-06, "loss": 0.4391, "step": 23790 }, { "epoch": 11.04, "learning_rate": 9.41584318451843e-06, "loss": 0.9418, "step": 23800 }, { "epoch": 11.04, "learning_rate": 9.410780442625368e-06, "loss": 0.575, "step": 23810 }, { "epoch": 11.04, "learning_rate": 9.405716769441129e-06, "loss": 0.8775, "step": 23820 }, { "epoch": 11.04, "learning_rate": 9.400652167433687e-06, "loss": 0.6625, "step": 23830 }, { "epoch": 11.04, "learning_rate": 9.39558663907147e-06, "loss": 1.0413, "step": 23840 }, { "epoch": 11.04, "learning_rate": 9.390520186823354e-06, "loss": 0.3134, "step": 23850 }, { "epoch": 11.04, "learning_rate": 9.385452813158662e-06, "loss": 0.722, "step": 23860 }, { "epoch": 11.04, "learning_rate": 9.380384520547176e-06, "loss": 0.8009, "step": 23870 }, { "epoch": 11.04, "learning_rate": 9.375315311459116e-06, "loss": 0.6277, "step": 23880 }, { "epoch": 11.04, "learning_rate": 9.370245188365156e-06, "loss": 0.5125, "step": 23890 }, { "epoch": 11.04, "learning_rate": 9.365174153736414e-06, "loss": 0.493, "step": 23900 }, { "epoch": 11.04, "learning_rate": 9.360102210044441e-06, "loss": 0.7453, "step": 23910 }, { "epoch": 11.04, "learning_rate": 9.355029359761253e-06, "loss": 0.439, "step": 23920 }, { "epoch": 11.04, "learning_rate": 9.34995560535929e-06, "loss": 0.691, "step": 23930 }, { "epoch": 11.04, "learning_rate": 9.344880949311437e-06, "loss": 0.6713, "step": 23940 }, { "epoch": 11.04, "learning_rate": 9.33980539409102e-06, "loss": 0.4107, "step": 23950 }, { "epoch": 11.04, "learning_rate": 9.334728942171805e-06, "loss": 0.7445, "step": 23960 }, { "epoch": 11.04, "learning_rate": 9.329651596027992e-06, "loss": 0.5816, "step": 23970 }, { "epoch": 11.04, "learning_rate": 9.324573358134219e-06, "loss": 0.8575, "step": 23980 }, { "epoch": 11.04, "learning_rate": 9.319494230965556e-06, "loss": 0.6872, "step": 23990 }, { "epoch": 11.04, "learning_rate": 9.314414216997507e-06, "loss": 0.5765, "step": 24000 }, { "epoch": 11.04, "eval_accuracy": 0.8368421052631579, "eval_f1": 0.8368421052631579, "eval_loss": 0.7751592993736267, "eval_runtime": 757.0231, "eval_samples_per_second": 6.275, "eval_steps_per_second": 1.569, "step": 24000 }, { "epoch": 12.0, "learning_rate": 9.309333318706012e-06, "loss": 0.5647, "step": 24010 }, { "epoch": 12.0, "learning_rate": 9.304251538567439e-06, "loss": 0.4817, "step": 24020 }, { "epoch": 12.0, "learning_rate": 9.299168879058583e-06, "loss": 1.1363, "step": 24030 }, { "epoch": 12.0, "learning_rate": 9.29408534265667e-06, "loss": 0.4983, "step": 24040 }, { "epoch": 12.0, "learning_rate": 9.289000931839357e-06, "loss": 0.5477, "step": 24050 }, { "epoch": 12.0, "learning_rate": 9.283915649084722e-06, "loss": 0.5383, "step": 24060 }, { "epoch": 12.0, "learning_rate": 9.278829496871273e-06, "loss": 0.3909, "step": 24070 }, { "epoch": 12.0, "learning_rate": 9.273742477677936e-06, "loss": 0.5127, "step": 24080 }, { "epoch": 12.0, "learning_rate": 9.268654593984062e-06, "loss": 0.6024, "step": 24090 }, { "epoch": 12.0, "learning_rate": 9.263565848269425e-06, "loss": 0.6374, "step": 24100 }, { "epoch": 12.0, "learning_rate": 9.258476243014217e-06, "loss": 0.3111, "step": 24110 }, { "epoch": 12.0, "learning_rate": 9.253385780699054e-06, "loss": 0.8523, "step": 24120 }, { "epoch": 12.0, "learning_rate": 9.248294463804958e-06, "loss": 0.6465, "step": 24130 }, { "epoch": 12.0, "learning_rate": 9.24320229481338e-06, "loss": 0.3356, "step": 24140 }, { "epoch": 12.0, "learning_rate": 9.238109276206179e-06, "loss": 0.4021, "step": 24150 }, { "epoch": 12.0, "learning_rate": 9.233015410465636e-06, "loss": 0.5717, "step": 24160 }, { "epoch": 12.0, "learning_rate": 9.22792070007443e-06, "loss": 0.5375, "step": 24170 }, { "epoch": 12.0, "learning_rate": 9.222825147515668e-06, "loss": 0.5466, "step": 24180 }, { "epoch": 12.0, "learning_rate": 9.217728755272858e-06, "loss": 0.5708, "step": 24190 }, { "epoch": 12.0, "learning_rate": 9.212631525829919e-06, "loss": 0.5068, "step": 24200 }, { "epoch": 12.0, "learning_rate": 9.207533461671181e-06, "loss": 0.3744, "step": 24210 }, { "epoch": 12.0, "learning_rate": 9.202434565281376e-06, "loss": 0.7047, "step": 24220 }, { "epoch": 12.0, "learning_rate": 9.197334839145645e-06, "loss": 0.7185, "step": 24230 }, { "epoch": 12.0, "learning_rate": 9.19223428574953e-06, "loss": 0.6393, "step": 24240 }, { "epoch": 12.01, "learning_rate": 9.187132907578989e-06, "loss": 0.9273, "step": 24250 }, { "epoch": 12.01, "learning_rate": 9.18203070712036e-06, "loss": 0.771, "step": 24260 }, { "epoch": 12.01, "learning_rate": 9.176927686860397e-06, "loss": 0.5174, "step": 24270 }, { "epoch": 12.01, "learning_rate": 9.171823849286254e-06, "loss": 0.1818, "step": 24280 }, { "epoch": 12.01, "learning_rate": 9.166719196885473e-06, "loss": 0.8949, "step": 24290 }, { "epoch": 12.01, "learning_rate": 9.161613732146007e-06, "loss": 0.5596, "step": 24300 }, { "epoch": 12.01, "learning_rate": 9.156507457556189e-06, "loss": 0.8454, "step": 24310 }, { "epoch": 12.01, "learning_rate": 9.151400375604762e-06, "loss": 0.8878, "step": 24320 }, { "epoch": 12.01, "learning_rate": 9.146292488780854e-06, "loss": 0.5003, "step": 24330 }, { "epoch": 12.01, "learning_rate": 9.141183799573984e-06, "loss": 0.7932, "step": 24340 }, { "epoch": 12.01, "learning_rate": 9.136074310474071e-06, "loss": 0.6643, "step": 24350 }, { "epoch": 12.01, "learning_rate": 9.130964023971411e-06, "loss": 0.4153, "step": 24360 }, { "epoch": 12.01, "learning_rate": 9.1258529425567e-06, "loss": 1.0002, "step": 24370 }, { "epoch": 12.01, "learning_rate": 9.120741068721017e-06, "loss": 0.6616, "step": 24380 }, { "epoch": 12.01, "learning_rate": 9.115628404955823e-06, "loss": 0.7308, "step": 24390 }, { "epoch": 12.01, "learning_rate": 9.110514953752975e-06, "loss": 0.6399, "step": 24400 }, { "epoch": 12.01, "learning_rate": 9.1054007176047e-06, "loss": 0.6749, "step": 24410 }, { "epoch": 12.01, "learning_rate": 9.10028569900362e-06, "loss": 0.9301, "step": 24420 }, { "epoch": 12.01, "learning_rate": 9.09516990044273e-06, "loss": 0.7458, "step": 24430 }, { "epoch": 12.01, "learning_rate": 9.090053324415412e-06, "loss": 0.6056, "step": 24440 }, { "epoch": 12.01, "learning_rate": 9.084935973415417e-06, "loss": 0.7213, "step": 24450 }, { "epoch": 12.01, "learning_rate": 9.07981784993688e-06, "loss": 0.4729, "step": 24460 }, { "epoch": 12.01, "learning_rate": 9.074698956474321e-06, "loss": 0.7893, "step": 24470 }, { "epoch": 12.01, "learning_rate": 9.069579295522614e-06, "loss": 0.6198, "step": 24480 }, { "epoch": 12.01, "learning_rate": 9.064458869577028e-06, "loss": 0.5706, "step": 24490 }, { "epoch": 12.01, "learning_rate": 9.059337681133194e-06, "loss": 0.9065, "step": 24500 }, { "epoch": 12.01, "learning_rate": 9.054215732687118e-06, "loss": 0.6079, "step": 24510 }, { "epoch": 12.01, "learning_rate": 9.049093026735176e-06, "loss": 0.4128, "step": 24520 }, { "epoch": 12.01, "learning_rate": 9.04396956577411e-06, "loss": 0.4852, "step": 24530 }, { "epoch": 12.01, "learning_rate": 9.038845352301034e-06, "loss": 0.6561, "step": 24540 }, { "epoch": 12.01, "learning_rate": 9.033720388813426e-06, "loss": 0.3259, "step": 24550 }, { "epoch": 12.01, "learning_rate": 9.028594677809138e-06, "loss": 0.7694, "step": 24560 }, { "epoch": 12.01, "learning_rate": 9.023468221786367e-06, "loss": 0.6811, "step": 24570 }, { "epoch": 12.01, "learning_rate": 9.018341023243696e-06, "loss": 0.6014, "step": 24580 }, { "epoch": 12.01, "learning_rate": 9.013213084680053e-06, "loss": 1.0853, "step": 24590 }, { "epoch": 12.01, "learning_rate": 9.008084408594737e-06, "loss": 0.664, "step": 24600 }, { "epoch": 12.01, "learning_rate": 9.002954997487397e-06, "loss": 0.643, "step": 24610 }, { "epoch": 12.01, "learning_rate": 8.997824853858052e-06, "loss": 0.7941, "step": 24620 }, { "epoch": 12.01, "learning_rate": 8.992693980207069e-06, "loss": 0.7122, "step": 24630 }, { "epoch": 12.01, "learning_rate": 8.987562379035175e-06, "loss": 0.6897, "step": 24640 }, { "epoch": 12.01, "learning_rate": 8.982430052843447e-06, "loss": 0.6138, "step": 24650 }, { "epoch": 12.01, "learning_rate": 8.97729700413332e-06, "loss": 0.6242, "step": 24660 }, { "epoch": 12.01, "learning_rate": 8.97216323540658e-06, "loss": 0.6159, "step": 24670 }, { "epoch": 12.01, "learning_rate": 8.967028749165362e-06, "loss": 0.5243, "step": 24680 }, { "epoch": 12.01, "learning_rate": 8.961893547912155e-06, "loss": 0.5596, "step": 24690 }, { "epoch": 12.01, "learning_rate": 8.95675763414979e-06, "loss": 0.8293, "step": 24700 }, { "epoch": 12.01, "learning_rate": 8.951621010381454e-06, "loss": 0.5012, "step": 24710 }, { "epoch": 12.01, "learning_rate": 8.946483679110668e-06, "loss": 0.6794, "step": 24720 }, { "epoch": 12.01, "learning_rate": 8.941345642841312e-06, "loss": 0.4822, "step": 24730 }, { "epoch": 12.01, "learning_rate": 8.936206904077598e-06, "loss": 0.3251, "step": 24740 }, { "epoch": 12.02, "learning_rate": 8.931067465324087e-06, "loss": 0.3446, "step": 24750 }, { "epoch": 12.02, "learning_rate": 8.925927329085677e-06, "loss": 0.921, "step": 24760 }, { "epoch": 12.02, "learning_rate": 8.92078649786761e-06, "loss": 0.5805, "step": 24770 }, { "epoch": 12.02, "learning_rate": 8.915644974175466e-06, "loss": 0.6757, "step": 24780 }, { "epoch": 12.02, "learning_rate": 8.910502760515155e-06, "loss": 0.8727, "step": 24790 }, { "epoch": 12.02, "learning_rate": 8.905359859392936e-06, "loss": 0.5516, "step": 24800 }, { "epoch": 12.02, "learning_rate": 8.900216273315393e-06, "loss": 0.8247, "step": 24810 }, { "epoch": 12.02, "learning_rate": 8.895072004789447e-06, "loss": 1.006, "step": 24820 }, { "epoch": 12.02, "learning_rate": 8.889927056322356e-06, "loss": 0.6281, "step": 24830 }, { "epoch": 12.02, "learning_rate": 8.884781430421703e-06, "loss": 0.7306, "step": 24840 }, { "epoch": 12.02, "learning_rate": 8.879635129595402e-06, "loss": 0.5848, "step": 24850 }, { "epoch": 12.02, "learning_rate": 8.874488156351698e-06, "loss": 0.5106, "step": 24860 }, { "epoch": 12.02, "learning_rate": 8.869340513199166e-06, "loss": 0.2741, "step": 24870 }, { "epoch": 12.02, "learning_rate": 8.864192202646702e-06, "loss": 0.5024, "step": 24880 }, { "epoch": 12.02, "learning_rate": 8.85904322720353e-06, "loss": 0.6582, "step": 24890 }, { "epoch": 12.02, "learning_rate": 8.853893589379202e-06, "loss": 0.6676, "step": 24900 }, { "epoch": 12.02, "learning_rate": 8.848743291683583e-06, "loss": 0.727, "step": 24910 }, { "epoch": 12.02, "learning_rate": 8.843592336626868e-06, "loss": 0.827, "step": 24920 }, { "epoch": 12.02, "learning_rate": 8.83844072671957e-06, "loss": 0.4957, "step": 24930 }, { "epoch": 12.02, "learning_rate": 8.833288464472524e-06, "loss": 0.7769, "step": 24940 }, { "epoch": 12.02, "learning_rate": 8.828135552396875e-06, "loss": 0.4891, "step": 24950 }, { "epoch": 12.02, "learning_rate": 8.822981993004093e-06, "loss": 1.0911, "step": 24960 }, { "epoch": 12.02, "learning_rate": 8.81782778880596e-06, "loss": 0.4612, "step": 24970 }, { "epoch": 12.02, "learning_rate": 8.812672942314572e-06, "loss": 0.9634, "step": 24980 }, { "epoch": 12.02, "learning_rate": 8.807517456042335e-06, "loss": 0.7368, "step": 24990 }, { "epoch": 12.02, "learning_rate": 8.80236133250198e-06, "loss": 0.5336, "step": 25000 }, { "epoch": 12.02, "learning_rate": 8.797204574206529e-06, "loss": 0.7426, "step": 25010 }, { "epoch": 12.02, "learning_rate": 8.792047183669327e-06, "loss": 0.7129, "step": 25020 }, { "epoch": 12.02, "learning_rate": 8.786889163404021e-06, "loss": 0.7036, "step": 25030 }, { "epoch": 12.02, "learning_rate": 8.781730515924576e-06, "loss": 0.5815, "step": 25040 }, { "epoch": 12.02, "learning_rate": 8.776571243745244e-06, "loss": 1.1234, "step": 25050 }, { "epoch": 12.02, "learning_rate": 8.771411349380598e-06, "loss": 0.9104, "step": 25060 }, { "epoch": 12.02, "learning_rate": 8.766250835345503e-06, "loss": 0.6302, "step": 25070 }, { "epoch": 12.02, "learning_rate": 8.761089704155136e-06, "loss": 0.7441, "step": 25080 }, { "epoch": 12.02, "learning_rate": 8.755927958324966e-06, "loss": 0.9698, "step": 25090 }, { "epoch": 12.02, "learning_rate": 8.750765600370768e-06, "loss": 0.596, "step": 25100 }, { "epoch": 12.02, "learning_rate": 8.74560263280861e-06, "loss": 0.7446, "step": 25110 }, { "epoch": 12.02, "learning_rate": 8.740439058154858e-06, "loss": 0.4741, "step": 25120 }, { "epoch": 12.02, "learning_rate": 8.73527487892618e-06, "loss": 0.884, "step": 25130 }, { "epoch": 12.02, "learning_rate": 8.730110097639533e-06, "loss": 0.5014, "step": 25140 }, { "epoch": 12.02, "learning_rate": 8.724944716812167e-06, "loss": 0.871, "step": 25150 }, { "epoch": 12.02, "learning_rate": 8.719778738961629e-06, "loss": 0.7679, "step": 25160 }, { "epoch": 12.02, "learning_rate": 8.714612166605747e-06, "loss": 0.6464, "step": 25170 }, { "epoch": 12.02, "learning_rate": 8.709445002262655e-06, "loss": 0.4916, "step": 25180 }, { "epoch": 12.02, "learning_rate": 8.704277248450758e-06, "loss": 0.9301, "step": 25190 }, { "epoch": 12.02, "learning_rate": 8.699108907688763e-06, "loss": 0.6673, "step": 25200 }, { "epoch": 12.02, "learning_rate": 8.69393998249565e-06, "loss": 0.7301, "step": 25210 }, { "epoch": 12.02, "learning_rate": 8.688770475390698e-06, "loss": 0.73, "step": 25220 }, { "epoch": 12.02, "learning_rate": 8.683600388893454e-06, "loss": 0.5313, "step": 25230 }, { "epoch": 12.02, "learning_rate": 8.67842972552376e-06, "loss": 0.4326, "step": 25240 }, { "epoch": 12.03, "learning_rate": 8.673258487801733e-06, "loss": 0.6077, "step": 25250 }, { "epoch": 12.03, "learning_rate": 8.668086678247771e-06, "loss": 0.8506, "step": 25260 }, { "epoch": 12.03, "learning_rate": 8.662914299382555e-06, "loss": 0.8871, "step": 25270 }, { "epoch": 12.03, "learning_rate": 8.657741353727034e-06, "loss": 0.6094, "step": 25280 }, { "epoch": 12.03, "learning_rate": 8.652567843802442e-06, "loss": 0.3646, "step": 25290 }, { "epoch": 12.03, "learning_rate": 8.647393772130287e-06, "loss": 0.7661, "step": 25300 }, { "epoch": 12.03, "learning_rate": 8.642219141232343e-06, "loss": 0.7078, "step": 25310 }, { "epoch": 12.03, "learning_rate": 8.637043953630668e-06, "loss": 0.6364, "step": 25320 }, { "epoch": 12.03, "learning_rate": 8.63186821184758e-06, "loss": 0.7387, "step": 25330 }, { "epoch": 12.03, "learning_rate": 8.626691918405678e-06, "loss": 0.7593, "step": 25340 }, { "epoch": 12.03, "learning_rate": 8.621515075827822e-06, "loss": 0.8417, "step": 25350 }, { "epoch": 12.03, "learning_rate": 8.616337686637142e-06, "loss": 0.7509, "step": 25360 }, { "epoch": 12.03, "learning_rate": 8.611159753357035e-06, "loss": 0.5077, "step": 25370 }, { "epoch": 12.03, "learning_rate": 8.605981278511162e-06, "loss": 0.5972, "step": 25380 }, { "epoch": 12.03, "learning_rate": 8.60080226462345e-06, "loss": 0.8684, "step": 25390 }, { "epoch": 12.03, "learning_rate": 8.595622714218088e-06, "loss": 0.8425, "step": 25400 }, { "epoch": 12.03, "learning_rate": 8.590442629819523e-06, "loss": 0.6553, "step": 25410 }, { "epoch": 12.03, "learning_rate": 8.585262013952469e-06, "loss": 1.0543, "step": 25420 }, { "epoch": 12.03, "learning_rate": 8.580080869141891e-06, "loss": 0.5518, "step": 25430 }, { "epoch": 12.03, "learning_rate": 8.574899197913021e-06, "loss": 0.6666, "step": 25440 }, { "epoch": 12.03, "learning_rate": 8.569717002791338e-06, "loss": 0.8162, "step": 25450 }, { "epoch": 12.03, "learning_rate": 8.564534286302583e-06, "loss": 0.5814, "step": 25460 }, { "epoch": 12.03, "learning_rate": 8.559351050972751e-06, "loss": 0.7725, "step": 25470 }, { "epoch": 12.03, "learning_rate": 8.554167299328082e-06, "loss": 0.6521, "step": 25480 }, { "epoch": 12.03, "learning_rate": 8.548983033895081e-06, "loss": 0.6795, "step": 25490 }, { "epoch": 12.03, "learning_rate": 8.543798257200491e-06, "loss": 0.4602, "step": 25500 }, { "epoch": 12.03, "learning_rate": 8.538612971771311e-06, "loss": 0.557, "step": 25510 }, { "epoch": 12.03, "learning_rate": 8.533427180134784e-06, "loss": 0.5068, "step": 25520 }, { "epoch": 12.03, "learning_rate": 8.52824088481841e-06, "loss": 0.7087, "step": 25530 }, { "epoch": 12.03, "learning_rate": 8.523054088349913e-06, "loss": 0.3761, "step": 25540 }, { "epoch": 12.03, "learning_rate": 8.517866793257284e-06, "loss": 0.4812, "step": 25550 }, { "epoch": 12.03, "learning_rate": 8.512679002068744e-06, "loss": 0.7368, "step": 25560 }, { "epoch": 12.03, "learning_rate": 8.50749071731276e-06, "loss": 0.581, "step": 25570 }, { "epoch": 12.03, "learning_rate": 8.502301941518045e-06, "loss": 0.2424, "step": 25580 }, { "epoch": 12.03, "learning_rate": 8.497112677213532e-06, "loss": 0.4681, "step": 25590 }, { "epoch": 12.03, "learning_rate": 8.49192292692842e-06, "loss": 0.8638, "step": 25600 }, { "epoch": 12.03, "learning_rate": 8.48673269319212e-06, "loss": 0.4321, "step": 25610 }, { "epoch": 12.03, "learning_rate": 8.481541978534293e-06, "loss": 0.5152, "step": 25620 }, { "epoch": 12.03, "learning_rate": 8.476350785484828e-06, "loss": 0.5563, "step": 25630 }, { "epoch": 12.03, "learning_rate": 8.471159116573852e-06, "loss": 0.9802, "step": 25640 }, { "epoch": 12.03, "learning_rate": 8.465966974331722e-06, "loss": 1.1415, "step": 25650 }, { "epoch": 12.03, "learning_rate": 8.460774361289021e-06, "loss": 1.0542, "step": 25660 }, { "epoch": 12.03, "learning_rate": 8.45558127997657e-06, "loss": 0.7572, "step": 25670 }, { "epoch": 12.03, "learning_rate": 8.450387732925411e-06, "loss": 0.6368, "step": 25680 }, { "epoch": 12.03, "learning_rate": 8.445193722666814e-06, "loss": 0.8224, "step": 25690 }, { "epoch": 12.03, "learning_rate": 8.439999251732282e-06, "loss": 0.4071, "step": 25700 }, { "epoch": 12.03, "learning_rate": 8.434804322653534e-06, "loss": 0.8619, "step": 25710 }, { "epoch": 12.03, "learning_rate": 8.429608937962513e-06, "loss": 0.3016, "step": 25720 }, { "epoch": 12.03, "learning_rate": 8.424413100191391e-06, "loss": 1.1291, "step": 25730 }, { "epoch": 12.03, "learning_rate": 8.419216811872551e-06, "loss": 0.5899, "step": 25740 }, { "epoch": 12.04, "learning_rate": 8.414020075538606e-06, "loss": 0.3795, "step": 25750 }, { "epoch": 12.04, "learning_rate": 8.408822893722379e-06, "loss": 0.5415, "step": 25760 }, { "epoch": 12.04, "learning_rate": 8.403625268956915e-06, "loss": 0.3862, "step": 25770 }, { "epoch": 12.04, "learning_rate": 8.398427203775473e-06, "loss": 0.7194, "step": 25780 }, { "epoch": 12.04, "learning_rate": 8.393228700711524e-06, "loss": 0.7457, "step": 25790 }, { "epoch": 12.04, "learning_rate": 8.388029762298759e-06, "loss": 0.6828, "step": 25800 }, { "epoch": 12.04, "learning_rate": 8.382830391071072e-06, "loss": 0.4018, "step": 25810 }, { "epoch": 12.04, "learning_rate": 8.377630589562581e-06, "loss": 0.6345, "step": 25820 }, { "epoch": 12.04, "learning_rate": 8.3724303603076e-06, "loss": 0.6341, "step": 25830 }, { "epoch": 12.04, "learning_rate": 8.367229705840664e-06, "loss": 0.4764, "step": 25840 }, { "epoch": 12.04, "learning_rate": 8.3620286286965e-06, "loss": 0.8783, "step": 25850 }, { "epoch": 12.04, "learning_rate": 8.356827131410057e-06, "loss": 0.5031, "step": 25860 }, { "epoch": 12.04, "learning_rate": 8.351625216516476e-06, "loss": 0.8044, "step": 25870 }, { "epoch": 12.04, "learning_rate": 8.346422886551112e-06, "loss": 0.3773, "step": 25880 }, { "epoch": 12.04, "learning_rate": 8.341220144049517e-06, "loss": 0.4815, "step": 25890 }, { "epoch": 12.04, "learning_rate": 8.33601699154744e-06, "loss": 0.6278, "step": 25900 }, { "epoch": 12.04, "learning_rate": 8.33081343158084e-06, "loss": 0.5951, "step": 25910 }, { "epoch": 12.04, "learning_rate": 8.325609466685867e-06, "loss": 0.7307, "step": 25920 }, { "epoch": 12.04, "learning_rate": 8.320405099398867e-06, "loss": 0.4442, "step": 25930 }, { "epoch": 12.04, "learning_rate": 8.31520033225639e-06, "loss": 0.5929, "step": 25940 }, { "epoch": 12.04, "learning_rate": 8.309995167795172e-06, "loss": 0.7052, "step": 25950 }, { "epoch": 12.04, "learning_rate": 8.30478960855215e-06, "loss": 0.5476, "step": 25960 }, { "epoch": 12.04, "learning_rate": 8.29958365706445e-06, "loss": 0.4296, "step": 25970 }, { "epoch": 12.04, "learning_rate": 8.29437731586939e-06, "loss": 0.5321, "step": 25980 }, { "epoch": 12.04, "learning_rate": 8.289170587504476e-06, "loss": 0.7862, "step": 25990 }, { "epoch": 12.04, "learning_rate": 8.283963474507402e-06, "loss": 0.4789, "step": 26000 }, { "epoch": 12.04, "eval_accuracy": 0.848421052631579, "eval_f1": 0.848421052631579, "eval_loss": 0.7902358770370483, "eval_runtime": 759.5604, "eval_samples_per_second": 6.254, "eval_steps_per_second": 1.564, "step": 26000 }, { "epoch": 13.0, "learning_rate": 8.278755979416055e-06, "loss": 0.8803, "step": 26010 }, { "epoch": 13.0, "learning_rate": 8.273548104768505e-06, "loss": 0.2926, "step": 26020 }, { "epoch": 13.0, "learning_rate": 8.268339853103007e-06, "loss": 0.7337, "step": 26030 }, { "epoch": 13.0, "learning_rate": 8.263131226957998e-06, "loss": 0.714, "step": 26040 }, { "epoch": 13.0, "learning_rate": 8.257922228872097e-06, "loss": 0.8997, "step": 26050 }, { "epoch": 13.0, "learning_rate": 8.25271286138411e-06, "loss": 0.8157, "step": 26060 }, { "epoch": 13.0, "learning_rate": 8.247503127033023e-06, "loss": 0.6588, "step": 26070 }, { "epoch": 13.0, "learning_rate": 8.242293028357989e-06, "loss": 0.7557, "step": 26080 }, { "epoch": 13.0, "learning_rate": 8.237082567898349e-06, "loss": 0.5747, "step": 26090 }, { "epoch": 13.0, "learning_rate": 8.231871748193622e-06, "loss": 0.4681, "step": 26100 }, { "epoch": 13.0, "learning_rate": 8.226660571783495e-06, "loss": 0.662, "step": 26110 }, { "epoch": 13.0, "learning_rate": 8.221449041207832e-06, "loss": 0.5678, "step": 26120 }, { "epoch": 13.0, "learning_rate": 8.216237159006672e-06, "loss": 0.4976, "step": 26130 }, { "epoch": 13.0, "learning_rate": 8.21102492772022e-06, "loss": 0.754, "step": 26140 }, { "epoch": 13.0, "learning_rate": 8.20581234988886e-06, "loss": 0.7292, "step": 26150 }, { "epoch": 13.0, "learning_rate": 8.200599428053131e-06, "loss": 0.7095, "step": 26160 }, { "epoch": 13.0, "learning_rate": 8.19538616475375e-06, "loss": 0.5002, "step": 26170 }, { "epoch": 13.0, "learning_rate": 8.190172562531606e-06, "loss": 0.4976, "step": 26180 }, { "epoch": 13.0, "learning_rate": 8.184958623927732e-06, "loss": 0.833, "step": 26190 }, { "epoch": 13.0, "learning_rate": 8.179744351483353e-06, "loss": 0.3571, "step": 26200 }, { "epoch": 13.0, "learning_rate": 8.174529747739829e-06, "loss": 0.5573, "step": 26210 }, { "epoch": 13.0, "learning_rate": 8.169314815238705e-06, "loss": 0.4676, "step": 26220 }, { "epoch": 13.0, "learning_rate": 8.164099556521667e-06, "loss": 0.3272, "step": 26230 }, { "epoch": 13.0, "learning_rate": 8.158883974130576e-06, "loss": 0.4443, "step": 26240 }, { "epoch": 13.01, "learning_rate": 8.153668070607439e-06, "loss": 0.5747, "step": 26250 }, { "epoch": 13.01, "learning_rate": 8.148451848494422e-06, "loss": 0.7184, "step": 26260 }, { "epoch": 13.01, "learning_rate": 8.143235310333854e-06, "loss": 0.5533, "step": 26270 }, { "epoch": 13.01, "learning_rate": 8.13801845866821e-06, "loss": 0.4728, "step": 26280 }, { "epoch": 13.01, "learning_rate": 8.132801296040121e-06, "loss": 0.3192, "step": 26290 }, { "epoch": 13.01, "learning_rate": 8.127583824992369e-06, "loss": 0.7083, "step": 26300 }, { "epoch": 13.01, "learning_rate": 8.12236604806788e-06, "loss": 0.8348, "step": 26310 }, { "epoch": 13.01, "learning_rate": 8.117147967809741e-06, "loss": 0.9413, "step": 26320 }, { "epoch": 13.01, "learning_rate": 8.111929586761183e-06, "loss": 0.7851, "step": 26330 }, { "epoch": 13.01, "learning_rate": 8.106710907465576e-06, "loss": 0.7636, "step": 26340 }, { "epoch": 13.01, "learning_rate": 8.101491932466441e-06, "loss": 0.5119, "step": 26350 }, { "epoch": 13.01, "learning_rate": 8.096272664307448e-06, "loss": 0.7746, "step": 26360 }, { "epoch": 13.01, "learning_rate": 8.091053105532402e-06, "loss": 0.3223, "step": 26370 }, { "epoch": 13.01, "learning_rate": 8.085833258685251e-06, "loss": 0.8672, "step": 26380 }, { "epoch": 13.01, "learning_rate": 8.08061312631009e-06, "loss": 0.5817, "step": 26390 }, { "epoch": 13.01, "learning_rate": 8.07539271095114e-06, "loss": 0.6081, "step": 26400 }, { "epoch": 13.01, "learning_rate": 8.070172015152773e-06, "loss": 0.5154, "step": 26410 }, { "epoch": 13.01, "learning_rate": 8.064951041459496e-06, "loss": 0.664, "step": 26420 }, { "epoch": 13.01, "learning_rate": 8.059729792415942e-06, "loss": 0.8406, "step": 26430 }, { "epoch": 13.01, "learning_rate": 8.054508270566888e-06, "loss": 0.5142, "step": 26440 }, { "epoch": 13.01, "learning_rate": 8.049286478457237e-06, "loss": 0.7025, "step": 26450 }, { "epoch": 13.01, "learning_rate": 8.044064418632032e-06, "loss": 0.4909, "step": 26460 }, { "epoch": 13.01, "learning_rate": 8.038842093636438e-06, "loss": 0.8021, "step": 26470 }, { "epoch": 13.01, "learning_rate": 8.033619506015754e-06, "loss": 0.8264, "step": 26480 }, { "epoch": 13.01, "learning_rate": 8.028396658315402e-06, "loss": 0.7216, "step": 26490 }, { "epoch": 13.01, "learning_rate": 8.02317355308094e-06, "loss": 0.8995, "step": 26500 }, { "epoch": 13.01, "learning_rate": 8.017950192858045e-06, "loss": 0.6565, "step": 26510 }, { "epoch": 13.01, "learning_rate": 8.012726580192515e-06, "loss": 0.8193, "step": 26520 }, { "epoch": 13.01, "learning_rate": 8.007502717630282e-06, "loss": 0.5443, "step": 26530 }, { "epoch": 13.01, "learning_rate": 8.002278607717387e-06, "loss": 0.6395, "step": 26540 }, { "epoch": 13.01, "learning_rate": 7.997054253000003e-06, "loss": 0.56, "step": 26550 }, { "epoch": 13.01, "learning_rate": 7.991829656024412e-06, "loss": 0.5525, "step": 26560 }, { "epoch": 13.01, "learning_rate": 7.986604819337021e-06, "loss": 0.4242, "step": 26570 }, { "epoch": 13.01, "learning_rate": 7.981379745484353e-06, "loss": 0.533, "step": 26580 }, { "epoch": 13.01, "learning_rate": 7.976154437013045e-06, "loss": 0.597, "step": 26590 }, { "epoch": 13.01, "learning_rate": 7.970928896469851e-06, "loss": 0.7054, "step": 26600 }, { "epoch": 13.01, "learning_rate": 7.965703126401631e-06, "loss": 0.4532, "step": 26610 }, { "epoch": 13.01, "learning_rate": 7.960477129355367e-06, "loss": 0.7798, "step": 26620 }, { "epoch": 13.01, "learning_rate": 7.955250907878143e-06, "loss": 0.9396, "step": 26630 }, { "epoch": 13.01, "learning_rate": 7.950024464517157e-06, "loss": 0.6024, "step": 26640 }, { "epoch": 13.01, "learning_rate": 7.944797801819713e-06, "loss": 0.4973, "step": 26650 }, { "epoch": 13.01, "learning_rate": 7.939570922333223e-06, "loss": 0.7844, "step": 26660 }, { "epoch": 13.01, "learning_rate": 7.934343828605207e-06, "loss": 0.852, "step": 26670 }, { "epoch": 13.01, "learning_rate": 7.929116523183287e-06, "loss": 0.9642, "step": 26680 }, { "epoch": 13.01, "learning_rate": 7.923889008615186e-06, "loss": 0.7534, "step": 26690 }, { "epoch": 13.01, "learning_rate": 7.91866128744873e-06, "loss": 0.6524, "step": 26700 }, { "epoch": 13.01, "learning_rate": 7.913433362231847e-06, "loss": 0.6843, "step": 26710 }, { "epoch": 13.01, "learning_rate": 7.908205235512568e-06, "loss": 0.8911, "step": 26720 }, { "epoch": 13.01, "learning_rate": 7.902976909839015e-06, "loss": 0.711, "step": 26730 }, { "epoch": 13.01, "learning_rate": 7.897748387759413e-06, "loss": 0.6965, "step": 26740 }, { "epoch": 13.02, "learning_rate": 7.89251967182208e-06, "loss": 0.906, "step": 26750 }, { "epoch": 13.02, "learning_rate": 7.887290764575424e-06, "loss": 0.7533, "step": 26760 }, { "epoch": 13.02, "learning_rate": 7.882061668567957e-06, "loss": 0.4359, "step": 26770 }, { "epoch": 13.02, "learning_rate": 7.876832386348273e-06, "loss": 0.3871, "step": 26780 }, { "epoch": 13.02, "learning_rate": 7.87160292046506e-06, "loss": 0.4603, "step": 26790 }, { "epoch": 13.02, "learning_rate": 7.8663732734671e-06, "loss": 0.5004, "step": 26800 }, { "epoch": 13.02, "learning_rate": 7.861143447903256e-06, "loss": 0.7104, "step": 26810 }, { "epoch": 13.02, "learning_rate": 7.855913446322486e-06, "loss": 0.4944, "step": 26820 }, { "epoch": 13.02, "learning_rate": 7.850683271273822e-06, "loss": 0.61, "step": 26830 }, { "epoch": 13.02, "learning_rate": 7.845452925306393e-06, "loss": 0.607, "step": 26840 }, { "epoch": 13.02, "learning_rate": 7.840222410969402e-06, "loss": 0.7068, "step": 26850 }, { "epoch": 13.02, "learning_rate": 7.834991730812147e-06, "loss": 0.3703, "step": 26860 }, { "epoch": 13.02, "learning_rate": 7.829760887383987e-06, "loss": 0.7156, "step": 26870 }, { "epoch": 13.02, "learning_rate": 7.824529883234379e-06, "loss": 0.6012, "step": 26880 }, { "epoch": 13.02, "learning_rate": 7.819298720912848e-06, "loss": 0.8801, "step": 26890 }, { "epoch": 13.02, "learning_rate": 7.814067402968998e-06, "loss": 0.6596, "step": 26900 }, { "epoch": 13.02, "learning_rate": 7.808835931952513e-06, "loss": 0.6712, "step": 26910 }, { "epoch": 13.02, "learning_rate": 7.803604310413144e-06, "loss": 0.3717, "step": 26920 }, { "epoch": 13.02, "learning_rate": 7.798372540900723e-06, "loss": 0.7806, "step": 26930 }, { "epoch": 13.02, "learning_rate": 7.793140625965152e-06, "loss": 0.4922, "step": 26940 }, { "epoch": 13.02, "learning_rate": 7.7879085681564e-06, "loss": 0.2948, "step": 26950 }, { "epoch": 13.02, "learning_rate": 7.78267637002451e-06, "loss": 0.8103, "step": 26960 }, { "epoch": 13.02, "learning_rate": 7.77744403411959e-06, "loss": 0.2707, "step": 26970 }, { "epoch": 13.02, "learning_rate": 7.772211562991819e-06, "loss": 0.6444, "step": 26980 }, { "epoch": 13.02, "learning_rate": 7.766978959191438e-06, "loss": 1.2529, "step": 26990 }, { "epoch": 13.02, "learning_rate": 7.76174622526876e-06, "loss": 0.6869, "step": 27000 }, { "epoch": 13.02, "learning_rate": 7.756513363774147e-06, "loss": 0.5928, "step": 27010 }, { "epoch": 13.02, "learning_rate": 7.751280377258036e-06, "loss": 0.2015, "step": 27020 }, { "epoch": 13.02, "learning_rate": 7.746047268270923e-06, "loss": 0.5429, "step": 27030 }, { "epoch": 13.02, "learning_rate": 7.740814039363363e-06, "loss": 0.7598, "step": 27040 }, { "epoch": 13.02, "learning_rate": 7.735580693085962e-06, "loss": 0.6728, "step": 27050 }, { "epoch": 13.02, "learning_rate": 7.730347231989397e-06, "loss": 0.455, "step": 27060 }, { "epoch": 13.02, "learning_rate": 7.725113658624384e-06, "loss": 0.7046, "step": 27070 }, { "epoch": 13.02, "learning_rate": 7.719879975541714e-06, "loss": 0.4476, "step": 27080 }, { "epoch": 13.02, "learning_rate": 7.714646185292211e-06, "loss": 0.3398, "step": 27090 }, { "epoch": 13.02, "learning_rate": 7.709412290426768e-06, "loss": 0.4458, "step": 27100 }, { "epoch": 13.02, "learning_rate": 7.704178293496315e-06, "loss": 1.237, "step": 27110 }, { "epoch": 13.02, "learning_rate": 7.698944197051845e-06, "loss": 0.495, "step": 27120 }, { "epoch": 13.02, "learning_rate": 7.693710003644391e-06, "loss": 0.918, "step": 27130 }, { "epoch": 13.02, "learning_rate": 7.688475715825032e-06, "loss": 1.0978, "step": 27140 }, { "epoch": 13.02, "learning_rate": 7.6832413361449e-06, "loss": 0.5083, "step": 27150 }, { "epoch": 13.02, "learning_rate": 7.678006867155165e-06, "loss": 0.7703, "step": 27160 }, { "epoch": 13.02, "learning_rate": 7.672772311407047e-06, "loss": 0.4654, "step": 27170 }, { "epoch": 13.02, "learning_rate": 7.667537671451803e-06, "loss": 0.9089, "step": 27180 }, { "epoch": 13.02, "learning_rate": 7.662302949840734e-06, "loss": 0.4724, "step": 27190 }, { "epoch": 13.02, "learning_rate": 7.657068149125177e-06, "loss": 0.752, "step": 27200 }, { "epoch": 13.02, "learning_rate": 7.651833271856514e-06, "loss": 0.6876, "step": 27210 }, { "epoch": 13.02, "learning_rate": 7.64659832058616e-06, "loss": 0.8171, "step": 27220 }, { "epoch": 13.02, "learning_rate": 7.641363297865563e-06, "loss": 0.5231, "step": 27230 }, { "epoch": 13.02, "learning_rate": 7.636128206246212e-06, "loss": 0.6129, "step": 27240 }, { "epoch": 13.03, "learning_rate": 7.630893048279627e-06, "loss": 0.498, "step": 27250 }, { "epoch": 13.03, "learning_rate": 7.625657826517362e-06, "loss": 0.5614, "step": 27260 }, { "epoch": 13.03, "learning_rate": 7.620422543510997e-06, "loss": 0.4612, "step": 27270 }, { "epoch": 13.03, "learning_rate": 7.615187201812148e-06, "loss": 0.3073, "step": 27280 }, { "epoch": 13.03, "learning_rate": 7.609951803972455e-06, "loss": 0.3593, "step": 27290 }, { "epoch": 13.03, "learning_rate": 7.604716352543591e-06, "loss": 0.4811, "step": 27300 }, { "epoch": 13.03, "learning_rate": 7.5994808500772465e-06, "loss": 0.5341, "step": 27310 }, { "epoch": 13.03, "learning_rate": 7.594245299125145e-06, "loss": 0.5838, "step": 27320 }, { "epoch": 13.03, "learning_rate": 7.589009702239029e-06, "loss": 0.6061, "step": 27330 }, { "epoch": 13.03, "learning_rate": 7.583774061970667e-06, "loss": 1.0167, "step": 27340 }, { "epoch": 13.03, "learning_rate": 7.578538380871844e-06, "loss": 0.3962, "step": 27350 }, { "epoch": 13.03, "learning_rate": 7.573302661494369e-06, "loss": 0.7158, "step": 27360 }, { "epoch": 13.03, "learning_rate": 7.568066906390064e-06, "loss": 0.6698, "step": 27370 }, { "epoch": 13.03, "learning_rate": 7.56283111811078e-06, "loss": 0.4631, "step": 27380 }, { "epoch": 13.03, "learning_rate": 7.557595299208372e-06, "loss": 0.6564, "step": 27390 }, { "epoch": 13.03, "learning_rate": 7.552359452234712e-06, "loss": 1.0689, "step": 27400 }, { "epoch": 13.03, "learning_rate": 7.547123579741694e-06, "loss": 0.4199, "step": 27410 }, { "epoch": 13.03, "learning_rate": 7.541887684281212e-06, "loss": 0.1127, "step": 27420 }, { "epoch": 13.03, "learning_rate": 7.536651768405185e-06, "loss": 0.6529, "step": 27430 }, { "epoch": 13.03, "learning_rate": 7.53141583466553e-06, "loss": 0.8881, "step": 27440 }, { "epoch": 13.03, "learning_rate": 7.526179885614178e-06, "loss": 0.6771, "step": 27450 }, { "epoch": 13.03, "learning_rate": 7.52094392380307e-06, "loss": 0.3566, "step": 27460 }, { "epoch": 13.03, "learning_rate": 7.515707951784145e-06, "loss": 0.4737, "step": 27470 }, { "epoch": 13.03, "learning_rate": 7.510471972109359e-06, "loss": 0.5579, "step": 27480 }, { "epoch": 13.03, "learning_rate": 7.505235987330656e-06, "loss": 0.7683, "step": 27490 }, { "epoch": 13.03, "learning_rate": 7.5e-06, "loss": 0.8997, "step": 27500 }, { "epoch": 13.03, "learning_rate": 7.494764012669344e-06, "loss": 0.8071, "step": 27510 }, { "epoch": 13.03, "learning_rate": 7.489528027890643e-06, "loss": 0.5877, "step": 27520 }, { "epoch": 13.03, "learning_rate": 7.484292048215854e-06, "loss": 0.8307, "step": 27530 }, { "epoch": 13.03, "learning_rate": 7.479056076196931e-06, "loss": 0.8793, "step": 27540 }, { "epoch": 13.03, "learning_rate": 7.473820114385825e-06, "loss": 0.5178, "step": 27550 }, { "epoch": 13.03, "learning_rate": 7.468584165334472e-06, "loss": 0.5048, "step": 27560 }, { "epoch": 13.03, "learning_rate": 7.463348231594817e-06, "loss": 0.6739, "step": 27570 }, { "epoch": 13.03, "learning_rate": 7.458112315718789e-06, "loss": 0.5874, "step": 27580 }, { "epoch": 13.03, "learning_rate": 7.452876420258308e-06, "loss": 0.5699, "step": 27590 }, { "epoch": 13.03, "learning_rate": 7.447640547765289e-06, "loss": 0.884, "step": 27600 }, { "epoch": 13.03, "learning_rate": 7.442404700791629e-06, "loss": 0.4479, "step": 27610 }, { "epoch": 13.03, "learning_rate": 7.43716888188922e-06, "loss": 0.4794, "step": 27620 }, { "epoch": 13.03, "learning_rate": 7.4319330936099345e-06, "loss": 0.5059, "step": 27630 }, { "epoch": 13.03, "learning_rate": 7.426697338505634e-06, "loss": 0.5991, "step": 27640 }, { "epoch": 13.03, "learning_rate": 7.421461619128159e-06, "loss": 0.9087, "step": 27650 }, { "epoch": 13.03, "learning_rate": 7.4162259380293345e-06, "loss": 0.8848, "step": 27660 }, { "epoch": 13.03, "learning_rate": 7.4109902977609716e-06, "loss": 0.7439, "step": 27670 }, { "epoch": 13.03, "learning_rate": 7.405754700874855e-06, "loss": 1.0823, "step": 27680 }, { "epoch": 13.03, "learning_rate": 7.400519149922755e-06, "loss": 0.3812, "step": 27690 }, { "epoch": 13.03, "learning_rate": 7.395283647456411e-06, "loss": 0.474, "step": 27700 }, { "epoch": 13.03, "learning_rate": 7.390048196027545e-06, "loss": 0.3511, "step": 27710 }, { "epoch": 13.03, "learning_rate": 7.3848127981878525e-06, "loss": 0.4437, "step": 27720 }, { "epoch": 13.03, "learning_rate": 7.379577456489006e-06, "loss": 0.7786, "step": 27730 }, { "epoch": 13.03, "learning_rate": 7.374342173482641e-06, "loss": 0.7242, "step": 27740 }, { "epoch": 13.04, "learning_rate": 7.369106951720374e-06, "loss": 0.4872, "step": 27750 }, { "epoch": 13.04, "learning_rate": 7.36387179375379e-06, "loss": 0.7691, "step": 27760 }, { "epoch": 13.04, "learning_rate": 7.35863670213444e-06, "loss": 0.7271, "step": 27770 }, { "epoch": 13.04, "learning_rate": 7.353401679413842e-06, "loss": 0.2824, "step": 27780 }, { "epoch": 13.04, "learning_rate": 7.348166728143486e-06, "loss": 0.323, "step": 27790 }, { "epoch": 13.04, "learning_rate": 7.342931850874824e-06, "loss": 0.8196, "step": 27800 }, { "epoch": 13.04, "learning_rate": 7.337697050159266e-06, "loss": 0.7817, "step": 27810 }, { "epoch": 13.04, "learning_rate": 7.332462328548198e-06, "loss": 0.6831, "step": 27820 }, { "epoch": 13.04, "learning_rate": 7.327227688592955e-06, "loss": 0.4716, "step": 27830 }, { "epoch": 13.04, "learning_rate": 7.321993132844837e-06, "loss": 0.4233, "step": 27840 }, { "epoch": 13.04, "learning_rate": 7.316758663855102e-06, "loss": 0.5285, "step": 27850 }, { "epoch": 13.04, "learning_rate": 7.31152428417497e-06, "loss": 0.7737, "step": 27860 }, { "epoch": 13.04, "learning_rate": 7.30628999635561e-06, "loss": 0.4997, "step": 27870 }, { "epoch": 13.04, "learning_rate": 7.301055802948154e-06, "loss": 0.4997, "step": 27880 }, { "epoch": 13.04, "learning_rate": 7.295821706503684e-06, "loss": 0.7782, "step": 27890 }, { "epoch": 13.04, "learning_rate": 7.290587709573235e-06, "loss": 0.8473, "step": 27900 }, { "epoch": 13.04, "learning_rate": 7.285353814707791e-06, "loss": 0.7146, "step": 27910 }, { "epoch": 13.04, "learning_rate": 7.280120024458289e-06, "loss": 0.7538, "step": 27920 }, { "epoch": 13.04, "learning_rate": 7.274886341375616e-06, "loss": 0.5318, "step": 27930 }, { "epoch": 13.04, "learning_rate": 7.269652768010605e-06, "loss": 0.5479, "step": 27940 }, { "epoch": 13.04, "learning_rate": 7.264419306914038e-06, "loss": 0.5562, "step": 27950 }, { "epoch": 13.04, "learning_rate": 7.259185960636639e-06, "loss": 0.6011, "step": 27960 }, { "epoch": 13.04, "learning_rate": 7.253952731729076e-06, "loss": 0.67, "step": 27970 }, { "epoch": 13.04, "learning_rate": 7.2487196227419636e-06, "loss": 0.8015, "step": 27980 }, { "epoch": 13.04, "learning_rate": 7.243486636225856e-06, "loss": 0.528, "step": 27990 }, { "epoch": 13.04, "learning_rate": 7.238253774731245e-06, "loss": 0.7398, "step": 28000 }, { "epoch": 13.04, "eval_accuracy": 0.8568421052631578, "eval_f1": 0.8568421052631578, "eval_loss": 0.7603176832199097, "eval_runtime": 752.2156, "eval_samples_per_second": 6.315, "eval_steps_per_second": 1.579, "step": 28000 }, { "epoch": 14.0, "learning_rate": 7.233021040808562e-06, "loss": 0.6519, "step": 28010 }, { "epoch": 14.0, "learning_rate": 7.227788437008182e-06, "loss": 0.4855, "step": 28020 }, { "epoch": 14.0, "learning_rate": 7.2225559658804115e-06, "loss": 0.547, "step": 28030 }, { "epoch": 14.0, "learning_rate": 7.21732362997549e-06, "loss": 0.4211, "step": 28040 }, { "epoch": 14.0, "learning_rate": 7.212091431843601e-06, "loss": 0.6844, "step": 28050 }, { "epoch": 14.0, "learning_rate": 7.206859374034849e-06, "loss": 0.6981, "step": 28060 }, { "epoch": 14.0, "learning_rate": 7.201627459099275e-06, "loss": 0.3789, "step": 28070 }, { "epoch": 14.0, "learning_rate": 7.196395689586858e-06, "loss": 0.418, "step": 28080 }, { "epoch": 14.0, "learning_rate": 7.19116406804749e-06, "loss": 0.8616, "step": 28090 }, { "epoch": 14.0, "learning_rate": 7.1859325970310044e-06, "loss": 0.4312, "step": 28100 }, { "epoch": 14.0, "learning_rate": 7.1807012790871536e-06, "loss": 0.3848, "step": 28110 }, { "epoch": 14.0, "learning_rate": 7.175470116765623e-06, "loss": 0.4381, "step": 28120 }, { "epoch": 14.0, "learning_rate": 7.170239112616015e-06, "loss": 0.7408, "step": 28130 }, { "epoch": 14.0, "learning_rate": 7.165008269187855e-06, "loss": 0.5329, "step": 28140 }, { "epoch": 14.0, "learning_rate": 7.159777589030597e-06, "loss": 0.3478, "step": 28150 }, { "epoch": 14.0, "learning_rate": 7.1545470746936075e-06, "loss": 0.6114, "step": 28160 }, { "epoch": 14.0, "learning_rate": 7.149316728726182e-06, "loss": 0.4138, "step": 28170 }, { "epoch": 14.0, "learning_rate": 7.144086553677518e-06, "loss": 0.6431, "step": 28180 }, { "epoch": 14.0, "learning_rate": 7.138856552096746e-06, "loss": 0.7836, "step": 28190 }, { "epoch": 14.0, "learning_rate": 7.1336267265329e-06, "loss": 0.71, "step": 28200 }, { "epoch": 14.0, "learning_rate": 7.128397079534941e-06, "loss": 0.7157, "step": 28210 }, { "epoch": 14.0, "learning_rate": 7.123167613651729e-06, "loss": 0.2906, "step": 28220 }, { "epoch": 14.0, "learning_rate": 7.117938331432043e-06, "loss": 0.548, "step": 28230 }, { "epoch": 14.0, "learning_rate": 7.112709235424576e-06, "loss": 0.3176, "step": 28240 }, { "epoch": 14.01, "learning_rate": 7.107480328177922e-06, "loss": 0.5765, "step": 28250 }, { "epoch": 14.01, "learning_rate": 7.102251612240589e-06, "loss": 0.9896, "step": 28260 }, { "epoch": 14.01, "learning_rate": 7.097023090160984e-06, "loss": 0.5716, "step": 28270 }, { "epoch": 14.01, "learning_rate": 7.091794764487433e-06, "loss": 0.7398, "step": 28280 }, { "epoch": 14.01, "learning_rate": 7.086566637768154e-06, "loss": 0.6816, "step": 28290 }, { "epoch": 14.01, "learning_rate": 7.081338712551271e-06, "loss": 0.5037, "step": 28300 }, { "epoch": 14.01, "learning_rate": 7.076110991384817e-06, "loss": 0.8894, "step": 28310 }, { "epoch": 14.01, "learning_rate": 7.070883476816714e-06, "loss": 0.6717, "step": 28320 }, { "epoch": 14.01, "learning_rate": 7.065656171394791e-06, "loss": 0.4743, "step": 28330 }, { "epoch": 14.01, "learning_rate": 7.0604290776667756e-06, "loss": 0.5505, "step": 28340 }, { "epoch": 14.01, "learning_rate": 7.055202198180289e-06, "loss": 0.6482, "step": 28350 }, { "epoch": 14.01, "learning_rate": 7.049975535482847e-06, "loss": 0.5837, "step": 28360 }, { "epoch": 14.01, "learning_rate": 7.044749092121859e-06, "loss": 0.8117, "step": 28370 }, { "epoch": 14.01, "learning_rate": 7.039522870644635e-06, "loss": 0.4058, "step": 28380 }, { "epoch": 14.01, "learning_rate": 7.03429687359837e-06, "loss": 0.4443, "step": 28390 }, { "epoch": 14.01, "learning_rate": 7.02907110353015e-06, "loss": 0.5815, "step": 28400 }, { "epoch": 14.01, "learning_rate": 7.023845562986955e-06, "loss": 0.3337, "step": 28410 }, { "epoch": 14.01, "learning_rate": 7.018620254515645e-06, "loss": 0.5415, "step": 28420 }, { "epoch": 14.01, "learning_rate": 7.013395180662979e-06, "loss": 0.8324, "step": 28430 }, { "epoch": 14.01, "learning_rate": 7.00817034397559e-06, "loss": 0.4286, "step": 28440 }, { "epoch": 14.01, "learning_rate": 7.002945747000001e-06, "loss": 0.5507, "step": 28450 }, { "epoch": 14.01, "learning_rate": 6.997721392282614e-06, "loss": 0.6761, "step": 28460 }, { "epoch": 14.01, "learning_rate": 6.99249728236972e-06, "loss": 0.8651, "step": 28470 }, { "epoch": 14.01, "learning_rate": 6.987273419807486e-06, "loss": 0.9459, "step": 28480 }, { "epoch": 14.01, "learning_rate": 6.982049807141956e-06, "loss": 0.8787, "step": 28490 }, { "epoch": 14.01, "learning_rate": 6.976826446919061e-06, "loss": 0.759, "step": 28500 }, { "epoch": 14.01, "learning_rate": 6.971603341684598e-06, "loss": 0.3774, "step": 28510 }, { "epoch": 14.01, "learning_rate": 6.966380493984251e-06, "loss": 0.4496, "step": 28520 }, { "epoch": 14.01, "learning_rate": 6.961157906363564e-06, "loss": 0.5926, "step": 28530 }, { "epoch": 14.01, "learning_rate": 6.95593558136797e-06, "loss": 0.231, "step": 28540 }, { "epoch": 14.01, "learning_rate": 6.950713521542764e-06, "loss": 0.4263, "step": 28550 }, { "epoch": 14.01, "learning_rate": 6.945491729433113e-06, "loss": 0.6419, "step": 28560 }, { "epoch": 14.01, "learning_rate": 6.940270207584059e-06, "loss": 0.5247, "step": 28570 }, { "epoch": 14.01, "learning_rate": 6.935048958540506e-06, "loss": 0.5439, "step": 28580 }, { "epoch": 14.01, "learning_rate": 6.929827984847225e-06, "loss": 0.4805, "step": 28590 }, { "epoch": 14.01, "learning_rate": 6.9246072890488605e-06, "loss": 0.3178, "step": 28600 }, { "epoch": 14.01, "learning_rate": 6.919386873689914e-06, "loss": 0.6941, "step": 28610 }, { "epoch": 14.01, "learning_rate": 6.91416674131475e-06, "loss": 0.5058, "step": 28620 }, { "epoch": 14.01, "learning_rate": 6.9089468944675996e-06, "loss": 0.4293, "step": 28630 }, { "epoch": 14.01, "learning_rate": 6.903727335692553e-06, "loss": 0.3477, "step": 28640 }, { "epoch": 14.01, "learning_rate": 6.8985080675335594e-06, "loss": 0.7547, "step": 28650 }, { "epoch": 14.01, "learning_rate": 6.893289092534425e-06, "loss": 0.43, "step": 28660 }, { "epoch": 14.01, "learning_rate": 6.888070413238819e-06, "loss": 0.5564, "step": 28670 }, { "epoch": 14.01, "learning_rate": 6.882852032190257e-06, "loss": 0.9997, "step": 28680 }, { "epoch": 14.01, "learning_rate": 6.87763395193212e-06, "loss": 0.6674, "step": 28690 }, { "epoch": 14.01, "learning_rate": 6.8724161750076355e-06, "loss": 0.485, "step": 28700 }, { "epoch": 14.01, "learning_rate": 6.867198703959881e-06, "loss": 0.7944, "step": 28710 }, { "epoch": 14.01, "learning_rate": 6.86198154133179e-06, "loss": 0.4259, "step": 28720 }, { "epoch": 14.01, "learning_rate": 6.856764689666146e-06, "loss": 0.8059, "step": 28730 }, { "epoch": 14.01, "learning_rate": 6.85154815150558e-06, "loss": 0.5936, "step": 28740 }, { "epoch": 14.02, "learning_rate": 6.8463319293925634e-06, "loss": 0.301, "step": 28750 }, { "epoch": 14.02, "learning_rate": 6.841116025869426e-06, "loss": 0.5836, "step": 28760 }, { "epoch": 14.02, "learning_rate": 6.835900443478333e-06, "loss": 0.673, "step": 28770 }, { "epoch": 14.02, "learning_rate": 6.830685184761296e-06, "loss": 0.3068, "step": 28780 }, { "epoch": 14.02, "learning_rate": 6.8254702522601715e-06, "loss": 0.8024, "step": 28790 }, { "epoch": 14.02, "learning_rate": 6.82025564851665e-06, "loss": 0.5062, "step": 28800 }, { "epoch": 14.02, "learning_rate": 6.815041376072268e-06, "loss": 0.5104, "step": 28810 }, { "epoch": 14.02, "learning_rate": 6.809827437468397e-06, "loss": 0.5171, "step": 28820 }, { "epoch": 14.02, "learning_rate": 6.80461383524625e-06, "loss": 0.6172, "step": 28830 }, { "epoch": 14.02, "learning_rate": 6.799400571946872e-06, "loss": 0.8842, "step": 28840 }, { "epoch": 14.02, "learning_rate": 6.7941876501111426e-06, "loss": 0.7714, "step": 28850 }, { "epoch": 14.02, "learning_rate": 6.78897507227978e-06, "loss": 0.82, "step": 28860 }, { "epoch": 14.02, "learning_rate": 6.7837628409933274e-06, "loss": 0.9166, "step": 28870 }, { "epoch": 14.02, "learning_rate": 6.778550958792171e-06, "loss": 0.4862, "step": 28880 }, { "epoch": 14.02, "learning_rate": 6.773339428216507e-06, "loss": 0.5772, "step": 28890 }, { "epoch": 14.02, "learning_rate": 6.76812825180638e-06, "loss": 0.3489, "step": 28900 }, { "epoch": 14.02, "learning_rate": 6.762917432101653e-06, "loss": 0.7883, "step": 28910 }, { "epoch": 14.02, "learning_rate": 6.7577069716420125e-06, "loss": 0.6087, "step": 28920 }, { "epoch": 14.02, "learning_rate": 6.752496872966979e-06, "loss": 0.588, "step": 28930 }, { "epoch": 14.02, "learning_rate": 6.747287138615887e-06, "loss": 0.5674, "step": 28940 }, { "epoch": 14.02, "learning_rate": 6.742077771127902e-06, "loss": 0.3757, "step": 28950 }, { "epoch": 14.02, "learning_rate": 6.7368687730420035e-06, "loss": 0.4141, "step": 28960 }, { "epoch": 14.02, "learning_rate": 6.731660146896996e-06, "loss": 0.7038, "step": 28970 }, { "epoch": 14.02, "learning_rate": 6.726451895231497e-06, "loss": 0.6075, "step": 28980 }, { "epoch": 14.02, "learning_rate": 6.721244020583946e-06, "loss": 0.6138, "step": 28990 }, { "epoch": 14.02, "learning_rate": 6.7160365254926005e-06, "loss": 0.6848, "step": 29000 }, { "epoch": 14.02, "learning_rate": 6.710829412495527e-06, "loss": 0.5487, "step": 29010 }, { "epoch": 14.02, "learning_rate": 6.705622684130612e-06, "loss": 0.6123, "step": 29020 }, { "epoch": 14.02, "learning_rate": 6.700416342935551e-06, "loss": 0.572, "step": 29030 }, { "epoch": 14.02, "learning_rate": 6.69521039144785e-06, "loss": 1.101, "step": 29040 }, { "epoch": 14.02, "learning_rate": 6.690004832204828e-06, "loss": 0.5468, "step": 29050 }, { "epoch": 14.02, "learning_rate": 6.684799667743613e-06, "loss": 0.4592, "step": 29060 }, { "epoch": 14.02, "learning_rate": 6.679594900601136e-06, "loss": 0.4517, "step": 29070 }, { "epoch": 14.02, "learning_rate": 6.674390533314135e-06, "loss": 0.3175, "step": 29080 }, { "epoch": 14.02, "learning_rate": 6.66918656841916e-06, "loss": 0.5221, "step": 29090 }, { "epoch": 14.02, "learning_rate": 6.66398300845256e-06, "loss": 0.6589, "step": 29100 }, { "epoch": 14.02, "learning_rate": 6.658779855950483e-06, "loss": 0.7721, "step": 29110 }, { "epoch": 14.02, "learning_rate": 6.653577113448887e-06, "loss": 0.5447, "step": 29120 }, { "epoch": 14.02, "learning_rate": 6.648374783483521e-06, "loss": 0.6826, "step": 29130 }, { "epoch": 14.02, "learning_rate": 6.643172868589947e-06, "loss": 0.8476, "step": 29140 }, { "epoch": 14.02, "learning_rate": 6.6379713713035015e-06, "loss": 0.5372, "step": 29150 }, { "epoch": 14.02, "learning_rate": 6.63277029415934e-06, "loss": 0.426, "step": 29160 }, { "epoch": 14.02, "learning_rate": 6.627569639692401e-06, "loss": 0.4579, "step": 29170 }, { "epoch": 14.02, "learning_rate": 6.62236941043742e-06, "loss": 0.3956, "step": 29180 }, { "epoch": 14.02, "learning_rate": 6.617169608928927e-06, "loss": 0.663, "step": 29190 }, { "epoch": 14.02, "learning_rate": 6.611970237701242e-06, "loss": 0.5617, "step": 29200 }, { "epoch": 14.02, "learning_rate": 6.606771299288477e-06, "loss": 0.5615, "step": 29210 }, { "epoch": 14.02, "learning_rate": 6.6015727962245286e-06, "loss": 0.5542, "step": 29220 }, { "epoch": 14.02, "learning_rate": 6.596374731043087e-06, "loss": 0.4503, "step": 29230 }, { "epoch": 14.02, "learning_rate": 6.591177106277623e-06, "loss": 0.7225, "step": 29240 }, { "epoch": 14.03, "learning_rate": 6.5859799244613955e-06, "loss": 0.9153, "step": 29250 }, { "epoch": 14.03, "learning_rate": 6.580783188127449e-06, "loss": 0.6467, "step": 29260 }, { "epoch": 14.03, "learning_rate": 6.57558689980861e-06, "loss": 0.4762, "step": 29270 }, { "epoch": 14.03, "learning_rate": 6.570391062037487e-06, "loss": 0.5648, "step": 29280 }, { "epoch": 14.03, "learning_rate": 6.5651956773464675e-06, "loss": 0.6234, "step": 29290 }, { "epoch": 14.03, "learning_rate": 6.560000748267717e-06, "loss": 0.9244, "step": 29300 }, { "epoch": 14.03, "learning_rate": 6.554806277333185e-06, "loss": 0.3941, "step": 29310 }, { "epoch": 14.03, "learning_rate": 6.549612267074592e-06, "loss": 0.5927, "step": 29320 }, { "epoch": 14.03, "learning_rate": 6.544418720023433e-06, "loss": 0.5772, "step": 29330 }, { "epoch": 14.03, "learning_rate": 6.53922563871098e-06, "loss": 0.4929, "step": 29340 }, { "epoch": 14.03, "learning_rate": 6.5340330256682805e-06, "loss": 0.853, "step": 29350 }, { "epoch": 14.03, "learning_rate": 6.5288408834261485e-06, "loss": 0.516, "step": 29360 }, { "epoch": 14.03, "learning_rate": 6.523649214515172e-06, "loss": 0.5695, "step": 29370 }, { "epoch": 14.03, "learning_rate": 6.5184580214657085e-06, "loss": 0.3876, "step": 29380 }, { "epoch": 14.03, "learning_rate": 6.513267306807882e-06, "loss": 0.6751, "step": 29390 }, { "epoch": 14.03, "learning_rate": 6.508077073071581e-06, "loss": 0.6608, "step": 29400 }, { "epoch": 14.03, "learning_rate": 6.502887322786468e-06, "loss": 0.5213, "step": 29410 }, { "epoch": 14.03, "learning_rate": 6.497698058481959e-06, "loss": 0.9044, "step": 29420 }, { "epoch": 14.03, "learning_rate": 6.49250928268724e-06, "loss": 0.7779, "step": 29430 }, { "epoch": 14.03, "learning_rate": 6.487320997931256e-06, "loss": 0.8036, "step": 29440 }, { "epoch": 14.03, "learning_rate": 6.482133206742718e-06, "loss": 0.9571, "step": 29450 }, { "epoch": 14.03, "learning_rate": 6.476945911650088e-06, "loss": 0.5353, "step": 29460 }, { "epoch": 14.03, "learning_rate": 6.471759115181593e-06, "loss": 0.3019, "step": 29470 }, { "epoch": 14.03, "learning_rate": 6.466572819865215e-06, "loss": 0.4097, "step": 29480 }, { "epoch": 14.03, "learning_rate": 6.461387028228688e-06, "loss": 0.7054, "step": 29490 }, { "epoch": 14.03, "learning_rate": 6.456201742799511e-06, "loss": 0.9272, "step": 29500 }, { "epoch": 14.03, "learning_rate": 6.451016966104921e-06, "loss": 0.7606, "step": 29510 }, { "epoch": 14.03, "learning_rate": 6.445832700671919e-06, "loss": 0.4147, "step": 29520 }, { "epoch": 14.03, "learning_rate": 6.440648949027251e-06, "loss": 0.4601, "step": 29530 }, { "epoch": 14.03, "learning_rate": 6.435465713697417e-06, "loss": 0.4947, "step": 29540 }, { "epoch": 14.03, "learning_rate": 6.430282997208663e-06, "loss": 0.4206, "step": 29550 }, { "epoch": 14.03, "learning_rate": 6.425100802086979e-06, "loss": 0.8854, "step": 29560 }, { "epoch": 14.03, "learning_rate": 6.419919130858109e-06, "loss": 0.4316, "step": 29570 }, { "epoch": 14.03, "learning_rate": 6.414737986047532e-06, "loss": 0.3306, "step": 29580 }, { "epoch": 14.03, "learning_rate": 6.409557370180479e-06, "loss": 1.0803, "step": 29590 }, { "epoch": 14.03, "learning_rate": 6.404377285781914e-06, "loss": 0.5344, "step": 29600 }, { "epoch": 14.03, "learning_rate": 6.399197735376551e-06, "loss": 0.5595, "step": 29610 }, { "epoch": 14.03, "learning_rate": 6.394018721488839e-06, "loss": 0.9271, "step": 29620 }, { "epoch": 14.03, "learning_rate": 6.388840246642965e-06, "loss": 0.328, "step": 29630 }, { "epoch": 14.03, "learning_rate": 6.383662313362858e-06, "loss": 0.6804, "step": 29640 }, { "epoch": 14.03, "learning_rate": 6.37848492417218e-06, "loss": 0.231, "step": 29650 }, { "epoch": 14.03, "learning_rate": 6.373308081594322e-06, "loss": 0.6374, "step": 29660 }, { "epoch": 14.03, "learning_rate": 6.368131788152419e-06, "loss": 0.6959, "step": 29670 }, { "epoch": 14.03, "learning_rate": 6.362956046369335e-06, "loss": 0.346, "step": 29680 }, { "epoch": 14.03, "learning_rate": 6.3577808587676585e-06, "loss": 0.5246, "step": 29690 }, { "epoch": 14.03, "learning_rate": 6.3526062278697154e-06, "loss": 0.8182, "step": 29700 }, { "epoch": 14.03, "learning_rate": 6.347432156197558e-06, "loss": 0.7254, "step": 29710 }, { "epoch": 14.03, "learning_rate": 6.342258646272966e-06, "loss": 0.6114, "step": 29720 }, { "epoch": 14.03, "learning_rate": 6.3370857006174464e-06, "loss": 0.526, "step": 29730 }, { "epoch": 14.03, "learning_rate": 6.3319133217522295e-06, "loss": 0.4828, "step": 29740 }, { "epoch": 14.04, "learning_rate": 6.326741512198267e-06, "loss": 0.5321, "step": 29750 }, { "epoch": 14.04, "learning_rate": 6.321570274476245e-06, "loss": 0.7288, "step": 29760 }, { "epoch": 14.04, "learning_rate": 6.316399611106549e-06, "loss": 0.5285, "step": 29770 }, { "epoch": 14.04, "learning_rate": 6.311229524609307e-06, "loss": 0.656, "step": 29780 }, { "epoch": 14.04, "learning_rate": 6.3060600175043494e-06, "loss": 0.8339, "step": 29790 }, { "epoch": 14.04, "learning_rate": 6.300891092311239e-06, "loss": 0.506, "step": 29800 }, { "epoch": 14.04, "learning_rate": 6.295722751549243e-06, "loss": 0.5651, "step": 29810 }, { "epoch": 14.04, "learning_rate": 6.290554997737346e-06, "loss": 0.4679, "step": 29820 }, { "epoch": 14.04, "learning_rate": 6.2853878333942526e-06, "loss": 0.7864, "step": 29830 }, { "epoch": 14.04, "learning_rate": 6.2802212610383735e-06, "loss": 0.5258, "step": 29840 }, { "epoch": 14.04, "learning_rate": 6.2750552831878354e-06, "loss": 0.7134, "step": 29850 }, { "epoch": 14.04, "learning_rate": 6.269889902360468e-06, "loss": 0.7398, "step": 29860 }, { "epoch": 14.04, "learning_rate": 6.26472512107382e-06, "loss": 0.439, "step": 29870 }, { "epoch": 14.04, "learning_rate": 6.259560941845143e-06, "loss": 0.5584, "step": 29880 }, { "epoch": 14.04, "learning_rate": 6.254397367191391e-06, "loss": 0.6233, "step": 29890 }, { "epoch": 14.04, "learning_rate": 6.249234399629234e-06, "loss": 0.702, "step": 29900 }, { "epoch": 14.04, "learning_rate": 6.244072041675034e-06, "loss": 0.5829, "step": 29910 }, { "epoch": 14.04, "learning_rate": 6.238910295844863e-06, "loss": 0.3531, "step": 29920 }, { "epoch": 14.04, "learning_rate": 6.233749164654496e-06, "loss": 0.6007, "step": 29930 }, { "epoch": 14.04, "learning_rate": 6.228588650619405e-06, "loss": 0.4124, "step": 29940 }, { "epoch": 14.04, "learning_rate": 6.223428756254758e-06, "loss": 0.5734, "step": 29950 }, { "epoch": 14.04, "learning_rate": 6.218269484075426e-06, "loss": 0.6953, "step": 29960 }, { "epoch": 14.04, "learning_rate": 6.213110836595978e-06, "loss": 0.716, "step": 29970 }, { "epoch": 14.04, "learning_rate": 6.207952816330676e-06, "loss": 0.708, "step": 29980 }, { "epoch": 14.04, "learning_rate": 6.202795425793473e-06, "loss": 0.6968, "step": 29990 }, { "epoch": 14.04, "learning_rate": 6.197638667498023e-06, "loss": 0.6807, "step": 30000 }, { "epoch": 14.04, "eval_accuracy": 0.871578947368421, "eval_f1": 0.871578947368421, "eval_loss": 0.7531183362007141, "eval_runtime": 743.1398, "eval_samples_per_second": 6.392, "eval_steps_per_second": 1.599, "step": 30000 }, { "epoch": 15.0, "learning_rate": 6.1924825439576625e-06, "loss": 0.3591, "step": 30010 }, { "epoch": 15.0, "learning_rate": 6.1873270576854295e-06, "loss": 0.8104, "step": 30020 }, { "epoch": 15.0, "learning_rate": 6.182172211194042e-06, "loss": 0.6743, "step": 30030 }, { "epoch": 15.0, "learning_rate": 6.177018006995909e-06, "loss": 0.3849, "step": 30040 }, { "epoch": 15.0, "learning_rate": 6.171864447603126e-06, "loss": 0.4138, "step": 30050 }, { "epoch": 15.0, "learning_rate": 6.1667115355274785e-06, "loss": 0.252, "step": 30060 }, { "epoch": 15.0, "learning_rate": 6.161559273280431e-06, "loss": 0.5183, "step": 30070 }, { "epoch": 15.0, "learning_rate": 6.156407663373133e-06, "loss": 0.5016, "step": 30080 }, { "epoch": 15.0, "learning_rate": 6.1512567083164184e-06, "loss": 0.7197, "step": 30090 }, { "epoch": 15.0, "learning_rate": 6.146106410620801e-06, "loss": 0.5565, "step": 30100 }, { "epoch": 15.0, "learning_rate": 6.140956772796469e-06, "loss": 0.4194, "step": 30110 }, { "epoch": 15.0, "learning_rate": 6.1358077973533e-06, "loss": 0.4268, "step": 30120 }, { "epoch": 15.0, "learning_rate": 6.130659486800836e-06, "loss": 0.4098, "step": 30130 }, { "epoch": 15.0, "learning_rate": 6.125511843648304e-06, "loss": 0.3364, "step": 30140 }, { "epoch": 15.0, "learning_rate": 6.1203648704045986e-06, "loss": 0.5037, "step": 30150 }, { "epoch": 15.0, "learning_rate": 6.115218569578299e-06, "loss": 0.3156, "step": 30160 }, { "epoch": 15.0, "learning_rate": 6.110072943677645e-06, "loss": 0.6378, "step": 30170 }, { "epoch": 15.0, "learning_rate": 6.104927995210551e-06, "loss": 0.2939, "step": 30180 }, { "epoch": 15.0, "learning_rate": 6.099783726684608e-06, "loss": 0.595, "step": 30190 }, { "epoch": 15.0, "learning_rate": 6.094640140607064e-06, "loss": 0.4013, "step": 30200 }, { "epoch": 15.0, "learning_rate": 6.089497239484847e-06, "loss": 0.751, "step": 30210 }, { "epoch": 15.0, "learning_rate": 6.084355025824538e-06, "loss": 0.7006, "step": 30220 }, { "epoch": 15.0, "learning_rate": 6.079213502132392e-06, "loss": 0.3794, "step": 30230 }, { "epoch": 15.0, "learning_rate": 6.074072670914325e-06, "loss": 0.4284, "step": 30240 }, { "epoch": 15.01, "learning_rate": 6.068932534675914e-06, "loss": 0.7241, "step": 30250 }, { "epoch": 15.01, "learning_rate": 6.063793095922403e-06, "loss": 0.4987, "step": 30260 }, { "epoch": 15.01, "learning_rate": 6.058654357158688e-06, "loss": 0.5056, "step": 30270 }, { "epoch": 15.01, "learning_rate": 6.053516320889331e-06, "loss": 0.7833, "step": 30280 }, { "epoch": 15.01, "learning_rate": 6.048378989618548e-06, "loss": 0.6071, "step": 30290 }, { "epoch": 15.01, "learning_rate": 6.043242365850212e-06, "loss": 0.3674, "step": 30300 }, { "epoch": 15.01, "learning_rate": 6.038106452087847e-06, "loss": 0.6286, "step": 30310 }, { "epoch": 15.01, "learning_rate": 6.032971250834639e-06, "loss": 0.695, "step": 30320 }, { "epoch": 15.01, "learning_rate": 6.027836764593422e-06, "loss": 0.4895, "step": 30330 }, { "epoch": 15.01, "learning_rate": 6.022702995866681e-06, "loss": 0.42, "step": 30340 }, { "epoch": 15.01, "learning_rate": 6.017569947156554e-06, "loss": 0.2868, "step": 30350 }, { "epoch": 15.01, "learning_rate": 6.012437620964827e-06, "loss": 0.4577, "step": 30360 }, { "epoch": 15.01, "learning_rate": 6.0073060197929295e-06, "loss": 0.662, "step": 30370 }, { "epoch": 15.01, "learning_rate": 6.0021751461419485e-06, "loss": 0.6371, "step": 30380 }, { "epoch": 15.01, "learning_rate": 5.997045002512604e-06, "loss": 0.3516, "step": 30390 }, { "epoch": 15.01, "learning_rate": 5.991915591405267e-06, "loss": 0.5922, "step": 30400 }, { "epoch": 15.01, "learning_rate": 5.986786915319949e-06, "loss": 0.7923, "step": 30410 }, { "epoch": 15.01, "learning_rate": 5.9816589767563065e-06, "loss": 0.3149, "step": 30420 }, { "epoch": 15.01, "learning_rate": 5.9765317782136346e-06, "loss": 0.4833, "step": 30430 }, { "epoch": 15.01, "learning_rate": 5.971405322190864e-06, "loss": 0.7275, "step": 30440 }, { "epoch": 15.01, "learning_rate": 5.966279611186573e-06, "loss": 0.6083, "step": 30450 }, { "epoch": 15.01, "learning_rate": 5.961154647698965e-06, "loss": 0.5943, "step": 30460 }, { "epoch": 15.01, "learning_rate": 5.956030434225892e-06, "loss": 0.7019, "step": 30470 }, { "epoch": 15.01, "learning_rate": 5.9509069732648255e-06, "loss": 0.5745, "step": 30480 }, { "epoch": 15.01, "learning_rate": 5.945784267312882e-06, "loss": 0.7729, "step": 30490 }, { "epoch": 15.01, "learning_rate": 5.9406623188668065e-06, "loss": 0.5774, "step": 30500 }, { "epoch": 15.01, "learning_rate": 5.935541130422972e-06, "loss": 0.468, "step": 30510 }, { "epoch": 15.01, "learning_rate": 5.930420704477387e-06, "loss": 0.5499, "step": 30520 }, { "epoch": 15.01, "learning_rate": 5.925301043525682e-06, "loss": 0.6378, "step": 30530 }, { "epoch": 15.01, "learning_rate": 5.9201821500631195e-06, "loss": 1.0126, "step": 30540 }, { "epoch": 15.01, "learning_rate": 5.915064026584586e-06, "loss": 1.0802, "step": 30550 }, { "epoch": 15.01, "learning_rate": 5.9099466755845925e-06, "loss": 0.7932, "step": 30560 }, { "epoch": 15.01, "learning_rate": 5.904830099557271e-06, "loss": 0.5136, "step": 30570 }, { "epoch": 15.01, "learning_rate": 5.899714300996381e-06, "loss": 0.4871, "step": 30580 }, { "epoch": 15.01, "learning_rate": 5.8945992823953e-06, "loss": 0.4485, "step": 30590 }, { "epoch": 15.01, "learning_rate": 5.889485046247026e-06, "loss": 0.5692, "step": 30600 }, { "epoch": 15.01, "learning_rate": 5.8843715950441765e-06, "loss": 0.6649, "step": 30610 }, { "epoch": 15.01, "learning_rate": 5.8792589312789855e-06, "loss": 0.6113, "step": 30620 }, { "epoch": 15.01, "learning_rate": 5.874147057443301e-06, "loss": 0.4845, "step": 30630 }, { "epoch": 15.01, "learning_rate": 5.869035976028589e-06, "loss": 0.6766, "step": 30640 }, { "epoch": 15.01, "learning_rate": 5.863925689525933e-06, "loss": 0.385, "step": 30650 }, { "epoch": 15.01, "learning_rate": 5.858816200426018e-06, "loss": 0.5752, "step": 30660 }, { "epoch": 15.01, "learning_rate": 5.853707511219148e-06, "loss": 0.7434, "step": 30670 }, { "epoch": 15.01, "learning_rate": 5.848599624395239e-06, "loss": 0.8021, "step": 30680 }, { "epoch": 15.01, "learning_rate": 5.8434925424438125e-06, "loss": 0.5919, "step": 30690 }, { "epoch": 15.01, "learning_rate": 5.838386267853996e-06, "loss": 0.7574, "step": 30700 }, { "epoch": 15.01, "learning_rate": 5.833280803114527e-06, "loss": 0.7047, "step": 30710 }, { "epoch": 15.01, "learning_rate": 5.828176150713747e-06, "loss": 0.4341, "step": 30720 }, { "epoch": 15.01, "learning_rate": 5.823072313139602e-06, "loss": 0.8295, "step": 30730 }, { "epoch": 15.01, "learning_rate": 5.817969292879642e-06, "loss": 0.2953, "step": 30740 }, { "epoch": 15.02, "learning_rate": 5.812867092421014e-06, "loss": 0.2567, "step": 30750 }, { "epoch": 15.02, "learning_rate": 5.807765714250469e-06, "loss": 0.6689, "step": 30760 }, { "epoch": 15.02, "learning_rate": 5.802665160854357e-06, "loss": 0.6787, "step": 30770 }, { "epoch": 15.02, "learning_rate": 5.797565434718626e-06, "loss": 0.7291, "step": 30780 }, { "epoch": 15.02, "learning_rate": 5.79246653832882e-06, "loss": 0.4689, "step": 30790 }, { "epoch": 15.02, "learning_rate": 5.787368474170081e-06, "loss": 0.144, "step": 30800 }, { "epoch": 15.02, "learning_rate": 5.782271244727144e-06, "loss": 0.598, "step": 30810 }, { "epoch": 15.02, "learning_rate": 5.777174852484333e-06, "loss": 0.7588, "step": 30820 }, { "epoch": 15.02, "learning_rate": 5.772079299925573e-06, "loss": 0.4024, "step": 30830 }, { "epoch": 15.02, "learning_rate": 5.766984589534368e-06, "loss": 0.6319, "step": 30840 }, { "epoch": 15.02, "learning_rate": 5.761890723793821e-06, "loss": 0.5051, "step": 30850 }, { "epoch": 15.02, "learning_rate": 5.75679770518662e-06, "loss": 0.428, "step": 30860 }, { "epoch": 15.02, "learning_rate": 5.751705536195043e-06, "loss": 0.6227, "step": 30870 }, { "epoch": 15.02, "learning_rate": 5.7466142193009485e-06, "loss": 0.5371, "step": 30880 }, { "epoch": 15.02, "learning_rate": 5.7415237569857805e-06, "loss": 0.5355, "step": 30890 }, { "epoch": 15.02, "learning_rate": 5.736434151730575e-06, "loss": 0.5317, "step": 30900 }, { "epoch": 15.02, "learning_rate": 5.731345406015938e-06, "loss": 0.7229, "step": 30910 }, { "epoch": 15.02, "learning_rate": 5.726257522322067e-06, "loss": 0.5791, "step": 30920 }, { "epoch": 15.02, "learning_rate": 5.7211705031287285e-06, "loss": 0.8684, "step": 30930 }, { "epoch": 15.02, "learning_rate": 5.716084350915279e-06, "loss": 0.2345, "step": 30940 }, { "epoch": 15.02, "learning_rate": 5.710999068160645e-06, "loss": 0.4998, "step": 30950 }, { "epoch": 15.02, "learning_rate": 5.70591465734333e-06, "loss": 0.599, "step": 30960 }, { "epoch": 15.02, "learning_rate": 5.7008311209414195e-06, "loss": 0.5463, "step": 30970 }, { "epoch": 15.02, "learning_rate": 5.695748461432562e-06, "loss": 0.4036, "step": 30980 }, { "epoch": 15.02, "learning_rate": 5.690666681293987e-06, "loss": 0.393, "step": 30990 }, { "epoch": 15.02, "learning_rate": 5.685585783002493e-06, "loss": 0.4896, "step": 31000 }, { "epoch": 15.02, "learning_rate": 5.680505769034446e-06, "loss": 0.4201, "step": 31010 }, { "epoch": 15.02, "learning_rate": 5.675426641865783e-06, "loss": 0.8405, "step": 31020 }, { "epoch": 15.02, "learning_rate": 5.6703484039720085e-06, "loss": 0.5632, "step": 31030 }, { "epoch": 15.02, "learning_rate": 5.665271057828196e-06, "loss": 0.3888, "step": 31040 }, { "epoch": 15.02, "learning_rate": 5.66019460590898e-06, "loss": 0.398, "step": 31050 }, { "epoch": 15.02, "learning_rate": 5.655119050688565e-06, "loss": 0.6701, "step": 31060 }, { "epoch": 15.02, "learning_rate": 5.650044394640713e-06, "loss": 0.5749, "step": 31070 }, { "epoch": 15.02, "learning_rate": 5.644970640238747e-06, "loss": 0.5094, "step": 31080 }, { "epoch": 15.02, "learning_rate": 5.63989778995556e-06, "loss": 0.5268, "step": 31090 }, { "epoch": 15.02, "learning_rate": 5.6348258462635905e-06, "loss": 0.4821, "step": 31100 }, { "epoch": 15.02, "learning_rate": 5.629754811634846e-06, "loss": 0.5058, "step": 31110 }, { "epoch": 15.02, "learning_rate": 5.624684688540884e-06, "loss": 0.2582, "step": 31120 }, { "epoch": 15.02, "learning_rate": 5.619615479452826e-06, "loss": 0.7632, "step": 31130 }, { "epoch": 15.02, "learning_rate": 5.614547186841339e-06, "loss": 0.6387, "step": 31140 }, { "epoch": 15.02, "learning_rate": 5.609479813176648e-06, "loss": 0.4985, "step": 31150 }, { "epoch": 15.02, "learning_rate": 5.6044133609285305e-06, "loss": 0.454, "step": 31160 }, { "epoch": 15.02, "learning_rate": 5.5993478325663125e-06, "loss": 0.7597, "step": 31170 }, { "epoch": 15.02, "learning_rate": 5.594283230558874e-06, "loss": 0.3568, "step": 31180 }, { "epoch": 15.02, "learning_rate": 5.589219557374635e-06, "loss": 0.3692, "step": 31190 }, { "epoch": 15.02, "learning_rate": 5.5841568154815725e-06, "loss": 0.6227, "step": 31200 }, { "epoch": 15.02, "learning_rate": 5.579095007347204e-06, "loss": 0.3188, "step": 31210 }, { "epoch": 15.02, "learning_rate": 5.574034135438591e-06, "loss": 0.5097, "step": 31220 }, { "epoch": 15.02, "learning_rate": 5.568974202222348e-06, "loss": 0.7134, "step": 31230 }, { "epoch": 15.02, "learning_rate": 5.5639152101646184e-06, "loss": 0.6648, "step": 31240 }, { "epoch": 15.03, "learning_rate": 5.558857161731094e-06, "loss": 0.9005, "step": 31250 }, { "epoch": 15.03, "learning_rate": 5.55380005938701e-06, "loss": 0.6035, "step": 31260 }, { "epoch": 15.03, "learning_rate": 5.548743905597135e-06, "loss": 0.3174, "step": 31270 }, { "epoch": 15.03, "learning_rate": 5.543688702825775e-06, "loss": 0.7496, "step": 31280 }, { "epoch": 15.03, "learning_rate": 5.538634453536774e-06, "loss": 0.6956, "step": 31290 }, { "epoch": 15.03, "learning_rate": 5.533581160193516e-06, "loss": 0.6995, "step": 31300 }, { "epoch": 15.03, "learning_rate": 5.5285288252589095e-06, "loss": 0.4295, "step": 31310 }, { "epoch": 15.03, "learning_rate": 5.5234774511954065e-06, "loss": 0.5449, "step": 31320 }, { "epoch": 15.03, "learning_rate": 5.5184270404649815e-06, "loss": 0.7376, "step": 31330 }, { "epoch": 15.03, "learning_rate": 5.513377595529143e-06, "loss": 0.5837, "step": 31340 }, { "epoch": 15.03, "learning_rate": 5.508329118848934e-06, "loss": 1.0156, "step": 31350 }, { "epoch": 15.03, "learning_rate": 5.503281612884917e-06, "loss": 0.6391, "step": 31360 }, { "epoch": 15.03, "learning_rate": 5.498235080097185e-06, "loss": 0.4268, "step": 31370 }, { "epoch": 15.03, "learning_rate": 5.493189522945356e-06, "loss": 0.4658, "step": 31380 }, { "epoch": 15.03, "learning_rate": 5.488144943888575e-06, "loss": 0.7193, "step": 31390 }, { "epoch": 15.03, "learning_rate": 5.483101345385508e-06, "loss": 1.1998, "step": 31400 }, { "epoch": 15.03, "learning_rate": 5.478058729894342e-06, "loss": 0.3759, "step": 31410 }, { "epoch": 15.03, "learning_rate": 5.4730170998727915e-06, "loss": 1.0222, "step": 31420 }, { "epoch": 15.03, "learning_rate": 5.4679764577780815e-06, "loss": 0.6979, "step": 31430 }, { "epoch": 15.03, "learning_rate": 5.4629368060669585e-06, "loss": 0.6705, "step": 31440 }, { "epoch": 15.03, "learning_rate": 5.457898147195693e-06, "loss": 0.2831, "step": 31450 }, { "epoch": 15.03, "learning_rate": 5.452860483620062e-06, "loss": 0.5871, "step": 31460 }, { "epoch": 15.03, "learning_rate": 5.447823817795362e-06, "loss": 0.6708, "step": 31470 }, { "epoch": 15.03, "learning_rate": 5.442788152176401e-06, "loss": 0.3945, "step": 31480 }, { "epoch": 15.03, "learning_rate": 5.437753489217505e-06, "loss": 1.0316, "step": 31490 }, { "epoch": 15.03, "learning_rate": 5.432719831372507e-06, "loss": 0.4664, "step": 31500 }, { "epoch": 15.03, "learning_rate": 5.427687181094748e-06, "loss": 0.4658, "step": 31510 }, { "epoch": 15.03, "learning_rate": 5.422655540837083e-06, "loss": 0.4841, "step": 31520 }, { "epoch": 15.03, "learning_rate": 5.417624913051869e-06, "loss": 0.4642, "step": 31530 }, { "epoch": 15.03, "learning_rate": 5.412595300190981e-06, "loss": 0.5075, "step": 31540 }, { "epoch": 15.03, "learning_rate": 5.407566704705782e-06, "loss": 0.7411, "step": 31550 }, { "epoch": 15.03, "learning_rate": 5.402539129047152e-06, "loss": 1.0875, "step": 31560 }, { "epoch": 15.03, "learning_rate": 5.397512575665469e-06, "loss": 0.7321, "step": 31570 }, { "epoch": 15.03, "learning_rate": 5.392487047010618e-06, "loss": 0.537, "step": 31580 }, { "epoch": 15.03, "learning_rate": 5.387462545531976e-06, "loss": 0.4299, "step": 31590 }, { "epoch": 15.03, "learning_rate": 5.382439073678426e-06, "loss": 0.5034, "step": 31600 }, { "epoch": 15.03, "learning_rate": 5.377416633898348e-06, "loss": 0.7486, "step": 31610 }, { "epoch": 15.03, "learning_rate": 5.372395228639619e-06, "loss": 0.6522, "step": 31620 }, { "epoch": 15.03, "learning_rate": 5.367374860349607e-06, "loss": 0.5415, "step": 31630 }, { "epoch": 15.03, "learning_rate": 5.362355531475179e-06, "loss": 0.592, "step": 31640 }, { "epoch": 15.03, "learning_rate": 5.357337244462699e-06, "loss": 0.2441, "step": 31650 }, { "epoch": 15.03, "learning_rate": 5.352320001758017e-06, "loss": 0.6928, "step": 31660 }, { "epoch": 15.03, "learning_rate": 5.3473038058064725e-06, "loss": 0.3521, "step": 31670 }, { "epoch": 15.03, "learning_rate": 5.342288659052905e-06, "loss": 0.5533, "step": 31680 }, { "epoch": 15.03, "learning_rate": 5.337274563941633e-06, "loss": 0.6759, "step": 31690 }, { "epoch": 15.03, "learning_rate": 5.332261522916461e-06, "loss": 0.4252, "step": 31700 }, { "epoch": 15.03, "learning_rate": 5.3272495384206945e-06, "loss": 0.8168, "step": 31710 }, { "epoch": 15.03, "learning_rate": 5.322238612897107e-06, "loss": 0.619, "step": 31720 }, { "epoch": 15.03, "learning_rate": 5.317228748787963e-06, "loss": 0.8278, "step": 31730 }, { "epoch": 15.03, "learning_rate": 5.3122199485350085e-06, "loss": 0.6556, "step": 31740 }, { "epoch": 15.04, "learning_rate": 5.307212214579475e-06, "loss": 0.3492, "step": 31750 }, { "epoch": 15.04, "learning_rate": 5.3022055493620705e-06, "loss": 0.5529, "step": 31760 }, { "epoch": 15.04, "learning_rate": 5.29719995532298e-06, "loss": 0.5318, "step": 31770 }, { "epoch": 15.04, "learning_rate": 5.292195434901873e-06, "loss": 0.6974, "step": 31780 }, { "epoch": 15.04, "learning_rate": 5.2871919905378896e-06, "loss": 0.4782, "step": 31790 }, { "epoch": 15.04, "learning_rate": 5.282189624669653e-06, "loss": 0.6933, "step": 31800 }, { "epoch": 15.04, "learning_rate": 5.277188339735244e-06, "loss": 0.5515, "step": 31810 }, { "epoch": 15.04, "learning_rate": 5.272188138172239e-06, "loss": 0.4032, "step": 31820 }, { "epoch": 15.04, "learning_rate": 5.267189022417672e-06, "loss": 0.7064, "step": 31830 }, { "epoch": 15.04, "learning_rate": 5.2621909949080485e-06, "loss": 0.814, "step": 31840 }, { "epoch": 15.04, "learning_rate": 5.2571940580793506e-06, "loss": 0.5002, "step": 31850 }, { "epoch": 15.04, "learning_rate": 5.252198214367021e-06, "loss": 0.6971, "step": 31860 }, { "epoch": 15.04, "learning_rate": 5.247203466205977e-06, "loss": 0.6614, "step": 31870 }, { "epoch": 15.04, "learning_rate": 5.242209816030597e-06, "loss": 0.4285, "step": 31880 }, { "epoch": 15.04, "learning_rate": 5.237217266274726e-06, "loss": 0.4315, "step": 31890 }, { "epoch": 15.04, "learning_rate": 5.232225819371667e-06, "loss": 0.4179, "step": 31900 }, { "epoch": 15.04, "learning_rate": 5.227235477754197e-06, "loss": 0.5745, "step": 31910 }, { "epoch": 15.04, "learning_rate": 5.222246243854544e-06, "loss": 0.2488, "step": 31920 }, { "epoch": 15.04, "learning_rate": 5.217258120104401e-06, "loss": 0.5487, "step": 31930 }, { "epoch": 15.04, "learning_rate": 5.212271108934919e-06, "loss": 0.3712, "step": 31940 }, { "epoch": 15.04, "learning_rate": 5.207285212776708e-06, "loss": 0.6056, "step": 31950 }, { "epoch": 15.04, "learning_rate": 5.20230043405983e-06, "loss": 0.3832, "step": 31960 }, { "epoch": 15.04, "learning_rate": 5.19731677521381e-06, "loss": 0.4765, "step": 31970 }, { "epoch": 15.04, "learning_rate": 5.192334238667622e-06, "loss": 0.6145, "step": 31980 }, { "epoch": 15.04, "learning_rate": 5.18735282684969e-06, "loss": 0.3832, "step": 31990 }, { "epoch": 15.04, "learning_rate": 5.182372542187895e-06, "loss": 0.3262, "step": 32000 }, { "epoch": 15.04, "eval_accuracy": 0.8768421052631579, "eval_f1": 0.876842105263158, "eval_loss": 0.7663307785987854, "eval_runtime": 750.321, "eval_samples_per_second": 6.331, "eval_steps_per_second": 1.583, "step": 32000 }, { "epoch": 16.0, "learning_rate": 5.177393387109569e-06, "loss": 0.4934, "step": 32010 }, { "epoch": 16.0, "learning_rate": 5.172415364041492e-06, "loss": 0.7969, "step": 32020 }, { "epoch": 16.0, "learning_rate": 5.167438475409888e-06, "loss": 0.6713, "step": 32030 }, { "epoch": 16.0, "learning_rate": 5.162462723640436e-06, "loss": 0.4962, "step": 32040 }, { "epoch": 16.0, "learning_rate": 5.157488111158255e-06, "loss": 0.465, "step": 32050 }, { "epoch": 16.0, "learning_rate": 5.152514640387912e-06, "loss": 0.4099, "step": 32060 }, { "epoch": 16.0, "learning_rate": 5.147542313753419e-06, "loss": 0.228, "step": 32070 }, { "epoch": 16.0, "learning_rate": 5.142571133678222e-06, "loss": 0.85, "step": 32080 }, { "epoch": 16.0, "learning_rate": 5.137601102585216e-06, "loss": 0.5891, "step": 32090 }, { "epoch": 16.0, "learning_rate": 5.132632222896733e-06, "loss": 0.3639, "step": 32100 }, { "epoch": 16.0, "learning_rate": 5.127664497034546e-06, "loss": 0.5626, "step": 32110 }, { "epoch": 16.0, "learning_rate": 5.122697927419861e-06, "loss": 0.7871, "step": 32120 }, { "epoch": 16.0, "learning_rate": 5.1177325164733296e-06, "loss": 0.3236, "step": 32130 }, { "epoch": 16.0, "learning_rate": 5.11276826661503e-06, "loss": 0.6567, "step": 32140 }, { "epoch": 16.0, "learning_rate": 5.107805180264475e-06, "loss": 0.5179, "step": 32150 }, { "epoch": 16.0, "learning_rate": 5.102843259840616e-06, "loss": 0.6067, "step": 32160 }, { "epoch": 16.0, "learning_rate": 5.097882507761832e-06, "loss": 0.3634, "step": 32170 }, { "epoch": 16.0, "learning_rate": 5.09292292644593e-06, "loss": 0.6364, "step": 32180 }, { "epoch": 16.0, "learning_rate": 5.087964518310153e-06, "loss": 0.9535, "step": 32190 }, { "epoch": 16.0, "learning_rate": 5.083007285771168e-06, "loss": 0.712, "step": 32200 }, { "epoch": 16.0, "learning_rate": 5.07805123124507e-06, "loss": 0.3794, "step": 32210 }, { "epoch": 16.0, "learning_rate": 5.073096357147379e-06, "loss": 0.4944, "step": 32220 }, { "epoch": 16.0, "learning_rate": 5.068142665893042e-06, "loss": 1.1231, "step": 32230 }, { "epoch": 16.0, "learning_rate": 5.063190159896428e-06, "loss": 0.619, "step": 32240 }, { "epoch": 16.0, "learning_rate": 5.058238841571326e-06, "loss": 0.6341, "step": 32250 }, { "epoch": 16.01, "learning_rate": 5.05328871333095e-06, "loss": 0.6589, "step": 32260 }, { "epoch": 16.01, "learning_rate": 5.0483397775879325e-06, "loss": 0.5169, "step": 32270 }, { "epoch": 16.01, "learning_rate": 5.043392036754324e-06, "loss": 0.4261, "step": 32280 }, { "epoch": 16.01, "learning_rate": 5.038445493241593e-06, "loss": 0.4723, "step": 32290 }, { "epoch": 16.01, "learning_rate": 5.033500149460626e-06, "loss": 0.653, "step": 32300 }, { "epoch": 16.01, "learning_rate": 5.028556007821722e-06, "loss": 0.535, "step": 32310 }, { "epoch": 16.01, "learning_rate": 5.023613070734597e-06, "loss": 0.706, "step": 32320 }, { "epoch": 16.01, "learning_rate": 5.01867134060838e-06, "loss": 0.9763, "step": 32330 }, { "epoch": 16.01, "learning_rate": 5.013730819851606e-06, "loss": 0.6044, "step": 32340 }, { "epoch": 16.01, "learning_rate": 5.008791510872227e-06, "loss": 0.6842, "step": 32350 }, { "epoch": 16.01, "learning_rate": 5.003853416077601e-06, "loss": 0.4799, "step": 32360 }, { "epoch": 16.01, "learning_rate": 4.998916537874498e-06, "loss": 0.7376, "step": 32370 }, { "epoch": 16.01, "learning_rate": 4.9939808786690885e-06, "loss": 0.3854, "step": 32380 }, { "epoch": 16.01, "learning_rate": 4.989046440866958e-06, "loss": 0.8238, "step": 32390 }, { "epoch": 16.01, "learning_rate": 4.984113226873088e-06, "loss": 0.5112, "step": 32400 }, { "epoch": 16.01, "learning_rate": 4.979181239091867e-06, "loss": 0.4544, "step": 32410 }, { "epoch": 16.01, "learning_rate": 4.97425047992709e-06, "loss": 0.5501, "step": 32420 }, { "epoch": 16.01, "learning_rate": 4.969320951781942e-06, "loss": 0.4778, "step": 32430 }, { "epoch": 16.01, "learning_rate": 4.964392657059022e-06, "loss": 0.7256, "step": 32440 }, { "epoch": 16.01, "learning_rate": 4.959465598160315e-06, "loss": 0.5249, "step": 32450 }, { "epoch": 16.01, "learning_rate": 4.9545397774872146e-06, "loss": 0.3695, "step": 32460 }, { "epoch": 16.01, "learning_rate": 4.9496151974405045e-06, "loss": 0.6224, "step": 32470 }, { "epoch": 16.01, "learning_rate": 4.944691860420363e-06, "loss": 0.5238, "step": 32480 }, { "epoch": 16.01, "learning_rate": 4.939769768826367e-06, "loss": 0.5127, "step": 32490 }, { "epoch": 16.01, "learning_rate": 4.934848925057485e-06, "loss": 0.8212, "step": 32500 }, { "epoch": 16.01, "learning_rate": 4.929929331512076e-06, "loss": 0.4451, "step": 32510 }, { "epoch": 16.01, "learning_rate": 4.925010990587886e-06, "loss": 0.472, "step": 32520 }, { "epoch": 16.01, "learning_rate": 4.92009390468206e-06, "loss": 0.9875, "step": 32530 }, { "epoch": 16.01, "learning_rate": 4.915178076191123e-06, "loss": 0.4381, "step": 32540 }, { "epoch": 16.01, "learning_rate": 4.9102635075109905e-06, "loss": 0.4436, "step": 32550 }, { "epoch": 16.01, "learning_rate": 4.905350201036965e-06, "loss": 0.3102, "step": 32560 }, { "epoch": 16.01, "learning_rate": 4.9004381591637295e-06, "loss": 0.7939, "step": 32570 }, { "epoch": 16.01, "learning_rate": 4.895527384285357e-06, "loss": 0.8265, "step": 32580 }, { "epoch": 16.01, "learning_rate": 4.890617878795299e-06, "loss": 0.4009, "step": 32590 }, { "epoch": 16.01, "learning_rate": 4.885709645086388e-06, "loss": 0.4055, "step": 32600 }, { "epoch": 16.01, "learning_rate": 4.880802685550836e-06, "loss": 1.0053, "step": 32610 }, { "epoch": 16.01, "learning_rate": 4.875897002580235e-06, "loss": 0.2254, "step": 32620 }, { "epoch": 16.01, "learning_rate": 4.870992598565559e-06, "loss": 0.3148, "step": 32630 }, { "epoch": 16.01, "learning_rate": 4.86608947589715e-06, "loss": 0.5144, "step": 32640 }, { "epoch": 16.01, "learning_rate": 4.861187636964736e-06, "loss": 0.5433, "step": 32650 }, { "epoch": 16.01, "learning_rate": 4.8562870841574115e-06, "loss": 0.4401, "step": 32660 }, { "epoch": 16.01, "learning_rate": 4.851387819863644e-06, "loss": 0.6315, "step": 32670 }, { "epoch": 16.01, "learning_rate": 4.846489846471279e-06, "loss": 0.5919, "step": 32680 }, { "epoch": 16.01, "learning_rate": 4.841593166367529e-06, "loss": 1.168, "step": 32690 }, { "epoch": 16.01, "learning_rate": 4.836697781938973e-06, "loss": 0.5355, "step": 32700 }, { "epoch": 16.01, "learning_rate": 4.831803695571563e-06, "loss": 0.3735, "step": 32710 }, { "epoch": 16.01, "learning_rate": 4.82691090965062e-06, "loss": 0.4671, "step": 32720 }, { "epoch": 16.01, "learning_rate": 4.822019426560827e-06, "loss": 0.7564, "step": 32730 }, { "epoch": 16.01, "learning_rate": 4.817129248686231e-06, "loss": 0.3753, "step": 32740 }, { "epoch": 16.02, "learning_rate": 4.812240378410248e-06, "loss": 0.5225, "step": 32750 }, { "epoch": 16.02, "learning_rate": 4.8073528181156535e-06, "loss": 0.78, "step": 32760 }, { "epoch": 16.02, "learning_rate": 4.802466570184584e-06, "loss": 0.6062, "step": 32770 }, { "epoch": 16.02, "learning_rate": 4.797581636998541e-06, "loss": 0.6152, "step": 32780 }, { "epoch": 16.02, "learning_rate": 4.792698020938377e-06, "loss": 0.2874, "step": 32790 }, { "epoch": 16.02, "learning_rate": 4.787815724384309e-06, "loss": 0.5955, "step": 32800 }, { "epoch": 16.02, "learning_rate": 4.782934749715908e-06, "loss": 0.654, "step": 32810 }, { "epoch": 16.02, "learning_rate": 4.778055099312103e-06, "loss": 0.3857, "step": 32820 }, { "epoch": 16.02, "learning_rate": 4.7731767755511775e-06, "loss": 0.5285, "step": 32830 }, { "epoch": 16.02, "learning_rate": 4.768299780810763e-06, "loss": 0.7611, "step": 32840 }, { "epoch": 16.02, "learning_rate": 4.763424117467852e-06, "loss": 0.759, "step": 32850 }, { "epoch": 16.02, "learning_rate": 4.758549787898785e-06, "loss": 0.6901, "step": 32860 }, { "epoch": 16.02, "learning_rate": 4.753676794479244e-06, "loss": 0.8388, "step": 32870 }, { "epoch": 16.02, "learning_rate": 4.74880513958427e-06, "loss": 0.3117, "step": 32880 }, { "epoch": 16.02, "learning_rate": 4.743934825588252e-06, "loss": 0.4733, "step": 32890 }, { "epoch": 16.02, "learning_rate": 4.739065854864916e-06, "loss": 0.297, "step": 32900 }, { "epoch": 16.02, "learning_rate": 4.734198229787343e-06, "loss": 0.4909, "step": 32910 }, { "epoch": 16.02, "learning_rate": 4.729331952727954e-06, "loss": 0.4957, "step": 32920 }, { "epoch": 16.02, "learning_rate": 4.724467026058512e-06, "loss": 0.3845, "step": 32930 }, { "epoch": 16.02, "learning_rate": 4.719603452150125e-06, "loss": 0.3681, "step": 32940 }, { "epoch": 16.02, "learning_rate": 4.714741233373241e-06, "loss": 0.5048, "step": 32950 }, { "epoch": 16.02, "learning_rate": 4.709880372097642e-06, "loss": 0.5012, "step": 32960 }, { "epoch": 16.02, "learning_rate": 4.705020870692452e-06, "loss": 0.5737, "step": 32970 }, { "epoch": 16.02, "learning_rate": 4.7001627315261395e-06, "loss": 0.7233, "step": 32980 }, { "epoch": 16.02, "learning_rate": 4.695305956966498e-06, "loss": 0.5549, "step": 32990 }, { "epoch": 16.02, "learning_rate": 4.6904505493806595e-06, "loss": 0.442, "step": 33000 }, { "epoch": 16.02, "learning_rate": 4.685596511135094e-06, "loss": 0.7247, "step": 33010 }, { "epoch": 16.02, "learning_rate": 4.6807438445956e-06, "loss": 0.6389, "step": 33020 }, { "epoch": 16.02, "learning_rate": 4.675892552127305e-06, "loss": 0.4903, "step": 33030 }, { "epoch": 16.02, "learning_rate": 4.671042636094677e-06, "loss": 0.6385, "step": 33040 }, { "epoch": 16.02, "learning_rate": 4.666194098861498e-06, "loss": 0.6107, "step": 33050 }, { "epoch": 16.02, "learning_rate": 4.661346942790891e-06, "loss": 0.8643, "step": 33060 }, { "epoch": 16.02, "learning_rate": 4.656501170245297e-06, "loss": 0.2314, "step": 33070 }, { "epoch": 16.02, "learning_rate": 4.651656783586493e-06, "loss": 0.2656, "step": 33080 }, { "epoch": 16.02, "learning_rate": 4.646813785175569e-06, "loss": 0.5808, "step": 33090 }, { "epoch": 16.02, "learning_rate": 4.641972177372944e-06, "loss": 0.3246, "step": 33100 }, { "epoch": 16.02, "learning_rate": 4.637131962538362e-06, "loss": 0.4211, "step": 33110 }, { "epoch": 16.02, "learning_rate": 4.632293143030881e-06, "loss": 0.599, "step": 33120 }, { "epoch": 16.02, "learning_rate": 4.62745572120889e-06, "loss": 0.5716, "step": 33130 }, { "epoch": 16.02, "learning_rate": 4.6226196994300784e-06, "loss": 0.229, "step": 33140 }, { "epoch": 16.02, "learning_rate": 4.6177850800514735e-06, "loss": 0.5619, "step": 33150 }, { "epoch": 16.02, "learning_rate": 4.612951865429405e-06, "loss": 0.3864, "step": 33160 }, { "epoch": 16.02, "learning_rate": 4.6081200579195265e-06, "loss": 0.6615, "step": 33170 }, { "epoch": 16.02, "learning_rate": 4.6032896598768006e-06, "loss": 0.6474, "step": 33180 }, { "epoch": 16.02, "learning_rate": 4.5984606736555045e-06, "loss": 0.5483, "step": 33190 }, { "epoch": 16.02, "learning_rate": 4.5936331016092285e-06, "loss": 0.3562, "step": 33200 }, { "epoch": 16.02, "learning_rate": 4.58880694609087e-06, "loss": 0.5079, "step": 33210 }, { "epoch": 16.02, "learning_rate": 4.583982209452645e-06, "loss": 0.3317, "step": 33220 }, { "epoch": 16.02, "learning_rate": 4.579158894046062e-06, "loss": 0.3543, "step": 33230 }, { "epoch": 16.02, "learning_rate": 4.574337002221953e-06, "loss": 0.6051, "step": 33240 }, { "epoch": 16.02, "learning_rate": 4.569516536330448e-06, "loss": 0.5412, "step": 33250 }, { "epoch": 16.03, "learning_rate": 4.564697498720982e-06, "loss": 0.8271, "step": 33260 }, { "epoch": 16.03, "learning_rate": 4.5598798917422975e-06, "loss": 0.5189, "step": 33270 }, { "epoch": 16.03, "learning_rate": 4.555063717742438e-06, "loss": 0.4982, "step": 33280 }, { "epoch": 16.03, "learning_rate": 4.550248979068744e-06, "loss": 0.3839, "step": 33290 }, { "epoch": 16.03, "learning_rate": 4.5454356780678666e-06, "loss": 0.7477, "step": 33300 }, { "epoch": 16.03, "learning_rate": 4.540623817085747e-06, "loss": 0.3904, "step": 33310 }, { "epoch": 16.03, "learning_rate": 4.535813398467629e-06, "loss": 0.4538, "step": 33320 }, { "epoch": 16.03, "learning_rate": 4.531004424558048e-06, "loss": 0.3612, "step": 33330 }, { "epoch": 16.03, "learning_rate": 4.526196897700844e-06, "loss": 0.4546, "step": 33340 }, { "epoch": 16.03, "learning_rate": 4.521390820239146e-06, "loss": 0.3839, "step": 33350 }, { "epoch": 16.03, "learning_rate": 4.516586194515378e-06, "loss": 0.8155, "step": 33360 }, { "epoch": 16.03, "learning_rate": 4.511783022871256e-06, "loss": 0.289, "step": 33370 }, { "epoch": 16.03, "learning_rate": 4.506981307647786e-06, "loss": 1.0293, "step": 33380 }, { "epoch": 16.03, "learning_rate": 4.502181051185268e-06, "loss": 0.4668, "step": 33390 }, { "epoch": 16.03, "learning_rate": 4.497382255823289e-06, "loss": 0.4752, "step": 33400 }, { "epoch": 16.03, "learning_rate": 4.492584923900722e-06, "loss": 0.7419, "step": 33410 }, { "epoch": 16.03, "learning_rate": 4.487789057755726e-06, "loss": 0.5496, "step": 33420 }, { "epoch": 16.03, "learning_rate": 4.482994659725753e-06, "loss": 0.3482, "step": 33430 }, { "epoch": 16.03, "learning_rate": 4.478201732147531e-06, "loss": 0.4638, "step": 33440 }, { "epoch": 16.03, "learning_rate": 4.4734102773570745e-06, "loss": 0.6718, "step": 33450 }, { "epoch": 16.03, "learning_rate": 4.468620297689682e-06, "loss": 0.2801, "step": 33460 }, { "epoch": 16.03, "learning_rate": 4.463831795479932e-06, "loss": 0.5753, "step": 33470 }, { "epoch": 16.03, "learning_rate": 4.459044773061682e-06, "loss": 0.6162, "step": 33480 }, { "epoch": 16.03, "learning_rate": 4.454259232768066e-06, "loss": 0.5012, "step": 33490 }, { "epoch": 16.03, "learning_rate": 4.4494751769315e-06, "loss": 0.4228, "step": 33500 }, { "epoch": 16.03, "learning_rate": 4.444692607883674e-06, "loss": 0.5142, "step": 33510 }, { "epoch": 16.03, "learning_rate": 4.439911527955554e-06, "loss": 0.442, "step": 33520 }, { "epoch": 16.03, "learning_rate": 4.4351319394773836e-06, "loss": 0.4225, "step": 33530 }, { "epoch": 16.03, "learning_rate": 4.430353844778671e-06, "loss": 0.5155, "step": 33540 }, { "epoch": 16.03, "learning_rate": 4.425577246188204e-06, "loss": 0.8971, "step": 33550 }, { "epoch": 16.03, "learning_rate": 4.420802146034041e-06, "loss": 0.8625, "step": 33560 }, { "epoch": 16.03, "learning_rate": 4.416028546643505e-06, "loss": 0.619, "step": 33570 }, { "epoch": 16.03, "learning_rate": 4.4112564503431904e-06, "loss": 0.756, "step": 33580 }, { "epoch": 16.03, "learning_rate": 4.406485859458957e-06, "loss": 0.3665, "step": 33590 }, { "epoch": 16.03, "learning_rate": 4.401716776315938e-06, "loss": 0.4867, "step": 33600 }, { "epoch": 16.03, "learning_rate": 4.396949203238524e-06, "loss": 0.8304, "step": 33610 }, { "epoch": 16.03, "learning_rate": 4.392183142550369e-06, "loss": 0.6656, "step": 33620 }, { "epoch": 16.03, "learning_rate": 4.387418596574399e-06, "loss": 0.5113, "step": 33630 }, { "epoch": 16.03, "learning_rate": 4.382655567632791e-06, "loss": 0.4387, "step": 33640 }, { "epoch": 16.03, "learning_rate": 4.377894058046992e-06, "loss": 0.4837, "step": 33650 }, { "epoch": 16.03, "learning_rate": 4.3731340701377e-06, "loss": 0.6714, "step": 33660 }, { "epoch": 16.03, "learning_rate": 4.368375606224877e-06, "loss": 0.7669, "step": 33670 }, { "epoch": 16.03, "learning_rate": 4.36361866862774e-06, "loss": 0.2445, "step": 33680 }, { "epoch": 16.03, "learning_rate": 4.358863259664763e-06, "loss": 0.4635, "step": 33690 }, { "epoch": 16.03, "learning_rate": 4.354109381653672e-06, "loss": 0.4453, "step": 33700 }, { "epoch": 16.03, "learning_rate": 4.349357036911453e-06, "loss": 0.5797, "step": 33710 }, { "epoch": 16.03, "learning_rate": 4.344606227754341e-06, "loss": 0.4042, "step": 33720 }, { "epoch": 16.03, "learning_rate": 4.339856956497821e-06, "loss": 0.4951, "step": 33730 }, { "epoch": 16.03, "learning_rate": 4.33510922545663e-06, "loss": 0.7429, "step": 33740 }, { "epoch": 16.04, "learning_rate": 4.330363036944755e-06, "loss": 0.6737, "step": 33750 }, { "epoch": 16.04, "learning_rate": 4.325618393275432e-06, "loss": 0.5931, "step": 33760 }, { "epoch": 16.04, "learning_rate": 4.320875296761139e-06, "loss": 0.6849, "step": 33770 }, { "epoch": 16.04, "learning_rate": 4.316133749713608e-06, "loss": 0.3549, "step": 33780 }, { "epoch": 16.04, "learning_rate": 4.311393754443805e-06, "loss": 0.6275, "step": 33790 }, { "epoch": 16.04, "learning_rate": 4.306655313261955e-06, "loss": 0.4788, "step": 33800 }, { "epoch": 16.04, "learning_rate": 4.301918428477511e-06, "loss": 0.5819, "step": 33810 }, { "epoch": 16.04, "learning_rate": 4.2971831023991745e-06, "loss": 0.4174, "step": 33820 }, { "epoch": 16.04, "learning_rate": 4.292449337334886e-06, "loss": 0.6669, "step": 33830 }, { "epoch": 16.04, "learning_rate": 4.287717135591823e-06, "loss": 0.4027, "step": 33840 }, { "epoch": 16.04, "learning_rate": 4.2829864994764075e-06, "loss": 0.6037, "step": 33850 }, { "epoch": 16.04, "learning_rate": 4.278257431294289e-06, "loss": 0.4935, "step": 33860 }, { "epoch": 16.04, "learning_rate": 4.2735299333503615e-06, "loss": 0.2093, "step": 33870 }, { "epoch": 16.04, "learning_rate": 4.268804007948749e-06, "loss": 0.4776, "step": 33880 }, { "epoch": 16.04, "learning_rate": 4.264079657392807e-06, "loss": 0.4668, "step": 33890 }, { "epoch": 16.04, "learning_rate": 4.259356883985131e-06, "loss": 0.8342, "step": 33900 }, { "epoch": 16.04, "learning_rate": 4.254635690027544e-06, "loss": 0.569, "step": 33910 }, { "epoch": 16.04, "learning_rate": 4.249916077821096e-06, "loss": 0.3638, "step": 33920 }, { "epoch": 16.04, "learning_rate": 4.245198049666069e-06, "loss": 0.832, "step": 33930 }, { "epoch": 16.04, "learning_rate": 4.240481607861973e-06, "loss": 0.8977, "step": 33940 }, { "epoch": 16.04, "learning_rate": 4.235766754707545e-06, "loss": 0.5267, "step": 33950 }, { "epoch": 16.04, "learning_rate": 4.231053492500745e-06, "loss": 0.4278, "step": 33960 }, { "epoch": 16.04, "learning_rate": 4.2263418235387615e-06, "loss": 0.5325, "step": 33970 }, { "epoch": 16.04, "learning_rate": 4.221631750118005e-06, "loss": 0.6714, "step": 33980 }, { "epoch": 16.04, "learning_rate": 4.216923274534104e-06, "loss": 1.2647, "step": 33990 }, { "epoch": 16.04, "learning_rate": 4.212216399081919e-06, "loss": 0.4387, "step": 34000 }, { "epoch": 16.04, "eval_accuracy": 0.88, "eval_f1": 0.88, "eval_loss": 0.7549371719360352, "eval_runtime": 770.409, "eval_samples_per_second": 6.166, "eval_steps_per_second": 1.542, "step": 34000 }, { "epoch": 17.0, "learning_rate": 4.2075111260555195e-06, "loss": 0.5721, "step": 34010 }, { "epoch": 17.0, "learning_rate": 4.202807457748205e-06, "loss": 0.4209, "step": 34020 }, { "epoch": 17.0, "learning_rate": 4.1981053964524725e-06, "loss": 0.2641, "step": 34030 }, { "epoch": 17.0, "learning_rate": 4.193404944460062e-06, "loss": 0.4175, "step": 34040 }, { "epoch": 17.0, "learning_rate": 4.188706104061912e-06, "loss": 0.3068, "step": 34050 }, { "epoch": 17.0, "learning_rate": 4.18400887754818e-06, "loss": 0.5447, "step": 34060 }, { "epoch": 17.0, "learning_rate": 4.17931326720824e-06, "loss": 0.4912, "step": 34070 }, { "epoch": 17.0, "learning_rate": 4.1746192753306685e-06, "loss": 0.3235, "step": 34080 }, { "epoch": 17.0, "learning_rate": 4.16992690420327e-06, "loss": 0.695, "step": 34090 }, { "epoch": 17.0, "learning_rate": 4.165236156113046e-06, "loss": 0.7171, "step": 34100 }, { "epoch": 17.0, "learning_rate": 4.160547033346206e-06, "loss": 0.2027, "step": 34110 }, { "epoch": 17.0, "learning_rate": 4.15585953818817e-06, "loss": 0.4179, "step": 34120 }, { "epoch": 17.0, "learning_rate": 4.151173672923573e-06, "loss": 0.545, "step": 34130 }, { "epoch": 17.0, "learning_rate": 4.146489439836246e-06, "loss": 0.4815, "step": 34140 }, { "epoch": 17.0, "learning_rate": 4.141806841209228e-06, "loss": 0.2793, "step": 34150 }, { "epoch": 17.0, "learning_rate": 4.137125879324759e-06, "loss": 0.5699, "step": 34160 }, { "epoch": 17.0, "learning_rate": 4.1324465564642845e-06, "loss": 0.7039, "step": 34170 }, { "epoch": 17.0, "learning_rate": 4.127768874908445e-06, "loss": 0.4332, "step": 34180 }, { "epoch": 17.0, "learning_rate": 4.123092836937095e-06, "loss": 0.3581, "step": 34190 }, { "epoch": 17.0, "learning_rate": 4.118418444829271e-06, "loss": 0.6377, "step": 34200 }, { "epoch": 17.0, "learning_rate": 4.1137457008632175e-06, "loss": 0.8027, "step": 34210 }, { "epoch": 17.0, "learning_rate": 4.1090746073163686e-06, "loss": 0.3202, "step": 34220 }, { "epoch": 17.0, "learning_rate": 4.104405166465368e-06, "loss": 0.325, "step": 34230 }, { "epoch": 17.0, "learning_rate": 4.099737380586036e-06, "loss": 0.3973, "step": 34240 }, { "epoch": 17.0, "learning_rate": 4.095071251953399e-06, "loss": 0.6119, "step": 34250 }, { "epoch": 17.01, "learning_rate": 4.090406782841671e-06, "loss": 0.3482, "step": 34260 }, { "epoch": 17.01, "learning_rate": 4.085743975524253e-06, "loss": 0.2866, "step": 34270 }, { "epoch": 17.01, "learning_rate": 4.081082832273752e-06, "loss": 0.593, "step": 34280 }, { "epoch": 17.01, "learning_rate": 4.07642335536194e-06, "loss": 0.3209, "step": 34290 }, { "epoch": 17.01, "learning_rate": 4.071765547059796e-06, "loss": 0.4869, "step": 34300 }, { "epoch": 17.01, "learning_rate": 4.067109409637476e-06, "loss": 0.5368, "step": 34310 }, { "epoch": 17.01, "learning_rate": 4.062454945364326e-06, "loss": 0.5865, "step": 34320 }, { "epoch": 17.01, "learning_rate": 4.057802156508877e-06, "loss": 0.9716, "step": 34330 }, { "epoch": 17.01, "learning_rate": 4.053151045338842e-06, "loss": 0.5427, "step": 34340 }, { "epoch": 17.01, "learning_rate": 4.0485016141211134e-06, "loss": 0.728, "step": 34350 }, { "epoch": 17.01, "learning_rate": 4.04385386512177e-06, "loss": 0.4432, "step": 34360 }, { "epoch": 17.01, "learning_rate": 4.039207800606065e-06, "loss": 0.6848, "step": 34370 }, { "epoch": 17.01, "learning_rate": 4.034563422838437e-06, "loss": 0.2282, "step": 34380 }, { "epoch": 17.01, "learning_rate": 4.029920734082499e-06, "loss": 0.2571, "step": 34390 }, { "epoch": 17.01, "learning_rate": 4.025279736601039e-06, "loss": 0.4565, "step": 34400 }, { "epoch": 17.01, "learning_rate": 4.0206404326560205e-06, "loss": 0.3911, "step": 34410 }, { "epoch": 17.01, "learning_rate": 4.016002824508593e-06, "loss": 0.4225, "step": 34420 }, { "epoch": 17.01, "learning_rate": 4.011366914419062e-06, "loss": 0.1458, "step": 34430 }, { "epoch": 17.01, "learning_rate": 4.006732704646918e-06, "loss": 0.2513, "step": 34440 }, { "epoch": 17.01, "learning_rate": 4.002100197450817e-06, "loss": 0.5512, "step": 34450 }, { "epoch": 17.01, "learning_rate": 3.997469395088586e-06, "loss": 0.4876, "step": 34460 }, { "epoch": 17.01, "learning_rate": 3.992840299817223e-06, "loss": 0.9696, "step": 34470 }, { "epoch": 17.01, "learning_rate": 3.988212913892892e-06, "loss": 0.4729, "step": 34480 }, { "epoch": 17.01, "learning_rate": 3.983587239570926e-06, "loss": 0.5691, "step": 34490 }, { "epoch": 17.01, "learning_rate": 3.978963279105821e-06, "loss": 0.6559, "step": 34500 }, { "epoch": 17.01, "learning_rate": 3.974341034751237e-06, "loss": 0.5865, "step": 34510 }, { "epoch": 17.01, "learning_rate": 3.969720508760006e-06, "loss": 0.5114, "step": 34520 }, { "epoch": 17.01, "learning_rate": 3.965101703384111e-06, "loss": 0.8371, "step": 34530 }, { "epoch": 17.01, "learning_rate": 3.960484620874706e-06, "loss": 0.7119, "step": 34540 }, { "epoch": 17.01, "learning_rate": 3.955869263482096e-06, "loss": 0.6099, "step": 34550 }, { "epoch": 17.01, "learning_rate": 3.951255633455752e-06, "loss": 0.9088, "step": 34560 }, { "epoch": 17.01, "learning_rate": 3.946643733044303e-06, "loss": 0.6616, "step": 34570 }, { "epoch": 17.01, "learning_rate": 3.942033564495532e-06, "loss": 0.6684, "step": 34580 }, { "epoch": 17.01, "learning_rate": 3.937425130056378e-06, "loss": 0.8524, "step": 34590 }, { "epoch": 17.01, "learning_rate": 3.9328184319729346e-06, "loss": 0.3119, "step": 34600 }, { "epoch": 17.01, "learning_rate": 3.9282134724904555e-06, "loss": 0.8073, "step": 34610 }, { "epoch": 17.01, "learning_rate": 3.92361025385334e-06, "loss": 0.4739, "step": 34620 }, { "epoch": 17.01, "learning_rate": 3.919008778305139e-06, "loss": 0.7186, "step": 34630 }, { "epoch": 17.01, "learning_rate": 3.914409048088559e-06, "loss": 0.3444, "step": 34640 }, { "epoch": 17.01, "learning_rate": 3.90981106544545e-06, "loss": 0.3746, "step": 34650 }, { "epoch": 17.01, "learning_rate": 3.905214832616813e-06, "loss": 0.3366, "step": 34660 }, { "epoch": 17.01, "learning_rate": 3.900620351842797e-06, "loss": 0.5922, "step": 34670 }, { "epoch": 17.01, "learning_rate": 3.896027625362695e-06, "loss": 0.5424, "step": 34680 }, { "epoch": 17.01, "learning_rate": 3.891436655414948e-06, "loss": 0.4661, "step": 34690 }, { "epoch": 17.01, "learning_rate": 3.886847444237134e-06, "loss": 0.5704, "step": 34700 }, { "epoch": 17.01, "learning_rate": 3.882259994065986e-06, "loss": 0.3393, "step": 34710 }, { "epoch": 17.01, "learning_rate": 3.877674307137371e-06, "loss": 0.6378, "step": 34720 }, { "epoch": 17.01, "learning_rate": 3.873090385686292e-06, "loss": 0.2401, "step": 34730 }, { "epoch": 17.01, "learning_rate": 3.868508231946894e-06, "loss": 0.6446, "step": 34740 }, { "epoch": 17.02, "learning_rate": 3.8639278481524725e-06, "loss": 0.7911, "step": 34750 }, { "epoch": 17.02, "learning_rate": 3.859349236535446e-06, "loss": 0.4049, "step": 34760 }, { "epoch": 17.02, "learning_rate": 3.854772399327374e-06, "loss": 0.5855, "step": 34770 }, { "epoch": 17.02, "learning_rate": 3.850197338758952e-06, "loss": 0.5569, "step": 34780 }, { "epoch": 17.02, "learning_rate": 3.845624057060006e-06, "loss": 0.5836, "step": 34790 }, { "epoch": 17.02, "learning_rate": 3.841052556459503e-06, "loss": 0.5529, "step": 34800 }, { "epoch": 17.02, "learning_rate": 3.83648283918554e-06, "loss": 0.413, "step": 34810 }, { "epoch": 17.02, "learning_rate": 3.831914907465331e-06, "loss": 0.4782, "step": 34820 }, { "epoch": 17.02, "learning_rate": 3.8273487635252355e-06, "loss": 0.2445, "step": 34830 }, { "epoch": 17.02, "learning_rate": 3.822784409590735e-06, "loss": 0.7761, "step": 34840 }, { "epoch": 17.02, "learning_rate": 3.818221847886444e-06, "loss": 0.3729, "step": 34850 }, { "epoch": 17.02, "learning_rate": 3.813661080636098e-06, "loss": 0.6889, "step": 34860 }, { "epoch": 17.02, "learning_rate": 3.809102110062559e-06, "loss": 0.6488, "step": 34870 }, { "epoch": 17.02, "learning_rate": 3.8045449383878144e-06, "loss": 0.5869, "step": 34880 }, { "epoch": 17.02, "learning_rate": 3.799989567832971e-06, "loss": 0.6171, "step": 34890 }, { "epoch": 17.02, "learning_rate": 3.795436000618269e-06, "loss": 0.7484, "step": 34900 }, { "epoch": 17.02, "learning_rate": 3.7908842389630542e-06, "loss": 0.7111, "step": 34910 }, { "epoch": 17.02, "learning_rate": 3.7863342850858034e-06, "loss": 0.4369, "step": 34920 }, { "epoch": 17.02, "learning_rate": 3.7817861412041044e-06, "loss": 0.3915, "step": 34930 }, { "epoch": 17.02, "learning_rate": 3.7772398095346738e-06, "loss": 0.3937, "step": 34940 }, { "epoch": 17.02, "learning_rate": 3.7726952922933353e-06, "loss": 0.597, "step": 34950 }, { "epoch": 17.02, "learning_rate": 3.768152591695031e-06, "loss": 0.6374, "step": 34960 }, { "epoch": 17.02, "learning_rate": 3.763611709953819e-06, "loss": 0.5747, "step": 34970 }, { "epoch": 17.02, "learning_rate": 3.7590726492828704e-06, "loss": 0.413, "step": 34980 }, { "epoch": 17.02, "learning_rate": 3.7545354118944664e-06, "loss": 0.7505, "step": 34990 }, { "epoch": 17.02, "learning_rate": 3.750000000000002e-06, "loss": 0.7681, "step": 35000 }, { "epoch": 17.02, "learning_rate": 3.7454664158099814e-06, "loss": 0.7081, "step": 35010 }, { "epoch": 17.02, "learning_rate": 3.7409346615340203e-06, "loss": 0.3066, "step": 35020 }, { "epoch": 17.02, "learning_rate": 3.736404739380836e-06, "loss": 0.7753, "step": 35030 }, { "epoch": 17.02, "learning_rate": 3.731876651558265e-06, "loss": 0.5647, "step": 35040 }, { "epoch": 17.02, "learning_rate": 3.727350400273237e-06, "loss": 0.4773, "step": 35050 }, { "epoch": 17.02, "learning_rate": 3.7228259877317942e-06, "loss": 0.5379, "step": 35060 }, { "epoch": 17.02, "learning_rate": 3.7183034161390787e-06, "loss": 0.2418, "step": 35070 }, { "epoch": 17.02, "learning_rate": 3.7137826876993383e-06, "loss": 0.4506, "step": 35080 }, { "epoch": 17.02, "learning_rate": 3.7092638046159198e-06, "loss": 0.6835, "step": 35090 }, { "epoch": 17.02, "learning_rate": 3.704746769091273e-06, "loss": 0.2637, "step": 35100 }, { "epoch": 17.02, "learning_rate": 3.7002315833269455e-06, "loss": 0.3303, "step": 35110 }, { "epoch": 17.02, "learning_rate": 3.695718249523581e-06, "loss": 0.7317, "step": 35120 }, { "epoch": 17.02, "learning_rate": 3.6912067698809306e-06, "loss": 0.4164, "step": 35130 }, { "epoch": 17.02, "learning_rate": 3.6866971465978297e-06, "loss": 0.5373, "step": 35140 }, { "epoch": 17.02, "learning_rate": 3.6821893818722152e-06, "loss": 0.6486, "step": 35150 }, { "epoch": 17.02, "learning_rate": 3.677683477901117e-06, "loss": 0.8014, "step": 35160 }, { "epoch": 17.02, "learning_rate": 3.673179436880658e-06, "loss": 0.5934, "step": 35170 }, { "epoch": 17.02, "learning_rate": 3.6686772610060517e-06, "loss": 0.8094, "step": 35180 }, { "epoch": 17.02, "learning_rate": 3.6641769524716058e-06, "loss": 0.3018, "step": 35190 }, { "epoch": 17.02, "learning_rate": 3.6596785134707146e-06, "loss": 0.7329, "step": 35200 }, { "epoch": 17.02, "learning_rate": 3.6551819461958634e-06, "loss": 0.3554, "step": 35210 }, { "epoch": 17.02, "learning_rate": 3.650687252838622e-06, "loss": 0.4408, "step": 35220 }, { "epoch": 17.02, "learning_rate": 3.6461944355896537e-06, "loss": 0.457, "step": 35230 }, { "epoch": 17.02, "learning_rate": 3.641703496638701e-06, "loss": 0.4134, "step": 35240 }, { "epoch": 17.02, "learning_rate": 3.637214438174593e-06, "loss": 0.5221, "step": 35250 }, { "epoch": 17.03, "learning_rate": 3.632727262385243e-06, "loss": 0.4694, "step": 35260 }, { "epoch": 17.03, "learning_rate": 3.6282419714576444e-06, "loss": 0.5371, "step": 35270 }, { "epoch": 17.03, "learning_rate": 3.6237585675778735e-06, "loss": 0.3918, "step": 35280 }, { "epoch": 17.03, "learning_rate": 3.619277052931089e-06, "loss": 0.6303, "step": 35290 }, { "epoch": 17.03, "learning_rate": 3.6147974297015245e-06, "loss": 0.7591, "step": 35300 }, { "epoch": 17.03, "learning_rate": 3.6103197000724957e-06, "loss": 0.6012, "step": 35310 }, { "epoch": 17.03, "learning_rate": 3.6058438662263894e-06, "loss": 0.6378, "step": 35320 }, { "epoch": 17.03, "learning_rate": 3.6013699303446784e-06, "loss": 0.5929, "step": 35330 }, { "epoch": 17.03, "learning_rate": 3.596897894607905e-06, "loss": 0.4922, "step": 35340 }, { "epoch": 17.03, "learning_rate": 3.5924277611956798e-06, "loss": 0.5399, "step": 35350 }, { "epoch": 17.03, "learning_rate": 3.587959532286692e-06, "loss": 0.8259, "step": 35360 }, { "epoch": 17.03, "learning_rate": 3.583493210058706e-06, "loss": 0.7079, "step": 35370 }, { "epoch": 17.03, "learning_rate": 3.579028796688551e-06, "loss": 0.6888, "step": 35380 }, { "epoch": 17.03, "learning_rate": 3.574566294352129e-06, "loss": 0.8479, "step": 35390 }, { "epoch": 17.03, "learning_rate": 3.5701057052244076e-06, "loss": 0.6815, "step": 35400 }, { "epoch": 17.03, "learning_rate": 3.565647031479422e-06, "loss": 0.725, "step": 35410 }, { "epoch": 17.03, "learning_rate": 3.5611902752902814e-06, "loss": 1.1404, "step": 35420 }, { "epoch": 17.03, "learning_rate": 3.556735438829154e-06, "loss": 0.3504, "step": 35430 }, { "epoch": 17.03, "learning_rate": 3.5522825242672673e-06, "loss": 0.3773, "step": 35440 }, { "epoch": 17.03, "learning_rate": 3.5478315337749183e-06, "loss": 1.0465, "step": 35450 }, { "epoch": 17.03, "learning_rate": 3.543382469521471e-06, "loss": 0.6458, "step": 35460 }, { "epoch": 17.03, "learning_rate": 3.5389353336753426e-06, "loss": 0.4453, "step": 35470 }, { "epoch": 17.03, "learning_rate": 3.5344901284040122e-06, "loss": 0.2837, "step": 35480 }, { "epoch": 17.03, "learning_rate": 3.530046855874021e-06, "loss": 0.618, "step": 35490 }, { "epoch": 17.03, "learning_rate": 3.525605518250964e-06, "loss": 0.4564, "step": 35500 }, { "epoch": 17.03, "learning_rate": 3.521166117699493e-06, "loss": 0.8331, "step": 35510 }, { "epoch": 17.03, "learning_rate": 3.5167286563833268e-06, "loss": 0.4391, "step": 35520 }, { "epoch": 17.03, "learning_rate": 3.512293136465221e-06, "loss": 0.4842, "step": 35530 }, { "epoch": 17.03, "learning_rate": 3.507859560106998e-06, "loss": 0.7373, "step": 35540 }, { "epoch": 17.03, "learning_rate": 3.5034279294695245e-06, "loss": 0.3786, "step": 35550 }, { "epoch": 17.03, "learning_rate": 3.498998246712731e-06, "loss": 0.322, "step": 35560 }, { "epoch": 17.03, "learning_rate": 3.4945705139955865e-06, "loss": 0.4963, "step": 35570 }, { "epoch": 17.03, "learning_rate": 3.490144733476116e-06, "loss": 0.6952, "step": 35580 }, { "epoch": 17.03, "learning_rate": 3.4857209073113893e-06, "loss": 0.5512, "step": 35590 }, { "epoch": 17.03, "learning_rate": 3.4812990376575235e-06, "loss": 0.4881, "step": 35600 }, { "epoch": 17.03, "learning_rate": 3.4768791266696926e-06, "loss": 0.5135, "step": 35610 }, { "epoch": 17.03, "learning_rate": 3.4724611765021e-06, "loss": 0.7624, "step": 35620 }, { "epoch": 17.03, "learning_rate": 3.4680451893080028e-06, "loss": 0.3704, "step": 35630 }, { "epoch": 17.03, "learning_rate": 3.4636311672396958e-06, "loss": 0.2933, "step": 35640 }, { "epoch": 17.03, "learning_rate": 3.4592191124485265e-06, "loss": 0.5389, "step": 35650 }, { "epoch": 17.03, "learning_rate": 3.454809027084874e-06, "loss": 0.5478, "step": 35660 }, { "epoch": 17.03, "learning_rate": 3.4504009132981614e-06, "loss": 0.5282, "step": 35670 }, { "epoch": 17.03, "learning_rate": 3.445994773236847e-06, "loss": 0.6104, "step": 35680 }, { "epoch": 17.03, "learning_rate": 3.4415906090484316e-06, "loss": 0.5048, "step": 35690 }, { "epoch": 17.03, "learning_rate": 3.437188422879452e-06, "loss": 0.2955, "step": 35700 }, { "epoch": 17.03, "learning_rate": 3.4327882168754793e-06, "loss": 0.404, "step": 35710 }, { "epoch": 17.03, "learning_rate": 3.4283899931811203e-06, "loss": 0.5149, "step": 35720 }, { "epoch": 17.03, "learning_rate": 3.4239937539400167e-06, "loss": 0.7681, "step": 35730 }, { "epoch": 17.03, "learning_rate": 3.4195995012948382e-06, "loss": 0.4023, "step": 35740 }, { "epoch": 17.04, "learning_rate": 3.415207237387297e-06, "loss": 0.5534, "step": 35750 }, { "epoch": 17.04, "learning_rate": 3.4108169643581244e-06, "loss": 0.7659, "step": 35760 }, { "epoch": 17.04, "learning_rate": 3.4064286843470882e-06, "loss": 0.6809, "step": 35770 }, { "epoch": 17.04, "learning_rate": 3.4020423994929824e-06, "loss": 0.4474, "step": 35780 }, { "epoch": 17.04, "learning_rate": 3.397658111933628e-06, "loss": 0.496, "step": 35790 }, { "epoch": 17.04, "learning_rate": 3.3932758238058738e-06, "loss": 0.2837, "step": 35800 }, { "epoch": 17.04, "learning_rate": 3.3888955372455947e-06, "loss": 0.1942, "step": 35810 }, { "epoch": 17.04, "learning_rate": 3.3845172543876883e-06, "loss": 0.5356, "step": 35820 }, { "epoch": 17.04, "learning_rate": 3.380140977366077e-06, "loss": 0.7328, "step": 35830 }, { "epoch": 17.04, "learning_rate": 3.3757667083137015e-06, "loss": 0.2156, "step": 35840 }, { "epoch": 17.04, "learning_rate": 3.371394449362533e-06, "loss": 0.4024, "step": 35850 }, { "epoch": 17.04, "learning_rate": 3.367024202643555e-06, "loss": 0.3176, "step": 35860 }, { "epoch": 17.04, "learning_rate": 3.3626559702867738e-06, "loss": 0.627, "step": 35870 }, { "epoch": 17.04, "learning_rate": 3.358289754421211e-06, "loss": 0.3277, "step": 35880 }, { "epoch": 17.04, "learning_rate": 3.353925557174907e-06, "loss": 0.5174, "step": 35890 }, { "epoch": 17.04, "learning_rate": 3.3495633806749203e-06, "loss": 0.4933, "step": 35900 }, { "epoch": 17.04, "learning_rate": 3.345203227047322e-06, "loss": 0.4456, "step": 35910 }, { "epoch": 17.04, "learning_rate": 3.340845098417197e-06, "loss": 0.6054, "step": 35920 }, { "epoch": 17.04, "learning_rate": 3.3364889969086428e-06, "loss": 0.7091, "step": 35930 }, { "epoch": 17.04, "learning_rate": 3.3321349246447753e-06, "loss": 0.7329, "step": 35940 }, { "epoch": 17.04, "learning_rate": 3.327782883747714e-06, "loss": 0.562, "step": 35950 }, { "epoch": 17.04, "learning_rate": 3.323432876338593e-06, "loss": 0.6282, "step": 35960 }, { "epoch": 17.04, "learning_rate": 3.319084904537545e-06, "loss": 0.574, "step": 35970 }, { "epoch": 17.04, "learning_rate": 3.3147389704637263e-06, "loss": 0.3558, "step": 35980 }, { "epoch": 17.04, "learning_rate": 3.310395076235291e-06, "loss": 0.834, "step": 35990 }, { "epoch": 17.04, "learning_rate": 3.3060532239694e-06, "loss": 0.5013, "step": 36000 }, { "epoch": 17.04, "eval_accuracy": 0.8736842105263158, "eval_f1": 0.8736842105263158, "eval_loss": 0.7713430523872375, "eval_runtime": 768.879, "eval_samples_per_second": 6.178, "eval_steps_per_second": 1.545, "step": 36000 }, { "epoch": 18.0, "learning_rate": 3.3017134157822186e-06, "loss": 0.3416, "step": 36010 }, { "epoch": 18.0, "learning_rate": 3.2973756537889164e-06, "loss": 0.519, "step": 36020 }, { "epoch": 18.0, "learning_rate": 3.2930399401036647e-06, "loss": 0.7151, "step": 36030 }, { "epoch": 18.0, "learning_rate": 3.288706276839642e-06, "loss": 0.7257, "step": 36040 }, { "epoch": 18.0, "learning_rate": 3.2843746661090225e-06, "loss": 0.3615, "step": 36050 }, { "epoch": 18.0, "learning_rate": 3.2800451100229773e-06, "loss": 0.2905, "step": 36060 }, { "epoch": 18.0, "learning_rate": 3.275717610691677e-06, "loss": 1.0138, "step": 36070 }, { "epoch": 18.0, "learning_rate": 3.2713921702242993e-06, "loss": 0.5712, "step": 36080 }, { "epoch": 18.0, "learning_rate": 3.267068790729008e-06, "loss": 0.4467, "step": 36090 }, { "epoch": 18.0, "learning_rate": 3.262747474312966e-06, "loss": 0.3777, "step": 36100 }, { "epoch": 18.0, "learning_rate": 3.2584282230823308e-06, "loss": 0.5909, "step": 36110 }, { "epoch": 18.0, "learning_rate": 3.2541110391422493e-06, "loss": 0.3593, "step": 36120 }, { "epoch": 18.0, "learning_rate": 3.2497959245968696e-06, "loss": 0.3639, "step": 36130 }, { "epoch": 18.0, "learning_rate": 3.2454828815493277e-06, "loss": 0.6528, "step": 36140 }, { "epoch": 18.0, "learning_rate": 3.241171912101743e-06, "loss": 0.3752, "step": 36150 }, { "epoch": 18.0, "learning_rate": 3.23686301835523e-06, "loss": 0.5945, "step": 36160 }, { "epoch": 18.0, "learning_rate": 3.2325562024098906e-06, "loss": 0.3346, "step": 36170 }, { "epoch": 18.0, "learning_rate": 3.2282514663648196e-06, "loss": 0.3911, "step": 36180 }, { "epoch": 18.0, "learning_rate": 3.22394881231809e-06, "loss": 0.7658, "step": 36190 }, { "epoch": 18.0, "learning_rate": 3.219648242366763e-06, "loss": 0.352, "step": 36200 }, { "epoch": 18.0, "learning_rate": 3.215349758606885e-06, "loss": 0.6865, "step": 36210 }, { "epoch": 18.0, "learning_rate": 3.2110533631334805e-06, "loss": 0.5107, "step": 36220 }, { "epoch": 18.0, "learning_rate": 3.20675905804057e-06, "loss": 0.4335, "step": 36230 }, { "epoch": 18.0, "learning_rate": 3.2024668454211373e-06, "loss": 0.556, "step": 36240 }, { "epoch": 18.0, "learning_rate": 3.1981767273671563e-06, "loss": 0.6473, "step": 36250 }, { "epoch": 18.01, "learning_rate": 3.1938887059695775e-06, "loss": 0.737, "step": 36260 }, { "epoch": 18.01, "learning_rate": 3.189602783318334e-06, "loss": 0.5317, "step": 36270 }, { "epoch": 18.01, "learning_rate": 3.185318961502331e-06, "loss": 1.087, "step": 36280 }, { "epoch": 18.01, "learning_rate": 3.1810372426094507e-06, "loss": 0.9457, "step": 36290 }, { "epoch": 18.01, "learning_rate": 3.1767576287265512e-06, "loss": 0.3877, "step": 36300 }, { "epoch": 18.01, "learning_rate": 3.1724801219394645e-06, "loss": 0.1997, "step": 36310 }, { "epoch": 18.01, "learning_rate": 3.1682047243329947e-06, "loss": 0.4113, "step": 36320 }, { "epoch": 18.01, "learning_rate": 3.1639314379909188e-06, "loss": 0.5148, "step": 36330 }, { "epoch": 18.01, "learning_rate": 3.1596602649959844e-06, "loss": 0.5285, "step": 36340 }, { "epoch": 18.01, "learning_rate": 3.1553912074299106e-06, "loss": 0.3002, "step": 36350 }, { "epoch": 18.01, "learning_rate": 3.151124267373381e-06, "loss": 0.9515, "step": 36360 }, { "epoch": 18.01, "learning_rate": 3.1468594469060546e-06, "loss": 0.3972, "step": 36370 }, { "epoch": 18.01, "learning_rate": 3.1425967481065515e-06, "loss": 0.3017, "step": 36380 }, { "epoch": 18.01, "learning_rate": 3.1383361730524596e-06, "loss": 0.3262, "step": 36390 }, { "epoch": 18.01, "learning_rate": 3.13407772382033e-06, "loss": 0.2876, "step": 36400 }, { "epoch": 18.01, "learning_rate": 3.1298214024856807e-06, "loss": 0.4643, "step": 36410 }, { "epoch": 18.01, "learning_rate": 3.1255672111229896e-06, "loss": 0.2409, "step": 36420 }, { "epoch": 18.01, "learning_rate": 3.121315151805699e-06, "loss": 0.5946, "step": 36430 }, { "epoch": 18.01, "learning_rate": 3.11706522660621e-06, "loss": 0.5023, "step": 36440 }, { "epoch": 18.01, "learning_rate": 3.1128174375958836e-06, "loss": 0.5401, "step": 36450 }, { "epoch": 18.01, "learning_rate": 3.1085717868450442e-06, "loss": 0.8222, "step": 36460 }, { "epoch": 18.01, "learning_rate": 3.104328276422968e-06, "loss": 0.3783, "step": 36470 }, { "epoch": 18.01, "learning_rate": 3.100086908397891e-06, "loss": 0.5229, "step": 36480 }, { "epoch": 18.01, "learning_rate": 3.095847684837003e-06, "loss": 0.2334, "step": 36490 }, { "epoch": 18.01, "learning_rate": 3.0916106078064522e-06, "loss": 0.471, "step": 36500 }, { "epoch": 18.01, "learning_rate": 3.0873756793713374e-06, "loss": 0.4497, "step": 36510 }, { "epoch": 18.01, "learning_rate": 3.083142901595711e-06, "loss": 0.924, "step": 36520 }, { "epoch": 18.01, "learning_rate": 3.0789122765425775e-06, "loss": 0.7265, "step": 36530 }, { "epoch": 18.01, "learning_rate": 3.0746838062738935e-06, "loss": 0.676, "step": 36540 }, { "epoch": 18.01, "learning_rate": 3.0704574928505588e-06, "loss": 0.45, "step": 36550 }, { "epoch": 18.01, "learning_rate": 3.066233338332435e-06, "loss": 0.4074, "step": 36560 }, { "epoch": 18.01, "learning_rate": 3.06201134477832e-06, "loss": 0.4613, "step": 36570 }, { "epoch": 18.01, "learning_rate": 3.057791514245965e-06, "loss": 0.4177, "step": 36580 }, { "epoch": 18.01, "learning_rate": 3.0535738487920557e-06, "loss": 0.3157, "step": 36590 }, { "epoch": 18.01, "learning_rate": 3.04935835047224e-06, "loss": 0.402, "step": 36600 }, { "epoch": 18.01, "learning_rate": 3.045145021341097e-06, "loss": 0.5566, "step": 36610 }, { "epoch": 18.01, "learning_rate": 3.0409338634521526e-06, "loss": 0.8892, "step": 36620 }, { "epoch": 18.01, "learning_rate": 3.036724878857873e-06, "loss": 0.3931, "step": 36630 }, { "epoch": 18.01, "learning_rate": 3.032518069609665e-06, "loss": 0.4067, "step": 36640 }, { "epoch": 18.01, "learning_rate": 3.0283134377578804e-06, "loss": 0.5997, "step": 36650 }, { "epoch": 18.01, "learning_rate": 3.024110985351804e-06, "loss": 0.6039, "step": 36660 }, { "epoch": 18.01, "learning_rate": 3.019910714439662e-06, "loss": 1.072, "step": 36670 }, { "epoch": 18.01, "learning_rate": 3.0157126270686113e-06, "loss": 0.6233, "step": 36680 }, { "epoch": 18.01, "learning_rate": 3.011516725284747e-06, "loss": 0.619, "step": 36690 }, { "epoch": 18.01, "learning_rate": 3.007323011133107e-06, "loss": 0.6432, "step": 36700 }, { "epoch": 18.01, "learning_rate": 3.0031314866576535e-06, "loss": 0.5534, "step": 36710 }, { "epoch": 18.01, "learning_rate": 2.9989421539012843e-06, "loss": 0.8209, "step": 36720 }, { "epoch": 18.01, "learning_rate": 2.9947550149058308e-06, "loss": 0.3964, "step": 36730 }, { "epoch": 18.01, "learning_rate": 2.99057007171205e-06, "loss": 0.2976, "step": 36740 }, { "epoch": 18.02, "learning_rate": 2.9863873263596374e-06, "loss": 0.4838, "step": 36750 }, { "epoch": 18.02, "learning_rate": 2.982206780887212e-06, "loss": 0.6236, "step": 36760 }, { "epoch": 18.02, "learning_rate": 2.9780284373323167e-06, "loss": 0.4662, "step": 36770 }, { "epoch": 18.02, "learning_rate": 2.9738522977314257e-06, "loss": 0.3537, "step": 36780 }, { "epoch": 18.02, "learning_rate": 2.9696783641199416e-06, "loss": 0.5034, "step": 36790 }, { "epoch": 18.02, "learning_rate": 2.9655066385321886e-06, "loss": 0.2501, "step": 36800 }, { "epoch": 18.02, "learning_rate": 2.9613371230014146e-06, "loss": 0.6887, "step": 36810 }, { "epoch": 18.02, "learning_rate": 2.9571698195597902e-06, "loss": 0.5212, "step": 36820 }, { "epoch": 18.02, "learning_rate": 2.953004730238409e-06, "loss": 0.4871, "step": 36830 }, { "epoch": 18.02, "learning_rate": 2.9488418570672825e-06, "loss": 0.3243, "step": 36840 }, { "epoch": 18.02, "learning_rate": 2.9446812020753518e-06, "loss": 0.4076, "step": 36850 }, { "epoch": 18.02, "learning_rate": 2.9405227672904613e-06, "loss": 0.3996, "step": 36860 }, { "epoch": 18.02, "learning_rate": 2.9363665547393856e-06, "loss": 0.7454, "step": 36870 }, { "epoch": 18.02, "learning_rate": 2.932212566447809e-06, "loss": 0.6302, "step": 36880 }, { "epoch": 18.02, "learning_rate": 2.928060804440339e-06, "loss": 0.3576, "step": 36890 }, { "epoch": 18.02, "learning_rate": 2.9239112707404932e-06, "loss": 0.3181, "step": 36900 }, { "epoch": 18.02, "learning_rate": 2.9197639673707034e-06, "loss": 0.608, "step": 36910 }, { "epoch": 18.02, "learning_rate": 2.915618896352314e-06, "loss": 0.2541, "step": 36920 }, { "epoch": 18.02, "learning_rate": 2.9114760597055798e-06, "loss": 0.2384, "step": 36930 }, { "epoch": 18.02, "learning_rate": 2.907335459449678e-06, "loss": 0.529, "step": 36940 }, { "epoch": 18.02, "learning_rate": 2.9031970976026783e-06, "loss": 0.5254, "step": 36950 }, { "epoch": 18.02, "learning_rate": 2.89906097618157e-06, "loss": 0.9418, "step": 36960 }, { "epoch": 18.02, "learning_rate": 2.8949270972022473e-06, "loss": 0.3854, "step": 36970 }, { "epoch": 18.02, "learning_rate": 2.8907954626795167e-06, "loss": 0.5665, "step": 36980 }, { "epoch": 18.02, "learning_rate": 2.886666074627084e-06, "loss": 0.5238, "step": 36990 }, { "epoch": 18.02, "learning_rate": 2.882538935057563e-06, "loss": 0.2699, "step": 37000 }, { "epoch": 18.02, "learning_rate": 2.8784140459824717e-06, "loss": 0.7433, "step": 37010 }, { "epoch": 18.02, "learning_rate": 2.8742914094122317e-06, "loss": 0.6341, "step": 37020 }, { "epoch": 18.02, "learning_rate": 2.8701710273561635e-06, "loss": 0.4695, "step": 37030 }, { "epoch": 18.02, "learning_rate": 2.8660529018224937e-06, "loss": 0.6994, "step": 37040 }, { "epoch": 18.02, "learning_rate": 2.861937034818345e-06, "loss": 0.2551, "step": 37050 }, { "epoch": 18.02, "learning_rate": 2.8578234283497423e-06, "loss": 0.4353, "step": 37060 }, { "epoch": 18.02, "learning_rate": 2.8537120844216044e-06, "loss": 0.5129, "step": 37070 }, { "epoch": 18.02, "learning_rate": 2.8496030050377552e-06, "loss": 0.8072, "step": 37080 }, { "epoch": 18.02, "learning_rate": 2.845496192200908e-06, "loss": 0.6905, "step": 37090 }, { "epoch": 18.02, "learning_rate": 2.841391647912673e-06, "loss": 0.489, "step": 37100 }, { "epoch": 18.02, "learning_rate": 2.8372893741735546e-06, "loss": 0.5638, "step": 37110 }, { "epoch": 18.02, "learning_rate": 2.8331893729829527e-06, "loss": 0.5291, "step": 37120 }, { "epoch": 18.02, "learning_rate": 2.8290916463391576e-06, "loss": 0.5672, "step": 37130 }, { "epoch": 18.02, "learning_rate": 2.8249961962393506e-06, "loss": 0.5564, "step": 37140 }, { "epoch": 18.02, "learning_rate": 2.8209030246796054e-06, "loss": 0.4435, "step": 37150 }, { "epoch": 18.02, "learning_rate": 2.8168121336548832e-06, "loss": 0.3198, "step": 37160 }, { "epoch": 18.02, "learning_rate": 2.812723525159033e-06, "loss": 0.4359, "step": 37170 }, { "epoch": 18.02, "learning_rate": 2.808637201184797e-06, "loss": 0.2043, "step": 37180 }, { "epoch": 18.02, "learning_rate": 2.8045531637237984e-06, "loss": 0.6797, "step": 37190 }, { "epoch": 18.02, "learning_rate": 2.8004714147665487e-06, "loss": 0.6557, "step": 37200 }, { "epoch": 18.02, "learning_rate": 2.7963919563024366e-06, "loss": 0.7913, "step": 37210 }, { "epoch": 18.02, "learning_rate": 2.7923147903197473e-06, "loss": 0.5721, "step": 37220 }, { "epoch": 18.02, "learning_rate": 2.7882399188056393e-06, "loss": 0.7529, "step": 37230 }, { "epoch": 18.02, "learning_rate": 2.7841673437461557e-06, "loss": 0.2419, "step": 37240 }, { "epoch": 18.02, "learning_rate": 2.7800970671262205e-06, "loss": 0.5299, "step": 37250 }, { "epoch": 18.03, "learning_rate": 2.7760290909296344e-06, "loss": 0.8419, "step": 37260 }, { "epoch": 18.03, "learning_rate": 2.771963417139084e-06, "loss": 0.6347, "step": 37270 }, { "epoch": 18.03, "learning_rate": 2.7679000477361276e-06, "loss": 0.6209, "step": 37280 }, { "epoch": 18.03, "learning_rate": 2.763838984701204e-06, "loss": 0.389, "step": 37290 }, { "epoch": 18.03, "learning_rate": 2.759780230013618e-06, "loss": 0.5476, "step": 37300 }, { "epoch": 18.03, "learning_rate": 2.755723785651567e-06, "loss": 0.8579, "step": 37310 }, { "epoch": 18.03, "learning_rate": 2.7516696535921084e-06, "loss": 0.7398, "step": 37320 }, { "epoch": 18.03, "learning_rate": 2.7476178358111765e-06, "loss": 0.5888, "step": 37330 }, { "epoch": 18.03, "learning_rate": 2.7435683342835794e-06, "loss": 0.5358, "step": 37340 }, { "epoch": 18.03, "learning_rate": 2.739521150982994e-06, "loss": 0.4386, "step": 37350 }, { "epoch": 18.03, "learning_rate": 2.735476287881967e-06, "loss": 0.2455, "step": 37360 }, { "epoch": 18.03, "learning_rate": 2.731433746951919e-06, "loss": 0.2797, "step": 37370 }, { "epoch": 18.03, "learning_rate": 2.727393530163136e-06, "loss": 0.5183, "step": 37380 }, { "epoch": 18.03, "learning_rate": 2.7233556394847664e-06, "loss": 0.3987, "step": 37390 }, { "epoch": 18.03, "learning_rate": 2.719320076884827e-06, "loss": 0.4604, "step": 37400 }, { "epoch": 18.03, "learning_rate": 2.7152868443302092e-06, "loss": 0.5954, "step": 37410 }, { "epoch": 18.03, "learning_rate": 2.7112559437866583e-06, "loss": 0.5356, "step": 37420 }, { "epoch": 18.03, "learning_rate": 2.707227377218785e-06, "loss": 0.5328, "step": 37430 }, { "epoch": 18.03, "learning_rate": 2.7032011465900655e-06, "loss": 0.4616, "step": 37440 }, { "epoch": 18.03, "learning_rate": 2.6991772538628325e-06, "loss": 0.4083, "step": 37450 }, { "epoch": 18.03, "learning_rate": 2.6951557009982875e-06, "loss": 0.5883, "step": 37460 }, { "epoch": 18.03, "learning_rate": 2.691136489956486e-06, "loss": 0.4895, "step": 37470 }, { "epoch": 18.03, "learning_rate": 2.687119622696339e-06, "loss": 0.6126, "step": 37480 }, { "epoch": 18.03, "learning_rate": 2.683105101175617e-06, "loss": 0.2373, "step": 37490 }, { "epoch": 18.03, "learning_rate": 2.6790929273509547e-06, "loss": 0.445, "step": 37500 }, { "epoch": 18.03, "learning_rate": 2.6750831031778352e-06, "loss": 0.5903, "step": 37510 }, { "epoch": 18.03, "learning_rate": 2.6710756306105973e-06, "loss": 0.2374, "step": 37520 }, { "epoch": 18.03, "learning_rate": 2.6670705116024345e-06, "loss": 0.5996, "step": 37530 }, { "epoch": 18.03, "learning_rate": 2.663067748105393e-06, "loss": 0.3053, "step": 37540 }, { "epoch": 18.03, "learning_rate": 2.6590673420703694e-06, "loss": 0.325, "step": 37550 }, { "epoch": 18.03, "learning_rate": 2.6550692954471202e-06, "loss": 0.351, "step": 37560 }, { "epoch": 18.03, "learning_rate": 2.6510736101842365e-06, "loss": 0.8695, "step": 37570 }, { "epoch": 18.03, "learning_rate": 2.64708028822917e-06, "loss": 0.4065, "step": 37580 }, { "epoch": 18.03, "learning_rate": 2.643089331528214e-06, "loss": 0.563, "step": 37590 }, { "epoch": 18.03, "learning_rate": 2.6391007420265183e-06, "loss": 0.8252, "step": 37600 }, { "epoch": 18.03, "learning_rate": 2.63511452166807e-06, "loss": 0.2165, "step": 37610 }, { "epoch": 18.03, "learning_rate": 2.631130672395705e-06, "loss": 0.5479, "step": 37620 }, { "epoch": 18.03, "learning_rate": 2.6271491961511017e-06, "loss": 0.5603, "step": 37630 }, { "epoch": 18.03, "learning_rate": 2.623170094874782e-06, "loss": 0.4072, "step": 37640 }, { "epoch": 18.03, "learning_rate": 2.619193370506119e-06, "loss": 0.5133, "step": 37650 }, { "epoch": 18.03, "learning_rate": 2.615219024983312e-06, "loss": 0.6001, "step": 37660 }, { "epoch": 18.03, "learning_rate": 2.6112470602434107e-06, "loss": 0.4305, "step": 37670 }, { "epoch": 18.03, "learning_rate": 2.6072774782223036e-06, "loss": 0.5292, "step": 37680 }, { "epoch": 18.03, "learning_rate": 2.6033102808547136e-06, "loss": 0.538, "step": 37690 }, { "epoch": 18.03, "learning_rate": 2.599345470074209e-06, "loss": 0.4507, "step": 37700 }, { "epoch": 18.03, "learning_rate": 2.595383047813189e-06, "loss": 0.2525, "step": 37710 }, { "epoch": 18.03, "learning_rate": 2.591423016002889e-06, "loss": 0.3462, "step": 37720 }, { "epoch": 18.03, "learning_rate": 2.5874653765733816e-06, "loss": 0.4602, "step": 37730 }, { "epoch": 18.03, "learning_rate": 2.5835101314535712e-06, "loss": 0.5111, "step": 37740 }, { "epoch": 18.04, "learning_rate": 2.5795572825711963e-06, "loss": 0.3537, "step": 37750 }, { "epoch": 18.04, "learning_rate": 2.5756068318528278e-06, "loss": 0.5182, "step": 37760 }, { "epoch": 18.04, "learning_rate": 2.571658781223867e-06, "loss": 0.2725, "step": 37770 }, { "epoch": 18.04, "learning_rate": 2.5677131326085447e-06, "loss": 0.6178, "step": 37780 }, { "epoch": 18.04, "learning_rate": 2.5637698879299253e-06, "loss": 0.4089, "step": 37790 }, { "epoch": 18.04, "learning_rate": 2.5598290491098956e-06, "loss": 0.4844, "step": 37800 }, { "epoch": 18.04, "learning_rate": 2.5558906180691747e-06, "loss": 0.525, "step": 37810 }, { "epoch": 18.04, "learning_rate": 2.5519545967273048e-06, "loss": 0.8549, "step": 37820 }, { "epoch": 18.04, "learning_rate": 2.548020987002654e-06, "loss": 0.4982, "step": 37830 }, { "epoch": 18.04, "learning_rate": 2.5440897908124176e-06, "loss": 0.5018, "step": 37840 }, { "epoch": 18.04, "learning_rate": 2.5401610100726124e-06, "loss": 0.3848, "step": 37850 }, { "epoch": 18.04, "learning_rate": 2.536234646698077e-06, "loss": 0.3812, "step": 37860 }, { "epoch": 18.04, "learning_rate": 2.5323107026024746e-06, "loss": 0.4099, "step": 37870 }, { "epoch": 18.04, "learning_rate": 2.528389179698286e-06, "loss": 0.5027, "step": 37880 }, { "epoch": 18.04, "learning_rate": 2.5244700798968166e-06, "loss": 0.3912, "step": 37890 }, { "epoch": 18.04, "learning_rate": 2.520553405108188e-06, "loss": 0.3629, "step": 37900 }, { "epoch": 18.04, "learning_rate": 2.5166391572413403e-06, "loss": 0.951, "step": 37910 }, { "epoch": 18.04, "learning_rate": 2.5127273382040244e-06, "loss": 0.3341, "step": 37920 }, { "epoch": 18.04, "learning_rate": 2.5088179499028205e-06, "loss": 0.8679, "step": 37930 }, { "epoch": 18.04, "learning_rate": 2.5049109942431135e-06, "loss": 0.8129, "step": 37940 }, { "epoch": 18.04, "learning_rate": 2.5010064731291072e-06, "loss": 0.6507, "step": 37950 }, { "epoch": 18.04, "learning_rate": 2.497104388463818e-06, "loss": 1.0909, "step": 37960 }, { "epoch": 18.04, "learning_rate": 2.4932047421490708e-06, "loss": 0.3817, "step": 37970 }, { "epoch": 18.04, "learning_rate": 2.4893075360855116e-06, "loss": 0.8744, "step": 37980 }, { "epoch": 18.04, "learning_rate": 2.485412772172589e-06, "loss": 0.4002, "step": 37990 }, { "epoch": 18.04, "learning_rate": 2.4815204523085656e-06, "loss": 0.9572, "step": 38000 }, { "epoch": 18.04, "eval_accuracy": 0.868421052631579, "eval_f1": 0.868421052631579, "eval_loss": 0.761253297328949, "eval_runtime": 768.4537, "eval_samples_per_second": 6.181, "eval_steps_per_second": 1.546, "step": 38000 }, { "epoch": 19.0, "learning_rate": 2.4776305783905063e-06, "loss": 1.176, "step": 38010 }, { "epoch": 19.0, "learning_rate": 2.473743152314288e-06, "loss": 0.3656, "step": 38020 }, { "epoch": 19.0, "learning_rate": 2.4698581759746e-06, "loss": 0.3558, "step": 38030 }, { "epoch": 19.0, "learning_rate": 2.465975651264931e-06, "loss": 0.723, "step": 38040 }, { "epoch": 19.0, "learning_rate": 2.462095580077575e-06, "loss": 0.747, "step": 38050 }, { "epoch": 19.0, "learning_rate": 2.4582179643036316e-06, "loss": 0.5305, "step": 38060 }, { "epoch": 19.0, "learning_rate": 2.4543428058330024e-06, "loss": 0.5172, "step": 38070 }, { "epoch": 19.0, "learning_rate": 2.450470106554396e-06, "loss": 0.4605, "step": 38080 }, { "epoch": 19.0, "learning_rate": 2.44659986835532e-06, "loss": 0.3962, "step": 38090 }, { "epoch": 19.0, "learning_rate": 2.442732093122077e-06, "loss": 0.4428, "step": 38100 }, { "epoch": 19.0, "learning_rate": 2.4388667827397724e-06, "loss": 0.5217, "step": 38110 }, { "epoch": 19.0, "learning_rate": 2.4350039390923164e-06, "loss": 0.6184, "step": 38120 }, { "epoch": 19.0, "learning_rate": 2.431143564062411e-06, "loss": 0.4994, "step": 38130 }, { "epoch": 19.0, "learning_rate": 2.427285659531556e-06, "loss": 0.578, "step": 38140 }, { "epoch": 19.0, "learning_rate": 2.4234302273800455e-06, "loss": 0.2993, "step": 38150 }, { "epoch": 19.0, "learning_rate": 2.4195772694869712e-06, "loss": 0.3344, "step": 38160 }, { "epoch": 19.0, "learning_rate": 2.415726787730216e-06, "loss": 0.4295, "step": 38170 }, { "epoch": 19.0, "learning_rate": 2.411878783986465e-06, "loss": 0.697, "step": 38180 }, { "epoch": 19.0, "learning_rate": 2.4080332601311813e-06, "loss": 0.3132, "step": 38190 }, { "epoch": 19.0, "learning_rate": 2.404190218038628e-06, "loss": 0.5248, "step": 38200 }, { "epoch": 19.0, "learning_rate": 2.4003496595818557e-06, "loss": 0.457, "step": 38210 }, { "epoch": 19.0, "learning_rate": 2.396511586632711e-06, "loss": 0.3919, "step": 38220 }, { "epoch": 19.0, "learning_rate": 2.39267600106182e-06, "loss": 0.2957, "step": 38230 }, { "epoch": 19.0, "learning_rate": 2.3888429047386022e-06, "loss": 0.4636, "step": 38240 }, { "epoch": 19.0, "learning_rate": 2.3850122995312623e-06, "loss": 0.2432, "step": 38250 }, { "epoch": 19.01, "learning_rate": 2.3811841873067865e-06, "loss": 0.4494, "step": 38260 }, { "epoch": 19.01, "learning_rate": 2.3773585699309594e-06, "loss": 0.6232, "step": 38270 }, { "epoch": 19.01, "learning_rate": 2.3735354492683315e-06, "loss": 0.4032, "step": 38280 }, { "epoch": 19.01, "learning_rate": 2.3697148271822498e-06, "loss": 0.4197, "step": 38290 }, { "epoch": 19.01, "learning_rate": 2.3658967055348347e-06, "loss": 0.4868, "step": 38300 }, { "epoch": 19.01, "learning_rate": 2.3620810861869978e-06, "loss": 0.2848, "step": 38310 }, { "epoch": 19.01, "learning_rate": 2.3582679709984236e-06, "loss": 0.3721, "step": 38320 }, { "epoch": 19.01, "learning_rate": 2.3544573618275777e-06, "loss": 0.2844, "step": 38330 }, { "epoch": 19.01, "learning_rate": 2.3506492605317054e-06, "loss": 0.9406, "step": 38340 }, { "epoch": 19.01, "learning_rate": 2.3468436689668285e-06, "loss": 0.6308, "step": 38350 }, { "epoch": 19.01, "learning_rate": 2.343040588987745e-06, "loss": 0.4966, "step": 38360 }, { "epoch": 19.01, "learning_rate": 2.3392400224480327e-06, "loss": 0.2921, "step": 38370 }, { "epoch": 19.01, "learning_rate": 2.3354419712000383e-06, "loss": 0.7538, "step": 38380 }, { "epoch": 19.01, "learning_rate": 2.331646437094888e-06, "loss": 0.146, "step": 38390 }, { "epoch": 19.01, "learning_rate": 2.3278534219824766e-06, "loss": 0.588, "step": 38400 }, { "epoch": 19.01, "learning_rate": 2.3240629277114765e-06, "loss": 0.4177, "step": 38410 }, { "epoch": 19.01, "learning_rate": 2.320274956129328e-06, "loss": 0.1569, "step": 38420 }, { "epoch": 19.01, "learning_rate": 2.316489509082243e-06, "loss": 0.2267, "step": 38430 }, { "epoch": 19.01, "learning_rate": 2.3127065884152e-06, "loss": 0.4758, "step": 38440 }, { "epoch": 19.01, "learning_rate": 2.30892619597195e-06, "loss": 0.7699, "step": 38450 }, { "epoch": 19.01, "learning_rate": 2.3051483335950098e-06, "loss": 0.3688, "step": 38460 }, { "epoch": 19.01, "learning_rate": 2.3013730031256647e-06, "loss": 0.842, "step": 38470 }, { "epoch": 19.01, "learning_rate": 2.297600206403964e-06, "loss": 0.5726, "step": 38480 }, { "epoch": 19.01, "learning_rate": 2.2938299452687213e-06, "loss": 0.339, "step": 38490 }, { "epoch": 19.01, "learning_rate": 2.29006222155752e-06, "loss": 0.5403, "step": 38500 }, { "epoch": 19.01, "learning_rate": 2.286297037106701e-06, "loss": 0.1399, "step": 38510 }, { "epoch": 19.01, "learning_rate": 2.282534393751369e-06, "loss": 0.6494, "step": 38520 }, { "epoch": 19.01, "learning_rate": 2.2787742933253936e-06, "loss": 0.9051, "step": 38530 }, { "epoch": 19.01, "learning_rate": 2.2750167376613945e-06, "loss": 0.98, "step": 38540 }, { "epoch": 19.01, "learning_rate": 2.2712617285907668e-06, "loss": 0.5566, "step": 38550 }, { "epoch": 19.01, "learning_rate": 2.2675092679436525e-06, "loss": 0.3889, "step": 38560 }, { "epoch": 19.01, "learning_rate": 2.2637593575489562e-06, "loss": 0.2252, "step": 38570 }, { "epoch": 19.01, "learning_rate": 2.260011999234338e-06, "loss": 0.8652, "step": 38580 }, { "epoch": 19.01, "learning_rate": 2.2562671948262134e-06, "loss": 0.5661, "step": 38590 }, { "epoch": 19.01, "learning_rate": 2.2525249461497585e-06, "loss": 0.4515, "step": 38600 }, { "epoch": 19.01, "learning_rate": 2.2487852550288986e-06, "loss": 0.8839, "step": 38610 }, { "epoch": 19.01, "learning_rate": 2.2450481232863144e-06, "loss": 0.8046, "step": 38620 }, { "epoch": 19.01, "learning_rate": 2.2413135527434324e-06, "loss": 0.3477, "step": 38630 }, { "epoch": 19.01, "learning_rate": 2.2375815452204444e-06, "loss": 0.1162, "step": 38640 }, { "epoch": 19.01, "learning_rate": 2.233852102536282e-06, "loss": 0.2483, "step": 38650 }, { "epoch": 19.01, "learning_rate": 2.230125226508632e-06, "loss": 0.2738, "step": 38660 }, { "epoch": 19.01, "learning_rate": 2.226400918953927e-06, "loss": 0.2559, "step": 38670 }, { "epoch": 19.01, "learning_rate": 2.222679181687349e-06, "loss": 0.2962, "step": 38680 }, { "epoch": 19.01, "learning_rate": 2.2189600165228265e-06, "loss": 0.5954, "step": 38690 }, { "epoch": 19.01, "learning_rate": 2.2152434252730397e-06, "loss": 0.546, "step": 38700 }, { "epoch": 19.01, "learning_rate": 2.2115294097494088e-06, "loss": 0.5356, "step": 38710 }, { "epoch": 19.01, "learning_rate": 2.207817971762097e-06, "loss": 0.2988, "step": 38720 }, { "epoch": 19.01, "learning_rate": 2.2041091131200133e-06, "loss": 0.3633, "step": 38730 }, { "epoch": 19.01, "learning_rate": 2.2004028356308146e-06, "loss": 0.7466, "step": 38740 }, { "epoch": 19.02, "learning_rate": 2.196699141100894e-06, "loss": 0.8244, "step": 38750 }, { "epoch": 19.02, "learning_rate": 2.192998031335387e-06, "loss": 0.3217, "step": 38760 }, { "epoch": 19.02, "learning_rate": 2.1892995081381687e-06, "loss": 0.643, "step": 38770 }, { "epoch": 19.02, "learning_rate": 2.185603573311853e-06, "loss": 0.9874, "step": 38780 }, { "epoch": 19.02, "learning_rate": 2.181910228657798e-06, "loss": 0.6318, "step": 38790 }, { "epoch": 19.02, "learning_rate": 2.1782194759760947e-06, "loss": 0.5392, "step": 38800 }, { "epoch": 19.02, "learning_rate": 2.1745313170655672e-06, "loss": 0.3199, "step": 38810 }, { "epoch": 19.02, "learning_rate": 2.170845753723778e-06, "loss": 0.5739, "step": 38820 }, { "epoch": 19.02, "learning_rate": 2.1671627877470326e-06, "loss": 0.6472, "step": 38830 }, { "epoch": 19.02, "learning_rate": 2.16348242093036e-06, "loss": 0.6141, "step": 38840 }, { "epoch": 19.02, "learning_rate": 2.1598046550675273e-06, "loss": 0.4404, "step": 38850 }, { "epoch": 19.02, "learning_rate": 2.156129491951031e-06, "loss": 0.6016, "step": 38860 }, { "epoch": 19.02, "learning_rate": 2.152456933372103e-06, "loss": 0.3857, "step": 38870 }, { "epoch": 19.02, "learning_rate": 2.1487869811207007e-06, "loss": 0.5235, "step": 38880 }, { "epoch": 19.02, "learning_rate": 2.1451196369855212e-06, "loss": 0.7967, "step": 38890 }, { "epoch": 19.02, "learning_rate": 2.1414549027539765e-06, "loss": 0.2731, "step": 38900 }, { "epoch": 19.02, "learning_rate": 2.1377927802122154e-06, "loss": 0.4294, "step": 38910 }, { "epoch": 19.02, "learning_rate": 2.1341332711451104e-06, "loss": 0.7456, "step": 38920 }, { "epoch": 19.02, "learning_rate": 2.130476377336266e-06, "loss": 0.5222, "step": 38930 }, { "epoch": 19.02, "learning_rate": 2.1268221005680057e-06, "loss": 0.5666, "step": 38940 }, { "epoch": 19.02, "learning_rate": 2.1231704426213795e-06, "loss": 0.4093, "step": 38950 }, { "epoch": 19.02, "learning_rate": 2.1195214052761614e-06, "loss": 0.7834, "step": 38960 }, { "epoch": 19.02, "learning_rate": 2.115874990310845e-06, "loss": 0.4512, "step": 38970 }, { "epoch": 19.02, "learning_rate": 2.1122311995026557e-06, "loss": 0.4492, "step": 38980 }, { "epoch": 19.02, "learning_rate": 2.108590034627526e-06, "loss": 0.4992, "step": 38990 }, { "epoch": 19.02, "learning_rate": 2.104951497460118e-06, "loss": 0.3091, "step": 39000 }, { "epoch": 19.02, "learning_rate": 2.1013155897738106e-06, "loss": 0.411, "step": 39010 }, { "epoch": 19.02, "learning_rate": 2.097682313340698e-06, "loss": 0.9523, "step": 39020 }, { "epoch": 19.02, "learning_rate": 2.0940516699316e-06, "loss": 0.3464, "step": 39030 }, { "epoch": 19.02, "learning_rate": 2.0904236613160463e-06, "loss": 0.4488, "step": 39040 }, { "epoch": 19.02, "learning_rate": 2.086798289262284e-06, "loss": 0.4607, "step": 39050 }, { "epoch": 19.02, "learning_rate": 2.0831755555372753e-06, "loss": 0.4364, "step": 39060 }, { "epoch": 19.02, "learning_rate": 2.0795554619066955e-06, "loss": 0.5127, "step": 39070 }, { "epoch": 19.02, "learning_rate": 2.075938010134936e-06, "loss": 0.5928, "step": 39080 }, { "epoch": 19.02, "learning_rate": 2.072323201985098e-06, "loss": 0.431, "step": 39090 }, { "epoch": 19.02, "learning_rate": 2.068711039218996e-06, "loss": 0.4404, "step": 39100 }, { "epoch": 19.02, "learning_rate": 2.06510152359715e-06, "loss": 0.1239, "step": 39110 }, { "epoch": 19.02, "learning_rate": 2.0614946568788e-06, "loss": 0.8922, "step": 39120 }, { "epoch": 19.02, "learning_rate": 2.057890440821886e-06, "loss": 0.5615, "step": 39130 }, { "epoch": 19.02, "learning_rate": 2.0542888771830605e-06, "loss": 0.6164, "step": 39140 }, { "epoch": 19.02, "learning_rate": 2.050689967717681e-06, "loss": 0.3161, "step": 39150 }, { "epoch": 19.02, "learning_rate": 2.0470937141798125e-06, "loss": 0.3372, "step": 39160 }, { "epoch": 19.02, "learning_rate": 2.0435001183222243e-06, "loss": 0.6414, "step": 39170 }, { "epoch": 19.02, "learning_rate": 2.039909181896393e-06, "loss": 0.9572, "step": 39180 }, { "epoch": 19.02, "learning_rate": 2.036320906652495e-06, "loss": 0.6156, "step": 39190 }, { "epoch": 19.02, "learning_rate": 2.032735294339415e-06, "loss": 0.1312, "step": 39200 }, { "epoch": 19.02, "learning_rate": 2.029152346704733e-06, "loss": 0.865, "step": 39210 }, { "epoch": 19.02, "learning_rate": 2.0255720654947383e-06, "loss": 0.4385, "step": 39220 }, { "epoch": 19.02, "learning_rate": 2.021994452454415e-06, "loss": 0.4145, "step": 39230 }, { "epoch": 19.02, "learning_rate": 2.0184195093274506e-06, "loss": 0.4148, "step": 39240 }, { "epoch": 19.02, "learning_rate": 2.0148472378562215e-06, "loss": 0.4605, "step": 39250 }, { "epoch": 19.03, "learning_rate": 2.0112776397818165e-06, "loss": 0.4646, "step": 39260 }, { "epoch": 19.03, "learning_rate": 2.007710716844012e-06, "loss": 0.8151, "step": 39270 }, { "epoch": 19.03, "learning_rate": 2.0041464707812835e-06, "loss": 0.3476, "step": 39280 }, { "epoch": 19.03, "learning_rate": 2.0005849033308014e-06, "loss": 0.712, "step": 39290 }, { "epoch": 19.03, "learning_rate": 1.9970260162284266e-06, "loss": 0.566, "step": 39300 }, { "epoch": 19.03, "learning_rate": 1.993469811208723e-06, "loss": 0.6018, "step": 39310 }, { "epoch": 19.03, "learning_rate": 1.9899162900049393e-06, "loss": 0.6175, "step": 39320 }, { "epoch": 19.03, "learning_rate": 1.9863654543490208e-06, "loss": 0.6559, "step": 39330 }, { "epoch": 19.03, "learning_rate": 1.982817305971595e-06, "loss": 0.336, "step": 39340 }, { "epoch": 19.03, "learning_rate": 1.9792718466019927e-06, "loss": 0.7435, "step": 39350 }, { "epoch": 19.03, "learning_rate": 1.975729077968226e-06, "loss": 0.3189, "step": 39360 }, { "epoch": 19.03, "learning_rate": 1.972189001796997e-06, "loss": 0.7507, "step": 39370 }, { "epoch": 19.03, "learning_rate": 1.9686516198136965e-06, "loss": 0.7727, "step": 39380 }, { "epoch": 19.03, "learning_rate": 1.9651169337424006e-06, "loss": 0.3345, "step": 39390 }, { "epoch": 19.03, "learning_rate": 1.9615849453058707e-06, "loss": 0.193, "step": 39400 }, { "epoch": 19.03, "learning_rate": 1.958055656225559e-06, "loss": 0.8061, "step": 39410 }, { "epoch": 19.03, "learning_rate": 1.9545290682215987e-06, "loss": 0.4554, "step": 39420 }, { "epoch": 19.03, "learning_rate": 1.9510051830128007e-06, "loss": 0.2398, "step": 39430 }, { "epoch": 19.03, "learning_rate": 1.947484002316664e-06, "loss": 0.1954, "step": 39440 }, { "epoch": 19.03, "learning_rate": 1.943965527849375e-06, "loss": 0.4979, "step": 39450 }, { "epoch": 19.03, "learning_rate": 1.9404497613257914e-06, "loss": 0.4122, "step": 39460 }, { "epoch": 19.03, "learning_rate": 1.9369367044594567e-06, "loss": 0.501, "step": 39470 }, { "epoch": 19.03, "learning_rate": 1.9334263589625904e-06, "loss": 0.4208, "step": 39480 }, { "epoch": 19.03, "learning_rate": 1.929918726546092e-06, "loss": 0.4463, "step": 39490 }, { "epoch": 19.03, "learning_rate": 1.9264138089195424e-06, "loss": 0.7467, "step": 39500 }, { "epoch": 19.03, "learning_rate": 1.922911607791196e-06, "loss": 0.4804, "step": 39510 }, { "epoch": 19.03, "learning_rate": 1.9194121248679772e-06, "loss": 0.7707, "step": 39520 }, { "epoch": 19.03, "learning_rate": 1.915915361855496e-06, "loss": 0.4711, "step": 39530 }, { "epoch": 19.03, "learning_rate": 1.912421320458029e-06, "loss": 0.5653, "step": 39540 }, { "epoch": 19.03, "learning_rate": 1.9089300023785338e-06, "loss": 0.473, "step": 39550 }, { "epoch": 19.03, "learning_rate": 1.9054414093186343e-06, "loss": 0.4003, "step": 39560 }, { "epoch": 19.03, "learning_rate": 1.9019555429786287e-06, "loss": 0.8618, "step": 39570 }, { "epoch": 19.03, "learning_rate": 1.8984724050574857e-06, "loss": 0.3744, "step": 39580 }, { "epoch": 19.03, "learning_rate": 1.8949919972528412e-06, "loss": 0.3537, "step": 39590 }, { "epoch": 19.03, "learning_rate": 1.891514321261012e-06, "loss": 0.6167, "step": 39600 }, { "epoch": 19.03, "learning_rate": 1.888039378776968e-06, "loss": 0.5296, "step": 39610 }, { "epoch": 19.03, "learning_rate": 1.8845671714943557e-06, "loss": 0.701, "step": 39620 }, { "epoch": 19.03, "learning_rate": 1.8810977011054845e-06, "loss": 0.4249, "step": 39630 }, { "epoch": 19.03, "learning_rate": 1.877630969301337e-06, "loss": 0.4951, "step": 39640 }, { "epoch": 19.03, "learning_rate": 1.8741669777715532e-06, "loss": 0.4541, "step": 39650 }, { "epoch": 19.03, "learning_rate": 1.8707057282044417e-06, "loss": 0.7534, "step": 39660 }, { "epoch": 19.03, "learning_rate": 1.867247222286973e-06, "loss": 0.2315, "step": 39670 }, { "epoch": 19.03, "learning_rate": 1.86379146170478e-06, "loss": 0.6699, "step": 39680 }, { "epoch": 19.03, "learning_rate": 1.8603384481421596e-06, "loss": 0.4909, "step": 39690 }, { "epoch": 19.03, "learning_rate": 1.856888183282068e-06, "loss": 0.4573, "step": 39700 }, { "epoch": 19.03, "learning_rate": 1.8534406688061228e-06, "loss": 0.5582, "step": 39710 }, { "epoch": 19.03, "learning_rate": 1.8499959063946007e-06, "loss": 0.4972, "step": 39720 }, { "epoch": 19.03, "learning_rate": 1.8465538977264343e-06, "loss": 0.1831, "step": 39730 }, { "epoch": 19.03, "learning_rate": 1.843114644479221e-06, "loss": 0.9039, "step": 39740 }, { "epoch": 19.04, "learning_rate": 1.83967814832921e-06, "loss": 0.4582, "step": 39750 }, { "epoch": 19.04, "learning_rate": 1.8362444109513072e-06, "loss": 0.4638, "step": 39760 }, { "epoch": 19.04, "learning_rate": 1.8328134340190738e-06, "loss": 0.4641, "step": 39770 }, { "epoch": 19.04, "learning_rate": 1.8293852192047275e-06, "loss": 0.7425, "step": 39780 }, { "epoch": 19.04, "learning_rate": 1.8259597681791374e-06, "loss": 0.8255, "step": 39790 }, { "epoch": 19.04, "learning_rate": 1.822537082611828e-06, "loss": 0.5125, "step": 39800 }, { "epoch": 19.04, "learning_rate": 1.819117164170973e-06, "loss": 0.5191, "step": 39810 }, { "epoch": 19.04, "learning_rate": 1.8157000145233987e-06, "loss": 0.4591, "step": 39820 }, { "epoch": 19.04, "learning_rate": 1.812285635334586e-06, "loss": 0.2738, "step": 39830 }, { "epoch": 19.04, "learning_rate": 1.8088740282686587e-06, "loss": 0.3304, "step": 39840 }, { "epoch": 19.04, "learning_rate": 1.8054651949883941e-06, "loss": 0.6303, "step": 39850 }, { "epoch": 19.04, "learning_rate": 1.8020591371552175e-06, "loss": 0.7347, "step": 39860 }, { "epoch": 19.04, "learning_rate": 1.7986558564291935e-06, "loss": 0.5348, "step": 39870 }, { "epoch": 19.04, "learning_rate": 1.7952553544690462e-06, "loss": 0.2213, "step": 39880 }, { "epoch": 19.04, "learning_rate": 1.791857632932138e-06, "loss": 0.4525, "step": 39890 }, { "epoch": 19.04, "learning_rate": 1.7884626934744759e-06, "loss": 0.3864, "step": 39900 }, { "epoch": 19.04, "learning_rate": 1.785070537750712e-06, "loss": 0.5979, "step": 39910 }, { "epoch": 19.04, "learning_rate": 1.78168116741414e-06, "loss": 0.3264, "step": 39920 }, { "epoch": 19.04, "learning_rate": 1.7782945841167034e-06, "loss": 0.3334, "step": 39930 }, { "epoch": 19.04, "learning_rate": 1.7749107895089774e-06, "loss": 0.6249, "step": 39940 }, { "epoch": 19.04, "learning_rate": 1.7715297852401858e-06, "loss": 0.3204, "step": 39950 }, { "epoch": 19.04, "learning_rate": 1.7681515729581825e-06, "loss": 0.2486, "step": 39960 }, { "epoch": 19.04, "learning_rate": 1.764776154309473e-06, "loss": 0.3564, "step": 39970 }, { "epoch": 19.04, "learning_rate": 1.7614035309391938e-06, "loss": 0.6261, "step": 39980 }, { "epoch": 19.04, "learning_rate": 1.758033704491121e-06, "loss": 0.5859, "step": 39990 }, { "epoch": 19.04, "learning_rate": 1.7546666766076658e-06, "loss": 1.0645, "step": 40000 }, { "epoch": 19.04, "eval_accuracy": 0.8810526315789474, "eval_f1": 0.8810526315789474, "eval_loss": 0.7618152499198914, "eval_runtime": 771.4019, "eval_samples_per_second": 6.158, "eval_steps_per_second": 1.54, "step": 40000 }, { "epoch": 20.0, "learning_rate": 1.751302448929877e-06, "loss": 0.2598, "step": 40010 }, { "epoch": 20.0, "learning_rate": 1.7479410230974375e-06, "loss": 0.4122, "step": 40020 }, { "epoch": 20.0, "learning_rate": 1.744582400748668e-06, "loss": 0.283, "step": 40030 }, { "epoch": 20.0, "learning_rate": 1.741226583520521e-06, "loss": 0.5017, "step": 40040 }, { "epoch": 20.0, "learning_rate": 1.7378735730485766e-06, "loss": 0.9872, "step": 40050 }, { "epoch": 20.0, "learning_rate": 1.7345233709670513e-06, "loss": 0.5128, "step": 40060 }, { "epoch": 20.0, "learning_rate": 1.7311759789087977e-06, "loss": 0.4265, "step": 40070 }, { "epoch": 20.0, "learning_rate": 1.7278313985052915e-06, "loss": 0.5808, "step": 40080 }, { "epoch": 20.0, "learning_rate": 1.7244896313866404e-06, "loss": 0.3198, "step": 40090 }, { "epoch": 20.0, "learning_rate": 1.7211506791815815e-06, "loss": 0.5577, "step": 40100 }, { "epoch": 20.0, "learning_rate": 1.7178145435174772e-06, "loss": 0.5626, "step": 40110 }, { "epoch": 20.0, "learning_rate": 1.7144812260203234e-06, "loss": 0.3418, "step": 40120 }, { "epoch": 20.0, "learning_rate": 1.7111507283147392e-06, "loss": 0.2519, "step": 40130 }, { "epoch": 20.0, "learning_rate": 1.7078230520239644e-06, "loss": 0.3026, "step": 40140 }, { "epoch": 20.0, "learning_rate": 1.7044981987698672e-06, "loss": 0.3616, "step": 40150 }, { "epoch": 20.0, "learning_rate": 1.7011761701729456e-06, "loss": 0.4037, "step": 40160 }, { "epoch": 20.0, "learning_rate": 1.6978569678523137e-06, "loss": 0.2516, "step": 40170 }, { "epoch": 20.0, "learning_rate": 1.6945405934257107e-06, "loss": 0.3255, "step": 40180 }, { "epoch": 20.0, "learning_rate": 1.6912270485094972e-06, "loss": 0.3581, "step": 40190 }, { "epoch": 20.0, "learning_rate": 1.6879163347186536e-06, "loss": 0.8509, "step": 40200 }, { "epoch": 20.0, "learning_rate": 1.6846084536667816e-06, "loss": 0.6085, "step": 40210 }, { "epoch": 20.0, "learning_rate": 1.681303406966106e-06, "loss": 0.3777, "step": 40220 }, { "epoch": 20.0, "learning_rate": 1.6780011962274622e-06, "loss": 0.3812, "step": 40230 }, { "epoch": 20.0, "learning_rate": 1.6747018230603087e-06, "loss": 0.9465, "step": 40240 }, { "epoch": 20.0, "learning_rate": 1.6714052890727182e-06, "loss": 0.4388, "step": 40250 }, { "epoch": 20.01, "learning_rate": 1.668111595871385e-06, "loss": 0.6455, "step": 40260 }, { "epoch": 20.01, "learning_rate": 1.6648207450616137e-06, "loss": 0.2788, "step": 40270 }, { "epoch": 20.01, "learning_rate": 1.661532738247325e-06, "loss": 0.7803, "step": 40280 }, { "epoch": 20.01, "learning_rate": 1.6582475770310526e-06, "loss": 0.5762, "step": 40290 }, { "epoch": 20.01, "learning_rate": 1.6549652630139425e-06, "loss": 0.5437, "step": 40300 }, { "epoch": 20.01, "learning_rate": 1.6516857977957617e-06, "loss": 0.5942, "step": 40310 }, { "epoch": 20.01, "learning_rate": 1.6484091829748748e-06, "loss": 0.7837, "step": 40320 }, { "epoch": 20.01, "learning_rate": 1.6451354201482654e-06, "loss": 0.6525, "step": 40330 }, { "epoch": 20.01, "learning_rate": 1.6418645109115255e-06, "loss": 0.5469, "step": 40340 }, { "epoch": 20.01, "learning_rate": 1.6385964568588587e-06, "loss": 0.2445, "step": 40350 }, { "epoch": 20.01, "learning_rate": 1.6353312595830746e-06, "loss": 0.3352, "step": 40360 }, { "epoch": 20.01, "learning_rate": 1.6320689206755892e-06, "loss": 0.3698, "step": 40370 }, { "epoch": 20.01, "learning_rate": 1.6288094417264273e-06, "loss": 0.4792, "step": 40380 }, { "epoch": 20.01, "learning_rate": 1.6255528243242198e-06, "loss": 0.5886, "step": 40390 }, { "epoch": 20.01, "learning_rate": 1.622299070056202e-06, "loss": 0.4715, "step": 40400 }, { "epoch": 20.01, "learning_rate": 1.6190481805082146e-06, "loss": 0.5926, "step": 40410 }, { "epoch": 20.01, "learning_rate": 1.615800157264701e-06, "loss": 0.5368, "step": 40420 }, { "epoch": 20.01, "learning_rate": 1.612555001908709e-06, "loss": 0.2857, "step": 40430 }, { "epoch": 20.01, "learning_rate": 1.609312716021886e-06, "loss": 0.6023, "step": 40440 }, { "epoch": 20.01, "learning_rate": 1.6060733011844852e-06, "loss": 0.3041, "step": 40450 }, { "epoch": 20.01, "learning_rate": 1.602836758975358e-06, "loss": 0.2443, "step": 40460 }, { "epoch": 20.01, "learning_rate": 1.5996030909719554e-06, "loss": 0.4438, "step": 40470 }, { "epoch": 20.01, "learning_rate": 1.5963722987503259e-06, "loss": 0.6301, "step": 40480 }, { "epoch": 20.01, "learning_rate": 1.5931443838851215e-06, "loss": 0.3789, "step": 40490 }, { "epoch": 20.01, "learning_rate": 1.5899193479495858e-06, "loss": 0.7228, "step": 40500 }, { "epoch": 20.01, "learning_rate": 1.5866971925155641e-06, "loss": 0.3848, "step": 40510 }, { "epoch": 20.01, "learning_rate": 1.583477919153495e-06, "loss": 0.2661, "step": 40520 }, { "epoch": 20.01, "learning_rate": 1.5802615294324137e-06, "loss": 0.503, "step": 40530 }, { "epoch": 20.01, "learning_rate": 1.577048024919947e-06, "loss": 0.3205, "step": 40540 }, { "epoch": 20.01, "learning_rate": 1.5738374071823219e-06, "loss": 0.7299, "step": 40550 }, { "epoch": 20.01, "learning_rate": 1.5706296777843525e-06, "loss": 0.4654, "step": 40560 }, { "epoch": 20.01, "learning_rate": 1.5674248382894501e-06, "loss": 0.4685, "step": 40570 }, { "epoch": 20.01, "learning_rate": 1.5642228902596076e-06, "loss": 0.5656, "step": 40580 }, { "epoch": 20.01, "learning_rate": 1.5610238352554214e-06, "loss": 0.3782, "step": 40590 }, { "epoch": 20.01, "learning_rate": 1.5578276748360706e-06, "loss": 0.275, "step": 40600 }, { "epoch": 20.01, "learning_rate": 1.5546344105593247e-06, "loss": 0.6039, "step": 40610 }, { "epoch": 20.01, "learning_rate": 1.5514440439815419e-06, "loss": 0.5187, "step": 40620 }, { "epoch": 20.01, "learning_rate": 1.5482565766576661e-06, "loss": 0.4336, "step": 40630 }, { "epoch": 20.01, "learning_rate": 1.5450720101412346e-06, "loss": 0.6509, "step": 40640 }, { "epoch": 20.01, "learning_rate": 1.5418903459843633e-06, "loss": 0.2785, "step": 40650 }, { "epoch": 20.01, "learning_rate": 1.5387115857377602e-06, "loss": 0.355, "step": 40660 }, { "epoch": 20.01, "learning_rate": 1.535535730950708e-06, "loss": 0.5541, "step": 40670 }, { "epoch": 20.01, "learning_rate": 1.5323627831710843e-06, "loss": 0.2017, "step": 40680 }, { "epoch": 20.01, "learning_rate": 1.5291927439453457e-06, "loss": 0.5485, "step": 40690 }, { "epoch": 20.01, "learning_rate": 1.526025614818528e-06, "loss": 0.6539, "step": 40700 }, { "epoch": 20.01, "learning_rate": 1.5228613973342541e-06, "loss": 1.0763, "step": 40710 }, { "epoch": 20.01, "learning_rate": 1.5197000930347237e-06, "loss": 0.388, "step": 40720 }, { "epoch": 20.01, "learning_rate": 1.5165417034607169e-06, "loss": 0.534, "step": 40730 }, { "epoch": 20.01, "learning_rate": 1.5133862301515977e-06, "loss": 0.3051, "step": 40740 }, { "epoch": 20.02, "learning_rate": 1.5102336746453054e-06, "loss": 0.4692, "step": 40750 }, { "epoch": 20.02, "learning_rate": 1.5070840384783534e-06, "loss": 0.5556, "step": 40760 }, { "epoch": 20.02, "learning_rate": 1.5039373231858372e-06, "loss": 0.5806, "step": 40770 }, { "epoch": 20.02, "learning_rate": 1.5007935303014297e-06, "loss": 0.6341, "step": 40780 }, { "epoch": 20.02, "learning_rate": 1.4976526613573772e-06, "loss": 0.6556, "step": 40790 }, { "epoch": 20.02, "learning_rate": 1.4945147178844997e-06, "loss": 0.4185, "step": 40800 }, { "epoch": 20.02, "learning_rate": 1.4913797014121927e-06, "loss": 0.4563, "step": 40810 }, { "epoch": 20.02, "learning_rate": 1.4882476134684245e-06, "loss": 0.5987, "step": 40820 }, { "epoch": 20.02, "learning_rate": 1.4851184555797397e-06, "loss": 0.3656, "step": 40830 }, { "epoch": 20.02, "learning_rate": 1.481992229271252e-06, "loss": 0.556, "step": 40840 }, { "epoch": 20.02, "learning_rate": 1.4788689360666423e-06, "loss": 0.7573, "step": 40850 }, { "epoch": 20.02, "learning_rate": 1.4757485774881682e-06, "loss": 0.2427, "step": 40860 }, { "epoch": 20.02, "learning_rate": 1.4726311550566512e-06, "loss": 0.5579, "step": 40870 }, { "epoch": 20.02, "learning_rate": 1.4695166702914903e-06, "loss": 0.4394, "step": 40880 }, { "epoch": 20.02, "learning_rate": 1.4664051247106443e-06, "loss": 0.7392, "step": 40890 }, { "epoch": 20.02, "learning_rate": 1.4632965198306448e-06, "loss": 0.6134, "step": 40900 }, { "epoch": 20.02, "learning_rate": 1.460190857166586e-06, "loss": 0.6297, "step": 40910 }, { "epoch": 20.02, "learning_rate": 1.4570881382321298e-06, "loss": 0.5465, "step": 40920 }, { "epoch": 20.02, "learning_rate": 1.453988364539508e-06, "loss": 0.503, "step": 40930 }, { "epoch": 20.02, "learning_rate": 1.4508915375995082e-06, "loss": 0.3181, "step": 40940 }, { "epoch": 20.02, "learning_rate": 1.4477976589214873e-06, "loss": 0.6384, "step": 40950 }, { "epoch": 20.02, "learning_rate": 1.4447067300133621e-06, "loss": 0.6659, "step": 40960 }, { "epoch": 20.02, "learning_rate": 1.4416187523816186e-06, "loss": 0.605, "step": 40970 }, { "epoch": 20.02, "learning_rate": 1.4385337275312969e-06, "loss": 0.5757, "step": 40980 }, { "epoch": 20.02, "learning_rate": 1.4354516569660009e-06, "loss": 0.387, "step": 40990 }, { "epoch": 20.02, "learning_rate": 1.432372542187895e-06, "loss": 0.5196, "step": 41000 }, { "epoch": 20.02, "learning_rate": 1.429296384697701e-06, "loss": 0.3582, "step": 41010 }, { "epoch": 20.02, "learning_rate": 1.4262231859947016e-06, "loss": 0.516, "step": 41020 }, { "epoch": 20.02, "learning_rate": 1.423152947576736e-06, "loss": 0.3667, "step": 41030 }, { "epoch": 20.02, "learning_rate": 1.4200856709402014e-06, "loss": 0.3674, "step": 41040 }, { "epoch": 20.02, "learning_rate": 1.4170213575800518e-06, "loss": 0.7149, "step": 41050 }, { "epoch": 20.02, "learning_rate": 1.4139600089897933e-06, "loss": 0.2172, "step": 41060 }, { "epoch": 20.02, "learning_rate": 1.4109016266614934e-06, "loss": 0.5621, "step": 41070 }, { "epoch": 20.02, "learning_rate": 1.4078462120857708e-06, "loss": 0.4305, "step": 41080 }, { "epoch": 20.02, "learning_rate": 1.4047937667517954e-06, "loss": 0.4987, "step": 41090 }, { "epoch": 20.02, "learning_rate": 1.4017442921472933e-06, "loss": 0.6915, "step": 41100 }, { "epoch": 20.02, "learning_rate": 1.3986977897585398e-06, "loss": 0.5445, "step": 41110 }, { "epoch": 20.02, "learning_rate": 1.3956542610703654e-06, "loss": 0.4355, "step": 41120 }, { "epoch": 20.02, "learning_rate": 1.392613707566147e-06, "loss": 0.7685, "step": 41130 }, { "epoch": 20.02, "learning_rate": 1.3895761307278137e-06, "loss": 0.4018, "step": 41140 }, { "epoch": 20.02, "learning_rate": 1.3865415320358427e-06, "loss": 0.6319, "step": 41150 }, { "epoch": 20.02, "learning_rate": 1.3835099129692637e-06, "loss": 0.6525, "step": 41160 }, { "epoch": 20.02, "learning_rate": 1.3804812750056494e-06, "loss": 0.5524, "step": 41170 }, { "epoch": 20.02, "learning_rate": 1.3774556196211206e-06, "loss": 0.2176, "step": 41180 }, { "epoch": 20.02, "learning_rate": 1.3744329482903447e-06, "loss": 0.4774, "step": 41190 }, { "epoch": 20.02, "learning_rate": 1.3714132624865363e-06, "loss": 0.8767, "step": 41200 }, { "epoch": 20.02, "learning_rate": 1.368396563681453e-06, "loss": 0.9403, "step": 41210 }, { "epoch": 20.02, "learning_rate": 1.3653828533453968e-06, "loss": 0.6222, "step": 41220 }, { "epoch": 20.02, "learning_rate": 1.3623721329472139e-06, "loss": 0.8261, "step": 41230 }, { "epoch": 20.02, "learning_rate": 1.3593644039542919e-06, "loss": 0.5185, "step": 41240 }, { "epoch": 20.02, "learning_rate": 1.3563596678325607e-06, "loss": 0.6334, "step": 41250 }, { "epoch": 20.03, "learning_rate": 1.3533579260464956e-06, "loss": 0.3334, "step": 41260 }, { "epoch": 20.03, "learning_rate": 1.3503591800591072e-06, "loss": 0.3246, "step": 41270 }, { "epoch": 20.03, "learning_rate": 1.3473634313319497e-06, "loss": 0.661, "step": 41280 }, { "epoch": 20.03, "learning_rate": 1.3443706813251082e-06, "loss": 0.5927, "step": 41290 }, { "epoch": 20.03, "learning_rate": 1.3413809314972197e-06, "loss": 0.2087, "step": 41300 }, { "epoch": 20.03, "learning_rate": 1.33839418330545e-06, "loss": 0.3979, "step": 41310 }, { "epoch": 20.03, "learning_rate": 1.3354104382055022e-06, "loss": 0.654, "step": 41320 }, { "epoch": 20.03, "learning_rate": 1.3324296976516195e-06, "loss": 0.5012, "step": 41330 }, { "epoch": 20.03, "learning_rate": 1.3294519630965752e-06, "loss": 0.5119, "step": 41340 }, { "epoch": 20.03, "learning_rate": 1.3264772359916855e-06, "loss": 0.2278, "step": 41350 }, { "epoch": 20.03, "learning_rate": 1.3235055177867942e-06, "loss": 0.5415, "step": 41360 }, { "epoch": 20.03, "learning_rate": 1.3205368099302818e-06, "loss": 0.6114, "step": 41370 }, { "epoch": 20.03, "learning_rate": 1.3175711138690582e-06, "loss": 0.3263, "step": 41380 }, { "epoch": 20.03, "learning_rate": 1.3146084310485668e-06, "loss": 0.4803, "step": 41390 }, { "epoch": 20.03, "learning_rate": 1.3116487629127872e-06, "loss": 0.3701, "step": 41400 }, { "epoch": 20.03, "learning_rate": 1.308692110904223e-06, "loss": 0.5197, "step": 41410 }, { "epoch": 20.03, "learning_rate": 1.3057384764639107e-06, "loss": 0.7112, "step": 41420 }, { "epoch": 20.03, "learning_rate": 1.3027878610314173e-06, "loss": 0.5485, "step": 41430 }, { "epoch": 20.03, "learning_rate": 1.2998402660448339e-06, "loss": 0.3948, "step": 41440 }, { "epoch": 20.03, "learning_rate": 1.2968956929407854e-06, "loss": 0.9255, "step": 41450 }, { "epoch": 20.03, "learning_rate": 1.2939541431544224e-06, "loss": 0.6046, "step": 41460 }, { "epoch": 20.03, "learning_rate": 1.2910156181194163e-06, "loss": 0.5436, "step": 41470 }, { "epoch": 20.03, "learning_rate": 1.288080119267969e-06, "loss": 0.6322, "step": 41480 }, { "epoch": 20.03, "learning_rate": 1.2851476480308092e-06, "loss": 0.8129, "step": 41490 }, { "epoch": 20.03, "learning_rate": 1.282218205837188e-06, "loss": 0.2926, "step": 41500 }, { "epoch": 20.03, "learning_rate": 1.2792917941148778e-06, "loss": 0.4101, "step": 41510 }, { "epoch": 20.03, "learning_rate": 1.2763684142901778e-06, "loss": 0.6326, "step": 41520 }, { "epoch": 20.03, "learning_rate": 1.2734480677879066e-06, "loss": 0.7398, "step": 41530 }, { "epoch": 20.03, "learning_rate": 1.270530756031404e-06, "loss": 0.5005, "step": 41540 }, { "epoch": 20.03, "learning_rate": 1.2676164804425391e-06, "loss": 0.3263, "step": 41550 }, { "epoch": 20.03, "learning_rate": 1.2647052424416878e-06, "loss": 0.3796, "step": 41560 }, { "epoch": 20.03, "learning_rate": 1.2617970434477532e-06, "loss": 0.491, "step": 41570 }, { "epoch": 20.03, "learning_rate": 1.258891884878156e-06, "loss": 0.426, "step": 41580 }, { "epoch": 20.03, "learning_rate": 1.2559897681488377e-06, "loss": 0.712, "step": 41590 }, { "epoch": 20.03, "learning_rate": 1.2530906946742544e-06, "loss": 0.29, "step": 41600 }, { "epoch": 20.03, "learning_rate": 1.2501946658673771e-06, "loss": 0.5593, "step": 41610 }, { "epoch": 20.03, "learning_rate": 1.2473016831396962e-06, "loss": 0.5783, "step": 41620 }, { "epoch": 20.03, "learning_rate": 1.2444117479012166e-06, "loss": 0.5131, "step": 41630 }, { "epoch": 20.03, "learning_rate": 1.2415248615604577e-06, "loss": 0.4384, "step": 41640 }, { "epoch": 20.03, "learning_rate": 1.2386410255244518e-06, "loss": 0.307, "step": 41650 }, { "epoch": 20.03, "learning_rate": 1.235760241198747e-06, "loss": 0.4889, "step": 41660 }, { "epoch": 20.03, "learning_rate": 1.2328825099873995e-06, "loss": 0.3653, "step": 41670 }, { "epoch": 20.03, "learning_rate": 1.2300078332929845e-06, "loss": 0.583, "step": 41680 }, { "epoch": 20.03, "learning_rate": 1.2271362125165825e-06, "loss": 0.5165, "step": 41690 }, { "epoch": 20.03, "learning_rate": 1.224267649057788e-06, "loss": 0.6934, "step": 41700 }, { "epoch": 20.03, "learning_rate": 1.2214021443147022e-06, "loss": 0.4905, "step": 41710 }, { "epoch": 20.03, "learning_rate": 1.2185396996839376e-06, "loss": 0.6703, "step": 41720 }, { "epoch": 20.03, "learning_rate": 1.2156803165606156e-06, "loss": 0.7073, "step": 41730 }, { "epoch": 20.03, "learning_rate": 1.2128239963383647e-06, "loss": 0.314, "step": 41740 }, { "epoch": 20.04, "learning_rate": 1.2099707404093204e-06, "loss": 0.6241, "step": 41750 }, { "epoch": 20.04, "learning_rate": 1.2071205501641256e-06, "loss": 0.6667, "step": 41760 }, { "epoch": 20.04, "learning_rate": 1.2042734269919254e-06, "loss": 0.3184, "step": 41770 }, { "epoch": 20.04, "learning_rate": 1.2014293722803782e-06, "loss": 0.469, "step": 41780 }, { "epoch": 20.04, "learning_rate": 1.1985883874156386e-06, "loss": 0.161, "step": 41790 }, { "epoch": 20.04, "learning_rate": 1.1957504737823682e-06, "loss": 0.4946, "step": 41800 }, { "epoch": 20.04, "learning_rate": 1.1929156327637321e-06, "loss": 0.3183, "step": 41810 }, { "epoch": 20.04, "learning_rate": 1.1900838657413978e-06, "loss": 0.1142, "step": 41820 }, { "epoch": 20.04, "learning_rate": 1.187255174095533e-06, "loss": 0.5159, "step": 41830 }, { "epoch": 20.04, "learning_rate": 1.1844295592048086e-06, "loss": 0.1947, "step": 41840 }, { "epoch": 20.04, "learning_rate": 1.181607022446394e-06, "loss": 0.3758, "step": 41850 }, { "epoch": 20.04, "learning_rate": 1.1787875651959606e-06, "loss": 0.4884, "step": 41860 }, { "epoch": 20.04, "learning_rate": 1.175971188827675e-06, "loss": 0.6049, "step": 41870 }, { "epoch": 20.04, "learning_rate": 1.173157894714209e-06, "loss": 0.6191, "step": 41880 }, { "epoch": 20.04, "learning_rate": 1.1703476842267253e-06, "loss": 0.4537, "step": 41890 }, { "epoch": 20.04, "learning_rate": 1.1675405587348885e-06, "loss": 0.6927, "step": 41900 }, { "epoch": 20.04, "learning_rate": 1.1647365196068524e-06, "loss": 0.1467, "step": 41910 }, { "epoch": 20.04, "learning_rate": 1.1619355682092774e-06, "loss": 0.4122, "step": 41920 }, { "epoch": 20.04, "learning_rate": 1.1591377059073103e-06, "loss": 0.3217, "step": 41930 }, { "epoch": 20.04, "learning_rate": 1.1563429340645955e-06, "loss": 0.8771, "step": 41940 }, { "epoch": 20.04, "learning_rate": 1.1535512540432707e-06, "loss": 0.6063, "step": 41950 }, { "epoch": 20.04, "learning_rate": 1.1507626672039656e-06, "loss": 0.5014, "step": 41960 }, { "epoch": 20.04, "learning_rate": 1.1479771749058071e-06, "loss": 0.5023, "step": 41970 }, { "epoch": 20.04, "learning_rate": 1.1451947785064086e-06, "loss": 0.7961, "step": 41980 }, { "epoch": 20.04, "learning_rate": 1.1424154793618775e-06, "loss": 0.3274, "step": 41990 }, { "epoch": 20.04, "learning_rate": 1.1396392788268054e-06, "loss": 0.4949, "step": 42000 }, { "epoch": 20.04, "eval_accuracy": 0.8747368421052631, "eval_f1": 0.8747368421052631, "eval_loss": 0.7881549000740051, "eval_runtime": 742.269, "eval_samples_per_second": 6.399, "eval_steps_per_second": 1.6, "step": 42000 }, { "epoch": 21.0, "learning_rate": 1.1368661782542842e-06, "loss": 0.5779, "step": 42010 }, { "epoch": 21.0, "learning_rate": 1.1340961789958867e-06, "loss": 0.7587, "step": 42020 }, { "epoch": 21.0, "learning_rate": 1.1313292824016783e-06, "loss": 0.7537, "step": 42030 }, { "epoch": 21.0, "learning_rate": 1.1285654898202107e-06, "loss": 0.2199, "step": 42040 }, { "epoch": 21.0, "learning_rate": 1.1258048025985219e-06, "loss": 0.4507, "step": 42050 }, { "epoch": 21.0, "learning_rate": 1.123047222082135e-06, "loss": 0.3394, "step": 42060 }, { "epoch": 21.0, "learning_rate": 1.120292749615067e-06, "loss": 0.812, "step": 42070 }, { "epoch": 21.0, "learning_rate": 1.117541386539812e-06, "loss": 0.4893, "step": 42080 }, { "epoch": 21.0, "learning_rate": 1.1147931341973493e-06, "loss": 0.7984, "step": 42090 }, { "epoch": 21.0, "learning_rate": 1.1120479939271424e-06, "loss": 0.3631, "step": 42100 }, { "epoch": 21.0, "learning_rate": 1.1093059670671444e-06, "loss": 0.2635, "step": 42110 }, { "epoch": 21.0, "learning_rate": 1.1065670549537818e-06, "loss": 0.1696, "step": 42120 }, { "epoch": 21.0, "learning_rate": 1.1038312589219687e-06, "loss": 0.4704, "step": 42130 }, { "epoch": 21.0, "learning_rate": 1.1010985803050986e-06, "loss": 0.4442, "step": 42140 }, { "epoch": 21.0, "learning_rate": 1.0983690204350433e-06, "loss": 0.1498, "step": 42150 }, { "epoch": 21.0, "learning_rate": 1.0956425806421622e-06, "loss": 0.4469, "step": 42160 }, { "epoch": 21.0, "learning_rate": 1.0929192622552872e-06, "loss": 0.3605, "step": 42170 }, { "epoch": 21.0, "learning_rate": 1.0901990666017283e-06, "loss": 1.0167, "step": 42180 }, { "epoch": 21.0, "learning_rate": 1.087481995007276e-06, "loss": 0.9402, "step": 42190 }, { "epoch": 21.0, "learning_rate": 1.0847680487962005e-06, "loss": 0.5113, "step": 42200 }, { "epoch": 21.0, "learning_rate": 1.0820572292912442e-06, "loss": 0.2354, "step": 42210 }, { "epoch": 21.0, "learning_rate": 1.0793495378136291e-06, "loss": 0.1514, "step": 42220 }, { "epoch": 21.0, "learning_rate": 1.0766449756830507e-06, "loss": 0.4024, "step": 42230 }, { "epoch": 21.0, "learning_rate": 1.0739435442176783e-06, "loss": 0.1413, "step": 42240 }, { "epoch": 21.0, "learning_rate": 1.0712452447341584e-06, "loss": 0.4313, "step": 42250 }, { "epoch": 21.01, "learning_rate": 1.0685500785476093e-06, "loss": 0.3361, "step": 42260 }, { "epoch": 21.01, "learning_rate": 1.0658580469716214e-06, "loss": 0.5155, "step": 42270 }, { "epoch": 21.01, "learning_rate": 1.0631691513182585e-06, "loss": 0.7166, "step": 42280 }, { "epoch": 21.01, "learning_rate": 1.0604833928980537e-06, "loss": 0.5803, "step": 42290 }, { "epoch": 21.01, "learning_rate": 1.0578007730200167e-06, "loss": 0.2315, "step": 42300 }, { "epoch": 21.01, "learning_rate": 1.0551212929916225e-06, "loss": 0.4389, "step": 42310 }, { "epoch": 21.01, "learning_rate": 1.0524449541188174e-06, "loss": 0.6883, "step": 42320 }, { "epoch": 21.01, "learning_rate": 1.0497717577060153e-06, "loss": 0.5107, "step": 42330 }, { "epoch": 21.01, "learning_rate": 1.0471017050560999e-06, "loss": 0.5452, "step": 42340 }, { "epoch": 21.01, "learning_rate": 1.0444347974704235e-06, "loss": 0.2522, "step": 42350 }, { "epoch": 21.01, "learning_rate": 1.0417710362488031e-06, "loss": 0.2507, "step": 42360 }, { "epoch": 21.01, "learning_rate": 1.039110422689525e-06, "loss": 0.2414, "step": 42370 }, { "epoch": 21.01, "learning_rate": 1.036452958089338e-06, "loss": 1.171, "step": 42380 }, { "epoch": 21.01, "learning_rate": 1.0337986437434584e-06, "loss": 0.3574, "step": 42390 }, { "epoch": 21.01, "learning_rate": 1.031147480945569e-06, "loss": 0.3141, "step": 42400 }, { "epoch": 21.01, "learning_rate": 1.0284994709878125e-06, "loss": 0.8145, "step": 42410 }, { "epoch": 21.01, "learning_rate": 1.0258546151607978e-06, "loss": 0.5917, "step": 42420 }, { "epoch": 21.01, "learning_rate": 1.0232129147535943e-06, "loss": 0.5349, "step": 42430 }, { "epoch": 21.01, "learning_rate": 1.0205743710537355e-06, "loss": 0.5481, "step": 42440 }, { "epoch": 21.01, "learning_rate": 1.0179389853472152e-06, "loss": 0.3898, "step": 42450 }, { "epoch": 21.01, "learning_rate": 1.0153067589184893e-06, "loss": 0.4142, "step": 42460 }, { "epoch": 21.01, "learning_rate": 1.0126776930504706e-06, "loss": 0.4881, "step": 42470 }, { "epoch": 21.01, "learning_rate": 1.010051789024535e-06, "loss": 0.4907, "step": 42480 }, { "epoch": 21.01, "learning_rate": 1.0074290481205182e-06, "loss": 0.5862, "step": 42490 }, { "epoch": 21.01, "learning_rate": 1.0048094716167097e-06, "loss": 0.2149, "step": 42500 }, { "epoch": 21.01, "learning_rate": 1.0021930607898611e-06, "loss": 0.5522, "step": 42510 }, { "epoch": 21.01, "learning_rate": 9.995798169151788e-07, "loss": 0.5393, "step": 42520 }, { "epoch": 21.01, "learning_rate": 9.969697412663247e-07, "loss": 0.4814, "step": 42530 }, { "epoch": 21.01, "learning_rate": 9.9436283511542e-07, "loss": 0.364, "step": 42540 }, { "epoch": 21.01, "learning_rate": 9.917590997330377e-07, "loss": 0.5711, "step": 42550 }, { "epoch": 21.01, "learning_rate": 9.891585363882066e-07, "loss": 0.5678, "step": 42560 }, { "epoch": 21.01, "learning_rate": 9.865611463484108e-07, "loss": 0.1107, "step": 42570 }, { "epoch": 21.01, "learning_rate": 9.839669308795846e-07, "loss": 0.6555, "step": 42580 }, { "epoch": 21.01, "learning_rate": 9.813758912461204e-07, "loss": 0.987, "step": 42590 }, { "epoch": 21.01, "learning_rate": 9.787880287108574e-07, "loss": 0.3986, "step": 42600 }, { "epoch": 21.01, "learning_rate": 9.762033445350901e-07, "loss": 0.498, "step": 42610 }, { "epoch": 21.01, "learning_rate": 9.736218399785573e-07, "loss": 0.5059, "step": 42620 }, { "epoch": 21.01, "learning_rate": 9.710435162994585e-07, "loss": 0.4711, "step": 42630 }, { "epoch": 21.01, "learning_rate": 9.68468374754435e-07, "loss": 0.2611, "step": 42640 }, { "epoch": 21.01, "learning_rate": 9.658964165985798e-07, "loss": 0.4665, "step": 42650 }, { "epoch": 21.01, "learning_rate": 9.63327643085434e-07, "loss": 1.166, "step": 42660 }, { "epoch": 21.01, "learning_rate": 9.607620554669846e-07, "loss": 0.3244, "step": 42670 }, { "epoch": 21.01, "learning_rate": 9.581996549936721e-07, "loss": 0.8249, "step": 42680 }, { "epoch": 21.01, "learning_rate": 9.55640442914376e-07, "loss": 0.5912, "step": 42690 }, { "epoch": 21.01, "learning_rate": 9.530844204764286e-07, "loss": 0.4654, "step": 42700 }, { "epoch": 21.01, "learning_rate": 9.505315889256005e-07, "loss": 0.5017, "step": 42710 }, { "epoch": 21.01, "learning_rate": 9.479819495061102e-07, "loss": 0.2483, "step": 42720 }, { "epoch": 21.01, "learning_rate": 9.454355034606241e-07, "loss": 0.2872, "step": 42730 }, { "epoch": 21.01, "learning_rate": 9.428922520302479e-07, "loss": 0.3335, "step": 42740 }, { "epoch": 21.02, "learning_rate": 9.40352196454532e-07, "loss": 0.6583, "step": 42750 }, { "epoch": 21.02, "learning_rate": 9.378153379714682e-07, "loss": 0.6831, "step": 42760 }, { "epoch": 21.02, "learning_rate": 9.352816778174878e-07, "loss": 0.1119, "step": 42770 }, { "epoch": 21.02, "learning_rate": 9.327512172274711e-07, "loss": 0.6276, "step": 42780 }, { "epoch": 21.02, "learning_rate": 9.302239574347323e-07, "loss": 0.1037, "step": 42790 }, { "epoch": 21.02, "learning_rate": 9.276998996710248e-07, "loss": 0.432, "step": 42800 }, { "epoch": 21.02, "learning_rate": 9.251790451665426e-07, "loss": 0.5301, "step": 42810 }, { "epoch": 21.02, "learning_rate": 9.226613951499246e-07, "loss": 0.2378, "step": 42820 }, { "epoch": 21.02, "learning_rate": 9.201469508482394e-07, "loss": 0.6031, "step": 42830 }, { "epoch": 21.02, "learning_rate": 9.176357134869981e-07, "loss": 0.5935, "step": 42840 }, { "epoch": 21.02, "learning_rate": 9.15127684290146e-07, "loss": 0.752, "step": 42850 }, { "epoch": 21.02, "learning_rate": 9.126228644800669e-07, "loss": 0.4204, "step": 42860 }, { "epoch": 21.02, "learning_rate": 9.101212552775793e-07, "loss": 0.1586, "step": 42870 }, { "epoch": 21.02, "learning_rate": 9.076228579019377e-07, "loss": 0.7899, "step": 42880 }, { "epoch": 21.02, "learning_rate": 9.051276735708292e-07, "loss": 0.1739, "step": 42890 }, { "epoch": 21.02, "learning_rate": 9.026357035003774e-07, "loss": 0.1888, "step": 42900 }, { "epoch": 21.02, "learning_rate": 9.001469489051367e-07, "loss": 0.1448, "step": 42910 }, { "epoch": 21.02, "learning_rate": 8.976614109980985e-07, "loss": 0.4078, "step": 42920 }, { "epoch": 21.02, "learning_rate": 8.951790909906829e-07, "loss": 0.628, "step": 42930 }, { "epoch": 21.02, "learning_rate": 8.926999900927413e-07, "loss": 0.6041, "step": 42940 }, { "epoch": 21.02, "learning_rate": 8.902241095125588e-07, "loss": 0.4989, "step": 42950 }, { "epoch": 21.02, "learning_rate": 8.877514504568484e-07, "loss": 0.4466, "step": 42960 }, { "epoch": 21.02, "learning_rate": 8.852820141307555e-07, "loss": 0.4655, "step": 42970 }, { "epoch": 21.02, "learning_rate": 8.828158017378512e-07, "loss": 0.7187, "step": 42980 }, { "epoch": 21.02, "learning_rate": 8.803528144801393e-07, "loss": 0.8563, "step": 42990 }, { "epoch": 21.02, "learning_rate": 8.778930535580476e-07, "loss": 0.516, "step": 43000 }, { "epoch": 21.02, "learning_rate": 8.754365201704373e-07, "loss": 0.6609, "step": 43010 }, { "epoch": 21.02, "learning_rate": 8.729832155145897e-07, "loss": 0.4027, "step": 43020 }, { "epoch": 21.02, "learning_rate": 8.705331407862185e-07, "loss": 0.2548, "step": 43030 }, { "epoch": 21.02, "learning_rate": 8.680862971794575e-07, "loss": 0.5615, "step": 43040 }, { "epoch": 21.02, "learning_rate": 8.656426858868705e-07, "loss": 0.4992, "step": 43050 }, { "epoch": 21.02, "learning_rate": 8.632023080994436e-07, "loss": 0.388, "step": 43060 }, { "epoch": 21.02, "learning_rate": 8.607651650065865e-07, "loss": 0.5331, "step": 43070 }, { "epoch": 21.02, "learning_rate": 8.583312577961339e-07, "loss": 0.4323, "step": 43080 }, { "epoch": 21.02, "learning_rate": 8.559005876543427e-07, "loss": 0.4964, "step": 43090 }, { "epoch": 21.02, "learning_rate": 8.534731557658895e-07, "loss": 0.3992, "step": 43100 }, { "epoch": 21.02, "learning_rate": 8.510489633138788e-07, "loss": 0.5065, "step": 43110 }, { "epoch": 21.02, "learning_rate": 8.486280114798311e-07, "loss": 0.5718, "step": 43120 }, { "epoch": 21.02, "learning_rate": 8.462103014436887e-07, "loss": 0.3075, "step": 43130 }, { "epoch": 21.02, "learning_rate": 8.437958343838146e-07, "loss": 0.3258, "step": 43140 }, { "epoch": 21.02, "learning_rate": 8.413846114769907e-07, "loss": 0.5287, "step": 43150 }, { "epoch": 21.02, "learning_rate": 8.389766338984165e-07, "loss": 0.8632, "step": 43160 }, { "epoch": 21.02, "learning_rate": 8.365719028217128e-07, "loss": 0.372, "step": 43170 }, { "epoch": 21.02, "learning_rate": 8.341704194189148e-07, "loss": 0.4632, "step": 43180 }, { "epoch": 21.02, "learning_rate": 8.317721848604754e-07, "loss": 0.2295, "step": 43190 }, { "epoch": 21.02, "learning_rate": 8.293772003152686e-07, "loss": 0.5518, "step": 43200 }, { "epoch": 21.02, "learning_rate": 8.269854669505783e-07, "loss": 0.6363, "step": 43210 }, { "epoch": 21.02, "learning_rate": 8.245969859321062e-07, "loss": 0.4671, "step": 43220 }, { "epoch": 21.02, "learning_rate": 8.222117584239719e-07, "loss": 0.5977, "step": 43230 }, { "epoch": 21.02, "learning_rate": 8.198297855887005e-07, "loss": 0.6138, "step": 43240 }, { "epoch": 21.02, "learning_rate": 8.174510685872417e-07, "loss": 0.6577, "step": 43250 }, { "epoch": 21.03, "learning_rate": 8.150756085789512e-07, "loss": 0.4194, "step": 43260 }, { "epoch": 21.03, "learning_rate": 8.127034067215999e-07, "loss": 0.3516, "step": 43270 }, { "epoch": 21.03, "learning_rate": 8.103344641713695e-07, "loss": 0.3407, "step": 43280 }, { "epoch": 21.03, "learning_rate": 8.079687820828538e-07, "loss": 0.5712, "step": 43290 }, { "epoch": 21.03, "learning_rate": 8.056063616090581e-07, "loss": 0.6257, "step": 43300 }, { "epoch": 21.03, "learning_rate": 8.032472039013988e-07, "loss": 0.3842, "step": 43310 }, { "epoch": 21.03, "learning_rate": 8.008913101096996e-07, "loss": 0.684, "step": 43320 }, { "epoch": 21.03, "learning_rate": 7.985386813821918e-07, "loss": 0.7516, "step": 43330 }, { "epoch": 21.03, "learning_rate": 7.961893188655217e-07, "loss": 0.3816, "step": 43340 }, { "epoch": 21.03, "learning_rate": 7.938432237047392e-07, "loss": 0.5124, "step": 43350 }, { "epoch": 21.03, "learning_rate": 7.915003970433018e-07, "loss": 0.6431, "step": 43360 }, { "epoch": 21.03, "learning_rate": 7.891608400230749e-07, "loss": 0.4608, "step": 43370 }, { "epoch": 21.03, "learning_rate": 7.868245537843311e-07, "loss": 0.5772, "step": 43380 }, { "epoch": 21.03, "learning_rate": 7.844915394657445e-07, "loss": 0.6119, "step": 43390 }, { "epoch": 21.03, "learning_rate": 7.821617982044033e-07, "loss": 0.1979, "step": 43400 }, { "epoch": 21.03, "learning_rate": 7.798353311357931e-07, "loss": 0.4184, "step": 43410 }, { "epoch": 21.03, "learning_rate": 7.775121393938045e-07, "loss": 0.6045, "step": 43420 }, { "epoch": 21.03, "learning_rate": 7.751922241107309e-07, "loss": 0.6312, "step": 43430 }, { "epoch": 21.03, "learning_rate": 7.728755864172754e-07, "loss": 0.3845, "step": 43440 }, { "epoch": 21.03, "learning_rate": 7.705622274425372e-07, "loss": 0.2585, "step": 43450 }, { "epoch": 21.03, "learning_rate": 7.682521483140187e-07, "loss": 0.874, "step": 43460 }, { "epoch": 21.03, "learning_rate": 7.659453501576258e-07, "loss": 0.1983, "step": 43470 }, { "epoch": 21.03, "learning_rate": 7.636418340976609e-07, "loss": 0.5186, "step": 43480 }, { "epoch": 21.03, "learning_rate": 7.613416012568349e-07, "loss": 0.5401, "step": 43490 }, { "epoch": 21.03, "learning_rate": 7.59044652756249e-07, "loss": 0.3666, "step": 43500 }, { "epoch": 21.03, "learning_rate": 7.567509897154088e-07, "loss": 0.2822, "step": 43510 }, { "epoch": 21.03, "learning_rate": 7.54460613252217e-07, "loss": 0.3534, "step": 43520 }, { "epoch": 21.03, "learning_rate": 7.52173524482978e-07, "loss": 0.7955, "step": 43530 }, { "epoch": 21.03, "learning_rate": 7.498897245223904e-07, "loss": 0.6602, "step": 43540 }, { "epoch": 21.03, "learning_rate": 7.476092144835487e-07, "loss": 0.2675, "step": 43550 }, { "epoch": 21.03, "learning_rate": 7.453319954779478e-07, "loss": 0.526, "step": 43560 }, { "epoch": 21.03, "learning_rate": 7.430580686154751e-07, "loss": 0.5293, "step": 43570 }, { "epoch": 21.03, "learning_rate": 7.407874350044155e-07, "loss": 0.8674, "step": 43580 }, { "epoch": 21.03, "learning_rate": 7.38520095751449e-07, "loss": 0.8319, "step": 43590 }, { "epoch": 21.03, "learning_rate": 7.362560519616474e-07, "loss": 0.3474, "step": 43600 }, { "epoch": 21.03, "learning_rate": 7.339953047384795e-07, "loss": 0.7456, "step": 43610 }, { "epoch": 21.03, "learning_rate": 7.31737855183805e-07, "loss": 0.3838, "step": 43620 }, { "epoch": 21.03, "learning_rate": 7.294837043978786e-07, "loss": 0.359, "step": 43630 }, { "epoch": 21.03, "learning_rate": 7.272328534793465e-07, "loss": 0.4701, "step": 43640 }, { "epoch": 21.03, "learning_rate": 7.249853035252437e-07, "loss": 0.468, "step": 43650 }, { "epoch": 21.03, "learning_rate": 7.227410556310016e-07, "loss": 0.468, "step": 43660 }, { "epoch": 21.03, "learning_rate": 7.205001108904375e-07, "loss": 0.4356, "step": 43670 }, { "epoch": 21.03, "learning_rate": 7.182624703957603e-07, "loss": 0.1461, "step": 43680 }, { "epoch": 21.03, "learning_rate": 7.1602813523757e-07, "loss": 0.5284, "step": 43690 }, { "epoch": 21.03, "learning_rate": 7.137971065048549e-07, "loss": 0.6941, "step": 43700 }, { "epoch": 21.03, "learning_rate": 7.115693852849891e-07, "loss": 0.7175, "step": 43710 }, { "epoch": 21.03, "learning_rate": 7.093449726637369e-07, "loss": 0.4247, "step": 43720 }, { "epoch": 21.03, "learning_rate": 7.071238697252533e-07, "loss": 0.3809, "step": 43730 }, { "epoch": 21.03, "learning_rate": 7.049060775520741e-07, "loss": 0.6679, "step": 43740 }, { "epoch": 21.04, "learning_rate": 7.026915972251254e-07, "loss": 0.5361, "step": 43750 }, { "epoch": 21.04, "learning_rate": 7.004804298237175e-07, "loss": 0.7257, "step": 43760 }, { "epoch": 21.04, "learning_rate": 6.982725764255463e-07, "loss": 0.3651, "step": 43770 }, { "epoch": 21.04, "learning_rate": 6.960680381066936e-07, "loss": 0.4422, "step": 43780 }, { "epoch": 21.04, "learning_rate": 6.938668159416234e-07, "loss": 0.1049, "step": 43790 }, { "epoch": 21.04, "learning_rate": 6.916689110031857e-07, "loss": 0.4155, "step": 43800 }, { "epoch": 21.04, "learning_rate": 6.894743243626112e-07, "loss": 0.4926, "step": 43810 }, { "epoch": 21.04, "learning_rate": 6.872830570895167e-07, "loss": 0.3949, "step": 43820 }, { "epoch": 21.04, "learning_rate": 6.850951102518979e-07, "loss": 0.2546, "step": 43830 }, { "epoch": 21.04, "learning_rate": 6.829104849161344e-07, "loss": 0.4445, "step": 43840 }, { "epoch": 21.04, "learning_rate": 6.80729182146986e-07, "loss": 0.5695, "step": 43850 }, { "epoch": 21.04, "learning_rate": 6.785512030075925e-07, "loss": 0.6643, "step": 43860 }, { "epoch": 21.04, "learning_rate": 6.763765485594744e-07, "loss": 0.7855, "step": 43870 }, { "epoch": 21.04, "learning_rate": 6.742052198625326e-07, "loss": 0.5017, "step": 43880 }, { "epoch": 21.04, "learning_rate": 6.720372179750461e-07, "loss": 0.3665, "step": 43890 }, { "epoch": 21.04, "learning_rate": 6.698725439536723e-07, "loss": 0.4931, "step": 43900 }, { "epoch": 21.04, "learning_rate": 6.677111988534465e-07, "loss": 0.3041, "step": 43910 }, { "epoch": 21.04, "learning_rate": 6.655531837277834e-07, "loss": 0.2887, "step": 43920 }, { "epoch": 21.04, "learning_rate": 6.633984996284725e-07, "loss": 0.2932, "step": 43930 }, { "epoch": 21.04, "learning_rate": 6.61247147605683e-07, "loss": 0.4111, "step": 43940 }, { "epoch": 21.04, "learning_rate": 6.590991287079529e-07, "loss": 0.4439, "step": 43950 }, { "epoch": 21.04, "learning_rate": 6.569544439822045e-07, "loss": 0.604, "step": 43960 }, { "epoch": 21.04, "learning_rate": 6.548130944737294e-07, "loss": 0.4894, "step": 43970 }, { "epoch": 21.04, "learning_rate": 6.526750812261958e-07, "loss": 0.3146, "step": 43980 }, { "epoch": 21.04, "learning_rate": 6.505404052816455e-07, "loss": 0.2673, "step": 43990 }, { "epoch": 21.04, "learning_rate": 6.484090676804927e-07, "loss": 0.6131, "step": 44000 }, { "epoch": 21.04, "eval_accuracy": 0.8705263157894737, "eval_f1": 0.8705263157894737, "eval_loss": 0.796357274055481, "eval_runtime": 747.1435, "eval_samples_per_second": 6.358, "eval_steps_per_second": 1.59, "step": 44000 }, { "epoch": 22.0, "learning_rate": 6.462810694615273e-07, "loss": 0.6104, "step": 44010 }, { "epoch": 22.0, "learning_rate": 6.441564116619089e-07, "loss": 0.671, "step": 44020 }, { "epoch": 22.0, "learning_rate": 6.420350953171708e-07, "loss": 0.466, "step": 44030 }, { "epoch": 22.0, "learning_rate": 6.399171214612126e-07, "loss": 0.3888, "step": 44040 }, { "epoch": 22.0, "learning_rate": 6.378024911263144e-07, "loss": 0.428, "step": 44050 }, { "epoch": 22.0, "learning_rate": 6.356912053431185e-07, "loss": 0.177, "step": 44060 }, { "epoch": 22.0, "learning_rate": 6.33583265140641e-07, "loss": 0.6163, "step": 44070 }, { "epoch": 22.0, "learning_rate": 6.314786715462656e-07, "loss": 0.3802, "step": 44080 }, { "epoch": 22.0, "learning_rate": 6.293774255857463e-07, "loss": 0.5927, "step": 44090 }, { "epoch": 22.0, "learning_rate": 6.272795282832028e-07, "loss": 0.2755, "step": 44100 }, { "epoch": 22.0, "learning_rate": 6.25184980661129e-07, "loss": 0.4401, "step": 44110 }, { "epoch": 22.0, "learning_rate": 6.230937837403783e-07, "loss": 0.3299, "step": 44120 }, { "epoch": 22.0, "learning_rate": 6.210059385401754e-07, "loss": 0.2617, "step": 44130 }, { "epoch": 22.0, "learning_rate": 6.189214460781104e-07, "loss": 0.3985, "step": 44140 }, { "epoch": 22.0, "learning_rate": 6.168403073701415e-07, "loss": 0.512, "step": 44150 }, { "epoch": 22.0, "learning_rate": 6.14762523430589e-07, "loss": 0.4434, "step": 44160 }, { "epoch": 22.0, "learning_rate": 6.126880952721403e-07, "loss": 0.4754, "step": 44170 }, { "epoch": 22.0, "learning_rate": 6.10617023905845e-07, "loss": 0.5834, "step": 44180 }, { "epoch": 22.0, "learning_rate": 6.085493103411183e-07, "loss": 0.2878, "step": 44190 }, { "epoch": 22.0, "learning_rate": 6.064849555857421e-07, "loss": 0.3629, "step": 44200 }, { "epoch": 22.0, "learning_rate": 6.044239606458543e-07, "loss": 0.5338, "step": 44210 }, { "epoch": 22.0, "learning_rate": 6.023663265259597e-07, "loss": 0.5599, "step": 44220 }, { "epoch": 22.0, "learning_rate": 6.003120542289234e-07, "loss": 0.5573, "step": 44230 }, { "epoch": 22.0, "learning_rate": 5.982611447559722e-07, "loss": 0.3172, "step": 44240 }, { "epoch": 22.0, "learning_rate": 5.962135991066972e-07, "loss": 0.7887, "step": 44250 }, { "epoch": 22.01, "learning_rate": 5.941694182790461e-07, "loss": 0.4521, "step": 44260 }, { "epoch": 22.01, "learning_rate": 5.921286032693277e-07, "loss": 0.5678, "step": 44270 }, { "epoch": 22.01, "learning_rate": 5.900911550722105e-07, "loss": 0.3744, "step": 44280 }, { "epoch": 22.01, "learning_rate": 5.88057074680721e-07, "loss": 0.576, "step": 44290 }, { "epoch": 22.01, "learning_rate": 5.86026363086247e-07, "loss": 0.5443, "step": 44300 }, { "epoch": 22.01, "learning_rate": 5.83999021278531e-07, "loss": 0.8581, "step": 44310 }, { "epoch": 22.01, "learning_rate": 5.819750502456758e-07, "loss": 0.2708, "step": 44320 }, { "epoch": 22.01, "learning_rate": 5.799544509741384e-07, "loss": 0.7194, "step": 44330 }, { "epoch": 22.01, "learning_rate": 5.779372244487374e-07, "loss": 0.4304, "step": 44340 }, { "epoch": 22.01, "learning_rate": 5.759233716526433e-07, "loss": 0.3599, "step": 44350 }, { "epoch": 22.01, "learning_rate": 5.739128935673832e-07, "loss": 0.259, "step": 44360 }, { "epoch": 22.01, "learning_rate": 5.719057911728392e-07, "loss": 0.2699, "step": 44370 }, { "epoch": 22.01, "learning_rate": 5.699020654472497e-07, "loss": 0.5559, "step": 44380 }, { "epoch": 22.01, "learning_rate": 5.679017173672052e-07, "loss": 0.3535, "step": 44390 }, { "epoch": 22.01, "learning_rate": 5.659047479076524e-07, "loss": 0.3878, "step": 44400 }, { "epoch": 22.01, "learning_rate": 5.639111580418885e-07, "loss": 0.2821, "step": 44410 }, { "epoch": 22.01, "learning_rate": 5.619209487415655e-07, "loss": 0.1983, "step": 44420 }, { "epoch": 22.01, "learning_rate": 5.599341209766867e-07, "loss": 0.5107, "step": 44430 }, { "epoch": 22.01, "learning_rate": 5.579506757156091e-07, "loss": 0.5185, "step": 44440 }, { "epoch": 22.01, "learning_rate": 5.559706139250395e-07, "loss": 0.4973, "step": 44450 }, { "epoch": 22.01, "learning_rate": 5.539939365700358e-07, "loss": 0.3193, "step": 44460 }, { "epoch": 22.01, "learning_rate": 5.520206446140058e-07, "loss": 0.495, "step": 44470 }, { "epoch": 22.01, "learning_rate": 5.500507390187093e-07, "loss": 0.6467, "step": 44480 }, { "epoch": 22.01, "learning_rate": 5.480842207442533e-07, "loss": 0.6714, "step": 44490 }, { "epoch": 22.01, "learning_rate": 5.461210907490952e-07, "loss": 0.3219, "step": 44500 }, { "epoch": 22.01, "learning_rate": 5.441613499900405e-07, "loss": 0.4564, "step": 44510 }, { "epoch": 22.01, "learning_rate": 5.422049994222427e-07, "loss": 0.4278, "step": 44520 }, { "epoch": 22.01, "learning_rate": 5.40252039999205e-07, "loss": 0.3553, "step": 44530 }, { "epoch": 22.01, "learning_rate": 5.38302472672775e-07, "loss": 0.5166, "step": 44540 }, { "epoch": 22.01, "learning_rate": 5.363562983931486e-07, "loss": 0.318, "step": 44550 }, { "epoch": 22.01, "learning_rate": 5.344135181088677e-07, "loss": 0.6389, "step": 44560 }, { "epoch": 22.01, "learning_rate": 5.324741327668168e-07, "loss": 0.635, "step": 44570 }, { "epoch": 22.01, "learning_rate": 5.305381433122328e-07, "loss": 0.6101, "step": 44580 }, { "epoch": 22.01, "learning_rate": 5.286055506886911e-07, "loss": 0.3987, "step": 44590 }, { "epoch": 22.01, "learning_rate": 5.266763558381149e-07, "loss": 0.8255, "step": 44600 }, { "epoch": 22.01, "learning_rate": 5.247505597007704e-07, "loss": 0.2846, "step": 44610 }, { "epoch": 22.01, "learning_rate": 5.228281632152651e-07, "loss": 0.6096, "step": 44620 }, { "epoch": 22.01, "learning_rate": 5.209091673185546e-07, "loss": 0.667, "step": 44630 }, { "epoch": 22.01, "learning_rate": 5.18993572945933e-07, "loss": 0.3633, "step": 44640 }, { "epoch": 22.01, "learning_rate": 5.170813810310388e-07, "loss": 0.4713, "step": 44650 }, { "epoch": 22.01, "learning_rate": 5.151725925058478e-07, "loss": 0.2222, "step": 44660 }, { "epoch": 22.01, "learning_rate": 5.132672083006836e-07, "loss": 0.459, "step": 44670 }, { "epoch": 22.01, "learning_rate": 5.113652293442067e-07, "loss": 0.4248, "step": 44680 }, { "epoch": 22.01, "learning_rate": 5.094666565634178e-07, "loss": 0.602, "step": 44690 }, { "epoch": 22.01, "learning_rate": 5.075714908836587e-07, "loss": 0.4219, "step": 44700 }, { "epoch": 22.01, "learning_rate": 5.056797332286103e-07, "loss": 0.3305, "step": 44710 }, { "epoch": 22.01, "learning_rate": 5.037913845202901e-07, "loss": 0.2996, "step": 44720 }, { "epoch": 22.01, "learning_rate": 5.019064456790612e-07, "loss": 0.2826, "step": 44730 }, { "epoch": 22.01, "learning_rate": 5.000249176236151e-07, "loss": 0.4705, "step": 44740 }, { "epoch": 22.02, "learning_rate": 4.981468012709877e-07, "loss": 0.6573, "step": 44750 }, { "epoch": 22.02, "learning_rate": 4.962720975365492e-07, "loss": 0.4925, "step": 44760 }, { "epoch": 22.02, "learning_rate": 4.944008073340086e-07, "loss": 0.3959, "step": 44770 }, { "epoch": 22.02, "learning_rate": 4.925329315754099e-07, "loss": 0.2424, "step": 44780 }, { "epoch": 22.02, "learning_rate": 4.906684711711328e-07, "loss": 0.3226, "step": 44790 }, { "epoch": 22.02, "learning_rate": 4.888074270298917e-07, "loss": 0.4597, "step": 44800 }, { "epoch": 22.02, "learning_rate": 4.869498000587375e-07, "loss": 0.3997, "step": 44810 }, { "epoch": 22.02, "learning_rate": 4.850955911630576e-07, "loss": 0.3794, "step": 44820 }, { "epoch": 22.02, "learning_rate": 4.832448012465665e-07, "loss": 0.5885, "step": 44830 }, { "epoch": 22.02, "learning_rate": 4.813974312113195e-07, "loss": 0.5058, "step": 44840 }, { "epoch": 22.02, "learning_rate": 4.795534819577005e-07, "loss": 0.5762, "step": 44850 }, { "epoch": 22.02, "learning_rate": 4.777129543844297e-07, "loss": 0.5572, "step": 44860 }, { "epoch": 22.02, "learning_rate": 4.7587584938855786e-07, "loss": 0.9591, "step": 44870 }, { "epoch": 22.02, "learning_rate": 4.7404216786546634e-07, "loss": 0.6308, "step": 44880 }, { "epoch": 22.02, "learning_rate": 4.7221191070887013e-07, "loss": 0.3762, "step": 44890 }, { "epoch": 22.02, "learning_rate": 4.703850788108141e-07, "loss": 0.7058, "step": 44900 }, { "epoch": 22.02, "learning_rate": 4.685616730616743e-07, "loss": 0.167, "step": 44910 }, { "epoch": 22.02, "learning_rate": 4.667416943501557e-07, "loss": 0.4422, "step": 44920 }, { "epoch": 22.02, "learning_rate": 4.649251435632945e-07, "loss": 0.2814, "step": 44930 }, { "epoch": 22.02, "learning_rate": 4.631120215864551e-07, "loss": 0.4397, "step": 44940 }, { "epoch": 22.02, "learning_rate": 4.6130232930333056e-07, "loss": 0.6745, "step": 44950 }, { "epoch": 22.02, "learning_rate": 4.5949606759594446e-07, "loss": 0.0493, "step": 44960 }, { "epoch": 22.02, "learning_rate": 4.57693237344646e-07, "loss": 0.5869, "step": 44970 }, { "epoch": 22.02, "learning_rate": 4.5589383942811315e-07, "loss": 0.5944, "step": 44980 }, { "epoch": 22.02, "learning_rate": 4.540978747233493e-07, "loss": 0.3901, "step": 44990 }, { "epoch": 22.02, "learning_rate": 4.5230534410568764e-07, "loss": 0.572, "step": 45000 }, { "epoch": 22.02, "learning_rate": 4.505162484487843e-07, "loss": 0.3358, "step": 45010 }, { "epoch": 22.02, "learning_rate": 4.4873058862462347e-07, "loss": 0.349, "step": 45020 }, { "epoch": 22.02, "learning_rate": 4.469483655035148e-07, "loss": 0.5895, "step": 45030 }, { "epoch": 22.02, "learning_rate": 4.451695799540892e-07, "loss": 0.2153, "step": 45040 }, { "epoch": 22.02, "learning_rate": 4.433942328433091e-07, "loss": 0.6343, "step": 45050 }, { "epoch": 22.02, "learning_rate": 4.4162232503645484e-07, "loss": 0.2725, "step": 45060 }, { "epoch": 22.02, "learning_rate": 4.3985385739713306e-07, "loss": 0.5323, "step": 45070 }, { "epoch": 22.02, "learning_rate": 4.3808883078727437e-07, "loss": 0.3967, "step": 45080 }, { "epoch": 22.02, "learning_rate": 4.363272460671297e-07, "loss": 0.7175, "step": 45090 }, { "epoch": 22.02, "learning_rate": 4.3456910409527483e-07, "loss": 1.0338, "step": 45100 }, { "epoch": 22.02, "learning_rate": 4.328144057286068e-07, "loss": 0.352, "step": 45110 }, { "epoch": 22.02, "learning_rate": 4.3106315182234484e-07, "loss": 0.4248, "step": 45120 }, { "epoch": 22.02, "learning_rate": 4.2931534323002787e-07, "loss": 0.5194, "step": 45130 }, { "epoch": 22.02, "learning_rate": 4.2757098080351606e-07, "loss": 0.497, "step": 45140 }, { "epoch": 22.02, "learning_rate": 4.258300653929928e-07, "loss": 0.5801, "step": 45150 }, { "epoch": 22.02, "learning_rate": 4.2409259784695675e-07, "loss": 0.2365, "step": 45160 }, { "epoch": 22.02, "learning_rate": 4.223585790122306e-07, "loss": 0.3726, "step": 45170 }, { "epoch": 22.02, "learning_rate": 4.2062800973395157e-07, "loss": 0.6571, "step": 45180 }, { "epoch": 22.02, "learning_rate": 4.1890089085557996e-07, "loss": 0.4289, "step": 45190 }, { "epoch": 22.02, "learning_rate": 4.171772232188925e-07, "loss": 0.9237, "step": 45200 }, { "epoch": 22.02, "learning_rate": 4.1545700766398297e-07, "loss": 0.2547, "step": 45210 }, { "epoch": 22.02, "learning_rate": 4.137402450292641e-07, "loss": 0.3113, "step": 45220 }, { "epoch": 22.02, "learning_rate": 4.1202693615146476e-07, "loss": 0.4242, "step": 45230 }, { "epoch": 22.02, "learning_rate": 4.103170818656296e-07, "loss": 0.433, "step": 45240 }, { "epoch": 22.02, "learning_rate": 4.0861068300512364e-07, "loss": 0.5028, "step": 45250 }, { "epoch": 22.03, "learning_rate": 4.0690774040162416e-07, "loss": 0.6273, "step": 45260 }, { "epoch": 22.03, "learning_rate": 4.0520825488512463e-07, "loss": 0.5756, "step": 45270 }, { "epoch": 22.03, "learning_rate": 4.0351222728393087e-07, "loss": 0.3489, "step": 45280 }, { "epoch": 22.03, "learning_rate": 4.0181965842467e-07, "loss": 0.9094, "step": 45290 }, { "epoch": 22.03, "learning_rate": 4.001305491322779e-07, "loss": 0.4944, "step": 45300 }, { "epoch": 22.03, "learning_rate": 3.984449002300053e-07, "loss": 0.7408, "step": 45310 }, { "epoch": 22.03, "learning_rate": 3.9676271253941827e-07, "loss": 0.7202, "step": 45320 }, { "epoch": 22.03, "learning_rate": 3.9508398688039267e-07, "loss": 0.6042, "step": 45330 }, { "epoch": 22.03, "learning_rate": 3.9340872407112234e-07, "loss": 0.5301, "step": 45340 }, { "epoch": 22.03, "learning_rate": 3.917369249281083e-07, "loss": 0.437, "step": 45350 }, { "epoch": 22.03, "learning_rate": 3.900685902661638e-07, "loss": 0.4105, "step": 45360 }, { "epoch": 22.03, "learning_rate": 3.884037208984151e-07, "loss": 0.2409, "step": 45370 }, { "epoch": 22.03, "learning_rate": 3.8674231763630134e-07, "loss": 0.3427, "step": 45380 }, { "epoch": 22.03, "learning_rate": 3.850843812895691e-07, "loss": 0.5633, "step": 45390 }, { "epoch": 22.03, "learning_rate": 3.8342991266627605e-07, "loss": 0.6687, "step": 45400 }, { "epoch": 22.03, "learning_rate": 3.8177891257279136e-07, "loss": 0.4543, "step": 45410 }, { "epoch": 22.03, "learning_rate": 3.8013138181379054e-07, "loss": 0.5662, "step": 45420 }, { "epoch": 22.03, "learning_rate": 3.784873211922604e-07, "loss": 0.9155, "step": 45430 }, { "epoch": 22.03, "learning_rate": 3.768467315094992e-07, "loss": 0.4995, "step": 45440 }, { "epoch": 22.03, "learning_rate": 3.752096135651073e-07, "loss": 0.4798, "step": 45450 }, { "epoch": 22.03, "learning_rate": 3.7357596815699557e-07, "loss": 0.7374, "step": 45460 }, { "epoch": 22.03, "learning_rate": 3.719457960813846e-07, "loss": 0.3294, "step": 45470 }, { "epoch": 22.03, "learning_rate": 3.7031909813279974e-07, "loss": 0.5075, "step": 45480 }, { "epoch": 22.03, "learning_rate": 3.68695875104075e-07, "loss": 0.3049, "step": 45490 }, { "epoch": 22.03, "learning_rate": 3.6707612778634855e-07, "loss": 0.7697, "step": 45500 }, { "epoch": 22.03, "learning_rate": 3.6545985696906535e-07, "loss": 0.4896, "step": 45510 }, { "epoch": 22.03, "learning_rate": 3.6384706343997544e-07, "loss": 0.531, "step": 45520 }, { "epoch": 22.03, "learning_rate": 3.6223774798513825e-07, "loss": 0.5829, "step": 45530 }, { "epoch": 22.03, "learning_rate": 3.6063191138891144e-07, "loss": 0.7269, "step": 45540 }, { "epoch": 22.03, "learning_rate": 3.590295544339606e-07, "loss": 0.3007, "step": 45550 }, { "epoch": 22.03, "learning_rate": 3.574306779012551e-07, "loss": 0.6541, "step": 45560 }, { "epoch": 22.03, "learning_rate": 3.5583528257006755e-07, "loss": 0.6501, "step": 45570 }, { "epoch": 22.03, "learning_rate": 3.542433692179767e-07, "loss": 0.2508, "step": 45580 }, { "epoch": 22.03, "learning_rate": 3.526549386208594e-07, "loss": 0.3447, "step": 45590 }, { "epoch": 22.03, "learning_rate": 3.510699915528981e-07, "loss": 0.2587, "step": 45600 }, { "epoch": 22.03, "learning_rate": 3.494885287865765e-07, "loss": 0.4874, "step": 45610 }, { "epoch": 22.03, "learning_rate": 3.4791055109268053e-07, "loss": 0.6196, "step": 45620 }, { "epoch": 22.03, "learning_rate": 3.4633605924029754e-07, "loss": 0.7277, "step": 45630 }, { "epoch": 22.03, "learning_rate": 3.4476505399681615e-07, "loss": 0.754, "step": 45640 }, { "epoch": 22.03, "learning_rate": 3.431975361279238e-07, "loss": 0.6357, "step": 45650 }, { "epoch": 22.03, "learning_rate": 3.416335063976103e-07, "loss": 0.1892, "step": 45660 }, { "epoch": 22.03, "learning_rate": 3.400729655681659e-07, "loss": 0.5099, "step": 45670 }, { "epoch": 22.03, "learning_rate": 3.3851591440017785e-07, "loss": 0.6402, "step": 45680 }, { "epoch": 22.03, "learning_rate": 3.369623536525343e-07, "loss": 0.4794, "step": 45690 }, { "epoch": 22.03, "learning_rate": 3.354122840824228e-07, "loss": 0.4491, "step": 45700 }, { "epoch": 22.03, "learning_rate": 3.3386570644532736e-07, "loss": 0.4113, "step": 45710 }, { "epoch": 22.03, "learning_rate": 3.323226214950309e-07, "loss": 0.278, "step": 45720 }, { "epoch": 22.03, "learning_rate": 3.307830299836151e-07, "loss": 0.4852, "step": 45730 }, { "epoch": 22.03, "learning_rate": 3.2924693266145804e-07, "loss": 0.7552, "step": 45740 }, { "epoch": 22.04, "learning_rate": 3.2771433027723425e-07, "loss": 0.5251, "step": 45750 }, { "epoch": 22.04, "learning_rate": 3.261852235779153e-07, "loss": 0.3029, "step": 45760 }, { "epoch": 22.04, "learning_rate": 3.246596133087709e-07, "loss": 0.442, "step": 45770 }, { "epoch": 22.04, "learning_rate": 3.2313750021336464e-07, "loss": 0.7183, "step": 45780 }, { "epoch": 22.04, "learning_rate": 3.2161888503355386e-07, "loss": 0.7049, "step": 45790 }, { "epoch": 22.04, "learning_rate": 3.201037685094957e-07, "loss": 0.7686, "step": 45800 }, { "epoch": 22.04, "learning_rate": 3.1859215137963775e-07, "loss": 0.3101, "step": 45810 }, { "epoch": 22.04, "learning_rate": 3.170840343807249e-07, "loss": 0.6807, "step": 45820 }, { "epoch": 22.04, "learning_rate": 3.1557941824779417e-07, "loss": 0.3282, "step": 45830 }, { "epoch": 22.04, "learning_rate": 3.140783037141781e-07, "loss": 0.5177, "step": 45840 }, { "epoch": 22.04, "learning_rate": 3.1258069151150067e-07, "loss": 0.6931, "step": 45850 }, { "epoch": 22.04, "learning_rate": 3.110865823696804e-07, "loss": 0.4378, "step": 45860 }, { "epoch": 22.04, "learning_rate": 3.0959597701692905e-07, "loss": 0.683, "step": 45870 }, { "epoch": 22.04, "learning_rate": 3.0810887617974793e-07, "loss": 0.3099, "step": 45880 }, { "epoch": 22.04, "learning_rate": 3.066252805829323e-07, "loss": 0.7326, "step": 45890 }, { "epoch": 22.04, "learning_rate": 3.0514519094956864e-07, "loss": 0.6025, "step": 45900 }, { "epoch": 22.04, "learning_rate": 3.0366860800103507e-07, "loss": 0.6772, "step": 45910 }, { "epoch": 22.04, "learning_rate": 3.02195532457e-07, "loss": 0.3919, "step": 45920 }, { "epoch": 22.04, "learning_rate": 3.0072596503542244e-07, "loss": 0.2697, "step": 45930 }, { "epoch": 22.04, "learning_rate": 2.99259906452552e-07, "loss": 0.6047, "step": 45940 }, { "epoch": 22.04, "learning_rate": 2.97797357422927e-07, "loss": 0.3856, "step": 45950 }, { "epoch": 22.04, "learning_rate": 2.9633831865937656e-07, "loss": 0.4051, "step": 45960 }, { "epoch": 22.04, "learning_rate": 2.9488279087302004e-07, "loss": 0.4755, "step": 45970 }, { "epoch": 22.04, "learning_rate": 2.9343077477326173e-07, "loss": 0.3595, "step": 45980 }, { "epoch": 22.04, "learning_rate": 2.9198227106779634e-07, "loss": 0.6075, "step": 45990 }, { "epoch": 22.04, "learning_rate": 2.905372804626083e-07, "loss": 0.628, "step": 46000 }, { "epoch": 22.04, "eval_accuracy": 0.8747368421052631, "eval_f1": 0.8747368421052631, "eval_loss": 0.8088645339012146, "eval_runtime": 748.0599, "eval_samples_per_second": 6.35, "eval_steps_per_second": 1.588, "step": 46000 }, { "epoch": 23.0, "learning_rate": 2.8909580366196926e-07, "loss": 0.4376, "step": 46010 }, { "epoch": 23.0, "learning_rate": 2.8765784136843554e-07, "loss": 0.7477, "step": 46020 }, { "epoch": 23.0, "learning_rate": 2.8622339428285406e-07, "loss": 0.294, "step": 46030 }, { "epoch": 23.0, "learning_rate": 2.847924631043555e-07, "loss": 0.4043, "step": 46040 }, { "epoch": 23.0, "learning_rate": 2.833650485303596e-07, "loss": 0.4888, "step": 46050 }, { "epoch": 23.0, "learning_rate": 2.8194115125657146e-07, "loss": 0.7153, "step": 46060 }, { "epoch": 23.0, "learning_rate": 2.8052077197697925e-07, "loss": 0.3605, "step": 46070 }, { "epoch": 23.0, "learning_rate": 2.791039113838592e-07, "loss": 0.2711, "step": 46080 }, { "epoch": 23.0, "learning_rate": 2.7769057016777234e-07, "loss": 0.3246, "step": 46090 }, { "epoch": 23.0, "learning_rate": 2.7628074901756423e-07, "loss": 0.696, "step": 46100 }, { "epoch": 23.0, "learning_rate": 2.748744486203644e-07, "loss": 0.6081, "step": 46110 }, { "epoch": 23.0, "learning_rate": 2.734716696615863e-07, "loss": 0.2118, "step": 46120 }, { "epoch": 23.0, "learning_rate": 2.720724128249272e-07, "loss": 0.617, "step": 46130 }, { "epoch": 23.0, "learning_rate": 2.7067667879236815e-07, "loss": 0.6095, "step": 46140 }, { "epoch": 23.0, "learning_rate": 2.6928446824417503e-07, "loss": 0.5906, "step": 46150 }, { "epoch": 23.0, "learning_rate": 2.678957818588909e-07, "loss": 0.7001, "step": 46160 }, { "epoch": 23.0, "learning_rate": 2.6651062031334607e-07, "loss": 0.2159, "step": 46170 }, { "epoch": 23.0, "learning_rate": 2.651289842826504e-07, "loss": 0.3471, "step": 46180 }, { "epoch": 23.0, "learning_rate": 2.637508744401987e-07, "loss": 0.4117, "step": 46190 }, { "epoch": 23.0, "learning_rate": 2.623762914576644e-07, "loss": 0.6562, "step": 46200 }, { "epoch": 23.0, "learning_rate": 2.6100523600500163e-07, "loss": 0.5202, "step": 46210 }, { "epoch": 23.0, "learning_rate": 2.596377087504459e-07, "loss": 0.6931, "step": 46220 }, { "epoch": 23.0, "learning_rate": 2.5827371036051486e-07, "loss": 0.4239, "step": 46230 }, { "epoch": 23.0, "learning_rate": 2.5691324150000333e-07, "loss": 0.8724, "step": 46240 }, { "epoch": 23.0, "learning_rate": 2.555563028319885e-07, "loss": 0.8134, "step": 46250 }, { "epoch": 23.01, "learning_rate": 2.542028950178246e-07, "loss": 0.6479, "step": 46260 }, { "epoch": 23.01, "learning_rate": 2.528530187171474e-07, "loss": 0.3903, "step": 46270 }, { "epoch": 23.01, "learning_rate": 2.5150667458786804e-07, "loss": 0.3458, "step": 46280 }, { "epoch": 23.01, "learning_rate": 2.5016386328618077e-07, "loss": 0.4394, "step": 46290 }, { "epoch": 23.01, "learning_rate": 2.488245854665547e-07, "loss": 0.5828, "step": 46300 }, { "epoch": 23.01, "learning_rate": 2.4748884178173754e-07, "loss": 0.6674, "step": 46310 }, { "epoch": 23.01, "learning_rate": 2.461566328827536e-07, "loss": 0.3149, "step": 46320 }, { "epoch": 23.01, "learning_rate": 2.4482795941890677e-07, "loss": 0.6931, "step": 46330 }, { "epoch": 23.01, "learning_rate": 2.435028220377758e-07, "loss": 0.3118, "step": 46340 }, { "epoch": 23.01, "learning_rate": 2.421812213852165e-07, "loss": 0.6017, "step": 46350 }, { "epoch": 23.01, "learning_rate": 2.408631581053602e-07, "loss": 0.4137, "step": 46360 }, { "epoch": 23.01, "learning_rate": 2.3954863284061573e-07, "loss": 0.395, "step": 46370 }, { "epoch": 23.01, "learning_rate": 2.3823764623166712e-07, "loss": 0.5066, "step": 46380 }, { "epoch": 23.01, "learning_rate": 2.3693019891747241e-07, "loss": 0.4814, "step": 46390 }, { "epoch": 23.01, "learning_rate": 2.3562629153526693e-07, "loss": 0.5267, "step": 46400 }, { "epoch": 23.01, "learning_rate": 2.3432592472055732e-07, "loss": 0.8471, "step": 46410 }, { "epoch": 23.01, "learning_rate": 2.3302909910712823e-07, "loss": 0.3451, "step": 46420 }, { "epoch": 23.01, "learning_rate": 2.3173581532703574e-07, "loss": 0.6432, "step": 46430 }, { "epoch": 23.01, "learning_rate": 2.3044607401061152e-07, "loss": 0.2905, "step": 46440 }, { "epoch": 23.01, "learning_rate": 2.2915987578645852e-07, "loss": 0.4183, "step": 46450 }, { "epoch": 23.01, "learning_rate": 2.2787722128145534e-07, "loss": 0.4307, "step": 46460 }, { "epoch": 23.01, "learning_rate": 2.26598111120751e-07, "loss": 0.4788, "step": 46470 }, { "epoch": 23.01, "learning_rate": 2.2532254592777025e-07, "loss": 0.2118, "step": 46480 }, { "epoch": 23.01, "learning_rate": 2.2405052632420652e-07, "loss": 0.3335, "step": 46490 }, { "epoch": 23.01, "learning_rate": 2.2278205293002645e-07, "loss": 0.1742, "step": 46500 }, { "epoch": 23.01, "learning_rate": 2.2151712636346966e-07, "loss": 0.5771, "step": 46510 }, { "epoch": 23.01, "learning_rate": 2.2025574724104548e-07, "loss": 0.2051, "step": 46520 }, { "epoch": 23.01, "learning_rate": 2.189979161775346e-07, "loss": 0.3724, "step": 46530 }, { "epoch": 23.01, "learning_rate": 2.1774363378598838e-07, "loss": 0.3743, "step": 46540 }, { "epoch": 23.01, "learning_rate": 2.1649290067773026e-07, "loss": 0.5481, "step": 46550 }, { "epoch": 23.01, "learning_rate": 2.152457174623501e-07, "loss": 0.6652, "step": 46560 }, { "epoch": 23.01, "learning_rate": 2.1400208474771087e-07, "loss": 0.4334, "step": 46570 }, { "epoch": 23.01, "learning_rate": 2.1276200313994522e-07, "loss": 0.339, "step": 46580 }, { "epoch": 23.01, "learning_rate": 2.115254732434538e-07, "loss": 0.4181, "step": 46590 }, { "epoch": 23.01, "learning_rate": 2.102924956609037e-07, "loss": 0.3678, "step": 46600 }, { "epoch": 23.01, "learning_rate": 2.0906307099323508e-07, "loss": 0.4751, "step": 46610 }, { "epoch": 23.01, "learning_rate": 2.0783719983965443e-07, "loss": 0.4232, "step": 46620 }, { "epoch": 23.01, "learning_rate": 2.0661488279763634e-07, "loss": 0.3166, "step": 46630 }, { "epoch": 23.01, "learning_rate": 2.0539612046292344e-07, "loss": 0.3519, "step": 46640 }, { "epoch": 23.01, "learning_rate": 2.0418091342952638e-07, "loss": 0.4472, "step": 46650 }, { "epoch": 23.01, "learning_rate": 2.0296926228971973e-07, "loss": 0.6069, "step": 46660 }, { "epoch": 23.01, "learning_rate": 2.0176116763405027e-07, "loss": 0.5496, "step": 46670 }, { "epoch": 23.01, "learning_rate": 2.0055663005132868e-07, "loss": 0.5399, "step": 46680 }, { "epoch": 23.01, "learning_rate": 1.9935565012863032e-07, "loss": 0.2832, "step": 46690 }, { "epoch": 23.01, "learning_rate": 1.9815822845129783e-07, "loss": 0.5219, "step": 46700 }, { "epoch": 23.01, "learning_rate": 1.9696436560294184e-07, "loss": 0.4706, "step": 46710 }, { "epoch": 23.01, "learning_rate": 1.9577406216543607e-07, "loss": 0.3866, "step": 46720 }, { "epoch": 23.01, "learning_rate": 1.9458731871891899e-07, "loss": 0.4084, "step": 46730 }, { "epoch": 23.01, "learning_rate": 1.9340413584179623e-07, "loss": 0.2977, "step": 46740 }, { "epoch": 23.02, "learning_rate": 1.9222451411073648e-07, "loss": 0.6459, "step": 46750 }, { "epoch": 23.02, "learning_rate": 1.9104845410067072e-07, "loss": 0.5983, "step": 46760 }, { "epoch": 23.02, "learning_rate": 1.8987595638480042e-07, "loss": 0.5445, "step": 46770 }, { "epoch": 23.02, "learning_rate": 1.887070215345843e-07, "loss": 0.5333, "step": 46780 }, { "epoch": 23.02, "learning_rate": 1.875416501197466e-07, "loss": 0.2043, "step": 46790 }, { "epoch": 23.02, "learning_rate": 1.8637984270827552e-07, "loss": 0.6342, "step": 46800 }, { "epoch": 23.02, "learning_rate": 1.8522159986642306e-07, "loss": 0.6586, "step": 46810 }, { "epoch": 23.02, "learning_rate": 1.8406692215870185e-07, "loss": 0.3052, "step": 46820 }, { "epoch": 23.02, "learning_rate": 1.8291581014788755e-07, "loss": 0.9379, "step": 46830 }, { "epoch": 23.02, "learning_rate": 1.8176826439501887e-07, "loss": 0.26, "step": 46840 }, { "epoch": 23.02, "learning_rate": 1.8062428545939424e-07, "loss": 0.3086, "step": 46850 }, { "epoch": 23.02, "learning_rate": 1.7948387389857767e-07, "loss": 0.1077, "step": 46860 }, { "epoch": 23.02, "learning_rate": 1.7834703026838957e-07, "loss": 0.3269, "step": 46870 }, { "epoch": 23.02, "learning_rate": 1.7721375512291417e-07, "loss": 0.3113, "step": 46880 }, { "epoch": 23.02, "learning_rate": 1.7608404901449548e-07, "loss": 0.2717, "step": 46890 }, { "epoch": 23.02, "learning_rate": 1.749579124937406e-07, "loss": 0.6674, "step": 46900 }, { "epoch": 23.02, "learning_rate": 1.738353461095138e-07, "loss": 0.2997, "step": 46910 }, { "epoch": 23.02, "learning_rate": 1.7271635040893908e-07, "loss": 0.6477, "step": 46920 }, { "epoch": 23.02, "learning_rate": 1.716009259374035e-07, "loss": 0.3548, "step": 46930 }, { "epoch": 23.02, "learning_rate": 1.704890732385489e-07, "loss": 0.3934, "step": 46940 }, { "epoch": 23.02, "learning_rate": 1.6938079285428094e-07, "loss": 0.3057, "step": 46950 }, { "epoch": 23.02, "learning_rate": 1.6827608532476168e-07, "loss": 0.6208, "step": 46960 }, { "epoch": 23.02, "learning_rate": 1.6717495118841048e-07, "loss": 0.3769, "step": 46970 }, { "epoch": 23.02, "learning_rate": 1.6607739098190883e-07, "loss": 0.331, "step": 46980 }, { "epoch": 23.02, "learning_rate": 1.6498340524019218e-07, "loss": 0.6539, "step": 46990 }, { "epoch": 23.02, "learning_rate": 1.6389299449645734e-07, "loss": 0.6753, "step": 47000 }, { "epoch": 23.02, "learning_rate": 1.6280615928215753e-07, "loss": 0.1063, "step": 47010 }, { "epoch": 23.02, "learning_rate": 1.6172290012700235e-07, "loss": 0.7285, "step": 47020 }, { "epoch": 23.02, "learning_rate": 1.606432175589595e-07, "loss": 0.5733, "step": 47030 }, { "epoch": 23.02, "learning_rate": 1.5956711210425218e-07, "loss": 0.4015, "step": 47040 }, { "epoch": 23.02, "learning_rate": 1.584945842873625e-07, "loss": 0.6097, "step": 47050 }, { "epoch": 23.02, "learning_rate": 1.5742563463102654e-07, "loss": 0.326, "step": 47060 }, { "epoch": 23.02, "learning_rate": 1.5636026365623833e-07, "loss": 0.8333, "step": 47070 }, { "epoch": 23.02, "learning_rate": 1.5529847188224504e-07, "loss": 0.4393, "step": 47080 }, { "epoch": 23.02, "learning_rate": 1.5424025982655187e-07, "loss": 0.9822, "step": 47090 }, { "epoch": 23.02, "learning_rate": 1.5318562800491957e-07, "loss": 0.6041, "step": 47100 }, { "epoch": 23.02, "learning_rate": 1.5213457693136201e-07, "loss": 0.3749, "step": 47110 }, { "epoch": 23.02, "learning_rate": 1.5108710711814855e-07, "loss": 0.4134, "step": 47120 }, { "epoch": 23.02, "learning_rate": 1.5004321907580332e-07, "loss": 0.4031, "step": 47130 }, { "epoch": 23.02, "learning_rate": 1.4900291331310518e-07, "loss": 0.35, "step": 47140 }, { "epoch": 23.02, "learning_rate": 1.4796619033708686e-07, "loss": 0.3272, "step": 47150 }, { "epoch": 23.02, "learning_rate": 1.4693305065303252e-07, "loss": 0.247, "step": 47160 }, { "epoch": 23.02, "learning_rate": 1.459034947644844e-07, "loss": 0.7903, "step": 47170 }, { "epoch": 23.02, "learning_rate": 1.4487752317323272e-07, "loss": 0.3523, "step": 47180 }, { "epoch": 23.02, "learning_rate": 1.438551363793267e-07, "loss": 0.7924, "step": 47190 }, { "epoch": 23.02, "learning_rate": 1.4283633488106352e-07, "loss": 0.3725, "step": 47200 }, { "epoch": 23.02, "learning_rate": 1.4182111917499517e-07, "loss": 0.9139, "step": 47210 }, { "epoch": 23.02, "learning_rate": 1.4080948975592423e-07, "loss": 0.2654, "step": 47220 }, { "epoch": 23.02, "learning_rate": 1.398014471169079e-07, "loss": 0.4064, "step": 47230 }, { "epoch": 23.02, "learning_rate": 1.387969917492532e-07, "loss": 0.3291, "step": 47240 }, { "epoch": 23.02, "learning_rate": 1.3779612414252017e-07, "loss": 0.5799, "step": 47250 }, { "epoch": 23.03, "learning_rate": 1.3679884478451948e-07, "loss": 0.423, "step": 47260 }, { "epoch": 23.03, "learning_rate": 1.358051541613131e-07, "loss": 0.6288, "step": 47270 }, { "epoch": 23.03, "learning_rate": 1.3481505275721202e-07, "loss": 0.6378, "step": 47280 }, { "epoch": 23.03, "learning_rate": 1.3382854105478099e-07, "loss": 0.7306, "step": 47290 }, { "epoch": 23.03, "learning_rate": 1.3284561953483541e-07, "loss": 0.2512, "step": 47300 }, { "epoch": 23.03, "learning_rate": 1.3186628867643713e-07, "loss": 0.4269, "step": 47310 }, { "epoch": 23.03, "learning_rate": 1.308905489568993e-07, "loss": 0.2313, "step": 47320 }, { "epoch": 23.03, "learning_rate": 1.2991840085178814e-07, "loss": 0.7018, "step": 47330 }, { "epoch": 23.03, "learning_rate": 1.2894984483491468e-07, "loss": 0.3318, "step": 47340 }, { "epoch": 23.03, "learning_rate": 1.279848813783438e-07, "loss": 0.2508, "step": 47350 }, { "epoch": 23.03, "learning_rate": 1.270235109523843e-07, "loss": 0.3462, "step": 47360 }, { "epoch": 23.03, "learning_rate": 1.2606573402559718e-07, "loss": 0.1962, "step": 47370 }, { "epoch": 23.03, "learning_rate": 1.251115510647932e-07, "loss": 0.6131, "step": 47380 }, { "epoch": 23.03, "learning_rate": 1.241609625350279e-07, "loss": 0.4011, "step": 47390 }, { "epoch": 23.03, "learning_rate": 1.2321396889960645e-07, "loss": 0.597, "step": 47400 }, { "epoch": 23.03, "learning_rate": 1.2227057062008217e-07, "loss": 0.3469, "step": 47410 }, { "epoch": 23.03, "learning_rate": 1.213307681562556e-07, "loss": 0.5855, "step": 47420 }, { "epoch": 23.03, "learning_rate": 1.2039456196617616e-07, "loss": 0.3312, "step": 47430 }, { "epoch": 23.03, "learning_rate": 1.1946195250613885e-07, "loss": 0.3626, "step": 47440 }, { "epoch": 23.03, "learning_rate": 1.1853294023068589e-07, "loss": 0.3461, "step": 47450 }, { "epoch": 23.03, "learning_rate": 1.1760752559260679e-07, "loss": 0.4803, "step": 47460 }, { "epoch": 23.03, "learning_rate": 1.1668570904293657e-07, "loss": 0.4819, "step": 47470 }, { "epoch": 23.03, "learning_rate": 1.1576749103096002e-07, "loss": 0.9285, "step": 47480 }, { "epoch": 23.03, "learning_rate": 1.1485287200420246e-07, "loss": 0.473, "step": 47490 }, { "epoch": 23.03, "learning_rate": 1.1394185240843985e-07, "loss": 0.271, "step": 47500 }, { "epoch": 23.03, "learning_rate": 1.1303443268769114e-07, "loss": 0.6521, "step": 47510 }, { "epoch": 23.03, "learning_rate": 1.1213061328422175e-07, "loss": 0.6992, "step": 47520 }, { "epoch": 23.03, "learning_rate": 1.1123039463854345e-07, "loss": 0.4707, "step": 47530 }, { "epoch": 23.03, "learning_rate": 1.1033377718941112e-07, "loss": 0.6397, "step": 47540 }, { "epoch": 23.03, "learning_rate": 1.0944076137382436e-07, "loss": 0.3328, "step": 47550 }, { "epoch": 23.03, "learning_rate": 1.0855134762702917e-07, "loss": 0.2288, "step": 47560 }, { "epoch": 23.03, "learning_rate": 1.0766553638251464e-07, "loss": 0.3954, "step": 47570 }, { "epoch": 23.03, "learning_rate": 1.0678332807201541e-07, "loss": 0.4067, "step": 47580 }, { "epoch": 23.03, "learning_rate": 1.0590472312550753e-07, "loss": 0.5821, "step": 47590 }, { "epoch": 23.03, "learning_rate": 1.0502972197121347e-07, "loss": 0.6503, "step": 47600 }, { "epoch": 23.03, "learning_rate": 1.0415832503559708e-07, "loss": 0.5509, "step": 47610 }, { "epoch": 23.03, "learning_rate": 1.032905327433678e-07, "loss": 0.9522, "step": 47620 }, { "epoch": 23.03, "learning_rate": 1.0242634551747731e-07, "loss": 0.6986, "step": 47630 }, { "epoch": 23.03, "learning_rate": 1.0156576377911869e-07, "loss": 0.4621, "step": 47640 }, { "epoch": 23.03, "learning_rate": 1.0070878794772975e-07, "loss": 0.6139, "step": 47650 }, { "epoch": 23.03, "learning_rate": 9.985541844098972e-08, "loss": 0.5695, "step": 47660 }, { "epoch": 23.03, "learning_rate": 9.900565567482089e-08, "loss": 0.5113, "step": 47670 }, { "epoch": 23.03, "learning_rate": 9.815950006338697e-08, "loss": 0.3426, "step": 47680 }, { "epoch": 23.03, "learning_rate": 9.731695201909474e-08, "loss": 0.4428, "step": 47690 }, { "epoch": 23.03, "learning_rate": 9.647801195259071e-08, "loss": 0.4442, "step": 47700 }, { "epoch": 23.03, "learning_rate": 9.564268027276446e-08, "loss": 0.3995, "step": 47710 }, { "epoch": 23.03, "learning_rate": 9.4810957386747e-08, "loss": 0.8283, "step": 47720 }, { "epoch": 23.03, "learning_rate": 9.398284369990989e-08, "loss": 0.438, "step": 47730 }, { "epoch": 23.03, "learning_rate": 9.315833961586612e-08, "loss": 0.5626, "step": 47740 }, { "epoch": 23.04, "learning_rate": 9.233744553646756e-08, "loss": 0.4854, "step": 47750 }, { "epoch": 23.04, "learning_rate": 9.152016186180834e-08, "loss": 0.4819, "step": 47760 }, { "epoch": 23.04, "learning_rate": 9.070648899022315e-08, "loss": 0.769, "step": 47770 }, { "epoch": 23.04, "learning_rate": 8.989642731828646e-08, "loss": 0.4348, "step": 47780 }, { "epoch": 23.04, "learning_rate": 8.908997724081241e-08, "loss": 0.5948, "step": 47790 }, { "epoch": 23.04, "learning_rate": 8.828713915085412e-08, "loss": 0.1465, "step": 47800 }, { "epoch": 23.04, "learning_rate": 8.748791343970524e-08, "loss": 0.3815, "step": 47810 }, { "epoch": 23.04, "learning_rate": 8.669230049690003e-08, "loss": 0.3987, "step": 47820 }, { "epoch": 23.04, "learning_rate": 8.590030071020994e-08, "loss": 0.316, "step": 47830 }, { "epoch": 23.04, "learning_rate": 8.511191446564537e-08, "loss": 0.2271, "step": 47840 }, { "epoch": 23.04, "learning_rate": 8.432714214745646e-08, "loss": 0.3009, "step": 47850 }, { "epoch": 23.04, "learning_rate": 8.354598413813141e-08, "loss": 0.2945, "step": 47860 }, { "epoch": 23.04, "learning_rate": 8.276844081839819e-08, "loss": 0.2503, "step": 47870 }, { "epoch": 23.04, "learning_rate": 8.199451256722196e-08, "loss": 0.5661, "step": 47880 }, { "epoch": 23.04, "learning_rate": 8.122419976180434e-08, "loss": 0.6049, "step": 47890 }, { "epoch": 23.04, "learning_rate": 8.045750277758751e-08, "loss": 0.4348, "step": 47900 }, { "epoch": 23.04, "learning_rate": 7.969442198825089e-08, "loss": 0.8783, "step": 47910 }, { "epoch": 23.04, "learning_rate": 7.893495776570947e-08, "loss": 0.4905, "step": 47920 }, { "epoch": 23.04, "learning_rate": 7.8179110480118e-08, "loss": 0.3333, "step": 47930 }, { "epoch": 23.04, "learning_rate": 7.742688049986596e-08, "loss": 0.4873, "step": 47940 }, { "epoch": 23.04, "learning_rate": 7.667826819158257e-08, "loss": 0.4427, "step": 47950 }, { "epoch": 23.04, "learning_rate": 7.593327392013183e-08, "loss": 0.4111, "step": 47960 }, { "epoch": 23.04, "learning_rate": 7.519189804861493e-08, "loss": 0.6091, "step": 47970 }, { "epoch": 23.04, "learning_rate": 7.44541409383695e-08, "loss": 0.2827, "step": 47980 }, { "epoch": 23.04, "learning_rate": 7.372000294896792e-08, "loss": 0.2771, "step": 47990 }, { "epoch": 23.04, "learning_rate": 7.298948443822229e-08, "loss": 0.5693, "step": 48000 }, { "epoch": 23.04, "eval_accuracy": 0.8747368421052631, "eval_f1": 0.8747368421052631, "eval_loss": 0.8009958863258362, "eval_runtime": 732.1449, "eval_samples_per_second": 6.488, "eval_steps_per_second": 1.623, "step": 48000 }, { "epoch": 24.0, "learning_rate": 7.226258576217865e-08, "loss": 0.3753, "step": 48010 }, { "epoch": 24.0, "learning_rate": 7.15393072751161e-08, "loss": 0.4069, "step": 48020 }, { "epoch": 24.0, "learning_rate": 7.081964932955349e-08, "loss": 0.4612, "step": 48030 }, { "epoch": 24.0, "learning_rate": 7.010361227624357e-08, "loss": 0.3692, "step": 48040 }, { "epoch": 24.0, "learning_rate": 6.939119646417302e-08, "loss": 0.3168, "step": 48050 }, { "epoch": 24.0, "learning_rate": 6.868240224056577e-08, "loss": 0.5174, "step": 48060 }, { "epoch": 24.0, "learning_rate": 6.79772299508788e-08, "loss": 0.5508, "step": 48070 }, { "epoch": 24.0, "learning_rate": 6.727567993880468e-08, "loss": 0.5056, "step": 48080 }, { "epoch": 24.0, "learning_rate": 6.657775254626991e-08, "loss": 0.4838, "step": 48090 }, { "epoch": 24.0, "learning_rate": 6.588344811343738e-08, "loss": 0.3053, "step": 48100 }, { "epoch": 24.0, "learning_rate": 6.519276697870142e-08, "loss": 0.4235, "step": 48110 }, { "epoch": 24.0, "learning_rate": 6.450570947869106e-08, "loss": 0.5998, "step": 48120 }, { "epoch": 24.0, "learning_rate": 6.382227594827012e-08, "loss": 0.5087, "step": 48130 }, { "epoch": 24.0, "learning_rate": 6.314246672053715e-08, "loss": 0.4317, "step": 48140 }, { "epoch": 24.0, "learning_rate": 6.246628212682209e-08, "loss": 0.8639, "step": 48150 }, { "epoch": 24.0, "learning_rate": 6.179372249668802e-08, "loss": 0.3694, "step": 48160 }, { "epoch": 24.0, "learning_rate": 6.112478815793437e-08, "loss": 0.2338, "step": 48170 }, { "epoch": 24.0, "learning_rate": 6.045947943658953e-08, "loss": 0.2262, "step": 48180 }, { "epoch": 24.0, "learning_rate": 5.979779665691826e-08, "loss": 0.5833, "step": 48190 }, { "epoch": 24.0, "learning_rate": 5.9139740141416765e-08, "loss": 0.3248, "step": 48200 }, { "epoch": 24.0, "learning_rate": 5.848531021081266e-08, "loss": 1.0088, "step": 48210 }, { "epoch": 24.0, "learning_rate": 5.7834507184067466e-08, "loss": 0.3743, "step": 48220 }, { "epoch": 24.0, "learning_rate": 5.718733137837578e-08, "loss": 0.4842, "step": 48230 }, { "epoch": 24.0, "learning_rate": 5.6543783109161974e-08, "loss": 0.2676, "step": 48240 }, { "epoch": 24.0, "learning_rate": 5.5903862690085125e-08, "loss": 0.3801, "step": 48250 }, { "epoch": 24.01, "learning_rate": 5.526757043303243e-08, "loss": 0.7997, "step": 48260 }, { "epoch": 24.01, "learning_rate": 5.463490664812748e-08, "loss": 0.7286, "step": 48270 }, { "epoch": 24.01, "learning_rate": 5.4005871643721114e-08, "loss": 0.2438, "step": 48280 }, { "epoch": 24.01, "learning_rate": 5.3380465726398096e-08, "loss": 0.6707, "step": 48290 }, { "epoch": 24.01, "learning_rate": 5.275868920097293e-08, "loss": 0.5388, "step": 48300 }, { "epoch": 24.01, "learning_rate": 5.214054237049321e-08, "loss": 0.3983, "step": 48310 }, { "epoch": 24.01, "learning_rate": 5.152602553623459e-08, "loss": 0.3292, "step": 48320 }, { "epoch": 24.01, "learning_rate": 5.091513899770667e-08, "loss": 0.5818, "step": 48330 }, { "epoch": 24.01, "learning_rate": 5.0307883052647944e-08, "loss": 0.4123, "step": 48340 }, { "epoch": 24.01, "learning_rate": 4.970425799702666e-08, "loss": 0.389, "step": 48350 }, { "epoch": 24.01, "learning_rate": 4.910426412504332e-08, "loss": 0.258, "step": 48360 }, { "epoch": 24.01, "learning_rate": 4.85079017291265e-08, "loss": 0.6119, "step": 48370 }, { "epoch": 24.01, "learning_rate": 4.791517109993704e-08, "loss": 0.5102, "step": 48380 }, { "epoch": 24.01, "learning_rate": 4.732607252636384e-08, "loss": 0.5128, "step": 48390 }, { "epoch": 24.01, "learning_rate": 4.6740606295527236e-08, "loss": 0.1333, "step": 48400 }, { "epoch": 24.01, "learning_rate": 4.615877269277563e-08, "loss": 0.3087, "step": 48410 }, { "epoch": 24.01, "learning_rate": 4.558057200168802e-08, "loss": 0.6449, "step": 48420 }, { "epoch": 24.01, "learning_rate": 4.500600450407233e-08, "loss": 0.3746, "step": 48430 }, { "epoch": 24.01, "learning_rate": 4.4435070479965366e-08, "loss": 0.5859, "step": 48440 }, { "epoch": 24.01, "learning_rate": 4.386777020763455e-08, "loss": 0.3587, "step": 48450 }, { "epoch": 24.01, "learning_rate": 4.330410396357371e-08, "loss": 0.2082, "step": 48460 }, { "epoch": 24.01, "learning_rate": 4.274407202250807e-08, "loss": 0.3468, "step": 48470 }, { "epoch": 24.01, "learning_rate": 4.2187674657390154e-08, "loss": 0.8718, "step": 48480 }, { "epoch": 24.01, "learning_rate": 4.1634912139400514e-08, "loss": 0.4234, "step": 48490 }, { "epoch": 24.01, "learning_rate": 4.108578473795033e-08, "loss": 0.6711, "step": 48500 }, { "epoch": 24.01, "learning_rate": 4.0540292720675495e-08, "loss": 0.4326, "step": 48510 }, { "epoch": 24.01, "learning_rate": 3.999843635344419e-08, "loss": 0.6955, "step": 48520 }, { "epoch": 24.01, "learning_rate": 3.946021590035015e-08, "loss": 0.3644, "step": 48530 }, { "epoch": 24.01, "learning_rate": 3.892563162371521e-08, "loss": 0.1796, "step": 48540 }, { "epoch": 24.01, "learning_rate": 3.839468378408845e-08, "loss": 0.4076, "step": 48550 }, { "epoch": 24.01, "learning_rate": 3.7867372640248697e-08, "loss": 0.6121, "step": 48560 }, { "epoch": 24.01, "learning_rate": 3.734369844920038e-08, "loss": 0.446, "step": 48570 }, { "epoch": 24.01, "learning_rate": 3.6823661466176825e-08, "loss": 0.4861, "step": 48580 }, { "epoch": 24.01, "learning_rate": 3.6307261944636126e-08, "loss": 0.4031, "step": 48590 }, { "epoch": 24.01, "learning_rate": 3.579450013626612e-08, "loss": 0.3043, "step": 48600 }, { "epoch": 24.01, "learning_rate": 3.528537629098022e-08, "loss": 0.3778, "step": 48610 }, { "epoch": 24.01, "learning_rate": 3.477989065692078e-08, "loss": 0.4012, "step": 48620 }, { "epoch": 24.01, "learning_rate": 3.4278043480453216e-08, "loss": 0.3413, "step": 48630 }, { "epoch": 24.01, "learning_rate": 3.377983500617271e-08, "loss": 0.6273, "step": 48640 }, { "epoch": 24.01, "learning_rate": 3.328526547690003e-08, "loss": 0.7842, "step": 48650 }, { "epoch": 24.01, "learning_rate": 3.279433513368235e-08, "loss": 0.462, "step": 48660 }, { "epoch": 24.01, "learning_rate": 3.230704421579328e-08, "loss": 0.6631, "step": 48670 }, { "epoch": 24.01, "learning_rate": 3.1823392960732e-08, "loss": 0.3991, "step": 48680 }, { "epoch": 24.01, "learning_rate": 3.134338160422412e-08, "loss": 0.7023, "step": 48690 }, { "epoch": 24.01, "learning_rate": 3.086701038022249e-08, "loss": 0.2972, "step": 48700 }, { "epoch": 24.01, "learning_rate": 3.039427952090307e-08, "loss": 0.4932, "step": 48710 }, { "epoch": 24.01, "learning_rate": 2.992518925667154e-08, "loss": 0.3013, "step": 48720 }, { "epoch": 24.01, "learning_rate": 2.9459739816154186e-08, "loss": 0.3875, "step": 48730 }, { "epoch": 24.01, "learning_rate": 2.8997931426206214e-08, "loss": 0.4445, "step": 48740 }, { "epoch": 24.02, "learning_rate": 2.8539764311908412e-08, "loss": 0.5529, "step": 48750 }, { "epoch": 24.02, "learning_rate": 2.8085238696565485e-08, "loss": 0.6926, "step": 48760 }, { "epoch": 24.02, "learning_rate": 2.7634354801706896e-08, "loss": 0.3991, "step": 48770 }, { "epoch": 24.02, "learning_rate": 2.7187112847087693e-08, "loss": 0.9109, "step": 48780 }, { "epoch": 24.02, "learning_rate": 2.6743513050690172e-08, "loss": 0.7538, "step": 48790 }, { "epoch": 24.02, "learning_rate": 2.6303555628717225e-08, "loss": 0.4482, "step": 48800 }, { "epoch": 24.02, "learning_rate": 2.5867240795600656e-08, "loss": 0.3986, "step": 48810 }, { "epoch": 24.02, "learning_rate": 2.5434568763993692e-08, "loss": 0.6185, "step": 48820 }, { "epoch": 24.02, "learning_rate": 2.5005539744775984e-08, "loss": 0.6925, "step": 48830 }, { "epoch": 24.02, "learning_rate": 2.45801539470511e-08, "loss": 0.4379, "step": 48840 }, { "epoch": 24.02, "learning_rate": 2.4158411578146523e-08, "loss": 0.4391, "step": 48850 }, { "epoch": 24.02, "learning_rate": 2.3740312843614497e-08, "loss": 0.89, "step": 48860 }, { "epoch": 24.02, "learning_rate": 2.332585794723119e-08, "loss": 0.3428, "step": 48870 }, { "epoch": 24.02, "learning_rate": 2.291504709099751e-08, "loss": 0.7952, "step": 48880 }, { "epoch": 24.02, "learning_rate": 2.2507880475136634e-08, "loss": 0.6414, "step": 48890 }, { "epoch": 24.02, "learning_rate": 2.2104358298098147e-08, "loss": 0.3109, "step": 48900 }, { "epoch": 24.02, "learning_rate": 2.1704480756552237e-08, "loss": 0.159, "step": 48910 }, { "epoch": 24.02, "learning_rate": 2.1308248045395494e-08, "loss": 0.4725, "step": 48920 }, { "epoch": 24.02, "learning_rate": 2.0915660357746778e-08, "loss": 0.7352, "step": 48930 }, { "epoch": 24.02, "learning_rate": 2.05267178849472e-08, "loss": 0.6739, "step": 48940 }, { "epoch": 24.02, "learning_rate": 2.0141420816564282e-08, "loss": 0.8071, "step": 48950 }, { "epoch": 24.02, "learning_rate": 1.9759769340386148e-08, "loss": 0.5585, "step": 48960 }, { "epoch": 24.02, "learning_rate": 1.9381763642425665e-08, "loss": 0.6838, "step": 48970 }, { "epoch": 24.02, "learning_rate": 1.9007403906918797e-08, "loss": 0.6336, "step": 48980 }, { "epoch": 24.02, "learning_rate": 1.8636690316322092e-08, "loss": 0.5289, "step": 48990 }, { "epoch": 24.02, "learning_rate": 1.8269623051318517e-08, "loss": 0.7095, "step": 49000 }, { "epoch": 24.02, "learning_rate": 1.7906202290810803e-08, "loss": 0.4653, "step": 49010 }, { "epoch": 24.02, "learning_rate": 1.7546428211927257e-08, "loss": 0.2675, "step": 49020 }, { "epoch": 24.02, "learning_rate": 1.7190300990016784e-08, "loss": 0.4124, "step": 49030 }, { "epoch": 24.02, "learning_rate": 1.6837820798650538e-08, "loss": 0.6305, "step": 49040 }, { "epoch": 24.02, "learning_rate": 1.6488987809625268e-08, "loss": 0.6233, "step": 49050 }, { "epoch": 24.02, "learning_rate": 1.6143802192955805e-08, "loss": 0.694, "step": 49060 }, { "epoch": 24.02, "learning_rate": 1.580226411688257e-08, "loss": 0.252, "step": 49070 }, { "epoch": 24.02, "learning_rate": 1.5464373747866577e-08, "loss": 0.4817, "step": 49080 }, { "epoch": 24.02, "learning_rate": 1.5130131250591093e-08, "loss": 0.1697, "step": 49090 }, { "epoch": 24.02, "learning_rate": 1.4799536787963308e-08, "loss": 0.2944, "step": 49100 }, { "epoch": 24.02, "learning_rate": 1.4472590521110162e-08, "loss": 0.7365, "step": 49110 }, { "epoch": 24.02, "learning_rate": 1.4149292609380027e-08, "loss": 0.5104, "step": 49120 }, { "epoch": 24.02, "learning_rate": 1.3829643210346854e-08, "loss": 0.7771, "step": 49130 }, { "epoch": 24.02, "learning_rate": 1.3513642479801857e-08, "loss": 0.5946, "step": 49140 }, { "epoch": 24.02, "learning_rate": 1.3201290571760172e-08, "loss": 0.5192, "step": 49150 }, { "epoch": 24.02, "learning_rate": 1.2892587638460018e-08, "loss": 0.4447, "step": 49160 }, { "epoch": 24.02, "learning_rate": 1.258753383035771e-08, "loss": 0.5295, "step": 49170 }, { "epoch": 24.02, "learning_rate": 1.2286129296132653e-08, "loss": 0.3215, "step": 49180 }, { "epoch": 24.02, "learning_rate": 1.1988374182687334e-08, "loss": 0.8657, "step": 49190 }, { "epoch": 24.02, "learning_rate": 1.1694268635142335e-08, "loss": 0.4053, "step": 49200 }, { "epoch": 24.02, "learning_rate": 1.1403812796842161e-08, "loss": 0.5778, "step": 49210 }, { "epoch": 24.02, "learning_rate": 1.1117006809351072e-08, "loss": 0.4498, "step": 49220 }, { "epoch": 24.02, "learning_rate": 1.0833850812455581e-08, "loss": 0.4014, "step": 49230 }, { "epoch": 24.02, "learning_rate": 1.0554344944161132e-08, "loss": 0.3483, "step": 49240 }, { "epoch": 24.02, "learning_rate": 1.027848934069625e-08, "loss": 0.2246, "step": 49250 }, { "epoch": 24.03, "learning_rate": 1.0006284136509224e-08, "loss": 0.4773, "step": 49260 }, { "epoch": 24.03, "learning_rate": 9.737729464269762e-09, "loss": 0.4115, "step": 49270 }, { "epoch": 24.03, "learning_rate": 9.472825454868995e-09, "loss": 0.5762, "step": 49280 }, { "epoch": 24.03, "learning_rate": 9.211572237416145e-09, "loss": 0.7074, "step": 49290 }, { "epoch": 24.03, "learning_rate": 8.953969939245188e-09, "loss": 0.3273, "step": 49300 }, { "epoch": 24.03, "learning_rate": 8.700018685905697e-09, "loss": 0.2681, "step": 49310 }, { "epoch": 24.03, "learning_rate": 8.449718601171997e-09, "loss": 0.2451, "step": 49320 }, { "epoch": 24.03, "learning_rate": 8.203069807038165e-09, "loss": 0.2841, "step": 49330 }, { "epoch": 24.03, "learning_rate": 7.960072423715547e-09, "loss": 0.3003, "step": 49340 }, { "epoch": 24.03, "learning_rate": 7.720726569640235e-09, "loss": 0.3248, "step": 49350 }, { "epoch": 24.03, "learning_rate": 7.48503236146475e-09, "loss": 0.2326, "step": 49360 }, { "epoch": 24.03, "learning_rate": 7.252989914064701e-09, "loss": 0.2678, "step": 49370 }, { "epoch": 24.03, "learning_rate": 7.024599340534621e-09, "loss": 0.3889, "step": 49380 }, { "epoch": 24.03, "learning_rate": 6.7998607521888026e-09, "loss": 0.6104, "step": 49390 }, { "epoch": 24.03, "learning_rate": 6.5787742585621256e-09, "loss": 0.3541, "step": 49400 }, { "epoch": 24.03, "learning_rate": 6.361339967410895e-09, "loss": 0.4186, "step": 49410 }, { "epoch": 24.03, "learning_rate": 6.147557984707841e-09, "loss": 0.5533, "step": 49420 }, { "epoch": 24.03, "learning_rate": 5.937428414648782e-09, "loss": 0.4036, "step": 49430 }, { "epoch": 24.03, "learning_rate": 5.730951359648462e-09, "loss": 0.8842, "step": 49440 }, { "epoch": 24.03, "learning_rate": 5.528126920341381e-09, "loss": 0.4078, "step": 49450 }, { "epoch": 24.03, "learning_rate": 5.3289551955809645e-09, "loss": 0.3735, "step": 49460 }, { "epoch": 24.03, "learning_rate": 5.133436282441228e-09, "loss": 0.5025, "step": 49470 }, { "epoch": 24.03, "learning_rate": 4.941570276215945e-09, "loss": 0.2423, "step": 49480 }, { "epoch": 24.03, "learning_rate": 4.753357270418646e-09, "loss": 0.5151, "step": 49490 }, { "epoch": 24.03, "learning_rate": 4.568797356781784e-09, "loss": 0.4716, "step": 49500 }, { "epoch": 24.03, "learning_rate": 4.387890625257574e-09, "loss": 0.689, "step": 49510 }, { "epoch": 24.03, "learning_rate": 4.210637164017983e-09, "loss": 0.4343, "step": 49520 }, { "epoch": 24.03, "learning_rate": 4.037037059453908e-09, "loss": 0.1581, "step": 49530 }, { "epoch": 24.03, "learning_rate": 3.8670903961751655e-09, "loss": 0.5421, "step": 49540 }, { "epoch": 24.03, "learning_rate": 3.700797257013e-09, "loss": 0.4423, "step": 49550 }, { "epoch": 24.03, "learning_rate": 3.5381577230167437e-09, "loss": 0.4451, "step": 49560 }, { "epoch": 24.03, "learning_rate": 3.3791718734538235e-09, "loss": 0.4525, "step": 49570 }, { "epoch": 24.03, "learning_rate": 3.2238397858122546e-09, "loss": 0.3258, "step": 49580 }, { "epoch": 24.03, "learning_rate": 3.072161535799811e-09, "loss": 0.4485, "step": 49590 }, { "epoch": 24.03, "learning_rate": 2.924137197342358e-09, "loss": 0.6284, "step": 49600 }, { "epoch": 24.03, "learning_rate": 2.7797668425846857e-09, "loss": 0.3722, "step": 49610 }, { "epoch": 24.03, "learning_rate": 2.6390505418913413e-09, "loss": 0.6805, "step": 49620 }, { "epoch": 24.03, "learning_rate": 2.5019883638457973e-09, "loss": 0.9199, "step": 49630 }, { "epoch": 24.03, "learning_rate": 2.368580375250451e-09, "loss": 0.2866, "step": 49640 }, { "epoch": 24.03, "learning_rate": 2.2388266411266234e-09, "loss": 0.4129, "step": 49650 }, { "epoch": 24.03, "learning_rate": 2.1127272247145614e-09, "loss": 0.718, "step": 49660 }, { "epoch": 24.03, "learning_rate": 1.9902821874742684e-09, "loss": 0.339, "step": 49670 }, { "epoch": 24.03, "learning_rate": 1.8714915890838404e-09, "loss": 0.6156, "step": 49680 }, { "epoch": 24.03, "learning_rate": 1.7563554874402975e-09, "loss": 0.3388, "step": 49690 }, { "epoch": 24.03, "learning_rate": 1.644873938658753e-09, "loss": 0.5114, "step": 49700 }, { "epoch": 24.03, "learning_rate": 1.537046997074909e-09, "loss": 0.8165, "step": 49710 }, { "epoch": 24.03, "learning_rate": 1.4328747152417277e-09, "loss": 0.5025, "step": 49720 }, { "epoch": 24.03, "learning_rate": 1.332357143932761e-09, "loss": 0.2093, "step": 49730 }, { "epoch": 24.03, "learning_rate": 1.2354943321371548e-09, "loss": 0.6465, "step": 49740 }, { "epoch": 24.04, "learning_rate": 1.1422863270654781e-09, "loss": 0.7074, "step": 49750 }, { "epoch": 24.04, "learning_rate": 1.0527331741472247e-09, "loss": 0.3695, "step": 49760 }, { "epoch": 24.04, "learning_rate": 9.668349170274814e-10, "loss": 0.6652, "step": 49770 }, { "epoch": 24.04, "learning_rate": 8.845915975735919e-10, "loss": 0.4753, "step": 49780 }, { "epoch": 24.04, "learning_rate": 8.060032558693253e-10, "loss": 0.3091, "step": 49790 }, { "epoch": 24.04, "learning_rate": 7.310699302182089e-10, "loss": 0.4409, "step": 49800 }, { "epoch": 24.04, "learning_rate": 6.597916571418617e-10, "loss": 0.2497, "step": 49810 }, { "epoch": 24.04, "learning_rate": 5.92168471379162e-10, "loss": 0.5315, "step": 49820 }, { "epoch": 24.04, "learning_rate": 5.282004058895784e-10, "loss": 0.5753, "step": 49830 }, { "epoch": 24.04, "learning_rate": 4.678874918515041e-10, "loss": 0.5933, "step": 49840 }, { "epoch": 24.04, "learning_rate": 4.112297586589264e-10, "loss": 0.6205, "step": 49850 }, { "epoch": 24.04, "learning_rate": 3.582272339272552e-10, "loss": 0.3334, "step": 49860 }, { "epoch": 24.04, "learning_rate": 3.088799434891598e-10, "loss": 0.5659, "step": 49870 }, { "epoch": 24.04, "learning_rate": 2.631879113954017e-10, "loss": 0.6555, "step": 49880 }, { "epoch": 24.04, "learning_rate": 2.2115115991566682e-10, "loss": 0.7503, "step": 49890 }, { "epoch": 24.04, "learning_rate": 1.8276970953939875e-10, "loss": 0.4174, "step": 49900 }, { "epoch": 24.04, "learning_rate": 1.48043578971635e-10, "loss": 0.234, "step": 49910 }, { "epoch": 24.04, "learning_rate": 1.1697278513800313e-10, "loss": 0.521, "step": 49920 }, { "epoch": 24.04, "learning_rate": 8.955734318305542e-11, "loss": 0.5629, "step": 49930 }, { "epoch": 24.04, "learning_rate": 6.579726646777085e-11, "loss": 0.6017, "step": 49940 }, { "epoch": 24.04, "learning_rate": 4.5692566572053116e-11, "loss": 0.6729, "step": 49950 }, { "epoch": 24.04, "learning_rate": 2.924325329556332e-11, "loss": 0.2276, "step": 49960 }, { "epoch": 24.04, "learning_rate": 1.6449334655221914e-11, "loss": 0.4924, "step": 49970 }, { "epoch": 24.04, "learning_rate": 7.310816886874072e-12, "loss": 0.5423, "step": 49980 }, { "epoch": 24.04, "learning_rate": 1.8277044444570124e-12, "loss": 0.3546, "step": 49990 }, { "epoch": 24.04, "learning_rate": 0.0, "loss": 0.4764, "step": 50000 }, { "epoch": 24.04, "eval_accuracy": 0.8789473684210526, "eval_f1": 0.8789473684210526, "eval_loss": 0.8117492198944092, "eval_runtime": 766.7022, "eval_samples_per_second": 6.195, "eval_steps_per_second": 1.549, "step": 50000 }, { "epoch": 24.04, "step": 50000, "total_flos": 2.49219585441792e+20, "train_loss": 0.7113624122858048, "train_runtime": 65187.9107, "train_samples_per_second": 3.068, "train_steps_per_second": 0.767 }, { "epoch": 24.04, "eval_accuracy": 0.8757894736842106, "eval_f1": 0.8757894736842106, "eval_loss": 0.7742094993591309, "eval_runtime": 766.7571, "eval_samples_per_second": 6.195, "eval_steps_per_second": 1.549, "step": 50000 }, { "epoch": 24.04, "eval_accuracy": 0.870116156282999, "eval_f1": 0.870116156282999, "eval_loss": 0.7973663210868835, "eval_runtime": 771.4511, "eval_samples_per_second": 6.138, "eval_steps_per_second": 1.535, "step": 50000 } ], "logging_steps": 10, "max_steps": 50000, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 2.49219585441792e+20, "trial_name": null, "trial_params": null }