diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6502 @@ +{ + "best_metric": 0.9375, + "best_model_checkpoint": "videomae-base-finetuned-kinetics-finetuned-nba-binary-data-2-batch-50-epochs-new-database/checkpoint-6800", + "epoch": 49.02, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6874, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6804, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 0.6966, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6969, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 0.6994, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.7038, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.7008, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7033, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.5e-06, + "loss": 0.6754, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.6911, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6836, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 0.6869, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.6629, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 7.000000000000001e-06, + "loss": 0.7001, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 7.5e-06, + "loss": 0.6755, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6481, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 8.500000000000002e-06, + "loss": 0.6, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 0.6837, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.5831, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.6618, + "step": 200 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.6875, + "eval_loss": 0.6292815208435059, + "eval_runtime": 41.8961, + "eval_samples_per_second": 1.146, + "eval_steps_per_second": 0.573, + "step": 200 + }, + { + "epoch": 1.0, + "learning_rate": 1.05e-05, + "loss": 0.5938, + "step": 210 + }, + { + "epoch": 1.0, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.4928, + "step": 220 + }, + { + "epoch": 1.0, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.5788, + "step": 230 + }, + { + "epoch": 1.0, + "learning_rate": 1.2e-05, + "loss": 0.5877, + "step": 240 + }, + { + "epoch": 1.0, + "learning_rate": 1.25e-05, + "loss": 0.4394, + "step": 250 + }, + { + "epoch": 1.01, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.9105, + "step": 260 + }, + { + "epoch": 1.01, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.355, + "step": 270 + }, + { + "epoch": 1.01, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.4857, + "step": 280 + }, + { + "epoch": 1.01, + "learning_rate": 1.45e-05, + "loss": 1.1395, + "step": 290 + }, + { + "epoch": 1.01, + "learning_rate": 1.5e-05, + "loss": 0.635, + "step": 300 + }, + { + "epoch": 1.01, + "learning_rate": 1.55e-05, + "loss": 0.6305, + "step": 310 + }, + { + "epoch": 1.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.807, + "step": 320 + }, + { + "epoch": 1.01, + "learning_rate": 1.65e-05, + "loss": 0.4485, + "step": 330 + }, + { + "epoch": 1.01, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.4862, + "step": 340 + }, + { + "epoch": 1.01, + "learning_rate": 1.75e-05, + "loss": 0.782, + "step": 350 + }, + { + "epoch": 1.02, + "learning_rate": 1.8e-05, + "loss": 0.7402, + "step": 360 + }, + { + "epoch": 1.02, + "learning_rate": 1.85e-05, + "loss": 0.3372, + "step": 370 + }, + { + "epoch": 1.02, + "learning_rate": 1.9e-05, + "loss": 0.7372, + "step": 380 + }, + { + "epoch": 1.02, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.6038, + "step": 390 + }, + { + "epoch": 1.02, + "learning_rate": 2e-05, + "loss": 0.5781, + "step": 400 + }, + { + "epoch": 1.02, + "eval_accuracy": 0.6041666666666666, + "eval_loss": 1.4659703969955444, + "eval_runtime": 12.3416, + "eval_samples_per_second": 3.889, + "eval_steps_per_second": 1.945, + "step": 400 + }, + { + "epoch": 2.0, + "learning_rate": 2.05e-05, + "loss": 0.4226, + "step": 410 + }, + { + "epoch": 2.0, + "learning_rate": 2.1e-05, + "loss": 0.5528, + "step": 420 + }, + { + "epoch": 2.0, + "learning_rate": 2.15e-05, + "loss": 0.3744, + "step": 430 + }, + { + "epoch": 2.0, + "learning_rate": 2.2000000000000003e-05, + "loss": 1.119, + "step": 440 + }, + { + "epoch": 2.0, + "learning_rate": 2.25e-05, + "loss": 0.4872, + "step": 450 + }, + { + "epoch": 2.01, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.2642, + "step": 460 + }, + { + "epoch": 2.01, + "learning_rate": 2.35e-05, + "loss": 0.9276, + "step": 470 + }, + { + "epoch": 2.01, + "learning_rate": 2.4e-05, + "loss": 0.2525, + "step": 480 + }, + { + "epoch": 2.01, + "learning_rate": 2.45e-05, + "loss": 1.6111, + "step": 490 + }, + { + "epoch": 2.01, + "learning_rate": 2.5e-05, + "loss": 0.0984, + "step": 500 + }, + { + "epoch": 2.01, + "learning_rate": 2.5500000000000003e-05, + "loss": 1.4009, + "step": 510 + }, + { + "epoch": 2.01, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.7135, + "step": 520 + }, + { + "epoch": 2.01, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.657, + "step": 530 + }, + { + "epoch": 2.01, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.3769, + "step": 540 + }, + { + "epoch": 2.02, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.5822, + "step": 550 + }, + { + "epoch": 2.02, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.7176, + "step": 560 + }, + { + "epoch": 2.02, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.2372, + "step": 570 + }, + { + "epoch": 2.02, + "learning_rate": 2.9e-05, + "loss": 0.5913, + "step": 580 + }, + { + "epoch": 2.02, + "learning_rate": 2.95e-05, + "loss": 1.3415, + "step": 590 + }, + { + "epoch": 2.02, + "learning_rate": 3e-05, + "loss": 0.8554, + "step": 600 + }, + { + "epoch": 2.02, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 0.8739717602729797, + "eval_runtime": 11.7537, + "eval_samples_per_second": 4.084, + "eval_steps_per_second": 2.042, + "step": 600 + }, + { + "epoch": 3.0, + "learning_rate": 3.05e-05, + "loss": 0.3129, + "step": 610 + }, + { + "epoch": 3.0, + "learning_rate": 3.1e-05, + "loss": 0.8611, + "step": 620 + }, + { + "epoch": 3.0, + "learning_rate": 3.15e-05, + "loss": 0.7397, + "step": 630 + }, + { + "epoch": 3.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6457, + "step": 640 + }, + { + "epoch": 3.0, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.0948, + "step": 650 + }, + { + "epoch": 3.01, + "learning_rate": 3.3e-05, + "loss": 0.6953, + "step": 660 + }, + { + "epoch": 3.01, + "learning_rate": 3.35e-05, + "loss": 0.8116, + "step": 670 + }, + { + "epoch": 3.01, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.3654, + "step": 680 + }, + { + "epoch": 3.01, + "learning_rate": 3.45e-05, + "loss": 0.7707, + "step": 690 + }, + { + "epoch": 3.01, + "learning_rate": 3.5e-05, + "loss": 0.9505, + "step": 700 + }, + { + "epoch": 3.01, + "learning_rate": 3.55e-05, + "loss": 1.4837, + "step": 710 + }, + { + "epoch": 3.01, + "learning_rate": 3.6e-05, + "loss": 0.7708, + "step": 720 + }, + { + "epoch": 3.01, + "learning_rate": 3.65e-05, + "loss": 0.855, + "step": 730 + }, + { + "epoch": 3.01, + "learning_rate": 3.7e-05, + "loss": 0.2798, + "step": 740 + }, + { + "epoch": 3.02, + "learning_rate": 3.7500000000000003e-05, + "loss": 1.2943, + "step": 750 + }, + { + "epoch": 3.02, + "learning_rate": 3.8e-05, + "loss": 1.3738, + "step": 760 + }, + { + "epoch": 3.02, + "learning_rate": 3.85e-05, + "loss": 0.2935, + "step": 770 + }, + { + "epoch": 3.02, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.6118, + "step": 780 + }, + { + "epoch": 3.02, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.4122, + "step": 790 + }, + { + "epoch": 3.02, + "learning_rate": 4e-05, + "loss": 0.4445, + "step": 800 + }, + { + "epoch": 3.02, + "eval_accuracy": 0.6666666666666666, + "eval_loss": 1.0660134553909302, + "eval_runtime": 12.6381, + "eval_samples_per_second": 3.798, + "eval_steps_per_second": 1.899, + "step": 800 + }, + { + "epoch": 4.0, + "learning_rate": 4.05e-05, + "loss": 1.1546, + "step": 810 + }, + { + "epoch": 4.0, + "learning_rate": 4.1e-05, + "loss": 0.7915, + "step": 820 + }, + { + "epoch": 4.0, + "learning_rate": 4.15e-05, + "loss": 0.936, + "step": 830 + }, + { + "epoch": 4.0, + "learning_rate": 4.2e-05, + "loss": 0.4684, + "step": 840 + }, + { + "epoch": 4.0, + "learning_rate": 4.25e-05, + "loss": 0.9355, + "step": 850 + }, + { + "epoch": 4.01, + "learning_rate": 4.3e-05, + "loss": 0.8043, + "step": 860 + }, + { + "epoch": 4.01, + "learning_rate": 4.35e-05, + "loss": 0.2802, + "step": 870 + }, + { + "epoch": 4.01, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.5654, + "step": 880 + }, + { + "epoch": 4.01, + "learning_rate": 4.4500000000000004e-05, + "loss": 1.4987, + "step": 890 + }, + { + "epoch": 4.01, + "learning_rate": 4.5e-05, + "loss": 0.7365, + "step": 900 + }, + { + "epoch": 4.01, + "learning_rate": 4.55e-05, + "loss": 0.141, + "step": 910 + }, + { + "epoch": 4.01, + "learning_rate": 4.600000000000001e-05, + "loss": 0.8596, + "step": 920 + }, + { + "epoch": 4.01, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.9623, + "step": 930 + }, + { + "epoch": 4.01, + "learning_rate": 4.7e-05, + "loss": 1.1488, + "step": 940 + }, + { + "epoch": 4.01, + "learning_rate": 4.75e-05, + "loss": 0.237, + "step": 950 + }, + { + "epoch": 4.02, + "learning_rate": 4.8e-05, + "loss": 0.9762, + "step": 960 + }, + { + "epoch": 4.02, + "learning_rate": 4.85e-05, + "loss": 0.7712, + "step": 970 + }, + { + "epoch": 4.02, + "learning_rate": 4.9e-05, + "loss": 0.9408, + "step": 980 + }, + { + "epoch": 4.02, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.5351, + "step": 990 + }, + { + "epoch": 4.02, + "learning_rate": 5e-05, + "loss": 0.3265, + "step": 1000 + }, + { + "epoch": 4.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 0.6634658575057983, + "eval_runtime": 11.7099, + "eval_samples_per_second": 4.099, + "eval_steps_per_second": 2.05, + "step": 1000 + }, + { + "epoch": 5.0, + "learning_rate": 4.994444444444445e-05, + "loss": 1.064, + "step": 1010 + }, + { + "epoch": 5.0, + "learning_rate": 4.9888888888888894e-05, + "loss": 1.0349, + "step": 1020 + }, + { + "epoch": 5.0, + "learning_rate": 4.9833333333333336e-05, + "loss": 1.0541, + "step": 1030 + }, + { + "epoch": 5.0, + "learning_rate": 4.977777777777778e-05, + "loss": 0.7495, + "step": 1040 + }, + { + "epoch": 5.0, + "learning_rate": 4.972222222222223e-05, + "loss": 0.5859, + "step": 1050 + }, + { + "epoch": 5.01, + "learning_rate": 4.966666666666667e-05, + "loss": 0.7543, + "step": 1060 + }, + { + "epoch": 5.01, + "learning_rate": 4.961111111111111e-05, + "loss": 0.9994, + "step": 1070 + }, + { + "epoch": 5.01, + "learning_rate": 4.955555555555556e-05, + "loss": 0.4021, + "step": 1080 + }, + { + "epoch": 5.01, + "learning_rate": 4.9500000000000004e-05, + "loss": 0.6197, + "step": 1090 + }, + { + "epoch": 5.01, + "learning_rate": 4.9444444444444446e-05, + "loss": 0.6279, + "step": 1100 + }, + { + "epoch": 5.01, + "learning_rate": 4.938888888888889e-05, + "loss": 0.598, + "step": 1110 + }, + { + "epoch": 5.01, + "learning_rate": 4.933333333333334e-05, + "loss": 0.7877, + "step": 1120 + }, + { + "epoch": 5.01, + "learning_rate": 4.927777777777778e-05, + "loss": 1.4482, + "step": 1130 + }, + { + "epoch": 5.01, + "learning_rate": 4.922222222222222e-05, + "loss": 0.7729, + "step": 1140 + }, + { + "epoch": 5.01, + "learning_rate": 4.9166666666666665e-05, + "loss": 0.6785, + "step": 1150 + }, + { + "epoch": 5.02, + "learning_rate": 4.9111111111111114e-05, + "loss": 0.5642, + "step": 1160 + }, + { + "epoch": 5.02, + "learning_rate": 4.905555555555556e-05, + "loss": 0.2877, + "step": 1170 + }, + { + "epoch": 5.02, + "learning_rate": 4.9e-05, + "loss": 1.1808, + "step": 1180 + }, + { + "epoch": 5.02, + "learning_rate": 4.894444444444445e-05, + "loss": 0.8441, + "step": 1190 + }, + { + "epoch": 5.02, + "learning_rate": 4.888888888888889e-05, + "loss": 0.5417, + "step": 1200 + }, + { + "epoch": 5.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.4705321490764618, + "eval_runtime": 12.2541, + "eval_samples_per_second": 3.917, + "eval_steps_per_second": 1.959, + "step": 1200 + }, + { + "epoch": 6.0, + "learning_rate": 4.883333333333334e-05, + "loss": 0.4682, + "step": 1210 + }, + { + "epoch": 6.0, + "learning_rate": 4.8777777777777775e-05, + "loss": 1.2522, + "step": 1220 + }, + { + "epoch": 6.0, + "learning_rate": 4.8722222222222224e-05, + "loss": 0.8509, + "step": 1230 + }, + { + "epoch": 6.0, + "learning_rate": 4.866666666666667e-05, + "loss": 0.4758, + "step": 1240 + }, + { + "epoch": 6.0, + "learning_rate": 4.8611111111111115e-05, + "loss": 0.9578, + "step": 1250 + }, + { + "epoch": 6.01, + "learning_rate": 4.855555555555556e-05, + "loss": 0.6471, + "step": 1260 + }, + { + "epoch": 6.01, + "learning_rate": 4.85e-05, + "loss": 0.2263, + "step": 1270 + }, + { + "epoch": 6.01, + "learning_rate": 4.844444444444445e-05, + "loss": 0.8359, + "step": 1280 + }, + { + "epoch": 6.01, + "learning_rate": 4.838888888888889e-05, + "loss": 0.3426, + "step": 1290 + }, + { + "epoch": 6.01, + "learning_rate": 4.8333333333333334e-05, + "loss": 0.8537, + "step": 1300 + }, + { + "epoch": 6.01, + "learning_rate": 4.8277777777777776e-05, + "loss": 0.7768, + "step": 1310 + }, + { + "epoch": 6.01, + "learning_rate": 4.8222222222222225e-05, + "loss": 0.8999, + "step": 1320 + }, + { + "epoch": 6.01, + "learning_rate": 4.8166666666666674e-05, + "loss": 0.3208, + "step": 1330 + }, + { + "epoch": 6.01, + "learning_rate": 4.811111111111111e-05, + "loss": 0.6026, + "step": 1340 + }, + { + "epoch": 6.01, + "learning_rate": 4.805555555555556e-05, + "loss": 0.917, + "step": 1350 + }, + { + "epoch": 6.02, + "learning_rate": 4.8e-05, + "loss": 0.662, + "step": 1360 + }, + { + "epoch": 6.02, + "learning_rate": 4.794444444444445e-05, + "loss": 0.4472, + "step": 1370 + }, + { + "epoch": 6.02, + "learning_rate": 4.7888888888888886e-05, + "loss": 0.9458, + "step": 1380 + }, + { + "epoch": 6.02, + "learning_rate": 4.7833333333333335e-05, + "loss": 0.6513, + "step": 1390 + }, + { + "epoch": 6.02, + "learning_rate": 4.7777777777777784e-05, + "loss": 0.5912, + "step": 1400 + }, + { + "epoch": 6.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 1.0081998109817505, + "eval_runtime": 11.7409, + "eval_samples_per_second": 4.088, + "eval_steps_per_second": 2.044, + "step": 1400 + }, + { + "epoch": 7.0, + "learning_rate": 4.7722222222222226e-05, + "loss": 0.6759, + "step": 1410 + }, + { + "epoch": 7.0, + "learning_rate": 4.766666666666667e-05, + "loss": 0.4469, + "step": 1420 + }, + { + "epoch": 7.0, + "learning_rate": 4.761111111111111e-05, + "loss": 0.7664, + "step": 1430 + }, + { + "epoch": 7.0, + "learning_rate": 4.755555555555556e-05, + "loss": 0.7224, + "step": 1440 + }, + { + "epoch": 7.0, + "learning_rate": 4.75e-05, + "loss": 0.8619, + "step": 1450 + }, + { + "epoch": 7.01, + "learning_rate": 4.7444444444444445e-05, + "loss": 0.5872, + "step": 1460 + }, + { + "epoch": 7.01, + "learning_rate": 4.7388888888888894e-05, + "loss": 0.8781, + "step": 1470 + }, + { + "epoch": 7.01, + "learning_rate": 4.7333333333333336e-05, + "loss": 0.4065, + "step": 1480 + }, + { + "epoch": 7.01, + "learning_rate": 4.727777777777778e-05, + "loss": 0.7563, + "step": 1490 + }, + { + "epoch": 7.01, + "learning_rate": 4.722222222222222e-05, + "loss": 0.5345, + "step": 1500 + }, + { + "epoch": 7.01, + "learning_rate": 4.716666666666667e-05, + "loss": 0.5917, + "step": 1510 + }, + { + "epoch": 7.01, + "learning_rate": 4.711111111111111e-05, + "loss": 0.6414, + "step": 1520 + }, + { + "epoch": 7.01, + "learning_rate": 4.7055555555555555e-05, + "loss": 0.8291, + "step": 1530 + }, + { + "epoch": 7.01, + "learning_rate": 4.7e-05, + "loss": 0.7571, + "step": 1540 + }, + { + "epoch": 7.01, + "learning_rate": 4.6944444444444446e-05, + "loss": 0.1543, + "step": 1550 + }, + { + "epoch": 7.02, + "learning_rate": 4.6888888888888895e-05, + "loss": 1.3236, + "step": 1560 + }, + { + "epoch": 7.02, + "learning_rate": 4.683333333333334e-05, + "loss": 0.8399, + "step": 1570 + }, + { + "epoch": 7.02, + "learning_rate": 4.677777777777778e-05, + "loss": 0.9057, + "step": 1580 + }, + { + "epoch": 7.02, + "learning_rate": 4.672222222222222e-05, + "loss": 0.6788, + "step": 1590 + }, + { + "epoch": 7.02, + "learning_rate": 4.666666666666667e-05, + "loss": 0.5918, + "step": 1600 + }, + { + "epoch": 7.02, + "eval_accuracy": 0.5625, + "eval_loss": 2.629246711730957, + "eval_runtime": 12.2894, + "eval_samples_per_second": 3.906, + "eval_steps_per_second": 1.953, + "step": 1600 + }, + { + "epoch": 8.0, + "learning_rate": 4.6611111111111114e-05, + "loss": 2.0891, + "step": 1610 + }, + { + "epoch": 8.0, + "learning_rate": 4.6555555555555556e-05, + "loss": 0.5541, + "step": 1620 + }, + { + "epoch": 8.0, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.2217, + "step": 1630 + }, + { + "epoch": 8.0, + "learning_rate": 4.644444444444445e-05, + "loss": 0.7825, + "step": 1640 + }, + { + "epoch": 8.01, + "learning_rate": 4.638888888888889e-05, + "loss": 0.0591, + "step": 1650 + }, + { + "epoch": 8.01, + "learning_rate": 4.633333333333333e-05, + "loss": 0.3167, + "step": 1660 + }, + { + "epoch": 8.01, + "learning_rate": 4.627777777777778e-05, + "loss": 0.5536, + "step": 1670 + }, + { + "epoch": 8.01, + "learning_rate": 4.6222222222222224e-05, + "loss": 0.7871, + "step": 1680 + }, + { + "epoch": 8.01, + "learning_rate": 4.6166666666666666e-05, + "loss": 0.6523, + "step": 1690 + }, + { + "epoch": 8.01, + "learning_rate": 4.6111111111111115e-05, + "loss": 0.3759, + "step": 1700 + }, + { + "epoch": 8.01, + "learning_rate": 4.605555555555556e-05, + "loss": 0.4117, + "step": 1710 + }, + { + "epoch": 8.01, + "learning_rate": 4.600000000000001e-05, + "loss": 1.1512, + "step": 1720 + }, + { + "epoch": 8.01, + "learning_rate": 4.594444444444444e-05, + "loss": 0.841, + "step": 1730 + }, + { + "epoch": 8.01, + "learning_rate": 4.588888888888889e-05, + "loss": 0.5656, + "step": 1740 + }, + { + "epoch": 8.02, + "learning_rate": 4.5833333333333334e-05, + "loss": 0.4376, + "step": 1750 + }, + { + "epoch": 8.02, + "learning_rate": 4.577777777777778e-05, + "loss": 0.9157, + "step": 1760 + }, + { + "epoch": 8.02, + "learning_rate": 4.572222222222222e-05, + "loss": 0.6012, + "step": 1770 + }, + { + "epoch": 8.02, + "learning_rate": 4.566666666666667e-05, + "loss": 1.5018, + "step": 1780 + }, + { + "epoch": 8.02, + "learning_rate": 4.561111111111112e-05, + "loss": 0.4772, + "step": 1790 + }, + { + "epoch": 8.02, + "learning_rate": 4.555555555555556e-05, + "loss": 0.8992, + "step": 1800 + }, + { + "epoch": 8.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 0.851398229598999, + "eval_runtime": 11.9202, + "eval_samples_per_second": 4.027, + "eval_steps_per_second": 2.013, + "step": 1800 + }, + { + "epoch": 9.0, + "learning_rate": 4.55e-05, + "loss": 1.2507, + "step": 1810 + }, + { + "epoch": 9.0, + "learning_rate": 4.5444444444444444e-05, + "loss": 0.5606, + "step": 1820 + }, + { + "epoch": 9.0, + "learning_rate": 4.538888888888889e-05, + "loss": 0.2066, + "step": 1830 + }, + { + "epoch": 9.0, + "learning_rate": 4.5333333333333335e-05, + "loss": 0.2448, + "step": 1840 + }, + { + "epoch": 9.01, + "learning_rate": 4.527777777777778e-05, + "loss": 0.8724, + "step": 1850 + }, + { + "epoch": 9.01, + "learning_rate": 4.522222222222223e-05, + "loss": 0.8256, + "step": 1860 + }, + { + "epoch": 9.01, + "learning_rate": 4.516666666666667e-05, + "loss": 0.3326, + "step": 1870 + }, + { + "epoch": 9.01, + "learning_rate": 4.511111111111112e-05, + "loss": 0.8212, + "step": 1880 + }, + { + "epoch": 9.01, + "learning_rate": 4.5055555555555554e-05, + "loss": 0.2155, + "step": 1890 + }, + { + "epoch": 9.01, + "learning_rate": 4.5e-05, + "loss": 0.3316, + "step": 1900 + }, + { + "epoch": 9.01, + "learning_rate": 4.4944444444444445e-05, + "loss": 0.6521, + "step": 1910 + }, + { + "epoch": 9.01, + "learning_rate": 4.4888888888888894e-05, + "loss": 0.5286, + "step": 1920 + }, + { + "epoch": 9.01, + "learning_rate": 4.483333333333333e-05, + "loss": 0.9271, + "step": 1930 + }, + { + "epoch": 9.01, + "learning_rate": 4.477777777777778e-05, + "loss": 0.0367, + "step": 1940 + }, + { + "epoch": 9.02, + "learning_rate": 4.472222222222223e-05, + "loss": 2.0037, + "step": 1950 + }, + { + "epoch": 9.02, + "learning_rate": 4.466666666666667e-05, + "loss": 0.6484, + "step": 1960 + }, + { + "epoch": 9.02, + "learning_rate": 4.461111111111111e-05, + "loss": 0.251, + "step": 1970 + }, + { + "epoch": 9.02, + "learning_rate": 4.4555555555555555e-05, + "loss": 0.4765, + "step": 1980 + }, + { + "epoch": 9.02, + "learning_rate": 4.4500000000000004e-05, + "loss": 0.7967, + "step": 1990 + }, + { + "epoch": 9.02, + "learning_rate": 4.4444444444444447e-05, + "loss": 0.172, + "step": 2000 + }, + { + "epoch": 9.02, + "eval_accuracy": 0.875, + "eval_loss": 0.45680248737335205, + "eval_runtime": 12.5869, + "eval_samples_per_second": 3.813, + "eval_steps_per_second": 1.907, + "step": 2000 + }, + { + "epoch": 10.0, + "learning_rate": 4.438888888888889e-05, + "loss": 0.7177, + "step": 2010 + }, + { + "epoch": 10.0, + "learning_rate": 4.433333333333334e-05, + "loss": 0.8508, + "step": 2020 + }, + { + "epoch": 10.0, + "learning_rate": 4.427777777777778e-05, + "loss": 0.8407, + "step": 2030 + }, + { + "epoch": 10.0, + "learning_rate": 4.422222222222222e-05, + "loss": 0.5341, + "step": 2040 + }, + { + "epoch": 10.01, + "learning_rate": 4.4166666666666665e-05, + "loss": 0.3149, + "step": 2050 + }, + { + "epoch": 10.01, + "learning_rate": 4.4111111111111114e-05, + "loss": 0.2314, + "step": 2060 + }, + { + "epoch": 10.01, + "learning_rate": 4.4055555555555557e-05, + "loss": 0.952, + "step": 2070 + }, + { + "epoch": 10.01, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.428, + "step": 2080 + }, + { + "epoch": 10.01, + "learning_rate": 4.394444444444445e-05, + "loss": 0.304, + "step": 2090 + }, + { + "epoch": 10.01, + "learning_rate": 4.388888888888889e-05, + "loss": 0.4067, + "step": 2100 + }, + { + "epoch": 10.01, + "learning_rate": 4.383333333333334e-05, + "loss": 0.2923, + "step": 2110 + }, + { + "epoch": 10.01, + "learning_rate": 4.377777777777778e-05, + "loss": 0.2763, + "step": 2120 + }, + { + "epoch": 10.01, + "learning_rate": 4.3722222222222224e-05, + "loss": 1.4951, + "step": 2130 + }, + { + "epoch": 10.01, + "learning_rate": 4.3666666666666666e-05, + "loss": 0.7374, + "step": 2140 + }, + { + "epoch": 10.02, + "learning_rate": 4.3611111111111116e-05, + "loss": 0.8782, + "step": 2150 + }, + { + "epoch": 10.02, + "learning_rate": 4.355555555555556e-05, + "loss": 0.5374, + "step": 2160 + }, + { + "epoch": 10.02, + "learning_rate": 4.35e-05, + "loss": 0.6164, + "step": 2170 + }, + { + "epoch": 10.02, + "learning_rate": 4.344444444444445e-05, + "loss": 0.5851, + "step": 2180 + }, + { + "epoch": 10.02, + "learning_rate": 4.338888888888889e-05, + "loss": 0.5256, + "step": 2190 + }, + { + "epoch": 10.02, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.493, + "step": 2200 + }, + { + "epoch": 10.02, + "eval_accuracy": 0.7916666666666666, + "eval_loss": 0.735382080078125, + "eval_runtime": 12.068, + "eval_samples_per_second": 3.977, + "eval_steps_per_second": 1.989, + "step": 2200 + }, + { + "epoch": 11.0, + "learning_rate": 4.3277777777777776e-05, + "loss": 0.6347, + "step": 2210 + }, + { + "epoch": 11.0, + "learning_rate": 4.3222222222222226e-05, + "loss": 1.0816, + "step": 2220 + }, + { + "epoch": 11.0, + "learning_rate": 4.316666666666667e-05, + "loss": 0.3297, + "step": 2230 + }, + { + "epoch": 11.0, + "learning_rate": 4.311111111111111e-05, + "loss": 0.438, + "step": 2240 + }, + { + "epoch": 11.01, + "learning_rate": 4.305555555555556e-05, + "loss": 0.978, + "step": 2250 + }, + { + "epoch": 11.01, + "learning_rate": 4.3e-05, + "loss": 0.8443, + "step": 2260 + }, + { + "epoch": 11.01, + "learning_rate": 4.294444444444445e-05, + "loss": 0.1932, + "step": 2270 + }, + { + "epoch": 11.01, + "learning_rate": 4.2888888888888886e-05, + "loss": 0.1223, + "step": 2280 + }, + { + "epoch": 11.01, + "learning_rate": 4.2833333333333335e-05, + "loss": 0.6449, + "step": 2290 + }, + { + "epoch": 11.01, + "learning_rate": 4.277777777777778e-05, + "loss": 0.6536, + "step": 2300 + }, + { + "epoch": 11.01, + "learning_rate": 4.272222222222223e-05, + "loss": 1.0084, + "step": 2310 + }, + { + "epoch": 11.01, + "learning_rate": 4.266666666666667e-05, + "loss": 0.5281, + "step": 2320 + }, + { + "epoch": 11.01, + "learning_rate": 4.261111111111111e-05, + "loss": 0.4767, + "step": 2330 + }, + { + "epoch": 11.01, + "learning_rate": 4.255555555555556e-05, + "loss": 0.3046, + "step": 2340 + }, + { + "epoch": 11.02, + "learning_rate": 4.25e-05, + "loss": 0.5255, + "step": 2350 + }, + { + "epoch": 11.02, + "learning_rate": 4.2444444444444445e-05, + "loss": 0.4564, + "step": 2360 + }, + { + "epoch": 11.02, + "learning_rate": 4.238888888888889e-05, + "loss": 0.7596, + "step": 2370 + }, + { + "epoch": 11.02, + "learning_rate": 4.233333333333334e-05, + "loss": 0.8184, + "step": 2380 + }, + { + "epoch": 11.02, + "learning_rate": 4.227777777777778e-05, + "loss": 0.3929, + "step": 2390 + }, + { + "epoch": 11.02, + "learning_rate": 4.222222222222222e-05, + "loss": 0.3622, + "step": 2400 + }, + { + "epoch": 11.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 1.038616418838501, + "eval_runtime": 12.2973, + "eval_samples_per_second": 3.903, + "eval_steps_per_second": 1.952, + "step": 2400 + }, + { + "epoch": 12.0, + "learning_rate": 4.216666666666667e-05, + "loss": 0.3532, + "step": 2410 + }, + { + "epoch": 12.0, + "learning_rate": 4.211111111111111e-05, + "loss": 1.1891, + "step": 2420 + }, + { + "epoch": 12.0, + "learning_rate": 4.205555555555556e-05, + "loss": 1.2511, + "step": 2430 + }, + { + "epoch": 12.0, + "learning_rate": 4.2e-05, + "loss": 0.5967, + "step": 2440 + }, + { + "epoch": 12.01, + "learning_rate": 4.194444444444445e-05, + "loss": 0.213, + "step": 2450 + }, + { + "epoch": 12.01, + "learning_rate": 4.188888888888889e-05, + "loss": 0.4385, + "step": 2460 + }, + { + "epoch": 12.01, + "learning_rate": 4.183333333333334e-05, + "loss": 0.3407, + "step": 2470 + }, + { + "epoch": 12.01, + "learning_rate": 4.177777777777778e-05, + "loss": 0.6524, + "step": 2480 + }, + { + "epoch": 12.01, + "learning_rate": 4.172222222222222e-05, + "loss": 0.4037, + "step": 2490 + }, + { + "epoch": 12.01, + "learning_rate": 4.166666666666667e-05, + "loss": 0.5343, + "step": 2500 + }, + { + "epoch": 12.01, + "learning_rate": 4.1611111111111114e-05, + "loss": 0.8634, + "step": 2510 + }, + { + "epoch": 12.01, + "learning_rate": 4.155555555555556e-05, + "loss": 0.2486, + "step": 2520 + }, + { + "epoch": 12.01, + "learning_rate": 4.15e-05, + "loss": 0.0436, + "step": 2530 + }, + { + "epoch": 12.01, + "learning_rate": 4.144444444444445e-05, + "loss": 0.6587, + "step": 2540 + }, + { + "epoch": 12.02, + "learning_rate": 4.138888888888889e-05, + "loss": 1.0205, + "step": 2550 + }, + { + "epoch": 12.02, + "learning_rate": 4.133333333333333e-05, + "loss": 0.0764, + "step": 2560 + }, + { + "epoch": 12.02, + "learning_rate": 4.127777777777778e-05, + "loss": 0.5688, + "step": 2570 + }, + { + "epoch": 12.02, + "learning_rate": 4.1222222222222224e-05, + "loss": 0.478, + "step": 2580 + }, + { + "epoch": 12.02, + "learning_rate": 4.116666666666667e-05, + "loss": 0.3865, + "step": 2590 + }, + { + "epoch": 12.02, + "learning_rate": 4.111111111111111e-05, + "loss": 0.4966, + "step": 2600 + }, + { + "epoch": 12.02, + "eval_accuracy": 0.7916666666666666, + "eval_loss": 0.8979193568229675, + "eval_runtime": 12.2264, + "eval_samples_per_second": 3.926, + "eval_steps_per_second": 1.963, + "step": 2600 + }, + { + "epoch": 13.0, + "learning_rate": 4.105555555555556e-05, + "loss": 0.485, + "step": 2610 + }, + { + "epoch": 13.0, + "learning_rate": 4.1e-05, + "loss": 0.2967, + "step": 2620 + }, + { + "epoch": 13.0, + "learning_rate": 4.094444444444445e-05, + "loss": 0.8117, + "step": 2630 + }, + { + "epoch": 13.0, + "learning_rate": 4.088888888888889e-05, + "loss": 0.3335, + "step": 2640 + }, + { + "epoch": 13.01, + "learning_rate": 4.0833333333333334e-05, + "loss": 0.524, + "step": 2650 + }, + { + "epoch": 13.01, + "learning_rate": 4.0777777777777783e-05, + "loss": 0.3098, + "step": 2660 + }, + { + "epoch": 13.01, + "learning_rate": 4.0722222222222226e-05, + "loss": 0.5006, + "step": 2670 + }, + { + "epoch": 13.01, + "learning_rate": 4.066666666666667e-05, + "loss": 0.5384, + "step": 2680 + }, + { + "epoch": 13.01, + "learning_rate": 4.061111111111111e-05, + "loss": 0.2278, + "step": 2690 + }, + { + "epoch": 13.01, + "learning_rate": 4.055555555555556e-05, + "loss": 0.3817, + "step": 2700 + }, + { + "epoch": 13.01, + "learning_rate": 4.05e-05, + "loss": 0.5161, + "step": 2710 + }, + { + "epoch": 13.01, + "learning_rate": 4.0444444444444444e-05, + "loss": 0.9926, + "step": 2720 + }, + { + "epoch": 13.01, + "learning_rate": 4.038888888888889e-05, + "loss": 0.2719, + "step": 2730 + }, + { + "epoch": 13.01, + "learning_rate": 4.0333333333333336e-05, + "loss": 1.0233, + "step": 2740 + }, + { + "epoch": 13.02, + "learning_rate": 4.027777777777778e-05, + "loss": 0.7767, + "step": 2750 + }, + { + "epoch": 13.02, + "learning_rate": 4.022222222222222e-05, + "loss": 0.5676, + "step": 2760 + }, + { + "epoch": 13.02, + "learning_rate": 4.016666666666667e-05, + "loss": 0.5567, + "step": 2770 + }, + { + "epoch": 13.02, + "learning_rate": 4.011111111111111e-05, + "loss": 0.5635, + "step": 2780 + }, + { + "epoch": 13.02, + "learning_rate": 4.0055555555555554e-05, + "loss": 0.6506, + "step": 2790 + }, + { + "epoch": 13.02, + "learning_rate": 4e-05, + "loss": 0.3541, + "step": 2800 + }, + { + "epoch": 13.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 0.8220213055610657, + "eval_runtime": 12.3358, + "eval_samples_per_second": 3.891, + "eval_steps_per_second": 1.946, + "step": 2800 + }, + { + "epoch": 14.0, + "learning_rate": 3.9944444444444446e-05, + "loss": 1.2772, + "step": 2810 + }, + { + "epoch": 14.0, + "learning_rate": 3.9888888888888895e-05, + "loss": 0.1496, + "step": 2820 + }, + { + "epoch": 14.0, + "learning_rate": 3.983333333333333e-05, + "loss": 0.4015, + "step": 2830 + }, + { + "epoch": 14.0, + "learning_rate": 3.977777777777778e-05, + "loss": 0.2116, + "step": 2840 + }, + { + "epoch": 14.01, + "learning_rate": 3.972222222222222e-05, + "loss": 0.3245, + "step": 2850 + }, + { + "epoch": 14.01, + "learning_rate": 3.966666666666667e-05, + "loss": 0.2073, + "step": 2860 + }, + { + "epoch": 14.01, + "learning_rate": 3.961111111111111e-05, + "loss": 0.311, + "step": 2870 + }, + { + "epoch": 14.01, + "learning_rate": 3.9555555555555556e-05, + "loss": 0.7659, + "step": 2880 + }, + { + "epoch": 14.01, + "learning_rate": 3.9500000000000005e-05, + "loss": 0.3232, + "step": 2890 + }, + { + "epoch": 14.01, + "learning_rate": 3.944444444444445e-05, + "loss": 0.3152, + "step": 2900 + }, + { + "epoch": 14.01, + "learning_rate": 3.938888888888889e-05, + "loss": 0.8308, + "step": 2910 + }, + { + "epoch": 14.01, + "learning_rate": 3.933333333333333e-05, + "loss": 0.0689, + "step": 2920 + }, + { + "epoch": 14.01, + "learning_rate": 3.927777777777778e-05, + "loss": 0.361, + "step": 2930 + }, + { + "epoch": 14.01, + "learning_rate": 3.922222222222223e-05, + "loss": 0.686, + "step": 2940 + }, + { + "epoch": 14.02, + "learning_rate": 3.9166666666666665e-05, + "loss": 0.4834, + "step": 2950 + }, + { + "epoch": 14.02, + "learning_rate": 3.9111111111111115e-05, + "loss": 0.382, + "step": 2960 + }, + { + "epoch": 14.02, + "learning_rate": 3.905555555555556e-05, + "loss": 0.7582, + "step": 2970 + }, + { + "epoch": 14.02, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.3903, + "step": 2980 + }, + { + "epoch": 14.02, + "learning_rate": 3.894444444444444e-05, + "loss": 0.2723, + "step": 2990 + }, + { + "epoch": 14.02, + "learning_rate": 3.888888888888889e-05, + "loss": 0.5386, + "step": 3000 + }, + { + "epoch": 14.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 1.0255749225616455, + "eval_runtime": 11.7863, + "eval_samples_per_second": 4.073, + "eval_steps_per_second": 2.036, + "step": 3000 + }, + { + "epoch": 15.0, + "learning_rate": 3.883333333333333e-05, + "loss": 0.1094, + "step": 3010 + }, + { + "epoch": 15.0, + "learning_rate": 3.877777777777778e-05, + "loss": 0.7977, + "step": 3020 + }, + { + "epoch": 15.0, + "learning_rate": 3.8722222222222225e-05, + "loss": 0.5661, + "step": 3030 + }, + { + "epoch": 15.0, + "learning_rate": 3.866666666666667e-05, + "loss": 0.4519, + "step": 3040 + }, + { + "epoch": 15.01, + "learning_rate": 3.8611111111111116e-05, + "loss": 0.0226, + "step": 3050 + }, + { + "epoch": 15.01, + "learning_rate": 3.855555555555556e-05, + "loss": 0.8201, + "step": 3060 + }, + { + "epoch": 15.01, + "learning_rate": 3.85e-05, + "loss": 1.1224, + "step": 3070 + }, + { + "epoch": 15.01, + "learning_rate": 3.844444444444444e-05, + "loss": 0.4948, + "step": 3080 + }, + { + "epoch": 15.01, + "learning_rate": 3.838888888888889e-05, + "loss": 0.7494, + "step": 3090 + }, + { + "epoch": 15.01, + "learning_rate": 3.8333333333333334e-05, + "loss": 0.2823, + "step": 3100 + }, + { + "epoch": 15.01, + "learning_rate": 3.827777777777778e-05, + "loss": 0.003, + "step": 3110 + }, + { + "epoch": 15.01, + "learning_rate": 3.8222222222222226e-05, + "loss": 0.2106, + "step": 3120 + }, + { + "epoch": 15.01, + "learning_rate": 3.816666666666667e-05, + "loss": 0.7284, + "step": 3130 + }, + { + "epoch": 15.01, + "learning_rate": 3.811111111111112e-05, + "loss": 0.475, + "step": 3140 + }, + { + "epoch": 15.02, + "learning_rate": 3.805555555555555e-05, + "loss": 0.6231, + "step": 3150 + }, + { + "epoch": 15.02, + "learning_rate": 3.8e-05, + "loss": 0.2487, + "step": 3160 + }, + { + "epoch": 15.02, + "learning_rate": 3.7944444444444444e-05, + "loss": 0.8212, + "step": 3170 + }, + { + "epoch": 15.02, + "learning_rate": 3.7888888888888894e-05, + "loss": 0.6314, + "step": 3180 + }, + { + "epoch": 15.02, + "learning_rate": 3.7833333333333336e-05, + "loss": 0.8588, + "step": 3190 + }, + { + "epoch": 15.02, + "learning_rate": 3.777777777777778e-05, + "loss": 0.4615, + "step": 3200 + }, + { + "epoch": 15.02, + "eval_accuracy": 0.7916666666666666, + "eval_loss": 1.0447040796279907, + "eval_runtime": 11.8354, + "eval_samples_per_second": 4.056, + "eval_steps_per_second": 2.028, + "step": 3200 + }, + { + "epoch": 16.0, + "learning_rate": 3.772222222222223e-05, + "loss": 0.2494, + "step": 3210 + }, + { + "epoch": 16.0, + "learning_rate": 3.766666666666667e-05, + "loss": 0.4866, + "step": 3220 + }, + { + "epoch": 16.0, + "learning_rate": 3.761111111111111e-05, + "loss": 0.6039, + "step": 3230 + }, + { + "epoch": 16.0, + "learning_rate": 3.7555555555555554e-05, + "loss": 0.8248, + "step": 3240 + }, + { + "epoch": 16.0, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.5987, + "step": 3250 + }, + { + "epoch": 16.01, + "learning_rate": 3.7444444444444446e-05, + "loss": 0.3115, + "step": 3260 + }, + { + "epoch": 16.01, + "learning_rate": 3.738888888888889e-05, + "loss": 0.7818, + "step": 3270 + }, + { + "epoch": 16.01, + "learning_rate": 3.733333333333334e-05, + "loss": 0.3428, + "step": 3280 + }, + { + "epoch": 16.01, + "learning_rate": 3.727777777777778e-05, + "loss": 0.291, + "step": 3290 + }, + { + "epoch": 16.01, + "learning_rate": 3.722222222222222e-05, + "loss": 0.3707, + "step": 3300 + }, + { + "epoch": 16.01, + "learning_rate": 3.7166666666666664e-05, + "loss": 0.2312, + "step": 3310 + }, + { + "epoch": 16.01, + "learning_rate": 3.7111111111111113e-05, + "loss": 1.1075, + "step": 3320 + }, + { + "epoch": 16.01, + "learning_rate": 3.705555555555556e-05, + "loss": 0.6069, + "step": 3330 + }, + { + "epoch": 16.01, + "learning_rate": 3.7e-05, + "loss": 0.8865, + "step": 3340 + }, + { + "epoch": 16.02, + "learning_rate": 3.694444444444445e-05, + "loss": 0.9964, + "step": 3350 + }, + { + "epoch": 16.02, + "learning_rate": 3.688888888888889e-05, + "loss": 0.1496, + "step": 3360 + }, + { + "epoch": 16.02, + "learning_rate": 3.683333333333334e-05, + "loss": 0.458, + "step": 3370 + }, + { + "epoch": 16.02, + "learning_rate": 3.677777777777778e-05, + "loss": 0.103, + "step": 3380 + }, + { + "epoch": 16.02, + "learning_rate": 3.672222222222222e-05, + "loss": 0.1444, + "step": 3390 + }, + { + "epoch": 16.02, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.1624, + "step": 3400 + }, + { + "epoch": 16.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.6448492407798767, + "eval_runtime": 11.8539, + "eval_samples_per_second": 4.049, + "eval_steps_per_second": 2.025, + "step": 3400 + }, + { + "epoch": 17.0, + "learning_rate": 3.6611111111111115e-05, + "loss": 0.3557, + "step": 3410 + }, + { + "epoch": 17.0, + "learning_rate": 3.655555555555556e-05, + "loss": 0.2707, + "step": 3420 + }, + { + "epoch": 17.0, + "learning_rate": 3.65e-05, + "loss": 0.6198, + "step": 3430 + }, + { + "epoch": 17.0, + "learning_rate": 3.644444444444445e-05, + "loss": 0.4712, + "step": 3440 + }, + { + "epoch": 17.0, + "learning_rate": 3.638888888888889e-05, + "loss": 0.4232, + "step": 3450 + }, + { + "epoch": 17.01, + "learning_rate": 3.633333333333333e-05, + "loss": 0.0412, + "step": 3460 + }, + { + "epoch": 17.01, + "learning_rate": 3.6277777777777776e-05, + "loss": 0.2926, + "step": 3470 + }, + { + "epoch": 17.01, + "learning_rate": 3.6222222222222225e-05, + "loss": 0.4301, + "step": 3480 + }, + { + "epoch": 17.01, + "learning_rate": 3.6166666666666674e-05, + "loss": 0.5938, + "step": 3490 + }, + { + "epoch": 17.01, + "learning_rate": 3.611111111111111e-05, + "loss": 0.2795, + "step": 3500 + }, + { + "epoch": 17.01, + "learning_rate": 3.605555555555556e-05, + "loss": 0.7038, + "step": 3510 + }, + { + "epoch": 17.01, + "learning_rate": 3.6e-05, + "loss": 0.6683, + "step": 3520 + }, + { + "epoch": 17.01, + "learning_rate": 3.594444444444445e-05, + "loss": 0.9081, + "step": 3530 + }, + { + "epoch": 17.01, + "learning_rate": 3.5888888888888886e-05, + "loss": 0.7373, + "step": 3540 + }, + { + "epoch": 17.02, + "learning_rate": 3.5833333333333335e-05, + "loss": 0.0338, + "step": 3550 + }, + { + "epoch": 17.02, + "learning_rate": 3.577777777777778e-05, + "loss": 0.7519, + "step": 3560 + }, + { + "epoch": 17.02, + "learning_rate": 3.5722222222222226e-05, + "loss": 1.1557, + "step": 3570 + }, + { + "epoch": 17.02, + "learning_rate": 3.566666666666667e-05, + "loss": 0.2387, + "step": 3580 + }, + { + "epoch": 17.02, + "learning_rate": 3.561111111111111e-05, + "loss": 0.7307, + "step": 3590 + }, + { + "epoch": 17.02, + "learning_rate": 3.555555555555556e-05, + "loss": 1.0388, + "step": 3600 + }, + { + "epoch": 17.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 0.9992363452911377, + "eval_runtime": 12.4199, + "eval_samples_per_second": 3.865, + "eval_steps_per_second": 1.932, + "step": 3600 + }, + { + "epoch": 18.0, + "learning_rate": 3.55e-05, + "loss": 0.1617, + "step": 3610 + }, + { + "epoch": 18.0, + "learning_rate": 3.5444444444444445e-05, + "loss": 0.008, + "step": 3620 + }, + { + "epoch": 18.0, + "learning_rate": 3.538888888888889e-05, + "loss": 0.6766, + "step": 3630 + }, + { + "epoch": 18.0, + "learning_rate": 3.5333333333333336e-05, + "loss": 0.6959, + "step": 3640 + }, + { + "epoch": 18.0, + "learning_rate": 3.527777777777778e-05, + "loss": 0.4455, + "step": 3650 + }, + { + "epoch": 18.01, + "learning_rate": 3.522222222222222e-05, + "loss": 0.0627, + "step": 3660 + }, + { + "epoch": 18.01, + "learning_rate": 3.516666666666667e-05, + "loss": 0.4591, + "step": 3670 + }, + { + "epoch": 18.01, + "learning_rate": 3.511111111111111e-05, + "loss": 0.3082, + "step": 3680 + }, + { + "epoch": 18.01, + "learning_rate": 3.505555555555556e-05, + "loss": 0.2478, + "step": 3690 + }, + { + "epoch": 18.01, + "learning_rate": 3.5e-05, + "loss": 0.5041, + "step": 3700 + }, + { + "epoch": 18.01, + "learning_rate": 3.4944444444444446e-05, + "loss": 0.2401, + "step": 3710 + }, + { + "epoch": 18.01, + "learning_rate": 3.4888888888888895e-05, + "loss": 0.2564, + "step": 3720 + }, + { + "epoch": 18.01, + "learning_rate": 3.483333333333334e-05, + "loss": 0.4303, + "step": 3730 + }, + { + "epoch": 18.01, + "learning_rate": 3.477777777777778e-05, + "loss": 0.2812, + "step": 3740 + }, + { + "epoch": 18.02, + "learning_rate": 3.472222222222222e-05, + "loss": 0.4144, + "step": 3750 + }, + { + "epoch": 18.02, + "learning_rate": 3.466666666666667e-05, + "loss": 0.5275, + "step": 3760 + }, + { + "epoch": 18.02, + "learning_rate": 3.4611111111111114e-05, + "loss": 0.1221, + "step": 3770 + }, + { + "epoch": 18.02, + "learning_rate": 3.4555555555555556e-05, + "loss": 0.2441, + "step": 3780 + }, + { + "epoch": 18.02, + "learning_rate": 3.45e-05, + "loss": 0.2515, + "step": 3790 + }, + { + "epoch": 18.02, + "learning_rate": 3.444444444444445e-05, + "loss": 0.0442, + "step": 3800 + }, + { + "epoch": 18.02, + "eval_accuracy": 0.7708333333333334, + "eval_loss": 1.1625920534133911, + "eval_runtime": 11.9697, + "eval_samples_per_second": 4.01, + "eval_steps_per_second": 2.005, + "step": 3800 + }, + { + "epoch": 19.0, + "learning_rate": 3.438888888888889e-05, + "loss": 0.0023, + "step": 3810 + }, + { + "epoch": 19.0, + "learning_rate": 3.433333333333333e-05, + "loss": 0.3082, + "step": 3820 + }, + { + "epoch": 19.0, + "learning_rate": 3.427777777777778e-05, + "loss": 0.0907, + "step": 3830 + }, + { + "epoch": 19.0, + "learning_rate": 3.4222222222222224e-05, + "loss": 0.3014, + "step": 3840 + }, + { + "epoch": 19.0, + "learning_rate": 3.4166666666666666e-05, + "loss": 0.0267, + "step": 3850 + }, + { + "epoch": 19.01, + "learning_rate": 3.411111111111111e-05, + "loss": 0.0004, + "step": 3860 + }, + { + "epoch": 19.01, + "learning_rate": 3.405555555555556e-05, + "loss": 0.8851, + "step": 3870 + }, + { + "epoch": 19.01, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.3284, + "step": 3880 + }, + { + "epoch": 19.01, + "learning_rate": 3.394444444444444e-05, + "loss": 0.001, + "step": 3890 + }, + { + "epoch": 19.01, + "learning_rate": 3.388888888888889e-05, + "loss": 0.8428, + "step": 3900 + }, + { + "epoch": 19.01, + "learning_rate": 3.3833333333333334e-05, + "loss": 0.3571, + "step": 3910 + }, + { + "epoch": 19.01, + "learning_rate": 3.377777777777778e-05, + "loss": 0.003, + "step": 3920 + }, + { + "epoch": 19.01, + "learning_rate": 3.3722222222222225e-05, + "loss": 0.1794, + "step": 3930 + }, + { + "epoch": 19.01, + "learning_rate": 3.366666666666667e-05, + "loss": 0.9065, + "step": 3940 + }, + { + "epoch": 19.02, + "learning_rate": 3.3611111111111116e-05, + "loss": 0.4515, + "step": 3950 + }, + { + "epoch": 19.02, + "learning_rate": 3.355555555555556e-05, + "loss": 0.3212, + "step": 3960 + }, + { + "epoch": 19.02, + "learning_rate": 3.35e-05, + "loss": 0.3582, + "step": 3970 + }, + { + "epoch": 19.02, + "learning_rate": 3.3444444444444443e-05, + "loss": 0.299, + "step": 3980 + }, + { + "epoch": 19.02, + "learning_rate": 3.338888888888889e-05, + "loss": 0.053, + "step": 3990 + }, + { + "epoch": 19.02, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.2449, + "step": 4000 + }, + { + "epoch": 19.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.8173995614051819, + "eval_runtime": 12.1319, + "eval_samples_per_second": 3.957, + "eval_steps_per_second": 1.978, + "step": 4000 + }, + { + "epoch": 20.0, + "learning_rate": 3.327777777777778e-05, + "loss": 0.0005, + "step": 4010 + }, + { + "epoch": 20.0, + "learning_rate": 3.322222222222222e-05, + "loss": 0.7228, + "step": 4020 + }, + { + "epoch": 20.0, + "learning_rate": 3.316666666666667e-05, + "loss": 0.0022, + "step": 4030 + }, + { + "epoch": 20.0, + "learning_rate": 3.311111111111112e-05, + "loss": 0.4646, + "step": 4040 + }, + { + "epoch": 20.0, + "learning_rate": 3.3055555555555553e-05, + "loss": 1.0143, + "step": 4050 + }, + { + "epoch": 20.01, + "learning_rate": 3.3e-05, + "loss": 0.5995, + "step": 4060 + }, + { + "epoch": 20.01, + "learning_rate": 3.2944444444444445e-05, + "loss": 0.0007, + "step": 4070 + }, + { + "epoch": 20.01, + "learning_rate": 3.2888888888888894e-05, + "loss": 0.0003, + "step": 4080 + }, + { + "epoch": 20.01, + "learning_rate": 3.283333333333333e-05, + "loss": 0.1072, + "step": 4090 + }, + { + "epoch": 20.01, + "learning_rate": 3.277777777777778e-05, + "loss": 0.0004, + "step": 4100 + }, + { + "epoch": 20.01, + "learning_rate": 3.272222222222223e-05, + "loss": 0.4676, + "step": 4110 + }, + { + "epoch": 20.01, + "learning_rate": 3.266666666666667e-05, + "loss": 0.2447, + "step": 4120 + }, + { + "epoch": 20.01, + "learning_rate": 3.261111111111111e-05, + "loss": 0.0444, + "step": 4130 + }, + { + "epoch": 20.01, + "learning_rate": 3.2555555555555555e-05, + "loss": 0.0146, + "step": 4140 + }, + { + "epoch": 20.02, + "learning_rate": 3.2500000000000004e-05, + "loss": 1.0221, + "step": 4150 + }, + { + "epoch": 20.02, + "learning_rate": 3.2444444444444446e-05, + "loss": 0.9506, + "step": 4160 + }, + { + "epoch": 20.02, + "learning_rate": 3.238888888888889e-05, + "loss": 0.5168, + "step": 4170 + }, + { + "epoch": 20.02, + "learning_rate": 3.233333333333333e-05, + "loss": 0.1898, + "step": 4180 + }, + { + "epoch": 20.02, + "learning_rate": 3.227777777777778e-05, + "loss": 0.4702, + "step": 4190 + }, + { + "epoch": 20.02, + "learning_rate": 3.222222222222223e-05, + "loss": 0.3024, + "step": 4200 + }, + { + "epoch": 20.02, + "eval_accuracy": 0.7916666666666666, + "eval_loss": 0.8500446677207947, + "eval_runtime": 12.0251, + "eval_samples_per_second": 3.992, + "eval_steps_per_second": 1.996, + "step": 4200 + }, + { + "epoch": 21.0, + "learning_rate": 3.2166666666666665e-05, + "loss": 0.3821, + "step": 4210 + }, + { + "epoch": 21.0, + "learning_rate": 3.2111111111111114e-05, + "loss": 1.171, + "step": 4220 + }, + { + "epoch": 21.0, + "learning_rate": 3.2055555555555556e-05, + "loss": 0.5253, + "step": 4230 + }, + { + "epoch": 21.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.5947, + "step": 4240 + }, + { + "epoch": 21.0, + "learning_rate": 3.194444444444444e-05, + "loss": 0.489, + "step": 4250 + }, + { + "epoch": 21.01, + "learning_rate": 3.188888888888889e-05, + "loss": 0.352, + "step": 4260 + }, + { + "epoch": 21.01, + "learning_rate": 3.183333333333334e-05, + "loss": 0.3475, + "step": 4270 + }, + { + "epoch": 21.01, + "learning_rate": 3.177777777777778e-05, + "loss": 0.3498, + "step": 4280 + }, + { + "epoch": 21.01, + "learning_rate": 3.1722222222222224e-05, + "loss": 0.0222, + "step": 4290 + }, + { + "epoch": 21.01, + "learning_rate": 3.1666666666666666e-05, + "loss": 0.1436, + "step": 4300 + }, + { + "epoch": 21.01, + "learning_rate": 3.1611111111111115e-05, + "loss": 0.3364, + "step": 4310 + }, + { + "epoch": 21.01, + "learning_rate": 3.155555555555556e-05, + "loss": 0.0368, + "step": 4320 + }, + { + "epoch": 21.01, + "learning_rate": 3.15e-05, + "loss": 0.2799, + "step": 4330 + }, + { + "epoch": 21.01, + "learning_rate": 3.144444444444445e-05, + "loss": 0.4279, + "step": 4340 + }, + { + "epoch": 21.02, + "learning_rate": 3.138888888888889e-05, + "loss": 0.2409, + "step": 4350 + }, + { + "epoch": 21.02, + "learning_rate": 3.1333333333333334e-05, + "loss": 0.0012, + "step": 4360 + }, + { + "epoch": 21.02, + "learning_rate": 3.1277777777777776e-05, + "loss": 0.0224, + "step": 4370 + }, + { + "epoch": 21.02, + "learning_rate": 3.1222222222222225e-05, + "loss": 0.2865, + "step": 4380 + }, + { + "epoch": 21.02, + "learning_rate": 3.116666666666667e-05, + "loss": 0.7114, + "step": 4390 + }, + { + "epoch": 21.02, + "learning_rate": 3.111111111111111e-05, + "loss": 0.4879, + "step": 4400 + }, + { + "epoch": 21.02, + "eval_accuracy": 0.7291666666666666, + "eval_loss": 1.2219163179397583, + "eval_runtime": 12.2455, + "eval_samples_per_second": 3.92, + "eval_steps_per_second": 1.96, + "step": 4400 + }, + { + "epoch": 22.0, + "learning_rate": 3.105555555555555e-05, + "loss": 0.2362, + "step": 4410 + }, + { + "epoch": 22.0, + "learning_rate": 3.1e-05, + "loss": 0.0008, + "step": 4420 + }, + { + "epoch": 22.0, + "learning_rate": 3.094444444444445e-05, + "loss": 0.1325, + "step": 4430 + }, + { + "epoch": 22.0, + "learning_rate": 3.088888888888889e-05, + "loss": 0.0823, + "step": 4440 + }, + { + "epoch": 22.0, + "learning_rate": 3.0833333333333335e-05, + "loss": 0.7201, + "step": 4450 + }, + { + "epoch": 22.01, + "learning_rate": 3.077777777777778e-05, + "loss": 0.4, + "step": 4460 + }, + { + "epoch": 22.01, + "learning_rate": 3.0722222222222227e-05, + "loss": 0.2967, + "step": 4470 + }, + { + "epoch": 22.01, + "learning_rate": 3.066666666666667e-05, + "loss": 0.4169, + "step": 4480 + }, + { + "epoch": 22.01, + "learning_rate": 3.061111111111111e-05, + "loss": 0.2798, + "step": 4490 + }, + { + "epoch": 22.01, + "learning_rate": 3.055555555555556e-05, + "loss": 0.745, + "step": 4500 + }, + { + "epoch": 22.01, + "learning_rate": 3.05e-05, + "loss": 0.0427, + "step": 4510 + }, + { + "epoch": 22.01, + "learning_rate": 3.044444444444445e-05, + "loss": 0.1328, + "step": 4520 + }, + { + "epoch": 22.01, + "learning_rate": 3.0388888888888887e-05, + "loss": 0.275, + "step": 4530 + }, + { + "epoch": 22.01, + "learning_rate": 3.0333333333333337e-05, + "loss": 0.2451, + "step": 4540 + }, + { + "epoch": 22.02, + "learning_rate": 3.0277777777777776e-05, + "loss": 0.4669, + "step": 4550 + }, + { + "epoch": 22.02, + "learning_rate": 3.0222222222222225e-05, + "loss": 0.3182, + "step": 4560 + }, + { + "epoch": 22.02, + "learning_rate": 3.016666666666667e-05, + "loss": 0.293, + "step": 4570 + }, + { + "epoch": 22.02, + "learning_rate": 3.0111111111111113e-05, + "loss": 0.3155, + "step": 4580 + }, + { + "epoch": 22.02, + "learning_rate": 3.005555555555556e-05, + "loss": 0.7555, + "step": 4590 + }, + { + "epoch": 22.02, + "learning_rate": 3e-05, + "loss": 0.4035, + "step": 4600 + }, + { + "epoch": 22.02, + "eval_accuracy": 0.8333333333333334, + "eval_loss": 0.6436423659324646, + "eval_runtime": 11.8808, + "eval_samples_per_second": 4.04, + "eval_steps_per_second": 2.02, + "step": 4600 + }, + { + "epoch": 23.0, + "learning_rate": 2.9944444444444446e-05, + "loss": 0.2087, + "step": 4610 + }, + { + "epoch": 23.0, + "learning_rate": 2.988888888888889e-05, + "loss": 1.0019, + "step": 4620 + }, + { + "epoch": 23.0, + "learning_rate": 2.9833333333333335e-05, + "loss": 0.0338, + "step": 4630 + }, + { + "epoch": 23.0, + "learning_rate": 2.9777777777777777e-05, + "loss": 0.0014, + "step": 4640 + }, + { + "epoch": 23.0, + "learning_rate": 2.9722222222222223e-05, + "loss": 0.096, + "step": 4650 + }, + { + "epoch": 23.01, + "learning_rate": 2.9666666666666672e-05, + "loss": 0.1689, + "step": 4660 + }, + { + "epoch": 23.01, + "learning_rate": 2.961111111111111e-05, + "loss": 0.5203, + "step": 4670 + }, + { + "epoch": 23.01, + "learning_rate": 2.955555555555556e-05, + "loss": 0.2689, + "step": 4680 + }, + { + "epoch": 23.01, + "learning_rate": 2.95e-05, + "loss": 0.6529, + "step": 4690 + }, + { + "epoch": 23.01, + "learning_rate": 2.9444444444444448e-05, + "loss": 0.4351, + "step": 4700 + }, + { + "epoch": 23.01, + "learning_rate": 2.9388888888888887e-05, + "loss": 0.1847, + "step": 4710 + }, + { + "epoch": 23.01, + "learning_rate": 2.9333333333333336e-05, + "loss": 0.979, + "step": 4720 + }, + { + "epoch": 23.01, + "learning_rate": 2.927777777777778e-05, + "loss": 0.1496, + "step": 4730 + }, + { + "epoch": 23.01, + "learning_rate": 2.9222222222222224e-05, + "loss": 0.4046, + "step": 4740 + }, + { + "epoch": 23.02, + "learning_rate": 2.916666666666667e-05, + "loss": 1.481, + "step": 4750 + }, + { + "epoch": 23.02, + "learning_rate": 2.9111111111111112e-05, + "loss": 0.1607, + "step": 4760 + }, + { + "epoch": 23.02, + "learning_rate": 2.9055555555555558e-05, + "loss": 0.3698, + "step": 4770 + }, + { + "epoch": 23.02, + "learning_rate": 2.9e-05, + "loss": 0.4519, + "step": 4780 + }, + { + "epoch": 23.02, + "learning_rate": 2.8944444444444446e-05, + "loss": 0.0016, + "step": 4790 + }, + { + "epoch": 23.02, + "learning_rate": 2.8888888888888888e-05, + "loss": 0.0334, + "step": 4800 + }, + { + "epoch": 23.02, + "eval_accuracy": 0.8333333333333334, + "eval_loss": 0.7433444857597351, + "eval_runtime": 12.225, + "eval_samples_per_second": 3.926, + "eval_steps_per_second": 1.963, + "step": 4800 + }, + { + "epoch": 24.0, + "learning_rate": 2.8833333333333334e-05, + "loss": 0.2755, + "step": 4810 + }, + { + "epoch": 24.0, + "learning_rate": 2.877777777777778e-05, + "loss": 0.0019, + "step": 4820 + }, + { + "epoch": 24.0, + "learning_rate": 2.8722222222222222e-05, + "loss": 0.5901, + "step": 4830 + }, + { + "epoch": 24.0, + "learning_rate": 2.8666666666666668e-05, + "loss": 0.2686, + "step": 4840 + }, + { + "epoch": 24.0, + "learning_rate": 2.861111111111111e-05, + "loss": 0.5917, + "step": 4850 + }, + { + "epoch": 24.01, + "learning_rate": 2.855555555555556e-05, + "loss": 0.1629, + "step": 4860 + }, + { + "epoch": 24.01, + "learning_rate": 2.8499999999999998e-05, + "loss": 0.4539, + "step": 4870 + }, + { + "epoch": 24.01, + "learning_rate": 2.8444444444444447e-05, + "loss": 0.0014, + "step": 4880 + }, + { + "epoch": 24.01, + "learning_rate": 2.8388888888888893e-05, + "loss": 0.2313, + "step": 4890 + }, + { + "epoch": 24.01, + "learning_rate": 2.8333333333333335e-05, + "loss": 0.7107, + "step": 4900 + }, + { + "epoch": 24.01, + "learning_rate": 2.827777777777778e-05, + "loss": 0.5604, + "step": 4910 + }, + { + "epoch": 24.01, + "learning_rate": 2.8222222222222223e-05, + "loss": 0.2858, + "step": 4920 + }, + { + "epoch": 24.01, + "learning_rate": 2.816666666666667e-05, + "loss": 0.2374, + "step": 4930 + }, + { + "epoch": 24.01, + "learning_rate": 2.811111111111111e-05, + "loss": 0.256, + "step": 4940 + }, + { + "epoch": 24.02, + "learning_rate": 2.8055555555555557e-05, + "loss": 0.1735, + "step": 4950 + }, + { + "epoch": 24.02, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.0017, + "step": 4960 + }, + { + "epoch": 24.02, + "learning_rate": 2.7944444444444445e-05, + "loss": 0.4564, + "step": 4970 + }, + { + "epoch": 24.02, + "learning_rate": 2.788888888888889e-05, + "loss": 0.0009, + "step": 4980 + }, + { + "epoch": 24.02, + "learning_rate": 2.7833333333333333e-05, + "loss": 1.0652, + "step": 4990 + }, + { + "epoch": 24.02, + "learning_rate": 2.777777777777778e-05, + "loss": 0.4849, + "step": 5000 + }, + { + "epoch": 24.02, + "eval_accuracy": 0.8125, + "eval_loss": 0.991054117679596, + "eval_runtime": 11.8199, + "eval_samples_per_second": 4.061, + "eval_steps_per_second": 2.03, + "step": 5000 + }, + { + "epoch": 25.0, + "learning_rate": 2.772222222222222e-05, + "loss": 0.4665, + "step": 5010 + }, + { + "epoch": 25.0, + "learning_rate": 2.7666666666666667e-05, + "loss": 0.0116, + "step": 5020 + }, + { + "epoch": 25.0, + "learning_rate": 2.761111111111111e-05, + "loss": 0.283, + "step": 5030 + }, + { + "epoch": 25.0, + "learning_rate": 2.7555555555555555e-05, + "loss": 0.5053, + "step": 5040 + }, + { + "epoch": 25.0, + "learning_rate": 2.7500000000000004e-05, + "loss": 1.1481, + "step": 5050 + }, + { + "epoch": 25.01, + "learning_rate": 2.7444444444444443e-05, + "loss": 0.6906, + "step": 5060 + }, + { + "epoch": 25.01, + "learning_rate": 2.7388888888888892e-05, + "loss": 0.2288, + "step": 5070 + }, + { + "epoch": 25.01, + "learning_rate": 2.733333333333333e-05, + "loss": 0.2736, + "step": 5080 + }, + { + "epoch": 25.01, + "learning_rate": 2.727777777777778e-05, + "loss": 0.1634, + "step": 5090 + }, + { + "epoch": 25.01, + "learning_rate": 2.7222222222222223e-05, + "loss": 0.2221, + "step": 5100 + }, + { + "epoch": 25.01, + "learning_rate": 2.716666666666667e-05, + "loss": 0.0125, + "step": 5110 + }, + { + "epoch": 25.01, + "learning_rate": 2.7111111111111114e-05, + "loss": 0.4706, + "step": 5120 + }, + { + "epoch": 25.01, + "learning_rate": 2.7055555555555557e-05, + "loss": 0.0138, + "step": 5130 + }, + { + "epoch": 25.01, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.1388, + "step": 5140 + }, + { + "epoch": 25.02, + "learning_rate": 2.6944444444444445e-05, + "loss": 0.0384, + "step": 5150 + }, + { + "epoch": 25.02, + "learning_rate": 2.688888888888889e-05, + "loss": 0.3701, + "step": 5160 + }, + { + "epoch": 25.02, + "learning_rate": 2.6833333333333333e-05, + "loss": 0.9735, + "step": 5170 + }, + { + "epoch": 25.02, + "learning_rate": 2.677777777777778e-05, + "loss": 0.604, + "step": 5180 + }, + { + "epoch": 25.02, + "learning_rate": 2.6722222222222228e-05, + "loss": 0.0021, + "step": 5190 + }, + { + "epoch": 25.02, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.6075, + "step": 5200 + }, + { + "epoch": 25.02, + "eval_accuracy": 0.7083333333333334, + "eval_loss": 1.2248910665512085, + "eval_runtime": 12.4027, + "eval_samples_per_second": 3.87, + "eval_steps_per_second": 1.935, + "step": 5200 + }, + { + "epoch": 26.0, + "learning_rate": 2.6611111111111116e-05, + "loss": 0.3497, + "step": 5210 + }, + { + "epoch": 26.0, + "learning_rate": 2.6555555555555555e-05, + "loss": 0.0025, + "step": 5220 + }, + { + "epoch": 26.0, + "learning_rate": 2.6500000000000004e-05, + "loss": 0.0021, + "step": 5230 + }, + { + "epoch": 26.0, + "learning_rate": 2.6444444444444443e-05, + "loss": 0.31, + "step": 5240 + }, + { + "epoch": 26.0, + "learning_rate": 2.6388888888888892e-05, + "loss": 0.3074, + "step": 5250 + }, + { + "epoch": 26.01, + "learning_rate": 2.633333333333333e-05, + "loss": 0.0399, + "step": 5260 + }, + { + "epoch": 26.01, + "learning_rate": 2.627777777777778e-05, + "loss": 0.346, + "step": 5270 + }, + { + "epoch": 26.01, + "learning_rate": 2.6222222222222226e-05, + "loss": 0.2626, + "step": 5280 + }, + { + "epoch": 26.01, + "learning_rate": 2.6166666666666668e-05, + "loss": 0.4374, + "step": 5290 + }, + { + "epoch": 26.01, + "learning_rate": 2.6111111111111114e-05, + "loss": 0.1794, + "step": 5300 + }, + { + "epoch": 26.01, + "learning_rate": 2.6055555555555556e-05, + "loss": 0.0114, + "step": 5310 + }, + { + "epoch": 26.01, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.7371, + "step": 5320 + }, + { + "epoch": 26.01, + "learning_rate": 2.5944444444444444e-05, + "loss": 0.1541, + "step": 5330 + }, + { + "epoch": 26.01, + "learning_rate": 2.588888888888889e-05, + "loss": 0.3013, + "step": 5340 + }, + { + "epoch": 26.02, + "learning_rate": 2.5833333333333336e-05, + "loss": 0.0008, + "step": 5350 + }, + { + "epoch": 26.02, + "learning_rate": 2.5777777777777778e-05, + "loss": 0.2295, + "step": 5360 + }, + { + "epoch": 26.02, + "learning_rate": 2.5722222222222224e-05, + "loss": 0.0008, + "step": 5370 + }, + { + "epoch": 26.02, + "learning_rate": 2.5666666666666666e-05, + "loss": 0.0006, + "step": 5380 + }, + { + "epoch": 26.02, + "learning_rate": 2.5611111111111115e-05, + "loss": 0.0027, + "step": 5390 + }, + { + "epoch": 26.02, + "learning_rate": 2.5555555555555554e-05, + "loss": 0.3441, + "step": 5400 + }, + { + "epoch": 26.02, + "eval_accuracy": 0.8333333333333334, + "eval_loss": 0.8563232421875, + "eval_runtime": 12.0153, + "eval_samples_per_second": 3.995, + "eval_steps_per_second": 1.997, + "step": 5400 + }, + { + "epoch": 27.0, + "learning_rate": 2.5500000000000003e-05, + "loss": 0.3165, + "step": 5410 + }, + { + "epoch": 27.0, + "learning_rate": 2.5444444444444442e-05, + "loss": 0.0024, + "step": 5420 + }, + { + "epoch": 27.0, + "learning_rate": 2.538888888888889e-05, + "loss": 0.0005, + "step": 5430 + }, + { + "epoch": 27.0, + "learning_rate": 2.5333333333333337e-05, + "loss": 0.001, + "step": 5440 + }, + { + "epoch": 27.0, + "learning_rate": 2.527777777777778e-05, + "loss": 0.0003, + "step": 5450 + }, + { + "epoch": 27.01, + "learning_rate": 2.5222222222222225e-05, + "loss": 1.448, + "step": 5460 + }, + { + "epoch": 27.01, + "learning_rate": 2.5166666666666667e-05, + "loss": 0.2954, + "step": 5470 + }, + { + "epoch": 27.01, + "learning_rate": 2.5111111111111113e-05, + "loss": 0.3194, + "step": 5480 + }, + { + "epoch": 27.01, + "learning_rate": 2.5055555555555555e-05, + "loss": 0.001, + "step": 5490 + }, + { + "epoch": 27.01, + "learning_rate": 2.5e-05, + "loss": 0.0388, + "step": 5500 + }, + { + "epoch": 27.01, + "learning_rate": 2.4944444444444447e-05, + "loss": 0.3337, + "step": 5510 + }, + { + "epoch": 27.01, + "learning_rate": 2.488888888888889e-05, + "loss": 0.2475, + "step": 5520 + }, + { + "epoch": 27.01, + "learning_rate": 2.4833333333333335e-05, + "loss": 0.401, + "step": 5530 + }, + { + "epoch": 27.01, + "learning_rate": 2.477777777777778e-05, + "loss": 0.495, + "step": 5540 + }, + { + "epoch": 27.02, + "learning_rate": 2.4722222222222223e-05, + "loss": 0.2633, + "step": 5550 + }, + { + "epoch": 27.02, + "learning_rate": 2.466666666666667e-05, + "loss": 0.0965, + "step": 5560 + }, + { + "epoch": 27.02, + "learning_rate": 2.461111111111111e-05, + "loss": 0.0012, + "step": 5570 + }, + { + "epoch": 27.02, + "learning_rate": 2.4555555555555557e-05, + "loss": 0.3345, + "step": 5580 + }, + { + "epoch": 27.02, + "learning_rate": 2.45e-05, + "loss": 0.5968, + "step": 5590 + }, + { + "epoch": 27.02, + "learning_rate": 2.4444444444444445e-05, + "loss": 0.5653, + "step": 5600 + }, + { + "epoch": 27.02, + "eval_accuracy": 0.8958333333333334, + "eval_loss": 0.45567557215690613, + "eval_runtime": 12.6471, + "eval_samples_per_second": 3.795, + "eval_steps_per_second": 1.898, + "step": 5600 + }, + { + "epoch": 28.0, + "learning_rate": 2.4388888888888887e-05, + "loss": 0.3856, + "step": 5610 + }, + { + "epoch": 28.0, + "learning_rate": 2.4333333333333336e-05, + "loss": 0.0019, + "step": 5620 + }, + { + "epoch": 28.0, + "learning_rate": 2.427777777777778e-05, + "loss": 0.0011, + "step": 5630 + }, + { + "epoch": 28.0, + "learning_rate": 2.4222222222222224e-05, + "loss": 0.3706, + "step": 5640 + }, + { + "epoch": 28.0, + "learning_rate": 2.4166666666666667e-05, + "loss": 0.1063, + "step": 5650 + }, + { + "epoch": 28.01, + "learning_rate": 2.4111111111111113e-05, + "loss": 0.0004, + "step": 5660 + }, + { + "epoch": 28.01, + "learning_rate": 2.4055555555555555e-05, + "loss": 0.0073, + "step": 5670 + }, + { + "epoch": 28.01, + "learning_rate": 2.4e-05, + "loss": 0.6459, + "step": 5680 + }, + { + "epoch": 28.01, + "learning_rate": 2.3944444444444443e-05, + "loss": 0.3438, + "step": 5690 + }, + { + "epoch": 28.01, + "learning_rate": 2.3888888888888892e-05, + "loss": 0.1719, + "step": 5700 + }, + { + "epoch": 28.01, + "learning_rate": 2.3833333333333334e-05, + "loss": 0.3087, + "step": 5710 + }, + { + "epoch": 28.01, + "learning_rate": 2.377777777777778e-05, + "loss": 0.0012, + "step": 5720 + }, + { + "epoch": 28.01, + "learning_rate": 2.3722222222222222e-05, + "loss": 0.3797, + "step": 5730 + }, + { + "epoch": 28.01, + "learning_rate": 2.3666666666666668e-05, + "loss": 0.0867, + "step": 5740 + }, + { + "epoch": 28.02, + "learning_rate": 2.361111111111111e-05, + "loss": 0.7648, + "step": 5750 + }, + { + "epoch": 28.02, + "learning_rate": 2.3555555555555556e-05, + "loss": 0.448, + "step": 5760 + }, + { + "epoch": 28.02, + "learning_rate": 2.35e-05, + "loss": 0.2457, + "step": 5770 + }, + { + "epoch": 28.02, + "learning_rate": 2.3444444444444448e-05, + "loss": 0.5912, + "step": 5780 + }, + { + "epoch": 28.02, + "learning_rate": 2.338888888888889e-05, + "loss": 0.3141, + "step": 5790 + }, + { + "epoch": 28.02, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.196, + "step": 5800 + }, + { + "epoch": 28.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.4156099259853363, + "eval_runtime": 11.7855, + "eval_samples_per_second": 4.073, + "eval_steps_per_second": 2.036, + "step": 5800 + }, + { + "epoch": 29.0, + "learning_rate": 2.3277777777777778e-05, + "loss": 0.0005, + "step": 5810 + }, + { + "epoch": 29.0, + "learning_rate": 2.3222222222222224e-05, + "loss": 0.5131, + "step": 5820 + }, + { + "epoch": 29.0, + "learning_rate": 2.3166666666666666e-05, + "loss": 0.1469, + "step": 5830 + }, + { + "epoch": 29.0, + "learning_rate": 2.3111111111111112e-05, + "loss": 0.4118, + "step": 5840 + }, + { + "epoch": 29.0, + "learning_rate": 2.3055555555555558e-05, + "loss": 0.2109, + "step": 5850 + }, + { + "epoch": 29.01, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.3721, + "step": 5860 + }, + { + "epoch": 29.01, + "learning_rate": 2.2944444444444446e-05, + "loss": 0.0009, + "step": 5870 + }, + { + "epoch": 29.01, + "learning_rate": 2.288888888888889e-05, + "loss": 0.0822, + "step": 5880 + }, + { + "epoch": 29.01, + "learning_rate": 2.2833333333333334e-05, + "loss": 0.0004, + "step": 5890 + }, + { + "epoch": 29.01, + "learning_rate": 2.277777777777778e-05, + "loss": 0.2703, + "step": 5900 + }, + { + "epoch": 29.01, + "learning_rate": 2.2722222222222222e-05, + "loss": 0.0004, + "step": 5910 + }, + { + "epoch": 29.01, + "learning_rate": 2.2666666666666668e-05, + "loss": 0.3388, + "step": 5920 + }, + { + "epoch": 29.01, + "learning_rate": 2.2611111111111113e-05, + "loss": 0.021, + "step": 5930 + }, + { + "epoch": 29.01, + "learning_rate": 2.255555555555556e-05, + "loss": 0.2202, + "step": 5940 + }, + { + "epoch": 29.02, + "learning_rate": 2.25e-05, + "loss": 0.0009, + "step": 5950 + }, + { + "epoch": 29.02, + "learning_rate": 2.2444444444444447e-05, + "loss": 0.001, + "step": 5960 + }, + { + "epoch": 29.02, + "learning_rate": 2.238888888888889e-05, + "loss": 0.6966, + "step": 5970 + }, + { + "epoch": 29.02, + "learning_rate": 2.2333333333333335e-05, + "loss": 0.6777, + "step": 5980 + }, + { + "epoch": 29.02, + "learning_rate": 2.2277777777777778e-05, + "loss": 0.2519, + "step": 5990 + }, + { + "epoch": 29.02, + "learning_rate": 2.2222222222222223e-05, + "loss": 0.0038, + "step": 6000 + }, + { + "epoch": 29.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.456225723028183, + "eval_runtime": 12.1414, + "eval_samples_per_second": 3.953, + "eval_steps_per_second": 1.977, + "step": 6000 + }, + { + "epoch": 30.0, + "learning_rate": 2.216666666666667e-05, + "loss": 0.0088, + "step": 6010 + }, + { + "epoch": 30.0, + "learning_rate": 2.211111111111111e-05, + "loss": 0.518, + "step": 6020 + }, + { + "epoch": 30.0, + "learning_rate": 2.2055555555555557e-05, + "loss": 0.0655, + "step": 6030 + }, + { + "epoch": 30.0, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.0809, + "step": 6040 + }, + { + "epoch": 30.0, + "learning_rate": 2.1944444444444445e-05, + "loss": 0.0007, + "step": 6050 + }, + { + "epoch": 30.01, + "learning_rate": 2.188888888888889e-05, + "loss": 0.3516, + "step": 6060 + }, + { + "epoch": 30.01, + "learning_rate": 2.1833333333333333e-05, + "loss": 0.2624, + "step": 6070 + }, + { + "epoch": 30.01, + "learning_rate": 2.177777777777778e-05, + "loss": 0.3091, + "step": 6080 + }, + { + "epoch": 30.01, + "learning_rate": 2.1722222222222225e-05, + "loss": 0.2033, + "step": 6090 + }, + { + "epoch": 30.01, + "learning_rate": 2.1666666666666667e-05, + "loss": 0.0009, + "step": 6100 + }, + { + "epoch": 30.01, + "learning_rate": 2.1611111111111113e-05, + "loss": 0.0015, + "step": 6110 + }, + { + "epoch": 30.01, + "learning_rate": 2.1555555555555555e-05, + "loss": 0.0916, + "step": 6120 + }, + { + "epoch": 30.01, + "learning_rate": 2.15e-05, + "loss": 0.0004, + "step": 6130 + }, + { + "epoch": 30.01, + "learning_rate": 2.1444444444444443e-05, + "loss": 0.6008, + "step": 6140 + }, + { + "epoch": 30.02, + "learning_rate": 2.138888888888889e-05, + "loss": 0.0004, + "step": 6150 + }, + { + "epoch": 30.02, + "learning_rate": 2.1333333333333335e-05, + "loss": 0.336, + "step": 6160 + }, + { + "epoch": 30.02, + "learning_rate": 2.127777777777778e-05, + "loss": 0.9783, + "step": 6170 + }, + { + "epoch": 30.02, + "learning_rate": 2.1222222222222223e-05, + "loss": 0.182, + "step": 6180 + }, + { + "epoch": 30.02, + "learning_rate": 2.116666666666667e-05, + "loss": 0.3465, + "step": 6190 + }, + { + "epoch": 30.02, + "learning_rate": 2.111111111111111e-05, + "loss": 0.2696, + "step": 6200 + }, + { + "epoch": 30.02, + "eval_accuracy": 0.7916666666666666, + "eval_loss": 0.8153278231620789, + "eval_runtime": 11.793, + "eval_samples_per_second": 4.07, + "eval_steps_per_second": 2.035, + "step": 6200 + }, + { + "epoch": 31.0, + "learning_rate": 2.1055555555555556e-05, + "loss": 0.5477, + "step": 6210 + }, + { + "epoch": 31.0, + "learning_rate": 2.1e-05, + "loss": 0.4015, + "step": 6220 + }, + { + "epoch": 31.0, + "learning_rate": 2.0944444444444445e-05, + "loss": 0.2602, + "step": 6230 + }, + { + "epoch": 31.0, + "learning_rate": 2.088888888888889e-05, + "loss": 0.3825, + "step": 6240 + }, + { + "epoch": 31.0, + "learning_rate": 2.0833333333333336e-05, + "loss": 0.2155, + "step": 6250 + }, + { + "epoch": 31.01, + "learning_rate": 2.077777777777778e-05, + "loss": 0.0007, + "step": 6260 + }, + { + "epoch": 31.01, + "learning_rate": 2.0722222222222224e-05, + "loss": 0.6795, + "step": 6270 + }, + { + "epoch": 31.01, + "learning_rate": 2.0666666666666666e-05, + "loss": 0.0007, + "step": 6280 + }, + { + "epoch": 31.01, + "learning_rate": 2.0611111111111112e-05, + "loss": 0.3173, + "step": 6290 + }, + { + "epoch": 31.01, + "learning_rate": 2.0555555555555555e-05, + "loss": 0.2455, + "step": 6300 + }, + { + "epoch": 31.01, + "learning_rate": 2.05e-05, + "loss": 0.6658, + "step": 6310 + }, + { + "epoch": 31.01, + "learning_rate": 2.0444444444444446e-05, + "loss": 0.4408, + "step": 6320 + }, + { + "epoch": 31.01, + "learning_rate": 2.0388888888888892e-05, + "loss": 0.0891, + "step": 6330 + }, + { + "epoch": 31.01, + "learning_rate": 2.0333333333333334e-05, + "loss": 0.4497, + "step": 6340 + }, + { + "epoch": 31.02, + "learning_rate": 2.027777777777778e-05, + "loss": 0.2107, + "step": 6350 + }, + { + "epoch": 31.02, + "learning_rate": 2.0222222222222222e-05, + "loss": 0.0016, + "step": 6360 + }, + { + "epoch": 31.02, + "learning_rate": 2.0166666666666668e-05, + "loss": 0.5188, + "step": 6370 + }, + { + "epoch": 31.02, + "learning_rate": 2.011111111111111e-05, + "loss": 0.0017, + "step": 6380 + }, + { + "epoch": 31.02, + "learning_rate": 2.0055555555555556e-05, + "loss": 0.0122, + "step": 6390 + }, + { + "epoch": 31.02, + "learning_rate": 2e-05, + "loss": 0.0015, + "step": 6400 + }, + { + "epoch": 31.02, + "eval_accuracy": 0.8958333333333334, + "eval_loss": 0.5923376679420471, + "eval_runtime": 12.3257, + "eval_samples_per_second": 3.894, + "eval_steps_per_second": 1.947, + "step": 6400 + }, + { + "epoch": 32.0, + "learning_rate": 1.9944444444444447e-05, + "loss": 0.0014, + "step": 6410 + }, + { + "epoch": 32.0, + "learning_rate": 1.988888888888889e-05, + "loss": 0.0003, + "step": 6420 + }, + { + "epoch": 32.0, + "learning_rate": 1.9833333333333335e-05, + "loss": 0.3568, + "step": 6430 + }, + { + "epoch": 32.0, + "learning_rate": 1.9777777777777778e-05, + "loss": 0.0004, + "step": 6440 + }, + { + "epoch": 32.01, + "learning_rate": 1.9722222222222224e-05, + "loss": 0.052, + "step": 6450 + }, + { + "epoch": 32.01, + "learning_rate": 1.9666666666666666e-05, + "loss": 0.405, + "step": 6460 + }, + { + "epoch": 32.01, + "learning_rate": 1.9611111111111115e-05, + "loss": 0.002, + "step": 6470 + }, + { + "epoch": 32.01, + "learning_rate": 1.9555555555555557e-05, + "loss": 0.0002, + "step": 6480 + }, + { + "epoch": 32.01, + "learning_rate": 1.9500000000000003e-05, + "loss": 0.1832, + "step": 6490 + }, + { + "epoch": 32.01, + "learning_rate": 1.9444444444444445e-05, + "loss": 0.6414, + "step": 6500 + }, + { + "epoch": 32.01, + "learning_rate": 1.938888888888889e-05, + "loss": 0.0003, + "step": 6510 + }, + { + "epoch": 32.01, + "learning_rate": 1.9333333333333333e-05, + "loss": 0.3833, + "step": 6520 + }, + { + "epoch": 32.01, + "learning_rate": 1.927777777777778e-05, + "loss": 0.2141, + "step": 6530 + }, + { + "epoch": 32.01, + "learning_rate": 1.922222222222222e-05, + "loss": 0.3455, + "step": 6540 + }, + { + "epoch": 32.02, + "learning_rate": 1.9166666666666667e-05, + "loss": 0.0005, + "step": 6550 + }, + { + "epoch": 32.02, + "learning_rate": 1.9111111111111113e-05, + "loss": 0.3215, + "step": 6560 + }, + { + "epoch": 32.02, + "learning_rate": 1.905555555555556e-05, + "loss": 0.0014, + "step": 6570 + }, + { + "epoch": 32.02, + "learning_rate": 1.9e-05, + "loss": 0.7868, + "step": 6580 + }, + { + "epoch": 32.02, + "learning_rate": 1.8944444444444447e-05, + "loss": 0.8695, + "step": 6590 + }, + { + "epoch": 32.02, + "learning_rate": 1.888888888888889e-05, + "loss": 0.0036, + "step": 6600 + }, + { + "epoch": 32.02, + "eval_accuracy": 0.875, + "eval_loss": 0.734348714351654, + "eval_runtime": 11.9584, + "eval_samples_per_second": 4.014, + "eval_steps_per_second": 2.007, + "step": 6600 + }, + { + "epoch": 33.0, + "learning_rate": 1.8833333333333335e-05, + "loss": 0.3371, + "step": 6610 + }, + { + "epoch": 33.0, + "learning_rate": 1.8777777777777777e-05, + "loss": 0.273, + "step": 6620 + }, + { + "epoch": 33.0, + "learning_rate": 1.8722222222222223e-05, + "loss": 0.5335, + "step": 6630 + }, + { + "epoch": 33.0, + "learning_rate": 1.866666666666667e-05, + "loss": 0.0013, + "step": 6640 + }, + { + "epoch": 33.01, + "learning_rate": 1.861111111111111e-05, + "loss": 0.3249, + "step": 6650 + }, + { + "epoch": 33.01, + "learning_rate": 1.8555555555555557e-05, + "loss": 0.0005, + "step": 6660 + }, + { + "epoch": 33.01, + "learning_rate": 1.85e-05, + "loss": 0.1923, + "step": 6670 + }, + { + "epoch": 33.01, + "learning_rate": 1.8444444444444445e-05, + "loss": 0.0564, + "step": 6680 + }, + { + "epoch": 33.01, + "learning_rate": 1.838888888888889e-05, + "loss": 0.0004, + "step": 6690 + }, + { + "epoch": 33.01, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.1158, + "step": 6700 + }, + { + "epoch": 33.01, + "learning_rate": 1.827777777777778e-05, + "loss": 0.6012, + "step": 6710 + }, + { + "epoch": 33.01, + "learning_rate": 1.8222222222222224e-05, + "loss": 0.65, + "step": 6720 + }, + { + "epoch": 33.01, + "learning_rate": 1.8166666666666667e-05, + "loss": 0.0017, + "step": 6730 + }, + { + "epoch": 33.01, + "learning_rate": 1.8111111111111112e-05, + "loss": 0.0009, + "step": 6740 + }, + { + "epoch": 33.02, + "learning_rate": 1.8055555555555555e-05, + "loss": 0.0012, + "step": 6750 + }, + { + "epoch": 33.02, + "learning_rate": 1.8e-05, + "loss": 0.0008, + "step": 6760 + }, + { + "epoch": 33.02, + "learning_rate": 1.7944444444444443e-05, + "loss": 0.0043, + "step": 6770 + }, + { + "epoch": 33.02, + "learning_rate": 1.788888888888889e-05, + "loss": 0.2399, + "step": 6780 + }, + { + "epoch": 33.02, + "learning_rate": 1.7833333333333334e-05, + "loss": 0.0885, + "step": 6790 + }, + { + "epoch": 33.02, + "learning_rate": 1.777777777777778e-05, + "loss": 0.3623, + "step": 6800 + }, + { + "epoch": 33.02, + "eval_accuracy": 0.9375, + "eval_loss": 0.30889853835105896, + "eval_runtime": 13.1578, + "eval_samples_per_second": 3.648, + "eval_steps_per_second": 1.824, + "step": 6800 + }, + { + "epoch": 34.0, + "learning_rate": 1.7722222222222222e-05, + "loss": 0.0004, + "step": 6810 + }, + { + "epoch": 34.0, + "learning_rate": 1.7666666666666668e-05, + "loss": 0.0004, + "step": 6820 + }, + { + "epoch": 34.0, + "learning_rate": 1.761111111111111e-05, + "loss": 0.2989, + "step": 6830 + }, + { + "epoch": 34.0, + "learning_rate": 1.7555555555555556e-05, + "loss": 0.1982, + "step": 6840 + }, + { + "epoch": 34.01, + "learning_rate": 1.75e-05, + "loss": 0.0016, + "step": 6850 + }, + { + "epoch": 34.01, + "learning_rate": 1.7444444444444448e-05, + "loss": 0.0002, + "step": 6860 + }, + { + "epoch": 34.01, + "learning_rate": 1.738888888888889e-05, + "loss": 0.0018, + "step": 6870 + }, + { + "epoch": 34.01, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.1795, + "step": 6880 + }, + { + "epoch": 34.01, + "learning_rate": 1.7277777777777778e-05, + "loss": 0.2938, + "step": 6890 + }, + { + "epoch": 34.01, + "learning_rate": 1.7222222222222224e-05, + "loss": 0.4597, + "step": 6900 + }, + { + "epoch": 34.01, + "learning_rate": 1.7166666666666666e-05, + "loss": 0.2432, + "step": 6910 + }, + { + "epoch": 34.01, + "learning_rate": 1.7111111111111112e-05, + "loss": 0.0025, + "step": 6920 + }, + { + "epoch": 34.01, + "learning_rate": 1.7055555555555554e-05, + "loss": 0.2494, + "step": 6930 + }, + { + "epoch": 34.01, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.2388, + "step": 6940 + }, + { + "epoch": 34.02, + "learning_rate": 1.6944444444444446e-05, + "loss": 0.2193, + "step": 6950 + }, + { + "epoch": 34.02, + "learning_rate": 1.688888888888889e-05, + "loss": 0.0004, + "step": 6960 + }, + { + "epoch": 34.02, + "learning_rate": 1.6833333333333334e-05, + "loss": 0.0158, + "step": 6970 + }, + { + "epoch": 34.02, + "learning_rate": 1.677777777777778e-05, + "loss": 0.1012, + "step": 6980 + }, + { + "epoch": 34.02, + "learning_rate": 1.6722222222222222e-05, + "loss": 0.0071, + "step": 6990 + }, + { + "epoch": 34.02, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.2142, + "step": 7000 + }, + { + "epoch": 34.02, + "eval_accuracy": 0.8958333333333334, + "eval_loss": 0.6142041683197021, + "eval_runtime": 12.1703, + "eval_samples_per_second": 3.944, + "eval_steps_per_second": 1.972, + "step": 7000 + }, + { + "epoch": 35.0, + "learning_rate": 1.661111111111111e-05, + "loss": 0.0001, + "step": 7010 + }, + { + "epoch": 35.0, + "learning_rate": 1.655555555555556e-05, + "loss": 0.0005, + "step": 7020 + }, + { + "epoch": 35.0, + "learning_rate": 1.65e-05, + "loss": 0.0018, + "step": 7030 + }, + { + "epoch": 35.0, + "learning_rate": 1.6444444444444447e-05, + "loss": 0.4747, + "step": 7040 + }, + { + "epoch": 35.01, + "learning_rate": 1.638888888888889e-05, + "loss": 0.1249, + "step": 7050 + }, + { + "epoch": 35.01, + "learning_rate": 1.6333333333333335e-05, + "loss": 0.6736, + "step": 7060 + }, + { + "epoch": 35.01, + "learning_rate": 1.6277777777777777e-05, + "loss": 0.3599, + "step": 7070 + }, + { + "epoch": 35.01, + "learning_rate": 1.6222222222222223e-05, + "loss": 0.0593, + "step": 7080 + }, + { + "epoch": 35.01, + "learning_rate": 1.6166666666666665e-05, + "loss": 0.0015, + "step": 7090 + }, + { + "epoch": 35.01, + "learning_rate": 1.6111111111111115e-05, + "loss": 0.3011, + "step": 7100 + }, + { + "epoch": 35.01, + "learning_rate": 1.6055555555555557e-05, + "loss": 0.0006, + "step": 7110 + }, + { + "epoch": 35.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.0578, + "step": 7120 + }, + { + "epoch": 35.01, + "learning_rate": 1.5944444444444445e-05, + "loss": 0.3444, + "step": 7130 + }, + { + "epoch": 35.01, + "learning_rate": 1.588888888888889e-05, + "loss": 0.1937, + "step": 7140 + }, + { + "epoch": 35.02, + "learning_rate": 1.5833333333333333e-05, + "loss": 0.3914, + "step": 7150 + }, + { + "epoch": 35.02, + "learning_rate": 1.577777777777778e-05, + "loss": 0.1062, + "step": 7160 + }, + { + "epoch": 35.02, + "learning_rate": 1.5722222222222225e-05, + "loss": 0.0034, + "step": 7170 + }, + { + "epoch": 35.02, + "learning_rate": 1.5666666666666667e-05, + "loss": 0.4035, + "step": 7180 + }, + { + "epoch": 35.02, + "learning_rate": 1.5611111111111113e-05, + "loss": 0.4617, + "step": 7190 + }, + { + "epoch": 35.02, + "learning_rate": 1.5555555555555555e-05, + "loss": 0.0008, + "step": 7200 + }, + { + "epoch": 35.02, + "eval_accuracy": 0.875, + "eval_loss": 0.6010245680809021, + "eval_runtime": 12.549, + "eval_samples_per_second": 3.825, + "eval_steps_per_second": 1.913, + "step": 7200 + }, + { + "epoch": 36.0, + "learning_rate": 1.55e-05, + "loss": 0.0011, + "step": 7210 + }, + { + "epoch": 36.0, + "learning_rate": 1.5444444444444446e-05, + "loss": 0.0003, + "step": 7220 + }, + { + "epoch": 36.0, + "learning_rate": 1.538888888888889e-05, + "loss": 0.2107, + "step": 7230 + }, + { + "epoch": 36.0, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.3339, + "step": 7240 + }, + { + "epoch": 36.01, + "learning_rate": 1.527777777777778e-05, + "loss": 0.2039, + "step": 7250 + }, + { + "epoch": 36.01, + "learning_rate": 1.5222222222222224e-05, + "loss": 0.5722, + "step": 7260 + }, + { + "epoch": 36.01, + "learning_rate": 1.5166666666666668e-05, + "loss": 0.0008, + "step": 7270 + }, + { + "epoch": 36.01, + "learning_rate": 1.5111111111111112e-05, + "loss": 0.0011, + "step": 7280 + }, + { + "epoch": 36.01, + "learning_rate": 1.5055555555555556e-05, + "loss": 0.0006, + "step": 7290 + }, + { + "epoch": 36.01, + "learning_rate": 1.5e-05, + "loss": 0.3744, + "step": 7300 + }, + { + "epoch": 36.01, + "learning_rate": 1.4944444444444444e-05, + "loss": 0.0012, + "step": 7310 + }, + { + "epoch": 36.01, + "learning_rate": 1.4888888888888888e-05, + "loss": 0.3035, + "step": 7320 + }, + { + "epoch": 36.01, + "learning_rate": 1.4833333333333336e-05, + "loss": 0.1144, + "step": 7330 + }, + { + "epoch": 36.01, + "learning_rate": 1.477777777777778e-05, + "loss": 0.0272, + "step": 7340 + }, + { + "epoch": 36.02, + "learning_rate": 1.4722222222222224e-05, + "loss": 0.0005, + "step": 7350 + }, + { + "epoch": 36.02, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.0005, + "step": 7360 + }, + { + "epoch": 36.02, + "learning_rate": 1.4611111111111112e-05, + "loss": 0.0052, + "step": 7370 + }, + { + "epoch": 36.02, + "learning_rate": 1.4555555555555556e-05, + "loss": 0.0043, + "step": 7380 + }, + { + "epoch": 36.02, + "learning_rate": 1.45e-05, + "loss": 0.0001, + "step": 7390 + }, + { + "epoch": 36.02, + "learning_rate": 1.4444444444444444e-05, + "loss": 0.0005, + "step": 7400 + }, + { + "epoch": 36.02, + "eval_accuracy": 0.875, + "eval_loss": 0.6238037943840027, + "eval_runtime": 12.0891, + "eval_samples_per_second": 3.971, + "eval_steps_per_second": 1.985, + "step": 7400 + }, + { + "epoch": 37.0, + "learning_rate": 1.438888888888889e-05, + "loss": 0.2606, + "step": 7410 + }, + { + "epoch": 37.0, + "learning_rate": 1.4333333333333334e-05, + "loss": 0.0002, + "step": 7420 + }, + { + "epoch": 37.0, + "learning_rate": 1.427777777777778e-05, + "loss": 0.0001, + "step": 7430 + }, + { + "epoch": 37.0, + "learning_rate": 1.4222222222222224e-05, + "loss": 0.3046, + "step": 7440 + }, + { + "epoch": 37.01, + "learning_rate": 1.4166666666666668e-05, + "loss": 0.0002, + "step": 7450 + }, + { + "epoch": 37.01, + "learning_rate": 1.4111111111111112e-05, + "loss": 0.0008, + "step": 7460 + }, + { + "epoch": 37.01, + "learning_rate": 1.4055555555555556e-05, + "loss": 0.3541, + "step": 7470 + }, + { + "epoch": 37.01, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.0001, + "step": 7480 + }, + { + "epoch": 37.01, + "learning_rate": 1.3944444444444446e-05, + "loss": 0.0113, + "step": 7490 + }, + { + "epoch": 37.01, + "learning_rate": 1.388888888888889e-05, + "loss": 0.2522, + "step": 7500 + }, + { + "epoch": 37.01, + "learning_rate": 1.3833333333333334e-05, + "loss": 0.4705, + "step": 7510 + }, + { + "epoch": 37.01, + "learning_rate": 1.3777777777777778e-05, + "loss": 0.0001, + "step": 7520 + }, + { + "epoch": 37.01, + "learning_rate": 1.3722222222222222e-05, + "loss": 0.0124, + "step": 7530 + }, + { + "epoch": 37.01, + "learning_rate": 1.3666666666666666e-05, + "loss": 0.3502, + "step": 7540 + }, + { + "epoch": 37.02, + "learning_rate": 1.3611111111111111e-05, + "loss": 0.0029, + "step": 7550 + }, + { + "epoch": 37.02, + "learning_rate": 1.3555555555555557e-05, + "loss": 0.3978, + "step": 7560 + }, + { + "epoch": 37.02, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.0001, + "step": 7570 + }, + { + "epoch": 37.02, + "learning_rate": 1.3444444444444445e-05, + "loss": 0.3307, + "step": 7580 + }, + { + "epoch": 37.02, + "learning_rate": 1.338888888888889e-05, + "loss": 0.0001, + "step": 7590 + }, + { + "epoch": 37.02, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.0002, + "step": 7600 + }, + { + "epoch": 37.02, + "eval_accuracy": 0.875, + "eval_loss": 0.5965681076049805, + "eval_runtime": 12.7074, + "eval_samples_per_second": 3.777, + "eval_steps_per_second": 1.889, + "step": 7600 + }, + { + "epoch": 38.0, + "learning_rate": 1.3277777777777777e-05, + "loss": 0.1374, + "step": 7610 + }, + { + "epoch": 38.0, + "learning_rate": 1.3222222222222221e-05, + "loss": 0.0001, + "step": 7620 + }, + { + "epoch": 38.0, + "learning_rate": 1.3166666666666665e-05, + "loss": 0.2238, + "step": 7630 + }, + { + "epoch": 38.0, + "learning_rate": 1.3111111111111113e-05, + "loss": 0.0002, + "step": 7640 + }, + { + "epoch": 38.01, + "learning_rate": 1.3055555555555557e-05, + "loss": 0.0003, + "step": 7650 + }, + { + "epoch": 38.01, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.2966, + "step": 7660 + }, + { + "epoch": 38.01, + "learning_rate": 1.2944444444444445e-05, + "loss": 0.0001, + "step": 7670 + }, + { + "epoch": 38.01, + "learning_rate": 1.2888888888888889e-05, + "loss": 0.2405, + "step": 7680 + }, + { + "epoch": 38.01, + "learning_rate": 1.2833333333333333e-05, + "loss": 0.0002, + "step": 7690 + }, + { + "epoch": 38.01, + "learning_rate": 1.2777777777777777e-05, + "loss": 0.2149, + "step": 7700 + }, + { + "epoch": 38.01, + "learning_rate": 1.2722222222222221e-05, + "loss": 0.006, + "step": 7710 + }, + { + "epoch": 38.01, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.0003, + "step": 7720 + }, + { + "epoch": 38.01, + "learning_rate": 1.2611111111111113e-05, + "loss": 0.3521, + "step": 7730 + }, + { + "epoch": 38.01, + "learning_rate": 1.2555555555555557e-05, + "loss": 0.2337, + "step": 7740 + }, + { + "epoch": 38.02, + "learning_rate": 1.25e-05, + "loss": 0.0003, + "step": 7750 + }, + { + "epoch": 38.02, + "learning_rate": 1.2444444444444445e-05, + "loss": 0.0011, + "step": 7760 + }, + { + "epoch": 38.02, + "learning_rate": 1.238888888888889e-05, + "loss": 0.3938, + "step": 7770 + }, + { + "epoch": 38.02, + "learning_rate": 1.2333333333333334e-05, + "loss": 0.0013, + "step": 7780 + }, + { + "epoch": 38.02, + "learning_rate": 1.2277777777777778e-05, + "loss": 0.3642, + "step": 7790 + }, + { + "epoch": 38.02, + "learning_rate": 1.2222222222222222e-05, + "loss": 0.5, + "step": 7800 + }, + { + "epoch": 38.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.6370941996574402, + "eval_runtime": 12.1176, + "eval_samples_per_second": 3.961, + "eval_steps_per_second": 1.981, + "step": 7800 + }, + { + "epoch": 39.0, + "learning_rate": 1.2166666666666668e-05, + "loss": 0.1917, + "step": 7810 + }, + { + "epoch": 39.0, + "learning_rate": 1.2111111111111112e-05, + "loss": 0.0005, + "step": 7820 + }, + { + "epoch": 39.0, + "learning_rate": 1.2055555555555556e-05, + "loss": 0.0008, + "step": 7830 + }, + { + "epoch": 39.0, + "learning_rate": 1.2e-05, + "loss": 0.0003, + "step": 7840 + }, + { + "epoch": 39.01, + "learning_rate": 1.1944444444444446e-05, + "loss": 0.3156, + "step": 7850 + }, + { + "epoch": 39.01, + "learning_rate": 1.188888888888889e-05, + "loss": 0.2985, + "step": 7860 + }, + { + "epoch": 39.01, + "learning_rate": 1.1833333333333334e-05, + "loss": 0.1472, + "step": 7870 + }, + { + "epoch": 39.01, + "learning_rate": 1.1777777777777778e-05, + "loss": 0.0003, + "step": 7880 + }, + { + "epoch": 39.01, + "learning_rate": 1.1722222222222224e-05, + "loss": 0.0004, + "step": 7890 + }, + { + "epoch": 39.01, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.0001, + "step": 7900 + }, + { + "epoch": 39.01, + "learning_rate": 1.1611111111111112e-05, + "loss": 0.0015, + "step": 7910 + }, + { + "epoch": 39.01, + "learning_rate": 1.1555555555555556e-05, + "loss": 0.0007, + "step": 7920 + }, + { + "epoch": 39.01, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.177, + "step": 7930 + }, + { + "epoch": 39.01, + "learning_rate": 1.1444444444444446e-05, + "loss": 0.4072, + "step": 7940 + }, + { + "epoch": 39.02, + "learning_rate": 1.138888888888889e-05, + "loss": 0.1378, + "step": 7950 + }, + { + "epoch": 39.02, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.0117, + "step": 7960 + }, + { + "epoch": 39.02, + "learning_rate": 1.127777777777778e-05, + "loss": 0.3177, + "step": 7970 + }, + { + "epoch": 39.02, + "learning_rate": 1.1222222222222224e-05, + "loss": 0.0002, + "step": 7980 + }, + { + "epoch": 39.02, + "learning_rate": 1.1166666666666668e-05, + "loss": 0.2967, + "step": 7990 + }, + { + "epoch": 39.02, + "learning_rate": 1.1111111111111112e-05, + "loss": 0.0004, + "step": 8000 + }, + { + "epoch": 39.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.8514948487281799, + "eval_runtime": 12.3588, + "eval_samples_per_second": 3.884, + "eval_steps_per_second": 1.942, + "step": 8000 + }, + { + "epoch": 40.0, + "learning_rate": 1.1055555555555556e-05, + "loss": 0.4713, + "step": 8010 + }, + { + "epoch": 40.0, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.0004, + "step": 8020 + }, + { + "epoch": 40.0, + "learning_rate": 1.0944444444444445e-05, + "loss": 0.0001, + "step": 8030 + }, + { + "epoch": 40.0, + "learning_rate": 1.088888888888889e-05, + "loss": 0.2562, + "step": 8040 + }, + { + "epoch": 40.01, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.002, + "step": 8050 + }, + { + "epoch": 40.01, + "learning_rate": 1.0777777777777778e-05, + "loss": 0.0001, + "step": 8060 + }, + { + "epoch": 40.01, + "learning_rate": 1.0722222222222222e-05, + "loss": 0.0001, + "step": 8070 + }, + { + "epoch": 40.01, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.0004, + "step": 8080 + }, + { + "epoch": 40.01, + "learning_rate": 1.0611111111111111e-05, + "loss": 0.0001, + "step": 8090 + }, + { + "epoch": 40.01, + "learning_rate": 1.0555555555555555e-05, + "loss": 0.0004, + "step": 8100 + }, + { + "epoch": 40.01, + "learning_rate": 1.05e-05, + "loss": 0.0003, + "step": 8110 + }, + { + "epoch": 40.01, + "learning_rate": 1.0444444444444445e-05, + "loss": 0.2269, + "step": 8120 + }, + { + "epoch": 40.01, + "learning_rate": 1.038888888888889e-05, + "loss": 0.3188, + "step": 8130 + }, + { + "epoch": 40.01, + "learning_rate": 1.0333333333333333e-05, + "loss": 0.5858, + "step": 8140 + }, + { + "epoch": 40.02, + "learning_rate": 1.0277777777777777e-05, + "loss": 0.0033, + "step": 8150 + }, + { + "epoch": 40.02, + "learning_rate": 1.0222222222222223e-05, + "loss": 0.2789, + "step": 8160 + }, + { + "epoch": 40.02, + "learning_rate": 1.0166666666666667e-05, + "loss": 0.0003, + "step": 8170 + }, + { + "epoch": 40.02, + "learning_rate": 1.0111111111111111e-05, + "loss": 0.0006, + "step": 8180 + }, + { + "epoch": 40.02, + "learning_rate": 1.0055555555555555e-05, + "loss": 0.0002, + "step": 8190 + }, + { + "epoch": 40.02, + "learning_rate": 1e-05, + "loss": 0.0001, + "step": 8200 + }, + { + "epoch": 40.02, + "eval_accuracy": 0.875, + "eval_loss": 0.5120431780815125, + "eval_runtime": 11.76, + "eval_samples_per_second": 4.082, + "eval_steps_per_second": 2.041, + "step": 8200 + }, + { + "epoch": 41.0, + "learning_rate": 9.944444444444445e-06, + "loss": 0.0203, + "step": 8210 + }, + { + "epoch": 41.0, + "learning_rate": 9.888888888888889e-06, + "loss": 0.2243, + "step": 8220 + }, + { + "epoch": 41.0, + "learning_rate": 9.833333333333333e-06, + "loss": 0.088, + "step": 8230 + }, + { + "epoch": 41.0, + "learning_rate": 9.777777777777779e-06, + "loss": 0.0001, + "step": 8240 + }, + { + "epoch": 41.01, + "learning_rate": 9.722222222222223e-06, + "loss": 0.0003, + "step": 8250 + }, + { + "epoch": 41.01, + "learning_rate": 9.666666666666667e-06, + "loss": 0.0002, + "step": 8260 + }, + { + "epoch": 41.01, + "learning_rate": 9.61111111111111e-06, + "loss": 0.0001, + "step": 8270 + }, + { + "epoch": 41.01, + "learning_rate": 9.555555555555556e-06, + "loss": 0.0001, + "step": 8280 + }, + { + "epoch": 41.01, + "learning_rate": 9.5e-06, + "loss": 0.0005, + "step": 8290 + }, + { + "epoch": 41.01, + "learning_rate": 9.444444444444445e-06, + "loss": 0.0111, + "step": 8300 + }, + { + "epoch": 41.01, + "learning_rate": 9.388888888888889e-06, + "loss": 0.0001, + "step": 8310 + }, + { + "epoch": 41.01, + "learning_rate": 9.333333333333334e-06, + "loss": 0.0001, + "step": 8320 + }, + { + "epoch": 41.01, + "learning_rate": 9.277777777777778e-06, + "loss": 0.0001, + "step": 8330 + }, + { + "epoch": 41.01, + "learning_rate": 9.222222222222222e-06, + "loss": 0.2168, + "step": 8340 + }, + { + "epoch": 41.02, + "learning_rate": 9.166666666666666e-06, + "loss": 0.0002, + "step": 8350 + }, + { + "epoch": 41.02, + "learning_rate": 9.111111111111112e-06, + "loss": 0.0057, + "step": 8360 + }, + { + "epoch": 41.02, + "learning_rate": 9.055555555555556e-06, + "loss": 0.4063, + "step": 8370 + }, + { + "epoch": 41.02, + "learning_rate": 9e-06, + "loss": 0.0005, + "step": 8380 + }, + { + "epoch": 41.02, + "learning_rate": 8.944444444444444e-06, + "loss": 0.0, + "step": 8390 + }, + { + "epoch": 41.02, + "learning_rate": 8.88888888888889e-06, + "loss": 0.0069, + "step": 8400 + }, + { + "epoch": 41.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.8686442375183105, + "eval_runtime": 12.3031, + "eval_samples_per_second": 3.901, + "eval_steps_per_second": 1.951, + "step": 8400 + }, + { + "epoch": 42.0, + "learning_rate": 8.833333333333334e-06, + "loss": 0.038, + "step": 8410 + }, + { + "epoch": 42.0, + "learning_rate": 8.777777777777778e-06, + "loss": 0.0004, + "step": 8420 + }, + { + "epoch": 42.0, + "learning_rate": 8.722222222222224e-06, + "loss": 0.4298, + "step": 8430 + }, + { + "epoch": 42.0, + "learning_rate": 8.666666666666668e-06, + "loss": 0.0001, + "step": 8440 + }, + { + "epoch": 42.01, + "learning_rate": 8.611111111111112e-06, + "loss": 0.0002, + "step": 8450 + }, + { + "epoch": 42.01, + "learning_rate": 8.555555555555556e-06, + "loss": 0.0001, + "step": 8460 + }, + { + "epoch": 42.01, + "learning_rate": 8.500000000000002e-06, + "loss": 0.0503, + "step": 8470 + }, + { + "epoch": 42.01, + "learning_rate": 8.444444444444446e-06, + "loss": 0.1871, + "step": 8480 + }, + { + "epoch": 42.01, + "learning_rate": 8.38888888888889e-06, + "loss": 0.4466, + "step": 8490 + }, + { + "epoch": 42.01, + "learning_rate": 8.333333333333334e-06, + "loss": 0.0001, + "step": 8500 + }, + { + "epoch": 42.01, + "learning_rate": 8.27777777777778e-06, + "loss": 0.0087, + "step": 8510 + }, + { + "epoch": 42.01, + "learning_rate": 8.222222222222223e-06, + "loss": 0.6041, + "step": 8520 + }, + { + "epoch": 42.01, + "learning_rate": 8.166666666666668e-06, + "loss": 0.2826, + "step": 8530 + }, + { + "epoch": 42.01, + "learning_rate": 8.111111111111112e-06, + "loss": 0.0001, + "step": 8540 + }, + { + "epoch": 42.02, + "learning_rate": 8.055555555555557e-06, + "loss": 0.0001, + "step": 8550 + }, + { + "epoch": 42.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0074, + "step": 8560 + }, + { + "epoch": 42.02, + "learning_rate": 7.944444444444445e-06, + "loss": 0.0006, + "step": 8570 + }, + { + "epoch": 42.02, + "learning_rate": 7.88888888888889e-06, + "loss": 0.0001, + "step": 8580 + }, + { + "epoch": 42.02, + "learning_rate": 7.833333333333333e-06, + "loss": 0.0626, + "step": 8590 + }, + { + "epoch": 42.02, + "learning_rate": 7.777777777777777e-06, + "loss": 0.0002, + "step": 8600 + }, + { + "epoch": 42.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.8800749182701111, + "eval_runtime": 12.0228, + "eval_samples_per_second": 3.992, + "eval_steps_per_second": 1.996, + "step": 8600 + }, + { + "epoch": 43.0, + "learning_rate": 7.722222222222223e-06, + "loss": 0.0001, + "step": 8610 + }, + { + "epoch": 43.0, + "learning_rate": 7.666666666666667e-06, + "loss": 0.0879, + "step": 8620 + }, + { + "epoch": 43.0, + "learning_rate": 7.611111111111112e-06, + "loss": 0.0004, + "step": 8630 + }, + { + "epoch": 43.0, + "learning_rate": 7.555555555555556e-06, + "loss": 0.0001, + "step": 8640 + }, + { + "epoch": 43.01, + "learning_rate": 7.5e-06, + "loss": 0.0021, + "step": 8650 + }, + { + "epoch": 43.01, + "learning_rate": 7.444444444444444e-06, + "loss": 0.0, + "step": 8660 + }, + { + "epoch": 43.01, + "learning_rate": 7.38888888888889e-06, + "loss": 0.5426, + "step": 8670 + }, + { + "epoch": 43.01, + "learning_rate": 7.333333333333334e-06, + "loss": 0.5154, + "step": 8680 + }, + { + "epoch": 43.01, + "learning_rate": 7.277777777777778e-06, + "loss": 0.0004, + "step": 8690 + }, + { + "epoch": 43.01, + "learning_rate": 7.222222222222222e-06, + "loss": 0.0003, + "step": 8700 + }, + { + "epoch": 43.01, + "learning_rate": 7.166666666666667e-06, + "loss": 0.2055, + "step": 8710 + }, + { + "epoch": 43.01, + "learning_rate": 7.111111111111112e-06, + "loss": 0.0274, + "step": 8720 + }, + { + "epoch": 43.01, + "learning_rate": 7.055555555555556e-06, + "loss": 0.0001, + "step": 8730 + }, + { + "epoch": 43.01, + "learning_rate": 7.000000000000001e-06, + "loss": 0.0001, + "step": 8740 + }, + { + "epoch": 43.02, + "learning_rate": 6.944444444444445e-06, + "loss": 0.0001, + "step": 8750 + }, + { + "epoch": 43.02, + "learning_rate": 6.888888888888889e-06, + "loss": 0.0001, + "step": 8760 + }, + { + "epoch": 43.02, + "learning_rate": 6.833333333333333e-06, + "loss": 0.0001, + "step": 8770 + }, + { + "epoch": 43.02, + "learning_rate": 6.777777777777779e-06, + "loss": 0.1058, + "step": 8780 + }, + { + "epoch": 43.02, + "learning_rate": 6.722222222222223e-06, + "loss": 0.0002, + "step": 8790 + }, + { + "epoch": 43.02, + "learning_rate": 6.666666666666667e-06, + "loss": 0.0001, + "step": 8800 + }, + { + "epoch": 43.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.8995665907859802, + "eval_runtime": 12.4764, + "eval_samples_per_second": 3.847, + "eval_steps_per_second": 1.924, + "step": 8800 + }, + { + "epoch": 44.0, + "learning_rate": 6.611111111111111e-06, + "loss": 0.0001, + "step": 8810 + }, + { + "epoch": 44.0, + "learning_rate": 6.555555555555556e-06, + "loss": 0.2998, + "step": 8820 + }, + { + "epoch": 44.0, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0001, + "step": 8830 + }, + { + "epoch": 44.0, + "learning_rate": 6.4444444444444445e-06, + "loss": 0.0002, + "step": 8840 + }, + { + "epoch": 44.01, + "learning_rate": 6.3888888888888885e-06, + "loss": 0.0002, + "step": 8850 + }, + { + "epoch": 44.01, + "learning_rate": 6.333333333333334e-06, + "loss": 0.0001, + "step": 8860 + }, + { + "epoch": 44.01, + "learning_rate": 6.277777777777778e-06, + "loss": 0.4396, + "step": 8870 + }, + { + "epoch": 44.01, + "learning_rate": 6.222222222222222e-06, + "loss": 0.002, + "step": 8880 + }, + { + "epoch": 44.01, + "learning_rate": 6.166666666666667e-06, + "loss": 0.0002, + "step": 8890 + }, + { + "epoch": 44.01, + "learning_rate": 6.111111111111111e-06, + "loss": 0.0001, + "step": 8900 + }, + { + "epoch": 44.01, + "learning_rate": 6.055555555555556e-06, + "loss": 0.0433, + "step": 8910 + }, + { + "epoch": 44.01, + "learning_rate": 6e-06, + "loss": 0.0002, + "step": 8920 + }, + { + "epoch": 44.01, + "learning_rate": 5.944444444444445e-06, + "loss": 0.0001, + "step": 8930 + }, + { + "epoch": 44.01, + "learning_rate": 5.888888888888889e-06, + "loss": 0.0004, + "step": 8940 + }, + { + "epoch": 44.02, + "learning_rate": 5.833333333333334e-06, + "loss": 0.2249, + "step": 8950 + }, + { + "epoch": 44.02, + "learning_rate": 5.777777777777778e-06, + "loss": 0.0001, + "step": 8960 + }, + { + "epoch": 44.02, + "learning_rate": 5.722222222222223e-06, + "loss": 0.0001, + "step": 8970 + }, + { + "epoch": 44.02, + "learning_rate": 5.666666666666667e-06, + "loss": 0.0, + "step": 8980 + }, + { + "epoch": 44.02, + "learning_rate": 5.611111111111112e-06, + "loss": 0.0004, + "step": 8990 + }, + { + "epoch": 44.02, + "learning_rate": 5.555555555555556e-06, + "loss": 0.0067, + "step": 9000 + }, + { + "epoch": 44.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.7670348286628723, + "eval_runtime": 11.9175, + "eval_samples_per_second": 4.028, + "eval_steps_per_second": 2.014, + "step": 9000 + }, + { + "epoch": 45.0, + "learning_rate": 5.500000000000001e-06, + "loss": 0.0001, + "step": 9010 + }, + { + "epoch": 45.0, + "learning_rate": 5.444444444444445e-06, + "loss": 0.3147, + "step": 9020 + }, + { + "epoch": 45.0, + "learning_rate": 5.388888888888889e-06, + "loss": 0.4242, + "step": 9030 + }, + { + "epoch": 45.0, + "learning_rate": 5.333333333333334e-06, + "loss": 0.0001, + "step": 9040 + }, + { + "epoch": 45.01, + "learning_rate": 5.277777777777778e-06, + "loss": 0.0403, + "step": 9050 + }, + { + "epoch": 45.01, + "learning_rate": 5.2222222222222226e-06, + "loss": 0.0006, + "step": 9060 + }, + { + "epoch": 45.01, + "learning_rate": 5.166666666666667e-06, + "loss": 0.0001, + "step": 9070 + }, + { + "epoch": 45.01, + "learning_rate": 5.1111111111111115e-06, + "loss": 0.0145, + "step": 9080 + }, + { + "epoch": 45.01, + "learning_rate": 5.0555555555555555e-06, + "loss": 0.0001, + "step": 9090 + }, + { + "epoch": 45.01, + "learning_rate": 5e-06, + "loss": 0.2499, + "step": 9100 + }, + { + "epoch": 45.01, + "learning_rate": 4.9444444444444444e-06, + "loss": 0.4483, + "step": 9110 + }, + { + "epoch": 45.01, + "learning_rate": 4.888888888888889e-06, + "loss": 0.0001, + "step": 9120 + }, + { + "epoch": 45.01, + "learning_rate": 4.833333333333333e-06, + "loss": 0.1297, + "step": 9130 + }, + { + "epoch": 45.01, + "learning_rate": 4.777777777777778e-06, + "loss": 0.0002, + "step": 9140 + }, + { + "epoch": 45.02, + "learning_rate": 4.722222222222222e-06, + "loss": 0.0001, + "step": 9150 + }, + { + "epoch": 45.02, + "learning_rate": 4.666666666666667e-06, + "loss": 0.3679, + "step": 9160 + }, + { + "epoch": 45.02, + "learning_rate": 4.611111111111111e-06, + "loss": 0.0001, + "step": 9170 + }, + { + "epoch": 45.02, + "learning_rate": 4.555555555555556e-06, + "loss": 0.003, + "step": 9180 + }, + { + "epoch": 45.02, + "learning_rate": 4.5e-06, + "loss": 0.0014, + "step": 9190 + }, + { + "epoch": 45.02, + "learning_rate": 4.444444444444445e-06, + "loss": 0.0001, + "step": 9200 + }, + { + "epoch": 45.02, + "eval_accuracy": 0.8333333333333334, + "eval_loss": 0.9936349987983704, + "eval_runtime": 12.6658, + "eval_samples_per_second": 3.79, + "eval_steps_per_second": 1.895, + "step": 9200 + }, + { + "epoch": 46.0, + "learning_rate": 4.388888888888889e-06, + "loss": 0.0001, + "step": 9210 + }, + { + "epoch": 46.0, + "learning_rate": 4.333333333333334e-06, + "loss": 0.0004, + "step": 9220 + }, + { + "epoch": 46.0, + "learning_rate": 4.277777777777778e-06, + "loss": 0.0003, + "step": 9230 + }, + { + "epoch": 46.0, + "learning_rate": 4.222222222222223e-06, + "loss": 0.0942, + "step": 9240 + }, + { + "epoch": 46.01, + "learning_rate": 4.166666666666667e-06, + "loss": 0.0001, + "step": 9250 + }, + { + "epoch": 46.01, + "learning_rate": 4.111111111111112e-06, + "loss": 0.0001, + "step": 9260 + }, + { + "epoch": 46.01, + "learning_rate": 4.055555555555556e-06, + "loss": 0.0001, + "step": 9270 + }, + { + "epoch": 46.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0002, + "step": 9280 + }, + { + "epoch": 46.01, + "learning_rate": 3.944444444444445e-06, + "loss": 0.246, + "step": 9290 + }, + { + "epoch": 46.01, + "learning_rate": 3.888888888888889e-06, + "loss": 0.0005, + "step": 9300 + }, + { + "epoch": 46.01, + "learning_rate": 3.833333333333334e-06, + "loss": 0.0001, + "step": 9310 + }, + { + "epoch": 46.01, + "learning_rate": 3.777777777777778e-06, + "loss": 0.3834, + "step": 9320 + }, + { + "epoch": 46.01, + "learning_rate": 3.722222222222222e-06, + "loss": 0.0001, + "step": 9330 + }, + { + "epoch": 46.01, + "learning_rate": 3.666666666666667e-06, + "loss": 0.5107, + "step": 9340 + }, + { + "epoch": 46.02, + "learning_rate": 3.611111111111111e-06, + "loss": 0.0701, + "step": 9350 + }, + { + "epoch": 46.02, + "learning_rate": 3.555555555555556e-06, + "loss": 0.0001, + "step": 9360 + }, + { + "epoch": 46.02, + "learning_rate": 3.5000000000000004e-06, + "loss": 0.0003, + "step": 9370 + }, + { + "epoch": 46.02, + "learning_rate": 3.4444444444444444e-06, + "loss": 0.0002, + "step": 9380 + }, + { + "epoch": 46.02, + "learning_rate": 3.3888888888888893e-06, + "loss": 0.003, + "step": 9390 + }, + { + "epoch": 46.02, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.0638, + "step": 9400 + }, + { + "epoch": 46.02, + "eval_accuracy": 0.875, + "eval_loss": 0.6616021990776062, + "eval_runtime": 12.2174, + "eval_samples_per_second": 3.929, + "eval_steps_per_second": 1.964, + "step": 9400 + }, + { + "epoch": 47.0, + "learning_rate": 3.277777777777778e-06, + "loss": 0.0001, + "step": 9410 + }, + { + "epoch": 47.0, + "learning_rate": 3.2222222222222222e-06, + "loss": 0.0002, + "step": 9420 + }, + { + "epoch": 47.0, + "learning_rate": 3.166666666666667e-06, + "loss": 0.0002, + "step": 9430 + }, + { + "epoch": 47.0, + "learning_rate": 3.111111111111111e-06, + "loss": 0.5827, + "step": 9440 + }, + { + "epoch": 47.01, + "learning_rate": 3.0555555555555556e-06, + "loss": 0.0002, + "step": 9450 + }, + { + "epoch": 47.01, + "learning_rate": 3e-06, + "loss": 0.0001, + "step": 9460 + }, + { + "epoch": 47.01, + "learning_rate": 2.9444444444444445e-06, + "loss": 0.0264, + "step": 9470 + }, + { + "epoch": 47.01, + "learning_rate": 2.888888888888889e-06, + "loss": 0.0001, + "step": 9480 + }, + { + "epoch": 47.01, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.0001, + "step": 9490 + }, + { + "epoch": 47.01, + "learning_rate": 2.777777777777778e-06, + "loss": 0.0001, + "step": 9500 + }, + { + "epoch": 47.01, + "learning_rate": 2.7222222222222224e-06, + "loss": 0.0391, + "step": 9510 + }, + { + "epoch": 47.01, + "learning_rate": 2.666666666666667e-06, + "loss": 0.0001, + "step": 9520 + }, + { + "epoch": 47.01, + "learning_rate": 2.6111111111111113e-06, + "loss": 0.0001, + "step": 9530 + }, + { + "epoch": 47.01, + "learning_rate": 2.5555555555555557e-06, + "loss": 0.3777, + "step": 9540 + }, + { + "epoch": 47.02, + "learning_rate": 2.5e-06, + "loss": 0.0001, + "step": 9550 + }, + { + "epoch": 47.02, + "learning_rate": 2.4444444444444447e-06, + "loss": 0.0001, + "step": 9560 + }, + { + "epoch": 47.02, + "learning_rate": 2.388888888888889e-06, + "loss": 0.0002, + "step": 9570 + }, + { + "epoch": 47.02, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.085, + "step": 9580 + }, + { + "epoch": 47.02, + "learning_rate": 2.277777777777778e-06, + "loss": 0.0001, + "step": 9590 + }, + { + "epoch": 47.02, + "learning_rate": 2.2222222222222225e-06, + "loss": 0.0001, + "step": 9600 + }, + { + "epoch": 47.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.7978042960166931, + "eval_runtime": 12.8106, + "eval_samples_per_second": 3.747, + "eval_steps_per_second": 1.873, + "step": 9600 + }, + { + "epoch": 48.0, + "learning_rate": 2.166666666666667e-06, + "loss": 0.599, + "step": 9610 + }, + { + "epoch": 48.0, + "learning_rate": 2.1111111111111114e-06, + "loss": 0.0017, + "step": 9620 + }, + { + "epoch": 48.0, + "learning_rate": 2.055555555555556e-06, + "loss": 0.0003, + "step": 9630 + }, + { + "epoch": 48.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.0001, + "step": 9640 + }, + { + "epoch": 48.01, + "learning_rate": 1.9444444444444444e-06, + "loss": 0.0001, + "step": 9650 + }, + { + "epoch": 48.01, + "learning_rate": 1.888888888888889e-06, + "loss": 0.0001, + "step": 9660 + }, + { + "epoch": 48.01, + "learning_rate": 1.8333333333333335e-06, + "loss": 0.0001, + "step": 9670 + }, + { + "epoch": 48.01, + "learning_rate": 1.777777777777778e-06, + "loss": 0.2912, + "step": 9680 + }, + { + "epoch": 48.01, + "learning_rate": 1.7222222222222222e-06, + "loss": 0.0001, + "step": 9690 + }, + { + "epoch": 48.01, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.0001, + "step": 9700 + }, + { + "epoch": 48.01, + "learning_rate": 1.6111111111111111e-06, + "loss": 0.0001, + "step": 9710 + }, + { + "epoch": 48.01, + "learning_rate": 1.5555555555555556e-06, + "loss": 0.0193, + "step": 9720 + }, + { + "epoch": 48.01, + "learning_rate": 1.5e-06, + "loss": 0.0002, + "step": 9730 + }, + { + "epoch": 48.01, + "learning_rate": 1.4444444444444445e-06, + "loss": 0.0, + "step": 9740 + }, + { + "epoch": 48.02, + "learning_rate": 1.388888888888889e-06, + "loss": 0.3547, + "step": 9750 + }, + { + "epoch": 48.02, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.0001, + "step": 9760 + }, + { + "epoch": 48.02, + "learning_rate": 1.2777777777777779e-06, + "loss": 0.0001, + "step": 9770 + }, + { + "epoch": 48.02, + "learning_rate": 1.2222222222222223e-06, + "loss": 0.0001, + "step": 9780 + }, + { + "epoch": 48.02, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.0132, + "step": 9790 + }, + { + "epoch": 48.02, + "learning_rate": 1.1111111111111112e-06, + "loss": 0.0001, + "step": 9800 + }, + { + "epoch": 48.02, + "eval_accuracy": 0.8541666666666666, + "eval_loss": 0.6736838817596436, + "eval_runtime": 12.1656, + "eval_samples_per_second": 3.946, + "eval_steps_per_second": 1.973, + "step": 9800 + }, + { + "epoch": 49.0, + "learning_rate": 1.0555555555555557e-06, + "loss": 0.0001, + "step": 9810 + }, + { + "epoch": 49.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.0, + "step": 9820 + }, + { + "epoch": 49.0, + "learning_rate": 9.444444444444445e-07, + "loss": 0.0001, + "step": 9830 + }, + { + "epoch": 49.0, + "learning_rate": 8.88888888888889e-07, + "loss": 0.119, + "step": 9840 + }, + { + "epoch": 49.01, + "learning_rate": 8.333333333333333e-07, + "loss": 0.0002, + "step": 9850 + }, + { + "epoch": 49.01, + "learning_rate": 7.777777777777778e-07, + "loss": 0.0001, + "step": 9860 + }, + { + "epoch": 49.01, + "learning_rate": 7.222222222222222e-07, + "loss": 0.0028, + "step": 9870 + }, + { + "epoch": 49.01, + "learning_rate": 6.666666666666667e-07, + "loss": 0.0001, + "step": 9880 + }, + { + "epoch": 49.01, + "learning_rate": 6.111111111111112e-07, + "loss": 0.0001, + "step": 9890 + }, + { + "epoch": 49.01, + "learning_rate": 5.555555555555556e-07, + "loss": 0.366, + "step": 9900 + }, + { + "epoch": 49.01, + "learning_rate": 5.000000000000001e-07, + "loss": 0.4115, + "step": 9910 + }, + { + "epoch": 49.01, + "learning_rate": 4.444444444444445e-07, + "loss": 0.0, + "step": 9920 + }, + { + "epoch": 49.01, + "learning_rate": 3.888888888888889e-07, + "loss": 0.0001, + "step": 9930 + }, + { + "epoch": 49.01, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.1473, + "step": 9940 + }, + { + "epoch": 49.02, + "learning_rate": 2.777777777777778e-07, + "loss": 0.0, + "step": 9950 + }, + { + "epoch": 49.02, + "learning_rate": 2.2222222222222224e-07, + "loss": 0.0, + "step": 9960 + }, + { + "epoch": 49.02, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.0001, + "step": 9970 + }, + { + "epoch": 49.02, + "learning_rate": 1.1111111111111112e-07, + "loss": 0.0001, + "step": 9980 + }, + { + "epoch": 49.02, + "learning_rate": 5.555555555555556e-08, + "loss": 0.0049, + "step": 9990 + }, + { + "epoch": 49.02, + "learning_rate": 0.0, + "loss": 0.0001, + "step": 10000 + }, + { + "epoch": 49.02, + "eval_accuracy": 0.875, + "eval_loss": 0.5887275338172913, + "eval_runtime": 12.575, + "eval_samples_per_second": 3.817, + "eval_steps_per_second": 1.909, + "step": 10000 + }, + { + "epoch": 49.02, + "step": 10000, + "total_flos": 2.492129178943488e+19, + "train_loss": 0.3473788271739875, + "train_runtime": 6088.9476, + "train_samples_per_second": 3.285, + "train_steps_per_second": 1.642 + }, + { + "epoch": 49.02, + "eval_accuracy": 0.9166666666666666, + "eval_loss": 0.591888427734375, + "eval_runtime": 48.9212, + "eval_samples_per_second": 0.981, + "eval_steps_per_second": 0.491, + "step": 10000 + }, + { + "epoch": 49.02, + "eval_accuracy": 0.9375, + "eval_loss": 0.30889856815338135, + "eval_runtime": 12.2829, + "eval_samples_per_second": 3.908, + "eval_steps_per_second": 1.954, + "step": 10000 + }, + { + "epoch": 49.02, + "eval_accuracy": 0.965, + "eval_loss": 0.17441120743751526, + "eval_runtime": 62.983, + "eval_samples_per_second": 6.351, + "eval_steps_per_second": 3.175, + "step": 10000 + } + ], + "max_steps": 10000, + "num_train_epochs": 9223372036854775807, + "total_flos": 2.492129178943488e+19, + "trial_name": null, + "trial_params": null +}