diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,30930 @@ +{ + "best_metric": 4.741575241088867, + "best_model_checkpoint": "runs/checkpoint-12000", + "epoch": 603.6217303822938, + "eval_steps": 1000, + "global_step": 100000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.12, + "learning_rate": 6.666666666666667e-08, + "loss": 9.3209, + "step": 20 + }, + { + "epoch": 0.24, + "learning_rate": 1.3333333333333334e-07, + "loss": 9.3182, + "step": 40 + }, + { + "epoch": 0.36, + "learning_rate": 2.0000000000000002e-07, + "loss": 9.3155, + "step": 60 + }, + { + "epoch": 0.48, + "learning_rate": 2.6666666666666667e-07, + "loss": 9.311, + "step": 80 + }, + { + "epoch": 0.6, + "learning_rate": 3.3333333333333335e-07, + "loss": 9.3029, + "step": 100 + }, + { + "epoch": 0.72, + "learning_rate": 4.0000000000000003e-07, + "loss": 9.293, + "step": 120 + }, + { + "epoch": 0.85, + "learning_rate": 4.666666666666667e-07, + "loss": 9.2809, + "step": 140 + }, + { + "epoch": 0.97, + "learning_rate": 5.333333333333333e-07, + "loss": 9.2666, + "step": 160 + }, + { + "epoch": 1.09, + "learning_rate": 6.000000000000001e-07, + "loss": 9.2481, + "step": 180 + }, + { + "epoch": 1.21, + "learning_rate": 6.666666666666667e-07, + "loss": 9.2245, + "step": 200 + }, + { + "epoch": 1.33, + "learning_rate": 7.333333333333333e-07, + "loss": 9.1971, + "step": 220 + }, + { + "epoch": 1.45, + "learning_rate": 8.000000000000001e-07, + "loss": 9.1603, + "step": 240 + }, + { + "epoch": 1.57, + "learning_rate": 8.666666666666667e-07, + "loss": 9.1233, + "step": 260 + }, + { + "epoch": 1.69, + "learning_rate": 9.333333333333334e-07, + "loss": 9.0849, + "step": 280 + }, + { + "epoch": 1.81, + "learning_rate": 1.0000000000000002e-06, + "loss": 9.045, + "step": 300 + }, + { + "epoch": 1.93, + "learning_rate": 1.0666666666666667e-06, + "loss": 9.0066, + "step": 320 + }, + { + "epoch": 2.05, + "learning_rate": 1.1333333333333334e-06, + "loss": 8.9707, + "step": 340 + }, + { + "epoch": 2.17, + "learning_rate": 1.2000000000000002e-06, + "loss": 8.9421, + "step": 360 + }, + { + "epoch": 2.29, + "learning_rate": 1.2666666666666667e-06, + "loss": 8.9107, + "step": 380 + }, + { + "epoch": 2.41, + "learning_rate": 1.3333333333333334e-06, + "loss": 8.8775, + "step": 400 + }, + { + "epoch": 2.54, + "learning_rate": 1.4000000000000001e-06, + "loss": 8.8519, + "step": 420 + }, + { + "epoch": 2.66, + "learning_rate": 1.4666666666666667e-06, + "loss": 8.831, + "step": 440 + }, + { + "epoch": 2.78, + "learning_rate": 1.5333333333333334e-06, + "loss": 8.8038, + "step": 460 + }, + { + "epoch": 2.9, + "learning_rate": 1.6000000000000001e-06, + "loss": 8.7751, + "step": 480 + }, + { + "epoch": 3.02, + "learning_rate": 1.6666666666666667e-06, + "loss": 8.7584, + "step": 500 + }, + { + "epoch": 3.14, + "learning_rate": 1.7333333333333334e-06, + "loss": 8.7354, + "step": 520 + }, + { + "epoch": 3.26, + "learning_rate": 1.8e-06, + "loss": 8.7144, + "step": 540 + }, + { + "epoch": 3.38, + "learning_rate": 1.8666666666666669e-06, + "loss": 8.694, + "step": 560 + }, + { + "epoch": 3.5, + "learning_rate": 1.9333333333333336e-06, + "loss": 8.6699, + "step": 580 + }, + { + "epoch": 3.62, + "learning_rate": 2.0000000000000003e-06, + "loss": 8.6457, + "step": 600 + }, + { + "epoch": 3.74, + "learning_rate": 2.0666666666666666e-06, + "loss": 8.6262, + "step": 620 + }, + { + "epoch": 3.86, + "learning_rate": 2.1333333333333334e-06, + "loss": 8.6001, + "step": 640 + }, + { + "epoch": 3.98, + "learning_rate": 2.2e-06, + "loss": 8.5778, + "step": 660 + }, + { + "epoch": 4.1, + "learning_rate": 2.266666666666667e-06, + "loss": 8.5677, + "step": 680 + }, + { + "epoch": 4.23, + "learning_rate": 2.3333333333333336e-06, + "loss": 8.541, + "step": 700 + }, + { + "epoch": 4.35, + "learning_rate": 2.4000000000000003e-06, + "loss": 8.5137, + "step": 720 + }, + { + "epoch": 4.47, + "learning_rate": 2.4666666666666666e-06, + "loss": 8.4983, + "step": 740 + }, + { + "epoch": 4.59, + "learning_rate": 2.5333333333333334e-06, + "loss": 8.4784, + "step": 760 + }, + { + "epoch": 4.71, + "learning_rate": 2.6e-06, + "loss": 8.4527, + "step": 780 + }, + { + "epoch": 4.83, + "learning_rate": 2.666666666666667e-06, + "loss": 8.4373, + "step": 800 + }, + { + "epoch": 4.95, + "learning_rate": 2.7333333333333336e-06, + "loss": 8.4061, + "step": 820 + }, + { + "epoch": 5.07, + "learning_rate": 2.8000000000000003e-06, + "loss": 8.3854, + "step": 840 + }, + { + "epoch": 5.19, + "learning_rate": 2.8666666666666666e-06, + "loss": 8.364, + "step": 860 + }, + { + "epoch": 5.31, + "learning_rate": 2.9333333333333333e-06, + "loss": 8.3524, + "step": 880 + }, + { + "epoch": 5.43, + "learning_rate": 3e-06, + "loss": 8.3238, + "step": 900 + }, + { + "epoch": 5.55, + "learning_rate": 3.066666666666667e-06, + "loss": 8.3033, + "step": 920 + }, + { + "epoch": 5.67, + "learning_rate": 3.133333333333333e-06, + "loss": 8.2918, + "step": 940 + }, + { + "epoch": 5.79, + "learning_rate": 3.2000000000000003e-06, + "loss": 8.2772, + "step": 960 + }, + { + "epoch": 5.92, + "learning_rate": 3.2666666666666666e-06, + "loss": 8.2527, + "step": 980 + }, + { + "epoch": 6.04, + "learning_rate": 3.3333333333333333e-06, + "loss": 8.2359, + "step": 1000 + }, + { + "epoch": 6.04, + "eval_accuracy": 0.007023540219893116, + "eval_loss": 8.217020988464355, + "eval_runtime": 18.6004, + "eval_samples_per_second": 109.675, + "eval_steps_per_second": 2.312, + "step": 1000 + }, + { + "epoch": 6.16, + "learning_rate": 3.4000000000000005e-06, + "loss": 8.2118, + "step": 1020 + }, + { + "epoch": 6.28, + "learning_rate": 3.466666666666667e-06, + "loss": 8.1921, + "step": 1040 + }, + { + "epoch": 6.4, + "learning_rate": 3.5333333333333335e-06, + "loss": 8.1792, + "step": 1060 + }, + { + "epoch": 6.52, + "learning_rate": 3.6e-06, + "loss": 8.1525, + "step": 1080 + }, + { + "epoch": 6.64, + "learning_rate": 3.666666666666667e-06, + "loss": 8.1462, + "step": 1100 + }, + { + "epoch": 6.76, + "learning_rate": 3.7333333333333337e-06, + "loss": 8.1221, + "step": 1120 + }, + { + "epoch": 6.88, + "learning_rate": 3.8e-06, + "loss": 8.1082, + "step": 1140 + }, + { + "epoch": 7.0, + "learning_rate": 3.866666666666667e-06, + "loss": 8.0877, + "step": 1160 + }, + { + "epoch": 7.12, + "learning_rate": 3.9333333333333335e-06, + "loss": 8.0805, + "step": 1180 + }, + { + "epoch": 7.24, + "learning_rate": 4.000000000000001e-06, + "loss": 8.0655, + "step": 1200 + }, + { + "epoch": 7.36, + "learning_rate": 4.066666666666666e-06, + "loss": 8.0427, + "step": 1220 + }, + { + "epoch": 7.48, + "learning_rate": 4.133333333333333e-06, + "loss": 8.0315, + "step": 1240 + }, + { + "epoch": 7.61, + "learning_rate": 4.2000000000000004e-06, + "loss": 8.0165, + "step": 1260 + }, + { + "epoch": 7.73, + "learning_rate": 4.266666666666667e-06, + "loss": 8.009, + "step": 1280 + }, + { + "epoch": 7.85, + "learning_rate": 4.333333333333334e-06, + "loss": 7.9895, + "step": 1300 + }, + { + "epoch": 7.97, + "learning_rate": 4.4e-06, + "loss": 7.9791, + "step": 1320 + }, + { + "epoch": 8.09, + "learning_rate": 4.4666666666666665e-06, + "loss": 7.9651, + "step": 1340 + }, + { + "epoch": 8.21, + "learning_rate": 4.533333333333334e-06, + "loss": 7.954, + "step": 1360 + }, + { + "epoch": 8.33, + "learning_rate": 4.6e-06, + "loss": 7.9467, + "step": 1380 + }, + { + "epoch": 8.45, + "learning_rate": 4.666666666666667e-06, + "loss": 7.9417, + "step": 1400 + }, + { + "epoch": 8.57, + "learning_rate": 4.7333333333333335e-06, + "loss": 7.9304, + "step": 1420 + }, + { + "epoch": 8.69, + "learning_rate": 4.800000000000001e-06, + "loss": 7.9198, + "step": 1440 + }, + { + "epoch": 8.81, + "learning_rate": 4.866666666666667e-06, + "loss": 7.9181, + "step": 1460 + }, + { + "epoch": 8.93, + "learning_rate": 4.933333333333333e-06, + "loss": 7.8935, + "step": 1480 + }, + { + "epoch": 9.05, + "learning_rate": 5e-06, + "loss": 7.8909, + "step": 1500 + }, + { + "epoch": 9.18, + "learning_rate": 5.066666666666667e-06, + "loss": 7.8876, + "step": 1520 + }, + { + "epoch": 9.3, + "learning_rate": 5.133333333333334e-06, + "loss": 7.8792, + "step": 1540 + }, + { + "epoch": 9.42, + "learning_rate": 5.2e-06, + "loss": 7.8645, + "step": 1560 + }, + { + "epoch": 9.54, + "learning_rate": 5.266666666666667e-06, + "loss": 7.8694, + "step": 1580 + }, + { + "epoch": 9.66, + "learning_rate": 5.333333333333334e-06, + "loss": 7.857, + "step": 1600 + }, + { + "epoch": 9.78, + "learning_rate": 5.4e-06, + "loss": 7.8533, + "step": 1620 + }, + { + "epoch": 9.9, + "learning_rate": 5.466666666666667e-06, + "loss": 7.8439, + "step": 1640 + }, + { + "epoch": 10.02, + "learning_rate": 5.5333333333333334e-06, + "loss": 7.841, + "step": 1660 + }, + { + "epoch": 10.14, + "learning_rate": 5.600000000000001e-06, + "loss": 7.8247, + "step": 1680 + }, + { + "epoch": 10.26, + "learning_rate": 5.666666666666667e-06, + "loss": 7.8361, + "step": 1700 + }, + { + "epoch": 10.38, + "learning_rate": 5.733333333333333e-06, + "loss": 7.8296, + "step": 1720 + }, + { + "epoch": 10.5, + "learning_rate": 5.8e-06, + "loss": 7.8217, + "step": 1740 + }, + { + "epoch": 10.62, + "learning_rate": 5.866666666666667e-06, + "loss": 7.8181, + "step": 1760 + }, + { + "epoch": 10.74, + "learning_rate": 5.933333333333334e-06, + "loss": 7.8007, + "step": 1780 + }, + { + "epoch": 10.87, + "learning_rate": 6e-06, + "loss": 7.8151, + "step": 1800 + }, + { + "epoch": 10.99, + "learning_rate": 6.066666666666667e-06, + "loss": 7.8132, + "step": 1820 + }, + { + "epoch": 11.11, + "learning_rate": 6.133333333333334e-06, + "loss": 7.7844, + "step": 1840 + }, + { + "epoch": 11.23, + "learning_rate": 6.2e-06, + "loss": 7.7854, + "step": 1860 + }, + { + "epoch": 11.35, + "learning_rate": 6.266666666666666e-06, + "loss": 7.7878, + "step": 1880 + }, + { + "epoch": 11.47, + "learning_rate": 6.333333333333334e-06, + "loss": 7.7815, + "step": 1900 + }, + { + "epoch": 11.59, + "learning_rate": 6.4000000000000006e-06, + "loss": 7.7711, + "step": 1920 + }, + { + "epoch": 11.71, + "learning_rate": 6.466666666666667e-06, + "loss": 7.7599, + "step": 1940 + }, + { + "epoch": 11.83, + "learning_rate": 6.533333333333333e-06, + "loss": 7.7511, + "step": 1960 + }, + { + "epoch": 11.95, + "learning_rate": 6.6e-06, + "loss": 7.7292, + "step": 1980 + }, + { + "epoch": 12.07, + "learning_rate": 6.666666666666667e-06, + "loss": 7.7137, + "step": 2000 + }, + { + "epoch": 12.07, + "eval_accuracy": 0.006356254507018601, + "eval_loss": 7.700709342956543, + "eval_runtime": 18.2793, + "eval_samples_per_second": 111.602, + "eval_steps_per_second": 2.352, + "step": 2000 + }, + { + "epoch": 12.19, + "learning_rate": 6.733333333333333e-06, + "loss": 7.6936, + "step": 2020 + }, + { + "epoch": 12.31, + "learning_rate": 6.800000000000001e-06, + "loss": 7.6831, + "step": 2040 + }, + { + "epoch": 12.43, + "learning_rate": 6.866666666666667e-06, + "loss": 7.6583, + "step": 2060 + }, + { + "epoch": 12.56, + "learning_rate": 6.933333333333334e-06, + "loss": 7.6526, + "step": 2080 + }, + { + "epoch": 12.68, + "learning_rate": 7.000000000000001e-06, + "loss": 7.6229, + "step": 2100 + }, + { + "epoch": 12.8, + "learning_rate": 7.066666666666667e-06, + "loss": 7.6069, + "step": 2120 + }, + { + "epoch": 12.92, + "learning_rate": 7.133333333333333e-06, + "loss": 7.567, + "step": 2140 + }, + { + "epoch": 13.04, + "learning_rate": 7.2e-06, + "loss": 7.4886, + "step": 2160 + }, + { + "epoch": 13.16, + "learning_rate": 7.266666666666668e-06, + "loss": 7.4052, + "step": 2180 + }, + { + "epoch": 13.28, + "learning_rate": 7.333333333333334e-06, + "loss": 7.3529, + "step": 2200 + }, + { + "epoch": 13.4, + "learning_rate": 7.4e-06, + "loss": 7.2899, + "step": 2220 + }, + { + "epoch": 13.52, + "learning_rate": 7.4666666666666675e-06, + "loss": 7.2501, + "step": 2240 + }, + { + "epoch": 13.64, + "learning_rate": 7.533333333333334e-06, + "loss": 7.2155, + "step": 2260 + }, + { + "epoch": 13.76, + "learning_rate": 7.6e-06, + "loss": 7.1748, + "step": 2280 + }, + { + "epoch": 13.88, + "learning_rate": 7.666666666666667e-06, + "loss": 7.147, + "step": 2300 + }, + { + "epoch": 14.0, + "learning_rate": 7.733333333333334e-06, + "loss": 7.0968, + "step": 2320 + }, + { + "epoch": 14.12, + "learning_rate": 7.8e-06, + "loss": 7.083, + "step": 2340 + }, + { + "epoch": 14.25, + "learning_rate": 7.866666666666667e-06, + "loss": 7.0658, + "step": 2360 + }, + { + "epoch": 14.37, + "learning_rate": 7.933333333333334e-06, + "loss": 7.0429, + "step": 2380 + }, + { + "epoch": 14.49, + "learning_rate": 8.000000000000001e-06, + "loss": 7.0212, + "step": 2400 + }, + { + "epoch": 14.61, + "learning_rate": 8.066666666666667e-06, + "loss": 7.0132, + "step": 2420 + }, + { + "epoch": 14.73, + "learning_rate": 8.133333333333332e-06, + "loss": 6.9619, + "step": 2440 + }, + { + "epoch": 14.85, + "learning_rate": 8.200000000000001e-06, + "loss": 6.9558, + "step": 2460 + }, + { + "epoch": 14.97, + "learning_rate": 8.266666666666667e-06, + "loss": 6.9292, + "step": 2480 + }, + { + "epoch": 15.09, + "learning_rate": 8.333333333333334e-06, + "loss": 6.8963, + "step": 2500 + }, + { + "epoch": 15.21, + "learning_rate": 8.400000000000001e-06, + "loss": 6.9002, + "step": 2520 + }, + { + "epoch": 15.33, + "learning_rate": 8.466666666666666e-06, + "loss": 6.8821, + "step": 2540 + }, + { + "epoch": 15.45, + "learning_rate": 8.533333333333334e-06, + "loss": 6.8669, + "step": 2560 + }, + { + "epoch": 15.57, + "learning_rate": 8.599999999999999e-06, + "loss": 6.8442, + "step": 2580 + }, + { + "epoch": 15.69, + "learning_rate": 8.666666666666668e-06, + "loss": 6.8243, + "step": 2600 + }, + { + "epoch": 15.81, + "learning_rate": 8.733333333333333e-06, + "loss": 6.7897, + "step": 2620 + }, + { + "epoch": 15.94, + "learning_rate": 8.8e-06, + "loss": 6.7863, + "step": 2640 + }, + { + "epoch": 16.06, + "learning_rate": 8.866666666666668e-06, + "loss": 6.7918, + "step": 2660 + }, + { + "epoch": 16.18, + "learning_rate": 8.933333333333333e-06, + "loss": 6.7465, + "step": 2680 + }, + { + "epoch": 16.3, + "learning_rate": 9e-06, + "loss": 6.7454, + "step": 2700 + }, + { + "epoch": 16.42, + "learning_rate": 9.066666666666667e-06, + "loss": 6.7315, + "step": 2720 + }, + { + "epoch": 16.54, + "learning_rate": 9.133333333333335e-06, + "loss": 6.7298, + "step": 2740 + }, + { + "epoch": 16.66, + "learning_rate": 9.2e-06, + "loss": 6.7084, + "step": 2760 + }, + { + "epoch": 16.78, + "learning_rate": 9.266666666666667e-06, + "loss": 6.6964, + "step": 2780 + }, + { + "epoch": 16.9, + "learning_rate": 9.333333333333334e-06, + "loss": 6.6611, + "step": 2800 + }, + { + "epoch": 17.02, + "learning_rate": 9.4e-06, + "loss": 6.6561, + "step": 2820 + }, + { + "epoch": 17.14, + "learning_rate": 9.466666666666667e-06, + "loss": 6.6364, + "step": 2840 + }, + { + "epoch": 17.26, + "learning_rate": 9.533333333333334e-06, + "loss": 6.6261, + "step": 2860 + }, + { + "epoch": 17.38, + "learning_rate": 9.600000000000001e-06, + "loss": 6.6371, + "step": 2880 + }, + { + "epoch": 17.51, + "learning_rate": 9.666666666666667e-06, + "loss": 6.5983, + "step": 2900 + }, + { + "epoch": 17.63, + "learning_rate": 9.733333333333334e-06, + "loss": 6.5912, + "step": 2920 + }, + { + "epoch": 17.75, + "learning_rate": 9.800000000000001e-06, + "loss": 6.5684, + "step": 2940 + }, + { + "epoch": 17.87, + "learning_rate": 9.866666666666667e-06, + "loss": 6.5782, + "step": 2960 + }, + { + "epoch": 17.99, + "learning_rate": 9.933333333333334e-06, + "loss": 6.5523, + "step": 2980 + }, + { + "epoch": 18.11, + "learning_rate": 1e-05, + "loss": 6.5277, + "step": 3000 + }, + { + "epoch": 18.11, + "eval_accuracy": 4.099534727504421e-05, + "eval_loss": 6.525410175323486, + "eval_runtime": 18.3845, + "eval_samples_per_second": 110.963, + "eval_steps_per_second": 2.339, + "step": 3000 + }, + { + "epoch": 18.23, + "learning_rate": 1.0066666666666668e-05, + "loss": 6.5291, + "step": 3020 + }, + { + "epoch": 18.35, + "learning_rate": 1.0133333333333333e-05, + "loss": 6.5222, + "step": 3040 + }, + { + "epoch": 18.47, + "learning_rate": 1.02e-05, + "loss": 6.5028, + "step": 3060 + }, + { + "epoch": 18.59, + "learning_rate": 1.0266666666666668e-05, + "loss": 6.4887, + "step": 3080 + }, + { + "epoch": 18.71, + "learning_rate": 1.0333333333333333e-05, + "loss": 6.4745, + "step": 3100 + }, + { + "epoch": 18.83, + "learning_rate": 1.04e-05, + "loss": 6.4663, + "step": 3120 + }, + { + "epoch": 18.95, + "learning_rate": 1.0466666666666668e-05, + "loss": 6.4686, + "step": 3140 + }, + { + "epoch": 19.07, + "learning_rate": 1.0533333333333335e-05, + "loss": 6.4507, + "step": 3160 + }, + { + "epoch": 19.2, + "learning_rate": 1.06e-05, + "loss": 6.4451, + "step": 3180 + }, + { + "epoch": 19.32, + "learning_rate": 1.0666666666666667e-05, + "loss": 6.4153, + "step": 3200 + }, + { + "epoch": 19.44, + "learning_rate": 1.0733333333333334e-05, + "loss": 6.427, + "step": 3220 + }, + { + "epoch": 19.56, + "learning_rate": 1.08e-05, + "loss": 6.4189, + "step": 3240 + }, + { + "epoch": 19.68, + "learning_rate": 1.0866666666666667e-05, + "loss": 6.4027, + "step": 3260 + }, + { + "epoch": 19.8, + "learning_rate": 1.0933333333333334e-05, + "loss": 6.3801, + "step": 3280 + }, + { + "epoch": 19.92, + "learning_rate": 1.1000000000000001e-05, + "loss": 6.3769, + "step": 3300 + }, + { + "epoch": 20.04, + "learning_rate": 1.1066666666666667e-05, + "loss": 6.358, + "step": 3320 + }, + { + "epoch": 20.16, + "learning_rate": 1.1133333333333334e-05, + "loss": 6.3591, + "step": 3340 + }, + { + "epoch": 20.28, + "learning_rate": 1.1200000000000001e-05, + "loss": 6.3585, + "step": 3360 + }, + { + "epoch": 20.4, + "learning_rate": 1.1266666666666667e-05, + "loss": 6.3219, + "step": 3380 + }, + { + "epoch": 20.52, + "learning_rate": 1.1333333333333334e-05, + "loss": 6.3147, + "step": 3400 + }, + { + "epoch": 20.64, + "learning_rate": 1.1400000000000001e-05, + "loss": 6.3324, + "step": 3420 + }, + { + "epoch": 20.76, + "learning_rate": 1.1466666666666666e-05, + "loss": 6.316, + "step": 3440 + }, + { + "epoch": 20.89, + "learning_rate": 1.1533333333333334e-05, + "loss": 6.2978, + "step": 3460 + }, + { + "epoch": 21.01, + "learning_rate": 1.16e-05, + "loss": 6.2895, + "step": 3480 + }, + { + "epoch": 21.13, + "learning_rate": 1.1666666666666668e-05, + "loss": 6.2864, + "step": 3500 + }, + { + "epoch": 21.25, + "learning_rate": 1.1733333333333333e-05, + "loss": 6.2824, + "step": 3520 + }, + { + "epoch": 21.37, + "learning_rate": 1.18e-05, + "loss": 6.2451, + "step": 3540 + }, + { + "epoch": 21.49, + "learning_rate": 1.1866666666666668e-05, + "loss": 6.2508, + "step": 3560 + }, + { + "epoch": 21.61, + "learning_rate": 1.1933333333333333e-05, + "loss": 6.2341, + "step": 3580 + }, + { + "epoch": 21.73, + "learning_rate": 1.2e-05, + "loss": 6.2244, + "step": 3600 + }, + { + "epoch": 21.85, + "learning_rate": 1.2066666666666667e-05, + "loss": 6.2077, + "step": 3620 + }, + { + "epoch": 21.97, + "learning_rate": 1.2133333333333335e-05, + "loss": 6.2116, + "step": 3640 + }, + { + "epoch": 22.09, + "learning_rate": 1.22e-05, + "loss": 6.2047, + "step": 3660 + }, + { + "epoch": 22.21, + "learning_rate": 1.2266666666666667e-05, + "loss": 6.2012, + "step": 3680 + }, + { + "epoch": 22.33, + "learning_rate": 1.2333333333333334e-05, + "loss": 6.1741, + "step": 3700 + }, + { + "epoch": 22.45, + "learning_rate": 1.24e-05, + "loss": 6.1616, + "step": 3720 + }, + { + "epoch": 22.58, + "learning_rate": 1.2466666666666667e-05, + "loss": 6.1643, + "step": 3740 + }, + { + "epoch": 22.7, + "learning_rate": 1.2533333333333332e-05, + "loss": 6.1585, + "step": 3760 + }, + { + "epoch": 22.82, + "learning_rate": 1.2600000000000001e-05, + "loss": 6.1467, + "step": 3780 + }, + { + "epoch": 22.94, + "learning_rate": 1.2666666666666668e-05, + "loss": 6.1542, + "step": 3800 + }, + { + "epoch": 23.06, + "learning_rate": 1.2733333333333334e-05, + "loss": 6.1341, + "step": 3820 + }, + { + "epoch": 23.18, + "learning_rate": 1.2800000000000001e-05, + "loss": 6.1156, + "step": 3840 + }, + { + "epoch": 23.3, + "learning_rate": 1.2866666666666668e-05, + "loss": 6.1146, + "step": 3860 + }, + { + "epoch": 23.42, + "learning_rate": 1.2933333333333334e-05, + "loss": 6.109, + "step": 3880 + }, + { + "epoch": 23.54, + "learning_rate": 1.3000000000000001e-05, + "loss": 6.0964, + "step": 3900 + }, + { + "epoch": 23.66, + "learning_rate": 1.3066666666666666e-05, + "loss": 6.0964, + "step": 3920 + }, + { + "epoch": 23.78, + "learning_rate": 1.3133333333333334e-05, + "loss": 6.0941, + "step": 3940 + }, + { + "epoch": 23.9, + "learning_rate": 1.32e-05, + "loss": 6.0569, + "step": 3960 + }, + { + "epoch": 24.02, + "learning_rate": 1.3266666666666666e-05, + "loss": 6.0643, + "step": 3980 + }, + { + "epoch": 24.14, + "learning_rate": 1.3333333333333333e-05, + "loss": 6.0375, + "step": 4000 + }, + { + "epoch": 24.14, + "eval_accuracy": 4.494670604854244e-05, + "eval_loss": 6.053168773651123, + "eval_runtime": 18.3968, + "eval_samples_per_second": 110.889, + "eval_steps_per_second": 2.337, + "step": 4000 + }, + { + "epoch": 24.27, + "learning_rate": 1.3400000000000002e-05, + "loss": 6.04, + "step": 4020 + }, + { + "epoch": 24.39, + "learning_rate": 1.3466666666666666e-05, + "loss": 6.0531, + "step": 4040 + }, + { + "epoch": 24.51, + "learning_rate": 1.3533333333333335e-05, + "loss": 6.0362, + "step": 4060 + }, + { + "epoch": 24.63, + "learning_rate": 1.3600000000000002e-05, + "loss": 6.0139, + "step": 4080 + }, + { + "epoch": 24.75, + "learning_rate": 1.3666666666666666e-05, + "loss": 5.994, + "step": 4100 + }, + { + "epoch": 24.87, + "learning_rate": 1.3733333333333335e-05, + "loss": 6.009, + "step": 4120 + }, + { + "epoch": 24.99, + "learning_rate": 1.3800000000000002e-05, + "loss": 5.9937, + "step": 4140 + }, + { + "epoch": 25.11, + "learning_rate": 1.3866666666666667e-05, + "loss": 5.9837, + "step": 4160 + }, + { + "epoch": 25.23, + "learning_rate": 1.3933333333333334e-05, + "loss": 5.9619, + "step": 4180 + }, + { + "epoch": 25.35, + "learning_rate": 1.4000000000000001e-05, + "loss": 5.9603, + "step": 4200 + }, + { + "epoch": 25.47, + "learning_rate": 1.4066666666666667e-05, + "loss": 5.9447, + "step": 4220 + }, + { + "epoch": 25.59, + "learning_rate": 1.4133333333333334e-05, + "loss": 5.9396, + "step": 4240 + }, + { + "epoch": 25.71, + "learning_rate": 1.42e-05, + "loss": 5.9694, + "step": 4260 + }, + { + "epoch": 25.84, + "learning_rate": 1.4266666666666667e-05, + "loss": 5.9305, + "step": 4280 + }, + { + "epoch": 25.96, + "learning_rate": 1.4333333333333334e-05, + "loss": 5.9412, + "step": 4300 + }, + { + "epoch": 26.08, + "learning_rate": 1.44e-05, + "loss": 5.9241, + "step": 4320 + }, + { + "epoch": 26.2, + "learning_rate": 1.4466666666666667e-05, + "loss": 5.9065, + "step": 4340 + }, + { + "epoch": 26.32, + "learning_rate": 1.4533333333333335e-05, + "loss": 5.912, + "step": 4360 + }, + { + "epoch": 26.44, + "learning_rate": 1.4599999999999999e-05, + "loss": 5.8877, + "step": 4380 + }, + { + "epoch": 26.56, + "learning_rate": 1.4666666666666668e-05, + "loss": 5.8726, + "step": 4400 + }, + { + "epoch": 26.68, + "learning_rate": 1.4733333333333335e-05, + "loss": 5.8834, + "step": 4420 + }, + { + "epoch": 26.8, + "learning_rate": 1.48e-05, + "loss": 5.88, + "step": 4440 + }, + { + "epoch": 26.92, + "learning_rate": 1.4866666666666668e-05, + "loss": 5.8724, + "step": 4460 + }, + { + "epoch": 27.04, + "learning_rate": 1.4933333333333335e-05, + "loss": 5.864, + "step": 4480 + }, + { + "epoch": 27.16, + "learning_rate": 1.5e-05, + "loss": 5.8542, + "step": 4500 + }, + { + "epoch": 27.28, + "learning_rate": 1.5066666666666668e-05, + "loss": 5.858, + "step": 4520 + }, + { + "epoch": 27.4, + "learning_rate": 1.5133333333333333e-05, + "loss": 5.8406, + "step": 4540 + }, + { + "epoch": 27.53, + "learning_rate": 1.52e-05, + "loss": 5.8369, + "step": 4560 + }, + { + "epoch": 27.65, + "learning_rate": 1.5266666666666667e-05, + "loss": 5.8308, + "step": 4580 + }, + { + "epoch": 27.77, + "learning_rate": 1.5333333333333334e-05, + "loss": 5.8009, + "step": 4600 + }, + { + "epoch": 27.89, + "learning_rate": 1.54e-05, + "loss": 5.8264, + "step": 4620 + }, + { + "epoch": 28.01, + "learning_rate": 1.546666666666667e-05, + "loss": 5.7941, + "step": 4640 + }, + { + "epoch": 28.13, + "learning_rate": 1.5533333333333333e-05, + "loss": 5.8114, + "step": 4660 + }, + { + "epoch": 28.25, + "learning_rate": 1.56e-05, + "loss": 5.7903, + "step": 4680 + }, + { + "epoch": 28.37, + "learning_rate": 1.5666666666666667e-05, + "loss": 5.7821, + "step": 4700 + }, + { + "epoch": 28.49, + "learning_rate": 1.5733333333333334e-05, + "loss": 5.7837, + "step": 4720 + }, + { + "epoch": 28.61, + "learning_rate": 1.58e-05, + "loss": 5.774, + "step": 4740 + }, + { + "epoch": 28.73, + "learning_rate": 1.586666666666667e-05, + "loss": 5.7587, + "step": 4760 + }, + { + "epoch": 28.85, + "learning_rate": 1.5933333333333332e-05, + "loss": 5.7502, + "step": 4780 + }, + { + "epoch": 28.97, + "learning_rate": 1.6000000000000003e-05, + "loss": 5.7666, + "step": 4800 + }, + { + "epoch": 29.09, + "learning_rate": 1.606666666666667e-05, + "loss": 5.7418, + "step": 4820 + }, + { + "epoch": 29.22, + "learning_rate": 1.6133333333333334e-05, + "loss": 5.7404, + "step": 4840 + }, + { + "epoch": 29.34, + "learning_rate": 1.62e-05, + "loss": 5.7338, + "step": 4860 + }, + { + "epoch": 29.46, + "learning_rate": 1.6266666666666665e-05, + "loss": 5.7212, + "step": 4880 + }, + { + "epoch": 29.58, + "learning_rate": 1.6333333333333335e-05, + "loss": 5.7091, + "step": 4900 + }, + { + "epoch": 29.7, + "learning_rate": 1.6400000000000002e-05, + "loss": 5.7103, + "step": 4920 + }, + { + "epoch": 29.82, + "learning_rate": 1.6466666666666666e-05, + "loss": 5.725, + "step": 4940 + }, + { + "epoch": 29.94, + "learning_rate": 1.6533333333333333e-05, + "loss": 5.7061, + "step": 4960 + }, + { + "epoch": 30.06, + "learning_rate": 1.66e-05, + "loss": 5.6879, + "step": 4980 + }, + { + "epoch": 30.18, + "learning_rate": 1.6666666666666667e-05, + "loss": 5.6908, + "step": 5000 + }, + { + "epoch": 30.18, + "eval_accuracy": 6.618525945609546e-05, + "eval_loss": 5.7099995613098145, + "eval_runtime": 18.2382, + "eval_samples_per_second": 111.853, + "eval_steps_per_second": 2.358, + "step": 5000 + }, + { + "epoch": 30.3, + "learning_rate": 1.6733333333333335e-05, + "loss": 5.6782, + "step": 5020 + }, + { + "epoch": 30.42, + "learning_rate": 1.6800000000000002e-05, + "loss": 5.6745, + "step": 5040 + }, + { + "epoch": 30.54, + "learning_rate": 1.6866666666666666e-05, + "loss": 5.6698, + "step": 5060 + }, + { + "epoch": 30.66, + "learning_rate": 1.6933333333333333e-05, + "loss": 5.68, + "step": 5080 + }, + { + "epoch": 30.78, + "learning_rate": 1.7000000000000003e-05, + "loss": 5.6718, + "step": 5100 + }, + { + "epoch": 30.91, + "learning_rate": 1.7066666666666667e-05, + "loss": 5.6621, + "step": 5120 + }, + { + "epoch": 31.03, + "learning_rate": 1.7133333333333334e-05, + "loss": 5.6677, + "step": 5140 + }, + { + "epoch": 31.15, + "learning_rate": 1.7199999999999998e-05, + "loss": 5.6582, + "step": 5160 + }, + { + "epoch": 31.27, + "learning_rate": 1.726666666666667e-05, + "loss": 5.6369, + "step": 5180 + }, + { + "epoch": 31.39, + "learning_rate": 1.7333333333333336e-05, + "loss": 5.631, + "step": 5200 + }, + { + "epoch": 31.51, + "learning_rate": 1.74e-05, + "loss": 5.6239, + "step": 5220 + }, + { + "epoch": 31.63, + "learning_rate": 1.7466666666666667e-05, + "loss": 5.6244, + "step": 5240 + }, + { + "epoch": 31.75, + "learning_rate": 1.7533333333333334e-05, + "loss": 5.629, + "step": 5260 + }, + { + "epoch": 31.87, + "learning_rate": 1.76e-05, + "loss": 5.6257, + "step": 5280 + }, + { + "epoch": 31.99, + "learning_rate": 1.7666666666666668e-05, + "loss": 5.6067, + "step": 5300 + }, + { + "epoch": 32.11, + "learning_rate": 1.7733333333333335e-05, + "loss": 5.5915, + "step": 5320 + }, + { + "epoch": 32.23, + "learning_rate": 1.78e-05, + "loss": 5.6116, + "step": 5340 + }, + { + "epoch": 32.35, + "learning_rate": 1.7866666666666666e-05, + "loss": 5.6139, + "step": 5360 + }, + { + "epoch": 32.47, + "learning_rate": 1.7933333333333337e-05, + "loss": 5.5716, + "step": 5380 + }, + { + "epoch": 32.6, + "learning_rate": 1.8e-05, + "loss": 5.5983, + "step": 5400 + }, + { + "epoch": 32.72, + "learning_rate": 1.8066666666666668e-05, + "loss": 5.5821, + "step": 5420 + }, + { + "epoch": 32.84, + "learning_rate": 1.8133333333333335e-05, + "loss": 5.5799, + "step": 5440 + }, + { + "epoch": 32.96, + "learning_rate": 1.8200000000000002e-05, + "loss": 5.558, + "step": 5460 + }, + { + "epoch": 33.08, + "learning_rate": 1.826666666666667e-05, + "loss": 5.5665, + "step": 5480 + }, + { + "epoch": 33.2, + "learning_rate": 1.8333333333333333e-05, + "loss": 5.5493, + "step": 5500 + }, + { + "epoch": 33.32, + "learning_rate": 1.84e-05, + "loss": 5.5597, + "step": 5520 + }, + { + "epoch": 33.44, + "learning_rate": 1.8466666666666667e-05, + "loss": 5.5507, + "step": 5540 + }, + { + "epoch": 33.56, + "learning_rate": 1.8533333333333334e-05, + "loss": 5.5527, + "step": 5560 + }, + { + "epoch": 33.68, + "learning_rate": 1.86e-05, + "loss": 5.5438, + "step": 5580 + }, + { + "epoch": 33.8, + "learning_rate": 1.866666666666667e-05, + "loss": 5.5427, + "step": 5600 + }, + { + "epoch": 33.92, + "learning_rate": 1.8733333333333332e-05, + "loss": 5.5477, + "step": 5620 + }, + { + "epoch": 34.04, + "learning_rate": 1.88e-05, + "loss": 5.5087, + "step": 5640 + }, + { + "epoch": 34.16, + "learning_rate": 1.886666666666667e-05, + "loss": 5.5358, + "step": 5660 + }, + { + "epoch": 34.29, + "learning_rate": 1.8933333333333334e-05, + "loss": 5.5069, + "step": 5680 + }, + { + "epoch": 34.41, + "learning_rate": 1.9e-05, + "loss": 5.5231, + "step": 5700 + }, + { + "epoch": 34.53, + "learning_rate": 1.9066666666666668e-05, + "loss": 5.5083, + "step": 5720 + }, + { + "epoch": 34.65, + "learning_rate": 1.9133333333333332e-05, + "loss": 5.5009, + "step": 5740 + }, + { + "epoch": 34.77, + "learning_rate": 1.9200000000000003e-05, + "loss": 5.5038, + "step": 5760 + }, + { + "epoch": 34.89, + "learning_rate": 1.926666666666667e-05, + "loss": 5.5063, + "step": 5780 + }, + { + "epoch": 35.01, + "learning_rate": 1.9333333333333333e-05, + "loss": 5.4864, + "step": 5800 + }, + { + "epoch": 35.13, + "learning_rate": 1.94e-05, + "loss": 5.4737, + "step": 5820 + }, + { + "epoch": 35.25, + "learning_rate": 1.9466666666666668e-05, + "loss": 5.4905, + "step": 5840 + }, + { + "epoch": 35.37, + "learning_rate": 1.9533333333333335e-05, + "loss": 5.501, + "step": 5860 + }, + { + "epoch": 35.49, + "learning_rate": 1.9600000000000002e-05, + "loss": 5.4765, + "step": 5880 + }, + { + "epoch": 35.61, + "learning_rate": 1.9666666666666666e-05, + "loss": 5.4575, + "step": 5900 + }, + { + "epoch": 35.73, + "learning_rate": 1.9733333333333333e-05, + "loss": 5.4521, + "step": 5920 + }, + { + "epoch": 35.86, + "learning_rate": 1.9800000000000004e-05, + "loss": 5.4681, + "step": 5940 + }, + { + "epoch": 35.98, + "learning_rate": 1.9866666666666667e-05, + "loss": 5.4621, + "step": 5960 + }, + { + "epoch": 36.1, + "learning_rate": 1.9933333333333334e-05, + "loss": 5.4473, + "step": 5980 + }, + { + "epoch": 36.22, + "learning_rate": 2e-05, + "loss": 5.4294, + "step": 6000 + }, + { + "epoch": 36.22, + "eval_accuracy": 0.0002054706562219083, + "eval_loss": 5.475820541381836, + "eval_runtime": 18.2393, + "eval_samples_per_second": 111.846, + "eval_steps_per_second": 2.358, + "step": 6000 + }, + { + "epoch": 36.34, + "learning_rate": 2.0066666666666665e-05, + "loss": 5.4475, + "step": 6020 + }, + { + "epoch": 36.46, + "learning_rate": 2.0133333333333336e-05, + "loss": 5.4346, + "step": 6040 + }, + { + "epoch": 36.58, + "learning_rate": 2.0200000000000003e-05, + "loss": 5.4358, + "step": 6060 + }, + { + "epoch": 36.7, + "learning_rate": 2.0266666666666667e-05, + "loss": 5.4414, + "step": 6080 + }, + { + "epoch": 36.82, + "learning_rate": 2.0333333333333334e-05, + "loss": 5.431, + "step": 6100 + }, + { + "epoch": 36.94, + "learning_rate": 2.04e-05, + "loss": 5.4276, + "step": 6120 + }, + { + "epoch": 37.06, + "learning_rate": 2.046666666666667e-05, + "loss": 5.4126, + "step": 6140 + }, + { + "epoch": 37.18, + "learning_rate": 2.0533333333333336e-05, + "loss": 5.4055, + "step": 6160 + }, + { + "epoch": 37.3, + "learning_rate": 2.06e-05, + "loss": 5.4142, + "step": 6180 + }, + { + "epoch": 37.42, + "learning_rate": 2.0666666666666666e-05, + "loss": 5.4029, + "step": 6200 + }, + { + "epoch": 37.55, + "learning_rate": 2.0733333333333334e-05, + "loss": 5.3825, + "step": 6220 + }, + { + "epoch": 37.67, + "learning_rate": 2.08e-05, + "loss": 5.3948, + "step": 6240 + }, + { + "epoch": 37.79, + "learning_rate": 2.0866666666666668e-05, + "loss": 5.4173, + "step": 6260 + }, + { + "epoch": 37.91, + "learning_rate": 2.0933333333333335e-05, + "loss": 5.3965, + "step": 6280 + }, + { + "epoch": 38.03, + "learning_rate": 2.1e-05, + "loss": 5.3943, + "step": 6300 + }, + { + "epoch": 38.15, + "learning_rate": 2.106666666666667e-05, + "loss": 5.372, + "step": 6320 + }, + { + "epoch": 38.27, + "learning_rate": 2.1133333333333337e-05, + "loss": 5.3875, + "step": 6340 + }, + { + "epoch": 38.39, + "learning_rate": 2.12e-05, + "loss": 5.3339, + "step": 6360 + }, + { + "epoch": 38.51, + "learning_rate": 2.1266666666666667e-05, + "loss": 5.3707, + "step": 6380 + }, + { + "epoch": 38.63, + "learning_rate": 2.1333333333333335e-05, + "loss": 5.3882, + "step": 6400 + }, + { + "epoch": 38.75, + "learning_rate": 2.1400000000000002e-05, + "loss": 5.3582, + "step": 6420 + }, + { + "epoch": 38.87, + "learning_rate": 2.146666666666667e-05, + "loss": 5.3476, + "step": 6440 + }, + { + "epoch": 38.99, + "learning_rate": 2.1533333333333333e-05, + "loss": 5.3561, + "step": 6460 + }, + { + "epoch": 39.11, + "learning_rate": 2.16e-05, + "loss": 5.3582, + "step": 6480 + }, + { + "epoch": 39.24, + "learning_rate": 2.1666666666666667e-05, + "loss": 5.3456, + "step": 6500 + }, + { + "epoch": 39.36, + "learning_rate": 2.1733333333333334e-05, + "loss": 5.3251, + "step": 6520 + }, + { + "epoch": 39.48, + "learning_rate": 2.18e-05, + "loss": 5.3242, + "step": 6540 + }, + { + "epoch": 39.6, + "learning_rate": 2.186666666666667e-05, + "loss": 5.3291, + "step": 6560 + }, + { + "epoch": 39.72, + "learning_rate": 2.1933333333333332e-05, + "loss": 5.3198, + "step": 6580 + }, + { + "epoch": 39.84, + "learning_rate": 2.2000000000000003e-05, + "loss": 5.3222, + "step": 6600 + }, + { + "epoch": 39.96, + "learning_rate": 2.206666666666667e-05, + "loss": 5.3368, + "step": 6620 + }, + { + "epoch": 40.08, + "learning_rate": 2.2133333333333334e-05, + "loss": 5.2988, + "step": 6640 + }, + { + "epoch": 40.2, + "learning_rate": 2.22e-05, + "loss": 5.2908, + "step": 6660 + }, + { + "epoch": 40.32, + "learning_rate": 2.2266666666666668e-05, + "loss": 5.2934, + "step": 6680 + }, + { + "epoch": 40.44, + "learning_rate": 2.2333333333333335e-05, + "loss": 5.3153, + "step": 6700 + }, + { + "epoch": 40.56, + "learning_rate": 2.2400000000000002e-05, + "loss": 5.295, + "step": 6720 + }, + { + "epoch": 40.68, + "learning_rate": 2.2466666666666666e-05, + "loss": 5.2895, + "step": 6740 + }, + { + "epoch": 40.8, + "learning_rate": 2.2533333333333333e-05, + "loss": 5.3141, + "step": 6760 + }, + { + "epoch": 40.93, + "learning_rate": 2.26e-05, + "loss": 5.2998, + "step": 6780 + }, + { + "epoch": 41.05, + "learning_rate": 2.2666666666666668e-05, + "loss": 5.2623, + "step": 6800 + }, + { + "epoch": 41.17, + "learning_rate": 2.2733333333333335e-05, + "loss": 5.2713, + "step": 6820 + }, + { + "epoch": 41.29, + "learning_rate": 2.2800000000000002e-05, + "loss": 5.269, + "step": 6840 + }, + { + "epoch": 41.41, + "learning_rate": 2.2866666666666666e-05, + "loss": 5.2645, + "step": 6860 + }, + { + "epoch": 41.53, + "learning_rate": 2.2933333333333333e-05, + "loss": 5.2724, + "step": 6880 + }, + { + "epoch": 41.65, + "learning_rate": 2.3000000000000003e-05, + "loss": 5.2596, + "step": 6900 + }, + { + "epoch": 41.77, + "learning_rate": 2.3066666666666667e-05, + "loss": 5.256, + "step": 6920 + }, + { + "epoch": 41.89, + "learning_rate": 2.3133333333333334e-05, + "loss": 5.2449, + "step": 6940 + }, + { + "epoch": 42.01, + "learning_rate": 2.32e-05, + "loss": 5.2506, + "step": 6960 + }, + { + "epoch": 42.13, + "learning_rate": 2.326666666666667e-05, + "loss": 5.2261, + "step": 6980 + }, + { + "epoch": 42.25, + "learning_rate": 2.3333333333333336e-05, + "loss": 5.2161, + "step": 7000 + }, + { + "epoch": 42.25, + "eval_accuracy": 0.000646541079313649, + "eval_loss": 5.289076805114746, + "eval_runtime": 18.2424, + "eval_samples_per_second": 111.828, + "eval_steps_per_second": 2.357, + "step": 7000 + }, + { + "epoch": 42.37, + "learning_rate": 2.3400000000000003e-05, + "loss": 5.2372, + "step": 7020 + }, + { + "epoch": 42.49, + "learning_rate": 2.3466666666666667e-05, + "loss": 5.2254, + "step": 7040 + }, + { + "epoch": 42.62, + "learning_rate": 2.3533333333333334e-05, + "loss": 5.2269, + "step": 7060 + }, + { + "epoch": 42.74, + "learning_rate": 2.36e-05, + "loss": 5.2126, + "step": 7080 + }, + { + "epoch": 42.86, + "learning_rate": 2.3666666666666668e-05, + "loss": 5.2207, + "step": 7100 + }, + { + "epoch": 42.98, + "learning_rate": 2.3733333333333335e-05, + "loss": 5.2211, + "step": 7120 + }, + { + "epoch": 43.1, + "learning_rate": 2.38e-05, + "loss": 5.1833, + "step": 7140 + }, + { + "epoch": 43.22, + "learning_rate": 2.3866666666666666e-05, + "loss": 5.1911, + "step": 7160 + }, + { + "epoch": 43.34, + "learning_rate": 2.3933333333333337e-05, + "loss": 5.1874, + "step": 7180 + }, + { + "epoch": 43.46, + "learning_rate": 2.4e-05, + "loss": 5.2008, + "step": 7200 + }, + { + "epoch": 43.58, + "learning_rate": 2.4066666666666668e-05, + "loss": 5.195, + "step": 7220 + }, + { + "epoch": 43.7, + "learning_rate": 2.4133333333333335e-05, + "loss": 5.183, + "step": 7240 + }, + { + "epoch": 43.82, + "learning_rate": 2.4200000000000002e-05, + "loss": 5.1729, + "step": 7260 + }, + { + "epoch": 43.94, + "learning_rate": 2.426666666666667e-05, + "loss": 5.1818, + "step": 7280 + }, + { + "epoch": 44.06, + "learning_rate": 2.4333333333333336e-05, + "loss": 5.1615, + "step": 7300 + }, + { + "epoch": 44.19, + "learning_rate": 2.44e-05, + "loss": 5.1769, + "step": 7320 + }, + { + "epoch": 44.31, + "learning_rate": 2.4466666666666667e-05, + "loss": 5.1479, + "step": 7340 + }, + { + "epoch": 44.43, + "learning_rate": 2.4533333333333334e-05, + "loss": 5.1789, + "step": 7360 + }, + { + "epoch": 44.55, + "learning_rate": 2.46e-05, + "loss": 5.1305, + "step": 7380 + }, + { + "epoch": 44.67, + "learning_rate": 2.466666666666667e-05, + "loss": 5.1425, + "step": 7400 + }, + { + "epoch": 44.79, + "learning_rate": 2.4733333333333333e-05, + "loss": 5.1452, + "step": 7420 + }, + { + "epoch": 44.91, + "learning_rate": 2.48e-05, + "loss": 5.1387, + "step": 7440 + }, + { + "epoch": 45.03, + "learning_rate": 2.486666666666667e-05, + "loss": 5.1278, + "step": 7460 + }, + { + "epoch": 45.15, + "learning_rate": 2.4933333333333334e-05, + "loss": 5.1048, + "step": 7480 + }, + { + "epoch": 45.27, + "learning_rate": 2.5e-05, + "loss": 5.1143, + "step": 7500 + }, + { + "epoch": 45.39, + "learning_rate": 2.5066666666666665e-05, + "loss": 5.1258, + "step": 7520 + }, + { + "epoch": 45.51, + "learning_rate": 2.5133333333333336e-05, + "loss": 5.1237, + "step": 7540 + }, + { + "epoch": 45.63, + "learning_rate": 2.5200000000000003e-05, + "loss": 5.1253, + "step": 7560 + }, + { + "epoch": 45.75, + "learning_rate": 2.5266666666666666e-05, + "loss": 5.1031, + "step": 7580 + }, + { + "epoch": 45.88, + "learning_rate": 2.5333333333333337e-05, + "loss": 5.0993, + "step": 7600 + }, + { + "epoch": 46.0, + "learning_rate": 2.54e-05, + "loss": 5.098, + "step": 7620 + }, + { + "epoch": 46.12, + "learning_rate": 2.5466666666666668e-05, + "loss": 5.0804, + "step": 7640 + }, + { + "epoch": 46.24, + "learning_rate": 2.553333333333334e-05, + "loss": 5.0841, + "step": 7660 + }, + { + "epoch": 46.36, + "learning_rate": 2.5600000000000002e-05, + "loss": 5.085, + "step": 7680 + }, + { + "epoch": 46.48, + "learning_rate": 2.5666666666666666e-05, + "loss": 5.0754, + "step": 7700 + }, + { + "epoch": 46.6, + "learning_rate": 2.5733333333333337e-05, + "loss": 5.0788, + "step": 7720 + }, + { + "epoch": 46.72, + "learning_rate": 2.58e-05, + "loss": 5.0837, + "step": 7740 + }, + { + "epoch": 46.84, + "learning_rate": 2.5866666666666667e-05, + "loss": 5.0867, + "step": 7760 + }, + { + "epoch": 46.96, + "learning_rate": 2.5933333333333338e-05, + "loss": 5.0632, + "step": 7780 + }, + { + "epoch": 47.08, + "learning_rate": 2.6000000000000002e-05, + "loss": 5.0334, + "step": 7800 + }, + { + "epoch": 47.2, + "learning_rate": 2.6066666666666666e-05, + "loss": 5.051, + "step": 7820 + }, + { + "epoch": 47.32, + "learning_rate": 2.6133333333333333e-05, + "loss": 5.0631, + "step": 7840 + }, + { + "epoch": 47.44, + "learning_rate": 2.6200000000000003e-05, + "loss": 5.0541, + "step": 7860 + }, + { + "epoch": 47.57, + "learning_rate": 2.6266666666666667e-05, + "loss": 5.0481, + "step": 7880 + }, + { + "epoch": 47.69, + "learning_rate": 2.633333333333333e-05, + "loss": 5.03, + "step": 7900 + }, + { + "epoch": 47.81, + "learning_rate": 2.64e-05, + "loss": 5.0424, + "step": 7920 + }, + { + "epoch": 47.93, + "learning_rate": 2.646666666666667e-05, + "loss": 5.0613, + "step": 7940 + }, + { + "epoch": 48.05, + "learning_rate": 2.6533333333333332e-05, + "loss": 5.0139, + "step": 7960 + }, + { + "epoch": 48.17, + "learning_rate": 2.6600000000000003e-05, + "loss": 5.0153, + "step": 7980 + }, + { + "epoch": 48.29, + "learning_rate": 2.6666666666666667e-05, + "loss": 5.0151, + "step": 8000 + }, + { + "epoch": 48.29, + "eval_accuracy": 0.0020700180774663886, + "eval_loss": 5.1151652336120605, + "eval_runtime": 18.1374, + "eval_samples_per_second": 112.475, + "eval_steps_per_second": 2.371, + "step": 8000 + }, + { + "epoch": 48.41, + "learning_rate": 2.6733333333333334e-05, + "loss": 5.0138, + "step": 8020 + }, + { + "epoch": 48.53, + "learning_rate": 2.6800000000000004e-05, + "loss": 5.0247, + "step": 8040 + }, + { + "epoch": 48.65, + "learning_rate": 2.6866666666666668e-05, + "loss": 5.0113, + "step": 8060 + }, + { + "epoch": 48.77, + "learning_rate": 2.6933333333333332e-05, + "loss": 5.0084, + "step": 8080 + }, + { + "epoch": 48.89, + "learning_rate": 2.7000000000000002e-05, + "loss": 5.018, + "step": 8100 + }, + { + "epoch": 49.01, + "learning_rate": 2.706666666666667e-05, + "loss": 4.9896, + "step": 8120 + }, + { + "epoch": 49.13, + "learning_rate": 2.7133333333333333e-05, + "loss": 4.9871, + "step": 8140 + }, + { + "epoch": 49.26, + "learning_rate": 2.7200000000000004e-05, + "loss": 4.979, + "step": 8160 + }, + { + "epoch": 49.38, + "learning_rate": 2.7266666666666668e-05, + "loss": 4.9857, + "step": 8180 + }, + { + "epoch": 49.5, + "learning_rate": 2.733333333333333e-05, + "loss": 4.9877, + "step": 8200 + }, + { + "epoch": 49.62, + "learning_rate": 2.7400000000000002e-05, + "loss": 5.0132, + "step": 8220 + }, + { + "epoch": 49.74, + "learning_rate": 2.746666666666667e-05, + "loss": 4.9614, + "step": 8240 + }, + { + "epoch": 49.86, + "learning_rate": 2.7533333333333333e-05, + "loss": 4.9745, + "step": 8260 + }, + { + "epoch": 49.98, + "learning_rate": 2.7600000000000003e-05, + "loss": 4.9696, + "step": 8280 + }, + { + "epoch": 50.1, + "learning_rate": 2.7666666666666667e-05, + "loss": 4.957, + "step": 8300 + }, + { + "epoch": 50.22, + "learning_rate": 2.7733333333333334e-05, + "loss": 4.9363, + "step": 8320 + }, + { + "epoch": 50.34, + "learning_rate": 2.7800000000000005e-05, + "loss": 4.9432, + "step": 8340 + }, + { + "epoch": 50.46, + "learning_rate": 2.786666666666667e-05, + "loss": 4.9699, + "step": 8360 + }, + { + "epoch": 50.58, + "learning_rate": 2.7933333333333332e-05, + "loss": 4.9538, + "step": 8380 + }, + { + "epoch": 50.7, + "learning_rate": 2.8000000000000003e-05, + "loss": 4.9687, + "step": 8400 + }, + { + "epoch": 50.82, + "learning_rate": 2.806666666666667e-05, + "loss": 4.9383, + "step": 8420 + }, + { + "epoch": 50.95, + "learning_rate": 2.8133333333333334e-05, + "loss": 4.9384, + "step": 8440 + }, + { + "epoch": 51.07, + "learning_rate": 2.8199999999999998e-05, + "loss": 4.9388, + "step": 8460 + }, + { + "epoch": 51.19, + "learning_rate": 2.8266666666666668e-05, + "loss": 4.9146, + "step": 8480 + }, + { + "epoch": 51.31, + "learning_rate": 2.8333333333333335e-05, + "loss": 4.9131, + "step": 8500 + }, + { + "epoch": 51.43, + "learning_rate": 2.84e-05, + "loss": 4.9165, + "step": 8520 + }, + { + "epoch": 51.55, + "learning_rate": 2.846666666666667e-05, + "loss": 4.9345, + "step": 8540 + }, + { + "epoch": 51.67, + "learning_rate": 2.8533333333333333e-05, + "loss": 4.9299, + "step": 8560 + }, + { + "epoch": 51.79, + "learning_rate": 2.86e-05, + "loss": 4.9118, + "step": 8580 + }, + { + "epoch": 51.91, + "learning_rate": 2.8666666666666668e-05, + "loss": 4.9335, + "step": 8600 + }, + { + "epoch": 52.03, + "learning_rate": 2.8733333333333335e-05, + "loss": 4.9034, + "step": 8620 + }, + { + "epoch": 52.15, + "learning_rate": 2.88e-05, + "loss": 4.8897, + "step": 8640 + }, + { + "epoch": 52.27, + "learning_rate": 2.886666666666667e-05, + "loss": 4.8783, + "step": 8660 + }, + { + "epoch": 52.39, + "learning_rate": 2.8933333333333333e-05, + "loss": 4.9061, + "step": 8680 + }, + { + "epoch": 52.52, + "learning_rate": 2.9e-05, + "loss": 4.8954, + "step": 8700 + }, + { + "epoch": 52.64, + "learning_rate": 2.906666666666667e-05, + "loss": 4.8877, + "step": 8720 + }, + { + "epoch": 52.76, + "learning_rate": 2.9133333333333334e-05, + "loss": 4.893, + "step": 8740 + }, + { + "epoch": 52.88, + "learning_rate": 2.9199999999999998e-05, + "loss": 4.868, + "step": 8760 + }, + { + "epoch": 53.0, + "learning_rate": 2.926666666666667e-05, + "loss": 4.9046, + "step": 8780 + }, + { + "epoch": 53.12, + "learning_rate": 2.9333333333333336e-05, + "loss": 4.8657, + "step": 8800 + }, + { + "epoch": 53.24, + "learning_rate": 2.94e-05, + "loss": 4.8584, + "step": 8820 + }, + { + "epoch": 53.36, + "learning_rate": 2.946666666666667e-05, + "loss": 4.8433, + "step": 8840 + }, + { + "epoch": 53.48, + "learning_rate": 2.9533333333333334e-05, + "loss": 4.8586, + "step": 8860 + }, + { + "epoch": 53.6, + "learning_rate": 2.96e-05, + "loss": 4.875, + "step": 8880 + }, + { + "epoch": 53.72, + "learning_rate": 2.9666666666666672e-05, + "loss": 4.8812, + "step": 8900 + }, + { + "epoch": 53.84, + "learning_rate": 2.9733333333333336e-05, + "loss": 4.8584, + "step": 8920 + }, + { + "epoch": 53.96, + "learning_rate": 2.98e-05, + "loss": 4.8654, + "step": 8940 + }, + { + "epoch": 54.08, + "learning_rate": 2.986666666666667e-05, + "loss": 4.8279, + "step": 8960 + }, + { + "epoch": 54.21, + "learning_rate": 2.9933333333333337e-05, + "loss": 4.8162, + "step": 8980 + }, + { + "epoch": 54.33, + "learning_rate": 3e-05, + "loss": 4.8349, + "step": 9000 + }, + { + "epoch": 54.33, + "eval_accuracy": 0.0019850638638361767, + "eval_loss": 4.9847259521484375, + "eval_runtime": 18.0947, + "eval_samples_per_second": 112.74, + "eval_steps_per_second": 2.376, + "step": 9000 + }, + { + "epoch": 54.45, + "learning_rate": 3.006666666666667e-05, + "loss": 4.8558, + "step": 9020 + }, + { + "epoch": 54.57, + "learning_rate": 3.0133333333333335e-05, + "loss": 4.8273, + "step": 9040 + }, + { + "epoch": 54.69, + "learning_rate": 3.02e-05, + "loss": 4.8337, + "step": 9060 + }, + { + "epoch": 54.81, + "learning_rate": 3.0266666666666666e-05, + "loss": 4.8381, + "step": 9080 + }, + { + "epoch": 54.93, + "learning_rate": 3.0333333333333337e-05, + "loss": 4.8132, + "step": 9100 + }, + { + "epoch": 55.05, + "learning_rate": 3.04e-05, + "loss": 4.8152, + "step": 9120 + }, + { + "epoch": 55.17, + "learning_rate": 3.0466666666666664e-05, + "loss": 4.7843, + "step": 9140 + }, + { + "epoch": 55.29, + "learning_rate": 3.0533333333333335e-05, + "loss": 4.7997, + "step": 9160 + }, + { + "epoch": 55.41, + "learning_rate": 3.06e-05, + "loss": 4.8173, + "step": 9180 + }, + { + "epoch": 55.53, + "learning_rate": 3.066666666666667e-05, + "loss": 4.8259, + "step": 9200 + }, + { + "epoch": 55.65, + "learning_rate": 3.073333333333334e-05, + "loss": 4.8002, + "step": 9220 + }, + { + "epoch": 55.77, + "learning_rate": 3.08e-05, + "loss": 4.803, + "step": 9240 + }, + { + "epoch": 55.9, + "learning_rate": 3.086666666666667e-05, + "loss": 4.7885, + "step": 9260 + }, + { + "epoch": 56.02, + "learning_rate": 3.093333333333334e-05, + "loss": 4.7864, + "step": 9280 + }, + { + "epoch": 56.14, + "learning_rate": 3.1e-05, + "loss": 4.7604, + "step": 9300 + }, + { + "epoch": 56.26, + "learning_rate": 3.1066666666666665e-05, + "loss": 4.7582, + "step": 9320 + }, + { + "epoch": 56.38, + "learning_rate": 3.1133333333333336e-05, + "loss": 4.8099, + "step": 9340 + }, + { + "epoch": 56.5, + "learning_rate": 3.12e-05, + "loss": 4.7534, + "step": 9360 + }, + { + "epoch": 56.62, + "learning_rate": 3.126666666666666e-05, + "loss": 4.7918, + "step": 9380 + }, + { + "epoch": 56.74, + "learning_rate": 3.1333333333333334e-05, + "loss": 4.7662, + "step": 9400 + }, + { + "epoch": 56.86, + "learning_rate": 3.1400000000000004e-05, + "loss": 4.783, + "step": 9420 + }, + { + "epoch": 56.98, + "learning_rate": 3.146666666666667e-05, + "loss": 4.7519, + "step": 9440 + }, + { + "epoch": 57.1, + "learning_rate": 3.153333333333334e-05, + "loss": 4.7292, + "step": 9460 + }, + { + "epoch": 57.22, + "learning_rate": 3.16e-05, + "loss": 4.7479, + "step": 9480 + }, + { + "epoch": 57.34, + "learning_rate": 3.1666666666666666e-05, + "loss": 4.7309, + "step": 9500 + }, + { + "epoch": 57.46, + "learning_rate": 3.173333333333334e-05, + "loss": 4.7625, + "step": 9520 + }, + { + "epoch": 57.59, + "learning_rate": 3.18e-05, + "loss": 4.7517, + "step": 9540 + }, + { + "epoch": 57.71, + "learning_rate": 3.1866666666666664e-05, + "loss": 4.7495, + "step": 9560 + }, + { + "epoch": 57.83, + "learning_rate": 3.1933333333333335e-05, + "loss": 4.7457, + "step": 9580 + }, + { + "epoch": 57.95, + "learning_rate": 3.2000000000000005e-05, + "loss": 4.7366, + "step": 9600 + }, + { + "epoch": 58.07, + "learning_rate": 3.206666666666667e-05, + "loss": 4.7249, + "step": 9620 + }, + { + "epoch": 58.19, + "learning_rate": 3.213333333333334e-05, + "loss": 4.7089, + "step": 9640 + }, + { + "epoch": 58.31, + "learning_rate": 3.2200000000000003e-05, + "loss": 4.7289, + "step": 9660 + }, + { + "epoch": 58.43, + "learning_rate": 3.226666666666667e-05, + "loss": 4.7049, + "step": 9680 + }, + { + "epoch": 58.55, + "learning_rate": 3.233333333333333e-05, + "loss": 4.6996, + "step": 9700 + }, + { + "epoch": 58.67, + "learning_rate": 3.24e-05, + "loss": 4.7265, + "step": 9720 + }, + { + "epoch": 58.79, + "learning_rate": 3.2466666666666665e-05, + "loss": 4.7109, + "step": 9740 + }, + { + "epoch": 58.91, + "learning_rate": 3.253333333333333e-05, + "loss": 4.7198, + "step": 9760 + }, + { + "epoch": 59.03, + "learning_rate": 3.26e-05, + "loss": 4.7008, + "step": 9780 + }, + { + "epoch": 59.15, + "learning_rate": 3.266666666666667e-05, + "loss": 4.6824, + "step": 9800 + }, + { + "epoch": 59.28, + "learning_rate": 3.2733333333333334e-05, + "loss": 4.6835, + "step": 9820 + }, + { + "epoch": 59.4, + "learning_rate": 3.2800000000000004e-05, + "loss": 4.6769, + "step": 9840 + }, + { + "epoch": 59.52, + "learning_rate": 3.286666666666667e-05, + "loss": 4.6937, + "step": 9860 + }, + { + "epoch": 59.64, + "learning_rate": 3.293333333333333e-05, + "loss": 4.6854, + "step": 9880 + }, + { + "epoch": 59.76, + "learning_rate": 3.3e-05, + "loss": 4.6908, + "step": 9900 + }, + { + "epoch": 59.88, + "learning_rate": 3.3066666666666666e-05, + "loss": 4.6809, + "step": 9920 + }, + { + "epoch": 60.0, + "learning_rate": 3.313333333333333e-05, + "loss": 4.6611, + "step": 9940 + }, + { + "epoch": 60.12, + "learning_rate": 3.32e-05, + "loss": 4.6599, + "step": 9960 + }, + { + "epoch": 60.24, + "learning_rate": 3.326666666666667e-05, + "loss": 4.6588, + "step": 9980 + }, + { + "epoch": 60.36, + "learning_rate": 3.3333333333333335e-05, + "loss": 4.6358, + "step": 10000 + }, + { + "epoch": 60.36, + "eval_accuracy": 0.002247335302427122, + "eval_loss": 4.875439643859863, + "eval_runtime": 18.0739, + "eval_samples_per_second": 112.87, + "eval_steps_per_second": 2.379, + "step": 10000 + }, + { + "epoch": 60.48, + "learning_rate": 3.339666666666667e-05, + "loss": 4.6601, + "step": 10020 + }, + { + "epoch": 60.6, + "learning_rate": 3.3463333333333335e-05, + "loss": 4.6622, + "step": 10040 + }, + { + "epoch": 60.72, + "learning_rate": 3.353e-05, + "loss": 4.6417, + "step": 10060 + }, + { + "epoch": 60.85, + "learning_rate": 3.359666666666667e-05, + "loss": 4.659, + "step": 10080 + }, + { + "epoch": 60.97, + "learning_rate": 3.3663333333333333e-05, + "loss": 4.6455, + "step": 10100 + }, + { + "epoch": 61.09, + "learning_rate": 3.373e-05, + "loss": 4.6091, + "step": 10120 + }, + { + "epoch": 61.21, + "learning_rate": 3.379666666666667e-05, + "loss": 4.613, + "step": 10140 + }, + { + "epoch": 61.33, + "learning_rate": 3.386333333333334e-05, + "loss": 4.6148, + "step": 10160 + }, + { + "epoch": 61.45, + "learning_rate": 3.393e-05, + "loss": 4.6302, + "step": 10180 + }, + { + "epoch": 61.57, + "learning_rate": 3.3996666666666666e-05, + "loss": 4.6415, + "step": 10200 + }, + { + "epoch": 61.69, + "learning_rate": 3.4063333333333336e-05, + "loss": 4.6201, + "step": 10220 + }, + { + "epoch": 61.81, + "learning_rate": 3.413e-05, + "loss": 4.5999, + "step": 10240 + }, + { + "epoch": 61.93, + "learning_rate": 3.4196666666666664e-05, + "loss": 4.6226, + "step": 10260 + }, + { + "epoch": 62.05, + "learning_rate": 3.4263333333333334e-05, + "loss": 4.6262, + "step": 10280 + }, + { + "epoch": 62.17, + "learning_rate": 3.433e-05, + "loss": 4.5851, + "step": 10300 + }, + { + "epoch": 62.29, + "learning_rate": 3.439666666666667e-05, + "loss": 4.5964, + "step": 10320 + }, + { + "epoch": 62.41, + "learning_rate": 3.446333333333334e-05, + "loss": 4.5909, + "step": 10340 + }, + { + "epoch": 62.54, + "learning_rate": 3.453e-05, + "loss": 4.5874, + "step": 10360 + }, + { + "epoch": 62.66, + "learning_rate": 3.459666666666667e-05, + "loss": 4.5741, + "step": 10380 + }, + { + "epoch": 62.78, + "learning_rate": 3.466333333333334e-05, + "loss": 4.5983, + "step": 10400 + }, + { + "epoch": 62.9, + "learning_rate": 3.473e-05, + "loss": 4.5904, + "step": 10420 + }, + { + "epoch": 63.02, + "learning_rate": 3.4796666666666665e-05, + "loss": 4.5989, + "step": 10440 + }, + { + "epoch": 63.14, + "learning_rate": 3.4863333333333336e-05, + "loss": 4.5575, + "step": 10460 + }, + { + "epoch": 63.26, + "learning_rate": 3.493e-05, + "loss": 4.5537, + "step": 10480 + }, + { + "epoch": 63.38, + "learning_rate": 3.499666666666667e-05, + "loss": 4.5645, + "step": 10500 + }, + { + "epoch": 63.5, + "learning_rate": 3.5063333333333334e-05, + "loss": 4.5454, + "step": 10520 + }, + { + "epoch": 63.62, + "learning_rate": 3.5130000000000004e-05, + "loss": 4.5659, + "step": 10540 + }, + { + "epoch": 63.74, + "learning_rate": 3.519666666666667e-05, + "loss": 4.5412, + "step": 10560 + }, + { + "epoch": 63.86, + "learning_rate": 3.526333333333334e-05, + "loss": 4.5668, + "step": 10580 + }, + { + "epoch": 63.98, + "learning_rate": 3.533e-05, + "loss": 4.5536, + "step": 10600 + }, + { + "epoch": 64.1, + "learning_rate": 3.5396666666666666e-05, + "loss": 4.5191, + "step": 10620 + }, + { + "epoch": 64.23, + "learning_rate": 3.5463333333333337e-05, + "loss": 4.5262, + "step": 10640 + }, + { + "epoch": 64.35, + "learning_rate": 3.553e-05, + "loss": 4.544, + "step": 10660 + }, + { + "epoch": 64.47, + "learning_rate": 3.5596666666666664e-05, + "loss": 4.5408, + "step": 10680 + }, + { + "epoch": 64.59, + "learning_rate": 3.5663333333333335e-05, + "loss": 4.5145, + "step": 10700 + }, + { + "epoch": 64.71, + "learning_rate": 3.5730000000000005e-05, + "loss": 4.5178, + "step": 10720 + }, + { + "epoch": 64.83, + "learning_rate": 3.579666666666667e-05, + "loss": 4.5363, + "step": 10740 + }, + { + "epoch": 64.95, + "learning_rate": 3.586333333333334e-05, + "loss": 4.5128, + "step": 10760 + }, + { + "epoch": 65.07, + "learning_rate": 3.593e-05, + "loss": 4.503, + "step": 10780 + }, + { + "epoch": 65.19, + "learning_rate": 3.599666666666667e-05, + "loss": 4.491, + "step": 10800 + }, + { + "epoch": 65.31, + "learning_rate": 3.606333333333333e-05, + "loss": 4.4675, + "step": 10820 + }, + { + "epoch": 65.43, + "learning_rate": 3.613e-05, + "loss": 4.5172, + "step": 10840 + }, + { + "epoch": 65.55, + "learning_rate": 3.6196666666666665e-05, + "loss": 4.4865, + "step": 10860 + }, + { + "epoch": 65.67, + "learning_rate": 3.6263333333333336e-05, + "loss": 4.4962, + "step": 10880 + }, + { + "epoch": 65.79, + "learning_rate": 3.6330000000000006e-05, + "loss": 4.5097, + "step": 10900 + }, + { + "epoch": 65.92, + "learning_rate": 3.639666666666667e-05, + "loss": 4.5079, + "step": 10920 + }, + { + "epoch": 66.04, + "learning_rate": 3.6463333333333334e-05, + "loss": 4.4555, + "step": 10940 + }, + { + "epoch": 66.16, + "learning_rate": 3.6530000000000004e-05, + "loss": 4.4732, + "step": 10960 + }, + { + "epoch": 66.28, + "learning_rate": 3.659666666666667e-05, + "loss": 4.4547, + "step": 10980 + }, + { + "epoch": 66.4, + "learning_rate": 3.666333333333333e-05, + "loss": 4.4326, + "step": 11000 + }, + { + "epoch": 66.4, + "eval_accuracy": 0.002107062065967935, + "eval_loss": 4.780860424041748, + "eval_runtime": 18.0696, + "eval_samples_per_second": 112.897, + "eval_steps_per_second": 2.38, + "step": 11000 + }, + { + "epoch": 66.52, + "learning_rate": 3.673e-05, + "loss": 4.4623, + "step": 11020 + }, + { + "epoch": 66.64, + "learning_rate": 3.6796666666666666e-05, + "loss": 4.4712, + "step": 11040 + }, + { + "epoch": 66.76, + "learning_rate": 3.686333333333333e-05, + "loss": 4.4571, + "step": 11060 + }, + { + "epoch": 66.88, + "learning_rate": 3.693e-05, + "loss": 4.4793, + "step": 11080 + }, + { + "epoch": 67.0, + "learning_rate": 3.699666666666667e-05, + "loss": 4.4662, + "step": 11100 + }, + { + "epoch": 67.12, + "learning_rate": 3.7063333333333335e-05, + "loss": 4.3932, + "step": 11120 + }, + { + "epoch": 67.24, + "learning_rate": 3.7130000000000005e-05, + "loss": 4.4387, + "step": 11140 + }, + { + "epoch": 67.36, + "learning_rate": 3.719666666666667e-05, + "loss": 4.426, + "step": 11160 + }, + { + "epoch": 67.48, + "learning_rate": 3.726333333333333e-05, + "loss": 4.4222, + "step": 11180 + }, + { + "epoch": 67.61, + "learning_rate": 3.7330000000000003e-05, + "loss": 4.4323, + "step": 11200 + }, + { + "epoch": 67.73, + "learning_rate": 3.739666666666667e-05, + "loss": 4.4221, + "step": 11220 + }, + { + "epoch": 67.85, + "learning_rate": 3.746333333333333e-05, + "loss": 4.4385, + "step": 11240 + }, + { + "epoch": 67.97, + "learning_rate": 3.753e-05, + "loss": 4.4514, + "step": 11260 + }, + { + "epoch": 68.09, + "learning_rate": 3.759666666666667e-05, + "loss": 4.3912, + "step": 11280 + }, + { + "epoch": 68.21, + "learning_rate": 3.7663333333333336e-05, + "loss": 4.3944, + "step": 11300 + }, + { + "epoch": 68.33, + "learning_rate": 3.7730000000000006e-05, + "loss": 4.3894, + "step": 11320 + }, + { + "epoch": 68.45, + "learning_rate": 3.779666666666667e-05, + "loss": 4.3806, + "step": 11340 + }, + { + "epoch": 68.57, + "learning_rate": 3.7863333333333334e-05, + "loss": 4.3978, + "step": 11360 + }, + { + "epoch": 68.69, + "learning_rate": 3.7930000000000004e-05, + "loss": 4.4171, + "step": 11380 + }, + { + "epoch": 68.81, + "learning_rate": 3.799666666666667e-05, + "loss": 4.4158, + "step": 11400 + }, + { + "epoch": 68.93, + "learning_rate": 3.806333333333333e-05, + "loss": 4.4008, + "step": 11420 + }, + { + "epoch": 69.05, + "learning_rate": 3.8129999999999996e-05, + "loss": 4.3798, + "step": 11440 + }, + { + "epoch": 69.18, + "learning_rate": 3.8196666666666666e-05, + "loss": 4.3538, + "step": 11460 + }, + { + "epoch": 69.3, + "learning_rate": 3.826333333333334e-05, + "loss": 4.3783, + "step": 11480 + }, + { + "epoch": 69.42, + "learning_rate": 3.833e-05, + "loss": 4.3597, + "step": 11500 + }, + { + "epoch": 69.54, + "learning_rate": 3.839666666666667e-05, + "loss": 4.3653, + "step": 11520 + }, + { + "epoch": 69.66, + "learning_rate": 3.8463333333333335e-05, + "loss": 4.3719, + "step": 11540 + }, + { + "epoch": 69.78, + "learning_rate": 3.853e-05, + "loss": 4.3391, + "step": 11560 + }, + { + "epoch": 69.9, + "learning_rate": 3.859666666666667e-05, + "loss": 4.3558, + "step": 11580 + }, + { + "epoch": 70.02, + "learning_rate": 3.866333333333333e-05, + "loss": 4.3695, + "step": 11600 + }, + { + "epoch": 70.14, + "learning_rate": 3.873e-05, + "loss": 4.3293, + "step": 11620 + }, + { + "epoch": 70.26, + "learning_rate": 3.879666666666667e-05, + "loss": 4.3039, + "step": 11640 + }, + { + "epoch": 70.38, + "learning_rate": 3.886333333333334e-05, + "loss": 4.3328, + "step": 11660 + }, + { + "epoch": 70.5, + "learning_rate": 3.893e-05, + "loss": 4.3128, + "step": 11680 + }, + { + "epoch": 70.62, + "learning_rate": 3.899666666666667e-05, + "loss": 4.3224, + "step": 11700 + }, + { + "epoch": 70.74, + "learning_rate": 3.9063333333333336e-05, + "loss": 4.3377, + "step": 11720 + }, + { + "epoch": 70.87, + "learning_rate": 3.913e-05, + "loss": 4.3658, + "step": 11740 + }, + { + "epoch": 70.99, + "learning_rate": 3.919666666666667e-05, + "loss": 4.3473, + "step": 11760 + }, + { + "epoch": 71.11, + "learning_rate": 3.9263333333333334e-05, + "loss": 4.2763, + "step": 11780 + }, + { + "epoch": 71.23, + "learning_rate": 3.933e-05, + "loss": 4.2899, + "step": 11800 + }, + { + "epoch": 71.35, + "learning_rate": 3.939666666666667e-05, + "loss": 4.2896, + "step": 11820 + }, + { + "epoch": 71.47, + "learning_rate": 3.946333333333333e-05, + "loss": 4.2895, + "step": 11840 + }, + { + "epoch": 71.59, + "learning_rate": 3.953e-05, + "loss": 4.3077, + "step": 11860 + }, + { + "epoch": 71.71, + "learning_rate": 3.959666666666667e-05, + "loss": 4.3121, + "step": 11880 + }, + { + "epoch": 71.83, + "learning_rate": 3.966333333333334e-05, + "loss": 4.3091, + "step": 11900 + }, + { + "epoch": 71.95, + "learning_rate": 3.973e-05, + "loss": 4.2995, + "step": 11920 + }, + { + "epoch": 72.07, + "learning_rate": 3.979666666666667e-05, + "loss": 4.2908, + "step": 11940 + }, + { + "epoch": 72.19, + "learning_rate": 3.9863333333333335e-05, + "loss": 4.2227, + "step": 11960 + }, + { + "epoch": 72.31, + "learning_rate": 3.993e-05, + "loss": 4.239, + "step": 11980 + }, + { + "epoch": 72.43, + "learning_rate": 3.999666666666667e-05, + "loss": 4.2632, + "step": 12000 + }, + { + "epoch": 72.43, + "eval_accuracy": 0.0017331647420256641, + "eval_loss": 4.741575241088867, + "eval_runtime": 18.0838, + "eval_samples_per_second": 112.808, + "eval_steps_per_second": 2.378, + "step": 12000 + }, + { + "epoch": 72.56, + "learning_rate": 4.0060000000000006e-05, + "loss": 4.2956, + "step": 12020 + }, + { + "epoch": 72.68, + "learning_rate": 4.012666666666667e-05, + "loss": 4.2654, + "step": 12040 + }, + { + "epoch": 72.8, + "learning_rate": 4.0193333333333334e-05, + "loss": 4.2661, + "step": 12060 + }, + { + "epoch": 72.92, + "learning_rate": 4.0260000000000004e-05, + "loss": 4.2876, + "step": 12080 + }, + { + "epoch": 73.04, + "learning_rate": 4.032666666666667e-05, + "loss": 4.2446, + "step": 12100 + }, + { + "epoch": 73.16, + "learning_rate": 4.039333333333333e-05, + "loss": 4.2041, + "step": 12120 + }, + { + "epoch": 73.28, + "learning_rate": 4.046e-05, + "loss": 4.2398, + "step": 12140 + }, + { + "epoch": 73.4, + "learning_rate": 4.0526666666666666e-05, + "loss": 4.2222, + "step": 12160 + }, + { + "epoch": 73.52, + "learning_rate": 4.0593333333333337e-05, + "loss": 4.2484, + "step": 12180 + }, + { + "epoch": 73.64, + "learning_rate": 4.066e-05, + "loss": 4.2174, + "step": 12200 + }, + { + "epoch": 73.76, + "learning_rate": 4.072666666666667e-05, + "loss": 4.2504, + "step": 12220 + }, + { + "epoch": 73.88, + "learning_rate": 4.0793333333333335e-05, + "loss": 4.2291, + "step": 12240 + }, + { + "epoch": 74.0, + "learning_rate": 4.0860000000000005e-05, + "loss": 4.2397, + "step": 12260 + }, + { + "epoch": 74.12, + "learning_rate": 4.092666666666667e-05, + "loss": 4.1777, + "step": 12280 + }, + { + "epoch": 74.25, + "learning_rate": 4.099333333333333e-05, + "loss": 4.1897, + "step": 12300 + }, + { + "epoch": 74.37, + "learning_rate": 4.106e-05, + "loss": 4.1765, + "step": 12320 + }, + { + "epoch": 74.49, + "learning_rate": 4.112666666666667e-05, + "loss": 4.1897, + "step": 12340 + }, + { + "epoch": 74.61, + "learning_rate": 4.119333333333333e-05, + "loss": 4.2046, + "step": 12360 + }, + { + "epoch": 74.73, + "learning_rate": 4.126e-05, + "loss": 4.2271, + "step": 12380 + }, + { + "epoch": 74.85, + "learning_rate": 4.132666666666667e-05, + "loss": 4.1944, + "step": 12400 + }, + { + "epoch": 74.97, + "learning_rate": 4.1393333333333336e-05, + "loss": 4.2009, + "step": 12420 + }, + { + "epoch": 75.09, + "learning_rate": 4.1460000000000006e-05, + "loss": 4.1821, + "step": 12440 + }, + { + "epoch": 75.21, + "learning_rate": 4.152666666666667e-05, + "loss": 4.1583, + "step": 12460 + }, + { + "epoch": 75.33, + "learning_rate": 4.1593333333333334e-05, + "loss": 4.1454, + "step": 12480 + }, + { + "epoch": 75.45, + "learning_rate": 4.1660000000000004e-05, + "loss": 4.1584, + "step": 12500 + }, + { + "epoch": 75.57, + "learning_rate": 4.172666666666667e-05, + "loss": 4.1799, + "step": 12520 + }, + { + "epoch": 75.69, + "learning_rate": 4.179333333333333e-05, + "loss": 4.1769, + "step": 12540 + }, + { + "epoch": 75.81, + "learning_rate": 4.186e-05, + "loss": 4.1602, + "step": 12560 + }, + { + "epoch": 75.94, + "learning_rate": 4.192666666666667e-05, + "loss": 4.1414, + "step": 12580 + }, + { + "epoch": 76.06, + "learning_rate": 4.199333333333334e-05, + "loss": 4.1437, + "step": 12600 + }, + { + "epoch": 76.18, + "learning_rate": 4.206e-05, + "loss": 4.1252, + "step": 12620 + }, + { + "epoch": 76.3, + "learning_rate": 4.212666666666667e-05, + "loss": 4.1107, + "step": 12640 + }, + { + "epoch": 76.42, + "learning_rate": 4.2193333333333335e-05, + "loss": 4.1409, + "step": 12660 + }, + { + "epoch": 76.54, + "learning_rate": 4.226e-05, + "loss": 4.1118, + "step": 12680 + }, + { + "epoch": 76.66, + "learning_rate": 4.232666666666667e-05, + "loss": 4.1119, + "step": 12700 + }, + { + "epoch": 76.78, + "learning_rate": 4.239333333333333e-05, + "loss": 4.1528, + "step": 12720 + }, + { + "epoch": 76.9, + "learning_rate": 4.246e-05, + "loss": 4.1155, + "step": 12740 + }, + { + "epoch": 77.02, + "learning_rate": 4.252666666666667e-05, + "loss": 4.1292, + "step": 12760 + }, + { + "epoch": 77.14, + "learning_rate": 4.259333333333334e-05, + "loss": 4.0907, + "step": 12780 + }, + { + "epoch": 77.26, + "learning_rate": 4.266e-05, + "loss": 4.0803, + "step": 12800 + }, + { + "epoch": 77.38, + "learning_rate": 4.272666666666667e-05, + "loss": 4.0886, + "step": 12820 + }, + { + "epoch": 77.51, + "learning_rate": 4.2793333333333336e-05, + "loss": 4.1005, + "step": 12840 + }, + { + "epoch": 77.63, + "learning_rate": 4.286e-05, + "loss": 4.0972, + "step": 12860 + }, + { + "epoch": 77.75, + "learning_rate": 4.292666666666667e-05, + "loss": 4.1042, + "step": 12880 + }, + { + "epoch": 77.87, + "learning_rate": 4.2993333333333334e-05, + "loss": 4.0769, + "step": 12900 + }, + { + "epoch": 77.99, + "learning_rate": 4.306e-05, + "loss": 4.1025, + "step": 12920 + }, + { + "epoch": 78.11, + "learning_rate": 4.312666666666667e-05, + "loss": 4.0391, + "step": 12940 + }, + { + "epoch": 78.23, + "learning_rate": 4.319333333333334e-05, + "loss": 4.0377, + "step": 12960 + }, + { + "epoch": 78.35, + "learning_rate": 4.326e-05, + "loss": 4.0672, + "step": 12980 + }, + { + "epoch": 78.47, + "learning_rate": 4.332666666666667e-05, + "loss": 4.0415, + "step": 13000 + }, + { + "epoch": 78.47, + "eval_accuracy": 0.001637838211615019, + "eval_loss": 4.750297546386719, + "eval_runtime": 18.199, + "eval_samples_per_second": 112.094, + "eval_steps_per_second": 2.363, + "step": 13000 + }, + { + "epoch": 78.59, + "learning_rate": 4.339333333333334e-05, + "loss": 4.0433, + "step": 13020 + }, + { + "epoch": 78.71, + "learning_rate": 4.346e-05, + "loss": 4.0433, + "step": 13040 + }, + { + "epoch": 78.83, + "learning_rate": 4.352666666666667e-05, + "loss": 4.0692, + "step": 13060 + }, + { + "epoch": 78.95, + "learning_rate": 4.3593333333333335e-05, + "loss": 4.0831, + "step": 13080 + }, + { + "epoch": 79.07, + "learning_rate": 4.366e-05, + "loss": 4.0378, + "step": 13100 + }, + { + "epoch": 79.2, + "learning_rate": 4.372666666666667e-05, + "loss": 4.0092, + "step": 13120 + }, + { + "epoch": 79.32, + "learning_rate": 4.379333333333333e-05, + "loss": 4.0043, + "step": 13140 + }, + { + "epoch": 79.44, + "learning_rate": 4.3860000000000004e-05, + "loss": 4.0388, + "step": 13160 + }, + { + "epoch": 79.56, + "learning_rate": 4.3926666666666674e-05, + "loss": 4.0174, + "step": 13180 + }, + { + "epoch": 79.68, + "learning_rate": 4.399333333333334e-05, + "loss": 3.9951, + "step": 13200 + }, + { + "epoch": 79.8, + "learning_rate": 4.406e-05, + "loss": 4.0435, + "step": 13220 + }, + { + "epoch": 79.92, + "learning_rate": 4.4126666666666665e-05, + "loss": 4.0442, + "step": 13240 + }, + { + "epoch": 80.04, + "learning_rate": 4.4193333333333336e-05, + "loss": 3.9929, + "step": 13260 + }, + { + "epoch": 80.16, + "learning_rate": 4.426e-05, + "loss": 3.9753, + "step": 13280 + }, + { + "epoch": 80.28, + "learning_rate": 4.4326666666666664e-05, + "loss": 3.949, + "step": 13300 + }, + { + "epoch": 80.4, + "learning_rate": 4.4393333333333334e-05, + "loss": 3.9818, + "step": 13320 + }, + { + "epoch": 80.52, + "learning_rate": 4.4460000000000005e-05, + "loss": 3.9604, + "step": 13340 + }, + { + "epoch": 80.64, + "learning_rate": 4.452666666666667e-05, + "loss": 3.9965, + "step": 13360 + }, + { + "epoch": 80.76, + "learning_rate": 4.459333333333334e-05, + "loss": 3.9871, + "step": 13380 + }, + { + "epoch": 80.89, + "learning_rate": 4.466e-05, + "loss": 4.0184, + "step": 13400 + }, + { + "epoch": 81.01, + "learning_rate": 4.4726666666666666e-05, + "loss": 3.9988, + "step": 13420 + }, + { + "epoch": 81.13, + "learning_rate": 4.479333333333334e-05, + "loss": 3.9294, + "step": 13440 + }, + { + "epoch": 81.25, + "learning_rate": 4.486e-05, + "loss": 3.9239, + "step": 13460 + }, + { + "epoch": 81.37, + "learning_rate": 4.4926666666666665e-05, + "loss": 3.935, + "step": 13480 + }, + { + "epoch": 81.49, + "learning_rate": 4.4993333333333335e-05, + "loss": 3.941, + "step": 13500 + }, + { + "epoch": 81.61, + "learning_rate": 4.506e-05, + "loss": 3.9797, + "step": 13520 + }, + { + "epoch": 81.73, + "learning_rate": 4.512666666666667e-05, + "loss": 3.9534, + "step": 13540 + }, + { + "epoch": 81.85, + "learning_rate": 4.519333333333334e-05, + "loss": 3.9324, + "step": 13560 + }, + { + "epoch": 81.97, + "learning_rate": 4.5260000000000004e-05, + "loss": 3.9686, + "step": 13580 + }, + { + "epoch": 82.09, + "learning_rate": 4.532666666666667e-05, + "loss": 3.8894, + "step": 13600 + }, + { + "epoch": 82.21, + "learning_rate": 4.539333333333334e-05, + "loss": 3.8861, + "step": 13620 + }, + { + "epoch": 82.33, + "learning_rate": 4.546e-05, + "loss": 3.9124, + "step": 13640 + }, + { + "epoch": 82.45, + "learning_rate": 4.5526666666666666e-05, + "loss": 3.9025, + "step": 13660 + }, + { + "epoch": 82.58, + "learning_rate": 4.5593333333333336e-05, + "loss": 3.8856, + "step": 13680 + }, + { + "epoch": 82.7, + "learning_rate": 4.566e-05, + "loss": 3.9063, + "step": 13700 + }, + { + "epoch": 82.82, + "learning_rate": 4.572666666666667e-05, + "loss": 3.9387, + "step": 13720 + }, + { + "epoch": 82.94, + "learning_rate": 4.579333333333334e-05, + "loss": 3.9133, + "step": 13740 + }, + { + "epoch": 83.06, + "learning_rate": 4.5860000000000005e-05, + "loss": 3.8905, + "step": 13760 + }, + { + "epoch": 83.18, + "learning_rate": 4.592666666666667e-05, + "loss": 3.8375, + "step": 13780 + }, + { + "epoch": 83.3, + "learning_rate": 4.599333333333334e-05, + "loss": 3.8402, + "step": 13800 + }, + { + "epoch": 83.42, + "learning_rate": 4.606e-05, + "loss": 3.8487, + "step": 13820 + }, + { + "epoch": 83.54, + "learning_rate": 4.612666666666667e-05, + "loss": 3.8902, + "step": 13840 + }, + { + "epoch": 83.66, + "learning_rate": 4.619333333333333e-05, + "loss": 3.8876, + "step": 13860 + }, + { + "epoch": 83.78, + "learning_rate": 4.626e-05, + "loss": 3.8639, + "step": 13880 + }, + { + "epoch": 83.9, + "learning_rate": 4.632666666666667e-05, + "loss": 3.9104, + "step": 13900 + }, + { + "epoch": 84.02, + "learning_rate": 4.6393333333333335e-05, + "loss": 3.8802, + "step": 13920 + }, + { + "epoch": 84.14, + "learning_rate": 4.6460000000000006e-05, + "loss": 3.8044, + "step": 13940 + }, + { + "epoch": 84.27, + "learning_rate": 4.652666666666667e-05, + "loss": 3.7955, + "step": 13960 + }, + { + "epoch": 84.39, + "learning_rate": 4.659333333333333e-05, + "loss": 3.8249, + "step": 13980 + }, + { + "epoch": 84.51, + "learning_rate": 4.6660000000000004e-05, + "loss": 3.8196, + "step": 14000 + }, + { + "epoch": 84.51, + "eval_accuracy": 0.001441258112633482, + "eval_loss": 4.847209453582764, + "eval_runtime": 18.1965, + "eval_samples_per_second": 112.109, + "eval_steps_per_second": 2.363, + "step": 14000 + }, + { + "epoch": 84.63, + "learning_rate": 4.672666666666667e-05, + "loss": 3.8446, + "step": 14020 + }, + { + "epoch": 84.75, + "learning_rate": 4.679e-05, + "loss": 3.8489, + "step": 14040 + }, + { + "epoch": 84.87, + "learning_rate": 4.685666666666667e-05, + "loss": 3.8578, + "step": 14060 + }, + { + "epoch": 84.99, + "learning_rate": 4.692333333333334e-05, + "loss": 3.8582, + "step": 14080 + }, + { + "epoch": 85.11, + "learning_rate": 4.699e-05, + "loss": 3.7613, + "step": 14100 + }, + { + "epoch": 85.23, + "learning_rate": 4.705666666666667e-05, + "loss": 3.752, + "step": 14120 + }, + { + "epoch": 85.35, + "learning_rate": 4.712333333333334e-05, + "loss": 3.7674, + "step": 14140 + }, + { + "epoch": 85.47, + "learning_rate": 4.719e-05, + "loss": 3.8142, + "step": 14160 + }, + { + "epoch": 85.59, + "learning_rate": 4.725666666666667e-05, + "loss": 3.7781, + "step": 14180 + }, + { + "epoch": 85.71, + "learning_rate": 4.7323333333333335e-05, + "loss": 3.8041, + "step": 14200 + }, + { + "epoch": 85.84, + "learning_rate": 4.739e-05, + "loss": 3.8062, + "step": 14220 + }, + { + "epoch": 85.96, + "learning_rate": 4.745666666666667e-05, + "loss": 3.8096, + "step": 14240 + }, + { + "epoch": 86.08, + "learning_rate": 4.752333333333334e-05, + "loss": 3.7798, + "step": 14260 + }, + { + "epoch": 86.2, + "learning_rate": 4.7590000000000003e-05, + "loss": 3.7221, + "step": 14280 + }, + { + "epoch": 86.32, + "learning_rate": 4.7656666666666674e-05, + "loss": 3.7424, + "step": 14300 + }, + { + "epoch": 86.44, + "learning_rate": 4.772333333333334e-05, + "loss": 3.7457, + "step": 14320 + }, + { + "epoch": 86.56, + "learning_rate": 4.779e-05, + "loss": 3.755, + "step": 14340 + }, + { + "epoch": 86.68, + "learning_rate": 4.7856666666666665e-05, + "loss": 3.7468, + "step": 14360 + }, + { + "epoch": 86.8, + "learning_rate": 4.7923333333333336e-05, + "loss": 3.7541, + "step": 14380 + }, + { + "epoch": 86.92, + "learning_rate": 4.799e-05, + "loss": 3.7615, + "step": 14400 + }, + { + "epoch": 87.04, + "learning_rate": 4.805666666666666e-05, + "loss": 3.7446, + "step": 14420 + }, + { + "epoch": 87.16, + "learning_rate": 4.8123333333333334e-05, + "loss": 3.6729, + "step": 14440 + }, + { + "epoch": 87.28, + "learning_rate": 4.8190000000000004e-05, + "loss": 3.7036, + "step": 14460 + }, + { + "epoch": 87.4, + "learning_rate": 4.825666666666667e-05, + "loss": 3.702, + "step": 14480 + }, + { + "epoch": 87.53, + "learning_rate": 4.832333333333334e-05, + "loss": 3.7092, + "step": 14500 + }, + { + "epoch": 87.65, + "learning_rate": 4.839e-05, + "loss": 3.7291, + "step": 14520 + }, + { + "epoch": 87.77, + "learning_rate": 4.8456666666666666e-05, + "loss": 3.7032, + "step": 14540 + }, + { + "epoch": 87.89, + "learning_rate": 4.852333333333334e-05, + "loss": 3.7265, + "step": 14560 + }, + { + "epoch": 88.01, + "learning_rate": 4.859e-05, + "loss": 3.7322, + "step": 14580 + }, + { + "epoch": 88.13, + "learning_rate": 4.865333333333334e-05, + "loss": 3.631, + "step": 14600 + }, + { + "epoch": 88.25, + "learning_rate": 4.872000000000001e-05, + "loss": 3.6268, + "step": 14620 + }, + { + "epoch": 88.37, + "learning_rate": 4.878666666666667e-05, + "loss": 3.6521, + "step": 14640 + }, + { + "epoch": 88.49, + "learning_rate": 4.8853333333333335e-05, + "loss": 3.6974, + "step": 14660 + }, + { + "epoch": 88.61, + "learning_rate": 4.8920000000000006e-05, + "loss": 3.6469, + "step": 14680 + }, + { + "epoch": 88.73, + "learning_rate": 4.898666666666667e-05, + "loss": 3.681, + "step": 14700 + }, + { + "epoch": 88.85, + "learning_rate": 4.9053333333333333e-05, + "loss": 3.6776, + "step": 14720 + }, + { + "epoch": 88.97, + "learning_rate": 4.9120000000000004e-05, + "loss": 3.6987, + "step": 14740 + }, + { + "epoch": 89.09, + "learning_rate": 4.918666666666667e-05, + "loss": 3.6138, + "step": 14760 + }, + { + "epoch": 89.22, + "learning_rate": 4.925333333333333e-05, + "loss": 3.6124, + "step": 14780 + }, + { + "epoch": 89.34, + "learning_rate": 4.932e-05, + "loss": 3.6088, + "step": 14800 + }, + { + "epoch": 89.46, + "learning_rate": 4.938666666666667e-05, + "loss": 3.6353, + "step": 14820 + }, + { + "epoch": 89.58, + "learning_rate": 4.9453333333333336e-05, + "loss": 3.6475, + "step": 14840 + }, + { + "epoch": 89.7, + "learning_rate": 4.952e-05, + "loss": 3.6232, + "step": 14860 + }, + { + "epoch": 89.82, + "learning_rate": 4.958666666666667e-05, + "loss": 3.6363, + "step": 14880 + }, + { + "epoch": 89.94, + "learning_rate": 4.9653333333333335e-05, + "loss": 3.6583, + "step": 14900 + }, + { + "epoch": 90.06, + "learning_rate": 4.972e-05, + "loss": 3.5976, + "step": 14920 + }, + { + "epoch": 90.18, + "learning_rate": 4.978666666666667e-05, + "loss": 3.5279, + "step": 14940 + }, + { + "epoch": 90.3, + "learning_rate": 4.985333333333333e-05, + "loss": 3.562, + "step": 14960 + }, + { + "epoch": 90.42, + "learning_rate": 4.992e-05, + "loss": 3.5838, + "step": 14980 + }, + { + "epoch": 90.54, + "learning_rate": 4.9986666666666674e-05, + "loss": 3.6207, + "step": 15000 + }, + { + "epoch": 90.54, + "eval_accuracy": 0.0014145864409123687, + "eval_loss": 5.021492958068848, + "eval_runtime": 18.2836, + "eval_samples_per_second": 111.575, + "eval_steps_per_second": 2.352, + "step": 15000 + }, + { + "epoch": 90.66, + "learning_rate": 5.0053333333333344e-05, + "loss": 3.6005, + "step": 15020 + }, + { + "epoch": 90.78, + "learning_rate": 5.012e-05, + "loss": 3.6005, + "step": 15040 + }, + { + "epoch": 90.91, + "learning_rate": 5.018666666666667e-05, + "loss": 3.5765, + "step": 15060 + }, + { + "epoch": 91.03, + "learning_rate": 5.025333333333334e-05, + "loss": 3.5912, + "step": 15080 + }, + { + "epoch": 91.15, + "learning_rate": 5.032e-05, + "loss": 3.4993, + "step": 15100 + }, + { + "epoch": 91.27, + "learning_rate": 5.038666666666667e-05, + "loss": 3.5075, + "step": 15120 + }, + { + "epoch": 91.39, + "learning_rate": 5.045333333333333e-05, + "loss": 3.5238, + "step": 15140 + }, + { + "epoch": 91.51, + "learning_rate": 5.052e-05, + "loss": 3.5347, + "step": 15160 + }, + { + "epoch": 91.63, + "learning_rate": 5.058666666666667e-05, + "loss": 3.5646, + "step": 15180 + }, + { + "epoch": 91.75, + "learning_rate": 5.065333333333333e-05, + "loss": 3.5525, + "step": 15200 + }, + { + "epoch": 91.87, + "learning_rate": 5.072e-05, + "loss": 3.5477, + "step": 15220 + }, + { + "epoch": 91.99, + "learning_rate": 5.078666666666667e-05, + "loss": 3.5826, + "step": 15240 + }, + { + "epoch": 92.11, + "learning_rate": 5.085333333333333e-05, + "loss": 3.4527, + "step": 15260 + }, + { + "epoch": 92.23, + "learning_rate": 5.092e-05, + "loss": 3.4612, + "step": 15280 + }, + { + "epoch": 92.35, + "learning_rate": 5.098666666666667e-05, + "loss": 3.4665, + "step": 15300 + }, + { + "epoch": 92.47, + "learning_rate": 5.105333333333333e-05, + "loss": 3.4885, + "step": 15320 + }, + { + "epoch": 92.6, + "learning_rate": 5.112e-05, + "loss": 3.5039, + "step": 15340 + }, + { + "epoch": 92.72, + "learning_rate": 5.118666666666667e-05, + "loss": 3.5134, + "step": 15360 + }, + { + "epoch": 92.84, + "learning_rate": 5.125333333333333e-05, + "loss": 3.5206, + "step": 15380 + }, + { + "epoch": 92.96, + "learning_rate": 5.132e-05, + "loss": 3.522, + "step": 15400 + }, + { + "epoch": 93.08, + "learning_rate": 5.1386666666666674e-05, + "loss": 3.4437, + "step": 15420 + }, + { + "epoch": 93.2, + "learning_rate": 5.145333333333333e-05, + "loss": 3.3961, + "step": 15440 + }, + { + "epoch": 93.32, + "learning_rate": 5.152e-05, + "loss": 3.4249, + "step": 15460 + }, + { + "epoch": 93.44, + "learning_rate": 5.158666666666667e-05, + "loss": 3.4609, + "step": 15480 + }, + { + "epoch": 93.56, + "learning_rate": 5.165333333333333e-05, + "loss": 3.4521, + "step": 15500 + }, + { + "epoch": 93.68, + "learning_rate": 5.172e-05, + "loss": 3.5057, + "step": 15520 + }, + { + "epoch": 93.8, + "learning_rate": 5.178666666666667e-05, + "loss": 3.454, + "step": 15540 + }, + { + "epoch": 93.92, + "learning_rate": 5.1853333333333334e-05, + "loss": 3.4676, + "step": 15560 + }, + { + "epoch": 94.04, + "learning_rate": 5.1920000000000004e-05, + "loss": 3.4264, + "step": 15580 + }, + { + "epoch": 94.16, + "learning_rate": 5.1986666666666675e-05, + "loss": 3.3537, + "step": 15600 + }, + { + "epoch": 94.29, + "learning_rate": 5.205333333333333e-05, + "loss": 3.3604, + "step": 15620 + }, + { + "epoch": 94.41, + "learning_rate": 5.212e-05, + "loss": 3.4009, + "step": 15640 + }, + { + "epoch": 94.53, + "learning_rate": 5.218666666666667e-05, + "loss": 3.3898, + "step": 15660 + }, + { + "epoch": 94.65, + "learning_rate": 5.225333333333333e-05, + "loss": 3.418, + "step": 15680 + }, + { + "epoch": 94.77, + "learning_rate": 5.232e-05, + "loss": 3.407, + "step": 15700 + }, + { + "epoch": 94.89, + "learning_rate": 5.238666666666667e-05, + "loss": 3.4534, + "step": 15720 + }, + { + "epoch": 95.01, + "learning_rate": 5.2453333333333335e-05, + "loss": 3.444, + "step": 15740 + }, + { + "epoch": 95.13, + "learning_rate": 5.2520000000000005e-05, + "loss": 3.2952, + "step": 15760 + }, + { + "epoch": 95.25, + "learning_rate": 5.2586666666666676e-05, + "loss": 3.3335, + "step": 15780 + }, + { + "epoch": 95.37, + "learning_rate": 5.265333333333333e-05, + "loss": 3.3337, + "step": 15800 + }, + { + "epoch": 95.49, + "learning_rate": 5.2720000000000003e-05, + "loss": 3.3459, + "step": 15820 + }, + { + "epoch": 95.61, + "learning_rate": 5.2786666666666674e-05, + "loss": 3.3494, + "step": 15840 + }, + { + "epoch": 95.73, + "learning_rate": 5.285333333333333e-05, + "loss": 3.375, + "step": 15860 + }, + { + "epoch": 95.86, + "learning_rate": 5.292e-05, + "loss": 3.3768, + "step": 15880 + }, + { + "epoch": 95.98, + "learning_rate": 5.298666666666667e-05, + "loss": 3.4199, + "step": 15900 + }, + { + "epoch": 96.1, + "learning_rate": 5.3053333333333336e-05, + "loss": 3.2968, + "step": 15920 + }, + { + "epoch": 96.22, + "learning_rate": 5.3120000000000006e-05, + "loss": 3.2651, + "step": 15940 + }, + { + "epoch": 96.34, + "learning_rate": 5.318666666666667e-05, + "loss": 3.2762, + "step": 15960 + }, + { + "epoch": 96.46, + "learning_rate": 5.3253333333333334e-05, + "loss": 3.3002, + "step": 15980 + }, + { + "epoch": 96.58, + "learning_rate": 5.3320000000000004e-05, + "loss": 3.3163, + "step": 16000 + }, + { + "epoch": 96.58, + "eval_accuracy": 0.0013558099791565826, + "eval_loss": 5.293873310089111, + "eval_runtime": 18.1779, + "eval_samples_per_second": 112.224, + "eval_steps_per_second": 2.366, + "step": 16000 + }, + { + "epoch": 96.7, + "learning_rate": 5.3386666666666675e-05, + "loss": 3.3285, + "step": 16020 + }, + { + "epoch": 96.82, + "learning_rate": 5.345333333333333e-05, + "loss": 3.3154, + "step": 16040 + }, + { + "epoch": 96.94, + "learning_rate": 5.352e-05, + "loss": 3.3525, + "step": 16060 + }, + { + "epoch": 97.06, + "learning_rate": 5.358666666666667e-05, + "loss": 3.2891, + "step": 16080 + }, + { + "epoch": 97.18, + "learning_rate": 5.365333333333333e-05, + "loss": 3.2113, + "step": 16100 + }, + { + "epoch": 97.3, + "learning_rate": 5.372e-05, + "loss": 3.2459, + "step": 16120 + }, + { + "epoch": 97.42, + "learning_rate": 5.378666666666667e-05, + "loss": 3.2479, + "step": 16140 + }, + { + "epoch": 97.55, + "learning_rate": 5.3853333333333335e-05, + "loss": 3.2572, + "step": 16160 + }, + { + "epoch": 97.67, + "learning_rate": 5.3920000000000006e-05, + "loss": 3.2825, + "step": 16180 + }, + { + "epoch": 97.79, + "learning_rate": 5.3986666666666676e-05, + "loss": 3.2782, + "step": 16200 + }, + { + "epoch": 97.91, + "learning_rate": 5.405333333333333e-05, + "loss": 3.2773, + "step": 16220 + }, + { + "epoch": 98.03, + "learning_rate": 5.4120000000000004e-05, + "loss": 3.2568, + "step": 16240 + }, + { + "epoch": 98.15, + "learning_rate": 5.4186666666666674e-05, + "loss": 3.1807, + "step": 16260 + }, + { + "epoch": 98.27, + "learning_rate": 5.425333333333333e-05, + "loss": 3.1694, + "step": 16280 + }, + { + "epoch": 98.39, + "learning_rate": 5.432e-05, + "loss": 3.1982, + "step": 16300 + }, + { + "epoch": 98.51, + "learning_rate": 5.438666666666667e-05, + "loss": 3.2195, + "step": 16320 + }, + { + "epoch": 98.63, + "learning_rate": 5.4453333333333336e-05, + "loss": 3.2266, + "step": 16340 + }, + { + "epoch": 98.75, + "learning_rate": 5.4520000000000007e-05, + "loss": 3.2312, + "step": 16360 + }, + { + "epoch": 98.87, + "learning_rate": 5.4586666666666664e-05, + "loss": 3.2291, + "step": 16380 + }, + { + "epoch": 98.99, + "learning_rate": 5.4653333333333334e-05, + "loss": 3.2518, + "step": 16400 + }, + { + "epoch": 99.11, + "learning_rate": 5.4720000000000005e-05, + "loss": 3.1003, + "step": 16420 + }, + { + "epoch": 99.24, + "learning_rate": 5.478666666666666e-05, + "loss": 3.1198, + "step": 16440 + }, + { + "epoch": 99.36, + "learning_rate": 5.485333333333333e-05, + "loss": 3.1373, + "step": 16460 + }, + { + "epoch": 99.48, + "learning_rate": 5.492e-05, + "loss": 3.164, + "step": 16480 + }, + { + "epoch": 99.6, + "learning_rate": 5.4986666666666666e-05, + "loss": 3.1803, + "step": 16500 + }, + { + "epoch": 99.72, + "learning_rate": 5.505333333333334e-05, + "loss": 3.1856, + "step": 16520 + }, + { + "epoch": 99.84, + "learning_rate": 5.512000000000001e-05, + "loss": 3.2084, + "step": 16540 + }, + { + "epoch": 99.96, + "learning_rate": 5.5186666666666665e-05, + "loss": 3.2043, + "step": 16560 + }, + { + "epoch": 100.08, + "learning_rate": 5.5253333333333335e-05, + "loss": 3.1133, + "step": 16580 + }, + { + "epoch": 100.2, + "learning_rate": 5.531666666666667e-05, + "loss": 3.0598, + "step": 16600 + }, + { + "epoch": 100.32, + "learning_rate": 5.538333333333333e-05, + "loss": 3.0833, + "step": 16620 + }, + { + "epoch": 100.44, + "learning_rate": 5.545e-05, + "loss": 3.0921, + "step": 16640 + }, + { + "epoch": 100.56, + "learning_rate": 5.551666666666667e-05, + "loss": 3.1288, + "step": 16660 + }, + { + "epoch": 100.68, + "learning_rate": 5.5583333333333334e-05, + "loss": 3.1343, + "step": 16680 + }, + { + "epoch": 100.8, + "learning_rate": 5.5650000000000004e-05, + "loss": 3.1412, + "step": 16700 + }, + { + "epoch": 100.93, + "learning_rate": 5.5716666666666675e-05, + "loss": 3.1505, + "step": 16720 + }, + { + "epoch": 101.05, + "learning_rate": 5.578333333333333e-05, + "loss": 3.109, + "step": 16740 + }, + { + "epoch": 101.17, + "learning_rate": 5.585e-05, + "loss": 3.0122, + "step": 16760 + }, + { + "epoch": 101.29, + "learning_rate": 5.591666666666667e-05, + "loss": 3.0209, + "step": 16780 + }, + { + "epoch": 101.41, + "learning_rate": 5.598333333333333e-05, + "loss": 3.0625, + "step": 16800 + }, + { + "epoch": 101.53, + "learning_rate": 5.605e-05, + "loss": 3.0545, + "step": 16820 + }, + { + "epoch": 101.65, + "learning_rate": 5.611666666666667e-05, + "loss": 3.0823, + "step": 16840 + }, + { + "epoch": 101.77, + "learning_rate": 5.6183333333333335e-05, + "loss": 3.0862, + "step": 16860 + }, + { + "epoch": 101.89, + "learning_rate": 5.6250000000000005e-05, + "loss": 3.0877, + "step": 16880 + }, + { + "epoch": 102.01, + "learning_rate": 5.6316666666666676e-05, + "loss": 3.0987, + "step": 16900 + }, + { + "epoch": 102.13, + "learning_rate": 5.638333333333333e-05, + "loss": 2.9435, + "step": 16920 + }, + { + "epoch": 102.25, + "learning_rate": 5.645e-05, + "loss": 2.9804, + "step": 16940 + }, + { + "epoch": 102.37, + "learning_rate": 5.6516666666666674e-05, + "loss": 3.0011, + "step": 16960 + }, + { + "epoch": 102.49, + "learning_rate": 5.658333333333333e-05, + "loss": 3.0206, + "step": 16980 + }, + { + "epoch": 102.62, + "learning_rate": 5.665e-05, + "loss": 3.0377, + "step": 17000 + }, + { + "epoch": 102.62, + "eval_accuracy": 0.001351364700536397, + "eval_loss": 5.668473720550537, + "eval_runtime": 18.2311, + "eval_samples_per_second": 111.897, + "eval_steps_per_second": 2.359, + "step": 17000 + }, + { + "epoch": 102.74, + "learning_rate": 5.671666666666667e-05, + "loss": 3.0418, + "step": 17020 + }, + { + "epoch": 102.86, + "learning_rate": 5.6783333333333336e-05, + "loss": 3.043, + "step": 17040 + }, + { + "epoch": 102.98, + "learning_rate": 5.6850000000000006e-05, + "loss": 3.0512, + "step": 17060 + }, + { + "epoch": 103.1, + "learning_rate": 5.691666666666668e-05, + "loss": 2.9331, + "step": 17080 + }, + { + "epoch": 103.22, + "learning_rate": 5.6983333333333334e-05, + "loss": 2.9127, + "step": 17100 + }, + { + "epoch": 103.34, + "learning_rate": 5.7050000000000004e-05, + "loss": 2.9382, + "step": 17120 + }, + { + "epoch": 103.46, + "learning_rate": 5.7116666666666675e-05, + "loss": 2.9569, + "step": 17140 + }, + { + "epoch": 103.58, + "learning_rate": 5.718333333333333e-05, + "loss": 2.9815, + "step": 17160 + }, + { + "epoch": 103.7, + "learning_rate": 5.725e-05, + "loss": 2.9712, + "step": 17180 + }, + { + "epoch": 103.82, + "learning_rate": 5.731666666666667e-05, + "loss": 3.0054, + "step": 17200 + }, + { + "epoch": 103.94, + "learning_rate": 5.738333333333334e-05, + "loss": 2.9977, + "step": 17220 + }, + { + "epoch": 104.06, + "learning_rate": 5.745e-05, + "loss": 2.942, + "step": 17240 + }, + { + "epoch": 104.19, + "learning_rate": 5.751666666666667e-05, + "loss": 2.8658, + "step": 17260 + }, + { + "epoch": 104.31, + "learning_rate": 5.7583333333333335e-05, + "loss": 2.8741, + "step": 17280 + }, + { + "epoch": 104.43, + "learning_rate": 5.7650000000000005e-05, + "loss": 2.8884, + "step": 17300 + }, + { + "epoch": 104.55, + "learning_rate": 5.7716666666666676e-05, + "loss": 2.9327, + "step": 17320 + }, + { + "epoch": 104.67, + "learning_rate": 5.778333333333333e-05, + "loss": 2.9238, + "step": 17340 + }, + { + "epoch": 104.79, + "learning_rate": 5.7850000000000003e-05, + "loss": 2.9496, + "step": 17360 + }, + { + "epoch": 104.91, + "learning_rate": 5.7916666666666674e-05, + "loss": 2.9621, + "step": 17380 + }, + { + "epoch": 105.03, + "learning_rate": 5.798333333333333e-05, + "loss": 2.9189, + "step": 17400 + }, + { + "epoch": 105.15, + "learning_rate": 5.805e-05, + "loss": 2.7945, + "step": 17420 + }, + { + "epoch": 105.27, + "learning_rate": 5.811666666666667e-05, + "loss": 2.8316, + "step": 17440 + }, + { + "epoch": 105.39, + "learning_rate": 5.8183333333333336e-05, + "loss": 2.8449, + "step": 17460 + }, + { + "epoch": 105.51, + "learning_rate": 5.8250000000000006e-05, + "loss": 2.8638, + "step": 17480 + }, + { + "epoch": 105.63, + "learning_rate": 5.831666666666668e-05, + "loss": 2.8714, + "step": 17500 + }, + { + "epoch": 105.75, + "learning_rate": 5.8383333333333334e-05, + "loss": 2.9079, + "step": 17520 + }, + { + "epoch": 105.88, + "learning_rate": 5.8450000000000005e-05, + "loss": 2.9015, + "step": 17540 + }, + { + "epoch": 106.0, + "learning_rate": 5.851666666666666e-05, + "loss": 2.9151, + "step": 17560 + }, + { + "epoch": 106.12, + "learning_rate": 5.858333333333333e-05, + "loss": 2.7488, + "step": 17580 + }, + { + "epoch": 106.24, + "learning_rate": 5.865e-05, + "loss": 2.7721, + "step": 17600 + }, + { + "epoch": 106.36, + "learning_rate": 5.8716666666666666e-05, + "loss": 2.7912, + "step": 17620 + }, + { + "epoch": 106.48, + "learning_rate": 5.878333333333334e-05, + "loss": 2.8061, + "step": 17640 + }, + { + "epoch": 106.6, + "learning_rate": 5.885000000000001e-05, + "loss": 2.819, + "step": 17660 + }, + { + "epoch": 106.72, + "learning_rate": 5.8916666666666664e-05, + "loss": 2.8298, + "step": 17680 + }, + { + "epoch": 106.84, + "learning_rate": 5.8983333333333335e-05, + "loss": 2.8507, + "step": 17700 + }, + { + "epoch": 106.96, + "learning_rate": 5.9050000000000006e-05, + "loss": 2.8622, + "step": 17720 + }, + { + "epoch": 107.08, + "learning_rate": 5.911666666666666e-05, + "loss": 2.7413, + "step": 17740 + }, + { + "epoch": 107.2, + "learning_rate": 5.918333333333333e-05, + "loss": 2.7069, + "step": 17760 + }, + { + "epoch": 107.32, + "learning_rate": 5.9250000000000004e-05, + "loss": 2.7273, + "step": 17780 + }, + { + "epoch": 107.44, + "learning_rate": 5.931666666666667e-05, + "loss": 2.7585, + "step": 17800 + }, + { + "epoch": 107.57, + "learning_rate": 5.938333333333334e-05, + "loss": 2.7499, + "step": 17820 + }, + { + "epoch": 107.69, + "learning_rate": 5.945000000000001e-05, + "loss": 2.778, + "step": 17840 + }, + { + "epoch": 107.81, + "learning_rate": 5.9516666666666665e-05, + "loss": 2.7969, + "step": 17860 + }, + { + "epoch": 107.93, + "learning_rate": 5.9583333333333336e-05, + "loss": 2.8039, + "step": 17880 + }, + { + "epoch": 108.05, + "learning_rate": 5.9650000000000007e-05, + "loss": 2.7593, + "step": 17900 + }, + { + "epoch": 108.17, + "learning_rate": 5.9716666666666664e-05, + "loss": 2.6348, + "step": 17920 + }, + { + "epoch": 108.29, + "learning_rate": 5.9783333333333334e-05, + "loss": 2.6874, + "step": 17940 + }, + { + "epoch": 108.41, + "learning_rate": 5.9850000000000005e-05, + "loss": 2.6992, + "step": 17960 + }, + { + "epoch": 108.53, + "learning_rate": 5.991666666666667e-05, + "loss": 2.7062, + "step": 17980 + }, + { + "epoch": 108.65, + "learning_rate": 5.998333333333334e-05, + "loss": 2.7272, + "step": 18000 + }, + { + "epoch": 108.65, + "eval_accuracy": 0.0013049362349477926, + "eval_loss": 6.164906024932861, + "eval_runtime": 18.2042, + "eval_samples_per_second": 112.062, + "eval_steps_per_second": 2.362, + "step": 18000 + }, + { + "epoch": 108.77, + "learning_rate": 6.005000000000001e-05, + "loss": 2.7555, + "step": 18020 + }, + { + "epoch": 108.89, + "learning_rate": 6.0116666666666667e-05, + "loss": 2.7582, + "step": 18040 + }, + { + "epoch": 109.01, + "learning_rate": 6.018333333333334e-05, + "loss": 2.7407, + "step": 18060 + }, + { + "epoch": 109.13, + "learning_rate": 6.025000000000001e-05, + "loss": 2.5885, + "step": 18080 + }, + { + "epoch": 109.26, + "learning_rate": 6.0316666666666665e-05, + "loss": 2.6007, + "step": 18100 + }, + { + "epoch": 109.38, + "learning_rate": 6.0383333333333335e-05, + "loss": 2.6265, + "step": 18120 + }, + { + "epoch": 109.5, + "learning_rate": 6.0450000000000006e-05, + "loss": 2.6611, + "step": 18140 + }, + { + "epoch": 109.62, + "learning_rate": 6.051666666666666e-05, + "loss": 2.6709, + "step": 18160 + }, + { + "epoch": 109.74, + "learning_rate": 6.058333333333333e-05, + "loss": 2.694, + "step": 18180 + }, + { + "epoch": 109.86, + "learning_rate": 6.0650000000000004e-05, + "loss": 2.7067, + "step": 18200 + }, + { + "epoch": 109.98, + "learning_rate": 6.071666666666667e-05, + "loss": 2.7346, + "step": 18220 + }, + { + "epoch": 110.1, + "learning_rate": 6.078333333333334e-05, + "loss": 2.5661, + "step": 18240 + }, + { + "epoch": 110.22, + "learning_rate": 6.085000000000001e-05, + "loss": 2.5621, + "step": 18260 + }, + { + "epoch": 110.34, + "learning_rate": 6.0916666666666666e-05, + "loss": 2.5762, + "step": 18280 + }, + { + "epoch": 110.46, + "learning_rate": 6.0983333333333336e-05, + "loss": 2.5957, + "step": 18300 + }, + { + "epoch": 110.58, + "learning_rate": 6.105e-05, + "loss": 2.6202, + "step": 18320 + }, + { + "epoch": 110.7, + "learning_rate": 6.111666666666667e-05, + "loss": 2.6272, + "step": 18340 + }, + { + "epoch": 110.82, + "learning_rate": 6.118000000000001e-05, + "loss": 2.6594, + "step": 18360 + }, + { + "epoch": 110.95, + "learning_rate": 6.124666666666667e-05, + "loss": 2.6702, + "step": 18380 + }, + { + "epoch": 111.07, + "learning_rate": 6.131333333333333e-05, + "loss": 2.5795, + "step": 18400 + }, + { + "epoch": 111.19, + "learning_rate": 6.138e-05, + "loss": 2.4937, + "step": 18420 + }, + { + "epoch": 111.31, + "learning_rate": 6.144666666666668e-05, + "loss": 2.5201, + "step": 18440 + }, + { + "epoch": 111.43, + "learning_rate": 6.151333333333334e-05, + "loss": 2.5366, + "step": 18460 + }, + { + "epoch": 111.55, + "learning_rate": 6.158e-05, + "loss": 2.5571, + "step": 18480 + }, + { + "epoch": 111.67, + "learning_rate": 6.164666666666668e-05, + "loss": 2.5815, + "step": 18500 + }, + { + "epoch": 111.79, + "learning_rate": 6.171333333333333e-05, + "loss": 2.5902, + "step": 18520 + }, + { + "epoch": 111.91, + "learning_rate": 6.178000000000001e-05, + "loss": 2.6134, + "step": 18540 + }, + { + "epoch": 112.03, + "learning_rate": 6.184666666666667e-05, + "loss": 2.5729, + "step": 18560 + }, + { + "epoch": 112.15, + "learning_rate": 6.191333333333334e-05, + "loss": 2.4462, + "step": 18580 + }, + { + "epoch": 112.27, + "learning_rate": 6.198e-05, + "loss": 2.4578, + "step": 18600 + }, + { + "epoch": 112.39, + "learning_rate": 6.204666666666668e-05, + "loss": 2.4986, + "step": 18620 + }, + { + "epoch": 112.52, + "learning_rate": 6.211333333333334e-05, + "loss": 2.5043, + "step": 18640 + }, + { + "epoch": 112.64, + "learning_rate": 6.218e-05, + "loss": 2.5323, + "step": 18660 + }, + { + "epoch": 112.76, + "learning_rate": 6.224666666666667e-05, + "loss": 2.5318, + "step": 18680 + }, + { + "epoch": 112.88, + "learning_rate": 6.231333333333333e-05, + "loss": 2.5624, + "step": 18700 + }, + { + "epoch": 113.0, + "learning_rate": 6.238000000000001e-05, + "loss": 2.563, + "step": 18720 + }, + { + "epoch": 113.12, + "learning_rate": 6.244666666666666e-05, + "loss": 2.3918, + "step": 18740 + }, + { + "epoch": 113.24, + "learning_rate": 6.251333333333334e-05, + "loss": 2.4037, + "step": 18760 + }, + { + "epoch": 113.36, + "learning_rate": 6.258e-05, + "loss": 2.4364, + "step": 18780 + }, + { + "epoch": 113.48, + "learning_rate": 6.264666666666666e-05, + "loss": 2.4501, + "step": 18800 + }, + { + "epoch": 113.6, + "learning_rate": 6.271333333333334e-05, + "loss": 2.4688, + "step": 18820 + }, + { + "epoch": 113.72, + "learning_rate": 6.278e-05, + "loss": 2.4888, + "step": 18840 + }, + { + "epoch": 113.84, + "learning_rate": 6.284666666666667e-05, + "loss": 2.4992, + "step": 18860 + }, + { + "epoch": 113.96, + "learning_rate": 6.291333333333333e-05, + "loss": 2.5299, + "step": 18880 + }, + { + "epoch": 114.08, + "learning_rate": 6.298000000000001e-05, + "loss": 2.396, + "step": 18900 + }, + { + "epoch": 114.21, + "learning_rate": 6.304666666666666e-05, + "loss": 2.348, + "step": 18920 + }, + { + "epoch": 114.33, + "learning_rate": 6.311333333333334e-05, + "loss": 2.3742, + "step": 18940 + }, + { + "epoch": 114.45, + "learning_rate": 6.318e-05, + "loss": 2.3905, + "step": 18960 + }, + { + "epoch": 114.57, + "learning_rate": 6.324666666666667e-05, + "loss": 2.4142, + "step": 18980 + }, + { + "epoch": 114.69, + "learning_rate": 6.331333333333333e-05, + "loss": 2.4319, + "step": 19000 + }, + { + "epoch": 114.69, + "eval_accuracy": 0.0012856733609269888, + "eval_loss": 6.755620956420898, + "eval_runtime": 18.1595, + "eval_samples_per_second": 112.338, + "eval_steps_per_second": 2.368, + "step": 19000 + }, + { + "epoch": 114.81, + "learning_rate": 6.338e-05, + "loss": 2.4442, + "step": 19020 + }, + { + "epoch": 114.93, + "learning_rate": 6.344666666666667e-05, + "loss": 2.4764, + "step": 19040 + }, + { + "epoch": 115.05, + "learning_rate": 6.351333333333333e-05, + "loss": 2.4088, + "step": 19060 + }, + { + "epoch": 115.17, + "learning_rate": 6.358000000000001e-05, + "loss": 2.2868, + "step": 19080 + }, + { + "epoch": 115.29, + "learning_rate": 6.364666666666666e-05, + "loss": 2.3098, + "step": 19100 + }, + { + "epoch": 115.41, + "learning_rate": 6.371333333333334e-05, + "loss": 2.3324, + "step": 19120 + }, + { + "epoch": 115.53, + "learning_rate": 6.378e-05, + "loss": 2.3646, + "step": 19140 + }, + { + "epoch": 115.65, + "learning_rate": 6.384666666666667e-05, + "loss": 2.3865, + "step": 19160 + }, + { + "epoch": 115.77, + "learning_rate": 6.391333333333333e-05, + "loss": 2.3991, + "step": 19180 + }, + { + "epoch": 115.9, + "learning_rate": 6.398000000000001e-05, + "loss": 2.4262, + "step": 19200 + }, + { + "epoch": 116.02, + "learning_rate": 6.404666666666667e-05, + "loss": 2.3915, + "step": 19220 + }, + { + "epoch": 116.14, + "learning_rate": 6.411333333333333e-05, + "loss": 2.2261, + "step": 19240 + }, + { + "epoch": 116.26, + "learning_rate": 6.418000000000001e-05, + "loss": 2.2716, + "step": 19260 + }, + { + "epoch": 116.38, + "learning_rate": 6.424666666666666e-05, + "loss": 2.2805, + "step": 19280 + }, + { + "epoch": 116.5, + "learning_rate": 6.431333333333334e-05, + "loss": 2.3009, + "step": 19300 + }, + { + "epoch": 116.62, + "learning_rate": 6.438e-05, + "loss": 2.3275, + "step": 19320 + }, + { + "epoch": 116.74, + "learning_rate": 6.444666666666667e-05, + "loss": 2.3381, + "step": 19340 + }, + { + "epoch": 116.86, + "learning_rate": 6.451333333333333e-05, + "loss": 2.366, + "step": 19360 + }, + { + "epoch": 116.98, + "learning_rate": 6.458000000000001e-05, + "loss": 2.381, + "step": 19380 + }, + { + "epoch": 117.1, + "learning_rate": 6.464666666666667e-05, + "loss": 2.2173, + "step": 19400 + }, + { + "epoch": 117.22, + "learning_rate": 6.471333333333334e-05, + "loss": 2.1985, + "step": 19420 + }, + { + "epoch": 117.34, + "learning_rate": 6.478000000000001e-05, + "loss": 2.2065, + "step": 19440 + }, + { + "epoch": 117.46, + "learning_rate": 6.484666666666666e-05, + "loss": 2.2554, + "step": 19460 + }, + { + "epoch": 117.59, + "learning_rate": 6.491333333333334e-05, + "loss": 2.2683, + "step": 19480 + }, + { + "epoch": 117.71, + "learning_rate": 6.498e-05, + "loss": 2.2806, + "step": 19500 + }, + { + "epoch": 117.83, + "learning_rate": 6.504666666666667e-05, + "loss": 2.3162, + "step": 19520 + }, + { + "epoch": 117.95, + "learning_rate": 6.511333333333333e-05, + "loss": 2.3214, + "step": 19540 + }, + { + "epoch": 118.07, + "learning_rate": 6.518000000000001e-05, + "loss": 2.2072, + "step": 19560 + }, + { + "epoch": 118.19, + "learning_rate": 6.524666666666667e-05, + "loss": 2.1393, + "step": 19580 + }, + { + "epoch": 118.31, + "learning_rate": 6.531333333333334e-05, + "loss": 2.1773, + "step": 19600 + }, + { + "epoch": 118.43, + "learning_rate": 6.538000000000001e-05, + "loss": 2.192, + "step": 19620 + }, + { + "epoch": 118.55, + "learning_rate": 6.544666666666666e-05, + "loss": 2.2329, + "step": 19640 + }, + { + "epoch": 118.67, + "learning_rate": 6.551333333333334e-05, + "loss": 2.2384, + "step": 19660 + }, + { + "epoch": 118.79, + "learning_rate": 6.558e-05, + "loss": 2.2479, + "step": 19680 + }, + { + "epoch": 118.91, + "learning_rate": 6.564666666666667e-05, + "loss": 2.2883, + "step": 19700 + }, + { + "epoch": 119.03, + "learning_rate": 6.571333333333333e-05, + "loss": 2.2437, + "step": 19720 + }, + { + "epoch": 119.15, + "learning_rate": 6.578000000000001e-05, + "loss": 2.0835, + "step": 19740 + }, + { + "epoch": 119.28, + "learning_rate": 6.584666666666667e-05, + "loss": 2.1107, + "step": 19760 + }, + { + "epoch": 119.4, + "learning_rate": 6.591333333333334e-05, + "loss": 2.1388, + "step": 19780 + }, + { + "epoch": 119.52, + "learning_rate": 6.598e-05, + "loss": 2.1651, + "step": 19800 + }, + { + "epoch": 119.64, + "learning_rate": 6.604666666666667e-05, + "loss": 2.1889, + "step": 19820 + }, + { + "epoch": 119.76, + "learning_rate": 6.611333333333334e-05, + "loss": 2.1927, + "step": 19840 + }, + { + "epoch": 119.88, + "learning_rate": 6.618e-05, + "loss": 2.2085, + "step": 19860 + }, + { + "epoch": 120.0, + "learning_rate": 6.624666666666667e-05, + "loss": 2.2624, + "step": 19880 + }, + { + "epoch": 120.12, + "learning_rate": 6.631333333333333e-05, + "loss": 2.0292, + "step": 19900 + }, + { + "epoch": 120.24, + "learning_rate": 6.638e-05, + "loss": 2.0654, + "step": 19920 + }, + { + "epoch": 120.36, + "learning_rate": 6.644666666666666e-05, + "loss": 2.0827, + "step": 19940 + }, + { + "epoch": 120.48, + "learning_rate": 6.651333333333334e-05, + "loss": 2.1191, + "step": 19960 + }, + { + "epoch": 120.6, + "learning_rate": 6.658e-05, + "loss": 2.1327, + "step": 19980 + }, + { + "epoch": 120.72, + "learning_rate": 6.664666666666667e-05, + "loss": 2.1647, + "step": 20000 + }, + { + "epoch": 120.72, + "eval_accuracy": 0.0013138267921881637, + "eval_loss": 7.395051002502441, + "eval_runtime": 18.1458, + "eval_samples_per_second": 112.423, + "eval_steps_per_second": 2.37, + "step": 20000 + }, + { + "epoch": 120.85, + "learning_rate": 6.671333333333334e-05, + "loss": 2.1718, + "step": 20020 + }, + { + "epoch": 120.97, + "learning_rate": 6.678e-05, + "loss": 2.179, + "step": 20040 + }, + { + "epoch": 121.09, + "learning_rate": 6.684666666666667e-05, + "loss": 2.0547, + "step": 20060 + }, + { + "epoch": 121.21, + "learning_rate": 6.691333333333334e-05, + "loss": 2.0058, + "step": 20080 + }, + { + "epoch": 121.33, + "learning_rate": 6.698e-05, + "loss": 2.0423, + "step": 20100 + }, + { + "epoch": 121.45, + "learning_rate": 6.704666666666666e-05, + "loss": 2.0595, + "step": 20120 + }, + { + "epoch": 121.57, + "learning_rate": 6.711333333333334e-05, + "loss": 2.0777, + "step": 20140 + }, + { + "epoch": 121.69, + "learning_rate": 6.718e-05, + "loss": 2.0979, + "step": 20160 + }, + { + "epoch": 121.81, + "learning_rate": 6.724666666666667e-05, + "loss": 2.132, + "step": 20180 + }, + { + "epoch": 121.93, + "learning_rate": 6.731333333333335e-05, + "loss": 2.1595, + "step": 20200 + }, + { + "epoch": 122.05, + "learning_rate": 6.738e-05, + "loss": 2.0667, + "step": 20220 + }, + { + "epoch": 122.17, + "learning_rate": 6.744666666666667e-05, + "loss": 1.9628, + "step": 20240 + }, + { + "epoch": 122.29, + "learning_rate": 6.751333333333334e-05, + "loss": 1.9853, + "step": 20260 + }, + { + "epoch": 122.41, + "learning_rate": 6.758e-05, + "loss": 2.0203, + "step": 20280 + }, + { + "epoch": 122.54, + "learning_rate": 6.764666666666666e-05, + "loss": 2.0288, + "step": 20300 + }, + { + "epoch": 122.66, + "learning_rate": 6.771333333333334e-05, + "loss": 2.0528, + "step": 20320 + }, + { + "epoch": 122.78, + "learning_rate": 6.778e-05, + "loss": 2.0722, + "step": 20340 + }, + { + "epoch": 122.9, + "learning_rate": 6.784666666666667e-05, + "loss": 2.0802, + "step": 20360 + }, + { + "epoch": 123.02, + "learning_rate": 6.791e-05, + "loss": 2.0712, + "step": 20380 + }, + { + "epoch": 123.14, + "learning_rate": 6.797666666666667e-05, + "loss": 1.9094, + "step": 20400 + }, + { + "epoch": 123.26, + "learning_rate": 6.804333333333333e-05, + "loss": 1.9334, + "step": 20420 + }, + { + "epoch": 123.38, + "learning_rate": 6.811000000000001e-05, + "loss": 1.9429, + "step": 20440 + }, + { + "epoch": 123.5, + "learning_rate": 6.817666666666666e-05, + "loss": 1.9631, + "step": 20460 + }, + { + "epoch": 123.62, + "learning_rate": 6.824333333333334e-05, + "loss": 2.0011, + "step": 20480 + }, + { + "epoch": 123.74, + "learning_rate": 6.831e-05, + "loss": 2.0226, + "step": 20500 + }, + { + "epoch": 123.86, + "learning_rate": 6.837666666666667e-05, + "loss": 2.0459, + "step": 20520 + }, + { + "epoch": 123.98, + "learning_rate": 6.844333333333334e-05, + "loss": 2.072, + "step": 20540 + }, + { + "epoch": 124.1, + "learning_rate": 6.851e-05, + "loss": 1.8929, + "step": 20560 + }, + { + "epoch": 124.23, + "learning_rate": 6.857666666666667e-05, + "loss": 1.8904, + "step": 20580 + }, + { + "epoch": 124.35, + "learning_rate": 6.864333333333333e-05, + "loss": 1.9023, + "step": 20600 + }, + { + "epoch": 124.47, + "learning_rate": 6.871000000000001e-05, + "loss": 1.9269, + "step": 20620 + }, + { + "epoch": 124.59, + "learning_rate": 6.877666666666666e-05, + "loss": 1.9467, + "step": 20640 + }, + { + "epoch": 124.71, + "learning_rate": 6.884333333333334e-05, + "loss": 1.9685, + "step": 20660 + }, + { + "epoch": 124.83, + "learning_rate": 6.891e-05, + "loss": 1.9962, + "step": 20680 + }, + { + "epoch": 124.95, + "learning_rate": 6.897666666666667e-05, + "loss": 2.0065, + "step": 20700 + }, + { + "epoch": 125.07, + "learning_rate": 6.904333333333334e-05, + "loss": 1.9111, + "step": 20720 + }, + { + "epoch": 125.19, + "learning_rate": 6.911000000000001e-05, + "loss": 1.8209, + "step": 20740 + }, + { + "epoch": 125.31, + "learning_rate": 6.917666666666667e-05, + "loss": 1.8422, + "step": 20760 + }, + { + "epoch": 125.43, + "learning_rate": 6.924333333333334e-05, + "loss": 1.8881, + "step": 20780 + }, + { + "epoch": 125.55, + "learning_rate": 6.931000000000001e-05, + "loss": 1.895, + "step": 20800 + }, + { + "epoch": 125.67, + "learning_rate": 6.937666666666666e-05, + "loss": 1.9153, + "step": 20820 + }, + { + "epoch": 125.79, + "learning_rate": 6.944333333333334e-05, + "loss": 1.9444, + "step": 20840 + }, + { + "epoch": 125.92, + "learning_rate": 6.951e-05, + "loss": 1.9533, + "step": 20860 + }, + { + "epoch": 126.04, + "learning_rate": 6.957666666666667e-05, + "loss": 1.9184, + "step": 20880 + }, + { + "epoch": 126.16, + "learning_rate": 6.964333333333334e-05, + "loss": 1.7681, + "step": 20900 + }, + { + "epoch": 126.28, + "learning_rate": 6.971000000000001e-05, + "loss": 1.7999, + "step": 20920 + }, + { + "epoch": 126.4, + "learning_rate": 6.977666666666667e-05, + "loss": 1.8303, + "step": 20940 + }, + { + "epoch": 126.52, + "learning_rate": 6.984333333333334e-05, + "loss": 1.8537, + "step": 20960 + }, + { + "epoch": 126.64, + "learning_rate": 6.991000000000001e-05, + "loss": 1.8724, + "step": 20980 + }, + { + "epoch": 126.76, + "learning_rate": 6.997666666666666e-05, + "loss": 1.9001, + "step": 21000 + }, + { + "epoch": 126.76, + "eval_accuracy": 0.001323211269275222, + "eval_loss": 8.08230972290039, + "eval_runtime": 18.3096, + "eval_samples_per_second": 111.417, + "eval_steps_per_second": 2.348, + "step": 21000 + }, + { + "epoch": 126.88, + "learning_rate": 7.004333333333334e-05, + "loss": 1.9246, + "step": 21020 + }, + { + "epoch": 127.0, + "learning_rate": 7.011e-05, + "loss": 1.9363, + "step": 21040 + }, + { + "epoch": 127.12, + "learning_rate": 7.017666666666667e-05, + "loss": 1.7457, + "step": 21060 + }, + { + "epoch": 127.24, + "learning_rate": 7.024333333333333e-05, + "loss": 1.7626, + "step": 21080 + }, + { + "epoch": 127.36, + "learning_rate": 7.031e-05, + "loss": 1.785, + "step": 21100 + }, + { + "epoch": 127.48, + "learning_rate": 7.037666666666667e-05, + "loss": 1.7905, + "step": 21120 + }, + { + "epoch": 127.61, + "learning_rate": 7.044333333333334e-05, + "loss": 1.8258, + "step": 21140 + }, + { + "epoch": 127.73, + "learning_rate": 7.051e-05, + "loss": 1.8491, + "step": 21160 + }, + { + "epoch": 127.85, + "learning_rate": 7.057666666666666e-05, + "loss": 1.8689, + "step": 21180 + }, + { + "epoch": 127.97, + "learning_rate": 7.064333333333334e-05, + "loss": 1.8854, + "step": 21200 + }, + { + "epoch": 128.09, + "learning_rate": 7.070999999999999e-05, + "loss": 1.7481, + "step": 21220 + }, + { + "epoch": 128.21, + "learning_rate": 7.077666666666667e-05, + "loss": 1.7011, + "step": 21240 + }, + { + "epoch": 128.33, + "learning_rate": 7.084333333333333e-05, + "loss": 1.7239, + "step": 21260 + }, + { + "epoch": 128.45, + "learning_rate": 7.091e-05, + "loss": 1.7696, + "step": 21280 + }, + { + "epoch": 128.57, + "learning_rate": 7.097666666666667e-05, + "loss": 1.7884, + "step": 21300 + }, + { + "epoch": 128.69, + "learning_rate": 7.104333333333334e-05, + "loss": 1.8025, + "step": 21320 + }, + { + "epoch": 128.81, + "learning_rate": 7.111e-05, + "loss": 1.8398, + "step": 21340 + }, + { + "epoch": 128.93, + "learning_rate": 7.117666666666667e-05, + "loss": 1.8538, + "step": 21360 + }, + { + "epoch": 129.05, + "learning_rate": 7.124333333333334e-05, + "loss": 1.7678, + "step": 21380 + }, + { + "epoch": 129.18, + "learning_rate": 7.130999999999999e-05, + "loss": 1.6649, + "step": 21400 + }, + { + "epoch": 129.3, + "learning_rate": 7.137666666666667e-05, + "loss": 1.695, + "step": 21420 + }, + { + "epoch": 129.42, + "learning_rate": 7.144333333333333e-05, + "loss": 1.7073, + "step": 21440 + }, + { + "epoch": 129.54, + "learning_rate": 7.151e-05, + "loss": 1.7488, + "step": 21460 + }, + { + "epoch": 129.66, + "learning_rate": 7.157666666666668e-05, + "loss": 1.7523, + "step": 21480 + }, + { + "epoch": 129.78, + "learning_rate": 7.164333333333334e-05, + "loss": 1.792, + "step": 21500 + }, + { + "epoch": 129.9, + "learning_rate": 7.171e-05, + "loss": 1.8004, + "step": 21520 + }, + { + "epoch": 130.02, + "learning_rate": 7.177666666666667e-05, + "loss": 1.7849, + "step": 21540 + }, + { + "epoch": 130.14, + "learning_rate": 7.184333333333334e-05, + "loss": 1.629, + "step": 21560 + }, + { + "epoch": 130.26, + "learning_rate": 7.191e-05, + "loss": 1.6424, + "step": 21580 + }, + { + "epoch": 130.38, + "learning_rate": 7.197666666666667e-05, + "loss": 1.6661, + "step": 21600 + }, + { + "epoch": 130.5, + "learning_rate": 7.204333333333334e-05, + "loss": 1.7055, + "step": 21620 + }, + { + "epoch": 130.62, + "learning_rate": 7.211e-05, + "loss": 1.7159, + "step": 21640 + }, + { + "epoch": 130.74, + "learning_rate": 7.217666666666668e-05, + "loss": 1.7365, + "step": 21660 + }, + { + "epoch": 130.87, + "learning_rate": 7.224333333333334e-05, + "loss": 1.7562, + "step": 21680 + }, + { + "epoch": 130.99, + "learning_rate": 7.231e-05, + "loss": 1.7823, + "step": 21700 + }, + { + "epoch": 131.11, + "learning_rate": 7.237666666666667e-05, + "loss": 1.6083, + "step": 21720 + }, + { + "epoch": 131.23, + "learning_rate": 7.244333333333335e-05, + "loss": 1.6106, + "step": 21740 + }, + { + "epoch": 131.35, + "learning_rate": 7.251e-05, + "loss": 1.6318, + "step": 21760 + }, + { + "epoch": 131.47, + "learning_rate": 7.257666666666667e-05, + "loss": 1.6561, + "step": 21780 + }, + { + "epoch": 131.59, + "learning_rate": 7.264333333333334e-05, + "loss": 1.6767, + "step": 21800 + }, + { + "epoch": 131.71, + "learning_rate": 7.271e-05, + "loss": 1.681, + "step": 21820 + }, + { + "epoch": 131.83, + "learning_rate": 7.277666666666668e-05, + "loss": 1.7072, + "step": 21840 + }, + { + "epoch": 131.95, + "learning_rate": 7.284333333333334e-05, + "loss": 1.7437, + "step": 21860 + }, + { + "epoch": 132.07, + "learning_rate": 7.291e-05, + "loss": 1.6319, + "step": 21880 + }, + { + "epoch": 132.19, + "learning_rate": 7.297666666666667e-05, + "loss": 1.547, + "step": 21900 + }, + { + "epoch": 132.31, + "learning_rate": 7.304333333333335e-05, + "loss": 1.5772, + "step": 21920 + }, + { + "epoch": 132.43, + "learning_rate": 7.311e-05, + "loss": 1.5979, + "step": 21940 + }, + { + "epoch": 132.56, + "learning_rate": 7.317666666666667e-05, + "loss": 1.6208, + "step": 21960 + }, + { + "epoch": 132.68, + "learning_rate": 7.324333333333334e-05, + "loss": 1.6501, + "step": 21980 + }, + { + "epoch": 132.8, + "learning_rate": 7.331e-05, + "loss": 1.6708, + "step": 22000 + }, + { + "epoch": 132.8, + "eval_accuracy": 0.001301972715867669, + "eval_loss": 8.823031425476074, + "eval_runtime": 18.1374, + "eval_samples_per_second": 112.475, + "eval_steps_per_second": 2.371, + "step": 22000 + }, + { + "epoch": 132.92, + "learning_rate": 7.337666666666666e-05, + "loss": 1.6895, + "step": 22020 + }, + { + "epoch": 133.04, + "learning_rate": 7.344333333333334e-05, + "loss": 1.6339, + "step": 22040 + }, + { + "epoch": 133.16, + "learning_rate": 7.351e-05, + "loss": 1.4999, + "step": 22060 + }, + { + "epoch": 133.28, + "learning_rate": 7.357666666666667e-05, + "loss": 1.5272, + "step": 22080 + }, + { + "epoch": 133.4, + "learning_rate": 7.364333333333335e-05, + "loss": 1.5672, + "step": 22100 + }, + { + "epoch": 133.52, + "learning_rate": 7.371e-05, + "loss": 1.5869, + "step": 22120 + }, + { + "epoch": 133.64, + "learning_rate": 7.377666666666667e-05, + "loss": 1.6036, + "step": 22140 + }, + { + "epoch": 133.76, + "learning_rate": 7.384333333333334e-05, + "loss": 1.6253, + "step": 22160 + }, + { + "epoch": 133.88, + "learning_rate": 7.391e-05, + "loss": 1.6508, + "step": 22180 + }, + { + "epoch": 134.0, + "learning_rate": 7.397666666666667e-05, + "loss": 1.6691, + "step": 22200 + }, + { + "epoch": 134.12, + "learning_rate": 7.404333333333334e-05, + "loss": 1.4677, + "step": 22220 + }, + { + "epoch": 134.25, + "learning_rate": 7.411000000000001e-05, + "loss": 1.4871, + "step": 22240 + }, + { + "epoch": 134.37, + "learning_rate": 7.417666666666667e-05, + "loss": 1.5172, + "step": 22260 + }, + { + "epoch": 134.49, + "learning_rate": 7.424333333333333e-05, + "loss": 1.5336, + "step": 22280 + }, + { + "epoch": 134.61, + "learning_rate": 7.431e-05, + "loss": 1.5616, + "step": 22300 + }, + { + "epoch": 134.73, + "learning_rate": 7.437666666666668e-05, + "loss": 1.5933, + "step": 22320 + }, + { + "epoch": 134.85, + "learning_rate": 7.444333333333333e-05, + "loss": 1.6153, + "step": 22340 + }, + { + "epoch": 134.97, + "learning_rate": 7.451e-05, + "loss": 1.6363, + "step": 22360 + }, + { + "epoch": 135.09, + "learning_rate": 7.457333333333334e-05, + "loss": 1.4673, + "step": 22380 + }, + { + "epoch": 135.21, + "learning_rate": 7.464e-05, + "loss": 1.4428, + "step": 22400 + }, + { + "epoch": 135.33, + "learning_rate": 7.470666666666667e-05, + "loss": 1.4685, + "step": 22420 + }, + { + "epoch": 135.45, + "learning_rate": 7.477333333333334e-05, + "loss": 1.5001, + "step": 22440 + }, + { + "epoch": 135.57, + "learning_rate": 7.484e-05, + "loss": 1.5205, + "step": 22460 + }, + { + "epoch": 135.69, + "learning_rate": 7.490666666666667e-05, + "loss": 1.5495, + "step": 22480 + }, + { + "epoch": 135.81, + "learning_rate": 7.497333333333334e-05, + "loss": 1.5563, + "step": 22500 + }, + { + "epoch": 135.94, + "learning_rate": 7.504e-05, + "loss": 1.5845, + "step": 22520 + }, + { + "epoch": 136.06, + "learning_rate": 7.510666666666666e-05, + "loss": 1.498, + "step": 22540 + }, + { + "epoch": 136.18, + "learning_rate": 7.517333333333334e-05, + "loss": 1.4094, + "step": 22560 + }, + { + "epoch": 136.3, + "learning_rate": 7.524e-05, + "loss": 1.4375, + "step": 22580 + }, + { + "epoch": 136.42, + "learning_rate": 7.530666666666667e-05, + "loss": 1.4701, + "step": 22600 + }, + { + "epoch": 136.54, + "learning_rate": 7.537333333333335e-05, + "loss": 1.5002, + "step": 22620 + }, + { + "epoch": 136.66, + "learning_rate": 7.544e-05, + "loss": 1.512, + "step": 22640 + }, + { + "epoch": 136.78, + "learning_rate": 7.550666666666667e-05, + "loss": 1.5179, + "step": 22660 + }, + { + "epoch": 136.9, + "learning_rate": 7.557333333333334e-05, + "loss": 1.5384, + "step": 22680 + }, + { + "epoch": 137.02, + "learning_rate": 7.564e-05, + "loss": 1.5182, + "step": 22700 + }, + { + "epoch": 137.14, + "learning_rate": 7.570666666666666e-05, + "loss": 1.3657, + "step": 22720 + }, + { + "epoch": 137.26, + "learning_rate": 7.577333333333334e-05, + "loss": 1.3809, + "step": 22740 + }, + { + "epoch": 137.38, + "learning_rate": 7.584e-05, + "loss": 1.4188, + "step": 22760 + }, + { + "epoch": 137.51, + "learning_rate": 7.590666666666667e-05, + "loss": 1.4384, + "step": 22780 + }, + { + "epoch": 137.63, + "learning_rate": 7.597333333333335e-05, + "loss": 1.4652, + "step": 22800 + }, + { + "epoch": 137.75, + "learning_rate": 7.604e-05, + "loss": 1.4989, + "step": 22820 + }, + { + "epoch": 137.87, + "learning_rate": 7.610666666666667e-05, + "loss": 1.5065, + "step": 22840 + }, + { + "epoch": 137.99, + "learning_rate": 7.617333333333334e-05, + "loss": 1.5319, + "step": 22860 + }, + { + "epoch": 138.11, + "learning_rate": 7.624e-05, + "loss": 1.3482, + "step": 22880 + }, + { + "epoch": 138.23, + "learning_rate": 7.630666666666667e-05, + "loss": 1.3541, + "step": 22900 + }, + { + "epoch": 138.35, + "learning_rate": 7.637333333333334e-05, + "loss": 1.3673, + "step": 22920 + }, + { + "epoch": 138.47, + "learning_rate": 7.644e-05, + "loss": 1.4019, + "step": 22940 + }, + { + "epoch": 138.59, + "learning_rate": 7.650666666666667e-05, + "loss": 1.4269, + "step": 22960 + }, + { + "epoch": 138.71, + "learning_rate": 7.657333333333335e-05, + "loss": 1.4475, + "step": 22980 + }, + { + "epoch": 138.83, + "learning_rate": 7.664e-05, + "loss": 1.4762, + "step": 23000 + }, + { + "epoch": 138.83, + "eval_accuracy": 0.001289624719700487, + "eval_loss": 9.533530235290527, + "eval_runtime": 18.0601, + "eval_samples_per_second": 112.956, + "eval_steps_per_second": 2.381, + "step": 23000 + }, + { + "epoch": 138.95, + "learning_rate": 7.670666666666668e-05, + "loss": 1.5101, + "step": 23020 + }, + { + "epoch": 139.07, + "learning_rate": 7.677333333333334e-05, + "loss": 1.3644, + "step": 23040 + }, + { + "epoch": 139.2, + "learning_rate": 7.684e-05, + "loss": 1.3088, + "step": 23060 + }, + { + "epoch": 139.32, + "learning_rate": 7.690666666666667e-05, + "loss": 1.3408, + "step": 23080 + }, + { + "epoch": 139.44, + "learning_rate": 7.697333333333334e-05, + "loss": 1.3495, + "step": 23100 + }, + { + "epoch": 139.56, + "learning_rate": 7.704000000000001e-05, + "loss": 1.3906, + "step": 23120 + }, + { + "epoch": 139.68, + "learning_rate": 7.710666666666667e-05, + "loss": 1.408, + "step": 23140 + }, + { + "epoch": 139.8, + "learning_rate": 7.717333333333334e-05, + "loss": 1.4335, + "step": 23160 + }, + { + "epoch": 139.92, + "learning_rate": 7.724e-05, + "loss": 1.4561, + "step": 23180 + }, + { + "epoch": 140.04, + "learning_rate": 7.730666666666668e-05, + "loss": 1.3944, + "step": 23200 + }, + { + "epoch": 140.16, + "learning_rate": 7.737333333333334e-05, + "loss": 1.2728, + "step": 23220 + }, + { + "epoch": 140.28, + "learning_rate": 7.744e-05, + "loss": 1.2974, + "step": 23240 + }, + { + "epoch": 140.4, + "learning_rate": 7.750666666666667e-05, + "loss": 1.321, + "step": 23260 + }, + { + "epoch": 140.52, + "learning_rate": 7.757333333333335e-05, + "loss": 1.3482, + "step": 23280 + }, + { + "epoch": 140.64, + "learning_rate": 7.764e-05, + "loss": 1.3878, + "step": 23300 + }, + { + "epoch": 140.76, + "learning_rate": 7.770666666666667e-05, + "loss": 1.3908, + "step": 23320 + }, + { + "epoch": 140.89, + "learning_rate": 7.777333333333334e-05, + "loss": 1.4063, + "step": 23340 + }, + { + "epoch": 141.01, + "learning_rate": 7.784e-05, + "loss": 1.4223, + "step": 23360 + }, + { + "epoch": 141.13, + "learning_rate": 7.790666666666668e-05, + "loss": 1.2325, + "step": 23380 + }, + { + "epoch": 141.25, + "learning_rate": 7.797333333333333e-05, + "loss": 1.2659, + "step": 23400 + }, + { + "epoch": 141.37, + "learning_rate": 7.804e-05, + "loss": 1.2973, + "step": 23420 + }, + { + "epoch": 141.49, + "learning_rate": 7.810666666666667e-05, + "loss": 1.3071, + "step": 23440 + }, + { + "epoch": 141.61, + "learning_rate": 7.817333333333333e-05, + "loss": 1.3359, + "step": 23460 + }, + { + "epoch": 141.73, + "learning_rate": 7.824e-05, + "loss": 1.3585, + "step": 23480 + }, + { + "epoch": 141.85, + "learning_rate": 7.830666666666667e-05, + "loss": 1.3678, + "step": 23500 + }, + { + "epoch": 141.97, + "learning_rate": 7.837333333333334e-05, + "loss": 1.379, + "step": 23520 + }, + { + "epoch": 142.09, + "learning_rate": 7.844e-05, + "loss": 1.2398, + "step": 23540 + }, + { + "epoch": 142.21, + "learning_rate": 7.850666666666668e-05, + "loss": 1.2069, + "step": 23560 + }, + { + "epoch": 142.33, + "learning_rate": 7.857333333333333e-05, + "loss": 1.2478, + "step": 23580 + }, + { + "epoch": 142.45, + "learning_rate": 7.864e-05, + "loss": 1.2768, + "step": 23600 + }, + { + "epoch": 142.58, + "learning_rate": 7.870666666666667e-05, + "loss": 1.2963, + "step": 23620 + }, + { + "epoch": 142.7, + "learning_rate": 7.876999999999999e-05, + "loss": 1.3087, + "step": 23640 + }, + { + "epoch": 142.82, + "learning_rate": 7.883666666666667e-05, + "loss": 1.3438, + "step": 23660 + }, + { + "epoch": 142.94, + "learning_rate": 7.890333333333333e-05, + "loss": 1.3481, + "step": 23680 + }, + { + "epoch": 143.06, + "learning_rate": 7.897e-05, + "loss": 1.2644, + "step": 23700 + }, + { + "epoch": 143.18, + "learning_rate": 7.903666666666668e-05, + "loss": 1.176, + "step": 23720 + }, + { + "epoch": 143.3, + "learning_rate": 7.910333333333334e-05, + "loss": 1.206, + "step": 23740 + }, + { + "epoch": 143.42, + "learning_rate": 7.917e-05, + "loss": 1.23, + "step": 23760 + }, + { + "epoch": 143.54, + "learning_rate": 7.923666666666667e-05, + "loss": 1.2664, + "step": 23780 + }, + { + "epoch": 143.66, + "learning_rate": 7.930333333333334e-05, + "loss": 1.2763, + "step": 23800 + }, + { + "epoch": 143.78, + "learning_rate": 7.937e-05, + "loss": 1.3145, + "step": 23820 + }, + { + "epoch": 143.9, + "learning_rate": 7.943666666666667e-05, + "loss": 1.3303, + "step": 23840 + }, + { + "epoch": 144.02, + "learning_rate": 7.950333333333334e-05, + "loss": 1.305, + "step": 23860 + }, + { + "epoch": 144.14, + "learning_rate": 7.957e-05, + "loss": 1.152, + "step": 23880 + }, + { + "epoch": 144.27, + "learning_rate": 7.963666666666668e-05, + "loss": 1.1715, + "step": 23900 + }, + { + "epoch": 144.39, + "learning_rate": 7.970333333333334e-05, + "loss": 1.2116, + "step": 23920 + }, + { + "epoch": 144.51, + "learning_rate": 7.977e-05, + "loss": 1.2291, + "step": 23940 + }, + { + "epoch": 144.63, + "learning_rate": 7.983666666666667e-05, + "loss": 1.2752, + "step": 23960 + }, + { + "epoch": 144.75, + "learning_rate": 7.990333333333334e-05, + "loss": 1.267, + "step": 23980 + }, + { + "epoch": 144.87, + "learning_rate": 7.997e-05, + "loss": 1.2833, + "step": 24000 + }, + { + "epoch": 144.87, + "eval_accuracy": 0.001314320712034851, + "eval_loss": 10.197283744812012, + "eval_runtime": 18.1169, + "eval_samples_per_second": 112.602, + "eval_steps_per_second": 2.373, + "step": 24000 + }, + { + "epoch": 144.99, + "learning_rate": 8.003666666666667e-05, + "loss": 1.319, + "step": 24020 + }, + { + "epoch": 145.11, + "learning_rate": 8.010333333333334e-05, + "loss": 1.1368, + "step": 24040 + }, + { + "epoch": 145.23, + "learning_rate": 8.017e-05, + "loss": 1.1447, + "step": 24060 + }, + { + "epoch": 145.35, + "learning_rate": 8.023666666666668e-05, + "loss": 1.1814, + "step": 24080 + }, + { + "epoch": 145.47, + "learning_rate": 8.030333333333334e-05, + "loss": 1.2116, + "step": 24100 + }, + { + "epoch": 145.59, + "learning_rate": 8.037e-05, + "loss": 1.221, + "step": 24120 + }, + { + "epoch": 145.71, + "learning_rate": 8.043666666666667e-05, + "loss": 1.2411, + "step": 24140 + }, + { + "epoch": 145.84, + "learning_rate": 8.050333333333335e-05, + "loss": 1.2624, + "step": 24160 + }, + { + "epoch": 145.96, + "learning_rate": 8.057e-05, + "loss": 1.2827, + "step": 24180 + }, + { + "epoch": 146.08, + "learning_rate": 8.063666666666667e-05, + "loss": 1.1684, + "step": 24200 + }, + { + "epoch": 146.2, + "learning_rate": 8.070333333333334e-05, + "loss": 1.1256, + "step": 24220 + }, + { + "epoch": 146.32, + "learning_rate": 8.077e-05, + "loss": 1.1634, + "step": 24240 + }, + { + "epoch": 146.44, + "learning_rate": 8.083666666666668e-05, + "loss": 1.1625, + "step": 24260 + }, + { + "epoch": 146.56, + "learning_rate": 8.090333333333334e-05, + "loss": 1.1606, + "step": 24280 + }, + { + "epoch": 146.68, + "learning_rate": 8.097e-05, + "loss": 1.1903, + "step": 24300 + }, + { + "epoch": 146.8, + "learning_rate": 8.103666666666667e-05, + "loss": 1.218, + "step": 24320 + }, + { + "epoch": 146.92, + "learning_rate": 8.110333333333335e-05, + "loss": 1.2402, + "step": 24340 + }, + { + "epoch": 147.04, + "learning_rate": 8.117e-05, + "loss": 1.1858, + "step": 24360 + }, + { + "epoch": 147.16, + "learning_rate": 8.123666666666667e-05, + "loss": 1.0605, + "step": 24380 + }, + { + "epoch": 147.28, + "learning_rate": 8.130333333333334e-05, + "loss": 1.1045, + "step": 24400 + }, + { + "epoch": 147.4, + "learning_rate": 8.137e-05, + "loss": 1.1225, + "step": 24420 + }, + { + "epoch": 147.53, + "learning_rate": 8.143666666666667e-05, + "loss": 1.1464, + "step": 24440 + }, + { + "epoch": 147.65, + "learning_rate": 8.150333333333334e-05, + "loss": 1.1694, + "step": 24460 + }, + { + "epoch": 147.77, + "learning_rate": 8.157e-05, + "loss": 1.1967, + "step": 24480 + }, + { + "epoch": 147.89, + "learning_rate": 8.163666666666667e-05, + "loss": 1.2042, + "step": 24500 + }, + { + "epoch": 148.01, + "learning_rate": 8.170333333333333e-05, + "loss": 1.2241, + "step": 24520 + }, + { + "epoch": 148.13, + "learning_rate": 8.177e-05, + "loss": 1.0313, + "step": 24540 + }, + { + "epoch": 148.25, + "learning_rate": 8.183666666666668e-05, + "loss": 1.0624, + "step": 24560 + }, + { + "epoch": 148.37, + "learning_rate": 8.190333333333333e-05, + "loss": 1.0905, + "step": 24580 + }, + { + "epoch": 148.49, + "learning_rate": 8.197e-05, + "loss": 1.1182, + "step": 24600 + }, + { + "epoch": 148.61, + "learning_rate": 8.203666666666667e-05, + "loss": 1.1554, + "step": 24620 + }, + { + "epoch": 148.73, + "learning_rate": 8.210333333333333e-05, + "loss": 1.1825, + "step": 24640 + }, + { + "epoch": 148.85, + "learning_rate": 8.217000000000001e-05, + "loss": 1.1987, + "step": 24660 + }, + { + "epoch": 148.97, + "learning_rate": 8.223666666666667e-05, + "loss": 1.2207, + "step": 24680 + }, + { + "epoch": 149.09, + "learning_rate": 8.230333333333334e-05, + "loss": 1.0685, + "step": 24700 + }, + { + "epoch": 149.22, + "learning_rate": 8.237e-05, + "loss": 1.0407, + "step": 24720 + }, + { + "epoch": 149.34, + "learning_rate": 8.243666666666668e-05, + "loss": 1.0582, + "step": 24740 + }, + { + "epoch": 149.46, + "learning_rate": 8.250333333333333e-05, + "loss": 1.0789, + "step": 24760 + }, + { + "epoch": 149.58, + "learning_rate": 8.257e-05, + "loss": 1.1121, + "step": 24780 + }, + { + "epoch": 149.7, + "learning_rate": 8.263666666666667e-05, + "loss": 1.1414, + "step": 24800 + }, + { + "epoch": 149.82, + "learning_rate": 8.270333333333333e-05, + "loss": 1.1653, + "step": 24820 + }, + { + "epoch": 149.94, + "learning_rate": 8.277000000000001e-05, + "loss": 1.1782, + "step": 24840 + }, + { + "epoch": 150.06, + "learning_rate": 8.283666666666667e-05, + "loss": 1.0871, + "step": 24860 + }, + { + "epoch": 150.18, + "learning_rate": 8.290333333333334e-05, + "loss": 1.0067, + "step": 24880 + }, + { + "epoch": 150.3, + "learning_rate": 8.297e-05, + "loss": 1.0524, + "step": 24900 + }, + { + "epoch": 150.42, + "learning_rate": 8.303666666666668e-05, + "loss": 1.0559, + "step": 24920 + }, + { + "epoch": 150.54, + "learning_rate": 8.310333333333333e-05, + "loss": 1.0838, + "step": 24940 + }, + { + "epoch": 150.66, + "learning_rate": 8.317e-05, + "loss": 1.1119, + "step": 24960 + }, + { + "epoch": 150.78, + "learning_rate": 8.323666666666667e-05, + "loss": 1.1226, + "step": 24980 + }, + { + "epoch": 150.91, + "learning_rate": 8.330333333333333e-05, + "loss": 1.1451, + "step": 25000 + }, + { + "epoch": 150.91, + "eval_accuracy": 0.0012935760784739852, + "eval_loss": 10.821310043334961, + "eval_runtime": 18.1019, + "eval_samples_per_second": 112.695, + "eval_steps_per_second": 2.375, + "step": 25000 + }, + { + "epoch": 151.03, + "learning_rate": 8.337000000000001e-05, + "loss": 1.12, + "step": 25020 + }, + { + "epoch": 151.15, + "learning_rate": 8.343666666666667e-05, + "loss": 0.9728, + "step": 25040 + }, + { + "epoch": 151.27, + "learning_rate": 8.350333333333334e-05, + "loss": 1.0058, + "step": 25060 + }, + { + "epoch": 151.39, + "learning_rate": 8.357e-05, + "loss": 1.0314, + "step": 25080 + }, + { + "epoch": 151.51, + "learning_rate": 8.363666666666668e-05, + "loss": 1.0595, + "step": 25100 + }, + { + "epoch": 151.63, + "learning_rate": 8.370333333333333e-05, + "loss": 1.0719, + "step": 25120 + }, + { + "epoch": 151.75, + "learning_rate": 8.377e-05, + "loss": 1.1014, + "step": 25140 + }, + { + "epoch": 151.87, + "learning_rate": 8.383666666666667e-05, + "loss": 1.1169, + "step": 25160 + }, + { + "epoch": 151.99, + "learning_rate": 8.390333333333333e-05, + "loss": 1.1255, + "step": 25180 + }, + { + "epoch": 152.11, + "learning_rate": 8.397000000000001e-05, + "loss": 0.9556, + "step": 25200 + }, + { + "epoch": 152.23, + "learning_rate": 8.403666666666667e-05, + "loss": 0.9681, + "step": 25220 + }, + { + "epoch": 152.35, + "learning_rate": 8.410333333333334e-05, + "loss": 1.0043, + "step": 25240 + }, + { + "epoch": 152.47, + "learning_rate": 8.417e-05, + "loss": 1.0042, + "step": 25260 + }, + { + "epoch": 152.6, + "learning_rate": 8.423666666666668e-05, + "loss": 1.0228, + "step": 25280 + }, + { + "epoch": 152.72, + "learning_rate": 8.430333333333333e-05, + "loss": 1.0566, + "step": 25300 + }, + { + "epoch": 152.84, + "learning_rate": 8.437000000000001e-05, + "loss": 1.0815, + "step": 25320 + }, + { + "epoch": 152.96, + "learning_rate": 8.443666666666667e-05, + "loss": 1.1157, + "step": 25340 + }, + { + "epoch": 153.08, + "learning_rate": 8.450333333333333e-05, + "loss": 0.9935, + "step": 25360 + }, + { + "epoch": 153.2, + "learning_rate": 8.457e-05, + "loss": 0.9415, + "step": 25380 + }, + { + "epoch": 153.32, + "learning_rate": 8.463666666666668e-05, + "loss": 0.9601, + "step": 25400 + }, + { + "epoch": 153.44, + "learning_rate": 8.470333333333334e-05, + "loss": 0.9884, + "step": 25420 + }, + { + "epoch": 153.56, + "learning_rate": 8.477e-05, + "loss": 1.0064, + "step": 25440 + }, + { + "epoch": 153.68, + "learning_rate": 8.483666666666668e-05, + "loss": 1.0422, + "step": 25460 + }, + { + "epoch": 153.8, + "learning_rate": 8.490333333333333e-05, + "loss": 1.0665, + "step": 25480 + }, + { + "epoch": 153.92, + "learning_rate": 8.497000000000001e-05, + "loss": 1.0851, + "step": 25500 + }, + { + "epoch": 154.04, + "learning_rate": 8.503666666666667e-05, + "loss": 1.0234, + "step": 25520 + }, + { + "epoch": 154.16, + "learning_rate": 8.510333333333334e-05, + "loss": 0.9137, + "step": 25540 + }, + { + "epoch": 154.29, + "learning_rate": 8.517e-05, + "loss": 0.9448, + "step": 25560 + }, + { + "epoch": 154.41, + "learning_rate": 8.523666666666668e-05, + "loss": 0.9683, + "step": 25580 + }, + { + "epoch": 154.53, + "learning_rate": 8.530333333333334e-05, + "loss": 1.0072, + "step": 25600 + }, + { + "epoch": 154.65, + "learning_rate": 8.537e-05, + "loss": 0.9969, + "step": 25620 + }, + { + "epoch": 154.77, + "learning_rate": 8.543666666666668e-05, + "loss": 1.022, + "step": 25640 + }, + { + "epoch": 154.89, + "learning_rate": 8.55e-05, + "loss": 1.0452, + "step": 25660 + }, + { + "epoch": 155.01, + "learning_rate": 8.556666666666667e-05, + "loss": 1.0589, + "step": 25680 + }, + { + "epoch": 155.13, + "learning_rate": 8.563333333333333e-05, + "loss": 0.8943, + "step": 25700 + }, + { + "epoch": 155.25, + "learning_rate": 8.57e-05, + "loss": 0.9057, + "step": 25720 + }, + { + "epoch": 155.37, + "learning_rate": 8.576666666666667e-05, + "loss": 0.943, + "step": 25740 + }, + { + "epoch": 155.49, + "learning_rate": 8.583333333333334e-05, + "loss": 0.9658, + "step": 25760 + }, + { + "epoch": 155.61, + "learning_rate": 8.59e-05, + "loss": 0.9853, + "step": 25780 + }, + { + "epoch": 155.73, + "learning_rate": 8.596666666666668e-05, + "loss": 1.0105, + "step": 25800 + }, + { + "epoch": 155.86, + "learning_rate": 8.603333333333333e-05, + "loss": 1.0318, + "step": 25820 + }, + { + "epoch": 155.98, + "learning_rate": 8.61e-05, + "loss": 1.0523, + "step": 25840 + }, + { + "epoch": 156.1, + "learning_rate": 8.616666666666667e-05, + "loss": 0.8952, + "step": 25860 + }, + { + "epoch": 156.22, + "learning_rate": 8.623333333333333e-05, + "loss": 0.899, + "step": 25880 + }, + { + "epoch": 156.34, + "learning_rate": 8.63e-05, + "loss": 0.9027, + "step": 25900 + }, + { + "epoch": 156.46, + "learning_rate": 8.636666666666667e-05, + "loss": 0.9333, + "step": 25920 + }, + { + "epoch": 156.58, + "learning_rate": 8.643333333333334e-05, + "loss": 0.962, + "step": 25940 + }, + { + "epoch": 156.7, + "learning_rate": 8.65e-05, + "loss": 0.9726, + "step": 25960 + }, + { + "epoch": 156.82, + "learning_rate": 8.656666666666668e-05, + "loss": 0.9879, + "step": 25980 + }, + { + "epoch": 156.94, + "learning_rate": 8.663333333333333e-05, + "loss": 1.0251, + "step": 26000 + }, + { + "epoch": 156.94, + "eval_accuracy": 0.0012832037616935523, + "eval_loss": 11.440205574035645, + "eval_runtime": 18.1944, + "eval_samples_per_second": 112.123, + "eval_steps_per_second": 2.363, + "step": 26000 + }, + { + "epoch": 157.06, + "learning_rate": 8.67e-05, + "loss": 0.9266, + "step": 26020 + }, + { + "epoch": 157.18, + "learning_rate": 8.676666666666667e-05, + "loss": 0.8623, + "step": 26040 + }, + { + "epoch": 157.3, + "learning_rate": 8.683333333333333e-05, + "loss": 0.887, + "step": 26060 + }, + { + "epoch": 157.42, + "learning_rate": 8.69e-05, + "loss": 0.9145, + "step": 26080 + }, + { + "epoch": 157.55, + "learning_rate": 8.696666666666668e-05, + "loss": 0.9028, + "step": 26100 + }, + { + "epoch": 157.67, + "learning_rate": 8.703333333333334e-05, + "loss": 0.9591, + "step": 26120 + }, + { + "epoch": 157.79, + "learning_rate": 8.71e-05, + "loss": 0.98, + "step": 26140 + }, + { + "epoch": 157.91, + "learning_rate": 8.716666666666668e-05, + "loss": 1.0075, + "step": 26160 + }, + { + "epoch": 158.03, + "learning_rate": 8.723333333333333e-05, + "loss": 0.9595, + "step": 26180 + }, + { + "epoch": 158.15, + "learning_rate": 8.730000000000001e-05, + "loss": 0.8239, + "step": 26200 + }, + { + "epoch": 158.27, + "learning_rate": 8.736666666666667e-05, + "loss": 0.8542, + "step": 26220 + }, + { + "epoch": 158.39, + "learning_rate": 8.743333333333334e-05, + "loss": 0.8868, + "step": 26240 + }, + { + "epoch": 158.51, + "learning_rate": 8.75e-05, + "loss": 0.8996, + "step": 26260 + }, + { + "epoch": 158.63, + "learning_rate": 8.756666666666668e-05, + "loss": 0.9325, + "step": 26280 + }, + { + "epoch": 158.75, + "learning_rate": 8.763333333333334e-05, + "loss": 0.9553, + "step": 26300 + }, + { + "epoch": 158.87, + "learning_rate": 8.77e-05, + "loss": 0.9839, + "step": 26320 + }, + { + "epoch": 158.99, + "learning_rate": 8.776666666666668e-05, + "loss": 1.0011, + "step": 26340 + }, + { + "epoch": 159.11, + "learning_rate": 8.783333333333333e-05, + "loss": 0.8096, + "step": 26360 + }, + { + "epoch": 159.24, + "learning_rate": 8.790000000000001e-05, + "loss": 0.826, + "step": 26380 + }, + { + "epoch": 159.36, + "learning_rate": 8.796666666666667e-05, + "loss": 0.8569, + "step": 26400 + }, + { + "epoch": 159.48, + "learning_rate": 8.803333333333334e-05, + "loss": 0.8801, + "step": 26420 + }, + { + "epoch": 159.6, + "learning_rate": 8.81e-05, + "loss": 0.8991, + "step": 26440 + }, + { + "epoch": 159.72, + "learning_rate": 8.816666666666668e-05, + "loss": 0.9162, + "step": 26460 + }, + { + "epoch": 159.84, + "learning_rate": 8.823333333333334e-05, + "loss": 0.9429, + "step": 26480 + }, + { + "epoch": 159.96, + "learning_rate": 8.83e-05, + "loss": 0.9653, + "step": 26500 + }, + { + "epoch": 160.08, + "learning_rate": 8.836666666666667e-05, + "loss": 0.8525, + "step": 26520 + }, + { + "epoch": 160.2, + "learning_rate": 8.843333333333333e-05, + "loss": 0.8147, + "step": 26540 + }, + { + "epoch": 160.32, + "learning_rate": 8.850000000000001e-05, + "loss": 0.829, + "step": 26560 + }, + { + "epoch": 160.44, + "learning_rate": 8.856666666666667e-05, + "loss": 0.8521, + "step": 26580 + }, + { + "epoch": 160.56, + "learning_rate": 8.863333333333334e-05, + "loss": 0.8666, + "step": 26600 + }, + { + "epoch": 160.68, + "learning_rate": 8.87e-05, + "loss": 0.9125, + "step": 26620 + }, + { + "epoch": 160.8, + "learning_rate": 8.876666666666668e-05, + "loss": 0.9217, + "step": 26640 + }, + { + "epoch": 160.93, + "learning_rate": 8.883333333333333e-05, + "loss": 0.9398, + "step": 26660 + }, + { + "epoch": 161.05, + "learning_rate": 8.89e-05, + "loss": 0.8752, + "step": 26680 + }, + { + "epoch": 161.17, + "learning_rate": 8.896666666666667e-05, + "loss": 0.7788, + "step": 26700 + }, + { + "epoch": 161.29, + "learning_rate": 8.903333333333333e-05, + "loss": 0.8097, + "step": 26720 + }, + { + "epoch": 161.41, + "learning_rate": 8.910000000000001e-05, + "loss": 0.8351, + "step": 26740 + }, + { + "epoch": 161.53, + "learning_rate": 8.916666666666667e-05, + "loss": 0.8531, + "step": 26760 + }, + { + "epoch": 161.65, + "learning_rate": 8.923333333333334e-05, + "loss": 0.8707, + "step": 26780 + }, + { + "epoch": 161.77, + "learning_rate": 8.93e-05, + "loss": 0.8952, + "step": 26800 + }, + { + "epoch": 161.89, + "learning_rate": 8.936666666666668e-05, + "loss": 0.9155, + "step": 26820 + }, + { + "epoch": 162.01, + "learning_rate": 8.943333333333333e-05, + "loss": 0.9079, + "step": 26840 + }, + { + "epoch": 162.13, + "learning_rate": 8.950000000000001e-05, + "loss": 0.7534, + "step": 26860 + }, + { + "epoch": 162.25, + "learning_rate": 8.956666666666667e-05, + "loss": 0.7953, + "step": 26880 + }, + { + "epoch": 162.37, + "learning_rate": 8.963333333333333e-05, + "loss": 0.8226, + "step": 26900 + }, + { + "epoch": 162.49, + "learning_rate": 8.970000000000001e-05, + "loss": 0.8424, + "step": 26920 + }, + { + "epoch": 162.62, + "learning_rate": 8.976666666666666e-05, + "loss": 0.8411, + "step": 26940 + }, + { + "epoch": 162.74, + "learning_rate": 8.983333333333334e-05, + "loss": 0.877, + "step": 26960 + }, + { + "epoch": 162.86, + "learning_rate": 8.99e-05, + "loss": 0.8957, + "step": 26980 + }, + { + "epoch": 162.98, + "learning_rate": 8.996666666666667e-05, + "loss": 0.9164, + "step": 27000 + }, + { + "epoch": 162.98, + "eval_accuracy": 0.0012822159220001778, + "eval_loss": 11.99949836730957, + "eval_runtime": 18.2112, + "eval_samples_per_second": 112.019, + "eval_steps_per_second": 2.361, + "step": 27000 + }, + { + "epoch": 163.1, + "learning_rate": 9.003333333333333e-05, + "loss": 0.7606, + "step": 27020 + }, + { + "epoch": 163.22, + "learning_rate": 9.010000000000001e-05, + "loss": 0.7527, + "step": 27040 + }, + { + "epoch": 163.34, + "learning_rate": 9.016666666666667e-05, + "loss": 0.7913, + "step": 27060 + }, + { + "epoch": 163.46, + "learning_rate": 9.023333333333334e-05, + "loss": 0.8033, + "step": 27080 + }, + { + "epoch": 163.58, + "learning_rate": 9.030000000000001e-05, + "loss": 0.8187, + "step": 27100 + }, + { + "epoch": 163.7, + "learning_rate": 9.036666666666666e-05, + "loss": 0.8593, + "step": 27120 + }, + { + "epoch": 163.82, + "learning_rate": 9.043333333333334e-05, + "loss": 0.8624, + "step": 27140 + }, + { + "epoch": 163.94, + "learning_rate": 9.05e-05, + "loss": 0.884, + "step": 27160 + }, + { + "epoch": 164.06, + "learning_rate": 9.056666666666667e-05, + "loss": 0.8033, + "step": 27180 + }, + { + "epoch": 164.19, + "learning_rate": 9.063333333333333e-05, + "loss": 0.7372, + "step": 27200 + }, + { + "epoch": 164.31, + "learning_rate": 9.070000000000001e-05, + "loss": 0.7514, + "step": 27220 + }, + { + "epoch": 164.43, + "learning_rate": 9.076666666666667e-05, + "loss": 0.7856, + "step": 27240 + }, + { + "epoch": 164.55, + "learning_rate": 9.083333333333334e-05, + "loss": 0.8032, + "step": 27260 + }, + { + "epoch": 164.67, + "learning_rate": 9.090000000000001e-05, + "loss": 0.8261, + "step": 27280 + }, + { + "epoch": 164.79, + "learning_rate": 9.096666666666666e-05, + "loss": 0.8518, + "step": 27300 + }, + { + "epoch": 164.91, + "learning_rate": 9.103333333333334e-05, + "loss": 0.8898, + "step": 27320 + }, + { + "epoch": 165.03, + "learning_rate": 9.11e-05, + "loss": 0.8442, + "step": 27340 + }, + { + "epoch": 165.15, + "learning_rate": 9.116666666666667e-05, + "loss": 0.7088, + "step": 27360 + }, + { + "epoch": 165.27, + "learning_rate": 9.123333333333333e-05, + "loss": 0.7288, + "step": 27380 + }, + { + "epoch": 165.39, + "learning_rate": 9.130000000000001e-05, + "loss": 0.7667, + "step": 27400 + }, + { + "epoch": 165.51, + "learning_rate": 9.136666666666666e-05, + "loss": 0.7884, + "step": 27420 + }, + { + "epoch": 165.63, + "learning_rate": 9.143333333333334e-05, + "loss": 0.815, + "step": 27440 + }, + { + "epoch": 165.75, + "learning_rate": 9.15e-05, + "loss": 0.8417, + "step": 27460 + }, + { + "epoch": 165.88, + "learning_rate": 9.156666666666667e-05, + "loss": 0.8541, + "step": 27480 + }, + { + "epoch": 166.0, + "learning_rate": 9.163333333333334e-05, + "loss": 0.8833, + "step": 27500 + }, + { + "epoch": 166.12, + "learning_rate": 9.17e-05, + "loss": 0.7026, + "step": 27520 + }, + { + "epoch": 166.24, + "learning_rate": 9.176666666666667e-05, + "loss": 0.7185, + "step": 27540 + }, + { + "epoch": 166.36, + "learning_rate": 9.183333333333333e-05, + "loss": 0.7563, + "step": 27560 + }, + { + "epoch": 166.48, + "learning_rate": 9.190000000000001e-05, + "loss": 0.7845, + "step": 27580 + }, + { + "epoch": 166.6, + "learning_rate": 9.196666666666666e-05, + "loss": 0.7935, + "step": 27600 + }, + { + "epoch": 166.72, + "learning_rate": 9.203333333333334e-05, + "loss": 0.8099, + "step": 27620 + }, + { + "epoch": 166.84, + "learning_rate": 9.21e-05, + "loss": 0.8281, + "step": 27640 + }, + { + "epoch": 166.96, + "learning_rate": 9.216666666666667e-05, + "loss": 0.8446, + "step": 27660 + }, + { + "epoch": 167.08, + "learning_rate": 9.223e-05, + "loss": 0.7318, + "step": 27680 + }, + { + "epoch": 167.2, + "learning_rate": 9.229666666666668e-05, + "loss": 0.7045, + "step": 27700 + }, + { + "epoch": 167.32, + "learning_rate": 9.236333333333333e-05, + "loss": 0.7265, + "step": 27720 + }, + { + "epoch": 167.44, + "learning_rate": 9.243000000000001e-05, + "loss": 0.7518, + "step": 27740 + }, + { + "epoch": 167.57, + "learning_rate": 9.249666666666667e-05, + "loss": 0.7368, + "step": 27760 + }, + { + "epoch": 167.69, + "learning_rate": 9.256333333333334e-05, + "loss": 0.774, + "step": 27780 + }, + { + "epoch": 167.81, + "learning_rate": 9.263e-05, + "loss": 0.8074, + "step": 27800 + }, + { + "epoch": 167.93, + "learning_rate": 9.269666666666668e-05, + "loss": 0.8461, + "step": 27820 + }, + { + "epoch": 168.05, + "learning_rate": 9.276333333333334e-05, + "loss": 0.7798, + "step": 27840 + }, + { + "epoch": 168.17, + "learning_rate": 9.283e-05, + "loss": 0.6831, + "step": 27860 + }, + { + "epoch": 168.29, + "learning_rate": 9.289666666666668e-05, + "loss": 0.7007, + "step": 27880 + }, + { + "epoch": 168.41, + "learning_rate": 9.296333333333333e-05, + "loss": 0.7333, + "step": 27900 + }, + { + "epoch": 168.53, + "learning_rate": 9.303000000000001e-05, + "loss": 0.7524, + "step": 27920 + }, + { + "epoch": 168.65, + "learning_rate": 9.309666666666667e-05, + "loss": 0.779, + "step": 27940 + }, + { + "epoch": 168.77, + "learning_rate": 9.316333333333334e-05, + "loss": 0.7919, + "step": 27960 + }, + { + "epoch": 168.89, + "learning_rate": 9.323e-05, + "loss": 0.8171, + "step": 27980 + }, + { + "epoch": 169.01, + "learning_rate": 9.329666666666668e-05, + "loss": 0.8174, + "step": 28000 + }, + { + "epoch": 169.01, + "eval_accuracy": 0.0012678922464462467, + "eval_loss": 12.56796646118164, + "eval_runtime": 18.1166, + "eval_samples_per_second": 112.604, + "eval_steps_per_second": 2.374, + "step": 28000 + }, + { + "epoch": 169.13, + "learning_rate": 9.336333333333334e-05, + "loss": 0.6442, + "step": 28020 + }, + { + "epoch": 169.26, + "learning_rate": 9.343e-05, + "loss": 0.6902, + "step": 28040 + }, + { + "epoch": 169.38, + "learning_rate": 9.349666666666667e-05, + "loss": 0.7125, + "step": 28060 + }, + { + "epoch": 169.5, + "learning_rate": 9.356333333333333e-05, + "loss": 0.735, + "step": 28080 + }, + { + "epoch": 169.62, + "learning_rate": 9.363000000000001e-05, + "loss": 0.7575, + "step": 28100 + }, + { + "epoch": 169.74, + "learning_rate": 9.369666666666666e-05, + "loss": 0.7694, + "step": 28120 + }, + { + "epoch": 169.86, + "learning_rate": 9.376333333333334e-05, + "loss": 0.799, + "step": 28140 + }, + { + "epoch": 169.98, + "learning_rate": 9.383e-05, + "loss": 0.8383, + "step": 28160 + }, + { + "epoch": 170.1, + "learning_rate": 9.389666666666666e-05, + "loss": 0.7015, + "step": 28180 + }, + { + "epoch": 170.22, + "learning_rate": 9.396333333333334e-05, + "loss": 0.6759, + "step": 28200 + }, + { + "epoch": 170.34, + "learning_rate": 9.403e-05, + "loss": 0.6986, + "step": 28220 + }, + { + "epoch": 170.46, + "learning_rate": 9.409666666666667e-05, + "loss": 0.7233, + "step": 28240 + }, + { + "epoch": 170.58, + "learning_rate": 9.416333333333333e-05, + "loss": 0.747, + "step": 28260 + }, + { + "epoch": 170.7, + "learning_rate": 9.423000000000001e-05, + "loss": 0.7691, + "step": 28280 + }, + { + "epoch": 170.82, + "learning_rate": 9.429666666666666e-05, + "loss": 0.7904, + "step": 28300 + }, + { + "epoch": 170.95, + "learning_rate": 9.436333333333334e-05, + "loss": 0.8325, + "step": 28320 + }, + { + "epoch": 171.07, + "learning_rate": 9.443e-05, + "loss": 0.7161, + "step": 28340 + }, + { + "epoch": 171.19, + "learning_rate": 9.449666666666667e-05, + "loss": 0.6488, + "step": 28360 + }, + { + "epoch": 171.31, + "learning_rate": 9.456333333333334e-05, + "loss": 0.6754, + "step": 28380 + }, + { + "epoch": 171.43, + "learning_rate": 9.463000000000001e-05, + "loss": 0.6955, + "step": 28400 + }, + { + "epoch": 171.55, + "learning_rate": 9.469666666666667e-05, + "loss": 0.7172, + "step": 28420 + }, + { + "epoch": 171.67, + "learning_rate": 9.476333333333333e-05, + "loss": 0.7461, + "step": 28440 + }, + { + "epoch": 171.79, + "learning_rate": 9.483000000000001e-05, + "loss": 0.7593, + "step": 28460 + }, + { + "epoch": 171.91, + "learning_rate": 9.489666666666666e-05, + "loss": 0.7793, + "step": 28480 + }, + { + "epoch": 172.03, + "learning_rate": 9.496333333333334e-05, + "loss": 0.75, + "step": 28500 + }, + { + "epoch": 172.15, + "learning_rate": 9.503e-05, + "loss": 0.6312, + "step": 28520 + }, + { + "epoch": 172.27, + "learning_rate": 9.509666666666667e-05, + "loss": 0.645, + "step": 28540 + }, + { + "epoch": 172.39, + "learning_rate": 9.516333333333334e-05, + "loss": 0.6736, + "step": 28560 + }, + { + "epoch": 172.52, + "learning_rate": 9.523000000000001e-05, + "loss": 0.6944, + "step": 28580 + }, + { + "epoch": 172.64, + "learning_rate": 9.529666666666667e-05, + "loss": 0.7285, + "step": 28600 + }, + { + "epoch": 172.76, + "learning_rate": 9.536333333333334e-05, + "loss": 0.7417, + "step": 28620 + }, + { + "epoch": 172.88, + "learning_rate": 9.543000000000001e-05, + "loss": 0.7498, + "step": 28640 + }, + { + "epoch": 173.0, + "learning_rate": 9.549666666666666e-05, + "loss": 0.7812, + "step": 28660 + }, + { + "epoch": 173.12, + "learning_rate": 9.556333333333334e-05, + "loss": 0.5998, + "step": 28680 + }, + { + "epoch": 173.24, + "learning_rate": 9.563e-05, + "loss": 0.6117, + "step": 28700 + }, + { + "epoch": 173.36, + "learning_rate": 9.569666666666667e-05, + "loss": 0.6423, + "step": 28720 + }, + { + "epoch": 173.48, + "learning_rate": 9.576333333333333e-05, + "loss": 0.679, + "step": 28740 + }, + { + "epoch": 173.6, + "learning_rate": 9.583000000000001e-05, + "loss": 0.7041, + "step": 28760 + }, + { + "epoch": 173.72, + "learning_rate": 9.589666666666667e-05, + "loss": 0.7316, + "step": 28780 + }, + { + "epoch": 173.84, + "learning_rate": 9.596333333333334e-05, + "loss": 0.7618, + "step": 28800 + }, + { + "epoch": 173.96, + "learning_rate": 9.603000000000001e-05, + "loss": 0.7617, + "step": 28820 + }, + { + "epoch": 174.08, + "learning_rate": 9.609666666666666e-05, + "loss": 0.6526, + "step": 28840 + }, + { + "epoch": 174.21, + "learning_rate": 9.616333333333334e-05, + "loss": 0.6295, + "step": 28860 + }, + { + "epoch": 174.33, + "learning_rate": 9.623e-05, + "loss": 0.6592, + "step": 28880 + }, + { + "epoch": 174.45, + "learning_rate": 9.629666666666667e-05, + "loss": 0.6655, + "step": 28900 + }, + { + "epoch": 174.57, + "learning_rate": 9.636333333333333e-05, + "loss": 0.6944, + "step": 28920 + }, + { + "epoch": 174.69, + "learning_rate": 9.643000000000001e-05, + "loss": 0.7155, + "step": 28940 + }, + { + "epoch": 174.81, + "learning_rate": 9.649666666666667e-05, + "loss": 0.7335, + "step": 28960 + }, + { + "epoch": 174.93, + "learning_rate": 9.656333333333334e-05, + "loss": 0.7532, + "step": 28980 + }, + { + "epoch": 175.05, + "learning_rate": 9.663000000000002e-05, + "loss": 0.6862, + "step": 29000 + }, + { + "epoch": 175.05, + "eval_accuracy": 0.0012698679258329958, + "eval_loss": 13.00501537322998, + "eval_runtime": 18.2714, + "eval_samples_per_second": 111.65, + "eval_steps_per_second": 2.353, + "step": 29000 + }, + { + "epoch": 175.17, + "learning_rate": 9.669666666666667e-05, + "loss": 0.6244, + "step": 29020 + }, + { + "epoch": 175.29, + "learning_rate": 9.676333333333334e-05, + "loss": 0.6274, + "step": 29040 + }, + { + "epoch": 175.41, + "learning_rate": 9.683e-05, + "loss": 0.6504, + "step": 29060 + }, + { + "epoch": 175.53, + "learning_rate": 9.689666666666667e-05, + "loss": 0.6514, + "step": 29080 + }, + { + "epoch": 175.65, + "learning_rate": 9.696333333333333e-05, + "loss": 0.6818, + "step": 29100 + }, + { + "epoch": 175.77, + "learning_rate": 9.703000000000001e-05, + "loss": 0.7134, + "step": 29120 + }, + { + "epoch": 175.9, + "learning_rate": 9.709666666666667e-05, + "loss": 0.7197, + "step": 29140 + }, + { + "epoch": 176.02, + "learning_rate": 9.716333333333334e-05, + "loss": 0.7298, + "step": 29160 + }, + { + "epoch": 176.14, + "learning_rate": 9.723000000000002e-05, + "loss": 0.5845, + "step": 29180 + }, + { + "epoch": 176.26, + "learning_rate": 9.729666666666667e-05, + "loss": 0.6012, + "step": 29200 + }, + { + "epoch": 176.38, + "learning_rate": 9.736333333333334e-05, + "loss": 0.6107, + "step": 29220 + }, + { + "epoch": 176.5, + "learning_rate": 9.743000000000001e-05, + "loss": 0.6382, + "step": 29240 + }, + { + "epoch": 176.62, + "learning_rate": 9.749666666666667e-05, + "loss": 0.6693, + "step": 29260 + }, + { + "epoch": 176.74, + "learning_rate": 9.756333333333333e-05, + "loss": 0.6891, + "step": 29280 + }, + { + "epoch": 176.86, + "learning_rate": 9.763e-05, + "loss": 0.7019, + "step": 29300 + }, + { + "epoch": 176.98, + "learning_rate": 9.769666666666668e-05, + "loss": 0.7324, + "step": 29320 + }, + { + "epoch": 177.1, + "learning_rate": 9.776333333333334e-05, + "loss": 0.5831, + "step": 29340 + }, + { + "epoch": 177.22, + "learning_rate": 9.783e-05, + "loss": 0.5856, + "step": 29360 + }, + { + "epoch": 177.34, + "learning_rate": 9.789666666666667e-05, + "loss": 0.6299, + "step": 29380 + }, + { + "epoch": 177.46, + "learning_rate": 9.796333333333334e-05, + "loss": 0.6551, + "step": 29400 + }, + { + "epoch": 177.59, + "learning_rate": 9.803e-05, + "loss": 0.6866, + "step": 29420 + }, + { + "epoch": 177.71, + "learning_rate": 9.809666666666667e-05, + "loss": 0.6927, + "step": 29440 + }, + { + "epoch": 177.83, + "learning_rate": 9.816333333333334e-05, + "loss": 0.7127, + "step": 29460 + }, + { + "epoch": 177.95, + "learning_rate": 9.823e-05, + "loss": 0.729, + "step": 29480 + }, + { + "epoch": 178.07, + "learning_rate": 9.829666666666666e-05, + "loss": 0.6424, + "step": 29500 + }, + { + "epoch": 178.19, + "learning_rate": 9.836333333333334e-05, + "loss": 0.587, + "step": 29520 + }, + { + "epoch": 178.31, + "learning_rate": 9.843e-05, + "loss": 0.6095, + "step": 29540 + }, + { + "epoch": 178.43, + "learning_rate": 9.849666666666667e-05, + "loss": 0.6249, + "step": 29560 + }, + { + "epoch": 178.55, + "learning_rate": 9.856333333333335e-05, + "loss": 0.6561, + "step": 29580 + }, + { + "epoch": 178.67, + "learning_rate": 9.863e-05, + "loss": 0.6764, + "step": 29600 + }, + { + "epoch": 178.79, + "learning_rate": 9.869666666666667e-05, + "loss": 0.6963, + "step": 29620 + }, + { + "epoch": 178.91, + "learning_rate": 9.876333333333334e-05, + "loss": 0.7177, + "step": 29640 + }, + { + "epoch": 179.03, + "learning_rate": 9.883e-05, + "loss": 0.6766, + "step": 29660 + }, + { + "epoch": 179.15, + "learning_rate": 9.889333333333334e-05, + "loss": 0.5581, + "step": 29680 + }, + { + "epoch": 179.28, + "learning_rate": 9.896000000000001e-05, + "loss": 0.5755, + "step": 29700 + }, + { + "epoch": 179.4, + "learning_rate": 9.902666666666666e-05, + "loss": 0.6193, + "step": 29720 + }, + { + "epoch": 179.52, + "learning_rate": 9.909333333333334e-05, + "loss": 0.6339, + "step": 29740 + }, + { + "epoch": 179.64, + "learning_rate": 9.916e-05, + "loss": 0.6576, + "step": 29760 + }, + { + "epoch": 179.76, + "learning_rate": 9.922666666666667e-05, + "loss": 0.666, + "step": 29780 + }, + { + "epoch": 179.88, + "learning_rate": 9.929333333333333e-05, + "loss": 0.6945, + "step": 29800 + }, + { + "epoch": 180.0, + "learning_rate": 9.936000000000001e-05, + "loss": 0.7119, + "step": 29820 + }, + { + "epoch": 180.12, + "learning_rate": 9.942666666666667e-05, + "loss": 0.5469, + "step": 29840 + }, + { + "epoch": 180.24, + "learning_rate": 9.949333333333334e-05, + "loss": 0.5791, + "step": 29860 + }, + { + "epoch": 180.36, + "learning_rate": 9.956e-05, + "loss": 0.6019, + "step": 29880 + }, + { + "epoch": 180.48, + "learning_rate": 9.962666666666667e-05, + "loss": 0.6169, + "step": 29900 + }, + { + "epoch": 180.6, + "learning_rate": 9.969333333333334e-05, + "loss": 0.6365, + "step": 29920 + }, + { + "epoch": 180.72, + "learning_rate": 9.976000000000001e-05, + "loss": 0.6589, + "step": 29940 + }, + { + "epoch": 180.85, + "learning_rate": 9.982666666666667e-05, + "loss": 0.699, + "step": 29960 + }, + { + "epoch": 180.97, + "learning_rate": 9.989333333333333e-05, + "loss": 0.7137, + "step": 29980 + }, + { + "epoch": 181.09, + "learning_rate": 9.996000000000001e-05, + "loss": 0.5738, + "step": 30000 + }, + { + "epoch": 181.09, + "eval_accuracy": 0.0012624591281326866, + "eval_loss": 13.469202041625977, + "eval_runtime": 18.2911, + "eval_samples_per_second": 111.529, + "eval_steps_per_second": 2.351, + "step": 30000 + }, + { + "epoch": 181.21, + "learning_rate": 9.999999677727208e-05, + "loss": 0.5442, + "step": 30020 + }, + { + "epoch": 181.33, + "learning_rate": 9.99999605215876e-05, + "loss": 0.5689, + "step": 30040 + }, + { + "epoch": 181.45, + "learning_rate": 9.999988398183804e-05, + "loss": 0.6313, + "step": 30060 + }, + { + "epoch": 181.57, + "learning_rate": 9.999976715808505e-05, + "loss": 0.6337, + "step": 30080 + }, + { + "epoch": 181.69, + "learning_rate": 9.999961005042278e-05, + "loss": 0.6481, + "step": 30100 + }, + { + "epoch": 181.81, + "learning_rate": 9.999941265897779e-05, + "loss": 0.6627, + "step": 30120 + }, + { + "epoch": 181.93, + "learning_rate": 9.999917498390912e-05, + "loss": 0.6641, + "step": 30140 + }, + { + "epoch": 182.05, + "learning_rate": 9.999889702540826e-05, + "loss": 0.6085, + "step": 30160 + }, + { + "epoch": 182.17, + "learning_rate": 9.999857878369916e-05, + "loss": 0.5334, + "step": 30180 + }, + { + "epoch": 182.29, + "learning_rate": 9.99982202590382e-05, + "loss": 0.5593, + "step": 30200 + }, + { + "epoch": 182.41, + "learning_rate": 9.999782145171425e-05, + "loss": 0.5836, + "step": 30220 + }, + { + "epoch": 182.54, + "learning_rate": 9.999738236204865e-05, + "loss": 0.5933, + "step": 30240 + }, + { + "epoch": 182.66, + "learning_rate": 9.999690299039513e-05, + "loss": 0.6157, + "step": 30260 + }, + { + "epoch": 182.78, + "learning_rate": 9.999638333713993e-05, + "loss": 0.648, + "step": 30280 + }, + { + "epoch": 182.9, + "learning_rate": 9.99958234027017e-05, + "loss": 0.6773, + "step": 30300 + }, + { + "epoch": 183.02, + "learning_rate": 9.99952231875316e-05, + "loss": 0.6561, + "step": 30320 + }, + { + "epoch": 183.14, + "learning_rate": 9.999458269211321e-05, + "loss": 0.5231, + "step": 30340 + }, + { + "epoch": 183.26, + "learning_rate": 9.999390191696255e-05, + "loss": 0.5543, + "step": 30360 + }, + { + "epoch": 183.38, + "learning_rate": 9.99931808626281e-05, + "loss": 0.5525, + "step": 30380 + }, + { + "epoch": 183.5, + "learning_rate": 9.999241952969083e-05, + "loss": 0.5855, + "step": 30400 + }, + { + "epoch": 183.62, + "learning_rate": 9.99916179187641e-05, + "loss": 0.6019, + "step": 30420 + }, + { + "epoch": 183.74, + "learning_rate": 9.999077603049378e-05, + "loss": 0.6313, + "step": 30440 + }, + { + "epoch": 183.86, + "learning_rate": 9.998989386555814e-05, + "loss": 0.6601, + "step": 30460 + }, + { + "epoch": 183.98, + "learning_rate": 9.998897142466797e-05, + "loss": 0.6642, + "step": 30480 + }, + { + "epoch": 184.1, + "learning_rate": 9.99880087085664e-05, + "loss": 0.5154, + "step": 30500 + }, + { + "epoch": 184.23, + "learning_rate": 9.998700571802912e-05, + "loss": 0.5116, + "step": 30520 + }, + { + "epoch": 184.35, + "learning_rate": 9.998596245386422e-05, + "loss": 0.5457, + "step": 30540 + }, + { + "epoch": 184.47, + "learning_rate": 9.998487891691219e-05, + "loss": 0.5773, + "step": 30560 + }, + { + "epoch": 184.59, + "learning_rate": 9.998375510804609e-05, + "loss": 0.5974, + "step": 30580 + }, + { + "epoch": 184.71, + "learning_rate": 9.998259102817129e-05, + "loss": 0.6244, + "step": 30600 + }, + { + "epoch": 184.83, + "learning_rate": 9.99813866782257e-05, + "loss": 0.6283, + "step": 30620 + }, + { + "epoch": 184.95, + "learning_rate": 9.998014205917963e-05, + "loss": 0.6439, + "step": 30640 + }, + { + "epoch": 185.07, + "learning_rate": 9.997885717203586e-05, + "loss": 0.5588, + "step": 30660 + }, + { + "epoch": 185.19, + "learning_rate": 9.997753201782961e-05, + "loss": 0.5076, + "step": 30680 + }, + { + "epoch": 185.31, + "learning_rate": 9.997616659762848e-05, + "loss": 0.5282, + "step": 30700 + }, + { + "epoch": 185.43, + "learning_rate": 9.997476091253261e-05, + "loss": 0.5626, + "step": 30720 + }, + { + "epoch": 185.55, + "learning_rate": 9.997331496367455e-05, + "loss": 0.5749, + "step": 30740 + }, + { + "epoch": 185.67, + "learning_rate": 9.997182875221922e-05, + "loss": 0.5809, + "step": 30760 + }, + { + "epoch": 185.79, + "learning_rate": 9.997030227936408e-05, + "loss": 0.6128, + "step": 30780 + }, + { + "epoch": 185.92, + "learning_rate": 9.996873554633897e-05, + "loss": 0.6223, + "step": 30800 + }, + { + "epoch": 186.04, + "learning_rate": 9.996712855440616e-05, + "loss": 0.5699, + "step": 30820 + }, + { + "epoch": 186.16, + "learning_rate": 9.99654813048604e-05, + "loss": 0.4744, + "step": 30840 + }, + { + "epoch": 186.28, + "learning_rate": 9.996379379902883e-05, + "loss": 0.5115, + "step": 30860 + }, + { + "epoch": 186.4, + "learning_rate": 9.996206603827105e-05, + "loss": 0.5336, + "step": 30880 + }, + { + "epoch": 186.52, + "learning_rate": 9.99602980239791e-05, + "loss": 0.5699, + "step": 30900 + }, + { + "epoch": 186.64, + "learning_rate": 9.99584897575774e-05, + "loss": 0.5892, + "step": 30920 + }, + { + "epoch": 186.76, + "learning_rate": 9.995664124052288e-05, + "loss": 0.5947, + "step": 30940 + }, + { + "epoch": 186.88, + "learning_rate": 9.995475247430483e-05, + "loss": 0.6197, + "step": 30960 + }, + { + "epoch": 187.0, + "learning_rate": 9.995282346044501e-05, + "loss": 0.6107, + "step": 30980 + }, + { + "epoch": 187.12, + "learning_rate": 9.995085420049757e-05, + "loss": 0.4524, + "step": 31000 + }, + { + "epoch": 187.12, + "eval_accuracy": 0.0012782645632266796, + "eval_loss": 13.922017097473145, + "eval_runtime": 18.1472, + "eval_samples_per_second": 112.414, + "eval_steps_per_second": 2.37, + "step": 31000 + }, + { + "epoch": 187.24, + "learning_rate": 9.994884469604912e-05, + "loss": 0.488, + "step": 31020 + }, + { + "epoch": 187.36, + "learning_rate": 9.99467949487187e-05, + "loss": 0.5118, + "step": 31040 + }, + { + "epoch": 187.48, + "learning_rate": 9.994470496015773e-05, + "loss": 0.527, + "step": 31060 + }, + { + "epoch": 187.61, + "learning_rate": 9.994257473205009e-05, + "loss": 0.5565, + "step": 31080 + }, + { + "epoch": 187.73, + "learning_rate": 9.994040426611204e-05, + "loss": 0.5922, + "step": 31100 + }, + { + "epoch": 187.85, + "learning_rate": 9.993819356409233e-05, + "loss": 0.6074, + "step": 31120 + }, + { + "epoch": 187.97, + "learning_rate": 9.993594262777203e-05, + "loss": 0.6096, + "step": 31140 + }, + { + "epoch": 188.09, + "learning_rate": 9.993365145896473e-05, + "loss": 0.4983, + "step": 31160 + }, + { + "epoch": 188.21, + "learning_rate": 9.993132005951635e-05, + "loss": 0.4859, + "step": 31180 + }, + { + "epoch": 188.33, + "learning_rate": 9.992894843130526e-05, + "loss": 0.5026, + "step": 31200 + }, + { + "epoch": 188.45, + "learning_rate": 9.992653657624225e-05, + "loss": 0.5297, + "step": 31220 + }, + { + "epoch": 188.57, + "learning_rate": 9.992408449627051e-05, + "loss": 0.5384, + "step": 31240 + }, + { + "epoch": 188.69, + "learning_rate": 9.992159219336561e-05, + "loss": 0.5536, + "step": 31260 + }, + { + "epoch": 188.81, + "learning_rate": 9.991905966953559e-05, + "loss": 0.5885, + "step": 31280 + }, + { + "epoch": 188.93, + "learning_rate": 9.991648692682083e-05, + "loss": 0.612, + "step": 31300 + }, + { + "epoch": 189.05, + "learning_rate": 9.991387396729415e-05, + "loss": 0.5315, + "step": 31320 + }, + { + "epoch": 189.18, + "learning_rate": 9.99112207930608e-05, + "loss": 0.4821, + "step": 31340 + }, + { + "epoch": 189.3, + "learning_rate": 9.990852740625831e-05, + "loss": 0.5144, + "step": 31360 + }, + { + "epoch": 189.42, + "learning_rate": 9.990579380905678e-05, + "loss": 0.5196, + "step": 31380 + }, + { + "epoch": 189.54, + "learning_rate": 9.990302000365858e-05, + "loss": 0.5276, + "step": 31400 + }, + { + "epoch": 189.66, + "learning_rate": 9.99002059922985e-05, + "loss": 0.5463, + "step": 31420 + }, + { + "epoch": 189.78, + "learning_rate": 9.989735177724378e-05, + "loss": 0.5747, + "step": 31440 + }, + { + "epoch": 189.9, + "learning_rate": 9.989445736079397e-05, + "loss": 0.5867, + "step": 31460 + }, + { + "epoch": 190.02, + "learning_rate": 9.989152274528109e-05, + "loss": 0.5554, + "step": 31480 + }, + { + "epoch": 190.14, + "learning_rate": 9.988854793306947e-05, + "loss": 0.4461, + "step": 31500 + }, + { + "epoch": 190.26, + "learning_rate": 9.988553292655587e-05, + "loss": 0.4767, + "step": 31520 + }, + { + "epoch": 190.38, + "learning_rate": 9.988247772816945e-05, + "loss": 0.4925, + "step": 31540 + }, + { + "epoch": 190.5, + "learning_rate": 9.987938234037168e-05, + "loss": 0.5114, + "step": 31560 + }, + { + "epoch": 190.62, + "learning_rate": 9.987624676565652e-05, + "loss": 0.5341, + "step": 31580 + }, + { + "epoch": 190.74, + "learning_rate": 9.987307100655019e-05, + "loss": 0.5667, + "step": 31600 + }, + { + "epoch": 190.87, + "learning_rate": 9.986985506561139e-05, + "loss": 0.5664, + "step": 31620 + }, + { + "epoch": 190.99, + "learning_rate": 9.98665989454311e-05, + "loss": 0.5933, + "step": 31640 + }, + { + "epoch": 191.11, + "learning_rate": 9.986330264863275e-05, + "loss": 0.449, + "step": 31660 + }, + { + "epoch": 191.23, + "learning_rate": 9.985996617787208e-05, + "loss": 0.4608, + "step": 31680 + }, + { + "epoch": 191.35, + "learning_rate": 9.985658953583725e-05, + "loss": 0.4939, + "step": 31700 + }, + { + "epoch": 191.47, + "learning_rate": 9.985334451973896e-05, + "loss": 0.5193, + "step": 31720 + }, + { + "epoch": 191.59, + "learning_rate": 9.984988955157381e-05, + "loss": 0.5285, + "step": 31740 + }, + { + "epoch": 191.71, + "learning_rate": 9.984639442025306e-05, + "loss": 0.5442, + "step": 31760 + }, + { + "epoch": 191.83, + "learning_rate": 9.984285912859263e-05, + "loss": 0.5525, + "step": 31780 + }, + { + "epoch": 191.95, + "learning_rate": 9.98392836794409e-05, + "loss": 0.5815, + "step": 31800 + }, + { + "epoch": 192.07, + "learning_rate": 9.98356680756785e-05, + "loss": 0.4852, + "step": 31820 + }, + { + "epoch": 192.19, + "learning_rate": 9.983201232021847e-05, + "loss": 0.4503, + "step": 31840 + }, + { + "epoch": 192.31, + "learning_rate": 9.982831641600618e-05, + "loss": 0.4759, + "step": 31860 + }, + { + "epoch": 192.43, + "learning_rate": 9.982458036601939e-05, + "loss": 0.4892, + "step": 31880 + }, + { + "epoch": 192.56, + "learning_rate": 9.982080417326811e-05, + "loss": 0.5049, + "step": 31900 + }, + { + "epoch": 192.68, + "learning_rate": 9.98169878407948e-05, + "loss": 0.5061, + "step": 31920 + }, + { + "epoch": 192.8, + "learning_rate": 9.981313137167419e-05, + "loss": 0.5286, + "step": 31940 + }, + { + "epoch": 192.92, + "learning_rate": 9.980923476901333e-05, + "loss": 0.5424, + "step": 31960 + }, + { + "epoch": 193.04, + "learning_rate": 9.980529803595172e-05, + "loss": 0.5141, + "step": 31980 + }, + { + "epoch": 193.16, + "learning_rate": 9.980132117566106e-05, + "loss": 0.4252, + "step": 32000 + }, + { + "epoch": 193.16, + "eval_accuracy": 0.0012767828036866177, + "eval_loss": 14.33401107788086, + "eval_runtime": 18.147, + "eval_samples_per_second": 112.415, + "eval_steps_per_second": 2.37, + "step": 32000 + }, + { + "epoch": 193.28, + "learning_rate": 9.979730419134546e-05, + "loss": 0.4494, + "step": 32020 + }, + { + "epoch": 193.4, + "learning_rate": 9.979324708624129e-05, + "loss": 0.4821, + "step": 32040 + }, + { + "epoch": 193.52, + "learning_rate": 9.978914986361734e-05, + "loss": 0.5057, + "step": 32060 + }, + { + "epoch": 193.64, + "learning_rate": 9.978501252677464e-05, + "loss": 0.5145, + "step": 32080 + }, + { + "epoch": 193.76, + "learning_rate": 9.978083507904659e-05, + "loss": 0.5297, + "step": 32100 + }, + { + "epoch": 193.88, + "learning_rate": 9.977661752379883e-05, + "loss": 0.5479, + "step": 32120 + }, + { + "epoch": 194.0, + "learning_rate": 9.977235986442943e-05, + "loss": 0.5598, + "step": 32140 + }, + { + "epoch": 194.12, + "learning_rate": 9.976806210436868e-05, + "loss": 0.3987, + "step": 32160 + }, + { + "epoch": 194.25, + "learning_rate": 9.97637242470792e-05, + "loss": 0.4324, + "step": 32180 + }, + { + "epoch": 194.37, + "learning_rate": 9.975934629605595e-05, + "loss": 0.4709, + "step": 32200 + }, + { + "epoch": 194.49, + "learning_rate": 9.975492825482615e-05, + "loss": 0.4809, + "step": 32220 + }, + { + "epoch": 194.61, + "learning_rate": 9.975047012694933e-05, + "loss": 0.4829, + "step": 32240 + }, + { + "epoch": 194.73, + "learning_rate": 9.974597191601733e-05, + "loss": 0.5059, + "step": 32260 + }, + { + "epoch": 194.85, + "learning_rate": 9.97414336256543e-05, + "loss": 0.5159, + "step": 32280 + }, + { + "epoch": 194.97, + "learning_rate": 9.97368552595166e-05, + "loss": 0.534, + "step": 32300 + }, + { + "epoch": 195.09, + "learning_rate": 9.9732236821293e-05, + "loss": 0.4287, + "step": 32320 + }, + { + "epoch": 195.21, + "learning_rate": 9.972757831470445e-05, + "loss": 0.4337, + "step": 32340 + }, + { + "epoch": 195.33, + "learning_rate": 9.972287974350425e-05, + "loss": 0.4485, + "step": 32360 + }, + { + "epoch": 195.45, + "learning_rate": 9.971814111147793e-05, + "loss": 0.4649, + "step": 32380 + }, + { + "epoch": 195.57, + "learning_rate": 9.971336242244333e-05, + "loss": 0.4701, + "step": 32400 + }, + { + "epoch": 195.69, + "learning_rate": 9.970854368025057e-05, + "loss": 0.4826, + "step": 32420 + }, + { + "epoch": 195.81, + "learning_rate": 9.970368488878202e-05, + "loss": 0.4994, + "step": 32440 + }, + { + "epoch": 195.94, + "learning_rate": 9.969878605195231e-05, + "loss": 0.5269, + "step": 32460 + }, + { + "epoch": 196.06, + "learning_rate": 9.969384717370834e-05, + "loss": 0.4673, + "step": 32480 + }, + { + "epoch": 196.18, + "learning_rate": 9.968886825802926e-05, + "loss": 0.3981, + "step": 32500 + }, + { + "epoch": 196.3, + "learning_rate": 9.968384930892653e-05, + "loss": 0.4332, + "step": 32520 + }, + { + "epoch": 196.42, + "learning_rate": 9.967879033044382e-05, + "loss": 0.4525, + "step": 32540 + }, + { + "epoch": 196.54, + "learning_rate": 9.967369132665703e-05, + "loss": 0.4606, + "step": 32560 + }, + { + "epoch": 196.66, + "learning_rate": 9.966855230167436e-05, + "loss": 0.4709, + "step": 32580 + }, + { + "epoch": 196.78, + "learning_rate": 9.966337325963624e-05, + "loss": 0.4942, + "step": 32600 + }, + { + "epoch": 196.9, + "learning_rate": 9.96581542047153e-05, + "loss": 0.5128, + "step": 32620 + }, + { + "epoch": 197.02, + "learning_rate": 9.965289514111644e-05, + "loss": 0.4951, + "step": 32640 + }, + { + "epoch": 197.14, + "learning_rate": 9.96475960730768e-05, + "loss": 0.4019, + "step": 32660 + }, + { + "epoch": 197.26, + "learning_rate": 9.964225700486576e-05, + "loss": 0.4134, + "step": 32680 + }, + { + "epoch": 197.38, + "learning_rate": 9.96368779407849e-05, + "loss": 0.4361, + "step": 32700 + }, + { + "epoch": 197.51, + "learning_rate": 9.963145888516803e-05, + "loss": 0.4558, + "step": 32720 + }, + { + "epoch": 197.63, + "learning_rate": 9.96259998423812e-05, + "loss": 0.47, + "step": 32740 + }, + { + "epoch": 197.75, + "learning_rate": 9.962050081682261e-05, + "loss": 0.4927, + "step": 32760 + }, + { + "epoch": 197.87, + "learning_rate": 9.96149618129228e-05, + "loss": 0.5016, + "step": 32780 + }, + { + "epoch": 197.99, + "learning_rate": 9.96093828351444e-05, + "loss": 0.5179, + "step": 32800 + }, + { + "epoch": 198.11, + "learning_rate": 9.960376388798232e-05, + "loss": 0.4012, + "step": 32820 + }, + { + "epoch": 198.23, + "learning_rate": 9.959810497596361e-05, + "loss": 0.4098, + "step": 32840 + }, + { + "epoch": 198.35, + "learning_rate": 9.959240610364757e-05, + "loss": 0.4298, + "step": 32860 + }, + { + "epoch": 198.47, + "learning_rate": 9.958666727562568e-05, + "loss": 0.4416, + "step": 32880 + }, + { + "epoch": 198.59, + "learning_rate": 9.95808884965216e-05, + "loss": 0.4643, + "step": 32900 + }, + { + "epoch": 198.71, + "learning_rate": 9.957506977099123e-05, + "loss": 0.4748, + "step": 32920 + }, + { + "epoch": 198.83, + "learning_rate": 9.956921110372255e-05, + "loss": 0.4831, + "step": 32940 + }, + { + "epoch": 198.95, + "learning_rate": 9.956331249943583e-05, + "loss": 0.502, + "step": 32960 + }, + { + "epoch": 199.07, + "learning_rate": 9.955737396288343e-05, + "loss": 0.417, + "step": 32980 + }, + { + "epoch": 199.2, + "learning_rate": 9.955139549884995e-05, + "loss": 0.3952, + "step": 33000 + }, + { + "epoch": 199.2, + "eval_accuracy": 0.001259495609052563, + "eval_loss": 14.796114921569824, + "eval_runtime": 18.1968, + "eval_samples_per_second": 112.108, + "eval_steps_per_second": 2.363, + "step": 33000 + }, + { + "epoch": 199.32, + "learning_rate": 9.954537711215212e-05, + "loss": 0.4078, + "step": 33020 + }, + { + "epoch": 199.44, + "learning_rate": 9.953931880763885e-05, + "loss": 0.4446, + "step": 33040 + }, + { + "epoch": 199.56, + "learning_rate": 9.953322059019118e-05, + "loss": 0.4559, + "step": 33060 + }, + { + "epoch": 199.68, + "learning_rate": 9.95270824647224e-05, + "loss": 0.4593, + "step": 33080 + }, + { + "epoch": 199.8, + "learning_rate": 9.952090443617783e-05, + "loss": 0.4724, + "step": 33100 + }, + { + "epoch": 199.92, + "learning_rate": 9.951468650953499e-05, + "loss": 0.4951, + "step": 33120 + }, + { + "epoch": 200.04, + "learning_rate": 9.95084286898036e-05, + "loss": 0.4568, + "step": 33140 + }, + { + "epoch": 200.16, + "learning_rate": 9.950213098202543e-05, + "loss": 0.3755, + "step": 33160 + }, + { + "epoch": 200.28, + "learning_rate": 9.949579339127444e-05, + "loss": 0.4004, + "step": 33180 + }, + { + "epoch": 200.4, + "learning_rate": 9.948941592265673e-05, + "loss": 0.4046, + "step": 33200 + }, + { + "epoch": 200.52, + "learning_rate": 9.948299858131049e-05, + "loss": 0.4194, + "step": 33220 + }, + { + "epoch": 200.64, + "learning_rate": 9.947654137240606e-05, + "loss": 0.4467, + "step": 33240 + }, + { + "epoch": 200.76, + "learning_rate": 9.94700443011459e-05, + "loss": 0.4544, + "step": 33260 + }, + { + "epoch": 200.89, + "learning_rate": 9.946350737276459e-05, + "loss": 0.476, + "step": 33280 + }, + { + "epoch": 201.01, + "learning_rate": 9.945693059252881e-05, + "loss": 0.4785, + "step": 33300 + }, + { + "epoch": 201.13, + "learning_rate": 9.945031396573734e-05, + "loss": 0.3558, + "step": 33320 + }, + { + "epoch": 201.25, + "learning_rate": 9.94436574977211e-05, + "loss": 0.3768, + "step": 33340 + }, + { + "epoch": 201.37, + "learning_rate": 9.943696119384305e-05, + "loss": 0.4028, + "step": 33360 + }, + { + "epoch": 201.49, + "learning_rate": 9.943022505949833e-05, + "loss": 0.4206, + "step": 33380 + }, + { + "epoch": 201.61, + "learning_rate": 9.942344910011407e-05, + "loss": 0.4348, + "step": 33400 + }, + { + "epoch": 201.73, + "learning_rate": 9.941663332114959e-05, + "loss": 0.456, + "step": 33420 + }, + { + "epoch": 201.85, + "learning_rate": 9.940977772809621e-05, + "loss": 0.4605, + "step": 33440 + }, + { + "epoch": 201.97, + "learning_rate": 9.940288232647736e-05, + "loss": 0.4742, + "step": 33460 + }, + { + "epoch": 202.09, + "learning_rate": 9.939594712184854e-05, + "loss": 0.3742, + "step": 33480 + }, + { + "epoch": 202.21, + "learning_rate": 9.938897211979732e-05, + "loss": 0.3726, + "step": 33500 + }, + { + "epoch": 202.33, + "learning_rate": 9.938195732594335e-05, + "loss": 0.4019, + "step": 33520 + }, + { + "epoch": 202.45, + "learning_rate": 9.937490274593828e-05, + "loss": 0.4086, + "step": 33540 + }, + { + "epoch": 202.58, + "learning_rate": 9.93678083854659e-05, + "loss": 0.4346, + "step": 33560 + }, + { + "epoch": 202.7, + "learning_rate": 9.936067425024201e-05, + "loss": 0.4373, + "step": 33580 + }, + { + "epoch": 202.82, + "learning_rate": 9.93535003460144e-05, + "loss": 0.453, + "step": 33600 + }, + { + "epoch": 202.94, + "learning_rate": 9.934628667856303e-05, + "loss": 0.4692, + "step": 33620 + }, + { + "epoch": 203.06, + "learning_rate": 9.933903325369977e-05, + "loss": 0.4171, + "step": 33640 + }, + { + "epoch": 203.18, + "learning_rate": 9.933174007726858e-05, + "loss": 0.3703, + "step": 33660 + }, + { + "epoch": 203.3, + "learning_rate": 9.932440715514545e-05, + "loss": 0.3899, + "step": 33680 + }, + { + "epoch": 203.42, + "learning_rate": 9.93170344932384e-05, + "loss": 0.4087, + "step": 33700 + }, + { + "epoch": 203.54, + "learning_rate": 9.930962209748741e-05, + "loss": 0.4244, + "step": 33720 + }, + { + "epoch": 203.66, + "learning_rate": 9.930254352349008e-05, + "loss": 0.4306, + "step": 33740 + }, + { + "epoch": 203.78, + "learning_rate": 9.929505366394967e-05, + "loss": 0.443, + "step": 33760 + }, + { + "epoch": 203.9, + "learning_rate": 9.92875240882749e-05, + "loss": 0.4556, + "step": 33780 + }, + { + "epoch": 204.02, + "learning_rate": 9.92799548025322e-05, + "loss": 0.4405, + "step": 33800 + }, + { + "epoch": 204.14, + "learning_rate": 9.927234581282004e-05, + "loss": 0.3568, + "step": 33820 + }, + { + "epoch": 204.27, + "learning_rate": 9.92646971252688e-05, + "loss": 0.3736, + "step": 33840 + }, + { + "epoch": 204.39, + "learning_rate": 9.925700874604093e-05, + "loss": 0.3884, + "step": 33860 + }, + { + "epoch": 204.51, + "learning_rate": 9.92492806813308e-05, + "loss": 0.408, + "step": 33880 + }, + { + "epoch": 204.63, + "learning_rate": 9.924151293736476e-05, + "loss": 0.4256, + "step": 33900 + }, + { + "epoch": 204.75, + "learning_rate": 9.923370552040116e-05, + "loss": 0.4364, + "step": 33920 + }, + { + "epoch": 204.87, + "learning_rate": 9.922585843673031e-05, + "loss": 0.4657, + "step": 33940 + }, + { + "epoch": 204.99, + "learning_rate": 9.921797169267443e-05, + "loss": 0.4655, + "step": 33960 + }, + { + "epoch": 205.11, + "learning_rate": 9.921004529458772e-05, + "loss": 0.35, + "step": 33980 + }, + { + "epoch": 205.23, + "learning_rate": 9.920207924885638e-05, + "loss": 0.3684, + "step": 34000 + }, + { + "epoch": 205.23, + "eval_accuracy": 0.0012590016892058756, + "eval_loss": 15.24209976196289, + "eval_runtime": 18.177, + "eval_samples_per_second": 112.23, + "eval_steps_per_second": 2.366, + "step": 34000 + }, + { + "epoch": 205.35, + "learning_rate": 9.919407356189849e-05, + "loss": 0.3822, + "step": 34020 + }, + { + "epoch": 205.47, + "learning_rate": 9.918602824016408e-05, + "loss": 0.3956, + "step": 34040 + }, + { + "epoch": 205.59, + "learning_rate": 9.91779432901351e-05, + "loss": 0.4029, + "step": 34060 + }, + { + "epoch": 205.71, + "learning_rate": 9.916981871832549e-05, + "loss": 0.4162, + "step": 34080 + }, + { + "epoch": 205.84, + "learning_rate": 9.916165453128106e-05, + "loss": 0.4329, + "step": 34100 + }, + { + "epoch": 205.96, + "learning_rate": 9.915345073557953e-05, + "loss": 0.4465, + "step": 34120 + }, + { + "epoch": 206.08, + "learning_rate": 9.914520733783055e-05, + "loss": 0.3748, + "step": 34140 + }, + { + "epoch": 206.2, + "learning_rate": 9.913692434467569e-05, + "loss": 0.3555, + "step": 34160 + }, + { + "epoch": 206.32, + "learning_rate": 9.912860176278842e-05, + "loss": 0.3747, + "step": 34180 + }, + { + "epoch": 206.44, + "learning_rate": 9.912023959887408e-05, + "loss": 0.3954, + "step": 34200 + }, + { + "epoch": 206.56, + "learning_rate": 9.91118378596699e-05, + "loss": 0.403, + "step": 34220 + }, + { + "epoch": 206.68, + "learning_rate": 9.910339655194502e-05, + "loss": 0.4143, + "step": 34240 + }, + { + "epoch": 206.8, + "learning_rate": 9.909491568250047e-05, + "loss": 0.4167, + "step": 34260 + }, + { + "epoch": 206.92, + "learning_rate": 9.90863952581691e-05, + "loss": 0.434, + "step": 34280 + }, + { + "epoch": 207.04, + "learning_rate": 9.907783528581568e-05, + "loss": 0.4013, + "step": 34300 + }, + { + "epoch": 207.16, + "learning_rate": 9.906923577233682e-05, + "loss": 0.3298, + "step": 34320 + }, + { + "epoch": 207.28, + "learning_rate": 9.906059672466101e-05, + "loss": 0.3502, + "step": 34340 + }, + { + "epoch": 207.4, + "learning_rate": 9.905191814974854e-05, + "loss": 0.3611, + "step": 34360 + }, + { + "epoch": 207.53, + "learning_rate": 9.90432000545916e-05, + "loss": 0.3774, + "step": 34380 + }, + { + "epoch": 207.65, + "learning_rate": 9.903444244621423e-05, + "loss": 0.3982, + "step": 34400 + }, + { + "epoch": 207.77, + "learning_rate": 9.902564533167222e-05, + "loss": 0.4134, + "step": 34420 + }, + { + "epoch": 207.89, + "learning_rate": 9.901680871805329e-05, + "loss": 0.4233, + "step": 34440 + }, + { + "epoch": 208.01, + "learning_rate": 9.900793261247692e-05, + "loss": 0.4307, + "step": 34460 + }, + { + "epoch": 208.13, + "learning_rate": 9.899901702209445e-05, + "loss": 0.3294, + "step": 34480 + }, + { + "epoch": 208.25, + "learning_rate": 9.899006195408898e-05, + "loss": 0.3552, + "step": 34500 + }, + { + "epoch": 208.37, + "learning_rate": 9.898106741567545e-05, + "loss": 0.366, + "step": 34520 + }, + { + "epoch": 208.49, + "learning_rate": 9.897203341410062e-05, + "loss": 0.3707, + "step": 34540 + }, + { + "epoch": 208.61, + "learning_rate": 9.8962959956643e-05, + "loss": 0.402, + "step": 34560 + }, + { + "epoch": 208.73, + "learning_rate": 9.895384705061292e-05, + "loss": 0.3991, + "step": 34580 + }, + { + "epoch": 208.85, + "learning_rate": 9.894469470335251e-05, + "loss": 0.4095, + "step": 34600 + }, + { + "epoch": 208.97, + "learning_rate": 9.89355029222356e-05, + "loss": 0.4253, + "step": 34620 + }, + { + "epoch": 209.09, + "learning_rate": 9.892627171466786e-05, + "loss": 0.3422, + "step": 34640 + }, + { + "epoch": 209.22, + "learning_rate": 9.891700108808674e-05, + "loss": 0.3367, + "step": 34660 + }, + { + "epoch": 209.34, + "learning_rate": 9.890769104996137e-05, + "loss": 0.3543, + "step": 34680 + }, + { + "epoch": 209.46, + "learning_rate": 9.889834160779271e-05, + "loss": 0.378, + "step": 34700 + }, + { + "epoch": 209.58, + "learning_rate": 9.888895276911342e-05, + "loss": 0.384, + "step": 34720 + }, + { + "epoch": 209.7, + "learning_rate": 9.887952454148792e-05, + "loss": 0.4011, + "step": 34740 + }, + { + "epoch": 209.82, + "learning_rate": 9.88700569325124e-05, + "loss": 0.4053, + "step": 34760 + }, + { + "epoch": 209.94, + "learning_rate": 9.886054994981466e-05, + "loss": 0.4203, + "step": 34780 + }, + { + "epoch": 210.06, + "learning_rate": 9.88510036010544e-05, + "loss": 0.3665, + "step": 34800 + }, + { + "epoch": 210.18, + "learning_rate": 9.884141789392285e-05, + "loss": 0.3291, + "step": 34820 + }, + { + "epoch": 210.3, + "learning_rate": 9.883179283614311e-05, + "loss": 0.3487, + "step": 34840 + }, + { + "epoch": 210.42, + "learning_rate": 9.882212843546988e-05, + "loss": 0.3575, + "step": 34860 + }, + { + "epoch": 210.54, + "learning_rate": 9.881242469968962e-05, + "loss": 0.3729, + "step": 34880 + }, + { + "epoch": 210.66, + "learning_rate": 9.880268163662042e-05, + "loss": 0.3939, + "step": 34900 + }, + { + "epoch": 210.78, + "learning_rate": 9.879289925411212e-05, + "loss": 0.3978, + "step": 34920 + }, + { + "epoch": 210.91, + "learning_rate": 9.878307756004619e-05, + "loss": 0.4146, + "step": 34940 + }, + { + "epoch": 211.03, + "learning_rate": 9.87732165623358e-05, + "loss": 0.3925, + "step": 34960 + }, + { + "epoch": 211.15, + "learning_rate": 9.876331626892576e-05, + "loss": 0.3063, + "step": 34980 + }, + { + "epoch": 211.27, + "learning_rate": 9.875337668779259e-05, + "loss": 0.3338, + "step": 35000 + }, + { + "epoch": 211.27, + "eval_accuracy": 0.001261471288439312, + "eval_loss": 15.643324851989746, + "eval_runtime": 18.2037, + "eval_samples_per_second": 112.065, + "eval_steps_per_second": 2.362, + "step": 35000 + }, + { + "epoch": 211.39, + "learning_rate": 9.874339782694441e-05, + "loss": 0.3398, + "step": 35020 + }, + { + "epoch": 211.51, + "learning_rate": 9.873337969442101e-05, + "loss": 0.3385, + "step": 35040 + }, + { + "epoch": 211.63, + "learning_rate": 9.872332229829383e-05, + "loss": 0.3627, + "step": 35060 + }, + { + "epoch": 211.75, + "learning_rate": 9.871322564666592e-05, + "loss": 0.3795, + "step": 35080 + }, + { + "epoch": 211.87, + "learning_rate": 9.870308974767197e-05, + "loss": 0.3977, + "step": 35100 + }, + { + "epoch": 211.99, + "learning_rate": 9.869291460947829e-05, + "loss": 0.4096, + "step": 35120 + }, + { + "epoch": 212.11, + "learning_rate": 9.86827002402828e-05, + "loss": 0.3138, + "step": 35140 + }, + { + "epoch": 212.23, + "learning_rate": 9.867244664831507e-05, + "loss": 0.3286, + "step": 35160 + }, + { + "epoch": 212.35, + "learning_rate": 9.866215384183619e-05, + "loss": 0.3414, + "step": 35180 + }, + { + "epoch": 212.47, + "learning_rate": 9.86518218291389e-05, + "loss": 0.3603, + "step": 35200 + }, + { + "epoch": 212.6, + "learning_rate": 9.864145061854753e-05, + "loss": 0.3646, + "step": 35220 + }, + { + "epoch": 212.72, + "learning_rate": 9.863104021841796e-05, + "loss": 0.3821, + "step": 35240 + }, + { + "epoch": 212.84, + "learning_rate": 9.862059063713767e-05, + "loss": 0.3876, + "step": 35260 + }, + { + "epoch": 212.96, + "learning_rate": 9.861010188312568e-05, + "loss": 0.3981, + "step": 35280 + }, + { + "epoch": 213.08, + "learning_rate": 9.859957396483262e-05, + "loss": 0.3357, + "step": 35300 + }, + { + "epoch": 213.2, + "learning_rate": 9.858900689074064e-05, + "loss": 0.3274, + "step": 35320 + }, + { + "epoch": 213.32, + "learning_rate": 9.857840066936341e-05, + "loss": 0.3389, + "step": 35340 + }, + { + "epoch": 213.44, + "learning_rate": 9.856775530924621e-05, + "loss": 0.3445, + "step": 35360 + }, + { + "epoch": 213.56, + "learning_rate": 9.855707081896579e-05, + "loss": 0.3609, + "step": 35380 + }, + { + "epoch": 213.68, + "learning_rate": 9.854634720713047e-05, + "loss": 0.3758, + "step": 35400 + }, + { + "epoch": 213.8, + "learning_rate": 9.853558448238003e-05, + "loss": 0.3877, + "step": 35420 + }, + { + "epoch": 213.92, + "learning_rate": 9.852478265338585e-05, + "loss": 0.3981, + "step": 35440 + }, + { + "epoch": 214.04, + "learning_rate": 9.851394172885074e-05, + "loss": 0.3666, + "step": 35460 + }, + { + "epoch": 214.16, + "learning_rate": 9.850306171750905e-05, + "loss": 0.2927, + "step": 35480 + }, + { + "epoch": 214.29, + "learning_rate": 9.84921426281266e-05, + "loss": 0.3201, + "step": 35500 + }, + { + "epoch": 214.41, + "learning_rate": 9.848118446950072e-05, + "loss": 0.3414, + "step": 35520 + }, + { + "epoch": 214.53, + "learning_rate": 9.847018725046018e-05, + "loss": 0.3555, + "step": 35540 + }, + { + "epoch": 214.65, + "learning_rate": 9.845915097986526e-05, + "loss": 0.3696, + "step": 35560 + }, + { + "epoch": 214.77, + "learning_rate": 9.844807566660765e-05, + "loss": 0.3677, + "step": 35580 + }, + { + "epoch": 214.89, + "learning_rate": 9.843696131961058e-05, + "loss": 0.3831, + "step": 35600 + }, + { + "epoch": 215.01, + "learning_rate": 9.842580794782863e-05, + "loss": 0.3879, + "step": 35620 + }, + { + "epoch": 215.13, + "learning_rate": 9.841461556024789e-05, + "loss": 0.2884, + "step": 35640 + }, + { + "epoch": 215.25, + "learning_rate": 9.840338416588588e-05, + "loss": 0.3135, + "step": 35660 + }, + { + "epoch": 215.37, + "learning_rate": 9.839211377379151e-05, + "loss": 0.3296, + "step": 35680 + }, + { + "epoch": 215.49, + "learning_rate": 9.838080439304515e-05, + "loss": 0.3421, + "step": 35700 + }, + { + "epoch": 215.61, + "learning_rate": 9.836945603275853e-05, + "loss": 0.3571, + "step": 35720 + }, + { + "epoch": 215.73, + "learning_rate": 9.835806870207487e-05, + "loss": 0.3734, + "step": 35740 + }, + { + "epoch": 215.86, + "learning_rate": 9.834664241016869e-05, + "loss": 0.372, + "step": 35760 + }, + { + "epoch": 215.98, + "learning_rate": 9.833575135341002e-05, + "loss": 0.3917, + "step": 35780 + }, + { + "epoch": 216.1, + "learning_rate": 9.832424911362719e-05, + "loss": 0.3021, + "step": 35800 + }, + { + "epoch": 216.22, + "learning_rate": 9.831270793986968e-05, + "loss": 0.3098, + "step": 35820 + }, + { + "epoch": 216.34, + "learning_rate": 9.830112784143601e-05, + "loss": 0.3254, + "step": 35840 + }, + { + "epoch": 216.46, + "learning_rate": 9.828950882765608e-05, + "loss": 0.3275, + "step": 35860 + }, + { + "epoch": 216.58, + "learning_rate": 9.827785090789109e-05, + "loss": 0.3395, + "step": 35880 + }, + { + "epoch": 216.7, + "learning_rate": 9.826615409153362e-05, + "loss": 0.3466, + "step": 35900 + }, + { + "epoch": 216.82, + "learning_rate": 9.82544183880076e-05, + "loss": 0.361, + "step": 35920 + }, + { + "epoch": 216.94, + "learning_rate": 9.824264380676826e-05, + "loss": 0.3701, + "step": 35940 + }, + { + "epoch": 217.06, + "learning_rate": 9.823083035730218e-05, + "loss": 0.329, + "step": 35960 + }, + { + "epoch": 217.18, + "learning_rate": 9.821897804912722e-05, + "loss": 0.3044, + "step": 35980 + }, + { + "epoch": 217.3, + "learning_rate": 9.820708689179259e-05, + "loss": 0.307, + "step": 36000 + }, + { + "epoch": 217.3, + "eval_accuracy": 0.0012698679258329958, + "eval_loss": 16.018239974975586, + "eval_runtime": 18.2272, + "eval_samples_per_second": 111.921, + "eval_steps_per_second": 2.359, + "step": 36000 + }, + { + "epoch": 217.42, + "learning_rate": 9.819515689487877e-05, + "loss": 0.3246, + "step": 36020 + }, + { + "epoch": 217.55, + "learning_rate": 9.818318806799755e-05, + "loss": 0.3441, + "step": 36040 + }, + { + "epoch": 217.67, + "learning_rate": 9.817118042079201e-05, + "loss": 0.3581, + "step": 36060 + }, + { + "epoch": 217.79, + "learning_rate": 9.815913396293648e-05, + "loss": 0.3714, + "step": 36080 + }, + { + "epoch": 217.91, + "learning_rate": 9.814704870413657e-05, + "loss": 0.3816, + "step": 36100 + }, + { + "epoch": 218.03, + "learning_rate": 9.813492465412916e-05, + "loss": 0.3529, + "step": 36120 + }, + { + "epoch": 218.15, + "learning_rate": 9.812276182268236e-05, + "loss": 0.2744, + "step": 36140 + }, + { + "epoch": 218.27, + "learning_rate": 9.811056021959559e-05, + "loss": 0.2969, + "step": 36160 + }, + { + "epoch": 218.39, + "learning_rate": 9.809831985469942e-05, + "loss": 0.3186, + "step": 36180 + }, + { + "epoch": 218.51, + "learning_rate": 9.80860407378557e-05, + "loss": 0.3259, + "step": 36200 + }, + { + "epoch": 218.63, + "learning_rate": 9.807372287895748e-05, + "loss": 0.3433, + "step": 36220 + }, + { + "epoch": 218.75, + "learning_rate": 9.806136628792907e-05, + "loss": 0.3623, + "step": 36240 + }, + { + "epoch": 218.87, + "learning_rate": 9.804897097472594e-05, + "loss": 0.3708, + "step": 36260 + }, + { + "epoch": 218.99, + "learning_rate": 9.803653694933476e-05, + "loss": 0.3843, + "step": 36280 + }, + { + "epoch": 219.11, + "learning_rate": 9.802406422177339e-05, + "loss": 0.285, + "step": 36300 + }, + { + "epoch": 219.24, + "learning_rate": 9.801155280209091e-05, + "loss": 0.2975, + "step": 36320 + }, + { + "epoch": 219.36, + "learning_rate": 9.799900270036753e-05, + "loss": 0.3218, + "step": 36340 + }, + { + "epoch": 219.48, + "learning_rate": 9.798641392671463e-05, + "loss": 0.3277, + "step": 36360 + }, + { + "epoch": 219.6, + "learning_rate": 9.797378649127478e-05, + "loss": 0.3469, + "step": 36380 + }, + { + "epoch": 219.72, + "learning_rate": 9.796112040422165e-05, + "loss": 0.3561, + "step": 36400 + }, + { + "epoch": 219.84, + "learning_rate": 9.794841567576011e-05, + "loss": 0.3646, + "step": 36420 + }, + { + "epoch": 219.96, + "learning_rate": 9.79356723161261e-05, + "loss": 0.3528, + "step": 36440 + }, + { + "epoch": 220.08, + "learning_rate": 9.792289033558673e-05, + "loss": 0.2793, + "step": 36460 + }, + { + "epoch": 220.2, + "learning_rate": 9.791006974444022e-05, + "loss": 0.2655, + "step": 36480 + }, + { + "epoch": 220.32, + "learning_rate": 9.789721055301586e-05, + "loss": 0.2856, + "step": 36500 + }, + { + "epoch": 220.44, + "learning_rate": 9.788431277167409e-05, + "loss": 0.3194, + "step": 36520 + }, + { + "epoch": 220.56, + "learning_rate": 9.787137641080641e-05, + "loss": 0.3235, + "step": 36540 + }, + { + "epoch": 220.68, + "learning_rate": 9.785840148083543e-05, + "loss": 0.3404, + "step": 36560 + }, + { + "epoch": 220.8, + "learning_rate": 9.784538799221478e-05, + "loss": 0.3541, + "step": 36580 + }, + { + "epoch": 220.93, + "learning_rate": 9.783233595542923e-05, + "loss": 0.3574, + "step": 36600 + }, + { + "epoch": 221.05, + "learning_rate": 9.781924538099456e-05, + "loss": 0.3257, + "step": 36620 + }, + { + "epoch": 221.17, + "learning_rate": 9.78061162794576e-05, + "loss": 0.2748, + "step": 36640 + }, + { + "epoch": 221.29, + "learning_rate": 9.779294866139624e-05, + "loss": 0.293, + "step": 36660 + }, + { + "epoch": 221.41, + "learning_rate": 9.777974253741938e-05, + "loss": 0.3191, + "step": 36680 + }, + { + "epoch": 221.53, + "learning_rate": 9.776649791816698e-05, + "loss": 0.3214, + "step": 36700 + }, + { + "epoch": 221.65, + "learning_rate": 9.775321481430996e-05, + "loss": 0.3387, + "step": 36720 + }, + { + "epoch": 221.77, + "learning_rate": 9.773989323655028e-05, + "loss": 0.3565, + "step": 36740 + }, + { + "epoch": 221.89, + "learning_rate": 9.772653319562093e-05, + "loss": 0.3568, + "step": 36760 + }, + { + "epoch": 222.01, + "learning_rate": 9.771313470228583e-05, + "loss": 0.3475, + "step": 36780 + }, + { + "epoch": 222.13, + "learning_rate": 9.769969776733988e-05, + "loss": 0.2739, + "step": 36800 + }, + { + "epoch": 222.25, + "learning_rate": 9.768622240160901e-05, + "loss": 0.2889, + "step": 36820 + }, + { + "epoch": 222.37, + "learning_rate": 9.767270861595005e-05, + "loss": 0.3142, + "step": 36840 + }, + { + "epoch": 222.49, + "learning_rate": 9.765915642125087e-05, + "loss": 0.3283, + "step": 36860 + }, + { + "epoch": 222.62, + "learning_rate": 9.764556582843016e-05, + "loss": 0.3417, + "step": 36880 + }, + { + "epoch": 222.74, + "learning_rate": 9.763193684843765e-05, + "loss": 0.3398, + "step": 36900 + }, + { + "epoch": 222.86, + "learning_rate": 9.761826949225397e-05, + "loss": 0.3485, + "step": 36920 + }, + { + "epoch": 222.98, + "learning_rate": 9.760456377089062e-05, + "loss": 0.3611, + "step": 36940 + }, + { + "epoch": 223.1, + "learning_rate": 9.759081969539012e-05, + "loss": 0.2758, + "step": 36960 + }, + { + "epoch": 223.22, + "learning_rate": 9.757703727682574e-05, + "loss": 0.2811, + "step": 36980 + }, + { + "epoch": 223.34, + "learning_rate": 9.75632165263018e-05, + "loss": 0.2951, + "step": 37000 + }, + { + "epoch": 223.34, + "eval_accuracy": 0.0012827098418468652, + "eval_loss": 16.308704376220703, + "eval_runtime": 18.151, + "eval_samples_per_second": 112.391, + "eval_steps_per_second": 2.369, + "step": 37000 + }, + { + "epoch": 223.46, + "learning_rate": 9.75493574549534e-05, + "loss": 0.3059, + "step": 37020 + }, + { + "epoch": 223.58, + "learning_rate": 9.753546007394654e-05, + "loss": 0.3237, + "step": 37040 + }, + { + "epoch": 223.7, + "learning_rate": 9.752152439447808e-05, + "loss": 0.3246, + "step": 37060 + }, + { + "epoch": 223.82, + "learning_rate": 9.750755042777577e-05, + "loss": 0.3269, + "step": 37080 + }, + { + "epoch": 223.94, + "learning_rate": 9.749353818509817e-05, + "loss": 0.3511, + "step": 37100 + }, + { + "epoch": 224.06, + "learning_rate": 9.74794876777347e-05, + "loss": 0.3038, + "step": 37120 + }, + { + "epoch": 224.19, + "learning_rate": 9.746539891700556e-05, + "loss": 0.2703, + "step": 37140 + }, + { + "epoch": 224.31, + "learning_rate": 9.745127191426187e-05, + "loss": 0.2879, + "step": 37160 + }, + { + "epoch": 224.43, + "learning_rate": 9.743710668088545e-05, + "loss": 0.2977, + "step": 37180 + }, + { + "epoch": 224.55, + "learning_rate": 9.7422903228289e-05, + "loss": 0.3238, + "step": 37200 + }, + { + "epoch": 224.67, + "learning_rate": 9.7408661567916e-05, + "loss": 0.3337, + "step": 37220 + }, + { + "epoch": 224.79, + "learning_rate": 9.739438171124064e-05, + "loss": 0.338, + "step": 37240 + }, + { + "epoch": 224.91, + "learning_rate": 9.7380063669768e-05, + "loss": 0.3417, + "step": 37260 + }, + { + "epoch": 225.03, + "learning_rate": 9.736570745503386e-05, + "loss": 0.3337, + "step": 37280 + }, + { + "epoch": 225.15, + "learning_rate": 9.735131307860475e-05, + "loss": 0.2796, + "step": 37300 + }, + { + "epoch": 225.27, + "learning_rate": 9.733688055207794e-05, + "loss": 0.2922, + "step": 37320 + }, + { + "epoch": 225.39, + "learning_rate": 9.732240988708148e-05, + "loss": 0.3005, + "step": 37340 + }, + { + "epoch": 225.51, + "learning_rate": 9.730790109527412e-05, + "loss": 0.3055, + "step": 37360 + }, + { + "epoch": 225.63, + "learning_rate": 9.729335418834534e-05, + "loss": 0.3104, + "step": 37380 + }, + { + "epoch": 225.75, + "learning_rate": 9.72787691780153e-05, + "loss": 0.3269, + "step": 37400 + }, + { + "epoch": 225.88, + "learning_rate": 9.726414607603492e-05, + "loss": 0.3465, + "step": 37420 + }, + { + "epoch": 226.0, + "learning_rate": 9.724948489418571e-05, + "loss": 0.3467, + "step": 37440 + }, + { + "epoch": 226.12, + "learning_rate": 9.723478564427995e-05, + "loss": 0.2534, + "step": 37460 + }, + { + "epoch": 226.24, + "learning_rate": 9.722004833816056e-05, + "loss": 0.273, + "step": 37480 + }, + { + "epoch": 226.36, + "learning_rate": 9.720527298770112e-05, + "loss": 0.2769, + "step": 37500 + }, + { + "epoch": 226.48, + "learning_rate": 9.719045960480588e-05, + "loss": 0.2951, + "step": 37520 + }, + { + "epoch": 226.6, + "learning_rate": 9.717560820140969e-05, + "loss": 0.3103, + "step": 37540 + }, + { + "epoch": 226.72, + "learning_rate": 9.716071878947805e-05, + "loss": 0.315, + "step": 37560 + }, + { + "epoch": 226.84, + "learning_rate": 9.714579138100712e-05, + "loss": 0.3223, + "step": 37580 + }, + { + "epoch": 226.96, + "learning_rate": 9.713082598802364e-05, + "loss": 0.3421, + "step": 37600 + }, + { + "epoch": 227.08, + "learning_rate": 9.711582262258493e-05, + "loss": 0.27, + "step": 37620 + }, + { + "epoch": 227.2, + "learning_rate": 9.710078129677896e-05, + "loss": 0.2533, + "step": 37640 + }, + { + "epoch": 227.32, + "learning_rate": 9.708570202272422e-05, + "loss": 0.2818, + "step": 37660 + }, + { + "epoch": 227.44, + "learning_rate": 9.707058481256985e-05, + "loss": 0.2896, + "step": 37680 + }, + { + "epoch": 227.57, + "learning_rate": 9.705542967849547e-05, + "loss": 0.3054, + "step": 37700 + }, + { + "epoch": 227.69, + "learning_rate": 9.704023663271136e-05, + "loss": 0.3231, + "step": 37720 + }, + { + "epoch": 227.81, + "learning_rate": 9.702500568745821e-05, + "loss": 0.325, + "step": 37740 + }, + { + "epoch": 227.93, + "learning_rate": 9.700973685500735e-05, + "loss": 0.3357, + "step": 37760 + }, + { + "epoch": 228.05, + "learning_rate": 9.69951963823665e-05, + "loss": 0.2986, + "step": 37780 + }, + { + "epoch": 228.17, + "learning_rate": 9.697985370529101e-05, + "loss": 0.2518, + "step": 37800 + }, + { + "epoch": 228.29, + "learning_rate": 9.696447317739598e-05, + "loss": 0.2729, + "step": 37820 + }, + { + "epoch": 228.41, + "learning_rate": 9.694905481107317e-05, + "loss": 0.2929, + "step": 37840 + }, + { + "epoch": 228.53, + "learning_rate": 9.693359861874491e-05, + "loss": 0.3086, + "step": 37860 + }, + { + "epoch": 228.65, + "learning_rate": 9.691810461286397e-05, + "loss": 0.3131, + "step": 37880 + }, + { + "epoch": 228.77, + "learning_rate": 9.690257280591358e-05, + "loss": 0.3224, + "step": 37900 + }, + { + "epoch": 228.89, + "learning_rate": 9.688700321040744e-05, + "loss": 0.3387, + "step": 37920 + }, + { + "epoch": 229.01, + "learning_rate": 9.687139583888972e-05, + "loss": 0.3309, + "step": 37940 + }, + { + "epoch": 229.13, + "learning_rate": 9.685575070393494e-05, + "loss": 0.2475, + "step": 37960 + }, + { + "epoch": 229.26, + "learning_rate": 9.684006781814814e-05, + "loss": 0.2696, + "step": 37980 + }, + { + "epoch": 229.38, + "learning_rate": 9.682434719416473e-05, + "loss": 0.28, + "step": 38000 + }, + { + "epoch": 229.38, + "eval_accuracy": 0.0013024666357143563, + "eval_loss": 16.655616760253906, + "eval_runtime": 18.1057, + "eval_samples_per_second": 112.672, + "eval_steps_per_second": 2.375, + "step": 38000 + }, + { + "epoch": 229.5, + "learning_rate": 9.680858884465054e-05, + "loss": 0.3057, + "step": 38020 + }, + { + "epoch": 229.62, + "learning_rate": 9.679279278230179e-05, + "loss": 0.3062, + "step": 38040 + }, + { + "epoch": 229.74, + "learning_rate": 9.677695901984507e-05, + "loss": 0.3179, + "step": 38060 + }, + { + "epoch": 229.86, + "learning_rate": 9.676108757003735e-05, + "loss": 0.3286, + "step": 38080 + }, + { + "epoch": 229.98, + "learning_rate": 9.6745178445666e-05, + "loss": 0.3341, + "step": 38100 + }, + { + "epoch": 230.1, + "learning_rate": 9.672923165954868e-05, + "loss": 0.2633, + "step": 38120 + }, + { + "epoch": 230.22, + "learning_rate": 9.671324722453345e-05, + "loss": 0.2591, + "step": 38140 + }, + { + "epoch": 230.34, + "learning_rate": 9.669722515349868e-05, + "loss": 0.267, + "step": 38160 + }, + { + "epoch": 230.46, + "learning_rate": 9.668116545935304e-05, + "loss": 0.285, + "step": 38180 + }, + { + "epoch": 230.58, + "learning_rate": 9.666506815503558e-05, + "loss": 0.3014, + "step": 38200 + }, + { + "epoch": 230.7, + "learning_rate": 9.664893325351555e-05, + "loss": 0.3129, + "step": 38220 + }, + { + "epoch": 230.82, + "learning_rate": 9.66327607677926e-05, + "loss": 0.3232, + "step": 38240 + }, + { + "epoch": 230.95, + "learning_rate": 9.661655071089657e-05, + "loss": 0.3232, + "step": 38260 + }, + { + "epoch": 231.07, + "learning_rate": 9.660030309588764e-05, + "loss": 0.2646, + "step": 38280 + }, + { + "epoch": 231.19, + "learning_rate": 9.65840179358562e-05, + "loss": 0.2464, + "step": 38300 + }, + { + "epoch": 231.31, + "learning_rate": 9.656769524392292e-05, + "loss": 0.2627, + "step": 38320 + }, + { + "epoch": 231.43, + "learning_rate": 9.65513350332387e-05, + "loss": 0.2748, + "step": 38340 + }, + { + "epoch": 231.55, + "learning_rate": 9.653493731698467e-05, + "loss": 0.2917, + "step": 38360 + }, + { + "epoch": 231.67, + "learning_rate": 9.651850210837215e-05, + "loss": 0.3009, + "step": 38380 + }, + { + "epoch": 231.79, + "learning_rate": 9.65020294206427e-05, + "loss": 0.3167, + "step": 38400 + }, + { + "epoch": 231.91, + "learning_rate": 9.64855192670681e-05, + "loss": 0.325, + "step": 38420 + }, + { + "epoch": 232.03, + "learning_rate": 9.646897166095024e-05, + "loss": 0.2969, + "step": 38440 + }, + { + "epoch": 232.15, + "learning_rate": 9.645238661562124e-05, + "loss": 0.2487, + "step": 38460 + }, + { + "epoch": 232.27, + "learning_rate": 9.643576414444338e-05, + "loss": 0.2682, + "step": 38480 + }, + { + "epoch": 232.39, + "learning_rate": 9.641910426080908e-05, + "loss": 0.2745, + "step": 38500 + }, + { + "epoch": 232.52, + "learning_rate": 9.64024069781409e-05, + "loss": 0.2934, + "step": 38520 + }, + { + "epoch": 232.64, + "learning_rate": 9.638567230989155e-05, + "loss": 0.305, + "step": 38540 + }, + { + "epoch": 232.76, + "learning_rate": 9.636890026954385e-05, + "loss": 0.306, + "step": 38560 + }, + { + "epoch": 232.88, + "learning_rate": 9.635209087061072e-05, + "loss": 0.3161, + "step": 38580 + }, + { + "epoch": 233.0, + "learning_rate": 9.63352441266352e-05, + "loss": 0.3189, + "step": 38600 + }, + { + "epoch": 233.12, + "learning_rate": 9.63183600511904e-05, + "loss": 0.2318, + "step": 38620 + }, + { + "epoch": 233.24, + "learning_rate": 9.630143865787951e-05, + "loss": 0.2566, + "step": 38640 + }, + { + "epoch": 233.36, + "learning_rate": 9.62844799603358e-05, + "loss": 0.2733, + "step": 38660 + }, + { + "epoch": 233.48, + "learning_rate": 9.62674839722226e-05, + "loss": 0.2854, + "step": 38680 + }, + { + "epoch": 233.6, + "learning_rate": 9.625045070723324e-05, + "loss": 0.2851, + "step": 38700 + }, + { + "epoch": 233.72, + "learning_rate": 9.623338017909113e-05, + "loss": 0.2984, + "step": 38720 + }, + { + "epoch": 233.84, + "learning_rate": 9.621627240154968e-05, + "loss": 0.3112, + "step": 38740 + }, + { + "epoch": 233.96, + "learning_rate": 9.619912738839233e-05, + "loss": 0.3176, + "step": 38760 + }, + { + "epoch": 234.08, + "learning_rate": 9.61819451534325e-05, + "loss": 0.257, + "step": 38780 + }, + { + "epoch": 234.21, + "learning_rate": 9.61647257105136e-05, + "loss": 0.2461, + "step": 38800 + }, + { + "epoch": 234.33, + "learning_rate": 9.614746907350904e-05, + "loss": 0.2659, + "step": 38820 + }, + { + "epoch": 234.45, + "learning_rate": 9.613017525632215e-05, + "loss": 0.2738, + "step": 38840 + }, + { + "epoch": 234.57, + "learning_rate": 9.611284427288628e-05, + "loss": 0.2759, + "step": 38860 + }, + { + "epoch": 234.69, + "learning_rate": 9.609547613716468e-05, + "loss": 0.288, + "step": 38880 + }, + { + "epoch": 234.81, + "learning_rate": 9.607807086315055e-05, + "loss": 0.2997, + "step": 38900 + }, + { + "epoch": 234.93, + "learning_rate": 9.606062846486698e-05, + "loss": 0.3054, + "step": 38920 + }, + { + "epoch": 235.05, + "learning_rate": 9.604314895636702e-05, + "loss": 0.2719, + "step": 38940 + }, + { + "epoch": 235.17, + "learning_rate": 9.602563235173356e-05, + "loss": 0.2404, + "step": 38960 + }, + { + "epoch": 235.29, + "learning_rate": 9.600807866507946e-05, + "loss": 0.2632, + "step": 38980 + }, + { + "epoch": 235.41, + "learning_rate": 9.59904879105474e-05, + "loss": 0.2688, + "step": 39000 + }, + { + "epoch": 235.41, + "eval_accuracy": 0.0012797463227667415, + "eval_loss": 16.930341720581055, + "eval_runtime": 18.3698, + "eval_samples_per_second": 111.052, + "eval_steps_per_second": 2.341, + "step": 39000 + }, + { + "epoch": 235.53, + "learning_rate": 9.597286010230989e-05, + "loss": 0.2756, + "step": 39020 + }, + { + "epoch": 235.65, + "learning_rate": 9.595519525456937e-05, + "loss": 0.2901, + "step": 39040 + }, + { + "epoch": 235.77, + "learning_rate": 9.593749338155809e-05, + "loss": 0.3015, + "step": 39060 + }, + { + "epoch": 235.9, + "learning_rate": 9.591975449753812e-05, + "loss": 0.3011, + "step": 39080 + }, + { + "epoch": 236.02, + "learning_rate": 9.590197861680137e-05, + "loss": 0.2924, + "step": 39100 + }, + { + "epoch": 236.14, + "learning_rate": 9.588416575366953e-05, + "loss": 0.2287, + "step": 39120 + }, + { + "epoch": 236.26, + "learning_rate": 9.58663159224941e-05, + "loss": 0.2561, + "step": 39140 + }, + { + "epoch": 236.38, + "learning_rate": 9.58484291376564e-05, + "loss": 0.269, + "step": 39160 + }, + { + "epoch": 236.5, + "learning_rate": 9.583050541356747e-05, + "loss": 0.2768, + "step": 39180 + }, + { + "epoch": 236.62, + "learning_rate": 9.58125447646681e-05, + "loss": 0.2751, + "step": 39200 + }, + { + "epoch": 236.74, + "learning_rate": 9.579454720542889e-05, + "loss": 0.2796, + "step": 39220 + }, + { + "epoch": 236.86, + "learning_rate": 9.577651275035015e-05, + "loss": 0.294, + "step": 39240 + }, + { + "epoch": 236.98, + "learning_rate": 9.575844141396191e-05, + "loss": 0.3024, + "step": 39260 + }, + { + "epoch": 237.1, + "learning_rate": 9.574033321082392e-05, + "loss": 0.225, + "step": 39280 + }, + { + "epoch": 237.22, + "learning_rate": 9.572218815552565e-05, + "loss": 0.2385, + "step": 39300 + }, + { + "epoch": 237.34, + "learning_rate": 9.57040062626862e-05, + "loss": 0.2572, + "step": 39320 + }, + { + "epoch": 237.46, + "learning_rate": 9.568578754695442e-05, + "loss": 0.2704, + "step": 39340 + }, + { + "epoch": 237.59, + "learning_rate": 9.566753202300881e-05, + "loss": 0.2814, + "step": 39360 + }, + { + "epoch": 237.71, + "learning_rate": 9.56492397055575e-05, + "loss": 0.2763, + "step": 39380 + }, + { + "epoch": 237.83, + "learning_rate": 9.563091060933829e-05, + "loss": 0.2887, + "step": 39400 + }, + { + "epoch": 237.95, + "learning_rate": 9.561254474911858e-05, + "loss": 0.3002, + "step": 39420 + }, + { + "epoch": 238.07, + "learning_rate": 9.559414213969543e-05, + "loss": 0.248, + "step": 39440 + }, + { + "epoch": 238.19, + "learning_rate": 9.55757027958955e-05, + "loss": 0.2257, + "step": 39460 + }, + { + "epoch": 238.31, + "learning_rate": 9.555722673257501e-05, + "loss": 0.2484, + "step": 39480 + }, + { + "epoch": 238.43, + "learning_rate": 9.55387139646198e-05, + "loss": 0.2583, + "step": 39500 + }, + { + "epoch": 238.55, + "learning_rate": 9.552016450694529e-05, + "loss": 0.2678, + "step": 39520 + }, + { + "epoch": 238.67, + "learning_rate": 9.550157837449643e-05, + "loss": 0.277, + "step": 39540 + }, + { + "epoch": 238.79, + "learning_rate": 9.548295558224771e-05, + "loss": 0.289, + "step": 39560 + }, + { + "epoch": 238.91, + "learning_rate": 9.546429614520322e-05, + "loss": 0.2988, + "step": 39580 + }, + { + "epoch": 239.03, + "learning_rate": 9.54456000783965e-05, + "loss": 0.2735, + "step": 39600 + }, + { + "epoch": 239.15, + "learning_rate": 9.542686739689065e-05, + "loss": 0.2326, + "step": 39620 + }, + { + "epoch": 239.28, + "learning_rate": 9.540809811577823e-05, + "loss": 0.2474, + "step": 39640 + }, + { + "epoch": 239.4, + "learning_rate": 9.538929225018133e-05, + "loss": 0.2511, + "step": 39660 + }, + { + "epoch": 239.52, + "learning_rate": 9.537044981525151e-05, + "loss": 0.2598, + "step": 39680 + }, + { + "epoch": 239.64, + "learning_rate": 9.535157082616975e-05, + "loss": 0.2636, + "step": 39700 + }, + { + "epoch": 239.76, + "learning_rate": 9.533265529814653e-05, + "loss": 0.2777, + "step": 39720 + }, + { + "epoch": 239.88, + "learning_rate": 9.531370324642175e-05, + "loss": 0.291, + "step": 39740 + }, + { + "epoch": 240.0, + "learning_rate": 9.529471468626472e-05, + "loss": 0.2991, + "step": 39760 + }, + { + "epoch": 240.12, + "learning_rate": 9.527568963297418e-05, + "loss": 0.221, + "step": 39780 + }, + { + "epoch": 240.24, + "learning_rate": 9.525662810187827e-05, + "loss": 0.2425, + "step": 39800 + }, + { + "epoch": 240.36, + "learning_rate": 9.523753010833456e-05, + "loss": 0.2525, + "step": 39820 + }, + { + "epoch": 240.48, + "learning_rate": 9.521839566772993e-05, + "loss": 0.254, + "step": 39840 + }, + { + "epoch": 240.6, + "learning_rate": 9.520018420410632e-05, + "loss": 0.2664, + "step": 39860 + }, + { + "epoch": 240.72, + "learning_rate": 9.518097873610068e-05, + "loss": 0.2762, + "step": 39880 + }, + { + "epoch": 240.85, + "learning_rate": 9.516173686659653e-05, + "loss": 0.2776, + "step": 39900 + }, + { + "epoch": 240.97, + "learning_rate": 9.51424586110967e-05, + "loss": 0.2882, + "step": 39920 + }, + { + "epoch": 241.09, + "learning_rate": 9.512314398513335e-05, + "loss": 0.2368, + "step": 39940 + }, + { + "epoch": 241.21, + "learning_rate": 9.510379300426791e-05, + "loss": 0.234, + "step": 39960 + }, + { + "epoch": 241.33, + "learning_rate": 9.508440568409111e-05, + "loss": 0.2474, + "step": 39980 + }, + { + "epoch": 241.45, + "learning_rate": 9.506498204022297e-05, + "loss": 0.2582, + "step": 40000 + }, + { + "epoch": 241.45, + "eval_accuracy": 0.001275794963993243, + "eval_loss": 17.22085189819336, + "eval_runtime": 18.3419, + "eval_samples_per_second": 111.221, + "eval_steps_per_second": 2.344, + "step": 40000 + }, + { + "epoch": 241.57, + "learning_rate": 9.504552208831279e-05, + "loss": 0.2672, + "step": 40020 + }, + { + "epoch": 241.69, + "learning_rate": 9.502602584403908e-05, + "loss": 0.2703, + "step": 40040 + }, + { + "epoch": 241.81, + "learning_rate": 9.50064933231096e-05, + "loss": 0.2776, + "step": 40060 + }, + { + "epoch": 241.93, + "learning_rate": 9.498692454126137e-05, + "loss": 0.2848, + "step": 40080 + }, + { + "epoch": 242.05, + "learning_rate": 9.496731951426059e-05, + "loss": 0.2521, + "step": 40100 + }, + { + "epoch": 242.17, + "learning_rate": 9.494767825790269e-05, + "loss": 0.2219, + "step": 40120 + }, + { + "epoch": 242.29, + "learning_rate": 9.492800078801228e-05, + "loss": 0.2415, + "step": 40140 + }, + { + "epoch": 242.41, + "learning_rate": 9.490828712044312e-05, + "loss": 0.2456, + "step": 40160 + }, + { + "epoch": 242.54, + "learning_rate": 9.488853727107818e-05, + "loss": 0.254, + "step": 40180 + }, + { + "epoch": 242.66, + "learning_rate": 9.486875125582955e-05, + "loss": 0.2649, + "step": 40200 + }, + { + "epoch": 242.78, + "learning_rate": 9.484892909063845e-05, + "loss": 0.2692, + "step": 40220 + }, + { + "epoch": 242.9, + "learning_rate": 9.482907079147524e-05, + "loss": 0.2733, + "step": 40240 + }, + { + "epoch": 243.02, + "learning_rate": 9.480917637433942e-05, + "loss": 0.2743, + "step": 40260 + }, + { + "epoch": 243.14, + "learning_rate": 9.478924585525955e-05, + "loss": 0.2128, + "step": 40280 + }, + { + "epoch": 243.26, + "learning_rate": 9.476927925029326e-05, + "loss": 0.2237, + "step": 40300 + }, + { + "epoch": 243.38, + "learning_rate": 9.474927657552733e-05, + "loss": 0.2399, + "step": 40320 + }, + { + "epoch": 243.5, + "learning_rate": 9.472923784707752e-05, + "loss": 0.2543, + "step": 40340 + }, + { + "epoch": 243.62, + "learning_rate": 9.47091630810887e-05, + "loss": 0.2638, + "step": 40360 + }, + { + "epoch": 243.74, + "learning_rate": 9.468905229373472e-05, + "loss": 0.274, + "step": 40380 + }, + { + "epoch": 243.86, + "learning_rate": 9.466890550121849e-05, + "loss": 0.2794, + "step": 40400 + }, + { + "epoch": 243.98, + "learning_rate": 9.464872271977189e-05, + "loss": 0.2843, + "step": 40420 + }, + { + "epoch": 244.1, + "learning_rate": 9.462850396565588e-05, + "loss": 0.2136, + "step": 40440 + }, + { + "epoch": 244.23, + "learning_rate": 9.460824925516031e-05, + "loss": 0.2276, + "step": 40460 + }, + { + "epoch": 244.35, + "learning_rate": 9.458795860460403e-05, + "loss": 0.2371, + "step": 40480 + }, + { + "epoch": 244.47, + "learning_rate": 9.456763203033485e-05, + "loss": 0.2377, + "step": 40500 + }, + { + "epoch": 244.59, + "learning_rate": 9.454726954872954e-05, + "loss": 0.2525, + "step": 40520 + }, + { + "epoch": 244.71, + "learning_rate": 9.452687117619377e-05, + "loss": 0.2647, + "step": 40540 + }, + { + "epoch": 244.83, + "learning_rate": 9.450643692916216e-05, + "loss": 0.2757, + "step": 40560 + }, + { + "epoch": 244.95, + "learning_rate": 9.448596682409819e-05, + "loss": 0.2834, + "step": 40580 + }, + { + "epoch": 245.07, + "learning_rate": 9.446546087749425e-05, + "loss": 0.2351, + "step": 40600 + }, + { + "epoch": 245.19, + "learning_rate": 9.444491910587163e-05, + "loss": 0.214, + "step": 40620 + }, + { + "epoch": 245.31, + "learning_rate": 9.442434152578046e-05, + "loss": 0.2268, + "step": 40640 + }, + { + "epoch": 245.43, + "learning_rate": 9.440372815379973e-05, + "loss": 0.2443, + "step": 40660 + }, + { + "epoch": 245.55, + "learning_rate": 9.438307900653724e-05, + "loss": 0.2506, + "step": 40680 + }, + { + "epoch": 245.67, + "learning_rate": 9.436239410062966e-05, + "loss": 0.2556, + "step": 40700 + }, + { + "epoch": 245.79, + "learning_rate": 9.434167345274244e-05, + "loss": 0.2669, + "step": 40720 + }, + { + "epoch": 245.92, + "learning_rate": 9.432091707956981e-05, + "loss": 0.2705, + "step": 40740 + }, + { + "epoch": 246.04, + "learning_rate": 9.430012499783482e-05, + "loss": 0.2517, + "step": 40760 + }, + { + "epoch": 246.16, + "learning_rate": 9.42792972242893e-05, + "loss": 0.2108, + "step": 40780 + }, + { + "epoch": 246.28, + "learning_rate": 9.425843377571377e-05, + "loss": 0.2259, + "step": 40800 + }, + { + "epoch": 246.4, + "learning_rate": 9.423753466891754e-05, + "loss": 0.2342, + "step": 40820 + }, + { + "epoch": 246.52, + "learning_rate": 9.421659992073866e-05, + "loss": 0.2451, + "step": 40840 + }, + { + "epoch": 246.64, + "learning_rate": 9.419562954804388e-05, + "loss": 0.2526, + "step": 40860 + }, + { + "epoch": 246.76, + "learning_rate": 9.417462356772863e-05, + "loss": 0.2691, + "step": 40880 + }, + { + "epoch": 246.88, + "learning_rate": 9.415358199671707e-05, + "loss": 0.2647, + "step": 40900 + }, + { + "epoch": 247.0, + "learning_rate": 9.4132504851962e-05, + "loss": 0.2669, + "step": 40920 + }, + { + "epoch": 247.12, + "learning_rate": 9.411139215044491e-05, + "loss": 0.1954, + "step": 40940 + }, + { + "epoch": 247.24, + "learning_rate": 9.409024390917591e-05, + "loss": 0.2133, + "step": 40960 + }, + { + "epoch": 247.36, + "learning_rate": 9.406906014519374e-05, + "loss": 0.228, + "step": 40980 + }, + { + "epoch": 247.48, + "learning_rate": 9.404784087556582e-05, + "loss": 0.238, + "step": 41000 + }, + { + "epoch": 247.48, + "eval_accuracy": 0.0012669044067528722, + "eval_loss": 17.53106689453125, + "eval_runtime": 18.3103, + "eval_samples_per_second": 111.413, + "eval_steps_per_second": 2.348, + "step": 41000 + }, + { + "epoch": 247.61, + "learning_rate": 9.402658611738809e-05, + "loss": 0.2404, + "step": 41020 + }, + { + "epoch": 247.73, + "learning_rate": 9.400529588778513e-05, + "loss": 0.246, + "step": 41040 + }, + { + "epoch": 247.85, + "learning_rate": 9.398397020391012e-05, + "loss": 0.2531, + "step": 41060 + }, + { + "epoch": 247.97, + "learning_rate": 9.396260908294476e-05, + "loss": 0.2728, + "step": 41080 + }, + { + "epoch": 248.09, + "learning_rate": 9.394121254209932e-05, + "loss": 0.2107, + "step": 41100 + }, + { + "epoch": 248.21, + "learning_rate": 9.391978059861261e-05, + "loss": 0.2131, + "step": 41120 + }, + { + "epoch": 248.33, + "learning_rate": 9.389831326975196e-05, + "loss": 0.2284, + "step": 41140 + }, + { + "epoch": 248.45, + "learning_rate": 9.38768105728132e-05, + "loss": 0.2374, + "step": 41160 + }, + { + "epoch": 248.57, + "learning_rate": 9.385527252512068e-05, + "loss": 0.2467, + "step": 41180 + }, + { + "epoch": 248.69, + "learning_rate": 9.38336991440272e-05, + "loss": 0.2539, + "step": 41200 + }, + { + "epoch": 248.81, + "learning_rate": 9.381209044691405e-05, + "loss": 0.2612, + "step": 41220 + }, + { + "epoch": 248.93, + "learning_rate": 9.379044645119098e-05, + "loss": 0.2592, + "step": 41240 + }, + { + "epoch": 249.05, + "learning_rate": 9.376876717429615e-05, + "loss": 0.2225, + "step": 41260 + }, + { + "epoch": 249.18, + "learning_rate": 9.374705263369617e-05, + "loss": 0.2056, + "step": 41280 + }, + { + "epoch": 249.3, + "learning_rate": 9.372530284688603e-05, + "loss": 0.2156, + "step": 41300 + }, + { + "epoch": 249.42, + "learning_rate": 9.370351783138917e-05, + "loss": 0.2303, + "step": 41320 + }, + { + "epoch": 249.54, + "learning_rate": 9.36816976047574e-05, + "loss": 0.2363, + "step": 41340 + }, + { + "epoch": 249.66, + "learning_rate": 9.365984218457083e-05, + "loss": 0.2437, + "step": 41360 + }, + { + "epoch": 249.78, + "learning_rate": 9.3637951588438e-05, + "loss": 0.258, + "step": 41380 + }, + { + "epoch": 249.9, + "learning_rate": 9.361602583399578e-05, + "loss": 0.2636, + "step": 41400 + }, + { + "epoch": 250.02, + "learning_rate": 9.359406493890934e-05, + "loss": 0.2554, + "step": 41420 + }, + { + "epoch": 250.14, + "learning_rate": 9.357206892087219e-05, + "loss": 0.2029, + "step": 41440 + }, + { + "epoch": 250.26, + "learning_rate": 9.355003779760612e-05, + "loss": 0.2184, + "step": 41460 + }, + { + "epoch": 250.38, + "learning_rate": 9.352797158686119e-05, + "loss": 0.223, + "step": 41480 + }, + { + "epoch": 250.5, + "learning_rate": 9.350587030641578e-05, + "loss": 0.2358, + "step": 41500 + }, + { + "epoch": 250.62, + "learning_rate": 9.348373397407646e-05, + "loss": 0.2397, + "step": 41520 + }, + { + "epoch": 250.74, + "learning_rate": 9.346156260767809e-05, + "loss": 0.2508, + "step": 41540 + }, + { + "epoch": 250.87, + "learning_rate": 9.343935622508373e-05, + "loss": 0.2566, + "step": 41560 + }, + { + "epoch": 250.99, + "learning_rate": 9.341711484418468e-05, + "loss": 0.2656, + "step": 41580 + }, + { + "epoch": 251.11, + "learning_rate": 9.33948384829004e-05, + "loss": 0.1967, + "step": 41600 + }, + { + "epoch": 251.23, + "learning_rate": 9.337252715917857e-05, + "loss": 0.2034, + "step": 41620 + }, + { + "epoch": 251.35, + "learning_rate": 9.3350180890995e-05, + "loss": 0.2193, + "step": 41640 + }, + { + "epoch": 251.47, + "learning_rate": 9.332779969635369e-05, + "loss": 0.2288, + "step": 41660 + }, + { + "epoch": 251.59, + "learning_rate": 9.330538359328675e-05, + "loss": 0.2342, + "step": 41680 + }, + { + "epoch": 251.71, + "learning_rate": 9.328293259985446e-05, + "loss": 0.2438, + "step": 41700 + }, + { + "epoch": 251.83, + "learning_rate": 9.326044673414513e-05, + "loss": 0.2479, + "step": 41720 + }, + { + "epoch": 251.95, + "learning_rate": 9.323792601427526e-05, + "loss": 0.2524, + "step": 41740 + }, + { + "epoch": 252.07, + "learning_rate": 9.321537045838936e-05, + "loss": 0.2124, + "step": 41760 + }, + { + "epoch": 252.19, + "learning_rate": 9.319278008466006e-05, + "loss": 0.204, + "step": 41780 + }, + { + "epoch": 252.31, + "learning_rate": 9.317015491128797e-05, + "loss": 0.2212, + "step": 41800 + }, + { + "epoch": 252.43, + "learning_rate": 9.314749495650186e-05, + "loss": 0.2324, + "step": 41820 + }, + { + "epoch": 252.56, + "learning_rate": 9.312480023855836e-05, + "loss": 0.2432, + "step": 41840 + }, + { + "epoch": 252.68, + "learning_rate": 9.310207077574227e-05, + "loss": 0.2483, + "step": 41860 + }, + { + "epoch": 252.8, + "learning_rate": 9.307930658636626e-05, + "loss": 0.2507, + "step": 41880 + }, + { + "epoch": 252.92, + "learning_rate": 9.305650768877104e-05, + "loss": 0.2496, + "step": 41900 + }, + { + "epoch": 253.04, + "learning_rate": 9.303367410132526e-05, + "loss": 0.2302, + "step": 41920 + }, + { + "epoch": 253.16, + "learning_rate": 9.301080584242554e-05, + "loss": 0.1952, + "step": 41940 + }, + { + "epoch": 253.28, + "learning_rate": 9.298790293049641e-05, + "loss": 0.2053, + "step": 41960 + }, + { + "epoch": 253.4, + "learning_rate": 9.296496538399036e-05, + "loss": 0.2208, + "step": 41980 + }, + { + "epoch": 253.52, + "learning_rate": 9.294199322138773e-05, + "loss": 0.2261, + "step": 42000 + }, + { + "epoch": 253.52, + "eval_accuracy": 0.0012703618456796832, + "eval_loss": 17.773109436035156, + "eval_runtime": 18.2679, + "eval_samples_per_second": 111.672, + "eval_steps_per_second": 2.354, + "step": 42000 + }, + { + "epoch": 253.64, + "learning_rate": 9.29189864611968e-05, + "loss": 0.2384, + "step": 42020 + }, + { + "epoch": 253.76, + "learning_rate": 9.289594512195368e-05, + "loss": 0.2474, + "step": 42040 + }, + { + "epoch": 253.88, + "learning_rate": 9.287286922222239e-05, + "loss": 0.2504, + "step": 42060 + }, + { + "epoch": 254.0, + "learning_rate": 9.28497587805947e-05, + "loss": 0.2465, + "step": 42080 + }, + { + "epoch": 254.12, + "learning_rate": 9.282661381569036e-05, + "loss": 0.1798, + "step": 42100 + }, + { + "epoch": 254.25, + "learning_rate": 9.280343434615679e-05, + "loss": 0.2024, + "step": 42120 + }, + { + "epoch": 254.37, + "learning_rate": 9.27802203906693e-05, + "loss": 0.2118, + "step": 42140 + }, + { + "epoch": 254.49, + "learning_rate": 9.275697196793094e-05, + "loss": 0.2175, + "step": 42160 + }, + { + "epoch": 254.61, + "learning_rate": 9.273368909667255e-05, + "loss": 0.2289, + "step": 42180 + }, + { + "epoch": 254.73, + "learning_rate": 9.271037179565272e-05, + "loss": 0.2415, + "step": 42200 + }, + { + "epoch": 254.85, + "learning_rate": 9.268702008365777e-05, + "loss": 0.2444, + "step": 42220 + }, + { + "epoch": 254.97, + "learning_rate": 9.266363397950178e-05, + "loss": 0.2511, + "step": 42240 + }, + { + "epoch": 255.09, + "learning_rate": 9.264021350202646e-05, + "loss": 0.1973, + "step": 42260 + }, + { + "epoch": 255.21, + "learning_rate": 9.26167586701013e-05, + "loss": 0.203, + "step": 42280 + }, + { + "epoch": 255.33, + "learning_rate": 9.259326950262345e-05, + "loss": 0.2129, + "step": 42300 + }, + { + "epoch": 255.45, + "learning_rate": 9.256974601851767e-05, + "loss": 0.2219, + "step": 42320 + }, + { + "epoch": 255.57, + "learning_rate": 9.254618823673644e-05, + "loss": 0.2311, + "step": 42340 + }, + { + "epoch": 255.69, + "learning_rate": 9.252259617625983e-05, + "loss": 0.2318, + "step": 42360 + }, + { + "epoch": 255.81, + "learning_rate": 9.249896985609551e-05, + "loss": 0.2426, + "step": 42380 + }, + { + "epoch": 255.94, + "learning_rate": 9.247530929527881e-05, + "loss": 0.2493, + "step": 42400 + }, + { + "epoch": 256.06, + "learning_rate": 9.245161451287261e-05, + "loss": 0.213, + "step": 42420 + }, + { + "epoch": 256.18, + "learning_rate": 9.242788552796735e-05, + "loss": 0.1958, + "step": 42440 + }, + { + "epoch": 256.3, + "learning_rate": 9.240412235968108e-05, + "loss": 0.2034, + "step": 42460 + }, + { + "epoch": 256.42, + "learning_rate": 9.238032502715931e-05, + "loss": 0.2114, + "step": 42480 + }, + { + "epoch": 256.54, + "learning_rate": 9.235649354957517e-05, + "loss": 0.215, + "step": 42500 + }, + { + "epoch": 256.66, + "learning_rate": 9.233262794612923e-05, + "loss": 0.2266, + "step": 42520 + }, + { + "epoch": 256.78, + "learning_rate": 9.230872823604956e-05, + "loss": 0.2381, + "step": 42540 + }, + { + "epoch": 256.9, + "learning_rate": 9.228479443859173e-05, + "loss": 0.2428, + "step": 42560 + }, + { + "epoch": 257.02, + "learning_rate": 9.226082657303879e-05, + "loss": 0.2346, + "step": 42580 + }, + { + "epoch": 257.14, + "learning_rate": 9.22368246587012e-05, + "loss": 0.1906, + "step": 42600 + }, + { + "epoch": 257.26, + "learning_rate": 9.221278871491688e-05, + "loss": 0.1992, + "step": 42620 + }, + { + "epoch": 257.38, + "learning_rate": 9.218871876105113e-05, + "loss": 0.2092, + "step": 42640 + }, + { + "epoch": 257.51, + "learning_rate": 9.21646148164967e-05, + "loss": 0.215, + "step": 42660 + }, + { + "epoch": 257.63, + "learning_rate": 9.214047690067369e-05, + "loss": 0.2278, + "step": 42680 + }, + { + "epoch": 257.75, + "learning_rate": 9.21163050330296e-05, + "loss": 0.2331, + "step": 42700 + }, + { + "epoch": 257.87, + "learning_rate": 9.209209923303925e-05, + "loss": 0.2367, + "step": 42720 + }, + { + "epoch": 257.99, + "learning_rate": 9.206785952020482e-05, + "loss": 0.2468, + "step": 42740 + }, + { + "epoch": 258.11, + "learning_rate": 9.204358591405582e-05, + "loss": 0.1846, + "step": 42760 + }, + { + "epoch": 258.23, + "learning_rate": 9.201927843414904e-05, + "loss": 0.1935, + "step": 42780 + }, + { + "epoch": 258.35, + "learning_rate": 9.199493710006859e-05, + "loss": 0.2078, + "step": 42800 + }, + { + "epoch": 258.47, + "learning_rate": 9.197056193142583e-05, + "loss": 0.215, + "step": 42820 + }, + { + "epoch": 258.59, + "learning_rate": 9.194615294785942e-05, + "loss": 0.2207, + "step": 42840 + }, + { + "epoch": 258.71, + "learning_rate": 9.192171016903521e-05, + "loss": 0.2314, + "step": 42860 + }, + { + "epoch": 258.83, + "learning_rate": 9.189723361464631e-05, + "loss": 0.2314, + "step": 42880 + }, + { + "epoch": 258.95, + "learning_rate": 9.187272330441303e-05, + "loss": 0.2301, + "step": 42900 + }, + { + "epoch": 259.07, + "learning_rate": 9.184817925808291e-05, + "loss": 0.196, + "step": 42920 + }, + { + "epoch": 259.2, + "learning_rate": 9.182360149543063e-05, + "loss": 0.1875, + "step": 42940 + }, + { + "epoch": 259.32, + "learning_rate": 9.179899003625804e-05, + "loss": 0.1949, + "step": 42960 + }, + { + "epoch": 259.44, + "learning_rate": 9.177434490039418e-05, + "loss": 0.2029, + "step": 42980 + }, + { + "epoch": 259.56, + "learning_rate": 9.174966610769516e-05, + "loss": 0.21, + "step": 43000 + }, + { + "epoch": 259.56, + "eval_accuracy": 0.0012723375250664323, + "eval_loss": 18.020519256591797, + "eval_runtime": 18.2997, + "eval_samples_per_second": 111.477, + "eval_steps_per_second": 2.35, + "step": 43000 + }, + { + "epoch": 259.68, + "learning_rate": 9.172495367804423e-05, + "loss": 0.22, + "step": 43020 + }, + { + "epoch": 259.8, + "learning_rate": 9.170020763135179e-05, + "loss": 0.2269, + "step": 43040 + }, + { + "epoch": 259.92, + "learning_rate": 9.16766677673682e-05, + "loss": 0.2345, + "step": 43060 + }, + { + "epoch": 260.04, + "learning_rate": 9.165185622481447e-05, + "loss": 0.211, + "step": 43080 + }, + { + "epoch": 260.16, + "learning_rate": 9.162701112411252e-05, + "loss": 0.1793, + "step": 43100 + }, + { + "epoch": 260.28, + "learning_rate": 9.160213248527955e-05, + "loss": 0.1949, + "step": 43120 + }, + { + "epoch": 260.4, + "learning_rate": 9.15772203283599e-05, + "loss": 0.2091, + "step": 43140 + }, + { + "epoch": 260.52, + "learning_rate": 9.155227467342477e-05, + "loss": 0.2117, + "step": 43160 + }, + { + "epoch": 260.64, + "learning_rate": 9.152729554057249e-05, + "loss": 0.2188, + "step": 43180 + }, + { + "epoch": 260.76, + "learning_rate": 9.150228294992826e-05, + "loss": 0.2277, + "step": 43200 + }, + { + "epoch": 260.89, + "learning_rate": 9.147723692164427e-05, + "loss": 0.2348, + "step": 43220 + }, + { + "epoch": 261.01, + "learning_rate": 9.145215747589966e-05, + "loss": 0.2308, + "step": 43240 + }, + { + "epoch": 261.13, + "learning_rate": 9.142704463290049e-05, + "loss": 0.1738, + "step": 43260 + }, + { + "epoch": 261.25, + "learning_rate": 9.140189841287972e-05, + "loss": 0.1887, + "step": 43280 + }, + { + "epoch": 261.37, + "learning_rate": 9.137671883609721e-05, + "loss": 0.198, + "step": 43300 + }, + { + "epoch": 261.49, + "learning_rate": 9.13515059228397e-05, + "loss": 0.2048, + "step": 43320 + }, + { + "epoch": 261.61, + "learning_rate": 9.132625969342075e-05, + "loss": 0.2194, + "step": 43340 + }, + { + "epoch": 261.73, + "learning_rate": 9.13009801681808e-05, + "loss": 0.2212, + "step": 43360 + }, + { + "epoch": 261.85, + "learning_rate": 9.127566736748714e-05, + "loss": 0.2255, + "step": 43380 + }, + { + "epoch": 261.97, + "learning_rate": 9.12503213117338e-05, + "loss": 0.2328, + "step": 43400 + }, + { + "epoch": 262.09, + "learning_rate": 9.122494202134162e-05, + "loss": 0.1815, + "step": 43420 + }, + { + "epoch": 262.21, + "learning_rate": 9.11995295167583e-05, + "loss": 0.1861, + "step": 43440 + }, + { + "epoch": 262.33, + "learning_rate": 9.117408381845817e-05, + "loss": 0.19, + "step": 43460 + }, + { + "epoch": 262.45, + "learning_rate": 9.114860494694244e-05, + "loss": 0.1978, + "step": 43480 + }, + { + "epoch": 262.58, + "learning_rate": 9.112309292273891e-05, + "loss": 0.2094, + "step": 43500 + }, + { + "epoch": 262.7, + "learning_rate": 9.10975477664022e-05, + "loss": 0.2218, + "step": 43520 + }, + { + "epoch": 262.82, + "learning_rate": 9.107196949851356e-05, + "loss": 0.2242, + "step": 43540 + }, + { + "epoch": 262.94, + "learning_rate": 9.104635813968093e-05, + "loss": 0.2231, + "step": 43560 + }, + { + "epoch": 263.06, + "learning_rate": 9.102071371053896e-05, + "loss": 0.1983, + "step": 43580 + }, + { + "epoch": 263.18, + "learning_rate": 9.099503623174885e-05, + "loss": 0.1759, + "step": 43600 + }, + { + "epoch": 263.3, + "learning_rate": 9.096932572399852e-05, + "loss": 0.1882, + "step": 43620 + }, + { + "epoch": 263.42, + "learning_rate": 9.094358220800243e-05, + "loss": 0.2008, + "step": 43640 + }, + { + "epoch": 263.54, + "learning_rate": 9.09178057045017e-05, + "loss": 0.2054, + "step": 43660 + }, + { + "epoch": 263.66, + "learning_rate": 9.089199623426398e-05, + "loss": 0.2105, + "step": 43680 + }, + { + "epoch": 263.78, + "learning_rate": 9.08661538180835e-05, + "loss": 0.2176, + "step": 43700 + }, + { + "epoch": 263.9, + "learning_rate": 9.0840278476781e-05, + "loss": 0.2194, + "step": 43720 + }, + { + "epoch": 264.02, + "learning_rate": 9.081437023120381e-05, + "loss": 0.2127, + "step": 43740 + }, + { + "epoch": 264.14, + "learning_rate": 9.078842910222573e-05, + "loss": 0.1643, + "step": 43760 + }, + { + "epoch": 264.27, + "learning_rate": 9.076245511074703e-05, + "loss": 0.1765, + "step": 43780 + }, + { + "epoch": 264.39, + "learning_rate": 9.073644827769451e-05, + "loss": 0.1886, + "step": 43800 + }, + { + "epoch": 264.51, + "learning_rate": 9.071040862402141e-05, + "loss": 0.1927, + "step": 43820 + }, + { + "epoch": 264.63, + "learning_rate": 9.06843361707074e-05, + "loss": 0.2045, + "step": 43840 + }, + { + "epoch": 264.75, + "learning_rate": 9.065823093875858e-05, + "loss": 0.2113, + "step": 43860 + }, + { + "epoch": 264.87, + "learning_rate": 9.063209294920746e-05, + "loss": 0.2232, + "step": 43880 + }, + { + "epoch": 264.99, + "learning_rate": 9.060592222311296e-05, + "loss": 0.2263, + "step": 43900 + }, + { + "epoch": 265.11, + "learning_rate": 9.057971878156036e-05, + "loss": 0.1684, + "step": 43920 + }, + { + "epoch": 265.23, + "learning_rate": 9.05534826456613e-05, + "loss": 0.1869, + "step": 43940 + }, + { + "epoch": 265.35, + "learning_rate": 9.052721383655374e-05, + "loss": 0.1947, + "step": 43960 + }, + { + "epoch": 265.47, + "learning_rate": 9.050091237540201e-05, + "loss": 0.1987, + "step": 43980 + }, + { + "epoch": 265.59, + "learning_rate": 9.047457828339672e-05, + "loss": 0.2073, + "step": 44000 + }, + { + "epoch": 265.59, + "eval_accuracy": 0.001287649040313738, + "eval_loss": 18.269285202026367, + "eval_runtime": 18.1707, + "eval_samples_per_second": 112.268, + "eval_steps_per_second": 2.366, + "step": 44000 + }, + { + "epoch": 265.71, + "learning_rate": 9.044821158175476e-05, + "loss": 0.2091, + "step": 44020 + }, + { + "epoch": 265.84, + "learning_rate": 9.042181229171931e-05, + "loss": 0.2144, + "step": 44040 + }, + { + "epoch": 265.96, + "learning_rate": 9.03953804345598e-05, + "loss": 0.2186, + "step": 44060 + }, + { + "epoch": 266.08, + "learning_rate": 9.03689160315719e-05, + "loss": 0.1812, + "step": 44080 + }, + { + "epoch": 266.2, + "learning_rate": 9.03424191040775e-05, + "loss": 0.1765, + "step": 44100 + }, + { + "epoch": 266.32, + "learning_rate": 9.031588967342473e-05, + "loss": 0.1837, + "step": 44120 + }, + { + "epoch": 266.44, + "learning_rate": 9.028932776098781e-05, + "loss": 0.1924, + "step": 44140 + }, + { + "epoch": 266.56, + "learning_rate": 9.026273338816724e-05, + "loss": 0.2053, + "step": 44160 + }, + { + "epoch": 266.68, + "learning_rate": 9.02361065763896e-05, + "loss": 0.2062, + "step": 44180 + }, + { + "epoch": 266.8, + "learning_rate": 9.020944734710766e-05, + "loss": 0.2131, + "step": 44200 + }, + { + "epoch": 266.92, + "learning_rate": 9.018275572180024e-05, + "loss": 0.2233, + "step": 44220 + }, + { + "epoch": 267.04, + "learning_rate": 9.015603172197233e-05, + "loss": 0.1966, + "step": 44240 + }, + { + "epoch": 267.16, + "learning_rate": 9.012927536915497e-05, + "loss": 0.1641, + "step": 44260 + }, + { + "epoch": 267.28, + "learning_rate": 9.010248668490528e-05, + "loss": 0.1751, + "step": 44280 + }, + { + "epoch": 267.4, + "learning_rate": 9.007566569080639e-05, + "loss": 0.1921, + "step": 44300 + }, + { + "epoch": 267.53, + "learning_rate": 9.004881240846751e-05, + "loss": 0.1983, + "step": 44320 + }, + { + "epoch": 267.65, + "learning_rate": 9.002192685952385e-05, + "loss": 0.2062, + "step": 44340 + }, + { + "epoch": 267.77, + "learning_rate": 8.999500906563658e-05, + "loss": 0.2132, + "step": 44360 + }, + { + "epoch": 267.89, + "learning_rate": 8.996805904849292e-05, + "loss": 0.2181, + "step": 44380 + }, + { + "epoch": 268.01, + "learning_rate": 8.9941076829806e-05, + "loss": 0.224, + "step": 44400 + }, + { + "epoch": 268.13, + "learning_rate": 8.991406243131489e-05, + "loss": 0.1613, + "step": 44420 + }, + { + "epoch": 268.25, + "learning_rate": 8.98870158747846e-05, + "loss": 0.1759, + "step": 44440 + }, + { + "epoch": 268.37, + "learning_rate": 8.985993718200608e-05, + "loss": 0.1872, + "step": 44460 + }, + { + "epoch": 268.49, + "learning_rate": 8.983282637479614e-05, + "loss": 0.1948, + "step": 44480 + }, + { + "epoch": 268.61, + "learning_rate": 8.980568347499744e-05, + "loss": 0.2063, + "step": 44500 + }, + { + "epoch": 268.73, + "learning_rate": 8.977850850447855e-05, + "loss": 0.2108, + "step": 44520 + }, + { + "epoch": 268.85, + "learning_rate": 8.975130148513384e-05, + "loss": 0.2153, + "step": 44540 + }, + { + "epoch": 268.97, + "learning_rate": 8.972406243888351e-05, + "loss": 0.2124, + "step": 44560 + }, + { + "epoch": 269.09, + "learning_rate": 8.969679138767359e-05, + "loss": 0.1699, + "step": 44580 + }, + { + "epoch": 269.22, + "learning_rate": 8.966948835347586e-05, + "loss": 0.1748, + "step": 44600 + }, + { + "epoch": 269.34, + "learning_rate": 8.964215335828787e-05, + "loss": 0.1807, + "step": 44620 + }, + { + "epoch": 269.46, + "learning_rate": 8.961478642413294e-05, + "loss": 0.1875, + "step": 44640 + }, + { + "epoch": 269.58, + "learning_rate": 8.958738757306013e-05, + "loss": 0.2025, + "step": 44660 + }, + { + "epoch": 269.7, + "learning_rate": 8.955995682714417e-05, + "loss": 0.2071, + "step": 44680 + }, + { + "epoch": 269.82, + "learning_rate": 8.953249420848555e-05, + "loss": 0.2106, + "step": 44700 + }, + { + "epoch": 269.94, + "learning_rate": 8.950499973921038e-05, + "loss": 0.2192, + "step": 44720 + }, + { + "epoch": 270.06, + "learning_rate": 8.947747344147049e-05, + "loss": 0.183, + "step": 44740 + }, + { + "epoch": 270.18, + "learning_rate": 8.944991533744327e-05, + "loss": 0.163, + "step": 44760 + }, + { + "epoch": 270.3, + "learning_rate": 8.942232544933182e-05, + "loss": 0.1736, + "step": 44780 + }, + { + "epoch": 270.42, + "learning_rate": 8.93947037993648e-05, + "loss": 0.1891, + "step": 44800 + }, + { + "epoch": 270.54, + "learning_rate": 8.93670504097965e-05, + "loss": 0.195, + "step": 44820 + }, + { + "epoch": 270.66, + "learning_rate": 8.933936530290672e-05, + "loss": 0.2006, + "step": 44840 + }, + { + "epoch": 270.78, + "learning_rate": 8.931164850100086e-05, + "loss": 0.21, + "step": 44860 + }, + { + "epoch": 270.91, + "learning_rate": 8.928390002640989e-05, + "loss": 0.2132, + "step": 44880 + }, + { + "epoch": 271.03, + "learning_rate": 8.925611990149021e-05, + "loss": 0.1937, + "step": 44900 + }, + { + "epoch": 271.15, + "learning_rate": 8.922830814862377e-05, + "loss": 0.1565, + "step": 44920 + }, + { + "epoch": 271.27, + "learning_rate": 8.920046479021799e-05, + "loss": 0.1704, + "step": 44940 + }, + { + "epoch": 271.39, + "learning_rate": 8.91725898487058e-05, + "loss": 0.1823, + "step": 44960 + }, + { + "epoch": 271.51, + "learning_rate": 8.91446833465455e-05, + "loss": 0.1915, + "step": 44980 + }, + { + "epoch": 271.63, + "learning_rate": 8.911674530622088e-05, + "loss": 0.1976, + "step": 45000 + }, + { + "epoch": 271.63, + "eval_accuracy": 0.0012748071242998686, + "eval_loss": 18.4633846282959, + "eval_runtime": 18.1499, + "eval_samples_per_second": 112.397, + "eval_steps_per_second": 2.369, + "step": 45000 + }, + { + "epoch": 271.75, + "learning_rate": 8.908877575024107e-05, + "loss": 0.2002, + "step": 45020 + }, + { + "epoch": 271.87, + "learning_rate": 8.906077470114069e-05, + "loss": 0.2083, + "step": 45040 + }, + { + "epoch": 271.99, + "learning_rate": 8.903274218147963e-05, + "loss": 0.2086, + "step": 45060 + }, + { + "epoch": 272.11, + "learning_rate": 8.900467821384324e-05, + "loss": 0.1585, + "step": 45080 + }, + { + "epoch": 272.23, + "learning_rate": 8.897658282084211e-05, + "loss": 0.1718, + "step": 45100 + }, + { + "epoch": 272.35, + "learning_rate": 8.894845602511219e-05, + "loss": 0.1782, + "step": 45120 + }, + { + "epoch": 272.47, + "learning_rate": 8.892029784931476e-05, + "loss": 0.1915, + "step": 45140 + }, + { + "epoch": 272.6, + "learning_rate": 8.889210831613634e-05, + "loss": 0.1949, + "step": 45160 + }, + { + "epoch": 272.72, + "learning_rate": 8.886388744828872e-05, + "loss": 0.1967, + "step": 45180 + }, + { + "epoch": 272.84, + "learning_rate": 8.883563526850894e-05, + "loss": 0.1999, + "step": 45200 + }, + { + "epoch": 272.96, + "learning_rate": 8.88073517995593e-05, + "loss": 0.2079, + "step": 45220 + }, + { + "epoch": 273.08, + "learning_rate": 8.877903706422724e-05, + "loss": 0.1664, + "step": 45240 + }, + { + "epoch": 273.2, + "learning_rate": 8.875069108532546e-05, + "loss": 0.1624, + "step": 45260 + }, + { + "epoch": 273.32, + "learning_rate": 8.87223138856918e-05, + "loss": 0.1712, + "step": 45280 + }, + { + "epoch": 273.44, + "learning_rate": 8.869390548818923e-05, + "loss": 0.185, + "step": 45300 + }, + { + "epoch": 273.56, + "learning_rate": 8.866546591570592e-05, + "loss": 0.1908, + "step": 45320 + }, + { + "epoch": 273.68, + "learning_rate": 8.863699519115511e-05, + "loss": 0.199, + "step": 45340 + }, + { + "epoch": 273.8, + "learning_rate": 8.860849333747514e-05, + "loss": 0.199, + "step": 45360 + }, + { + "epoch": 273.92, + "learning_rate": 8.857996037762944e-05, + "loss": 0.2075, + "step": 45380 + }, + { + "epoch": 274.04, + "learning_rate": 8.855139633460649e-05, + "loss": 0.1818, + "step": 45400 + }, + { + "epoch": 274.16, + "learning_rate": 8.852280123141984e-05, + "loss": 0.1547, + "step": 45420 + }, + { + "epoch": 274.29, + "learning_rate": 8.849417509110805e-05, + "loss": 0.1717, + "step": 45440 + }, + { + "epoch": 274.41, + "learning_rate": 8.846551793673467e-05, + "loss": 0.1772, + "step": 45460 + }, + { + "epoch": 274.53, + "learning_rate": 8.843682979138825e-05, + "loss": 0.1837, + "step": 45480 + }, + { + "epoch": 274.65, + "learning_rate": 8.840811067818233e-05, + "loss": 0.1893, + "step": 45500 + }, + { + "epoch": 274.77, + "learning_rate": 8.837936062025538e-05, + "loss": 0.1939, + "step": 45520 + }, + { + "epoch": 274.89, + "learning_rate": 8.835057964077079e-05, + "loss": 0.1976, + "step": 45540 + }, + { + "epoch": 275.01, + "learning_rate": 8.832176776291688e-05, + "loss": 0.1941, + "step": 45560 + }, + { + "epoch": 275.13, + "learning_rate": 8.829292500990683e-05, + "loss": 0.1437, + "step": 45580 + }, + { + "epoch": 275.25, + "learning_rate": 8.826405140497878e-05, + "loss": 0.1559, + "step": 45600 + }, + { + "epoch": 275.37, + "learning_rate": 8.823514697139564e-05, + "loss": 0.166, + "step": 45620 + }, + { + "epoch": 275.49, + "learning_rate": 8.820621173244519e-05, + "loss": 0.1785, + "step": 45640 + }, + { + "epoch": 275.61, + "learning_rate": 8.817724571144004e-05, + "loss": 0.1846, + "step": 45660 + }, + { + "epoch": 275.73, + "learning_rate": 8.814824893171758e-05, + "loss": 0.1921, + "step": 45680 + }, + { + "epoch": 275.86, + "learning_rate": 8.811922141664e-05, + "loss": 0.1994, + "step": 45700 + }, + { + "epoch": 275.98, + "learning_rate": 8.809016318959424e-05, + "loss": 0.2001, + "step": 45720 + }, + { + "epoch": 276.1, + "learning_rate": 8.806107427399197e-05, + "loss": 0.1521, + "step": 45740 + }, + { + "epoch": 276.22, + "learning_rate": 8.803195469326964e-05, + "loss": 0.1565, + "step": 45760 + }, + { + "epoch": 276.34, + "learning_rate": 8.800280447088836e-05, + "loss": 0.1699, + "step": 45780 + }, + { + "epoch": 276.46, + "learning_rate": 8.797362363033392e-05, + "loss": 0.1753, + "step": 45800 + }, + { + "epoch": 276.58, + "learning_rate": 8.794441219511681e-05, + "loss": 0.1801, + "step": 45820 + }, + { + "epoch": 276.7, + "learning_rate": 8.791517018877216e-05, + "loss": 0.1878, + "step": 45840 + }, + { + "epoch": 276.82, + "learning_rate": 8.78858976348597e-05, + "loss": 0.191, + "step": 45860 + }, + { + "epoch": 276.94, + "learning_rate": 8.785659455696384e-05, + "loss": 0.1946, + "step": 45880 + }, + { + "epoch": 277.06, + "learning_rate": 8.782726097869349e-05, + "loss": 0.1662, + "step": 45900 + }, + { + "epoch": 277.18, + "learning_rate": 8.779789692368223e-05, + "loss": 0.1548, + "step": 45920 + }, + { + "epoch": 277.3, + "learning_rate": 8.776850241558814e-05, + "loss": 0.1647, + "step": 45940 + }, + { + "epoch": 277.42, + "learning_rate": 8.773907747809383e-05, + "loss": 0.1762, + "step": 45960 + }, + { + "epoch": 277.55, + "learning_rate": 8.770962213490643e-05, + "loss": 0.1846, + "step": 45980 + }, + { + "epoch": 277.67, + "learning_rate": 8.768013640975761e-05, + "loss": 0.1865, + "step": 46000 + }, + { + "epoch": 277.67, + "eval_accuracy": 0.0012427023342651954, + "eval_loss": 18.721508026123047, + "eval_runtime": 18.1526, + "eval_samples_per_second": 112.381, + "eval_steps_per_second": 2.369, + "step": 46000 + }, + { + "epoch": 277.79, + "learning_rate": 8.765062032640346e-05, + "loss": 0.1917, + "step": 46020 + }, + { + "epoch": 277.91, + "learning_rate": 8.762107390862455e-05, + "loss": 0.1932, + "step": 46040 + }, + { + "epoch": 278.03, + "learning_rate": 8.759149718022594e-05, + "loss": 0.1826, + "step": 46060 + }, + { + "epoch": 278.15, + "learning_rate": 8.756189016503702e-05, + "loss": 0.149, + "step": 46080 + }, + { + "epoch": 278.27, + "learning_rate": 8.753225288691165e-05, + "loss": 0.1608, + "step": 46100 + }, + { + "epoch": 278.39, + "learning_rate": 8.750258536972804e-05, + "loss": 0.1662, + "step": 46120 + }, + { + "epoch": 278.51, + "learning_rate": 8.747288763738877e-05, + "loss": 0.175, + "step": 46140 + }, + { + "epoch": 278.63, + "learning_rate": 8.744315971382078e-05, + "loss": 0.1816, + "step": 46160 + }, + { + "epoch": 278.75, + "learning_rate": 8.741340162297531e-05, + "loss": 0.1871, + "step": 46180 + }, + { + "epoch": 278.87, + "learning_rate": 8.738361338882792e-05, + "loss": 0.1913, + "step": 46200 + }, + { + "epoch": 278.99, + "learning_rate": 8.735379503537844e-05, + "loss": 0.1944, + "step": 46220 + }, + { + "epoch": 279.11, + "learning_rate": 8.732394658665101e-05, + "loss": 0.1461, + "step": 46240 + }, + { + "epoch": 279.24, + "learning_rate": 8.729406806669396e-05, + "loss": 0.1549, + "step": 46260 + }, + { + "epoch": 279.36, + "learning_rate": 8.726415949957987e-05, + "loss": 0.1621, + "step": 46280 + }, + { + "epoch": 279.48, + "learning_rate": 8.723422090940555e-05, + "loss": 0.1694, + "step": 46300 + }, + { + "epoch": 279.6, + "learning_rate": 8.720425232029198e-05, + "loss": 0.1767, + "step": 46320 + }, + { + "epoch": 279.72, + "learning_rate": 8.717425375638429e-05, + "loss": 0.1835, + "step": 46340 + }, + { + "epoch": 279.84, + "learning_rate": 8.714422524185181e-05, + "loss": 0.1895, + "step": 46360 + }, + { + "epoch": 279.96, + "learning_rate": 8.711416680088795e-05, + "loss": 0.1945, + "step": 46380 + }, + { + "epoch": 280.08, + "learning_rate": 8.708407845771027e-05, + "loss": 0.1564, + "step": 46400 + }, + { + "epoch": 280.2, + "learning_rate": 8.70539602365604e-05, + "loss": 0.1489, + "step": 46420 + }, + { + "epoch": 280.32, + "learning_rate": 8.702381216170403e-05, + "loss": 0.1611, + "step": 46440 + }, + { + "epoch": 280.44, + "learning_rate": 8.699363425743093e-05, + "loss": 0.1717, + "step": 46460 + }, + { + "epoch": 280.56, + "learning_rate": 8.69634265480549e-05, + "loss": 0.1738, + "step": 46480 + }, + { + "epoch": 280.68, + "learning_rate": 8.693318905791375e-05, + "loss": 0.1804, + "step": 46500 + }, + { + "epoch": 280.8, + "learning_rate": 8.690292181136924e-05, + "loss": 0.1828, + "step": 46520 + }, + { + "epoch": 280.93, + "learning_rate": 8.687262483280719e-05, + "loss": 0.193, + "step": 46540 + }, + { + "epoch": 281.05, + "learning_rate": 8.684229814663731e-05, + "loss": 0.173, + "step": 46560 + }, + { + "epoch": 281.17, + "learning_rate": 8.681194177729328e-05, + "loss": 0.1497, + "step": 46580 + }, + { + "epoch": 281.29, + "learning_rate": 8.678155574923265e-05, + "loss": 0.1582, + "step": 46600 + }, + { + "epoch": 281.41, + "learning_rate": 8.675114008693689e-05, + "loss": 0.1657, + "step": 46620 + }, + { + "epoch": 281.53, + "learning_rate": 8.672069481491141e-05, + "loss": 0.1739, + "step": 46640 + }, + { + "epoch": 281.65, + "learning_rate": 8.669021995768534e-05, + "loss": 0.1777, + "step": 46660 + }, + { + "epoch": 281.77, + "learning_rate": 8.665971553981175e-05, + "loss": 0.1774, + "step": 46680 + }, + { + "epoch": 281.89, + "learning_rate": 8.662918158586753e-05, + "loss": 0.1802, + "step": 46700 + }, + { + "epoch": 282.01, + "learning_rate": 8.65986181204533e-05, + "loss": 0.1809, + "step": 46720 + }, + { + "epoch": 282.13, + "learning_rate": 8.656802516819349e-05, + "loss": 0.14, + "step": 46740 + }, + { + "epoch": 282.25, + "learning_rate": 8.653740275373631e-05, + "loss": 0.153, + "step": 46760 + }, + { + "epoch": 282.37, + "learning_rate": 8.650675090175366e-05, + "loss": 0.1604, + "step": 46780 + }, + { + "epoch": 282.49, + "learning_rate": 8.647606963694122e-05, + "loss": 0.1675, + "step": 46800 + }, + { + "epoch": 282.62, + "learning_rate": 8.644535898401831e-05, + "loss": 0.168, + "step": 46820 + }, + { + "epoch": 282.74, + "learning_rate": 8.641461896772793e-05, + "loss": 0.1735, + "step": 46840 + }, + { + "epoch": 282.86, + "learning_rate": 8.638384961283679e-05, + "loss": 0.1833, + "step": 46860 + }, + { + "epoch": 282.98, + "learning_rate": 8.63530509441352e-05, + "loss": 0.1921, + "step": 46880 + }, + { + "epoch": 283.1, + "learning_rate": 8.632222298643706e-05, + "loss": 0.1446, + "step": 46900 + }, + { + "epoch": 283.22, + "learning_rate": 8.629136576457991e-05, + "loss": 0.1552, + "step": 46920 + }, + { + "epoch": 283.34, + "learning_rate": 8.626047930342488e-05, + "loss": 0.1601, + "step": 46940 + }, + { + "epoch": 283.46, + "learning_rate": 8.622956362785662e-05, + "loss": 0.1659, + "step": 46960 + }, + { + "epoch": 283.58, + "learning_rate": 8.619861876278332e-05, + "loss": 0.1737, + "step": 46980 + }, + { + "epoch": 283.7, + "learning_rate": 8.616764473313671e-05, + "loss": 0.1769, + "step": 47000 + }, + { + "epoch": 283.7, + "eval_accuracy": 0.0012644348075194357, + "eval_loss": 18.946748733520508, + "eval_runtime": 18.1402, + "eval_samples_per_second": 112.457, + "eval_steps_per_second": 2.37, + "step": 47000 + }, + { + "epoch": 283.82, + "learning_rate": 8.6136641563872e-05, + "loss": 0.1784, + "step": 47020 + }, + { + "epoch": 283.94, + "learning_rate": 8.610560927996788e-05, + "loss": 0.1857, + "step": 47040 + }, + { + "epoch": 284.06, + "learning_rate": 8.607610166559625e-05, + "loss": 0.1554, + "step": 47060 + }, + { + "epoch": 284.19, + "learning_rate": 8.60450126800791e-05, + "loss": 0.1436, + "step": 47080 + }, + { + "epoch": 284.31, + "learning_rate": 8.601389465374627e-05, + "loss": 0.1576, + "step": 47100 + }, + { + "epoch": 284.43, + "learning_rate": 8.598274761166903e-05, + "loss": 0.1629, + "step": 47120 + }, + { + "epoch": 284.55, + "learning_rate": 8.595157157894194e-05, + "loss": 0.1713, + "step": 47140 + }, + { + "epoch": 284.67, + "learning_rate": 8.592036658068301e-05, + "loss": 0.1786, + "step": 47160 + }, + { + "epoch": 284.79, + "learning_rate": 8.588913264203351e-05, + "loss": 0.1788, + "step": 47180 + }, + { + "epoch": 284.91, + "learning_rate": 8.585786978815808e-05, + "loss": 0.1832, + "step": 47200 + }, + { + "epoch": 285.03, + "learning_rate": 8.582657804424465e-05, + "loss": 0.1693, + "step": 47220 + }, + { + "epoch": 285.15, + "learning_rate": 8.579525743550438e-05, + "loss": 0.1376, + "step": 47240 + }, + { + "epoch": 285.27, + "learning_rate": 8.576390798717174e-05, + "loss": 0.1543, + "step": 47260 + }, + { + "epoch": 285.39, + "learning_rate": 8.57325297245044e-05, + "loss": 0.1552, + "step": 47280 + }, + { + "epoch": 285.51, + "learning_rate": 8.570112267278328e-05, + "loss": 0.1635, + "step": 47300 + }, + { + "epoch": 285.63, + "learning_rate": 8.566968685731245e-05, + "loss": 0.1733, + "step": 47320 + }, + { + "epoch": 285.75, + "learning_rate": 8.563822230341919e-05, + "loss": 0.1781, + "step": 47340 + }, + { + "epoch": 285.88, + "learning_rate": 8.560672903645392e-05, + "loss": 0.1807, + "step": 47360 + }, + { + "epoch": 286.0, + "learning_rate": 8.557520708179021e-05, + "loss": 0.1837, + "step": 47380 + }, + { + "epoch": 286.12, + "learning_rate": 8.554365646482472e-05, + "loss": 0.1383, + "step": 47400 + }, + { + "epoch": 286.24, + "learning_rate": 8.551207721097721e-05, + "loss": 0.1507, + "step": 47420 + }, + { + "epoch": 286.36, + "learning_rate": 8.548046934569051e-05, + "loss": 0.1589, + "step": 47440 + }, + { + "epoch": 286.48, + "learning_rate": 8.544883289443053e-05, + "loss": 0.1627, + "step": 47460 + }, + { + "epoch": 286.6, + "learning_rate": 8.541716788268617e-05, + "loss": 0.1676, + "step": 47480 + }, + { + "epoch": 286.72, + "learning_rate": 8.538547433596933e-05, + "loss": 0.1768, + "step": 47500 + }, + { + "epoch": 286.84, + "learning_rate": 8.535375227981497e-05, + "loss": 0.1743, + "step": 47520 + }, + { + "epoch": 286.96, + "learning_rate": 8.532200173978097e-05, + "loss": 0.178, + "step": 47540 + }, + { + "epoch": 287.08, + "learning_rate": 8.529022274144816e-05, + "loss": 0.1468, + "step": 47560 + }, + { + "epoch": 287.2, + "learning_rate": 8.525841531042031e-05, + "loss": 0.1407, + "step": 47580 + }, + { + "epoch": 287.32, + "learning_rate": 8.522657947232407e-05, + "loss": 0.1505, + "step": 47600 + }, + { + "epoch": 287.44, + "learning_rate": 8.519471525280903e-05, + "loss": 0.1569, + "step": 47620 + }, + { + "epoch": 287.57, + "learning_rate": 8.516282267754761e-05, + "loss": 0.1657, + "step": 47640 + }, + { + "epoch": 287.69, + "learning_rate": 8.513090177223506e-05, + "loss": 0.1651, + "step": 47660 + }, + { + "epoch": 287.81, + "learning_rate": 8.509895256258948e-05, + "loss": 0.169, + "step": 47680 + }, + { + "epoch": 287.93, + "learning_rate": 8.506697507435182e-05, + "loss": 0.1762, + "step": 47700 + }, + { + "epoch": 288.05, + "learning_rate": 8.50349693332857e-05, + "loss": 0.1584, + "step": 47720 + }, + { + "epoch": 288.17, + "learning_rate": 8.50029353651776e-05, + "loss": 0.1408, + "step": 47740 + }, + { + "epoch": 288.29, + "learning_rate": 8.497087319583672e-05, + "loss": 0.1439, + "step": 47760 + }, + { + "epoch": 288.41, + "learning_rate": 8.493878285109495e-05, + "loss": 0.1497, + "step": 47780 + }, + { + "epoch": 288.53, + "learning_rate": 8.49066643568069e-05, + "loss": 0.1571, + "step": 47800 + }, + { + "epoch": 288.65, + "learning_rate": 8.487451773884987e-05, + "loss": 0.1613, + "step": 47820 + }, + { + "epoch": 288.77, + "learning_rate": 8.484234302312382e-05, + "loss": 0.1688, + "step": 47840 + }, + { + "epoch": 288.89, + "learning_rate": 8.48101402355513e-05, + "loss": 0.174, + "step": 47860 + }, + { + "epoch": 289.01, + "learning_rate": 8.477790940207756e-05, + "loss": 0.1721, + "step": 47880 + }, + { + "epoch": 289.13, + "learning_rate": 8.474565054867037e-05, + "loss": 0.134, + "step": 47900 + }, + { + "epoch": 289.26, + "learning_rate": 8.471336370132012e-05, + "loss": 0.1472, + "step": 47920 + }, + { + "epoch": 289.38, + "learning_rate": 8.468104888603973e-05, + "loss": 0.1517, + "step": 47940 + }, + { + "epoch": 289.5, + "learning_rate": 8.464870612886467e-05, + "loss": 0.1545, + "step": 47960 + }, + { + "epoch": 289.62, + "learning_rate": 8.46163354558529e-05, + "loss": 0.1612, + "step": 47980 + }, + { + "epoch": 289.74, + "learning_rate": 8.458393689308491e-05, + "loss": 0.1649, + "step": 48000 + }, + { + "epoch": 289.74, + "eval_accuracy": 0.0012802402426134287, + "eval_loss": 19.142297744750977, + "eval_runtime": 18.2303, + "eval_samples_per_second": 111.902, + "eval_steps_per_second": 2.359, + "step": 48000 + }, + { + "epoch": 289.86, + "learning_rate": 8.455151046666363e-05, + "loss": 0.1753, + "step": 48020 + }, + { + "epoch": 289.98, + "learning_rate": 8.451905620271443e-05, + "loss": 0.1782, + "step": 48040 + }, + { + "epoch": 290.1, + "learning_rate": 8.448657412738515e-05, + "loss": 0.1353, + "step": 48060 + }, + { + "epoch": 290.22, + "learning_rate": 8.445406426684598e-05, + "loss": 0.1392, + "step": 48080 + }, + { + "epoch": 290.34, + "learning_rate": 8.442152664728958e-05, + "loss": 0.1525, + "step": 48100 + }, + { + "epoch": 290.46, + "learning_rate": 8.438896129493086e-05, + "loss": 0.1546, + "step": 48120 + }, + { + "epoch": 290.58, + "learning_rate": 8.435636823600716e-05, + "loss": 0.1624, + "step": 48140 + }, + { + "epoch": 290.7, + "learning_rate": 8.432374749677814e-05, + "loss": 0.1696, + "step": 48160 + }, + { + "epoch": 290.82, + "learning_rate": 8.42910991035257e-05, + "loss": 0.1724, + "step": 48180 + }, + { + "epoch": 290.95, + "learning_rate": 8.425842308255412e-05, + "loss": 0.1678, + "step": 48200 + }, + { + "epoch": 291.07, + "learning_rate": 8.422571946018982e-05, + "loss": 0.1391, + "step": 48220 + }, + { + "epoch": 291.19, + "learning_rate": 8.419298826278154e-05, + "loss": 0.1272, + "step": 48240 + }, + { + "epoch": 291.31, + "learning_rate": 8.416022951670022e-05, + "loss": 0.1432, + "step": 48260 + }, + { + "epoch": 291.43, + "learning_rate": 8.412744324833898e-05, + "loss": 0.1465, + "step": 48280 + }, + { + "epoch": 291.55, + "learning_rate": 8.409462948411315e-05, + "loss": 0.1572, + "step": 48300 + }, + { + "epoch": 291.67, + "learning_rate": 8.406178825046015e-05, + "loss": 0.1593, + "step": 48320 + }, + { + "epoch": 291.79, + "learning_rate": 8.402891957383959e-05, + "loss": 0.1619, + "step": 48340 + }, + { + "epoch": 291.91, + "learning_rate": 8.399602348073316e-05, + "loss": 0.1699, + "step": 48360 + }, + { + "epoch": 292.03, + "learning_rate": 8.396309999764467e-05, + "loss": 0.1575, + "step": 48380 + }, + { + "epoch": 292.15, + "learning_rate": 8.393014915109995e-05, + "loss": 0.1269, + "step": 48400 + }, + { + "epoch": 292.27, + "learning_rate": 8.389717096764691e-05, + "loss": 0.1392, + "step": 48420 + }, + { + "epoch": 292.39, + "learning_rate": 8.386416547385547e-05, + "loss": 0.1476, + "step": 48440 + }, + { + "epoch": 292.52, + "learning_rate": 8.383113269631757e-05, + "loss": 0.1546, + "step": 48460 + }, + { + "epoch": 292.64, + "learning_rate": 8.379807266164714e-05, + "loss": 0.1575, + "step": 48480 + }, + { + "epoch": 292.76, + "learning_rate": 8.376498539648001e-05, + "loss": 0.1614, + "step": 48500 + }, + { + "epoch": 292.88, + "learning_rate": 8.373187092747403e-05, + "loss": 0.1648, + "step": 48520 + }, + { + "epoch": 293.0, + "learning_rate": 8.369872928130891e-05, + "loss": 0.1679, + "step": 48540 + }, + { + "epoch": 293.12, + "learning_rate": 8.366556048468628e-05, + "loss": 0.1246, + "step": 48560 + }, + { + "epoch": 293.24, + "learning_rate": 8.363236456432964e-05, + "loss": 0.1359, + "step": 48580 + }, + { + "epoch": 293.36, + "learning_rate": 8.359914154698434e-05, + "loss": 0.1425, + "step": 48600 + }, + { + "epoch": 293.48, + "learning_rate": 8.356589145941757e-05, + "loss": 0.1506, + "step": 48620 + }, + { + "epoch": 293.6, + "learning_rate": 8.353261432841832e-05, + "loss": 0.1561, + "step": 48640 + }, + { + "epoch": 293.72, + "learning_rate": 8.34993101807974e-05, + "loss": 0.1618, + "step": 48660 + }, + { + "epoch": 293.84, + "learning_rate": 8.346597904338731e-05, + "loss": 0.1633, + "step": 48680 + }, + { + "epoch": 293.96, + "learning_rate": 8.343262094304238e-05, + "loss": 0.1679, + "step": 48700 + }, + { + "epoch": 294.08, + "learning_rate": 8.339923590663863e-05, + "loss": 0.1352, + "step": 48720 + }, + { + "epoch": 294.21, + "learning_rate": 8.336582396107378e-05, + "loss": 0.1327, + "step": 48740 + }, + { + "epoch": 294.33, + "learning_rate": 8.33323851332672e-05, + "loss": 0.1416, + "step": 48760 + }, + { + "epoch": 294.45, + "learning_rate": 8.329891945015998e-05, + "loss": 0.1473, + "step": 48780 + }, + { + "epoch": 294.57, + "learning_rate": 8.326542693871482e-05, + "loss": 0.1527, + "step": 48800 + }, + { + "epoch": 294.69, + "learning_rate": 8.323190762591601e-05, + "loss": 0.156, + "step": 48820 + }, + { + "epoch": 294.81, + "learning_rate": 8.319836153876947e-05, + "loss": 0.1582, + "step": 48840 + }, + { + "epoch": 294.93, + "learning_rate": 8.316478870430269e-05, + "loss": 0.1618, + "step": 48860 + }, + { + "epoch": 295.05, + "learning_rate": 8.313118914956466e-05, + "loss": 0.145, + "step": 48880 + }, + { + "epoch": 295.17, + "learning_rate": 8.309756290162595e-05, + "loss": 0.1271, + "step": 48900 + }, + { + "epoch": 295.29, + "learning_rate": 8.306390998757863e-05, + "loss": 0.1355, + "step": 48920 + }, + { + "epoch": 295.41, + "learning_rate": 8.303023043453624e-05, + "loss": 0.1441, + "step": 48940 + }, + { + "epoch": 295.53, + "learning_rate": 8.299652426963379e-05, + "loss": 0.1485, + "step": 48960 + }, + { + "epoch": 295.65, + "learning_rate": 8.296279152002771e-05, + "loss": 0.1496, + "step": 48980 + }, + { + "epoch": 295.77, + "learning_rate": 8.29290322128959e-05, + "loss": 0.1517, + "step": 49000 + }, + { + "epoch": 295.77, + "eval_accuracy": 0.0012590016892058756, + "eval_loss": 19.363754272460938, + "eval_runtime": 18.183, + "eval_samples_per_second": 112.193, + "eval_steps_per_second": 2.365, + "step": 49000 + }, + { + "epoch": 295.9, + "learning_rate": 8.28952463754376e-05, + "loss": 0.1588, + "step": 49020 + }, + { + "epoch": 296.02, + "learning_rate": 8.286143403487345e-05, + "loss": 0.1548, + "step": 49040 + }, + { + "epoch": 296.14, + "learning_rate": 8.282759521844545e-05, + "loss": 0.1199, + "step": 49060 + }, + { + "epoch": 296.26, + "learning_rate": 8.27954238444015e-05, + "loss": 0.1325, + "step": 49080 + }, + { + "epoch": 296.38, + "learning_rate": 8.276153347847452e-05, + "loss": 0.1418, + "step": 49100 + }, + { + "epoch": 296.5, + "learning_rate": 8.272761671717178e-05, + "loss": 0.1464, + "step": 49120 + }, + { + "epoch": 296.62, + "learning_rate": 8.269367358781937e-05, + "loss": 0.1504, + "step": 49140 + }, + { + "epoch": 296.74, + "learning_rate": 8.265970411776467e-05, + "loss": 0.1542, + "step": 49160 + }, + { + "epoch": 296.86, + "learning_rate": 8.262570833437626e-05, + "loss": 0.1546, + "step": 49180 + }, + { + "epoch": 296.98, + "learning_rate": 8.259168626504395e-05, + "loss": 0.1581, + "step": 49200 + }, + { + "epoch": 297.1, + "learning_rate": 8.255763793717868e-05, + "loss": 0.1237, + "step": 49220 + }, + { + "epoch": 297.22, + "learning_rate": 8.25235633782126e-05, + "loss": 0.131, + "step": 49240 + }, + { + "epoch": 297.34, + "learning_rate": 8.248946261559893e-05, + "loss": 0.1359, + "step": 49260 + }, + { + "epoch": 297.46, + "learning_rate": 8.245533567681208e-05, + "loss": 0.1455, + "step": 49280 + }, + { + "epoch": 297.59, + "learning_rate": 8.24211825893475e-05, + "loss": 0.1492, + "step": 49300 + }, + { + "epoch": 297.71, + "learning_rate": 8.238700338072167e-05, + "loss": 0.1528, + "step": 49320 + }, + { + "epoch": 297.83, + "learning_rate": 8.235279807847223e-05, + "loss": 0.1536, + "step": 49340 + }, + { + "epoch": 297.95, + "learning_rate": 8.231856671015772e-05, + "loss": 0.1589, + "step": 49360 + }, + { + "epoch": 298.07, + "learning_rate": 8.228430930335775e-05, + "loss": 0.1374, + "step": 49380 + }, + { + "epoch": 298.19, + "learning_rate": 8.225002588567291e-05, + "loss": 0.1271, + "step": 49400 + }, + { + "epoch": 298.31, + "learning_rate": 8.221571648472472e-05, + "loss": 0.132, + "step": 49420 + }, + { + "epoch": 298.43, + "learning_rate": 8.218138112815564e-05, + "loss": 0.141, + "step": 49440 + }, + { + "epoch": 298.55, + "learning_rate": 8.214701984362906e-05, + "loss": 0.1447, + "step": 49460 + }, + { + "epoch": 298.67, + "learning_rate": 8.211263265882923e-05, + "loss": 0.1473, + "step": 49480 + }, + { + "epoch": 298.79, + "learning_rate": 8.207821960146128e-05, + "loss": 0.1529, + "step": 49500 + }, + { + "epoch": 298.91, + "learning_rate": 8.20437806992512e-05, + "loss": 0.1556, + "step": 49520 + }, + { + "epoch": 299.03, + "learning_rate": 8.200931597994582e-05, + "loss": 0.1437, + "step": 49540 + }, + { + "epoch": 299.15, + "learning_rate": 8.197482547131268e-05, + "loss": 0.1192, + "step": 49560 + }, + { + "epoch": 299.28, + "learning_rate": 8.194030920114021e-05, + "loss": 0.1297, + "step": 49580 + }, + { + "epoch": 299.4, + "learning_rate": 8.190576719723752e-05, + "loss": 0.1367, + "step": 49600 + }, + { + "epoch": 299.52, + "learning_rate": 8.18711994874345e-05, + "loss": 0.1429, + "step": 49620 + }, + { + "epoch": 299.64, + "learning_rate": 8.183660609958169e-05, + "loss": 0.1467, + "step": 49640 + }, + { + "epoch": 299.76, + "learning_rate": 8.18019870615504e-05, + "loss": 0.1531, + "step": 49660 + }, + { + "epoch": 299.88, + "learning_rate": 8.176734240123255e-05, + "loss": 0.1564, + "step": 49680 + }, + { + "epoch": 300.0, + "learning_rate": 8.17326721465407e-05, + "loss": 0.1527, + "step": 49700 + }, + { + "epoch": 300.12, + "learning_rate": 8.16979763254081e-05, + "loss": 0.1109, + "step": 49720 + }, + { + "epoch": 300.24, + "learning_rate": 8.166325496578847e-05, + "loss": 0.1241, + "step": 49740 + }, + { + "epoch": 300.36, + "learning_rate": 8.162850809565623e-05, + "loss": 0.1373, + "step": 49760 + }, + { + "epoch": 300.48, + "learning_rate": 8.159373574300629e-05, + "loss": 0.141, + "step": 49780 + }, + { + "epoch": 300.6, + "learning_rate": 8.155893793585413e-05, + "loss": 0.1433, + "step": 49800 + }, + { + "epoch": 300.72, + "learning_rate": 8.152411470223569e-05, + "loss": 0.1458, + "step": 49820 + }, + { + "epoch": 300.85, + "learning_rate": 8.148926607020743e-05, + "loss": 0.1525, + "step": 49840 + }, + { + "epoch": 300.97, + "learning_rate": 8.145439206784626e-05, + "loss": 0.1573, + "step": 49860 + }, + { + "epoch": 301.09, + "learning_rate": 8.141949272324953e-05, + "loss": 0.1243, + "step": 49880 + }, + { + "epoch": 301.21, + "learning_rate": 8.138456806453503e-05, + "loss": 0.1229, + "step": 49900 + }, + { + "epoch": 301.33, + "learning_rate": 8.13496181198409e-05, + "loss": 0.1313, + "step": 49920 + }, + { + "epoch": 301.45, + "learning_rate": 8.131464291732572e-05, + "loss": 0.1363, + "step": 49940 + }, + { + "epoch": 301.57, + "learning_rate": 8.127964248516832e-05, + "loss": 0.1392, + "step": 49960 + }, + { + "epoch": 301.69, + "learning_rate": 8.124461685156795e-05, + "loss": 0.1404, + "step": 49980 + }, + { + "epoch": 301.81, + "learning_rate": 8.120956604474415e-05, + "loss": 0.1491, + "step": 50000 + }, + { + "epoch": 301.81, + "eval_accuracy": 0.0012639408876727485, + "eval_loss": 19.587871551513672, + "eval_runtime": 18.2116, + "eval_samples_per_second": 112.016, + "eval_steps_per_second": 2.361, + "step": 50000 + }, + { + "epoch": 301.93, + "learning_rate": 8.117449009293668e-05, + "loss": 0.1501, + "step": 50020 + }, + { + "epoch": 302.05, + "learning_rate": 8.113938902440564e-05, + "loss": 0.1327, + "step": 50040 + }, + { + "epoch": 302.17, + "learning_rate": 8.110426286743129e-05, + "loss": 0.1179, + "step": 50060 + }, + { + "epoch": 302.29, + "learning_rate": 8.106911165031415e-05, + "loss": 0.125, + "step": 50080 + }, + { + "epoch": 302.41, + "learning_rate": 8.103393540137496e-05, + "loss": 0.1345, + "step": 50100 + }, + { + "epoch": 302.54, + "learning_rate": 8.099873414895453e-05, + "loss": 0.1386, + "step": 50120 + }, + { + "epoch": 302.66, + "learning_rate": 8.096350792141392e-05, + "loss": 0.1422, + "step": 50140 + }, + { + "epoch": 302.78, + "learning_rate": 8.092825674713425e-05, + "loss": 0.1463, + "step": 50160 + }, + { + "epoch": 302.9, + "learning_rate": 8.089298065451672e-05, + "loss": 0.1481, + "step": 50180 + }, + { + "epoch": 303.02, + "learning_rate": 8.085767967198269e-05, + "loss": 0.1403, + "step": 50200 + }, + { + "epoch": 303.14, + "learning_rate": 8.082235382797349e-05, + "loss": 0.11, + "step": 50220 + }, + { + "epoch": 303.26, + "learning_rate": 8.078700315095055e-05, + "loss": 0.1223, + "step": 50240 + }, + { + "epoch": 303.38, + "learning_rate": 8.075162766939526e-05, + "loss": 0.1302, + "step": 50260 + }, + { + "epoch": 303.5, + "learning_rate": 8.071622741180898e-05, + "loss": 0.1352, + "step": 50280 + }, + { + "epoch": 303.62, + "learning_rate": 8.068080240671308e-05, + "loss": 0.1387, + "step": 50300 + }, + { + "epoch": 303.74, + "learning_rate": 8.064535268264883e-05, + "loss": 0.1357, + "step": 50320 + }, + { + "epoch": 303.86, + "learning_rate": 8.060987826817745e-05, + "loss": 0.1428, + "step": 50340 + }, + { + "epoch": 303.98, + "learning_rate": 8.057437919188005e-05, + "loss": 0.1516, + "step": 50360 + }, + { + "epoch": 304.1, + "learning_rate": 8.053885548235755e-05, + "loss": 0.1121, + "step": 50380 + }, + { + "epoch": 304.23, + "learning_rate": 8.05033071682308e-05, + "loss": 0.1177, + "step": 50400 + }, + { + "epoch": 304.35, + "learning_rate": 8.046773427814042e-05, + "loss": 0.1241, + "step": 50420 + }, + { + "epoch": 304.47, + "learning_rate": 8.043213684074684e-05, + "loss": 0.1314, + "step": 50440 + }, + { + "epoch": 304.59, + "learning_rate": 8.039651488473028e-05, + "loss": 0.1347, + "step": 50460 + }, + { + "epoch": 304.71, + "learning_rate": 8.03608684387907e-05, + "loss": 0.1367, + "step": 50480 + }, + { + "epoch": 304.83, + "learning_rate": 8.03251975316478e-05, + "loss": 0.1446, + "step": 50500 + }, + { + "epoch": 304.95, + "learning_rate": 8.0289502192041e-05, + "loss": 0.1429, + "step": 50520 + }, + { + "epoch": 305.07, + "learning_rate": 8.025378244872936e-05, + "loss": 0.1184, + "step": 50540 + }, + { + "epoch": 305.19, + "learning_rate": 8.021803833049166e-05, + "loss": 0.1131, + "step": 50560 + }, + { + "epoch": 305.31, + "learning_rate": 8.01822698661263e-05, + "loss": 0.1242, + "step": 50580 + }, + { + "epoch": 305.43, + "learning_rate": 8.014647708445124e-05, + "loss": 0.13, + "step": 50600 + }, + { + "epoch": 305.55, + "learning_rate": 8.011066001430412e-05, + "loss": 0.1324, + "step": 50620 + }, + { + "epoch": 305.67, + "learning_rate": 8.007481868454208e-05, + "loss": 0.1361, + "step": 50640 + }, + { + "epoch": 305.79, + "learning_rate": 8.003895312404183e-05, + "loss": 0.1416, + "step": 50660 + }, + { + "epoch": 305.92, + "learning_rate": 8.000306336169963e-05, + "loss": 0.1446, + "step": 50680 + }, + { + "epoch": 306.04, + "learning_rate": 7.99671494264312e-05, + "loss": 0.1321, + "step": 50700 + }, + { + "epoch": 306.16, + "learning_rate": 7.993121134717177e-05, + "loss": 0.1086, + "step": 50720 + }, + { + "epoch": 306.28, + "learning_rate": 7.989524915287595e-05, + "loss": 0.1181, + "step": 50740 + }, + { + "epoch": 306.4, + "learning_rate": 7.985926287251787e-05, + "loss": 0.1277, + "step": 50760 + }, + { + "epoch": 306.52, + "learning_rate": 7.982325253509102e-05, + "loss": 0.1331, + "step": 50780 + }, + { + "epoch": 306.64, + "learning_rate": 7.978721816960826e-05, + "loss": 0.1349, + "step": 50800 + }, + { + "epoch": 306.76, + "learning_rate": 7.975115980510187e-05, + "loss": 0.1399, + "step": 50820 + }, + { + "epoch": 306.88, + "learning_rate": 7.971507747062337e-05, + "loss": 0.1395, + "step": 50840 + }, + { + "epoch": 307.0, + "learning_rate": 7.967897119524368e-05, + "loss": 0.1431, + "step": 50860 + }, + { + "epoch": 307.12, + "learning_rate": 7.964284100805297e-05, + "loss": 0.1063, + "step": 50880 + }, + { + "epoch": 307.24, + "learning_rate": 7.960668693816067e-05, + "loss": 0.119, + "step": 50900 + }, + { + "epoch": 307.36, + "learning_rate": 7.957050901469545e-05, + "loss": 0.127, + "step": 50920 + }, + { + "epoch": 307.48, + "learning_rate": 7.953430726680524e-05, + "loss": 0.1279, + "step": 50940 + }, + { + "epoch": 307.61, + "learning_rate": 7.949808172365713e-05, + "loss": 0.1325, + "step": 50960 + }, + { + "epoch": 307.73, + "learning_rate": 7.946183241443736e-05, + "loss": 0.1374, + "step": 50980 + }, + { + "epoch": 307.85, + "learning_rate": 7.942555936835135e-05, + "loss": 0.1387, + "step": 51000 + }, + { + "epoch": 307.85, + "eval_accuracy": 0.001271843605219745, + "eval_loss": 19.7823429107666, + "eval_runtime": 18.1904, + "eval_samples_per_second": 112.147, + "eval_steps_per_second": 2.364, + "step": 51000 + }, + { + "epoch": 307.97, + "learning_rate": 7.938926261462366e-05, + "loss": 0.1433, + "step": 51020 + }, + { + "epoch": 308.09, + "learning_rate": 7.935294218249791e-05, + "loss": 0.1137, + "step": 51040 + }, + { + "epoch": 308.21, + "learning_rate": 7.931659810123683e-05, + "loss": 0.113, + "step": 51060 + }, + { + "epoch": 308.33, + "learning_rate": 7.928204934569717e-05, + "loss": 0.1231, + "step": 51080 + }, + { + "epoch": 308.45, + "learning_rate": 7.92456592328612e-05, + "loss": 0.1247, + "step": 51100 + }, + { + "epoch": 308.57, + "learning_rate": 7.920924555732582e-05, + "loss": 0.132, + "step": 51120 + }, + { + "epoch": 308.69, + "learning_rate": 7.917280834842888e-05, + "loss": 0.1351, + "step": 51140 + }, + { + "epoch": 308.81, + "learning_rate": 7.913634763552717e-05, + "loss": 0.1395, + "step": 51160 + }, + { + "epoch": 308.93, + "learning_rate": 7.909986344799645e-05, + "loss": 0.1372, + "step": 51180 + }, + { + "epoch": 309.05, + "learning_rate": 7.906335581523135e-05, + "loss": 0.1199, + "step": 51200 + }, + { + "epoch": 309.18, + "learning_rate": 7.90268247666454e-05, + "loss": 0.1085, + "step": 51220 + }, + { + "epoch": 309.3, + "learning_rate": 7.899027033167104e-05, + "loss": 0.1169, + "step": 51240 + }, + { + "epoch": 309.42, + "learning_rate": 7.895369253975951e-05, + "loss": 0.1223, + "step": 51260 + }, + { + "epoch": 309.54, + "learning_rate": 7.891709142038084e-05, + "loss": 0.129, + "step": 51280 + }, + { + "epoch": 309.66, + "learning_rate": 7.888046700302394e-05, + "loss": 0.1346, + "step": 51300 + }, + { + "epoch": 309.78, + "learning_rate": 7.884381931719642e-05, + "loss": 0.1356, + "step": 51320 + }, + { + "epoch": 309.9, + "learning_rate": 7.880714839242464e-05, + "loss": 0.1366, + "step": 51340 + }, + { + "epoch": 310.02, + "learning_rate": 7.877045425825374e-05, + "loss": 0.133, + "step": 51360 + }, + { + "epoch": 310.14, + "learning_rate": 7.873373694424751e-05, + "loss": 0.1043, + "step": 51380 + }, + { + "epoch": 310.26, + "learning_rate": 7.869699647998839e-05, + "loss": 0.1133, + "step": 51400 + }, + { + "epoch": 310.38, + "learning_rate": 7.866023289507754e-05, + "loss": 0.1199, + "step": 51420 + }, + { + "epoch": 310.5, + "learning_rate": 7.862344621913472e-05, + "loss": 0.1252, + "step": 51440 + }, + { + "epoch": 310.62, + "learning_rate": 7.858663648179828e-05, + "loss": 0.1313, + "step": 51460 + }, + { + "epoch": 310.74, + "learning_rate": 7.854980371272517e-05, + "loss": 0.1312, + "step": 51480 + }, + { + "epoch": 310.87, + "learning_rate": 7.851294794159087e-05, + "loss": 0.1364, + "step": 51500 + }, + { + "epoch": 310.99, + "learning_rate": 7.847606919808944e-05, + "loss": 0.1399, + "step": 51520 + }, + { + "epoch": 311.11, + "learning_rate": 7.843916751193336e-05, + "loss": 0.1065, + "step": 51540 + }, + { + "epoch": 311.23, + "learning_rate": 7.840224291285372e-05, + "loss": 0.1115, + "step": 51560 + }, + { + "epoch": 311.35, + "learning_rate": 7.836529543059995e-05, + "loss": 0.119, + "step": 51580 + }, + { + "epoch": 311.47, + "learning_rate": 7.832832509494001e-05, + "loss": 0.1225, + "step": 51600 + }, + { + "epoch": 311.59, + "learning_rate": 7.82913319356602e-05, + "loss": 0.1261, + "step": 51620 + }, + { + "epoch": 311.71, + "learning_rate": 7.825431598256525e-05, + "loss": 0.129, + "step": 51640 + }, + { + "epoch": 311.83, + "learning_rate": 7.821727726547825e-05, + "loss": 0.131, + "step": 51660 + }, + { + "epoch": 311.95, + "learning_rate": 7.818021581424064e-05, + "loss": 0.1364, + "step": 51680 + }, + { + "epoch": 312.07, + "learning_rate": 7.814313165871212e-05, + "loss": 0.1154, + "step": 51700 + }, + { + "epoch": 312.19, + "learning_rate": 7.810602482877075e-05, + "loss": 0.1043, + "step": 51720 + }, + { + "epoch": 312.31, + "learning_rate": 7.806889535431284e-05, + "loss": 0.1127, + "step": 51740 + }, + { + "epoch": 312.43, + "learning_rate": 7.803174326525293e-05, + "loss": 0.1205, + "step": 51760 + }, + { + "epoch": 312.56, + "learning_rate": 7.799456859152378e-05, + "loss": 0.1243, + "step": 51780 + }, + { + "epoch": 312.68, + "learning_rate": 7.795737136307636e-05, + "loss": 0.1318, + "step": 51800 + }, + { + "epoch": 312.8, + "learning_rate": 7.79201516098798e-05, + "loss": 0.1335, + "step": 51820 + }, + { + "epoch": 312.92, + "learning_rate": 7.78829093619214e-05, + "loss": 0.1371, + "step": 51840 + }, + { + "epoch": 313.04, + "learning_rate": 7.784564464920654e-05, + "loss": 0.1249, + "step": 51860 + }, + { + "epoch": 313.16, + "learning_rate": 7.780835750175874e-05, + "loss": 0.103, + "step": 51880 + }, + { + "epoch": 313.28, + "learning_rate": 7.777104794961957e-05, + "loss": 0.113, + "step": 51900 + }, + { + "epoch": 313.4, + "learning_rate": 7.773371602284869e-05, + "loss": 0.1204, + "step": 51920 + }, + { + "epoch": 313.52, + "learning_rate": 7.769636175152374e-05, + "loss": 0.1231, + "step": 51940 + }, + { + "epoch": 313.64, + "learning_rate": 7.765898516574038e-05, + "loss": 0.1306, + "step": 51960 + }, + { + "epoch": 313.76, + "learning_rate": 7.762158629561225e-05, + "loss": 0.1301, + "step": 51980 + }, + { + "epoch": 313.88, + "learning_rate": 7.758416517127094e-05, + "loss": 0.1332, + "step": 52000 + }, + { + "epoch": 313.88, + "eval_accuracy": 0.0012550503304323774, + "eval_loss": 19.966257095336914, + "eval_runtime": 18.2423, + "eval_samples_per_second": 111.828, + "eval_steps_per_second": 2.357, + "step": 52000 + }, + { + "epoch": 314.0, + "learning_rate": 7.7546721822866e-05, + "loss": 0.1348, + "step": 52020 + }, + { + "epoch": 314.12, + "learning_rate": 7.750925628056482e-05, + "loss": 0.0974, + "step": 52040 + }, + { + "epoch": 314.25, + "learning_rate": 7.747176857455275e-05, + "loss": 0.1108, + "step": 52060 + }, + { + "epoch": 314.37, + "learning_rate": 7.743425873503294e-05, + "loss": 0.118, + "step": 52080 + }, + { + "epoch": 314.49, + "learning_rate": 7.739672679222638e-05, + "loss": 0.1192, + "step": 52100 + }, + { + "epoch": 314.61, + "learning_rate": 7.735917277637189e-05, + "loss": 0.1247, + "step": 52120 + }, + { + "epoch": 314.73, + "learning_rate": 7.732159671772605e-05, + "loss": 0.1268, + "step": 52140 + }, + { + "epoch": 314.85, + "learning_rate": 7.728399864656324e-05, + "loss": 0.129, + "step": 52160 + }, + { + "epoch": 314.97, + "learning_rate": 7.724637859317551e-05, + "loss": 0.1327, + "step": 52180 + }, + { + "epoch": 315.09, + "learning_rate": 7.720873658787268e-05, + "loss": 0.1058, + "step": 52200 + }, + { + "epoch": 315.21, + "learning_rate": 7.717107266098225e-05, + "loss": 0.105, + "step": 52220 + }, + { + "epoch": 315.33, + "learning_rate": 7.713338684284932e-05, + "loss": 0.1141, + "step": 52240 + }, + { + "epoch": 315.45, + "learning_rate": 7.709567916383672e-05, + "loss": 0.1154, + "step": 52260 + }, + { + "epoch": 315.57, + "learning_rate": 7.705794965432481e-05, + "loss": 0.1197, + "step": 52280 + }, + { + "epoch": 315.69, + "learning_rate": 7.702019834471159e-05, + "loss": 0.1265, + "step": 52300 + }, + { + "epoch": 315.81, + "learning_rate": 7.698242526541262e-05, + "loss": 0.1258, + "step": 52320 + }, + { + "epoch": 315.94, + "learning_rate": 7.694463044686095e-05, + "loss": 0.128, + "step": 52340 + }, + { + "epoch": 316.06, + "learning_rate": 7.690681391950723e-05, + "loss": 0.1119, + "step": 52360 + }, + { + "epoch": 316.18, + "learning_rate": 7.686897571381952e-05, + "loss": 0.1039, + "step": 52380 + }, + { + "epoch": 316.3, + "learning_rate": 7.68311158602834e-05, + "loss": 0.1102, + "step": 52400 + }, + { + "epoch": 316.42, + "learning_rate": 7.679323438940184e-05, + "loss": 0.1139, + "step": 52420 + }, + { + "epoch": 316.54, + "learning_rate": 7.67553313316953e-05, + "loss": 0.1171, + "step": 52440 + }, + { + "epoch": 316.66, + "learning_rate": 7.671740671770153e-05, + "loss": 0.1205, + "step": 52460 + }, + { + "epoch": 316.78, + "learning_rate": 7.667946057797578e-05, + "loss": 0.1236, + "step": 52480 + }, + { + "epoch": 316.9, + "learning_rate": 7.664149294309051e-05, + "loss": 0.129, + "step": 52500 + }, + { + "epoch": 317.02, + "learning_rate": 7.66035038436356e-05, + "loss": 0.1206, + "step": 52520 + }, + { + "epoch": 317.14, + "learning_rate": 7.656549331021814e-05, + "loss": 0.0967, + "step": 52540 + }, + { + "epoch": 317.26, + "learning_rate": 7.652746137346255e-05, + "loss": 0.1072, + "step": 52560 + }, + { + "epoch": 317.38, + "learning_rate": 7.648940806401048e-05, + "loss": 0.1102, + "step": 52580 + }, + { + "epoch": 317.51, + "learning_rate": 7.645133341252078e-05, + "loss": 0.1166, + "step": 52600 + }, + { + "epoch": 317.63, + "learning_rate": 7.641323744966953e-05, + "loss": 0.1216, + "step": 52620 + }, + { + "epoch": 317.75, + "learning_rate": 7.637512020614995e-05, + "loss": 0.1245, + "step": 52640 + }, + { + "epoch": 317.87, + "learning_rate": 7.633698171267241e-05, + "loss": 0.1263, + "step": 52660 + }, + { + "epoch": 317.99, + "learning_rate": 7.629882199996441e-05, + "loss": 0.1286, + "step": 52680 + }, + { + "epoch": 318.11, + "learning_rate": 7.626064109877054e-05, + "loss": 0.0974, + "step": 52700 + }, + { + "epoch": 318.23, + "learning_rate": 7.622243903985245e-05, + "loss": 0.1049, + "step": 52720 + }, + { + "epoch": 318.35, + "learning_rate": 7.618421585398885e-05, + "loss": 0.1115, + "step": 52740 + }, + { + "epoch": 318.47, + "learning_rate": 7.61459715719755e-05, + "loss": 0.1154, + "step": 52760 + }, + { + "epoch": 318.59, + "learning_rate": 7.610770622462508e-05, + "loss": 0.1164, + "step": 52780 + }, + { + "epoch": 318.71, + "learning_rate": 7.606941984276734e-05, + "loss": 0.1205, + "step": 52800 + }, + { + "epoch": 318.83, + "learning_rate": 7.60311124572489e-05, + "loss": 0.1233, + "step": 52820 + }, + { + "epoch": 318.95, + "learning_rate": 7.599278409893334e-05, + "loss": 0.1259, + "step": 52840 + }, + { + "epoch": 319.07, + "learning_rate": 7.59544347987011e-05, + "loss": 0.1046, + "step": 52860 + }, + { + "epoch": 319.2, + "learning_rate": 7.591606458744955e-05, + "loss": 0.0991, + "step": 52880 + }, + { + "epoch": 319.32, + "learning_rate": 7.587767349609284e-05, + "loss": 0.1062, + "step": 52900 + }, + { + "epoch": 319.44, + "learning_rate": 7.583926155556203e-05, + "loss": 0.1112, + "step": 52920 + }, + { + "epoch": 319.56, + "learning_rate": 7.580082879680488e-05, + "loss": 0.1148, + "step": 52940 + }, + { + "epoch": 319.68, + "learning_rate": 7.5762375250786e-05, + "loss": 0.1182, + "step": 52960 + }, + { + "epoch": 319.8, + "learning_rate": 7.572390094848669e-05, + "loss": 0.1205, + "step": 52980 + }, + { + "epoch": 319.92, + "learning_rate": 7.568540592090503e-05, + "loss": 0.1256, + "step": 53000 + }, + { + "epoch": 319.92, + "eval_accuracy": 0.0012787584830733667, + "eval_loss": 20.19066619873047, + "eval_runtime": 18.1725, + "eval_samples_per_second": 112.257, + "eval_steps_per_second": 2.366, + "step": 53000 + }, + { + "epoch": 320.04, + "learning_rate": 7.564689019905575e-05, + "loss": 0.1129, + "step": 53020 + }, + { + "epoch": 320.16, + "learning_rate": 7.560835381397027e-05, + "loss": 0.0971, + "step": 53040 + }, + { + "epoch": 320.28, + "learning_rate": 7.556979679669666e-05, + "loss": 0.1016, + "step": 53060 + }, + { + "epoch": 320.4, + "learning_rate": 7.553314854801641e-05, + "loss": 0.1067, + "step": 53080 + }, + { + "epoch": 320.52, + "learning_rate": 7.549455138734089e-05, + "loss": 0.1125, + "step": 53100 + }, + { + "epoch": 320.64, + "learning_rate": 7.545593368616582e-05, + "loss": 0.115, + "step": 53120 + }, + { + "epoch": 320.76, + "learning_rate": 7.541729547560477e-05, + "loss": 0.1155, + "step": 53140 + }, + { + "epoch": 320.89, + "learning_rate": 7.537863678678783e-05, + "loss": 0.1214, + "step": 53160 + }, + { + "epoch": 321.01, + "learning_rate": 7.533995765086165e-05, + "loss": 0.121, + "step": 53180 + }, + { + "epoch": 321.13, + "learning_rate": 7.530125809898927e-05, + "loss": 0.0912, + "step": 53200 + }, + { + "epoch": 321.25, + "learning_rate": 7.526253816235023e-05, + "loss": 0.1005, + "step": 53220 + }, + { + "epoch": 321.37, + "learning_rate": 7.522379787214049e-05, + "loss": 0.1048, + "step": 53240 + }, + { + "epoch": 321.49, + "learning_rate": 7.51850372595724e-05, + "loss": 0.1052, + "step": 53260 + }, + { + "epoch": 321.61, + "learning_rate": 7.51462563558747e-05, + "loss": 0.1141, + "step": 53280 + }, + { + "epoch": 321.73, + "learning_rate": 7.510745519229243e-05, + "loss": 0.1162, + "step": 53300 + }, + { + "epoch": 321.85, + "learning_rate": 7.506863380008702e-05, + "loss": 0.118, + "step": 53320 + }, + { + "epoch": 321.97, + "learning_rate": 7.502979221053615e-05, + "loss": 0.1209, + "step": 53340 + }, + { + "epoch": 322.09, + "learning_rate": 7.499093045493379e-05, + "loss": 0.0971, + "step": 53360 + }, + { + "epoch": 322.21, + "learning_rate": 7.495204856459014e-05, + "loss": 0.0977, + "step": 53380 + }, + { + "epoch": 322.33, + "learning_rate": 7.491314657083166e-05, + "loss": 0.1014, + "step": 53400 + }, + { + "epoch": 322.45, + "learning_rate": 7.487422450500096e-05, + "loss": 0.1085, + "step": 53420 + }, + { + "epoch": 322.58, + "learning_rate": 7.483528239845688e-05, + "loss": 0.1129, + "step": 53440 + }, + { + "epoch": 322.7, + "learning_rate": 7.479632028257435e-05, + "loss": 0.1163, + "step": 53460 + }, + { + "epoch": 322.82, + "learning_rate": 7.475733818874443e-05, + "loss": 0.1189, + "step": 53480 + }, + { + "epoch": 322.94, + "learning_rate": 7.471833614837431e-05, + "loss": 0.1211, + "step": 53500 + }, + { + "epoch": 323.06, + "learning_rate": 7.467931419288721e-05, + "loss": 0.1047, + "step": 53520 + }, + { + "epoch": 323.18, + "learning_rate": 7.464027235372244e-05, + "loss": 0.0945, + "step": 53540 + }, + { + "epoch": 323.3, + "learning_rate": 7.460121066233529e-05, + "loss": 0.1007, + "step": 53560 + }, + { + "epoch": 323.42, + "learning_rate": 7.456212915019706e-05, + "loss": 0.1046, + "step": 53580 + }, + { + "epoch": 323.54, + "learning_rate": 7.452302784879501e-05, + "loss": 0.1108, + "step": 53600 + }, + { + "epoch": 323.66, + "learning_rate": 7.448390678963238e-05, + "loss": 0.1148, + "step": 53620 + }, + { + "epoch": 323.78, + "learning_rate": 7.444476600422828e-05, + "loss": 0.1164, + "step": 53640 + }, + { + "epoch": 323.9, + "learning_rate": 7.440560552411771e-05, + "loss": 0.1192, + "step": 53660 + }, + { + "epoch": 324.02, + "learning_rate": 7.43664253808516e-05, + "loss": 0.1115, + "step": 53680 + }, + { + "epoch": 324.14, + "learning_rate": 7.432722560599669e-05, + "loss": 0.0896, + "step": 53700 + }, + { + "epoch": 324.27, + "learning_rate": 7.428800623113548e-05, + "loss": 0.0974, + "step": 53720 + }, + { + "epoch": 324.39, + "learning_rate": 7.424876728786637e-05, + "loss": 0.1004, + "step": 53740 + }, + { + "epoch": 324.51, + "learning_rate": 7.420950880780343e-05, + "loss": 0.1065, + "step": 53760 + }, + { + "epoch": 324.63, + "learning_rate": 7.417023082257652e-05, + "loss": 0.1095, + "step": 53780 + }, + { + "epoch": 324.75, + "learning_rate": 7.413093336383121e-05, + "loss": 0.1108, + "step": 53800 + }, + { + "epoch": 324.87, + "learning_rate": 7.409161646322874e-05, + "loss": 0.1144, + "step": 53820 + }, + { + "epoch": 324.99, + "learning_rate": 7.405228015244603e-05, + "loss": 0.1161, + "step": 53840 + }, + { + "epoch": 325.11, + "learning_rate": 7.401292446317564e-05, + "loss": 0.0876, + "step": 53860 + }, + { + "epoch": 325.23, + "learning_rate": 7.397354942712574e-05, + "loss": 0.0939, + "step": 53880 + }, + { + "epoch": 325.35, + "learning_rate": 7.393415507602007e-05, + "loss": 0.1007, + "step": 53900 + }, + { + "epoch": 325.47, + "learning_rate": 7.389474144159796e-05, + "loss": 0.1036, + "step": 53920 + }, + { + "epoch": 325.59, + "learning_rate": 7.385530855561426e-05, + "loss": 0.107, + "step": 53940 + }, + { + "epoch": 325.71, + "learning_rate": 7.381585644983935e-05, + "loss": 0.1091, + "step": 53960 + }, + { + "epoch": 325.84, + "learning_rate": 7.377638515605904e-05, + "loss": 0.1124, + "step": 53980 + }, + { + "epoch": 325.96, + "learning_rate": 7.373689470607469e-05, + "loss": 0.1154, + "step": 54000 + }, + { + "epoch": 325.96, + "eval_accuracy": 0.0012624591281326866, + "eval_loss": 20.39387321472168, + "eval_runtime": 18.3547, + "eval_samples_per_second": 111.143, + "eval_steps_per_second": 2.343, + "step": 54000 + }, + { + "epoch": 326.08, + "learning_rate": 7.369738513170301e-05, + "loss": 0.0942, + "step": 54020 + }, + { + "epoch": 326.2, + "learning_rate": 7.365785646477614e-05, + "loss": 0.091, + "step": 54040 + }, + { + "epoch": 326.32, + "learning_rate": 7.361830873714165e-05, + "loss": 0.0967, + "step": 54060 + }, + { + "epoch": 326.44, + "learning_rate": 7.35787419806624e-05, + "loss": 0.1024, + "step": 54080 + }, + { + "epoch": 326.56, + "learning_rate": 7.35391562272166e-05, + "loss": 0.1043, + "step": 54100 + }, + { + "epoch": 326.68, + "learning_rate": 7.349955150869781e-05, + "loss": 0.1083, + "step": 54120 + }, + { + "epoch": 326.8, + "learning_rate": 7.345992785701483e-05, + "loss": 0.1097, + "step": 54140 + }, + { + "epoch": 326.92, + "learning_rate": 7.342028530409171e-05, + "loss": 0.1117, + "step": 54160 + }, + { + "epoch": 327.04, + "learning_rate": 7.338062388186772e-05, + "loss": 0.1025, + "step": 54180 + }, + { + "epoch": 327.16, + "learning_rate": 7.33409436222974e-05, + "loss": 0.0874, + "step": 54200 + }, + { + "epoch": 327.28, + "learning_rate": 7.330124455735039e-05, + "loss": 0.0968, + "step": 54220 + }, + { + "epoch": 327.4, + "learning_rate": 7.32615267190115e-05, + "loss": 0.1013, + "step": 54240 + }, + { + "epoch": 327.53, + "learning_rate": 7.322179013928067e-05, + "loss": 0.104, + "step": 54260 + }, + { + "epoch": 327.65, + "learning_rate": 7.318203485017299e-05, + "loss": 0.1066, + "step": 54280 + }, + { + "epoch": 327.77, + "learning_rate": 7.314226088371854e-05, + "loss": 0.1099, + "step": 54300 + }, + { + "epoch": 327.89, + "learning_rate": 7.31024682719625e-05, + "loss": 0.1097, + "step": 54320 + }, + { + "epoch": 328.01, + "learning_rate": 7.306265704696504e-05, + "loss": 0.1103, + "step": 54340 + }, + { + "epoch": 328.13, + "learning_rate": 7.302282724080138e-05, + "loss": 0.0831, + "step": 54360 + }, + { + "epoch": 328.25, + "learning_rate": 7.298297888556164e-05, + "loss": 0.091, + "step": 54380 + }, + { + "epoch": 328.37, + "learning_rate": 7.294311201335093e-05, + "loss": 0.0931, + "step": 54400 + }, + { + "epoch": 328.49, + "learning_rate": 7.290322665628928e-05, + "loss": 0.0992, + "step": 54420 + }, + { + "epoch": 328.61, + "learning_rate": 7.286332284651159e-05, + "loss": 0.1046, + "step": 54440 + }, + { + "epoch": 328.73, + "learning_rate": 7.282340061616766e-05, + "loss": 0.1055, + "step": 54460 + }, + { + "epoch": 328.85, + "learning_rate": 7.278345999742208e-05, + "loss": 0.109, + "step": 54480 + }, + { + "epoch": 328.97, + "learning_rate": 7.274350102245431e-05, + "loss": 0.1102, + "step": 54500 + }, + { + "epoch": 329.09, + "learning_rate": 7.270352372345855e-05, + "loss": 0.0873, + "step": 54520 + }, + { + "epoch": 329.22, + "learning_rate": 7.266352813264378e-05, + "loss": 0.0888, + "step": 54540 + }, + { + "epoch": 329.34, + "learning_rate": 7.262351428223378e-05, + "loss": 0.0948, + "step": 54560 + }, + { + "epoch": 329.46, + "learning_rate": 7.258348220446695e-05, + "loss": 0.0994, + "step": 54580 + }, + { + "epoch": 329.58, + "learning_rate": 7.25434319315964e-05, + "loss": 0.1023, + "step": 54600 + }, + { + "epoch": 329.7, + "learning_rate": 7.250336349588994e-05, + "loss": 0.1052, + "step": 54620 + }, + { + "epoch": 329.82, + "learning_rate": 7.246327692962996e-05, + "loss": 0.1095, + "step": 54640 + }, + { + "epoch": 329.94, + "learning_rate": 7.24231722651135e-05, + "loss": 0.1091, + "step": 54660 + }, + { + "epoch": 330.06, + "learning_rate": 7.238304953465217e-05, + "loss": 0.0947, + "step": 54680 + }, + { + "epoch": 330.18, + "learning_rate": 7.234290877057208e-05, + "loss": 0.0863, + "step": 54700 + }, + { + "epoch": 330.3, + "learning_rate": 7.230275000521398e-05, + "loss": 0.0924, + "step": 54720 + }, + { + "epoch": 330.42, + "learning_rate": 7.226257327093304e-05, + "loss": 0.0977, + "step": 54740 + }, + { + "epoch": 330.54, + "learning_rate": 7.222237860009892e-05, + "loss": 0.1017, + "step": 54760 + }, + { + "epoch": 330.66, + "learning_rate": 7.218216602509574e-05, + "loss": 0.1038, + "step": 54780 + }, + { + "epoch": 330.78, + "learning_rate": 7.214193557832206e-05, + "loss": 0.1062, + "step": 54800 + }, + { + "epoch": 330.91, + "learning_rate": 7.21016872921908e-05, + "loss": 0.1092, + "step": 54820 + }, + { + "epoch": 331.03, + "learning_rate": 7.206142119912931e-05, + "loss": 0.1028, + "step": 54840 + }, + { + "epoch": 331.15, + "learning_rate": 7.202113733157923e-05, + "loss": 0.0834, + "step": 54860 + }, + { + "epoch": 331.27, + "learning_rate": 7.198083572199657e-05, + "loss": 0.0871, + "step": 54880 + }, + { + "epoch": 331.39, + "learning_rate": 7.194051640285157e-05, + "loss": 0.0929, + "step": 54900 + }, + { + "epoch": 331.51, + "learning_rate": 7.190017940662878e-05, + "loss": 0.0981, + "step": 54920 + }, + { + "epoch": 331.63, + "learning_rate": 7.185982476582705e-05, + "loss": 0.1029, + "step": 54940 + }, + { + "epoch": 331.75, + "learning_rate": 7.181945251295931e-05, + "loss": 0.1041, + "step": 54960 + }, + { + "epoch": 331.87, + "learning_rate": 7.17790626805528e-05, + "loss": 0.1068, + "step": 54980 + }, + { + "epoch": 331.99, + "learning_rate": 7.173865530114886e-05, + "loss": 0.1091, + "step": 55000 + }, + { + "epoch": 331.99, + "eval_accuracy": 0.001258013849512501, + "eval_loss": 20.59257698059082, + "eval_runtime": 18.2239, + "eval_samples_per_second": 111.941, + "eval_steps_per_second": 2.36, + "step": 55000 + }, + { + "epoch": 332.11, + "learning_rate": 7.1698230407303e-05, + "loss": 0.081, + "step": 55020 + }, + { + "epoch": 332.23, + "learning_rate": 7.16577880315848e-05, + "loss": 0.0891, + "step": 55040 + }, + { + "epoch": 332.35, + "learning_rate": 7.161732820657799e-05, + "loss": 0.0929, + "step": 55060 + }, + { + "epoch": 332.47, + "learning_rate": 7.157685096488029e-05, + "loss": 0.0964, + "step": 55080 + }, + { + "epoch": 332.6, + "learning_rate": 7.15383814827606e-05, + "loss": 0.0999, + "step": 55100 + }, + { + "epoch": 332.72, + "learning_rate": 7.14978703723281e-05, + "loss": 0.1031, + "step": 55120 + }, + { + "epoch": 332.84, + "learning_rate": 7.14573419414497e-05, + "loss": 0.1065, + "step": 55140 + }, + { + "epoch": 332.96, + "learning_rate": 7.14167962227785e-05, + "loss": 0.1079, + "step": 55160 + }, + { + "epoch": 333.08, + "learning_rate": 7.13762332489814e-05, + "loss": 0.0871, + "step": 55180 + }, + { + "epoch": 333.2, + "learning_rate": 7.133565305273927e-05, + "loss": 0.0879, + "step": 55200 + }, + { + "epoch": 333.32, + "learning_rate": 7.129505566674685e-05, + "loss": 0.0927, + "step": 55220 + }, + { + "epoch": 333.44, + "learning_rate": 7.125444112371272e-05, + "loss": 0.096, + "step": 55240 + }, + { + "epoch": 333.56, + "learning_rate": 7.121380945635927e-05, + "loss": 0.0951, + "step": 55260 + }, + { + "epoch": 333.68, + "learning_rate": 7.11731606974227e-05, + "loss": 0.0994, + "step": 55280 + }, + { + "epoch": 333.8, + "learning_rate": 7.1132494879653e-05, + "loss": 0.1019, + "step": 55300 + }, + { + "epoch": 333.92, + "learning_rate": 7.109181203581386e-05, + "loss": 0.1061, + "step": 55320 + }, + { + "epoch": 334.04, + "learning_rate": 7.105111219868274e-05, + "loss": 0.0956, + "step": 55340 + }, + { + "epoch": 334.16, + "learning_rate": 7.101039540105074e-05, + "loss": 0.0816, + "step": 55360 + }, + { + "epoch": 334.29, + "learning_rate": 7.096966167572268e-05, + "loss": 0.088, + "step": 55380 + }, + { + "epoch": 334.41, + "learning_rate": 7.092891105551694e-05, + "loss": 0.0918, + "step": 55400 + }, + { + "epoch": 334.53, + "learning_rate": 7.088814357326561e-05, + "loss": 0.0952, + "step": 55420 + }, + { + "epoch": 334.65, + "learning_rate": 7.084735926181428e-05, + "loss": 0.0985, + "step": 55440 + }, + { + "epoch": 334.77, + "learning_rate": 7.080655815402213e-05, + "loss": 0.1021, + "step": 55460 + }, + { + "epoch": 334.89, + "learning_rate": 7.076574028276188e-05, + "loss": 0.1049, + "step": 55480 + }, + { + "epoch": 335.01, + "learning_rate": 7.072490568091978e-05, + "loss": 0.1021, + "step": 55500 + }, + { + "epoch": 335.13, + "learning_rate": 7.06840543813955e-05, + "loss": 0.0766, + "step": 55520 + }, + { + "epoch": 335.25, + "learning_rate": 7.064318641710221e-05, + "loss": 0.0853, + "step": 55540 + }, + { + "epoch": 335.37, + "learning_rate": 7.060230182096648e-05, + "loss": 0.0908, + "step": 55560 + }, + { + "epoch": 335.49, + "learning_rate": 7.056140062592828e-05, + "loss": 0.0939, + "step": 55580 + }, + { + "epoch": 335.61, + "learning_rate": 7.052048286494101e-05, + "loss": 0.0962, + "step": 55600 + }, + { + "epoch": 335.73, + "learning_rate": 7.04795485709713e-05, + "loss": 0.1002, + "step": 55620 + }, + { + "epoch": 335.86, + "learning_rate": 7.043859777699925e-05, + "loss": 0.1016, + "step": 55640 + }, + { + "epoch": 335.98, + "learning_rate": 7.039763051601811e-05, + "loss": 0.1019, + "step": 55660 + }, + { + "epoch": 336.1, + "learning_rate": 7.03566468210345e-05, + "loss": 0.0824, + "step": 55680 + }, + { + "epoch": 336.22, + "learning_rate": 7.031564672506824e-05, + "loss": 0.0839, + "step": 55700 + }, + { + "epoch": 336.34, + "learning_rate": 7.027463026115235e-05, + "loss": 0.0895, + "step": 55720 + }, + { + "epoch": 336.46, + "learning_rate": 7.023359746233307e-05, + "loss": 0.0913, + "step": 55740 + }, + { + "epoch": 336.58, + "learning_rate": 7.019254836166976e-05, + "loss": 0.0952, + "step": 55760 + }, + { + "epoch": 336.7, + "learning_rate": 7.015148299223497e-05, + "loss": 0.0973, + "step": 55780 + }, + { + "epoch": 336.82, + "learning_rate": 7.01104013871143e-05, + "loss": 0.0999, + "step": 55800 + }, + { + "epoch": 336.94, + "learning_rate": 7.006930357940648e-05, + "loss": 0.1008, + "step": 55820 + }, + { + "epoch": 337.06, + "learning_rate": 7.002818960222326e-05, + "loss": 0.0874, + "step": 55840 + }, + { + "epoch": 337.18, + "learning_rate": 6.998705948868944e-05, + "loss": 0.08, + "step": 55860 + }, + { + "epoch": 337.3, + "learning_rate": 6.994591327194278e-05, + "loss": 0.0838, + "step": 55880 + }, + { + "epoch": 337.42, + "learning_rate": 6.990475098513406e-05, + "loss": 0.0882, + "step": 55900 + }, + { + "epoch": 337.55, + "learning_rate": 6.986357266142702e-05, + "loss": 0.0928, + "step": 55920 + }, + { + "epoch": 337.67, + "learning_rate": 6.982237833399825e-05, + "loss": 0.0946, + "step": 55940 + }, + { + "epoch": 337.79, + "learning_rate": 6.978116803603729e-05, + "loss": 0.0989, + "step": 55960 + }, + { + "epoch": 337.91, + "learning_rate": 6.973994180074654e-05, + "loss": 0.0996, + "step": 55980 + }, + { + "epoch": 338.03, + "learning_rate": 6.969869966134123e-05, + "loss": 0.0928, + "step": 56000 + }, + { + "epoch": 338.03, + "eval_accuracy": 0.0012570260098191265, + "eval_loss": 20.80438232421875, + "eval_runtime": 18.1697, + "eval_samples_per_second": 112.275, + "eval_steps_per_second": 2.367, + "step": 56000 + }, + { + "epoch": 338.15, + "learning_rate": 6.965744165104939e-05, + "loss": 0.0757, + "step": 56020 + }, + { + "epoch": 338.27, + "learning_rate": 6.961616780311188e-05, + "loss": 0.0818, + "step": 56040 + }, + { + "epoch": 338.39, + "learning_rate": 6.957487815078227e-05, + "loss": 0.0867, + "step": 56060 + }, + { + "epoch": 338.51, + "learning_rate": 6.953357272732691e-05, + "loss": 0.0897, + "step": 56080 + }, + { + "epoch": 338.63, + "learning_rate": 6.949225156602481e-05, + "loss": 0.0923, + "step": 56100 + }, + { + "epoch": 338.75, + "learning_rate": 6.945091470016771e-05, + "loss": 0.0941, + "step": 56120 + }, + { + "epoch": 338.87, + "learning_rate": 6.940956216305996e-05, + "loss": 0.0971, + "step": 56140 + }, + { + "epoch": 338.99, + "learning_rate": 6.936819398801856e-05, + "loss": 0.0988, + "step": 56160 + }, + { + "epoch": 339.11, + "learning_rate": 6.932681020837307e-05, + "loss": 0.0749, + "step": 56180 + }, + { + "epoch": 339.24, + "learning_rate": 6.928541085746571e-05, + "loss": 0.0812, + "step": 56200 + }, + { + "epoch": 339.36, + "learning_rate": 6.924399596865113e-05, + "loss": 0.0852, + "step": 56220 + }, + { + "epoch": 339.48, + "learning_rate": 6.92025655752966e-05, + "loss": 0.0877, + "step": 56240 + }, + { + "epoch": 339.6, + "learning_rate": 6.916111971078183e-05, + "loss": 0.0926, + "step": 56260 + }, + { + "epoch": 339.72, + "learning_rate": 6.9119658408499e-05, + "loss": 0.094, + "step": 56280 + }, + { + "epoch": 339.84, + "learning_rate": 6.907818170185273e-05, + "loss": 0.0972, + "step": 56300 + }, + { + "epoch": 339.96, + "learning_rate": 6.903668962426005e-05, + "loss": 0.097, + "step": 56320 + }, + { + "epoch": 340.08, + "learning_rate": 6.899518220915038e-05, + "loss": 0.0808, + "step": 56340 + }, + { + "epoch": 340.2, + "learning_rate": 6.895365948996552e-05, + "loss": 0.0772, + "step": 56360 + }, + { + "epoch": 340.32, + "learning_rate": 6.891212150015955e-05, + "loss": 0.0845, + "step": 56380 + }, + { + "epoch": 340.44, + "learning_rate": 6.887056827319885e-05, + "loss": 0.0884, + "step": 56400 + }, + { + "epoch": 340.56, + "learning_rate": 6.882899984256216e-05, + "loss": 0.0918, + "step": 56420 + }, + { + "epoch": 340.68, + "learning_rate": 6.878741624174039e-05, + "loss": 0.092, + "step": 56440 + }, + { + "epoch": 340.8, + "learning_rate": 6.87458175042367e-05, + "loss": 0.0932, + "step": 56460 + }, + { + "epoch": 340.93, + "learning_rate": 6.870420366356642e-05, + "loss": 0.0965, + "step": 56480 + }, + { + "epoch": 341.05, + "learning_rate": 6.86625747532571e-05, + "loss": 0.0863, + "step": 56500 + }, + { + "epoch": 341.17, + "learning_rate": 6.862093080684838e-05, + "loss": 0.0756, + "step": 56520 + }, + { + "epoch": 341.29, + "learning_rate": 6.857927185789204e-05, + "loss": 0.0826, + "step": 56540 + }, + { + "epoch": 341.41, + "learning_rate": 6.853759793995196e-05, + "loss": 0.0862, + "step": 56560 + }, + { + "epoch": 341.53, + "learning_rate": 6.849590908660404e-05, + "loss": 0.0896, + "step": 56580 + }, + { + "epoch": 341.65, + "learning_rate": 6.845420533143627e-05, + "loss": 0.0913, + "step": 56600 + }, + { + "epoch": 341.77, + "learning_rate": 6.841248670804853e-05, + "loss": 0.0915, + "step": 56620 + }, + { + "epoch": 341.89, + "learning_rate": 6.837075325005286e-05, + "loss": 0.0938, + "step": 56640 + }, + { + "epoch": 342.01, + "learning_rate": 6.832900499107311e-05, + "loss": 0.0932, + "step": 56660 + }, + { + "epoch": 342.13, + "learning_rate": 6.82872419647451e-05, + "loss": 0.0713, + "step": 56680 + }, + { + "epoch": 342.25, + "learning_rate": 6.824546420471653e-05, + "loss": 0.0782, + "step": 56700 + }, + { + "epoch": 342.37, + "learning_rate": 6.820367174464703e-05, + "loss": 0.0839, + "step": 56720 + }, + { + "epoch": 342.49, + "learning_rate": 6.816186461820798e-05, + "loss": 0.0853, + "step": 56740 + }, + { + "epoch": 342.62, + "learning_rate": 6.812004285908266e-05, + "loss": 0.0841, + "step": 56760 + }, + { + "epoch": 342.74, + "learning_rate": 6.807820650096609e-05, + "loss": 0.0881, + "step": 56780 + }, + { + "epoch": 342.86, + "learning_rate": 6.803635557756507e-05, + "loss": 0.0902, + "step": 56800 + }, + { + "epoch": 342.98, + "learning_rate": 6.799449012259816e-05, + "loss": 0.093, + "step": 56820 + }, + { + "epoch": 343.1, + "learning_rate": 6.795261016979555e-05, + "loss": 0.0726, + "step": 56840 + }, + { + "epoch": 343.22, + "learning_rate": 6.791071575289922e-05, + "loss": 0.0756, + "step": 56860 + }, + { + "epoch": 343.34, + "learning_rate": 6.786880690566268e-05, + "loss": 0.0801, + "step": 56880 + }, + { + "epoch": 343.46, + "learning_rate": 6.78268836618512e-05, + "loss": 0.0826, + "step": 56900 + }, + { + "epoch": 343.58, + "learning_rate": 6.778494605524151e-05, + "loss": 0.0865, + "step": 56920 + }, + { + "epoch": 343.7, + "learning_rate": 6.774299411962203e-05, + "loss": 0.0899, + "step": 56940 + }, + { + "epoch": 343.82, + "learning_rate": 6.770102788879267e-05, + "loss": 0.0901, + "step": 56960 + }, + { + "epoch": 343.94, + "learning_rate": 6.765904739656486e-05, + "loss": 0.0931, + "step": 56980 + }, + { + "epoch": 344.06, + "learning_rate": 6.761705267676153e-05, + "loss": 0.0812, + "step": 57000 + }, + { + "epoch": 344.06, + "eval_accuracy": 0.0012797463227667415, + "eval_loss": 20.987285614013672, + "eval_runtime": 18.1769, + "eval_samples_per_second": 112.23, + "eval_steps_per_second": 2.366, + "step": 57000 + }, + { + "epoch": 344.19, + "learning_rate": 6.757504376321704e-05, + "loss": 0.0736, + "step": 57020 + }, + { + "epoch": 344.31, + "learning_rate": 6.753302068977725e-05, + "loss": 0.0799, + "step": 57040 + }, + { + "epoch": 344.43, + "learning_rate": 6.749098349029935e-05, + "loss": 0.0823, + "step": 57060 + }, + { + "epoch": 344.55, + "learning_rate": 6.7448932198652e-05, + "loss": 0.0857, + "step": 57080 + }, + { + "epoch": 344.67, + "learning_rate": 6.740686684871515e-05, + "loss": 0.0896, + "step": 57100 + }, + { + "epoch": 344.79, + "learning_rate": 6.736478747438007e-05, + "loss": 0.0906, + "step": 57120 + }, + { + "epoch": 344.91, + "learning_rate": 6.732269410954938e-05, + "loss": 0.0916, + "step": 57140 + }, + { + "epoch": 345.03, + "learning_rate": 6.728058678813694e-05, + "loss": 0.0864, + "step": 57160 + }, + { + "epoch": 345.15, + "learning_rate": 6.723846554406782e-05, + "loss": 0.0712, + "step": 57180 + }, + { + "epoch": 345.27, + "learning_rate": 6.719633041127839e-05, + "loss": 0.0783, + "step": 57200 + }, + { + "epoch": 345.39, + "learning_rate": 6.715418142371614e-05, + "loss": 0.0822, + "step": 57220 + }, + { + "epoch": 345.51, + "learning_rate": 6.711201861533978e-05, + "loss": 0.0831, + "step": 57240 + }, + { + "epoch": 345.63, + "learning_rate": 6.707195117679314e-05, + "loss": 0.0857, + "step": 57260 + }, + { + "epoch": 345.75, + "learning_rate": 6.7029761515545e-05, + "loss": 0.0866, + "step": 57280 + }, + { + "epoch": 345.88, + "learning_rate": 6.698755813372561e-05, + "loss": 0.0893, + "step": 57300 + }, + { + "epoch": 346.0, + "learning_rate": 6.694534106533745e-05, + "loss": 0.0919, + "step": 57320 + }, + { + "epoch": 346.12, + "learning_rate": 6.690311034439408e-05, + "loss": 0.0691, + "step": 57340 + }, + { + "epoch": 346.24, + "learning_rate": 6.686086600492e-05, + "loss": 0.0745, + "step": 57360 + }, + { + "epoch": 346.36, + "learning_rate": 6.681860808095074e-05, + "loss": 0.0775, + "step": 57380 + }, + { + "epoch": 346.48, + "learning_rate": 6.677633660653271e-05, + "loss": 0.0811, + "step": 57400 + }, + { + "epoch": 346.6, + "learning_rate": 6.673405161572331e-05, + "loss": 0.0844, + "step": 57420 + }, + { + "epoch": 346.72, + "learning_rate": 6.669175314259076e-05, + "loss": 0.0858, + "step": 57440 + }, + { + "epoch": 346.84, + "learning_rate": 6.66494412212142e-05, + "loss": 0.0876, + "step": 57460 + }, + { + "epoch": 346.96, + "learning_rate": 6.660711588568358e-05, + "loss": 0.0886, + "step": 57480 + }, + { + "epoch": 347.08, + "learning_rate": 6.656477717009965e-05, + "loss": 0.0728, + "step": 57500 + }, + { + "epoch": 347.2, + "learning_rate": 6.652242510857395e-05, + "loss": 0.0716, + "step": 57520 + }, + { + "epoch": 347.32, + "learning_rate": 6.648005973522877e-05, + "loss": 0.0758, + "step": 57540 + }, + { + "epoch": 347.44, + "learning_rate": 6.64376810841971e-05, + "loss": 0.0783, + "step": 57560 + }, + { + "epoch": 347.57, + "learning_rate": 6.63952891896227e-05, + "loss": 0.0818, + "step": 57580 + }, + { + "epoch": 347.69, + "learning_rate": 6.635288408565995e-05, + "loss": 0.0867, + "step": 57600 + }, + { + "epoch": 347.81, + "learning_rate": 6.631046580647384e-05, + "loss": 0.0856, + "step": 57620 + }, + { + "epoch": 347.93, + "learning_rate": 6.626803438624005e-05, + "loss": 0.0866, + "step": 57640 + }, + { + "epoch": 348.05, + "learning_rate": 6.622558985914477e-05, + "loss": 0.0786, + "step": 57660 + }, + { + "epoch": 348.17, + "learning_rate": 6.618313225938483e-05, + "loss": 0.0692, + "step": 57680 + }, + { + "epoch": 348.29, + "learning_rate": 6.614066162116752e-05, + "loss": 0.0728, + "step": 57700 + }, + { + "epoch": 348.41, + "learning_rate": 6.609817797871067e-05, + "loss": 0.0777, + "step": 57720 + }, + { + "epoch": 348.53, + "learning_rate": 6.605568136624262e-05, + "loss": 0.0802, + "step": 57740 + }, + { + "epoch": 348.65, + "learning_rate": 6.601317181800207e-05, + "loss": 0.0828, + "step": 57760 + }, + { + "epoch": 348.77, + "learning_rate": 6.597064936823822e-05, + "loss": 0.0843, + "step": 57780 + }, + { + "epoch": 348.89, + "learning_rate": 6.592811405121064e-05, + "loss": 0.0857, + "step": 57800 + }, + { + "epoch": 349.01, + "learning_rate": 6.588556590118927e-05, + "loss": 0.0846, + "step": 57820 + }, + { + "epoch": 349.13, + "learning_rate": 6.584300495245437e-05, + "loss": 0.0665, + "step": 57840 + }, + { + "epoch": 349.26, + "learning_rate": 6.580043123929655e-05, + "loss": 0.0718, + "step": 57860 + }, + { + "epoch": 349.38, + "learning_rate": 6.575784479601667e-05, + "loss": 0.076, + "step": 57880 + }, + { + "epoch": 349.5, + "learning_rate": 6.571524565692586e-05, + "loss": 0.0778, + "step": 57900 + }, + { + "epoch": 349.62, + "learning_rate": 6.567263385634547e-05, + "loss": 0.0808, + "step": 57920 + }, + { + "epoch": 349.74, + "learning_rate": 6.563000942860706e-05, + "loss": 0.083, + "step": 57940 + }, + { + "epoch": 349.86, + "learning_rate": 6.558737240805237e-05, + "loss": 0.085, + "step": 57960 + }, + { + "epoch": 349.98, + "learning_rate": 6.554472282903328e-05, + "loss": 0.0865, + "step": 57980 + }, + { + "epoch": 350.1, + "learning_rate": 6.550206072591176e-05, + "loss": 0.0677, + "step": 58000 + }, + { + "epoch": 350.1, + "eval_accuracy": 0.001262953047979374, + "eval_loss": 21.19306755065918, + "eval_runtime": 18.1733, + "eval_samples_per_second": 112.252, + "eval_steps_per_second": 2.366, + "step": 58000 + }, + { + "epoch": 350.22, + "learning_rate": 6.545938613305993e-05, + "loss": 0.0676, + "step": 58020 + }, + { + "epoch": 350.34, + "learning_rate": 6.541669908485992e-05, + "loss": 0.0738, + "step": 58040 + }, + { + "epoch": 350.46, + "learning_rate": 6.53739996157039e-05, + "loss": 0.0772, + "step": 58060 + }, + { + "epoch": 350.58, + "learning_rate": 6.533128775999411e-05, + "loss": 0.0774, + "step": 58080 + }, + { + "epoch": 350.7, + "learning_rate": 6.528856355214267e-05, + "loss": 0.0799, + "step": 58100 + }, + { + "epoch": 350.82, + "learning_rate": 6.524582702657171e-05, + "loss": 0.0817, + "step": 58120 + }, + { + "epoch": 350.95, + "learning_rate": 6.52030782177133e-05, + "loss": 0.0831, + "step": 58140 + }, + { + "epoch": 351.07, + "learning_rate": 6.516031716000936e-05, + "loss": 0.0708, + "step": 58160 + }, + { + "epoch": 351.19, + "learning_rate": 6.511754388791173e-05, + "loss": 0.0686, + "step": 58180 + }, + { + "epoch": 351.31, + "learning_rate": 6.507475843588202e-05, + "loss": 0.0719, + "step": 58200 + }, + { + "epoch": 351.43, + "learning_rate": 6.503196083839174e-05, + "loss": 0.0742, + "step": 58220 + }, + { + "epoch": 351.55, + "learning_rate": 6.498915112992212e-05, + "loss": 0.0771, + "step": 58240 + }, + { + "epoch": 351.67, + "learning_rate": 6.494632934496418e-05, + "loss": 0.0786, + "step": 58260 + }, + { + "epoch": 351.79, + "learning_rate": 6.490349551801864e-05, + "loss": 0.081, + "step": 58280 + }, + { + "epoch": 351.91, + "learning_rate": 6.486064968359596e-05, + "loss": 0.0835, + "step": 58300 + }, + { + "epoch": 352.03, + "learning_rate": 6.481779187621621e-05, + "loss": 0.0775, + "step": 58320 + }, + { + "epoch": 352.15, + "learning_rate": 6.477492213040924e-05, + "loss": 0.0634, + "step": 58340 + }, + { + "epoch": 352.27, + "learning_rate": 6.473204048071432e-05, + "loss": 0.0693, + "step": 58360 + }, + { + "epoch": 352.39, + "learning_rate": 6.468914696168053e-05, + "loss": 0.0718, + "step": 58380 + }, + { + "epoch": 352.52, + "learning_rate": 6.464624160786633e-05, + "loss": 0.0758, + "step": 58400 + }, + { + "epoch": 352.64, + "learning_rate": 6.46033244538398e-05, + "loss": 0.0774, + "step": 58420 + }, + { + "epoch": 352.76, + "learning_rate": 6.456039553417852e-05, + "loss": 0.0814, + "step": 58440 + }, + { + "epoch": 352.88, + "learning_rate": 6.451745488346956e-05, + "loss": 0.0812, + "step": 58460 + }, + { + "epoch": 353.0, + "learning_rate": 6.447450253630942e-05, + "loss": 0.0845, + "step": 58480 + }, + { + "epoch": 353.12, + "learning_rate": 6.443153852730404e-05, + "loss": 0.062, + "step": 58500 + }, + { + "epoch": 353.24, + "learning_rate": 6.438856289106871e-05, + "loss": 0.0674, + "step": 58520 + }, + { + "epoch": 353.36, + "learning_rate": 6.434557566222817e-05, + "loss": 0.0721, + "step": 58540 + }, + { + "epoch": 353.48, + "learning_rate": 6.430257687541641e-05, + "loss": 0.0739, + "step": 58560 + }, + { + "epoch": 353.6, + "learning_rate": 6.425956656527681e-05, + "loss": 0.0757, + "step": 58580 + }, + { + "epoch": 353.72, + "learning_rate": 6.421654476646199e-05, + "loss": 0.0778, + "step": 58600 + }, + { + "epoch": 353.84, + "learning_rate": 6.417351151363381e-05, + "loss": 0.0806, + "step": 58620 + }, + { + "epoch": 353.96, + "learning_rate": 6.413046684146343e-05, + "loss": 0.0811, + "step": 58640 + }, + { + "epoch": 354.08, + "learning_rate": 6.408741078463114e-05, + "loss": 0.0667, + "step": 58660 + }, + { + "epoch": 354.21, + "learning_rate": 6.404434337782642e-05, + "loss": 0.065, + "step": 58680 + }, + { + "epoch": 354.33, + "learning_rate": 6.400126465574793e-05, + "loss": 0.0699, + "step": 58700 + }, + { + "epoch": 354.45, + "learning_rate": 6.395817465310338e-05, + "loss": 0.0726, + "step": 58720 + }, + { + "epoch": 354.57, + "learning_rate": 6.391507340460963e-05, + "loss": 0.0744, + "step": 58740 + }, + { + "epoch": 354.69, + "learning_rate": 6.387196094499258e-05, + "loss": 0.0774, + "step": 58760 + }, + { + "epoch": 354.81, + "learning_rate": 6.382883730898717e-05, + "loss": 0.0778, + "step": 58780 + }, + { + "epoch": 354.93, + "learning_rate": 6.378570253133729e-05, + "loss": 0.0795, + "step": 58800 + }, + { + "epoch": 355.05, + "learning_rate": 6.37425566467959e-05, + "loss": 0.0724, + "step": 58820 + }, + { + "epoch": 355.17, + "learning_rate": 6.369939969012483e-05, + "loss": 0.0633, + "step": 58840 + }, + { + "epoch": 355.29, + "learning_rate": 6.365623169609487e-05, + "loss": 0.068, + "step": 58860 + }, + { + "epoch": 355.41, + "learning_rate": 6.36130526994857e-05, + "loss": 0.0704, + "step": 58880 + }, + { + "epoch": 355.53, + "learning_rate": 6.356986273508585e-05, + "loss": 0.0718, + "step": 58900 + }, + { + "epoch": 355.65, + "learning_rate": 6.35266618376927e-05, + "loss": 0.0759, + "step": 58920 + }, + { + "epoch": 355.77, + "learning_rate": 6.34834500421124e-05, + "loss": 0.0765, + "step": 58940 + }, + { + "epoch": 355.9, + "learning_rate": 6.344022738315998e-05, + "loss": 0.0778, + "step": 58960 + }, + { + "epoch": 356.02, + "learning_rate": 6.33969938956591e-05, + "loss": 0.0761, + "step": 58980 + }, + { + "epoch": 356.14, + "learning_rate": 6.335374961444222e-05, + "loss": 0.0609, + "step": 59000 + }, + { + "epoch": 356.14, + "eval_accuracy": 0.0012891307998537998, + "eval_loss": 21.364957809448242, + "eval_runtime": 18.1818, + "eval_samples_per_second": 112.2, + "eval_steps_per_second": 2.365, + "step": 59000 + }, + { + "epoch": 356.26, + "learning_rate": 6.331049457435046e-05, + "loss": 0.0655, + "step": 59020 + }, + { + "epoch": 356.38, + "learning_rate": 6.326722881023366e-05, + "loss": 0.0695, + "step": 59040 + }, + { + "epoch": 356.5, + "learning_rate": 6.322395235695022e-05, + "loss": 0.072, + "step": 59060 + }, + { + "epoch": 356.62, + "learning_rate": 6.318066524936725e-05, + "loss": 0.0735, + "step": 59080 + }, + { + "epoch": 356.74, + "learning_rate": 6.313736752236033e-05, + "loss": 0.0754, + "step": 59100 + }, + { + "epoch": 356.86, + "learning_rate": 6.309405921081369e-05, + "loss": 0.0771, + "step": 59120 + }, + { + "epoch": 356.98, + "learning_rate": 6.305074034962006e-05, + "loss": 0.0799, + "step": 59140 + }, + { + "epoch": 357.1, + "learning_rate": 6.300741097368065e-05, + "loss": 0.0621, + "step": 59160 + }, + { + "epoch": 357.22, + "learning_rate": 6.296407111790516e-05, + "loss": 0.0634, + "step": 59180 + }, + { + "epoch": 357.34, + "learning_rate": 6.292072081721173e-05, + "loss": 0.0662, + "step": 59200 + }, + { + "epoch": 357.46, + "learning_rate": 6.287736010652693e-05, + "loss": 0.0706, + "step": 59220 + }, + { + "epoch": 357.59, + "learning_rate": 6.283398902078567e-05, + "loss": 0.0729, + "step": 59240 + }, + { + "epoch": 357.71, + "learning_rate": 6.279060759493128e-05, + "loss": 0.0734, + "step": 59260 + }, + { + "epoch": 357.83, + "learning_rate": 6.274938569467407e-05, + "loss": 0.0742, + "step": 59280 + }, + { + "epoch": 357.95, + "learning_rate": 6.270598420613623e-05, + "loss": 0.0771, + "step": 59300 + }, + { + "epoch": 358.07, + "learning_rate": 6.266257248061641e-05, + "loss": 0.0666, + "step": 59320 + }, + { + "epoch": 358.19, + "learning_rate": 6.261915055309066e-05, + "loss": 0.0616, + "step": 59340 + }, + { + "epoch": 358.31, + "learning_rate": 6.257571845854323e-05, + "loss": 0.0649, + "step": 59360 + }, + { + "epoch": 358.43, + "learning_rate": 6.253227623196658e-05, + "loss": 0.0685, + "step": 59380 + }, + { + "epoch": 358.55, + "learning_rate": 6.248882390836135e-05, + "loss": 0.0696, + "step": 59400 + }, + { + "epoch": 358.67, + "learning_rate": 6.244536152273626e-05, + "loss": 0.0713, + "step": 59420 + }, + { + "epoch": 358.79, + "learning_rate": 6.240188911010818e-05, + "loss": 0.0739, + "step": 59440 + }, + { + "epoch": 358.91, + "learning_rate": 6.235840670550204e-05, + "loss": 0.0753, + "step": 59460 + }, + { + "epoch": 359.03, + "learning_rate": 6.231491434395087e-05, + "loss": 0.0715, + "step": 59480 + }, + { + "epoch": 359.15, + "learning_rate": 6.22714120604956e-05, + "loss": 0.0591, + "step": 59500 + }, + { + "epoch": 359.28, + "learning_rate": 6.222789989018532e-05, + "loss": 0.0642, + "step": 59520 + }, + { + "epoch": 359.4, + "learning_rate": 6.218437786807694e-05, + "loss": 0.0675, + "step": 59540 + }, + { + "epoch": 359.52, + "learning_rate": 6.21408460292354e-05, + "loss": 0.0701, + "step": 59560 + }, + { + "epoch": 359.64, + "learning_rate": 6.20973044087335e-05, + "loss": 0.0718, + "step": 59580 + }, + { + "epoch": 359.76, + "learning_rate": 6.205375304165194e-05, + "loss": 0.0729, + "step": 59600 + }, + { + "epoch": 359.88, + "learning_rate": 6.201019196307927e-05, + "loss": 0.0748, + "step": 59620 + }, + { + "epoch": 360.0, + "learning_rate": 6.196662120811186e-05, + "loss": 0.0779, + "step": 59640 + }, + { + "epoch": 360.12, + "learning_rate": 6.192304081185389e-05, + "loss": 0.058, + "step": 59660 + }, + { + "epoch": 360.24, + "learning_rate": 6.187945080941731e-05, + "loss": 0.0629, + "step": 59680 + }, + { + "epoch": 360.36, + "learning_rate": 6.183585123592178e-05, + "loss": 0.0668, + "step": 59700 + }, + { + "epoch": 360.48, + "learning_rate": 6.179224212649466e-05, + "loss": 0.0669, + "step": 59720 + }, + { + "epoch": 360.6, + "learning_rate": 6.174862351627108e-05, + "loss": 0.0683, + "step": 59740 + }, + { + "epoch": 360.72, + "learning_rate": 6.170499544039372e-05, + "loss": 0.0713, + "step": 59760 + }, + { + "epoch": 360.85, + "learning_rate": 6.166135793401296e-05, + "loss": 0.0733, + "step": 59780 + }, + { + "epoch": 360.97, + "learning_rate": 6.161771103228674e-05, + "loss": 0.0747, + "step": 59800 + }, + { + "epoch": 361.09, + "learning_rate": 6.157405477038059e-05, + "loss": 0.061, + "step": 59820 + }, + { + "epoch": 361.21, + "learning_rate": 6.153038918346756e-05, + "loss": 0.0595, + "step": 59840 + }, + { + "epoch": 361.33, + "learning_rate": 6.148671430672821e-05, + "loss": 0.0635, + "step": 59860 + }, + { + "epoch": 361.45, + "learning_rate": 6.144303017535066e-05, + "loss": 0.0664, + "step": 59880 + }, + { + "epoch": 361.57, + "learning_rate": 6.139933682453036e-05, + "loss": 0.0681, + "step": 59900 + }, + { + "epoch": 361.69, + "learning_rate": 6.135563428947027e-05, + "loss": 0.0702, + "step": 59920 + }, + { + "epoch": 361.81, + "learning_rate": 6.131192260538073e-05, + "loss": 0.0721, + "step": 59940 + }, + { + "epoch": 361.93, + "learning_rate": 6.12682018074795e-05, + "loss": 0.0725, + "step": 59960 + }, + { + "epoch": 362.05, + "learning_rate": 6.122447193099158e-05, + "loss": 0.065, + "step": 59980 + }, + { + "epoch": 362.17, + "learning_rate": 6.118073301114937e-05, + "loss": 0.058, + "step": 60000 + }, + { + "epoch": 362.17, + "eval_accuracy": 0.001262953047979374, + "eval_loss": 21.586803436279297, + "eval_runtime": 18.2045, + "eval_samples_per_second": 112.06, + "eval_steps_per_second": 2.362, + "step": 60000 + }, + { + "epoch": 362.29, + "learning_rate": 6.113698508319251e-05, + "loss": 0.0637, + "step": 60020 + }, + { + "epoch": 362.41, + "learning_rate": 6.109322818236793e-05, + "loss": 0.0646, + "step": 60040 + }, + { + "epoch": 362.54, + "learning_rate": 6.104946234392979e-05, + "loss": 0.0672, + "step": 60060 + }, + { + "epoch": 362.66, + "learning_rate": 6.1005687603139394e-05, + "loss": 0.0685, + "step": 60080 + }, + { + "epoch": 362.78, + "learning_rate": 6.096190399526529e-05, + "loss": 0.0703, + "step": 60100 + }, + { + "epoch": 362.9, + "learning_rate": 6.091811155558313e-05, + "loss": 0.0718, + "step": 60120 + }, + { + "epoch": 363.02, + "learning_rate": 6.0874310319375706e-05, + "loss": 0.0694, + "step": 60140 + }, + { + "epoch": 363.14, + "learning_rate": 6.083050032193286e-05, + "loss": 0.0545, + "step": 60160 + }, + { + "epoch": 363.26, + "learning_rate": 6.078668159855154e-05, + "loss": 0.0605, + "step": 60180 + }, + { + "epoch": 363.38, + "learning_rate": 6.074285418453567e-05, + "loss": 0.0622, + "step": 60200 + }, + { + "epoch": 363.5, + "learning_rate": 6.069901811519623e-05, + "loss": 0.0648, + "step": 60220 + }, + { + "epoch": 363.62, + "learning_rate": 6.065517342585114e-05, + "loss": 0.0659, + "step": 60240 + }, + { + "epoch": 363.74, + "learning_rate": 6.061132015182528e-05, + "loss": 0.0681, + "step": 60260 + }, + { + "epoch": 363.86, + "learning_rate": 6.056745832845043e-05, + "loss": 0.0703, + "step": 60280 + }, + { + "epoch": 363.98, + "learning_rate": 6.052358799106528e-05, + "loss": 0.0702, + "step": 60300 + }, + { + "epoch": 364.1, + "learning_rate": 6.047970917501538e-05, + "loss": 0.0556, + "step": 60320 + }, + { + "epoch": 364.23, + "learning_rate": 6.043582191565307e-05, + "loss": 0.057, + "step": 60340 + }, + { + "epoch": 364.35, + "learning_rate": 6.0391926248337536e-05, + "loss": 0.0602, + "step": 60360 + }, + { + "epoch": 364.47, + "learning_rate": 6.0348022208434716e-05, + "loss": 0.0633, + "step": 60380 + }, + { + "epoch": 364.59, + "learning_rate": 6.0304109831317324e-05, + "loss": 0.0652, + "step": 60400 + }, + { + "epoch": 364.71, + "learning_rate": 6.026018915236474e-05, + "loss": 0.0675, + "step": 60420 + }, + { + "epoch": 364.83, + "learning_rate": 6.021626020696311e-05, + "loss": 0.0677, + "step": 60440 + }, + { + "epoch": 364.95, + "learning_rate": 6.017232303050513e-05, + "loss": 0.0697, + "step": 60460 + }, + { + "epoch": 365.07, + "learning_rate": 6.012837765839022e-05, + "loss": 0.0609, + "step": 60480 + }, + { + "epoch": 365.19, + "learning_rate": 6.008442412602438e-05, + "loss": 0.0564, + "step": 60500 + }, + { + "epoch": 365.31, + "learning_rate": 6.004046246882016e-05, + "loss": 0.0598, + "step": 60520 + }, + { + "epoch": 365.43, + "learning_rate": 5.999649272219668e-05, + "loss": 0.0632, + "step": 60540 + }, + { + "epoch": 365.55, + "learning_rate": 5.995251492157958e-05, + "loss": 0.0652, + "step": 60560 + }, + { + "epoch": 365.67, + "learning_rate": 5.990852910240098e-05, + "loss": 0.0671, + "step": 60580 + }, + { + "epoch": 365.79, + "learning_rate": 5.9864535300099435e-05, + "loss": 0.0678, + "step": 60600 + }, + { + "epoch": 365.92, + "learning_rate": 5.9820533550119986e-05, + "loss": 0.0679, + "step": 60620 + }, + { + "epoch": 366.04, + "learning_rate": 5.9776523887914026e-05, + "loss": 0.0634, + "step": 60640 + }, + { + "epoch": 366.16, + "learning_rate": 5.973250634893936e-05, + "loss": 0.0533, + "step": 60660 + }, + { + "epoch": 366.28, + "learning_rate": 5.968848096866011e-05, + "loss": 0.058, + "step": 60680 + }, + { + "epoch": 366.4, + "learning_rate": 5.964444778254675e-05, + "loss": 0.0602, + "step": 60700 + }, + { + "epoch": 366.52, + "learning_rate": 5.9600406826076006e-05, + "loss": 0.0621, + "step": 60720 + }, + { + "epoch": 366.64, + "learning_rate": 5.955635813473089e-05, + "loss": 0.0642, + "step": 60740 + }, + { + "epoch": 366.76, + "learning_rate": 5.951230174400063e-05, + "loss": 0.067, + "step": 60760 + }, + { + "epoch": 366.88, + "learning_rate": 5.946823768938067e-05, + "loss": 0.067, + "step": 60780 + }, + { + "epoch": 367.0, + "learning_rate": 5.9424166006372615e-05, + "loss": 0.0687, + "step": 60800 + }, + { + "epoch": 367.12, + "learning_rate": 5.938008673048423e-05, + "loss": 0.0515, + "step": 60820 + }, + { + "epoch": 367.24, + "learning_rate": 5.93359998972294e-05, + "loss": 0.0552, + "step": 60840 + }, + { + "epoch": 367.36, + "learning_rate": 5.929190554212807e-05, + "loss": 0.0594, + "step": 60860 + }, + { + "epoch": 367.48, + "learning_rate": 5.924780370070629e-05, + "loss": 0.0621, + "step": 60880 + }, + { + "epoch": 367.61, + "learning_rate": 5.920369440849609e-05, + "loss": 0.0645, + "step": 60900 + }, + { + "epoch": 367.73, + "learning_rate": 5.915957770103556e-05, + "loss": 0.0656, + "step": 60920 + }, + { + "epoch": 367.85, + "learning_rate": 5.91154536138687e-05, + "loss": 0.0661, + "step": 60940 + }, + { + "epoch": 367.97, + "learning_rate": 5.907132218254552e-05, + "loss": 0.0682, + "step": 60960 + }, + { + "epoch": 368.09, + "learning_rate": 5.90271834426219e-05, + "loss": 0.0542, + "step": 60980 + }, + { + "epoch": 368.21, + "learning_rate": 5.898303742965964e-05, + "loss": 0.0532, + "step": 61000 + }, + { + "epoch": 368.21, + "eval_accuracy": 0.0012604834487459375, + "eval_loss": 21.773988723754883, + "eval_runtime": 18.2904, + "eval_samples_per_second": 111.534, + "eval_steps_per_second": 2.351, + "step": 61000 + }, + { + "epoch": 368.33, + "learning_rate": 5.893888417922636e-05, + "loss": 0.0562, + "step": 61020 + }, + { + "epoch": 368.45, + "learning_rate": 5.889472372689556e-05, + "loss": 0.059, + "step": 61040 + }, + { + "epoch": 368.57, + "learning_rate": 5.885055610824652e-05, + "loss": 0.0618, + "step": 61060 + }, + { + "epoch": 368.69, + "learning_rate": 5.880638135886427e-05, + "loss": 0.0632, + "step": 61080 + }, + { + "epoch": 368.81, + "learning_rate": 5.8762199514339624e-05, + "loss": 0.0652, + "step": 61100 + }, + { + "epoch": 368.93, + "learning_rate": 5.8718010610269095e-05, + "loss": 0.067, + "step": 61120 + }, + { + "epoch": 369.05, + "learning_rate": 5.867381468225489e-05, + "loss": 0.06, + "step": 61140 + }, + { + "epoch": 369.18, + "learning_rate": 5.862961176590486e-05, + "loss": 0.0581, + "step": 61160 + }, + { + "epoch": 369.3, + "learning_rate": 5.85854018968325e-05, + "loss": 0.0553, + "step": 61180 + }, + { + "epoch": 369.42, + "learning_rate": 5.8541185110656906e-05, + "loss": 0.0586, + "step": 61200 + }, + { + "epoch": 369.54, + "learning_rate": 5.849696144300273e-05, + "loss": 0.0606, + "step": 61220 + }, + { + "epoch": 369.66, + "learning_rate": 5.845273092950019e-05, + "loss": 0.0622, + "step": 61240 + }, + { + "epoch": 369.78, + "learning_rate": 5.840849360578503e-05, + "loss": 0.0627, + "step": 61260 + }, + { + "epoch": 369.9, + "learning_rate": 5.8364249507498435e-05, + "loss": 0.0649, + "step": 61280 + }, + { + "epoch": 370.02, + "learning_rate": 5.83199986702871e-05, + "loss": 0.0633, + "step": 61300 + }, + { + "epoch": 370.14, + "learning_rate": 5.827574112980311e-05, + "loss": 0.0502, + "step": 61320 + }, + { + "epoch": 370.26, + "learning_rate": 5.8231476921703964e-05, + "loss": 0.0523, + "step": 61340 + }, + { + "epoch": 370.38, + "learning_rate": 5.8187206081652554e-05, + "loss": 0.0553, + "step": 61360 + }, + { + "epoch": 370.5, + "learning_rate": 5.814292864531705e-05, + "loss": 0.0591, + "step": 61380 + }, + { + "epoch": 370.62, + "learning_rate": 5.8098644648371046e-05, + "loss": 0.0611, + "step": 61400 + }, + { + "epoch": 370.74, + "learning_rate": 5.8054354126493324e-05, + "loss": 0.0608, + "step": 61420 + }, + { + "epoch": 370.87, + "learning_rate": 5.8010057115367966e-05, + "loss": 0.0639, + "step": 61440 + }, + { + "epoch": 370.99, + "learning_rate": 5.796575365068425e-05, + "loss": 0.0644, + "step": 61460 + }, + { + "epoch": 371.11, + "learning_rate": 5.792144376813671e-05, + "loss": 0.0504, + "step": 61480 + }, + { + "epoch": 371.23, + "learning_rate": 5.787712750342501e-05, + "loss": 0.0506, + "step": 61500 + }, + { + "epoch": 371.35, + "learning_rate": 5.7832804892253946e-05, + "loss": 0.0532, + "step": 61520 + }, + { + "epoch": 371.47, + "learning_rate": 5.7788475970333455e-05, + "loss": 0.0562, + "step": 61540 + }, + { + "epoch": 371.59, + "learning_rate": 5.774414077337855e-05, + "loss": 0.0588, + "step": 61560 + }, + { + "epoch": 371.71, + "learning_rate": 5.769979933710932e-05, + "loss": 0.061, + "step": 61580 + }, + { + "epoch": 371.83, + "learning_rate": 5.765766922602742e-05, + "loss": 0.0618, + "step": 61600 + }, + { + "epoch": 371.95, + "learning_rate": 5.7613315725854077e-05, + "loss": 0.0632, + "step": 61620 + }, + { + "epoch": 372.07, + "learning_rate": 5.756895609176974e-05, + "loss": 0.0546, + "step": 61640 + }, + { + "epoch": 372.19, + "learning_rate": 5.752459035951421e-05, + "loss": 0.0487, + "step": 61660 + }, + { + "epoch": 372.31, + "learning_rate": 5.7480218564832125e-05, + "loss": 0.0506, + "step": 61680 + }, + { + "epoch": 372.43, + "learning_rate": 5.743584074347306e-05, + "loss": 0.0537, + "step": 61700 + }, + { + "epoch": 372.56, + "learning_rate": 5.739145693119141e-05, + "loss": 0.0572, + "step": 61720 + }, + { + "epoch": 372.68, + "learning_rate": 5.7347067163746405e-05, + "loss": 0.0601, + "step": 61740 + }, + { + "epoch": 372.8, + "learning_rate": 5.730267147690212e-05, + "loss": 0.0614, + "step": 61760 + }, + { + "epoch": 372.92, + "learning_rate": 5.72582699064273e-05, + "loss": 0.0624, + "step": 61780 + }, + { + "epoch": 373.04, + "learning_rate": 5.7213862488095526e-05, + "loss": 0.0578, + "step": 61800 + }, + { + "epoch": 373.16, + "learning_rate": 5.716944925768505e-05, + "loss": 0.0485, + "step": 61820 + }, + { + "epoch": 373.28, + "learning_rate": 5.71250302509788e-05, + "loss": 0.0506, + "step": 61840 + }, + { + "epoch": 373.4, + "learning_rate": 5.708060550376436e-05, + "loss": 0.053, + "step": 61860 + }, + { + "epoch": 373.52, + "learning_rate": 5.7036175051833964e-05, + "loss": 0.0556, + "step": 61880 + }, + { + "epoch": 373.64, + "learning_rate": 5.699173893098442e-05, + "loss": 0.0584, + "step": 61900 + }, + { + "epoch": 373.76, + "learning_rate": 5.69472971770171e-05, + "loss": 0.0597, + "step": 61920 + }, + { + "epoch": 373.88, + "learning_rate": 5.690284982573792e-05, + "loss": 0.0612, + "step": 61940 + }, + { + "epoch": 374.0, + "learning_rate": 5.685839691295733e-05, + "loss": 0.0625, + "step": 61960 + }, + { + "epoch": 374.12, + "learning_rate": 5.681393847449024e-05, + "loss": 0.0473, + "step": 61980 + }, + { + "epoch": 374.25, + "learning_rate": 5.676947454615596e-05, + "loss": 0.0481, + "step": 62000 + }, + { + "epoch": 374.25, + "eval_accuracy": 0.0012693740059863084, + "eval_loss": 21.933935165405273, + "eval_runtime": 18.3333, + "eval_samples_per_second": 111.273, + "eval_steps_per_second": 2.345, + "step": 62000 + }, + { + "epoch": 374.37, + "learning_rate": 5.672500516377832e-05, + "loss": 0.0499, + "step": 62020 + }, + { + "epoch": 374.49, + "learning_rate": 5.668053036318549e-05, + "loss": 0.0524, + "step": 62040 + }, + { + "epoch": 374.61, + "learning_rate": 5.6636050180210035e-05, + "loss": 0.055, + "step": 62060 + }, + { + "epoch": 374.73, + "learning_rate": 5.6591564650688776e-05, + "loss": 0.0575, + "step": 62080 + }, + { + "epoch": 374.85, + "learning_rate": 5.6547073810462956e-05, + "loss": 0.0589, + "step": 62100 + }, + { + "epoch": 374.97, + "learning_rate": 5.650257769537802e-05, + "loss": 0.0616, + "step": 62120 + }, + { + "epoch": 375.09, + "learning_rate": 5.6458076341283684e-05, + "loss": 0.0494, + "step": 62140 + }, + { + "epoch": 375.21, + "learning_rate": 5.641356978403388e-05, + "loss": 0.0453, + "step": 62160 + }, + { + "epoch": 375.33, + "learning_rate": 5.636905805948676e-05, + "loss": 0.0467, + "step": 62180 + }, + { + "epoch": 375.45, + "learning_rate": 5.6324541203504576e-05, + "loss": 0.0478, + "step": 62200 + }, + { + "epoch": 375.57, + "learning_rate": 5.628001925195379e-05, + "loss": 0.0502, + "step": 62220 + }, + { + "epoch": 375.69, + "learning_rate": 5.6235492240704936e-05, + "loss": 0.053, + "step": 62240 + }, + { + "epoch": 375.81, + "learning_rate": 5.619096020563259e-05, + "loss": 0.0557, + "step": 62260 + }, + { + "epoch": 375.94, + "learning_rate": 5.614642318261545e-05, + "loss": 0.0592, + "step": 62280 + }, + { + "epoch": 376.06, + "learning_rate": 5.610188120753616e-05, + "loss": 0.0535, + "step": 62300 + }, + { + "epoch": 376.18, + "learning_rate": 5.605733431628139e-05, + "loss": 0.0471, + "step": 62320 + }, + { + "epoch": 376.3, + "learning_rate": 5.601278254474176e-05, + "loss": 0.0479, + "step": 62340 + }, + { + "epoch": 376.42, + "learning_rate": 5.5968225928811856e-05, + "loss": 0.0486, + "step": 62360 + }, + { + "epoch": 376.54, + "learning_rate": 5.592366450439012e-05, + "loss": 0.0513, + "step": 62380 + }, + { + "epoch": 376.66, + "learning_rate": 5.587909830737887e-05, + "loss": 0.0536, + "step": 62400 + }, + { + "epoch": 376.78, + "learning_rate": 5.583452737368432e-05, + "loss": 0.0553, + "step": 62420 + }, + { + "epoch": 376.9, + "learning_rate": 5.578995173921644e-05, + "loss": 0.0576, + "step": 62440 + }, + { + "epoch": 377.02, + "learning_rate": 5.574537143988903e-05, + "loss": 0.0566, + "step": 62460 + }, + { + "epoch": 377.14, + "learning_rate": 5.5700786511619605e-05, + "loss": 0.0453, + "step": 62480 + }, + { + "epoch": 377.26, + "learning_rate": 5.565619699032949e-05, + "loss": 0.0452, + "step": 62500 + }, + { + "epoch": 377.38, + "learning_rate": 5.5611602911943596e-05, + "loss": 0.0469, + "step": 62520 + }, + { + "epoch": 377.51, + "learning_rate": 5.556700431239061e-05, + "loss": 0.0483, + "step": 62540 + }, + { + "epoch": 377.63, + "learning_rate": 5.552240122760279e-05, + "loss": 0.0505, + "step": 62560 + }, + { + "epoch": 377.75, + "learning_rate": 5.547779369351606e-05, + "loss": 0.0536, + "step": 62580 + }, + { + "epoch": 377.87, + "learning_rate": 5.543318174606987e-05, + "loss": 0.0553, + "step": 62600 + }, + { + "epoch": 377.99, + "learning_rate": 5.5388565421207304e-05, + "loss": 0.0562, + "step": 62620 + }, + { + "epoch": 378.11, + "learning_rate": 5.53439447548749e-05, + "loss": 0.0455, + "step": 62640 + }, + { + "epoch": 378.23, + "learning_rate": 5.529931978302272e-05, + "loss": 0.0432, + "step": 62660 + }, + { + "epoch": 378.35, + "learning_rate": 5.525469054160433e-05, + "loss": 0.0434, + "step": 62680 + }, + { + "epoch": 378.47, + "learning_rate": 5.521005706657666e-05, + "loss": 0.0441, + "step": 62700 + }, + { + "epoch": 378.59, + "learning_rate": 5.516541939390013e-05, + "loss": 0.0459, + "step": 62720 + }, + { + "epoch": 378.71, + "learning_rate": 5.5120777559538486e-05, + "loss": 0.047, + "step": 62740 + }, + { + "epoch": 378.83, + "learning_rate": 5.5076131599458855e-05, + "loss": 0.0489, + "step": 62760 + }, + { + "epoch": 378.95, + "learning_rate": 5.503148154963168e-05, + "loss": 0.0513, + "step": 62780 + }, + { + "epoch": 379.07, + "learning_rate": 5.498682744603071e-05, + "loss": 0.0459, + "step": 62800 + }, + { + "epoch": 379.2, + "learning_rate": 5.494216932463293e-05, + "loss": 0.0407, + "step": 62820 + }, + { + "epoch": 379.32, + "learning_rate": 5.48975072214186e-05, + "loss": 0.0405, + "step": 62840 + }, + { + "epoch": 379.44, + "learning_rate": 5.485284117237115e-05, + "loss": 0.041, + "step": 62860 + }, + { + "epoch": 379.56, + "learning_rate": 5.480817121347722e-05, + "loss": 0.0417, + "step": 62880 + }, + { + "epoch": 379.68, + "learning_rate": 5.476349738072659e-05, + "loss": 0.0432, + "step": 62900 + }, + { + "epoch": 379.8, + "learning_rate": 5.4718819710112166e-05, + "loss": 0.0449, + "step": 62920 + }, + { + "epoch": 379.92, + "learning_rate": 5.467413823762993e-05, + "loss": 0.0469, + "step": 62940 + }, + { + "epoch": 380.04, + "learning_rate": 5.462945299927894e-05, + "loss": 0.0444, + "step": 62960 + }, + { + "epoch": 380.16, + "learning_rate": 5.4584764031061296e-05, + "loss": 0.0368, + "step": 62980 + }, + { + "epoch": 380.28, + "learning_rate": 5.454007136898207e-05, + "loss": 0.0358, + "step": 63000 + }, + { + "epoch": 380.28, + "eval_accuracy": 0.0012466536930386936, + "eval_loss": 22.16596221923828, + "eval_runtime": 18.1736, + "eval_samples_per_second": 112.25, + "eval_steps_per_second": 2.366, + "step": 63000 + }, + { + "epoch": 380.4, + "learning_rate": 5.4495375049049366e-05, + "loss": 0.0357, + "step": 63020 + }, + { + "epoch": 380.52, + "learning_rate": 5.445067510727415e-05, + "loss": 0.0361, + "step": 63040 + }, + { + "epoch": 380.64, + "learning_rate": 5.4405971579670414e-05, + "loss": 0.0369, + "step": 63060 + }, + { + "epoch": 380.76, + "learning_rate": 5.436126450225495e-05, + "loss": 0.0377, + "step": 63080 + }, + { + "epoch": 380.89, + "learning_rate": 5.4316553911047455e-05, + "loss": 0.0388, + "step": 63100 + }, + { + "epoch": 381.01, + "learning_rate": 5.427183984207043e-05, + "loss": 0.0395, + "step": 63120 + }, + { + "epoch": 381.13, + "learning_rate": 5.42271223313492e-05, + "loss": 0.031, + "step": 63140 + }, + { + "epoch": 381.25, + "learning_rate": 5.4182401414911873e-05, + "loss": 0.0305, + "step": 63160 + }, + { + "epoch": 381.37, + "learning_rate": 5.413767712878926e-05, + "loss": 0.0296, + "step": 63180 + }, + { + "epoch": 381.49, + "learning_rate": 5.409294950901492e-05, + "loss": 0.029, + "step": 63200 + }, + { + "epoch": 381.61, + "learning_rate": 5.404821859162509e-05, + "loss": 0.0285, + "step": 63220 + }, + { + "epoch": 381.73, + "learning_rate": 5.4003484412658665e-05, + "loss": 0.0287, + "step": 63240 + }, + { + "epoch": 381.85, + "learning_rate": 5.395874700815715e-05, + "loss": 0.0286, + "step": 63260 + }, + { + "epoch": 381.97, + "learning_rate": 5.39140064141647e-05, + "loss": 0.0283, + "step": 63280 + }, + { + "epoch": 382.09, + "learning_rate": 5.386926266672795e-05, + "loss": 0.0231, + "step": 63300 + }, + { + "epoch": 382.21, + "learning_rate": 5.382451580189616e-05, + "loss": 0.0213, + "step": 63320 + }, + { + "epoch": 382.33, + "learning_rate": 5.3779765855721066e-05, + "loss": 0.0205, + "step": 63340 + }, + { + "epoch": 382.45, + "learning_rate": 5.373501286425691e-05, + "loss": 0.0198, + "step": 63360 + }, + { + "epoch": 382.58, + "learning_rate": 5.369025686356033e-05, + "loss": 0.0193, + "step": 63380 + }, + { + "epoch": 382.7, + "learning_rate": 5.364549788969045e-05, + "loss": 0.0191, + "step": 63400 + }, + { + "epoch": 382.82, + "learning_rate": 5.360073597870879e-05, + "loss": 0.0186, + "step": 63420 + }, + { + "epoch": 382.94, + "learning_rate": 5.355597116667918e-05, + "loss": 0.0185, + "step": 63440 + }, + { + "epoch": 383.06, + "learning_rate": 5.351120348966783e-05, + "loss": 0.0163, + "step": 63460 + }, + { + "epoch": 383.18, + "learning_rate": 5.346643298374322e-05, + "loss": 0.0145, + "step": 63480 + }, + { + "epoch": 383.3, + "learning_rate": 5.3421659684976197e-05, + "loss": 0.0143, + "step": 63500 + }, + { + "epoch": 383.42, + "learning_rate": 5.337688362943976e-05, + "loss": 0.0141, + "step": 63520 + }, + { + "epoch": 383.54, + "learning_rate": 5.333210485320919e-05, + "loss": 0.0142, + "step": 63540 + }, + { + "epoch": 383.66, + "learning_rate": 5.32873233923619e-05, + "loss": 0.0145, + "step": 63560 + }, + { + "epoch": 383.78, + "learning_rate": 5.3242539282977543e-05, + "loss": 0.0142, + "step": 63580 + }, + { + "epoch": 383.9, + "learning_rate": 5.319775256113785e-05, + "loss": 0.0141, + "step": 63600 + }, + { + "epoch": 384.02, + "learning_rate": 5.315296326292667e-05, + "loss": 0.0135, + "step": 63620 + }, + { + "epoch": 384.14, + "learning_rate": 5.310817142442994e-05, + "loss": 0.0122, + "step": 63640 + }, + { + "epoch": 384.27, + "learning_rate": 5.3063377081735645e-05, + "loss": 0.0125, + "step": 63660 + }, + { + "epoch": 384.39, + "learning_rate": 5.301858027093376e-05, + "loss": 0.0125, + "step": 63680 + }, + { + "epoch": 384.51, + "learning_rate": 5.2973781028116275e-05, + "loss": 0.0125, + "step": 63700 + }, + { + "epoch": 384.63, + "learning_rate": 5.292897938937713e-05, + "loss": 0.0129, + "step": 63720 + }, + { + "epoch": 384.75, + "learning_rate": 5.2884175390812195e-05, + "loss": 0.0127, + "step": 63740 + }, + { + "epoch": 384.87, + "learning_rate": 5.283936906851923e-05, + "loss": 0.0128, + "step": 63760 + }, + { + "epoch": 384.99, + "learning_rate": 5.2794560458597897e-05, + "loss": 0.0128, + "step": 63780 + }, + { + "epoch": 385.11, + "learning_rate": 5.274974959714969e-05, + "loss": 0.0118, + "step": 63800 + }, + { + "epoch": 385.23, + "learning_rate": 5.2704936520277895e-05, + "loss": 0.0118, + "step": 63820 + }, + { + "epoch": 385.35, + "learning_rate": 5.2660121264087605e-05, + "loss": 0.0119, + "step": 63840 + }, + { + "epoch": 385.47, + "learning_rate": 5.261530386468565e-05, + "loss": 0.0121, + "step": 63860 + }, + { + "epoch": 385.59, + "learning_rate": 5.2570484358180605e-05, + "loss": 0.0122, + "step": 63880 + }, + { + "epoch": 385.71, + "learning_rate": 5.252566278068275e-05, + "loss": 0.0124, + "step": 63900 + }, + { + "epoch": 385.84, + "learning_rate": 5.2480839168304e-05, + "loss": 0.0126, + "step": 63920 + }, + { + "epoch": 385.96, + "learning_rate": 5.243601355715796e-05, + "loss": 0.0124, + "step": 63940 + }, + { + "epoch": 386.08, + "learning_rate": 5.239118598335978e-05, + "loss": 0.0117, + "step": 63960 + }, + { + "epoch": 386.2, + "learning_rate": 5.234635648302626e-05, + "loss": 0.0115, + "step": 63980 + }, + { + "epoch": 386.32, + "learning_rate": 5.23015250922757e-05, + "loss": 0.0117, + "step": 64000 + }, + { + "epoch": 386.32, + "eval_accuracy": 0.0012555442502790648, + "eval_loss": 22.422649383544922, + "eval_runtime": 18.1838, + "eval_samples_per_second": 112.188, + "eval_steps_per_second": 2.365, + "step": 64000 + }, + { + "epoch": 386.44, + "learning_rate": 5.225669184722795e-05, + "loss": 0.0118, + "step": 64020 + }, + { + "epoch": 386.56, + "learning_rate": 5.221185678400431e-05, + "loss": 0.012, + "step": 64040 + }, + { + "epoch": 386.68, + "learning_rate": 5.216701993872762e-05, + "loss": 0.0121, + "step": 64060 + }, + { + "epoch": 386.8, + "learning_rate": 5.2122181347522116e-05, + "loss": 0.012, + "step": 64080 + }, + { + "epoch": 386.92, + "learning_rate": 5.207734104651344e-05, + "loss": 0.0124, + "step": 64100 + }, + { + "epoch": 387.04, + "learning_rate": 5.203249907182859e-05, + "loss": 0.0119, + "step": 64120 + }, + { + "epoch": 387.16, + "learning_rate": 5.1987655459595944e-05, + "loss": 0.0115, + "step": 64140 + }, + { + "epoch": 387.28, + "learning_rate": 5.194281024594522e-05, + "loss": 0.0121, + "step": 64160 + }, + { + "epoch": 387.4, + "learning_rate": 5.189796346700735e-05, + "loss": 0.012, + "step": 64180 + }, + { + "epoch": 387.53, + "learning_rate": 5.185311515891459e-05, + "loss": 0.0118, + "step": 64200 + }, + { + "epoch": 387.65, + "learning_rate": 5.180826535780043e-05, + "loss": 0.012, + "step": 64220 + }, + { + "epoch": 387.77, + "learning_rate": 5.1763414099799534e-05, + "loss": 0.012, + "step": 64240 + }, + { + "epoch": 387.89, + "learning_rate": 5.171856142104775e-05, + "loss": 0.0121, + "step": 64260 + }, + { + "epoch": 388.01, + "learning_rate": 5.1673707357682077e-05, + "loss": 0.0121, + "step": 64280 + }, + { + "epoch": 388.13, + "learning_rate": 5.1628851945840596e-05, + "loss": 0.0111, + "step": 64300 + }, + { + "epoch": 388.25, + "learning_rate": 5.158399522166253e-05, + "loss": 0.0114, + "step": 64320 + }, + { + "epoch": 388.37, + "learning_rate": 5.153913722128813e-05, + "loss": 0.0123, + "step": 64340 + }, + { + "epoch": 388.49, + "learning_rate": 5.1494277980858674e-05, + "loss": 0.0125, + "step": 64360 + }, + { + "epoch": 388.61, + "learning_rate": 5.144941753651643e-05, + "loss": 0.014, + "step": 64380 + }, + { + "epoch": 388.73, + "learning_rate": 5.140679903218679e-05, + "loss": 0.0268, + "step": 64400 + }, + { + "epoch": 388.85, + "learning_rate": 5.1361936344172525e-05, + "loss": 0.0354, + "step": 64420 + }, + { + "epoch": 388.97, + "learning_rate": 5.1317072558870726e-05, + "loss": 0.0412, + "step": 64440 + }, + { + "epoch": 389.09, + "learning_rate": 5.127220771242737e-05, + "loss": 0.0621, + "step": 64460 + }, + { + "epoch": 389.22, + "learning_rate": 5.122734184098922e-05, + "loss": 0.0829, + "step": 64480 + }, + { + "epoch": 389.34, + "learning_rate": 5.118247498070393e-05, + "loss": 0.0872, + "step": 64500 + }, + { + "epoch": 389.46, + "learning_rate": 5.1137607167719905e-05, + "loss": 0.0932, + "step": 64520 + }, + { + "epoch": 389.58, + "learning_rate": 5.109273843818633e-05, + "loss": 0.1013, + "step": 64540 + }, + { + "epoch": 389.7, + "learning_rate": 5.104786882825313e-05, + "loss": 0.1057, + "step": 64560 + }, + { + "epoch": 389.82, + "learning_rate": 5.100299837407094e-05, + "loss": 0.1033, + "step": 64580 + }, + { + "epoch": 389.94, + "learning_rate": 5.0958127111791086e-05, + "loss": 0.1057, + "step": 64600 + }, + { + "epoch": 390.06, + "learning_rate": 5.0913255077565514e-05, + "loss": 0.0964, + "step": 64620 + }, + { + "epoch": 390.18, + "learning_rate": 5.0868382307546834e-05, + "loss": 0.099, + "step": 64640 + }, + { + "epoch": 390.3, + "learning_rate": 5.08235088378882e-05, + "loss": 0.1026, + "step": 64660 + }, + { + "epoch": 390.42, + "learning_rate": 5.077863470474339e-05, + "loss": 0.0996, + "step": 64680 + }, + { + "epoch": 390.54, + "learning_rate": 5.073375994426664e-05, + "loss": 0.1021, + "step": 64700 + }, + { + "epoch": 390.66, + "learning_rate": 5.068888459261276e-05, + "loss": 0.1036, + "step": 64720 + }, + { + "epoch": 390.78, + "learning_rate": 5.0644008685937005e-05, + "loss": 0.1073, + "step": 64740 + }, + { + "epoch": 390.91, + "learning_rate": 5.05991322603951e-05, + "loss": 0.1049, + "step": 64760 + }, + { + "epoch": 391.03, + "learning_rate": 5.055425535214314e-05, + "loss": 0.1001, + "step": 64780 + }, + { + "epoch": 391.15, + "learning_rate": 5.050937799733766e-05, + "loss": 0.0803, + "step": 64800 + }, + { + "epoch": 391.27, + "learning_rate": 5.0464500232135537e-05, + "loss": 0.0844, + "step": 64820 + }, + { + "epoch": 391.39, + "learning_rate": 5.041962209269393e-05, + "loss": 0.0872, + "step": 64840 + }, + { + "epoch": 391.51, + "learning_rate": 5.037474361517042e-05, + "loss": 0.0898, + "step": 64860 + }, + { + "epoch": 391.63, + "learning_rate": 5.032986483572273e-05, + "loss": 0.0907, + "step": 64880 + }, + { + "epoch": 391.75, + "learning_rate": 5.0284985790508924e-05, + "loss": 0.0911, + "step": 64900 + }, + { + "epoch": 391.87, + "learning_rate": 5.0240106515687194e-05, + "loss": 0.0916, + "step": 64920 + }, + { + "epoch": 391.99, + "learning_rate": 5.019522704741598e-05, + "loss": 0.0913, + "step": 64940 + }, + { + "epoch": 392.11, + "learning_rate": 5.015034742185386e-05, + "loss": 0.0692, + "step": 64960 + }, + { + "epoch": 392.23, + "learning_rate": 5.010546767515953e-05, + "loss": 0.0741, + "step": 64980 + }, + { + "epoch": 392.35, + "learning_rate": 5.006058784349181e-05, + "loss": 0.0768, + "step": 65000 + }, + { + "epoch": 392.35, + "eval_accuracy": 0.0012575199296658139, + "eval_loss": 22.219345092773438, + "eval_runtime": 18.2617, + "eval_samples_per_second": 111.709, + "eval_steps_per_second": 2.355, + "step": 65000 + }, + { + "epoch": 392.47, + "learning_rate": 5.0015707963009575e-05, + "loss": 0.0777, + "step": 65020 + }, + { + "epoch": 392.6, + "learning_rate": 4.99708280698717e-05, + "loss": 0.0808, + "step": 65040 + }, + { + "epoch": 392.72, + "learning_rate": 4.992594820023712e-05, + "loss": 0.0818, + "step": 65060 + }, + { + "epoch": 392.84, + "learning_rate": 4.988106839026474e-05, + "loss": 0.0829, + "step": 65080 + }, + { + "epoch": 392.96, + "learning_rate": 4.983618867611342e-05, + "loss": 0.0821, + "step": 65100 + }, + { + "epoch": 393.08, + "learning_rate": 4.979130909394191e-05, + "loss": 0.0686, + "step": 65120 + }, + { + "epoch": 393.2, + "learning_rate": 4.97464296799089e-05, + "loss": 0.0649, + "step": 65140 + }, + { + "epoch": 393.32, + "learning_rate": 4.9701550470172914e-05, + "loss": 0.0684, + "step": 65160 + }, + { + "epoch": 393.44, + "learning_rate": 4.965667150089234e-05, + "loss": 0.0699, + "step": 65180 + }, + { + "epoch": 393.56, + "learning_rate": 4.9611792808225316e-05, + "loss": 0.0699, + "step": 65200 + }, + { + "epoch": 393.68, + "learning_rate": 4.956691442832981e-05, + "loss": 0.0717, + "step": 65220 + }, + { + "epoch": 393.8, + "learning_rate": 4.952203639736354e-05, + "loss": 0.0734, + "step": 65240 + }, + { + "epoch": 393.92, + "learning_rate": 4.947715875148391e-05, + "loss": 0.0741, + "step": 65260 + }, + { + "epoch": 394.04, + "learning_rate": 4.9432281526848034e-05, + "loss": 0.0675, + "step": 65280 + }, + { + "epoch": 394.16, + "learning_rate": 4.9387404759612674e-05, + "loss": 0.0563, + "step": 65300 + }, + { + "epoch": 394.29, + "learning_rate": 4.934252848593424e-05, + "loss": 0.0609, + "step": 65320 + }, + { + "epoch": 394.41, + "learning_rate": 4.929765274196872e-05, + "loss": 0.0634, + "step": 65340 + }, + { + "epoch": 394.53, + "learning_rate": 4.925277756387169e-05, + "loss": 0.0642, + "step": 65360 + }, + { + "epoch": 394.65, + "learning_rate": 4.920790298779831e-05, + "loss": 0.0662, + "step": 65380 + }, + { + "epoch": 394.77, + "learning_rate": 4.916302904990315e-05, + "loss": 0.067, + "step": 65400 + }, + { + "epoch": 394.89, + "learning_rate": 4.911815578634041e-05, + "loss": 0.0673, + "step": 65420 + }, + { + "epoch": 395.01, + "learning_rate": 4.907328323326359e-05, + "loss": 0.0652, + "step": 65440 + }, + { + "epoch": 395.13, + "learning_rate": 4.9028411426825746e-05, + "loss": 0.0507, + "step": 65460 + }, + { + "epoch": 395.25, + "learning_rate": 4.898354040317926e-05, + "loss": 0.0547, + "step": 65480 + }, + { + "epoch": 395.37, + "learning_rate": 4.8938670198475904e-05, + "loss": 0.0574, + "step": 65500 + }, + { + "epoch": 395.49, + "learning_rate": 4.8893800848866824e-05, + "loss": 0.0586, + "step": 65520 + }, + { + "epoch": 395.61, + "learning_rate": 4.8848932390502396e-05, + "loss": 0.0593, + "step": 65540 + }, + { + "epoch": 395.73, + "learning_rate": 4.8804064859532366e-05, + "loss": 0.0607, + "step": 65560 + }, + { + "epoch": 395.86, + "learning_rate": 4.8759198292105655e-05, + "loss": 0.063, + "step": 65580 + }, + { + "epoch": 395.98, + "learning_rate": 4.8714332724370504e-05, + "loss": 0.0637, + "step": 65600 + }, + { + "epoch": 396.1, + "learning_rate": 4.866946819247422e-05, + "loss": 0.0489, + "step": 65620 + }, + { + "epoch": 396.22, + "learning_rate": 4.8624604732563396e-05, + "loss": 0.0468, + "step": 65640 + }, + { + "epoch": 396.34, + "learning_rate": 4.85797423807837e-05, + "loss": 0.0474, + "step": 65660 + }, + { + "epoch": 396.46, + "learning_rate": 4.8534881173279905e-05, + "loss": 0.0491, + "step": 65680 + }, + { + "epoch": 396.58, + "learning_rate": 4.8490021146195904e-05, + "loss": 0.0516, + "step": 65700 + }, + { + "epoch": 396.7, + "learning_rate": 4.8445162335674574e-05, + "loss": 0.0532, + "step": 65720 + }, + { + "epoch": 396.82, + "learning_rate": 4.840030477785788e-05, + "loss": 0.0552, + "step": 65740 + }, + { + "epoch": 396.94, + "learning_rate": 4.835544850888671e-05, + "loss": 0.0573, + "step": 65760 + }, + { + "epoch": 397.06, + "learning_rate": 4.831059356490098e-05, + "loss": 0.0502, + "step": 65780 + }, + { + "epoch": 397.18, + "learning_rate": 4.826573998203952e-05, + "loss": 0.0423, + "step": 65800 + }, + { + "epoch": 397.3, + "learning_rate": 4.822088779644001e-05, + "loss": 0.0409, + "step": 65820 + }, + { + "epoch": 397.42, + "learning_rate": 4.817603704423909e-05, + "loss": 0.0411, + "step": 65840 + }, + { + "epoch": 397.55, + "learning_rate": 4.813118776157216e-05, + "loss": 0.0403, + "step": 65860 + }, + { + "epoch": 397.67, + "learning_rate": 4.808633998457351e-05, + "loss": 0.0416, + "step": 65880 + }, + { + "epoch": 397.79, + "learning_rate": 4.804149374937615e-05, + "loss": 0.0447, + "step": 65900 + }, + { + "epoch": 397.91, + "learning_rate": 4.79966490921119e-05, + "loss": 0.0455, + "step": 65920 + }, + { + "epoch": 398.03, + "learning_rate": 4.795180604891132e-05, + "loss": 0.0456, + "step": 65940 + }, + { + "epoch": 398.15, + "learning_rate": 4.790696465590358e-05, + "loss": 0.0395, + "step": 65960 + }, + { + "epoch": 398.27, + "learning_rate": 4.786212494921664e-05, + "loss": 0.0377, + "step": 65980 + }, + { + "epoch": 398.39, + "learning_rate": 4.7817286964976995e-05, + "loss": 0.0339, + "step": 66000 + }, + { + "epoch": 398.39, + "eval_accuracy": 0.0012530746510456283, + "eval_loss": 22.383317947387695, + "eval_runtime": 18.182, + "eval_samples_per_second": 112.199, + "eval_steps_per_second": 2.365, + "step": 66000 + }, + { + "epoch": 398.51, + "learning_rate": 4.777245073930984e-05, + "loss": 0.0323, + "step": 66020 + }, + { + "epoch": 398.63, + "learning_rate": 4.772761630833886e-05, + "loss": 0.031, + "step": 66040 + }, + { + "epoch": 398.75, + "learning_rate": 4.76827837081864e-05, + "loss": 0.0301, + "step": 66060 + }, + { + "epoch": 398.87, + "learning_rate": 4.7637952974973244e-05, + "loss": 0.0333, + "step": 66080 + }, + { + "epoch": 398.99, + "learning_rate": 4.7593124144818696e-05, + "loss": 0.0308, + "step": 66100 + }, + { + "epoch": 399.11, + "learning_rate": 4.754829725384059e-05, + "loss": 0.023, + "step": 66120 + }, + { + "epoch": 399.24, + "learning_rate": 4.750347233815506e-05, + "loss": 0.021, + "step": 66140 + }, + { + "epoch": 399.36, + "learning_rate": 4.74586494338768e-05, + "loss": 0.0216, + "step": 66160 + }, + { + "epoch": 399.48, + "learning_rate": 4.741382857711878e-05, + "loss": 0.0238, + "step": 66180 + }, + { + "epoch": 399.6, + "learning_rate": 4.736900980399237e-05, + "loss": 0.0214, + "step": 66200 + }, + { + "epoch": 399.72, + "learning_rate": 4.732419315060723e-05, + "loss": 0.0206, + "step": 66220 + }, + { + "epoch": 399.84, + "learning_rate": 4.727937865307134e-05, + "loss": 0.0237, + "step": 66240 + }, + { + "epoch": 399.96, + "learning_rate": 4.723456634749095e-05, + "loss": 0.0205, + "step": 66260 + }, + { + "epoch": 400.08, + "learning_rate": 4.718975626997047e-05, + "loss": 0.017, + "step": 66280 + }, + { + "epoch": 400.2, + "learning_rate": 4.7144948456612634e-05, + "loss": 0.017, + "step": 66300 + }, + { + "epoch": 400.32, + "learning_rate": 4.7100142943518235e-05, + "loss": 0.0177, + "step": 66320 + }, + { + "epoch": 400.44, + "learning_rate": 4.705533976678631e-05, + "loss": 0.0158, + "step": 66340 + }, + { + "epoch": 400.56, + "learning_rate": 4.701053896251392e-05, + "loss": 0.0195, + "step": 66360 + }, + { + "epoch": 400.68, + "learning_rate": 4.6965740566796304e-05, + "loss": 0.0234, + "step": 66380 + }, + { + "epoch": 400.8, + "learning_rate": 4.692094461572672e-05, + "loss": 0.0198, + "step": 66400 + }, + { + "epoch": 400.93, + "learning_rate": 4.687615114539644e-05, + "loss": 0.0211, + "step": 66420 + }, + { + "epoch": 401.05, + "learning_rate": 4.683136019189476e-05, + "loss": 0.0174, + "step": 66440 + }, + { + "epoch": 401.17, + "learning_rate": 4.6788811150149395e-05, + "loss": 0.0221, + "step": 66460 + }, + { + "epoch": 401.29, + "learning_rate": 4.674402520825759e-05, + "loss": 0.0155, + "step": 66480 + }, + { + "epoch": 401.41, + "learning_rate": 4.6699241889645874e-05, + "loss": 0.0175, + "step": 66500 + }, + { + "epoch": 401.53, + "learning_rate": 4.665446123039534e-05, + "loss": 0.0172, + "step": 66520 + }, + { + "epoch": 401.65, + "learning_rate": 4.6609683266584974e-05, + "loss": 0.0156, + "step": 66540 + }, + { + "epoch": 401.77, + "learning_rate": 4.6564908034291553e-05, + "loss": 0.0184, + "step": 66560 + }, + { + "epoch": 401.89, + "learning_rate": 4.6520135569589675e-05, + "loss": 0.0167, + "step": 66580 + }, + { + "epoch": 402.01, + "learning_rate": 4.647536590855174e-05, + "loss": 0.0152, + "step": 66600 + }, + { + "epoch": 402.13, + "learning_rate": 4.643059908724781e-05, + "loss": 0.0143, + "step": 66620 + }, + { + "epoch": 402.25, + "learning_rate": 4.6385835141745734e-05, + "loss": 0.013, + "step": 66640 + }, + { + "epoch": 402.37, + "learning_rate": 4.6341074108111e-05, + "loss": 0.0212, + "step": 66660 + }, + { + "epoch": 402.49, + "learning_rate": 4.6296316022406776e-05, + "loss": 0.0212, + "step": 66680 + }, + { + "epoch": 402.62, + "learning_rate": 4.625156092069383e-05, + "loss": 0.0166, + "step": 66700 + }, + { + "epoch": 402.74, + "learning_rate": 4.620680883903054e-05, + "loss": 0.0207, + "step": 66720 + }, + { + "epoch": 402.86, + "learning_rate": 4.6162059813472876e-05, + "loss": 0.0158, + "step": 66740 + }, + { + "epoch": 402.98, + "learning_rate": 4.611731388007426e-05, + "loss": 0.0153, + "step": 66760 + }, + { + "epoch": 403.1, + "learning_rate": 4.6072571074885774e-05, + "loss": 0.0129, + "step": 66780 + }, + { + "epoch": 403.22, + "learning_rate": 4.602783143395579e-05, + "loss": 0.0155, + "step": 66800 + }, + { + "epoch": 403.34, + "learning_rate": 4.5983094993330296e-05, + "loss": 0.0158, + "step": 66820 + }, + { + "epoch": 403.46, + "learning_rate": 4.59383617890526e-05, + "loss": 0.0194, + "step": 66840 + }, + { + "epoch": 403.58, + "learning_rate": 4.5893631857163457e-05, + "loss": 0.0187, + "step": 66860 + }, + { + "epoch": 403.7, + "learning_rate": 4.584890523370096e-05, + "loss": 0.016, + "step": 66880 + }, + { + "epoch": 403.82, + "learning_rate": 4.5804181954700524e-05, + "loss": 0.021, + "step": 66900 + }, + { + "epoch": 403.94, + "learning_rate": 4.575946205619492e-05, + "loss": 0.0194, + "step": 66920 + }, + { + "epoch": 404.06, + "learning_rate": 4.5714745574214126e-05, + "loss": 0.0163, + "step": 66940 + }, + { + "epoch": 404.19, + "learning_rate": 4.5670032544785444e-05, + "loss": 0.0153, + "step": 66960 + }, + { + "epoch": 404.31, + "learning_rate": 4.5625323003933304e-05, + "loss": 0.027, + "step": 66980 + }, + { + "epoch": 404.43, + "learning_rate": 4.558061698767941e-05, + "loss": 0.0191, + "step": 67000 + }, + { + "epoch": 404.43, + "eval_accuracy": 0.0012545564105856902, + "eval_loss": 22.592748641967773, + "eval_runtime": 18.1967, + "eval_samples_per_second": 112.108, + "eval_steps_per_second": 2.363, + "step": 67000 + }, + { + "epoch": 404.55, + "learning_rate": 4.553591453204261e-05, + "loss": 0.0186, + "step": 67020 + }, + { + "epoch": 404.67, + "learning_rate": 4.549121567303881e-05, + "loss": 0.0181, + "step": 67040 + }, + { + "epoch": 404.79, + "learning_rate": 4.544652044668113e-05, + "loss": 0.0183, + "step": 67060 + }, + { + "epoch": 404.91, + "learning_rate": 4.5401828888979665e-05, + "loss": 0.02, + "step": 67080 + }, + { + "epoch": 405.03, + "learning_rate": 4.535714103594162e-05, + "loss": 0.0195, + "step": 67100 + }, + { + "epoch": 405.15, + "learning_rate": 4.531245692357117e-05, + "loss": 0.0172, + "step": 67120 + }, + { + "epoch": 405.27, + "learning_rate": 4.5267776587869503e-05, + "loss": 0.0164, + "step": 67140 + }, + { + "epoch": 405.39, + "learning_rate": 4.5223100064834786e-05, + "loss": 0.0172, + "step": 67160 + }, + { + "epoch": 405.51, + "learning_rate": 4.517842739046205e-05, + "loss": 0.0305, + "step": 67180 + }, + { + "epoch": 405.63, + "learning_rate": 4.5133758600743295e-05, + "loss": 0.0237, + "step": 67200 + }, + { + "epoch": 405.75, + "learning_rate": 4.5089093731667333e-05, + "loss": 0.0231, + "step": 67220 + }, + { + "epoch": 405.88, + "learning_rate": 4.504443281921985e-05, + "loss": 0.0239, + "step": 67240 + }, + { + "epoch": 406.0, + "learning_rate": 4.499977589938333e-05, + "loss": 0.0247, + "step": 67260 + }, + { + "epoch": 406.12, + "learning_rate": 4.495512300813707e-05, + "loss": 0.0231, + "step": 67280 + }, + { + "epoch": 406.24, + "learning_rate": 4.4910474181457055e-05, + "loss": 0.0243, + "step": 67300 + }, + { + "epoch": 406.36, + "learning_rate": 4.486582945531607e-05, + "loss": 0.0259, + "step": 67320 + }, + { + "epoch": 406.48, + "learning_rate": 4.482118886568357e-05, + "loss": 0.0348, + "step": 67340 + }, + { + "epoch": 406.6, + "learning_rate": 4.4776552448525646e-05, + "loss": 0.0428, + "step": 67360 + }, + { + "epoch": 406.72, + "learning_rate": 4.473192023980509e-05, + "loss": 0.0538, + "step": 67380 + }, + { + "epoch": 406.84, + "learning_rate": 4.468729227548122e-05, + "loss": 0.067, + "step": 67400 + }, + { + "epoch": 406.96, + "learning_rate": 4.464266859151003e-05, + "loss": 0.0714, + "step": 67420 + }, + { + "epoch": 407.08, + "learning_rate": 4.459804922384398e-05, + "loss": 0.0662, + "step": 67440 + }, + { + "epoch": 407.2, + "learning_rate": 4.455343420843207e-05, + "loss": 0.0684, + "step": 67460 + }, + { + "epoch": 407.32, + "learning_rate": 4.4508823581219886e-05, + "loss": 0.0719, + "step": 67480 + }, + { + "epoch": 407.44, + "learning_rate": 4.446421737814933e-05, + "loss": 0.0727, + "step": 67500 + }, + { + "epoch": 407.57, + "learning_rate": 4.4419615635158875e-05, + "loss": 0.0756, + "step": 67520 + }, + { + "epoch": 407.69, + "learning_rate": 4.437501838818327e-05, + "loss": 0.0771, + "step": 67540 + }, + { + "epoch": 407.81, + "learning_rate": 4.433042567315378e-05, + "loss": 0.0765, + "step": 67560 + }, + { + "epoch": 407.93, + "learning_rate": 4.42858375259979e-05, + "loss": 0.0769, + "step": 67580 + }, + { + "epoch": 408.05, + "learning_rate": 4.4241253982639525e-05, + "loss": 0.0689, + "step": 67600 + }, + { + "epoch": 408.17, + "learning_rate": 4.4196675078998814e-05, + "loss": 0.0606, + "step": 67620 + }, + { + "epoch": 408.29, + "learning_rate": 4.415210085099215e-05, + "loss": 0.0647, + "step": 67640 + }, + { + "epoch": 408.41, + "learning_rate": 4.410753133453222e-05, + "loss": 0.0667, + "step": 67660 + }, + { + "epoch": 408.53, + "learning_rate": 4.4062966565527844e-05, + "loss": 0.0672, + "step": 67680 + }, + { + "epoch": 408.65, + "learning_rate": 4.40184065798841e-05, + "loss": 0.0675, + "step": 67700 + }, + { + "epoch": 408.77, + "learning_rate": 4.39738514135021e-05, + "loss": 0.0684, + "step": 67720 + }, + { + "epoch": 408.89, + "learning_rate": 4.3929301102279183e-05, + "loss": 0.0696, + "step": 67740 + }, + { + "epoch": 409.01, + "learning_rate": 4.388475568210873e-05, + "loss": 0.067, + "step": 67760 + }, + { + "epoch": 409.13, + "learning_rate": 4.3840215188880157e-05, + "loss": 0.052, + "step": 67780 + }, + { + "epoch": 409.26, + "learning_rate": 4.379567965847896e-05, + "loss": 0.0557, + "step": 67800 + }, + { + "epoch": 409.38, + "learning_rate": 4.3751149126786584e-05, + "loss": 0.0573, + "step": 67820 + }, + { + "epoch": 409.5, + "learning_rate": 4.370662362968051e-05, + "loss": 0.0599, + "step": 67840 + }, + { + "epoch": 409.62, + "learning_rate": 4.366210320303407e-05, + "loss": 0.0615, + "step": 67860 + }, + { + "epoch": 409.74, + "learning_rate": 4.361758788271662e-05, + "loss": 0.062, + "step": 67880 + }, + { + "epoch": 409.86, + "learning_rate": 4.357307770459335e-05, + "loss": 0.0625, + "step": 67900 + }, + { + "epoch": 409.98, + "learning_rate": 4.352857270452527e-05, + "loss": 0.0627, + "step": 67920 + }, + { + "epoch": 410.1, + "learning_rate": 4.348407291836928e-05, + "loss": 0.0482, + "step": 67940 + }, + { + "epoch": 410.22, + "learning_rate": 4.343957838197805e-05, + "loss": 0.0469, + "step": 67960 + }, + { + "epoch": 410.34, + "learning_rate": 4.339508913120002e-05, + "loss": 0.0474, + "step": 67980 + }, + { + "epoch": 410.46, + "learning_rate": 4.335060520187938e-05, + "loss": 0.0493, + "step": 68000 + }, + { + "epoch": 410.46, + "eval_accuracy": 0.0012644348075194357, + "eval_loss": 22.606916427612305, + "eval_runtime": 18.1884, + "eval_samples_per_second": 112.159, + "eval_steps_per_second": 2.364, + "step": 68000 + }, + { + "epoch": 410.58, + "learning_rate": 4.3306126629856e-05, + "loss": 0.0515, + "step": 68020 + }, + { + "epoch": 410.7, + "learning_rate": 4.3261653450965536e-05, + "loss": 0.0526, + "step": 68040 + }, + { + "epoch": 410.82, + "learning_rate": 4.3217185701039144e-05, + "loss": 0.055, + "step": 68060 + }, + { + "epoch": 410.95, + "learning_rate": 4.317272341590373e-05, + "loss": 0.0548, + "step": 68080 + }, + { + "epoch": 411.07, + "learning_rate": 4.312826663138174e-05, + "loss": 0.0479, + "step": 68100 + }, + { + "epoch": 411.19, + "learning_rate": 4.308381538329123e-05, + "loss": 0.0404, + "step": 68120 + }, + { + "epoch": 411.31, + "learning_rate": 4.3039369707445714e-05, + "loss": 0.0391, + "step": 68140 + }, + { + "epoch": 411.43, + "learning_rate": 4.2994929639654315e-05, + "loss": 0.0384, + "step": 68160 + }, + { + "epoch": 411.55, + "learning_rate": 4.295049521572157e-05, + "loss": 0.0377, + "step": 68180 + }, + { + "epoch": 411.67, + "learning_rate": 4.29060664714475e-05, + "loss": 0.0376, + "step": 68200 + }, + { + "epoch": 411.79, + "learning_rate": 4.2861643442627564e-05, + "loss": 0.0374, + "step": 68220 + }, + { + "epoch": 411.91, + "learning_rate": 4.281722616505254e-05, + "loss": 0.0371, + "step": 68240 + }, + { + "epoch": 412.03, + "learning_rate": 4.277281467450869e-05, + "loss": 0.0345, + "step": 68260 + }, + { + "epoch": 412.15, + "learning_rate": 4.27284090067775e-05, + "loss": 0.0258, + "step": 68280 + }, + { + "epoch": 412.27, + "learning_rate": 4.2684009197635824e-05, + "loss": 0.0242, + "step": 68300 + }, + { + "epoch": 412.39, + "learning_rate": 4.2639615282855816e-05, + "loss": 0.0229, + "step": 68320 + }, + { + "epoch": 412.52, + "learning_rate": 4.259522729820482e-05, + "loss": 0.0223, + "step": 68340 + }, + { + "epoch": 412.64, + "learning_rate": 4.255084527944545e-05, + "loss": 0.0217, + "step": 68360 + }, + { + "epoch": 412.76, + "learning_rate": 4.250646926233547e-05, + "loss": 0.021, + "step": 68380 + }, + { + "epoch": 412.88, + "learning_rate": 4.246209928262789e-05, + "loss": 0.0206, + "step": 68400 + }, + { + "epoch": 413.0, + "learning_rate": 4.241773537607073e-05, + "loss": 0.0201, + "step": 68420 + }, + { + "epoch": 413.12, + "learning_rate": 4.237337757840726e-05, + "loss": 0.0153, + "step": 68440 + }, + { + "epoch": 413.24, + "learning_rate": 4.232902592537569e-05, + "loss": 0.0145, + "step": 68460 + }, + { + "epoch": 413.36, + "learning_rate": 4.2284680452709394e-05, + "loss": 0.0141, + "step": 68480 + }, + { + "epoch": 413.48, + "learning_rate": 4.22403411961367e-05, + "loss": 0.014, + "step": 68500 + }, + { + "epoch": 413.6, + "learning_rate": 4.219600819138095e-05, + "loss": 0.0139, + "step": 68520 + }, + { + "epoch": 413.72, + "learning_rate": 4.215168147416045e-05, + "loss": 0.0141, + "step": 68540 + }, + { + "epoch": 413.84, + "learning_rate": 4.2107361080188415e-05, + "loss": 0.0139, + "step": 68560 + }, + { + "epoch": 413.96, + "learning_rate": 4.206304704517301e-05, + "loss": 0.0139, + "step": 68580 + }, + { + "epoch": 414.08, + "learning_rate": 4.201873940481722e-05, + "loss": 0.0124, + "step": 68600 + }, + { + "epoch": 414.21, + "learning_rate": 4.1974438194818946e-05, + "loss": 0.012, + "step": 68620 + }, + { + "epoch": 414.33, + "learning_rate": 4.193014345087087e-05, + "loss": 0.012, + "step": 68640 + }, + { + "epoch": 414.45, + "learning_rate": 4.188585520866045e-05, + "loss": 0.0121, + "step": 68660 + }, + { + "epoch": 414.57, + "learning_rate": 4.184157350386994e-05, + "loss": 0.0122, + "step": 68680 + }, + { + "epoch": 414.69, + "learning_rate": 4.179729837217628e-05, + "loss": 0.0122, + "step": 68700 + }, + { + "epoch": 414.81, + "learning_rate": 4.1753029849251175e-05, + "loss": 0.0124, + "step": 68720 + }, + { + "epoch": 414.93, + "learning_rate": 4.170876797076095e-05, + "loss": 0.0124, + "step": 68740 + }, + { + "epoch": 415.05, + "learning_rate": 4.166451277236661e-05, + "loss": 0.0121, + "step": 68760 + }, + { + "epoch": 415.17, + "learning_rate": 4.16202642897238e-05, + "loss": 0.0114, + "step": 68780 + }, + { + "epoch": 415.29, + "learning_rate": 4.1576022558482666e-05, + "loss": 0.0115, + "step": 68800 + }, + { + "epoch": 415.41, + "learning_rate": 4.153178761428802e-05, + "loss": 0.0115, + "step": 68820 + }, + { + "epoch": 415.53, + "learning_rate": 4.148755949277913e-05, + "loss": 0.0116, + "step": 68840 + }, + { + "epoch": 415.65, + "learning_rate": 4.144333822958981e-05, + "loss": 0.0118, + "step": 68860 + }, + { + "epoch": 415.77, + "learning_rate": 4.139912386034833e-05, + "loss": 0.0119, + "step": 68880 + }, + { + "epoch": 415.9, + "learning_rate": 4.135491642067739e-05, + "loss": 0.0119, + "step": 68900 + }, + { + "epoch": 416.02, + "learning_rate": 4.131071594619416e-05, + "loss": 0.0119, + "step": 68920 + }, + { + "epoch": 416.14, + "learning_rate": 4.1266522472510115e-05, + "loss": 0.0111, + "step": 68940 + }, + { + "epoch": 416.26, + "learning_rate": 4.122233603523121e-05, + "loss": 0.0112, + "step": 68960 + }, + { + "epoch": 416.38, + "learning_rate": 4.117815666995758e-05, + "loss": 0.0113, + "step": 68980 + }, + { + "epoch": 416.5, + "learning_rate": 4.11339844122838e-05, + "loss": 0.0115, + "step": 69000 + }, + { + "epoch": 416.5, + "eval_accuracy": 0.001247147612885381, + "eval_loss": 22.865169525146484, + "eval_runtime": 18.2075, + "eval_samples_per_second": 112.042, + "eval_steps_per_second": 2.362, + "step": 69000 + }, + { + "epoch": 416.62, + "learning_rate": 4.108981929779863e-05, + "loss": 0.0115, + "step": 69020 + }, + { + "epoch": 416.74, + "learning_rate": 4.104566136208512e-05, + "loss": 0.0117, + "step": 69040 + }, + { + "epoch": 416.86, + "learning_rate": 4.100151064072052e-05, + "loss": 0.0118, + "step": 69060 + }, + { + "epoch": 416.98, + "learning_rate": 4.095736716927625e-05, + "loss": 0.0118, + "step": 69080 + }, + { + "epoch": 417.1, + "learning_rate": 4.091323098331794e-05, + "loss": 0.0109, + "step": 69100 + }, + { + "epoch": 417.22, + "learning_rate": 4.0869102118405276e-05, + "loss": 0.011, + "step": 69120 + }, + { + "epoch": 417.34, + "learning_rate": 4.082498061009215e-05, + "loss": 0.0112, + "step": 69140 + }, + { + "epoch": 417.46, + "learning_rate": 4.078086649392639e-05, + "loss": 0.0113, + "step": 69160 + }, + { + "epoch": 417.59, + "learning_rate": 4.0736759805449986e-05, + "loss": 0.0114, + "step": 69180 + }, + { + "epoch": 417.71, + "learning_rate": 4.06926605801989e-05, + "loss": 0.0116, + "step": 69200 + }, + { + "epoch": 417.83, + "learning_rate": 4.0648568853703076e-05, + "loss": 0.0115, + "step": 69220 + }, + { + "epoch": 417.95, + "learning_rate": 4.060448466148642e-05, + "loss": 0.0117, + "step": 69240 + }, + { + "epoch": 418.07, + "learning_rate": 4.056040803906675e-05, + "loss": 0.0111, + "step": 69260 + }, + { + "epoch": 418.19, + "learning_rate": 4.051633902195585e-05, + "loss": 0.0109, + "step": 69280 + }, + { + "epoch": 418.31, + "learning_rate": 4.047227764565929e-05, + "loss": 0.0112, + "step": 69300 + }, + { + "epoch": 418.43, + "learning_rate": 4.0428223945676524e-05, + "loss": 0.0112, + "step": 69320 + }, + { + "epoch": 418.55, + "learning_rate": 4.0384177957500866e-05, + "loss": 0.0114, + "step": 69340 + }, + { + "epoch": 418.67, + "learning_rate": 4.034013971661933e-05, + "loss": 0.0113, + "step": 69360 + }, + { + "epoch": 418.79, + "learning_rate": 4.0296109258512755e-05, + "loss": 0.0115, + "step": 69380 + }, + { + "epoch": 418.91, + "learning_rate": 4.025208661865567e-05, + "loss": 0.0116, + "step": 69400 + }, + { + "epoch": 419.03, + "learning_rate": 4.0208071832516333e-05, + "loss": 0.0112, + "step": 69420 + }, + { + "epoch": 419.15, + "learning_rate": 4.016406493555666e-05, + "loss": 0.0108, + "step": 69440 + }, + { + "epoch": 419.28, + "learning_rate": 4.0120065963232186e-05, + "loss": 0.011, + "step": 69460 + }, + { + "epoch": 419.4, + "learning_rate": 4.007607495099215e-05, + "loss": 0.0112, + "step": 69480 + }, + { + "epoch": 419.52, + "learning_rate": 4.003209193427924e-05, + "loss": 0.0112, + "step": 69500 + }, + { + "epoch": 419.64, + "learning_rate": 3.9988116948529844e-05, + "loss": 0.0114, + "step": 69520 + }, + { + "epoch": 419.76, + "learning_rate": 3.994415002917378e-05, + "loss": 0.0115, + "step": 69540 + }, + { + "epoch": 419.88, + "learning_rate": 3.990019121163441e-05, + "loss": 0.0115, + "step": 69560 + }, + { + "epoch": 420.0, + "learning_rate": 3.985624053132855e-05, + "loss": 0.0115, + "step": 69580 + }, + { + "epoch": 420.12, + "learning_rate": 3.981229802366647e-05, + "loss": 0.0105, + "step": 69600 + }, + { + "epoch": 420.24, + "learning_rate": 3.976836372405188e-05, + "loss": 0.0108, + "step": 69620 + }, + { + "epoch": 420.36, + "learning_rate": 3.9724437667881796e-05, + "loss": 0.011, + "step": 69640 + }, + { + "epoch": 420.48, + "learning_rate": 3.968051989054673e-05, + "loss": 0.0111, + "step": 69660 + }, + { + "epoch": 420.6, + "learning_rate": 3.963661042743036e-05, + "loss": 0.0112, + "step": 69680 + }, + { + "epoch": 420.72, + "learning_rate": 3.9592709313909817e-05, + "loss": 0.0113, + "step": 69700 + }, + { + "epoch": 420.85, + "learning_rate": 3.9548816585355366e-05, + "loss": 0.0114, + "step": 69720 + }, + { + "epoch": 420.97, + "learning_rate": 3.950493227713064e-05, + "loss": 0.0116, + "step": 69740 + }, + { + "epoch": 421.09, + "learning_rate": 3.9461056424592416e-05, + "loss": 0.0109, + "step": 69760 + }, + { + "epoch": 421.21, + "learning_rate": 3.941718906309066e-05, + "loss": 0.0108, + "step": 69780 + }, + { + "epoch": 421.33, + "learning_rate": 3.937333022796854e-05, + "loss": 0.011, + "step": 69800 + }, + { + "epoch": 421.45, + "learning_rate": 3.932947995456231e-05, + "loss": 0.0111, + "step": 69820 + }, + { + "epoch": 421.57, + "learning_rate": 3.928563827820136e-05, + "loss": 0.0112, + "step": 69840 + }, + { + "epoch": 421.69, + "learning_rate": 3.92418052342081e-05, + "loss": 0.0113, + "step": 69860 + }, + { + "epoch": 421.81, + "learning_rate": 3.9197980857898074e-05, + "loss": 0.0114, + "step": 69880 + }, + { + "epoch": 421.93, + "learning_rate": 3.915416518457974e-05, + "loss": 0.0114, + "step": 69900 + }, + { + "epoch": 422.05, + "learning_rate": 3.9110358249554625e-05, + "loss": 0.0111, + "step": 69920 + }, + { + "epoch": 422.17, + "learning_rate": 3.906656008811719e-05, + "loss": 0.0106, + "step": 69940 + }, + { + "epoch": 422.29, + "learning_rate": 3.902277073555479e-05, + "loss": 0.0109, + "step": 69960 + }, + { + "epoch": 422.41, + "learning_rate": 3.8978990227147755e-05, + "loss": 0.011, + "step": 69980 + }, + { + "epoch": 422.54, + "learning_rate": 3.893521859816922e-05, + "loss": 0.0111, + "step": 70000 + }, + { + "epoch": 422.54, + "eval_accuracy": 0.0012476415327320682, + "eval_loss": 22.998226165771484, + "eval_runtime": 18.2088, + "eval_samples_per_second": 112.033, + "eval_steps_per_second": 2.361, + "step": 70000 + }, + { + "epoch": 422.66, + "learning_rate": 3.889145588388521e-05, + "loss": 0.0114, + "step": 70020 + }, + { + "epoch": 422.78, + "learning_rate": 3.884770211955454e-05, + "loss": 0.0114, + "step": 70040 + }, + { + "epoch": 422.9, + "learning_rate": 3.880395734042884e-05, + "loss": 0.0114, + "step": 70060 + }, + { + "epoch": 423.02, + "learning_rate": 3.87602215817525e-05, + "loss": 0.0114, + "step": 70080 + }, + { + "epoch": 423.14, + "learning_rate": 3.87164948787626e-05, + "loss": 0.0105, + "step": 70100 + }, + { + "epoch": 423.26, + "learning_rate": 3.867277726668899e-05, + "loss": 0.0108, + "step": 70120 + }, + { + "epoch": 423.38, + "learning_rate": 3.862906878075413e-05, + "loss": 0.011, + "step": 70140 + }, + { + "epoch": 423.5, + "learning_rate": 3.858536945617318e-05, + "loss": 0.0113, + "step": 70160 + }, + { + "epoch": 423.62, + "learning_rate": 3.854167932815387e-05, + "loss": 0.0113, + "step": 70180 + }, + { + "epoch": 423.74, + "learning_rate": 3.849799843189657e-05, + "loss": 0.0113, + "step": 70200 + }, + { + "epoch": 423.86, + "learning_rate": 3.845432680259419e-05, + "loss": 0.0114, + "step": 70220 + }, + { + "epoch": 423.98, + "learning_rate": 3.841066447543214e-05, + "loss": 0.0114, + "step": 70240 + }, + { + "epoch": 424.1, + "learning_rate": 3.836701148558843e-05, + "loss": 0.0107, + "step": 70260 + }, + { + "epoch": 424.23, + "learning_rate": 3.8323367868233426e-05, + "loss": 0.0108, + "step": 70280 + }, + { + "epoch": 424.35, + "learning_rate": 3.8279733658530036e-05, + "loss": 0.0109, + "step": 70300 + }, + { + "epoch": 424.47, + "learning_rate": 3.8236108891633535e-05, + "loss": 0.0111, + "step": 70320 + }, + { + "epoch": 424.59, + "learning_rate": 3.819249360269162e-05, + "loss": 0.0111, + "step": 70340 + }, + { + "epoch": 424.71, + "learning_rate": 3.814888782684435e-05, + "loss": 0.0113, + "step": 70360 + }, + { + "epoch": 424.83, + "learning_rate": 3.810529159922408e-05, + "loss": 0.0115, + "step": 70380 + }, + { + "epoch": 424.95, + "learning_rate": 3.8061704954955556e-05, + "loss": 0.0116, + "step": 70400 + }, + { + "epoch": 425.07, + "learning_rate": 3.8018127929155687e-05, + "loss": 0.0108, + "step": 70420 + }, + { + "epoch": 425.19, + "learning_rate": 3.797456055693375e-05, + "loss": 0.0107, + "step": 70440 + }, + { + "epoch": 425.31, + "learning_rate": 3.7931002873391154e-05, + "loss": 0.0109, + "step": 70460 + }, + { + "epoch": 425.43, + "learning_rate": 3.788745491362155e-05, + "loss": 0.011, + "step": 70480 + }, + { + "epoch": 425.55, + "learning_rate": 3.7843916712710756e-05, + "loss": 0.0113, + "step": 70500 + }, + { + "epoch": 425.67, + "learning_rate": 3.780038830573668e-05, + "loss": 0.0113, + "step": 70520 + }, + { + "epoch": 425.79, + "learning_rate": 3.775686972776942e-05, + "loss": 0.0115, + "step": 70540 + }, + { + "epoch": 425.92, + "learning_rate": 3.771336101387105e-05, + "loss": 0.0116, + "step": 70560 + }, + { + "epoch": 426.04, + "learning_rate": 3.766986219909583e-05, + "loss": 0.0115, + "step": 70580 + }, + { + "epoch": 426.16, + "learning_rate": 3.7626373318489886e-05, + "loss": 0.0109, + "step": 70600 + }, + { + "epoch": 426.28, + "learning_rate": 3.758289440709149e-05, + "loss": 0.0114, + "step": 70620 + }, + { + "epoch": 426.4, + "learning_rate": 3.753942549993079e-05, + "loss": 0.0117, + "step": 70640 + }, + { + "epoch": 426.52, + "learning_rate": 3.749596663202989e-05, + "loss": 0.0118, + "step": 70660 + }, + { + "epoch": 426.64, + "learning_rate": 3.7452517838402834e-05, + "loss": 0.0118, + "step": 70680 + }, + { + "epoch": 426.76, + "learning_rate": 3.74090791540555e-05, + "loss": 0.0123, + "step": 70700 + }, + { + "epoch": 426.88, + "learning_rate": 3.7365650613985706e-05, + "loss": 0.0126, + "step": 70720 + }, + { + "epoch": 427.0, + "learning_rate": 3.732223225318298e-05, + "loss": 0.0135, + "step": 70740 + }, + { + "epoch": 427.12, + "learning_rate": 3.727882410662874e-05, + "loss": 0.0146, + "step": 70760 + }, + { + "epoch": 427.24, + "learning_rate": 3.723542620929616e-05, + "loss": 0.0192, + "step": 70780 + }, + { + "epoch": 427.36, + "learning_rate": 3.7194207732018324e-05, + "loss": 0.0373, + "step": 70800 + }, + { + "epoch": 427.48, + "learning_rate": 3.715299856619e-05, + "loss": 0.0973, + "step": 70820 + }, + { + "epoch": 427.61, + "learning_rate": 3.710963058929701e-05, + "loss": 0.1284, + "step": 70840 + }, + { + "epoch": 427.73, + "learning_rate": 3.706844063031376e-05, + "loss": 0.1367, + "step": 70860 + }, + { + "epoch": 427.85, + "learning_rate": 3.702509293757578e-05, + "loss": 0.1379, + "step": 70880 + }, + { + "epoch": 427.97, + "learning_rate": 3.698175569848608e-05, + "loss": 0.1376, + "step": 70900 + }, + { + "epoch": 428.09, + "learning_rate": 3.6938428947960645e-05, + "loss": 0.1232, + "step": 70920 + }, + { + "epoch": 428.21, + "learning_rate": 3.6895112720907124e-05, + "loss": 0.1227, + "step": 70940 + }, + { + "epoch": 428.33, + "learning_rate": 3.6851807052224546e-05, + "loss": 0.1246, + "step": 70960 + }, + { + "epoch": 428.45, + "learning_rate": 3.680851197680355e-05, + "loss": 0.1241, + "step": 70980 + }, + { + "epoch": 428.57, + "learning_rate": 3.67652275295262e-05, + "loss": 0.1182, + "step": 71000 + }, + { + "epoch": 428.57, + "eval_accuracy": 0.0012550503304323774, + "eval_loss": 22.662765502929688, + "eval_runtime": 18.1641, + "eval_samples_per_second": 112.31, + "eval_steps_per_second": 2.367, + "step": 71000 + }, + { + "epoch": 428.69, + "learning_rate": 3.672195374526598e-05, + "loss": 0.1179, + "step": 71020 + }, + { + "epoch": 428.81, + "learning_rate": 3.667869065888779e-05, + "loss": 0.1156, + "step": 71040 + }, + { + "epoch": 428.93, + "learning_rate": 3.663543830524793e-05, + "loss": 0.1116, + "step": 71060 + }, + { + "epoch": 429.05, + "learning_rate": 3.659219671919404e-05, + "loss": 0.0908, + "step": 71080 + }, + { + "epoch": 429.18, + "learning_rate": 3.654896593556506e-05, + "loss": 0.0793, + "step": 71100 + }, + { + "epoch": 429.3, + "learning_rate": 3.6505745989191274e-05, + "loss": 0.0815, + "step": 71120 + }, + { + "epoch": 429.42, + "learning_rate": 3.6462536914894215e-05, + "loss": 0.0813, + "step": 71140 + }, + { + "epoch": 429.54, + "learning_rate": 3.6419338747486656e-05, + "loss": 0.0819, + "step": 71160 + }, + { + "epoch": 429.66, + "learning_rate": 3.6376151521772565e-05, + "loss": 0.0826, + "step": 71180 + }, + { + "epoch": 429.78, + "learning_rate": 3.633297527254711e-05, + "loss": 0.0829, + "step": 71200 + }, + { + "epoch": 429.9, + "learning_rate": 3.628981003459664e-05, + "loss": 0.0794, + "step": 71220 + }, + { + "epoch": 430.02, + "learning_rate": 3.6246655842698575e-05, + "loss": 0.0743, + "step": 71240 + }, + { + "epoch": 430.14, + "learning_rate": 3.620351273162147e-05, + "loss": 0.0547, + "step": 71260 + }, + { + "epoch": 430.26, + "learning_rate": 3.6160380736125e-05, + "loss": 0.0586, + "step": 71280 + }, + { + "epoch": 430.38, + "learning_rate": 3.611725989095976e-05, + "loss": 0.0598, + "step": 71300 + }, + { + "epoch": 430.5, + "learning_rate": 3.60741502308675e-05, + "loss": 0.0622, + "step": 71320 + }, + { + "epoch": 430.62, + "learning_rate": 3.603105179058085e-05, + "loss": 0.0603, + "step": 71340 + }, + { + "epoch": 430.74, + "learning_rate": 3.598796460482348e-05, + "loss": 0.0602, + "step": 71360 + }, + { + "epoch": 430.87, + "learning_rate": 3.594488870830992e-05, + "loss": 0.061, + "step": 71380 + }, + { + "epoch": 430.99, + "learning_rate": 3.5901824135745664e-05, + "loss": 0.0614, + "step": 71400 + }, + { + "epoch": 431.11, + "learning_rate": 3.5858770921827075e-05, + "loss": 0.0439, + "step": 71420 + }, + { + "epoch": 431.23, + "learning_rate": 3.581572910124131e-05, + "loss": 0.0416, + "step": 71440 + }, + { + "epoch": 431.35, + "learning_rate": 3.577269870866644e-05, + "loss": 0.0403, + "step": 71460 + }, + { + "epoch": 431.47, + "learning_rate": 3.572967977877122e-05, + "loss": 0.0399, + "step": 71480 + }, + { + "epoch": 431.59, + "learning_rate": 3.568667234621527e-05, + "loss": 0.0397, + "step": 71500 + }, + { + "epoch": 431.71, + "learning_rate": 3.5643676445648855e-05, + "loss": 0.0402, + "step": 71520 + }, + { + "epoch": 431.83, + "learning_rate": 3.560069211171303e-05, + "loss": 0.0394, + "step": 71540 + }, + { + "epoch": 431.95, + "learning_rate": 3.55577193790395e-05, + "loss": 0.0388, + "step": 71560 + }, + { + "epoch": 432.07, + "learning_rate": 3.55147582822506e-05, + "loss": 0.0318, + "step": 71580 + }, + { + "epoch": 432.19, + "learning_rate": 3.547180885595932e-05, + "loss": 0.0254, + "step": 71600 + }, + { + "epoch": 432.31, + "learning_rate": 3.5428871134769226e-05, + "loss": 0.0237, + "step": 71620 + }, + { + "epoch": 432.43, + "learning_rate": 3.5385945153274494e-05, + "loss": 0.0225, + "step": 71640 + }, + { + "epoch": 432.56, + "learning_rate": 3.534303094605975e-05, + "loss": 0.0218, + "step": 71660 + }, + { + "epoch": 432.68, + "learning_rate": 3.530012854770026e-05, + "loss": 0.0208, + "step": 71680 + }, + { + "epoch": 432.8, + "learning_rate": 3.525723799276169e-05, + "loss": 0.0207, + "step": 71700 + }, + { + "epoch": 432.92, + "learning_rate": 3.521435931580018e-05, + "loss": 0.0199, + "step": 71720 + }, + { + "epoch": 433.04, + "learning_rate": 3.517149255136232e-05, + "loss": 0.0179, + "step": 71740 + }, + { + "epoch": 433.16, + "learning_rate": 3.512863773398506e-05, + "loss": 0.0143, + "step": 71760 + }, + { + "epoch": 433.28, + "learning_rate": 3.508579489819578e-05, + "loss": 0.0137, + "step": 71780 + }, + { + "epoch": 433.4, + "learning_rate": 3.5042964078512166e-05, + "loss": 0.0135, + "step": 71800 + }, + { + "epoch": 433.52, + "learning_rate": 3.500014530944223e-05, + "loss": 0.0135, + "step": 71820 + }, + { + "epoch": 433.64, + "learning_rate": 3.495733862548428e-05, + "loss": 0.0134, + "step": 71840 + }, + { + "epoch": 433.76, + "learning_rate": 3.491454406112691e-05, + "loss": 0.0134, + "step": 71860 + }, + { + "epoch": 433.88, + "learning_rate": 3.4871761650848916e-05, + "loss": 0.0133, + "step": 71880 + }, + { + "epoch": 434.0, + "learning_rate": 3.482899142911931e-05, + "loss": 0.0133, + "step": 71900 + }, + { + "epoch": 434.12, + "learning_rate": 3.478623343039729e-05, + "loss": 0.0116, + "step": 71920 + }, + { + "epoch": 434.25, + "learning_rate": 3.474348768913219e-05, + "loss": 0.0116, + "step": 71940 + }, + { + "epoch": 434.37, + "learning_rate": 3.470075423976351e-05, + "loss": 0.0117, + "step": 71960 + }, + { + "epoch": 434.49, + "learning_rate": 3.465803311672079e-05, + "loss": 0.0117, + "step": 71980 + }, + { + "epoch": 434.61, + "learning_rate": 3.4615324354423663e-05, + "loss": 0.0118, + "step": 72000 + }, + { + "epoch": 434.61, + "eval_accuracy": 0.0012604834487459375, + "eval_loss": 22.90355682373047, + "eval_runtime": 18.1657, + "eval_samples_per_second": 112.3, + "eval_steps_per_second": 2.367, + "step": 72000 + }, + { + "epoch": 434.73, + "learning_rate": 3.457262798728186e-05, + "loss": 0.0119, + "step": 72020 + }, + { + "epoch": 434.85, + "learning_rate": 3.452994404969499e-05, + "loss": 0.0119, + "step": 72040 + }, + { + "epoch": 434.97, + "learning_rate": 3.448727257605282e-05, + "loss": 0.012, + "step": 72060 + }, + { + "epoch": 435.09, + "learning_rate": 3.4444613600734924e-05, + "loss": 0.0113, + "step": 72080 + }, + { + "epoch": 435.21, + "learning_rate": 3.4401967158110894e-05, + "loss": 0.0111, + "step": 72100 + }, + { + "epoch": 435.33, + "learning_rate": 3.435933328254017e-05, + "loss": 0.0113, + "step": 72120 + }, + { + "epoch": 435.45, + "learning_rate": 3.431671200837213e-05, + "loss": 0.0113, + "step": 72140 + }, + { + "epoch": 435.57, + "learning_rate": 3.4274103369945955e-05, + "loss": 0.0114, + "step": 72160 + }, + { + "epoch": 435.69, + "learning_rate": 3.423150740159064e-05, + "loss": 0.0116, + "step": 72180 + }, + { + "epoch": 435.81, + "learning_rate": 3.418892413762501e-05, + "loss": 0.0115, + "step": 72200 + }, + { + "epoch": 435.94, + "learning_rate": 3.4146353612357604e-05, + "loss": 0.0116, + "step": 72220 + }, + { + "epoch": 436.06, + "learning_rate": 3.4103795860086765e-05, + "loss": 0.0113, + "step": 72240 + }, + { + "epoch": 436.18, + "learning_rate": 3.406125091510046e-05, + "loss": 0.0108, + "step": 72260 + }, + { + "epoch": 436.3, + "learning_rate": 3.4018718811676406e-05, + "loss": 0.011, + "step": 72280 + }, + { + "epoch": 436.42, + "learning_rate": 3.397619958408196e-05, + "loss": 0.0112, + "step": 72300 + }, + { + "epoch": 436.54, + "learning_rate": 3.393369326657408e-05, + "loss": 0.0112, + "step": 72320 + }, + { + "epoch": 436.66, + "learning_rate": 3.389119989339935e-05, + "loss": 0.0113, + "step": 72340 + }, + { + "epoch": 436.78, + "learning_rate": 3.384871949879388e-05, + "loss": 0.0115, + "step": 72360 + }, + { + "epoch": 436.9, + "learning_rate": 3.380625211698342e-05, + "loss": 0.0115, + "step": 72380 + }, + { + "epoch": 437.02, + "learning_rate": 3.3763797782183104e-05, + "loss": 0.0114, + "step": 72400 + }, + { + "epoch": 437.14, + "learning_rate": 3.372135652859766e-05, + "loss": 0.0107, + "step": 72420 + }, + { + "epoch": 437.26, + "learning_rate": 3.367892839042125e-05, + "loss": 0.0109, + "step": 72440 + }, + { + "epoch": 437.38, + "learning_rate": 3.363651340183743e-05, + "loss": 0.011, + "step": 72460 + }, + { + "epoch": 437.51, + "learning_rate": 3.359411159701921e-05, + "loss": 0.0112, + "step": 72480 + }, + { + "epoch": 437.63, + "learning_rate": 3.355172301012893e-05, + "loss": 0.0112, + "step": 72500 + }, + { + "epoch": 437.75, + "learning_rate": 3.350934767531836e-05, + "loss": 0.0113, + "step": 72520 + }, + { + "epoch": 437.87, + "learning_rate": 3.346698562672849e-05, + "loss": 0.0113, + "step": 72540 + }, + { + "epoch": 437.99, + "learning_rate": 3.342463689848967e-05, + "loss": 0.0114, + "step": 72560 + }, + { + "epoch": 438.11, + "learning_rate": 3.3382301524721525e-05, + "loss": 0.0107, + "step": 72580 + }, + { + "epoch": 438.23, + "learning_rate": 3.3339979539532894e-05, + "loss": 0.0107, + "step": 72600 + }, + { + "epoch": 438.35, + "learning_rate": 3.329767097702183e-05, + "loss": 0.011, + "step": 72620 + }, + { + "epoch": 438.47, + "learning_rate": 3.325537587127558e-05, + "loss": 0.0111, + "step": 72640 + }, + { + "epoch": 438.59, + "learning_rate": 3.3213094256370546e-05, + "loss": 0.0111, + "step": 72660 + }, + { + "epoch": 438.71, + "learning_rate": 3.317082616637226e-05, + "loss": 0.0112, + "step": 72680 + }, + { + "epoch": 438.83, + "learning_rate": 3.3128571635335345e-05, + "loss": 0.0113, + "step": 72700 + }, + { + "epoch": 438.95, + "learning_rate": 3.308633069730357e-05, + "loss": 0.0114, + "step": 72720 + }, + { + "epoch": 439.07, + "learning_rate": 3.304410338630961e-05, + "loss": 0.0108, + "step": 72740 + }, + { + "epoch": 439.2, + "learning_rate": 3.3001889736375326e-05, + "loss": 0.0107, + "step": 72760 + }, + { + "epoch": 439.32, + "learning_rate": 3.295968978151144e-05, + "loss": 0.0108, + "step": 72780 + }, + { + "epoch": 439.44, + "learning_rate": 3.291750355571772e-05, + "loss": 0.0109, + "step": 72800 + }, + { + "epoch": 439.56, + "learning_rate": 3.287533109298283e-05, + "loss": 0.0111, + "step": 72820 + }, + { + "epoch": 439.68, + "learning_rate": 3.283317242728437e-05, + "loss": 0.0111, + "step": 72840 + }, + { + "epoch": 439.8, + "learning_rate": 3.279102759258882e-05, + "loss": 0.0112, + "step": 72860 + }, + { + "epoch": 439.92, + "learning_rate": 3.27488966228515e-05, + "loss": 0.0113, + "step": 72880 + }, + { + "epoch": 440.04, + "learning_rate": 3.2706779552016595e-05, + "loss": 0.011, + "step": 72900 + }, + { + "epoch": 440.16, + "learning_rate": 3.266467641401703e-05, + "loss": 0.0106, + "step": 72920 + }, + { + "epoch": 440.28, + "learning_rate": 3.262258724277459e-05, + "loss": 0.0108, + "step": 72940 + }, + { + "epoch": 440.4, + "learning_rate": 3.258051207219971e-05, + "loss": 0.0109, + "step": 72960 + }, + { + "epoch": 440.52, + "learning_rate": 3.253845093619163e-05, + "loss": 0.011, + "step": 72980 + }, + { + "epoch": 440.64, + "learning_rate": 3.249640386863825e-05, + "loss": 0.0111, + "step": 73000 + }, + { + "epoch": 440.64, + "eval_accuracy": 0.0012664104869061848, + "eval_loss": 23.069204330444336, + "eval_runtime": 18.1572, + "eval_samples_per_second": 112.352, + "eval_steps_per_second": 2.368, + "step": 73000 + }, + { + "epoch": 440.76, + "learning_rate": 3.24543709034161e-05, + "loss": 0.0112, + "step": 73020 + }, + { + "epoch": 440.89, + "learning_rate": 3.2412352074390415e-05, + "loss": 0.0112, + "step": 73040 + }, + { + "epoch": 441.01, + "learning_rate": 3.237034741541499e-05, + "loss": 0.0112, + "step": 73060 + }, + { + "epoch": 441.13, + "learning_rate": 3.232835696033222e-05, + "loss": 0.0104, + "step": 73080 + }, + { + "epoch": 441.25, + "learning_rate": 3.228638074297304e-05, + "loss": 0.0106, + "step": 73100 + }, + { + "epoch": 441.37, + "learning_rate": 3.224441879715699e-05, + "loss": 0.0109, + "step": 73120 + }, + { + "epoch": 441.49, + "learning_rate": 3.220247115669198e-05, + "loss": 0.0109, + "step": 73140 + }, + { + "epoch": 441.61, + "learning_rate": 3.216053785537449e-05, + "loss": 0.011, + "step": 73160 + }, + { + "epoch": 441.73, + "learning_rate": 3.2118618926989466e-05, + "loss": 0.0111, + "step": 73180 + }, + { + "epoch": 441.85, + "learning_rate": 3.2076714405310174e-05, + "loss": 0.0112, + "step": 73200 + }, + { + "epoch": 441.97, + "learning_rate": 3.203482432409836e-05, + "loss": 0.0112, + "step": 73220 + }, + { + "epoch": 442.09, + "learning_rate": 3.1992948717104085e-05, + "loss": 0.0106, + "step": 73240 + }, + { + "epoch": 442.21, + "learning_rate": 3.19510876180658e-05, + "loss": 0.0105, + "step": 73260 + }, + { + "epoch": 442.33, + "learning_rate": 3.1909241060710194e-05, + "loss": 0.0108, + "step": 73280 + }, + { + "epoch": 442.45, + "learning_rate": 3.1867409078752316e-05, + "loss": 0.0109, + "step": 73300 + }, + { + "epoch": 442.58, + "learning_rate": 3.182559170589544e-05, + "loss": 0.0109, + "step": 73320 + }, + { + "epoch": 442.7, + "learning_rate": 3.178378897583105e-05, + "loss": 0.0111, + "step": 73340 + }, + { + "epoch": 442.82, + "learning_rate": 3.174200092223887e-05, + "loss": 0.0113, + "step": 73360 + }, + { + "epoch": 442.94, + "learning_rate": 3.1700227578786746e-05, + "loss": 0.0111, + "step": 73380 + }, + { + "epoch": 443.06, + "learning_rate": 3.165846897913075e-05, + "loss": 0.0108, + "step": 73400 + }, + { + "epoch": 443.18, + "learning_rate": 3.1616725156914995e-05, + "loss": 0.0106, + "step": 73420 + }, + { + "epoch": 443.3, + "learning_rate": 3.157499614577173e-05, + "loss": 0.0107, + "step": 73440 + }, + { + "epoch": 443.42, + "learning_rate": 3.1533281979321306e-05, + "loss": 0.0108, + "step": 73460 + }, + { + "epoch": 443.54, + "learning_rate": 3.149158269117202e-05, + "loss": 0.0108, + "step": 73480 + }, + { + "epoch": 443.66, + "learning_rate": 3.144989831492026e-05, + "loss": 0.011, + "step": 73500 + }, + { + "epoch": 443.78, + "learning_rate": 3.140822888415038e-05, + "loss": 0.0111, + "step": 73520 + }, + { + "epoch": 443.9, + "learning_rate": 3.136657443243468e-05, + "loss": 0.0112, + "step": 73540 + }, + { + "epoch": 444.02, + "learning_rate": 3.132493499333339e-05, + "loss": 0.011, + "step": 73560 + }, + { + "epoch": 444.14, + "learning_rate": 3.1283310600394666e-05, + "loss": 0.0103, + "step": 73580 + }, + { + "epoch": 444.27, + "learning_rate": 3.1241701287154536e-05, + "loss": 0.0106, + "step": 73600 + }, + { + "epoch": 444.39, + "learning_rate": 3.120010708713684e-05, + "loss": 0.0107, + "step": 73620 + }, + { + "epoch": 444.51, + "learning_rate": 3.115852803385334e-05, + "loss": 0.0109, + "step": 73640 + }, + { + "epoch": 444.63, + "learning_rate": 3.111696416080344e-05, + "loss": 0.011, + "step": 73660 + }, + { + "epoch": 444.75, + "learning_rate": 3.107541550147448e-05, + "loss": 0.0111, + "step": 73680 + }, + { + "epoch": 444.87, + "learning_rate": 3.1033882089341415e-05, + "loss": 0.0111, + "step": 73700 + }, + { + "epoch": 444.99, + "learning_rate": 3.0992363957866975e-05, + "loss": 0.0112, + "step": 73720 + }, + { + "epoch": 445.11, + "learning_rate": 3.09508611405016e-05, + "loss": 0.0104, + "step": 73740 + }, + { + "epoch": 445.23, + "learning_rate": 3.090937367068331e-05, + "loss": 0.0105, + "step": 73760 + }, + { + "epoch": 445.35, + "learning_rate": 3.0867901581837854e-05, + "loss": 0.0107, + "step": 73780 + }, + { + "epoch": 445.47, + "learning_rate": 3.08264449073785e-05, + "loss": 0.0108, + "step": 73800 + }, + { + "epoch": 445.59, + "learning_rate": 3.078500368070621e-05, + "loss": 0.0109, + "step": 73820 + }, + { + "epoch": 445.71, + "learning_rate": 3.0743577935209353e-05, + "loss": 0.011, + "step": 73840 + }, + { + "epoch": 445.84, + "learning_rate": 3.0702167704263954e-05, + "loss": 0.0111, + "step": 73860 + }, + { + "epoch": 445.96, + "learning_rate": 3.066077302123348e-05, + "loss": 0.0111, + "step": 73880 + }, + { + "epoch": 446.08, + "learning_rate": 3.061939391946886e-05, + "loss": 0.0107, + "step": 73900 + }, + { + "epoch": 446.2, + "learning_rate": 3.0578030432308526e-05, + "loss": 0.0104, + "step": 73920 + }, + { + "epoch": 446.32, + "learning_rate": 3.0536682593078246e-05, + "loss": 0.0106, + "step": 73940 + }, + { + "epoch": 446.44, + "learning_rate": 3.0495350435091273e-05, + "loss": 0.0108, + "step": 73960 + }, + { + "epoch": 446.56, + "learning_rate": 3.0454033991648147e-05, + "loss": 0.0108, + "step": 73980 + }, + { + "epoch": 446.68, + "learning_rate": 3.0412733296036776e-05, + "loss": 0.011, + "step": 74000 + }, + { + "epoch": 446.68, + "eval_accuracy": 0.0012619652082859994, + "eval_loss": 23.18569564819336, + "eval_runtime": 18.1369, + "eval_samples_per_second": 112.478, + "eval_steps_per_second": 2.371, + "step": 74000 + }, + { + "epoch": 446.8, + "learning_rate": 3.0371448381532418e-05, + "loss": 0.0111, + "step": 74020 + }, + { + "epoch": 446.92, + "learning_rate": 3.0330179281397565e-05, + "loss": 0.0112, + "step": 74040 + }, + { + "epoch": 447.04, + "learning_rate": 3.0288926028881986e-05, + "loss": 0.011, + "step": 74060 + }, + { + "epoch": 447.16, + "learning_rate": 3.024768865722267e-05, + "loss": 0.0104, + "step": 74080 + }, + { + "epoch": 447.28, + "learning_rate": 3.0206467199643852e-05, + "loss": 0.0106, + "step": 74100 + }, + { + "epoch": 447.4, + "learning_rate": 3.0165261689356884e-05, + "loss": 0.0108, + "step": 74120 + }, + { + "epoch": 447.53, + "learning_rate": 3.01240721595603e-05, + "loss": 0.0109, + "step": 74140 + }, + { + "epoch": 447.65, + "learning_rate": 3.0082898643439804e-05, + "loss": 0.011, + "step": 74160 + }, + { + "epoch": 447.77, + "learning_rate": 3.0041741174168103e-05, + "loss": 0.011, + "step": 74180 + }, + { + "epoch": 447.89, + "learning_rate": 3.0000599784905065e-05, + "loss": 0.0113, + "step": 74200 + }, + { + "epoch": 448.01, + "learning_rate": 2.9959474508797535e-05, + "loss": 0.0112, + "step": 74220 + }, + { + "epoch": 448.13, + "learning_rate": 2.991836537897943e-05, + "loss": 0.0104, + "step": 74240 + }, + { + "epoch": 448.25, + "learning_rate": 2.9877272428571613e-05, + "loss": 0.0107, + "step": 74260 + }, + { + "epoch": 448.37, + "learning_rate": 2.9836195690681935e-05, + "loss": 0.0108, + "step": 74280 + }, + { + "epoch": 448.49, + "learning_rate": 2.97951351984052e-05, + "loss": 0.0109, + "step": 74300 + }, + { + "epoch": 448.61, + "learning_rate": 2.9754090984823073e-05, + "loss": 0.011, + "step": 74320 + }, + { + "epoch": 448.73, + "learning_rate": 2.9713063083004188e-05, + "loss": 0.0113, + "step": 74340 + }, + { + "epoch": 448.85, + "learning_rate": 2.9672051526003912e-05, + "loss": 0.0113, + "step": 74360 + }, + { + "epoch": 448.97, + "learning_rate": 2.9631056346864594e-05, + "loss": 0.0114, + "step": 74380 + }, + { + "epoch": 449.09, + "learning_rate": 2.9590077578615234e-05, + "loss": 0.0109, + "step": 74400 + }, + { + "epoch": 449.22, + "learning_rate": 2.954911525427174e-05, + "loss": 0.0112, + "step": 74420 + }, + { + "epoch": 449.34, + "learning_rate": 2.9508169406836696e-05, + "loss": 0.0116, + "step": 74440 + }, + { + "epoch": 449.46, + "learning_rate": 2.946724006929943e-05, + "loss": 0.0124, + "step": 74460 + }, + { + "epoch": 449.58, + "learning_rate": 2.942632727463598e-05, + "loss": 0.0133, + "step": 74480 + }, + { + "epoch": 449.7, + "learning_rate": 2.9385431055809032e-05, + "loss": 0.0143, + "step": 74500 + }, + { + "epoch": 449.82, + "learning_rate": 2.9344551445767954e-05, + "loss": 0.0169, + "step": 74520 + }, + { + "epoch": 449.94, + "learning_rate": 2.9303688477448667e-05, + "loss": 0.0311, + "step": 74540 + }, + { + "epoch": 450.06, + "learning_rate": 2.9262842183773788e-05, + "loss": 0.0887, + "step": 74560 + }, + { + "epoch": 450.18, + "learning_rate": 2.9222012597652372e-05, + "loss": 0.1177, + "step": 74580 + }, + { + "epoch": 450.3, + "learning_rate": 2.9181199751980114e-05, + "loss": 0.1211, + "step": 74600 + }, + { + "epoch": 450.42, + "learning_rate": 2.914040367963921e-05, + "loss": 0.1186, + "step": 74620 + }, + { + "epoch": 450.54, + "learning_rate": 2.909962441349825e-05, + "loss": 0.1154, + "step": 74640 + }, + { + "epoch": 450.66, + "learning_rate": 2.905886198641244e-05, + "loss": 0.1104, + "step": 74660 + }, + { + "epoch": 450.78, + "learning_rate": 2.9018116431223257e-05, + "loss": 0.1068, + "step": 74680 + }, + { + "epoch": 450.91, + "learning_rate": 2.897738778075869e-05, + "loss": 0.0997, + "step": 74700 + }, + { + "epoch": 451.03, + "learning_rate": 2.893667606783308e-05, + "loss": 0.0908, + "step": 74720 + }, + { + "epoch": 451.15, + "learning_rate": 2.88959813252471e-05, + "loss": 0.0712, + "step": 74740 + }, + { + "epoch": 451.27, + "learning_rate": 2.8855303585787813e-05, + "loss": 0.0733, + "step": 74760 + }, + { + "epoch": 451.39, + "learning_rate": 2.881464288222846e-05, + "loss": 0.0725, + "step": 74780 + }, + { + "epoch": 451.51, + "learning_rate": 2.877399924732873e-05, + "loss": 0.0716, + "step": 74800 + }, + { + "epoch": 451.63, + "learning_rate": 2.8733372713834396e-05, + "loss": 0.0697, + "step": 74820 + }, + { + "epoch": 451.75, + "learning_rate": 2.8692763314477566e-05, + "loss": 0.0685, + "step": 74840 + }, + { + "epoch": 451.87, + "learning_rate": 2.8652171081976435e-05, + "loss": 0.0668, + "step": 74860 + }, + { + "epoch": 451.99, + "learning_rate": 2.86115960490355e-05, + "loss": 0.0644, + "step": 74880 + }, + { + "epoch": 452.11, + "learning_rate": 2.857103824834533e-05, + "loss": 0.0438, + "step": 74900 + }, + { + "epoch": 452.23, + "learning_rate": 2.853049771258256e-05, + "loss": 0.0421, + "step": 74920 + }, + { + "epoch": 452.35, + "learning_rate": 2.8489974474410003e-05, + "loss": 0.0412, + "step": 74940 + }, + { + "epoch": 452.47, + "learning_rate": 2.844946856647649e-05, + "loss": 0.0404, + "step": 74960 + }, + { + "epoch": 452.6, + "learning_rate": 2.840898002141693e-05, + "loss": 0.0396, + "step": 74980 + }, + { + "epoch": 452.72, + "learning_rate": 2.836850887185214e-05, + "loss": 0.0386, + "step": 75000 + }, + { + "epoch": 452.72, + "eval_accuracy": 0.0012659165670594976, + "eval_loss": 22.926258087158203, + "eval_runtime": 18.1625, + "eval_samples_per_second": 112.319, + "eval_steps_per_second": 2.368, + "step": 75000 + }, + { + "epoch": 452.84, + "learning_rate": 2.8328055150389064e-05, + "loss": 0.0373, + "step": 75020 + }, + { + "epoch": 452.96, + "learning_rate": 2.8287618889620537e-05, + "loss": 0.037, + "step": 75040 + }, + { + "epoch": 453.08, + "learning_rate": 2.824720012212527e-05, + "loss": 0.0286, + "step": 75060 + }, + { + "epoch": 453.2, + "learning_rate": 2.8206798880467967e-05, + "loss": 0.0226, + "step": 75080 + }, + { + "epoch": 453.32, + "learning_rate": 2.816641519719917e-05, + "loss": 0.0212, + "step": 75100 + }, + { + "epoch": 453.44, + "learning_rate": 2.81260491048553e-05, + "loss": 0.0199, + "step": 75120 + }, + { + "epoch": 453.56, + "learning_rate": 2.808570063595857e-05, + "loss": 0.0194, + "step": 75140 + }, + { + "epoch": 453.68, + "learning_rate": 2.8045369823016988e-05, + "loss": 0.0186, + "step": 75160 + }, + { + "epoch": 453.8, + "learning_rate": 2.800505669852443e-05, + "loss": 0.018, + "step": 75180 + }, + { + "epoch": 453.92, + "learning_rate": 2.7964761294960396e-05, + "loss": 0.0177, + "step": 75200 + }, + { + "epoch": 454.04, + "learning_rate": 2.7924483644790184e-05, + "loss": 0.0158, + "step": 75220 + }, + { + "epoch": 454.16, + "learning_rate": 2.788422378046477e-05, + "loss": 0.0129, + "step": 75240 + }, + { + "epoch": 454.29, + "learning_rate": 2.784398173442081e-05, + "loss": 0.0127, + "step": 75260 + }, + { + "epoch": 454.41, + "learning_rate": 2.7803757539080565e-05, + "loss": 0.0125, + "step": 75280 + }, + { + "epoch": 454.53, + "learning_rate": 2.7763551226851943e-05, + "loss": 0.0126, + "step": 75300 + }, + { + "epoch": 454.65, + "learning_rate": 2.7723362830128496e-05, + "loss": 0.0124, + "step": 75320 + }, + { + "epoch": 454.77, + "learning_rate": 2.7683192381289226e-05, + "loss": 0.0124, + "step": 75340 + }, + { + "epoch": 454.89, + "learning_rate": 2.7643039912698786e-05, + "loss": 0.0125, + "step": 75360 + }, + { + "epoch": 455.01, + "learning_rate": 2.760290545670724e-05, + "loss": 0.0123, + "step": 75380 + }, + { + "epoch": 455.13, + "learning_rate": 2.7562789045650268e-05, + "loss": 0.0112, + "step": 75400 + }, + { + "epoch": 455.25, + "learning_rate": 2.7522690711848877e-05, + "loss": 0.0112, + "step": 75420 + }, + { + "epoch": 455.37, + "learning_rate": 2.7482610487609584e-05, + "loss": 0.0113, + "step": 75440 + }, + { + "epoch": 455.49, + "learning_rate": 2.744254840522431e-05, + "loss": 0.0113, + "step": 75460 + }, + { + "epoch": 455.61, + "learning_rate": 2.740250449697035e-05, + "loss": 0.0114, + "step": 75480 + }, + { + "epoch": 455.73, + "learning_rate": 2.736247879511038e-05, + "loss": 0.0115, + "step": 75500 + }, + { + "epoch": 455.86, + "learning_rate": 2.732247133189232e-05, + "loss": 0.0114, + "step": 75520 + }, + { + "epoch": 455.98, + "learning_rate": 2.7282482139549538e-05, + "loss": 0.0116, + "step": 75540 + }, + { + "epoch": 456.1, + "learning_rate": 2.7242511250300552e-05, + "loss": 0.0109, + "step": 75560 + }, + { + "epoch": 456.22, + "learning_rate": 2.7202558696349206e-05, + "loss": 0.0109, + "step": 75580 + }, + { + "epoch": 456.34, + "learning_rate": 2.7162624509884543e-05, + "loss": 0.0109, + "step": 75600 + }, + { + "epoch": 456.46, + "learning_rate": 2.7122708723080825e-05, + "loss": 0.011, + "step": 75620 + }, + { + "epoch": 456.58, + "learning_rate": 2.7082811368097487e-05, + "loss": 0.0113, + "step": 75640 + }, + { + "epoch": 456.7, + "learning_rate": 2.704293247707908e-05, + "loss": 0.0112, + "step": 75660 + }, + { + "epoch": 456.82, + "learning_rate": 2.7003072082155324e-05, + "loss": 0.0113, + "step": 75680 + }, + { + "epoch": 456.94, + "learning_rate": 2.6963230215441016e-05, + "loss": 0.0112, + "step": 75700 + }, + { + "epoch": 457.06, + "learning_rate": 2.692340690903604e-05, + "loss": 0.011, + "step": 75720 + }, + { + "epoch": 457.18, + "learning_rate": 2.688360219502528e-05, + "loss": 0.0106, + "step": 75740 + }, + { + "epoch": 457.3, + "learning_rate": 2.6843816105478715e-05, + "loss": 0.0108, + "step": 75760 + }, + { + "epoch": 457.42, + "learning_rate": 2.6804048672451275e-05, + "loss": 0.0108, + "step": 75780 + }, + { + "epoch": 457.55, + "learning_rate": 2.6764299927982833e-05, + "loss": 0.0111, + "step": 75800 + }, + { + "epoch": 457.67, + "learning_rate": 2.672456990409825e-05, + "loss": 0.0111, + "step": 75820 + }, + { + "epoch": 457.79, + "learning_rate": 2.6684858632807298e-05, + "loss": 0.0112, + "step": 75840 + }, + { + "epoch": 457.91, + "learning_rate": 2.664516614610464e-05, + "loss": 0.0112, + "step": 75860 + }, + { + "epoch": 458.03, + "learning_rate": 2.660549247596976e-05, + "loss": 0.011, + "step": 75880 + }, + { + "epoch": 458.15, + "learning_rate": 2.6565837654367016e-05, + "loss": 0.0104, + "step": 75900 + }, + { + "epoch": 458.27, + "learning_rate": 2.652620171324564e-05, + "loss": 0.0107, + "step": 75920 + }, + { + "epoch": 458.39, + "learning_rate": 2.648658468453954e-05, + "loss": 0.0108, + "step": 75940 + }, + { + "epoch": 458.51, + "learning_rate": 2.6446986600167456e-05, + "loss": 0.0109, + "step": 75960 + }, + { + "epoch": 458.63, + "learning_rate": 2.6407407492032843e-05, + "loss": 0.011, + "step": 75980 + }, + { + "epoch": 458.75, + "learning_rate": 2.6367847392023904e-05, + "loss": 0.0109, + "step": 76000 + }, + { + "epoch": 458.75, + "eval_accuracy": 0.0012575199296658139, + "eval_loss": 23.15482521057129, + "eval_runtime": 18.205, + "eval_samples_per_second": 112.057, + "eval_steps_per_second": 2.362, + "step": 76000 + }, + { + "epoch": 458.87, + "learning_rate": 2.632830633201346e-05, + "loss": 0.0111, + "step": 76020 + }, + { + "epoch": 458.99, + "learning_rate": 2.6288784343859013e-05, + "loss": 0.0112, + "step": 76040 + }, + { + "epoch": 459.11, + "learning_rate": 2.6249281459402787e-05, + "loss": 0.0104, + "step": 76060 + }, + { + "epoch": 459.24, + "learning_rate": 2.6209797710471483e-05, + "loss": 0.0106, + "step": 76080 + }, + { + "epoch": 459.36, + "learning_rate": 2.6170333128876455e-05, + "loss": 0.0106, + "step": 76100 + }, + { + "epoch": 459.48, + "learning_rate": 2.6130887746413614e-05, + "loss": 0.0108, + "step": 76120 + }, + { + "epoch": 459.6, + "learning_rate": 2.6091461594863408e-05, + "loss": 0.0108, + "step": 76140 + }, + { + "epoch": 459.72, + "learning_rate": 2.6052054705990735e-05, + "loss": 0.0109, + "step": 76160 + }, + { + "epoch": 459.84, + "learning_rate": 2.601266711154503e-05, + "loss": 0.011, + "step": 76180 + }, + { + "epoch": 459.96, + "learning_rate": 2.5973298843260173e-05, + "loss": 0.0111, + "step": 76200 + }, + { + "epoch": 460.08, + "learning_rate": 2.593394993285446e-05, + "loss": 0.0106, + "step": 76220 + }, + { + "epoch": 460.2, + "learning_rate": 2.5894620412030628e-05, + "loss": 0.0105, + "step": 76240 + }, + { + "epoch": 460.32, + "learning_rate": 2.585531031247569e-05, + "loss": 0.0106, + "step": 76260 + }, + { + "epoch": 460.44, + "learning_rate": 2.581601966586117e-05, + "loss": 0.0107, + "step": 76280 + }, + { + "epoch": 460.56, + "learning_rate": 2.5776748503842774e-05, + "loss": 0.0109, + "step": 76300 + }, + { + "epoch": 460.68, + "learning_rate": 2.5737496858060572e-05, + "loss": 0.0109, + "step": 76320 + }, + { + "epoch": 460.8, + "learning_rate": 2.569826476013893e-05, + "loss": 0.0109, + "step": 76340 + }, + { + "epoch": 460.93, + "learning_rate": 2.565905224168642e-05, + "loss": 0.0111, + "step": 76360 + }, + { + "epoch": 461.05, + "learning_rate": 2.5619859334295894e-05, + "loss": 0.0107, + "step": 76380 + }, + { + "epoch": 461.17, + "learning_rate": 2.5580686069544324e-05, + "loss": 0.0104, + "step": 76400 + }, + { + "epoch": 461.29, + "learning_rate": 2.554153247899292e-05, + "loss": 0.0105, + "step": 76420 + }, + { + "epoch": 461.41, + "learning_rate": 2.5502398594187025e-05, + "loss": 0.0107, + "step": 76440 + }, + { + "epoch": 461.53, + "learning_rate": 2.54632844466561e-05, + "loss": 0.0108, + "step": 76460 + }, + { + "epoch": 461.65, + "learning_rate": 2.542419006791371e-05, + "loss": 0.0107, + "step": 76480 + }, + { + "epoch": 461.77, + "learning_rate": 2.53851154894575e-05, + "loss": 0.0109, + "step": 76500 + }, + { + "epoch": 461.89, + "learning_rate": 2.534606074276915e-05, + "loss": 0.011, + "step": 76520 + }, + { + "epoch": 462.01, + "learning_rate": 2.5307025859314344e-05, + "loss": 0.011, + "step": 76540 + }, + { + "epoch": 462.13, + "learning_rate": 2.526801087054279e-05, + "loss": 0.0103, + "step": 76560 + }, + { + "epoch": 462.25, + "learning_rate": 2.5229015807888166e-05, + "loss": 0.0105, + "step": 76580 + }, + { + "epoch": 462.37, + "learning_rate": 2.5190040702768085e-05, + "loss": 0.0106, + "step": 76600 + }, + { + "epoch": 462.49, + "learning_rate": 2.5151085586584095e-05, + "loss": 0.0107, + "step": 76620 + }, + { + "epoch": 462.62, + "learning_rate": 2.511215049072162e-05, + "loss": 0.0107, + "step": 76640 + }, + { + "epoch": 462.74, + "learning_rate": 2.507323544654998e-05, + "loss": 0.0108, + "step": 76660 + }, + { + "epoch": 462.86, + "learning_rate": 2.503434048542229e-05, + "loss": 0.011, + "step": 76680 + }, + { + "epoch": 462.98, + "learning_rate": 2.4995465638675547e-05, + "loss": 0.011, + "step": 76700 + }, + { + "epoch": 463.1, + "learning_rate": 2.4956610937630492e-05, + "loss": 0.0104, + "step": 76720 + }, + { + "epoch": 463.22, + "learning_rate": 2.4917776413591675e-05, + "loss": 0.0103, + "step": 76740 + }, + { + "epoch": 463.34, + "learning_rate": 2.4878962097847375e-05, + "loss": 0.0107, + "step": 76760 + }, + { + "epoch": 463.46, + "learning_rate": 2.4840168021669534e-05, + "loss": 0.0108, + "step": 76780 + }, + { + "epoch": 463.58, + "learning_rate": 2.4801394216313922e-05, + "loss": 0.0107, + "step": 76800 + }, + { + "epoch": 463.7, + "learning_rate": 2.4762640713019824e-05, + "loss": 0.0107, + "step": 76820 + }, + { + "epoch": 463.82, + "learning_rate": 2.4723907543010266e-05, + "loss": 0.011, + "step": 76840 + }, + { + "epoch": 463.94, + "learning_rate": 2.4685194737491857e-05, + "loss": 0.011, + "step": 76860 + }, + { + "epoch": 464.06, + "learning_rate": 2.4646502327654825e-05, + "loss": 0.0106, + "step": 76880 + }, + { + "epoch": 464.19, + "learning_rate": 2.460783034467291e-05, + "loss": 0.0103, + "step": 76900 + }, + { + "epoch": 464.31, + "learning_rate": 2.4569178819703448e-05, + "loss": 0.0104, + "step": 76920 + }, + { + "epoch": 464.43, + "learning_rate": 2.453054778388728e-05, + "loss": 0.0106, + "step": 76940 + }, + { + "epoch": 464.55, + "learning_rate": 2.4491937268348724e-05, + "loss": 0.0107, + "step": 76960 + }, + { + "epoch": 464.67, + "learning_rate": 2.44533473041956e-05, + "loss": 0.0108, + "step": 76980 + }, + { + "epoch": 464.79, + "learning_rate": 2.441477792251909e-05, + "loss": 0.0109, + "step": 77000 + }, + { + "epoch": 464.79, + "eval_accuracy": 0.0012496172121188173, + "eval_loss": 23.27609634399414, + "eval_runtime": 18.2071, + "eval_samples_per_second": 112.044, + "eval_steps_per_second": 2.362, + "step": 77000 + }, + { + "epoch": 464.91, + "learning_rate": 2.437622915439392e-05, + "loss": 0.0109, + "step": 77020 + }, + { + "epoch": 465.03, + "learning_rate": 2.433770103087808e-05, + "loss": 0.0108, + "step": 77040 + }, + { + "epoch": 465.15, + "learning_rate": 2.4299193583013e-05, + "loss": 0.0103, + "step": 77060 + }, + { + "epoch": 465.27, + "learning_rate": 2.426070684182344e-05, + "loss": 0.0104, + "step": 77080 + }, + { + "epoch": 465.39, + "learning_rate": 2.4222240838317472e-05, + "loss": 0.0106, + "step": 77100 + }, + { + "epoch": 465.51, + "learning_rate": 2.4183795603486475e-05, + "loss": 0.0106, + "step": 77120 + }, + { + "epoch": 465.63, + "learning_rate": 2.4145371168305025e-05, + "loss": 0.0109, + "step": 77140 + }, + { + "epoch": 465.75, + "learning_rate": 2.4106967563731077e-05, + "loss": 0.0107, + "step": 77160 + }, + { + "epoch": 465.88, + "learning_rate": 2.4068584820705654e-05, + "loss": 0.0108, + "step": 77180 + }, + { + "epoch": 466.0, + "learning_rate": 2.4030222970153065e-05, + "loss": 0.0109, + "step": 77200 + }, + { + "epoch": 466.12, + "learning_rate": 2.3991882042980756e-05, + "loss": 0.0102, + "step": 77220 + }, + { + "epoch": 466.24, + "learning_rate": 2.395356207007933e-05, + "loss": 0.0103, + "step": 77240 + }, + { + "epoch": 466.36, + "learning_rate": 2.39152630823225e-05, + "loss": 0.0104, + "step": 77260 + }, + { + "epoch": 466.48, + "learning_rate": 2.387698511056704e-05, + "loss": 0.0106, + "step": 77280 + }, + { + "epoch": 466.6, + "learning_rate": 2.3838728185652843e-05, + "loss": 0.0107, + "step": 77300 + }, + { + "epoch": 466.72, + "learning_rate": 2.3800492338402813e-05, + "loss": 0.0108, + "step": 77320 + }, + { + "epoch": 466.84, + "learning_rate": 2.3762277599622885e-05, + "loss": 0.0109, + "step": 77340 + }, + { + "epoch": 466.96, + "learning_rate": 2.3724084000101988e-05, + "loss": 0.011, + "step": 77360 + }, + { + "epoch": 467.08, + "learning_rate": 2.3685911570612012e-05, + "loss": 0.0105, + "step": 77380 + }, + { + "epoch": 467.2, + "learning_rate": 2.364776034190781e-05, + "loss": 0.0103, + "step": 77400 + }, + { + "epoch": 467.32, + "learning_rate": 2.3609630344727106e-05, + "loss": 0.0105, + "step": 77420 + }, + { + "epoch": 467.44, + "learning_rate": 2.3571521609790566e-05, + "loss": 0.0106, + "step": 77440 + }, + { + "epoch": 467.57, + "learning_rate": 2.353343416780171e-05, + "loss": 0.0107, + "step": 77460 + }, + { + "epoch": 467.69, + "learning_rate": 2.3495368049446898e-05, + "loss": 0.0107, + "step": 77480 + }, + { + "epoch": 467.81, + "learning_rate": 2.3457323285395337e-05, + "loss": 0.0108, + "step": 77500 + }, + { + "epoch": 467.93, + "learning_rate": 2.3419299906298953e-05, + "loss": 0.0109, + "step": 77520 + }, + { + "epoch": 468.05, + "learning_rate": 2.3381297942792564e-05, + "loss": 0.0106, + "step": 77540 + }, + { + "epoch": 468.17, + "learning_rate": 2.334331742549361e-05, + "loss": 0.0103, + "step": 77560 + }, + { + "epoch": 468.29, + "learning_rate": 2.3305358385002333e-05, + "loss": 0.0103, + "step": 77580 + }, + { + "epoch": 468.41, + "learning_rate": 2.326742085190164e-05, + "loss": 0.0106, + "step": 77600 + }, + { + "epoch": 468.53, + "learning_rate": 2.322950485675712e-05, + "loss": 0.0107, + "step": 77620 + }, + { + "epoch": 468.65, + "learning_rate": 2.3191610430117023e-05, + "loss": 0.0108, + "step": 77640 + }, + { + "epoch": 468.77, + "learning_rate": 2.3153737602512143e-05, + "loss": 0.0108, + "step": 77660 + }, + { + "epoch": 468.89, + "learning_rate": 2.3115886404456015e-05, + "loss": 0.0108, + "step": 77680 + }, + { + "epoch": 469.01, + "learning_rate": 2.307805686644461e-05, + "loss": 0.0108, + "step": 77700 + }, + { + "epoch": 469.13, + "learning_rate": 2.3040249018956527e-05, + "loss": 0.0101, + "step": 77720 + }, + { + "epoch": 469.26, + "learning_rate": 2.300246289245283e-05, + "loss": 0.0104, + "step": 77740 + }, + { + "epoch": 469.38, + "learning_rate": 2.2964698517377165e-05, + "loss": 0.0104, + "step": 77760 + }, + { + "epoch": 469.5, + "learning_rate": 2.2926955924155608e-05, + "loss": 0.0106, + "step": 77780 + }, + { + "epoch": 469.62, + "learning_rate": 2.2889235143196642e-05, + "loss": 0.0107, + "step": 77800 + }, + { + "epoch": 469.74, + "learning_rate": 2.2851536204891244e-05, + "loss": 0.0108, + "step": 77820 + }, + { + "epoch": 469.86, + "learning_rate": 2.2813859139612764e-05, + "loss": 0.0109, + "step": 77840 + }, + { + "epoch": 469.98, + "learning_rate": 2.2776203977716953e-05, + "loss": 0.0109, + "step": 77860 + }, + { + "epoch": 470.1, + "learning_rate": 2.2738570749541844e-05, + "loss": 0.0103, + "step": 77880 + }, + { + "epoch": 470.22, + "learning_rate": 2.2700959485407897e-05, + "loss": 0.0104, + "step": 77900 + }, + { + "epoch": 470.34, + "learning_rate": 2.266337021561783e-05, + "loss": 0.0104, + "step": 77920 + }, + { + "epoch": 470.46, + "learning_rate": 2.26258029704566e-05, + "loss": 0.0106, + "step": 77940 + }, + { + "epoch": 470.58, + "learning_rate": 2.2588257780191484e-05, + "loss": 0.0106, + "step": 77960 + }, + { + "epoch": 470.7, + "learning_rate": 2.255073467507196e-05, + "loss": 0.0108, + "step": 77980 + }, + { + "epoch": 470.82, + "learning_rate": 2.2513233685329733e-05, + "loss": 0.0108, + "step": 78000 + }, + { + "epoch": 470.82, + "eval_accuracy": 0.0012604834487459375, + "eval_loss": 23.376338958740234, + "eval_runtime": 18.1777, + "eval_samples_per_second": 112.225, + "eval_steps_per_second": 2.366, + "step": 78000 + }, + { + "epoch": 470.95, + "learning_rate": 2.247575484117864e-05, + "loss": 0.011, + "step": 78020 + }, + { + "epoch": 471.07, + "learning_rate": 2.2438298172814714e-05, + "loss": 0.0105, + "step": 78040 + }, + { + "epoch": 471.19, + "learning_rate": 2.240086371041613e-05, + "loss": 0.0103, + "step": 78060 + }, + { + "epoch": 471.31, + "learning_rate": 2.2363451484143155e-05, + "loss": 0.0105, + "step": 78080 + }, + { + "epoch": 471.43, + "learning_rate": 2.2326061524138146e-05, + "loss": 0.0106, + "step": 78100 + }, + { + "epoch": 471.55, + "learning_rate": 2.228869386052551e-05, + "loss": 0.0107, + "step": 78120 + }, + { + "epoch": 471.67, + "learning_rate": 2.225134852341173e-05, + "loss": 0.0108, + "step": 78140 + }, + { + "epoch": 471.79, + "learning_rate": 2.2214025542885218e-05, + "loss": 0.0108, + "step": 78160 + }, + { + "epoch": 471.91, + "learning_rate": 2.217672494901646e-05, + "loss": 0.0109, + "step": 78180 + }, + { + "epoch": 472.03, + "learning_rate": 2.2139446771857858e-05, + "loss": 0.0108, + "step": 78200 + }, + { + "epoch": 472.15, + "learning_rate": 2.2102191041443766e-05, + "loss": 0.0102, + "step": 78220 + }, + { + "epoch": 472.27, + "learning_rate": 2.206495778779048e-05, + "loss": 0.0106, + "step": 78240 + }, + { + "epoch": 472.39, + "learning_rate": 2.2027747040896097e-05, + "loss": 0.0108, + "step": 78260 + }, + { + "epoch": 472.52, + "learning_rate": 2.199055883074072e-05, + "loss": 0.0109, + "step": 78280 + }, + { + "epoch": 472.64, + "learning_rate": 2.195339318728617e-05, + "loss": 0.0112, + "step": 78300 + }, + { + "epoch": 472.76, + "learning_rate": 2.191625014047614e-05, + "loss": 0.0111, + "step": 78320 + }, + { + "epoch": 472.88, + "learning_rate": 2.1879129720236118e-05, + "loss": 0.0114, + "step": 78340 + }, + { + "epoch": 473.0, + "learning_rate": 2.1842031956473365e-05, + "loss": 0.0114, + "step": 78360 + }, + { + "epoch": 473.12, + "learning_rate": 2.1804956879076876e-05, + "loss": 0.0113, + "step": 78380 + }, + { + "epoch": 473.24, + "learning_rate": 2.1767904517917336e-05, + "loss": 0.0119, + "step": 78400 + }, + { + "epoch": 473.36, + "learning_rate": 2.1730874902847232e-05, + "loss": 0.0134, + "step": 78420 + }, + { + "epoch": 473.48, + "learning_rate": 2.1693868063700596e-05, + "loss": 0.0163, + "step": 78440 + }, + { + "epoch": 473.6, + "learning_rate": 2.165873268986719e-05, + "loss": 0.0263, + "step": 78460 + }, + { + "epoch": 473.72, + "learning_rate": 2.162177034951215e-05, + "loss": 0.0626, + "step": 78480 + }, + { + "epoch": 473.84, + "learning_rate": 2.158667730333516e-05, + "loss": 0.0854, + "step": 78500 + }, + { + "epoch": 473.96, + "learning_rate": 2.1549759575009732e-05, + "loss": 0.0855, + "step": 78520 + }, + { + "epoch": 474.08, + "learning_rate": 2.1512864768529134e-05, + "loss": 0.0768, + "step": 78540 + }, + { + "epoch": 474.21, + "learning_rate": 2.1475992913618858e-05, + "loss": 0.0756, + "step": 78560 + }, + { + "epoch": 474.33, + "learning_rate": 2.1439144039985898e-05, + "loss": 0.07, + "step": 78580 + }, + { + "epoch": 474.45, + "learning_rate": 2.1402318177318703e-05, + "loss": 0.0656, + "step": 78600 + }, + { + "epoch": 474.57, + "learning_rate": 2.1365515355287192e-05, + "loss": 0.0627, + "step": 78620 + }, + { + "epoch": 474.69, + "learning_rate": 2.13287356035428e-05, + "loss": 0.0611, + "step": 78640 + }, + { + "epoch": 474.81, + "learning_rate": 2.1291978951718246e-05, + "loss": 0.0562, + "step": 78660 + }, + { + "epoch": 474.93, + "learning_rate": 2.1255245429427723e-05, + "loss": 0.0539, + "step": 78680 + }, + { + "epoch": 475.05, + "learning_rate": 2.1218535066266758e-05, + "loss": 0.0452, + "step": 78700 + }, + { + "epoch": 475.17, + "learning_rate": 2.1181847891812262e-05, + "loss": 0.0338, + "step": 78720 + }, + { + "epoch": 475.29, + "learning_rate": 2.1145183935622377e-05, + "loss": 0.0311, + "step": 78740 + }, + { + "epoch": 475.41, + "learning_rate": 2.1108543227236616e-05, + "loss": 0.0285, + "step": 78760 + }, + { + "epoch": 475.53, + "learning_rate": 2.107192579617574e-05, + "loss": 0.0265, + "step": 78780 + }, + { + "epoch": 475.65, + "learning_rate": 2.1035331671941737e-05, + "loss": 0.0254, + "step": 78800 + }, + { + "epoch": 475.77, + "learning_rate": 2.0998760884017865e-05, + "loss": 0.0237, + "step": 78820 + }, + { + "epoch": 475.9, + "learning_rate": 2.0962213461868485e-05, + "loss": 0.0228, + "step": 78840 + }, + { + "epoch": 476.02, + "learning_rate": 2.092568943493927e-05, + "loss": 0.0212, + "step": 78860 + }, + { + "epoch": 476.14, + "learning_rate": 2.0889188832656918e-05, + "loss": 0.015, + "step": 78880 + }, + { + "epoch": 476.26, + "learning_rate": 2.0852711684429314e-05, + "loss": 0.0141, + "step": 78900 + }, + { + "epoch": 476.38, + "learning_rate": 2.0816258019645446e-05, + "loss": 0.0139, + "step": 78920 + }, + { + "epoch": 476.5, + "learning_rate": 2.077982786767537e-05, + "loss": 0.0133, + "step": 78940 + }, + { + "epoch": 476.62, + "learning_rate": 2.0743421257870215e-05, + "loss": 0.0133, + "step": 78960 + }, + { + "epoch": 476.74, + "learning_rate": 2.07070382195621e-05, + "loss": 0.013, + "step": 78980 + }, + { + "epoch": 476.86, + "learning_rate": 2.06706787820642e-05, + "loss": 0.0131, + "step": 79000 + }, + { + "epoch": 476.86, + "eval_accuracy": 0.0012590016892058756, + "eval_loss": 23.20476722717285, + "eval_runtime": 18.1597, + "eval_samples_per_second": 112.336, + "eval_steps_per_second": 2.368, + "step": 79000 + }, + { + "epoch": 476.98, + "learning_rate": 2.063434297467065e-05, + "loss": 0.0131, + "step": 79020 + }, + { + "epoch": 477.1, + "learning_rate": 2.0598030826656577e-05, + "loss": 0.0114, + "step": 79040 + }, + { + "epoch": 477.22, + "learning_rate": 2.056174236727797e-05, + "loss": 0.0113, + "step": 79060 + }, + { + "epoch": 477.34, + "learning_rate": 2.052547762577184e-05, + "loss": 0.0111, + "step": 79080 + }, + { + "epoch": 477.46, + "learning_rate": 2.0489236631356023e-05, + "loss": 0.0112, + "step": 79100 + }, + { + "epoch": 477.59, + "learning_rate": 2.045301941322921e-05, + "loss": 0.0113, + "step": 79120 + }, + { + "epoch": 477.71, + "learning_rate": 2.041682600057097e-05, + "loss": 0.0114, + "step": 79140 + }, + { + "epoch": 477.83, + "learning_rate": 2.0380656422541687e-05, + "loss": 0.0113, + "step": 79160 + }, + { + "epoch": 477.95, + "learning_rate": 2.0344510708282556e-05, + "loss": 0.0115, + "step": 79180 + }, + { + "epoch": 478.07, + "learning_rate": 2.0308388886915464e-05, + "loss": 0.011, + "step": 79200 + }, + { + "epoch": 478.19, + "learning_rate": 2.027229098754317e-05, + "loss": 0.0106, + "step": 79220 + }, + { + "epoch": 478.31, + "learning_rate": 2.02362170392491e-05, + "loss": 0.0108, + "step": 79240 + }, + { + "epoch": 478.43, + "learning_rate": 2.0200167071097343e-05, + "loss": 0.0108, + "step": 79260 + }, + { + "epoch": 478.55, + "learning_rate": 2.016414111213273e-05, + "loss": 0.0111, + "step": 79280 + }, + { + "epoch": 478.67, + "learning_rate": 2.0128139191380723e-05, + "loss": 0.011, + "step": 79300 + }, + { + "epoch": 478.79, + "learning_rate": 2.0092161337847433e-05, + "loss": 0.011, + "step": 79320 + }, + { + "epoch": 478.91, + "learning_rate": 2.0056207580519538e-05, + "loss": 0.0111, + "step": 79340 + }, + { + "epoch": 479.03, + "learning_rate": 2.0020277948364325e-05, + "loss": 0.0109, + "step": 79360 + }, + { + "epoch": 479.15, + "learning_rate": 1.998437247032971e-05, + "loss": 0.0105, + "step": 79380 + }, + { + "epoch": 479.28, + "learning_rate": 1.9948491175344025e-05, + "loss": 0.0106, + "step": 79400 + }, + { + "epoch": 479.4, + "learning_rate": 1.9912634092316206e-05, + "loss": 0.0107, + "step": 79420 + }, + { + "epoch": 479.52, + "learning_rate": 1.9876801250135662e-05, + "loss": 0.0108, + "step": 79440 + }, + { + "epoch": 479.64, + "learning_rate": 1.984099267767227e-05, + "loss": 0.0108, + "step": 79460 + }, + { + "epoch": 479.76, + "learning_rate": 1.9805208403776333e-05, + "loss": 0.0109, + "step": 79480 + }, + { + "epoch": 479.88, + "learning_rate": 1.976944845727859e-05, + "loss": 0.0109, + "step": 79500 + }, + { + "epoch": 480.0, + "learning_rate": 1.97337128669902e-05, + "loss": 0.0111, + "step": 79520 + }, + { + "epoch": 480.12, + "learning_rate": 1.969800166170268e-05, + "loss": 0.0103, + "step": 79540 + }, + { + "epoch": 480.24, + "learning_rate": 1.966231487018792e-05, + "loss": 0.0105, + "step": 79560 + }, + { + "epoch": 480.36, + "learning_rate": 1.962665252119807e-05, + "loss": 0.0106, + "step": 79580 + }, + { + "epoch": 480.48, + "learning_rate": 1.9591014643465706e-05, + "loss": 0.0106, + "step": 79600 + }, + { + "epoch": 480.6, + "learning_rate": 1.955540126570358e-05, + "loss": 0.0107, + "step": 79620 + }, + { + "epoch": 480.72, + "learning_rate": 1.9519812416604748e-05, + "loss": 0.0108, + "step": 79640 + }, + { + "epoch": 480.85, + "learning_rate": 1.9484248124842526e-05, + "loss": 0.0108, + "step": 79660 + }, + { + "epoch": 480.97, + "learning_rate": 1.94487084190704e-05, + "loss": 0.0108, + "step": 79680 + }, + { + "epoch": 481.09, + "learning_rate": 1.9413193327922104e-05, + "loss": 0.0105, + "step": 79700 + }, + { + "epoch": 481.21, + "learning_rate": 1.9377702880011434e-05, + "loss": 0.0103, + "step": 79720 + }, + { + "epoch": 481.33, + "learning_rate": 1.934223710393249e-05, + "loss": 0.0105, + "step": 79740 + }, + { + "epoch": 481.45, + "learning_rate": 1.930679602825935e-05, + "loss": 0.0106, + "step": 79760 + }, + { + "epoch": 481.57, + "learning_rate": 1.927137968154626e-05, + "loss": 0.0107, + "step": 79780 + }, + { + "epoch": 481.69, + "learning_rate": 1.923598809232754e-05, + "loss": 0.0106, + "step": 79800 + }, + { + "epoch": 481.81, + "learning_rate": 1.9200621289117544e-05, + "loss": 0.0107, + "step": 79820 + }, + { + "epoch": 481.93, + "learning_rate": 1.9165279300410694e-05, + "loss": 0.0108, + "step": 79840 + }, + { + "epoch": 482.05, + "learning_rate": 1.9129962154681346e-05, + "loss": 0.0106, + "step": 79860 + }, + { + "epoch": 482.17, + "learning_rate": 1.9094669880383915e-05, + "loss": 0.0103, + "step": 79880 + }, + { + "epoch": 482.29, + "learning_rate": 1.9059402505952744e-05, + "loss": 0.0103, + "step": 79900 + }, + { + "epoch": 482.41, + "learning_rate": 1.9024160059802122e-05, + "loss": 0.0105, + "step": 79920 + }, + { + "epoch": 482.54, + "learning_rate": 1.8988942570326245e-05, + "loss": 0.0106, + "step": 79940 + }, + { + "epoch": 482.66, + "learning_rate": 1.8953750065899212e-05, + "loss": 0.0106, + "step": 79960 + }, + { + "epoch": 482.78, + "learning_rate": 1.8918582574875003e-05, + "loss": 0.0107, + "step": 79980 + }, + { + "epoch": 482.9, + "learning_rate": 1.8883440125587402e-05, + "loss": 0.0108, + "step": 80000 + }, + { + "epoch": 482.9, + "eval_accuracy": 0.001258013849512501, + "eval_loss": 23.377166748046875, + "eval_runtime": 18.2373, + "eval_samples_per_second": 111.859, + "eval_steps_per_second": 2.358, + "step": 80000 + }, + { + "epoch": 483.02, + "learning_rate": 1.884832274635006e-05, + "loss": 0.0107, + "step": 80020 + }, + { + "epoch": 483.14, + "learning_rate": 1.8813230465456416e-05, + "loss": 0.0102, + "step": 80040 + }, + { + "epoch": 483.26, + "learning_rate": 1.877816331117968e-05, + "loss": 0.0103, + "step": 80060 + }, + { + "epoch": 483.38, + "learning_rate": 1.8743121311772856e-05, + "loss": 0.0104, + "step": 80080 + }, + { + "epoch": 483.5, + "learning_rate": 1.870810449546858e-05, + "loss": 0.0106, + "step": 80100 + }, + { + "epoch": 483.62, + "learning_rate": 1.867311289047935e-05, + "loss": 0.0106, + "step": 80120 + }, + { + "epoch": 483.74, + "learning_rate": 1.8638146524997217e-05, + "loss": 0.0107, + "step": 80140 + }, + { + "epoch": 483.86, + "learning_rate": 1.8603205427193964e-05, + "loss": 0.0106, + "step": 80160 + }, + { + "epoch": 483.98, + "learning_rate": 1.856828962522101e-05, + "loss": 0.0107, + "step": 80180 + }, + { + "epoch": 484.1, + "learning_rate": 1.8533399147209375e-05, + "loss": 0.0101, + "step": 80200 + }, + { + "epoch": 484.23, + "learning_rate": 1.8498534021269708e-05, + "loss": 0.0103, + "step": 80220 + }, + { + "epoch": 484.35, + "learning_rate": 1.846369427549216e-05, + "loss": 0.0103, + "step": 80240 + }, + { + "epoch": 484.47, + "learning_rate": 1.8428879937946557e-05, + "loss": 0.0105, + "step": 80260 + }, + { + "epoch": 484.59, + "learning_rate": 1.8394091036682133e-05, + "loss": 0.0105, + "step": 80280 + }, + { + "epoch": 484.71, + "learning_rate": 1.8359327599727698e-05, + "loss": 0.0106, + "step": 80300 + }, + { + "epoch": 484.83, + "learning_rate": 1.832458965509149e-05, + "loss": 0.0107, + "step": 80320 + }, + { + "epoch": 484.95, + "learning_rate": 1.82898772307613e-05, + "loss": 0.0107, + "step": 80340 + }, + { + "epoch": 485.07, + "learning_rate": 1.8255190354704256e-05, + "loss": 0.0104, + "step": 80360 + }, + { + "epoch": 485.19, + "learning_rate": 1.8220529054866958e-05, + "loss": 0.0102, + "step": 80380 + }, + { + "epoch": 485.31, + "learning_rate": 1.81858933591754e-05, + "loss": 0.0103, + "step": 80400 + }, + { + "epoch": 485.43, + "learning_rate": 1.8151283295534932e-05, + "loss": 0.0104, + "step": 80420 + }, + { + "epoch": 485.55, + "learning_rate": 1.811669889183027e-05, + "loss": 0.0104, + "step": 80440 + }, + { + "epoch": 485.67, + "learning_rate": 1.8082140175925393e-05, + "loss": 0.0105, + "step": 80460 + }, + { + "epoch": 485.79, + "learning_rate": 1.8047607175663712e-05, + "loss": 0.0107, + "step": 80480 + }, + { + "epoch": 485.92, + "learning_rate": 1.8013099918867778e-05, + "loss": 0.0106, + "step": 80500 + }, + { + "epoch": 486.04, + "learning_rate": 1.7978618433339484e-05, + "loss": 0.0106, + "step": 80520 + }, + { + "epoch": 486.16, + "learning_rate": 1.794416274685995e-05, + "loss": 0.0101, + "step": 80540 + }, + { + "epoch": 486.28, + "learning_rate": 1.7909732887189485e-05, + "loss": 0.0102, + "step": 80560 + }, + { + "epoch": 486.4, + "learning_rate": 1.7875328882067634e-05, + "loss": 0.0104, + "step": 80580 + }, + { + "epoch": 486.52, + "learning_rate": 1.7840950759213043e-05, + "loss": 0.0105, + "step": 80600 + }, + { + "epoch": 486.64, + "learning_rate": 1.7806598546323567e-05, + "loss": 0.0105, + "step": 80620 + }, + { + "epoch": 486.76, + "learning_rate": 1.7772272271076157e-05, + "loss": 0.0106, + "step": 80640 + }, + { + "epoch": 486.88, + "learning_rate": 1.7737971961126875e-05, + "loss": 0.0106, + "step": 80660 + }, + { + "epoch": 487.0, + "learning_rate": 1.7703697644110863e-05, + "loss": 0.0107, + "step": 80680 + }, + { + "epoch": 487.12, + "learning_rate": 1.7669449347642313e-05, + "loss": 0.01, + "step": 80700 + }, + { + "epoch": 487.24, + "learning_rate": 1.763522709931449e-05, + "loss": 0.0102, + "step": 80720 + }, + { + "epoch": 487.36, + "learning_rate": 1.760103092669959e-05, + "loss": 0.0103, + "step": 80740 + }, + { + "epoch": 487.48, + "learning_rate": 1.7566860857348884e-05, + "loss": 0.0104, + "step": 80760 + }, + { + "epoch": 487.61, + "learning_rate": 1.7532716918792565e-05, + "loss": 0.0105, + "step": 80780 + }, + { + "epoch": 487.73, + "learning_rate": 1.7498599138539797e-05, + "loss": 0.0105, + "step": 80800 + }, + { + "epoch": 487.85, + "learning_rate": 1.7464507544078672e-05, + "loss": 0.0107, + "step": 80820 + }, + { + "epoch": 487.97, + "learning_rate": 1.743044216287613e-05, + "loss": 0.0107, + "step": 80840 + }, + { + "epoch": 488.09, + "learning_rate": 1.7396403022378095e-05, + "loss": 0.0101, + "step": 80860 + }, + { + "epoch": 488.21, + "learning_rate": 1.7362390150009228e-05, + "loss": 0.0102, + "step": 80880 + }, + { + "epoch": 488.33, + "learning_rate": 1.7328403573173118e-05, + "loss": 0.0102, + "step": 80900 + }, + { + "epoch": 488.45, + "learning_rate": 1.7294443319252125e-05, + "loss": 0.0103, + "step": 80920 + }, + { + "epoch": 488.57, + "learning_rate": 1.726050941560742e-05, + "loss": 0.0105, + "step": 80940 + }, + { + "epoch": 488.69, + "learning_rate": 1.7226601889578946e-05, + "loss": 0.0105, + "step": 80960 + }, + { + "epoch": 488.81, + "learning_rate": 1.7192720768485336e-05, + "loss": 0.0106, + "step": 80980 + }, + { + "epoch": 488.93, + "learning_rate": 1.7158866079624075e-05, + "loss": 0.0106, + "step": 81000 + }, + { + "epoch": 488.93, + "eval_accuracy": 0.0012565320899724393, + "eval_loss": 23.473268508911133, + "eval_runtime": 18.1503, + "eval_samples_per_second": 112.395, + "eval_steps_per_second": 2.369, + "step": 81000 + }, + { + "epoch": 489.05, + "learning_rate": 1.712503785027121e-05, + "loss": 0.0104, + "step": 81020 + }, + { + "epoch": 489.18, + "learning_rate": 1.7091236107681552e-05, + "loss": 0.0101, + "step": 81040 + }, + { + "epoch": 489.3, + "learning_rate": 1.705746087908856e-05, + "loss": 0.0102, + "step": 81060 + }, + { + "epoch": 489.42, + "learning_rate": 1.702371219170433e-05, + "loss": 0.0104, + "step": 81080 + }, + { + "epoch": 489.54, + "learning_rate": 1.698999007271957e-05, + "loss": 0.0104, + "step": 81100 + }, + { + "epoch": 489.66, + "learning_rate": 1.6956294549303562e-05, + "loss": 0.0106, + "step": 81120 + }, + { + "epoch": 489.78, + "learning_rate": 1.69226256486042e-05, + "loss": 0.0105, + "step": 81140 + }, + { + "epoch": 489.9, + "learning_rate": 1.6888983397747902e-05, + "loss": 0.0106, + "step": 81160 + }, + { + "epoch": 490.02, + "learning_rate": 1.685536782383964e-05, + "loss": 0.0105, + "step": 81180 + }, + { + "epoch": 490.14, + "learning_rate": 1.6821778953962826e-05, + "loss": 0.01, + "step": 81200 + }, + { + "epoch": 490.26, + "learning_rate": 1.6788216815179453e-05, + "loss": 0.0102, + "step": 81220 + }, + { + "epoch": 490.38, + "learning_rate": 1.6754681434529934e-05, + "loss": 0.0102, + "step": 81240 + }, + { + "epoch": 490.5, + "learning_rate": 1.6721172839033082e-05, + "loss": 0.0103, + "step": 81260 + }, + { + "epoch": 490.62, + "learning_rate": 1.6687691055686187e-05, + "loss": 0.0105, + "step": 81280 + }, + { + "epoch": 490.74, + "learning_rate": 1.6654236111464917e-05, + "loss": 0.0105, + "step": 81300 + }, + { + "epoch": 490.87, + "learning_rate": 1.662080803332333e-05, + "loss": 0.0107, + "step": 81320 + }, + { + "epoch": 490.99, + "learning_rate": 1.6587406848193792e-05, + "loss": 0.0106, + "step": 81340 + }, + { + "epoch": 491.11, + "learning_rate": 1.655403258298704e-05, + "loss": 0.0101, + "step": 81360 + }, + { + "epoch": 491.23, + "learning_rate": 1.6520685264592162e-05, + "loss": 0.01, + "step": 81380 + }, + { + "epoch": 491.35, + "learning_rate": 1.6487364919876435e-05, + "loss": 0.0103, + "step": 81400 + }, + { + "epoch": 491.47, + "learning_rate": 1.6454071575685488e-05, + "loss": 0.0104, + "step": 81420 + }, + { + "epoch": 491.59, + "learning_rate": 1.642080525884316e-05, + "loss": 0.0104, + "step": 81440 + }, + { + "epoch": 491.71, + "learning_rate": 1.6387565996151532e-05, + "loss": 0.0105, + "step": 81460 + }, + { + "epoch": 491.83, + "learning_rate": 1.6354353814390854e-05, + "loss": 0.0105, + "step": 81480 + }, + { + "epoch": 491.95, + "learning_rate": 1.632116874031958e-05, + "loss": 0.0107, + "step": 81500 + }, + { + "epoch": 492.07, + "learning_rate": 1.6288010800674368e-05, + "loss": 0.0103, + "step": 81520 + }, + { + "epoch": 492.19, + "learning_rate": 1.6254880022169933e-05, + "loss": 0.0101, + "step": 81540 + }, + { + "epoch": 492.31, + "learning_rate": 1.6221776431499148e-05, + "loss": 0.0101, + "step": 81560 + }, + { + "epoch": 492.43, + "learning_rate": 1.6188700055332983e-05, + "loss": 0.0103, + "step": 81580 + }, + { + "epoch": 492.56, + "learning_rate": 1.6155650920320496e-05, + "loss": 0.0104, + "step": 81600 + }, + { + "epoch": 492.68, + "learning_rate": 1.6122629053088752e-05, + "loss": 0.0105, + "step": 81620 + }, + { + "epoch": 492.8, + "learning_rate": 1.6089634480242872e-05, + "loss": 0.0106, + "step": 81640 + }, + { + "epoch": 492.92, + "learning_rate": 1.6056667228365996e-05, + "loss": 0.0106, + "step": 81660 + }, + { + "epoch": 493.04, + "learning_rate": 1.602372732401925e-05, + "loss": 0.0104, + "step": 81680 + }, + { + "epoch": 493.16, + "learning_rate": 1.5990814793741725e-05, + "loss": 0.01, + "step": 81700 + }, + { + "epoch": 493.28, + "learning_rate": 1.595792966405041e-05, + "loss": 0.0101, + "step": 81720 + }, + { + "epoch": 493.4, + "learning_rate": 1.5925071961440324e-05, + "loss": 0.0103, + "step": 81740 + }, + { + "epoch": 493.52, + "learning_rate": 1.5892241712384276e-05, + "loss": 0.0105, + "step": 81760 + }, + { + "epoch": 493.64, + "learning_rate": 1.585943894333302e-05, + "loss": 0.0104, + "step": 81780 + }, + { + "epoch": 493.76, + "learning_rate": 1.5826663680715152e-05, + "loss": 0.0105, + "step": 81800 + }, + { + "epoch": 493.88, + "learning_rate": 1.5793915950937116e-05, + "loss": 0.0106, + "step": 81820 + }, + { + "epoch": 494.0, + "learning_rate": 1.5761195780383174e-05, + "loss": 0.0107, + "step": 81840 + }, + { + "epoch": 494.12, + "learning_rate": 1.5728503195415355e-05, + "loss": 0.0101, + "step": 81860 + }, + { + "epoch": 494.25, + "learning_rate": 1.5695838222373495e-05, + "loss": 0.0101, + "step": 81880 + }, + { + "epoch": 494.37, + "learning_rate": 1.5663200887575162e-05, + "loss": 0.0102, + "step": 81900 + }, + { + "epoch": 494.49, + "learning_rate": 1.56305912173157e-05, + "loss": 0.0104, + "step": 81920 + }, + { + "epoch": 494.61, + "learning_rate": 1.559800923786807e-05, + "loss": 0.0105, + "step": 81940 + }, + { + "epoch": 494.73, + "learning_rate": 1.5565454975483047e-05, + "loss": 0.0105, + "step": 81960 + }, + { + "epoch": 494.85, + "learning_rate": 1.5532928456389e-05, + "loss": 0.0105, + "step": 81980 + }, + { + "epoch": 494.97, + "learning_rate": 1.550042970679193e-05, + "loss": 0.0106, + "step": 82000 + }, + { + "epoch": 494.97, + "eval_accuracy": 0.0012590016892058756, + "eval_loss": 23.56543731689453, + "eval_runtime": 18.1587, + "eval_samples_per_second": 112.343, + "eval_steps_per_second": 2.368, + "step": 82000 + }, + { + "epoch": 495.09, + "learning_rate": 1.5467958752875505e-05, + "loss": 0.0101, + "step": 82020 + }, + { + "epoch": 495.21, + "learning_rate": 1.543551562080099e-05, + "loss": 0.01, + "step": 82040 + }, + { + "epoch": 495.33, + "learning_rate": 1.540310033670725e-05, + "loss": 0.0102, + "step": 82060 + }, + { + "epoch": 495.45, + "learning_rate": 1.537071292671064e-05, + "loss": 0.0103, + "step": 82080 + }, + { + "epoch": 495.57, + "learning_rate": 1.533835341690516e-05, + "loss": 0.0104, + "step": 82100 + }, + { + "epoch": 495.69, + "learning_rate": 1.5306021833362282e-05, + "loss": 0.0105, + "step": 82120 + }, + { + "epoch": 495.81, + "learning_rate": 1.527371820213095e-05, + "loss": 0.0106, + "step": 82140 + }, + { + "epoch": 495.94, + "learning_rate": 1.5241442549237628e-05, + "loss": 0.0106, + "step": 82160 + }, + { + "epoch": 496.06, + "learning_rate": 1.5209194900686235e-05, + "loss": 0.0102, + "step": 82180 + }, + { + "epoch": 496.18, + "learning_rate": 1.5176975282458122e-05, + "loss": 0.01, + "step": 82200 + }, + { + "epoch": 496.3, + "learning_rate": 1.5144783720512035e-05, + "loss": 0.0101, + "step": 82220 + }, + { + "epoch": 496.42, + "learning_rate": 1.5112620240784126e-05, + "loss": 0.0103, + "step": 82240 + }, + { + "epoch": 496.54, + "learning_rate": 1.5080484869187977e-05, + "loss": 0.0104, + "step": 82260 + }, + { + "epoch": 496.66, + "learning_rate": 1.504837763161443e-05, + "loss": 0.0105, + "step": 82280 + }, + { + "epoch": 496.78, + "learning_rate": 1.501629855393173e-05, + "loss": 0.0106, + "step": 82300 + }, + { + "epoch": 496.9, + "learning_rate": 1.4984247661985396e-05, + "loss": 0.0106, + "step": 82320 + }, + { + "epoch": 497.02, + "learning_rate": 1.4952224981598278e-05, + "loss": 0.0105, + "step": 82340 + }, + { + "epoch": 497.14, + "learning_rate": 1.492023053857044e-05, + "loss": 0.01, + "step": 82360 + }, + { + "epoch": 497.26, + "learning_rate": 1.4888264358679238e-05, + "loss": 0.0101, + "step": 82380 + }, + { + "epoch": 497.38, + "learning_rate": 1.4856326467679248e-05, + "loss": 0.0103, + "step": 82400 + }, + { + "epoch": 497.51, + "learning_rate": 1.4824416891302256e-05, + "loss": 0.0104, + "step": 82420 + }, + { + "epoch": 497.63, + "learning_rate": 1.479253565525724e-05, + "loss": 0.0104, + "step": 82440 + }, + { + "epoch": 497.75, + "learning_rate": 1.476068278523029e-05, + "loss": 0.0106, + "step": 82460 + }, + { + "epoch": 497.87, + "learning_rate": 1.4728858306884757e-05, + "loss": 0.0105, + "step": 82480 + }, + { + "epoch": 497.99, + "learning_rate": 1.4697062245860992e-05, + "loss": 0.0106, + "step": 82500 + }, + { + "epoch": 498.11, + "learning_rate": 1.4665294627776533e-05, + "loss": 0.01, + "step": 82520 + }, + { + "epoch": 498.23, + "learning_rate": 1.463355547822597e-05, + "loss": 0.0102, + "step": 82540 + }, + { + "epoch": 498.35, + "learning_rate": 1.4601844822780964e-05, + "loss": 0.0102, + "step": 82560 + }, + { + "epoch": 498.47, + "learning_rate": 1.457016268699023e-05, + "loss": 0.0103, + "step": 82580 + }, + { + "epoch": 498.59, + "learning_rate": 1.4538509096379449e-05, + "loss": 0.0105, + "step": 82600 + }, + { + "epoch": 498.71, + "learning_rate": 1.450688407645141e-05, + "loss": 0.0105, + "step": 82620 + }, + { + "epoch": 498.83, + "learning_rate": 1.4475287652685775e-05, + "loss": 0.0107, + "step": 82640 + }, + { + "epoch": 498.95, + "learning_rate": 1.4443719850539222e-05, + "loss": 0.0106, + "step": 82660 + }, + { + "epoch": 499.07, + "learning_rate": 1.441218069544536e-05, + "loss": 0.0102, + "step": 82680 + }, + { + "epoch": 499.2, + "learning_rate": 1.4380670212814718e-05, + "loss": 0.0101, + "step": 82700 + }, + { + "epoch": 499.32, + "learning_rate": 1.4349188428034742e-05, + "loss": 0.0107, + "step": 82720 + }, + { + "epoch": 499.44, + "learning_rate": 1.4317735366469703e-05, + "loss": 0.011, + "step": 82740 + }, + { + "epoch": 499.56, + "learning_rate": 1.4286311053460777e-05, + "loss": 0.0108, + "step": 82760 + }, + { + "epoch": 499.68, + "learning_rate": 1.4254915514325972e-05, + "loss": 0.0108, + "step": 82780 + }, + { + "epoch": 499.8, + "learning_rate": 1.4223548774360117e-05, + "loss": 0.0109, + "step": 82800 + }, + { + "epoch": 499.92, + "learning_rate": 1.4192210858834786e-05, + "loss": 0.0109, + "step": 82820 + }, + { + "epoch": 500.04, + "learning_rate": 1.416090179299841e-05, + "loss": 0.011, + "step": 82840 + }, + { + "epoch": 500.16, + "learning_rate": 1.4129621602076149e-05, + "loss": 0.0106, + "step": 82860 + }, + { + "epoch": 500.28, + "learning_rate": 1.4098370311269848e-05, + "loss": 0.0113, + "step": 82880 + }, + { + "epoch": 500.4, + "learning_rate": 1.4067147945758125e-05, + "loss": 0.0121, + "step": 82900 + }, + { + "epoch": 500.52, + "learning_rate": 1.4035954530696277e-05, + "loss": 0.0124, + "step": 82920 + }, + { + "epoch": 500.64, + "learning_rate": 1.4004790091216291e-05, + "loss": 0.0131, + "step": 82940 + }, + { + "epoch": 500.76, + "learning_rate": 1.3973654652426766e-05, + "loss": 0.0146, + "step": 82960 + }, + { + "epoch": 500.89, + "learning_rate": 1.394254823941295e-05, + "loss": 0.0171, + "step": 82980 + }, + { + "epoch": 501.01, + "learning_rate": 1.3911470877236777e-05, + "loss": 0.0242, + "step": 83000 + }, + { + "epoch": 501.01, + "eval_accuracy": 0.0012713496853730577, + "eval_loss": 23.54586410522461, + "eval_runtime": 18.1613, + "eval_samples_per_second": 112.327, + "eval_steps_per_second": 2.368, + "step": 83000 + }, + { + "epoch": 501.13, + "learning_rate": 1.3881974314313356e-05, + "loss": 0.0352, + "step": 83020 + }, + { + "epoch": 501.25, + "learning_rate": 1.3850953673266066e-05, + "loss": 0.0441, + "step": 83040 + }, + { + "epoch": 501.37, + "learning_rate": 1.3819962156852468e-05, + "loss": 0.0434, + "step": 83060 + }, + { + "epoch": 501.49, + "learning_rate": 1.3788999790041867e-05, + "loss": 0.0399, + "step": 83080 + }, + { + "epoch": 501.61, + "learning_rate": 1.3758066597780106e-05, + "loss": 0.0358, + "step": 83100 + }, + { + "epoch": 501.73, + "learning_rate": 1.3727162604989451e-05, + "loss": 0.0313, + "step": 83120 + }, + { + "epoch": 501.85, + "learning_rate": 1.3696287836568744e-05, + "loss": 0.0278, + "step": 83140 + }, + { + "epoch": 501.97, + "learning_rate": 1.3665442317393196e-05, + "loss": 0.0249, + "step": 83160 + }, + { + "epoch": 502.09, + "learning_rate": 1.3634626072314482e-05, + "loss": 0.019, + "step": 83180 + }, + { + "epoch": 502.21, + "learning_rate": 1.3603839126160711e-05, + "loss": 0.0165, + "step": 83200 + }, + { + "epoch": 502.33, + "learning_rate": 1.3573081503736362e-05, + "loss": 0.0161, + "step": 83220 + }, + { + "epoch": 502.45, + "learning_rate": 1.3542353229822308e-05, + "loss": 0.0154, + "step": 83240 + }, + { + "epoch": 502.58, + "learning_rate": 1.3511654329175727e-05, + "loss": 0.0149, + "step": 83260 + }, + { + "epoch": 502.7, + "learning_rate": 1.3480984826530218e-05, + "loss": 0.0149, + "step": 83280 + }, + { + "epoch": 502.82, + "learning_rate": 1.3450344746595612e-05, + "loss": 0.0146, + "step": 83300 + }, + { + "epoch": 502.94, + "learning_rate": 1.3419734114058075e-05, + "loss": 0.0144, + "step": 83320 + }, + { + "epoch": 503.06, + "learning_rate": 1.3389152953580047e-05, + "loss": 0.0129, + "step": 83340 + }, + { + "epoch": 503.18, + "learning_rate": 1.335860128980021e-05, + "loss": 0.0112, + "step": 83360 + }, + { + "epoch": 503.3, + "learning_rate": 1.3328079147333505e-05, + "loss": 0.0112, + "step": 83380 + }, + { + "epoch": 503.42, + "learning_rate": 1.329758655077104e-05, + "loss": 0.0112, + "step": 83400 + }, + { + "epoch": 503.54, + "learning_rate": 1.326712352468017e-05, + "loss": 0.0112, + "step": 83420 + }, + { + "epoch": 503.66, + "learning_rate": 1.3236690093604399e-05, + "loss": 0.0111, + "step": 83440 + }, + { + "epoch": 503.78, + "learning_rate": 1.3206286282063395e-05, + "loss": 0.0112, + "step": 83460 + }, + { + "epoch": 503.9, + "learning_rate": 1.3175912114552963e-05, + "loss": 0.0112, + "step": 83480 + }, + { + "epoch": 504.02, + "learning_rate": 1.3145567615545012e-05, + "loss": 0.011, + "step": 83500 + }, + { + "epoch": 504.14, + "learning_rate": 1.3115252809487577e-05, + "loss": 0.0103, + "step": 83520 + }, + { + "epoch": 504.27, + "learning_rate": 1.3084967720804725e-05, + "loss": 0.0105, + "step": 83540 + }, + { + "epoch": 504.39, + "learning_rate": 1.3054712373896611e-05, + "loss": 0.0105, + "step": 83560 + }, + { + "epoch": 504.51, + "learning_rate": 1.3024486793139423e-05, + "loss": 0.0106, + "step": 83580 + }, + { + "epoch": 504.63, + "learning_rate": 1.2994291002885373e-05, + "loss": 0.0107, + "step": 83600 + }, + { + "epoch": 504.75, + "learning_rate": 1.296412502746267e-05, + "loss": 0.0106, + "step": 83620 + }, + { + "epoch": 504.87, + "learning_rate": 1.2933988891175458e-05, + "loss": 0.0106, + "step": 83640 + }, + { + "epoch": 504.99, + "learning_rate": 1.290388261830393e-05, + "loss": 0.0107, + "step": 83660 + }, + { + "epoch": 505.11, + "learning_rate": 1.287380623310413e-05, + "loss": 0.0102, + "step": 83680 + }, + { + "epoch": 505.23, + "learning_rate": 1.2843759759808061e-05, + "loss": 0.0103, + "step": 83700 + }, + { + "epoch": 505.35, + "learning_rate": 1.2813743222623637e-05, + "loss": 0.0103, + "step": 83720 + }, + { + "epoch": 505.47, + "learning_rate": 1.2783756645734635e-05, + "loss": 0.0104, + "step": 83740 + }, + { + "epoch": 505.59, + "learning_rate": 1.2753800053300707e-05, + "loss": 0.0105, + "step": 83760 + }, + { + "epoch": 505.71, + "learning_rate": 1.2723873469457304e-05, + "loss": 0.0105, + "step": 83780 + }, + { + "epoch": 505.84, + "learning_rate": 1.2693976918315786e-05, + "loss": 0.0105, + "step": 83800 + }, + { + "epoch": 505.96, + "learning_rate": 1.2664110423963221e-05, + "loss": 0.0106, + "step": 83820 + }, + { + "epoch": 506.08, + "learning_rate": 1.2634274010462538e-05, + "loss": 0.0103, + "step": 83840 + }, + { + "epoch": 506.2, + "learning_rate": 1.2604467701852351e-05, + "loss": 0.0102, + "step": 83860 + }, + { + "epoch": 506.32, + "learning_rate": 1.2574691522147103e-05, + "loss": 0.0102, + "step": 83880 + }, + { + "epoch": 506.44, + "learning_rate": 1.254494549533693e-05, + "loss": 0.0103, + "step": 83900 + }, + { + "epoch": 506.56, + "learning_rate": 1.2515229645387638e-05, + "loss": 0.0104, + "step": 83920 + }, + { + "epoch": 506.68, + "learning_rate": 1.2485543996240762e-05, + "loss": 0.0103, + "step": 83940 + }, + { + "epoch": 506.8, + "learning_rate": 1.2455888571813495e-05, + "loss": 0.0104, + "step": 83960 + }, + { + "epoch": 506.92, + "learning_rate": 1.2426263395998694e-05, + "loss": 0.0105, + "step": 83980 + }, + { + "epoch": 507.04, + "learning_rate": 1.2396668492664788e-05, + "loss": 0.0104, + "step": 84000 + }, + { + "epoch": 507.04, + "eval_accuracy": 0.0012540624907390028, + "eval_loss": 23.56952667236328, + "eval_runtime": 18.1296, + "eval_samples_per_second": 112.523, + "eval_steps_per_second": 2.372, + "step": 84000 + }, + { + "epoch": 507.16, + "learning_rate": 1.23671038856559e-05, + "loss": 0.01, + "step": 84020 + }, + { + "epoch": 507.28, + "learning_rate": 1.233756959879167e-05, + "loss": 0.0102, + "step": 84040 + }, + { + "epoch": 507.4, + "learning_rate": 1.2308065655867346e-05, + "loss": 0.0102, + "step": 84060 + }, + { + "epoch": 507.53, + "learning_rate": 1.2278592080653728e-05, + "loss": 0.0103, + "step": 84080 + }, + { + "epoch": 507.65, + "learning_rate": 1.2249148896897139e-05, + "loss": 0.0104, + "step": 84100 + }, + { + "epoch": 507.77, + "learning_rate": 1.2219736128319436e-05, + "loss": 0.0104, + "step": 84120 + }, + { + "epoch": 507.89, + "learning_rate": 1.2190353798617925e-05, + "loss": 0.0104, + "step": 84140 + }, + { + "epoch": 508.01, + "learning_rate": 1.2161001931465437e-05, + "loss": 0.0104, + "step": 84160 + }, + { + "epoch": 508.13, + "learning_rate": 1.2131680550510238e-05, + "loss": 0.0101, + "step": 84180 + }, + { + "epoch": 508.25, + "learning_rate": 1.2102389679376036e-05, + "loss": 0.0101, + "step": 84200 + }, + { + "epoch": 508.37, + "learning_rate": 1.207312934166196e-05, + "loss": 0.0101, + "step": 84220 + }, + { + "epoch": 508.49, + "learning_rate": 1.2043899560942523e-05, + "loss": 0.0102, + "step": 84240 + }, + { + "epoch": 508.61, + "learning_rate": 1.2014700360767662e-05, + "loss": 0.0102, + "step": 84260 + }, + { + "epoch": 508.73, + "learning_rate": 1.1985531764662605e-05, + "loss": 0.0103, + "step": 84280 + }, + { + "epoch": 508.85, + "learning_rate": 1.1956393796127979e-05, + "loss": 0.0104, + "step": 84300 + }, + { + "epoch": 508.97, + "learning_rate": 1.1927286478639726e-05, + "loss": 0.0104, + "step": 84320 + }, + { + "epoch": 509.09, + "learning_rate": 1.1898209835649082e-05, + "loss": 0.01, + "step": 84340 + }, + { + "epoch": 509.22, + "learning_rate": 1.1869163890582586e-05, + "loss": 0.01, + "step": 84360 + }, + { + "epoch": 509.34, + "learning_rate": 1.1840148666841989e-05, + "loss": 0.0102, + "step": 84380 + }, + { + "epoch": 509.46, + "learning_rate": 1.181116418780439e-05, + "loss": 0.0101, + "step": 84400 + }, + { + "epoch": 509.58, + "learning_rate": 1.1782210476822015e-05, + "loss": 0.0103, + "step": 84420 + }, + { + "epoch": 509.7, + "learning_rate": 1.1753287557222365e-05, + "loss": 0.0103, + "step": 84440 + }, + { + "epoch": 509.82, + "learning_rate": 1.1724395452308112e-05, + "loss": 0.0103, + "step": 84460 + }, + { + "epoch": 509.94, + "learning_rate": 1.1695534185357099e-05, + "loss": 0.0104, + "step": 84480 + }, + { + "epoch": 510.06, + "learning_rate": 1.1666703779622351e-05, + "loss": 0.0102, + "step": 84500 + }, + { + "epoch": 510.18, + "learning_rate": 1.1637904258331956e-05, + "loss": 0.01, + "step": 84520 + }, + { + "epoch": 510.3, + "learning_rate": 1.1609135644689224e-05, + "loss": 0.01, + "step": 84540 + }, + { + "epoch": 510.42, + "learning_rate": 1.1580397961872463e-05, + "loss": 0.0101, + "step": 84560 + }, + { + "epoch": 510.54, + "learning_rate": 1.1551691233035144e-05, + "loss": 0.0102, + "step": 84580 + }, + { + "epoch": 510.66, + "learning_rate": 1.1523015481305704e-05, + "loss": 0.0103, + "step": 84600 + }, + { + "epoch": 510.78, + "learning_rate": 1.1494370729787728e-05, + "loss": 0.0103, + "step": 84620 + }, + { + "epoch": 510.91, + "learning_rate": 1.146575700155978e-05, + "loss": 0.0104, + "step": 84640 + }, + { + "epoch": 511.03, + "learning_rate": 1.1437174319675386e-05, + "loss": 0.0103, + "step": 84660 + }, + { + "epoch": 511.15, + "learning_rate": 1.1408622707163113e-05, + "loss": 0.0098, + "step": 84680 + }, + { + "epoch": 511.27, + "learning_rate": 1.1380102187026481e-05, + "loss": 0.01, + "step": 84700 + }, + { + "epoch": 511.39, + "learning_rate": 1.1351612782243976e-05, + "loss": 0.0101, + "step": 84720 + }, + { + "epoch": 511.51, + "learning_rate": 1.1323154515768947e-05, + "loss": 0.0102, + "step": 84740 + }, + { + "epoch": 511.63, + "learning_rate": 1.1294727410529754e-05, + "loss": 0.0102, + "step": 84760 + }, + { + "epoch": 511.75, + "learning_rate": 1.1266331489429593e-05, + "loss": 0.0103, + "step": 84780 + }, + { + "epoch": 511.87, + "learning_rate": 1.1237966775346526e-05, + "loss": 0.0103, + "step": 84800 + }, + { + "epoch": 511.99, + "learning_rate": 1.1209633291133503e-05, + "loss": 0.0104, + "step": 84820 + }, + { + "epoch": 512.11, + "learning_rate": 1.11813310596183e-05, + "loss": 0.0099, + "step": 84840 + }, + { + "epoch": 512.23, + "learning_rate": 1.1153060103603524e-05, + "loss": 0.0099, + "step": 84860 + }, + { + "epoch": 512.35, + "learning_rate": 1.1124820445866551e-05, + "loss": 0.0101, + "step": 84880 + }, + { + "epoch": 512.47, + "learning_rate": 1.109661210915956e-05, + "loss": 0.0101, + "step": 84900 + }, + { + "epoch": 512.6, + "learning_rate": 1.106843511620954e-05, + "loss": 0.0102, + "step": 84920 + }, + { + "epoch": 512.72, + "learning_rate": 1.1040289489718148e-05, + "loss": 0.0103, + "step": 84940 + }, + { + "epoch": 512.84, + "learning_rate": 1.1012175252361818e-05, + "loss": 0.0103, + "step": 84960 + }, + { + "epoch": 512.96, + "learning_rate": 1.0984092426791676e-05, + "loss": 0.0104, + "step": 84980 + }, + { + "epoch": 513.08, + "learning_rate": 1.0956041035633574e-05, + "loss": 0.01, + "step": 85000 + }, + { + "epoch": 513.08, + "eval_accuracy": 0.0012575199296658139, + "eval_loss": 23.66585350036621, + "eval_runtime": 18.2862, + "eval_samples_per_second": 111.56, + "eval_steps_per_second": 2.352, + "step": 85000 + }, + { + "epoch": 513.2, + "learning_rate": 1.0928021101487973e-05, + "loss": 0.01, + "step": 85020 + }, + { + "epoch": 513.32, + "learning_rate": 1.0900032646930031e-05, + "loss": 0.0099, + "step": 85040 + }, + { + "epoch": 513.44, + "learning_rate": 1.0872075694509587e-05, + "loss": 0.0101, + "step": 85060 + }, + { + "epoch": 513.56, + "learning_rate": 1.0844150266751007e-05, + "loss": 0.0102, + "step": 85080 + }, + { + "epoch": 513.68, + "learning_rate": 1.0816256386153328e-05, + "loss": 0.0103, + "step": 85100 + }, + { + "epoch": 513.8, + "learning_rate": 1.0788394075190112e-05, + "loss": 0.0102, + "step": 85120 + }, + { + "epoch": 513.92, + "learning_rate": 1.0760563356309566e-05, + "loss": 0.0103, + "step": 85140 + }, + { + "epoch": 514.04, + "learning_rate": 1.0732764251934363e-05, + "loss": 0.0102, + "step": 85160 + }, + { + "epoch": 514.16, + "learning_rate": 1.0704996784461752e-05, + "loss": 0.0099, + "step": 85180 + }, + { + "epoch": 514.29, + "learning_rate": 1.0677260976263476e-05, + "loss": 0.01, + "step": 85200 + }, + { + "epoch": 514.41, + "learning_rate": 1.0649556849685777e-05, + "loss": 0.01, + "step": 85220 + }, + { + "epoch": 514.53, + "learning_rate": 1.0621884427049389e-05, + "loss": 0.0101, + "step": 85240 + }, + { + "epoch": 514.65, + "learning_rate": 1.0594243730649428e-05, + "loss": 0.0101, + "step": 85260 + }, + { + "epoch": 514.77, + "learning_rate": 1.0566634782755574e-05, + "loss": 0.0103, + "step": 85280 + }, + { + "epoch": 514.89, + "learning_rate": 1.0539057605611808e-05, + "loss": 0.0103, + "step": 85300 + }, + { + "epoch": 515.01, + "learning_rate": 1.0511512221436581e-05, + "loss": 0.0103, + "step": 85320 + }, + { + "epoch": 515.13, + "learning_rate": 1.0483998652422706e-05, + "loss": 0.0098, + "step": 85340 + }, + { + "epoch": 515.25, + "learning_rate": 1.0456516920737375e-05, + "loss": 0.0099, + "step": 85360 + }, + { + "epoch": 515.37, + "learning_rate": 1.042906704852213e-05, + "loss": 0.01, + "step": 85380 + }, + { + "epoch": 515.49, + "learning_rate": 1.0401649057892814e-05, + "loss": 0.0101, + "step": 85400 + }, + { + "epoch": 515.61, + "learning_rate": 1.0374262970939625e-05, + "loss": 0.0102, + "step": 85420 + }, + { + "epoch": 515.73, + "learning_rate": 1.0346908809727029e-05, + "loss": 0.0102, + "step": 85440 + }, + { + "epoch": 515.86, + "learning_rate": 1.03195865962938e-05, + "loss": 0.0102, + "step": 85460 + }, + { + "epoch": 515.98, + "learning_rate": 1.0292296352652908e-05, + "loss": 0.0103, + "step": 85480 + }, + { + "epoch": 516.1, + "learning_rate": 1.0265038100791652e-05, + "loss": 0.0099, + "step": 85500 + }, + { + "epoch": 516.22, + "learning_rate": 1.0237811862671509e-05, + "loss": 0.01, + "step": 85520 + }, + { + "epoch": 516.34, + "learning_rate": 1.0210617660228145e-05, + "loss": 0.01, + "step": 85540 + }, + { + "epoch": 516.46, + "learning_rate": 1.0183455515371454e-05, + "loss": 0.0101, + "step": 85560 + }, + { + "epoch": 516.58, + "learning_rate": 1.0156325449985477e-05, + "loss": 0.0101, + "step": 85580 + }, + { + "epoch": 516.7, + "learning_rate": 1.0129227485928433e-05, + "loss": 0.0103, + "step": 85600 + }, + { + "epoch": 516.82, + "learning_rate": 1.0102161645032643e-05, + "loss": 0.0102, + "step": 85620 + }, + { + "epoch": 516.94, + "learning_rate": 1.0075127949104558e-05, + "loss": 0.0103, + "step": 85640 + }, + { + "epoch": 517.06, + "learning_rate": 1.004812641992478e-05, + "loss": 0.01, + "step": 85660 + }, + { + "epoch": 517.18, + "learning_rate": 1.0021157079247917e-05, + "loss": 0.0098, + "step": 85680 + }, + { + "epoch": 517.3, + "learning_rate": 9.994219948802686e-06, + "loss": 0.0099, + "step": 85700 + }, + { + "epoch": 517.42, + "learning_rate": 9.96731505029186e-06, + "loss": 0.01, + "step": 85720 + }, + { + "epoch": 517.55, + "learning_rate": 9.940442405392226e-06, + "loss": 0.0102, + "step": 85740 + }, + { + "epoch": 517.67, + "learning_rate": 9.913602035754572e-06, + "loss": 0.0102, + "step": 85760 + }, + { + "epoch": 517.79, + "learning_rate": 9.886793963003698e-06, + "loss": 0.0102, + "step": 85780 + }, + { + "epoch": 517.91, + "learning_rate": 9.860018208738425e-06, + "loss": 0.0103, + "step": 85800 + }, + { + "epoch": 518.03, + "learning_rate": 9.83327479453145e-06, + "loss": 0.0102, + "step": 85820 + }, + { + "epoch": 518.15, + "learning_rate": 9.806563741929475e-06, + "loss": 0.0097, + "step": 85840 + }, + { + "epoch": 518.27, + "learning_rate": 9.779885072453116e-06, + "loss": 0.0099, + "step": 85860 + }, + { + "epoch": 518.39, + "learning_rate": 9.753238807596903e-06, + "loss": 0.01, + "step": 85880 + }, + { + "epoch": 518.51, + "learning_rate": 9.726624968829223e-06, + "loss": 0.0101, + "step": 85900 + }, + { + "epoch": 518.63, + "learning_rate": 9.700043577592388e-06, + "loss": 0.0102, + "step": 85920 + }, + { + "epoch": 518.75, + "learning_rate": 9.673494655302533e-06, + "loss": 0.0102, + "step": 85940 + }, + { + "epoch": 518.87, + "learning_rate": 9.646978223349651e-06, + "loss": 0.0103, + "step": 85960 + }, + { + "epoch": 518.99, + "learning_rate": 9.620494303097566e-06, + "loss": 0.0103, + "step": 85980 + }, + { + "epoch": 519.11, + "learning_rate": 9.594042915883849e-06, + "loss": 0.0098, + "step": 86000 + }, + { + "epoch": 519.11, + "eval_accuracy": 0.0012545564105856902, + "eval_loss": 23.733713150024414, + "eval_runtime": 18.3445, + "eval_samples_per_second": 111.205, + "eval_steps_per_second": 2.344, + "step": 86000 + }, + { + "epoch": 519.24, + "learning_rate": 9.567624083019966e-06, + "loss": 0.0098, + "step": 86020 + }, + { + "epoch": 519.36, + "learning_rate": 9.541237825791044e-06, + "loss": 0.0099, + "step": 86040 + }, + { + "epoch": 519.48, + "learning_rate": 9.51488416545604e-06, + "loss": 0.0101, + "step": 86060 + }, + { + "epoch": 519.6, + "learning_rate": 9.488563123247612e-06, + "loss": 0.0101, + "step": 86080 + }, + { + "epoch": 519.72, + "learning_rate": 9.46227472037215e-06, + "loss": 0.0102, + "step": 86100 + }, + { + "epoch": 519.84, + "learning_rate": 9.436018978009759e-06, + "loss": 0.0102, + "step": 86120 + }, + { + "epoch": 519.96, + "learning_rate": 9.409795917314195e-06, + "loss": 0.0103, + "step": 86140 + }, + { + "epoch": 520.08, + "learning_rate": 9.383605559412911e-06, + "loss": 0.01, + "step": 86160 + }, + { + "epoch": 520.2, + "learning_rate": 9.357447925407008e-06, + "loss": 0.0098, + "step": 86180 + }, + { + "epoch": 520.32, + "learning_rate": 9.33132303637122e-06, + "loss": 0.01, + "step": 86200 + }, + { + "epoch": 520.44, + "learning_rate": 9.305230913353896e-06, + "loss": 0.0101, + "step": 86220 + }, + { + "epoch": 520.56, + "learning_rate": 9.279171577376988e-06, + "loss": 0.0102, + "step": 86240 + }, + { + "epoch": 520.68, + "learning_rate": 9.253145049436046e-06, + "loss": 0.0101, + "step": 86260 + }, + { + "epoch": 520.8, + "learning_rate": 9.227151350500151e-06, + "loss": 0.0103, + "step": 86280 + }, + { + "epoch": 520.93, + "learning_rate": 9.201190501511964e-06, + "loss": 0.0102, + "step": 86300 + }, + { + "epoch": 521.05, + "learning_rate": 9.175262523387678e-06, + "loss": 0.0101, + "step": 86320 + }, + { + "epoch": 521.17, + "learning_rate": 9.149367437016992e-06, + "loss": 0.0097, + "step": 86340 + }, + { + "epoch": 521.29, + "learning_rate": 9.12350526326311e-06, + "loss": 0.0099, + "step": 86360 + }, + { + "epoch": 521.41, + "learning_rate": 9.09767602296272e-06, + "loss": 0.01, + "step": 86380 + }, + { + "epoch": 521.53, + "learning_rate": 9.071879736925987e-06, + "loss": 0.0101, + "step": 86400 + }, + { + "epoch": 521.65, + "learning_rate": 9.046116425936491e-06, + "loss": 0.0101, + "step": 86420 + }, + { + "epoch": 521.77, + "learning_rate": 9.020386110751266e-06, + "loss": 0.0102, + "step": 86440 + }, + { + "epoch": 521.89, + "learning_rate": 8.994688812100776e-06, + "loss": 0.0102, + "step": 86460 + }, + { + "epoch": 522.01, + "learning_rate": 8.969024550688881e-06, + "loss": 0.0103, + "step": 86480 + }, + { + "epoch": 522.13, + "learning_rate": 8.94339334719278e-06, + "loss": 0.0097, + "step": 86500 + }, + { + "epoch": 522.25, + "learning_rate": 8.917795222263076e-06, + "loss": 0.0099, + "step": 86520 + }, + { + "epoch": 522.37, + "learning_rate": 8.892230196523754e-06, + "loss": 0.0101, + "step": 86540 + }, + { + "epoch": 522.49, + "learning_rate": 8.866698290572051e-06, + "loss": 0.01, + "step": 86560 + }, + { + "epoch": 522.62, + "learning_rate": 8.841199524978583e-06, + "loss": 0.01, + "step": 86580 + }, + { + "epoch": 522.74, + "learning_rate": 8.81573392028724e-06, + "loss": 0.0101, + "step": 86600 + }, + { + "epoch": 522.86, + "learning_rate": 8.790301497015207e-06, + "loss": 0.0103, + "step": 86620 + }, + { + "epoch": 522.98, + "learning_rate": 8.764902275652914e-06, + "loss": 0.0102, + "step": 86640 + }, + { + "epoch": 523.1, + "learning_rate": 8.739536276664063e-06, + "loss": 0.0098, + "step": 86660 + }, + { + "epoch": 523.22, + "learning_rate": 8.714203520485582e-06, + "loss": 0.0099, + "step": 86680 + }, + { + "epoch": 523.34, + "learning_rate": 8.688904027527605e-06, + "loss": 0.0099, + "step": 86700 + }, + { + "epoch": 523.46, + "learning_rate": 8.663637818173504e-06, + "loss": 0.01, + "step": 86720 + }, + { + "epoch": 523.58, + "learning_rate": 8.638404912779763e-06, + "loss": 0.0101, + "step": 86740 + }, + { + "epoch": 523.7, + "learning_rate": 8.613205331676133e-06, + "loss": 0.0101, + "step": 86760 + }, + { + "epoch": 523.82, + "learning_rate": 8.588039095165412e-06, + "loss": 0.0102, + "step": 86780 + }, + { + "epoch": 523.94, + "learning_rate": 8.562906223523603e-06, + "loss": 0.0103, + "step": 86800 + }, + { + "epoch": 524.06, + "learning_rate": 8.53780673699981e-06, + "loss": 0.01, + "step": 86820 + }, + { + "epoch": 524.19, + "learning_rate": 8.512740655816232e-06, + "loss": 0.0098, + "step": 86840 + }, + { + "epoch": 524.31, + "learning_rate": 8.487708000168165e-06, + "loss": 0.0099, + "step": 86860 + }, + { + "epoch": 524.43, + "learning_rate": 8.46270879022394e-06, + "loss": 0.01, + "step": 86880 + }, + { + "epoch": 524.55, + "learning_rate": 8.437743046125013e-06, + "loss": 0.0101, + "step": 86900 + }, + { + "epoch": 524.67, + "learning_rate": 8.412810787985797e-06, + "loss": 0.0101, + "step": 86920 + }, + { + "epoch": 524.79, + "learning_rate": 8.387912035893774e-06, + "loss": 0.0102, + "step": 86940 + }, + { + "epoch": 524.91, + "learning_rate": 8.36304680990942e-06, + "loss": 0.0102, + "step": 86960 + }, + { + "epoch": 525.03, + "learning_rate": 8.338215130066195e-06, + "loss": 0.0102, + "step": 86980 + }, + { + "epoch": 525.15, + "learning_rate": 8.313417016370556e-06, + "loss": 0.0097, + "step": 87000 + }, + { + "epoch": 525.15, + "eval_accuracy": 0.001259495609052563, + "eval_loss": 23.796077728271484, + "eval_runtime": 18.2469, + "eval_samples_per_second": 111.8, + "eval_steps_per_second": 2.357, + "step": 87000 + }, + { + "epoch": 525.27, + "learning_rate": 8.288652488801858e-06, + "loss": 0.0098, + "step": 87020 + }, + { + "epoch": 525.39, + "learning_rate": 8.263921567312454e-06, + "loss": 0.0099, + "step": 87040 + }, + { + "epoch": 525.51, + "learning_rate": 8.2392242718276e-06, + "loss": 0.01, + "step": 87060 + }, + { + "epoch": 525.63, + "learning_rate": 8.21456062224546e-06, + "loss": 0.0102, + "step": 87080 + }, + { + "epoch": 525.75, + "learning_rate": 8.189930638437094e-06, + "loss": 0.0101, + "step": 87100 + }, + { + "epoch": 525.88, + "learning_rate": 8.165334340246427e-06, + "loss": 0.0103, + "step": 87120 + }, + { + "epoch": 526.0, + "learning_rate": 8.140771747490273e-06, + "loss": 0.0102, + "step": 87140 + }, + { + "epoch": 526.12, + "learning_rate": 8.116242879958236e-06, + "loss": 0.0098, + "step": 87160 + }, + { + "epoch": 526.24, + "learning_rate": 8.091747757412804e-06, + "loss": 0.0098, + "step": 87180 + }, + { + "epoch": 526.36, + "learning_rate": 8.067286399589246e-06, + "loss": 0.0099, + "step": 87200 + }, + { + "epoch": 526.48, + "learning_rate": 8.042858826195648e-06, + "loss": 0.01, + "step": 87220 + }, + { + "epoch": 526.6, + "learning_rate": 8.01846505691286e-06, + "loss": 0.01, + "step": 87240 + }, + { + "epoch": 526.72, + "learning_rate": 7.99410511139448e-06, + "loss": 0.0102, + "step": 87260 + }, + { + "epoch": 526.84, + "learning_rate": 7.969779009266915e-06, + "loss": 0.0102, + "step": 87280 + }, + { + "epoch": 526.96, + "learning_rate": 7.945486770129234e-06, + "loss": 0.0103, + "step": 87300 + }, + { + "epoch": 527.08, + "learning_rate": 7.921228413553272e-06, + "loss": 0.0098, + "step": 87320 + }, + { + "epoch": 527.2, + "learning_rate": 7.897003959083538e-06, + "loss": 0.0098, + "step": 87340 + }, + { + "epoch": 527.32, + "learning_rate": 7.87281342623724e-06, + "loss": 0.0099, + "step": 87360 + }, + { + "epoch": 527.44, + "learning_rate": 7.848656834504276e-06, + "loss": 0.01, + "step": 87380 + }, + { + "epoch": 527.57, + "learning_rate": 7.824534203347122e-06, + "loss": 0.01, + "step": 87400 + }, + { + "epoch": 527.69, + "learning_rate": 7.800445552201013e-06, + "loss": 0.0102, + "step": 87420 + }, + { + "epoch": 527.81, + "learning_rate": 7.77639090047369e-06, + "loss": 0.0102, + "step": 87440 + }, + { + "epoch": 527.93, + "learning_rate": 7.752370267545584e-06, + "loss": 0.0103, + "step": 87460 + }, + { + "epoch": 528.05, + "learning_rate": 7.728383672769641e-06, + "loss": 0.0099, + "step": 87480 + }, + { + "epoch": 528.17, + "learning_rate": 7.704431135471473e-06, + "loss": 0.0097, + "step": 87500 + }, + { + "epoch": 528.29, + "learning_rate": 7.680512674949197e-06, + "loss": 0.0099, + "step": 87520 + }, + { + "epoch": 528.41, + "learning_rate": 7.656628310473468e-06, + "loss": 0.0099, + "step": 87540 + }, + { + "epoch": 528.53, + "learning_rate": 7.632778061287493e-06, + "loss": 0.01, + "step": 87560 + }, + { + "epoch": 528.65, + "learning_rate": 7.608961946606996e-06, + "loss": 0.0101, + "step": 87580 + }, + { + "epoch": 528.77, + "learning_rate": 7.5851799856201945e-06, + "loss": 0.0101, + "step": 87600 + }, + { + "epoch": 528.89, + "learning_rate": 7.56143219748775e-06, + "loss": 0.0102, + "step": 87620 + }, + { + "epoch": 529.01, + "learning_rate": 7.537718601342858e-06, + "loss": 0.0102, + "step": 87640 + }, + { + "epoch": 529.13, + "learning_rate": 7.514039216291147e-06, + "loss": 0.0097, + "step": 87660 + }, + { + "epoch": 529.26, + "learning_rate": 7.490394061410638e-06, + "loss": 0.0098, + "step": 87680 + }, + { + "epoch": 529.38, + "learning_rate": 7.4667831557518165e-06, + "loss": 0.0099, + "step": 87700 + }, + { + "epoch": 529.5, + "learning_rate": 7.443206518337564e-06, + "loss": 0.0101, + "step": 87720 + }, + { + "epoch": 529.62, + "learning_rate": 7.419664168163165e-06, + "loss": 0.01, + "step": 87740 + }, + { + "epoch": 529.74, + "learning_rate": 7.396156124196241e-06, + "loss": 0.0101, + "step": 87760 + }, + { + "epoch": 529.86, + "learning_rate": 7.372682405376807e-06, + "loss": 0.0102, + "step": 87780 + }, + { + "epoch": 529.98, + "learning_rate": 7.34924303061722e-06, + "loss": 0.0102, + "step": 87800 + }, + { + "epoch": 530.1, + "learning_rate": 7.325838018802156e-06, + "loss": 0.0097, + "step": 87820 + }, + { + "epoch": 530.22, + "learning_rate": 7.302467388788614e-06, + "loss": 0.0098, + "step": 87840 + }, + { + "epoch": 530.34, + "learning_rate": 7.279131159405888e-06, + "loss": 0.0099, + "step": 87860 + }, + { + "epoch": 530.46, + "learning_rate": 7.255829349455567e-06, + "loss": 0.01, + "step": 87880 + }, + { + "epoch": 530.58, + "learning_rate": 7.232561977711472e-06, + "loss": 0.01, + "step": 87900 + }, + { + "epoch": 530.7, + "learning_rate": 7.209329062919723e-06, + "loss": 0.0102, + "step": 87920 + }, + { + "epoch": 530.82, + "learning_rate": 7.186130623798648e-06, + "loss": 0.0101, + "step": 87940 + }, + { + "epoch": 530.95, + "learning_rate": 7.1629666790388236e-06, + "loss": 0.0102, + "step": 87960 + }, + { + "epoch": 531.07, + "learning_rate": 7.139837247303028e-06, + "loss": 0.0099, + "step": 87980 + }, + { + "epoch": 531.19, + "learning_rate": 7.11674234722619e-06, + "loss": 0.0097, + "step": 88000 + }, + { + "epoch": 531.19, + "eval_accuracy": 0.0012550503304323774, + "eval_loss": 23.857345581054688, + "eval_runtime": 18.1585, + "eval_samples_per_second": 112.344, + "eval_steps_per_second": 2.368, + "step": 88000 + }, + { + "epoch": 531.31, + "learning_rate": 7.093681997415508e-06, + "loss": 0.0099, + "step": 88020 + }, + { + "epoch": 531.43, + "learning_rate": 7.070656216450239e-06, + "loss": 0.0099, + "step": 88040 + }, + { + "epoch": 531.55, + "learning_rate": 7.047665022881866e-06, + "loss": 0.01, + "step": 88060 + }, + { + "epoch": 531.67, + "learning_rate": 7.0247084352339675e-06, + "loss": 0.0101, + "step": 88080 + }, + { + "epoch": 531.79, + "learning_rate": 7.001786472002259e-06, + "loss": 0.0101, + "step": 88100 + }, + { + "epoch": 531.91, + "learning_rate": 6.978899151654555e-06, + "loss": 0.0102, + "step": 88120 + }, + { + "epoch": 532.03, + "learning_rate": 6.95604649263073e-06, + "loss": 0.01, + "step": 88140 + }, + { + "epoch": 532.15, + "learning_rate": 6.933228513342804e-06, + "loss": 0.0096, + "step": 88160 + }, + { + "epoch": 532.27, + "learning_rate": 6.910445232174772e-06, + "loss": 0.0098, + "step": 88180 + }, + { + "epoch": 532.39, + "learning_rate": 6.887696667482729e-06, + "loss": 0.0099, + "step": 88200 + }, + { + "epoch": 532.52, + "learning_rate": 6.8649828375947745e-06, + "loss": 0.01, + "step": 88220 + }, + { + "epoch": 532.64, + "learning_rate": 6.8423037608110415e-06, + "loss": 0.01, + "step": 88240 + }, + { + "epoch": 532.76, + "learning_rate": 6.8196594554036545e-06, + "loss": 0.0101, + "step": 88260 + }, + { + "epoch": 532.88, + "learning_rate": 6.797049939616701e-06, + "loss": 0.0102, + "step": 88280 + }, + { + "epoch": 533.0, + "learning_rate": 6.774475231666272e-06, + "loss": 0.0102, + "step": 88300 + }, + { + "epoch": 533.12, + "learning_rate": 6.751935349740407e-06, + "loss": 0.0096, + "step": 88320 + }, + { + "epoch": 533.24, + "learning_rate": 6.729430311999085e-06, + "loss": 0.0097, + "step": 88340 + }, + { + "epoch": 533.36, + "learning_rate": 6.706960136574175e-06, + "loss": 0.0099, + "step": 88360 + }, + { + "epoch": 533.48, + "learning_rate": 6.684524841569534e-06, + "loss": 0.0099, + "step": 88380 + }, + { + "epoch": 533.6, + "learning_rate": 6.662124445060863e-06, + "loss": 0.01, + "step": 88400 + }, + { + "epoch": 533.72, + "learning_rate": 6.639758965095744e-06, + "loss": 0.01, + "step": 88420 + }, + { + "epoch": 533.84, + "learning_rate": 6.617428419693639e-06, + "loss": 0.0102, + "step": 88440 + }, + { + "epoch": 533.96, + "learning_rate": 6.595132826845879e-06, + "loss": 0.0102, + "step": 88460 + }, + { + "epoch": 534.08, + "learning_rate": 6.5728722045156285e-06, + "loss": 0.0099, + "step": 88480 + }, + { + "epoch": 534.21, + "learning_rate": 6.550646570637836e-06, + "loss": 0.0097, + "step": 88500 + }, + { + "epoch": 534.33, + "learning_rate": 6.528455943119305e-06, + "loss": 0.0098, + "step": 88520 + }, + { + "epoch": 534.45, + "learning_rate": 6.506300339838656e-06, + "loss": 0.0099, + "step": 88540 + }, + { + "epoch": 534.57, + "learning_rate": 6.484179778646216e-06, + "loss": 0.0099, + "step": 88560 + }, + { + "epoch": 534.69, + "learning_rate": 6.462094277364139e-06, + "loss": 0.0101, + "step": 88580 + }, + { + "epoch": 534.81, + "learning_rate": 6.440043853786315e-06, + "loss": 0.0101, + "step": 88600 + }, + { + "epoch": 534.93, + "learning_rate": 6.418028525678382e-06, + "loss": 0.0102, + "step": 88620 + }, + { + "epoch": 535.05, + "learning_rate": 6.396048310777669e-06, + "loss": 0.01, + "step": 88640 + }, + { + "epoch": 535.17, + "learning_rate": 6.374103226793243e-06, + "loss": 0.0097, + "step": 88660 + }, + { + "epoch": 535.29, + "learning_rate": 6.352193291405883e-06, + "loss": 0.0097, + "step": 88680 + }, + { + "epoch": 535.41, + "learning_rate": 6.330318522268008e-06, + "loss": 0.0099, + "step": 88700 + }, + { + "epoch": 535.53, + "learning_rate": 6.308478937003731e-06, + "loss": 0.01, + "step": 88720 + }, + { + "epoch": 535.65, + "learning_rate": 6.28667455320881e-06, + "loss": 0.0099, + "step": 88740 + }, + { + "epoch": 535.77, + "learning_rate": 6.264905388450659e-06, + "loss": 0.0101, + "step": 88760 + }, + { + "epoch": 535.9, + "learning_rate": 6.2431714602682714e-06, + "loss": 0.0101, + "step": 88780 + }, + { + "epoch": 536.02, + "learning_rate": 6.221472786172294e-06, + "loss": 0.0101, + "step": 88800 + }, + { + "epoch": 536.14, + "learning_rate": 6.199809383644956e-06, + "loss": 0.0096, + "step": 88820 + }, + { + "epoch": 536.26, + "learning_rate": 6.178181270140077e-06, + "loss": 0.0098, + "step": 88840 + }, + { + "epoch": 536.38, + "learning_rate": 6.156588463083035e-06, + "loss": 0.0098, + "step": 88860 + }, + { + "epoch": 536.5, + "learning_rate": 6.135030979870743e-06, + "loss": 0.0099, + "step": 88880 + }, + { + "epoch": 536.62, + "learning_rate": 6.113508837871718e-06, + "loss": 0.01, + "step": 88900 + }, + { + "epoch": 536.74, + "learning_rate": 6.092022054425928e-06, + "loss": 0.0101, + "step": 88920 + }, + { + "epoch": 536.86, + "learning_rate": 6.070570646844886e-06, + "loss": 0.01, + "step": 88940 + }, + { + "epoch": 536.98, + "learning_rate": 6.049154632411624e-06, + "loss": 0.0102, + "step": 88960 + }, + { + "epoch": 537.1, + "learning_rate": 6.027774028380623e-06, + "loss": 0.0097, + "step": 88980 + }, + { + "epoch": 537.22, + "learning_rate": 6.0064288519778635e-06, + "loss": 0.0097, + "step": 89000 + }, + { + "epoch": 537.22, + "eval_accuracy": 0.0012664104869061848, + "eval_loss": 23.905231475830078, + "eval_runtime": 18.1439, + "eval_samples_per_second": 112.435, + "eval_steps_per_second": 2.37, + "step": 89000 + }, + { + "epoch": 537.34, + "learning_rate": 5.985119120400745e-06, + "loss": 0.0099, + "step": 89020 + }, + { + "epoch": 537.46, + "learning_rate": 5.963844850818151e-06, + "loss": 0.0099, + "step": 89040 + }, + { + "epoch": 537.59, + "learning_rate": 5.94260606037037e-06, + "loss": 0.01, + "step": 89060 + }, + { + "epoch": 537.71, + "learning_rate": 5.921402766169126e-06, + "loss": 0.01, + "step": 89080 + }, + { + "epoch": 537.83, + "learning_rate": 5.900234985297498e-06, + "loss": 0.0101, + "step": 89100 + }, + { + "epoch": 537.95, + "learning_rate": 5.879102734810016e-06, + "loss": 0.0102, + "step": 89120 + }, + { + "epoch": 538.07, + "learning_rate": 5.858006031732549e-06, + "loss": 0.0098, + "step": 89140 + }, + { + "epoch": 538.19, + "learning_rate": 5.836944893062318e-06, + "loss": 0.0097, + "step": 89160 + }, + { + "epoch": 538.31, + "learning_rate": 5.8159193357679e-06, + "loss": 0.0097, + "step": 89180 + }, + { + "epoch": 538.43, + "learning_rate": 5.794929376789215e-06, + "loss": 0.0099, + "step": 89200 + }, + { + "epoch": 538.55, + "learning_rate": 5.773975033037499e-06, + "loss": 0.0099, + "step": 89220 + }, + { + "epoch": 538.67, + "learning_rate": 5.753056321395267e-06, + "loss": 0.01, + "step": 89240 + }, + { + "epoch": 538.79, + "learning_rate": 5.732173258716366e-06, + "loss": 0.0102, + "step": 89260 + }, + { + "epoch": 538.91, + "learning_rate": 5.711325861825906e-06, + "loss": 0.01, + "step": 89280 + }, + { + "epoch": 539.03, + "learning_rate": 5.690514147520243e-06, + "loss": 0.01, + "step": 89300 + }, + { + "epoch": 539.15, + "learning_rate": 5.66973813256701e-06, + "loss": 0.0096, + "step": 89320 + }, + { + "epoch": 539.28, + "learning_rate": 5.6489978337050555e-06, + "loss": 0.0098, + "step": 89340 + }, + { + "epoch": 539.4, + "learning_rate": 5.6282932676444856e-06, + "loss": 0.0098, + "step": 89360 + }, + { + "epoch": 539.52, + "learning_rate": 5.607624451066568e-06, + "loss": 0.0098, + "step": 89380 + }, + { + "epoch": 539.64, + "learning_rate": 5.586991400623798e-06, + "loss": 0.01, + "step": 89400 + }, + { + "epoch": 539.76, + "learning_rate": 5.566394132939884e-06, + "loss": 0.01, + "step": 89420 + }, + { + "epoch": 539.88, + "learning_rate": 5.54583266460964e-06, + "loss": 0.0101, + "step": 89440 + }, + { + "epoch": 540.0, + "learning_rate": 5.525307012199077e-06, + "loss": 0.0101, + "step": 89460 + }, + { + "epoch": 540.12, + "learning_rate": 5.504817192245343e-06, + "loss": 0.0096, + "step": 89480 + }, + { + "epoch": 540.24, + "learning_rate": 5.484363221256733e-06, + "loss": 0.0097, + "step": 89500 + }, + { + "epoch": 540.36, + "learning_rate": 5.463945115712609e-06, + "loss": 0.0098, + "step": 89520 + }, + { + "epoch": 540.48, + "learning_rate": 5.443562892063497e-06, + "loss": 0.0099, + "step": 89540 + }, + { + "epoch": 540.6, + "learning_rate": 5.423216566730971e-06, + "loss": 0.0099, + "step": 89560 + }, + { + "epoch": 540.72, + "learning_rate": 5.4029061561077064e-06, + "loss": 0.01, + "step": 89580 + }, + { + "epoch": 540.85, + "learning_rate": 5.382631676557437e-06, + "loss": 0.0101, + "step": 89600 + }, + { + "epoch": 540.97, + "learning_rate": 5.3623931444149235e-06, + "loss": 0.0101, + "step": 89620 + }, + { + "epoch": 541.09, + "learning_rate": 5.342190575986022e-06, + "loss": 0.0097, + "step": 89640 + }, + { + "epoch": 541.21, + "learning_rate": 5.322023987547547e-06, + "loss": 0.0096, + "step": 89660 + }, + { + "epoch": 541.33, + "learning_rate": 5.301893395347363e-06, + "loss": 0.0098, + "step": 89680 + }, + { + "epoch": 541.45, + "learning_rate": 5.281798815604327e-06, + "loss": 0.0099, + "step": 89700 + }, + { + "epoch": 541.57, + "learning_rate": 5.261740264508275e-06, + "loss": 0.0099, + "step": 89720 + }, + { + "epoch": 541.69, + "learning_rate": 5.2417177582200325e-06, + "loss": 0.01, + "step": 89740 + }, + { + "epoch": 541.81, + "learning_rate": 5.2217313128713415e-06, + "loss": 0.0099, + "step": 89760 + }, + { + "epoch": 541.93, + "learning_rate": 5.20178094456496e-06, + "loss": 0.01, + "step": 89780 + }, + { + "epoch": 542.05, + "learning_rate": 5.1818666693745076e-06, + "loss": 0.0098, + "step": 89800 + }, + { + "epoch": 542.17, + "learning_rate": 5.161988503344561e-06, + "loss": 0.0096, + "step": 89820 + }, + { + "epoch": 542.29, + "learning_rate": 5.1421464624906155e-06, + "loss": 0.0097, + "step": 89840 + }, + { + "epoch": 542.41, + "learning_rate": 5.122340562799027e-06, + "loss": 0.0098, + "step": 89860 + }, + { + "epoch": 542.54, + "learning_rate": 5.1025708202270765e-06, + "loss": 0.0098, + "step": 89880 + }, + { + "epoch": 542.66, + "learning_rate": 5.0828372507028545e-06, + "loss": 0.01, + "step": 89900 + }, + { + "epoch": 542.78, + "learning_rate": 5.063139870125367e-06, + "loss": 0.0101, + "step": 89920 + }, + { + "epoch": 542.9, + "learning_rate": 5.043478694364423e-06, + "loss": 0.01, + "step": 89940 + }, + { + "epoch": 543.02, + "learning_rate": 5.023853739260681e-06, + "loss": 0.0099, + "step": 89960 + }, + { + "epoch": 543.14, + "learning_rate": 5.0042650206256146e-06, + "loss": 0.0096, + "step": 89980 + }, + { + "epoch": 543.26, + "learning_rate": 4.9847125542415055e-06, + "loss": 0.0097, + "step": 90000 + }, + { + "epoch": 543.26, + "eval_accuracy": 0.001261471288439312, + "eval_loss": 23.952375411987305, + "eval_runtime": 18.3553, + "eval_samples_per_second": 111.139, + "eval_steps_per_second": 2.343, + "step": 90000 + }, + { + "epoch": 543.38, + "learning_rate": 4.965196355861423e-06, + "loss": 0.0098, + "step": 90020 + }, + { + "epoch": 543.5, + "learning_rate": 4.9457164412092025e-06, + "loss": 0.0098, + "step": 90040 + }, + { + "epoch": 543.62, + "learning_rate": 4.926272825979466e-06, + "loss": 0.0099, + "step": 90060 + }, + { + "epoch": 543.74, + "learning_rate": 4.906865525837589e-06, + "loss": 0.01, + "step": 90080 + }, + { + "epoch": 543.86, + "learning_rate": 4.887494556419675e-06, + "loss": 0.0099, + "step": 90100 + }, + { + "epoch": 543.98, + "learning_rate": 4.868159933332572e-06, + "loss": 0.01, + "step": 90120 + }, + { + "epoch": 544.1, + "learning_rate": 4.8488616721538205e-06, + "loss": 0.0096, + "step": 90140 + }, + { + "epoch": 544.23, + "learning_rate": 4.82959978843171e-06, + "loss": 0.0096, + "step": 90160 + }, + { + "epoch": 544.35, + "learning_rate": 4.810374297685161e-06, + "loss": 0.0097, + "step": 90180 + }, + { + "epoch": 544.47, + "learning_rate": 4.791185215403821e-06, + "loss": 0.0098, + "step": 90200 + }, + { + "epoch": 544.59, + "learning_rate": 4.772032557047984e-06, + "loss": 0.0099, + "step": 90220 + }, + { + "epoch": 544.71, + "learning_rate": 4.7529163380486074e-06, + "loss": 0.01, + "step": 90240 + }, + { + "epoch": 544.83, + "learning_rate": 4.7338365738072655e-06, + "loss": 0.0099, + "step": 90260 + }, + { + "epoch": 544.95, + "learning_rate": 4.714793279696189e-06, + "loss": 0.01, + "step": 90280 + }, + { + "epoch": 545.07, + "learning_rate": 4.695786471058233e-06, + "loss": 0.0097, + "step": 90300 + }, + { + "epoch": 545.19, + "learning_rate": 4.676816163206815e-06, + "loss": 0.0096, + "step": 90320 + }, + { + "epoch": 545.31, + "learning_rate": 4.657882371425987e-06, + "loss": 0.0096, + "step": 90340 + }, + { + "epoch": 545.43, + "learning_rate": 4.638985110970339e-06, + "loss": 0.0097, + "step": 90360 + }, + { + "epoch": 545.55, + "learning_rate": 4.62012439706509e-06, + "loss": 0.0099, + "step": 90380 + }, + { + "epoch": 545.67, + "learning_rate": 4.601300244905943e-06, + "loss": 0.0099, + "step": 90400 + }, + { + "epoch": 545.79, + "learning_rate": 4.582512669659189e-06, + "loss": 0.0099, + "step": 90420 + }, + { + "epoch": 545.92, + "learning_rate": 4.563761686461638e-06, + "loss": 0.01, + "step": 90440 + }, + { + "epoch": 546.04, + "learning_rate": 4.545047310420619e-06, + "loss": 0.0098, + "step": 90460 + }, + { + "epoch": 546.16, + "learning_rate": 4.52636955661398e-06, + "loss": 0.0095, + "step": 90480 + }, + { + "epoch": 546.28, + "learning_rate": 4.507728440090014e-06, + "loss": 0.0096, + "step": 90500 + }, + { + "epoch": 546.4, + "learning_rate": 4.489123975867576e-06, + "loss": 0.0097, + "step": 90520 + }, + { + "epoch": 546.52, + "learning_rate": 4.47055617893592e-06, + "loss": 0.0098, + "step": 90540 + }, + { + "epoch": 546.64, + "learning_rate": 4.4520250642547835e-06, + "loss": 0.01, + "step": 90560 + }, + { + "epoch": 546.76, + "learning_rate": 4.433530646754364e-06, + "loss": 0.0099, + "step": 90580 + }, + { + "epoch": 546.88, + "learning_rate": 4.415072941335269e-06, + "loss": 0.01, + "step": 90600 + }, + { + "epoch": 547.0, + "learning_rate": 4.396651962868553e-06, + "loss": 0.01, + "step": 90620 + }, + { + "epoch": 547.12, + "learning_rate": 4.378267726195645e-06, + "loss": 0.0095, + "step": 90640 + }, + { + "epoch": 547.24, + "learning_rate": 4.359920246128402e-06, + "loss": 0.0096, + "step": 90660 + }, + { + "epoch": 547.36, + "learning_rate": 4.34160953744906e-06, + "loss": 0.0097, + "step": 90680 + }, + { + "epoch": 547.48, + "learning_rate": 4.323335614910224e-06, + "loss": 0.0097, + "step": 90700 + }, + { + "epoch": 547.61, + "learning_rate": 4.30509849323486e-06, + "loss": 0.0099, + "step": 90720 + }, + { + "epoch": 547.73, + "learning_rate": 4.286898187116295e-06, + "loss": 0.0099, + "step": 90740 + }, + { + "epoch": 547.85, + "learning_rate": 4.268734711218192e-06, + "loss": 0.0099, + "step": 90760 + }, + { + "epoch": 547.97, + "learning_rate": 4.250608080174512e-06, + "loss": 0.01, + "step": 90780 + }, + { + "epoch": 548.09, + "learning_rate": 4.232518308589573e-06, + "loss": 0.0096, + "step": 90800 + }, + { + "epoch": 548.21, + "learning_rate": 4.214465411037971e-06, + "loss": 0.0095, + "step": 90820 + }, + { + "epoch": 548.33, + "learning_rate": 4.196449402064606e-06, + "loss": 0.0097, + "step": 90840 + }, + { + "epoch": 548.45, + "learning_rate": 4.17847029618465e-06, + "loss": 0.0097, + "step": 90860 + }, + { + "epoch": 548.57, + "learning_rate": 4.160528107883527e-06, + "loss": 0.0098, + "step": 90880 + }, + { + "epoch": 548.69, + "learning_rate": 4.142622851616962e-06, + "loss": 0.0099, + "step": 90900 + }, + { + "epoch": 548.81, + "learning_rate": 4.1247545418108715e-06, + "loss": 0.0099, + "step": 90920 + }, + { + "epoch": 548.93, + "learning_rate": 4.106923192861445e-06, + "loss": 0.01, + "step": 90940 + }, + { + "epoch": 549.05, + "learning_rate": 4.089128819135069e-06, + "loss": 0.0097, + "step": 90960 + }, + { + "epoch": 549.18, + "learning_rate": 4.071371434968352e-06, + "loss": 0.0095, + "step": 90980 + }, + { + "epoch": 549.3, + "learning_rate": 4.053651054668112e-06, + "loss": 0.0096, + "step": 91000 + }, + { + "epoch": 549.3, + "eval_accuracy": 0.0012683861662929339, + "eval_loss": 23.982318878173828, + "eval_runtime": 18.2415, + "eval_samples_per_second": 111.833, + "eval_steps_per_second": 2.357, + "step": 91000 + }, + { + "epoch": 549.42, + "learning_rate": 4.035967692511311e-06, + "loss": 0.0097, + "step": 91020 + }, + { + "epoch": 549.54, + "learning_rate": 4.018321362745142e-06, + "loss": 0.0098, + "step": 91040 + }, + { + "epoch": 549.66, + "learning_rate": 4.000712079586916e-06, + "loss": 0.0098, + "step": 91060 + }, + { + "epoch": 549.78, + "learning_rate": 3.983139857224122e-06, + "loss": 0.0099, + "step": 91080 + }, + { + "epoch": 549.9, + "learning_rate": 3.96560470981438e-06, + "loss": 0.0098, + "step": 91100 + }, + { + "epoch": 550.02, + "learning_rate": 3.948106651485439e-06, + "loss": 0.0099, + "step": 91120 + }, + { + "epoch": 550.14, + "learning_rate": 3.9306456963351835e-06, + "loss": 0.0095, + "step": 91140 + }, + { + "epoch": 550.26, + "learning_rate": 3.9132218584315704e-06, + "loss": 0.0095, + "step": 91160 + }, + { + "epoch": 550.38, + "learning_rate": 3.895835151812677e-06, + "loss": 0.0097, + "step": 91180 + }, + { + "epoch": 550.5, + "learning_rate": 3.8784855904866635e-06, + "loss": 0.0097, + "step": 91200 + }, + { + "epoch": 550.62, + "learning_rate": 3.86117318843176e-06, + "loss": 0.0098, + "step": 91220 + }, + { + "epoch": 550.74, + "learning_rate": 3.843897959596238e-06, + "loss": 0.0098, + "step": 91240 + }, + { + "epoch": 550.87, + "learning_rate": 3.826659917898457e-06, + "loss": 0.0099, + "step": 91260 + }, + { + "epoch": 550.99, + "learning_rate": 3.8094590772268013e-06, + "loss": 0.0099, + "step": 91280 + }, + { + "epoch": 551.11, + "learning_rate": 3.7922954514396535e-06, + "loss": 0.0095, + "step": 91300 + }, + { + "epoch": 551.23, + "learning_rate": 3.775169054365446e-06, + "loss": 0.0095, + "step": 91320 + }, + { + "epoch": 551.35, + "learning_rate": 3.758079899802619e-06, + "loss": 0.0096, + "step": 91340 + }, + { + "epoch": 551.47, + "learning_rate": 3.7410280015195866e-06, + "loss": 0.0097, + "step": 91360 + }, + { + "epoch": 551.59, + "learning_rate": 3.724013373254753e-06, + "loss": 0.0098, + "step": 91380 + }, + { + "epoch": 551.71, + "learning_rate": 3.707036028716493e-06, + "loss": 0.0098, + "step": 91400 + }, + { + "epoch": 551.83, + "learning_rate": 3.6900959815831692e-06, + "loss": 0.0098, + "step": 91420 + }, + { + "epoch": 551.95, + "learning_rate": 3.6731932455030425e-06, + "loss": 0.0099, + "step": 91440 + }, + { + "epoch": 552.07, + "learning_rate": 3.656327834094364e-06, + "loss": 0.0096, + "step": 91460 + }, + { + "epoch": 552.19, + "learning_rate": 3.6394997609452752e-06, + "loss": 0.0094, + "step": 91480 + }, + { + "epoch": 552.31, + "learning_rate": 3.6227090396138697e-06, + "loss": 0.0096, + "step": 91500 + }, + { + "epoch": 552.43, + "learning_rate": 3.605955683628104e-06, + "loss": 0.0097, + "step": 91520 + }, + { + "epoch": 552.56, + "learning_rate": 3.5892397064858697e-06, + "loss": 0.0098, + "step": 91540 + }, + { + "epoch": 552.68, + "learning_rate": 3.572561121654927e-06, + "loss": 0.0097, + "step": 91560 + }, + { + "epoch": 552.8, + "learning_rate": 3.555919942572905e-06, + "loss": 0.0098, + "step": 91580 + }, + { + "epoch": 552.92, + "learning_rate": 3.5393161826473066e-06, + "loss": 0.0098, + "step": 91600 + }, + { + "epoch": 553.04, + "learning_rate": 3.5227498552554807e-06, + "loss": 0.0098, + "step": 91620 + }, + { + "epoch": 553.16, + "learning_rate": 3.5062209737446226e-06, + "loss": 0.0094, + "step": 91640 + }, + { + "epoch": 553.28, + "learning_rate": 3.489729551431742e-06, + "loss": 0.0095, + "step": 91660 + }, + { + "epoch": 553.4, + "learning_rate": 3.4732756016036814e-06, + "loss": 0.0097, + "step": 91680 + }, + { + "epoch": 553.52, + "learning_rate": 3.4568591375170935e-06, + "loss": 0.0097, + "step": 91700 + }, + { + "epoch": 553.64, + "learning_rate": 3.440480172398436e-06, + "loss": 0.0098, + "step": 91720 + }, + { + "epoch": 553.76, + "learning_rate": 3.4241387194439367e-06, + "loss": 0.0098, + "step": 91740 + }, + { + "epoch": 553.88, + "learning_rate": 3.407834791819603e-06, + "loss": 0.0098, + "step": 91760 + }, + { + "epoch": 554.0, + "learning_rate": 3.3915684026612327e-06, + "loss": 0.0099, + "step": 91780 + }, + { + "epoch": 554.12, + "learning_rate": 3.3753395650743423e-06, + "loss": 0.0094, + "step": 91800 + }, + { + "epoch": 554.25, + "learning_rate": 3.3591482921342234e-06, + "loss": 0.0095, + "step": 91820 + }, + { + "epoch": 554.37, + "learning_rate": 3.3429945968858956e-06, + "loss": 0.0097, + "step": 91840 + }, + { + "epoch": 554.49, + "learning_rate": 3.326878492344088e-06, + "loss": 0.0096, + "step": 91860 + }, + { + "epoch": 554.61, + "learning_rate": 3.3107999914932686e-06, + "loss": 0.0097, + "step": 91880 + }, + { + "epoch": 554.73, + "learning_rate": 3.2947591072875814e-06, + "loss": 0.0098, + "step": 91900 + }, + { + "epoch": 554.85, + "learning_rate": 3.2787558526508887e-06, + "loss": 0.0097, + "step": 91920 + }, + { + "epoch": 554.97, + "learning_rate": 3.2627902404767153e-06, + "loss": 0.0099, + "step": 91940 + }, + { + "epoch": 555.09, + "learning_rate": 3.246862283628277e-06, + "loss": 0.0095, + "step": 91960 + }, + { + "epoch": 555.21, + "learning_rate": 3.230971994938414e-06, + "loss": 0.0095, + "step": 91980 + }, + { + "epoch": 555.33, + "learning_rate": 3.215119387209681e-06, + "loss": 0.0096, + "step": 92000 + }, + { + "epoch": 555.33, + "eval_accuracy": 0.0012604834487459375, + "eval_loss": 24.008394241333008, + "eval_runtime": 18.2132, + "eval_samples_per_second": 112.007, + "eval_steps_per_second": 2.361, + "step": 92000 + }, + { + "epoch": 555.45, + "learning_rate": 3.1993044732142206e-06, + "loss": 0.0096, + "step": 92020 + }, + { + "epoch": 555.57, + "learning_rate": 3.1835272656938197e-06, + "loss": 0.0097, + "step": 92040 + }, + { + "epoch": 555.69, + "learning_rate": 3.1677877773598984e-06, + "loss": 0.0097, + "step": 92060 + }, + { + "epoch": 555.81, + "learning_rate": 3.152086020893469e-06, + "loss": 0.0098, + "step": 92080 + }, + { + "epoch": 555.94, + "learning_rate": 3.1364220089451667e-06, + "loss": 0.0098, + "step": 92100 + }, + { + "epoch": 556.06, + "learning_rate": 3.120795754135175e-06, + "loss": 0.0096, + "step": 92120 + }, + { + "epoch": 556.18, + "learning_rate": 3.1052072690533173e-06, + "loss": 0.0094, + "step": 92140 + }, + { + "epoch": 556.3, + "learning_rate": 3.089656566258942e-06, + "loss": 0.0095, + "step": 92160 + }, + { + "epoch": 556.42, + "learning_rate": 3.0741436582809703e-06, + "loss": 0.0096, + "step": 92180 + }, + { + "epoch": 556.54, + "learning_rate": 3.0586685576178608e-06, + "loss": 0.0097, + "step": 92200 + }, + { + "epoch": 556.66, + "learning_rate": 3.0432312767376402e-06, + "loss": 0.0097, + "step": 92220 + }, + { + "epoch": 556.78, + "learning_rate": 3.0278318280778426e-06, + "loss": 0.0097, + "step": 92240 + }, + { + "epoch": 556.9, + "learning_rate": 3.012470224045516e-06, + "loss": 0.0098, + "step": 92260 + }, + { + "epoch": 557.02, + "learning_rate": 2.997146477017221e-06, + "loss": 0.0098, + "step": 92280 + }, + { + "epoch": 557.14, + "learning_rate": 2.9818605993390593e-06, + "loss": 0.0094, + "step": 92300 + }, + { + "epoch": 557.26, + "learning_rate": 2.9666126033265517e-06, + "loss": 0.0095, + "step": 92320 + }, + { + "epoch": 557.38, + "learning_rate": 2.951402501264744e-06, + "loss": 0.0096, + "step": 92340 + }, + { + "epoch": 557.51, + "learning_rate": 2.9362303054081387e-06, + "loss": 0.0096, + "step": 92360 + }, + { + "epoch": 557.63, + "learning_rate": 2.9210960279807133e-06, + "loss": 0.0097, + "step": 92380 + }, + { + "epoch": 557.75, + "learning_rate": 2.9059996811758637e-06, + "loss": 0.0096, + "step": 92400 + }, + { + "epoch": 557.87, + "learning_rate": 2.8909412771564502e-06, + "loss": 0.0098, + "step": 92420 + }, + { + "epoch": 557.99, + "learning_rate": 2.875920828054757e-06, + "loss": 0.0098, + "step": 92440 + }, + { + "epoch": 558.11, + "learning_rate": 2.8609383459724913e-06, + "loss": 0.0094, + "step": 92460 + }, + { + "epoch": 558.23, + "learning_rate": 2.8459938429807763e-06, + "loss": 0.0094, + "step": 92480 + }, + { + "epoch": 558.35, + "learning_rate": 2.8310873311201027e-06, + "loss": 0.0096, + "step": 92500 + }, + { + "epoch": 558.47, + "learning_rate": 2.816218822400407e-06, + "loss": 0.0096, + "step": 92520 + }, + { + "epoch": 558.59, + "learning_rate": 2.8013883288009522e-06, + "loss": 0.0097, + "step": 92540 + }, + { + "epoch": 558.71, + "learning_rate": 2.7865958622704192e-06, + "loss": 0.0097, + "step": 92560 + }, + { + "epoch": 558.83, + "learning_rate": 2.7718414347268183e-06, + "loss": 0.0097, + "step": 92580 + }, + { + "epoch": 558.95, + "learning_rate": 2.757125058057536e-06, + "loss": 0.0097, + "step": 92600 + }, + { + "epoch": 559.07, + "learning_rate": 2.7424467441192927e-06, + "loss": 0.0095, + "step": 92620 + }, + { + "epoch": 559.2, + "learning_rate": 2.727806504738112e-06, + "loss": 0.0094, + "step": 92640 + }, + { + "epoch": 559.32, + "learning_rate": 2.713204351709414e-06, + "loss": 0.0094, + "step": 92660 + }, + { + "epoch": 559.44, + "learning_rate": 2.6986402967978554e-06, + "loss": 0.0096, + "step": 92680 + }, + { + "epoch": 559.56, + "learning_rate": 2.6841143517374543e-06, + "loss": 0.0096, + "step": 92700 + }, + { + "epoch": 559.68, + "learning_rate": 2.669626528231478e-06, + "loss": 0.0097, + "step": 92720 + }, + { + "epoch": 559.8, + "learning_rate": 2.6551768379525277e-06, + "loss": 0.0097, + "step": 92740 + }, + { + "epoch": 559.92, + "learning_rate": 2.640765292542452e-06, + "loss": 0.0098, + "step": 92760 + }, + { + "epoch": 560.04, + "learning_rate": 2.6263919036123687e-06, + "loss": 0.0097, + "step": 92780 + }, + { + "epoch": 560.16, + "learning_rate": 2.612056682742653e-06, + "loss": 0.0094, + "step": 92800 + }, + { + "epoch": 560.28, + "learning_rate": 2.5977596414829388e-06, + "loss": 0.0095, + "step": 92820 + }, + { + "epoch": 560.4, + "learning_rate": 2.583500791352106e-06, + "loss": 0.0095, + "step": 92840 + }, + { + "epoch": 560.52, + "learning_rate": 2.5692801438382265e-06, + "loss": 0.0096, + "step": 92860 + }, + { + "epoch": 560.64, + "learning_rate": 2.555097710398635e-06, + "loss": 0.0096, + "step": 92880 + }, + { + "epoch": 560.76, + "learning_rate": 2.5409535024598694e-06, + "loss": 0.0097, + "step": 92900 + }, + { + "epoch": 560.89, + "learning_rate": 2.5268475314176476e-06, + "loss": 0.0097, + "step": 92920 + }, + { + "epoch": 561.01, + "learning_rate": 2.512779808636906e-06, + "loss": 0.0096, + "step": 92940 + }, + { + "epoch": 561.13, + "learning_rate": 2.4987503454517446e-06, + "loss": 0.0094, + "step": 92960 + }, + { + "epoch": 561.25, + "learning_rate": 2.4847591531654614e-06, + "loss": 0.0094, + "step": 92980 + }, + { + "epoch": 561.37, + "learning_rate": 2.470806243050494e-06, + "loss": 0.0095, + "step": 93000 + }, + { + "epoch": 561.37, + "eval_accuracy": 0.0012673983265995593, + "eval_loss": 24.03643035888672, + "eval_runtime": 18.3059, + "eval_samples_per_second": 111.439, + "eval_steps_per_second": 2.349, + "step": 93000 + }, + { + "epoch": 561.49, + "learning_rate": 2.4568916263484508e-06, + "loss": 0.0096, + "step": 93020 + }, + { + "epoch": 561.61, + "learning_rate": 2.4430153142701082e-06, + "loss": 0.0096, + "step": 93040 + }, + { + "epoch": 561.73, + "learning_rate": 2.4291773179953346e-06, + "loss": 0.0096, + "step": 93060 + }, + { + "epoch": 561.85, + "learning_rate": 2.4153776486731617e-06, + "loss": 0.0096, + "step": 93080 + }, + { + "epoch": 561.97, + "learning_rate": 2.40161631742174e-06, + "loss": 0.0097, + "step": 93100 + }, + { + "epoch": 562.09, + "learning_rate": 2.3878933353283283e-06, + "loss": 0.0095, + "step": 93120 + }, + { + "epoch": 562.21, + "learning_rate": 2.374208713449272e-06, + "loss": 0.0094, + "step": 93140 + }, + { + "epoch": 562.33, + "learning_rate": 2.3605624628100175e-06, + "loss": 0.0095, + "step": 93160 + }, + { + "epoch": 562.45, + "learning_rate": 2.346954594405132e-06, + "loss": 0.0096, + "step": 93180 + }, + { + "epoch": 562.58, + "learning_rate": 2.333385119198206e-06, + "loss": 0.0096, + "step": 93200 + }, + { + "epoch": 562.7, + "learning_rate": 2.319854048121939e-06, + "loss": 0.0096, + "step": 93220 + }, + { + "epoch": 562.82, + "learning_rate": 2.306361392078038e-06, + "loss": 0.0096, + "step": 93240 + }, + { + "epoch": 562.94, + "learning_rate": 2.2929071619373298e-06, + "loss": 0.0097, + "step": 93260 + }, + { + "epoch": 563.06, + "learning_rate": 2.2794913685396214e-06, + "loss": 0.0095, + "step": 93280 + }, + { + "epoch": 563.18, + "learning_rate": 2.266114022693777e-06, + "loss": 0.0094, + "step": 93300 + }, + { + "epoch": 563.3, + "learning_rate": 2.2527751351776927e-06, + "loss": 0.0095, + "step": 93320 + }, + { + "epoch": 563.42, + "learning_rate": 2.23947471673826e-06, + "loss": 0.0095, + "step": 93340 + }, + { + "epoch": 563.54, + "learning_rate": 2.2262127780913965e-06, + "loss": 0.0096, + "step": 93360 + }, + { + "epoch": 563.66, + "learning_rate": 2.2129893299219886e-06, + "loss": 0.0096, + "step": 93380 + }, + { + "epoch": 563.78, + "learning_rate": 2.1998043828839523e-06, + "loss": 0.0096, + "step": 93400 + }, + { + "epoch": 563.9, + "learning_rate": 2.1866579476001404e-06, + "loss": 0.0096, + "step": 93420 + }, + { + "epoch": 564.02, + "learning_rate": 2.173550034662408e-06, + "loss": 0.0097, + "step": 93440 + }, + { + "epoch": 564.14, + "learning_rate": 2.160480654631569e-06, + "loss": 0.0093, + "step": 93460 + }, + { + "epoch": 564.27, + "learning_rate": 2.147449818037378e-06, + "loss": 0.0095, + "step": 93480 + }, + { + "epoch": 564.39, + "learning_rate": 2.1344575353785533e-06, + "loss": 0.0094, + "step": 93500 + }, + { + "epoch": 564.51, + "learning_rate": 2.1215038171227276e-06, + "loss": 0.0096, + "step": 93520 + }, + { + "epoch": 564.63, + "learning_rate": 2.108588673706491e-06, + "loss": 0.0096, + "step": 93540 + }, + { + "epoch": 564.75, + "learning_rate": 2.0957121155353364e-06, + "loss": 0.0096, + "step": 93560 + }, + { + "epoch": 564.87, + "learning_rate": 2.082874152983677e-06, + "loss": 0.0097, + "step": 93580 + }, + { + "epoch": 564.99, + "learning_rate": 2.070074796394822e-06, + "loss": 0.0097, + "step": 93600 + }, + { + "epoch": 565.11, + "learning_rate": 2.057314056080989e-06, + "loss": 0.0094, + "step": 93620 + }, + { + "epoch": 565.23, + "learning_rate": 2.044591942323276e-06, + "loss": 0.0094, + "step": 93640 + }, + { + "epoch": 565.35, + "learning_rate": 2.031908465371646e-06, + "loss": 0.0095, + "step": 93660 + }, + { + "epoch": 565.47, + "learning_rate": 2.0192636354449623e-06, + "loss": 0.0094, + "step": 93680 + }, + { + "epoch": 565.59, + "learning_rate": 2.006657462730932e-06, + "loss": 0.0095, + "step": 93700 + }, + { + "epoch": 565.71, + "learning_rate": 1.9940899573861195e-06, + "loss": 0.0096, + "step": 93720 + }, + { + "epoch": 565.84, + "learning_rate": 1.9815611295359416e-06, + "loss": 0.0096, + "step": 93740 + }, + { + "epoch": 565.96, + "learning_rate": 1.9690709892746352e-06, + "loss": 0.0097, + "step": 93760 + }, + { + "epoch": 566.08, + "learning_rate": 1.9566195466653003e-06, + "loss": 0.0094, + "step": 93780 + }, + { + "epoch": 566.2, + "learning_rate": 1.9442068117398238e-06, + "loss": 0.0094, + "step": 93800 + }, + { + "epoch": 566.32, + "learning_rate": 1.931832794498928e-06, + "loss": 0.0094, + "step": 93820 + }, + { + "epoch": 566.44, + "learning_rate": 1.9194975049121324e-06, + "loss": 0.0095, + "step": 93840 + }, + { + "epoch": 566.56, + "learning_rate": 1.9072009529177648e-06, + "loss": 0.0095, + "step": 93860 + }, + { + "epoch": 566.68, + "learning_rate": 1.8949431484229285e-06, + "loss": 0.0096, + "step": 93880 + }, + { + "epoch": 566.8, + "learning_rate": 1.8827241013035068e-06, + "loss": 0.0096, + "step": 93900 + }, + { + "epoch": 566.92, + "learning_rate": 1.8705438214041859e-06, + "loss": 0.0096, + "step": 93920 + }, + { + "epoch": 567.04, + "learning_rate": 1.858402318538377e-06, + "loss": 0.0095, + "step": 93940 + }, + { + "epoch": 567.16, + "learning_rate": 1.8462996024882828e-06, + "loss": 0.0093, + "step": 93960 + }, + { + "epoch": 567.28, + "learning_rate": 1.8342356830048369e-06, + "loss": 0.0094, + "step": 93980 + }, + { + "epoch": 567.4, + "learning_rate": 1.8222105698077251e-06, + "loss": 0.0095, + "step": 94000 + }, + { + "epoch": 567.4, + "eval_accuracy": 0.0012624591281326866, + "eval_loss": 24.054536819458008, + "eval_runtime": 18.1691, + "eval_samples_per_second": 112.278, + "eval_steps_per_second": 2.367, + "step": 94000 + }, + { + "epoch": 567.53, + "learning_rate": 1.8102242725853646e-06, + "loss": 0.0095, + "step": 94020 + }, + { + "epoch": 567.65, + "learning_rate": 1.7982768009948858e-06, + "loss": 0.0095, + "step": 94040 + }, + { + "epoch": 567.77, + "learning_rate": 1.7863681646621666e-06, + "loss": 0.0096, + "step": 94060 + }, + { + "epoch": 567.89, + "learning_rate": 1.7744983731817765e-06, + "loss": 0.0096, + "step": 94080 + }, + { + "epoch": 568.01, + "learning_rate": 1.7626674361169938e-06, + "loss": 0.0096, + "step": 94100 + }, + { + "epoch": 568.13, + "learning_rate": 1.750875362999771e-06, + "loss": 0.0094, + "step": 94120 + }, + { + "epoch": 568.25, + "learning_rate": 1.739122163330803e-06, + "loss": 0.0094, + "step": 94140 + }, + { + "epoch": 568.37, + "learning_rate": 1.727407846579404e-06, + "loss": 0.0094, + "step": 94160 + }, + { + "epoch": 568.49, + "learning_rate": 1.7157324221835912e-06, + "loss": 0.0095, + "step": 94180 + }, + { + "epoch": 568.61, + "learning_rate": 1.704095899550051e-06, + "loss": 0.0095, + "step": 94200 + }, + { + "epoch": 568.73, + "learning_rate": 1.6924982880541174e-06, + "loss": 0.0095, + "step": 94220 + }, + { + "epoch": 568.85, + "learning_rate": 1.6809395970397767e-06, + "loss": 0.0095, + "step": 94240 + }, + { + "epoch": 568.97, + "learning_rate": 1.6694198358196523e-06, + "loss": 0.0096, + "step": 94260 + }, + { + "epoch": 569.09, + "learning_rate": 1.6579390136750084e-06, + "loss": 0.0095, + "step": 94280 + }, + { + "epoch": 569.22, + "learning_rate": 1.6464971398557406e-06, + "loss": 0.0093, + "step": 94300 + }, + { + "epoch": 569.34, + "learning_rate": 1.6350942235803579e-06, + "loss": 0.0094, + "step": 94320 + }, + { + "epoch": 569.46, + "learning_rate": 1.6237302740359838e-06, + "loss": 0.0095, + "step": 94340 + }, + { + "epoch": 569.58, + "learning_rate": 1.61240530037835e-06, + "loss": 0.0095, + "step": 94360 + }, + { + "epoch": 569.7, + "learning_rate": 1.6011193117317857e-06, + "loss": 0.0095, + "step": 94380 + }, + { + "epoch": 569.82, + "learning_rate": 1.5898723171891948e-06, + "loss": 0.0095, + "step": 94400 + }, + { + "epoch": 569.94, + "learning_rate": 1.5786643258120904e-06, + "loss": 0.0096, + "step": 94420 + }, + { + "epoch": 570.06, + "learning_rate": 1.567495346630543e-06, + "loss": 0.0094, + "step": 94440 + }, + { + "epoch": 570.18, + "learning_rate": 1.5563653886431994e-06, + "loss": 0.0093, + "step": 94460 + }, + { + "epoch": 570.3, + "learning_rate": 1.5452744608172586e-06, + "loss": 0.0094, + "step": 94480 + }, + { + "epoch": 570.42, + "learning_rate": 1.534222572088495e-06, + "loss": 0.0094, + "step": 94500 + }, + { + "epoch": 570.54, + "learning_rate": 1.5232097313612082e-06, + "loss": 0.0095, + "step": 94520 + }, + { + "epoch": 570.66, + "learning_rate": 1.512235947508245e-06, + "loss": 0.0095, + "step": 94540 + }, + { + "epoch": 570.78, + "learning_rate": 1.5013012293709827e-06, + "loss": 0.0095, + "step": 94560 + }, + { + "epoch": 570.91, + "learning_rate": 1.4904055857593246e-06, + "loss": 0.0096, + "step": 94580 + }, + { + "epoch": 571.03, + "learning_rate": 1.479549025451704e-06, + "loss": 0.0095, + "step": 94600 + }, + { + "epoch": 571.15, + "learning_rate": 1.4687315571950465e-06, + "loss": 0.0094, + "step": 94620 + }, + { + "epoch": 571.27, + "learning_rate": 1.4579531897047915e-06, + "loss": 0.0093, + "step": 94640 + }, + { + "epoch": 571.39, + "learning_rate": 1.4472139316648814e-06, + "loss": 0.0094, + "step": 94660 + }, + { + "epoch": 571.51, + "learning_rate": 1.4365137917277337e-06, + "loss": 0.0095, + "step": 94680 + }, + { + "epoch": 571.63, + "learning_rate": 1.425852778514264e-06, + "loss": 0.0095, + "step": 94700 + }, + { + "epoch": 571.75, + "learning_rate": 1.4152309006138565e-06, + "loss": 0.0095, + "step": 94720 + }, + { + "epoch": 571.87, + "learning_rate": 1.4046481665843603e-06, + "loss": 0.0095, + "step": 94740 + }, + { + "epoch": 571.99, + "learning_rate": 1.3941045849521106e-06, + "loss": 0.0095, + "step": 94760 + }, + { + "epoch": 572.11, + "learning_rate": 1.3836001642118624e-06, + "loss": 0.0093, + "step": 94780 + }, + { + "epoch": 572.23, + "learning_rate": 1.3731349128268457e-06, + "loss": 0.0093, + "step": 94800 + }, + { + "epoch": 572.35, + "learning_rate": 1.3627088392287212e-06, + "loss": 0.0094, + "step": 94820 + }, + { + "epoch": 572.47, + "learning_rate": 1.3523219518175923e-06, + "loss": 0.0094, + "step": 94840 + }, + { + "epoch": 572.6, + "learning_rate": 1.3419742589619756e-06, + "loss": 0.0095, + "step": 94860 + }, + { + "epoch": 572.72, + "learning_rate": 1.3316657689988355e-06, + "loss": 0.0096, + "step": 94880 + }, + { + "epoch": 572.84, + "learning_rate": 1.3213964902335341e-06, + "loss": 0.0095, + "step": 94900 + }, + { + "epoch": 572.96, + "learning_rate": 1.3111664309398364e-06, + "loss": 0.0095, + "step": 94920 + }, + { + "epoch": 573.08, + "learning_rate": 1.300975599359916e-06, + "loss": 0.0094, + "step": 94940 + }, + { + "epoch": 573.2, + "learning_rate": 1.2908240037043495e-06, + "loss": 0.0093, + "step": 94960 + }, + { + "epoch": 573.32, + "learning_rate": 1.2807116521520945e-06, + "loss": 0.0094, + "step": 94980 + }, + { + "epoch": 573.44, + "learning_rate": 1.2706385528504728e-06, + "loss": 0.0094, + "step": 95000 + }, + { + "epoch": 573.44, + "eval_accuracy": 0.0012634469678260611, + "eval_loss": 24.07013702392578, + "eval_runtime": 18.2478, + "eval_samples_per_second": 111.795, + "eval_steps_per_second": 2.356, + "step": 95000 + }, + { + "epoch": 573.56, + "learning_rate": 1.2606047139152143e-06, + "loss": 0.0094, + "step": 95020 + }, + { + "epoch": 573.68, + "learning_rate": 1.2506101434304086e-06, + "loss": 0.0094, + "step": 95040 + }, + { + "epoch": 573.8, + "learning_rate": 1.2406548494484805e-06, + "loss": 0.0095, + "step": 95060 + }, + { + "epoch": 573.92, + "learning_rate": 1.230738839990242e-06, + "loss": 0.0095, + "step": 95080 + }, + { + "epoch": 574.04, + "learning_rate": 1.220862123044847e-06, + "loss": 0.0094, + "step": 95100 + }, + { + "epoch": 574.16, + "learning_rate": 1.21102470656978e-06, + "loss": 0.0093, + "step": 95120 + }, + { + "epoch": 574.29, + "learning_rate": 1.2012265984908733e-06, + "loss": 0.0093, + "step": 95140 + }, + { + "epoch": 574.41, + "learning_rate": 1.1914678067022789e-06, + "loss": 0.0095, + "step": 95160 + }, + { + "epoch": 574.53, + "learning_rate": 1.1817483390664908e-06, + "loss": 0.0094, + "step": 95180 + }, + { + "epoch": 574.65, + "learning_rate": 1.1720682034143005e-06, + "loss": 0.0094, + "step": 95200 + }, + { + "epoch": 574.77, + "learning_rate": 1.1624274075448193e-06, + "loss": 0.0095, + "step": 95220 + }, + { + "epoch": 574.89, + "learning_rate": 1.152825959225462e-06, + "loss": 0.0095, + "step": 95240 + }, + { + "epoch": 575.01, + "learning_rate": 1.1432638661919516e-06, + "loss": 0.0095, + "step": 95260 + }, + { + "epoch": 575.13, + "learning_rate": 1.1337411361482752e-06, + "loss": 0.0093, + "step": 95280 + }, + { + "epoch": 575.25, + "learning_rate": 1.1242577767667407e-06, + "loss": 0.0093, + "step": 95300 + }, + { + "epoch": 575.37, + "learning_rate": 1.114813795687908e-06, + "loss": 0.0093, + "step": 95320 + }, + { + "epoch": 575.49, + "learning_rate": 1.1054092005206351e-06, + "loss": 0.0094, + "step": 95340 + }, + { + "epoch": 575.61, + "learning_rate": 1.096043998842028e-06, + "loss": 0.0095, + "step": 95360 + }, + { + "epoch": 575.73, + "learning_rate": 1.0867181981974506e-06, + "loss": 0.0095, + "step": 95380 + }, + { + "epoch": 575.86, + "learning_rate": 1.0774318061005483e-06, + "loss": 0.0095, + "step": 95400 + }, + { + "epoch": 575.98, + "learning_rate": 1.0681848300331866e-06, + "loss": 0.0095, + "step": 95420 + }, + { + "epoch": 576.1, + "learning_rate": 1.0589772774454942e-06, + "loss": 0.0093, + "step": 95440 + }, + { + "epoch": 576.22, + "learning_rate": 1.049809155755832e-06, + "loss": 0.0093, + "step": 95460 + }, + { + "epoch": 576.34, + "learning_rate": 1.0406804723507857e-06, + "loss": 0.0094, + "step": 95480 + }, + { + "epoch": 576.46, + "learning_rate": 1.0315912345851718e-06, + "loss": 0.0094, + "step": 95500 + }, + { + "epoch": 576.58, + "learning_rate": 1.0225414497820162e-06, + "loss": 0.0094, + "step": 95520 + }, + { + "epoch": 576.7, + "learning_rate": 1.0135311252325864e-06, + "loss": 0.0095, + "step": 95540 + }, + { + "epoch": 576.82, + "learning_rate": 1.0045602681963195e-06, + "loss": 0.0094, + "step": 95560 + }, + { + "epoch": 576.94, + "learning_rate": 9.9562888590089e-07, + "loss": 0.0095, + "step": 95580 + }, + { + "epoch": 577.06, + "learning_rate": 9.867369855421305e-07, + "loss": 0.0094, + "step": 95600 + }, + { + "epoch": 577.18, + "learning_rate": 9.778845742840992e-07, + "loss": 0.0093, + "step": 95620 + }, + { + "epoch": 577.3, + "learning_rate": 9.690716592590298e-07, + "loss": 0.0093, + "step": 95640 + }, + { + "epoch": 577.42, + "learning_rate": 9.602982475673095e-07, + "loss": 0.0094, + "step": 95660 + }, + { + "epoch": 577.55, + "learning_rate": 9.515643462775337e-07, + "loss": 0.0093, + "step": 95680 + }, + { + "epoch": 577.67, + "learning_rate": 9.428699624264403e-07, + "loss": 0.0094, + "step": 95700 + }, + { + "epoch": 577.79, + "learning_rate": 9.342151030189428e-07, + "loss": 0.0095, + "step": 95720 + }, + { + "epoch": 577.91, + "learning_rate": 9.255997750280909e-07, + "loss": 0.0095, + "step": 95740 + }, + { + "epoch": 578.03, + "learning_rate": 9.170239853951212e-07, + "loss": 0.0095, + "step": 95760 + }, + { + "epoch": 578.15, + "learning_rate": 9.084877410293846e-07, + "loss": 0.0093, + "step": 95780 + }, + { + "epoch": 578.27, + "learning_rate": 8.999910488083685e-07, + "loss": 0.0094, + "step": 95800 + }, + { + "epoch": 578.39, + "learning_rate": 8.915339155777136e-07, + "loss": 0.0094, + "step": 95820 + }, + { + "epoch": 578.51, + "learning_rate": 8.83116348151175e-07, + "loss": 0.0094, + "step": 95840 + }, + { + "epoch": 578.63, + "learning_rate": 8.747383533106445e-07, + "loss": 0.0094, + "step": 95860 + }, + { + "epoch": 578.75, + "learning_rate": 8.663999378061116e-07, + "loss": 0.0094, + "step": 95880 + }, + { + "epoch": 578.87, + "learning_rate": 8.5810110835568e-07, + "loss": 0.0094, + "step": 95900 + }, + { + "epoch": 578.99, + "learning_rate": 8.498418716455903e-07, + "loss": 0.0094, + "step": 95920 + }, + { + "epoch": 579.11, + "learning_rate": 8.416222343301361e-07, + "loss": 0.0092, + "step": 95940 + }, + { + "epoch": 579.24, + "learning_rate": 8.334422030317424e-07, + "loss": 0.0093, + "step": 95960 + }, + { + "epoch": 579.36, + "learning_rate": 8.25301784340915e-07, + "loss": 0.0094, + "step": 95980 + }, + { + "epoch": 579.48, + "learning_rate": 8.172009848162409e-07, + "loss": 0.0094, + "step": 96000 + }, + { + "epoch": 579.48, + "eval_accuracy": 0.0012644348075194357, + "eval_loss": 24.082643508911133, + "eval_runtime": 18.205, + "eval_samples_per_second": 112.057, + "eval_steps_per_second": 2.362, + "step": 96000 + }, + { + "epoch": 579.6, + "learning_rate": 8.091398109843773e-07, + "loss": 0.0094, + "step": 96020 + }, + { + "epoch": 579.72, + "learning_rate": 8.011182693400732e-07, + "loss": 0.0094, + "step": 96040 + }, + { + "epoch": 579.84, + "learning_rate": 7.931363663461477e-07, + "loss": 0.0094, + "step": 96060 + }, + { + "epoch": 579.96, + "learning_rate": 7.85194108433468e-07, + "loss": 0.0094, + "step": 96080 + }, + { + "epoch": 580.08, + "learning_rate": 7.772915020009707e-07, + "loss": 0.0093, + "step": 96100 + }, + { + "epoch": 580.2, + "learning_rate": 7.694285534156354e-07, + "loss": 0.0093, + "step": 96120 + }, + { + "epoch": 580.32, + "learning_rate": 7.616052690125109e-07, + "loss": 0.0093, + "step": 96140 + }, + { + "epoch": 580.44, + "learning_rate": 7.538216550946664e-07, + "loss": 0.0093, + "step": 96160 + }, + { + "epoch": 580.56, + "learning_rate": 7.46077717933219e-07, + "loss": 0.0093, + "step": 96180 + }, + { + "epoch": 580.68, + "learning_rate": 7.383734637673223e-07, + "loss": 0.0094, + "step": 96200 + }, + { + "epoch": 580.8, + "learning_rate": 7.307088988041555e-07, + "loss": 0.0095, + "step": 96220 + }, + { + "epoch": 580.93, + "learning_rate": 7.230840292189178e-07, + "loss": 0.0094, + "step": 96240 + }, + { + "epoch": 581.05, + "learning_rate": 7.154988611548286e-07, + "loss": 0.0094, + "step": 96260 + }, + { + "epoch": 581.17, + "learning_rate": 7.07953400723127e-07, + "loss": 0.0093, + "step": 96280 + }, + { + "epoch": 581.29, + "learning_rate": 7.004476540030447e-07, + "loss": 0.0093, + "step": 96300 + }, + { + "epoch": 581.41, + "learning_rate": 6.929816270418332e-07, + "loss": 0.0093, + "step": 96320 + }, + { + "epoch": 581.53, + "learning_rate": 6.855553258547309e-07, + "loss": 0.0094, + "step": 96340 + }, + { + "epoch": 581.65, + "learning_rate": 6.781687564249739e-07, + "loss": 0.0094, + "step": 96360 + }, + { + "epoch": 581.77, + "learning_rate": 6.708219247038017e-07, + "loss": 0.0094, + "step": 96380 + }, + { + "epoch": 581.89, + "learning_rate": 6.635148366104016e-07, + "loss": 0.0094, + "step": 96400 + }, + { + "epoch": 582.01, + "learning_rate": 6.56247498031981e-07, + "loss": 0.0094, + "step": 96420 + }, + { + "epoch": 582.13, + "learning_rate": 6.490199148236898e-07, + "loss": 0.0093, + "step": 96440 + }, + { + "epoch": 582.25, + "learning_rate": 6.418320928086752e-07, + "loss": 0.0093, + "step": 96460 + }, + { + "epoch": 582.37, + "learning_rate": 6.346840377780216e-07, + "loss": 0.0093, + "step": 96480 + }, + { + "epoch": 582.49, + "learning_rate": 6.275757554907946e-07, + "loss": 0.0093, + "step": 96500 + }, + { + "epoch": 582.62, + "learning_rate": 6.205072516740129e-07, + "loss": 0.0094, + "step": 96520 + }, + { + "epoch": 582.74, + "learning_rate": 6.134785320226322e-07, + "loss": 0.0093, + "step": 96540 + }, + { + "epoch": 582.86, + "learning_rate": 6.064896021995725e-07, + "loss": 0.0094, + "step": 96560 + }, + { + "epoch": 582.98, + "learning_rate": 5.995404678356853e-07, + "loss": 0.0094, + "step": 96580 + }, + { + "epoch": 583.1, + "learning_rate": 5.926311345297697e-07, + "loss": 0.0093, + "step": 96600 + }, + { + "epoch": 583.22, + "learning_rate": 5.857616078485395e-07, + "loss": 0.0093, + "step": 96620 + }, + { + "epoch": 583.34, + "learning_rate": 5.789318933266508e-07, + "loss": 0.0094, + "step": 96640 + }, + { + "epoch": 583.46, + "learning_rate": 5.721419964666908e-07, + "loss": 0.0094, + "step": 96660 + }, + { + "epoch": 583.58, + "learning_rate": 5.653919227391502e-07, + "loss": 0.0093, + "step": 96680 + }, + { + "epoch": 583.7, + "learning_rate": 5.586816775824344e-07, + "loss": 0.0093, + "step": 96700 + }, + { + "epoch": 583.82, + "learning_rate": 5.520112664028799e-07, + "loss": 0.0093, + "step": 96720 + }, + { + "epoch": 583.94, + "learning_rate": 5.453806945747098e-07, + "loss": 0.0094, + "step": 96740 + }, + { + "epoch": 584.06, + "learning_rate": 5.387899674400509e-07, + "loss": 0.0093, + "step": 96760 + }, + { + "epoch": 584.19, + "learning_rate": 5.322390903089392e-07, + "loss": 0.0093, + "step": 96780 + }, + { + "epoch": 584.31, + "learning_rate": 5.257280684593024e-07, + "loss": 0.0093, + "step": 96800 + }, + { + "epoch": 584.43, + "learning_rate": 5.192569071369446e-07, + "loss": 0.0093, + "step": 96820 + }, + { + "epoch": 584.55, + "learning_rate": 5.128256115555675e-07, + "loss": 0.0093, + "step": 96840 + }, + { + "epoch": 584.67, + "learning_rate": 5.064341868967537e-07, + "loss": 0.0093, + "step": 96860 + }, + { + "epoch": 584.79, + "learning_rate": 5.000826383099511e-07, + "loss": 0.0094, + "step": 96880 + }, + { + "epoch": 584.91, + "learning_rate": 4.937709709124937e-07, + "loss": 0.0094, + "step": 96900 + }, + { + "epoch": 585.03, + "learning_rate": 4.874991897895697e-07, + "loss": 0.0093, + "step": 96920 + }, + { + "epoch": 585.15, + "learning_rate": 4.81267299994248e-07, + "loss": 0.0093, + "step": 96940 + }, + { + "epoch": 585.27, + "learning_rate": 4.750753065474456e-07, + "loss": 0.0093, + "step": 96960 + }, + { + "epoch": 585.39, + "learning_rate": 4.689232144379496e-07, + "loss": 0.0093, + "step": 96980 + }, + { + "epoch": 585.51, + "learning_rate": 4.628110286223675e-07, + "loss": 0.0093, + "step": 97000 + }, + { + "epoch": 585.51, + "eval_accuracy": 0.001262953047979374, + "eval_loss": 24.08979034423828, + "eval_runtime": 18.188, + "eval_samples_per_second": 112.162, + "eval_steps_per_second": 2.364, + "step": 97000 + }, + { + "epoch": 585.63, + "learning_rate": 4.57041419787968e-07, + "loss": 0.0093, + "step": 97020 + }, + { + "epoch": 585.75, + "learning_rate": 4.5100706538029334e-07, + "loss": 0.0094, + "step": 97040 + }, + { + "epoch": 585.88, + "learning_rate": 4.450126317012637e-07, + "loss": 0.0093, + "step": 97060 + }, + { + "epoch": 586.0, + "learning_rate": 4.3905812358048246e-07, + "loss": 0.0094, + "step": 97080 + }, + { + "epoch": 586.12, + "learning_rate": 4.331435458153954e-07, + "loss": 0.0092, + "step": 97100 + }, + { + "epoch": 586.24, + "learning_rate": 4.2726890317126845e-07, + "loss": 0.0093, + "step": 97120 + }, + { + "epoch": 586.36, + "learning_rate": 4.2143420038119884e-07, + "loss": 0.0093, + "step": 97140 + }, + { + "epoch": 586.48, + "learning_rate": 4.15639442146093e-07, + "loss": 0.0093, + "step": 97160 + }, + { + "epoch": 586.6, + "learning_rate": 4.0988463313469395e-07, + "loss": 0.0093, + "step": 97180 + }, + { + "epoch": 586.72, + "learning_rate": 4.0416977798353737e-07, + "loss": 0.0093, + "step": 97200 + }, + { + "epoch": 586.84, + "learning_rate": 3.984948812969791e-07, + "loss": 0.0093, + "step": 97220 + }, + { + "epoch": 586.96, + "learning_rate": 3.9285994764718945e-07, + "loss": 0.0094, + "step": 97240 + }, + { + "epoch": 587.08, + "learning_rate": 3.8726498157412026e-07, + "loss": 0.0093, + "step": 97260 + }, + { + "epoch": 587.2, + "learning_rate": 3.81709987585549e-07, + "loss": 0.0093, + "step": 97280 + }, + { + "epoch": 587.32, + "learning_rate": 3.761949701570233e-07, + "loss": 0.0093, + "step": 97300 + }, + { + "epoch": 587.44, + "learning_rate": 3.7071993373190004e-07, + "loss": 0.0093, + "step": 97320 + }, + { + "epoch": 587.57, + "learning_rate": 3.652848827213118e-07, + "loss": 0.0093, + "step": 97340 + }, + { + "epoch": 587.69, + "learning_rate": 3.598898215041835e-07, + "loss": 0.0093, + "step": 97360 + }, + { + "epoch": 587.81, + "learning_rate": 3.5453475442721597e-07, + "loss": 0.0093, + "step": 97380 + }, + { + "epoch": 587.93, + "learning_rate": 3.492196858048913e-07, + "loss": 0.0094, + "step": 97400 + }, + { + "epoch": 588.05, + "learning_rate": 3.4394461991946736e-07, + "loss": 0.0093, + "step": 97420 + }, + { + "epoch": 588.17, + "learning_rate": 3.3870956102096675e-07, + "loss": 0.0093, + "step": 97440 + }, + { + "epoch": 588.29, + "learning_rate": 3.335145133271822e-07, + "loss": 0.0093, + "step": 97460 + }, + { + "epoch": 588.41, + "learning_rate": 3.283594810236712e-07, + "loss": 0.0093, + "step": 97480 + }, + { + "epoch": 588.53, + "learning_rate": 3.232444682637503e-07, + "loss": 0.0093, + "step": 97500 + }, + { + "epoch": 588.65, + "learning_rate": 3.181694791684842e-07, + "loss": 0.0093, + "step": 97520 + }, + { + "epoch": 588.77, + "learning_rate": 3.1313451782671313e-07, + "loss": 0.0093, + "step": 97540 + }, + { + "epoch": 588.89, + "learning_rate": 3.0813958829500886e-07, + "loss": 0.0093, + "step": 97560 + }, + { + "epoch": 589.01, + "learning_rate": 3.031846945976913e-07, + "loss": 0.0093, + "step": 97580 + }, + { + "epoch": 589.13, + "learning_rate": 2.982698407268336e-07, + "loss": 0.0093, + "step": 97600 + }, + { + "epoch": 589.26, + "learning_rate": 2.9339503064224615e-07, + "loss": 0.0093, + "step": 97620 + }, + { + "epoch": 589.38, + "learning_rate": 2.885602682714761e-07, + "loss": 0.0093, + "step": 97640 + }, + { + "epoch": 589.5, + "learning_rate": 2.837655575097964e-07, + "loss": 0.0093, + "step": 97660 + }, + { + "epoch": 589.62, + "learning_rate": 2.790109022202281e-07, + "loss": 0.0093, + "step": 97680 + }, + { + "epoch": 589.74, + "learning_rate": 2.742963062335069e-07, + "loss": 0.0093, + "step": 97700 + }, + { + "epoch": 589.86, + "learning_rate": 2.6962177334809436e-07, + "loss": 0.0094, + "step": 97720 + }, + { + "epoch": 589.98, + "learning_rate": 2.649873073301834e-07, + "loss": 0.0093, + "step": 97740 + }, + { + "epoch": 590.1, + "learning_rate": 2.603929119136761e-07, + "loss": 0.0093, + "step": 97760 + }, + { + "epoch": 590.22, + "learning_rate": 2.558385908001948e-07, + "loss": 0.0093, + "step": 97780 + }, + { + "epoch": 590.34, + "learning_rate": 2.5132434765907654e-07, + "loss": 0.0093, + "step": 97800 + }, + { + "epoch": 590.46, + "learning_rate": 2.4685018612735644e-07, + "loss": 0.0093, + "step": 97820 + }, + { + "epoch": 590.58, + "learning_rate": 2.4241610980979546e-07, + "loss": 0.0093, + "step": 97840 + }, + { + "epoch": 590.7, + "learning_rate": 2.3802212227884145e-07, + "loss": 0.0093, + "step": 97860 + }, + { + "epoch": 590.82, + "learning_rate": 2.336682270746571e-07, + "loss": 0.0093, + "step": 97880 + }, + { + "epoch": 590.95, + "learning_rate": 2.2935442770509187e-07, + "loss": 0.0093, + "step": 97900 + }, + { + "epoch": 591.07, + "learning_rate": 2.2508072764570454e-07, + "loss": 0.0093, + "step": 97920 + }, + { + "epoch": 591.19, + "learning_rate": 2.2084713033972415e-07, + "loss": 0.0092, + "step": 97940 + }, + { + "epoch": 591.31, + "learning_rate": 2.1665363919809445e-07, + "loss": 0.0093, + "step": 97960 + }, + { + "epoch": 591.43, + "learning_rate": 2.1250025759943504e-07, + "loss": 0.0093, + "step": 97980 + }, + { + "epoch": 591.55, + "learning_rate": 2.0838698889004693e-07, + "loss": 0.0093, + "step": 98000 + }, + { + "epoch": 591.55, + "eval_accuracy": 0.0012634469678260611, + "eval_loss": 24.093477249145508, + "eval_runtime": 18.1793, + "eval_samples_per_second": 112.215, + "eval_steps_per_second": 2.365, + "step": 98000 + }, + { + "epoch": 591.67, + "learning_rate": 2.043138363839181e-07, + "loss": 0.0093, + "step": 98020 + }, + { + "epoch": 591.79, + "learning_rate": 2.0028080336270683e-07, + "loss": 0.0093, + "step": 98040 + }, + { + "epoch": 591.91, + "learning_rate": 1.9628789307576945e-07, + "loss": 0.0093, + "step": 98060 + }, + { + "epoch": 592.03, + "learning_rate": 1.923351087401104e-07, + "loss": 0.0093, + "step": 98080 + }, + { + "epoch": 592.15, + "learning_rate": 1.884224535404211e-07, + "loss": 0.0093, + "step": 98100 + }, + { + "epoch": 592.27, + "learning_rate": 1.8454993062905212e-07, + "loss": 0.0093, + "step": 98120 + }, + { + "epoch": 592.39, + "learning_rate": 1.8071754312602995e-07, + "loss": 0.0093, + "step": 98140 + }, + { + "epoch": 592.52, + "learning_rate": 1.7692529411904578e-07, + "loss": 0.0093, + "step": 98160 + }, + { + "epoch": 592.64, + "learning_rate": 1.7317318666342786e-07, + "loss": 0.0093, + "step": 98180 + }, + { + "epoch": 592.76, + "learning_rate": 1.6946122378220242e-07, + "loss": 0.0092, + "step": 98200 + }, + { + "epoch": 592.88, + "learning_rate": 1.6578940846602165e-07, + "loss": 0.0093, + "step": 98220 + }, + { + "epoch": 593.0, + "learning_rate": 1.6215774367319691e-07, + "loss": 0.0093, + "step": 98240 + }, + { + "epoch": 593.12, + "learning_rate": 1.5856623232969324e-07, + "loss": 0.0093, + "step": 98260 + }, + { + "epoch": 593.24, + "learning_rate": 1.5501487732914045e-07, + "loss": 0.0092, + "step": 98280 + }, + { + "epoch": 593.36, + "learning_rate": 1.515036815327886e-07, + "loss": 0.0092, + "step": 98300 + }, + { + "epoch": 593.48, + "learning_rate": 1.4803264776955262e-07, + "loss": 0.0093, + "step": 98320 + }, + { + "epoch": 593.6, + "learning_rate": 1.4460177883597327e-07, + "loss": 0.0092, + "step": 98340 + }, + { + "epoch": 593.72, + "learning_rate": 1.4121107749624497e-07, + "loss": 0.0093, + "step": 98360 + }, + { + "epoch": 593.84, + "learning_rate": 1.378605464821936e-07, + "loss": 0.0093, + "step": 98380 + }, + { + "epoch": 593.96, + "learning_rate": 1.3455018849328204e-07, + "loss": 0.0093, + "step": 98400 + }, + { + "epoch": 594.08, + "learning_rate": 1.3128000619660462e-07, + "loss": 0.0093, + "step": 98420 + }, + { + "epoch": 594.21, + "learning_rate": 1.2805000222689267e-07, + "loss": 0.0093, + "step": 98440 + }, + { + "epoch": 594.33, + "learning_rate": 1.2486017918649784e-07, + "loss": 0.0093, + "step": 98460 + }, + { + "epoch": 594.45, + "learning_rate": 1.217105396453977e-07, + "loss": 0.0093, + "step": 98480 + }, + { + "epoch": 594.57, + "learning_rate": 1.1860108614121234e-07, + "loss": 0.0093, + "step": 98500 + }, + { + "epoch": 594.69, + "learning_rate": 1.1553182117916006e-07, + "loss": 0.0093, + "step": 98520 + }, + { + "epoch": 594.81, + "learning_rate": 1.1250274723210163e-07, + "loss": 0.0093, + "step": 98540 + }, + { + "epoch": 594.93, + "learning_rate": 1.0951386674050157e-07, + "loss": 0.0093, + "step": 98560 + }, + { + "epoch": 595.05, + "learning_rate": 1.0656518211245025e-07, + "loss": 0.0093, + "step": 98580 + }, + { + "epoch": 595.17, + "learning_rate": 1.0365669572364733e-07, + "loss": 0.0093, + "step": 98600 + }, + { + "epoch": 595.29, + "learning_rate": 1.0078840991740723e-07, + "loss": 0.0092, + "step": 98620 + }, + { + "epoch": 595.41, + "learning_rate": 9.796032700465918e-08, + "loss": 0.0092, + "step": 98640 + }, + { + "epoch": 595.53, + "learning_rate": 9.517244926393609e-08, + "loss": 0.0093, + "step": 98660 + }, + { + "epoch": 595.65, + "learning_rate": 9.242477894138568e-08, + "loss": 0.0093, + "step": 98680 + }, + { + "epoch": 595.77, + "learning_rate": 8.97173182507427e-08, + "loss": 0.0093, + "step": 98700 + }, + { + "epoch": 595.9, + "learning_rate": 8.705006937337334e-08, + "loss": 0.0093, + "step": 98720 + }, + { + "epoch": 596.02, + "learning_rate": 8.442303445823085e-08, + "loss": 0.0093, + "step": 98740 + }, + { + "epoch": 596.14, + "learning_rate": 8.183621562186106e-08, + "loss": 0.0092, + "step": 98760 + }, + { + "epoch": 596.26, + "learning_rate": 7.928961494841903e-08, + "loss": 0.0092, + "step": 98780 + }, + { + "epoch": 596.38, + "learning_rate": 7.678323448966351e-08, + "loss": 0.0093, + "step": 98800 + }, + { + "epoch": 596.5, + "learning_rate": 7.431707626493478e-08, + "loss": 0.0093, + "step": 98820 + }, + { + "epoch": 596.62, + "learning_rate": 7.189114226117122e-08, + "loss": 0.0093, + "step": 98840 + }, + { + "epoch": 596.74, + "learning_rate": 6.950543443290381e-08, + "loss": 0.0093, + "step": 98860 + }, + { + "epoch": 596.86, + "learning_rate": 6.715995470225056e-08, + "loss": 0.0093, + "step": 98880 + }, + { + "epoch": 596.98, + "learning_rate": 6.48547049589332e-08, + "loss": 0.0093, + "step": 98900 + }, + { + "epoch": 597.1, + "learning_rate": 6.258968706023827e-08, + "loss": 0.0093, + "step": 98920 + }, + { + "epoch": 597.22, + "learning_rate": 6.0364902831056e-08, + "loss": 0.0092, + "step": 98940 + }, + { + "epoch": 597.34, + "learning_rate": 5.818035406385258e-08, + "loss": 0.0092, + "step": 98960 + }, + { + "epoch": 597.46, + "learning_rate": 5.603604251868122e-08, + "loss": 0.0093, + "step": 98980 + }, + { + "epoch": 597.59, + "learning_rate": 5.393196992317662e-08, + "loss": 0.0093, + "step": 99000 + }, + { + "epoch": 597.59, + "eval_accuracy": 0.001262953047979374, + "eval_loss": 24.094446182250977, + "eval_runtime": 18.2941, + "eval_samples_per_second": 111.512, + "eval_steps_per_second": 2.35, + "step": 99000 + }, + { + "epoch": 597.71, + "learning_rate": 5.1970373828963416e-08, + "loss": 0.0092, + "step": 99020 + }, + { + "epoch": 597.83, + "learning_rate": 4.9944772031629284e-08, + "loss": 0.0093, + "step": 99040 + }, + { + "epoch": 597.95, + "learning_rate": 4.795941409158622e-08, + "loss": 0.0093, + "step": 99060 + }, + { + "epoch": 598.07, + "learning_rate": 4.601430160839915e-08, + "loss": 0.0093, + "step": 99080 + }, + { + "epoch": 598.19, + "learning_rate": 4.410943614921448e-08, + "loss": 0.0093, + "step": 99100 + }, + { + "epoch": 598.31, + "learning_rate": 4.2244819248743465e-08, + "loss": 0.0093, + "step": 99120 + }, + { + "epoch": 598.43, + "learning_rate": 4.042045240927883e-08, + "loss": 0.0092, + "step": 99140 + }, + { + "epoch": 598.55, + "learning_rate": 3.863633710067815e-08, + "loss": 0.0092, + "step": 99160 + }, + { + "epoch": 598.67, + "learning_rate": 3.689247476036939e-08, + "loss": 0.0093, + "step": 99180 + }, + { + "epoch": 598.79, + "learning_rate": 3.518886679334532e-08, + "loss": 0.0093, + "step": 99200 + }, + { + "epoch": 598.91, + "learning_rate": 3.352551457218578e-08, + "loss": 0.0093, + "step": 99220 + }, + { + "epoch": 599.03, + "learning_rate": 3.190241943701322e-08, + "loss": 0.0093, + "step": 99240 + }, + { + "epoch": 599.15, + "learning_rate": 3.031958269553159e-08, + "loss": 0.0093, + "step": 99260 + }, + { + "epoch": 599.28, + "learning_rate": 2.8777005622998564e-08, + "loss": 0.0093, + "step": 99280 + }, + { + "epoch": 599.4, + "learning_rate": 2.727468946224776e-08, + "loss": 0.0093, + "step": 99300 + }, + { + "epoch": 599.52, + "learning_rate": 2.581263542366097e-08, + "loss": 0.0092, + "step": 99320 + }, + { + "epoch": 599.64, + "learning_rate": 2.4390844685190372e-08, + "loss": 0.0092, + "step": 99340 + }, + { + "epoch": 599.76, + "learning_rate": 2.3009318392347435e-08, + "loss": 0.0093, + "step": 99360 + }, + { + "epoch": 599.88, + "learning_rate": 2.1668057658208453e-08, + "loss": 0.0093, + "step": 99380 + }, + { + "epoch": 600.0, + "learning_rate": 2.036706356339235e-08, + "loss": 0.0092, + "step": 99400 + }, + { + "epoch": 600.12, + "learning_rate": 1.910633715609955e-08, + "loss": 0.0093, + "step": 99420 + }, + { + "epoch": 600.24, + "learning_rate": 1.7885879452061993e-08, + "loss": 0.0093, + "step": 99440 + }, + { + "epoch": 600.36, + "learning_rate": 1.6705691434587556e-08, + "loss": 0.0093, + "step": 99460 + }, + { + "epoch": 600.48, + "learning_rate": 1.55657740545323e-08, + "loss": 0.0092, + "step": 99480 + }, + { + "epoch": 600.6, + "learning_rate": 1.446612823030602e-08, + "loss": 0.0092, + "step": 99500 + }, + { + "epoch": 600.72, + "learning_rate": 1.3406754847872238e-08, + "loss": 0.0093, + "step": 99520 + }, + { + "epoch": 600.85, + "learning_rate": 1.2387654760748213e-08, + "loss": 0.0093, + "step": 99540 + }, + { + "epoch": 600.97, + "learning_rate": 1.1408828790010484e-08, + "loss": 0.0092, + "step": 99560 + }, + { + "epoch": 601.09, + "learning_rate": 1.047027772427267e-08, + "loss": 0.0092, + "step": 99580 + }, + { + "epoch": 601.21, + "learning_rate": 9.572002319713225e-09, + "loss": 0.0093, + "step": 99600 + }, + { + "epoch": 601.33, + "learning_rate": 8.714003300058782e-09, + "loss": 0.0093, + "step": 99620 + }, + { + "epoch": 601.45, + "learning_rate": 7.896281356578605e-09, + "loss": 0.0092, + "step": 99640 + }, + { + "epoch": 601.57, + "learning_rate": 7.118837148101243e-09, + "loss": 0.0093, + "step": 99660 + }, + { + "epoch": 601.69, + "learning_rate": 6.38167130099232e-09, + "loss": 0.0093, + "step": 99680 + }, + { + "epoch": 601.81, + "learning_rate": 5.684784409182298e-09, + "loss": 0.0092, + "step": 99700 + }, + { + "epoch": 601.93, + "learning_rate": 5.0281770341331635e-09, + "loss": 0.0093, + "step": 99720 + }, + { + "epoch": 602.05, + "learning_rate": 4.411849704866189e-09, + "loss": 0.0093, + "step": 99740 + }, + { + "epoch": 602.17, + "learning_rate": 3.835802917939724e-09, + "loss": 0.0093, + "step": 99760 + }, + { + "epoch": 602.29, + "learning_rate": 3.3000371374714013e-09, + "loss": 0.0093, + "step": 99780 + }, + { + "epoch": 602.41, + "learning_rate": 2.8045527951103822e-09, + "loss": 0.0093, + "step": 99800 + }, + { + "epoch": 602.54, + "learning_rate": 2.349350290070662e-09, + "loss": 0.0092, + "step": 99820 + }, + { + "epoch": 602.66, + "learning_rate": 1.934429989086661e-09, + "loss": 0.0093, + "step": 99840 + }, + { + "epoch": 602.78, + "learning_rate": 1.5597922264631859e-09, + "loss": 0.0092, + "step": 99860 + }, + { + "epoch": 602.9, + "learning_rate": 1.2254373040310185e-09, + "loss": 0.0092, + "step": 99880 + }, + { + "epoch": 603.02, + "learning_rate": 9.31365491180225e-10, + "loss": 0.0092, + "step": 99900 + }, + { + "epoch": 603.14, + "learning_rate": 6.775770248379499e-10, + "loss": 0.0093, + "step": 99920 + }, + { + "epoch": 603.26, + "learning_rate": 4.640721094684164e-10, + "loss": 0.0093, + "step": 99940 + }, + { + "epoch": 603.38, + "learning_rate": 2.908509171062335e-10, + "loss": 0.0092, + "step": 99960 + }, + { + "epoch": 603.5, + "learning_rate": 1.57913587295333e-10, + "loss": 0.0092, + "step": 99980 + }, + { + "epoch": 603.62, + "learning_rate": 6.526022715003244e-11, + "loss": 0.0092, + "step": 100000 + }, + { + "epoch": 603.62, + "eval_accuracy": 0.0012644348075194357, + "eval_loss": 24.09497833251953, + "eval_runtime": 18.2893, + "eval_samples_per_second": 111.54, + "eval_steps_per_second": 2.351, + "step": 100000 + }, + { + "epoch": 603.62, + "step": 100000, + "total_flos": 1.0802570660564705e+18, + "train_loss": 1.1590602387964726, + "train_runtime": 91564.1404, + "train_samples_per_second": 52.422, + "train_steps_per_second": 1.092 + } + ], + "logging_steps": 20, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 607, + "save_steps": 1000, + "total_flos": 1.0802570660564705e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}