{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.48, "learning_rate": 1.980952380952381e-05, "loss": 1.4241, "step": 20 }, { "epoch": 0.95, "learning_rate": 1.961904761904762e-05, "loss": 1.3539, "step": 40 }, { "epoch": 1.43, "learning_rate": 1.942857142857143e-05, "loss": 0.8083, "step": 60 }, { "epoch": 1.9, "learning_rate": 1.923809523809524e-05, "loss": 0.7274, "step": 80 }, { "epoch": 2.38, "learning_rate": 1.904761904761905e-05, "loss": 0.3911, "step": 100 }, { "epoch": 2.86, "learning_rate": 1.885714285714286e-05, "loss": 0.2785, "step": 120 }, { "epoch": 3.33, "learning_rate": 1.866666666666667e-05, "loss": 0.1367, "step": 140 }, { "epoch": 3.81, "learning_rate": 1.8476190476190478e-05, "loss": 0.1004, "step": 160 }, { "epoch": 4.29, "learning_rate": 1.8285714285714288e-05, "loss": 0.0718, "step": 180 }, { "epoch": 4.76, "learning_rate": 1.8095238095238097e-05, "loss": 0.0588, "step": 200 }, { "epoch": 5.24, "learning_rate": 1.7904761904761907e-05, "loss": 0.0456, "step": 220 }, { "epoch": 5.71, "learning_rate": 1.7714285714285717e-05, "loss": 0.0378, "step": 240 }, { "epoch": 6.19, "learning_rate": 1.7523809523809526e-05, "loss": 0.0297, "step": 260 }, { "epoch": 6.67, "learning_rate": 1.7333333333333336e-05, "loss": 0.0232, "step": 280 }, { "epoch": 7.14, "learning_rate": 1.7142857142857142e-05, "loss": 0.0231, "step": 300 }, { "epoch": 7.62, "learning_rate": 1.6952380952380955e-05, "loss": 0.016, "step": 320 }, { "epoch": 8.1, "learning_rate": 1.6761904761904764e-05, "loss": 0.016, "step": 340 }, { "epoch": 8.57, "learning_rate": 1.6571428571428574e-05, "loss": 0.0109, "step": 360 }, { "epoch": 9.05, "learning_rate": 1.6380952380952384e-05, "loss": 0.0114, "step": 380 }, { "epoch": 9.52, "learning_rate": 1.6190476190476193e-05, "loss": 0.0078, "step": 400 }, { "epoch": 10.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.0103, "step": 420 }, { "epoch": 10.48, "learning_rate": 1.580952380952381e-05, "loss": 0.0061, "step": 440 }, { "epoch": 10.95, "learning_rate": 1.5619047619047622e-05, "loss": 0.0085, "step": 460 }, { "epoch": 11.43, "learning_rate": 1.542857142857143e-05, "loss": 0.005, "step": 480 }, { "epoch": 11.9, "learning_rate": 1.523809523809524e-05, "loss": 0.0093, "step": 500 }, { "epoch": 12.38, "learning_rate": 1.5047619047619049e-05, "loss": 0.0066, "step": 520 }, { "epoch": 12.86, "learning_rate": 1.4857142857142858e-05, "loss": 0.0061, "step": 540 }, { "epoch": 13.33, "learning_rate": 1.4666666666666666e-05, "loss": 0.0048, "step": 560 }, { "epoch": 13.81, "learning_rate": 1.4476190476190478e-05, "loss": 0.0051, "step": 580 }, { "epoch": 14.29, "learning_rate": 1.4285714285714287e-05, "loss": 0.0036, "step": 600 }, { "epoch": 14.76, "learning_rate": 1.4095238095238097e-05, "loss": 0.0053, "step": 620 }, { "epoch": 15.24, "learning_rate": 1.3904761904761905e-05, "loss": 0.0052, "step": 640 }, { "epoch": 15.71, "learning_rate": 1.3714285714285716e-05, "loss": 0.0054, "step": 660 }, { "epoch": 16.19, "learning_rate": 1.3523809523809525e-05, "loss": 0.0049, "step": 680 }, { "epoch": 16.67, "learning_rate": 1.3333333333333333e-05, "loss": 0.0055, "step": 700 }, { "epoch": 17.14, "learning_rate": 1.3142857142857145e-05, "loss": 0.0046, "step": 720 }, { "epoch": 17.62, "learning_rate": 1.2952380952380954e-05, "loss": 0.004, "step": 740 }, { "epoch": 18.1, "learning_rate": 1.2761904761904762e-05, "loss": 0.0047, "step": 760 }, { "epoch": 18.57, "learning_rate": 1.2571428571428572e-05, "loss": 0.0047, "step": 780 }, { "epoch": 19.05, "learning_rate": 1.2380952380952383e-05, "loss": 0.0047, "step": 800 }, { "epoch": 19.52, "learning_rate": 1.2190476190476192e-05, "loss": 0.0038, "step": 820 }, { "epoch": 20.0, "learning_rate": 1.2e-05, "loss": 0.0046, "step": 840 }, { "epoch": 20.48, "learning_rate": 1.180952380952381e-05, "loss": 0.0034, "step": 860 }, { "epoch": 20.95, "learning_rate": 1.1619047619047621e-05, "loss": 0.004, "step": 880 }, { "epoch": 21.43, "learning_rate": 1.1428571428571429e-05, "loss": 0.0032, "step": 900 }, { "epoch": 21.9, "learning_rate": 1.1238095238095239e-05, "loss": 0.0039, "step": 920 }, { "epoch": 22.38, "learning_rate": 1.104761904761905e-05, "loss": 0.0033, "step": 940 }, { "epoch": 22.86, "learning_rate": 1.0857142857142858e-05, "loss": 0.0032, "step": 960 }, { "epoch": 23.33, "learning_rate": 1.0666666666666667e-05, "loss": 0.0033, "step": 980 }, { "epoch": 23.81, "learning_rate": 1.0476190476190477e-05, "loss": 0.003, "step": 1000 }, { "epoch": 24.29, "learning_rate": 1.0285714285714285e-05, "loss": 0.003, "step": 1020 }, { "epoch": 24.76, "learning_rate": 1.0095238095238096e-05, "loss": 0.003, "step": 1040 }, { "epoch": 25.24, "learning_rate": 9.904761904761906e-06, "loss": 0.0034, "step": 1060 }, { "epoch": 25.71, "learning_rate": 9.714285714285715e-06, "loss": 0.0029, "step": 1080 }, { "epoch": 26.19, "learning_rate": 9.523809523809525e-06, "loss": 0.0031, "step": 1100 }, { "epoch": 26.67, "learning_rate": 9.333333333333334e-06, "loss": 0.0032, "step": 1120 }, { "epoch": 27.14, "learning_rate": 9.142857142857144e-06, "loss": 0.0032, "step": 1140 }, { "epoch": 27.62, "learning_rate": 8.952380952380953e-06, "loss": 0.003, "step": 1160 }, { "epoch": 28.1, "learning_rate": 8.761904761904763e-06, "loss": 0.0031, "step": 1180 }, { "epoch": 28.57, "learning_rate": 8.571428571428571e-06, "loss": 0.0026, "step": 1200 }, { "epoch": 29.05, "learning_rate": 8.380952380952382e-06, "loss": 0.0032, "step": 1220 }, { "epoch": 29.52, "learning_rate": 8.190476190476192e-06, "loss": 0.0026, "step": 1240 }, { "epoch": 30.0, "learning_rate": 8.000000000000001e-06, "loss": 0.004, "step": 1260 }, { "epoch": 30.48, "learning_rate": 7.809523809523811e-06, "loss": 0.0031, "step": 1280 }, { "epoch": 30.95, "learning_rate": 7.61904761904762e-06, "loss": 0.0034, "step": 1300 }, { "epoch": 31.43, "learning_rate": 7.428571428571429e-06, "loss": 0.0032, "step": 1320 }, { "epoch": 31.9, "learning_rate": 7.238095238095239e-06, "loss": 0.0031, "step": 1340 }, { "epoch": 32.38, "learning_rate": 7.047619047619048e-06, "loss": 0.0029, "step": 1360 }, { "epoch": 32.86, "learning_rate": 6.857142857142858e-06, "loss": 0.0031, "step": 1380 }, { "epoch": 33.33, "learning_rate": 6.666666666666667e-06, "loss": 0.0033, "step": 1400 }, { "epoch": 33.81, "learning_rate": 6.476190476190477e-06, "loss": 0.0031, "step": 1420 }, { "epoch": 34.29, "learning_rate": 6.285714285714286e-06, "loss": 0.0027, "step": 1440 }, { "epoch": 34.76, "learning_rate": 6.095238095238096e-06, "loss": 0.0028, "step": 1460 }, { "epoch": 35.24, "learning_rate": 5.904761904761905e-06, "loss": 0.0027, "step": 1480 }, { "epoch": 35.71, "learning_rate": 5.7142857142857145e-06, "loss": 0.0029, "step": 1500 }, { "epoch": 36.19, "learning_rate": 5.523809523809525e-06, "loss": 0.0026, "step": 1520 }, { "epoch": 36.67, "learning_rate": 5.333333333333334e-06, "loss": 0.0024, "step": 1540 }, { "epoch": 37.14, "learning_rate": 5.142857142857142e-06, "loss": 0.0029, "step": 1560 }, { "epoch": 37.62, "learning_rate": 4.952380952380953e-06, "loss": 0.0023, "step": 1580 }, { "epoch": 38.1, "learning_rate": 4.761904761904762e-06, "loss": 0.0029, "step": 1600 }, { "epoch": 38.57, "learning_rate": 4.571428571428572e-06, "loss": 0.0023, "step": 1620 }, { "epoch": 39.05, "learning_rate": 4.3809523809523815e-06, "loss": 0.0027, "step": 1640 }, { "epoch": 39.52, "learning_rate": 4.190476190476191e-06, "loss": 0.0022, "step": 1660 }, { "epoch": 40.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0028, "step": 1680 }, { "epoch": 40.48, "learning_rate": 3.80952380952381e-06, "loss": 0.0024, "step": 1700 }, { "epoch": 40.95, "learning_rate": 3.6190476190476194e-06, "loss": 0.0025, "step": 1720 }, { "epoch": 41.43, "learning_rate": 3.428571428571429e-06, "loss": 0.0022, "step": 1740 }, { "epoch": 41.9, "learning_rate": 3.2380952380952385e-06, "loss": 0.0026, "step": 1760 }, { "epoch": 42.38, "learning_rate": 3.047619047619048e-06, "loss": 0.0021, "step": 1780 }, { "epoch": 42.86, "learning_rate": 2.8571428571428573e-06, "loss": 0.0027, "step": 1800 }, { "epoch": 43.33, "learning_rate": 2.666666666666667e-06, "loss": 0.0023, "step": 1820 }, { "epoch": 43.81, "learning_rate": 2.4761904761904764e-06, "loss": 0.0025, "step": 1840 }, { "epoch": 44.29, "learning_rate": 2.285714285714286e-06, "loss": 0.0022, "step": 1860 }, { "epoch": 44.76, "learning_rate": 2.0952380952380955e-06, "loss": 0.0024, "step": 1880 }, { "epoch": 45.24, "learning_rate": 1.904761904761905e-06, "loss": 0.0023, "step": 1900 }, { "epoch": 45.71, "learning_rate": 1.7142857142857145e-06, "loss": 0.0023, "step": 1920 }, { "epoch": 46.19, "learning_rate": 1.523809523809524e-06, "loss": 0.0022, "step": 1940 }, { "epoch": 46.67, "learning_rate": 1.3333333333333334e-06, "loss": 0.0024, "step": 1960 }, { "epoch": 47.14, "learning_rate": 1.142857142857143e-06, "loss": 0.002, "step": 1980 }, { "epoch": 47.62, "learning_rate": 9.523809523809525e-07, "loss": 0.0021, "step": 2000 }, { "epoch": 48.1, "learning_rate": 7.61904761904762e-07, "loss": 0.0023, "step": 2020 }, { "epoch": 48.57, "learning_rate": 5.714285714285715e-07, "loss": 0.0022, "step": 2040 }, { "epoch": 49.05, "learning_rate": 3.80952380952381e-07, "loss": 0.0021, "step": 2060 }, { "epoch": 49.52, "learning_rate": 1.904761904761905e-07, "loss": 0.0021, "step": 2080 }, { "epoch": 50.0, "learning_rate": 0.0, "loss": 0.0021, "step": 2100 }, { "epoch": 50.0, "step": 2100, "total_flos": 13733996789760.0, "train_loss": 0.0559101588953109, "train_runtime": 46787.7757, "train_samples_per_second": 0.357, "train_steps_per_second": 0.045 } ], "max_steps": 2100, "num_train_epochs": 50, "total_flos": 13733996789760.0, "trial_name": null, "trial_params": null }