{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9943289224952743, "global_step": 1188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 5e-06, "loss": 1.6488, "step": 10 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 1.6585, "step": 20 }, { "epoch": 0.08, "learning_rate": 1.5e-05, "loss": 1.6373, "step": 30 }, { "epoch": 0.1, "learning_rate": 2e-05, "loss": 1.5933, "step": 40 }, { "epoch": 0.13, "learning_rate": 2.5e-05, "loss": 1.4982, "step": 50 }, { "epoch": 0.15, "learning_rate": 3e-05, "loss": 1.3297, "step": 60 }, { "epoch": 0.18, "learning_rate": 3.5e-05, "loss": 1.1515, "step": 70 }, { "epoch": 0.2, "learning_rate": 4e-05, "loss": 0.9944, "step": 80 }, { "epoch": 0.23, "learning_rate": 4.5e-05, "loss": 0.8887, "step": 90 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 0.8275, "step": 100 }, { "epoch": 0.28, "learning_rate": 4.954044117647059e-05, "loss": 0.8152, "step": 110 }, { "epoch": 0.3, "learning_rate": 4.908088235294118e-05, "loss": 0.7991, "step": 120 }, { "epoch": 0.33, "learning_rate": 4.8621323529411765e-05, "loss": 0.7932, "step": 130 }, { "epoch": 0.35, "learning_rate": 4.816176470588236e-05, "loss": 0.7928, "step": 140 }, { "epoch": 0.38, "learning_rate": 4.7702205882352946e-05, "loss": 0.7968, "step": 150 }, { "epoch": 0.4, "learning_rate": 4.7242647058823534e-05, "loss": 0.7744, "step": 160 }, { "epoch": 0.43, "learning_rate": 4.678308823529412e-05, "loss": 0.7791, "step": 170 }, { "epoch": 0.45, "learning_rate": 4.632352941176471e-05, "loss": 0.7736, "step": 180 }, { "epoch": 0.48, "learning_rate": 4.5863970588235296e-05, "loss": 0.766, "step": 190 }, { "epoch": 0.5, "learning_rate": 4.5404411764705883e-05, "loss": 0.7717, "step": 200 }, { "epoch": 0.53, "learning_rate": 4.494485294117647e-05, "loss": 0.7638, "step": 210 }, { "epoch": 0.55, "learning_rate": 4.448529411764706e-05, "loss": 0.768, "step": 220 }, { "epoch": 0.58, "learning_rate": 4.4025735294117646e-05, "loss": 0.7573, "step": 230 }, { "epoch": 0.6, "learning_rate": 4.356617647058824e-05, "loss": 0.7633, "step": 240 }, { "epoch": 0.63, "learning_rate": 4.310661764705883e-05, "loss": 0.7562, "step": 250 }, { "epoch": 0.66, "learning_rate": 4.2647058823529415e-05, "loss": 0.7576, "step": 260 }, { "epoch": 0.68, "learning_rate": 4.21875e-05, "loss": 0.7516, "step": 270 }, { "epoch": 0.71, "learning_rate": 4.172794117647059e-05, "loss": 0.7501, "step": 280 }, { "epoch": 0.73, "learning_rate": 4.126838235294118e-05, "loss": 0.7601, "step": 290 }, { "epoch": 0.76, "learning_rate": 4.0808823529411765e-05, "loss": 0.7502, "step": 300 }, { "epoch": 0.78, "learning_rate": 4.034926470588236e-05, "loss": 0.7583, "step": 310 }, { "epoch": 0.81, "learning_rate": 3.9889705882352946e-05, "loss": 0.756, "step": 320 }, { "epoch": 0.83, "learning_rate": 3.943014705882353e-05, "loss": 0.7511, "step": 330 }, { "epoch": 0.86, "learning_rate": 3.897058823529412e-05, "loss": 0.7434, "step": 340 }, { "epoch": 0.88, "learning_rate": 3.851102941176471e-05, "loss": 0.7586, "step": 350 }, { "epoch": 0.91, "learning_rate": 3.8051470588235296e-05, "loss": 0.7486, "step": 360 }, { "epoch": 0.93, "learning_rate": 3.759191176470588e-05, "loss": 0.7499, "step": 370 }, { "epoch": 0.96, "learning_rate": 3.713235294117647e-05, "loss": 0.7516, "step": 380 }, { "epoch": 0.98, "learning_rate": 3.667279411764706e-05, "loss": 0.7451, "step": 390 }, { "epoch": 1.01, "learning_rate": 3.6213235294117646e-05, "loss": 0.7491, "step": 400 }, { "epoch": 1.03, "learning_rate": 3.575367647058824e-05, "loss": 0.7484, "step": 410 }, { "epoch": 1.06, "learning_rate": 3.529411764705883e-05, "loss": 0.7423, "step": 420 }, { "epoch": 1.08, "learning_rate": 3.4834558823529415e-05, "loss": 0.7458, "step": 430 }, { "epoch": 1.11, "learning_rate": 3.4375e-05, "loss": 0.7428, "step": 440 }, { "epoch": 1.13, "learning_rate": 3.391544117647059e-05, "loss": 0.7423, "step": 450 }, { "epoch": 1.16, "learning_rate": 3.345588235294118e-05, "loss": 0.7396, "step": 460 }, { "epoch": 1.18, "learning_rate": 3.2996323529411764e-05, "loss": 0.7426, "step": 470 }, { "epoch": 1.21, "learning_rate": 3.253676470588236e-05, "loss": 0.7421, "step": 480 }, { "epoch": 1.24, "learning_rate": 3.2077205882352946e-05, "loss": 0.7377, "step": 490 }, { "epoch": 1.26, "learning_rate": 3.161764705882353e-05, "loss": 0.7358, "step": 500 }, { "epoch": 1.29, "learning_rate": 3.115808823529412e-05, "loss": 0.7346, "step": 510 }, { "epoch": 1.31, "learning_rate": 3.069852941176471e-05, "loss": 0.7335, "step": 520 }, { "epoch": 1.34, "learning_rate": 3.0238970588235292e-05, "loss": 0.7242, "step": 530 }, { "epoch": 1.36, "learning_rate": 2.9779411764705883e-05, "loss": 0.7295, "step": 540 }, { "epoch": 1.39, "learning_rate": 2.9319852941176474e-05, "loss": 0.7294, "step": 550 }, { "epoch": 1.41, "learning_rate": 2.8860294117647058e-05, "loss": 0.7292, "step": 560 }, { "epoch": 1.44, "learning_rate": 2.840073529411765e-05, "loss": 0.7238, "step": 570 }, { "epoch": 1.46, "learning_rate": 2.7941176470588236e-05, "loss": 0.7383, "step": 580 }, { "epoch": 1.49, "learning_rate": 2.7481617647058827e-05, "loss": 0.7363, "step": 590 }, { "epoch": 1.51, "learning_rate": 2.702205882352941e-05, "loss": 0.7269, "step": 600 }, { "epoch": 1.54, "learning_rate": 2.6562500000000002e-05, "loss": 0.7408, "step": 610 }, { "epoch": 1.56, "learning_rate": 2.6102941176470593e-05, "loss": 0.7341, "step": 620 }, { "epoch": 1.59, "learning_rate": 2.5643382352941177e-05, "loss": 0.7372, "step": 630 }, { "epoch": 1.61, "learning_rate": 2.5183823529411764e-05, "loss": 0.7333, "step": 640 }, { "epoch": 1.64, "learning_rate": 2.4724264705882355e-05, "loss": 0.7261, "step": 650 }, { "epoch": 1.66, "learning_rate": 2.4264705882352942e-05, "loss": 0.7305, "step": 660 }, { "epoch": 1.69, "learning_rate": 2.380514705882353e-05, "loss": 0.7339, "step": 670 }, { "epoch": 1.71, "learning_rate": 2.334558823529412e-05, "loss": 0.7351, "step": 680 }, { "epoch": 1.74, "learning_rate": 2.2886029411764705e-05, "loss": 0.7307, "step": 690 }, { "epoch": 1.76, "learning_rate": 2.2426470588235296e-05, "loss": 0.7354, "step": 700 }, { "epoch": 1.79, "learning_rate": 2.1966911764705883e-05, "loss": 0.7254, "step": 710 }, { "epoch": 1.81, "learning_rate": 2.1507352941176474e-05, "loss": 0.7342, "step": 720 }, { "epoch": 1.84, "learning_rate": 2.104779411764706e-05, "loss": 0.7236, "step": 730 }, { "epoch": 1.87, "learning_rate": 2.058823529411765e-05, "loss": 0.7256, "step": 740 }, { "epoch": 1.89, "learning_rate": 2.0128676470588236e-05, "loss": 0.7299, "step": 750 }, { "epoch": 1.92, "learning_rate": 1.9669117647058824e-05, "loss": 0.7185, "step": 760 }, { "epoch": 1.94, "learning_rate": 1.9209558823529414e-05, "loss": 0.7333, "step": 770 }, { "epoch": 1.97, "learning_rate": 1.8750000000000002e-05, "loss": 0.7229, "step": 780 }, { "epoch": 1.99, "learning_rate": 1.829044117647059e-05, "loss": 0.7239, "step": 790 }, { "epoch": 2.02, "learning_rate": 1.7830882352941177e-05, "loss": 0.7271, "step": 800 }, { "epoch": 2.04, "learning_rate": 1.7371323529411764e-05, "loss": 0.7201, "step": 810 }, { "epoch": 2.07, "learning_rate": 1.6911764705882355e-05, "loss": 0.7228, "step": 820 }, { "epoch": 2.09, "learning_rate": 1.6452205882352942e-05, "loss": 0.7337, "step": 830 }, { "epoch": 2.12, "learning_rate": 1.599264705882353e-05, "loss": 0.7279, "step": 840 }, { "epoch": 2.14, "learning_rate": 1.5533088235294117e-05, "loss": 0.7283, "step": 850 }, { "epoch": 2.17, "learning_rate": 1.5073529411764706e-05, "loss": 0.714, "step": 860 }, { "epoch": 2.19, "learning_rate": 1.4613970588235295e-05, "loss": 0.7185, "step": 870 }, { "epoch": 2.22, "learning_rate": 1.4154411764705883e-05, "loss": 0.7216, "step": 880 }, { "epoch": 2.24, "learning_rate": 1.3694852941176472e-05, "loss": 0.7239, "step": 890 }, { "epoch": 2.27, "learning_rate": 1.323529411764706e-05, "loss": 0.7309, "step": 900 }, { "epoch": 2.29, "learning_rate": 1.2775735294117647e-05, "loss": 0.727, "step": 910 }, { "epoch": 2.32, "learning_rate": 1.2316176470588236e-05, "loss": 0.7165, "step": 920 }, { "epoch": 2.34, "learning_rate": 1.1856617647058823e-05, "loss": 0.723, "step": 930 }, { "epoch": 2.37, "learning_rate": 1.1397058823529412e-05, "loss": 0.7166, "step": 940 }, { "epoch": 2.39, "learning_rate": 1.09375e-05, "loss": 0.7178, "step": 950 }, { "epoch": 2.42, "learning_rate": 1.0477941176470589e-05, "loss": 0.7094, "step": 960 }, { "epoch": 2.44, "learning_rate": 1.0018382352941178e-05, "loss": 0.7229, "step": 970 }, { "epoch": 2.47, "learning_rate": 9.558823529411764e-06, "loss": 0.7116, "step": 980 }, { "epoch": 2.5, "learning_rate": 9.099264705882353e-06, "loss": 0.7187, "step": 990 }, { "epoch": 2.52, "learning_rate": 8.639705882352942e-06, "loss": 0.7103, "step": 1000 }, { "epoch": 2.55, "learning_rate": 8.18014705882353e-06, "loss": 0.7241, "step": 1010 }, { "epoch": 2.57, "learning_rate": 7.720588235294119e-06, "loss": 0.7336, "step": 1020 }, { "epoch": 2.6, "learning_rate": 7.261029411764707e-06, "loss": 0.7168, "step": 1030 }, { "epoch": 2.62, "learning_rate": 6.8014705882352935e-06, "loss": 0.7242, "step": 1040 }, { "epoch": 2.65, "learning_rate": 6.341911764705883e-06, "loss": 0.7199, "step": 1050 }, { "epoch": 2.67, "learning_rate": 5.882352941176471e-06, "loss": 0.725, "step": 1060 }, { "epoch": 2.7, "learning_rate": 5.422794117647059e-06, "loss": 0.7252, "step": 1070 }, { "epoch": 2.72, "learning_rate": 4.963235294117647e-06, "loss": 0.7183, "step": 1080 }, { "epoch": 2.75, "learning_rate": 4.503676470588236e-06, "loss": 0.7172, "step": 1090 }, { "epoch": 2.77, "learning_rate": 4.044117647058824e-06, "loss": 0.7195, "step": 1100 }, { "epoch": 2.8, "learning_rate": 3.584558823529412e-06, "loss": 0.7155, "step": 1110 }, { "epoch": 2.82, "learning_rate": 3.125e-06, "loss": 0.7209, "step": 1120 }, { "epoch": 2.85, "learning_rate": 2.6654411764705884e-06, "loss": 0.7112, "step": 1130 }, { "epoch": 2.87, "learning_rate": 2.2058823529411767e-06, "loss": 0.7105, "step": 1140 }, { "epoch": 2.9, "learning_rate": 1.7463235294117648e-06, "loss": 0.7217, "step": 1150 }, { "epoch": 2.92, "learning_rate": 1.286764705882353e-06, "loss": 0.7183, "step": 1160 }, { "epoch": 2.95, "learning_rate": 8.272058823529412e-07, "loss": 0.7143, "step": 1170 }, { "epoch": 2.97, "learning_rate": 3.6764705882352943e-07, "loss": 0.7126, "step": 1180 } ], "max_steps": 1188, "num_train_epochs": 3, "total_flos": 6.076984402892554e+19, "trial_name": null, "trial_params": null }