{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1500.0, "global_step": 27000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 27.78, "learning_rate": 9.814814814814815e-06, "loss": 3.2486, "step": 500 }, { "epoch": 55.56, "learning_rate": 9.62962962962963e-06, "loss": 1.3764, "step": 1000 }, { "epoch": 83.33, "learning_rate": 9.444444444444445e-06, "loss": 0.5771, "step": 1500 }, { "epoch": 111.11, "learning_rate": 9.25925925925926e-06, "loss": 0.2647, "step": 2000 }, { "epoch": 138.89, "learning_rate": 9.074074074074075e-06, "loss": 0.1349, "step": 2500 }, { "epoch": 166.67, "learning_rate": 8.888888888888888e-06, "loss": 0.0852, "step": 3000 }, { "epoch": 194.44, "learning_rate": 8.703703703703705e-06, "loss": 0.0669, "step": 3500 }, { "epoch": 222.22, "learning_rate": 8.518518518518519e-06, "loss": 0.0575, "step": 4000 }, { "epoch": 250.0, "learning_rate": 8.333333333333334e-06, "loss": 0.0514, "step": 4500 }, { "epoch": 277.78, "learning_rate": 8.148148148148148e-06, "loss": 0.0477, "step": 5000 }, { "epoch": 305.56, "learning_rate": 7.962962962962963e-06, "loss": 0.0446, "step": 5500 }, { "epoch": 333.33, "learning_rate": 7.77777777777778e-06, "loss": 0.0428, "step": 6000 }, { "epoch": 361.11, "learning_rate": 7.592592592592594e-06, "loss": 0.0412, "step": 6500 }, { "epoch": 388.89, "learning_rate": 7.4074074074074075e-06, "loss": 0.0401, "step": 7000 }, { "epoch": 416.67, "learning_rate": 7.222222222222223e-06, "loss": 0.0393, "step": 7500 }, { "epoch": 444.44, "learning_rate": 7.0370370370370375e-06, "loss": 0.0384, "step": 8000 }, { "epoch": 472.22, "learning_rate": 6.851851851851853e-06, "loss": 0.0376, "step": 8500 }, { "epoch": 500.0, "learning_rate": 6.666666666666667e-06, "loss": 0.037, "step": 9000 }, { "epoch": 527.78, "learning_rate": 6.481481481481482e-06, "loss": 0.0366, "step": 9500 }, { "epoch": 555.56, "learning_rate": 6.296296296296297e-06, "loss": 0.0362, "step": 10000 }, { "epoch": 583.33, "learning_rate": 6.111111111111112e-06, "loss": 0.0356, "step": 10500 }, { "epoch": 611.11, "learning_rate": 5.925925925925926e-06, "loss": 0.0353, "step": 11000 }, { "epoch": 638.89, "learning_rate": 5.740740740740741e-06, "loss": 0.0348, "step": 11500 }, { "epoch": 666.67, "learning_rate": 5.555555555555557e-06, "loss": 0.0346, "step": 12000 }, { "epoch": 694.44, "learning_rate": 5.370370370370371e-06, "loss": 0.0343, "step": 12500 }, { "epoch": 722.22, "learning_rate": 5.185185185185185e-06, "loss": 0.0339, "step": 13000 }, { "epoch": 750.0, "learning_rate": 5e-06, "loss": 0.0338, "step": 13500 }, { "epoch": 777.78, "learning_rate": 4.814814814814815e-06, "loss": 0.0335, "step": 14000 }, { "epoch": 805.56, "learning_rate": 4.62962962962963e-06, "loss": 0.0335, "step": 14500 }, { "epoch": 833.33, "learning_rate": 4.444444444444444e-06, "loss": 0.0331, "step": 15000 }, { "epoch": 861.11, "learning_rate": 4.2592592592592596e-06, "loss": 0.0329, "step": 15500 }, { "epoch": 888.89, "learning_rate": 4.074074074074074e-06, "loss": 0.0327, "step": 16000 }, { "epoch": 916.67, "learning_rate": 3.88888888888889e-06, "loss": 0.0325, "step": 16500 }, { "epoch": 944.44, "learning_rate": 3.7037037037037037e-06, "loss": 0.0322, "step": 17000 }, { "epoch": 972.22, "learning_rate": 3.5185185185185187e-06, "loss": 0.0323, "step": 17500 }, { "epoch": 1000.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.032, "step": 18000 }, { "epoch": 1027.78, "learning_rate": 3.1481481481481483e-06, "loss": 0.0319, "step": 18500 }, { "epoch": 1055.56, "learning_rate": 2.962962962962963e-06, "loss": 0.0316, "step": 19000 }, { "epoch": 1083.33, "learning_rate": 2.7777777777777783e-06, "loss": 0.0317, "step": 19500 }, { "epoch": 1111.11, "learning_rate": 2.5925925925925925e-06, "loss": 0.0315, "step": 20000 }, { "epoch": 1138.89, "learning_rate": 2.4074074074074075e-06, "loss": 0.0313, "step": 20500 }, { "epoch": 1166.67, "learning_rate": 2.222222222222222e-06, "loss": 0.0312, "step": 21000 }, { "epoch": 1194.44, "learning_rate": 2.037037037037037e-06, "loss": 0.0312, "step": 21500 }, { "epoch": 1222.22, "learning_rate": 1.8518518518518519e-06, "loss": 0.031, "step": 22000 }, { "epoch": 1250.0, "learning_rate": 1.6666666666666667e-06, "loss": 0.031, "step": 22500 }, { "epoch": 1277.78, "learning_rate": 1.4814814814814815e-06, "loss": 0.031, "step": 23000 }, { "epoch": 1305.56, "learning_rate": 1.2962962962962962e-06, "loss": 0.0308, "step": 23500 }, { "epoch": 1333.33, "learning_rate": 1.111111111111111e-06, "loss": 0.0306, "step": 24000 }, { "epoch": 1361.11, "learning_rate": 9.259259259259259e-07, "loss": 0.0307, "step": 24500 }, { "epoch": 1388.89, "learning_rate": 7.407407407407407e-07, "loss": 0.0306, "step": 25000 }, { "epoch": 1416.67, "learning_rate": 5.555555555555555e-07, "loss": 0.0306, "step": 25500 }, { "epoch": 1444.44, "learning_rate": 3.7037037037037036e-07, "loss": 0.0305, "step": 26000 }, { "epoch": 1472.22, "learning_rate": 1.8518518518518518e-07, "loss": 0.0305, "step": 26500 }, { "epoch": 1500.0, "learning_rate": 0.0, "loss": 0.0305, "step": 27000 }, { "epoch": 1500.0, "step": 27000, "total_flos": 1.8317092847616e+16, "train_runtime": 3225.4397, "train_samples_per_second": 8.371 } ], "max_steps": 27000, "num_train_epochs": 1500, "total_flos": 1.8317092847616e+16, "trial_name": null, "trial_params": null }