{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.5, "learning_rate": 1e-05, "loss": 2.2814, "step": 10 }, { "epoch": 5.0, "learning_rate": 2e-05, "loss": 2.2162, "step": 20 }, { "epoch": 7.5, "learning_rate": 3e-05, "loss": 2.06, "step": 30 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 1.7632, "step": 40 }, { "epoch": 12.5, "learning_rate": 5e-05, "loss": 1.3906, "step": 50 }, { "epoch": 15.0, "learning_rate": 6e-05, "loss": 1.0958, "step": 60 }, { "epoch": 17.5, "learning_rate": 7e-05, "loss": 0.8168, "step": 70 }, { "epoch": 20.0, "learning_rate": 8e-05, "loss": 0.6212, "step": 80 }, { "epoch": 22.5, "learning_rate": 9e-05, "loss": 0.5377, "step": 90 }, { "epoch": 25.0, "learning_rate": 0.0001, "loss": 0.4409, "step": 100 }, { "epoch": 27.5, "learning_rate": 9.972609476841367e-05, "loss": 0.4009, "step": 110 }, { "epoch": 30.0, "learning_rate": 9.890738003669029e-05, "loss": 0.3598, "step": 120 }, { "epoch": 32.5, "learning_rate": 9.755282581475769e-05, "loss": 0.2784, "step": 130 }, { "epoch": 35.0, "learning_rate": 9.567727288213005e-05, "loss": 0.3299, "step": 140 }, { "epoch": 37.5, "learning_rate": 9.330127018922194e-05, "loss": 0.3236, "step": 150 }, { "epoch": 40.0, "learning_rate": 9.045084971874738e-05, "loss": 0.2714, "step": 160 }, { "epoch": 42.5, "learning_rate": 8.715724127386972e-05, "loss": 0.2654, "step": 170 }, { "epoch": 45.0, "learning_rate": 8.345653031794292e-05, "loss": 0.254, "step": 180 }, { "epoch": 47.5, "learning_rate": 7.938926261462366e-05, "loss": 0.2223, "step": 190 }, { "epoch": 50.0, "learning_rate": 7.500000000000001e-05, "loss": 0.2671, "step": 200 }, { "epoch": 52.5, "learning_rate": 7.033683215379002e-05, "loss": 0.2293, "step": 210 }, { "epoch": 55.0, "learning_rate": 6.545084971874738e-05, "loss": 0.2235, "step": 220 }, { "epoch": 57.5, "learning_rate": 6.0395584540887963e-05, "loss": 0.2031, "step": 230 }, { "epoch": 60.0, "learning_rate": 5.522642316338268e-05, "loss": 0.2034, "step": 240 }, { "epoch": 62.5, "learning_rate": 5e-05, "loss": 0.189, "step": 250 }, { "epoch": 65.0, "learning_rate": 4.477357683661734e-05, "loss": 0.1788, "step": 260 }, { "epoch": 67.5, "learning_rate": 3.960441545911204e-05, "loss": 0.1698, "step": 270 }, { "epoch": 70.0, "learning_rate": 3.4549150281252636e-05, "loss": 0.2309, "step": 280 }, { "epoch": 72.5, "learning_rate": 2.9663167846209998e-05, "loss": 0.1637, "step": 290 }, { "epoch": 75.0, "learning_rate": 2.500000000000001e-05, "loss": 0.1669, "step": 300 }, { "epoch": 77.5, "learning_rate": 2.061073738537635e-05, "loss": 0.1786, "step": 310 }, { "epoch": 80.0, "learning_rate": 1.6543469682057106e-05, "loss": 0.1792, "step": 320 }, { "epoch": 82.5, "learning_rate": 1.2842758726130283e-05, "loss": 0.1874, "step": 330 }, { "epoch": 85.0, "learning_rate": 9.549150281252633e-06, "loss": 0.1805, "step": 340 }, { "epoch": 87.5, "learning_rate": 6.698729810778065e-06, "loss": 0.196, "step": 350 }, { "epoch": 90.0, "learning_rate": 4.322727117869951e-06, "loss": 0.1615, "step": 360 }, { "epoch": 92.5, "learning_rate": 2.4471741852423237e-06, "loss": 0.1481, "step": 370 }, { "epoch": 95.0, "learning_rate": 1.0926199633097157e-06, "loss": 0.1633, "step": 380 }, { "epoch": 97.5, "learning_rate": 2.7390523158633554e-07, "loss": 0.1673, "step": 390 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.1748, "step": 400 }, { "epoch": 100.0, "step": 400, "total_flos": 1.3814551337974377e+23, "train_loss": 0.4972930908203125, "train_runtime": 820.6819, "train_samples_per_second": 4242.326, "train_steps_per_second": 0.487 } ], "max_steps": 400, "num_train_epochs": 100, "start_time": 1656513160.5917685, "total_flos": 1.3814551337974377e+23, "trial_name": null, "trial_params": null }