| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.824392126592049, |
| "global_step": 50000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1, |
| "learning_rate": 7.142857142857143e-06, |
| "loss": 5.3189, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 5.3266, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 2.1428571428571428e-05, |
| "loss": 5.3215, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 5.3156, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 3.571428571428572e-05, |
| "loss": 5.3094, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.2857142857142856e-05, |
| "loss": 5.294, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5e-05, |
| "loss": 5.2852, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.883720930232558e-05, |
| "loss": 5.2715, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.7674418604651164e-05, |
| "loss": 5.2477, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.651162790697675e-05, |
| "loss": 5.2361, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 4.5348837209302326e-05, |
| "loss": 5.2234, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 4.418604651162791e-05, |
| "loss": 5.2037, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 4.302325581395349e-05, |
| "loss": 5.1853, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.186046511627907e-05, |
| "loss": 5.1751, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 4.0697674418604655e-05, |
| "loss": 5.1663, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 3.953488372093023e-05, |
| "loss": 5.1615, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 3.837209302325582e-05, |
| "loss": 5.1547, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 3.7209302325581394e-05, |
| "loss": 5.1431, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 3.604651162790698e-05, |
| "loss": 5.1293, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 3.488372093023256e-05, |
| "loss": 5.1278, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 3.372093023255814e-05, |
| "loss": 5.1174, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 3.2558139534883724e-05, |
| "loss": 5.1033, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.13953488372093e-05, |
| "loss": 5.1019, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 3.0232558139534883e-05, |
| "loss": 5.096, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 2.9069767441860467e-05, |
| "loss": 5.0878, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 2.7906976744186048e-05, |
| "loss": 5.0847, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 2.674418604651163e-05, |
| "loss": 5.0731, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 2.5581395348837212e-05, |
| "loss": 5.0672, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.441860465116279e-05, |
| "loss": 5.0717, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 2.3255813953488374e-05, |
| "loss": 5.0631, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 2.2093023255813955e-05, |
| "loss": 5.0626, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 2.0930232558139536e-05, |
| "loss": 5.06, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 1.9767441860465116e-05, |
| "loss": 5.0461, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 1.8604651162790697e-05, |
| "loss": 5.039, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 1.744186046511628e-05, |
| "loss": 5.0416, |
| "step": 35000 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 1.6279069767441862e-05, |
| "loss": 5.0437, |
| "step": 36000 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 1.5116279069767441e-05, |
| "loss": 5.03, |
| "step": 37000 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 1.3953488372093024e-05, |
| "loss": 5.027, |
| "step": 38000 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 1.2790697674418606e-05, |
| "loss": 5.0289, |
| "step": 39000 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 1.1627906976744187e-05, |
| "loss": 5.0246, |
| "step": 40000 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 1.0465116279069768e-05, |
| "loss": 5.0209, |
| "step": 41000 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 9.302325581395349e-06, |
| "loss": 5.0167, |
| "step": 42000 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 8.139534883720931e-06, |
| "loss": 5.0108, |
| "step": 43000 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 6.976744186046512e-06, |
| "loss": 5.0121, |
| "step": 44000 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 5.8139534883720935e-06, |
| "loss": 5.0122, |
| "step": 45000 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 4.651162790697674e-06, |
| "loss": 5.0222, |
| "step": 46000 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 3.488372093023256e-06, |
| "loss": 5.0162, |
| "step": 47000 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 2.325581395348837e-06, |
| "loss": 5.0045, |
| "step": 48000 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 1.1627906976744186e-06, |
| "loss": 5.0047, |
| "step": 49000 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 0.0, |
| "loss": 5.005, |
| "step": 50000 |
| } |
| ], |
| "max_steps": 50000, |
| "num_train_epochs": 5, |
| "total_flos": 1.2629229170809897e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|