| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.2270624795289868, |
| "global_step": 2400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 8.47457627118644e-06, |
| "loss": 1.695, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.694915254237288e-05, |
| "loss": 1.6613, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9996485952627554e-05, |
| "loss": 1.5992, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.99769328594951e-05, |
| "loss": 1.5037, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9940262356746553e-05, |
| "loss": 1.4537, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9886537359911694e-05, |
| "loss": 1.3935, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.981585004489171e-05, |
| "loss": 1.4331, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9728321689813142e-05, |
| "loss": 1.3732, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.962410246695118e-05, |
| "loss": 1.3806, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9503371185079295e-05, |
| "loss": 1.3911, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.936633498268728e-05, |
| "loss": 1.3657, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.9213228972594032e-05, |
| "loss": 1.3487, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.9044315838564835e-05, |
| "loss": 1.3528, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.885988538462517e-05, |
| "loss": 1.366, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.866025403784439e-05, |
| "loss": 1.348, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.8445764305442205e-05, |
| "loss": 1.334, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.821678418714957e-05, |
| "loss": 1.3501, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.797370654383204e-05, |
| "loss": 1.3119, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.771694842345894e-05, |
| "loss": 1.3515, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.7446950345574762e-05, |
| "loss": 1.3176, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.71641755455004e-05, |
| "loss": 1.3265, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 1.686910917956096e-05, |
| "loss": 1.3298, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 1.6562257492703756e-05, |
| "loss": 1.3303, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.624414694993454e-05, |
| "loss": 1.3136, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 1.5915323333062255e-05, |
| "loss": 1.282, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.557635080430196e-05, |
| "loss": 1.3079, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.5227810938342493e-05, |
| "loss": 1.3088, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.4870301724539627e-05, |
| "loss": 1.3084, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.4504436540946548e-05, |
| "loss": 1.3018, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.4130843101942017e-05, |
| "loss": 1.2903, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.3750162381261693e-05, |
| "loss": 1.3041, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.3363047512280391e-05, |
| "loss": 1.2849, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2970162667432075e-05, |
| "loss": 1.2975, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 1.2572181918690162e-05, |
| "loss": 1.2865, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.2169788081063181e-05, |
| "loss": 1.3047, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.1763671541090027e-05, |
| "loss": 1.3033, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1354529072344749e-05, |
| "loss": 1.2714, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.0943062639983119e-05, |
| "loss": 1.3111, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.0529978196382011e-05, |
| "loss": 1.2791, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.0115984469937883e-05, |
| "loss": 1.2748, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 9.701791749102496e-06, |
| "loss": 1.2642, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 9.288110663742001e-06, |
| "loss": 1.2482, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 8.87565096591028e-06, |
| "loss": 1.2742, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 8.465120312128371e-06, |
| "loss": 1.2865, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 8.057223049259155e-06, |
| "loss": 1.2748, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 7.652659006060436e-06, |
| "loss": 1.3144, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 7.252122292489747e-06, |
| "loss": 1.2646, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 6.856300108820865e-06, |
| "loss": 1.3096, |
| "step": 2400 |
| } |
| ], |
| "max_steps": 3910, |
| "num_train_epochs": 2, |
| "total_flos": 1604270627487744.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|