{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8670770831526923,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.9277435764039425e-05,
      "loss": 4.5138,
      "step": 500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.855487152807885e-05,
      "loss": 4.3608,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.783230729211827e-05,
      "loss": 4.031,
      "step": 1500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.710974305615769e-05,
      "loss": 3.3475,
      "step": 2000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.638717882019712e-05,
      "loss": 2.8365,
      "step": 2500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5664614584236545e-05,
      "loss": 2.5081,
      "step": 3000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.494205034827597e-05,
      "loss": 2.2906,
      "step": 3500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.421948611231539e-05,
      "loss": 2.1787,
      "step": 4000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.349692187635481e-05,
      "loss": 2.1026,
      "step": 4500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.277435764039423e-05,
      "loss": 2.0387,
      "step": 5000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.205179340443365e-05,
      "loss": 2.0001,
      "step": 5500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1329229168473074e-05,
      "loss": 1.9804,
      "step": 6000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.06066649325125e-05,
      "loss": 1.951,
      "step": 6500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9884100696551927e-05,
      "loss": 1.9113,
      "step": 7000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.916153646059135e-05,
      "loss": 1.9078,
      "step": 7500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.843897222463077e-05,
      "loss": 1.8793,
      "step": 8000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7716407988670195e-05,
      "loss": 1.8833,
      "step": 8500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.699384375270962e-05,
      "loss": 1.8618,
      "step": 9000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.627127951674904e-05,
      "loss": 1.835,
      "step": 9500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.554871528078846e-05,
      "loss": 1.8187,
      "step": 10000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4826151044827885e-05,
      "loss": 1.8097,
      "step": 10500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.410358680886731e-05,
      "loss": 1.7973,
      "step": 11000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.338102257290673e-05,
      "loss": 1.8006,
      "step": 11500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.265845833694616e-05,
      "loss": 1.7698,
      "step": 12000
    },
    {
      "epoch": 0.36,
      "learning_rate": 3.193589410098558e-05,
      "loss": 1.7673,
      "step": 12500
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.1213329865025005e-05,
      "loss": 1.7731,
      "step": 13000
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.0490765629064428e-05,
      "loss": 1.7643,
      "step": 13500
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.976820139310385e-05,
      "loss": 1.7534,
      "step": 14000
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.9045637157143273e-05,
      "loss": 1.7307,
      "step": 14500
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.8323072921182693e-05,
      "loss": 1.7279,
      "step": 15000
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.7600508685222115e-05,
      "loss": 1.7162,
      "step": 15500
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.6877944449261538e-05,
      "loss": 1.7006,
      "step": 16000
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.615538021330096e-05,
      "loss": 1.7176,
      "step": 16500
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.5432815977340387e-05,
      "loss": 1.6928,
      "step": 17000
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.471025174137981e-05,
      "loss": 1.6819,
      "step": 17500
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.3987687505419232e-05,
      "loss": 1.6864,
      "step": 18000
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.3265123269458655e-05,
      "loss": 1.6813,
      "step": 18500
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.2542559033498077e-05,
      "loss": 1.6785,
      "step": 19000
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.1819994797537503e-05,
      "loss": 1.6734,
      "step": 19500
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.1097430561576926e-05,
      "loss": 1.6543,
      "step": 20000
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.037486632561635e-05,
      "loss": 1.6633,
      "step": 20500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.965230208965577e-05,
      "loss": 1.668,
      "step": 21000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8929737853695194e-05,
      "loss": 1.6554,
      "step": 21500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.820717361773462e-05,
      "loss": 1.6451,
      "step": 22000
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.748460938177404e-05,
      "loss": 1.6347,
      "step": 22500
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.6762045145813462e-05,
      "loss": 1.6288,
      "step": 23000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.6039480909852885e-05,
      "loss": 1.6476,
      "step": 23500
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.531691667389231e-05,
      "loss": 1.629,
      "step": 24000
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.4594352437931733e-05,
      "loss": 1.6197,
      "step": 24500
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.3871788201971156e-05,
      "loss": 1.6158,
      "step": 25000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.3149223966010579e-05,
      "loss": 1.6208,
      "step": 25500
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.2426659730050003e-05,
      "loss": 1.6081,
      "step": 26000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.1704095494089424e-05,
      "loss": 1.6147,
      "step": 26500
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.0981531258128848e-05,
      "loss": 1.6003,
      "step": 27000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.0258967022168271e-05,
      "loss": 1.5853,
      "step": 27500
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.536402786207694e-06,
      "loss": 1.6039,
      "step": 28000
    },
    {
      "epoch": 0.82,
      "learning_rate": 8.813838550247118e-06,
      "loss": 1.594,
      "step": 28500
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.091274314286539e-06,
      "loss": 1.5874,
      "step": 29000
    },
    {
      "epoch": 0.85,
      "learning_rate": 7.3687100783259635e-06,
      "loss": 1.5707,
      "step": 29500
    },
    {
      "epoch": 0.87,
      "learning_rate": 6.646145842365387e-06,
      "loss": 1.5902,
      "step": 30000
    }
  ],
  "max_steps": 34599,
  "num_train_epochs": 1,
  "total_flos": 7.89617673216e+16,
  "trial_name": null,
  "trial_params": null
}