{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "global_step": 25284,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "learning_rate": 4.9011232399936727e-05,
      "loss": 3.3688,
      "step": 500
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.802246479987344e-05,
      "loss": 3.2716,
      "step": 1000
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.703369719981016e-05,
      "loss": 3.2238,
      "step": 1500
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.604492959974688e-05,
      "loss": 3.2109,
      "step": 2000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.5056161999683596e-05,
      "loss": 3.1767,
      "step": 2500
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.406739439962031e-05,
      "loss": 3.1522,
      "step": 3000
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.307862679955704e-05,
      "loss": 3.1347,
      "step": 3500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.2089859199493755e-05,
      "loss": 3.129,
      "step": 4000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.110109159943047e-05,
      "loss": 3.0598,
      "step": 4500
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.011232399936719e-05,
      "loss": 3.0041,
      "step": 5000
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.9123556399303914e-05,
      "loss": 3.0206,
      "step": 5500
    },
    {
      "epoch": 1.42,
      "learning_rate": 3.813478879924063e-05,
      "loss": 3.0094,
      "step": 6000
    },
    {
      "epoch": 1.54,
      "learning_rate": 3.714602119917735e-05,
      "loss": 3.0046,
      "step": 6500
    },
    {
      "epoch": 1.66,
      "learning_rate": 3.6157253599114066e-05,
      "loss": 3.0031,
      "step": 7000
    },
    {
      "epoch": 1.78,
      "learning_rate": 3.516848599905079e-05,
      "loss": 2.9904,
      "step": 7500
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.41797183989875e-05,
      "loss": 2.9906,
      "step": 8000
    },
    {
      "epoch": 2.02,
      "learning_rate": 3.3190950798924225e-05,
      "loss": 2.9812,
      "step": 8500
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.220218319886094e-05,
      "loss": 2.8911,
      "step": 9000
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.1213415598797666e-05,
      "loss": 2.9061,
      "step": 9500
    },
    {
      "epoch": 2.37,
      "learning_rate": 3.022464799873438e-05,
      "loss": 2.9192,
      "step": 10000
    },
    {
      "epoch": 2.49,
      "learning_rate": 2.92358803986711e-05,
      "loss": 2.9018,
      "step": 10500
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.8247112798607815e-05,
      "loss": 2.9167,
      "step": 11000
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.725834519854454e-05,
      "loss": 2.923,
      "step": 11500
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.6269577598481253e-05,
      "loss": 2.8988,
      "step": 12000
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.5280809998417977e-05,
      "loss": 2.8976,
      "step": 12500
    },
    {
      "epoch": 3.08,
      "learning_rate": 2.429204239835469e-05,
      "loss": 2.8552,
      "step": 13000
    },
    {
      "epoch": 3.2,
      "learning_rate": 2.3303274798291412e-05,
      "loss": 2.8315,
      "step": 13500
    },
    {
      "epoch": 3.32,
      "learning_rate": 2.231450719822813e-05,
      "loss": 2.8407,
      "step": 14000
    },
    {
      "epoch": 3.44,
      "learning_rate": 2.1325739598164847e-05,
      "loss": 2.8406,
      "step": 14500
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.0336971998101567e-05,
      "loss": 2.8386,
      "step": 15000
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.9348204398038285e-05,
      "loss": 2.8402,
      "step": 15500
    },
    {
      "epoch": 3.8,
      "learning_rate": 1.8359436797975006e-05,
      "loss": 2.8409,
      "step": 16000
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.7370669197911723e-05,
      "loss": 2.8446,
      "step": 16500
    },
    {
      "epoch": 4.03,
      "learning_rate": 1.6381901597848444e-05,
      "loss": 2.8165,
      "step": 17000
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.539313399778516e-05,
      "loss": 2.7863,
      "step": 17500
    },
    {
      "epoch": 4.27,
      "learning_rate": 1.440436639772188e-05,
      "loss": 2.7812,
      "step": 18000
    },
    {
      "epoch": 4.39,
      "learning_rate": 1.3415598797658599e-05,
      "loss": 2.7759,
      "step": 18500
    },
    {
      "epoch": 4.51,
      "learning_rate": 1.2426831197595318e-05,
      "loss": 2.7893,
      "step": 19000
    },
    {
      "epoch": 4.63,
      "learning_rate": 1.1438063597532037e-05,
      "loss": 2.8047,
      "step": 19500
    },
    {
      "epoch": 4.75,
      "learning_rate": 1.0449295997468755e-05,
      "loss": 2.7915,
      "step": 20000
    },
    {
      "epoch": 4.86,
      "learning_rate": 9.460528397405474e-06,
      "loss": 2.8009,
      "step": 20500
    },
    {
      "epoch": 4.98,
      "learning_rate": 8.471760797342193e-06,
      "loss": 2.7765,
      "step": 21000
    },
    {
      "epoch": 5.1,
      "learning_rate": 7.482993197278912e-06,
      "loss": 2.7556,
      "step": 21500
    },
    {
      "epoch": 5.22,
      "learning_rate": 6.494225597215631e-06,
      "loss": 2.7601,
      "step": 22000
    },
    {
      "epoch": 5.34,
      "learning_rate": 5.50545799715235e-06,
      "loss": 2.7608,
      "step": 22500
    },
    {
      "epoch": 5.46,
      "learning_rate": 4.516690397089068e-06,
      "loss": 2.7527,
      "step": 23000
    },
    {
      "epoch": 5.58,
      "learning_rate": 3.5279227970257872e-06,
      "loss": 2.7481,
      "step": 23500
    },
    {
      "epoch": 5.7,
      "learning_rate": 2.539155196962506e-06,
      "loss": 2.7657,
      "step": 24000
    },
    {
      "epoch": 5.81,
      "learning_rate": 1.550387596899225e-06,
      "loss": 2.7518,
      "step": 24500
    },
    {
      "epoch": 5.93,
      "learning_rate": 5.616199968359437e-07,
      "loss": 2.7425,
      "step": 25000
    },
    {
      "epoch": 6.0,
      "step": 25284,
      "total_flos": 1.3213015474176e+16,
      "train_loss": 2.915600132591418,
      "train_runtime": 6526.9461,
      "train_samples_per_second": 3.874,
      "train_steps_per_second": 3.874
    }
  ],
  "max_steps": 25284,
  "num_train_epochs": 6,
  "total_flos": 1.3213015474176e+16,
  "trial_name": null,
  "trial_params": null
}