|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.997442455242968, |
|
"global_step": 3900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9274965524673462, |
|
"eval_loss": 0.23106738924980164, |
|
"eval_runtime": 10.0851, |
|
"eval_samples_per_second": 72.483, |
|
"eval_steps_per_second": 9.122, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9165526628494263, |
|
"eval_loss": 0.3205892741680145, |
|
"eval_runtime": 10.086, |
|
"eval_samples_per_second": 72.477, |
|
"eval_steps_per_second": 9.122, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.2754443842729812e-05, |
|
"loss": 0.1729, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9274965524673462, |
|
"eval_loss": 0.360751748085022, |
|
"eval_runtime": 9.9443, |
|
"eval_samples_per_second": 73.509, |
|
"eval_steps_per_second": 9.251, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9233925938606262, |
|
"eval_loss": 0.4383067190647125, |
|
"eval_runtime": 10.0596, |
|
"eval_samples_per_second": 72.667, |
|
"eval_steps_per_second": 9.145, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9206566214561462, |
|
"eval_loss": 0.5189540982246399, |
|
"eval_runtime": 10.0775, |
|
"eval_samples_per_second": 72.538, |
|
"eval_steps_per_second": 9.129, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 1.0878790336446016e-05, |
|
"loss": 0.0589, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9233925938606262, |
|
"eval_loss": 0.5728073716163635, |
|
"eval_runtime": 9.9637, |
|
"eval_samples_per_second": 73.366, |
|
"eval_steps_per_second": 9.233, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9274965524673462, |
|
"eval_loss": 0.5684310793876648, |
|
"eval_runtime": 10.2744, |
|
"eval_samples_per_second": 71.148, |
|
"eval_steps_per_second": 8.954, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 9.003136830162221e-06, |
|
"loss": 0.0171, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9261285662651062, |
|
"eval_loss": 0.5847771167755127, |
|
"eval_runtime": 10.1261, |
|
"eval_samples_per_second": 72.189, |
|
"eval_steps_per_second": 9.085, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.928864598274231, |
|
"eval_loss": 0.5692142248153687, |
|
"eval_runtime": 10.1226, |
|
"eval_samples_per_second": 72.214, |
|
"eval_steps_per_second": 9.089, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9261285662651062, |
|
"eval_loss": 0.6423346400260925, |
|
"eval_runtime": 10.1278, |
|
"eval_samples_per_second": 72.178, |
|
"eval_steps_per_second": 9.084, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 7.1274833238784245e-06, |
|
"loss": 0.0094, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9274965524673462, |
|
"eval_loss": 0.6332233548164368, |
|
"eval_runtime": 10.2523, |
|
"eval_samples_per_second": 71.301, |
|
"eval_steps_per_second": 8.974, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9316005706787109, |
|
"eval_loss": 0.6691190600395203, |
|
"eval_runtime": 10.205, |
|
"eval_samples_per_second": 71.631, |
|
"eval_steps_per_second": 9.015, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 5.2518298175946285e-06, |
|
"loss": 0.0015, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.930232584476471, |
|
"eval_loss": 0.6589073538780212, |
|
"eval_runtime": 10.1851, |
|
"eval_samples_per_second": 71.771, |
|
"eval_steps_per_second": 9.033, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.928864598274231, |
|
"eval_loss": 0.6745939254760742, |
|
"eval_runtime": 10.1553, |
|
"eval_samples_per_second": 71.982, |
|
"eval_steps_per_second": 9.059, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9357045292854309, |
|
"eval_loss": 0.6391911506652832, |
|
"eval_runtime": 10.1864, |
|
"eval_samples_per_second": 71.762, |
|
"eval_steps_per_second": 9.032, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 3.376176311310833e-06, |
|
"loss": 0.0002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.928864598274231, |
|
"eval_loss": 0.6711709499359131, |
|
"eval_runtime": 10.0882, |
|
"eval_samples_per_second": 72.461, |
|
"eval_steps_per_second": 9.12, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9343365430831909, |
|
"eval_loss": 0.6648762226104736, |
|
"eval_runtime": 9.9723, |
|
"eval_samples_per_second": 73.303, |
|
"eval_steps_per_second": 9.226, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 1.5005228050270366e-06, |
|
"loss": 0.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.930232584476471, |
|
"eval_loss": 0.6720365881919861, |
|
"eval_runtime": 10.1302, |
|
"eval_samples_per_second": 72.16, |
|
"eval_steps_per_second": 9.082, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9316005706787109, |
|
"eval_loss": 0.6750090718269348, |
|
"eval_runtime": 10.1546, |
|
"eval_samples_per_second": 71.987, |
|
"eval_steps_per_second": 9.06, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9316005706787109, |
|
"eval_loss": 0.6705575585365295, |
|
"eval_runtime": 10.275, |
|
"eval_samples_per_second": 71.144, |
|
"eval_steps_per_second": 8.954, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 3900, |
|
"total_flos": 1.4573542659723264e+16, |
|
"train_loss": 0.03345794048399115, |
|
"train_runtime": 4631.396, |
|
"train_samples_per_second": 13.508, |
|
"train_steps_per_second": 0.842 |
|
} |
|
], |
|
"max_steps": 3900, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.4573542659723264e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|