|
{ |
|
"best_metric": 0.9815789473684211, |
|
"best_model_checkpoint": "neunit-ks-20230509V3/checkpoint-107", |
|
"epoch": 4.953271028037383, |
|
"global_step": 265, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.111111111111111e-05, |
|
"loss": 1.3681, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.222222222222222e-05, |
|
"loss": 1.2913, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.9621848739495802e-05, |
|
"loss": 1.1358, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.8361344537815126e-05, |
|
"loss": 0.8529, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7100840336134453e-05, |
|
"loss": 0.5857, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9328947368421052, |
|
"eval_loss": 0.40735843777656555, |
|
"eval_runtime": 2.4866, |
|
"eval_samples_per_second": 305.64, |
|
"eval_steps_per_second": 9.652, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.5840336134453784e-05, |
|
"loss": 0.4539, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.4579831932773108e-05, |
|
"loss": 0.3856, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.331932773109244e-05, |
|
"loss": 0.2886, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 0.2826, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0798319327731094e-05, |
|
"loss": 0.2251, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9815789473684211, |
|
"eval_loss": 0.16959501802921295, |
|
"eval_runtime": 2.5415, |
|
"eval_samples_per_second": 299.033, |
|
"eval_steps_per_second": 9.443, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.953781512605042e-05, |
|
"loss": 0.1937, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.8277310924369748e-05, |
|
"loss": 0.1776, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.7016806722689076e-05, |
|
"loss": 0.18, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.5756302521008403e-05, |
|
"loss": 0.1676, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.4495798319327732e-05, |
|
"loss": 0.1526, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.323529411764706e-05, |
|
"loss": 0.1465, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.975, |
|
"eval_loss": 0.12837235629558563, |
|
"eval_runtime": 2.5633, |
|
"eval_samples_per_second": 296.489, |
|
"eval_steps_per_second": 9.363, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.1974789915966388e-05, |
|
"loss": 0.1262, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 0.1375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 9.453781512605041e-06, |
|
"loss": 0.1449, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 8.193277310924369e-06, |
|
"loss": 0.1212, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 6.932773109243698e-06, |
|
"loss": 0.1169, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9802631578947368, |
|
"eval_loss": 0.09386323392391205, |
|
"eval_runtime": 2.5788, |
|
"eval_samples_per_second": 294.714, |
|
"eval_steps_per_second": 9.307, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 5.672268907563025e-06, |
|
"loss": 0.1209, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 4.411764705882353e-06, |
|
"loss": 0.1219, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 3.1512605042016808e-06, |
|
"loss": 0.1207, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.8907563025210083e-06, |
|
"loss": 0.0989, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 6.302521008403362e-07, |
|
"loss": 0.1153, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.9789473684210527, |
|
"eval_loss": 0.09296128153800964, |
|
"eval_runtime": 2.5784, |
|
"eval_samples_per_second": 294.755, |
|
"eval_steps_per_second": 9.308, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"step": 265, |
|
"total_flos": 4.1714064555397216e+17, |
|
"train_loss": 0.34560299594447297, |
|
"train_runtime": 321.4966, |
|
"train_samples_per_second": 106.284, |
|
"train_steps_per_second": 0.824 |
|
} |
|
], |
|
"max_steps": 265, |
|
"num_train_epochs": 5, |
|
"total_flos": 4.1714064555397216e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|