|
{ |
|
"best_metric": 0.12367749214172363, |
|
"best_model_checkpoint": "./vit-base-beans/checkpoint-300", |
|
"epoch": 1.8072289156626506, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019951807228915663, |
|
"loss": 1.6515, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019903614457831325, |
|
"loss": 1.0551, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001985542168674699, |
|
"loss": 0.6568, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019807228915662652, |
|
"loss": 0.4811, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019759036144578314, |
|
"loss": 0.3576, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019710843373493977, |
|
"loss": 0.2787, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019662650602409642, |
|
"loss": 0.233, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000196144578313253, |
|
"loss": 0.1287, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019566265060240966, |
|
"loss": 0.1755, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019518072289156628, |
|
"loss": 0.109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.959349593495935, |
|
"eval_loss": 0.16569873690605164, |
|
"eval_runtime": 4.541, |
|
"eval_samples_per_second": 54.173, |
|
"eval_steps_per_second": 6.827, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001946987951807229, |
|
"loss": 0.1129, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019421686746987952, |
|
"loss": 0.1769, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00019373493975903617, |
|
"loss": 0.1511, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00019325301204819277, |
|
"loss": 0.2121, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00019277108433734942, |
|
"loss": 0.135, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00019228915662650604, |
|
"loss": 0.1046, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019180722891566266, |
|
"loss": 0.0742, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019132530120481928, |
|
"loss": 0.0655, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00019084337349397593, |
|
"loss": 0.0735, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00019036144578313252, |
|
"loss": 0.0599, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.9471544715447154, |
|
"eval_loss": 0.16865180432796478, |
|
"eval_runtime": 4.123, |
|
"eval_samples_per_second": 59.665, |
|
"eval_steps_per_second": 7.519, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00018987951807228917, |
|
"loss": 0.1547, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001893975903614458, |
|
"loss": 0.0723, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00018891566265060242, |
|
"loss": 0.045, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00018843373493975904, |
|
"loss": 0.0339, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00018795180722891569, |
|
"loss": 0.0424, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00018746987951807228, |
|
"loss": 0.1061, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00018698795180722893, |
|
"loss": 0.0557, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00018650602409638555, |
|
"loss": 0.0491, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018602409638554217, |
|
"loss": 0.0489, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0001855421686746988, |
|
"loss": 0.0387, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.9715447154471545, |
|
"eval_loss": 0.12367749214172363, |
|
"eval_runtime": 4.2743, |
|
"eval_samples_per_second": 57.553, |
|
"eval_steps_per_second": 7.253, |
|
"step": 300 |
|
} |
|
], |
|
"max_steps": 4150, |
|
"num_train_epochs": 25, |
|
"total_flos": 3.716682371694674e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|