|
{ |
|
"best_metric": 1.5611639022827148, |
|
"best_model_checkpoint": "67_cat_breeds_image_detection/checkpoint-25328", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 25328, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.821979586992643e-06, |
|
"loss": 3.1114, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.624179128095577e-06, |
|
"loss": 2.6215, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.426378669198514e-06, |
|
"loss": 2.4034, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.228578210301448e-06, |
|
"loss": 2.2396, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.030777751404383e-06, |
|
"loss": 2.1317, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.83297729250732e-06, |
|
"loss": 2.0335, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5173761946133797, |
|
"eval_loss": 1.9850196838378906, |
|
"eval_runtime": 296.7547, |
|
"eval_samples_per_second": 85.33, |
|
"eval_steps_per_second": 10.669, |
|
"step": 3166 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.635176833610254e-06, |
|
"loss": 1.9677, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.437376374713189e-06, |
|
"loss": 1.9292, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.239575915816125e-06, |
|
"loss": 1.8358, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 8.04177545691906e-06, |
|
"loss": 1.8098, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.843974998021997e-06, |
|
"loss": 1.7845, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.646174539124931e-06, |
|
"loss": 1.7439, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5424926940999921, |
|
"eval_loss": 1.7416865825653076, |
|
"eval_runtime": 305.3072, |
|
"eval_samples_per_second": 82.939, |
|
"eval_steps_per_second": 10.37, |
|
"step": 6332 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 7.448374080227867e-06, |
|
"loss": 1.7391, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.250573621330802e-06, |
|
"loss": 1.678, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.052773162433737e-06, |
|
"loss": 1.6614, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.8549727035366735e-06, |
|
"loss": 1.6771, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.657172244639608e-06, |
|
"loss": 1.6238, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 6.459371785742543e-06, |
|
"loss": 1.6347, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5577758470894874, |
|
"eval_loss": 1.64887535572052, |
|
"eval_runtime": 301.4384, |
|
"eval_samples_per_second": 84.004, |
|
"eval_steps_per_second": 10.503, |
|
"step": 9498 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 6.261571326845479e-06, |
|
"loss": 1.6022, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 6.063770867948414e-06, |
|
"loss": 1.5793, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 5.865970409051349e-06, |
|
"loss": 1.5794, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 5.668169950154285e-06, |
|
"loss": 1.5531, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 5.47036949125722e-06, |
|
"loss": 1.564, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 5.2725690323601555e-06, |
|
"loss": 1.5424, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.074768573463091e-06, |
|
"loss": 1.5426, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5635415843930179, |
|
"eval_loss": 1.6023435592651367, |
|
"eval_runtime": 296.7308, |
|
"eval_samples_per_second": 85.337, |
|
"eval_steps_per_second": 10.67, |
|
"step": 12664 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.876968114566027e-06, |
|
"loss": 1.5209, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 4.679167655668961e-06, |
|
"loss": 1.518, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 4.481367196771897e-06, |
|
"loss": 1.4888, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 4.2835667378748325e-06, |
|
"loss": 1.499, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 4.085766278977767e-06, |
|
"loss": 1.4888, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 3.887965820080703e-06, |
|
"loss": 1.4907, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5614485427691336, |
|
"eval_loss": 1.5871776342391968, |
|
"eval_runtime": 298.3387, |
|
"eval_samples_per_second": 84.877, |
|
"eval_steps_per_second": 10.612, |
|
"step": 15830 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.690165361183638e-06, |
|
"loss": 1.4685, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 3.4923649022865734e-06, |
|
"loss": 1.4495, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 3.294564443389509e-06, |
|
"loss": 1.4432, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 3.096763984492444e-06, |
|
"loss": 1.449, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.8989635255953797e-06, |
|
"loss": 1.4445, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 2.7011630666983153e-06, |
|
"loss": 1.4592, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5697022352104889, |
|
"eval_loss": 1.5659719705581665, |
|
"eval_runtime": 299.3375, |
|
"eval_samples_per_second": 84.593, |
|
"eval_steps_per_second": 10.577, |
|
"step": 18996 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.50336260780125e-06, |
|
"loss": 1.4347, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 2.3055621489041856e-06, |
|
"loss": 1.4233, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 2.107761690007121e-06, |
|
"loss": 1.4259, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.9099612311100563e-06, |
|
"loss": 1.4131, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 1.7121607722129916e-06, |
|
"loss": 1.3964, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 1.5143603133159272e-06, |
|
"loss": 1.4152, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.3165598544188623e-06, |
|
"loss": 1.4004, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5668193665587237, |
|
"eval_loss": 1.565866231918335, |
|
"eval_runtime": 298.5036, |
|
"eval_samples_per_second": 84.83, |
|
"eval_steps_per_second": 10.606, |
|
"step": 22162 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.1187593955217977e-06, |
|
"loss": 1.4024, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 9.20958936624733e-07, |
|
"loss": 1.4075, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 7.231584777276685e-07, |
|
"loss": 1.3715, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 5.253580188306037e-07, |
|
"loss": 1.3919, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 3.275575599335391e-07, |
|
"loss": 1.384, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1.2975710103647441e-07, |
|
"loss": 1.384, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5691493562909723, |
|
"eval_loss": 1.5611639022827148, |
|
"eval_runtime": 298.9869, |
|
"eval_samples_per_second": 84.693, |
|
"eval_steps_per_second": 10.589, |
|
"step": 25328 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 25328, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 6.282678931597911e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|