{
  "best_metric": 0.8716852010265184,
  "best_model_checkpoint": "skincare-detection/checkpoint-553",
  "epoch": 11.902439024390244,
  "eval_steps": 500,
  "global_step": 732,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.52,
      "grad_norm": 1.0143417119979858,
      "learning_rate": 8.64864864864865e-05,
      "loss": 1.3961,
      "step": 32
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.7724550898203593,
      "eval_loss": 0.5629431009292603,
      "eval_runtime": 12.1216,
      "eval_samples_per_second": 96.439,
      "eval_steps_per_second": 3.052,
      "step": 61
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.6657726764678955,
      "learning_rate": 0.000172972972972973,
      "loss": 0.6454,
      "step": 64
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.6649633049964905,
      "learning_rate": 0.0001933130699088146,
      "loss": 0.4982,
      "step": 96
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8434559452523525,
      "eval_loss": 0.3991105854511261,
      "eval_runtime": 12.1709,
      "eval_samples_per_second": 96.049,
      "eval_steps_per_second": 3.04,
      "step": 123
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.7564620971679688,
      "learning_rate": 0.00018358662613981763,
      "loss": 0.4536,
      "step": 128
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.6977857351303101,
      "learning_rate": 0.00017386018237082067,
      "loss": 0.3563,
      "step": 160
    },
    {
      "epoch": 2.99,
      "eval_accuracy": 0.8272027373823782,
      "eval_loss": 0.43296942114830017,
      "eval_runtime": 12.3181,
      "eval_samples_per_second": 94.901,
      "eval_steps_per_second": 3.004,
      "step": 184
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.7642468214035034,
      "learning_rate": 0.0001641337386018237,
      "loss": 0.3169,
      "step": 192
    },
    {
      "epoch": 3.64,
      "grad_norm": 0.9004422426223755,
      "learning_rate": 0.00015440729483282676,
      "loss": 0.2314,
      "step": 224
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8554319931565441,
      "eval_loss": 0.39688870310783386,
      "eval_runtime": 12.3055,
      "eval_samples_per_second": 94.998,
      "eval_steps_per_second": 3.007,
      "step": 246
    },
    {
      "epoch": 4.16,
      "grad_norm": 0.9273125529289246,
      "learning_rate": 0.0001446808510638298,
      "loss": 0.2055,
      "step": 256
    },
    {
      "epoch": 4.68,
      "grad_norm": 0.6541422009468079,
      "learning_rate": 0.00013495440729483285,
      "loss": 0.1815,
      "step": 288
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.8434559452523525,
      "eval_loss": 0.44923701882362366,
      "eval_runtime": 12.4125,
      "eval_samples_per_second": 94.179,
      "eval_steps_per_second": 2.981,
      "step": 307
    },
    {
      "epoch": 5.2,
      "grad_norm": 1.0498323440551758,
      "learning_rate": 0.00012522796352583589,
      "loss": 0.1514,
      "step": 320
    },
    {
      "epoch": 5.72,
      "grad_norm": 1.087367057800293,
      "learning_rate": 0.00011550151975683892,
      "loss": 0.1332,
      "step": 352
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8579982891360137,
      "eval_loss": 0.44741156697273254,
      "eval_runtime": 12.1735,
      "eval_samples_per_second": 96.029,
      "eval_steps_per_second": 3.039,
      "step": 369
    },
    {
      "epoch": 6.24,
      "grad_norm": 0.9595869183540344,
      "learning_rate": 0.00010577507598784195,
      "loss": 0.1201,
      "step": 384
    },
    {
      "epoch": 6.76,
      "grad_norm": 0.39300984144210815,
      "learning_rate": 9.6048632218845e-05,
      "loss": 0.0869,
      "step": 416
    },
    {
      "epoch": 6.99,
      "eval_accuracy": 0.863130881094953,
      "eval_loss": 0.45202794671058655,
      "eval_runtime": 12.4693,
      "eval_samples_per_second": 93.75,
      "eval_steps_per_second": 2.967,
      "step": 430
    },
    {
      "epoch": 7.28,
      "grad_norm": 0.9669052362442017,
      "learning_rate": 8.632218844984803e-05,
      "loss": 0.0991,
      "step": 448
    },
    {
      "epoch": 7.8,
      "grad_norm": 0.8003025650978088,
      "learning_rate": 7.659574468085106e-05,
      "loss": 0.0844,
      "step": 480
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8639863130881095,
      "eval_loss": 0.44686540961265564,
      "eval_runtime": 12.1322,
      "eval_samples_per_second": 96.355,
      "eval_steps_per_second": 3.05,
      "step": 492
    },
    {
      "epoch": 8.33,
      "grad_norm": 0.3683207333087921,
      "learning_rate": 6.686930091185411e-05,
      "loss": 0.0811,
      "step": 512
    },
    {
      "epoch": 8.85,
      "grad_norm": 0.6750203371047974,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.0681,
      "step": 544
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.8716852010265184,
      "eval_loss": 0.45333394408226013,
      "eval_runtime": 12.2392,
      "eval_samples_per_second": 95.513,
      "eval_steps_per_second": 3.023,
      "step": 553
    },
    {
      "epoch": 9.37,
      "grad_norm": 0.48275861144065857,
      "learning_rate": 4.741641337386019e-05,
      "loss": 0.0635,
      "step": 576
    },
    {
      "epoch": 9.89,
      "grad_norm": 0.8461657762527466,
      "learning_rate": 3.768996960486322e-05,
      "loss": 0.0574,
      "step": 608
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8597091531223268,
      "eval_loss": 0.4952048361301422,
      "eval_runtime": 12.278,
      "eval_samples_per_second": 95.211,
      "eval_steps_per_second": 3.014,
      "step": 615
    },
    {
      "epoch": 10.41,
      "grad_norm": 0.2595687806606293,
      "learning_rate": 2.796352583586626e-05,
      "loss": 0.0518,
      "step": 640
    },
    {
      "epoch": 10.93,
      "grad_norm": 0.39481160044670105,
      "learning_rate": 1.82370820668693e-05,
      "loss": 0.0477,
      "step": 672
    },
    {
      "epoch": 10.99,
      "eval_accuracy": 0.8674080410607357,
      "eval_loss": 0.4772116541862488,
      "eval_runtime": 12.2102,
      "eval_samples_per_second": 95.74,
      "eval_steps_per_second": 3.03,
      "step": 676
    },
    {
      "epoch": 11.45,
      "grad_norm": 0.43194687366485596,
      "learning_rate": 8.510638297872341e-06,
      "loss": 0.0454,
      "step": 704
    },
    {
      "epoch": 11.9,
      "eval_accuracy": 0.864841745081266,
      "eval_loss": 0.48397254943847656,
      "eval_runtime": 12.6433,
      "eval_samples_per_second": 92.46,
      "eval_steps_per_second": 2.926,
      "step": 732
    },
    {
      "epoch": 11.9,
      "step": 732,
      "total_flos": 7.238851133027512e+18,
      "train_loss": 0.2366401759978852,
      "train_runtime": 2240.6293,
      "train_samples_per_second": 42.02,
      "train_steps_per_second": 0.327
    }
  ],
  "logging_steps": 32,
  "max_steps": 732,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 500,
  "total_flos": 7.238851133027512e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}