{ "best_metric": 0.8716852010265184, "best_model_checkpoint": "skincare-detection/checkpoint-553", "epoch": 11.902439024390244, "eval_steps": 500, "global_step": 732, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.52, "grad_norm": 1.0143417119979858, "learning_rate": 8.64864864864865e-05, "loss": 1.3961, "step": 32 }, { "epoch": 0.99, "eval_accuracy": 0.7724550898203593, "eval_loss": 0.5629431009292603, "eval_runtime": 12.1216, "eval_samples_per_second": 96.439, "eval_steps_per_second": 3.052, "step": 61 }, { "epoch": 1.04, "grad_norm": 0.6657726764678955, "learning_rate": 0.000172972972972973, "loss": 0.6454, "step": 64 }, { "epoch": 1.56, "grad_norm": 0.6649633049964905, "learning_rate": 0.0001933130699088146, "loss": 0.4982, "step": 96 }, { "epoch": 2.0, "eval_accuracy": 0.8434559452523525, "eval_loss": 0.3991105854511261, "eval_runtime": 12.1709, "eval_samples_per_second": 96.049, "eval_steps_per_second": 3.04, "step": 123 }, { "epoch": 2.08, "grad_norm": 0.7564620971679688, "learning_rate": 0.00018358662613981763, "loss": 0.4536, "step": 128 }, { "epoch": 2.6, "grad_norm": 0.6977857351303101, "learning_rate": 0.00017386018237082067, "loss": 0.3563, "step": 160 }, { "epoch": 2.99, "eval_accuracy": 0.8272027373823782, "eval_loss": 0.43296942114830017, "eval_runtime": 12.3181, "eval_samples_per_second": 94.901, "eval_steps_per_second": 3.004, "step": 184 }, { "epoch": 3.12, "grad_norm": 0.7642468214035034, "learning_rate": 0.0001641337386018237, "loss": 0.3169, "step": 192 }, { "epoch": 3.64, "grad_norm": 0.9004422426223755, "learning_rate": 0.00015440729483282676, "loss": 0.2314, "step": 224 }, { "epoch": 4.0, "eval_accuracy": 0.8554319931565441, "eval_loss": 0.39688870310783386, "eval_runtime": 12.3055, "eval_samples_per_second": 94.998, "eval_steps_per_second": 3.007, "step": 246 }, { "epoch": 4.16, "grad_norm": 0.9273125529289246, "learning_rate": 0.0001446808510638298, "loss": 0.2055, "step": 256 }, { "epoch": 4.68, "grad_norm": 0.6541422009468079, "learning_rate": 0.00013495440729483285, "loss": 0.1815, "step": 288 }, { "epoch": 4.99, "eval_accuracy": 0.8434559452523525, "eval_loss": 0.44923701882362366, "eval_runtime": 12.4125, "eval_samples_per_second": 94.179, "eval_steps_per_second": 2.981, "step": 307 }, { "epoch": 5.2, "grad_norm": 1.0498323440551758, "learning_rate": 0.00012522796352583589, "loss": 0.1514, "step": 320 }, { "epoch": 5.72, "grad_norm": 1.087367057800293, "learning_rate": 0.00011550151975683892, "loss": 0.1332, "step": 352 }, { "epoch": 6.0, "eval_accuracy": 0.8579982891360137, "eval_loss": 0.44741156697273254, "eval_runtime": 12.1735, "eval_samples_per_second": 96.029, "eval_steps_per_second": 3.039, "step": 369 }, { "epoch": 6.24, "grad_norm": 0.9595869183540344, "learning_rate": 0.00010577507598784195, "loss": 0.1201, "step": 384 }, { "epoch": 6.76, "grad_norm": 0.39300984144210815, "learning_rate": 9.6048632218845e-05, "loss": 0.0869, "step": 416 }, { "epoch": 6.99, "eval_accuracy": 0.863130881094953, "eval_loss": 0.45202794671058655, "eval_runtime": 12.4693, "eval_samples_per_second": 93.75, "eval_steps_per_second": 2.967, "step": 430 }, { "epoch": 7.28, "grad_norm": 0.9669052362442017, "learning_rate": 8.632218844984803e-05, "loss": 0.0991, "step": 448 }, { "epoch": 7.8, "grad_norm": 0.8003025650978088, "learning_rate": 7.659574468085106e-05, "loss": 0.0844, "step": 480 }, { "epoch": 8.0, "eval_accuracy": 0.8639863130881095, "eval_loss": 0.44686540961265564, "eval_runtime": 12.1322, "eval_samples_per_second": 96.355, "eval_steps_per_second": 3.05, "step": 492 }, { "epoch": 8.33, "grad_norm": 0.3683207333087921, "learning_rate": 6.686930091185411e-05, "loss": 0.0811, "step": 512 }, { "epoch": 8.85, "grad_norm": 0.6750203371047974, "learning_rate": 5.714285714285714e-05, "loss": 0.0681, "step": 544 }, { "epoch": 8.99, "eval_accuracy": 0.8716852010265184, "eval_loss": 0.45333394408226013, "eval_runtime": 12.2392, "eval_samples_per_second": 95.513, "eval_steps_per_second": 3.023, "step": 553 }, { "epoch": 9.37, "grad_norm": 0.48275861144065857, "learning_rate": 4.741641337386019e-05, "loss": 0.0635, "step": 576 }, { "epoch": 9.89, "grad_norm": 0.8461657762527466, "learning_rate": 3.768996960486322e-05, "loss": 0.0574, "step": 608 }, { "epoch": 10.0, "eval_accuracy": 0.8597091531223268, "eval_loss": 0.4952048361301422, "eval_runtime": 12.278, "eval_samples_per_second": 95.211, "eval_steps_per_second": 3.014, "step": 615 }, { "epoch": 10.41, "grad_norm": 0.2595687806606293, "learning_rate": 2.796352583586626e-05, "loss": 0.0518, "step": 640 }, { "epoch": 10.93, "grad_norm": 0.39481160044670105, "learning_rate": 1.82370820668693e-05, "loss": 0.0477, "step": 672 }, { "epoch": 10.99, "eval_accuracy": 0.8674080410607357, "eval_loss": 0.4772116541862488, "eval_runtime": 12.2102, "eval_samples_per_second": 95.74, "eval_steps_per_second": 3.03, "step": 676 }, { "epoch": 11.45, "grad_norm": 0.43194687366485596, "learning_rate": 8.510638297872341e-06, "loss": 0.0454, "step": 704 }, { "epoch": 11.9, "eval_accuracy": 0.864841745081266, "eval_loss": 0.48397254943847656, "eval_runtime": 12.6433, "eval_samples_per_second": 92.46, "eval_steps_per_second": 2.926, "step": 732 }, { "epoch": 11.9, "step": 732, "total_flos": 7.238851133027512e+18, "train_loss": 0.2366401759978852, "train_runtime": 2240.6293, "train_samples_per_second": 42.02, "train_steps_per_second": 0.327 } ], "logging_steps": 32, "max_steps": 732, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "total_flos": 7.238851133027512e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }