{ "best_metric": 0.20017504692077637, "best_model_checkpoint": "retr00h/deberta-v3-xsmall-NER-FINETUNED/checkpoint-21750", "epoch": 10.0, "eval_steps": 500, "global_step": 21750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "grad_norm": 2.853134870529175, "learning_rate": 5.747126436781609e-06, "loss": 3.0474, "step": 500 }, { "epoch": 0.46, "grad_norm": 6.231101989746094, "learning_rate": 1.1494252873563218e-05, "loss": 1.6994, "step": 1000 }, { "epoch": 0.69, "grad_norm": 3.9581143856048584, "learning_rate": 1.7241379310344828e-05, "loss": 1.3816, "step": 1500 }, { "epoch": 0.92, "grad_norm": 4.630077362060547, "learning_rate": 2.2988505747126437e-05, "loss": 1.1601, "step": 2000 }, { "epoch": 1.0, "eval_accuracy": 0.7779691278743173, "eval_f1": 0.16480717205426432, "eval_loss": 0.9353219270706177, "eval_precision": 0.255042170883755, "eval_recall": 0.12173634003325651, "eval_runtime": 79.6748, "eval_samples_per_second": 109.206, "eval_steps_per_second": 6.828, "step": 2175 }, { "epoch": 1.15, "grad_norm": 4.457046985626221, "learning_rate": 2.8735632183908045e-05, "loss": 0.9478, "step": 2500 }, { "epoch": 1.38, "grad_norm": 4.775448799133301, "learning_rate": 3.4482758620689657e-05, "loss": 0.8116, "step": 3000 }, { "epoch": 1.61, "grad_norm": 5.369730472564697, "learning_rate": 4.0229885057471265e-05, "loss": 0.6936, "step": 3500 }, { "epoch": 1.84, "grad_norm": 4.07623815536499, "learning_rate": 4.597701149425287e-05, "loss": 0.6135, "step": 4000 }, { "epoch": 2.0, "eval_accuracy": 0.8369943324621483, "eval_f1": 0.3358227912411726, "eval_loss": 0.5102753043174744, "eval_precision": 0.4002825999192572, "eval_recall": 0.2892441436447971, "eval_runtime": 71.0073, "eval_samples_per_second": 122.537, "eval_steps_per_second": 7.661, "step": 4350 }, { "epoch": 2.07, "grad_norm": 5.637992858886719, "learning_rate": 4.99908321555821e-05, "loss": 0.5371, "step": 4500 }, { "epoch": 2.3, "grad_norm": 3.2398834228515625, "learning_rate": 4.982803524033569e-05, "loss": 0.4694, "step": 5000 }, { "epoch": 2.53, "grad_norm": 1.9868240356445312, "learning_rate": 4.9463034836859035e-05, "loss": 0.4455, "step": 5500 }, { "epoch": 2.76, "grad_norm": 4.583148002624512, "learning_rate": 4.8898803566790296e-05, "loss": 0.4219, "step": 6000 }, { "epoch": 2.99, "grad_norm": 5.9840850830078125, "learning_rate": 4.813993661979598e-05, "loss": 0.398, "step": 6500 }, { "epoch": 3.0, "eval_accuracy": 0.8726373171368517, "eval_f1": 0.4454696078749878, "eval_loss": 0.36235153675079346, "eval_precision": 0.5025840266832826, "eval_recall": 0.4000116689518364, "eval_runtime": 70.9398, "eval_samples_per_second": 122.653, "eval_steps_per_second": 7.668, "step": 6525 }, { "epoch": 3.22, "grad_norm": 3.646019458770752, "learning_rate": 4.7192614329611715e-05, "loss": 0.3553, "step": 7000 }, { "epoch": 3.45, "grad_norm": 3.0514578819274902, "learning_rate": 4.606455184041622e-05, "loss": 0.331, "step": 7500 }, { "epoch": 3.68, "grad_norm": 3.9326324462890625, "learning_rate": 4.4764936273463734e-05, "loss": 0.3217, "step": 8000 }, { "epoch": 3.91, "grad_norm": 4.370193958282471, "learning_rate": 4.3304351905699714e-05, "loss": 0.3176, "step": 8500 }, { "epoch": 4.0, "eval_accuracy": 0.8849798379121097, "eval_f1": 0.5237382906107038, "eval_loss": 0.3116133511066437, "eval_precision": 0.5214888940305414, "eval_recall": 0.5260071764053794, "eval_runtime": 71.2115, "eval_samples_per_second": 122.185, "eval_steps_per_second": 7.639, "step": 8700 }, { "epoch": 4.14, "grad_norm": 3.159623146057129, "learning_rate": 4.169469396971739e-05, "loss": 0.2854, "step": 9000 }, { "epoch": 4.37, "grad_norm": 2.860635757446289, "learning_rate": 3.994907177708181e-05, "loss": 0.272, "step": 9500 }, { "epoch": 4.6, "grad_norm": 3.7297463417053223, "learning_rate": 3.808170195400064e-05, "loss": 0.2613, "step": 10000 }, { "epoch": 4.83, "grad_norm": 2.5407516956329346, "learning_rate": 3.6107792658847595e-05, "loss": 0.2517, "step": 10500 }, { "epoch": 5.0, "eval_accuracy": 0.9037705346052107, "eval_f1": 0.5800842276864557, "eval_loss": 0.2615918517112732, "eval_precision": 0.6010322177040975, "eval_recall": 0.5605472738411272, "eval_runtime": 70.3111, "eval_samples_per_second": 123.75, "eval_steps_per_second": 7.737, "step": 10875 }, { "epoch": 5.06, "grad_norm": 3.3271915912628174, "learning_rate": 3.404341972448928e-05, "loss": 0.247, "step": 11000 }, { "epoch": 5.29, "grad_norm": 1.9302195310592651, "learning_rate": 3.1905395734132326e-05, "loss": 0.2211, "step": 11500 }, { "epoch": 5.52, "grad_norm": 1.3263697624206543, "learning_rate": 2.9711133096957962e-05, "loss": 0.2132, "step": 12000 }, { "epoch": 5.75, "grad_norm": 2.6910924911499023, "learning_rate": 2.7478502238677862e-05, "loss": 0.2143, "step": 12500 }, { "epoch": 5.98, "grad_norm": 4.239352703094482, "learning_rate": 2.5225686061930326e-05, "loss": 0.2085, "step": 13000 }, { "epoch": 6.0, "eval_accuracy": 0.9127478723656762, "eval_f1": 0.6213380291938935, "eval_loss": 0.23839746415615082, "eval_precision": 0.6199395945867456, "eval_recall": 0.6227427871291461, "eval_runtime": 70.8696, "eval_samples_per_second": 122.775, "eval_steps_per_second": 7.676, "step": 13050 }, { "epoch": 6.21, "grad_norm": 2.263456106185913, "learning_rate": 2.2971031861814223e-05, "loss": 0.1895, "step": 13500 }, { "epoch": 6.44, "grad_norm": 3.127639055252075, "learning_rate": 2.073290190258459e-05, "loss": 0.1891, "step": 14000 }, { "epoch": 6.67, "grad_norm": 6.135817527770996, "learning_rate": 1.852952387243698e-05, "loss": 0.1729, "step": 14500 }, { "epoch": 6.9, "grad_norm": 2.980323076248169, "learning_rate": 1.6378842434300746e-05, "loss": 0.1752, "step": 15000 }, { "epoch": 7.0, "eval_accuracy": 0.9185261764294775, "eval_f1": 0.6522748506554175, "eval_loss": 0.22249895334243774, "eval_precision": 0.633583587712785, "eval_recall": 0.6721024533971236, "eval_runtime": 71.7327, "eval_samples_per_second": 121.298, "eval_steps_per_second": 7.584, "step": 15225 }, { "epoch": 7.13, "grad_norm": 3.3994078636169434, "learning_rate": 1.4298373081635322e-05, "loss": 0.1721, "step": 15500 }, { "epoch": 7.36, "grad_norm": 2.7240686416625977, "learning_rate": 1.2305059489451364e-05, "loss": 0.1549, "step": 16000 }, { "epoch": 7.59, "grad_norm": 2.529590606689453, "learning_rate": 1.041513552231265e-05, "loss": 0.1577, "step": 16500 }, { "epoch": 7.82, "grad_norm": 2.3119211196899414, "learning_rate": 8.643993023147797e-06, "loss": 0.1513, "step": 17000 }, { "epoch": 8.0, "eval_accuracy": 0.92717385284445, "eval_f1": 0.6904867478151416, "eval_loss": 0.20956499874591827, "eval_precision": 0.6773587023629118, "eval_recall": 0.7041337261880452, "eval_runtime": 69.3099, "eval_samples_per_second": 125.538, "eval_steps_per_second": 7.849, "step": 17400 }, { "epoch": 8.05, "grad_norm": 1.2401313781738281, "learning_rate": 7.00605645962078e-06, "loss": 0.1493, "step": 17500 }, { "epoch": 8.28, "grad_norm": 4.699456691741943, "learning_rate": 5.51466544896021e-06, "loss": 0.1456, "step": 18000 }, { "epoch": 8.51, "grad_norm": 1.5753775835037231, "learning_rate": 4.181966117984099e-06, "loss": 0.1389, "step": 18500 }, { "epoch": 8.74, "grad_norm": 0.9824960231781006, "learning_rate": 3.0188121831012023e-06, "loss": 0.1399, "step": 19000 }, { "epoch": 8.97, "grad_norm": 1.2070369720458984, "learning_rate": 2.0346765559094567e-06, "loss": 0.1365, "step": 19500 }, { "epoch": 9.0, "eval_accuracy": 0.9298507029698532, "eval_f1": 0.7024456131275685, "eval_loss": 0.20172493159770966, "eval_precision": 0.6921421492283732, "eval_recall": 0.7130604743428921, "eval_runtime": 71.3537, "eval_samples_per_second": 121.942, "eval_steps_per_second": 7.624, "step": 19575 }, { "epoch": 9.2, "grad_norm": 2.9319190979003906, "learning_rate": 1.2375741942894869e-06, "loss": 0.1354, "step": 20000 }, { "epoch": 9.43, "grad_norm": 2.670553684234619, "learning_rate": 6.339968273062741e-07, "loss": 0.1298, "step": 20500 }, { "epoch": 9.66, "grad_norm": 1.6918872594833374, "learning_rate": 2.288600855298306e-07, "loss": 0.1291, "step": 21000 }, { "epoch": 9.89, "grad_norm": 1.8715825080871582, "learning_rate": 2.546346735399219e-08, "loss": 0.1314, "step": 21500 }, { "epoch": 10.0, "eval_accuracy": 0.9306313410359806, "eval_f1": 0.7046287809349221, "eval_loss": 0.20017504692077637, "eval_precision": 0.6920804613869742, "eval_recall": 0.7176405379386797, "eval_runtime": 70.9745, "eval_samples_per_second": 122.593, "eval_steps_per_second": 7.665, "step": 21750 }, { "epoch": 10.0, "step": 21750, "total_flos": 3852992789501856.0, "train_loss": 0.4320273476085444, "train_runtime": 4787.7797, "train_samples_per_second": 72.685, "train_steps_per_second": 4.543 }, { "epoch": 10.0, "eval_accuracy": 0.9306313410359806, "eval_f1": 0.7046287809349221, "eval_loss": 0.20017504692077637, "eval_precision": 0.6920804613869742, "eval_recall": 0.7176405379386797, "eval_runtime": 71.7164, "eval_samples_per_second": 121.325, "eval_steps_per_second": 7.585, "step": 21750 } ], "logging_steps": 500, "max_steps": 21750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3852992789501856.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }