deberta-v3-xsmall-NER-FINETUNED / trainer_state.json
retr00h's picture
Upload 12 files
474eaca verified
raw
history blame contribute delete
No virus
11.7 kB
{
"best_metric": 0.20017504692077637,
"best_model_checkpoint": "retr00h/deberta-v3-xsmall-NER-FINETUNED/checkpoint-21750",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 21750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"grad_norm": 2.853134870529175,
"learning_rate": 5.747126436781609e-06,
"loss": 3.0474,
"step": 500
},
{
"epoch": 0.46,
"grad_norm": 6.231101989746094,
"learning_rate": 1.1494252873563218e-05,
"loss": 1.6994,
"step": 1000
},
{
"epoch": 0.69,
"grad_norm": 3.9581143856048584,
"learning_rate": 1.7241379310344828e-05,
"loss": 1.3816,
"step": 1500
},
{
"epoch": 0.92,
"grad_norm": 4.630077362060547,
"learning_rate": 2.2988505747126437e-05,
"loss": 1.1601,
"step": 2000
},
{
"epoch": 1.0,
"eval_accuracy": 0.7779691278743173,
"eval_f1": 0.16480717205426432,
"eval_loss": 0.9353219270706177,
"eval_precision": 0.255042170883755,
"eval_recall": 0.12173634003325651,
"eval_runtime": 79.6748,
"eval_samples_per_second": 109.206,
"eval_steps_per_second": 6.828,
"step": 2175
},
{
"epoch": 1.15,
"grad_norm": 4.457046985626221,
"learning_rate": 2.8735632183908045e-05,
"loss": 0.9478,
"step": 2500
},
{
"epoch": 1.38,
"grad_norm": 4.775448799133301,
"learning_rate": 3.4482758620689657e-05,
"loss": 0.8116,
"step": 3000
},
{
"epoch": 1.61,
"grad_norm": 5.369730472564697,
"learning_rate": 4.0229885057471265e-05,
"loss": 0.6936,
"step": 3500
},
{
"epoch": 1.84,
"grad_norm": 4.07623815536499,
"learning_rate": 4.597701149425287e-05,
"loss": 0.6135,
"step": 4000
},
{
"epoch": 2.0,
"eval_accuracy": 0.8369943324621483,
"eval_f1": 0.3358227912411726,
"eval_loss": 0.5102753043174744,
"eval_precision": 0.4002825999192572,
"eval_recall": 0.2892441436447971,
"eval_runtime": 71.0073,
"eval_samples_per_second": 122.537,
"eval_steps_per_second": 7.661,
"step": 4350
},
{
"epoch": 2.07,
"grad_norm": 5.637992858886719,
"learning_rate": 4.99908321555821e-05,
"loss": 0.5371,
"step": 4500
},
{
"epoch": 2.3,
"grad_norm": 3.2398834228515625,
"learning_rate": 4.982803524033569e-05,
"loss": 0.4694,
"step": 5000
},
{
"epoch": 2.53,
"grad_norm": 1.9868240356445312,
"learning_rate": 4.9463034836859035e-05,
"loss": 0.4455,
"step": 5500
},
{
"epoch": 2.76,
"grad_norm": 4.583148002624512,
"learning_rate": 4.8898803566790296e-05,
"loss": 0.4219,
"step": 6000
},
{
"epoch": 2.99,
"grad_norm": 5.9840850830078125,
"learning_rate": 4.813993661979598e-05,
"loss": 0.398,
"step": 6500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8726373171368517,
"eval_f1": 0.4454696078749878,
"eval_loss": 0.36235153675079346,
"eval_precision": 0.5025840266832826,
"eval_recall": 0.4000116689518364,
"eval_runtime": 70.9398,
"eval_samples_per_second": 122.653,
"eval_steps_per_second": 7.668,
"step": 6525
},
{
"epoch": 3.22,
"grad_norm": 3.646019458770752,
"learning_rate": 4.7192614329611715e-05,
"loss": 0.3553,
"step": 7000
},
{
"epoch": 3.45,
"grad_norm": 3.0514578819274902,
"learning_rate": 4.606455184041622e-05,
"loss": 0.331,
"step": 7500
},
{
"epoch": 3.68,
"grad_norm": 3.9326324462890625,
"learning_rate": 4.4764936273463734e-05,
"loss": 0.3217,
"step": 8000
},
{
"epoch": 3.91,
"grad_norm": 4.370193958282471,
"learning_rate": 4.3304351905699714e-05,
"loss": 0.3176,
"step": 8500
},
{
"epoch": 4.0,
"eval_accuracy": 0.8849798379121097,
"eval_f1": 0.5237382906107038,
"eval_loss": 0.3116133511066437,
"eval_precision": 0.5214888940305414,
"eval_recall": 0.5260071764053794,
"eval_runtime": 71.2115,
"eval_samples_per_second": 122.185,
"eval_steps_per_second": 7.639,
"step": 8700
},
{
"epoch": 4.14,
"grad_norm": 3.159623146057129,
"learning_rate": 4.169469396971739e-05,
"loss": 0.2854,
"step": 9000
},
{
"epoch": 4.37,
"grad_norm": 2.860635757446289,
"learning_rate": 3.994907177708181e-05,
"loss": 0.272,
"step": 9500
},
{
"epoch": 4.6,
"grad_norm": 3.7297463417053223,
"learning_rate": 3.808170195400064e-05,
"loss": 0.2613,
"step": 10000
},
{
"epoch": 4.83,
"grad_norm": 2.5407516956329346,
"learning_rate": 3.6107792658847595e-05,
"loss": 0.2517,
"step": 10500
},
{
"epoch": 5.0,
"eval_accuracy": 0.9037705346052107,
"eval_f1": 0.5800842276864557,
"eval_loss": 0.2615918517112732,
"eval_precision": 0.6010322177040975,
"eval_recall": 0.5605472738411272,
"eval_runtime": 70.3111,
"eval_samples_per_second": 123.75,
"eval_steps_per_second": 7.737,
"step": 10875
},
{
"epoch": 5.06,
"grad_norm": 3.3271915912628174,
"learning_rate": 3.404341972448928e-05,
"loss": 0.247,
"step": 11000
},
{
"epoch": 5.29,
"grad_norm": 1.9302195310592651,
"learning_rate": 3.1905395734132326e-05,
"loss": 0.2211,
"step": 11500
},
{
"epoch": 5.52,
"grad_norm": 1.3263697624206543,
"learning_rate": 2.9711133096957962e-05,
"loss": 0.2132,
"step": 12000
},
{
"epoch": 5.75,
"grad_norm": 2.6910924911499023,
"learning_rate": 2.7478502238677862e-05,
"loss": 0.2143,
"step": 12500
},
{
"epoch": 5.98,
"grad_norm": 4.239352703094482,
"learning_rate": 2.5225686061930326e-05,
"loss": 0.2085,
"step": 13000
},
{
"epoch": 6.0,
"eval_accuracy": 0.9127478723656762,
"eval_f1": 0.6213380291938935,
"eval_loss": 0.23839746415615082,
"eval_precision": 0.6199395945867456,
"eval_recall": 0.6227427871291461,
"eval_runtime": 70.8696,
"eval_samples_per_second": 122.775,
"eval_steps_per_second": 7.676,
"step": 13050
},
{
"epoch": 6.21,
"grad_norm": 2.263456106185913,
"learning_rate": 2.2971031861814223e-05,
"loss": 0.1895,
"step": 13500
},
{
"epoch": 6.44,
"grad_norm": 3.127639055252075,
"learning_rate": 2.073290190258459e-05,
"loss": 0.1891,
"step": 14000
},
{
"epoch": 6.67,
"grad_norm": 6.135817527770996,
"learning_rate": 1.852952387243698e-05,
"loss": 0.1729,
"step": 14500
},
{
"epoch": 6.9,
"grad_norm": 2.980323076248169,
"learning_rate": 1.6378842434300746e-05,
"loss": 0.1752,
"step": 15000
},
{
"epoch": 7.0,
"eval_accuracy": 0.9185261764294775,
"eval_f1": 0.6522748506554175,
"eval_loss": 0.22249895334243774,
"eval_precision": 0.633583587712785,
"eval_recall": 0.6721024533971236,
"eval_runtime": 71.7327,
"eval_samples_per_second": 121.298,
"eval_steps_per_second": 7.584,
"step": 15225
},
{
"epoch": 7.13,
"grad_norm": 3.3994078636169434,
"learning_rate": 1.4298373081635322e-05,
"loss": 0.1721,
"step": 15500
},
{
"epoch": 7.36,
"grad_norm": 2.7240686416625977,
"learning_rate": 1.2305059489451364e-05,
"loss": 0.1549,
"step": 16000
},
{
"epoch": 7.59,
"grad_norm": 2.529590606689453,
"learning_rate": 1.041513552231265e-05,
"loss": 0.1577,
"step": 16500
},
{
"epoch": 7.82,
"grad_norm": 2.3119211196899414,
"learning_rate": 8.643993023147797e-06,
"loss": 0.1513,
"step": 17000
},
{
"epoch": 8.0,
"eval_accuracy": 0.92717385284445,
"eval_f1": 0.6904867478151416,
"eval_loss": 0.20956499874591827,
"eval_precision": 0.6773587023629118,
"eval_recall": 0.7041337261880452,
"eval_runtime": 69.3099,
"eval_samples_per_second": 125.538,
"eval_steps_per_second": 7.849,
"step": 17400
},
{
"epoch": 8.05,
"grad_norm": 1.2401313781738281,
"learning_rate": 7.00605645962078e-06,
"loss": 0.1493,
"step": 17500
},
{
"epoch": 8.28,
"grad_norm": 4.699456691741943,
"learning_rate": 5.51466544896021e-06,
"loss": 0.1456,
"step": 18000
},
{
"epoch": 8.51,
"grad_norm": 1.5753775835037231,
"learning_rate": 4.181966117984099e-06,
"loss": 0.1389,
"step": 18500
},
{
"epoch": 8.74,
"grad_norm": 0.9824960231781006,
"learning_rate": 3.0188121831012023e-06,
"loss": 0.1399,
"step": 19000
},
{
"epoch": 8.97,
"grad_norm": 1.2070369720458984,
"learning_rate": 2.0346765559094567e-06,
"loss": 0.1365,
"step": 19500
},
{
"epoch": 9.0,
"eval_accuracy": 0.9298507029698532,
"eval_f1": 0.7024456131275685,
"eval_loss": 0.20172493159770966,
"eval_precision": 0.6921421492283732,
"eval_recall": 0.7130604743428921,
"eval_runtime": 71.3537,
"eval_samples_per_second": 121.942,
"eval_steps_per_second": 7.624,
"step": 19575
},
{
"epoch": 9.2,
"grad_norm": 2.9319190979003906,
"learning_rate": 1.2375741942894869e-06,
"loss": 0.1354,
"step": 20000
},
{
"epoch": 9.43,
"grad_norm": 2.670553684234619,
"learning_rate": 6.339968273062741e-07,
"loss": 0.1298,
"step": 20500
},
{
"epoch": 9.66,
"grad_norm": 1.6918872594833374,
"learning_rate": 2.288600855298306e-07,
"loss": 0.1291,
"step": 21000
},
{
"epoch": 9.89,
"grad_norm": 1.8715825080871582,
"learning_rate": 2.546346735399219e-08,
"loss": 0.1314,
"step": 21500
},
{
"epoch": 10.0,
"eval_accuracy": 0.9306313410359806,
"eval_f1": 0.7046287809349221,
"eval_loss": 0.20017504692077637,
"eval_precision": 0.6920804613869742,
"eval_recall": 0.7176405379386797,
"eval_runtime": 70.9745,
"eval_samples_per_second": 122.593,
"eval_steps_per_second": 7.665,
"step": 21750
},
{
"epoch": 10.0,
"step": 21750,
"total_flos": 3852992789501856.0,
"train_loss": 0.4320273476085444,
"train_runtime": 4787.7797,
"train_samples_per_second": 72.685,
"train_steps_per_second": 4.543
},
{
"epoch": 10.0,
"eval_accuracy": 0.9306313410359806,
"eval_f1": 0.7046287809349221,
"eval_loss": 0.20017504692077637,
"eval_precision": 0.6920804613869742,
"eval_recall": 0.7176405379386797,
"eval_runtime": 71.7164,
"eval_samples_per_second": 121.325,
"eval_steps_per_second": 7.585,
"step": 21750
}
],
"logging_steps": 500,
"max_steps": 21750,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3852992789501856.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}