roberta-base-defteval-t6-st3 / trainer_state.json
tobiaslee's picture
update base model
f2a4681
{
"best_metric": 0.7991586011098205,
"best_model_checkpoint": "results/roberta-base-2e-5-16-256/checkpoint-1000",
"epoch": 5.0,
"global_step": 2300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"learning_rate": 8.695652173913044e-06,
"loss": 1.3164,
"step": 100
},
{
"epoch": 0.22,
"eval_accuracy": 0.8347826086956521,
"eval_f1": 0.15165876777251183,
"eval_loss": 0.659010648727417,
"eval_runtime": 1.8554,
"eval_samples_per_second": 185.947,
"step": 100
},
{
"epoch": 0.43,
"learning_rate": 1.730434782608696e-05,
"loss": 0.7929,
"step": 200
},
{
"epoch": 0.43,
"eval_accuracy": 0.8956521739130435,
"eval_f1": 0.3681933057098357,
"eval_loss": 0.4090113639831543,
"eval_runtime": 1.8693,
"eval_samples_per_second": 184.559,
"step": 200
},
{
"epoch": 0.65,
"learning_rate": 1.934299516908213e-05,
"loss": 0.4876,
"step": 300
},
{
"epoch": 0.65,
"eval_accuracy": 0.9101449275362319,
"eval_f1": 0.45359927879197653,
"eval_loss": 0.2804220914840698,
"eval_runtime": 1.8812,
"eval_samples_per_second": 183.389,
"step": 300
},
{
"epoch": 0.87,
"learning_rate": 1.83768115942029e-05,
"loss": 0.3733,
"step": 400
},
{
"epoch": 0.87,
"eval_accuracy": 0.9565217391304348,
"eval_f1": 0.6091620621342052,
"eval_loss": 0.20709100365638733,
"eval_runtime": 1.8861,
"eval_samples_per_second": 182.913,
"step": 400
},
{
"epoch": 1.09,
"learning_rate": 1.7410628019323673e-05,
"loss": 0.2586,
"step": 500
},
{
"epoch": 1.09,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7547141566653762,
"eval_loss": 0.14703336358070374,
"eval_runtime": 1.8842,
"eval_samples_per_second": 183.1,
"step": 500
},
{
"epoch": 1.3,
"learning_rate": 1.6444444444444444e-05,
"loss": 0.1932,
"step": 600
},
{
"epoch": 1.3,
"eval_accuracy": 0.9652173913043478,
"eval_f1": 0.7599251142269877,
"eval_loss": 0.14065894484519958,
"eval_runtime": 1.8891,
"eval_samples_per_second": 182.625,
"step": 600
},
{
"epoch": 1.52,
"learning_rate": 1.5478260869565217e-05,
"loss": 0.1952,
"step": 700
},
{
"epoch": 1.52,
"eval_accuracy": 0.9739130434782609,
"eval_f1": 0.7965866610767565,
"eval_loss": 0.15803956985473633,
"eval_runtime": 1.8925,
"eval_samples_per_second": 182.3,
"step": 700
},
{
"epoch": 1.74,
"learning_rate": 1.4521739130434785e-05,
"loss": 0.1734,
"step": 800
},
{
"epoch": 1.74,
"eval_accuracy": 0.9623188405797102,
"eval_f1": 0.7857069143446852,
"eval_loss": 0.1854896992444992,
"eval_runtime": 1.8925,
"eval_samples_per_second": 182.299,
"step": 800
},
{
"epoch": 1.96,
"learning_rate": 1.3555555555555557e-05,
"loss": 0.1515,
"step": 900
},
{
"epoch": 1.96,
"eval_accuracy": 0.9681159420289855,
"eval_f1": 0.7569010619010618,
"eval_loss": 0.1608872264623642,
"eval_runtime": 1.8927,
"eval_samples_per_second": 182.276,
"step": 900
},
{
"epoch": 2.17,
"learning_rate": 1.2589371980676331e-05,
"loss": 0.1214,
"step": 1000
},
{
"epoch": 2.17,
"eval_accuracy": 0.9739130434782609,
"eval_f1": 0.7991586011098205,
"eval_loss": 0.14474469423294067,
"eval_runtime": 1.8926,
"eval_samples_per_second": 182.29,
"step": 1000
},
{
"epoch": 2.39,
"learning_rate": 1.1623188405797103e-05,
"loss": 0.1145,
"step": 1100
},
{
"epoch": 2.39,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7922399586487883,
"eval_loss": 0.14988109469413757,
"eval_runtime": 1.8948,
"eval_samples_per_second": 182.077,
"step": 1100
},
{
"epoch": 2.61,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.0978,
"step": 1200
},
{
"epoch": 2.61,
"eval_accuracy": 0.9478260869565217,
"eval_f1": 0.7102037440793926,
"eval_loss": 0.21022763848304749,
"eval_runtime": 1.8939,
"eval_samples_per_second": 182.162,
"step": 1200
},
{
"epoch": 2.83,
"learning_rate": 9.700483091787441e-06,
"loss": 0.112,
"step": 1300
},
{
"epoch": 2.83,
"eval_accuracy": 0.9681159420289855,
"eval_f1": 0.7895157299399082,
"eval_loss": 0.15368108451366425,
"eval_runtime": 1.8983,
"eval_samples_per_second": 181.738,
"step": 1300
},
{
"epoch": 3.04,
"learning_rate": 8.734299516908213e-06,
"loss": 0.0879,
"step": 1400
},
{
"epoch": 3.04,
"eval_accuracy": 0.9681159420289855,
"eval_f1": 0.7895157299399082,
"eval_loss": 0.160787895321846,
"eval_runtime": 1.8947,
"eval_samples_per_second": 182.086,
"step": 1400
},
{
"epoch": 3.26,
"learning_rate": 7.768115942028987e-06,
"loss": 0.0721,
"step": 1500
},
{
"epoch": 3.26,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7932638888888889,
"eval_loss": 0.1662783920764923,
"eval_runtime": 1.895,
"eval_samples_per_second": 182.054,
"step": 1500
},
{
"epoch": 3.48,
"learning_rate": 6.801932367149759e-06,
"loss": 0.0744,
"step": 1600
},
{
"epoch": 3.48,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7932638888888889,
"eval_loss": 0.17374691367149353,
"eval_runtime": 1.8929,
"eval_samples_per_second": 182.259,
"step": 1600
},
{
"epoch": 3.7,
"learning_rate": 5.835748792270531e-06,
"loss": 0.0614,
"step": 1700
},
{
"epoch": 3.7,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7929378560367236,
"eval_loss": 0.15358248353004456,
"eval_runtime": 1.8971,
"eval_samples_per_second": 181.86,
"step": 1700
},
{
"epoch": 3.91,
"learning_rate": 4.869565217391305e-06,
"loss": 0.0702,
"step": 1800
},
{
"epoch": 3.91,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7929378560367236,
"eval_loss": 0.1646239012479782,
"eval_runtime": 1.8948,
"eval_samples_per_second": 182.073,
"step": 1800
},
{
"epoch": 4.13,
"learning_rate": 3.903381642512077e-06,
"loss": 0.0536,
"step": 1900
},
{
"epoch": 4.13,
"eval_accuracy": 0.9739130434782609,
"eval_f1": 0.7966849998231528,
"eval_loss": 0.1578085571527481,
"eval_runtime": 1.8964,
"eval_samples_per_second": 181.922,
"step": 1900
},
{
"epoch": 4.35,
"learning_rate": 2.9371980676328504e-06,
"loss": 0.0489,
"step": 2000
},
{
"epoch": 4.35,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7932638888888889,
"eval_loss": 0.16712208092212677,
"eval_runtime": 1.9014,
"eval_samples_per_second": 181.448,
"step": 2000
},
{
"epoch": 4.57,
"learning_rate": 1.9710144927536233e-06,
"loss": 0.0267,
"step": 2100
},
{
"epoch": 4.57,
"eval_accuracy": 0.9739130434782609,
"eval_f1": 0.7982255213856485,
"eval_loss": 0.1600024402141571,
"eval_runtime": 1.8949,
"eval_samples_per_second": 182.068,
"step": 2100
},
{
"epoch": 4.78,
"learning_rate": 1.0048309178743963e-06,
"loss": 0.0511,
"step": 2200
},
{
"epoch": 4.78,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7932638888888889,
"eval_loss": 0.17408017814159393,
"eval_runtime": 1.8946,
"eval_samples_per_second": 182.094,
"step": 2200
},
{
"epoch": 5.0,
"learning_rate": 3.864734299516908e-08,
"loss": 0.0291,
"step": 2300
},
{
"epoch": 5.0,
"eval_accuracy": 0.9710144927536232,
"eval_f1": 0.7932638888888889,
"eval_loss": 0.180899977684021,
"eval_runtime": 1.8959,
"eval_samples_per_second": 181.972,
"step": 2300
},
{
"epoch": 5.0,
"step": 2300,
"total_flos": 0,
"train_runtime": 487.2382,
"train_samples_per_second": 4.72
}
],
"max_steps": 2300,
"num_train_epochs": 5,
"total_flos": 0,
"trial_name": null,
"trial_params": null
}