deberta-v3-large-finetuned-mnli / trainer_state.json
mrm8488's picture
Initial commit from mrm8488
9c0a241
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 122720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 2.975700782268579e-05,
"loss": 0.5382,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 2.951254889178618e-05,
"loss": 0.4368,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 2.9268334419817473e-05,
"loss": 0.4277,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 2.9023875488917863e-05,
"loss": 0.4026,
"step": 4000
},
{
"epoch": 0.2,
"learning_rate": 2.877966101694915e-05,
"loss": 0.4137,
"step": 5000
},
{
"epoch": 0.24,
"learning_rate": 2.8535202086049545e-05,
"loss": 0.4048,
"step": 6000
},
{
"epoch": 0.29,
"learning_rate": 2.8290743155149935e-05,
"loss": 0.3949,
"step": 7000
},
{
"epoch": 0.33,
"learning_rate": 2.804628422425033e-05,
"loss": 0.3883,
"step": 8000
},
{
"epoch": 0.37,
"learning_rate": 2.780182529335072e-05,
"loss": 0.3759,
"step": 9000
},
{
"epoch": 0.41,
"learning_rate": 2.755736636245111e-05,
"loss": 0.3868,
"step": 10000
},
{
"epoch": 0.45,
"learning_rate": 2.73129074315515e-05,
"loss": 0.3809,
"step": 11000
},
{
"epoch": 0.49,
"learning_rate": 2.7068448500651893e-05,
"loss": 0.3754,
"step": 12000
},
{
"epoch": 0.53,
"learning_rate": 2.6823989569752283e-05,
"loss": 0.3733,
"step": 13000
},
{
"epoch": 0.57,
"learning_rate": 2.6579530638852673e-05,
"loss": 0.3825,
"step": 14000
},
{
"epoch": 0.61,
"learning_rate": 2.6335071707953063e-05,
"loss": 0.3719,
"step": 15000
},
{
"epoch": 0.65,
"learning_rate": 2.6091101694915253e-05,
"loss": 0.3725,
"step": 16000
},
{
"epoch": 0.69,
"learning_rate": 2.5846642764015644e-05,
"loss": 0.3694,
"step": 17000
},
{
"epoch": 0.73,
"learning_rate": 2.5602428292046935e-05,
"loss": 0.3714,
"step": 18000
},
{
"epoch": 0.77,
"learning_rate": 2.5358213820078227e-05,
"loss": 0.3818,
"step": 19000
},
{
"epoch": 0.81,
"learning_rate": 2.511399934810952e-05,
"loss": 0.374,
"step": 20000
},
{
"epoch": 0.86,
"learning_rate": 2.486954041720991e-05,
"loss": 0.4574,
"step": 21000
},
{
"epoch": 0.9,
"learning_rate": 2.46250814863103e-05,
"loss": 0.426,
"step": 22000
},
{
"epoch": 0.94,
"learning_rate": 2.4380622555410693e-05,
"loss": 0.3714,
"step": 23000
},
{
"epoch": 0.98,
"learning_rate": 2.4136163624511083e-05,
"loss": 0.3676,
"step": 24000
},
{
"epoch": 1.0,
"eval_accuracy": 0.8680590932246561,
"eval_loss": 0.3760605454444885,
"eval_runtime": 67.1189,
"eval_samples_per_second": 146.233,
"eval_steps_per_second": 18.281,
"step": 24544
},
{
"epoch": 1.02,
"learning_rate": 2.3892193611473273e-05,
"loss": 0.3246,
"step": 25000
},
{
"epoch": 1.06,
"learning_rate": 2.3647734680573663e-05,
"loss": 0.2698,
"step": 26000
},
{
"epoch": 1.1,
"learning_rate": 2.3403275749674057e-05,
"loss": 0.2783,
"step": 27000
},
{
"epoch": 1.14,
"learning_rate": 2.3158816818774447e-05,
"loss": 0.2761,
"step": 28000
},
{
"epoch": 1.18,
"learning_rate": 2.2914357887874837e-05,
"loss": 0.2831,
"step": 29000
},
{
"epoch": 1.22,
"learning_rate": 2.2669898956975228e-05,
"loss": 0.2819,
"step": 30000
},
{
"epoch": 1.26,
"learning_rate": 2.2425440026075618e-05,
"loss": 0.2822,
"step": 31000
},
{
"epoch": 1.3,
"learning_rate": 2.2180981095176008e-05,
"loss": 0.2769,
"step": 32000
},
{
"epoch": 1.34,
"learning_rate": 2.1936522164276405e-05,
"loss": 0.2853,
"step": 33000
},
{
"epoch": 1.39,
"learning_rate": 2.1692063233376795e-05,
"loss": 0.2891,
"step": 34000
},
{
"epoch": 1.43,
"learning_rate": 2.1447848761408084e-05,
"loss": 0.281,
"step": 35000
},
{
"epoch": 1.47,
"learning_rate": 2.1203389830508474e-05,
"loss": 0.2733,
"step": 36000
},
{
"epoch": 1.51,
"learning_rate": 2.0959175358539766e-05,
"loss": 0.2779,
"step": 37000
},
{
"epoch": 1.55,
"learning_rate": 2.0714716427640156e-05,
"loss": 0.2799,
"step": 38000
},
{
"epoch": 1.59,
"learning_rate": 2.0470257496740546e-05,
"loss": 0.2824,
"step": 39000
},
{
"epoch": 1.63,
"learning_rate": 2.022628748370274e-05,
"loss": 0.2823,
"step": 40000
},
{
"epoch": 1.67,
"learning_rate": 1.998182855280313e-05,
"loss": 0.2789,
"step": 41000
},
{
"epoch": 1.71,
"learning_rate": 1.973736962190352e-05,
"loss": 0.2806,
"step": 42000
},
{
"epoch": 1.75,
"learning_rate": 1.9492910691003913e-05,
"loss": 0.2808,
"step": 43000
},
{
"epoch": 1.79,
"learning_rate": 1.9248696219035202e-05,
"loss": 0.2745,
"step": 44000
},
{
"epoch": 1.83,
"learning_rate": 1.9004481747066494e-05,
"loss": 0.2918,
"step": 45000
},
{
"epoch": 1.87,
"learning_rate": 1.8760511734028684e-05,
"loss": 0.2789,
"step": 46000
},
{
"epoch": 1.91,
"learning_rate": 1.8516052803129074e-05,
"loss": 0.2754,
"step": 47000
},
{
"epoch": 1.96,
"learning_rate": 1.8271593872229467e-05,
"loss": 0.2709,
"step": 48000
},
{
"epoch": 2.0,
"learning_rate": 1.8027134941329858e-05,
"loss": 0.2782,
"step": 49000
},
{
"epoch": 2.0,
"eval_accuracy": 0.8881304126337239,
"eval_loss": 0.36052629351615906,
"eval_runtime": 68.3607,
"eval_samples_per_second": 143.577,
"eval_steps_per_second": 17.949,
"step": 49088
},
{
"epoch": 2.04,
"learning_rate": 1.7782676010430248e-05,
"loss": 0.2017,
"step": 50000
},
{
"epoch": 2.08,
"learning_rate": 1.7538217079530638e-05,
"loss": 0.1921,
"step": 51000
},
{
"epoch": 2.12,
"learning_rate": 1.7293758148631028e-05,
"loss": 0.2017,
"step": 52000
},
{
"epoch": 2.16,
"learning_rate": 1.704929921773142e-05,
"loss": 0.1894,
"step": 53000
},
{
"epoch": 2.2,
"learning_rate": 1.6805084745762714e-05,
"loss": 0.1978,
"step": 54000
},
{
"epoch": 2.24,
"learning_rate": 1.6560625814863104e-05,
"loss": 0.1983,
"step": 55000
},
{
"epoch": 2.28,
"learning_rate": 1.6316166883963494e-05,
"loss": 0.2003,
"step": 56000
},
{
"epoch": 2.32,
"learning_rate": 1.6071707953063884e-05,
"loss": 0.2059,
"step": 57000
},
{
"epoch": 2.36,
"learning_rate": 1.5827249022164274e-05,
"loss": 0.1971,
"step": 58000
},
{
"epoch": 2.4,
"learning_rate": 1.5582790091264668e-05,
"loss": 0.2014,
"step": 59000
},
{
"epoch": 2.44,
"learning_rate": 1.533833116036506e-05,
"loss": 0.1922,
"step": 60000
},
{
"epoch": 2.49,
"learning_rate": 1.5094116688396348e-05,
"loss": 0.1927,
"step": 61000
},
{
"epoch": 2.53,
"learning_rate": 1.4849657757496742e-05,
"loss": 0.1965,
"step": 62000
},
{
"epoch": 2.57,
"learning_rate": 1.4605443285528032e-05,
"loss": 0.1971,
"step": 63000
},
{
"epoch": 2.61,
"learning_rate": 1.4360984354628422e-05,
"loss": 0.2095,
"step": 64000
},
{
"epoch": 2.65,
"learning_rate": 1.4116769882659712e-05,
"loss": 0.199,
"step": 65000
},
{
"epoch": 2.69,
"learning_rate": 1.3872555410691004e-05,
"loss": 0.2072,
"step": 66000
},
{
"epoch": 2.73,
"learning_rate": 1.3628096479791396e-05,
"loss": 0.1955,
"step": 67000
},
{
"epoch": 2.77,
"learning_rate": 1.3383637548891786e-05,
"loss": 0.1924,
"step": 68000
},
{
"epoch": 2.81,
"learning_rate": 1.3139178617992178e-05,
"loss": 0.2055,
"step": 69000
},
{
"epoch": 2.85,
"learning_rate": 1.2894719687092568e-05,
"loss": 0.2016,
"step": 70000
},
{
"epoch": 2.89,
"learning_rate": 1.265026075619296e-05,
"loss": 0.2011,
"step": 71000
},
{
"epoch": 2.93,
"learning_rate": 1.2405801825293352e-05,
"loss": 0.1915,
"step": 72000
},
{
"epoch": 2.97,
"learning_rate": 1.2161587353324642e-05,
"loss": 0.1986,
"step": 73000
},
{
"epoch": 3.0,
"eval_accuracy": 0.8893530310748854,
"eval_loss": 0.4672204256057739,
"eval_runtime": 67.0424,
"eval_samples_per_second": 146.4,
"eval_steps_per_second": 18.302,
"step": 73632
},
{
"epoch": 3.01,
"learning_rate": 1.1917128422425032e-05,
"loss": 0.1697,
"step": 74000
},
{
"epoch": 3.06,
"learning_rate": 1.1672669491525423e-05,
"loss": 0.1246,
"step": 75000
},
{
"epoch": 3.1,
"learning_rate": 1.1428455019556716e-05,
"loss": 0.1231,
"step": 76000
},
{
"epoch": 3.14,
"learning_rate": 1.1183996088657106e-05,
"loss": 0.1314,
"step": 77000
},
{
"epoch": 3.18,
"learning_rate": 1.0939537157757497e-05,
"loss": 0.1272,
"step": 78000
},
{
"epoch": 3.22,
"learning_rate": 1.0695567144719687e-05,
"loss": 0.1257,
"step": 79000
},
{
"epoch": 3.26,
"learning_rate": 1.0451108213820078e-05,
"loss": 0.1278,
"step": 80000
},
{
"epoch": 3.3,
"learning_rate": 1.0206893741851369e-05,
"loss": 0.1365,
"step": 81000
},
{
"epoch": 3.34,
"learning_rate": 9.96243481095176e-06,
"loss": 0.1284,
"step": 82000
},
{
"epoch": 3.38,
"learning_rate": 9.717975880052152e-06,
"loss": 0.1435,
"step": 83000
},
{
"epoch": 3.42,
"learning_rate": 9.473761408083443e-06,
"loss": 0.1386,
"step": 84000
},
{
"epoch": 3.46,
"learning_rate": 9.229546936114733e-06,
"loss": 0.1301,
"step": 85000
},
{
"epoch": 3.5,
"learning_rate": 8.985088005215123e-06,
"loss": 0.1261,
"step": 86000
},
{
"epoch": 3.54,
"learning_rate": 8.740629074315516e-06,
"loss": 0.1291,
"step": 87000
},
{
"epoch": 3.59,
"learning_rate": 8.496170143415907e-06,
"loss": 0.1281,
"step": 88000
},
{
"epoch": 3.63,
"learning_rate": 8.251711212516297e-06,
"loss": 0.1166,
"step": 89000
},
{
"epoch": 3.67,
"learning_rate": 8.007252281616689e-06,
"loss": 0.124,
"step": 90000
},
{
"epoch": 3.71,
"learning_rate": 7.762793350717079e-06,
"loss": 0.1257,
"step": 91000
},
{
"epoch": 3.75,
"learning_rate": 7.518334419817472e-06,
"loss": 0.1232,
"step": 92000
},
{
"epoch": 3.79,
"learning_rate": 7.274119947848762e-06,
"loss": 0.1271,
"step": 93000
},
{
"epoch": 3.83,
"learning_rate": 7.029905475880053e-06,
"loss": 0.128,
"step": 94000
},
{
"epoch": 3.87,
"learning_rate": 6.785446544980443e-06,
"loss": 0.1212,
"step": 95000
},
{
"epoch": 3.91,
"learning_rate": 6.540987614080835e-06,
"loss": 0.1193,
"step": 96000
},
{
"epoch": 3.95,
"learning_rate": 6.296773142112125e-06,
"loss": 0.1199,
"step": 97000
},
{
"epoch": 3.99,
"learning_rate": 6.052314211212517e-06,
"loss": 0.1299,
"step": 98000
},
{
"epoch": 4.0,
"eval_accuracy": 0.8966887417218543,
"eval_loss": 0.5247582197189331,
"eval_runtime": 66.9471,
"eval_samples_per_second": 146.608,
"eval_steps_per_second": 18.328,
"step": 98176
},
{
"epoch": 4.03,
"learning_rate": 5.807855280312908e-06,
"loss": 0.0766,
"step": 99000
},
{
"epoch": 4.07,
"learning_rate": 5.563640808344198e-06,
"loss": 0.0642,
"step": 100000
},
{
"epoch": 4.12,
"learning_rate": 5.31918187744459e-06,
"loss": 0.0744,
"step": 101000
},
{
"epoch": 4.16,
"learning_rate": 5.07496740547588e-06,
"loss": 0.0684,
"step": 102000
},
{
"epoch": 4.2,
"learning_rate": 4.830508474576271e-06,
"loss": 0.0732,
"step": 103000
},
{
"epoch": 4.24,
"learning_rate": 4.586294002607562e-06,
"loss": 0.0637,
"step": 104000
},
{
"epoch": 4.28,
"learning_rate": 4.342323989569753e-06,
"loss": 0.071,
"step": 105000
},
{
"epoch": 4.32,
"learning_rate": 4.097865058670143e-06,
"loss": 0.066,
"step": 106000
},
{
"epoch": 4.36,
"learning_rate": 3.853406127770535e-06,
"loss": 0.0715,
"step": 107000
},
{
"epoch": 4.4,
"learning_rate": 3.6089471968709256e-06,
"loss": 0.069,
"step": 108000
},
{
"epoch": 4.44,
"learning_rate": 3.3647327249022166e-06,
"loss": 0.0707,
"step": 109000
},
{
"epoch": 4.48,
"learning_rate": 3.1202737940026076e-06,
"loss": 0.0702,
"step": 110000
},
{
"epoch": 4.52,
"learning_rate": 2.875814863102999e-06,
"loss": 0.0632,
"step": 111000
},
{
"epoch": 4.56,
"learning_rate": 2.63135593220339e-06,
"loss": 0.0672,
"step": 112000
},
{
"epoch": 4.6,
"learning_rate": 2.3871414602346807e-06,
"loss": 0.0665,
"step": 113000
},
{
"epoch": 4.64,
"learning_rate": 2.1426825293350717e-06,
"loss": 0.0637,
"step": 114000
},
{
"epoch": 4.69,
"learning_rate": 1.898223598435463e-06,
"loss": 0.0712,
"step": 115000
},
{
"epoch": 4.73,
"learning_rate": 1.653764667535854e-06,
"loss": 0.064,
"step": 116000
},
{
"epoch": 4.77,
"learning_rate": 1.4093057366362453e-06,
"loss": 0.0644,
"step": 117000
},
{
"epoch": 4.81,
"learning_rate": 1.1653357235984355e-06,
"loss": 0.0683,
"step": 118000
},
{
"epoch": 4.85,
"learning_rate": 9.208767926988267e-07,
"loss": 0.0622,
"step": 119000
},
{
"epoch": 4.89,
"learning_rate": 6.766623207301173e-07,
"loss": 0.0678,
"step": 120000
},
{
"epoch": 4.93,
"learning_rate": 4.322033898305085e-07,
"loss": 0.0578,
"step": 121000
},
{
"epoch": 4.97,
"learning_rate": 1.877444589308996e-07,
"loss": 0.0643,
"step": 122000
},
{
"epoch": 5.0,
"eval_accuracy": 0.8999490575649516,
"eval_loss": 0.6488531231880188,
"eval_runtime": 67.1753,
"eval_samples_per_second": 146.11,
"eval_steps_per_second": 18.266,
"step": 122720
},
{
"epoch": 5.0,
"step": 122720,
"total_flos": 4.5746877181130496e+17,
"train_loss": 0.2137734965519613,
"train_runtime": 32769.5489,
"train_samples_per_second": 59.919,
"train_steps_per_second": 3.745
}
],
"max_steps": 122720,
"num_train_epochs": 5,
"total_flos": 4.5746877181130496e+17,
"trial_name": null,
"trial_params": null
}