bert_hateracism90000 / trainer_state.json
MutazYoune's picture
Upload 7 files
c6d043d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.4703720041170416,
"global_step": 90000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.7438734499828454e-05,
"loss": 2.389,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 3.737746899965691e-05,
"loss": 2.4302,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 3.731620349948536e-05,
"loss": 2.383,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 3.725493799931382e-05,
"loss": 2.3834,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 3.719367249914228e-05,
"loss": 2.3638,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 3.7132406998970736e-05,
"loss": 2.3711,
"step": 3000
},
{
"epoch": 0.06,
"learning_rate": 3.7071141498799194e-05,
"loss": 2.3405,
"step": 3500
},
{
"epoch": 0.07,
"learning_rate": 3.700987599862765e-05,
"loss": 2.3589,
"step": 4000
},
{
"epoch": 0.07,
"learning_rate": 3.694861049845611e-05,
"loss": 2.3535,
"step": 4500
},
{
"epoch": 0.08,
"learning_rate": 3.688734499828457e-05,
"loss": 2.3622,
"step": 5000
},
{
"epoch": 0.09,
"learning_rate": 3.682607949811302e-05,
"loss": 2.4021,
"step": 5500
},
{
"epoch": 0.1,
"learning_rate": 3.6764813997941475e-05,
"loss": 2.3588,
"step": 6000
},
{
"epoch": 0.11,
"learning_rate": 3.670354849776993e-05,
"loss": 2.3654,
"step": 6500
},
{
"epoch": 0.11,
"learning_rate": 3.664228299759839e-05,
"loss": 2.3883,
"step": 7000
},
{
"epoch": 0.12,
"learning_rate": 3.658101749742685e-05,
"loss": 2.3613,
"step": 7500
},
{
"epoch": 0.13,
"learning_rate": 3.65197519972553e-05,
"loss": 2.3459,
"step": 8000
},
{
"epoch": 0.14,
"learning_rate": 3.645848649708376e-05,
"loss": 2.3994,
"step": 8500
},
{
"epoch": 0.15,
"learning_rate": 3.6397220996912214e-05,
"loss": 2.3753,
"step": 9000
},
{
"epoch": 0.16,
"learning_rate": 3.633595549674067e-05,
"loss": 2.3511,
"step": 9500
},
{
"epoch": 0.16,
"learning_rate": 3.627468999656913e-05,
"loss": 2.3771,
"step": 10000
},
{
"epoch": 0.17,
"learning_rate": 3.621342449639758e-05,
"loss": 2.3563,
"step": 10500
},
{
"epoch": 0.18,
"learning_rate": 3.6152158996226045e-05,
"loss": 2.3872,
"step": 11000
},
{
"epoch": 0.19,
"learning_rate": 3.60908934960545e-05,
"loss": 2.36,
"step": 11500
},
{
"epoch": 0.2,
"learning_rate": 3.6029627995882954e-05,
"loss": 2.3595,
"step": 12000
},
{
"epoch": 0.2,
"learning_rate": 3.596836249571141e-05,
"loss": 2.3835,
"step": 12500
},
{
"epoch": 0.21,
"learning_rate": 3.590709699553987e-05,
"loss": 2.3837,
"step": 13000
},
{
"epoch": 0.22,
"learning_rate": 3.584583149536833e-05,
"loss": 2.3349,
"step": 13500
},
{
"epoch": 0.23,
"learning_rate": 3.5784565995196784e-05,
"loss": 2.3229,
"step": 14000
},
{
"epoch": 0.24,
"learning_rate": 3.5723300495025235e-05,
"loss": 2.3744,
"step": 14500
},
{
"epoch": 0.25,
"learning_rate": 3.566203499485369e-05,
"loss": 2.3317,
"step": 15000
},
{
"epoch": 0.25,
"learning_rate": 3.560076949468215e-05,
"loss": 2.3539,
"step": 15500
},
{
"epoch": 0.26,
"learning_rate": 3.553950399451061e-05,
"loss": 2.3477,
"step": 16000
},
{
"epoch": 0.27,
"learning_rate": 3.5478238494339066e-05,
"loss": 2.3519,
"step": 16500
},
{
"epoch": 0.28,
"learning_rate": 3.541697299416752e-05,
"loss": 2.3691,
"step": 17000
},
{
"epoch": 0.29,
"learning_rate": 3.5355707493995974e-05,
"loss": 2.3149,
"step": 17500
},
{
"epoch": 0.29,
"learning_rate": 3.529444199382444e-05,
"loss": 2.3209,
"step": 18000
},
{
"epoch": 0.3,
"learning_rate": 3.523317649365289e-05,
"loss": 2.3377,
"step": 18500
},
{
"epoch": 0.31,
"learning_rate": 3.517191099348135e-05,
"loss": 2.3635,
"step": 19000
},
{
"epoch": 0.32,
"learning_rate": 3.5110645493309805e-05,
"loss": 2.3815,
"step": 19500
},
{
"epoch": 0.33,
"learning_rate": 3.504937999313826e-05,
"loss": 2.355,
"step": 20000
},
{
"epoch": 0.33,
"learning_rate": 3.498811449296672e-05,
"loss": 2.399,
"step": 20500
},
{
"epoch": 0.34,
"learning_rate": 3.492684899279517e-05,
"loss": 2.4075,
"step": 21000
},
{
"epoch": 0.35,
"learning_rate": 3.486558349262363e-05,
"loss": 2.3906,
"step": 21500
},
{
"epoch": 0.36,
"learning_rate": 3.480431799245209e-05,
"loss": 2.3569,
"step": 22000
},
{
"epoch": 0.37,
"learning_rate": 3.4743052492280544e-05,
"loss": 2.3433,
"step": 22500
},
{
"epoch": 0.38,
"learning_rate": 3.4681786992109e-05,
"loss": 2.3908,
"step": 23000
},
{
"epoch": 0.38,
"learning_rate": 3.462052149193745e-05,
"loss": 2.3431,
"step": 23500
},
{
"epoch": 0.39,
"learning_rate": 3.455925599176591e-05,
"loss": 2.3096,
"step": 24000
},
{
"epoch": 0.4,
"learning_rate": 3.449799049159437e-05,
"loss": 2.3466,
"step": 24500
},
{
"epoch": 0.41,
"learning_rate": 3.4436724991422826e-05,
"loss": 2.3523,
"step": 25000
},
{
"epoch": 0.42,
"learning_rate": 3.4375459491251284e-05,
"loss": 2.3728,
"step": 25500
},
{
"epoch": 0.42,
"learning_rate": 3.431419399107974e-05,
"loss": 2.3215,
"step": 26000
},
{
"epoch": 0.43,
"learning_rate": 3.42529284909082e-05,
"loss": 2.3309,
"step": 26500
},
{
"epoch": 0.44,
"learning_rate": 3.419166299073666e-05,
"loss": 2.3341,
"step": 27000
},
{
"epoch": 0.45,
"learning_rate": 3.413039749056511e-05,
"loss": 2.3729,
"step": 27500
},
{
"epoch": 0.46,
"learning_rate": 3.4069131990393565e-05,
"loss": 2.3595,
"step": 28000
},
{
"epoch": 0.47,
"learning_rate": 3.400786649022202e-05,
"loss": 2.4054,
"step": 28500
},
{
"epoch": 0.47,
"learning_rate": 3.394660099005048e-05,
"loss": 2.3213,
"step": 29000
},
{
"epoch": 0.48,
"learning_rate": 3.388533548987894e-05,
"loss": 2.3364,
"step": 29500
},
{
"epoch": 0.49,
"learning_rate": 3.382406998970739e-05,
"loss": 2.3332,
"step": 30000
},
{
"epoch": 0.5,
"learning_rate": 3.376280448953585e-05,
"loss": 2.304,
"step": 30500
},
{
"epoch": 0.51,
"learning_rate": 3.3701538989364304e-05,
"loss": 2.3333,
"step": 31000
},
{
"epoch": 0.51,
"learning_rate": 3.364027348919276e-05,
"loss": 2.3562,
"step": 31500
},
{
"epoch": 0.52,
"learning_rate": 3.357900798902122e-05,
"loss": 2.3353,
"step": 32000
},
{
"epoch": 0.53,
"learning_rate": 3.351774248884968e-05,
"loss": 2.3224,
"step": 32500
},
{
"epoch": 0.54,
"learning_rate": 3.3456476988678135e-05,
"loss": 2.3283,
"step": 33000
},
{
"epoch": 0.55,
"learning_rate": 3.339521148850659e-05,
"loss": 2.3357,
"step": 33500
},
{
"epoch": 0.56,
"learning_rate": 3.3333945988335044e-05,
"loss": 2.3352,
"step": 34000
},
{
"epoch": 0.56,
"learning_rate": 3.32726804881635e-05,
"loss": 2.3468,
"step": 34500
},
{
"epoch": 0.57,
"learning_rate": 3.321141498799196e-05,
"loss": 2.3411,
"step": 35000
},
{
"epoch": 0.58,
"learning_rate": 3.315014948782042e-05,
"loss": 2.3306,
"step": 35500
},
{
"epoch": 0.59,
"learning_rate": 3.3088883987648874e-05,
"loss": 2.3449,
"step": 36000
},
{
"epoch": 0.6,
"learning_rate": 3.3027618487477325e-05,
"loss": 2.3349,
"step": 36500
},
{
"epoch": 0.6,
"learning_rate": 3.296635298730578e-05,
"loss": 2.3192,
"step": 37000
},
{
"epoch": 0.61,
"learning_rate": 3.290508748713424e-05,
"loss": 2.3061,
"step": 37500
},
{
"epoch": 0.62,
"learning_rate": 3.28438219869627e-05,
"loss": 2.3491,
"step": 38000
},
{
"epoch": 0.63,
"learning_rate": 3.2782556486791156e-05,
"loss": 2.324,
"step": 38500
},
{
"epoch": 0.64,
"learning_rate": 3.2721290986619614e-05,
"loss": 2.3562,
"step": 39000
},
{
"epoch": 0.65,
"learning_rate": 3.266002548644807e-05,
"loss": 2.315,
"step": 39500
},
{
"epoch": 0.65,
"learning_rate": 3.259875998627653e-05,
"loss": 2.3585,
"step": 40000
},
{
"epoch": 0.66,
"learning_rate": 3.253749448610498e-05,
"loss": 2.2778,
"step": 40500
},
{
"epoch": 0.67,
"learning_rate": 3.247622898593344e-05,
"loss": 2.3479,
"step": 41000
},
{
"epoch": 0.68,
"learning_rate": 3.2414963485761895e-05,
"loss": 2.3489,
"step": 41500
},
{
"epoch": 0.69,
"learning_rate": 3.235369798559035e-05,
"loss": 2.3143,
"step": 42000
},
{
"epoch": 0.69,
"learning_rate": 3.229243248541881e-05,
"loss": 2.3453,
"step": 42500
},
{
"epoch": 0.7,
"learning_rate": 3.223116698524726e-05,
"loss": 2.335,
"step": 43000
},
{
"epoch": 0.71,
"learning_rate": 3.216990148507572e-05,
"loss": 2.327,
"step": 43500
},
{
"epoch": 0.72,
"learning_rate": 3.210863598490418e-05,
"loss": 2.2807,
"step": 44000
},
{
"epoch": 0.73,
"learning_rate": 3.2047370484732634e-05,
"loss": 2.2983,
"step": 44500
},
{
"epoch": 0.74,
"learning_rate": 3.198610498456109e-05,
"loss": 2.339,
"step": 45000
},
{
"epoch": 0.74,
"learning_rate": 3.192483948438954e-05,
"loss": 2.3275,
"step": 45500
},
{
"epoch": 0.75,
"learning_rate": 3.186357398421801e-05,
"loss": 2.3162,
"step": 46000
},
{
"epoch": 0.76,
"learning_rate": 3.1802308484046465e-05,
"loss": 2.2959,
"step": 46500
},
{
"epoch": 0.77,
"learning_rate": 3.1741042983874916e-05,
"loss": 2.3119,
"step": 47000
},
{
"epoch": 0.78,
"learning_rate": 3.1679777483703374e-05,
"loss": 2.294,
"step": 47500
},
{
"epoch": 0.78,
"learning_rate": 3.161851198353183e-05,
"loss": 2.3198,
"step": 48000
},
{
"epoch": 0.79,
"learning_rate": 3.155724648336029e-05,
"loss": 2.3277,
"step": 48500
},
{
"epoch": 0.8,
"learning_rate": 3.149598098318875e-05,
"loss": 2.3395,
"step": 49000
},
{
"epoch": 0.81,
"learning_rate": 3.14347154830172e-05,
"loss": 2.3118,
"step": 49500
},
{
"epoch": 0.82,
"learning_rate": 3.1373449982845655e-05,
"loss": 2.3149,
"step": 50000
},
{
"epoch": 0.83,
"learning_rate": 3.131218448267411e-05,
"loss": 2.2908,
"step": 50500
},
{
"epoch": 0.83,
"learning_rate": 3.125091898250257e-05,
"loss": 2.3073,
"step": 51000
},
{
"epoch": 0.84,
"learning_rate": 3.118965348233103e-05,
"loss": 2.343,
"step": 51500
},
{
"epoch": 0.85,
"learning_rate": 3.112838798215948e-05,
"loss": 2.3371,
"step": 52000
},
{
"epoch": 0.86,
"learning_rate": 3.106712248198794e-05,
"loss": 2.3074,
"step": 52500
},
{
"epoch": 0.87,
"learning_rate": 3.10058569818164e-05,
"loss": 2.2749,
"step": 53000
},
{
"epoch": 0.87,
"learning_rate": 3.094459148164485e-05,
"loss": 2.3164,
"step": 53500
},
{
"epoch": 0.88,
"learning_rate": 3.088332598147331e-05,
"loss": 2.337,
"step": 54000
},
{
"epoch": 0.89,
"learning_rate": 3.082206048130177e-05,
"loss": 2.3028,
"step": 54500
},
{
"epoch": 0.9,
"learning_rate": 3.0760794981130225e-05,
"loss": 2.3165,
"step": 55000
},
{
"epoch": 0.91,
"learning_rate": 3.069952948095868e-05,
"loss": 2.2895,
"step": 55500
},
{
"epoch": 0.91,
"learning_rate": 3.0638263980787134e-05,
"loss": 2.2824,
"step": 56000
},
{
"epoch": 0.92,
"learning_rate": 3.057699848061559e-05,
"loss": 2.3228,
"step": 56500
},
{
"epoch": 0.93,
"learning_rate": 3.051573298044405e-05,
"loss": 2.2546,
"step": 57000
},
{
"epoch": 0.94,
"learning_rate": 3.0454467480272507e-05,
"loss": 2.3119,
"step": 57500
},
{
"epoch": 0.95,
"learning_rate": 3.039320198010096e-05,
"loss": 2.2771,
"step": 58000
},
{
"epoch": 0.96,
"learning_rate": 3.033193647992942e-05,
"loss": 2.2607,
"step": 58500
},
{
"epoch": 0.96,
"learning_rate": 3.0270670979757873e-05,
"loss": 2.3198,
"step": 59000
},
{
"epoch": 0.97,
"learning_rate": 3.020940547958633e-05,
"loss": 2.2541,
"step": 59500
},
{
"epoch": 0.98,
"learning_rate": 3.0148139979414792e-05,
"loss": 2.261,
"step": 60000
},
{
"epoch": 0.99,
"learning_rate": 3.0086874479243246e-05,
"loss": 2.2986,
"step": 60500
},
{
"epoch": 1.0,
"learning_rate": 3.0025608979071704e-05,
"loss": 2.317,
"step": 61000
},
{
"epoch": 1.0,
"learning_rate": 2.996434347890016e-05,
"loss": 2.3038,
"step": 61500
},
{
"epoch": 1.01,
"learning_rate": 2.9903077978728616e-05,
"loss": 2.2841,
"step": 62000
},
{
"epoch": 1.02,
"learning_rate": 2.9841812478557073e-05,
"loss": 2.2385,
"step": 62500
},
{
"epoch": 1.03,
"learning_rate": 2.9780546978385528e-05,
"loss": 2.2838,
"step": 63000
},
{
"epoch": 1.04,
"learning_rate": 2.9719281478213985e-05,
"loss": 2.2644,
"step": 63500
},
{
"epoch": 1.05,
"learning_rate": 2.9658015978042443e-05,
"loss": 2.2696,
"step": 64000
},
{
"epoch": 1.05,
"learning_rate": 2.9596750477870897e-05,
"loss": 2.2769,
"step": 64500
},
{
"epoch": 1.06,
"learning_rate": 2.9535484977699355e-05,
"loss": 2.2684,
"step": 65000
},
{
"epoch": 1.07,
"learning_rate": 2.947421947752781e-05,
"loss": 2.24,
"step": 65500
},
{
"epoch": 1.08,
"learning_rate": 2.9412953977356267e-05,
"loss": 2.279,
"step": 66000
},
{
"epoch": 1.09,
"learning_rate": 2.9351688477184724e-05,
"loss": 2.2503,
"step": 66500
},
{
"epoch": 1.09,
"learning_rate": 2.9290422977013182e-05,
"loss": 2.2148,
"step": 67000
},
{
"epoch": 1.1,
"learning_rate": 2.922915747684164e-05,
"loss": 2.2341,
"step": 67500
},
{
"epoch": 1.11,
"learning_rate": 2.9167891976670097e-05,
"loss": 2.2791,
"step": 68000
},
{
"epoch": 1.12,
"learning_rate": 2.9106626476498552e-05,
"loss": 2.2022,
"step": 68500
},
{
"epoch": 1.13,
"learning_rate": 2.904536097632701e-05,
"loss": 2.2412,
"step": 69000
},
{
"epoch": 1.14,
"learning_rate": 2.8984095476155464e-05,
"loss": 2.2377,
"step": 69500
},
{
"epoch": 1.14,
"learning_rate": 2.892282997598392e-05,
"loss": 2.2535,
"step": 70000
},
{
"epoch": 1.15,
"learning_rate": 2.886156447581238e-05,
"loss": 2.2462,
"step": 70500
},
{
"epoch": 1.16,
"learning_rate": 2.8800298975640833e-05,
"loss": 2.2594,
"step": 71000
},
{
"epoch": 1.17,
"learning_rate": 2.873903347546929e-05,
"loss": 2.2296,
"step": 71500
},
{
"epoch": 1.18,
"learning_rate": 2.8677767975297745e-05,
"loss": 2.2141,
"step": 72000
},
{
"epoch": 1.18,
"learning_rate": 2.8616502475126203e-05,
"loss": 2.2137,
"step": 72500
},
{
"epoch": 1.19,
"learning_rate": 2.855523697495466e-05,
"loss": 2.2503,
"step": 73000
},
{
"epoch": 1.2,
"learning_rate": 2.8493971474783115e-05,
"loss": 2.214,
"step": 73500
},
{
"epoch": 1.21,
"learning_rate": 2.8432705974611576e-05,
"loss": 2.1766,
"step": 74000
},
{
"epoch": 1.22,
"learning_rate": 2.8371440474440034e-05,
"loss": 2.232,
"step": 74500
},
{
"epoch": 1.23,
"learning_rate": 2.8310174974268488e-05,
"loss": 2.1983,
"step": 75000
},
{
"epoch": 1.23,
"learning_rate": 2.8248909474096946e-05,
"loss": 2.2665,
"step": 75500
},
{
"epoch": 1.24,
"learning_rate": 2.81876439739254e-05,
"loss": 2.2109,
"step": 76000
},
{
"epoch": 1.25,
"learning_rate": 2.8126378473753858e-05,
"loss": 2.2699,
"step": 76500
},
{
"epoch": 1.26,
"learning_rate": 2.8065112973582315e-05,
"loss": 2.2059,
"step": 77000
},
{
"epoch": 1.27,
"learning_rate": 2.800384747341077e-05,
"loss": 2.2246,
"step": 77500
},
{
"epoch": 1.27,
"learning_rate": 2.7942581973239227e-05,
"loss": 2.2178,
"step": 78000
},
{
"epoch": 1.28,
"learning_rate": 2.788131647306768e-05,
"loss": 2.2359,
"step": 78500
},
{
"epoch": 1.29,
"learning_rate": 2.782005097289614e-05,
"loss": 2.2262,
"step": 79000
},
{
"epoch": 1.3,
"learning_rate": 2.7758785472724597e-05,
"loss": 2.2429,
"step": 79500
},
{
"epoch": 1.31,
"learning_rate": 2.769751997255305e-05,
"loss": 2.2291,
"step": 80000
},
{
"epoch": 1.32,
"learning_rate": 2.763625447238151e-05,
"loss": 2.2199,
"step": 80500
},
{
"epoch": 1.32,
"learning_rate": 2.757498897220997e-05,
"loss": 2.2423,
"step": 81000
},
{
"epoch": 1.33,
"learning_rate": 2.7513723472038424e-05,
"loss": 2.1731,
"step": 81500
},
{
"epoch": 1.34,
"learning_rate": 2.7452457971866882e-05,
"loss": 2.2085,
"step": 82000
},
{
"epoch": 1.35,
"learning_rate": 2.7391192471695336e-05,
"loss": 2.2361,
"step": 82500
},
{
"epoch": 1.36,
"learning_rate": 2.7329926971523794e-05,
"loss": 2.2206,
"step": 83000
},
{
"epoch": 1.36,
"learning_rate": 2.726866147135225e-05,
"loss": 2.2487,
"step": 83500
},
{
"epoch": 1.37,
"learning_rate": 2.7207395971180706e-05,
"loss": 2.245,
"step": 84000
},
{
"epoch": 1.38,
"learning_rate": 2.7146130471009163e-05,
"loss": 2.2191,
"step": 84500
},
{
"epoch": 1.39,
"learning_rate": 2.7084864970837618e-05,
"loss": 2.2127,
"step": 85000
},
{
"epoch": 1.4,
"learning_rate": 2.7023599470666075e-05,
"loss": 2.2063,
"step": 85500
},
{
"epoch": 1.41,
"learning_rate": 2.6962333970494533e-05,
"loss": 2.2355,
"step": 86000
},
{
"epoch": 1.41,
"learning_rate": 2.6901068470322987e-05,
"loss": 2.2017,
"step": 86500
},
{
"epoch": 1.42,
"learning_rate": 2.6839802970151445e-05,
"loss": 2.2103,
"step": 87000
},
{
"epoch": 1.43,
"learning_rate": 2.67785374699799e-05,
"loss": 2.2399,
"step": 87500
},
{
"epoch": 1.44,
"learning_rate": 2.671727196980836e-05,
"loss": 2.177,
"step": 88000
},
{
"epoch": 1.45,
"learning_rate": 2.6656006469636818e-05,
"loss": 2.1587,
"step": 88500
},
{
"epoch": 1.45,
"learning_rate": 2.6594740969465272e-05,
"loss": 2.2154,
"step": 89000
},
{
"epoch": 1.46,
"learning_rate": 2.653347546929373e-05,
"loss": 2.2174,
"step": 89500
},
{
"epoch": 1.47,
"learning_rate": 2.6472209969122188e-05,
"loss": 2.1947,
"step": 90000
}
],
"max_steps": 306045,
"num_train_epochs": 5,
"total_flos": 2.01759268838634e+16,
"trial_name": null,
"trial_params": null
}