camemberta-chatgptdetect-noisy / trainer_state.json
ANTOUN Wissam
added files
a245625
raw
history blame
27.8 kB
{
"best_metric": 0.9790566381351302,
"best_model_checkpoint": "/scratch_results/fr/full-1.0_0.5_0.5/camemberta/gradient_accumulation_steps-4-learning_rate-2e-05-epochs-5-lr_scheduler-linear-warmup_steps-0.1/SEED-25/checkpoint-4267",
"epoch": 4.999824263370629,
"global_step": 21335,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.372071227741332e-07,
"loss": 0.6655,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 1.8744142455482663e-06,
"loss": 0.6256,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 2.8116213683223997e-06,
"loss": 0.5199,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 3.7488284910965327e-06,
"loss": 0.3668,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 4.686035613870666e-06,
"loss": 0.2792,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 5.623242736644799e-06,
"loss": 0.2248,
"step": 600
},
{
"epoch": 0.16,
"learning_rate": 6.560449859418933e-06,
"loss": 0.1798,
"step": 700
},
{
"epoch": 0.19,
"learning_rate": 7.497656982193065e-06,
"loss": 0.1277,
"step": 800
},
{
"epoch": 0.21,
"learning_rate": 8.434864104967199e-06,
"loss": 0.1229,
"step": 900
},
{
"epoch": 0.23,
"learning_rate": 9.372071227741332e-06,
"loss": 0.1047,
"step": 1000
},
{
"epoch": 0.26,
"learning_rate": 1.0299906279287724e-05,
"loss": 0.0756,
"step": 1100
},
{
"epoch": 0.28,
"learning_rate": 1.1237113402061856e-05,
"loss": 0.0914,
"step": 1200
},
{
"epoch": 0.3,
"learning_rate": 1.217432052483599e-05,
"loss": 0.066,
"step": 1300
},
{
"epoch": 0.33,
"learning_rate": 1.3111527647610123e-05,
"loss": 0.059,
"step": 1400
},
{
"epoch": 0.35,
"learning_rate": 1.4048734770384256e-05,
"loss": 0.0647,
"step": 1500
},
{
"epoch": 0.37,
"learning_rate": 1.498594189315839e-05,
"loss": 0.063,
"step": 1600
},
{
"epoch": 0.4,
"learning_rate": 1.592314901593252e-05,
"loss": 0.054,
"step": 1700
},
{
"epoch": 0.42,
"learning_rate": 1.6860356138706654e-05,
"loss": 0.0515,
"step": 1800
},
{
"epoch": 0.45,
"learning_rate": 1.7797563261480788e-05,
"loss": 0.052,
"step": 1900
},
{
"epoch": 0.47,
"learning_rate": 1.873477038425492e-05,
"loss": 0.0413,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 1.9671977507029055e-05,
"loss": 0.0382,
"step": 2100
},
{
"epoch": 0.52,
"learning_rate": 1.99322951929587e-05,
"loss": 0.0487,
"step": 2200
},
{
"epoch": 0.54,
"learning_rate": 1.9828133951356704e-05,
"loss": 0.0419,
"step": 2300
},
{
"epoch": 0.56,
"learning_rate": 1.9723972709754703e-05,
"loss": 0.031,
"step": 2400
},
{
"epoch": 0.59,
"learning_rate": 1.9619811468152703e-05,
"loss": 0.0379,
"step": 2500
},
{
"epoch": 0.61,
"learning_rate": 1.9515650226550702e-05,
"loss": 0.0396,
"step": 2600
},
{
"epoch": 0.63,
"learning_rate": 1.9411488984948704e-05,
"loss": 0.0436,
"step": 2700
},
{
"epoch": 0.66,
"learning_rate": 1.9307327743346703e-05,
"loss": 0.039,
"step": 2800
},
{
"epoch": 0.68,
"learning_rate": 1.9203166501744703e-05,
"loss": 0.0279,
"step": 2900
},
{
"epoch": 0.7,
"learning_rate": 1.9099005260142702e-05,
"loss": 0.0305,
"step": 3000
},
{
"epoch": 0.73,
"learning_rate": 1.8994844018540704e-05,
"loss": 0.0257,
"step": 3100
},
{
"epoch": 0.75,
"learning_rate": 1.8890682776938704e-05,
"loss": 0.0418,
"step": 3200
},
{
"epoch": 0.77,
"learning_rate": 1.8786521535336703e-05,
"loss": 0.0421,
"step": 3300
},
{
"epoch": 0.8,
"learning_rate": 1.8682360293734702e-05,
"loss": 0.0352,
"step": 3400
},
{
"epoch": 0.82,
"learning_rate": 1.8578199052132704e-05,
"loss": 0.0343,
"step": 3500
},
{
"epoch": 0.84,
"learning_rate": 1.8474037810530704e-05,
"loss": 0.0215,
"step": 3600
},
{
"epoch": 0.87,
"learning_rate": 1.8369876568928703e-05,
"loss": 0.0226,
"step": 3700
},
{
"epoch": 0.89,
"learning_rate": 1.8265715327326702e-05,
"loss": 0.0232,
"step": 3800
},
{
"epoch": 0.91,
"learning_rate": 1.8161554085724705e-05,
"loss": 0.0196,
"step": 3900
},
{
"epoch": 0.94,
"learning_rate": 1.8057392844122704e-05,
"loss": 0.0158,
"step": 4000
},
{
"epoch": 0.96,
"learning_rate": 1.7953231602520703e-05,
"loss": 0.0305,
"step": 4100
},
{
"epoch": 0.98,
"learning_rate": 1.7849070360918702e-05,
"loss": 0.0199,
"step": 4200
},
{
"epoch": 1.0,
"eval_f1": 0.9790566381351302,
"eval_loss": 0.04303711652755737,
"eval_runtime": 47.3378,
"eval_samples_per_second": 361.382,
"eval_steps_per_second": 45.186,
"step": 4267
},
{
"epoch": 1.01,
"learning_rate": 1.7744909119316705e-05,
"loss": 0.0167,
"step": 4300
},
{
"epoch": 1.03,
"learning_rate": 1.7640747877714704e-05,
"loss": 0.0084,
"step": 4400
},
{
"epoch": 1.05,
"learning_rate": 1.7536586636112703e-05,
"loss": 0.0179,
"step": 4500
},
{
"epoch": 1.08,
"learning_rate": 1.7432425394510702e-05,
"loss": 0.0093,
"step": 4600
},
{
"epoch": 1.1,
"learning_rate": 1.7328264152908705e-05,
"loss": 0.0052,
"step": 4700
},
{
"epoch": 1.12,
"learning_rate": 1.7224102911306704e-05,
"loss": 0.0091,
"step": 4800
},
{
"epoch": 1.15,
"learning_rate": 1.7119941669704706e-05,
"loss": 0.0137,
"step": 4900
},
{
"epoch": 1.17,
"learning_rate": 1.7015780428102702e-05,
"loss": 0.0112,
"step": 5000
},
{
"epoch": 1.2,
"learning_rate": 1.6911619186500705e-05,
"loss": 0.0101,
"step": 5100
},
{
"epoch": 1.22,
"learning_rate": 1.6808499557314724e-05,
"loss": 0.009,
"step": 5200
},
{
"epoch": 1.24,
"learning_rate": 1.6704338315712726e-05,
"loss": 0.0108,
"step": 5300
},
{
"epoch": 1.27,
"learning_rate": 1.6600177074110725e-05,
"loss": 0.0076,
"step": 5400
},
{
"epoch": 1.29,
"learning_rate": 1.6496015832508725e-05,
"loss": 0.0125,
"step": 5500
},
{
"epoch": 1.31,
"learning_rate": 1.6391854590906724e-05,
"loss": 0.0117,
"step": 5600
},
{
"epoch": 1.34,
"learning_rate": 1.6287693349304726e-05,
"loss": 0.013,
"step": 5700
},
{
"epoch": 1.36,
"learning_rate": 1.6183532107702725e-05,
"loss": 0.0022,
"step": 5800
},
{
"epoch": 1.38,
"learning_rate": 1.6079370866100725e-05,
"loss": 0.012,
"step": 5900
},
{
"epoch": 1.41,
"learning_rate": 1.5975209624498724e-05,
"loss": 0.0087,
"step": 6000
},
{
"epoch": 1.43,
"learning_rate": 1.5871048382896726e-05,
"loss": 0.0156,
"step": 6100
},
{
"epoch": 1.45,
"learning_rate": 1.5766887141294726e-05,
"loss": 0.0157,
"step": 6200
},
{
"epoch": 1.48,
"learning_rate": 1.5662725899692725e-05,
"loss": 0.0094,
"step": 6300
},
{
"epoch": 1.5,
"learning_rate": 1.5558564658090727e-05,
"loss": 0.0083,
"step": 6400
},
{
"epoch": 1.52,
"learning_rate": 1.5454403416488726e-05,
"loss": 0.0064,
"step": 6500
},
{
"epoch": 1.55,
"learning_rate": 1.5350242174886726e-05,
"loss": 0.0098,
"step": 6600
},
{
"epoch": 1.57,
"learning_rate": 1.5246080933284727e-05,
"loss": 0.0044,
"step": 6700
},
{
"epoch": 1.59,
"learning_rate": 1.5141919691682727e-05,
"loss": 0.009,
"step": 6800
},
{
"epoch": 1.62,
"learning_rate": 1.5037758450080727e-05,
"loss": 0.0044,
"step": 6900
},
{
"epoch": 1.64,
"learning_rate": 1.4934638820894747e-05,
"loss": 0.0122,
"step": 7000
},
{
"epoch": 1.66,
"learning_rate": 1.4830477579292746e-05,
"loss": 0.0067,
"step": 7100
},
{
"epoch": 1.69,
"learning_rate": 1.4726316337690747e-05,
"loss": 0.0056,
"step": 7200
},
{
"epoch": 1.71,
"learning_rate": 1.4622155096088746e-05,
"loss": 0.0105,
"step": 7300
},
{
"epoch": 1.73,
"learning_rate": 1.4517993854486747e-05,
"loss": 0.011,
"step": 7400
},
{
"epoch": 1.76,
"learning_rate": 1.4413832612884746e-05,
"loss": 0.004,
"step": 7500
},
{
"epoch": 1.78,
"learning_rate": 1.4309671371282747e-05,
"loss": 0.0098,
"step": 7600
},
{
"epoch": 1.8,
"learning_rate": 1.4205510129680746e-05,
"loss": 0.0067,
"step": 7700
},
{
"epoch": 1.83,
"learning_rate": 1.4101348888078747e-05,
"loss": 0.0153,
"step": 7800
},
{
"epoch": 1.85,
"learning_rate": 1.3997187646476746e-05,
"loss": 0.0081,
"step": 7900
},
{
"epoch": 1.87,
"learning_rate": 1.3893026404874747e-05,
"loss": 0.0117,
"step": 8000
},
{
"epoch": 1.9,
"learning_rate": 1.3788865163272746e-05,
"loss": 0.0171,
"step": 8100
},
{
"epoch": 1.92,
"learning_rate": 1.3684703921670747e-05,
"loss": 0.0046,
"step": 8200
},
{
"epoch": 1.94,
"learning_rate": 1.3580542680068747e-05,
"loss": 0.0063,
"step": 8300
},
{
"epoch": 1.97,
"learning_rate": 1.3476381438466747e-05,
"loss": 0.005,
"step": 8400
},
{
"epoch": 1.99,
"learning_rate": 1.3372220196864747e-05,
"loss": 0.0104,
"step": 8500
},
{
"epoch": 2.0,
"eval_f1": 0.9462686567164179,
"eval_loss": 0.14571115374565125,
"eval_runtime": 42.5908,
"eval_samples_per_second": 401.66,
"eval_steps_per_second": 50.222,
"step": 8534
},
{
"epoch": 2.02,
"learning_rate": 1.3268058955262747e-05,
"loss": 0.0065,
"step": 8600
},
{
"epoch": 2.04,
"learning_rate": 1.3163897713660747e-05,
"loss": 0.0023,
"step": 8700
},
{
"epoch": 2.06,
"learning_rate": 1.3060778084474767e-05,
"loss": 0.0064,
"step": 8800
},
{
"epoch": 2.09,
"learning_rate": 1.295661684287277e-05,
"loss": 0.0023,
"step": 8900
},
{
"epoch": 2.11,
"learning_rate": 1.2852455601270767e-05,
"loss": 0.0007,
"step": 9000
},
{
"epoch": 2.13,
"learning_rate": 1.274829435966877e-05,
"loss": 0.0052,
"step": 9100
},
{
"epoch": 2.16,
"learning_rate": 1.2644133118066769e-05,
"loss": 0.0018,
"step": 9200
},
{
"epoch": 2.18,
"learning_rate": 1.253997187646477e-05,
"loss": 0.0023,
"step": 9300
},
{
"epoch": 2.2,
"learning_rate": 1.2435810634862769e-05,
"loss": 0.0032,
"step": 9400
},
{
"epoch": 2.23,
"learning_rate": 1.233164939326077e-05,
"loss": 0.0039,
"step": 9500
},
{
"epoch": 2.25,
"learning_rate": 1.2227488151658769e-05,
"loss": 0.0044,
"step": 9600
},
{
"epoch": 2.27,
"learning_rate": 1.212332691005677e-05,
"loss": 0.0032,
"step": 9700
},
{
"epoch": 2.3,
"learning_rate": 1.2019165668454769e-05,
"loss": 0.0033,
"step": 9800
},
{
"epoch": 2.32,
"learning_rate": 1.191500442685277e-05,
"loss": 0.0072,
"step": 9900
},
{
"epoch": 2.34,
"learning_rate": 1.1810843185250769e-05,
"loss": 0.0009,
"step": 10000
},
{
"epoch": 2.37,
"learning_rate": 1.170668194364877e-05,
"loss": 0.002,
"step": 10100
},
{
"epoch": 2.39,
"learning_rate": 1.1602520702046769e-05,
"loss": 0.0027,
"step": 10200
},
{
"epoch": 2.41,
"learning_rate": 1.149835946044477e-05,
"loss": 0.0016,
"step": 10300
},
{
"epoch": 2.44,
"learning_rate": 1.139419821884277e-05,
"loss": 0.0032,
"step": 10400
},
{
"epoch": 2.46,
"learning_rate": 1.129003697724077e-05,
"loss": 0.0036,
"step": 10500
},
{
"epoch": 2.48,
"learning_rate": 1.118587573563877e-05,
"loss": 0.0017,
"step": 10600
},
{
"epoch": 2.51,
"learning_rate": 1.108171449403677e-05,
"loss": 0.0059,
"step": 10700
},
{
"epoch": 2.53,
"learning_rate": 1.097755325243477e-05,
"loss": 0.0068,
"step": 10800
},
{
"epoch": 2.55,
"learning_rate": 1.087339201083277e-05,
"loss": 0.0011,
"step": 10900
},
{
"epoch": 2.58,
"learning_rate": 1.076923076923077e-05,
"loss": 0.0052,
"step": 11000
},
{
"epoch": 2.6,
"learning_rate": 1.066506952762877e-05,
"loss": 0.0024,
"step": 11100
},
{
"epoch": 2.62,
"learning_rate": 1.056090828602677e-05,
"loss": 0.0012,
"step": 11200
},
{
"epoch": 2.65,
"learning_rate": 1.045674704442477e-05,
"loss": 0.0066,
"step": 11300
},
{
"epoch": 2.67,
"learning_rate": 1.035258580282277e-05,
"loss": 0.0008,
"step": 11400
},
{
"epoch": 2.69,
"learning_rate": 1.024842456122077e-05,
"loss": 0.0006,
"step": 11500
},
{
"epoch": 2.72,
"learning_rate": 1.014426331961877e-05,
"loss": 0.0037,
"step": 11600
},
{
"epoch": 2.74,
"learning_rate": 1.0040102078016772e-05,
"loss": 0.0055,
"step": 11700
},
{
"epoch": 2.77,
"learning_rate": 9.935940836414771e-06,
"loss": 0.0063,
"step": 11800
},
{
"epoch": 2.79,
"learning_rate": 9.831779594812772e-06,
"loss": 0.0048,
"step": 11900
},
{
"epoch": 2.81,
"learning_rate": 9.727618353210771e-06,
"loss": 0.0019,
"step": 12000
},
{
"epoch": 2.84,
"learning_rate": 9.623457111608772e-06,
"loss": 0.0086,
"step": 12100
},
{
"epoch": 2.86,
"learning_rate": 9.519295870006771e-06,
"loss": 0.0044,
"step": 12200
},
{
"epoch": 2.88,
"learning_rate": 9.415134628404772e-06,
"loss": 0.0026,
"step": 12300
},
{
"epoch": 2.91,
"learning_rate": 9.310973386802771e-06,
"loss": 0.0016,
"step": 12400
},
{
"epoch": 2.93,
"learning_rate": 9.206812145200772e-06,
"loss": 0.005,
"step": 12500
},
{
"epoch": 2.95,
"learning_rate": 9.102650903598771e-06,
"loss": 0.0035,
"step": 12600
},
{
"epoch": 2.98,
"learning_rate": 8.998489661996772e-06,
"loss": 0.0041,
"step": 12700
},
{
"epoch": 3.0,
"learning_rate": 8.894328420394771e-06,
"loss": 0.0026,
"step": 12800
},
{
"epoch": 3.0,
"eval_f1": 0.972016609496299,
"eval_loss": 0.08046123385429382,
"eval_runtime": 44.1477,
"eval_samples_per_second": 387.495,
"eval_steps_per_second": 48.451,
"step": 12801
},
{
"epoch": 3.02,
"learning_rate": 8.790167178792772e-06,
"loss": 0.0061,
"step": 12900
},
{
"epoch": 3.05,
"learning_rate": 8.686005937190773e-06,
"loss": 0.003,
"step": 13000
},
{
"epoch": 3.07,
"learning_rate": 8.581844695588772e-06,
"loss": 0.0001,
"step": 13100
},
{
"epoch": 3.09,
"learning_rate": 8.477683453986773e-06,
"loss": 0.0009,
"step": 13200
},
{
"epoch": 3.12,
"learning_rate": 8.373522212384772e-06,
"loss": 0.0007,
"step": 13300
},
{
"epoch": 3.14,
"learning_rate": 8.269360970782773e-06,
"loss": 0.0001,
"step": 13400
},
{
"epoch": 3.16,
"learning_rate": 8.165199729180772e-06,
"loss": 0.0027,
"step": 13500
},
{
"epoch": 3.19,
"learning_rate": 8.061038487578773e-06,
"loss": 0.0001,
"step": 13600
},
{
"epoch": 3.21,
"learning_rate": 7.956877245976772e-06,
"loss": 0.0,
"step": 13700
},
{
"epoch": 3.23,
"learning_rate": 7.852716004374773e-06,
"loss": 0.0004,
"step": 13800
},
{
"epoch": 3.26,
"learning_rate": 7.748554762772773e-06,
"loss": 0.004,
"step": 13900
},
{
"epoch": 3.28,
"learning_rate": 7.644393521170773e-06,
"loss": 0.0043,
"step": 14000
},
{
"epoch": 3.3,
"learning_rate": 7.5402322795687734e-06,
"loss": 0.003,
"step": 14100
},
{
"epoch": 3.33,
"learning_rate": 7.4360710379667735e-06,
"loss": 0.0,
"step": 14200
},
{
"epoch": 3.35,
"learning_rate": 7.3319097963647735e-06,
"loss": 0.0013,
"step": 14300
},
{
"epoch": 3.37,
"learning_rate": 7.2277485547627735e-06,
"loss": 0.0002,
"step": 14400
},
{
"epoch": 3.4,
"learning_rate": 7.1235873131607735e-06,
"loss": 0.002,
"step": 14500
},
{
"epoch": 3.42,
"learning_rate": 7.0194260715587736e-06,
"loss": 0.0023,
"step": 14600
},
{
"epoch": 3.44,
"learning_rate": 6.9152648299567736e-06,
"loss": 0.0032,
"step": 14700
},
{
"epoch": 3.47,
"learning_rate": 6.812145200770793e-06,
"loss": 0.0015,
"step": 14800
},
{
"epoch": 3.49,
"learning_rate": 6.707983959168793e-06,
"loss": 0.0032,
"step": 14900
},
{
"epoch": 3.52,
"learning_rate": 6.603822717566793e-06,
"loss": 0.0002,
"step": 15000
},
{
"epoch": 3.54,
"learning_rate": 6.499661475964793e-06,
"loss": 0.0,
"step": 15100
},
{
"epoch": 3.56,
"learning_rate": 6.395500234362793e-06,
"loss": 0.0,
"step": 15200
},
{
"epoch": 3.59,
"learning_rate": 6.291338992760793e-06,
"loss": 0.0053,
"step": 15300
},
{
"epoch": 3.61,
"learning_rate": 6.187177751158794e-06,
"loss": 0.0,
"step": 15400
},
{
"epoch": 3.63,
"learning_rate": 6.084058121972815e-06,
"loss": 0.0006,
"step": 15500
},
{
"epoch": 3.66,
"learning_rate": 5.979896880370815e-06,
"loss": 0.0003,
"step": 15600
},
{
"epoch": 3.68,
"learning_rate": 5.875735638768815e-06,
"loss": 0.001,
"step": 15700
},
{
"epoch": 3.7,
"learning_rate": 5.771574397166815e-06,
"loss": 0.0,
"step": 15800
},
{
"epoch": 3.73,
"learning_rate": 5.667413155564815e-06,
"loss": 0.0,
"step": 15900
},
{
"epoch": 3.75,
"learning_rate": 5.563251913962815e-06,
"loss": 0.0003,
"step": 16000
},
{
"epoch": 3.77,
"learning_rate": 5.459090672360815e-06,
"loss": 0.0,
"step": 16100
},
{
"epoch": 3.8,
"learning_rate": 5.354929430758815e-06,
"loss": 0.0,
"step": 16200
},
{
"epoch": 3.82,
"learning_rate": 5.250768189156815e-06,
"loss": 0.0024,
"step": 16300
},
{
"epoch": 3.84,
"learning_rate": 5.146606947554815e-06,
"loss": 0.0001,
"step": 16400
},
{
"epoch": 3.87,
"learning_rate": 5.042445705952815e-06,
"loss": 0.0006,
"step": 16500
},
{
"epoch": 3.89,
"learning_rate": 4.938284464350816e-06,
"loss": 0.0003,
"step": 16600
},
{
"epoch": 3.91,
"learning_rate": 4.834123222748816e-06,
"loss": 0.0005,
"step": 16700
},
{
"epoch": 3.94,
"learning_rate": 4.729961981146816e-06,
"loss": 0.0,
"step": 16800
},
{
"epoch": 3.96,
"learning_rate": 4.625800739544816e-06,
"loss": 0.0,
"step": 16900
},
{
"epoch": 3.98,
"learning_rate": 4.521639497942816e-06,
"loss": 0.0,
"step": 17000
},
{
"epoch": 4.0,
"eval_f1": 0.9419163245698314,
"eval_loss": 0.2514629065990448,
"eval_runtime": 42.0947,
"eval_samples_per_second": 406.393,
"eval_steps_per_second": 50.814,
"step": 17068
},
{
"epoch": 4.01,
"learning_rate": 4.417478256340816e-06,
"loss": 0.0,
"step": 17100
},
{
"epoch": 4.03,
"learning_rate": 4.313317014738816e-06,
"loss": 0.0,
"step": 17200
},
{
"epoch": 4.05,
"learning_rate": 4.209155773136816e-06,
"loss": 0.0,
"step": 17300
},
{
"epoch": 4.08,
"learning_rate": 4.104994531534816e-06,
"loss": 0.0,
"step": 17400
},
{
"epoch": 4.1,
"learning_rate": 4.000833289932816e-06,
"loss": 0.0,
"step": 17500
},
{
"epoch": 4.12,
"learning_rate": 3.896672048330816e-06,
"loss": 0.0,
"step": 17600
},
{
"epoch": 4.15,
"learning_rate": 3.7925108067288165e-06,
"loss": 0.0,
"step": 17700
},
{
"epoch": 4.17,
"learning_rate": 3.6883495651268165e-06,
"loss": 0.0,
"step": 17800
},
{
"epoch": 4.19,
"learning_rate": 3.5841883235248166e-06,
"loss": 0.0,
"step": 17900
},
{
"epoch": 4.22,
"learning_rate": 3.4800270819228166e-06,
"loss": 0.0,
"step": 18000
},
{
"epoch": 4.24,
"learning_rate": 3.3758658403208166e-06,
"loss": 0.0,
"step": 18100
},
{
"epoch": 4.27,
"learning_rate": 3.2717045987188166e-06,
"loss": 0.0,
"step": 18200
},
{
"epoch": 4.29,
"learning_rate": 3.1675433571168166e-06,
"loss": 0.0,
"step": 18300
},
{
"epoch": 4.31,
"learning_rate": 3.0633821155148175e-06,
"loss": 0.0,
"step": 18400
},
{
"epoch": 4.34,
"learning_rate": 2.9592208739128175e-06,
"loss": 0.0,
"step": 18500
},
{
"epoch": 4.36,
"learning_rate": 2.8550596323108176e-06,
"loss": 0.0,
"step": 18600
},
{
"epoch": 4.38,
"learning_rate": 2.7508983907088176e-06,
"loss": 0.0028,
"step": 18700
},
{
"epoch": 4.41,
"learning_rate": 2.6467371491068176e-06,
"loss": 0.0004,
"step": 18800
},
{
"epoch": 4.43,
"learning_rate": 2.542575907504818e-06,
"loss": 0.0004,
"step": 18900
},
{
"epoch": 4.45,
"learning_rate": 2.4384146659028177e-06,
"loss": 0.0001,
"step": 19000
},
{
"epoch": 4.48,
"learning_rate": 2.3342534243008177e-06,
"loss": 0.0013,
"step": 19100
},
{
"epoch": 4.5,
"learning_rate": 2.2300921826988177e-06,
"loss": 0.0,
"step": 19200
},
{
"epoch": 4.52,
"learning_rate": 2.1259309410968177e-06,
"loss": 0.0001,
"step": 19300
},
{
"epoch": 4.55,
"learning_rate": 2.021769699494818e-06,
"loss": 0.0001,
"step": 19400
},
{
"epoch": 4.57,
"learning_rate": 1.917608457892818e-06,
"loss": 0.002,
"step": 19500
},
{
"epoch": 4.59,
"learning_rate": 1.8134472162908184e-06,
"loss": 0.0,
"step": 19600
},
{
"epoch": 4.62,
"learning_rate": 1.7103275871048383e-06,
"loss": 0.0002,
"step": 19700
},
{
"epoch": 4.64,
"learning_rate": 1.6061663455028383e-06,
"loss": 0.0,
"step": 19800
},
{
"epoch": 4.66,
"learning_rate": 1.5020051039008388e-06,
"loss": 0.0,
"step": 19900
},
{
"epoch": 4.69,
"learning_rate": 1.3978438622988388e-06,
"loss": 0.0,
"step": 20000
},
{
"epoch": 4.71,
"learning_rate": 1.2936826206968388e-06,
"loss": 0.0032,
"step": 20100
},
{
"epoch": 4.73,
"learning_rate": 1.1895213790948388e-06,
"loss": 0.0014,
"step": 20200
},
{
"epoch": 4.76,
"learning_rate": 1.0853601374928389e-06,
"loss": 0.0,
"step": 20300
},
{
"epoch": 4.78,
"learning_rate": 9.81198895890839e-07,
"loss": 0.0001,
"step": 20400
},
{
"epoch": 4.8,
"learning_rate": 8.770376542888392e-07,
"loss": 0.0,
"step": 20500
},
{
"epoch": 4.83,
"learning_rate": 7.728764126868394e-07,
"loss": 0.0,
"step": 20600
},
{
"epoch": 4.85,
"learning_rate": 6.687151710848394e-07,
"loss": 0.0,
"step": 20700
},
{
"epoch": 4.87,
"learning_rate": 5.645539294828395e-07,
"loss": 0.0,
"step": 20800
},
{
"epoch": 4.9,
"learning_rate": 4.614343002968596e-07,
"loss": 0.0037,
"step": 20900
},
{
"epoch": 4.92,
"learning_rate": 3.5727305869485966e-07,
"loss": 0.0,
"step": 21000
},
{
"epoch": 4.94,
"learning_rate": 2.531118170928598e-07,
"loss": 0.0005,
"step": 21100
},
{
"epoch": 4.97,
"learning_rate": 1.4895057549085987e-07,
"loss": 0.0003,
"step": 21200
},
{
"epoch": 4.99,
"learning_rate": 4.478933388885996e-08,
"loss": 0.0,
"step": 21300
},
{
"epoch": 5.0,
"eval_f1": 0.9567145644846549,
"eval_loss": 0.20001018047332764,
"eval_runtime": 41.779,
"eval_samples_per_second": 409.464,
"eval_steps_per_second": 51.198,
"step": 21335
},
{
"epoch": 5.0,
"step": 21335,
"total_flos": 4.584323349762662e+16,
"train_loss": 0.024299116742673387,
"train_runtime": 5984.8496,
"train_samples_per_second": 114.092,
"train_steps_per_second": 3.565
}
],
"max_steps": 21335,
"num_train_epochs": 5,
"total_flos": 4.584323349762662e+16,
"trial_name": null,
"trial_params": null
}