gpt-neo-1.3B-emailgen / trainer_state.json
pszemraj's picture
adding model files
c55e890
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.999920792079208,
"global_step": 1578,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.5625e-05,
"loss": 4.3204,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 3.125e-05,
"loss": 3.777,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 4.6875e-05,
"loss": 3.065,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 6.25e-05,
"loss": 2.8056,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 7.8125e-05,
"loss": 2.6582,
"step": 25
},
{
"epoch": 0.04,
"learning_rate": 9.375e-05,
"loss": 2.5482,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 9.999907090111407e-05,
"loss": 2.4959,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 9.999339319963168e-05,
"loss": 2.4399,
"step": 40
},
{
"epoch": 0.06,
"learning_rate": 9.998255454812832e-05,
"loss": 2.3978,
"step": 45
},
{
"epoch": 0.06,
"learning_rate": 9.996655606550656e-05,
"loss": 2.3887,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 9.994539940333206e-05,
"loss": 2.3651,
"step": 55
},
{
"epoch": 0.08,
"learning_rate": 9.99190867456629e-05,
"loss": 2.4034,
"step": 60
},
{
"epoch": 0.08,
"learning_rate": 9.988762080882424e-05,
"loss": 2.2935,
"step": 65
},
{
"epoch": 0.09,
"learning_rate": 9.985100484112785e-05,
"loss": 2.3129,
"step": 70
},
{
"epoch": 0.1,
"learning_rate": 9.980924262253681e-05,
"loss": 2.3247,
"step": 75
},
{
"epoch": 0.1,
"learning_rate": 9.97623384642752e-05,
"loss": 2.3264,
"step": 80
},
{
"epoch": 0.11,
"learning_rate": 9.971029720838318e-05,
"loss": 2.3193,
"step": 85
},
{
"epoch": 0.11,
"learning_rate": 9.965312422721704e-05,
"loss": 2.2133,
"step": 90
},
{
"epoch": 0.12,
"learning_rate": 9.959082542289465e-05,
"loss": 2.1766,
"step": 95
},
{
"epoch": 0.13,
"learning_rate": 9.952340722668609e-05,
"loss": 2.2439,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 9.945087659834985e-05,
"loss": 2.1283,
"step": 105
},
{
"epoch": 0.14,
"learning_rate": 9.937324102541423e-05,
"loss": 2.1173,
"step": 110
},
{
"epoch": 0.15,
"learning_rate": 9.929050852240449e-05,
"loss": 2.1494,
"step": 115
},
{
"epoch": 0.15,
"learning_rate": 9.920268763001542e-05,
"loss": 2.2087,
"step": 120
},
{
"epoch": 0.16,
"learning_rate": 9.910978741422969e-05,
"loss": 2.2189,
"step": 125
},
{
"epoch": 0.16,
"learning_rate": 9.901181746538196e-05,
"loss": 2.12,
"step": 130
},
{
"epoch": 0.17,
"learning_rate": 9.890878789716877e-05,
"loss": 2.0899,
"step": 135
},
{
"epoch": 0.18,
"learning_rate": 9.880070934560458e-05,
"loss": 2.1151,
"step": 140
},
{
"epoch": 0.18,
"learning_rate": 9.868759296792373e-05,
"loss": 2.1848,
"step": 145
},
{
"epoch": 0.19,
"learning_rate": 9.856945044142865e-05,
"loss": 2.0883,
"step": 150
},
{
"epoch": 0.2,
"learning_rate": 9.844629396228438e-05,
"loss": 2.1461,
"step": 155
},
{
"epoch": 0.2,
"learning_rate": 9.831813624425952e-05,
"loss": 2.0882,
"step": 160
},
{
"epoch": 0.21,
"learning_rate": 9.818499051741381e-05,
"loss": 2.1026,
"step": 165
},
{
"epoch": 0.22,
"learning_rate": 9.804687052673229e-05,
"loss": 2.0476,
"step": 170
},
{
"epoch": 0.22,
"learning_rate": 9.790379053070638e-05,
"loss": 2.118,
"step": 175
},
{
"epoch": 0.23,
"learning_rate": 9.775576529986199e-05,
"loss": 2.1244,
"step": 180
},
{
"epoch": 0.23,
"learning_rate": 9.760281011523465e-05,
"loss": 2.1312,
"step": 185
},
{
"epoch": 0.24,
"learning_rate": 9.744494076679205e-05,
"loss": 2.032,
"step": 190
},
{
"epoch": 0.25,
"learning_rate": 9.7282173551804e-05,
"loss": 2.0547,
"step": 195
},
{
"epoch": 0.25,
"learning_rate": 9.711452527315998e-05,
"loss": 2.087,
"step": 200
},
{
"epoch": 0.26,
"learning_rate": 9.694201323763459e-05,
"loss": 1.9386,
"step": 205
},
{
"epoch": 0.27,
"learning_rate": 9.676465525410088e-05,
"loss": 2.0093,
"step": 210
},
{
"epoch": 0.27,
"learning_rate": 9.658246963169195e-05,
"loss": 2.0762,
"step": 215
},
{
"epoch": 0.28,
"learning_rate": 9.639547517791076e-05,
"loss": 2.0568,
"step": 220
},
{
"epoch": 0.29,
"learning_rate": 9.620369119668866e-05,
"loss": 2.0782,
"step": 225
},
{
"epoch": 0.29,
"learning_rate": 9.600713748639258e-05,
"loss": 2.0198,
"step": 230
},
{
"epoch": 0.3,
"learning_rate": 9.580583433778116e-05,
"loss": 2.042,
"step": 235
},
{
"epoch": 0.3,
"learning_rate": 9.55998025319101e-05,
"loss": 2.0037,
"step": 240
},
{
"epoch": 0.31,
"learning_rate": 9.538906333798689e-05,
"loss": 1.953,
"step": 245
},
{
"epoch": 0.32,
"learning_rate": 9.517363851117512e-05,
"loss": 2.0362,
"step": 250
},
{
"epoch": 0.32,
"learning_rate": 9.49535502903486e-05,
"loss": 2.0131,
"step": 255
},
{
"epoch": 0.33,
"learning_rate": 9.472882139579572e-05,
"loss": 2.0257,
"step": 260
},
{
"epoch": 0.34,
"learning_rate": 9.449947502687376e-05,
"loss": 2.0066,
"step": 265
},
{
"epoch": 0.34,
"learning_rate": 9.426553485961415e-05,
"loss": 2.0057,
"step": 270
},
{
"epoch": 0.35,
"learning_rate": 9.402702504427823e-05,
"loss": 1.9991,
"step": 275
},
{
"epoch": 0.35,
"learning_rate": 9.378397020286417e-05,
"loss": 2.021,
"step": 280
},
{
"epoch": 0.36,
"learning_rate": 9.353639542656521e-05,
"loss": 1.9371,
"step": 285
},
{
"epoch": 0.37,
"learning_rate": 9.328432627317938e-05,
"loss": 1.9793,
"step": 290
},
{
"epoch": 0.37,
"learning_rate": 9.302778876447115e-05,
"loss": 1.9013,
"step": 295
},
{
"epoch": 0.38,
"learning_rate": 9.276680938348512e-05,
"loss": 1.9733,
"step": 300
},
{
"epoch": 0.39,
"learning_rate": 9.250141507181206e-05,
"loss": 1.9828,
"step": 305
},
{
"epoch": 0.39,
"learning_rate": 9.223163322680772e-05,
"loss": 1.9855,
"step": 310
},
{
"epoch": 0.4,
"learning_rate": 9.195749169876448e-05,
"loss": 1.9448,
"step": 315
},
{
"epoch": 0.41,
"learning_rate": 9.167901878803638e-05,
"loss": 1.9595,
"step": 320
},
{
"epoch": 0.41,
"learning_rate": 9.139624324211744e-05,
"loss": 1.9461,
"step": 325
},
{
"epoch": 0.42,
"learning_rate": 9.110919425267415e-05,
"loss": 2.0007,
"step": 330
},
{
"epoch": 0.42,
"learning_rate": 9.081790145253182e-05,
"loss": 1.9819,
"step": 335
},
{
"epoch": 0.43,
"learning_rate": 9.052239491261559e-05,
"loss": 1.8421,
"step": 340
},
{
"epoch": 0.44,
"learning_rate": 9.022270513884601e-05,
"loss": 1.8801,
"step": 345
},
{
"epoch": 0.44,
"learning_rate": 8.991886306899002e-05,
"loss": 1.9804,
"step": 350
},
{
"epoch": 0.45,
"learning_rate": 8.961090006946693e-05,
"loss": 1.9277,
"step": 355
},
{
"epoch": 0.46,
"learning_rate": 8.92988479321106e-05,
"loss": 1.9278,
"step": 360
},
{
"epoch": 0.46,
"learning_rate": 8.898273887088735e-05,
"loss": 1.9603,
"step": 365
},
{
"epoch": 0.47,
"learning_rate": 8.866260551857045e-05,
"loss": 1.8603,
"step": 370
},
{
"epoch": 0.48,
"learning_rate": 8.83384809233714e-05,
"loss": 1.9064,
"step": 375
},
{
"epoch": 0.48,
"learning_rate": 8.801039854552821e-05,
"loss": 2.0242,
"step": 380
},
{
"epoch": 0.49,
"learning_rate": 8.76783922538513e-05,
"loss": 1.9399,
"step": 385
},
{
"epoch": 0.49,
"learning_rate": 8.734249632222702e-05,
"loss": 1.9208,
"step": 390
},
{
"epoch": 0.5,
"learning_rate": 8.700274542607955e-05,
"loss": 1.9567,
"step": 395
},
{
"epoch": 0.51,
"learning_rate": 8.665917463879125e-05,
"loss": 1.7881,
"step": 400
},
{
"epoch": 0.51,
"learning_rate": 8.631181942808191e-05,
"loss": 1.8781,
"step": 405
},
{
"epoch": 0.52,
"learning_rate": 8.596071565234733e-05,
"loss": 1.8789,
"step": 410
},
{
"epoch": 0.53,
"learning_rate": 8.560589955695768e-05,
"loss": 1.8068,
"step": 415
},
{
"epoch": 0.53,
"learning_rate": 8.524740777051555e-05,
"loss": 1.9045,
"step": 420
},
{
"epoch": 0.54,
"learning_rate": 8.48852773010749e-05,
"loss": 1.8835,
"step": 425
},
{
"epoch": 0.54,
"learning_rate": 8.451954553232055e-05,
"loss": 1.9104,
"step": 430
},
{
"epoch": 0.55,
"learning_rate": 8.415025021970897e-05,
"loss": 1.8687,
"step": 435
},
{
"epoch": 0.56,
"learning_rate": 8.37774294865706e-05,
"loss": 1.8392,
"step": 440
},
{
"epoch": 0.56,
"learning_rate": 8.340112182017445e-05,
"loss": 1.952,
"step": 445
},
{
"epoch": 0.57,
"learning_rate": 8.30213660677548e-05,
"loss": 1.9331,
"step": 450
},
{
"epoch": 0.58,
"learning_rate": 8.263820143250096e-05,
"loss": 1.8135,
"step": 455
},
{
"epoch": 0.58,
"learning_rate": 8.225166746951023e-05,
"loss": 1.9092,
"step": 460
},
{
"epoch": 0.59,
"learning_rate": 8.186180408170453e-05,
"loss": 1.8667,
"step": 465
},
{
"epoch": 0.6,
"learning_rate": 8.146865151571108e-05,
"loss": 1.7837,
"step": 470
},
{
"epoch": 0.6,
"learning_rate": 8.107225035770763e-05,
"loss": 1.8695,
"step": 475
},
{
"epoch": 0.61,
"learning_rate": 8.067264152923268e-05,
"loss": 1.8875,
"step": 480
},
{
"epoch": 0.61,
"learning_rate": 8.026986628296104e-05,
"loss": 1.8252,
"step": 485
},
{
"epoch": 0.62,
"learning_rate": 7.986396619844519e-05,
"loss": 1.8115,
"step": 490
},
{
"epoch": 0.63,
"learning_rate": 7.945498317782289e-05,
"loss": 1.8459,
"step": 495
},
{
"epoch": 0.63,
"learning_rate": 7.904295944149157e-05,
"loss": 1.7909,
"step": 500
},
{
"epoch": 0.64,
"learning_rate": 7.86279375237497e-05,
"loss": 1.8135,
"step": 505
},
{
"epoch": 0.65,
"learning_rate": 7.820996026840607e-05,
"loss": 1.8054,
"step": 510
},
{
"epoch": 0.65,
"learning_rate": 7.778907082435664e-05,
"loss": 1.7808,
"step": 515
},
{
"epoch": 0.66,
"learning_rate": 7.736531264113041e-05,
"loss": 1.812,
"step": 520
},
{
"epoch": 0.67,
"learning_rate": 7.693872946440381e-05,
"loss": 1.8145,
"step": 525
},
{
"epoch": 0.67,
"learning_rate": 7.650936533148485e-05,
"loss": 1.8522,
"step": 530
},
{
"epoch": 0.68,
"learning_rate": 7.607726456676701e-05,
"loss": 1.8313,
"step": 535
},
{
"epoch": 0.68,
"learning_rate": 7.56424717771535e-05,
"loss": 1.8377,
"step": 540
},
{
"epoch": 0.69,
"learning_rate": 7.520503184745231e-05,
"loss": 1.8008,
"step": 545
},
{
"epoch": 0.7,
"learning_rate": 7.476498993574277e-05,
"loss": 1.7855,
"step": 550
},
{
"epoch": 0.7,
"learning_rate": 7.432239146871364e-05,
"loss": 1.8009,
"step": 555
},
{
"epoch": 0.71,
"learning_rate": 7.387728213697365e-05,
"loss": 1.9104,
"step": 560
},
{
"epoch": 0.72,
"learning_rate": 7.342970789033473e-05,
"loss": 1.8005,
"step": 565
},
{
"epoch": 0.72,
"learning_rate": 7.297971493306848e-05,
"loss": 1.8387,
"step": 570
},
{
"epoch": 0.73,
"learning_rate": 7.252734971913636e-05,
"loss": 1.8135,
"step": 575
},
{
"epoch": 0.74,
"learning_rate": 7.20726589473942e-05,
"loss": 1.7277,
"step": 580
},
{
"epoch": 0.74,
"learning_rate": 7.161568955677129e-05,
"loss": 1.8434,
"step": 585
},
{
"epoch": 0.75,
"learning_rate": 7.115648872142475e-05,
"loss": 1.8187,
"step": 590
},
{
"epoch": 0.75,
"learning_rate": 7.069510384586962e-05,
"loss": 1.7758,
"step": 595
},
{
"epoch": 0.76,
"learning_rate": 7.023158256008521e-05,
"loss": 1.6581,
"step": 600
},
{
"epoch": 0.77,
"learning_rate": 6.976597271459811e-05,
"loss": 1.793,
"step": 605
},
{
"epoch": 0.77,
"learning_rate": 6.929832237554241e-05,
"loss": 1.7647,
"step": 610
},
{
"epoch": 0.78,
"learning_rate": 6.882867981969777e-05,
"loss": 1.7386,
"step": 615
},
{
"epoch": 0.79,
"learning_rate": 6.835709352950557e-05,
"loss": 1.7605,
"step": 620
},
{
"epoch": 0.79,
"learning_rate": 6.788361218806416e-05,
"loss": 1.782,
"step": 625
},
{
"epoch": 0.8,
"learning_rate": 6.740828467410294e-05,
"loss": 1.7424,
"step": 630
},
{
"epoch": 0.8,
"learning_rate": 6.693116005693659e-05,
"loss": 1.6963,
"step": 635
},
{
"epoch": 0.81,
"learning_rate": 6.64522875913995e-05,
"loss": 1.7481,
"step": 640
},
{
"epoch": 0.82,
"learning_rate": 6.597171671276109e-05,
"loss": 1.7889,
"step": 645
},
{
"epoch": 0.82,
"learning_rate": 6.54894970316224e-05,
"loss": 1.7209,
"step": 650
},
{
"epoch": 0.83,
"learning_rate": 6.500567832879478e-05,
"loss": 1.7533,
"step": 655
},
{
"epoch": 0.84,
"learning_rate": 6.452031055016073e-05,
"loss": 1.8314,
"step": 660
},
{
"epoch": 0.84,
"learning_rate": 6.41309346585278e-05,
"loss": 1.7855,
"step": 665
},
{
"epoch": 0.85,
"learning_rate": 6.364290491421375e-05,
"loss": 1.8475,
"step": 670
},
{
"epoch": 0.86,
"learning_rate": 6.315346677679218e-05,
"loss": 1.7069,
"step": 675
},
{
"epoch": 0.86,
"learning_rate": 6.266267077225471e-05,
"loss": 1.7635,
"step": 680
},
{
"epoch": 0.87,
"learning_rate": 6.217056756676917e-05,
"loss": 1.7859,
"step": 685
},
{
"epoch": 0.87,
"learning_rate": 6.16772079614492e-05,
"loss": 1.7227,
"step": 690
},
{
"epoch": 0.88,
"learning_rate": 6.118264288710988e-05,
"loss": 1.796,
"step": 695
},
{
"epoch": 0.89,
"learning_rate": 6.068692339901006e-05,
"loss": 1.7148,
"step": 700
},
{
"epoch": 0.89,
"learning_rate": 6.019010067158181e-05,
"loss": 1.7648,
"step": 705
},
{
"epoch": 0.9,
"learning_rate": 5.9692225993147444e-05,
"loss": 1.7023,
"step": 710
},
{
"epoch": 0.91,
"learning_rate": 5.9193350760625014e-05,
"loss": 1.7477,
"step": 715
},
{
"epoch": 0.91,
"learning_rate": 5.869352647422235e-05,
"loss": 1.7712,
"step": 720
},
{
"epoch": 0.92,
"learning_rate": 5.81928047321207e-05,
"loss": 1.7387,
"step": 725
},
{
"epoch": 0.93,
"learning_rate": 5.769123722514795e-05,
"loss": 1.7172,
"step": 730
},
{
"epoch": 0.93,
"learning_rate": 5.7188875731442605e-05,
"loss": 1.6791,
"step": 735
},
{
"epoch": 0.94,
"learning_rate": 5.668577211110848e-05,
"loss": 1.8073,
"step": 740
},
{
"epoch": 0.94,
"learning_rate": 5.6181978300861046e-05,
"loss": 1.7626,
"step": 745
},
{
"epoch": 0.95,
"learning_rate": 5.5677546308665985e-05,
"loss": 1.8242,
"step": 750
},
{
"epoch": 0.96,
"learning_rate": 5.517252820837011e-05,
"loss": 1.6738,
"step": 755
},
{
"epoch": 0.96,
"learning_rate": 5.4666976134325776e-05,
"loss": 1.7522,
"step": 760
},
{
"epoch": 0.97,
"learning_rate": 5.416094227600881e-05,
"loss": 1.72,
"step": 765
},
{
"epoch": 0.98,
"learning_rate": 5.365447887263093e-05,
"loss": 1.6826,
"step": 770
},
{
"epoch": 0.98,
"learning_rate": 5.314763820774695e-05,
"loss": 1.7828,
"step": 775
},
{
"epoch": 0.99,
"learning_rate": 5.264047260385734e-05,
"loss": 1.7497,
"step": 780
},
{
"epoch": 0.99,
"learning_rate": 5.213303441700691e-05,
"loss": 1.8669,
"step": 785
},
{
"epoch": 1.0,
"eval_loss": 1.786563515663147,
"eval_runtime": 649.3288,
"eval_samples_per_second": 19.723,
"eval_steps_per_second": 4.931,
"step": 789
},
{
"epoch": 1.0,
"learning_rate": 5.162537603137989e-05,
"loss": 1.7179,
"step": 790
},
{
"epoch": 1.01,
"learning_rate": 5.1117549853892254e-05,
"loss": 1.5756,
"step": 795
},
{
"epoch": 1.01,
"learning_rate": 5.060960830878158e-05,
"loss": 1.5276,
"step": 800
},
{
"epoch": 1.02,
"learning_rate": 5.01016038321951e-05,
"loss": 1.4801,
"step": 805
},
{
"epoch": 1.03,
"learning_rate": 4.959358886677667e-05,
"loss": 1.5357,
"step": 810
},
{
"epoch": 1.03,
"learning_rate": 4.908561585625293e-05,
"loss": 1.508,
"step": 815
},
{
"epoch": 1.04,
"learning_rate": 4.857773724001939e-05,
"loss": 1.4764,
"step": 820
},
{
"epoch": 1.05,
"learning_rate": 4.807000544772698e-05,
"loss": 1.509,
"step": 825
},
{
"epoch": 1.05,
"learning_rate": 4.756247289386967e-05,
"loss": 1.4372,
"step": 830
},
{
"epoch": 1.06,
"learning_rate": 4.705519197237344e-05,
"loss": 1.475,
"step": 835
},
{
"epoch": 1.06,
"learning_rate": 4.664958360274928e-05,
"loss": 1.4519,
"step": 840
},
{
"epoch": 1.07,
"learning_rate": 4.6142887565827685e-05,
"loss": 1.5711,
"step": 845
},
{
"epoch": 1.08,
"learning_rate": 4.563658970880996e-05,
"loss": 1.597,
"step": 850
},
{
"epoch": 1.08,
"learning_rate": 4.513074229816111e-05,
"loss": 1.5804,
"step": 855
},
{
"epoch": 1.09,
"learning_rate": 4.462539755384534e-05,
"loss": 1.5397,
"step": 860
},
{
"epoch": 1.1,
"learning_rate": 4.4120607643935336e-05,
"loss": 1.4828,
"step": 865
},
{
"epoch": 1.1,
"learning_rate": 4.3616424679226705e-05,
"loss": 1.4731,
"step": 870
},
{
"epoch": 1.11,
"learning_rate": 4.3112900707858534e-05,
"loss": 1.4711,
"step": 875
},
{
"epoch": 1.12,
"learning_rate": 4.26100877099403e-05,
"loss": 1.4487,
"step": 880
},
{
"epoch": 1.12,
"learning_rate": 4.210803759218575e-05,
"loss": 1.4584,
"step": 885
},
{
"epoch": 1.13,
"learning_rate": 4.160680218255458e-05,
"loss": 1.468,
"step": 890
},
{
"epoch": 1.13,
"learning_rate": 4.1106433224901966e-05,
"loss": 1.4801,
"step": 895
},
{
"epoch": 1.14,
"learning_rate": 4.060698237363695e-05,
"loss": 1.5046,
"step": 900
},
{
"epoch": 1.15,
"learning_rate": 4.0108501188390055e-05,
"loss": 1.474,
"step": 905
},
{
"epoch": 1.15,
"learning_rate": 3.9611041128690586e-05,
"loss": 1.4256,
"step": 910
},
{
"epoch": 1.16,
"learning_rate": 3.911465354865441e-05,
"loss": 1.4885,
"step": 915
},
{
"epoch": 1.17,
"learning_rate": 3.861938969168245e-05,
"loss": 1.5432,
"step": 920
},
{
"epoch": 1.17,
"learning_rate": 3.812530068517073e-05,
"loss": 1.4848,
"step": 925
},
{
"epoch": 1.18,
"learning_rate": 3.763243753523237e-05,
"loss": 1.5015,
"step": 930
},
{
"epoch": 1.19,
"learning_rate": 3.7140851121432054e-05,
"loss": 1.4893,
"step": 935
},
{
"epoch": 1.19,
"learning_rate": 3.665059219153366e-05,
"loss": 1.4881,
"step": 940
},
{
"epoch": 1.2,
"learning_rate": 3.616171135626139e-05,
"loss": 1.4386,
"step": 945
},
{
"epoch": 1.2,
"learning_rate": 3.567425908407507e-05,
"loss": 1.4236,
"step": 950
},
{
"epoch": 1.21,
"learning_rate": 3.5188285695960185e-05,
"loss": 1.4149,
"step": 955
},
{
"epoch": 1.22,
"learning_rate": 3.470384136023316e-05,
"loss": 1.5044,
"step": 960
},
{
"epoch": 1.22,
"learning_rate": 3.422097608736226e-05,
"loss": 1.4973,
"step": 965
},
{
"epoch": 1.23,
"learning_rate": 3.373973972480493e-05,
"loss": 1.514,
"step": 970
},
{
"epoch": 1.24,
"learning_rate": 3.32601819518619e-05,
"loss": 1.5015,
"step": 975
},
{
"epoch": 1.24,
"learning_rate": 3.278235227454865e-05,
"loss": 1.471,
"step": 980
},
{
"epoch": 1.25,
"learning_rate": 3.230630002048485e-05,
"loss": 1.4508,
"step": 985
},
{
"epoch": 1.25,
"learning_rate": 3.1832074333801954e-05,
"loss": 1.5247,
"step": 990
},
{
"epoch": 1.26,
"learning_rate": 3.135972417007016e-05,
"loss": 1.49,
"step": 995
},
{
"epoch": 1.27,
"learning_rate": 3.08892982912444e-05,
"loss": 1.4816,
"step": 1000
},
{
"epoch": 1.27,
"learning_rate": 3.0420845260630616e-05,
"loss": 1.3529,
"step": 1005
},
{
"epoch": 1.28,
"learning_rate": 2.9954413437872464e-05,
"loss": 1.3932,
"step": 1010
},
{
"epoch": 1.29,
"learning_rate": 2.949005097395887e-05,
"loss": 1.3895,
"step": 1015
},
{
"epoch": 1.29,
"learning_rate": 2.9027805806253505e-05,
"loss": 1.4641,
"step": 1020
},
{
"epoch": 1.3,
"learning_rate": 2.856772565354583e-05,
"loss": 1.4891,
"step": 1025
},
{
"epoch": 1.31,
"learning_rate": 2.810985801112521e-05,
"loss": 1.3609,
"step": 1030
},
{
"epoch": 1.31,
"learning_rate": 2.765425014587769e-05,
"loss": 1.4134,
"step": 1035
},
{
"epoch": 1.32,
"learning_rate": 2.7200949091406496e-05,
"loss": 1.4344,
"step": 1040
},
{
"epoch": 1.32,
"learning_rate": 2.675000164317672e-05,
"loss": 1.4872,
"step": 1045
},
{
"epoch": 1.33,
"learning_rate": 2.630145435368453e-05,
"loss": 1.5234,
"step": 1050
},
{
"epoch": 1.34,
"learning_rate": 2.585535352765125e-05,
"loss": 1.463,
"step": 1055
},
{
"epoch": 1.34,
"learning_rate": 2.541174521724341e-05,
"loss": 1.3836,
"step": 1060
},
{
"epoch": 1.35,
"learning_rate": 2.4970675217318516e-05,
"loss": 1.415,
"step": 1065
},
{
"epoch": 1.36,
"learning_rate": 2.4532189060697624e-05,
"loss": 1.4798,
"step": 1070
},
{
"epoch": 1.36,
"learning_rate": 2.4096332013464784e-05,
"loss": 1.4155,
"step": 1075
},
{
"epoch": 1.37,
"learning_rate": 2.366314907029416e-05,
"loss": 1.3719,
"step": 1080
},
{
"epoch": 1.38,
"learning_rate": 2.32326849498051e-05,
"loss": 1.4487,
"step": 1085
},
{
"epoch": 1.38,
"learning_rate": 2.280498408994565e-05,
"loss": 1.4238,
"step": 1090
},
{
"epoch": 1.39,
"learning_rate": 2.2380090643405216e-05,
"loss": 1.4892,
"step": 1095
},
{
"epoch": 1.39,
"learning_rate": 2.195804847305654e-05,
"loss": 1.4752,
"step": 1100
},
{
"epoch": 1.4,
"learning_rate": 2.1538901147427508e-05,
"loss": 1.36,
"step": 1105
},
{
"epoch": 1.41,
"learning_rate": 2.1122691936203598e-05,
"loss": 1.3771,
"step": 1110
},
{
"epoch": 1.41,
"learning_rate": 2.0709463805760966e-05,
"loss": 1.4437,
"step": 1115
},
{
"epoch": 1.42,
"learning_rate": 2.0299259414730914e-05,
"loss": 1.4338,
"step": 1120
},
{
"epoch": 1.43,
"learning_rate": 1.9892121109596173e-05,
"loss": 1.5161,
"step": 1125
},
{
"epoch": 1.43,
"learning_rate": 1.9488090920319247e-05,
"loss": 1.5169,
"step": 1130
},
{
"epoch": 1.44,
"learning_rate": 1.9087210556003743e-05,
"loss": 1.3437,
"step": 1135
},
{
"epoch": 1.44,
"learning_rate": 1.8689521400588463e-05,
"loss": 1.5007,
"step": 1140
},
{
"epoch": 1.45,
"learning_rate": 1.8295064508575223e-05,
"loss": 1.394,
"step": 1145
},
{
"epoch": 1.46,
"learning_rate": 1.790388060079089e-05,
"loss": 1.3743,
"step": 1150
},
{
"epoch": 1.46,
"learning_rate": 1.7516010060183396e-05,
"loss": 1.4011,
"step": 1155
},
{
"epoch": 1.47,
"learning_rate": 1.7131492927653098e-05,
"loss": 1.3722,
"step": 1160
},
{
"epoch": 1.48,
"learning_rate": 1.6750368897919157e-05,
"loss": 1.4329,
"step": 1165
},
{
"epoch": 1.48,
"learning_rate": 1.6372677315421768e-05,
"loss": 1.3472,
"step": 1170
},
{
"epoch": 1.49,
"learning_rate": 1.5998457170260577e-05,
"loss": 1.3845,
"step": 1175
},
{
"epoch": 1.5,
"learning_rate": 1.5627747094169482e-05,
"loss": 1.3696,
"step": 1180
},
{
"epoch": 1.5,
"learning_rate": 1.52605853565288e-05,
"loss": 1.4439,
"step": 1185
},
{
"epoch": 1.51,
"learning_rate": 1.4897009860414441e-05,
"loss": 1.4946,
"step": 1190
},
{
"epoch": 1.51,
"learning_rate": 1.4537058138685106e-05,
"loss": 1.382,
"step": 1195
},
{
"epoch": 1.52,
"learning_rate": 1.4180767350107733e-05,
"loss": 1.4348,
"step": 1200
},
{
"epoch": 1.53,
"learning_rate": 1.3828174275521455e-05,
"loss": 1.3328,
"step": 1205
},
{
"epoch": 1.53,
"learning_rate": 1.3479315314040619e-05,
"loss": 1.3845,
"step": 1210
},
{
"epoch": 1.54,
"learning_rate": 1.3134226479297213e-05,
"loss": 1.4184,
"step": 1215
},
{
"epoch": 1.55,
"learning_rate": 1.2792943395723083e-05,
"loss": 1.4065,
"step": 1220
},
{
"epoch": 1.55,
"learning_rate": 1.2455501294872341e-05,
"loss": 1.4034,
"step": 1225
},
{
"epoch": 1.56,
"learning_rate": 1.2121935011784286e-05,
"loss": 1.4754,
"step": 1230
},
{
"epoch": 1.57,
"learning_rate": 1.1792278981387338e-05,
"loss": 1.4132,
"step": 1235
},
{
"epoch": 1.57,
"learning_rate": 1.1466567234944203e-05,
"loss": 1.4578,
"step": 1240
},
{
"epoch": 1.58,
"learning_rate": 1.1144833396538695e-05,
"loss": 1.3845,
"step": 1245
},
{
"epoch": 1.58,
"learning_rate": 1.0827110679604712e-05,
"loss": 1.3815,
"step": 1250
},
{
"epoch": 1.59,
"learning_rate": 1.0513431883497516e-05,
"loss": 1.458,
"step": 1255
},
{
"epoch": 1.6,
"learning_rate": 1.0203829390107673e-05,
"loss": 1.3987,
"step": 1260
},
{
"epoch": 1.6,
"learning_rate": 9.898335160518295e-06,
"loss": 1.476,
"step": 1265
},
{
"epoch": 1.61,
"learning_rate": 9.596980731705574e-06,
"loss": 1.4043,
"step": 1270
},
{
"epoch": 1.62,
"learning_rate": 9.299797213283118e-06,
"loss": 1.3216,
"step": 1275
},
{
"epoch": 1.62,
"learning_rate": 9.006815284290443e-06,
"loss": 1.4191,
"step": 1280
},
{
"epoch": 1.63,
"learning_rate": 8.718065190025899e-06,
"loss": 1.3306,
"step": 1285
},
{
"epoch": 1.63,
"learning_rate": 8.433576738924353e-06,
"loss": 1.3633,
"step": 1290
},
{
"epoch": 1.64,
"learning_rate": 8.153379299480013e-06,
"loss": 1.4831,
"step": 1295
},
{
"epoch": 1.65,
"learning_rate": 7.877501797214553e-06,
"loss": 1.2842,
"step": 1300
},
{
"epoch": 1.65,
"learning_rate": 7.605972711691228e-06,
"loss": 1.4293,
"step": 1305
},
{
"epoch": 1.66,
"learning_rate": 7.3388200735746486e-06,
"loss": 1.3349,
"step": 1310
},
{
"epoch": 1.67,
"learning_rate": 7.0760714617372605e-06,
"loss": 1.4032,
"step": 1315
},
{
"epoch": 1.67,
"learning_rate": 6.817754000412219e-06,
"loss": 1.5321,
"step": 1320
},
{
"epoch": 1.68,
"learning_rate": 6.56389435639333e-06,
"loss": 1.3822,
"step": 1325
},
{
"epoch": 1.69,
"learning_rate": 6.314518736282132e-06,
"loss": 1.4665,
"step": 1330
},
{
"epoch": 1.69,
"learning_rate": 6.069652883782501e-06,
"loss": 1.4638,
"step": 1335
},
{
"epoch": 1.7,
"learning_rate": 5.829322077043159e-06,
"loss": 1.417,
"step": 1340
},
{
"epoch": 1.7,
"learning_rate": 5.593551126048041e-06,
"loss": 1.4486,
"step": 1345
},
{
"epoch": 1.71,
"learning_rate": 5.36236437005509e-06,
"loss": 1.4988,
"step": 1350
},
{
"epoch": 1.72,
"learning_rate": 5.1357856750837765e-06,
"loss": 1.4268,
"step": 1355
},
{
"epoch": 1.72,
"learning_rate": 4.913838431451184e-06,
"loss": 1.392,
"step": 1360
},
{
"epoch": 1.73,
"learning_rate": 4.696545551357484e-06,
"loss": 1.3894,
"step": 1365
},
{
"epoch": 1.74,
"learning_rate": 4.483929466520592e-06,
"loss": 1.4446,
"step": 1370
},
{
"epoch": 1.74,
"learning_rate": 4.276012125860507e-06,
"loss": 1.3722,
"step": 1375
},
{
"epoch": 1.75,
"learning_rate": 4.072814993233442e-06,
"loss": 1.4785,
"step": 1380
},
{
"epoch": 1.76,
"learning_rate": 3.874359045216086e-06,
"loss": 1.4526,
"step": 1385
},
{
"epoch": 1.76,
"learning_rate": 3.680664768940101e-06,
"loss": 1.3871,
"step": 1390
},
{
"epoch": 1.77,
"learning_rate": 3.491752159977213e-06,
"loss": 1.4023,
"step": 1395
},
{
"epoch": 1.77,
"learning_rate": 3.3076407202749903e-06,
"loss": 1.3858,
"step": 1400
},
{
"epoch": 1.78,
"learning_rate": 3.128349456143609e-06,
"loss": 1.3945,
"step": 1405
},
{
"epoch": 1.79,
"learning_rate": 2.9538968762938377e-06,
"loss": 1.4255,
"step": 1410
},
{
"epoch": 1.79,
"learning_rate": 2.7843009899262253e-06,
"loss": 1.34,
"step": 1415
},
{
"epoch": 1.8,
"learning_rate": 2.619579304872061e-06,
"loss": 1.3443,
"step": 1420
},
{
"epoch": 1.81,
"learning_rate": 2.4597488257859457e-06,
"loss": 1.2774,
"step": 1425
},
{
"epoch": 1.81,
"learning_rate": 2.304826052390385e-06,
"loss": 1.4238,
"step": 1430
},
{
"epoch": 1.82,
"learning_rate": 2.1548269777724505e-06,
"loss": 1.383,
"step": 1435
},
{
"epoch": 1.83,
"learning_rate": 2.0097670867327835e-06,
"loss": 1.4196,
"step": 1440
},
{
"epoch": 1.83,
"learning_rate": 1.8696613541870745e-06,
"loss": 1.4295,
"step": 1445
},
{
"epoch": 1.84,
"learning_rate": 1.734524243620117e-06,
"loss": 1.3821,
"step": 1450
},
{
"epoch": 1.84,
"learning_rate": 1.6043697055927676e-06,
"loss": 1.4504,
"step": 1455
},
{
"epoch": 1.85,
"learning_rate": 1.4792111763017536e-06,
"loss": 1.4768,
"step": 1460
},
{
"epoch": 1.86,
"learning_rate": 1.3590615761926051e-06,
"loss": 1.3775,
"step": 1465
},
{
"epoch": 1.86,
"learning_rate": 1.243933308625883e-06,
"loss": 1.4304,
"step": 1470
},
{
"epoch": 1.87,
"learning_rate": 1.1338382585967255e-06,
"loss": 1.4992,
"step": 1475
},
{
"epoch": 1.88,
"learning_rate": 1.028787791507918e-06,
"loss": 1.4249,
"step": 1480
},
{
"epoch": 1.88,
"learning_rate": 9.287927519966378e-07,
"loss": 1.3822,
"step": 1485
},
{
"epoch": 1.89,
"learning_rate": 8.338634628149211e-07,
"loss": 1.3807,
"step": 1490
},
{
"epoch": 1.89,
"learning_rate": 7.440097237640109e-07,
"loss": 1.5082,
"step": 1495
},
{
"epoch": 1.9,
"learning_rate": 6.592408106827152e-07,
"loss": 1.3872,
"step": 1500
},
{
"epoch": 1.91,
"learning_rate": 5.795654744898016e-07,
"loss": 1.3692,
"step": 1505
},
{
"epoch": 1.91,
"learning_rate": 5.049919402806802e-07,
"loss": 1.3778,
"step": 1510
},
{
"epoch": 1.92,
"learning_rate": 4.355279064782447e-07,
"loss": 1.4629,
"step": 1515
},
{
"epoch": 1.93,
"learning_rate": 3.7118054403816326e-07,
"loss": 1.3796,
"step": 1520
},
{
"epoch": 1.93,
"learning_rate": 3.119564957086207e-07,
"loss": 1.462,
"step": 1525
},
{
"epoch": 1.94,
"learning_rate": 2.578618753445394e-07,
"loss": 1.3725,
"step": 1530
},
{
"epoch": 1.95,
"learning_rate": 2.0890226727646733e-07,
"loss": 1.3738,
"step": 1535
},
{
"epoch": 1.95,
"learning_rate": 1.6508272573403373e-07,
"loss": 1.3906,
"step": 1540
},
{
"epoch": 1.96,
"learning_rate": 1.2640777432426642e-07,
"loss": 1.4195,
"step": 1545
},
{
"epoch": 1.96,
"learning_rate": 9.288140556453773e-08,
"loss": 1.436,
"step": 1550
},
{
"epoch": 1.97,
"learning_rate": 6.450708047044396e-08,
"loss": 1.4758,
"step": 1555
},
{
"epoch": 1.98,
"learning_rate": 4.128772819850801e-08,
"loss": 1.4119,
"step": 1560
},
{
"epoch": 1.98,
"learning_rate": 2.3225745743787887e-08,
"loss": 1.4828,
"step": 1565
},
{
"epoch": 1.99,
"learning_rate": 1.0322997692441272e-08,
"loss": 1.4087,
"step": 1570
},
{
"epoch": 2.0,
"learning_rate": 2.580816029223998e-09,
"loss": 1.4049,
"step": 1575
},
{
"epoch": 2.0,
"eval_loss": 1.6930410861968994,
"eval_runtime": 651.437,
"eval_samples_per_second": 19.66,
"eval_steps_per_second": 4.915,
"step": 1578
},
{
"epoch": 2.0,
"step": 1578,
"total_flos": 1.4997323849651978e+18,
"train_loss": 1.7094378060412196,
"train_runtime": 39550.5514,
"train_samples_per_second": 5.107,
"train_steps_per_second": 0.04
}
],
"max_steps": 1578,
"num_train_epochs": 2,
"total_flos": 1.4997323849651978e+18,
"trial_name": null,
"trial_params": null
}