oxxxymiron / trainer_state.json
AlekseyKorshuk's picture
huggingartists
ff26826
{
"best_metric": 1.2792030572891235,
"best_model_checkpoint": "output/oxxxymiron/checkpoint-4807",
"epoch": 19.0,
"global_step": 4807,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.00013707387498762246,
"loss": 2.6224,
"step": 5
},
{
"epoch": 0.04,
"learning_rate": 0.00013669596372654658,
"loss": 2.5403,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 0.00013606765583958522,
"loss": 2.4692,
"step": 15
},
{
"epoch": 0.08,
"learning_rate": 0.00013519126168650572,
"loss": 2.3553,
"step": 20
},
{
"epoch": 0.1,
"learning_rate": 0.00013407000386857356,
"loss": 2.2478,
"step": 25
},
{
"epoch": 0.12,
"learning_rate": 0.0001327080053786798,
"loss": 2.4781,
"step": 30
},
{
"epoch": 0.14,
"learning_rate": 0.0001311102744406261,
"loss": 2.2512,
"step": 35
},
{
"epoch": 0.15,
"learning_rate": 0.00012928268609331444,
"loss": 2.2522,
"step": 40
},
{
"epoch": 0.17,
"learning_rate": 0.00012723196058755901,
"loss": 2.4658,
"step": 45
},
{
"epoch": 0.19,
"learning_rate": 0.00012496563867495734,
"loss": 2.2714,
"step": 50
},
{
"epoch": 0.21,
"learning_rate": 0.0001224920538796866,
"loss": 2.2569,
"step": 55
},
{
"epoch": 0.23,
"learning_rate": 0.00011982030185518478,
"loss": 2.2897,
"step": 60
},
{
"epoch": 0.25,
"learning_rate": 0.00011696020693839531,
"loss": 2.1927,
"step": 65
},
{
"epoch": 0.27,
"learning_rate": 0.00011392228602455959,
"loss": 2.1867,
"step": 70
},
{
"epoch": 0.29,
"learning_rate": 0.00011071770989539364,
"loss": 2.1953,
"step": 75
},
{
"epoch": 0.31,
"learning_rate": 0.00010735826214284955,
"loss": 2.2485,
"step": 80
},
{
"epoch": 0.33,
"learning_rate": 0.00010385629583950372,
"loss": 2.1775,
"step": 85
},
{
"epoch": 0.35,
"learning_rate": 0.00010022468811490008,
"loss": 2.2653,
"step": 90
},
{
"epoch": 0.37,
"learning_rate": 9.647679280487579e-05,
"loss": 2.2253,
"step": 95
},
{
"epoch": 0.39,
"learning_rate": 9.262639134798382e-05,
"loss": 2.2196,
"step": 100
},
{
"epoch": 0.41,
"learning_rate": 8.868764210957135e-05,
"loss": 2.1454,
"step": 105
},
{
"epoch": 0.42,
"learning_rate": 8.467502831985555e-05,
"loss": 2.1683,
"step": 110
},
{
"epoch": 0.44,
"learning_rate": 8.060330481743385e-05,
"loss": 2.1135,
"step": 115
},
{
"epoch": 0.46,
"learning_rate": 7.648744379405968e-05,
"loss": 2.2169,
"step": 120
},
{
"epoch": 0.48,
"learning_rate": 7.234257974018543e-05,
"loss": 2.0504,
"step": 125
},
{
"epoch": 0.5,
"learning_rate": 6.818395379371482e-05,
"loss": 2.1956,
"step": 130
},
{
"epoch": 0.52,
"learning_rate": 6.40268576966004e-05,
"loss": 2.1556,
"step": 135
},
{
"epoch": 0.54,
"learning_rate": 5.9886577565364115e-05,
"loss": 2.1258,
"step": 140
},
{
"epoch": 0.56,
"learning_rate": 5.577833768230335e-05,
"loss": 2.0906,
"step": 145
},
{
"epoch": 0.58,
"learning_rate": 5.171724451406823e-05,
"loss": 2.0648,
"step": 150
},
{
"epoch": 0.6,
"learning_rate": 4.7718231163460816e-05,
"loss": 2.1031,
"step": 155
},
{
"epoch": 0.62,
"learning_rate": 4.379600245871268e-05,
"loss": 2.456,
"step": 160
},
{
"epoch": 0.64,
"learning_rate": 3.99649808821543e-05,
"loss": 2.1486,
"step": 165
},
{
"epoch": 0.66,
"learning_rate": 3.6239253537102286e-05,
"loss": 2.1327,
"step": 170
},
{
"epoch": 0.68,
"learning_rate": 3.263252034797391e-05,
"loss": 2.1147,
"step": 175
},
{
"epoch": 0.69,
"learning_rate": 2.9158043684102103e-05,
"loss": 2.0989,
"step": 180
},
{
"epoch": 0.71,
"learning_rate": 2.5828599592490882e-05,
"loss": 2.082,
"step": 185
},
{
"epoch": 0.73,
"learning_rate": 2.265643081883308e-05,
"loss": 2.0394,
"step": 190
},
{
"epoch": 0.75,
"learning_rate": 1.9653201789538038e-05,
"loss": 2.0346,
"step": 195
},
{
"epoch": 0.77,
"learning_rate": 1.682995572030529e-05,
"loss": 2.1082,
"step": 200
},
{
"epoch": 0.79,
"learning_rate": 1.419707400896067e-05,
"loss": 2.1204,
"step": 205
},
{
"epoch": 0.81,
"learning_rate": 1.1764238061872434e-05,
"loss": 1.976,
"step": 210
},
{
"epoch": 0.83,
"learning_rate": 9.540393694315812e-06,
"loss": 1.9811,
"step": 215
},
{
"epoch": 0.85,
"learning_rate": 7.533718235689727e-06,
"loss": 2.0259,
"step": 220
},
{
"epoch": 0.87,
"learning_rate": 5.751590460543944e-06,
"loss": 2.0717,
"step": 225
},
{
"epoch": 0.89,
"learning_rate": 4.2005634559834046e-06,
"loss": 2.0042,
"step": 230
},
{
"epoch": 0.91,
"learning_rate": 2.8863405252193965e-06,
"loss": 2.0301,
"step": 235
},
{
"epoch": 0.93,
"learning_rate": 1.8137542158731914e-06,
"loss": 2.0554,
"step": 240
},
{
"epoch": 0.95,
"learning_rate": 9.867485501471999e-07,
"loss": 2.0167,
"step": 245
},
{
"epoch": 0.97,
"learning_rate": 4.083645222054329e-07,
"loss": 2.0252,
"step": 250
},
{
"epoch": 0.98,
"learning_rate": 8.072891609113784e-08,
"loss": 2.0128,
"step": 255
},
{
"epoch": 1.0,
"learning_rate": 5.046485299251069e-09,
"loss": 2.1545,
"step": 260
},
{
"epoch": 1.02,
"learning_rate": 1.815955227603999e-07,
"loss": 2.1436,
"step": 265
},
{
"epoch": 1.04,
"learning_rate": 6.097268375260298e-07,
"loss": 2.055,
"step": 270
},
{
"epoch": 1.06,
"learning_rate": 1.2878661419176275e-06,
"loss": 2.0446,
"step": 275
},
{
"epoch": 1.08,
"learning_rate": 2.2135198403619775e-06,
"loss": 2.0962,
"step": 280
},
{
"epoch": 1.1,
"learning_rate": 3.3832841986266328e-06,
"loss": 1.9858,
"step": 285
},
{
"epoch": 1.12,
"learning_rate": 4.7928578597388414e-06,
"loss": 2.0331,
"step": 290
},
{
"epoch": 1.14,
"learning_rate": 6.437057660565834e-06,
"loss": 2.0861,
"step": 295
},
{
"epoch": 1.16,
"learning_rate": 8.309837690896773e-06,
"loss": 2.0373,
"step": 300
},
{
"epoch": 1.18,
"learning_rate": 1.0404311524944352e-05,
"loss": 2.0409,
"step": 305
},
{
"epoch": 1.2,
"learning_rate": 1.271277754351776e-05,
"loss": 2.1014,
"step": 310
},
{
"epoch": 1.22,
"learning_rate": 1.5226747253755048e-05,
"loss": 2.0414,
"step": 315
},
{
"epoch": 1.24,
"learning_rate": 1.79369765022795e-05,
"loss": 2.1258,
"step": 320
},
{
"epoch": 1.25,
"learning_rate": 2.083349946700612e-05,
"loss": 2.0382,
"step": 325
},
{
"epoch": 1.27,
"learning_rate": 2.3905665302606086e-05,
"loss": 1.9499,
"step": 330
},
{
"epoch": 1.29,
"learning_rate": 2.7142177304879985e-05,
"loss": 2.1053,
"step": 335
},
{
"epoch": 1.31,
"learning_rate": 3.05311344500276e-05,
"loss": 2.0373,
"step": 340
},
{
"epoch": 1.33,
"learning_rate": 3.4060075156069894e-05,
"loss": 1.9227,
"step": 345
},
{
"epoch": 1.35,
"learning_rate": 3.771602310550759e-05,
"loss": 2.0504,
"step": 350
},
{
"epoch": 1.37,
"learning_rate": 4.148553496072039e-05,
"loss": 2.0583,
"step": 355
},
{
"epoch": 1.39,
"learning_rate": 4.5354749796652995e-05,
"loss": 2.0166,
"step": 360
},
{
"epoch": 1.41,
"learning_rate": 4.930944006901758e-05,
"loss": 2.0524,
"step": 365
},
{
"epoch": 1.43,
"learning_rate": 5.333506393059682e-05,
"loss": 2.0846,
"step": 370
},
{
"epoch": 1.45,
"learning_rate": 5.741681870327513e-05,
"loss": 2.0408,
"step": 375
},
{
"epoch": 1.47,
"learning_rate": 6.153969530917408e-05,
"loss": 1.9538,
"step": 380
},
{
"epoch": 1.49,
"learning_rate": 6.56885334607442e-05,
"loss": 2.1089,
"step": 385
},
{
"epoch": 1.51,
"learning_rate": 6.984807740687121e-05,
"loss": 2.0182,
"step": 390
},
{
"epoch": 1.53,
"learning_rate": 7.400303203001308e-05,
"loss": 1.9797,
"step": 395
},
{
"epoch": 1.54,
"learning_rate": 7.813811908809188e-05,
"loss": 2.0459,
"step": 400
},
{
"epoch": 1.56,
"learning_rate": 8.22381333943327e-05,
"loss": 2.0528,
"step": 405
},
{
"epoch": 1.58,
"learning_rate": 8.628799872846947e-05,
"loss": 2.0164,
"step": 410
},
{
"epoch": 1.6,
"learning_rate": 9.027282327372695e-05,
"loss": 1.973,
"step": 415
},
{
"epoch": 1.62,
"learning_rate": 9.417795437572915e-05,
"loss": 2.1012,
"step": 420
},
{
"epoch": 1.64,
"learning_rate": 9.798903242198118e-05,
"loss": 1.9342,
"step": 425
},
{
"epoch": 1.66,
"learning_rate": 0.00010169204364380207,
"loss": 1.9812,
"step": 430
},
{
"epoch": 1.68,
"learning_rate": 0.0001052733716465509,
"loss": 2.1037,
"step": 435
},
{
"epoch": 1.7,
"learning_rate": 0.0001087198474786628,
"loss": 2.0833,
"step": 440
},
{
"epoch": 1.72,
"learning_rate": 0.00011201879805538593,
"loss": 2.088,
"step": 445
},
{
"epoch": 1.74,
"learning_rate": 0.00011515809275916009,
"loss": 2.0211,
"step": 450
},
{
"epoch": 1.76,
"learning_rate": 0.00011812618804528036,
"loss": 1.9749,
"step": 455
},
{
"epoch": 1.78,
"learning_rate": 0.00012091216988882859,
"loss": 2.1269,
"step": 460
},
{
"epoch": 1.8,
"learning_rate": 0.0001235057939167871,
"loss": 2.1407,
"step": 465
},
{
"epoch": 1.81,
"learning_rate": 0.0001258975230777674,
"loss": 1.9911,
"step": 470
},
{
"epoch": 1.83,
"learning_rate": 0.00012807856271083559,
"loss": 1.8852,
"step": 475
},
{
"epoch": 1.85,
"learning_rate": 0.00013004089288448385,
"loss": 2.084,
"step": 480
},
{
"epoch": 1.87,
"learning_rate": 0.00013177729788683344,
"loss": 2.0223,
"step": 485
},
{
"epoch": 1.89,
"learning_rate": 0.00013328139275863037,
"loss": 1.8615,
"step": 490
},
{
"epoch": 1.91,
"learning_rate": 0.00013454764677146868,
"loss": 1.9669,
"step": 495
},
{
"epoch": 1.93,
"learning_rate": 0.00013557140376490993,
"loss": 1.997,
"step": 500
},
{
"epoch": 1.95,
"learning_rate": 0.00013634889926771582,
"loss": 2.0132,
"step": 505
},
{
"epoch": 1.97,
"learning_rate": 0.00013687727434023874,
"loss": 2.1001,
"step": 510
},
{
"epoch": 1.99,
"learning_rate": 0.00013715458608706872,
"loss": 1.9605,
"step": 515
},
{
"epoch": 2.01,
"learning_rate": 0.00013717981480128154,
"loss": 1.9686,
"step": 520
},
{
"epoch": 2.03,
"learning_rate": 0.0001369528677140173,
"loss": 1.9054,
"step": 525
},
{
"epoch": 2.05,
"learning_rate": 0.00013647457933560234,
"loss": 1.9621,
"step": 530
},
{
"epoch": 2.07,
"learning_rate": 0.00013574670838695924,
"loss": 1.9001,
"step": 535
},
{
"epoch": 2.08,
"learning_rate": 0.00013477193133258972,
"loss": 1.9553,
"step": 540
},
{
"epoch": 2.1,
"learning_rate": 0.0001335538325389091,
"loss": 1.9963,
"step": 545
},
{
"epoch": 2.12,
"learning_rate": 0.00013209689109412243,
"loss": 1.9554,
"step": 550
},
{
"epoch": 2.14,
"learning_rate": 0.00013040646433810595,
"loss": 2.0022,
"step": 555
},
{
"epoch": 2.16,
"learning_rate": 0.00012848876816285752,
"loss": 1.8702,
"step": 560
},
{
"epoch": 2.18,
"learning_rate": 0.00012635085415595263,
"loss": 1.9121,
"step": 565
},
{
"epoch": 2.2,
"learning_rate": 0.00012400058367105258,
"loss": 2.0007,
"step": 570
},
{
"epoch": 2.22,
"learning_rate": 0.0001214465989208104,
"loss": 2.0254,
"step": 575
},
{
"epoch": 2.24,
"learning_rate": 0.00011869829119846914,
"loss": 1.9571,
"step": 580
},
{
"epoch": 2.26,
"learning_rate": 0.00011576576634500534,
"loss": 1.9448,
"step": 585
},
{
"epoch": 2.28,
"learning_rate": 0.00011265980758879933,
"loss": 2.0852,
"step": 590
},
{
"epoch": 2.3,
"learning_rate": 0.00010939183589447406,
"loss": 1.9757,
"step": 595
},
{
"epoch": 2.32,
"learning_rate": 0.00010597386796670587,
"loss": 2.0051,
"step": 600
},
{
"epoch": 2.34,
"learning_rate": 0.0001024184720634304,
"loss": 2.015,
"step": 605
},
{
"epoch": 2.36,
"learning_rate": 9.873872178092492e-05,
"loss": 1.9478,
"step": 610
},
{
"epoch": 2.37,
"learning_rate": 9.494814798070336e-05,
"loss": 1.8666,
"step": 615
},
{
"epoch": 2.39,
"learning_rate": 9.106068903499513e-05,
"loss": 1.9268,
"step": 620
},
{
"epoch": 2.41,
"learning_rate": 8.709063957376078e-05,
"loss": 1.8729,
"step": 625
},
{
"epoch": 2.43,
"learning_rate": 8.305259792170682e-05,
"loss": 1.9046,
"step": 630
},
{
"epoch": 2.45,
"learning_rate": 7.89614124185811e-05,
"loss": 1.8255,
"step": 635
},
{
"epoch": 2.47,
"learning_rate": 7.48321268201335e-05,
"loss": 1.9063,
"step": 640
},
{
"epoch": 2.49,
"learning_rate": 7.06799249805101e-05,
"loss": 1.9487,
"step": 645
},
{
"epoch": 2.51,
"learning_rate": 6.652007501948994e-05,
"loss": 1.9612,
"step": 650
},
{
"epoch": 2.53,
"learning_rate": 6.236787317986654e-05,
"loss": 1.829,
"step": 655
},
{
"epoch": 2.55,
"learning_rate": 5.8238587581418946e-05,
"loss": 1.8833,
"step": 660
},
{
"epoch": 2.57,
"learning_rate": 5.414740207829316e-05,
"loss": 1.9743,
"step": 665
},
{
"epoch": 2.59,
"learning_rate": 5.010936042623931e-05,
"loss": 1.8592,
"step": 670
},
{
"epoch": 2.61,
"learning_rate": 4.6139310965004967e-05,
"loss": 1.8143,
"step": 675
},
{
"epoch": 2.63,
"learning_rate": 4.225185201929667e-05,
"loss": 1.9597,
"step": 680
},
{
"epoch": 2.64,
"learning_rate": 3.8461278219075114e-05,
"loss": 1.9673,
"step": 685
},
{
"epoch": 2.66,
"learning_rate": 3.478152793656968e-05,
"loss": 1.9703,
"step": 690
},
{
"epoch": 2.68,
"learning_rate": 3.1226132033294165e-05,
"loss": 1.8479,
"step": 695
},
{
"epoch": 2.7,
"learning_rate": 2.7808164105525978e-05,
"loss": 1.8916,
"step": 700
},
{
"epoch": 2.72,
"learning_rate": 2.454019241120065e-05,
"loss": 1.9233,
"step": 705
},
{
"epoch": 2.74,
"learning_rate": 2.1434233654994646e-05,
"loss": 1.8525,
"step": 710
},
{
"epoch": 2.76,
"learning_rate": 1.850170880153093e-05,
"loss": 1.9783,
"step": 715
},
{
"epoch": 2.78,
"learning_rate": 1.5753401079189615e-05,
"loss": 1.8898,
"step": 720
},
{
"epoch": 2.8,
"learning_rate": 1.3199416328947464e-05,
"loss": 1.9018,
"step": 725
},
{
"epoch": 2.82,
"learning_rate": 1.0849145844047363e-05,
"loss": 1.9754,
"step": 730
},
{
"epoch": 2.84,
"learning_rate": 8.711231837142545e-06,
"loss": 1.9263,
"step": 735
},
{
"epoch": 2.86,
"learning_rate": 6.793535661894062e-06,
"loss": 1.836,
"step": 740
},
{
"epoch": 2.88,
"learning_rate": 5.103108905877591e-06,
"loss": 1.9056,
"step": 745
},
{
"epoch": 2.9,
"learning_rate": 3.6461674610908713e-06,
"loss": 1.9471,
"step": 750
},
{
"epoch": 2.92,
"learning_rate": 2.4280686674102973e-06,
"loss": 1.7961,
"step": 755
},
{
"epoch": 2.93,
"learning_rate": 1.453291613040777e-06,
"loss": 1.9143,
"step": 760
},
{
"epoch": 2.95,
"learning_rate": 7.254206643976737e-07,
"loss": 1.9633,
"step": 765
},
{
"epoch": 2.97,
"learning_rate": 2.4713228598268823e-07,
"loss": 1.9096,
"step": 770
},
{
"epoch": 2.99,
"learning_rate": 2.018519871846962e-08,
"loss": 1.7972,
"step": 775
},
{
"epoch": 3.01,
"learning_rate": 4.5413912931266996e-08,
"loss": 1.8293,
"step": 780
},
{
"epoch": 3.03,
"learning_rate": 3.2272565976124403e-07,
"loss": 1.9095,
"step": 785
},
{
"epoch": 3.05,
"learning_rate": 8.511007322841792e-07,
"loss": 1.8653,
"step": 790
},
{
"epoch": 3.07,
"learning_rate": 1.628596235090069e-06,
"loss": 1.7876,
"step": 795
},
{
"epoch": 3.09,
"learning_rate": 2.652353228531267e-06,
"loss": 1.8415,
"step": 800
},
{
"epoch": 3.11,
"learning_rate": 3.918607241369593e-06,
"loss": 1.9383,
"step": 805
},
{
"epoch": 3.13,
"learning_rate": 5.4227021131665505e-06,
"loss": 1.8484,
"step": 810
},
{
"epoch": 3.15,
"learning_rate": 7.159107115516193e-06,
"loss": 1.8539,
"step": 815
},
{
"epoch": 3.17,
"learning_rate": 9.121437289164363e-06,
"loss": 1.8604,
"step": 820
},
{
"epoch": 3.19,
"learning_rate": 1.1302476922232561e-05,
"loss": 1.857,
"step": 825
},
{
"epoch": 3.2,
"learning_rate": 1.3694206083212835e-05,
"loss": 1.8542,
"step": 830
},
{
"epoch": 3.22,
"learning_rate": 1.6287830111171433e-05,
"loss": 1.8419,
"step": 835
},
{
"epoch": 3.24,
"learning_rate": 1.90738119547196e-05,
"loss": 1.853,
"step": 840
},
{
"epoch": 3.26,
"learning_rate": 2.204190724083989e-05,
"loss": 1.7492,
"step": 845
},
{
"epoch": 3.28,
"learning_rate": 2.5181201944614038e-05,
"loss": 1.8399,
"step": 850
},
{
"epoch": 3.3,
"learning_rate": 2.8480152521337186e-05,
"loss": 1.8052,
"step": 855
},
{
"epoch": 3.32,
"learning_rate": 3.192662835344901e-05,
"loss": 1.9257,
"step": 860
},
{
"epoch": 3.34,
"learning_rate": 3.550795635619789e-05,
"loss": 1.8094,
"step": 865
},
{
"epoch": 3.36,
"learning_rate": 3.921096757801878e-05,
"loss": 1.806,
"step": 870
},
{
"epoch": 3.38,
"learning_rate": 4.302204562427086e-05,
"loss": 1.8443,
"step": 875
},
{
"epoch": 3.4,
"learning_rate": 4.692717672627302e-05,
"loss": 1.892,
"step": 880
},
{
"epoch": 3.42,
"learning_rate": 5.091200127153043e-05,
"loss": 1.8859,
"step": 885
},
{
"epoch": 3.44,
"learning_rate": 5.496186660566721e-05,
"loss": 1.8386,
"step": 890
},
{
"epoch": 3.46,
"learning_rate": 5.906188091190809e-05,
"loss": 1.7457,
"step": 895
},
{
"epoch": 3.47,
"learning_rate": 6.3196967969987e-05,
"loss": 1.8338,
"step": 900
},
{
"epoch": 3.49,
"learning_rate": 6.73519225931287e-05,
"loss": 1.9389,
"step": 905
},
{
"epoch": 3.51,
"learning_rate": 7.151146653925576e-05,
"loss": 1.8629,
"step": 910
},
{
"epoch": 3.53,
"learning_rate": 7.566030469082582e-05,
"loss": 1.8406,
"step": 915
},
{
"epoch": 3.55,
"learning_rate": 7.978318129672488e-05,
"loss": 1.8205,
"step": 920
},
{
"epoch": 3.57,
"learning_rate": 8.386493606940314e-05,
"loss": 1.9223,
"step": 925
},
{
"epoch": 3.59,
"learning_rate": 8.789055993098239e-05,
"loss": 1.9075,
"step": 930
},
{
"epoch": 3.61,
"learning_rate": 9.184525020334701e-05,
"loss": 1.9361,
"step": 935
},
{
"epoch": 3.63,
"learning_rate": 9.571446503927958e-05,
"loss": 1.9117,
"step": 940
},
{
"epoch": 3.65,
"learning_rate": 9.948397689449231e-05,
"loss": 1.8154,
"step": 945
},
{
"epoch": 3.67,
"learning_rate": 0.00010313992484393007,
"loss": 1.7656,
"step": 950
},
{
"epoch": 3.69,
"learning_rate": 0.00010666886554997237,
"loss": 1.8084,
"step": 955
},
{
"epoch": 3.71,
"learning_rate": 0.00011005782269512003,
"loss": 1.8921,
"step": 960
},
{
"epoch": 3.73,
"learning_rate": 0.00011329433469739388,
"loss": 1.8615,
"step": 965
},
{
"epoch": 3.75,
"learning_rate": 0.00011636650053299383,
"loss": 1.9111,
"step": 970
},
{
"epoch": 3.76,
"learning_rate": 0.00011926302349772043,
"loss": 1.8464,
"step": 975
},
{
"epoch": 3.78,
"learning_rate": 0.00012197325274624493,
"loss": 1.8594,
"step": 980
},
{
"epoch": 3.8,
"learning_rate": 0.00012448722245648227,
"loss": 1.9001,
"step": 985
},
{
"epoch": 3.82,
"learning_rate": 0.00012679568847505558,
"loss": 1.9249,
"step": 990
},
{
"epoch": 3.84,
"learning_rate": 0.00012889016230910322,
"loss": 1.8819,
"step": 995
},
{
"epoch": 3.86,
"learning_rate": 0.00013076294233943414,
"loss": 1.8397,
"step": 1000
},
{
"epoch": 3.88,
"learning_rate": 0.00013240714214026114,
"loss": 1.8645,
"step": 1005
},
{
"epoch": 3.9,
"learning_rate": 0.00013381671580137337,
"loss": 1.8523,
"step": 1010
},
{
"epoch": 3.92,
"learning_rate": 0.00013498648015963801,
"loss": 1.8243,
"step": 1015
},
{
"epoch": 3.94,
"learning_rate": 0.00013591213385808236,
"loss": 1.7541,
"step": 1020
},
{
"epoch": 3.96,
"learning_rate": 0.00013659027316247397,
"loss": 1.8299,
"step": 1025
},
{
"epoch": 3.98,
"learning_rate": 0.00013701840447723958,
"loss": 1.8042,
"step": 1030
},
{
"epoch": 4.0,
"learning_rate": 0.00013719495351470075,
"loss": 1.8895,
"step": 1035
},
{
"epoch": 4.02,
"learning_rate": 0.00013711927108390887,
"loss": 1.7727,
"step": 1040
},
{
"epoch": 4.03,
"learning_rate": 0.00013679163547779458,
"loss": 1.7147,
"step": 1045
},
{
"epoch": 4.05,
"learning_rate": 0.0001362132514498528,
"loss": 1.7369,
"step": 1050
},
{
"epoch": 4.07,
"learning_rate": 0.00013538624578412684,
"loss": 1.8461,
"step": 1055
},
{
"epoch": 4.09,
"learning_rate": 0.00013431365947478064,
"loss": 1.8544,
"step": 1060
},
{
"epoch": 4.11,
"learning_rate": 0.00013299943654401656,
"loss": 1.7556,
"step": 1065
},
{
"epoch": 4.13,
"learning_rate": 0.00013144840953945602,
"loss": 1.8978,
"step": 1070
},
{
"epoch": 4.15,
"learning_rate": 0.00012966628176431033,
"loss": 1.7574,
"step": 1075
},
{
"epoch": 4.17,
"learning_rate": 0.00012765960630568425,
"loss": 1.8181,
"step": 1080
},
{
"epoch": 4.19,
"learning_rate": 0.00012543576193812755,
"loss": 1.8873,
"step": 1085
},
{
"epoch": 4.21,
"learning_rate": 0.00012300292599103934,
"loss": 1.8158,
"step": 1090
},
{
"epoch": 4.23,
"learning_rate": 0.00012037004427969473,
"loss": 1.7751,
"step": 1095
},
{
"epoch": 4.25,
"learning_rate": 0.00011754679821046194,
"loss": 1.8045,
"step": 1100
},
{
"epoch": 4.27,
"learning_rate": 0.00011454356918116694,
"loss": 1.8204,
"step": 1105
},
{
"epoch": 4.29,
"learning_rate": 0.00011137140040750914,
"loss": 1.7895,
"step": 1110
},
{
"epoch": 4.31,
"learning_rate": 0.00010804195631589795,
"loss": 1.8798,
"step": 1115
},
{
"epoch": 4.32,
"learning_rate": 0.0001045674796520261,
"loss": 1.8371,
"step": 1120
},
{
"epoch": 4.34,
"learning_rate": 0.00010096074646289774,
"loss": 1.8113,
"step": 1125
},
{
"epoch": 4.36,
"learning_rate": 9.72350191178458e-05,
"loss": 1.7679,
"step": 1130
},
{
"epoch": 4.38,
"learning_rate": 9.340399754128733e-05,
"loss": 1.8109,
"step": 1135
},
{
"epoch": 4.4,
"learning_rate": 8.948176883653924e-05,
"loss": 1.791,
"step": 1140
},
{
"epoch": 4.42,
"learning_rate": 8.548275548593188e-05,
"loss": 1.7472,
"step": 1145
},
{
"epoch": 4.44,
"learning_rate": 8.14216623176968e-05,
"loss": 1.7982,
"step": 1150
},
{
"epoch": 4.46,
"learning_rate": 7.731342243463583e-05,
"loss": 1.8122,
"step": 1155
},
{
"epoch": 4.48,
"learning_rate": 7.317314230339972e-05,
"loss": 1.8021,
"step": 1160
},
{
"epoch": 4.5,
"learning_rate": 6.901604620628534e-05,
"loss": 1.767,
"step": 1165
},
{
"epoch": 4.52,
"learning_rate": 6.485742025981452e-05,
"loss": 1.773,
"step": 1170
},
{
"epoch": 4.54,
"learning_rate": 6.0712556205940305e-05,
"loss": 1.7419,
"step": 1175
},
{
"epoch": 4.56,
"learning_rate": 5.6596695182566174e-05,
"loss": 1.7812,
"step": 1180
},
{
"epoch": 4.58,
"learning_rate": 5.2524971680144414e-05,
"loss": 1.7998,
"step": 1185
},
{
"epoch": 4.59,
"learning_rate": 4.8512357890428636e-05,
"loss": 1.8107,
"step": 1190
},
{
"epoch": 4.61,
"learning_rate": 4.4573608652016233e-05,
"loss": 1.8343,
"step": 1195
},
{
"epoch": 4.63,
"learning_rate": 4.0723207195124294e-05,
"loss": 1.7872,
"step": 1200
},
{
"epoch": 4.65,
"learning_rate": 3.697531188509992e-05,
"loss": 1.8365,
"step": 1205
},
{
"epoch": 4.67,
"learning_rate": 3.334370416049629e-05,
"loss": 1.7498,
"step": 1210
},
{
"epoch": 4.69,
"learning_rate": 2.9841737857150516e-05,
"loss": 1.7864,
"step": 1215
},
{
"epoch": 4.71,
"learning_rate": 2.6482290104606358e-05,
"loss": 1.7891,
"step": 1220
},
{
"epoch": 4.73,
"learning_rate": 2.327771397544045e-05,
"loss": 1.821,
"step": 1225
},
{
"epoch": 4.75,
"learning_rate": 2.0239793061604753e-05,
"loss": 1.6957,
"step": 1230
},
{
"epoch": 4.77,
"learning_rate": 1.7379698144815295e-05,
"loss": 1.7592,
"step": 1235
},
{
"epoch": 4.79,
"learning_rate": 1.4707946120313422e-05,
"loss": 1.7464,
"step": 1240
},
{
"epoch": 4.81,
"learning_rate": 1.2234361325042733e-05,
"loss": 1.7876,
"step": 1245
},
{
"epoch": 4.83,
"learning_rate": 9.968039412441069e-06,
"loss": 1.7211,
"step": 1250
},
{
"epoch": 4.85,
"learning_rate": 7.917313906685515e-06,
"loss": 1.7474,
"step": 1255
},
{
"epoch": 4.86,
"learning_rate": 6.089725559373884e-06,
"loss": 1.7813,
"step": 1260
},
{
"epoch": 4.88,
"learning_rate": 4.491994621320209e-06,
"loss": 1.7099,
"step": 1265
},
{
"epoch": 4.9,
"learning_rate": 3.1299961314264275e-06,
"loss": 1.697,
"step": 1270
},
{
"epoch": 4.92,
"learning_rate": 2.0087383134942665e-06,
"loss": 1.8274,
"step": 1275
},
{
"epoch": 4.94,
"learning_rate": 1.1323441604147912e-06,
"loss": 1.8383,
"step": 1280
},
{
"epoch": 4.96,
"learning_rate": 5.040362734534388e-07,
"loss": 1.8429,
"step": 1285
},
{
"epoch": 4.98,
"learning_rate": 1.2612501237755182e-07,
"loss": 1.7653,
"step": 1290
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 1.7624,
"step": 1295
},
{
"epoch": 5.02,
"learning_rate": 1.261250123775442e-07,
"loss": 1.6889,
"step": 1300
},
{
"epoch": 5.04,
"learning_rate": 5.040362734534236e-07,
"loss": 1.6715,
"step": 1305
},
{
"epoch": 5.06,
"learning_rate": 1.132344160414776e-06,
"loss": 1.7252,
"step": 1310
},
{
"epoch": 5.08,
"learning_rate": 2.0087383134942512e-06,
"loss": 1.7355,
"step": 1315
},
{
"epoch": 5.1,
"learning_rate": 3.1299961314264046e-06,
"loss": 1.8099,
"step": 1320
},
{
"epoch": 5.12,
"learning_rate": 4.491994621320179e-06,
"loss": 1.7203,
"step": 1325
},
{
"epoch": 5.14,
"learning_rate": 6.089725559373899e-06,
"loss": 1.7262,
"step": 1330
},
{
"epoch": 5.15,
"learning_rate": 7.917313906685478e-06,
"loss": 1.721,
"step": 1335
},
{
"epoch": 5.17,
"learning_rate": 9.968039412440962e-06,
"loss": 1.6592,
"step": 1340
},
{
"epoch": 5.19,
"learning_rate": 1.2234361325042687e-05,
"loss": 1.6871,
"step": 1345
},
{
"epoch": 5.21,
"learning_rate": 1.4707946120313293e-05,
"loss": 1.7319,
"step": 1350
},
{
"epoch": 5.23,
"learning_rate": 1.737969814481516e-05,
"loss": 1.7102,
"step": 1355
},
{
"epoch": 5.25,
"learning_rate": 2.0239793061604692e-05,
"loss": 1.6969,
"step": 1360
},
{
"epoch": 5.27,
"learning_rate": 2.3277713975440297e-05,
"loss": 1.6891,
"step": 1365
},
{
"epoch": 5.29,
"learning_rate": 2.648229010460629e-05,
"loss": 1.7584,
"step": 1370
},
{
"epoch": 5.31,
"learning_rate": 2.9841737857150448e-05,
"loss": 1.7074,
"step": 1375
},
{
"epoch": 5.33,
"learning_rate": 3.334370416049612e-05,
"loss": 1.6984,
"step": 1380
},
{
"epoch": 5.35,
"learning_rate": 3.697531188509984e-05,
"loss": 1.6734,
"step": 1385
},
{
"epoch": 5.37,
"learning_rate": 4.072320719512421e-05,
"loss": 1.6728,
"step": 1390
},
{
"epoch": 5.39,
"learning_rate": 4.457360865201626e-05,
"loss": 1.7748,
"step": 1395
},
{
"epoch": 5.41,
"learning_rate": 4.8512357890428555e-05,
"loss": 1.6899,
"step": 1400
},
{
"epoch": 5.42,
"learning_rate": 5.252497168014445e-05,
"loss": 1.6741,
"step": 1405
},
{
"epoch": 5.44,
"learning_rate": 5.659669518256621e-05,
"loss": 1.6965,
"step": 1410
},
{
"epoch": 5.46,
"learning_rate": 6.071255620594022e-05,
"loss": 1.6641,
"step": 1415
},
{
"epoch": 5.48,
"learning_rate": 6.485742025981456e-05,
"loss": 1.7494,
"step": 1420
},
{
"epoch": 5.5,
"learning_rate": 6.901604620628525e-05,
"loss": 1.7469,
"step": 1425
},
{
"epoch": 5.52,
"learning_rate": 7.31731423033995e-05,
"loss": 1.7503,
"step": 1430
},
{
"epoch": 5.54,
"learning_rate": 7.731342243463585e-05,
"loss": 1.7359,
"step": 1435
},
{
"epoch": 5.56,
"learning_rate": 8.14216623176967e-05,
"loss": 1.7196,
"step": 1440
},
{
"epoch": 5.58,
"learning_rate": 8.548275548593167e-05,
"loss": 1.7352,
"step": 1445
},
{
"epoch": 5.6,
"learning_rate": 8.948176883653917e-05,
"loss": 1.7017,
"step": 1450
},
{
"epoch": 5.62,
"learning_rate": 9.340399754128714e-05,
"loss": 1.7402,
"step": 1455
},
{
"epoch": 5.64,
"learning_rate": 9.723501911784583e-05,
"loss": 1.7463,
"step": 1460
},
{
"epoch": 5.66,
"learning_rate": 0.00010096074646289766,
"loss": 1.6842,
"step": 1465
},
{
"epoch": 5.68,
"learning_rate": 0.00010456747965202592,
"loss": 1.7311,
"step": 1470
},
{
"epoch": 5.69,
"learning_rate": 0.00010804195631589798,
"loss": 1.7528,
"step": 1475
},
{
"epoch": 5.71,
"learning_rate": 0.00011137140040750908,
"loss": 1.7338,
"step": 1480
},
{
"epoch": 5.73,
"learning_rate": 0.00011454356918116697,
"loss": 1.8454,
"step": 1485
},
{
"epoch": 5.75,
"learning_rate": 0.00011754679821046187,
"loss": 1.6556,
"step": 1490
},
{
"epoch": 5.77,
"learning_rate": 0.00012037004427969469,
"loss": 1.7088,
"step": 1495
},
{
"epoch": 5.79,
"learning_rate": 0.00012300292599103937,
"loss": 1.7949,
"step": 1500
},
{
"epoch": 5.81,
"learning_rate": 0.0001254357619381275,
"loss": 1.7168,
"step": 1505
},
{
"epoch": 5.83,
"learning_rate": 0.00012765960630568417,
"loss": 1.8008,
"step": 1510
},
{
"epoch": 5.85,
"learning_rate": 0.00012966628176431028,
"loss": 1.7668,
"step": 1515
},
{
"epoch": 5.87,
"learning_rate": 0.000131448409539456,
"loss": 1.8123,
"step": 1520
},
{
"epoch": 5.89,
"learning_rate": 0.00013299943654401658,
"loss": 1.6967,
"step": 1525
},
{
"epoch": 5.91,
"learning_rate": 0.0001343136594747806,
"loss": 1.7767,
"step": 1530
},
{
"epoch": 5.93,
"learning_rate": 0.00013538624578412676,
"loss": 1.8144,
"step": 1535
},
{
"epoch": 5.95,
"learning_rate": 0.0001362132514498528,
"loss": 1.7673,
"step": 1540
},
{
"epoch": 5.97,
"learning_rate": 0.00013679163547779458,
"loss": 1.7239,
"step": 1545
},
{
"epoch": 5.98,
"learning_rate": 0.00013711927108390887,
"loss": 1.7237,
"step": 1550
},
{
"epoch": 6.0,
"learning_rate": 0.00013719495351470075,
"loss": 1.8186,
"step": 1555
},
{
"epoch": 6.02,
"learning_rate": 0.0001370184044772396,
"loss": 1.7571,
"step": 1560
},
{
"epoch": 6.04,
"learning_rate": 0.00013659027316247397,
"loss": 1.6908,
"step": 1565
},
{
"epoch": 6.06,
"learning_rate": 0.00013591213385808238,
"loss": 1.6378,
"step": 1570
},
{
"epoch": 6.08,
"learning_rate": 0.0001349864801596381,
"loss": 1.763,
"step": 1575
},
{
"epoch": 6.1,
"learning_rate": 0.00013381671580137334,
"loss": 1.8114,
"step": 1580
},
{
"epoch": 6.12,
"learning_rate": 0.00013240714214026117,
"loss": 1.7691,
"step": 1585
},
{
"epoch": 6.14,
"learning_rate": 0.00013076294233943417,
"loss": 1.7388,
"step": 1590
},
{
"epoch": 6.16,
"learning_rate": 0.0001288901623091032,
"loss": 1.6458,
"step": 1595
},
{
"epoch": 6.18,
"learning_rate": 0.00012679568847505571,
"loss": 1.6852,
"step": 1600
},
{
"epoch": 6.2,
"learning_rate": 0.00012448722245648225,
"loss": 1.7267,
"step": 1605
},
{
"epoch": 6.22,
"learning_rate": 0.00012197325274624507,
"loss": 1.7517,
"step": 1610
},
{
"epoch": 6.24,
"learning_rate": 0.00011926302349772057,
"loss": 1.7343,
"step": 1615
},
{
"epoch": 6.25,
"learning_rate": 0.0001163665005329939,
"loss": 1.6811,
"step": 1620
},
{
"epoch": 6.27,
"learning_rate": 0.00011329433469739406,
"loss": 1.7056,
"step": 1625
},
{
"epoch": 6.29,
"learning_rate": 0.00011005782269511991,
"loss": 1.7447,
"step": 1630
},
{
"epoch": 6.31,
"learning_rate": 0.00010666886554997244,
"loss": 1.6661,
"step": 1635
},
{
"epoch": 6.33,
"learning_rate": 0.00010313992484393024,
"loss": 1.723,
"step": 1640
},
{
"epoch": 6.35,
"learning_rate": 9.948397689449228e-05,
"loss": 1.6887,
"step": 1645
},
{
"epoch": 6.37,
"learning_rate": 9.571446503927964e-05,
"loss": 1.6767,
"step": 1650
},
{
"epoch": 6.39,
"learning_rate": 9.184525020334699e-05,
"loss": 1.6593,
"step": 1655
},
{
"epoch": 6.41,
"learning_rate": 8.789055993098258e-05,
"loss": 1.6807,
"step": 1660
},
{
"epoch": 6.43,
"learning_rate": 8.386493606940322e-05,
"loss": 1.7043,
"step": 1665
},
{
"epoch": 6.45,
"learning_rate": 7.978318129672484e-05,
"loss": 1.7188,
"step": 1670
},
{
"epoch": 6.47,
"learning_rate": 7.566030469082603e-05,
"loss": 1.6494,
"step": 1675
},
{
"epoch": 6.49,
"learning_rate": 7.151146653925584e-05,
"loss": 1.6752,
"step": 1680
},
{
"epoch": 6.51,
"learning_rate": 6.735192259312878e-05,
"loss": 1.6569,
"step": 1685
},
{
"epoch": 6.53,
"learning_rate": 6.319696796998709e-05,
"loss": 1.6728,
"step": 1690
},
{
"epoch": 6.54,
"learning_rate": 5.906188091190817e-05,
"loss": 1.6875,
"step": 1695
},
{
"epoch": 6.56,
"learning_rate": 5.4961866605667284e-05,
"loss": 1.6511,
"step": 1700
},
{
"epoch": 6.58,
"learning_rate": 5.091200127153063e-05,
"loss": 1.6906,
"step": 1705
},
{
"epoch": 6.6,
"learning_rate": 4.6927176726273094e-05,
"loss": 1.6586,
"step": 1710
},
{
"epoch": 6.62,
"learning_rate": 4.302204562427082e-05,
"loss": 1.6804,
"step": 1715
},
{
"epoch": 6.64,
"learning_rate": 3.921096757801896e-05,
"loss": 1.6353,
"step": 1720
},
{
"epoch": 6.66,
"learning_rate": 3.550795635619796e-05,
"loss": 1.6895,
"step": 1725
},
{
"epoch": 6.68,
"learning_rate": 3.192662835344908e-05,
"loss": 1.7331,
"step": 1730
},
{
"epoch": 6.7,
"learning_rate": 2.8480152521337155e-05,
"loss": 1.6892,
"step": 1735
},
{
"epoch": 6.72,
"learning_rate": 2.51812019446141e-05,
"loss": 1.6841,
"step": 1740
},
{
"epoch": 6.74,
"learning_rate": 2.2041907240840133e-05,
"loss": 1.7096,
"step": 1745
},
{
"epoch": 6.76,
"learning_rate": 1.907381195471957e-05,
"loss": 1.7284,
"step": 1750
},
{
"epoch": 6.78,
"learning_rate": 1.6287830111171488e-05,
"loss": 1.6272,
"step": 1755
},
{
"epoch": 6.8,
"learning_rate": 1.3694206083212888e-05,
"loss": 1.5783,
"step": 1760
},
{
"epoch": 6.81,
"learning_rate": 1.1302476922232546e-05,
"loss": 1.5607,
"step": 1765
},
{
"epoch": 6.83,
"learning_rate": 9.121437289164463e-06,
"loss": 1.6762,
"step": 1770
},
{
"epoch": 6.85,
"learning_rate": 7.159107115516178e-06,
"loss": 1.6488,
"step": 1775
},
{
"epoch": 6.87,
"learning_rate": 5.422702113166627e-06,
"loss": 1.6201,
"step": 1780
},
{
"epoch": 6.89,
"learning_rate": 3.918607241369662e-06,
"loss": 1.7022,
"step": 1785
},
{
"epoch": 6.91,
"learning_rate": 2.65235322853129e-06,
"loss": 1.632,
"step": 1790
},
{
"epoch": 6.93,
"learning_rate": 1.6285962350901147e-06,
"loss": 1.6661,
"step": 1795
},
{
"epoch": 6.95,
"learning_rate": 8.511007322841488e-07,
"loss": 1.6079,
"step": 1800
},
{
"epoch": 6.97,
"learning_rate": 3.2272565976125165e-07,
"loss": 1.6758,
"step": 1805
},
{
"epoch": 6.99,
"learning_rate": 4.541391293127461e-08,
"loss": 1.6987,
"step": 1810
},
{
"epoch": 7.01,
"learning_rate": 2.018519871846962e-08,
"loss": 1.5689,
"step": 1815
},
{
"epoch": 7.03,
"learning_rate": 2.471322859826806e-07,
"loss": 1.5376,
"step": 1820
},
{
"epoch": 7.05,
"learning_rate": 7.254206643976737e-07,
"loss": 1.6088,
"step": 1825
},
{
"epoch": 7.07,
"learning_rate": 1.4532916130407314e-06,
"loss": 1.6132,
"step": 1830
},
{
"epoch": 7.08,
"learning_rate": 2.4280686674102744e-06,
"loss": 1.6823,
"step": 1835
},
{
"epoch": 7.1,
"learning_rate": 3.6461674610908866e-06,
"loss": 1.5818,
"step": 1840
},
{
"epoch": 7.12,
"learning_rate": 5.103108905877507e-06,
"loss": 1.5745,
"step": 1845
},
{
"epoch": 7.14,
"learning_rate": 6.793535661894024e-06,
"loss": 1.5783,
"step": 1850
},
{
"epoch": 7.16,
"learning_rate": 8.7112318371425e-06,
"loss": 1.6224,
"step": 1855
},
{
"epoch": 7.18,
"learning_rate": 1.0849145844047318e-05,
"loss": 1.6016,
"step": 1860
},
{
"epoch": 7.2,
"learning_rate": 1.3199416328947412e-05,
"loss": 1.5826,
"step": 1865
},
{
"epoch": 7.22,
"learning_rate": 1.5753401079189635e-05,
"loss": 1.6424,
"step": 1870
},
{
"epoch": 7.24,
"learning_rate": 1.8501708801530793e-05,
"loss": 1.5944,
"step": 1875
},
{
"epoch": 7.26,
"learning_rate": 2.1434233654994585e-05,
"loss": 1.5702,
"step": 1880
},
{
"epoch": 7.28,
"learning_rate": 2.454019241120068e-05,
"loss": 1.5819,
"step": 1885
},
{
"epoch": 7.3,
"learning_rate": 2.780816410552581e-05,
"loss": 1.5461,
"step": 1890
},
{
"epoch": 7.32,
"learning_rate": 3.12261320332941e-05,
"loss": 1.6276,
"step": 1895
},
{
"epoch": 7.34,
"learning_rate": 3.4781527936569615e-05,
"loss": 1.6333,
"step": 1900
},
{
"epoch": 7.36,
"learning_rate": 3.8461278219075155e-05,
"loss": 1.5744,
"step": 1905
},
{
"epoch": 7.37,
"learning_rate": 4.2251852019296586e-05,
"loss": 1.601,
"step": 1910
},
{
"epoch": 7.39,
"learning_rate": 4.6139310965004655e-05,
"loss": 1.4994,
"step": 1915
},
{
"epoch": 7.41,
"learning_rate": 5.010936042623934e-05,
"loss": 1.5667,
"step": 1920
},
{
"epoch": 7.43,
"learning_rate": 5.4147402078293086e-05,
"loss": 1.6055,
"step": 1925
},
{
"epoch": 7.45,
"learning_rate": 5.823858758141886e-05,
"loss": 1.6403,
"step": 1930
},
{
"epoch": 7.47,
"learning_rate": 6.236787317986658e-05,
"loss": 1.5103,
"step": 1935
},
{
"epoch": 7.49,
"learning_rate": 6.65200750194898e-05,
"loss": 1.5918,
"step": 1940
},
{
"epoch": 7.51,
"learning_rate": 7.067992498051008e-05,
"loss": 1.5905,
"step": 1945
},
{
"epoch": 7.53,
"learning_rate": 7.48321268201333e-05,
"loss": 1.6157,
"step": 1950
},
{
"epoch": 7.55,
"learning_rate": 7.896141241858101e-05,
"loss": 1.5963,
"step": 1955
},
{
"epoch": 7.57,
"learning_rate": 8.305259792170679e-05,
"loss": 1.586,
"step": 1960
},
{
"epoch": 7.59,
"learning_rate": 8.709063957376054e-05,
"loss": 1.6324,
"step": 1965
},
{
"epoch": 7.61,
"learning_rate": 9.106068903499522e-05,
"loss": 1.6232,
"step": 1970
},
{
"epoch": 7.63,
"learning_rate": 9.494814798070329e-05,
"loss": 1.6404,
"step": 1975
},
{
"epoch": 7.64,
"learning_rate": 9.873872178092473e-05,
"loss": 1.6215,
"step": 1980
},
{
"epoch": 7.66,
"learning_rate": 0.00010241847206343028,
"loss": 1.617,
"step": 1985
},
{
"epoch": 7.68,
"learning_rate": 0.0001059738679667058,
"loss": 1.6565,
"step": 1990
},
{
"epoch": 7.7,
"learning_rate": 0.0001093918358944741,
"loss": 1.7342,
"step": 1995
},
{
"epoch": 7.72,
"learning_rate": 0.00011265980758879924,
"loss": 1.6063,
"step": 2000
},
{
"epoch": 7.74,
"learning_rate": 0.00011576576634500532,
"loss": 1.6993,
"step": 2005
},
{
"epoch": 7.76,
"learning_rate": 0.00011869829119846911,
"loss": 1.6355,
"step": 2010
},
{
"epoch": 7.78,
"learning_rate": 0.00012144659892081027,
"loss": 1.6087,
"step": 2015
},
{
"epoch": 7.8,
"learning_rate": 0.00012400058367105252,
"loss": 1.6668,
"step": 2020
},
{
"epoch": 7.82,
"learning_rate": 0.00012635085415595263,
"loss": 1.7275,
"step": 2025
},
{
"epoch": 7.84,
"learning_rate": 0.00012848876816285744,
"loss": 1.6637,
"step": 2030
},
{
"epoch": 7.86,
"learning_rate": 0.00013040646433810593,
"loss": 1.6713,
"step": 2035
},
{
"epoch": 7.88,
"learning_rate": 0.00013209689109412246,
"loss": 1.6358,
"step": 2040
},
{
"epoch": 7.9,
"learning_rate": 0.00013355383253890908,
"loss": 1.6572,
"step": 2045
},
{
"epoch": 7.92,
"learning_rate": 0.0001347719313325897,
"loss": 1.6781,
"step": 2050
},
{
"epoch": 7.93,
"learning_rate": 0.00013574670838695924,
"loss": 1.6401,
"step": 2055
},
{
"epoch": 7.95,
"learning_rate": 0.0001364745793356023,
"loss": 1.673,
"step": 2060
},
{
"epoch": 7.97,
"learning_rate": 0.0001369528677140173,
"loss": 1.7179,
"step": 2065
},
{
"epoch": 7.99,
"learning_rate": 0.00013717981480128154,
"loss": 1.7015,
"step": 2070
},
{
"epoch": 8.01,
"learning_rate": 0.00013715458608706872,
"loss": 1.6596,
"step": 2075
},
{
"epoch": 8.03,
"learning_rate": 0.00013687727434023877,
"loss": 1.6462,
"step": 2080
},
{
"epoch": 8.05,
"learning_rate": 0.00013634889926771588,
"loss": 1.6041,
"step": 2085
},
{
"epoch": 8.07,
"learning_rate": 0.00013557140376490998,
"loss": 1.5571,
"step": 2090
},
{
"epoch": 8.09,
"learning_rate": 0.00013454764677146882,
"loss": 1.5541,
"step": 2095
},
{
"epoch": 8.11,
"learning_rate": 0.00013328139275863037,
"loss": 1.6773,
"step": 2100
},
{
"epoch": 8.13,
"learning_rate": 0.00013177729788683341,
"loss": 1.6003,
"step": 2105
},
{
"epoch": 8.15,
"learning_rate": 0.00013004089288448387,
"loss": 1.5288,
"step": 2110
},
{
"epoch": 8.17,
"learning_rate": 0.0001280785627108356,
"loss": 1.6059,
"step": 2115
},
{
"epoch": 8.19,
"learning_rate": 0.00012589752307776752,
"loss": 1.6265,
"step": 2120
},
{
"epoch": 8.2,
"learning_rate": 0.0001235057939167872,
"loss": 1.6372,
"step": 2125
},
{
"epoch": 8.22,
"learning_rate": 0.00012091216988882845,
"loss": 1.7324,
"step": 2130
},
{
"epoch": 8.24,
"learning_rate": 0.00011812618804528034,
"loss": 1.5938,
"step": 2135
},
{
"epoch": 8.26,
"learning_rate": 0.00011515809275915997,
"loss": 1.5805,
"step": 2140
},
{
"epoch": 8.28,
"learning_rate": 0.00011201879805538599,
"loss": 1.62,
"step": 2145
},
{
"epoch": 8.3,
"learning_rate": 0.00010871984747866294,
"loss": 1.5884,
"step": 2150
},
{
"epoch": 8.32,
"learning_rate": 0.00010527337164655102,
"loss": 1.576,
"step": 2155
},
{
"epoch": 8.34,
"learning_rate": 0.00010169204364380236,
"loss": 1.5746,
"step": 2160
},
{
"epoch": 8.36,
"learning_rate": 9.798903242198116e-05,
"loss": 1.6259,
"step": 2165
},
{
"epoch": 8.38,
"learning_rate": 9.417795437572906e-05,
"loss": 1.6047,
"step": 2170
},
{
"epoch": 8.4,
"learning_rate": 9.027282327372703e-05,
"loss": 1.5615,
"step": 2175
},
{
"epoch": 8.42,
"learning_rate": 8.628799872846948e-05,
"loss": 1.6294,
"step": 2180
},
{
"epoch": 8.44,
"learning_rate": 8.223813339433283e-05,
"loss": 1.5485,
"step": 2185
},
{
"epoch": 8.46,
"learning_rate": 7.813811908809194e-05,
"loss": 1.5291,
"step": 2190
},
{
"epoch": 8.47,
"learning_rate": 7.400303203001327e-05,
"loss": 1.6178,
"step": 2195
},
{
"epoch": 8.49,
"learning_rate": 6.98480774068711e-05,
"loss": 1.6335,
"step": 2200
},
{
"epoch": 8.51,
"learning_rate": 6.568853346074429e-05,
"loss": 1.5607,
"step": 2205
},
{
"epoch": 8.53,
"learning_rate": 6.15396953091741e-05,
"loss": 1.552,
"step": 2210
},
{
"epoch": 8.55,
"learning_rate": 5.741681870327528e-05,
"loss": 1.6358,
"step": 2215
},
{
"epoch": 8.57,
"learning_rate": 5.33350639305969e-05,
"loss": 1.6499,
"step": 2220
},
{
"epoch": 8.59,
"learning_rate": 4.930944006901777e-05,
"loss": 1.5632,
"step": 2225
},
{
"epoch": 8.61,
"learning_rate": 4.535474979665314e-05,
"loss": 1.5825,
"step": 2230
},
{
"epoch": 8.63,
"learning_rate": 4.148553496072023e-05,
"loss": 1.6277,
"step": 2235
},
{
"epoch": 8.65,
"learning_rate": 3.7716023105507615e-05,
"loss": 1.5497,
"step": 2240
},
{
"epoch": 8.67,
"learning_rate": 3.406007515606987e-05,
"loss": 1.5159,
"step": 2245
},
{
"epoch": 8.69,
"learning_rate": 3.0531134450027666e-05,
"loss": 1.5683,
"step": 2250
},
{
"epoch": 8.71,
"learning_rate": 2.7142177304880198e-05,
"loss": 1.5193,
"step": 2255
},
{
"epoch": 8.73,
"learning_rate": 2.390566530260624e-05,
"loss": 1.6145,
"step": 2260
},
{
"epoch": 8.75,
"learning_rate": 2.0833499467006378e-05,
"loss": 1.5854,
"step": 2265
},
{
"epoch": 8.76,
"learning_rate": 1.7936976502279525e-05,
"loss": 1.5426,
"step": 2270
},
{
"epoch": 8.78,
"learning_rate": 1.5226747253755011e-05,
"loss": 1.5862,
"step": 2275
},
{
"epoch": 8.8,
"learning_rate": 1.2712777543517822e-05,
"loss": 1.5478,
"step": 2280
},
{
"epoch": 8.82,
"learning_rate": 1.0404311524944368e-05,
"loss": 1.6329,
"step": 2285
},
{
"epoch": 8.84,
"learning_rate": 8.309837690896873e-06,
"loss": 1.535,
"step": 2290
},
{
"epoch": 8.86,
"learning_rate": 6.43705766056588e-06,
"loss": 1.5849,
"step": 2295
},
{
"epoch": 8.88,
"learning_rate": 4.792857859738948e-06,
"loss": 1.6253,
"step": 2300
},
{
"epoch": 8.9,
"learning_rate": 3.3832841986266175e-06,
"loss": 1.537,
"step": 2305
},
{
"epoch": 8.92,
"learning_rate": 2.213519840361947e-06,
"loss": 1.5028,
"step": 2310
},
{
"epoch": 8.94,
"learning_rate": 1.2878661419176351e-06,
"loss": 1.5904,
"step": 2315
},
{
"epoch": 8.96,
"learning_rate": 6.097268375260679e-07,
"loss": 1.6009,
"step": 2320
},
{
"epoch": 8.98,
"learning_rate": 1.8159552276040752e-07,
"loss": 1.5465,
"step": 2325
},
{
"epoch": 9.0,
"learning_rate": 5.046485299251069e-09,
"loss": 1.5575,
"step": 2330
},
{
"epoch": 9.02,
"learning_rate": 8.072891609113784e-08,
"loss": 1.5791,
"step": 2335
},
{
"epoch": 9.03,
"learning_rate": 4.0836452220544814e-07,
"loss": 1.4865,
"step": 2340
},
{
"epoch": 9.05,
"learning_rate": 9.867485501471922e-07,
"loss": 1.5316,
"step": 2345
},
{
"epoch": 9.07,
"learning_rate": 1.813754215873199e-06,
"loss": 1.5403,
"step": 2350
},
{
"epoch": 9.09,
"learning_rate": 2.8863405252193584e-06,
"loss": 1.4183,
"step": 2355
},
{
"epoch": 9.11,
"learning_rate": 4.200563455983382e-06,
"loss": 1.5547,
"step": 2360
},
{
"epoch": 9.13,
"learning_rate": 5.75159046054386e-06,
"loss": 1.5414,
"step": 2365
},
{
"epoch": 9.15,
"learning_rate": 7.5337182356897725e-06,
"loss": 1.5445,
"step": 2370
},
{
"epoch": 9.17,
"learning_rate": 9.540393694315775e-06,
"loss": 1.4539,
"step": 2375
},
{
"epoch": 9.19,
"learning_rate": 1.1764238061872442e-05,
"loss": 1.4992,
"step": 2380
},
{
"epoch": 9.21,
"learning_rate": 1.4197074008960564e-05,
"loss": 1.5203,
"step": 2385
},
{
"epoch": 9.23,
"learning_rate": 1.6829955720305234e-05,
"loss": 1.4989,
"step": 2390
},
{
"epoch": 9.25,
"learning_rate": 1.965320178953787e-05,
"loss": 1.5128,
"step": 2395
},
{
"epoch": 9.27,
"learning_rate": 2.265643081883295e-05,
"loss": 1.5033,
"step": 2400
},
{
"epoch": 9.29,
"learning_rate": 2.582859959249101e-05,
"loss": 1.4938,
"step": 2405
},
{
"epoch": 9.31,
"learning_rate": 2.915804368410211e-05,
"loss": 1.5157,
"step": 2410
},
{
"epoch": 9.32,
"learning_rate": 3.2632520347973973e-05,
"loss": 1.4103,
"step": 2415
},
{
"epoch": 9.34,
"learning_rate": 3.623925353710222e-05,
"loss": 1.524,
"step": 2420
},
{
"epoch": 9.36,
"learning_rate": 3.996498088215406e-05,
"loss": 1.5389,
"step": 2425
},
{
"epoch": 9.38,
"learning_rate": 4.3796002458712527e-05,
"loss": 1.5645,
"step": 2430
},
{
"epoch": 9.4,
"learning_rate": 4.7718231163460484e-05,
"loss": 1.5511,
"step": 2435
},
{
"epoch": 9.42,
"learning_rate": 5.1717244514068206e-05,
"loss": 1.5406,
"step": 2440
},
{
"epoch": 9.44,
"learning_rate": 5.57783376823034e-05,
"loss": 1.567,
"step": 2445
},
{
"epoch": 9.46,
"learning_rate": 5.988657756536402e-05,
"loss": 1.602,
"step": 2450
},
{
"epoch": 9.48,
"learning_rate": 6.402685769660036e-05,
"loss": 1.4789,
"step": 2455
},
{
"epoch": 9.5,
"learning_rate": 6.818395379371463e-05,
"loss": 1.5673,
"step": 2460
},
{
"epoch": 9.52,
"learning_rate": 7.234257974018531e-05,
"loss": 1.5527,
"step": 2465
},
{
"epoch": 9.54,
"learning_rate": 7.64874437940594e-05,
"loss": 1.4721,
"step": 2470
},
{
"epoch": 9.56,
"learning_rate": 8.060330481743391e-05,
"loss": 1.4447,
"step": 2475
},
{
"epoch": 9.58,
"learning_rate": 8.467502831985544e-05,
"loss": 1.5768,
"step": 2480
},
{
"epoch": 9.59,
"learning_rate": 8.868764210957132e-05,
"loss": 1.4808,
"step": 2485
},
{
"epoch": 9.61,
"learning_rate": 9.262639134798362e-05,
"loss": 1.4197,
"step": 2490
},
{
"epoch": 9.63,
"learning_rate": 9.647679280487567e-05,
"loss": 1.6109,
"step": 2495
},
{
"epoch": 9.65,
"learning_rate": 0.00010022468811489983,
"loss": 1.5907,
"step": 2500
},
{
"epoch": 9.67,
"learning_rate": 0.00010385629583950378,
"loss": 1.5902,
"step": 2505
},
{
"epoch": 9.69,
"learning_rate": 0.00010735826214284965,
"loss": 1.6053,
"step": 2510
},
{
"epoch": 9.71,
"learning_rate": 0.00011071770989539361,
"loss": 1.552,
"step": 2515
},
{
"epoch": 9.73,
"learning_rate": 0.00011392228602455961,
"loss": 1.5787,
"step": 2520
},
{
"epoch": 9.75,
"learning_rate": 0.00011696020693839523,
"loss": 1.4997,
"step": 2525
},
{
"epoch": 9.77,
"learning_rate": 0.00011982030185518476,
"loss": 1.6354,
"step": 2530
},
{
"epoch": 9.79,
"learning_rate": 0.00012249205387968647,
"loss": 1.586,
"step": 2535
},
{
"epoch": 9.81,
"learning_rate": 0.0001249656386749574,
"loss": 1.511,
"step": 2540
},
{
"epoch": 9.83,
"learning_rate": 0.000127231960587559,
"loss": 1.5002,
"step": 2545
},
{
"epoch": 9.85,
"learning_rate": 0.00012928268609331444,
"loss": 1.5829,
"step": 2550
},
{
"epoch": 9.86,
"learning_rate": 0.00013111027444062605,
"loss": 1.6407,
"step": 2555
},
{
"epoch": 9.88,
"learning_rate": 0.00013270800537867978,
"loss": 1.5058,
"step": 2560
},
{
"epoch": 9.9,
"learning_rate": 0.00013407000386857348,
"loss": 1.4854,
"step": 2565
},
{
"epoch": 9.92,
"learning_rate": 0.0001351912616865057,
"loss": 1.4912,
"step": 2570
},
{
"epoch": 9.94,
"learning_rate": 0.00013606765583958525,
"loss": 1.5218,
"step": 2575
},
{
"epoch": 9.96,
"learning_rate": 0.00013669596372654658,
"loss": 1.5828,
"step": 2580
},
{
"epoch": 9.98,
"learning_rate": 0.00013707387498762246,
"loss": 1.5816,
"step": 2585
},
{
"epoch": 10.0,
"learning_rate": 0.0001372,
"loss": 1.4944,
"step": 2590
},
{
"epoch": 10.02,
"learning_rate": 0.00013707387498762246,
"loss": 1.5296,
"step": 2595
},
{
"epoch": 10.04,
"learning_rate": 0.0001366959637265466,
"loss": 1.5509,
"step": 2600
},
{
"epoch": 10.06,
"learning_rate": 0.00013606765583958527,
"loss": 1.5305,
"step": 2605
},
{
"epoch": 10.08,
"learning_rate": 0.00013519126168650574,
"loss": 1.4872,
"step": 2610
},
{
"epoch": 10.1,
"learning_rate": 0.00013407000386857353,
"loss": 1.544,
"step": 2615
},
{
"epoch": 10.12,
"learning_rate": 0.00013270800537867983,
"loss": 1.5421,
"step": 2620
},
{
"epoch": 10.14,
"learning_rate": 0.0001311102744406261,
"loss": 1.5468,
"step": 2625
},
{
"epoch": 10.15,
"learning_rate": 0.00012928268609331455,
"loss": 1.5529,
"step": 2630
},
{
"epoch": 10.17,
"learning_rate": 0.00012723196058755907,
"loss": 1.5357,
"step": 2635
},
{
"epoch": 10.19,
"learning_rate": 0.00012496563867495748,
"loss": 1.5077,
"step": 2640
},
{
"epoch": 10.21,
"learning_rate": 0.00012249205387968658,
"loss": 1.6099,
"step": 2645
},
{
"epoch": 10.23,
"learning_rate": 0.00011982030185518488,
"loss": 1.5701,
"step": 2650
},
{
"epoch": 10.25,
"learning_rate": 0.0001169602069383955,
"loss": 1.5479,
"step": 2655
},
{
"epoch": 10.27,
"learning_rate": 0.00011392228602455956,
"loss": 1.5486,
"step": 2660
},
{
"epoch": 10.29,
"learning_rate": 0.00011071770989539373,
"loss": 1.5367,
"step": 2665
},
{
"epoch": 10.31,
"learning_rate": 0.0001073582621428498,
"loss": 1.5361,
"step": 2670
},
{
"epoch": 10.33,
"learning_rate": 0.00010385629583950413,
"loss": 1.3755,
"step": 2675
},
{
"epoch": 10.35,
"learning_rate": 0.00010022468811490019,
"loss": 1.464,
"step": 2680
},
{
"epoch": 10.37,
"learning_rate": 9.64767928048756e-05,
"loss": 1.5142,
"step": 2685
},
{
"epoch": 10.39,
"learning_rate": 9.262639134798378e-05,
"loss": 1.5207,
"step": 2690
},
{
"epoch": 10.41,
"learning_rate": 8.868764210957149e-05,
"loss": 1.5357,
"step": 2695
},
{
"epoch": 10.42,
"learning_rate": 8.467502831985583e-05,
"loss": 1.454,
"step": 2700
},
{
"epoch": 10.44,
"learning_rate": 8.06033048174343e-05,
"loss": 1.5096,
"step": 2705
},
{
"epoch": 10.46,
"learning_rate": 7.648744379405981e-05,
"loss": 1.5628,
"step": 2710
},
{
"epoch": 10.48,
"learning_rate": 7.234257974018524e-05,
"loss": 1.468,
"step": 2715
},
{
"epoch": 10.5,
"learning_rate": 6.818395379371479e-05,
"loss": 1.4858,
"step": 2720
},
{
"epoch": 10.52,
"learning_rate": 6.402685769660054e-05,
"loss": 1.4885,
"step": 2725
},
{
"epoch": 10.54,
"learning_rate": 5.988657756536443e-05,
"loss": 1.4577,
"step": 2730
},
{
"epoch": 10.56,
"learning_rate": 5.577833768230333e-05,
"loss": 1.5513,
"step": 2735
},
{
"epoch": 10.58,
"learning_rate": 5.171724451406837e-05,
"loss": 1.4957,
"step": 2740
},
{
"epoch": 10.6,
"learning_rate": 4.7718231163460647e-05,
"loss": 1.5075,
"step": 2745
},
{
"epoch": 10.62,
"learning_rate": 4.379600245871268e-05,
"loss": 1.448,
"step": 2750
},
{
"epoch": 10.64,
"learning_rate": 3.996498088215443e-05,
"loss": 1.53,
"step": 2755
},
{
"epoch": 10.66,
"learning_rate": 3.623925353710258e-05,
"loss": 1.5454,
"step": 2760
},
{
"epoch": 10.68,
"learning_rate": 3.2632520347973906e-05,
"loss": 1.5139,
"step": 2765
},
{
"epoch": 10.69,
"learning_rate": 2.915804368410225e-05,
"loss": 1.5848,
"step": 2770
},
{
"epoch": 10.71,
"learning_rate": 2.5828599592491143e-05,
"loss": 1.514,
"step": 2775
},
{
"epoch": 10.73,
"learning_rate": 2.2656430818833073e-05,
"loss": 1.4666,
"step": 2780
},
{
"epoch": 10.75,
"learning_rate": 1.965320178953816e-05,
"loss": 1.4546,
"step": 2785
},
{
"epoch": 10.77,
"learning_rate": 1.682995572030518e-05,
"loss": 1.4552,
"step": 2790
},
{
"epoch": 10.79,
"learning_rate": 1.4197074008960664e-05,
"loss": 1.482,
"step": 2795
},
{
"epoch": 10.81,
"learning_rate": 1.1764238061872534e-05,
"loss": 1.5422,
"step": 2800
},
{
"epoch": 10.83,
"learning_rate": 9.54039369431598e-06,
"loss": 1.492,
"step": 2805
},
{
"epoch": 10.85,
"learning_rate": 7.5337182356897344e-06,
"loss": 1.4219,
"step": 2810
},
{
"epoch": 10.87,
"learning_rate": 5.75159046054383e-06,
"loss": 1.4972,
"step": 2815
},
{
"epoch": 10.89,
"learning_rate": 4.200563455983359e-06,
"loss": 1.4525,
"step": 2820
},
{
"epoch": 10.91,
"learning_rate": 2.886340525219404e-06,
"loss": 1.4337,
"step": 2825
},
{
"epoch": 10.93,
"learning_rate": 1.8137542158732371e-06,
"loss": 1.5066,
"step": 2830
},
{
"epoch": 10.95,
"learning_rate": 9.867485501472609e-07,
"loss": 1.4053,
"step": 2835
},
{
"epoch": 10.97,
"learning_rate": 4.083645222054405e-07,
"loss": 1.4861,
"step": 2840
},
{
"epoch": 10.98,
"learning_rate": 8.072891609114545e-08,
"loss": 1.4625,
"step": 2845
},
{
"epoch": 11.0,
"learning_rate": 5.046485299251069e-09,
"loss": 1.4993,
"step": 2850
},
{
"epoch": 11.02,
"learning_rate": 1.8159552276039227e-07,
"loss": 1.4475,
"step": 2855
},
{
"epoch": 11.04,
"learning_rate": 6.097268375260069e-07,
"loss": 1.4133,
"step": 2860
},
{
"epoch": 11.06,
"learning_rate": 1.2878661419176504e-06,
"loss": 1.4672,
"step": 2865
},
{
"epoch": 11.08,
"learning_rate": 2.21351984036197e-06,
"loss": 1.4601,
"step": 2870
},
{
"epoch": 11.1,
"learning_rate": 3.383284198626564e-06,
"loss": 1.4559,
"step": 2875
},
{
"epoch": 11.12,
"learning_rate": 4.792857859738887e-06,
"loss": 1.3926,
"step": 2880
},
{
"epoch": 11.14,
"learning_rate": 6.437057660565811e-06,
"loss": 1.3658,
"step": 2885
},
{
"epoch": 11.16,
"learning_rate": 8.309837690896675e-06,
"loss": 1.3814,
"step": 2890
},
{
"epoch": 11.18,
"learning_rate": 1.0404311524944405e-05,
"loss": 1.4756,
"step": 2895
},
{
"epoch": 11.2,
"learning_rate": 1.271277754351773e-05,
"loss": 1.3856,
"step": 2900
},
{
"epoch": 11.22,
"learning_rate": 1.5226747253754904e-05,
"loss": 1.3775,
"step": 2905
},
{
"epoch": 11.24,
"learning_rate": 1.7936976502279244e-05,
"loss": 1.4091,
"step": 2910
},
{
"epoch": 11.25,
"learning_rate": 2.083349946700608e-05,
"loss": 1.4543,
"step": 2915
},
{
"epoch": 11.27,
"learning_rate": 2.39056653026063e-05,
"loss": 1.4107,
"step": 2920
},
{
"epoch": 11.29,
"learning_rate": 2.714217730488006e-05,
"loss": 1.4381,
"step": 2925
},
{
"epoch": 11.31,
"learning_rate": 3.053113445002753e-05,
"loss": 1.4025,
"step": 2930
},
{
"epoch": 11.33,
"learning_rate": 3.4060075156069725e-05,
"loss": 1.3656,
"step": 2935
},
{
"epoch": 11.35,
"learning_rate": 3.771602310550724e-05,
"loss": 1.4167,
"step": 2940
},
{
"epoch": 11.37,
"learning_rate": 4.148553496072031e-05,
"loss": 1.4686,
"step": 2945
},
{
"epoch": 11.39,
"learning_rate": 4.5354749796653205e-05,
"loss": 1.3998,
"step": 2950
},
{
"epoch": 11.41,
"learning_rate": 4.9309440069017615e-05,
"loss": 1.4714,
"step": 2955
},
{
"epoch": 11.43,
"learning_rate": 5.333506393059674e-05,
"loss": 1.3963,
"step": 2960
},
{
"epoch": 11.45,
"learning_rate": 5.7416818703274866e-05,
"loss": 1.5068,
"step": 2965
},
{
"epoch": 11.47,
"learning_rate": 6.153969530917418e-05,
"loss": 1.4338,
"step": 2970
},
{
"epoch": 11.49,
"learning_rate": 6.568853346074412e-05,
"loss": 1.3606,
"step": 2975
},
{
"epoch": 11.51,
"learning_rate": 6.984807740687094e-05,
"loss": 1.4016,
"step": 2980
},
{
"epoch": 11.53,
"learning_rate": 7.400303203001311e-05,
"loss": 1.4269,
"step": 2985
},
{
"epoch": 11.54,
"learning_rate": 7.813811908809178e-05,
"loss": 1.44,
"step": 2990
},
{
"epoch": 11.56,
"learning_rate": 8.223813339433243e-05,
"loss": 1.4977,
"step": 2995
},
{
"epoch": 11.58,
"learning_rate": 8.628799872846956e-05,
"loss": 1.436,
"step": 3000
},
{
"epoch": 11.6,
"learning_rate": 9.027282327372687e-05,
"loss": 1.513,
"step": 3005
},
{
"epoch": 11.62,
"learning_rate": 9.417795437572891e-05,
"loss": 1.4691,
"step": 3010
},
{
"epoch": 11.64,
"learning_rate": 9.798903242198079e-05,
"loss": 1.5133,
"step": 3015
},
{
"epoch": 11.66,
"learning_rate": 0.000101692043643802,
"loss": 1.5047,
"step": 3020
},
{
"epoch": 11.68,
"learning_rate": 0.00010527337164655109,
"loss": 1.4302,
"step": 3025
},
{
"epoch": 11.7,
"learning_rate": 0.00010871984747866282,
"loss": 1.5886,
"step": 3030
},
{
"epoch": 11.72,
"learning_rate": 0.00011201879805538586,
"loss": 1.4925,
"step": 3035
},
{
"epoch": 11.74,
"learning_rate": 0.00011515809275915985,
"loss": 1.4445,
"step": 3040
},
{
"epoch": 11.76,
"learning_rate": 0.00011812618804528006,
"loss": 1.4577,
"step": 3045
},
{
"epoch": 11.78,
"learning_rate": 0.00012091216988882848,
"loss": 1.4792,
"step": 3050
},
{
"epoch": 11.8,
"learning_rate": 0.00012350579391678723,
"loss": 1.4425,
"step": 3055
},
{
"epoch": 11.81,
"learning_rate": 0.00012589752307776744,
"loss": 1.4294,
"step": 3060
},
{
"epoch": 11.83,
"learning_rate": 0.0001280785627108355,
"loss": 1.4138,
"step": 3065
},
{
"epoch": 11.85,
"learning_rate": 0.0001300408928844837,
"loss": 1.5429,
"step": 3070
},
{
"epoch": 11.87,
"learning_rate": 0.00013177729788683344,
"loss": 1.5001,
"step": 3075
},
{
"epoch": 11.89,
"learning_rate": 0.00013328139275863032,
"loss": 1.444,
"step": 3080
},
{
"epoch": 11.91,
"learning_rate": 0.00013454764677146876,
"loss": 1.4294,
"step": 3085
},
{
"epoch": 11.93,
"learning_rate": 0.00013557140376490993,
"loss": 1.5483,
"step": 3090
},
{
"epoch": 11.95,
"learning_rate": 0.0001363488992677158,
"loss": 1.5026,
"step": 3095
},
{
"epoch": 11.97,
"learning_rate": 0.00013687727434023872,
"loss": 1.5176,
"step": 3100
},
{
"epoch": 11.99,
"learning_rate": 0.00013715458608706872,
"loss": 1.4418,
"step": 3105
},
{
"epoch": 12.01,
"learning_rate": 0.00013717981480128154,
"loss": 1.441,
"step": 3110
},
{
"epoch": 12.03,
"learning_rate": 0.00013695286771401734,
"loss": 1.3854,
"step": 3115
},
{
"epoch": 12.05,
"learning_rate": 0.00013647457933560234,
"loss": 1.4397,
"step": 3120
},
{
"epoch": 12.07,
"learning_rate": 0.00013574670838695926,
"loss": 1.4672,
"step": 3125
},
{
"epoch": 12.08,
"learning_rate": 0.0001347719313325897,
"loss": 1.4525,
"step": 3130
},
{
"epoch": 12.1,
"learning_rate": 0.00013355383253890914,
"loss": 1.4068,
"step": 3135
},
{
"epoch": 12.12,
"learning_rate": 0.0001320968910941225,
"loss": 1.4855,
"step": 3140
},
{
"epoch": 12.14,
"learning_rate": 0.0001304064643381061,
"loss": 1.4212,
"step": 3145
},
{
"epoch": 12.16,
"learning_rate": 0.00012848876816285777,
"loss": 1.4849,
"step": 3150
},
{
"epoch": 12.18,
"learning_rate": 0.00012635085415595244,
"loss": 1.3912,
"step": 3155
},
{
"epoch": 12.2,
"learning_rate": 0.00012400058367105247,
"loss": 1.483,
"step": 3160
},
{
"epoch": 12.22,
"learning_rate": 0.00012144659892081038,
"loss": 1.3818,
"step": 3165
},
{
"epoch": 12.24,
"learning_rate": 0.00011869829119846924,
"loss": 1.4634,
"step": 3170
},
{
"epoch": 12.26,
"learning_rate": 0.00011576576634500562,
"loss": 1.4034,
"step": 3175
},
{
"epoch": 12.28,
"learning_rate": 0.00011265980758879936,
"loss": 1.4014,
"step": 3180
},
{
"epoch": 12.3,
"learning_rate": 0.00010939183589447423,
"loss": 1.4222,
"step": 3185
},
{
"epoch": 12.32,
"learning_rate": 0.00010597386796670575,
"loss": 1.4854,
"step": 3190
},
{
"epoch": 12.34,
"learning_rate": 0.00010241847206343044,
"loss": 1.4472,
"step": 3195
},
{
"epoch": 12.36,
"learning_rate": 9.87387217809251e-05,
"loss": 1.5271,
"step": 3200
},
{
"epoch": 12.37,
"learning_rate": 9.494814798070321e-05,
"loss": 1.401,
"step": 3205
},
{
"epoch": 12.39,
"learning_rate": 9.106068903499514e-05,
"loss": 1.5122,
"step": 3210
},
{
"epoch": 12.41,
"learning_rate": 8.709063957376094e-05,
"loss": 1.4755,
"step": 3215
},
{
"epoch": 12.43,
"learning_rate": 8.30525979217072e-05,
"loss": 1.4605,
"step": 3220
},
{
"epoch": 12.45,
"learning_rate": 7.896141241858118e-05,
"loss": 1.3958,
"step": 3225
},
{
"epoch": 12.47,
"learning_rate": 7.48321268201337e-05,
"loss": 1.4285,
"step": 3230
},
{
"epoch": 12.49,
"learning_rate": 7.067992498051e-05,
"loss": 1.4276,
"step": 3235
},
{
"epoch": 12.51,
"learning_rate": 6.652007501948996e-05,
"loss": 1.4174,
"step": 3240
},
{
"epoch": 12.53,
"learning_rate": 6.236787317986674e-05,
"loss": 1.4845,
"step": 3245
},
{
"epoch": 12.55,
"learning_rate": 5.823858758141927e-05,
"loss": 1.4357,
"step": 3250
},
{
"epoch": 12.57,
"learning_rate": 5.414740207829325e-05,
"loss": 1.4382,
"step": 3255
},
{
"epoch": 12.59,
"learning_rate": 5.010936042623904e-05,
"loss": 1.3395,
"step": 3260
},
{
"epoch": 12.61,
"learning_rate": 4.6139310965004824e-05,
"loss": 1.3898,
"step": 3265
},
{
"epoch": 12.63,
"learning_rate": 4.225185201929675e-05,
"loss": 1.4521,
"step": 3270
},
{
"epoch": 12.64,
"learning_rate": 3.8461278219075304e-05,
"loss": 1.4757,
"step": 3275
},
{
"epoch": 12.66,
"learning_rate": 3.478152793656996e-05,
"loss": 1.4128,
"step": 3280
},
{
"epoch": 12.68,
"learning_rate": 3.122613203329423e-05,
"loss": 1.4424,
"step": 3285
},
{
"epoch": 12.7,
"learning_rate": 2.780816410552575e-05,
"loss": 1.4287,
"step": 3290
},
{
"epoch": 12.72,
"learning_rate": 2.454019241120062e-05,
"loss": 1.3953,
"step": 3295
},
{
"epoch": 12.74,
"learning_rate": 2.1434233654994707e-05,
"loss": 1.3275,
"step": 3300
},
{
"epoch": 12.76,
"learning_rate": 1.8501708801531077e-05,
"loss": 1.3897,
"step": 3305
},
{
"epoch": 12.78,
"learning_rate": 1.575340107918959e-05,
"loss": 1.3657,
"step": 3310
},
{
"epoch": 12.8,
"learning_rate": 1.319941632894751e-05,
"loss": 1.3897,
"step": 3315
},
{
"epoch": 12.82,
"learning_rate": 1.0849145844047538e-05,
"loss": 1.4783,
"step": 3320
},
{
"epoch": 12.84,
"learning_rate": 8.711231837142462e-06,
"loss": 1.4102,
"step": 3325
},
{
"epoch": 12.86,
"learning_rate": 6.793535661894092e-06,
"loss": 1.4442,
"step": 3330
},
{
"epoch": 12.88,
"learning_rate": 5.1031089058776675e-06,
"loss": 1.3875,
"step": 3335
},
{
"epoch": 12.9,
"learning_rate": 3.6461674610908637e-06,
"loss": 1.4228,
"step": 3340
},
{
"epoch": 12.92,
"learning_rate": 2.42806866741032e-06,
"loss": 1.4015,
"step": 3345
},
{
"epoch": 12.93,
"learning_rate": 1.453291613040815e-06,
"loss": 1.3937,
"step": 3350
},
{
"epoch": 12.95,
"learning_rate": 7.254206643977347e-07,
"loss": 1.4905,
"step": 3355
},
{
"epoch": 12.97,
"learning_rate": 2.4713228598269586e-07,
"loss": 1.4419,
"step": 3360
},
{
"epoch": 12.99,
"learning_rate": 2.0185198718462007e-08,
"loss": 1.4331,
"step": 3365
},
{
"epoch": 13.01,
"learning_rate": 4.5413912931266996e-08,
"loss": 1.4014,
"step": 3370
},
{
"epoch": 13.03,
"learning_rate": 3.227256597612364e-07,
"loss": 1.3146,
"step": 3375
},
{
"epoch": 13.05,
"learning_rate": 8.51100732284126e-07,
"loss": 1.3623,
"step": 3380
},
{
"epoch": 13.07,
"learning_rate": 1.62859623508997e-06,
"loss": 1.3259,
"step": 3385
},
{
"epoch": 13.09,
"learning_rate": 2.652353228531244e-06,
"loss": 1.3975,
"step": 3390
},
{
"epoch": 13.11,
"learning_rate": 3.9186072413696845e-06,
"loss": 1.3585,
"step": 3395
},
{
"epoch": 13.13,
"learning_rate": 5.422702113166566e-06,
"loss": 1.3596,
"step": 3400
},
{
"epoch": 13.15,
"learning_rate": 7.159107115516102e-06,
"loss": 1.4021,
"step": 3405
},
{
"epoch": 13.17,
"learning_rate": 9.121437289164265e-06,
"loss": 1.4666,
"step": 3410
},
{
"epoch": 13.19,
"learning_rate": 1.1302476922232583e-05,
"loss": 1.327,
"step": 3415
},
{
"epoch": 13.2,
"learning_rate": 1.3694206083212781e-05,
"loss": 1.2798,
"step": 3420
},
{
"epoch": 13.22,
"learning_rate": 1.628783011117153e-05,
"loss": 1.3184,
"step": 3425
},
{
"epoch": 13.24,
"learning_rate": 1.9073811954719624e-05,
"loss": 1.3236,
"step": 3430
},
{
"epoch": 13.26,
"learning_rate": 2.2041907240839828e-05,
"loss": 1.3766,
"step": 3435
},
{
"epoch": 13.28,
"learning_rate": 2.518120194461378e-05,
"loss": 1.2779,
"step": 3440
},
{
"epoch": 13.3,
"learning_rate": 2.8480152521337216e-05,
"loss": 1.3743,
"step": 3445
},
{
"epoch": 13.32,
"learning_rate": 3.1926628353448936e-05,
"loss": 1.336,
"step": 3450
},
{
"epoch": 13.34,
"learning_rate": 3.5507956356197615e-05,
"loss": 1.3522,
"step": 3455
},
{
"epoch": 13.36,
"learning_rate": 3.9210967578018804e-05,
"loss": 1.3693,
"step": 3460
},
{
"epoch": 13.38,
"learning_rate": 4.302204562427067e-05,
"loss": 1.3374,
"step": 3465
},
{
"epoch": 13.4,
"learning_rate": 4.692717672627317e-05,
"loss": 1.3881,
"step": 3470
},
{
"epoch": 13.42,
"learning_rate": 5.091200127153047e-05,
"loss": 1.2653,
"step": 3475
},
{
"epoch": 13.44,
"learning_rate": 5.496186660566713e-05,
"loss": 1.3907,
"step": 3480
},
{
"epoch": 13.46,
"learning_rate": 5.906188091190777e-05,
"loss": 1.3586,
"step": 3485
},
{
"epoch": 13.47,
"learning_rate": 6.319696796998643e-05,
"loss": 1.3102,
"step": 3490
},
{
"epoch": 13.49,
"learning_rate": 6.735192259312862e-05,
"loss": 1.3599,
"step": 3495
},
{
"epoch": 13.51,
"learning_rate": 7.151146653925592e-05,
"loss": 1.3715,
"step": 3500
},
{
"epoch": 13.53,
"learning_rate": 7.566030469082585e-05,
"loss": 1.4274,
"step": 3505
},
{
"epoch": 13.55,
"learning_rate": 7.978318129672468e-05,
"loss": 1.2634,
"step": 3510
},
{
"epoch": 13.57,
"learning_rate": 8.386493606940281e-05,
"loss": 1.3939,
"step": 3515
},
{
"epoch": 13.59,
"learning_rate": 8.789055993098241e-05,
"loss": 1.4498,
"step": 3520
},
{
"epoch": 13.61,
"learning_rate": 9.184525020334682e-05,
"loss": 1.4425,
"step": 3525
},
{
"epoch": 13.63,
"learning_rate": 9.571446503927972e-05,
"loss": 1.3688,
"step": 3530
},
{
"epoch": 13.65,
"learning_rate": 9.948397689449235e-05,
"loss": 1.3409,
"step": 3535
},
{
"epoch": 13.67,
"learning_rate": 0.00010313992484392988,
"loss": 1.4686,
"step": 3540
},
{
"epoch": 13.69,
"learning_rate": 0.00010666886554997249,
"loss": 1.3646,
"step": 3545
},
{
"epoch": 13.71,
"learning_rate": 0.00011005782269511996,
"loss": 1.411,
"step": 3550
},
{
"epoch": 13.73,
"learning_rate": 0.00011329433469739373,
"loss": 1.3738,
"step": 3555
},
{
"epoch": 13.75,
"learning_rate": 0.0001163665005329936,
"loss": 1.3912,
"step": 3560
},
{
"epoch": 13.76,
"learning_rate": 0.00011926302349772045,
"loss": 1.3728,
"step": 3565
},
{
"epoch": 13.78,
"learning_rate": 0.00012197325274624481,
"loss": 1.3925,
"step": 3570
},
{
"epoch": 13.8,
"learning_rate": 0.0001244872224564823,
"loss": 1.3735,
"step": 3575
},
{
"epoch": 13.82,
"learning_rate": 0.0001267956884750556,
"loss": 1.4361,
"step": 3580
},
{
"epoch": 13.84,
"learning_rate": 0.0001288901623091031,
"loss": 1.4661,
"step": 3585
},
{
"epoch": 13.86,
"learning_rate": 0.000130762942339434,
"loss": 1.4177,
"step": 3590
},
{
"epoch": 13.88,
"learning_rate": 0.00013240714214026112,
"loss": 1.4351,
"step": 3595
},
{
"epoch": 13.9,
"learning_rate": 0.00013381671580137345,
"loss": 1.4243,
"step": 3600
},
{
"epoch": 13.92,
"learning_rate": 0.00013498648015963804,
"loss": 1.3258,
"step": 3605
},
{
"epoch": 13.94,
"learning_rate": 0.00013591213385808236,
"loss": 1.3917,
"step": 3610
},
{
"epoch": 13.96,
"learning_rate": 0.00013659027316247394,
"loss": 1.3626,
"step": 3615
},
{
"epoch": 13.98,
"learning_rate": 0.00013701840447723958,
"loss": 1.505,
"step": 3620
},
{
"epoch": 14.0,
"learning_rate": 0.00013719495351470075,
"loss": 1.3238,
"step": 3625
},
{
"epoch": 14.02,
"learning_rate": 0.00013711927108390887,
"loss": 1.3589,
"step": 3630
},
{
"epoch": 14.03,
"learning_rate": 0.00013679163547779456,
"loss": 1.4241,
"step": 3635
},
{
"epoch": 14.05,
"learning_rate": 0.00013621325144985282,
"loss": 1.4179,
"step": 3640
},
{
"epoch": 14.07,
"learning_rate": 0.00013538624578412686,
"loss": 1.3404,
"step": 3645
},
{
"epoch": 14.09,
"learning_rate": 0.00013431365947478058,
"loss": 1.3758,
"step": 3650
},
{
"epoch": 14.11,
"learning_rate": 0.00013299943654401664,
"loss": 1.4247,
"step": 3655
},
{
"epoch": 14.13,
"learning_rate": 0.00013144840953945616,
"loss": 1.3701,
"step": 3660
},
{
"epoch": 14.15,
"learning_rate": 0.00012966628176431025,
"loss": 1.3553,
"step": 3665
},
{
"epoch": 14.17,
"learning_rate": 0.00012765960630568425,
"loss": 1.381,
"step": 3670
},
{
"epoch": 14.19,
"learning_rate": 0.00012543576193812774,
"loss": 1.442,
"step": 3675
},
{
"epoch": 14.21,
"learning_rate": 0.0001230029259910393,
"loss": 1.3873,
"step": 3680
},
{
"epoch": 14.23,
"learning_rate": 0.0001203700442796948,
"loss": 1.3884,
"step": 3685
},
{
"epoch": 14.25,
"learning_rate": 0.00011754679821046217,
"loss": 1.3278,
"step": 3690
},
{
"epoch": 14.27,
"learning_rate": 0.00011454356918116728,
"loss": 1.3606,
"step": 3695
},
{
"epoch": 14.29,
"learning_rate": 0.00011137140040750922,
"loss": 1.2409,
"step": 3700
},
{
"epoch": 14.31,
"learning_rate": 0.00010804195631589772,
"loss": 1.411,
"step": 3705
},
{
"epoch": 14.32,
"learning_rate": 0.00010456747965202607,
"loss": 1.38,
"step": 3710
},
{
"epoch": 14.34,
"learning_rate": 0.00010096074646289782,
"loss": 1.3982,
"step": 3715
},
{
"epoch": 14.36,
"learning_rate": 9.723501911784598e-05,
"loss": 1.3883,
"step": 3720
},
{
"epoch": 14.38,
"learning_rate": 9.340399754128775e-05,
"loss": 1.3611,
"step": 3725
},
{
"epoch": 14.4,
"learning_rate": 8.948176883653932e-05,
"loss": 1.4344,
"step": 3730
},
{
"epoch": 14.42,
"learning_rate": 8.548275548593159e-05,
"loss": 1.2783,
"step": 3735
},
{
"epoch": 14.44,
"learning_rate": 8.142166231769664e-05,
"loss": 1.335,
"step": 3740
},
{
"epoch": 14.46,
"learning_rate": 7.731342243463601e-05,
"loss": 1.3506,
"step": 3745
},
{
"epoch": 14.48,
"learning_rate": 7.317314230339991e-05,
"loss": 1.4243,
"step": 3750
},
{
"epoch": 14.5,
"learning_rate": 6.901604620628517e-05,
"loss": 1.3969,
"step": 3755
},
{
"epoch": 14.52,
"learning_rate": 6.485742025981473e-05,
"loss": 1.3597,
"step": 3760
},
{
"epoch": 14.54,
"learning_rate": 6.071255620594063e-05,
"loss": 1.4289,
"step": 3765
},
{
"epoch": 14.56,
"learning_rate": 5.659669518256613e-05,
"loss": 1.3466,
"step": 3770
},
{
"epoch": 14.58,
"learning_rate": 5.252497168014461e-05,
"loss": 1.279,
"step": 3775
},
{
"epoch": 14.59,
"learning_rate": 4.8512357890428955e-05,
"loss": 1.3786,
"step": 3780
},
{
"epoch": 14.61,
"learning_rate": 4.457360865201619e-05,
"loss": 1.2442,
"step": 3785
},
{
"epoch": 14.63,
"learning_rate": 4.072320719512437e-05,
"loss": 1.2467,
"step": 3790
},
{
"epoch": 14.65,
"learning_rate": 3.697531188510021e-05,
"loss": 1.326,
"step": 3795
},
{
"epoch": 14.67,
"learning_rate": 3.3343704160496265e-05,
"loss": 1.3049,
"step": 3800
},
{
"epoch": 14.69,
"learning_rate": 2.9841737857150583e-05,
"loss": 1.3741,
"step": 3805
},
{
"epoch": 14.71,
"learning_rate": 2.648229010460623e-05,
"loss": 1.3036,
"step": 3810
},
{
"epoch": 14.73,
"learning_rate": 2.3277713975440426e-05,
"loss": 1.3118,
"step": 3815
},
{
"epoch": 14.75,
"learning_rate": 2.0239793061604814e-05,
"loss": 1.3541,
"step": 3820
},
{
"epoch": 14.77,
"learning_rate": 1.7379698144815434e-05,
"loss": 1.3646,
"step": 3825
},
{
"epoch": 14.79,
"learning_rate": 1.4707946120313696e-05,
"loss": 1.3313,
"step": 3830
},
{
"epoch": 14.81,
"learning_rate": 1.2234361325042786e-05,
"loss": 1.3923,
"step": 3835
},
{
"epoch": 14.83,
"learning_rate": 9.968039412440925e-06,
"loss": 1.2976,
"step": 3840
},
{
"epoch": 14.85,
"learning_rate": 7.917313906685554e-06,
"loss": 1.3127,
"step": 3845
},
{
"epoch": 14.86,
"learning_rate": 6.089725559373968e-06,
"loss": 1.3699,
"step": 3850
},
{
"epoch": 14.88,
"learning_rate": 4.4919946213203235e-06,
"loss": 1.2705,
"step": 3855
},
{
"epoch": 14.9,
"learning_rate": 3.129996131426458e-06,
"loss": 1.3474,
"step": 3860
},
{
"epoch": 14.92,
"learning_rate": 2.00873831349432e-06,
"loss": 1.3704,
"step": 3865
},
{
"epoch": 14.94,
"learning_rate": 1.1323441604147607e-06,
"loss": 1.3555,
"step": 3870
},
{
"epoch": 14.96,
"learning_rate": 5.040362734534312e-07,
"loss": 1.3937,
"step": 3875
},
{
"epoch": 14.98,
"learning_rate": 1.2612501237755945e-07,
"loss": 1.425,
"step": 3880
},
{
"epoch": 15.0,
"learning_rate": 0.0,
"loss": 1.42,
"step": 3885
},
{
"epoch": 15.02,
"learning_rate": 1.261250123775442e-07,
"loss": 1.4818,
"step": 3890
},
{
"epoch": 15.04,
"learning_rate": 5.040362734534007e-07,
"loss": 1.4719,
"step": 3895
},
{
"epoch": 15.06,
"learning_rate": 1.132344160414715e-06,
"loss": 1.3957,
"step": 3900
},
{
"epoch": 15.08,
"learning_rate": 2.008738313494259e-06,
"loss": 1.4262,
"step": 3905
},
{
"epoch": 15.1,
"learning_rate": 3.1299961314263817e-06,
"loss": 1.3789,
"step": 3910
},
{
"epoch": 15.12,
"learning_rate": 4.491994621320232e-06,
"loss": 1.4566,
"step": 3915
},
{
"epoch": 15.14,
"learning_rate": 6.089725559373869e-06,
"loss": 1.4144,
"step": 3920
},
{
"epoch": 15.15,
"learning_rate": 7.91731390668544e-06,
"loss": 1.4525,
"step": 3925
},
{
"epoch": 15.17,
"learning_rate": 9.968039412440788e-06,
"loss": 1.4557,
"step": 3930
},
{
"epoch": 15.19,
"learning_rate": 1.2234361325042642e-05,
"loss": 1.4918,
"step": 3935
},
{
"epoch": 15.21,
"learning_rate": 1.4707946120313543e-05,
"loss": 1.5974,
"step": 3940
},
{
"epoch": 15.23,
"learning_rate": 1.7379698144815265e-05,
"loss": 1.3931,
"step": 3945
},
{
"epoch": 15.25,
"learning_rate": 2.0239793061604638e-05,
"loss": 1.3826,
"step": 3950
},
{
"epoch": 15.27,
"learning_rate": 2.3277713975440236e-05,
"loss": 1.4445,
"step": 3955
},
{
"epoch": 15.29,
"learning_rate": 2.6482290104606033e-05,
"loss": 1.4149,
"step": 3960
},
{
"epoch": 15.31,
"learning_rate": 2.984173785715038e-05,
"loss": 1.3804,
"step": 3965
},
{
"epoch": 15.33,
"learning_rate": 3.334370416049605e-05,
"loss": 1.3937,
"step": 3970
},
{
"epoch": 15.35,
"learning_rate": 3.697531188509998e-05,
"loss": 1.4221,
"step": 3975
},
{
"epoch": 15.37,
"learning_rate": 4.072320719512414e-05,
"loss": 1.4485,
"step": 3980
},
{
"epoch": 15.39,
"learning_rate": 4.4573608652015956e-05,
"loss": 1.5171,
"step": 3985
},
{
"epoch": 15.41,
"learning_rate": 4.851235789042871e-05,
"loss": 1.4849,
"step": 3990
},
{
"epoch": 15.42,
"learning_rate": 5.2524971680144367e-05,
"loss": 1.4614,
"step": 3995
},
{
"epoch": 15.44,
"learning_rate": 5.659669518256589e-05,
"loss": 1.413,
"step": 4000
},
{
"epoch": 15.46,
"learning_rate": 6.071255620594038e-05,
"loss": 1.4743,
"step": 4005
},
{
"epoch": 15.48,
"learning_rate": 6.485742025981448e-05,
"loss": 1.4832,
"step": 4010
},
{
"epoch": 15.5,
"learning_rate": 6.901604620628492e-05,
"loss": 1.5146,
"step": 4015
},
{
"epoch": 15.52,
"learning_rate": 7.317314230339967e-05,
"loss": 1.5513,
"step": 4020
},
{
"epoch": 15.54,
"learning_rate": 7.731342243463577e-05,
"loss": 1.5379,
"step": 4025
},
{
"epoch": 15.56,
"learning_rate": 8.142166231769639e-05,
"loss": 1.4753,
"step": 4030
},
{
"epoch": 15.58,
"learning_rate": 8.548275548593135e-05,
"loss": 1.5384,
"step": 4035
},
{
"epoch": 15.6,
"learning_rate": 8.948176883653908e-05,
"loss": 1.5967,
"step": 4040
},
{
"epoch": 15.62,
"learning_rate": 9.340399754128752e-05,
"loss": 1.4906,
"step": 4045
},
{
"epoch": 15.64,
"learning_rate": 9.723501911784575e-05,
"loss": 1.5322,
"step": 4050
},
{
"epoch": 15.66,
"learning_rate": 0.0001009607464628976,
"loss": 1.5223,
"step": 4055
},
{
"epoch": 15.68,
"learning_rate": 0.00010456747965202585,
"loss": 1.4992,
"step": 4060
},
{
"epoch": 15.69,
"learning_rate": 0.00010804195631589752,
"loss": 1.5217,
"step": 4065
},
{
"epoch": 15.71,
"learning_rate": 0.00011137140040750902,
"loss": 1.4526,
"step": 4070
},
{
"epoch": 15.73,
"learning_rate": 0.00011454356918116707,
"loss": 1.5553,
"step": 4075
},
{
"epoch": 15.75,
"learning_rate": 0.00011754679821046198,
"loss": 1.4297,
"step": 4080
},
{
"epoch": 15.77,
"learning_rate": 0.00012037004427969463,
"loss": 1.4843,
"step": 4085
},
{
"epoch": 15.79,
"learning_rate": 0.00012300292599103915,
"loss": 1.4636,
"step": 4090
},
{
"epoch": 15.81,
"learning_rate": 0.00012543576193812758,
"loss": 1.4776,
"step": 4095
},
{
"epoch": 15.83,
"learning_rate": 0.00012765960630568412,
"loss": 1.514,
"step": 4100
},
{
"epoch": 15.85,
"learning_rate": 0.00012966628176431014,
"loss": 1.4759,
"step": 4105
},
{
"epoch": 15.87,
"learning_rate": 0.00013144840953945605,
"loss": 1.396,
"step": 4110
},
{
"epoch": 15.89,
"learning_rate": 0.00013299943654401656,
"loss": 1.5696,
"step": 4115
},
{
"epoch": 15.91,
"learning_rate": 0.0001343136594747805,
"loss": 1.5059,
"step": 4120
},
{
"epoch": 15.93,
"learning_rate": 0.0001353862457841268,
"loss": 1.4919,
"step": 4125
},
{
"epoch": 15.95,
"learning_rate": 0.00013621325144985277,
"loss": 1.515,
"step": 4130
},
{
"epoch": 15.97,
"learning_rate": 0.00013679163547779453,
"loss": 1.4448,
"step": 4135
},
{
"epoch": 15.98,
"learning_rate": 0.00013711927108390882,
"loss": 1.5143,
"step": 4140
},
{
"epoch": 16.0,
"eval_loss": 1.3626197576522827,
"eval_runtime": 22.0604,
"eval_samples_per_second": 20.897,
"eval_steps_per_second": 2.629,
"step": 4144
},
{
"epoch": 15.41,
"learning_rate": 4.923820788333643e-05,
"loss": 1.4417,
"step": 4145
},
{
"epoch": 15.43,
"learning_rate": 5.311198428226757e-05,
"loss": 1.5224,
"step": 4150
},
{
"epoch": 15.45,
"learning_rate": 5.7038557476801184e-05,
"loss": 1.4984,
"step": 4155
},
{
"epoch": 15.46,
"learning_rate": 6.100454224793001e-05,
"loss": 1.4427,
"step": 4160
},
{
"epoch": 15.48,
"learning_rate": 6.49964190272892e-05,
"loss": 1.4789,
"step": 4165
},
{
"epoch": 15.5,
"learning_rate": 6.900057998375254e-05,
"loss": 1.5665,
"step": 4170
},
{
"epoch": 15.52,
"learning_rate": 7.300337541089789e-05,
"loss": 1.5002,
"step": 4175
},
{
"epoch": 15.54,
"learning_rate": 7.699116025723293e-05,
"loss": 1.4668,
"step": 4180
},
{
"epoch": 15.56,
"learning_rate": 8.09503406405399e-05,
"loss": 1.3757,
"step": 4185
},
{
"epoch": 15.58,
"learning_rate": 8.48674201878012e-05,
"loss": 1.4722,
"step": 4190
},
{
"epoch": 15.59,
"learning_rate": 8.872904604271726e-05,
"loss": 1.4961,
"step": 4195
},
{
"epoch": 15.61,
"learning_rate": 9.252205438400528e-05,
"loss": 1.4798,
"step": 4200
},
{
"epoch": 15.63,
"learning_rate": 9.623351529928802e-05,
"loss": 1.5392,
"step": 4205
},
{
"epoch": 15.65,
"learning_rate": 9.985077686162523e-05,
"loss": 1.5653,
"step": 4210
},
{
"epoch": 15.67,
"learning_rate": 0.00010336150825841603,
"loss": 1.4743,
"step": 4215
},
{
"epoch": 15.69,
"learning_rate": 0.00010675374182567242,
"loss": 1.4201,
"step": 4220
},
{
"epoch": 15.71,
"learning_rate": 0.00011001591384435138,
"loss": 1.3889,
"step": 4225
},
{
"epoch": 15.72,
"learning_rate": 0.00011313690395969416,
"loss": 1.4913,
"step": 4230
},
{
"epoch": 15.74,
"learning_rate": 0.00011610607308918656,
"loss": 1.3722,
"step": 4235
},
{
"epoch": 15.76,
"learning_rate": 0.00011891329968992182,
"loss": 1.4133,
"step": 4240
},
{
"epoch": 15.78,
"learning_rate": 0.0001215490142617292,
"loss": 1.36,
"step": 4245
},
{
"epoch": 15.8,
"learning_rate": 0.00012400423196845864,
"loss": 1.361,
"step": 4250
},
{
"epoch": 15.82,
"learning_rate": 0.00012627058326621316,
"loss": 1.542,
"step": 4255
},
{
"epoch": 15.84,
"learning_rate": 0.0001283403424341258,
"loss": 1.4983,
"step": 4260
},
{
"epoch": 15.86,
"learning_rate": 0.00013020645391041629,
"loss": 1.4985,
"step": 4265
},
{
"epoch": 15.87,
"learning_rate": 0.00013186255634396195,
"loss": 1.4767,
"step": 4270
},
{
"epoch": 15.89,
"learning_rate": 0.00013330300427938103,
"loss": 1.4258,
"step": 4275
},
{
"epoch": 15.91,
"learning_rate": 0.00013452288740171763,
"loss": 1.4773,
"step": 4280
},
{
"epoch": 15.93,
"learning_rate": 0.00013551804727511717,
"loss": 1.462,
"step": 4285
},
{
"epoch": 15.95,
"learning_rate": 0.0001362850915184393,
"loss": 1.4688,
"step": 4290
},
{
"epoch": 15.97,
"learning_rate": 0.00013682140536947865,
"loss": 1.5146,
"step": 4295
},
{
"epoch": 15.99,
"learning_rate": 0.00013712516059837763,
"loss": 1.5462,
"step": 4300
},
{
"epoch": 16.0,
"eval_loss": 1.3411859273910522,
"eval_runtime": 18.5136,
"eval_samples_per_second": 20.85,
"eval_steps_per_second": 2.647,
"step": 4304
},
{
"epoch": 16.0,
"learning_rate": 0.00013719532173984305,
"loss": 1.3395,
"step": 4305
},
{
"epoch": 16.02,
"learning_rate": 0.00013703164962292424,
"loss": 1.3995,
"step": 4310
},
{
"epoch": 16.04,
"learning_rate": 0.00013663470218631772,
"loss": 1.4118,
"step": 4315
},
{
"epoch": 16.06,
"learning_rate": 0.00013600583257642132,
"loss": 1.3778,
"step": 4320
},
{
"epoch": 16.08,
"learning_rate": 0.00013514718453461912,
"loss": 1.4416,
"step": 4325
},
{
"epoch": 16.1,
"learning_rate": 0.0001340616850895236,
"loss": 1.4926,
"step": 4330
},
{
"epoch": 16.12,
"learning_rate": 0.00013275303457908525,
"loss": 1.4668,
"step": 4335
},
{
"epoch": 16.13,
"learning_rate": 0.00013122569403658038,
"loss": 1.3931,
"step": 4340
},
{
"epoch": 16.15,
"learning_rate": 0.00012948486998348453,
"loss": 1.403,
"step": 4345
},
{
"epoch": 16.17,
"learning_rate": 0.0001275364966810606,
"loss": 1.3802,
"step": 4350
},
{
"epoch": 16.19,
"learning_rate": 0.00012538721590117088,
"loss": 1.429,
"step": 4355
},
{
"epoch": 16.21,
"learning_rate": 0.00012304435428527134,
"loss": 1.4773,
"step": 4360
},
{
"epoch": 16.23,
"learning_rate": 0.00012051589836876666,
"loss": 1.3717,
"step": 4365
},
{
"epoch": 16.25,
"learning_rate": 0.00011781046735586077,
"loss": 1.4166,
"step": 4370
},
{
"epoch": 16.26,
"learning_rate": 0.00011493728373772612,
"loss": 1.432,
"step": 4375
},
{
"epoch": 16.28,
"learning_rate": 0.00011190614185412497,
"loss": 1.4722,
"step": 4380
},
{
"epoch": 16.3,
"learning_rate": 0.00010872737450568259,
"loss": 1.3411,
"step": 4385
},
{
"epoch": 16.32,
"learning_rate": 0.00010541181773059928,
"loss": 1.4268,
"step": 4390
},
{
"epoch": 16.34,
"learning_rate": 0.00010197077386589103,
"loss": 1.4257,
"step": 4395
},
{
"epoch": 16.36,
"learning_rate": 9.841597301907411e-05,
"loss": 1.4367,
"step": 4400
},
{
"epoch": 16.38,
"learning_rate": 9.475953308163089e-05,
"loss": 1.388,
"step": 4405
},
{
"epoch": 16.39,
"learning_rate": 9.101391842055883e-05,
"loss": 1.4486,
"step": 4410
},
{
"epoch": 16.41,
"learning_rate": 8.719189738884117e-05,
"loss": 1.3824,
"step": 4415
},
{
"epoch": 16.43,
"learning_rate": 8.330649879965051e-05,
"loss": 1.4313,
"step": 4420
},
{
"epoch": 16.45,
"learning_rate": 7.937096751268169e-05,
"loss": 1.3933,
"step": 4425
},
{
"epoch": 16.47,
"learning_rate": 7.539871928400956e-05,
"loss": 1.4352,
"step": 4430
},
{
"epoch": 16.49,
"learning_rate": 7.140329503337758e-05,
"loss": 1.4244,
"step": 4435
},
{
"epoch": 16.51,
"learning_rate": 6.739831468481779e-05,
"loss": 1.4062,
"step": 4440
},
{
"epoch": 16.52,
"learning_rate": 6.33974307379626e-05,
"loss": 1.3753,
"step": 4445
},
{
"epoch": 16.54,
"learning_rate": 5.94142817282949e-05,
"loss": 1.3918,
"step": 4450
},
{
"epoch": 16.56,
"learning_rate": 5.546244573501996e-05,
"loss": 1.423,
"step": 4455
},
{
"epoch": 16.58,
"learning_rate": 5.155539409500841e-05,
"loss": 1.4141,
"step": 4460
},
{
"epoch": 16.6,
"learning_rate": 4.7706445480618974e-05,
"loss": 1.4364,
"step": 4465
},
{
"epoch": 16.62,
"learning_rate": 4.3928720497937174e-05,
"loss": 1.405,
"step": 4470
},
{
"epoch": 16.64,
"learning_rate": 4.02350969601972e-05,
"loss": 1.4752,
"step": 4475
},
{
"epoch": 16.65,
"learning_rate": 3.663816598884848e-05,
"loss": 1.4515,
"step": 4480
},
{
"epoch": 16.67,
"learning_rate": 3.315018909193563e-05,
"loss": 1.4503,
"step": 4485
},
{
"epoch": 16.69,
"learning_rate": 2.9783056366075814e-05,
"loss": 1.3878,
"step": 4490
},
{
"epoch": 16.71,
"learning_rate": 2.6548245964540616e-05,
"loss": 1.3826,
"step": 4495
},
{
"epoch": 16.73,
"learning_rate": 2.345678496960497e-05,
"loss": 1.3709,
"step": 4500
},
{
"epoch": 16.75,
"learning_rate": 2.051921180253764e-05,
"loss": 1.4434,
"step": 4505
},
{
"epoch": 16.77,
"learning_rate": 1.774554029938429e-05,
"loss": 1.4217,
"step": 4510
},
{
"epoch": 16.78,
"learning_rate": 1.5145225574996895e-05,
"loss": 1.3259,
"step": 4515
},
{
"epoch": 16.8,
"learning_rate": 1.272713179167218e-05,
"loss": 1.3681,
"step": 4520
},
{
"epoch": 16.82,
"learning_rate": 1.0499501942287456e-05,
"loss": 1.3708,
"step": 4525
},
{
"epoch": 16.84,
"learning_rate": 8.469929750918058e-06,
"loss": 1.4352,
"step": 4530
},
{
"epoch": 16.86,
"learning_rate": 6.6453337867398825e-06,
"loss": 1.4355,
"step": 4535
},
{
"epoch": 16.88,
"learning_rate": 5.031933879454651e-06,
"loss": 1.4338,
"step": 4540
},
{
"epoch": 16.9,
"learning_rate": 3.6352299166325223e-06,
"loss": 1.3822,
"step": 4545
},
{
"epoch": 16.91,
"learning_rate": 2.459983095251791e-06,
"loss": 1.3442,
"step": 4550
},
{
"epoch": 16.93,
"learning_rate": 1.5101996913488535e-06,
"loss": 1.356,
"step": 4555
},
{
"epoch": 16.95,
"learning_rate": 7.891174030992353e-07,
"loss": 1.3681,
"step": 4560
},
{
"epoch": 16.97,
"learning_rate": 2.991943138937121e-07,
"loss": 1.3964,
"step": 4565
},
{
"epoch": 16.99,
"learning_rate": 4.2100513024036057e-08,
"loss": 1.4004,
"step": 4570
},
{
"epoch": 17.0,
"eval_loss": 1.3110859394073486,
"eval_runtime": 18.5084,
"eval_samples_per_second": 20.855,
"eval_steps_per_second": 2.647,
"step": 4573
},
{
"epoch": 18.08,
"learning_rate": 0.000134880848712477,
"loss": 1.3191,
"step": 4575
},
{
"epoch": 18.1,
"learning_rate": 0.00013365575351388775,
"loss": 1.4082,
"step": 4580
},
{
"epoch": 18.12,
"learning_rate": 0.00013217996375537754,
"loss": 1.381,
"step": 4585
},
{
"epoch": 18.14,
"learning_rate": 0.0001304591664429994,
"loss": 1.3937,
"step": 4590
},
{
"epoch": 18.16,
"learning_rate": 0.00012849999272775362,
"loss": 1.3955,
"step": 4595
},
{
"epoch": 18.18,
"learning_rate": 0.000126309992352219,
"loss": 1.3851,
"step": 4600
},
{
"epoch": 18.2,
"learning_rate": 0.00012389760455736593,
"loss": 1.3328,
"step": 4605
},
{
"epoch": 18.22,
"learning_rate": 0.00012127212556165209,
"loss": 1.3809,
"step": 4610
},
{
"epoch": 18.24,
"learning_rate": 0.00011844367273772787,
"loss": 1.2981,
"step": 4615
},
{
"epoch": 18.26,
"learning_rate": 0.00011542314562479984,
"loss": 1.4094,
"step": 4620
},
{
"epoch": 18.28,
"learning_rate": 0.00011222218392688052,
"loss": 1.4044,
"step": 4625
},
{
"epoch": 18.3,
"learning_rate": 0.0001088531226587985,
"loss": 1.4849,
"step": 4630
},
{
"epoch": 18.32,
"learning_rate": 0.00010532894461279404,
"loss": 1.4488,
"step": 4635
},
{
"epoch": 18.34,
"learning_rate": 0.00010166323032888931,
"loss": 1.4335,
"step": 4640
},
{
"epoch": 18.36,
"learning_rate": 9.78701057618181e-05,
"loss": 1.3215,
"step": 4645
},
{
"epoch": 18.38,
"learning_rate": 9.396418784617256e-05,
"loss": 1.4931,
"step": 4650
},
{
"epoch": 18.4,
"learning_rate": 8.996052816955526e-05,
"loss": 1.4301,
"step": 4655
},
{
"epoch": 18.42,
"learning_rate": 8.587455497076757e-05,
"loss": 1.3555,
"step": 4660
},
{
"epoch": 18.44,
"learning_rate": 8.172201368657088e-05,
"loss": 1.3862,
"step": 4665
},
{
"epoch": 18.46,
"learning_rate": 7.751890627611039e-05,
"loss": 1.3795,
"step": 4670
},
{
"epoch": 18.48,
"learning_rate": 7.328142955681618e-05,
"loss": 1.4168,
"step": 4675
},
{
"epoch": 18.5,
"learning_rate": 6.902591278942331e-05,
"loss": 1.4594,
"step": 4680
},
{
"epoch": 18.52,
"learning_rate": 6.47687547526032e-05,
"loss": 1.4803,
"step": 4685
},
{
"epoch": 18.54,
"learning_rate": 6.0526360549714816e-05,
"loss": 1.4239,
"step": 4690
},
{
"epoch": 18.56,
"learning_rate": 5.6315078391183605e-05,
"loss": 1.3304,
"step": 4695
},
{
"epoch": 18.58,
"learning_rate": 5.21511365961095e-05,
"loss": 1.3828,
"step": 4700
},
{
"epoch": 18.6,
"learning_rate": 4.80505810558948e-05,
"loss": 1.3273,
"step": 4705
},
{
"epoch": 18.62,
"learning_rate": 4.402921340084794e-05,
"loss": 1.3661,
"step": 4710
},
{
"epoch": 18.64,
"learning_rate": 4.0102530108070474e-05,
"loss": 1.287,
"step": 4715
},
{
"epoch": 18.66,
"learning_rate": 3.6285662785250574e-05,
"loss": 1.3865,
"step": 4720
},
{
"epoch": 18.68,
"learning_rate": 3.2593319860498044e-05,
"loss": 1.428,
"step": 4725
},
{
"epoch": 18.7,
"learning_rate": 2.9039729902920295e-05,
"loss": 1.2403,
"step": 4730
},
{
"epoch": 18.72,
"learning_rate": 2.5638586792340877e-05,
"loss": 1.4223,
"step": 4735
},
{
"epoch": 18.74,
"learning_rate": 2.2402996949474048e-05,
"loss": 1.3913,
"step": 4740
},
{
"epoch": 18.75,
"learning_rate": 1.9345428829881034e-05,
"loss": 1.3764,
"step": 4745
},
{
"epoch": 18.77,
"learning_rate": 1.647766487635479e-05,
"loss": 1.4167,
"step": 4750
},
{
"epoch": 18.79,
"learning_rate": 1.3810756114877466e-05,
"loss": 1.3081,
"step": 4755
},
{
"epoch": 18.81,
"learning_rate": 1.1354979569111334e-05,
"loss": 1.3206,
"step": 4760
},
{
"epoch": 18.83,
"learning_rate": 9.119798657542995e-06,
"loss": 1.3369,
"step": 4765
},
{
"epoch": 18.85,
"learning_rate": 7.113826725875128e-06,
"loss": 1.3328,
"step": 4770
},
{
"epoch": 18.87,
"learning_rate": 5.344793855206173e-06,
"loss": 1.4008,
"step": 4775
},
{
"epoch": 18.89,
"learning_rate": 3.819517073901737e-06,
"loss": 1.41,
"step": 4780
},
{
"epoch": 18.91,
"learning_rate": 2.5438740879408957e-06,
"loss": 1.1899,
"step": 4785
},
{
"epoch": 18.93,
"learning_rate": 1.522780630978951e-06,
"loss": 1.3401,
"step": 4790
},
{
"epoch": 18.95,
"learning_rate": 7.601715213983543e-07,
"loss": 1.3232,
"step": 4795
},
{
"epoch": 18.97,
"learning_rate": 2.5898549935329754e-07,
"loss": 1.3369,
"step": 4800
},
{
"epoch": 18.99,
"learning_rate": 2.1153902234608112e-08,
"loss": 1.4171,
"step": 4805
},
{
"epoch": 19.0,
"eval_loss": 1.2792030572891235,
"eval_runtime": 11.7078,
"eval_samples_per_second": 43.902,
"eval_steps_per_second": 5.552,
"step": 4807
}
],
"max_steps": 6831,
"num_train_epochs": 27,
"total_flos": 5020465102848000.0,
"trial_name": null,
"trial_params": null
}