mistral-7b-sft-qlora-2ep / trainer_state.json
mimicheng's picture
Model save
73e233a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9997705106138841,
"eval_steps": 500,
"global_step": 8714,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.2935779816513764e-07,
"loss": 1.6585,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.1467889908256882e-06,
"loss": 1.6894,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 2.2935779816513764e-06,
"loss": 1.6975,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 3.4403669724770644e-06,
"loss": 1.6499,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 4.587155963302753e-06,
"loss": 1.67,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 5.733944954128441e-06,
"loss": 1.6319,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 6.880733944954129e-06,
"loss": 1.5944,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 8.027522935779817e-06,
"loss": 1.5345,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 9.174311926605506e-06,
"loss": 1.4475,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 1.0321100917431194e-05,
"loss": 1.4025,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 1.1467889908256882e-05,
"loss": 1.3724,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.2614678899082569e-05,
"loss": 1.3035,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 1.3761467889908258e-05,
"loss": 1.3084,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 1.4908256880733946e-05,
"loss": 1.2827,
"step": 65
},
{
"epoch": 0.02,
"learning_rate": 1.6055045871559634e-05,
"loss": 1.2762,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 1.7201834862385323e-05,
"loss": 1.2332,
"step": 75
},
{
"epoch": 0.02,
"learning_rate": 1.834862385321101e-05,
"loss": 1.259,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 1.94954128440367e-05,
"loss": 1.2198,
"step": 85
},
{
"epoch": 0.02,
"learning_rate": 2.0642201834862388e-05,
"loss": 1.1714,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 2.1788990825688073e-05,
"loss": 1.1934,
"step": 95
},
{
"epoch": 0.02,
"learning_rate": 2.2935779816513765e-05,
"loss": 1.1567,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 2.408256880733945e-05,
"loss": 1.1338,
"step": 105
},
{
"epoch": 0.03,
"learning_rate": 2.5229357798165138e-05,
"loss": 1.1182,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 2.6376146788990823e-05,
"loss": 1.1278,
"step": 115
},
{
"epoch": 0.03,
"learning_rate": 2.7522935779816515e-05,
"loss": 1.1047,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 2.8669724770642203e-05,
"loss": 1.1185,
"step": 125
},
{
"epoch": 0.03,
"learning_rate": 2.9816513761467892e-05,
"loss": 1.1142,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 3.096330275229358e-05,
"loss": 1.1013,
"step": 135
},
{
"epoch": 0.03,
"learning_rate": 3.211009174311927e-05,
"loss": 1.0514,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 3.325688073394496e-05,
"loss": 1.0727,
"step": 145
},
{
"epoch": 0.03,
"learning_rate": 3.4403669724770645e-05,
"loss": 1.1009,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 3.555045871559633e-05,
"loss": 1.0711,
"step": 155
},
{
"epoch": 0.04,
"learning_rate": 3.669724770642202e-05,
"loss": 1.0761,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 3.784403669724771e-05,
"loss": 1.0391,
"step": 165
},
{
"epoch": 0.04,
"learning_rate": 3.89908256880734e-05,
"loss": 1.1219,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 4.013761467889908e-05,
"loss": 1.0719,
"step": 175
},
{
"epoch": 0.04,
"learning_rate": 4.1284403669724776e-05,
"loss": 1.0562,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 4.2431192660550464e-05,
"loss": 1.05,
"step": 185
},
{
"epoch": 0.04,
"learning_rate": 4.3577981651376146e-05,
"loss": 1.0729,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 4.4724770642201834e-05,
"loss": 1.0563,
"step": 195
},
{
"epoch": 0.05,
"learning_rate": 4.587155963302753e-05,
"loss": 1.0261,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 4.701834862385321e-05,
"loss": 1.0511,
"step": 205
},
{
"epoch": 0.05,
"learning_rate": 4.81651376146789e-05,
"loss": 1.0172,
"step": 210
},
{
"epoch": 0.05,
"learning_rate": 4.931192660550459e-05,
"loss": 1.0347,
"step": 215
},
{
"epoch": 0.05,
"learning_rate": 5.0458715596330276e-05,
"loss": 1.0125,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 5.1605504587155965e-05,
"loss": 1.0627,
"step": 225
},
{
"epoch": 0.05,
"learning_rate": 5.2752293577981646e-05,
"loss": 1.0431,
"step": 230
},
{
"epoch": 0.05,
"learning_rate": 5.389908256880735e-05,
"loss": 1.0538,
"step": 235
},
{
"epoch": 0.06,
"learning_rate": 5.504587155963303e-05,
"loss": 1.0462,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 5.619266055045872e-05,
"loss": 0.9942,
"step": 245
},
{
"epoch": 0.06,
"learning_rate": 5.733944954128441e-05,
"loss": 1.0193,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 5.8486238532110095e-05,
"loss": 1.0587,
"step": 255
},
{
"epoch": 0.06,
"learning_rate": 5.9633027522935784e-05,
"loss": 1.0127,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 6.0779816513761465e-05,
"loss": 0.9959,
"step": 265
},
{
"epoch": 0.06,
"learning_rate": 6.192660550458716e-05,
"loss": 1.0016,
"step": 270
},
{
"epoch": 0.06,
"learning_rate": 6.307339449541286e-05,
"loss": 0.9981,
"step": 275
},
{
"epoch": 0.06,
"learning_rate": 6.422018348623854e-05,
"loss": 1.0489,
"step": 280
},
{
"epoch": 0.07,
"learning_rate": 6.536697247706422e-05,
"loss": 1.0379,
"step": 285
},
{
"epoch": 0.07,
"learning_rate": 6.651376146788991e-05,
"loss": 1.0224,
"step": 290
},
{
"epoch": 0.07,
"learning_rate": 6.76605504587156e-05,
"loss": 0.9877,
"step": 295
},
{
"epoch": 0.07,
"learning_rate": 6.880733944954129e-05,
"loss": 0.9851,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 6.995412844036697e-05,
"loss": 0.9941,
"step": 305
},
{
"epoch": 0.07,
"learning_rate": 7.110091743119265e-05,
"loss": 0.9819,
"step": 310
},
{
"epoch": 0.07,
"learning_rate": 7.224770642201836e-05,
"loss": 1.001,
"step": 315
},
{
"epoch": 0.07,
"learning_rate": 7.339449541284404e-05,
"loss": 1.0097,
"step": 320
},
{
"epoch": 0.07,
"learning_rate": 7.454128440366973e-05,
"loss": 0.9966,
"step": 325
},
{
"epoch": 0.08,
"learning_rate": 7.568807339449542e-05,
"loss": 1.0303,
"step": 330
},
{
"epoch": 0.08,
"learning_rate": 7.68348623853211e-05,
"loss": 1.0198,
"step": 335
},
{
"epoch": 0.08,
"learning_rate": 7.79816513761468e-05,
"loss": 1.0267,
"step": 340
},
{
"epoch": 0.08,
"learning_rate": 7.912844036697248e-05,
"loss": 0.9932,
"step": 345
},
{
"epoch": 0.08,
"learning_rate": 8.027522935779816e-05,
"loss": 1.03,
"step": 350
},
{
"epoch": 0.08,
"learning_rate": 8.142201834862386e-05,
"loss": 0.9871,
"step": 355
},
{
"epoch": 0.08,
"learning_rate": 8.256880733944955e-05,
"loss": 0.9775,
"step": 360
},
{
"epoch": 0.08,
"learning_rate": 8.371559633027523e-05,
"loss": 1.0259,
"step": 365
},
{
"epoch": 0.08,
"learning_rate": 8.486238532110093e-05,
"loss": 1.0187,
"step": 370
},
{
"epoch": 0.09,
"learning_rate": 8.600917431192661e-05,
"loss": 0.9691,
"step": 375
},
{
"epoch": 0.09,
"learning_rate": 8.715596330275229e-05,
"loss": 1.0352,
"step": 380
},
{
"epoch": 0.09,
"learning_rate": 8.830275229357799e-05,
"loss": 1.0247,
"step": 385
},
{
"epoch": 0.09,
"learning_rate": 8.944954128440367e-05,
"loss": 1.0261,
"step": 390
},
{
"epoch": 0.09,
"learning_rate": 9.059633027522936e-05,
"loss": 0.9829,
"step": 395
},
{
"epoch": 0.09,
"learning_rate": 9.174311926605506e-05,
"loss": 1.0048,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 9.288990825688074e-05,
"loss": 0.9894,
"step": 405
},
{
"epoch": 0.09,
"learning_rate": 9.403669724770642e-05,
"loss": 0.9692,
"step": 410
},
{
"epoch": 0.1,
"learning_rate": 9.518348623853212e-05,
"loss": 0.9943,
"step": 415
},
{
"epoch": 0.1,
"learning_rate": 9.63302752293578e-05,
"loss": 0.9668,
"step": 420
},
{
"epoch": 0.1,
"learning_rate": 9.74770642201835e-05,
"loss": 0.9863,
"step": 425
},
{
"epoch": 0.1,
"learning_rate": 9.862385321100918e-05,
"loss": 1.0023,
"step": 430
},
{
"epoch": 0.1,
"learning_rate": 9.977064220183486e-05,
"loss": 1.0009,
"step": 435
},
{
"epoch": 0.1,
"learning_rate": 0.00010091743119266055,
"loss": 0.9834,
"step": 440
},
{
"epoch": 0.1,
"learning_rate": 0.00010206422018348625,
"loss": 1.0098,
"step": 445
},
{
"epoch": 0.1,
"learning_rate": 0.00010321100917431193,
"loss": 1.0153,
"step": 450
},
{
"epoch": 0.1,
"learning_rate": 0.00010435779816513762,
"loss": 1.0075,
"step": 455
},
{
"epoch": 0.11,
"learning_rate": 0.00010550458715596329,
"loss": 0.9688,
"step": 460
},
{
"epoch": 0.11,
"learning_rate": 0.000106651376146789,
"loss": 0.979,
"step": 465
},
{
"epoch": 0.11,
"learning_rate": 0.0001077981651376147,
"loss": 0.9917,
"step": 470
},
{
"epoch": 0.11,
"learning_rate": 0.00010894495412844036,
"loss": 0.9693,
"step": 475
},
{
"epoch": 0.11,
"learning_rate": 0.00011009174311926606,
"loss": 1.0148,
"step": 480
},
{
"epoch": 0.11,
"learning_rate": 0.00011123853211009174,
"loss": 0.9609,
"step": 485
},
{
"epoch": 0.11,
"learning_rate": 0.00011238532110091744,
"loss": 1.0058,
"step": 490
},
{
"epoch": 0.11,
"learning_rate": 0.00011353211009174312,
"loss": 1.0091,
"step": 495
},
{
"epoch": 0.11,
"learning_rate": 0.00011467889908256881,
"loss": 1.0326,
"step": 500
},
{
"epoch": 0.12,
"learning_rate": 0.00011582568807339451,
"loss": 1.0317,
"step": 505
},
{
"epoch": 0.12,
"learning_rate": 0.00011697247706422019,
"loss": 1.0165,
"step": 510
},
{
"epoch": 0.12,
"learning_rate": 0.00011811926605504589,
"loss": 0.9853,
"step": 515
},
{
"epoch": 0.12,
"learning_rate": 0.00011926605504587157,
"loss": 0.9794,
"step": 520
},
{
"epoch": 0.12,
"learning_rate": 0.00012041284403669726,
"loss": 1.0263,
"step": 525
},
{
"epoch": 0.12,
"learning_rate": 0.00012155963302752293,
"loss": 0.9859,
"step": 530
},
{
"epoch": 0.12,
"learning_rate": 0.00012270642201834864,
"loss": 1.0099,
"step": 535
},
{
"epoch": 0.12,
"learning_rate": 0.00012385321100917432,
"loss": 1.0113,
"step": 540
},
{
"epoch": 0.13,
"learning_rate": 0.000125,
"loss": 1.0108,
"step": 545
},
{
"epoch": 0.13,
"learning_rate": 0.0001261467889908257,
"loss": 1.019,
"step": 550
},
{
"epoch": 0.13,
"learning_rate": 0.0001272935779816514,
"loss": 1.0102,
"step": 555
},
{
"epoch": 0.13,
"learning_rate": 0.00012844036697247707,
"loss": 0.9832,
"step": 560
},
{
"epoch": 0.13,
"learning_rate": 0.00012958715596330276,
"loss": 0.9758,
"step": 565
},
{
"epoch": 0.13,
"learning_rate": 0.00013073394495412844,
"loss": 0.9706,
"step": 570
},
{
"epoch": 0.13,
"learning_rate": 0.00013188073394495412,
"loss": 0.9925,
"step": 575
},
{
"epoch": 0.13,
"learning_rate": 0.00013302752293577983,
"loss": 0.9824,
"step": 580
},
{
"epoch": 0.13,
"learning_rate": 0.0001341743119266055,
"loss": 0.9866,
"step": 585
},
{
"epoch": 0.14,
"learning_rate": 0.0001353211009174312,
"loss": 0.9674,
"step": 590
},
{
"epoch": 0.14,
"learning_rate": 0.0001364678899082569,
"loss": 0.9735,
"step": 595
},
{
"epoch": 0.14,
"learning_rate": 0.00013761467889908258,
"loss": 0.9868,
"step": 600
},
{
"epoch": 0.14,
"learning_rate": 0.00013876146788990826,
"loss": 0.9869,
"step": 605
},
{
"epoch": 0.14,
"learning_rate": 0.00013990825688073395,
"loss": 0.9869,
"step": 610
},
{
"epoch": 0.14,
"learning_rate": 0.00014105504587155965,
"loss": 0.9901,
"step": 615
},
{
"epoch": 0.14,
"learning_rate": 0.0001422018348623853,
"loss": 1.0292,
"step": 620
},
{
"epoch": 0.14,
"learning_rate": 0.00014334862385321102,
"loss": 0.9755,
"step": 625
},
{
"epoch": 0.14,
"learning_rate": 0.00014449541284403673,
"loss": 0.9811,
"step": 630
},
{
"epoch": 0.15,
"learning_rate": 0.00014564220183486238,
"loss": 1.01,
"step": 635
},
{
"epoch": 0.15,
"learning_rate": 0.0001467889908256881,
"loss": 0.9728,
"step": 640
},
{
"epoch": 0.15,
"learning_rate": 0.00014793577981651377,
"loss": 0.9966,
"step": 645
},
{
"epoch": 0.15,
"learning_rate": 0.00014908256880733945,
"loss": 1.0176,
"step": 650
},
{
"epoch": 0.15,
"learning_rate": 0.00015022935779816513,
"loss": 0.9708,
"step": 655
},
{
"epoch": 0.15,
"learning_rate": 0.00015137614678899084,
"loss": 0.9673,
"step": 660
},
{
"epoch": 0.15,
"learning_rate": 0.00015252293577981652,
"loss": 0.9735,
"step": 665
},
{
"epoch": 0.15,
"learning_rate": 0.0001536697247706422,
"loss": 0.9707,
"step": 670
},
{
"epoch": 0.15,
"learning_rate": 0.00015481651376146791,
"loss": 0.9657,
"step": 675
},
{
"epoch": 0.16,
"learning_rate": 0.0001559633027522936,
"loss": 0.9753,
"step": 680
},
{
"epoch": 0.16,
"learning_rate": 0.00015711009174311928,
"loss": 1.0284,
"step": 685
},
{
"epoch": 0.16,
"learning_rate": 0.00015825688073394496,
"loss": 0.9755,
"step": 690
},
{
"epoch": 0.16,
"learning_rate": 0.00015940366972477064,
"loss": 0.978,
"step": 695
},
{
"epoch": 0.16,
"learning_rate": 0.00016055045871559632,
"loss": 0.9792,
"step": 700
},
{
"epoch": 0.16,
"learning_rate": 0.00016169724770642203,
"loss": 0.9706,
"step": 705
},
{
"epoch": 0.16,
"learning_rate": 0.0001628440366972477,
"loss": 0.9856,
"step": 710
},
{
"epoch": 0.16,
"learning_rate": 0.0001639908256880734,
"loss": 0.9805,
"step": 715
},
{
"epoch": 0.17,
"learning_rate": 0.0001651376146788991,
"loss": 0.9914,
"step": 720
},
{
"epoch": 0.17,
"learning_rate": 0.00016628440366972479,
"loss": 0.9314,
"step": 725
},
{
"epoch": 0.17,
"learning_rate": 0.00016743119266055047,
"loss": 0.9705,
"step": 730
},
{
"epoch": 0.17,
"learning_rate": 0.00016857798165137615,
"loss": 0.9763,
"step": 735
},
{
"epoch": 0.17,
"learning_rate": 0.00016972477064220186,
"loss": 1.0024,
"step": 740
},
{
"epoch": 0.17,
"learning_rate": 0.0001708715596330275,
"loss": 0.9623,
"step": 745
},
{
"epoch": 0.17,
"learning_rate": 0.00017201834862385322,
"loss": 0.996,
"step": 750
},
{
"epoch": 0.17,
"learning_rate": 0.00017316513761467893,
"loss": 0.9984,
"step": 755
},
{
"epoch": 0.17,
"learning_rate": 0.00017431192660550458,
"loss": 0.9738,
"step": 760
},
{
"epoch": 0.18,
"learning_rate": 0.0001754587155963303,
"loss": 0.9751,
"step": 765
},
{
"epoch": 0.18,
"learning_rate": 0.00017660550458715597,
"loss": 0.9783,
"step": 770
},
{
"epoch": 0.18,
"learning_rate": 0.00017775229357798166,
"loss": 1.0108,
"step": 775
},
{
"epoch": 0.18,
"learning_rate": 0.00017889908256880734,
"loss": 0.9498,
"step": 780
},
{
"epoch": 0.18,
"learning_rate": 0.00018004587155963305,
"loss": 0.9716,
"step": 785
},
{
"epoch": 0.18,
"learning_rate": 0.00018119266055045873,
"loss": 1.0429,
"step": 790
},
{
"epoch": 0.18,
"learning_rate": 0.0001823394495412844,
"loss": 1.0272,
"step": 795
},
{
"epoch": 0.18,
"learning_rate": 0.00018348623853211012,
"loss": 0.9578,
"step": 800
},
{
"epoch": 0.18,
"learning_rate": 0.0001846330275229358,
"loss": 0.9731,
"step": 805
},
{
"epoch": 0.19,
"learning_rate": 0.00018577981651376148,
"loss": 0.9592,
"step": 810
},
{
"epoch": 0.19,
"learning_rate": 0.00018692660550458716,
"loss": 0.9514,
"step": 815
},
{
"epoch": 0.19,
"learning_rate": 0.00018807339449541284,
"loss": 0.955,
"step": 820
},
{
"epoch": 0.19,
"learning_rate": 0.00018922018348623853,
"loss": 0.9706,
"step": 825
},
{
"epoch": 0.19,
"learning_rate": 0.00019036697247706424,
"loss": 0.975,
"step": 830
},
{
"epoch": 0.19,
"learning_rate": 0.00019151376146788992,
"loss": 0.9891,
"step": 835
},
{
"epoch": 0.19,
"learning_rate": 0.0001926605504587156,
"loss": 1.0004,
"step": 840
},
{
"epoch": 0.19,
"learning_rate": 0.0001938073394495413,
"loss": 0.9773,
"step": 845
},
{
"epoch": 0.2,
"learning_rate": 0.000194954128440367,
"loss": 0.9578,
"step": 850
},
{
"epoch": 0.2,
"learning_rate": 0.00019610091743119267,
"loss": 0.9549,
"step": 855
},
{
"epoch": 0.2,
"learning_rate": 0.00019724770642201835,
"loss": 0.9628,
"step": 860
},
{
"epoch": 0.2,
"learning_rate": 0.00019839449541284406,
"loss": 0.9863,
"step": 865
},
{
"epoch": 0.2,
"learning_rate": 0.00019954128440366972,
"loss": 0.982,
"step": 870
},
{
"epoch": 0.2,
"learning_rate": 0.00019999992777982591,
"loss": 1.0301,
"step": 875
},
{
"epoch": 0.2,
"learning_rate": 0.00019999948643469536,
"loss": 0.9882,
"step": 880
},
{
"epoch": 0.2,
"learning_rate": 0.00019999864386852187,
"loss": 0.9953,
"step": 885
},
{
"epoch": 0.2,
"learning_rate": 0.00019999740008468594,
"loss": 0.9656,
"step": 890
},
{
"epoch": 0.21,
"learning_rate": 0.00019999575508817795,
"loss": 1.0045,
"step": 895
},
{
"epoch": 0.21,
"learning_rate": 0.00019999370888559804,
"loss": 0.9876,
"step": 900
},
{
"epoch": 0.21,
"learning_rate": 0.00019999126148515601,
"loss": 0.9823,
"step": 905
},
{
"epoch": 0.21,
"learning_rate": 0.0001999884128966714,
"loss": 0.9675,
"step": 910
},
{
"epoch": 0.21,
"learning_rate": 0.0001999851631315734,
"loss": 0.9774,
"step": 915
},
{
"epoch": 0.21,
"learning_rate": 0.00019998151220290082,
"loss": 0.9583,
"step": 920
},
{
"epoch": 0.21,
"learning_rate": 0.00019997746012530206,
"loss": 0.9841,
"step": 925
},
{
"epoch": 0.21,
"learning_rate": 0.00019997300691503497,
"loss": 0.9405,
"step": 930
},
{
"epoch": 0.21,
"learning_rate": 0.00019996815258996686,
"loss": 0.9624,
"step": 935
},
{
"epoch": 0.22,
"learning_rate": 0.0001999628971695744,
"loss": 0.9885,
"step": 940
},
{
"epoch": 0.22,
"learning_rate": 0.00019995724067494353,
"loss": 1.0082,
"step": 945
},
{
"epoch": 0.22,
"learning_rate": 0.00019995118312876944,
"loss": 0.959,
"step": 950
},
{
"epoch": 0.22,
"learning_rate": 0.0001999447245553564,
"loss": 1.0017,
"step": 955
},
{
"epoch": 0.22,
"learning_rate": 0.0001999378649806177,
"loss": 0.9921,
"step": 960
},
{
"epoch": 0.22,
"learning_rate": 0.00019993060443207554,
"loss": 1.0108,
"step": 965
},
{
"epoch": 0.22,
"learning_rate": 0.00019992294293886095,
"loss": 0.9921,
"step": 970
},
{
"epoch": 0.22,
"learning_rate": 0.00019991488053171353,
"loss": 0.9854,
"step": 975
},
{
"epoch": 0.22,
"learning_rate": 0.00019990641724298156,
"loss": 0.9967,
"step": 980
},
{
"epoch": 0.23,
"learning_rate": 0.00019989755310662174,
"loss": 0.9682,
"step": 985
},
{
"epoch": 0.23,
"learning_rate": 0.000199888288158199,
"loss": 0.9793,
"step": 990
},
{
"epoch": 0.23,
"learning_rate": 0.00019987862243488647,
"loss": 0.963,
"step": 995
},
{
"epoch": 0.23,
"learning_rate": 0.00019986855597546526,
"loss": 0.9852,
"step": 1000
},
{
"epoch": 0.23,
"learning_rate": 0.0001998580888203243,
"loss": 0.9586,
"step": 1005
},
{
"epoch": 0.23,
"learning_rate": 0.00019984722101146029,
"loss": 0.9769,
"step": 1010
},
{
"epoch": 0.23,
"learning_rate": 0.00019983595259247735,
"loss": 0.9582,
"step": 1015
},
{
"epoch": 0.23,
"learning_rate": 0.000199824283608587,
"loss": 0.9767,
"step": 1020
},
{
"epoch": 0.24,
"learning_rate": 0.00019981221410660788,
"loss": 0.9681,
"step": 1025
},
{
"epoch": 0.24,
"learning_rate": 0.00019979974413496566,
"loss": 0.9942,
"step": 1030
},
{
"epoch": 0.24,
"learning_rate": 0.00019978687374369271,
"loss": 0.982,
"step": 1035
},
{
"epoch": 0.24,
"learning_rate": 0.00019977360298442803,
"loss": 0.9758,
"step": 1040
},
{
"epoch": 0.24,
"learning_rate": 0.00019975993191041695,
"loss": 1.0581,
"step": 1045
},
{
"epoch": 0.24,
"learning_rate": 0.00019974586057651102,
"loss": 0.9499,
"step": 1050
},
{
"epoch": 0.24,
"learning_rate": 0.0001997313890391676,
"loss": 0.9758,
"step": 1055
},
{
"epoch": 0.24,
"learning_rate": 0.00019971651735644995,
"loss": 0.9742,
"step": 1060
},
{
"epoch": 0.24,
"learning_rate": 0.00019970124558802663,
"loss": 0.9327,
"step": 1065
},
{
"epoch": 0.25,
"learning_rate": 0.00019968557379517152,
"loss": 0.9624,
"step": 1070
},
{
"epoch": 0.25,
"learning_rate": 0.00019966950204076346,
"loss": 0.9812,
"step": 1075
},
{
"epoch": 0.25,
"learning_rate": 0.00019965303038928608,
"loss": 0.9728,
"step": 1080
},
{
"epoch": 0.25,
"learning_rate": 0.0001996361589068274,
"loss": 0.9561,
"step": 1085
},
{
"epoch": 0.25,
"learning_rate": 0.00019961888766107972,
"loss": 0.9741,
"step": 1090
},
{
"epoch": 0.25,
"learning_rate": 0.0001996012167213393,
"loss": 1.0002,
"step": 1095
},
{
"epoch": 0.25,
"learning_rate": 0.00019958314615850598,
"loss": 0.955,
"step": 1100
},
{
"epoch": 0.25,
"learning_rate": 0.00019956467604508312,
"loss": 0.9826,
"step": 1105
},
{
"epoch": 0.25,
"learning_rate": 0.00019954580645517697,
"loss": 0.9633,
"step": 1110
},
{
"epoch": 0.26,
"learning_rate": 0.00019952653746449676,
"loss": 0.9641,
"step": 1115
},
{
"epoch": 0.26,
"learning_rate": 0.0001995068691503541,
"loss": 0.9907,
"step": 1120
},
{
"epoch": 0.26,
"learning_rate": 0.00019948680159166283,
"loss": 0.9567,
"step": 1125
},
{
"epoch": 0.26,
"learning_rate": 0.00019946633486893865,
"loss": 0.9908,
"step": 1130
},
{
"epoch": 0.26,
"learning_rate": 0.00019944546906429874,
"loss": 0.9769,
"step": 1135
},
{
"epoch": 0.26,
"learning_rate": 0.00019942420426146153,
"loss": 1.0025,
"step": 1140
},
{
"epoch": 0.26,
"learning_rate": 0.00019940254054574637,
"loss": 0.9797,
"step": 1145
},
{
"epoch": 0.26,
"learning_rate": 0.00019938047800407302,
"loss": 0.9944,
"step": 1150
},
{
"epoch": 0.27,
"learning_rate": 0.00019935801672496157,
"loss": 0.9826,
"step": 1155
},
{
"epoch": 0.27,
"learning_rate": 0.00019933515679853182,
"loss": 0.9898,
"step": 1160
},
{
"epoch": 0.27,
"learning_rate": 0.00019931189831650303,
"loss": 0.9531,
"step": 1165
},
{
"epoch": 0.27,
"learning_rate": 0.0001992882413721937,
"loss": 0.9755,
"step": 1170
},
{
"epoch": 0.27,
"learning_rate": 0.00019926418606052088,
"loss": 0.9976,
"step": 1175
},
{
"epoch": 0.27,
"learning_rate": 0.0001992397324780001,
"loss": 0.9403,
"step": 1180
},
{
"epoch": 0.27,
"learning_rate": 0.00019921488072274475,
"loss": 0.9594,
"step": 1185
},
{
"epoch": 0.27,
"learning_rate": 0.00019918963089446577,
"loss": 0.951,
"step": 1190
},
{
"epoch": 0.27,
"learning_rate": 0.0001991639830944714,
"loss": 0.9648,
"step": 1195
},
{
"epoch": 0.28,
"learning_rate": 0.00019913793742566647,
"loss": 0.9617,
"step": 1200
},
{
"epoch": 0.28,
"learning_rate": 0.00019911149399255224,
"loss": 0.9618,
"step": 1205
},
{
"epoch": 0.28,
"learning_rate": 0.00019908465290122585,
"loss": 0.9366,
"step": 1210
},
{
"epoch": 0.28,
"learning_rate": 0.00019905741425938002,
"loss": 0.9376,
"step": 1215
},
{
"epoch": 0.28,
"learning_rate": 0.00019902977817630243,
"loss": 1.0024,
"step": 1220
},
{
"epoch": 0.28,
"learning_rate": 0.0001990017447628754,
"loss": 0.938,
"step": 1225
},
{
"epoch": 0.28,
"learning_rate": 0.00019897331413157548,
"loss": 0.9832,
"step": 1230
},
{
"epoch": 0.28,
"learning_rate": 0.00019894448639647304,
"loss": 0.9587,
"step": 1235
},
{
"epoch": 0.28,
"learning_rate": 0.00019891526167323145,
"loss": 0.9529,
"step": 1240
},
{
"epoch": 0.29,
"learning_rate": 0.00019888564007910714,
"loss": 0.9418,
"step": 1245
},
{
"epoch": 0.29,
"learning_rate": 0.0001988556217329488,
"loss": 0.9461,
"step": 1250
},
{
"epoch": 0.29,
"learning_rate": 0.0001988252067551969,
"loss": 1.0006,
"step": 1255
},
{
"epoch": 0.29,
"learning_rate": 0.00019879439526788341,
"loss": 0.9565,
"step": 1260
},
{
"epoch": 0.29,
"learning_rate": 0.00019876318739463114,
"loss": 0.9365,
"step": 1265
},
{
"epoch": 0.29,
"learning_rate": 0.00019873158326065327,
"loss": 0.9786,
"step": 1270
},
{
"epoch": 0.29,
"learning_rate": 0.00019869958299275289,
"loss": 1.0009,
"step": 1275
},
{
"epoch": 0.29,
"learning_rate": 0.00019866718671932249,
"loss": 0.9631,
"step": 1280
},
{
"epoch": 0.29,
"learning_rate": 0.00019863439457034333,
"loss": 0.9444,
"step": 1285
},
{
"epoch": 0.3,
"learning_rate": 0.00019860120667738516,
"loss": 0.955,
"step": 1290
},
{
"epoch": 0.3,
"learning_rate": 0.00019856762317360546,
"loss": 0.9722,
"step": 1295
},
{
"epoch": 0.3,
"learning_rate": 0.00019853364419374902,
"loss": 0.9711,
"step": 1300
},
{
"epoch": 0.3,
"learning_rate": 0.00019849926987414732,
"loss": 0.9807,
"step": 1305
},
{
"epoch": 0.3,
"learning_rate": 0.00019846450035271808,
"loss": 0.9659,
"step": 1310
},
{
"epoch": 0.3,
"learning_rate": 0.0001984293357689647,
"loss": 0.9607,
"step": 1315
},
{
"epoch": 0.3,
"learning_rate": 0.00019839377626397554,
"loss": 1.013,
"step": 1320
},
{
"epoch": 0.3,
"learning_rate": 0.00019835782198042364,
"loss": 0.9871,
"step": 1325
},
{
"epoch": 0.31,
"learning_rate": 0.00019832147306256576,
"loss": 0.9709,
"step": 1330
},
{
"epoch": 0.31,
"learning_rate": 0.0001982847296562423,
"loss": 0.9503,
"step": 1335
},
{
"epoch": 0.31,
"learning_rate": 0.00019824759190887622,
"loss": 0.9515,
"step": 1340
},
{
"epoch": 0.31,
"learning_rate": 0.00019821005996947273,
"loss": 0.9858,
"step": 1345
},
{
"epoch": 0.31,
"learning_rate": 0.00019817213398861866,
"loss": 0.9564,
"step": 1350
},
{
"epoch": 0.31,
"learning_rate": 0.00019813381411848185,
"loss": 0.9755,
"step": 1355
},
{
"epoch": 0.31,
"learning_rate": 0.0001980951005128104,
"loss": 0.975,
"step": 1360
},
{
"epoch": 0.31,
"learning_rate": 0.0001980559933269323,
"loss": 0.9844,
"step": 1365
},
{
"epoch": 0.31,
"learning_rate": 0.00019801649271775459,
"loss": 0.9649,
"step": 1370
},
{
"epoch": 0.32,
"learning_rate": 0.00019797659884376282,
"loss": 0.9912,
"step": 1375
},
{
"epoch": 0.32,
"learning_rate": 0.00019793631186502047,
"loss": 0.9805,
"step": 1380
},
{
"epoch": 0.32,
"learning_rate": 0.00019789563194316817,
"loss": 0.9657,
"step": 1385
},
{
"epoch": 0.32,
"learning_rate": 0.00019785455924142318,
"loss": 0.9654,
"step": 1390
},
{
"epoch": 0.32,
"learning_rate": 0.00019781309392457867,
"loss": 0.9533,
"step": 1395
},
{
"epoch": 0.32,
"learning_rate": 0.0001977712361590031,
"loss": 0.9811,
"step": 1400
},
{
"epoch": 0.32,
"learning_rate": 0.00019772898611263942,
"loss": 0.9532,
"step": 1405
},
{
"epoch": 0.32,
"learning_rate": 0.00019768634395500465,
"loss": 0.9574,
"step": 1410
},
{
"epoch": 0.32,
"learning_rate": 0.00019764330985718896,
"loss": 0.9648,
"step": 1415
},
{
"epoch": 0.33,
"learning_rate": 0.00019759988399185505,
"loss": 1.0009,
"step": 1420
},
{
"epoch": 0.33,
"learning_rate": 0.0001975560665332376,
"loss": 0.981,
"step": 1425
},
{
"epoch": 0.33,
"learning_rate": 0.00019751185765714234,
"loss": 0.9837,
"step": 1430
},
{
"epoch": 0.33,
"learning_rate": 0.00019746725754094544,
"loss": 0.9704,
"step": 1435
},
{
"epoch": 0.33,
"learning_rate": 0.00019742226636359296,
"loss": 0.9908,
"step": 1440
},
{
"epoch": 0.33,
"learning_rate": 0.00019737688430559986,
"loss": 0.9459,
"step": 1445
},
{
"epoch": 0.33,
"learning_rate": 0.00019733111154904943,
"loss": 0.925,
"step": 1450
},
{
"epoch": 0.33,
"learning_rate": 0.00019728494827759256,
"loss": 0.9632,
"step": 1455
},
{
"epoch": 0.34,
"learning_rate": 0.00019723839467644699,
"loss": 0.9579,
"step": 1460
},
{
"epoch": 0.34,
"learning_rate": 0.00019719145093239645,
"loss": 0.9994,
"step": 1465
},
{
"epoch": 0.34,
"learning_rate": 0.00019714411723379015,
"loss": 0.9736,
"step": 1470
},
{
"epoch": 0.34,
"learning_rate": 0.00019709639377054177,
"loss": 0.9684,
"step": 1475
},
{
"epoch": 0.34,
"learning_rate": 0.0001970482807341289,
"loss": 0.9459,
"step": 1480
},
{
"epoch": 0.34,
"learning_rate": 0.00019699977831759218,
"loss": 0.9807,
"step": 1485
},
{
"epoch": 0.34,
"learning_rate": 0.0001969508867155345,
"loss": 0.9577,
"step": 1490
},
{
"epoch": 0.34,
"learning_rate": 0.00019690160612412022,
"loss": 0.979,
"step": 1495
},
{
"epoch": 0.34,
"learning_rate": 0.00019685193674107452,
"loss": 0.9722,
"step": 1500
},
{
"epoch": 0.35,
"learning_rate": 0.0001968018787656824,
"loss": 0.9831,
"step": 1505
},
{
"epoch": 0.35,
"learning_rate": 0.00019675143239878805,
"loss": 0.9767,
"step": 1510
},
{
"epoch": 0.35,
"learning_rate": 0.00019670059784279394,
"loss": 0.9814,
"step": 1515
},
{
"epoch": 0.35,
"learning_rate": 0.00019664937530166002,
"loss": 1.0078,
"step": 1520
},
{
"epoch": 0.35,
"learning_rate": 0.000196597764980903,
"loss": 1.2574,
"step": 1525
},
{
"epoch": 0.35,
"learning_rate": 0.00019654576708759538,
"loss": 1.0538,
"step": 1530
},
{
"epoch": 0.35,
"learning_rate": 0.00019649338183036475,
"loss": 1.0472,
"step": 1535
},
{
"epoch": 0.35,
"learning_rate": 0.00019644060941939286,
"loss": 5.8167,
"step": 1540
},
{
"epoch": 0.35,
"learning_rate": 0.0001963874500664148,
"loss": 8.9193,
"step": 1545
},
{
"epoch": 0.36,
"learning_rate": 0.00019633390398471817,
"loss": 9.1751,
"step": 1550
},
{
"epoch": 0.36,
"learning_rate": 0.00019627997138914224,
"loss": 7.9315,
"step": 1555
},
{
"epoch": 0.36,
"learning_rate": 0.00019622565249607704,
"loss": 7.6649,
"step": 1560
},
{
"epoch": 0.36,
"learning_rate": 0.00019617094752346254,
"loss": 7.5712,
"step": 1565
},
{
"epoch": 0.36,
"learning_rate": 0.0001961158566907877,
"loss": 7.5273,
"step": 1570
},
{
"epoch": 0.36,
"learning_rate": 0.00019606038021908963,
"loss": 7.5129,
"step": 1575
},
{
"epoch": 0.36,
"learning_rate": 0.00019600451833095287,
"loss": 7.4763,
"step": 1580
},
{
"epoch": 0.36,
"learning_rate": 0.00019594827125050807,
"loss": 7.4253,
"step": 1585
},
{
"epoch": 0.36,
"learning_rate": 0.00019589163920343163,
"loss": 7.3907,
"step": 1590
},
{
"epoch": 0.37,
"learning_rate": 0.00019583462241694436,
"loss": 7.3799,
"step": 1595
},
{
"epoch": 0.37,
"learning_rate": 0.00019577722111981078,
"loss": 7.349,
"step": 1600
},
{
"epoch": 0.37,
"learning_rate": 0.00019571943554233819,
"loss": 7.3447,
"step": 1605
},
{
"epoch": 0.37,
"learning_rate": 0.00019566126591637558,
"loss": 7.2859,
"step": 1610
},
{
"epoch": 0.37,
"learning_rate": 0.00019560271247531308,
"loss": 7.309,
"step": 1615
},
{
"epoch": 0.37,
"learning_rate": 0.0001955437754540805,
"loss": 7.3254,
"step": 1620
},
{
"epoch": 0.37,
"learning_rate": 0.00019548445508914681,
"loss": 7.2993,
"step": 1625
},
{
"epoch": 0.37,
"learning_rate": 0.00019542475161851906,
"loss": 7.3311,
"step": 1630
},
{
"epoch": 0.38,
"learning_rate": 0.00019536466528174133,
"loss": 7.3128,
"step": 1635
},
{
"epoch": 0.38,
"learning_rate": 0.00019530419631989392,
"loss": 7.316,
"step": 1640
},
{
"epoch": 0.38,
"learning_rate": 0.00019524334497559226,
"loss": 7.2758,
"step": 1645
},
{
"epoch": 0.38,
"learning_rate": 0.00019518211149298595,
"loss": 7.3082,
"step": 1650
},
{
"epoch": 0.38,
"learning_rate": 0.00019512049611775795,
"loss": 7.2448,
"step": 1655
},
{
"epoch": 0.38,
"learning_rate": 0.00019505849909712332,
"loss": 7.2816,
"step": 1660
},
{
"epoch": 0.38,
"learning_rate": 0.00019499612067982848,
"loss": 7.2852,
"step": 1665
},
{
"epoch": 0.38,
"learning_rate": 0.00019493336111615003,
"loss": 7.2445,
"step": 1670
},
{
"epoch": 0.38,
"learning_rate": 0.00019487022065789385,
"loss": 7.2607,
"step": 1675
},
{
"epoch": 0.39,
"learning_rate": 0.00019480669955839402,
"loss": 7.2453,
"step": 1680
},
{
"epoch": 0.39,
"learning_rate": 0.00019474279807251186,
"loss": 7.2839,
"step": 1685
},
{
"epoch": 0.39,
"learning_rate": 0.00019467851645663494,
"loss": 7.2569,
"step": 1690
},
{
"epoch": 0.39,
"learning_rate": 0.00019461385496867594,
"loss": 7.2633,
"step": 1695
},
{
"epoch": 0.39,
"learning_rate": 0.00019454881386807163,
"loss": 7.2762,
"step": 1700
},
{
"epoch": 0.39,
"learning_rate": 0.000194483393415782,
"loss": 7.2273,
"step": 1705
},
{
"epoch": 0.39,
"learning_rate": 0.00019441759387428903,
"loss": 7.289,
"step": 1710
},
{
"epoch": 0.39,
"learning_rate": 0.0001943514155075956,
"loss": 7.2969,
"step": 1715
},
{
"epoch": 0.39,
"learning_rate": 0.00019428485858122472,
"loss": 7.2368,
"step": 1720
},
{
"epoch": 0.4,
"learning_rate": 0.00019421792336221807,
"loss": 7.2478,
"step": 1725
},
{
"epoch": 0.4,
"learning_rate": 0.00019415061011913523,
"loss": 7.2593,
"step": 1730
},
{
"epoch": 0.4,
"learning_rate": 0.00019408291912205246,
"loss": 7.2571,
"step": 1735
},
{
"epoch": 0.4,
"learning_rate": 0.00019401485064256176,
"loss": 7.2432,
"step": 1740
},
{
"epoch": 0.4,
"learning_rate": 0.00019394640495376954,
"loss": 7.2514,
"step": 1745
},
{
"epoch": 0.4,
"learning_rate": 0.0001938775823302957,
"loss": 7.2465,
"step": 1750
},
{
"epoch": 0.4,
"learning_rate": 0.00019380838304827256,
"loss": 7.2325,
"step": 1755
},
{
"epoch": 0.4,
"learning_rate": 0.00019373880738534358,
"loss": 7.2787,
"step": 1760
},
{
"epoch": 0.41,
"learning_rate": 0.0001936688556206624,
"loss": 7.2248,
"step": 1765
},
{
"epoch": 0.41,
"learning_rate": 0.00019359852803489168,
"loss": 7.2439,
"step": 1770
},
{
"epoch": 0.41,
"learning_rate": 0.00019352782491020195,
"loss": 7.2413,
"step": 1775
},
{
"epoch": 0.41,
"learning_rate": 0.0001934567465302704,
"loss": 7.2431,
"step": 1780
},
{
"epoch": 0.41,
"learning_rate": 0.00019338529318028,
"loss": 7.2493,
"step": 1785
},
{
"epoch": 0.41,
"learning_rate": 0.00019331346514691813,
"loss": 7.2592,
"step": 1790
},
{
"epoch": 0.41,
"learning_rate": 0.00019324126271837538,
"loss": 7.2195,
"step": 1795
},
{
"epoch": 0.41,
"learning_rate": 0.00019316868618434455,
"loss": 7.2303,
"step": 1800
},
{
"epoch": 0.41,
"learning_rate": 0.00019309573583601954,
"loss": 7.2496,
"step": 1805
},
{
"epoch": 0.42,
"learning_rate": 0.00019302241196609397,
"loss": 7.2521,
"step": 1810
},
{
"epoch": 0.42,
"learning_rate": 0.00019294871486876013,
"loss": 7.246,
"step": 1815
},
{
"epoch": 0.42,
"learning_rate": 0.0001928746448397078,
"loss": 7.2781,
"step": 1820
},
{
"epoch": 0.42,
"learning_rate": 0.0001928002021761231,
"loss": 7.2274,
"step": 1825
},
{
"epoch": 0.42,
"learning_rate": 0.00019272538717668715,
"loss": 7.2651,
"step": 1830
},
{
"epoch": 0.42,
"learning_rate": 0.000192650200141575,
"loss": 7.2463,
"step": 1835
},
{
"epoch": 0.42,
"learning_rate": 0.00019257464137245446,
"loss": 7.2502,
"step": 1840
},
{
"epoch": 0.42,
"learning_rate": 0.0001924987111724847,
"loss": 7.2209,
"step": 1845
},
{
"epoch": 0.42,
"learning_rate": 0.0001924224098463153,
"loss": 7.2259,
"step": 1850
},
{
"epoch": 0.43,
"learning_rate": 0.00019234573770008474,
"loss": 7.2525,
"step": 1855
},
{
"epoch": 0.43,
"learning_rate": 0.00019226869504141943,
"loss": 7.2342,
"step": 1860
},
{
"epoch": 0.43,
"learning_rate": 0.00019219128217943233,
"loss": 7.2226,
"step": 1865
},
{
"epoch": 0.43,
"learning_rate": 0.00019211349942472165,
"loss": 7.2734,
"step": 1870
},
{
"epoch": 0.43,
"learning_rate": 0.00019203534708936985,
"loss": 7.237,
"step": 1875
},
{
"epoch": 0.43,
"learning_rate": 0.00019195682548694208,
"loss": 7.2262,
"step": 1880
},
{
"epoch": 0.43,
"learning_rate": 0.00019187793493248524,
"loss": 7.2182,
"step": 1885
},
{
"epoch": 0.43,
"learning_rate": 0.00019179867574252638,
"loss": 7.2421,
"step": 1890
},
{
"epoch": 0.43,
"learning_rate": 0.00019171904823507178,
"loss": 7.2366,
"step": 1895
},
{
"epoch": 0.44,
"learning_rate": 0.00019163905272960528,
"loss": 7.221,
"step": 1900
},
{
"epoch": 0.44,
"learning_rate": 0.0001915586895470874,
"loss": 7.2467,
"step": 1905
},
{
"epoch": 0.44,
"learning_rate": 0.0001914779590099538,
"loss": 7.2481,
"step": 1910
},
{
"epoch": 0.44,
"learning_rate": 0.00019139686144211405,
"loss": 7.2351,
"step": 1915
},
{
"epoch": 0.44,
"learning_rate": 0.00019131539716895024,
"loss": 7.246,
"step": 1920
},
{
"epoch": 0.44,
"learning_rate": 0.00019123356651731593,
"loss": 7.2139,
"step": 1925
},
{
"epoch": 0.44,
"learning_rate": 0.00019115136981553464,
"loss": 7.2491,
"step": 1930
},
{
"epoch": 0.44,
"learning_rate": 0.00019106880739339844,
"loss": 7.334,
"step": 1935
},
{
"epoch": 0.45,
"learning_rate": 0.00019098587958216688,
"loss": 7.2713,
"step": 1940
},
{
"epoch": 0.45,
"learning_rate": 0.0001909025867145655,
"loss": 7.2806,
"step": 1945
},
{
"epoch": 0.45,
"learning_rate": 0.00019081892912478456,
"loss": 7.2481,
"step": 1950
},
{
"epoch": 0.45,
"learning_rate": 0.00019073490714847759,
"loss": 7.2035,
"step": 1955
},
{
"epoch": 0.45,
"learning_rate": 0.00019065052112276018,
"loss": 7.2353,
"step": 1960
},
{
"epoch": 0.45,
"learning_rate": 0.00019056577138620856,
"loss": 7.2377,
"step": 1965
},
{
"epoch": 0.45,
"learning_rate": 0.00019048065827885827,
"loss": 7.2368,
"step": 1970
},
{
"epoch": 0.45,
"learning_rate": 0.00019039518214220276,
"loss": 7.2461,
"step": 1975
},
{
"epoch": 0.45,
"learning_rate": 0.000190309343319192,
"loss": 7.2415,
"step": 1980
},
{
"epoch": 0.46,
"learning_rate": 0.00019022314215423123,
"loss": 7.2384,
"step": 1985
},
{
"epoch": 0.46,
"learning_rate": 0.00019013657899317942,
"loss": 7.2566,
"step": 1990
},
{
"epoch": 0.46,
"learning_rate": 0.00019004965418334798,
"loss": 7.2231,
"step": 1995
},
{
"epoch": 0.46,
"learning_rate": 0.0001899623680734993,
"loss": 7.2459,
"step": 2000
},
{
"epoch": 0.46,
"learning_rate": 0.00018987472101384545,
"loss": 7.214,
"step": 2005
},
{
"epoch": 0.46,
"learning_rate": 0.00018978671335604665,
"loss": 7.221,
"step": 2010
},
{
"epoch": 0.46,
"learning_rate": 0.00018969834545321,
"loss": 7.2021,
"step": 2015
},
{
"epoch": 0.46,
"learning_rate": 0.00018960961765988792,
"loss": 7.2066,
"step": 2020
},
{
"epoch": 0.46,
"learning_rate": 0.00018952053033207683,
"loss": 7.3269,
"step": 2025
},
{
"epoch": 0.47,
"learning_rate": 0.00018943108382721562,
"loss": 7.3757,
"step": 2030
},
{
"epoch": 0.47,
"learning_rate": 0.0001893412785041844,
"loss": 7.2544,
"step": 2035
},
{
"epoch": 0.47,
"learning_rate": 0.00018925111472330283,
"loss": 7.2194,
"step": 2040
},
{
"epoch": 0.47,
"learning_rate": 0.00018916059284632884,
"loss": 7.2536,
"step": 2045
},
{
"epoch": 0.47,
"learning_rate": 0.00018906971323645713,
"loss": 7.2394,
"step": 2050
},
{
"epoch": 0.47,
"learning_rate": 0.0001889784762583177,
"loss": 7.2531,
"step": 2055
},
{
"epoch": 0.47,
"learning_rate": 0.00018888688227797432,
"loss": 7.2228,
"step": 2060
},
{
"epoch": 0.47,
"learning_rate": 0.0001887949316629233,
"loss": 7.2619,
"step": 2065
},
{
"epoch": 0.48,
"learning_rate": 0.00018870262478209163,
"loss": 7.2391,
"step": 2070
},
{
"epoch": 0.48,
"learning_rate": 0.00018860996200583595,
"loss": 7.3725,
"step": 2075
},
{
"epoch": 0.48,
"learning_rate": 0.00018851694370594069,
"loss": 7.3694,
"step": 2080
},
{
"epoch": 0.48,
"learning_rate": 0.00018842357025561674,
"loss": 7.4133,
"step": 2085
},
{
"epoch": 0.48,
"learning_rate": 0.00018832984202949996,
"loss": 7.3363,
"step": 2090
},
{
"epoch": 0.48,
"learning_rate": 0.0001882357594036497,
"loss": 7.2878,
"step": 2095
},
{
"epoch": 0.48,
"learning_rate": 0.00018814132275554713,
"loss": 7.2645,
"step": 2100
},
{
"epoch": 0.48,
"learning_rate": 0.0001880465324640939,
"loss": 7.2928,
"step": 2105
},
{
"epoch": 0.48,
"learning_rate": 0.0001879513889096106,
"loss": 7.289,
"step": 2110
},
{
"epoch": 0.49,
"learning_rate": 0.00018785589247383513,
"loss": 7.2577,
"step": 2115
},
{
"epoch": 0.49,
"learning_rate": 0.00018776004353992124,
"loss": 7.2575,
"step": 2120
},
{
"epoch": 0.49,
"learning_rate": 0.00018766384249243703,
"loss": 7.2455,
"step": 2125
},
{
"epoch": 0.49,
"learning_rate": 0.00018756728971736327,
"loss": 7.1998,
"step": 2130
},
{
"epoch": 0.49,
"learning_rate": 0.00018747038560209208,
"loss": 7.1431,
"step": 2135
},
{
"epoch": 0.49,
"learning_rate": 0.00018737313053542512,
"loss": 7.1091,
"step": 2140
},
{
"epoch": 0.49,
"learning_rate": 0.00018727552490757217,
"loss": 7.1545,
"step": 2145
},
{
"epoch": 0.49,
"learning_rate": 0.0001871775691101496,
"loss": 7.1363,
"step": 2150
},
{
"epoch": 0.49,
"learning_rate": 0.00018707926353617865,
"loss": 7.0623,
"step": 2155
},
{
"epoch": 0.5,
"learning_rate": 0.00018698060858008403,
"loss": 7.0172,
"step": 2160
},
{
"epoch": 0.5,
"learning_rate": 0.00018688160463769222,
"loss": 7.0213,
"step": 2165
},
{
"epoch": 0.5,
"learning_rate": 0.00018678225210622986,
"loss": 6.9647,
"step": 2170
},
{
"epoch": 0.5,
"learning_rate": 0.00018668255138432237,
"loss": 6.9448,
"step": 2175
},
{
"epoch": 0.5,
"learning_rate": 0.00018658250287199196,
"loss": 6.9318,
"step": 2180
},
{
"epoch": 0.5,
"learning_rate": 0.0001864821069706564,
"loss": 6.9102,
"step": 2185
},
{
"epoch": 0.5,
"learning_rate": 0.00018638136408312728,
"loss": 6.9001,
"step": 2190
},
{
"epoch": 0.5,
"learning_rate": 0.00018628027461360834,
"loss": 6.8805,
"step": 2195
},
{
"epoch": 0.5,
"learning_rate": 0.0001861788389676939,
"loss": 6.8864,
"step": 2200
},
{
"epoch": 0.51,
"learning_rate": 0.00018607705755236716,
"loss": 6.8746,
"step": 2205
},
{
"epoch": 0.51,
"learning_rate": 0.00018597493077599867,
"loss": 6.8675,
"step": 2210
},
{
"epoch": 0.51,
"learning_rate": 0.0001858724590483447,
"loss": 6.8548,
"step": 2215
},
{
"epoch": 0.51,
"learning_rate": 0.00018576964278054544,
"loss": 6.8581,
"step": 2220
},
{
"epoch": 0.51,
"learning_rate": 0.00018566648238512357,
"loss": 6.8075,
"step": 2225
},
{
"epoch": 0.51,
"learning_rate": 0.00018556297827598242,
"loss": 6.8101,
"step": 2230
},
{
"epoch": 0.51,
"learning_rate": 0.00018545913086840434,
"loss": 6.8087,
"step": 2235
},
{
"epoch": 0.51,
"learning_rate": 0.00018535494057904915,
"loss": 6.778,
"step": 2240
},
{
"epoch": 0.52,
"learning_rate": 0.00018525040782595235,
"loss": 6.7706,
"step": 2245
},
{
"epoch": 0.52,
"learning_rate": 0.00018514553302852356,
"loss": 6.723,
"step": 2250
},
{
"epoch": 0.52,
"learning_rate": 0.0001850403166075446,
"loss": 6.7416,
"step": 2255
},
{
"epoch": 0.52,
"learning_rate": 0.00018493475898516813,
"loss": 6.7556,
"step": 2260
},
{
"epoch": 0.52,
"learning_rate": 0.0001848288605849157,
"loss": 6.7387,
"step": 2265
},
{
"epoch": 0.52,
"learning_rate": 0.00018472262183167614,
"loss": 6.7505,
"step": 2270
},
{
"epoch": 0.52,
"learning_rate": 0.00018461604315170389,
"loss": 6.727,
"step": 2275
},
{
"epoch": 0.52,
"learning_rate": 0.00018450912497261723,
"loss": 6.6703,
"step": 2280
},
{
"epoch": 0.52,
"learning_rate": 0.00018440186772339663,
"loss": 6.7113,
"step": 2285
},
{
"epoch": 0.53,
"learning_rate": 0.00018429427183438288,
"loss": 6.6782,
"step": 2290
},
{
"epoch": 0.53,
"learning_rate": 0.00018418633773727562,
"loss": 6.6859,
"step": 2295
},
{
"epoch": 0.53,
"learning_rate": 0.00018407806586513134,
"loss": 6.6368,
"step": 2300
},
{
"epoch": 0.53,
"learning_rate": 0.00018396945665236184,
"loss": 6.6691,
"step": 2305
},
{
"epoch": 0.53,
"learning_rate": 0.00018386051053473232,
"loss": 6.6673,
"step": 2310
},
{
"epoch": 0.53,
"learning_rate": 0.00018375122794935984,
"loss": 6.641,
"step": 2315
},
{
"epoch": 0.53,
"learning_rate": 0.00018364160933471134,
"loss": 6.6094,
"step": 2320
},
{
"epoch": 0.53,
"learning_rate": 0.00018353165513060202,
"loss": 6.5716,
"step": 2325
},
{
"epoch": 0.53,
"learning_rate": 0.0001834213657781936,
"loss": 6.6307,
"step": 2330
},
{
"epoch": 0.54,
"learning_rate": 0.00018331074171999237,
"loss": 6.5556,
"step": 2335
},
{
"epoch": 0.54,
"learning_rate": 0.00018319978339984767,
"loss": 6.5416,
"step": 2340
},
{
"epoch": 0.54,
"learning_rate": 0.0001830884912629499,
"loss": 6.5371,
"step": 2345
},
{
"epoch": 0.54,
"learning_rate": 0.0001829768657558288,
"loss": 6.52,
"step": 2350
},
{
"epoch": 0.54,
"learning_rate": 0.00018286490732635172,
"loss": 6.5359,
"step": 2355
},
{
"epoch": 0.54,
"learning_rate": 0.00018275261642372175,
"loss": 6.5164,
"step": 2360
},
{
"epoch": 0.54,
"learning_rate": 0.0001826399934984759,
"loss": 6.5095,
"step": 2365
},
{
"epoch": 0.54,
"learning_rate": 0.0001825270390024834,
"loss": 6.5213,
"step": 2370
},
{
"epoch": 0.55,
"learning_rate": 0.0001824137533889438,
"loss": 6.4413,
"step": 2375
},
{
"epoch": 0.55,
"learning_rate": 0.00018230013711238513,
"loss": 6.4896,
"step": 2380
},
{
"epoch": 0.55,
"learning_rate": 0.00018218619062866211,
"loss": 6.4467,
"step": 2385
},
{
"epoch": 0.55,
"learning_rate": 0.00018207191439495438,
"loss": 6.4661,
"step": 2390
},
{
"epoch": 0.55,
"learning_rate": 0.00018195730886976462,
"loss": 6.4062,
"step": 2395
},
{
"epoch": 0.55,
"learning_rate": 0.00018184237451291665,
"loss": 6.4019,
"step": 2400
},
{
"epoch": 0.55,
"learning_rate": 0.00018172711178555368,
"loss": 6.4315,
"step": 2405
},
{
"epoch": 0.55,
"learning_rate": 0.00018161152115013637,
"loss": 6.4537,
"step": 2410
},
{
"epoch": 0.55,
"learning_rate": 0.0001814956030704411,
"loss": 6.4277,
"step": 2415
},
{
"epoch": 0.56,
"learning_rate": 0.00018137935801155794,
"loss": 6.4079,
"step": 2420
},
{
"epoch": 0.56,
"learning_rate": 0.00018126278643988895,
"loss": 6.3882,
"step": 2425
},
{
"epoch": 0.56,
"learning_rate": 0.0001811458888231462,
"loss": 6.3683,
"step": 2430
},
{
"epoch": 0.56,
"learning_rate": 0.00018102866563034995,
"loss": 6.3062,
"step": 2435
},
{
"epoch": 0.56,
"learning_rate": 0.0001809111173318267,
"loss": 6.324,
"step": 2440
},
{
"epoch": 0.56,
"learning_rate": 0.00018079324439920742,
"loss": 6.2728,
"step": 2445
},
{
"epoch": 0.56,
"learning_rate": 0.00018067504730542551,
"loss": 6.3452,
"step": 2450
},
{
"epoch": 0.56,
"learning_rate": 0.00018055652652471506,
"loss": 6.3298,
"step": 2455
},
{
"epoch": 0.56,
"learning_rate": 0.0001804376825326088,
"loss": 6.288,
"step": 2460
},
{
"epoch": 0.57,
"learning_rate": 0.00018031851580593627,
"loss": 6.2508,
"step": 2465
},
{
"epoch": 0.57,
"learning_rate": 0.00018019902682282193,
"loss": 6.2802,
"step": 2470
},
{
"epoch": 0.57,
"learning_rate": 0.0001800792160626832,
"loss": 6.2777,
"step": 2475
},
{
"epoch": 0.57,
"learning_rate": 0.0001799590840062285,
"loss": 6.2097,
"step": 2480
},
{
"epoch": 0.57,
"learning_rate": 0.0001798386311354554,
"loss": 6.2434,
"step": 2485
},
{
"epoch": 0.57,
"learning_rate": 0.00017971785793364866,
"loss": 6.2158,
"step": 2490
},
{
"epoch": 0.57,
"learning_rate": 0.00017959676488537825,
"loss": 6.1986,
"step": 2495
},
{
"epoch": 0.57,
"learning_rate": 0.0001794753524764975,
"loss": 6.1748,
"step": 2500
},
{
"epoch": 0.57,
"learning_rate": 0.00017935362119414098,
"loss": 6.1924,
"step": 2505
},
{
"epoch": 0.58,
"learning_rate": 0.00017923157152672278,
"loss": 6.3381,
"step": 2510
},
{
"epoch": 0.58,
"learning_rate": 0.0001791092039639343,
"loss": 6.255,
"step": 2515
},
{
"epoch": 0.58,
"learning_rate": 0.00017898651899674254,
"loss": 6.2656,
"step": 2520
},
{
"epoch": 0.58,
"learning_rate": 0.0001788635171173879,
"loss": 6.2003,
"step": 2525
},
{
"epoch": 0.58,
"learning_rate": 0.00017874019881938233,
"loss": 6.1718,
"step": 2530
},
{
"epoch": 0.58,
"learning_rate": 0.00017861656459750735,
"loss": 6.1582,
"step": 2535
},
{
"epoch": 0.58,
"learning_rate": 0.000178492614947812,
"loss": 6.2319,
"step": 2540
},
{
"epoch": 0.58,
"learning_rate": 0.00017836835036761094,
"loss": 6.4222,
"step": 2545
},
{
"epoch": 0.59,
"learning_rate": 0.00017824377135548236,
"loss": 6.2872,
"step": 2550
},
{
"epoch": 0.59,
"learning_rate": 0.000178118878411266,
"loss": 6.2422,
"step": 2555
},
{
"epoch": 0.59,
"learning_rate": 0.00017799367203606128,
"loss": 6.233,
"step": 2560
},
{
"epoch": 0.59,
"learning_rate": 0.0001778681527322251,
"loss": 6.1516,
"step": 2565
},
{
"epoch": 0.59,
"learning_rate": 0.00017774232100336982,
"loss": 6.1454,
"step": 2570
},
{
"epoch": 0.59,
"learning_rate": 0.00017761617735436152,
"loss": 6.1147,
"step": 2575
},
{
"epoch": 0.59,
"learning_rate": 0.00017748972229131757,
"loss": 6.179,
"step": 2580
},
{
"epoch": 0.59,
"learning_rate": 0.00017736295632160494,
"loss": 6.1024,
"step": 2585
},
{
"epoch": 0.59,
"learning_rate": 0.000177235879953838,
"loss": 6.1314,
"step": 2590
},
{
"epoch": 0.6,
"learning_rate": 0.00017710849369787646,
"loss": 6.1308,
"step": 2595
},
{
"epoch": 0.6,
"learning_rate": 0.00017698079806482343,
"loss": 6.0657,
"step": 2600
},
{
"epoch": 0.6,
"learning_rate": 0.0001768527935670233,
"loss": 6.1253,
"step": 2605
},
{
"epoch": 0.6,
"learning_rate": 0.0001767244807180597,
"loss": 6.0759,
"step": 2610
},
{
"epoch": 0.6,
"learning_rate": 0.0001765958600327534,
"loss": 6.078,
"step": 2615
},
{
"epoch": 0.6,
"learning_rate": 0.00017646693202716033,
"loss": 6.0314,
"step": 2620
},
{
"epoch": 0.6,
"learning_rate": 0.00017633769721856938,
"loss": 6.0668,
"step": 2625
},
{
"epoch": 0.6,
"learning_rate": 0.0001762081561255005,
"loss": 6.0417,
"step": 2630
},
{
"epoch": 0.6,
"learning_rate": 0.0001760783092677025,
"loss": 6.0718,
"step": 2635
},
{
"epoch": 0.61,
"learning_rate": 0.00017594815716615093,
"loss": 6.0442,
"step": 2640
},
{
"epoch": 0.61,
"learning_rate": 0.00017581770034304613,
"loss": 6.06,
"step": 2645
},
{
"epoch": 0.61,
"learning_rate": 0.000175686939321811,
"loss": 6.0552,
"step": 2650
},
{
"epoch": 0.61,
"learning_rate": 0.00017555587462708894,
"loss": 6.0108,
"step": 2655
},
{
"epoch": 0.61,
"learning_rate": 0.00017542450678474184,
"loss": 6.0079,
"step": 2660
},
{
"epoch": 0.61,
"learning_rate": 0.00017529283632184784,
"loss": 5.9592,
"step": 2665
},
{
"epoch": 0.61,
"learning_rate": 0.00017516086376669917,
"loss": 5.9704,
"step": 2670
},
{
"epoch": 0.61,
"learning_rate": 0.00017502858964880028,
"loss": 5.9467,
"step": 2675
},
{
"epoch": 0.62,
"learning_rate": 0.00017489601449886547,
"loss": 5.992,
"step": 2680
},
{
"epoch": 0.62,
"learning_rate": 0.0001747631388488169,
"loss": 6.0237,
"step": 2685
},
{
"epoch": 0.62,
"learning_rate": 0.00017462996323178235,
"loss": 5.9693,
"step": 2690
},
{
"epoch": 0.62,
"learning_rate": 0.00017449648818209314,
"loss": 5.9799,
"step": 2695
},
{
"epoch": 0.62,
"learning_rate": 0.00017436271423528206,
"loss": 5.9773,
"step": 2700
},
{
"epoch": 0.62,
"learning_rate": 0.00017422864192808107,
"loss": 5.891,
"step": 2705
},
{
"epoch": 0.62,
"learning_rate": 0.0001740942717984192,
"loss": 5.9719,
"step": 2710
},
{
"epoch": 0.62,
"learning_rate": 0.00017395960438542057,
"loss": 5.9558,
"step": 2715
},
{
"epoch": 0.62,
"learning_rate": 0.00017382464022940182,
"loss": 5.9125,
"step": 2720
},
{
"epoch": 0.63,
"learning_rate": 0.0001736893798718704,
"loss": 5.9074,
"step": 2725
},
{
"epoch": 0.63,
"learning_rate": 0.00017355382385552206,
"loss": 5.9078,
"step": 2730
},
{
"epoch": 0.63,
"learning_rate": 0.00017341797272423895,
"loss": 5.8941,
"step": 2735
},
{
"epoch": 0.63,
"learning_rate": 0.0001732818270230871,
"loss": 5.8736,
"step": 2740
},
{
"epoch": 0.63,
"learning_rate": 0.00017314538729831455,
"loss": 5.9292,
"step": 2745
},
{
"epoch": 0.63,
"learning_rate": 0.000173008654097349,
"loss": 5.898,
"step": 2750
},
{
"epoch": 0.63,
"learning_rate": 0.00017287162796879568,
"loss": 5.8592,
"step": 2755
},
{
"epoch": 0.63,
"learning_rate": 0.000172734309462435,
"loss": 5.9182,
"step": 2760
},
{
"epoch": 0.63,
"learning_rate": 0.00017259669912922062,
"loss": 5.8856,
"step": 2765
},
{
"epoch": 0.64,
"learning_rate": 0.00017245879752127692,
"loss": 5.8911,
"step": 2770
},
{
"epoch": 0.64,
"learning_rate": 0.00017232060519189703,
"loss": 5.8303,
"step": 2775
},
{
"epoch": 0.64,
"learning_rate": 0.0001721821226955405,
"loss": 5.8551,
"step": 2780
},
{
"epoch": 0.64,
"learning_rate": 0.00017204335058783112,
"loss": 5.8347,
"step": 2785
},
{
"epoch": 0.64,
"learning_rate": 0.00017190428942555463,
"loss": 5.8918,
"step": 2790
},
{
"epoch": 0.64,
"learning_rate": 0.00017176493976665656,
"loss": 5.8401,
"step": 2795
},
{
"epoch": 0.64,
"learning_rate": 0.0001716253021702399,
"loss": 5.8697,
"step": 2800
},
{
"epoch": 0.64,
"learning_rate": 0.000171485377196563,
"loss": 5.807,
"step": 2805
},
{
"epoch": 0.64,
"learning_rate": 0.0001713451654070371,
"loss": 5.8286,
"step": 2810
},
{
"epoch": 0.65,
"learning_rate": 0.00017120466736422437,
"loss": 5.8396,
"step": 2815
},
{
"epoch": 0.65,
"learning_rate": 0.0001710638836318354,
"loss": 5.7998,
"step": 2820
},
{
"epoch": 0.65,
"learning_rate": 0.00017092281477472704,
"loss": 5.8392,
"step": 2825
},
{
"epoch": 0.65,
"learning_rate": 0.00017078146135890014,
"loss": 5.7698,
"step": 2830
},
{
"epoch": 0.65,
"learning_rate": 0.0001706398239514973,
"loss": 5.7643,
"step": 2835
},
{
"epoch": 0.65,
"learning_rate": 0.0001704979031208004,
"loss": 5.8052,
"step": 2840
},
{
"epoch": 0.65,
"learning_rate": 0.00017035569943622876,
"loss": 5.7825,
"step": 2845
},
{
"epoch": 0.65,
"learning_rate": 0.0001702132134683363,
"loss": 5.7984,
"step": 2850
},
{
"epoch": 0.66,
"learning_rate": 0.00017007044578880968,
"loss": 5.7616,
"step": 2855
},
{
"epoch": 0.66,
"learning_rate": 0.00016992739697046586,
"loss": 5.7678,
"step": 2860
},
{
"epoch": 0.66,
"learning_rate": 0.00016978406758724968,
"loss": 5.7437,
"step": 2865
},
{
"epoch": 0.66,
"learning_rate": 0.00016964045821423178,
"loss": 5.7978,
"step": 2870
},
{
"epoch": 0.66,
"learning_rate": 0.0001694965694276061,
"loss": 5.7463,
"step": 2875
},
{
"epoch": 0.66,
"learning_rate": 0.00016935240180468775,
"loss": 5.7477,
"step": 2880
},
{
"epoch": 0.66,
"learning_rate": 0.0001692079559239105,
"loss": 5.7617,
"step": 2885
},
{
"epoch": 0.66,
"learning_rate": 0.00016906323236482465,
"loss": 5.7463,
"step": 2890
},
{
"epoch": 0.66,
"learning_rate": 0.00016891823170809447,
"loss": 5.7469,
"step": 2895
},
{
"epoch": 0.67,
"learning_rate": 0.00016877295453549614,
"loss": 5.7635,
"step": 2900
},
{
"epoch": 0.67,
"learning_rate": 0.00016862740142991527,
"loss": 5.6833,
"step": 2905
},
{
"epoch": 0.67,
"learning_rate": 0.00016848157297534453,
"loss": 5.721,
"step": 2910
},
{
"epoch": 0.67,
"learning_rate": 0.00016833546975688135,
"loss": 5.7013,
"step": 2915
},
{
"epoch": 0.67,
"learning_rate": 0.0001681890923607256,
"loss": 5.6713,
"step": 2920
},
{
"epoch": 0.67,
"learning_rate": 0.00016804244137417725,
"loss": 5.6403,
"step": 2925
},
{
"epoch": 0.67,
"learning_rate": 0.00016789551738563384,
"loss": 5.7303,
"step": 2930
},
{
"epoch": 0.67,
"learning_rate": 0.00016774832098458847,
"loss": 5.6954,
"step": 2935
},
{
"epoch": 0.67,
"learning_rate": 0.00016760085276162708,
"loss": 5.6597,
"step": 2940
},
{
"epoch": 0.68,
"learning_rate": 0.00016745311330842622,
"loss": 5.6494,
"step": 2945
},
{
"epoch": 0.68,
"learning_rate": 0.00016730510321775075,
"loss": 5.6584,
"step": 2950
},
{
"epoch": 0.68,
"learning_rate": 0.00016715682308345131,
"loss": 5.614,
"step": 2955
},
{
"epoch": 0.68,
"learning_rate": 0.00016700827350046206,
"loss": 5.6699,
"step": 2960
},
{
"epoch": 0.68,
"learning_rate": 0.00016685945506479829,
"loss": 5.702,
"step": 2965
},
{
"epoch": 0.68,
"learning_rate": 0.00016671036837355386,
"loss": 5.6822,
"step": 2970
},
{
"epoch": 0.68,
"learning_rate": 0.00016656101402489913,
"loss": 5.6072,
"step": 2975
},
{
"epoch": 0.68,
"learning_rate": 0.00016641139261807818,
"loss": 5.6516,
"step": 2980
},
{
"epoch": 0.69,
"learning_rate": 0.00016626150475340667,
"loss": 5.6156,
"step": 2985
},
{
"epoch": 0.69,
"learning_rate": 0.00016611135103226937,
"loss": 5.6864,
"step": 2990
},
{
"epoch": 0.69,
"learning_rate": 0.00016596093205711765,
"loss": 5.6564,
"step": 2995
},
{
"epoch": 0.69,
"learning_rate": 0.00016581024843146725,
"loss": 5.6612,
"step": 3000
},
{
"epoch": 0.69,
"learning_rate": 0.0001656593007598956,
"loss": 5.6673,
"step": 3005
},
{
"epoch": 0.69,
"learning_rate": 0.00016550808964803978,
"loss": 5.6044,
"step": 3010
},
{
"epoch": 0.69,
"learning_rate": 0.00016535661570259359,
"loss": 5.6465,
"step": 3015
},
{
"epoch": 0.69,
"learning_rate": 0.00016520487953130552,
"loss": 5.6889,
"step": 3020
},
{
"epoch": 0.69,
"learning_rate": 0.00016505288174297616,
"loss": 5.6237,
"step": 3025
},
{
"epoch": 0.7,
"learning_rate": 0.00016490062294745571,
"loss": 5.621,
"step": 3030
},
{
"epoch": 0.7,
"learning_rate": 0.0001647481037556417,
"loss": 5.592,
"step": 3035
},
{
"epoch": 0.7,
"learning_rate": 0.00016459532477947634,
"loss": 5.6435,
"step": 3040
},
{
"epoch": 0.7,
"learning_rate": 0.00016444228663194417,
"loss": 5.6503,
"step": 3045
},
{
"epoch": 0.7,
"learning_rate": 0.00016428898992706955,
"loss": 5.5757,
"step": 3050
},
{
"epoch": 0.7,
"learning_rate": 0.0001641354352799144,
"loss": 5.6234,
"step": 3055
},
{
"epoch": 0.7,
"learning_rate": 0.00016398162330657533,
"loss": 5.6008,
"step": 3060
},
{
"epoch": 0.7,
"learning_rate": 0.00016382755462418153,
"loss": 5.5648,
"step": 3065
},
{
"epoch": 0.7,
"learning_rate": 0.0001636732298508922,
"loss": 5.5678,
"step": 3070
},
{
"epoch": 0.71,
"learning_rate": 0.00016351864960589393,
"loss": 5.6054,
"step": 3075
},
{
"epoch": 0.71,
"learning_rate": 0.0001633638145093984,
"loss": 5.5597,
"step": 3080
},
{
"epoch": 0.71,
"learning_rate": 0.00016320872518263982,
"loss": 5.5446,
"step": 3085
},
{
"epoch": 0.71,
"learning_rate": 0.00016305338224787235,
"loss": 5.5611,
"step": 3090
},
{
"epoch": 0.71,
"learning_rate": 0.00016289778632836774,
"loss": 5.5962,
"step": 3095
},
{
"epoch": 0.71,
"learning_rate": 0.0001627419380484128,
"loss": 5.6262,
"step": 3100
},
{
"epoch": 0.71,
"learning_rate": 0.00016258583803330681,
"loss": 5.5137,
"step": 3105
},
{
"epoch": 0.71,
"learning_rate": 0.00016242948690935912,
"loss": 5.5613,
"step": 3110
},
{
"epoch": 0.71,
"learning_rate": 0.0001622728853038865,
"loss": 5.5821,
"step": 3115
},
{
"epoch": 0.72,
"learning_rate": 0.00016211603384521083,
"loss": 5.5496,
"step": 3120
},
{
"epoch": 0.72,
"learning_rate": 0.0001619589331626564,
"loss": 5.5583,
"step": 3125
},
{
"epoch": 0.72,
"learning_rate": 0.00016180158388654742,
"loss": 5.553,
"step": 3130
},
{
"epoch": 0.72,
"learning_rate": 0.00016164398664820557,
"loss": 5.5156,
"step": 3135
},
{
"epoch": 0.72,
"learning_rate": 0.00016148614207994735,
"loss": 5.5404,
"step": 3140
},
{
"epoch": 0.72,
"learning_rate": 0.00016132805081508163,
"loss": 5.5206,
"step": 3145
},
{
"epoch": 0.72,
"learning_rate": 0.00016116971348790712,
"loss": 5.5238,
"step": 3150
},
{
"epoch": 0.72,
"learning_rate": 0.0001610111307337097,
"loss": 5.4944,
"step": 3155
},
{
"epoch": 0.73,
"learning_rate": 0.0001608523031887601,
"loss": 5.5935,
"step": 3160
},
{
"epoch": 0.73,
"learning_rate": 0.00016069323149031105,
"loss": 5.5346,
"step": 3165
},
{
"epoch": 0.73,
"learning_rate": 0.00016053391627659505,
"loss": 5.4831,
"step": 3170
},
{
"epoch": 0.73,
"learning_rate": 0.00016037435818682142,
"loss": 5.5352,
"step": 3175
},
{
"epoch": 0.73,
"learning_rate": 0.0001602145578611742,
"loss": 5.5195,
"step": 3180
},
{
"epoch": 0.73,
"learning_rate": 0.00016005451594080911,
"loss": 5.5148,
"step": 3185
},
{
"epoch": 0.73,
"learning_rate": 0.00015989423306785142,
"loss": 5.4785,
"step": 3190
},
{
"epoch": 0.73,
"learning_rate": 0.00015973370988539294,
"loss": 5.4914,
"step": 3195
},
{
"epoch": 0.73,
"learning_rate": 0.00015957294703748982,
"loss": 5.4942,
"step": 3200
},
{
"epoch": 0.74,
"learning_rate": 0.00015941194516915977,
"loss": 5.4827,
"step": 3205
},
{
"epoch": 0.74,
"learning_rate": 0.00015925070492637944,
"loss": 5.4518,
"step": 3210
},
{
"epoch": 0.74,
"learning_rate": 0.00015908922695608192,
"loss": 5.4863,
"step": 3215
},
{
"epoch": 0.74,
"learning_rate": 0.0001589275119061542,
"loss": 5.4688,
"step": 3220
},
{
"epoch": 0.74,
"learning_rate": 0.00015876556042543433,
"loss": 5.4822,
"step": 3225
},
{
"epoch": 0.74,
"learning_rate": 0.00015860337316370916,
"loss": 5.4418,
"step": 3230
},
{
"epoch": 0.74,
"learning_rate": 0.00015844095077171142,
"loss": 5.4795,
"step": 3235
},
{
"epoch": 0.74,
"learning_rate": 0.0001582782939011173,
"loss": 5.4708,
"step": 3240
},
{
"epoch": 0.74,
"learning_rate": 0.0001581154032045437,
"loss": 5.4705,
"step": 3245
},
{
"epoch": 0.75,
"learning_rate": 0.00015795227933554568,
"loss": 5.4587,
"step": 3250
},
{
"epoch": 0.75,
"learning_rate": 0.000157788922948614,
"loss": 5.4739,
"step": 3255
},
{
"epoch": 0.75,
"learning_rate": 0.00015762533469917216,
"loss": 5.4528,
"step": 3260
},
{
"epoch": 0.75,
"learning_rate": 0.00015746151524357403,
"loss": 5.5438,
"step": 3265
},
{
"epoch": 0.75,
"learning_rate": 0.00015729746523910113,
"loss": 5.4371,
"step": 3270
},
{
"epoch": 0.75,
"learning_rate": 0.00015713318534395996,
"loss": 5.4538,
"step": 3275
},
{
"epoch": 0.75,
"learning_rate": 0.00015696867621727942,
"loss": 5.4474,
"step": 3280
},
{
"epoch": 0.75,
"learning_rate": 0.00015680393851910811,
"loss": 5.4713,
"step": 3285
},
{
"epoch": 0.76,
"learning_rate": 0.00015663897291041175,
"loss": 5.4192,
"step": 3290
},
{
"epoch": 0.76,
"learning_rate": 0.0001564737800530705,
"loss": 5.4642,
"step": 3295
},
{
"epoch": 0.76,
"learning_rate": 0.00015630836060987624,
"loss": 5.4001,
"step": 3300
},
{
"epoch": 0.76,
"learning_rate": 0.00015614271524453,
"loss": 5.4734,
"step": 3305
},
{
"epoch": 0.76,
"learning_rate": 0.00015597684462163923,
"loss": 5.4254,
"step": 3310
},
{
"epoch": 0.76,
"learning_rate": 0.00015581074940671527,
"loss": 5.4866,
"step": 3315
},
{
"epoch": 0.76,
"learning_rate": 0.0001556444302661704,
"loss": 5.4851,
"step": 3320
},
{
"epoch": 0.76,
"learning_rate": 0.00015547788786731553,
"loss": 5.4155,
"step": 3325
},
{
"epoch": 0.76,
"learning_rate": 0.00015531112287835717,
"loss": 5.4763,
"step": 3330
},
{
"epoch": 0.77,
"learning_rate": 0.00015514413596839498,
"loss": 5.3893,
"step": 3335
},
{
"epoch": 0.77,
"learning_rate": 0.00015497692780741908,
"loss": 5.4356,
"step": 3340
},
{
"epoch": 0.77,
"learning_rate": 0.00015480949906630724,
"loss": 5.4113,
"step": 3345
},
{
"epoch": 0.77,
"learning_rate": 0.0001546418504168222,
"loss": 5.429,
"step": 3350
},
{
"epoch": 0.77,
"learning_rate": 0.0001544739825316091,
"loss": 5.4215,
"step": 3355
},
{
"epoch": 0.77,
"learning_rate": 0.00015430589608419264,
"loss": 5.4567,
"step": 3360
},
{
"epoch": 0.77,
"learning_rate": 0.00015413759174897455,
"loss": 5.4459,
"step": 3365
},
{
"epoch": 0.77,
"learning_rate": 0.00015396907020123068,
"loss": 5.4098,
"step": 3370
},
{
"epoch": 0.77,
"learning_rate": 0.00015380033211710842,
"loss": 5.3947,
"step": 3375
},
{
"epoch": 0.78,
"learning_rate": 0.00015363137817362392,
"loss": 5.3972,
"step": 3380
},
{
"epoch": 0.78,
"learning_rate": 0.00015346220904865945,
"loss": 5.4151,
"step": 3385
},
{
"epoch": 0.78,
"learning_rate": 0.00015329282542096064,
"loss": 5.3657,
"step": 3390
},
{
"epoch": 0.78,
"learning_rate": 0.0001531232279701337,
"loss": 5.4261,
"step": 3395
},
{
"epoch": 0.78,
"learning_rate": 0.00015295341737664285,
"loss": 5.4168,
"step": 3400
},
{
"epoch": 0.78,
"learning_rate": 0.00015278339432180736,
"loss": 5.4326,
"step": 3405
},
{
"epoch": 0.78,
"learning_rate": 0.000152613159487799,
"loss": 5.3843,
"step": 3410
},
{
"epoch": 0.78,
"learning_rate": 0.00015244271355763933,
"loss": 5.3671,
"step": 3415
},
{
"epoch": 0.78,
"learning_rate": 0.00015227205721519675,
"loss": 5.3901,
"step": 3420
},
{
"epoch": 0.79,
"learning_rate": 0.0001521011911451839,
"loss": 5.321,
"step": 3425
},
{
"epoch": 0.79,
"learning_rate": 0.00015193011603315503,
"loss": 5.4188,
"step": 3430
},
{
"epoch": 0.79,
"learning_rate": 0.00015175883256550291,
"loss": 5.3682,
"step": 3435
},
{
"epoch": 0.79,
"learning_rate": 0.00015158734142945644,
"loss": 5.3324,
"step": 3440
},
{
"epoch": 0.79,
"learning_rate": 0.00015141564331307768,
"loss": 5.3596,
"step": 3445
},
{
"epoch": 0.79,
"learning_rate": 0.0001512437389052591,
"loss": 5.3984,
"step": 3450
},
{
"epoch": 0.79,
"learning_rate": 0.00015107162889572092,
"loss": 5.3599,
"step": 3455
},
{
"epoch": 0.79,
"learning_rate": 0.0001508993139750083,
"loss": 5.3632,
"step": 3460
},
{
"epoch": 0.8,
"learning_rate": 0.00015072679483448852,
"loss": 5.3456,
"step": 3465
},
{
"epoch": 0.8,
"learning_rate": 0.0001505540721663481,
"loss": 5.3457,
"step": 3470
},
{
"epoch": 0.8,
"learning_rate": 0.00015038114666359042,
"loss": 5.339,
"step": 3475
},
{
"epoch": 0.8,
"learning_rate": 0.0001502080190200325,
"loss": 5.3411,
"step": 3480
},
{
"epoch": 0.8,
"learning_rate": 0.00015003468993030248,
"loss": 5.3975,
"step": 3485
},
{
"epoch": 0.8,
"learning_rate": 0.00014986116008983664,
"loss": 5.3451,
"step": 3490
},
{
"epoch": 0.8,
"learning_rate": 0.00014968743019487686,
"loss": 5.3741,
"step": 3495
},
{
"epoch": 0.8,
"learning_rate": 0.00014951350094246762,
"loss": 5.3924,
"step": 3500
},
{
"epoch": 0.8,
"learning_rate": 0.00014933937303045325,
"loss": 5.3798,
"step": 3505
},
{
"epoch": 0.81,
"learning_rate": 0.0001491650471574751,
"loss": 5.3578,
"step": 3510
},
{
"epoch": 0.81,
"learning_rate": 0.00014899052402296895,
"loss": 5.3184,
"step": 3515
},
{
"epoch": 0.81,
"learning_rate": 0.00014881580432716182,
"loss": 5.3755,
"step": 3520
},
{
"epoch": 0.81,
"learning_rate": 0.0001486408887710696,
"loss": 5.3483,
"step": 3525
},
{
"epoch": 0.81,
"learning_rate": 0.00014846577805649388,
"loss": 5.4266,
"step": 3530
},
{
"epoch": 0.81,
"learning_rate": 0.0001482904728860193,
"loss": 5.3295,
"step": 3535
},
{
"epoch": 0.81,
"learning_rate": 0.00014811497396301072,
"loss": 5.3377,
"step": 3540
},
{
"epoch": 0.81,
"learning_rate": 0.0001479392819916103,
"loss": 5.3249,
"step": 3545
},
{
"epoch": 0.81,
"learning_rate": 0.00014776339767673491,
"loss": 5.319,
"step": 3550
},
{
"epoch": 0.82,
"learning_rate": 0.00014758732172407302,
"loss": 5.3046,
"step": 3555
},
{
"epoch": 0.82,
"learning_rate": 0.000147411054840082,
"loss": 5.2972,
"step": 3560
},
{
"epoch": 0.82,
"learning_rate": 0.00014723459773198535,
"loss": 5.3475,
"step": 3565
},
{
"epoch": 0.82,
"learning_rate": 0.00014705795110776974,
"loss": 5.3537,
"step": 3570
},
{
"epoch": 0.82,
"learning_rate": 0.00014688111567618225,
"loss": 5.3175,
"step": 3575
},
{
"epoch": 0.82,
"learning_rate": 0.0001467040921467275,
"loss": 5.337,
"step": 3580
},
{
"epoch": 0.82,
"learning_rate": 0.00014652688122966475,
"loss": 5.3152,
"step": 3585
},
{
"epoch": 0.82,
"learning_rate": 0.00014634948363600518,
"loss": 5.36,
"step": 3590
},
{
"epoch": 0.83,
"learning_rate": 0.0001461719000775089,
"loss": 5.3679,
"step": 3595
},
{
"epoch": 0.83,
"learning_rate": 0.00014599413126668213,
"loss": 5.3203,
"step": 3600
},
{
"epoch": 0.83,
"learning_rate": 0.00014581617791677452,
"loss": 5.3254,
"step": 3605
},
{
"epoch": 0.83,
"learning_rate": 0.00014563804074177588,
"loss": 5.3276,
"step": 3610
},
{
"epoch": 0.83,
"learning_rate": 0.0001454597204564139,
"loss": 5.339,
"step": 3615
},
{
"epoch": 0.83,
"learning_rate": 0.00014528121777615058,
"loss": 5.2971,
"step": 3620
},
{
"epoch": 0.83,
"learning_rate": 0.00014510253341718,
"loss": 5.2645,
"step": 3625
},
{
"epoch": 0.83,
"learning_rate": 0.0001449236680964251,
"loss": 5.2741,
"step": 3630
},
{
"epoch": 0.83,
"learning_rate": 0.00014474462253153482,
"loss": 5.3032,
"step": 3635
},
{
"epoch": 0.84,
"learning_rate": 0.00014456539744088138,
"loss": 5.3179,
"step": 3640
},
{
"epoch": 0.84,
"learning_rate": 0.00014438599354355723,
"loss": 5.2993,
"step": 3645
},
{
"epoch": 0.84,
"learning_rate": 0.00014420641155937224,
"loss": 5.3037,
"step": 3650
},
{
"epoch": 0.84,
"learning_rate": 0.00014402665220885087,
"loss": 5.3506,
"step": 3655
},
{
"epoch": 0.84,
"learning_rate": 0.00014384671621322915,
"loss": 5.2998,
"step": 3660
},
{
"epoch": 0.84,
"learning_rate": 0.00014366660429445183,
"loss": 5.2896,
"step": 3665
},
{
"epoch": 0.84,
"learning_rate": 0.00014348631717516953,
"loss": 5.2643,
"step": 3670
},
{
"epoch": 0.84,
"learning_rate": 0.00014330585557873588,
"loss": 5.3258,
"step": 3675
},
{
"epoch": 0.84,
"learning_rate": 0.00014312522022920444,
"loss": 5.2385,
"step": 3680
},
{
"epoch": 0.85,
"learning_rate": 0.00014294441185132607,
"loss": 5.2566,
"step": 3685
},
{
"epoch": 0.85,
"learning_rate": 0.00014276343117054563,
"loss": 5.2949,
"step": 3690
},
{
"epoch": 0.85,
"learning_rate": 0.00014258227891299948,
"loss": 5.3243,
"step": 3695
},
{
"epoch": 0.85,
"learning_rate": 0.00014240095580551234,
"loss": 5.2613,
"step": 3700
},
{
"epoch": 0.85,
"learning_rate": 0.00014221946257559438,
"loss": 5.2501,
"step": 3705
},
{
"epoch": 0.85,
"learning_rate": 0.0001420377999514384,
"loss": 5.2639,
"step": 3710
},
{
"epoch": 0.85,
"learning_rate": 0.0001418559686619168,
"loss": 5.2594,
"step": 3715
},
{
"epoch": 0.85,
"learning_rate": 0.00014167396943657874,
"loss": 5.2639,
"step": 3720
},
{
"epoch": 0.85,
"learning_rate": 0.00014149180300564713,
"loss": 5.3033,
"step": 3725
},
{
"epoch": 0.86,
"learning_rate": 0.0001413094701000158,
"loss": 5.2797,
"step": 3730
},
{
"epoch": 0.86,
"learning_rate": 0.0001411269714512465,
"loss": 5.2528,
"step": 3735
},
{
"epoch": 0.86,
"learning_rate": 0.00014094430779156597,
"loss": 5.296,
"step": 3740
},
{
"epoch": 0.86,
"learning_rate": 0.00014076147985386296,
"loss": 5.2637,
"step": 3745
},
{
"epoch": 0.86,
"learning_rate": 0.00014057848837168547,
"loss": 5.291,
"step": 3750
},
{
"epoch": 0.86,
"learning_rate": 0.00014039533407923758,
"loss": 5.2754,
"step": 3755
},
{
"epoch": 0.86,
"learning_rate": 0.00014021201771137663,
"loss": 5.2591,
"step": 3760
},
{
"epoch": 0.86,
"learning_rate": 0.0001400285400036102,
"loss": 5.2479,
"step": 3765
},
{
"epoch": 0.87,
"learning_rate": 0.00013984490169209333,
"loss": 5.2711,
"step": 3770
},
{
"epoch": 0.87,
"learning_rate": 0.00013966110351362523,
"loss": 5.299,
"step": 3775
},
{
"epoch": 0.87,
"learning_rate": 0.00013947714620564678,
"loss": 5.2509,
"step": 3780
},
{
"epoch": 0.87,
"learning_rate": 0.0001392930305062371,
"loss": 5.2775,
"step": 3785
},
{
"epoch": 0.87,
"learning_rate": 0.00013910875715411098,
"loss": 5.2834,
"step": 3790
},
{
"epoch": 0.87,
"learning_rate": 0.00013892432688861566,
"loss": 5.2648,
"step": 3795
},
{
"epoch": 0.87,
"learning_rate": 0.000138739740449728,
"loss": 5.2384,
"step": 3800
},
{
"epoch": 0.87,
"learning_rate": 0.00013855499857805137,
"loss": 5.272,
"step": 3805
},
{
"epoch": 0.87,
"learning_rate": 0.0001383701020148129,
"loss": 5.2573,
"step": 3810
},
{
"epoch": 0.88,
"learning_rate": 0.0001381850515018603,
"loss": 5.2959,
"step": 3815
},
{
"epoch": 0.88,
"learning_rate": 0.000137999847781659,
"loss": 5.2411,
"step": 3820
},
{
"epoch": 0.88,
"learning_rate": 0.00013781449159728907,
"loss": 5.2197,
"step": 3825
},
{
"epoch": 0.88,
"learning_rate": 0.00013762898369244238,
"loss": 5.2314,
"step": 3830
},
{
"epoch": 0.88,
"learning_rate": 0.00013744332481141952,
"loss": 5.2228,
"step": 3835
},
{
"epoch": 0.88,
"learning_rate": 0.00013725751569912682,
"loss": 5.231,
"step": 3840
},
{
"epoch": 0.88,
"learning_rate": 0.00013707155710107326,
"loss": 5.1971,
"step": 3845
},
{
"epoch": 0.88,
"learning_rate": 0.00013688544976336783,
"loss": 5.2345,
"step": 3850
},
{
"epoch": 0.88,
"learning_rate": 0.0001366991944327161,
"loss": 5.238,
"step": 3855
},
{
"epoch": 0.89,
"learning_rate": 0.00013651279185641752,
"loss": 5.246,
"step": 3860
},
{
"epoch": 0.89,
"learning_rate": 0.00013632624278236223,
"loss": 5.2443,
"step": 3865
},
{
"epoch": 0.89,
"learning_rate": 0.0001361395479590283,
"loss": 5.2232,
"step": 3870
},
{
"epoch": 0.89,
"learning_rate": 0.00013595270813547845,
"loss": 5.2496,
"step": 3875
},
{
"epoch": 0.89,
"learning_rate": 0.00013576572406135722,
"loss": 5.1688,
"step": 3880
},
{
"epoch": 0.89,
"learning_rate": 0.0001355785964868879,
"loss": 5.21,
"step": 3885
},
{
"epoch": 0.89,
"learning_rate": 0.00013539132616286956,
"loss": 5.2414,
"step": 3890
},
{
"epoch": 0.89,
"learning_rate": 0.000135203913840674,
"loss": 5.2139,
"step": 3895
},
{
"epoch": 0.9,
"learning_rate": 0.00013501636027224282,
"loss": 5.214,
"step": 3900
},
{
"epoch": 0.9,
"learning_rate": 0.00013482866621008417,
"loss": 5.2154,
"step": 3905
},
{
"epoch": 0.9,
"learning_rate": 0.0001346408324072701,
"loss": 5.1666,
"step": 3910
},
{
"epoch": 0.9,
"learning_rate": 0.0001344528596174331,
"loss": 5.2159,
"step": 3915
},
{
"epoch": 0.9,
"learning_rate": 0.0001342647485947635,
"loss": 5.2013,
"step": 3920
},
{
"epoch": 0.9,
"learning_rate": 0.0001340765000940062,
"loss": 5.2193,
"step": 3925
},
{
"epoch": 0.9,
"learning_rate": 0.00013388811487045766,
"loss": 5.2116,
"step": 3930
},
{
"epoch": 0.9,
"learning_rate": 0.00013369959367996286,
"loss": 5.2398,
"step": 3935
},
{
"epoch": 0.9,
"learning_rate": 0.0001335109372789125,
"loss": 5.2073,
"step": 3940
},
{
"epoch": 0.91,
"learning_rate": 0.00013332214642423952,
"loss": 5.2629,
"step": 3945
},
{
"epoch": 0.91,
"learning_rate": 0.00013313322187341652,
"loss": 5.2636,
"step": 3950
},
{
"epoch": 0.91,
"learning_rate": 0.00013294416438445244,
"loss": 5.2398,
"step": 3955
},
{
"epoch": 0.91,
"learning_rate": 0.00013275497471588953,
"loss": 5.1794,
"step": 3960
},
{
"epoch": 0.91,
"learning_rate": 0.00013256565362680057,
"loss": 5.1936,
"step": 3965
},
{
"epoch": 0.91,
"learning_rate": 0.0001323762018767854,
"loss": 5.2014,
"step": 3970
},
{
"epoch": 0.91,
"learning_rate": 0.00013218662022596828,
"loss": 5.2163,
"step": 3975
},
{
"epoch": 0.91,
"learning_rate": 0.00013199690943499457,
"loss": 5.1886,
"step": 3980
},
{
"epoch": 0.91,
"learning_rate": 0.0001318070702650278,
"loss": 5.2418,
"step": 3985
},
{
"epoch": 0.92,
"learning_rate": 0.0001316171034777466,
"loss": 5.2023,
"step": 3990
},
{
"epoch": 0.92,
"learning_rate": 0.00013142700983534157,
"loss": 5.1956,
"step": 3995
},
{
"epoch": 0.92,
"learning_rate": 0.00013123679010051232,
"loss": 5.1696,
"step": 4000
},
{
"epoch": 0.92,
"learning_rate": 0.0001310464450364644,
"loss": 5.2116,
"step": 4005
},
{
"epoch": 0.92,
"learning_rate": 0.00013085597540690618,
"loss": 5.1862,
"step": 4010
},
{
"epoch": 0.92,
"learning_rate": 0.0001306653819760458,
"loss": 5.1857,
"step": 4015
},
{
"epoch": 0.92,
"learning_rate": 0.00013047466550858812,
"loss": 5.163,
"step": 4020
},
{
"epoch": 0.92,
"learning_rate": 0.00013028382676973178,
"loss": 5.1765,
"step": 4025
},
{
"epoch": 0.92,
"learning_rate": 0.00013009286652516575,
"loss": 5.199,
"step": 4030
},
{
"epoch": 0.93,
"learning_rate": 0.00012990178554106674,
"loss": 5.1622,
"step": 4035
},
{
"epoch": 0.93,
"learning_rate": 0.00012971058458409576,
"loss": 5.1589,
"step": 4040
},
{
"epoch": 0.93,
"learning_rate": 0.0001295192644213952,
"loss": 5.1641,
"step": 4045
},
{
"epoch": 0.93,
"learning_rate": 0.00012932782582058584,
"loss": 5.1361,
"step": 4050
},
{
"epoch": 0.93,
"learning_rate": 0.00012913626954976355,
"loss": 5.1973,
"step": 4055
},
{
"epoch": 0.93,
"learning_rate": 0.00012894459637749627,
"loss": 5.1735,
"step": 4060
},
{
"epoch": 0.93,
"learning_rate": 0.00012875280707282116,
"loss": 5.1678,
"step": 4065
},
{
"epoch": 0.93,
"learning_rate": 0.0001285609024052411,
"loss": 5.187,
"step": 4070
},
{
"epoch": 0.94,
"learning_rate": 0.00012836888314472208,
"loss": 5.1512,
"step": 4075
},
{
"epoch": 0.94,
"learning_rate": 0.00012817675006168963,
"loss": 5.1546,
"step": 4080
},
{
"epoch": 0.94,
"learning_rate": 0.00012798450392702615,
"loss": 5.1529,
"step": 4085
},
{
"epoch": 0.94,
"learning_rate": 0.00012779214551206746,
"loss": 5.1515,
"step": 4090
},
{
"epoch": 0.94,
"learning_rate": 0.00012759967558860006,
"loss": 5.1514,
"step": 4095
},
{
"epoch": 0.94,
"learning_rate": 0.0001274070949288577,
"loss": 5.1721,
"step": 4100
},
{
"epoch": 0.94,
"learning_rate": 0.00012721440430551849,
"loss": 5.1295,
"step": 4105
},
{
"epoch": 0.94,
"learning_rate": 0.00012702160449170165,
"loss": 5.134,
"step": 4110
},
{
"epoch": 0.94,
"learning_rate": 0.0001268286962609647,
"loss": 5.1717,
"step": 4115
},
{
"epoch": 0.95,
"learning_rate": 0.0001266356803873,
"loss": 5.1103,
"step": 4120
},
{
"epoch": 0.95,
"learning_rate": 0.00012644255764513178,
"loss": 5.1361,
"step": 4125
},
{
"epoch": 0.95,
"learning_rate": 0.0001262493288093131,
"loss": 5.1331,
"step": 4130
},
{
"epoch": 0.95,
"learning_rate": 0.00012605599465512278,
"loss": 5.1673,
"step": 4135
},
{
"epoch": 0.95,
"learning_rate": 0.000125862555958262,
"loss": 5.1172,
"step": 4140
},
{
"epoch": 0.95,
"learning_rate": 0.00012566901349485158,
"loss": 5.1707,
"step": 4145
},
{
"epoch": 0.95,
"learning_rate": 0.00012547536804142858,
"loss": 5.0802,
"step": 4150
},
{
"epoch": 0.95,
"learning_rate": 0.00012528162037494332,
"loss": 5.1604,
"step": 4155
},
{
"epoch": 0.95,
"learning_rate": 0.0001250877712727561,
"loss": 5.1485,
"step": 4160
},
{
"epoch": 0.96,
"learning_rate": 0.00012489382151263442,
"loss": 5.1316,
"step": 4165
},
{
"epoch": 0.96,
"learning_rate": 0.00012469977187274945,
"loss": 5.1652,
"step": 4170
},
{
"epoch": 0.96,
"learning_rate": 0.00012450562313167326,
"loss": 5.1424,
"step": 4175
},
{
"epoch": 0.96,
"learning_rate": 0.00012431137606837538,
"loss": 5.1375,
"step": 4180
},
{
"epoch": 0.96,
"learning_rate": 0.00012411703146221993,
"loss": 5.1874,
"step": 4185
},
{
"epoch": 0.96,
"learning_rate": 0.0001239225900929624,
"loss": 5.1192,
"step": 4190
},
{
"epoch": 0.96,
"learning_rate": 0.00012372805274074643,
"loss": 5.1478,
"step": 4195
},
{
"epoch": 0.96,
"learning_rate": 0.00012353342018610084,
"loss": 5.1118,
"step": 4200
},
{
"epoch": 0.97,
"learning_rate": 0.0001233386932099364,
"loss": 5.1292,
"step": 4205
},
{
"epoch": 0.97,
"learning_rate": 0.00012314387259354282,
"loss": 5.1816,
"step": 4210
},
{
"epoch": 0.97,
"learning_rate": 0.00012294895911858528,
"loss": 5.1334,
"step": 4215
},
{
"epoch": 0.97,
"learning_rate": 0.00012275395356710177,
"loss": 5.1112,
"step": 4220
},
{
"epoch": 0.97,
"learning_rate": 0.00012255885672149953,
"loss": 5.0939,
"step": 4225
},
{
"epoch": 0.97,
"learning_rate": 0.0001223636693645523,
"loss": 5.1542,
"step": 4230
},
{
"epoch": 0.97,
"learning_rate": 0.00012216839227939676,
"loss": 5.1012,
"step": 4235
},
{
"epoch": 0.97,
"learning_rate": 0.00012197302624952971,
"loss": 5.1341,
"step": 4240
},
{
"epoch": 0.97,
"learning_rate": 0.0001217775720588048,
"loss": 5.1443,
"step": 4245
},
{
"epoch": 0.98,
"learning_rate": 0.00012158203049142947,
"loss": 5.0631,
"step": 4250
},
{
"epoch": 0.98,
"learning_rate": 0.00012138640233196152,
"loss": 5.1662,
"step": 4255
},
{
"epoch": 0.98,
"learning_rate": 0.00012119068836530644,
"loss": 5.1628,
"step": 4260
},
{
"epoch": 0.98,
"learning_rate": 0.00012099488937671384,
"loss": 5.1924,
"step": 4265
},
{
"epoch": 0.98,
"learning_rate": 0.00012079900615177449,
"loss": 5.165,
"step": 4270
},
{
"epoch": 0.98,
"learning_rate": 0.00012060303947641715,
"loss": 5.1227,
"step": 4275
},
{
"epoch": 0.98,
"learning_rate": 0.00012040699013690543,
"loss": 5.0873,
"step": 4280
},
{
"epoch": 0.98,
"learning_rate": 0.00012021085891983456,
"loss": 5.1478,
"step": 4285
},
{
"epoch": 0.98,
"learning_rate": 0.00012001464661212827,
"loss": 5.1332,
"step": 4290
},
{
"epoch": 0.99,
"learning_rate": 0.00011981835400103568,
"loss": 5.1551,
"step": 4295
},
{
"epoch": 0.99,
"learning_rate": 0.0001196219818741281,
"loss": 5.0721,
"step": 4300
},
{
"epoch": 0.99,
"learning_rate": 0.00011942553101929585,
"loss": 5.1071,
"step": 4305
},
{
"epoch": 0.99,
"learning_rate": 0.00011922900222474523,
"loss": 5.1758,
"step": 4310
},
{
"epoch": 0.99,
"learning_rate": 0.00011903239627899503,
"loss": 5.109,
"step": 4315
},
{
"epoch": 0.99,
"learning_rate": 0.00011883571397087387,
"loss": 5.1134,
"step": 4320
},
{
"epoch": 0.99,
"learning_rate": 0.00011863895608951654,
"loss": 5.1427,
"step": 4325
},
{
"epoch": 0.99,
"learning_rate": 0.00011844212342436112,
"loss": 5.088,
"step": 4330
},
{
"epoch": 0.99,
"learning_rate": 0.0001182452167651457,
"loss": 5.1406,
"step": 4335
},
{
"epoch": 1.0,
"learning_rate": 0.00011804823690190538,
"loss": 5.1298,
"step": 4340
},
{
"epoch": 1.0,
"learning_rate": 0.0001178511846249688,
"loss": 5.1483,
"step": 4345
},
{
"epoch": 1.0,
"learning_rate": 0.00011765406072495528,
"loss": 5.1122,
"step": 4350
},
{
"epoch": 1.0,
"learning_rate": 0.0001174568659927714,
"loss": 5.113,
"step": 4355
},
{
"epoch": 1.0,
"eval_loss": 5.059106826782227,
"eval_runtime": 1138.4406,
"eval_samples_per_second": 13.555,
"eval_steps_per_second": 0.424,
"step": 4357
},
{
"epoch": 1.0,
"learning_rate": 0.00011725960121960806,
"loss": 5.125,
"step": 4360
},
{
"epoch": 1.0,
"learning_rate": 0.00011706226719693698,
"loss": 5.0662,
"step": 4365
},
{
"epoch": 1.0,
"learning_rate": 0.00011686486471650798,
"loss": 5.1071,
"step": 4370
},
{
"epoch": 1.0,
"learning_rate": 0.00011666739457034532,
"loss": 5.1138,
"step": 4375
},
{
"epoch": 1.01,
"learning_rate": 0.0001164698575507449,
"loss": 5.0708,
"step": 4380
},
{
"epoch": 1.01,
"learning_rate": 0.00011627225445027086,
"loss": 5.0868,
"step": 4385
},
{
"epoch": 1.01,
"learning_rate": 0.00011607458606175253,
"loss": 5.0646,
"step": 4390
},
{
"epoch": 1.01,
"learning_rate": 0.00011587685317828112,
"loss": 5.0664,
"step": 4395
},
{
"epoch": 1.01,
"learning_rate": 0.00011567905659320663,
"loss": 5.1633,
"step": 4400
},
{
"epoch": 1.01,
"learning_rate": 0.0001154811971001347,
"loss": 5.1106,
"step": 4405
},
{
"epoch": 1.01,
"learning_rate": 0.00011528327549292326,
"loss": 5.056,
"step": 4410
},
{
"epoch": 1.01,
"learning_rate": 0.00011508529256567961,
"loss": 5.1278,
"step": 4415
},
{
"epoch": 1.01,
"learning_rate": 0.00011488724911275694,
"loss": 5.0478,
"step": 4420
},
{
"epoch": 1.02,
"learning_rate": 0.00011468914592875135,
"loss": 5.1035,
"step": 4425
},
{
"epoch": 1.02,
"learning_rate": 0.00011449098380849858,
"loss": 5.0953,
"step": 4430
},
{
"epoch": 1.02,
"learning_rate": 0.00011429276354707086,
"loss": 5.1338,
"step": 4435
},
{
"epoch": 1.02,
"learning_rate": 0.00011409448593977363,
"loss": 5.1327,
"step": 4440
},
{
"epoch": 1.02,
"learning_rate": 0.00011389615178214253,
"loss": 5.133,
"step": 4445
},
{
"epoch": 1.02,
"learning_rate": 0.00011369776186993994,
"loss": 5.0279,
"step": 4450
},
{
"epoch": 1.02,
"learning_rate": 0.00011349931699915209,
"loss": 5.0927,
"step": 4455
},
{
"epoch": 1.02,
"learning_rate": 0.0001133008179659856,
"loss": 5.0782,
"step": 4460
},
{
"epoch": 1.02,
"learning_rate": 0.0001131022655668645,
"loss": 5.0554,
"step": 4465
},
{
"epoch": 1.03,
"learning_rate": 0.00011290366059842683,
"loss": 5.0716,
"step": 4470
},
{
"epoch": 1.03,
"learning_rate": 0.00011270500385752167,
"loss": 5.0599,
"step": 4475
},
{
"epoch": 1.03,
"learning_rate": 0.00011250629614120571,
"loss": 5.0337,
"step": 4480
},
{
"epoch": 1.03,
"learning_rate": 0.00011230753824674025,
"loss": 5.0706,
"step": 4485
},
{
"epoch": 1.03,
"learning_rate": 0.00011210873097158786,
"loss": 5.0528,
"step": 4490
},
{
"epoch": 1.03,
"learning_rate": 0.00011190987511340928,
"loss": 5.0766,
"step": 4495
},
{
"epoch": 1.03,
"learning_rate": 0.00011171097147006013,
"loss": 5.0202,
"step": 4500
},
{
"epoch": 1.03,
"learning_rate": 0.0001115120208395878,
"loss": 5.0974,
"step": 4505
},
{
"epoch": 1.03,
"learning_rate": 0.00011131302402022821,
"loss": 5.0267,
"step": 4510
},
{
"epoch": 1.04,
"learning_rate": 0.00011111398181040251,
"loss": 5.0824,
"step": 4515
},
{
"epoch": 1.04,
"learning_rate": 0.00011091489500871408,
"loss": 5.0885,
"step": 4520
},
{
"epoch": 1.04,
"learning_rate": 0.00011071576441394516,
"loss": 5.0802,
"step": 4525
},
{
"epoch": 1.04,
"learning_rate": 0.00011051659082505366,
"loss": 5.0019,
"step": 4530
},
{
"epoch": 1.04,
"learning_rate": 0.0001103173750411701,
"loss": 5.0051,
"step": 4535
},
{
"epoch": 1.04,
"learning_rate": 0.00011011811786159416,
"loss": 5.0516,
"step": 4540
},
{
"epoch": 1.04,
"learning_rate": 0.00010991882008579177,
"loss": 5.0614,
"step": 4545
},
{
"epoch": 1.04,
"learning_rate": 0.00010971948251339157,
"loss": 5.0201,
"step": 4550
},
{
"epoch": 1.05,
"learning_rate": 0.00010952010594418201,
"loss": 5.0697,
"step": 4555
},
{
"epoch": 1.05,
"learning_rate": 0.00010932069117810787,
"loss": 5.0648,
"step": 4560
},
{
"epoch": 1.05,
"learning_rate": 0.00010912123901526736,
"loss": 4.9865,
"step": 4565
},
{
"epoch": 1.05,
"learning_rate": 0.00010892175025590856,
"loss": 5.0553,
"step": 4570
},
{
"epoch": 1.05,
"learning_rate": 0.00010872222570042652,
"loss": 4.9917,
"step": 4575
},
{
"epoch": 1.05,
"learning_rate": 0.00010852266614935982,
"loss": 4.9836,
"step": 4580
},
{
"epoch": 1.05,
"learning_rate": 0.00010832307240338752,
"loss": 5.045,
"step": 4585
},
{
"epoch": 1.05,
"learning_rate": 0.00010812344526332578,
"loss": 5.0025,
"step": 4590
},
{
"epoch": 1.05,
"learning_rate": 0.00010792378553012488,
"loss": 5.0356,
"step": 4595
},
{
"epoch": 1.06,
"learning_rate": 0.00010772409400486571,
"loss": 5.062,
"step": 4600
},
{
"epoch": 1.06,
"learning_rate": 0.0001075243714887569,
"loss": 5.038,
"step": 4605
},
{
"epoch": 1.06,
"learning_rate": 0.00010732461878313125,
"loss": 5.0387,
"step": 4610
},
{
"epoch": 1.06,
"learning_rate": 0.0001071248366894428,
"loss": 5.0372,
"step": 4615
},
{
"epoch": 1.06,
"learning_rate": 0.00010692502600926348,
"loss": 4.9963,
"step": 4620
},
{
"epoch": 1.06,
"learning_rate": 0.00010672518754427988,
"loss": 4.9673,
"step": 4625
},
{
"epoch": 1.06,
"learning_rate": 0.00010652532209629011,
"loss": 4.9897,
"step": 4630
},
{
"epoch": 1.06,
"learning_rate": 0.0001063254304672005,
"loss": 5.026,
"step": 4635
},
{
"epoch": 1.06,
"learning_rate": 0.00010612551345902244,
"loss": 5.0018,
"step": 4640
},
{
"epoch": 1.07,
"learning_rate": 0.0001059255718738692,
"loss": 4.9921,
"step": 4645
},
{
"epoch": 1.07,
"learning_rate": 0.00010572560651395258,
"loss": 5.0137,
"step": 4650
},
{
"epoch": 1.07,
"learning_rate": 0.0001055256181815798,
"loss": 4.9699,
"step": 4655
},
{
"epoch": 1.07,
"learning_rate": 0.0001053256076791503,
"loss": 5.0523,
"step": 4660
},
{
"epoch": 1.07,
"learning_rate": 0.00010512557580915235,
"loss": 4.9911,
"step": 4665
},
{
"epoch": 1.07,
"learning_rate": 0.00010492552337416007,
"loss": 4.9636,
"step": 4670
},
{
"epoch": 1.07,
"learning_rate": 0.00010472545117683004,
"loss": 4.9693,
"step": 4675
},
{
"epoch": 1.07,
"learning_rate": 0.00010452536001989815,
"loss": 5.0237,
"step": 4680
},
{
"epoch": 1.08,
"learning_rate": 0.00010432525070617633,
"loss": 4.9668,
"step": 4685
},
{
"epoch": 1.08,
"learning_rate": 0.00010412512403854942,
"loss": 5.0143,
"step": 4690
},
{
"epoch": 1.08,
"learning_rate": 0.00010392498081997182,
"loss": 4.9906,
"step": 4695
},
{
"epoch": 1.08,
"learning_rate": 0.00010372482185346435,
"loss": 5.0234,
"step": 4700
},
{
"epoch": 1.08,
"learning_rate": 0.00010352464794211104,
"loss": 4.9865,
"step": 4705
},
{
"epoch": 1.08,
"learning_rate": 0.00010332445988905586,
"loss": 4.974,
"step": 4710
},
{
"epoch": 1.08,
"learning_rate": 0.00010312425849749951,
"loss": 4.9991,
"step": 4715
},
{
"epoch": 1.08,
"learning_rate": 0.00010292404457069631,
"loss": 4.9621,
"step": 4720
},
{
"epoch": 1.08,
"learning_rate": 0.00010272381891195071,
"loss": 4.885,
"step": 4725
},
{
"epoch": 1.09,
"learning_rate": 0.00010252358232461436,
"loss": 4.9878,
"step": 4730
},
{
"epoch": 1.09,
"learning_rate": 0.00010232333561208264,
"loss": 4.9553,
"step": 4735
},
{
"epoch": 1.09,
"learning_rate": 0.00010212307957779173,
"loss": 5.036,
"step": 4740
},
{
"epoch": 1.09,
"learning_rate": 0.00010192281502521499,
"loss": 4.9396,
"step": 4745
},
{
"epoch": 1.09,
"learning_rate": 0.00010172254275786017,
"loss": 5.0147,
"step": 4750
},
{
"epoch": 1.09,
"learning_rate": 0.00010152226357926582,
"loss": 4.9507,
"step": 4755
},
{
"epoch": 1.09,
"learning_rate": 0.0001013219782929983,
"loss": 4.9798,
"step": 4760
},
{
"epoch": 1.09,
"learning_rate": 0.00010112168770264843,
"loss": 4.961,
"step": 4765
},
{
"epoch": 1.09,
"learning_rate": 0.0001009213926118284,
"loss": 4.9977,
"step": 4770
},
{
"epoch": 1.1,
"learning_rate": 0.00010072109382416828,
"loss": 4.9127,
"step": 4775
},
{
"epoch": 1.1,
"learning_rate": 0.00010052079214331318,
"loss": 4.9549,
"step": 4780
},
{
"epoch": 1.1,
"learning_rate": 0.00010032048837291963,
"loss": 5.0015,
"step": 4785
},
{
"epoch": 1.1,
"learning_rate": 0.00010012018331665272,
"loss": 5.0122,
"step": 4790
},
{
"epoch": 1.1,
"learning_rate": 9.991987777818258e-05,
"loss": 4.995,
"step": 4795
},
{
"epoch": 1.1,
"learning_rate": 9.971957256118129e-05,
"loss": 4.9526,
"step": 4800
},
{
"epoch": 1.1,
"learning_rate": 9.951926846931969e-05,
"loss": 4.9129,
"step": 4805
},
{
"epoch": 1.1,
"learning_rate": 9.931896630626402e-05,
"loss": 4.968,
"step": 4810
},
{
"epoch": 1.1,
"learning_rate": 9.911866687567291e-05,
"loss": 4.9704,
"step": 4815
},
{
"epoch": 1.11,
"learning_rate": 9.891837098119389e-05,
"loss": 4.95,
"step": 4820
},
{
"epoch": 1.11,
"learning_rate": 9.87180794264604e-05,
"loss": 4.9443,
"step": 4825
},
{
"epoch": 1.11,
"learning_rate": 9.851779301508842e-05,
"loss": 4.9094,
"step": 4830
},
{
"epoch": 1.11,
"learning_rate": 9.831751255067329e-05,
"loss": 5.0039,
"step": 4835
},
{
"epoch": 1.11,
"learning_rate": 9.811723883678654e-05,
"loss": 4.9285,
"step": 4840
},
{
"epoch": 1.11,
"learning_rate": 9.791697267697255e-05,
"loss": 4.9114,
"step": 4845
},
{
"epoch": 1.11,
"learning_rate": 9.771671487474546e-05,
"loss": 4.8915,
"step": 4850
},
{
"epoch": 1.11,
"learning_rate": 9.751646623358576e-05,
"loss": 4.9254,
"step": 4855
},
{
"epoch": 1.12,
"learning_rate": 9.731622755693737e-05,
"loss": 4.9335,
"step": 4860
},
{
"epoch": 1.12,
"learning_rate": 9.711599964820405e-05,
"loss": 4.8724,
"step": 4865
},
{
"epoch": 1.12,
"learning_rate": 9.691578331074643e-05,
"loss": 4.9849,
"step": 4870
},
{
"epoch": 1.12,
"learning_rate": 9.671557934787874e-05,
"loss": 4.9726,
"step": 4875
},
{
"epoch": 1.12,
"learning_rate": 9.651538856286551e-05,
"loss": 4.93,
"step": 4880
},
{
"epoch": 1.12,
"learning_rate": 9.631521175891844e-05,
"loss": 4.8794,
"step": 4885
},
{
"epoch": 1.12,
"learning_rate": 9.611504973919311e-05,
"loss": 4.8882,
"step": 4890
},
{
"epoch": 1.12,
"learning_rate": 9.591490330678579e-05,
"loss": 4.9173,
"step": 4895
},
{
"epoch": 1.12,
"learning_rate": 9.571477326473021e-05,
"loss": 4.9371,
"step": 4900
},
{
"epoch": 1.13,
"learning_rate": 9.551466041599432e-05,
"loss": 4.8697,
"step": 4905
},
{
"epoch": 1.13,
"learning_rate": 9.531456556347712e-05,
"loss": 4.935,
"step": 4910
},
{
"epoch": 1.13,
"learning_rate": 9.511448951000535e-05,
"loss": 4.9023,
"step": 4915
},
{
"epoch": 1.13,
"learning_rate": 9.491443305833043e-05,
"loss": 4.9414,
"step": 4920
},
{
"epoch": 1.13,
"learning_rate": 9.4714397011125e-05,
"loss": 4.9216,
"step": 4925
},
{
"epoch": 1.13,
"learning_rate": 9.451438217097994e-05,
"loss": 4.9164,
"step": 4930
},
{
"epoch": 1.13,
"learning_rate": 9.4314389340401e-05,
"loss": 4.9352,
"step": 4935
},
{
"epoch": 1.13,
"learning_rate": 9.411441932180554e-05,
"loss": 4.8866,
"step": 4940
},
{
"epoch": 1.13,
"learning_rate": 9.391447291751961e-05,
"loss": 4.902,
"step": 4945
},
{
"epoch": 1.14,
"learning_rate": 9.371455092977423e-05,
"loss": 4.916,
"step": 4950
},
{
"epoch": 1.14,
"learning_rate": 9.351465416070274e-05,
"loss": 4.8924,
"step": 4955
},
{
"epoch": 1.14,
"learning_rate": 9.331478341233706e-05,
"loss": 4.9241,
"step": 4960
},
{
"epoch": 1.14,
"learning_rate": 9.311493948660488e-05,
"loss": 4.9589,
"step": 4965
},
{
"epoch": 1.14,
"learning_rate": 9.291512318532614e-05,
"loss": 4.874,
"step": 4970
},
{
"epoch": 1.14,
"learning_rate": 9.271533531021005e-05,
"loss": 4.8862,
"step": 4975
},
{
"epoch": 1.14,
"learning_rate": 9.251557666285174e-05,
"loss": 4.8859,
"step": 4980
},
{
"epoch": 1.14,
"learning_rate": 9.231584804472898e-05,
"loss": 4.8948,
"step": 4985
},
{
"epoch": 1.15,
"learning_rate": 9.211615025719919e-05,
"loss": 4.8823,
"step": 4990
},
{
"epoch": 1.15,
"learning_rate": 9.1916484101496e-05,
"loss": 4.9055,
"step": 4995
},
{
"epoch": 1.15,
"learning_rate": 9.17168503787262e-05,
"loss": 4.9045,
"step": 5000
},
{
"epoch": 1.15,
"learning_rate": 9.151724988986635e-05,
"loss": 4.9271,
"step": 5005
},
{
"epoch": 1.15,
"learning_rate": 9.131768343575979e-05,
"loss": 4.8845,
"step": 5010
},
{
"epoch": 1.15,
"learning_rate": 9.11181518171132e-05,
"loss": 4.9525,
"step": 5015
},
{
"epoch": 1.15,
"learning_rate": 9.091865583449351e-05,
"loss": 4.8578,
"step": 5020
},
{
"epoch": 1.15,
"learning_rate": 9.071919628832476e-05,
"loss": 4.8773,
"step": 5025
},
{
"epoch": 1.15,
"learning_rate": 9.051977397888464e-05,
"loss": 4.8676,
"step": 5030
},
{
"epoch": 1.16,
"learning_rate": 9.032038970630163e-05,
"loss": 4.8945,
"step": 5035
},
{
"epoch": 1.16,
"learning_rate": 9.01210442705514e-05,
"loss": 4.8338,
"step": 5040
},
{
"epoch": 1.16,
"learning_rate": 8.992173847145401e-05,
"loss": 4.8511,
"step": 5045
},
{
"epoch": 1.16,
"learning_rate": 8.972247310867027e-05,
"loss": 4.9281,
"step": 5050
},
{
"epoch": 1.16,
"learning_rate": 8.952324898169888e-05,
"loss": 4.8169,
"step": 5055
},
{
"epoch": 1.16,
"learning_rate": 8.932406688987309e-05,
"loss": 4.8685,
"step": 5060
},
{
"epoch": 1.16,
"learning_rate": 8.912492763235744e-05,
"loss": 4.878,
"step": 5065
},
{
"epoch": 1.16,
"learning_rate": 8.892583200814466e-05,
"loss": 4.8541,
"step": 5070
},
{
"epoch": 1.16,
"learning_rate": 8.872678081605236e-05,
"loss": 4.8263,
"step": 5075
},
{
"epoch": 1.17,
"learning_rate": 8.852777485471997e-05,
"loss": 4.908,
"step": 5080
},
{
"epoch": 1.17,
"learning_rate": 8.832881492260535e-05,
"loss": 4.8555,
"step": 5085
},
{
"epoch": 1.17,
"learning_rate": 8.81299018179817e-05,
"loss": 4.8749,
"step": 5090
},
{
"epoch": 1.17,
"learning_rate": 8.793103633893437e-05,
"loss": 4.8645,
"step": 5095
},
{
"epoch": 1.17,
"learning_rate": 8.773221928335759e-05,
"loss": 4.8361,
"step": 5100
},
{
"epoch": 1.17,
"learning_rate": 8.753345144895136e-05,
"loss": 4.9032,
"step": 5105
},
{
"epoch": 1.17,
"learning_rate": 8.73347336332181e-05,
"loss": 4.855,
"step": 5110
},
{
"epoch": 1.17,
"learning_rate": 8.713606663345968e-05,
"loss": 4.834,
"step": 5115
},
{
"epoch": 1.17,
"learning_rate": 8.693745124677386e-05,
"loss": 4.7977,
"step": 5120
},
{
"epoch": 1.18,
"learning_rate": 8.673888827005164e-05,
"loss": 4.8372,
"step": 5125
},
{
"epoch": 1.18,
"learning_rate": 8.654037849997342e-05,
"loss": 4.8728,
"step": 5130
},
{
"epoch": 1.18,
"learning_rate": 8.634192273300629e-05,
"loss": 4.8253,
"step": 5135
},
{
"epoch": 1.18,
"learning_rate": 8.614352176540067e-05,
"loss": 4.8623,
"step": 5140
},
{
"epoch": 1.18,
"learning_rate": 8.594517639318705e-05,
"loss": 4.9098,
"step": 5145
},
{
"epoch": 1.18,
"learning_rate": 8.57468874121729e-05,
"loss": 4.8379,
"step": 5150
},
{
"epoch": 1.18,
"learning_rate": 8.554865561793942e-05,
"loss": 4.8979,
"step": 5155
},
{
"epoch": 1.18,
"learning_rate": 8.535048180583838e-05,
"loss": 4.8408,
"step": 5160
},
{
"epoch": 1.19,
"learning_rate": 8.515236677098889e-05,
"loss": 4.8904,
"step": 5165
},
{
"epoch": 1.19,
"learning_rate": 8.495431130827422e-05,
"loss": 4.8321,
"step": 5170
},
{
"epoch": 1.19,
"learning_rate": 8.475631621233869e-05,
"loss": 4.8462,
"step": 5175
},
{
"epoch": 1.19,
"learning_rate": 8.455838227758432e-05,
"loss": 4.8631,
"step": 5180
},
{
"epoch": 1.19,
"learning_rate": 8.436051029816783e-05,
"loss": 4.8231,
"step": 5185
},
{
"epoch": 1.19,
"learning_rate": 8.416270106799726e-05,
"loss": 4.8626,
"step": 5190
},
{
"epoch": 1.19,
"learning_rate": 8.396495538072902e-05,
"loss": 4.8349,
"step": 5195
},
{
"epoch": 1.19,
"learning_rate": 8.376727402976447e-05,
"loss": 4.83,
"step": 5200
},
{
"epoch": 1.19,
"learning_rate": 8.356965780824677e-05,
"loss": 4.9056,
"step": 5205
},
{
"epoch": 1.2,
"learning_rate": 8.3372107509058e-05,
"loss": 4.8734,
"step": 5210
},
{
"epoch": 1.2,
"learning_rate": 8.317462392481546e-05,
"loss": 4.8278,
"step": 5215
},
{
"epoch": 1.2,
"learning_rate": 8.297720784786906e-05,
"loss": 4.7766,
"step": 5220
},
{
"epoch": 1.2,
"learning_rate": 8.277986007029756e-05,
"loss": 4.8468,
"step": 5225
},
{
"epoch": 1.2,
"learning_rate": 8.2582581383906e-05,
"loss": 4.8094,
"step": 5230
},
{
"epoch": 1.2,
"learning_rate": 8.238537258022194e-05,
"loss": 4.8178,
"step": 5235
},
{
"epoch": 1.2,
"learning_rate": 8.218823445049265e-05,
"loss": 4.8364,
"step": 5240
},
{
"epoch": 1.2,
"learning_rate": 8.199116778568192e-05,
"loss": 4.8331,
"step": 5245
},
{
"epoch": 1.2,
"learning_rate": 8.179417337646669e-05,
"loss": 4.8482,
"step": 5250
},
{
"epoch": 1.21,
"learning_rate": 8.159725201323408e-05,
"loss": 4.8213,
"step": 5255
},
{
"epoch": 1.21,
"learning_rate": 8.140040448607804e-05,
"loss": 4.8293,
"step": 5260
},
{
"epoch": 1.21,
"learning_rate": 8.12036315847964e-05,
"loss": 4.8363,
"step": 5265
},
{
"epoch": 1.21,
"learning_rate": 8.100693409888748e-05,
"loss": 4.8323,
"step": 5270
},
{
"epoch": 1.21,
"learning_rate": 8.081031281754695e-05,
"loss": 4.7994,
"step": 5275
},
{
"epoch": 1.21,
"learning_rate": 8.061376852966495e-05,
"loss": 4.8269,
"step": 5280
},
{
"epoch": 1.21,
"learning_rate": 8.041730202382245e-05,
"loss": 4.8294,
"step": 5285
},
{
"epoch": 1.21,
"learning_rate": 8.02209140882886e-05,
"loss": 4.8326,
"step": 5290
},
{
"epoch": 1.22,
"learning_rate": 8.002460551101702e-05,
"loss": 4.8782,
"step": 5295
},
{
"epoch": 1.22,
"learning_rate": 7.982837707964321e-05,
"loss": 4.8432,
"step": 5300
},
{
"epoch": 1.22,
"learning_rate": 7.963222958148085e-05,
"loss": 4.8019,
"step": 5305
},
{
"epoch": 1.22,
"learning_rate": 7.943616380351913e-05,
"loss": 4.7782,
"step": 5310
},
{
"epoch": 1.22,
"learning_rate": 7.92401805324192e-05,
"loss": 4.8038,
"step": 5315
},
{
"epoch": 1.22,
"learning_rate": 7.904428055451118e-05,
"loss": 4.8114,
"step": 5320
},
{
"epoch": 1.22,
"learning_rate": 7.884846465579113e-05,
"loss": 4.7944,
"step": 5325
},
{
"epoch": 1.22,
"learning_rate": 7.865273362191759e-05,
"loss": 4.8051,
"step": 5330
},
{
"epoch": 1.22,
"learning_rate": 7.845708823820876e-05,
"loss": 4.822,
"step": 5335
},
{
"epoch": 1.23,
"learning_rate": 7.826152928963904e-05,
"loss": 4.8269,
"step": 5340
},
{
"epoch": 1.23,
"learning_rate": 7.806605756083622e-05,
"loss": 4.8259,
"step": 5345
},
{
"epoch": 1.23,
"learning_rate": 7.787067383607796e-05,
"loss": 4.8119,
"step": 5350
},
{
"epoch": 1.23,
"learning_rate": 7.767537889928889e-05,
"loss": 4.7746,
"step": 5355
},
{
"epoch": 1.23,
"learning_rate": 7.748017353403748e-05,
"loss": 4.8108,
"step": 5360
},
{
"epoch": 1.23,
"learning_rate": 7.72850585235327e-05,
"loss": 4.7756,
"step": 5365
},
{
"epoch": 1.23,
"learning_rate": 7.70900346506211e-05,
"loss": 4.7291,
"step": 5370
},
{
"epoch": 1.23,
"learning_rate": 7.689510269778347e-05,
"loss": 4.7695,
"step": 5375
},
{
"epoch": 1.23,
"learning_rate": 7.670026344713189e-05,
"loss": 4.7634,
"step": 5380
},
{
"epoch": 1.24,
"learning_rate": 7.650551768040641e-05,
"loss": 4.7772,
"step": 5385
},
{
"epoch": 1.24,
"learning_rate": 7.631086617897203e-05,
"loss": 4.7792,
"step": 5390
},
{
"epoch": 1.24,
"learning_rate": 7.611630972381557e-05,
"loss": 4.7878,
"step": 5395
},
{
"epoch": 1.24,
"learning_rate": 7.592184909554245e-05,
"loss": 4.8142,
"step": 5400
},
{
"epoch": 1.24,
"learning_rate": 7.572748507437368e-05,
"loss": 4.8524,
"step": 5405
},
{
"epoch": 1.24,
"learning_rate": 7.553321844014258e-05,
"loss": 4.803,
"step": 5410
},
{
"epoch": 1.24,
"learning_rate": 7.533904997229183e-05,
"loss": 4.8006,
"step": 5415
},
{
"epoch": 1.24,
"learning_rate": 7.514498044987009e-05,
"loss": 4.8241,
"step": 5420
},
{
"epoch": 1.24,
"learning_rate": 7.495101065152917e-05,
"loss": 4.8204,
"step": 5425
},
{
"epoch": 1.25,
"learning_rate": 7.475714135552074e-05,
"loss": 4.8527,
"step": 5430
},
{
"epoch": 1.25,
"learning_rate": 7.456337333969316e-05,
"loss": 4.7756,
"step": 5435
},
{
"epoch": 1.25,
"learning_rate": 7.43697073814885e-05,
"loss": 4.7708,
"step": 5440
},
{
"epoch": 1.25,
"learning_rate": 7.417614425793932e-05,
"loss": 4.8252,
"step": 5445
},
{
"epoch": 1.25,
"learning_rate": 7.39826847456656e-05,
"loss": 4.8143,
"step": 5450
},
{
"epoch": 1.25,
"learning_rate": 7.37893296208716e-05,
"loss": 4.7498,
"step": 5455
},
{
"epoch": 1.25,
"learning_rate": 7.359607965934274e-05,
"loss": 4.7539,
"step": 5460
},
{
"epoch": 1.25,
"learning_rate": 7.340293563644256e-05,
"loss": 4.7292,
"step": 5465
},
{
"epoch": 1.26,
"learning_rate": 7.32098983271094e-05,
"loss": 4.8232,
"step": 5470
},
{
"epoch": 1.26,
"learning_rate": 7.301696850585366e-05,
"loss": 4.8129,
"step": 5475
},
{
"epoch": 1.26,
"learning_rate": 7.282414694675426e-05,
"loss": 4.7475,
"step": 5480
},
{
"epoch": 1.26,
"learning_rate": 7.263143442345592e-05,
"loss": 4.7766,
"step": 5485
},
{
"epoch": 1.26,
"learning_rate": 7.243883170916574e-05,
"loss": 4.7741,
"step": 5490
},
{
"epoch": 1.26,
"learning_rate": 7.22463395766504e-05,
"loss": 4.7999,
"step": 5495
},
{
"epoch": 1.26,
"learning_rate": 7.205395879823271e-05,
"loss": 4.7793,
"step": 5500
},
{
"epoch": 1.26,
"learning_rate": 7.186169014578883e-05,
"loss": 4.7619,
"step": 5505
},
{
"epoch": 1.26,
"learning_rate": 7.166953439074504e-05,
"loss": 4.7841,
"step": 5510
},
{
"epoch": 1.27,
"learning_rate": 7.14774923040746e-05,
"loss": 4.7674,
"step": 5515
},
{
"epoch": 1.27,
"learning_rate": 7.128556465629475e-05,
"loss": 4.7724,
"step": 5520
},
{
"epoch": 1.27,
"learning_rate": 7.109375221746352e-05,
"loss": 4.7496,
"step": 5525
},
{
"epoch": 1.27,
"learning_rate": 7.090205575717678e-05,
"loss": 4.7153,
"step": 5530
},
{
"epoch": 1.27,
"learning_rate": 7.071047604456499e-05,
"loss": 4.7254,
"step": 5535
},
{
"epoch": 1.27,
"learning_rate": 7.051901384829021e-05,
"loss": 4.767,
"step": 5540
},
{
"epoch": 1.27,
"learning_rate": 7.032766993654303e-05,
"loss": 4.7913,
"step": 5545
},
{
"epoch": 1.27,
"learning_rate": 7.013644507703937e-05,
"loss": 4.8,
"step": 5550
},
{
"epoch": 1.27,
"learning_rate": 6.994534003701765e-05,
"loss": 4.7623,
"step": 5555
},
{
"epoch": 1.28,
"learning_rate": 6.975435558323532e-05,
"loss": 4.7464,
"step": 5560
},
{
"epoch": 1.28,
"learning_rate": 6.956349248196627e-05,
"loss": 4.7224,
"step": 5565
},
{
"epoch": 1.28,
"learning_rate": 6.937275149899725e-05,
"loss": 4.6828,
"step": 5570
},
{
"epoch": 1.28,
"learning_rate": 6.918213339962518e-05,
"loss": 4.7704,
"step": 5575
},
{
"epoch": 1.28,
"learning_rate": 6.899163894865395e-05,
"loss": 4.742,
"step": 5580
},
{
"epoch": 1.28,
"learning_rate": 6.880126891039124e-05,
"loss": 4.7355,
"step": 5585
},
{
"epoch": 1.28,
"learning_rate": 6.86110240486457e-05,
"loss": 4.7362,
"step": 5590
},
{
"epoch": 1.28,
"learning_rate": 6.842090512672358e-05,
"loss": 4.7741,
"step": 5595
},
{
"epoch": 1.29,
"learning_rate": 6.823091290742602e-05,
"loss": 4.7723,
"step": 5600
},
{
"epoch": 1.29,
"learning_rate": 6.804104815304566e-05,
"loss": 4.7569,
"step": 5605
},
{
"epoch": 1.29,
"learning_rate": 6.785131162536374e-05,
"loss": 4.7718,
"step": 5610
},
{
"epoch": 1.29,
"learning_rate": 6.766170408564705e-05,
"loss": 4.7562,
"step": 5615
},
{
"epoch": 1.29,
"learning_rate": 6.747222629464484e-05,
"loss": 4.7369,
"step": 5620
},
{
"epoch": 1.29,
"learning_rate": 6.728287901258581e-05,
"loss": 4.7388,
"step": 5625
},
{
"epoch": 1.29,
"learning_rate": 6.709366299917497e-05,
"loss": 4.7534,
"step": 5630
},
{
"epoch": 1.29,
"learning_rate": 6.690457901359073e-05,
"loss": 4.7534,
"step": 5635
},
{
"epoch": 1.29,
"learning_rate": 6.671562781448166e-05,
"loss": 4.6859,
"step": 5640
},
{
"epoch": 1.3,
"learning_rate": 6.652681015996369e-05,
"loss": 4.738,
"step": 5645
},
{
"epoch": 1.3,
"learning_rate": 6.633812680761684e-05,
"loss": 4.7085,
"step": 5650
},
{
"epoch": 1.3,
"learning_rate": 6.614957851448227e-05,
"loss": 4.7201,
"step": 5655
},
{
"epoch": 1.3,
"learning_rate": 6.59611660370594e-05,
"loss": 4.7149,
"step": 5660
},
{
"epoch": 1.3,
"learning_rate": 6.577289013130252e-05,
"loss": 4.7387,
"step": 5665
},
{
"epoch": 1.3,
"learning_rate": 6.558475155261811e-05,
"loss": 4.6589,
"step": 5670
},
{
"epoch": 1.3,
"learning_rate": 6.539675105586158e-05,
"loss": 4.735,
"step": 5675
},
{
"epoch": 1.3,
"learning_rate": 6.52088893953344e-05,
"loss": 4.7227,
"step": 5680
},
{
"epoch": 1.3,
"learning_rate": 6.502116732478096e-05,
"loss": 4.7259,
"step": 5685
},
{
"epoch": 1.31,
"learning_rate": 6.48335855973855e-05,
"loss": 4.687,
"step": 5690
},
{
"epoch": 1.31,
"learning_rate": 6.464614496576935e-05,
"loss": 4.7622,
"step": 5695
},
{
"epoch": 1.31,
"learning_rate": 6.445884618198754e-05,
"loss": 4.6623,
"step": 5700
},
{
"epoch": 1.31,
"learning_rate": 6.427168999752614e-05,
"loss": 4.738,
"step": 5705
},
{
"epoch": 1.31,
"learning_rate": 6.408467716329894e-05,
"loss": 4.7262,
"step": 5710
},
{
"epoch": 1.31,
"learning_rate": 6.389780842964468e-05,
"loss": 4.7594,
"step": 5715
},
{
"epoch": 1.31,
"learning_rate": 6.371108454632391e-05,
"loss": 4.7301,
"step": 5720
},
{
"epoch": 1.31,
"learning_rate": 6.352450626251587e-05,
"loss": 4.739,
"step": 5725
},
{
"epoch": 1.31,
"learning_rate": 6.33380743268159e-05,
"loss": 4.7733,
"step": 5730
},
{
"epoch": 1.32,
"learning_rate": 6.315178948723186e-05,
"loss": 4.7411,
"step": 5735
},
{
"epoch": 1.32,
"learning_rate": 6.29656524911817e-05,
"loss": 4.7295,
"step": 5740
},
{
"epoch": 1.32,
"learning_rate": 6.277966408548992e-05,
"loss": 4.7167,
"step": 5745
},
{
"epoch": 1.32,
"learning_rate": 6.259382501638509e-05,
"loss": 4.7173,
"step": 5750
},
{
"epoch": 1.32,
"learning_rate": 6.240813602949641e-05,
"loss": 4.7548,
"step": 5755
},
{
"epoch": 1.32,
"learning_rate": 6.222259786985101e-05,
"loss": 4.7193,
"step": 5760
},
{
"epoch": 1.32,
"learning_rate": 6.20372112818709e-05,
"loss": 4.7211,
"step": 5765
},
{
"epoch": 1.32,
"learning_rate": 6.185197700936982e-05,
"loss": 4.6844,
"step": 5770
},
{
"epoch": 1.33,
"learning_rate": 6.166689579555053e-05,
"loss": 4.705,
"step": 5775
},
{
"epoch": 1.33,
"learning_rate": 6.14819683830016e-05,
"loss": 4.6324,
"step": 5780
},
{
"epoch": 1.33,
"learning_rate": 6.129719551369456e-05,
"loss": 4.7002,
"step": 5785
},
{
"epoch": 1.33,
"learning_rate": 6.111257792898082e-05,
"loss": 4.7011,
"step": 5790
},
{
"epoch": 1.33,
"learning_rate": 6.092811636958877e-05,
"loss": 4.7764,
"step": 5795
},
{
"epoch": 1.33,
"learning_rate": 6.0743811575620846e-05,
"loss": 4.7315,
"step": 5800
},
{
"epoch": 1.33,
"learning_rate": 6.055966428655042e-05,
"loss": 4.7281,
"step": 5805
},
{
"epoch": 1.33,
"learning_rate": 6.0375675241219e-05,
"loss": 4.7234,
"step": 5810
},
{
"epoch": 1.33,
"learning_rate": 6.019184517783311e-05,
"loss": 4.6957,
"step": 5815
},
{
"epoch": 1.34,
"learning_rate": 6.000817483396148e-05,
"loss": 4.6906,
"step": 5820
},
{
"epoch": 1.34,
"learning_rate": 5.982466494653187e-05,
"loss": 4.7133,
"step": 5825
},
{
"epoch": 1.34,
"learning_rate": 5.96413162518285e-05,
"loss": 4.734,
"step": 5830
},
{
"epoch": 1.34,
"learning_rate": 5.945812948548855e-05,
"loss": 4.7603,
"step": 5835
},
{
"epoch": 1.34,
"learning_rate": 5.9275105382499694e-05,
"loss": 4.7241,
"step": 5840
},
{
"epoch": 1.34,
"learning_rate": 5.909224467719694e-05,
"loss": 4.7101,
"step": 5845
},
{
"epoch": 1.34,
"learning_rate": 5.890954810325966e-05,
"loss": 4.7671,
"step": 5850
},
{
"epoch": 1.34,
"learning_rate": 5.8727016393708746e-05,
"loss": 4.6897,
"step": 5855
},
{
"epoch": 1.34,
"learning_rate": 5.854465028090355e-05,
"loss": 4.7117,
"step": 5860
},
{
"epoch": 1.35,
"learning_rate": 5.836245049653908e-05,
"loss": 4.6882,
"step": 5865
},
{
"epoch": 1.35,
"learning_rate": 5.8180417771643006e-05,
"loss": 4.7032,
"step": 5870
},
{
"epoch": 1.35,
"learning_rate": 5.799855283657254e-05,
"loss": 4.7798,
"step": 5875
},
{
"epoch": 1.35,
"learning_rate": 5.781685642101196e-05,
"loss": 4.7255,
"step": 5880
},
{
"epoch": 1.35,
"learning_rate": 5.7635329253969195e-05,
"loss": 4.684,
"step": 5885
},
{
"epoch": 1.35,
"learning_rate": 5.7453972063773184e-05,
"loss": 4.7053,
"step": 5890
},
{
"epoch": 1.35,
"learning_rate": 5.727278557807085e-05,
"loss": 4.6871,
"step": 5895
},
{
"epoch": 1.35,
"learning_rate": 5.7091770523824317e-05,
"loss": 4.6497,
"step": 5900
},
{
"epoch": 1.36,
"learning_rate": 5.691092762730774e-05,
"loss": 4.7039,
"step": 5905
},
{
"epoch": 1.36,
"learning_rate": 5.673025761410462e-05,
"loss": 4.6889,
"step": 5910
},
{
"epoch": 1.36,
"learning_rate": 5.654976120910478e-05,
"loss": 4.7159,
"step": 5915
},
{
"epoch": 1.36,
"learning_rate": 5.636943913650147e-05,
"loss": 4.6792,
"step": 5920
},
{
"epoch": 1.36,
"learning_rate": 5.618929211978857e-05,
"loss": 4.692,
"step": 5925
},
{
"epoch": 1.36,
"learning_rate": 5.60093208817575e-05,
"loss": 4.6622,
"step": 5930
},
{
"epoch": 1.36,
"learning_rate": 5.582952614449445e-05,
"loss": 4.73,
"step": 5935
},
{
"epoch": 1.36,
"learning_rate": 5.564990862937744e-05,
"loss": 4.6451,
"step": 5940
},
{
"epoch": 1.36,
"learning_rate": 5.5470469057073404e-05,
"loss": 4.6663,
"step": 5945
},
{
"epoch": 1.37,
"learning_rate": 5.5291208147535466e-05,
"loss": 4.7326,
"step": 5950
},
{
"epoch": 1.37,
"learning_rate": 5.511212661999967e-05,
"loss": 4.6735,
"step": 5955
},
{
"epoch": 1.37,
"learning_rate": 5.4933225192982586e-05,
"loss": 4.6278,
"step": 5960
},
{
"epoch": 1.37,
"learning_rate": 5.4754504584278e-05,
"loss": 4.6864,
"step": 5965
},
{
"epoch": 1.37,
"learning_rate": 5.457596551095441e-05,
"loss": 4.7086,
"step": 5970
},
{
"epoch": 1.37,
"learning_rate": 5.4397608689351656e-05,
"loss": 4.7157,
"step": 5975
},
{
"epoch": 1.37,
"learning_rate": 5.421943483507863e-05,
"loss": 4.6579,
"step": 5980
},
{
"epoch": 1.37,
"learning_rate": 5.404144466300998e-05,
"loss": 4.6555,
"step": 5985
},
{
"epoch": 1.37,
"learning_rate": 5.3863638887283364e-05,
"loss": 4.687,
"step": 5990
},
{
"epoch": 1.38,
"learning_rate": 5.3686018221296665e-05,
"loss": 4.6402,
"step": 5995
},
{
"epoch": 1.38,
"learning_rate": 5.350858337770498e-05,
"loss": 4.6733,
"step": 6000
},
{
"epoch": 1.38,
"learning_rate": 5.333133506841797e-05,
"loss": 4.6318,
"step": 6005
},
{
"epoch": 1.38,
"learning_rate": 5.315427400459678e-05,
"loss": 4.6787,
"step": 6010
},
{
"epoch": 1.38,
"learning_rate": 5.297740089665129e-05,
"loss": 4.683,
"step": 6015
},
{
"epoch": 1.38,
"learning_rate": 5.280071645423726e-05,
"loss": 4.6409,
"step": 6020
},
{
"epoch": 1.38,
"learning_rate": 5.262422138625349e-05,
"loss": 4.6501,
"step": 6025
},
{
"epoch": 1.38,
"learning_rate": 5.244791640083906e-05,
"loss": 4.6639,
"step": 6030
},
{
"epoch": 1.38,
"learning_rate": 5.227180220537016e-05,
"loss": 4.6952,
"step": 6035
},
{
"epoch": 1.39,
"learning_rate": 5.2095879506457736e-05,
"loss": 4.7301,
"step": 6040
},
{
"epoch": 1.39,
"learning_rate": 5.192014900994423e-05,
"loss": 4.7045,
"step": 6045
},
{
"epoch": 1.39,
"learning_rate": 5.174461142090111e-05,
"loss": 4.6886,
"step": 6050
},
{
"epoch": 1.39,
"learning_rate": 5.156926744362562e-05,
"loss": 4.6499,
"step": 6055
},
{
"epoch": 1.39,
"learning_rate": 5.139411778163832e-05,
"loss": 4.6756,
"step": 6060
},
{
"epoch": 1.39,
"learning_rate": 5.121916313768018e-05,
"loss": 4.6674,
"step": 6065
},
{
"epoch": 1.39,
"learning_rate": 5.104440421370962e-05,
"loss": 4.6264,
"step": 6070
},
{
"epoch": 1.39,
"learning_rate": 5.0869841710899815e-05,
"loss": 4.6938,
"step": 6075
},
{
"epoch": 1.4,
"learning_rate": 5.0695476329635825e-05,
"loss": 4.6492,
"step": 6080
},
{
"epoch": 1.4,
"learning_rate": 5.052130876951192e-05,
"loss": 4.6972,
"step": 6085
},
{
"epoch": 1.4,
"learning_rate": 5.034733972932855e-05,
"loss": 4.6605,
"step": 6090
},
{
"epoch": 1.4,
"learning_rate": 5.017356990708969e-05,
"loss": 4.668,
"step": 6095
},
{
"epoch": 1.4,
"learning_rate": 5.000000000000002e-05,
"loss": 4.623,
"step": 6100
},
{
"epoch": 1.4,
"learning_rate": 4.982663070446207e-05,
"loss": 4.6858,
"step": 6105
},
{
"epoch": 1.4,
"learning_rate": 4.9653462716073594e-05,
"loss": 4.6676,
"step": 6110
},
{
"epoch": 1.4,
"learning_rate": 4.9480496729624515e-05,
"loss": 4.6803,
"step": 6115
},
{
"epoch": 1.4,
"learning_rate": 4.930773343909434e-05,
"loss": 4.6284,
"step": 6120
},
{
"epoch": 1.41,
"learning_rate": 4.91351735376493e-05,
"loss": 4.6984,
"step": 6125
},
{
"epoch": 1.41,
"learning_rate": 4.8962817717639555e-05,
"loss": 4.6631,
"step": 6130
},
{
"epoch": 1.41,
"learning_rate": 4.879066667059659e-05,
"loss": 4.6599,
"step": 6135
},
{
"epoch": 1.41,
"learning_rate": 4.8618721087230014e-05,
"loss": 4.6954,
"step": 6140
},
{
"epoch": 1.41,
"learning_rate": 4.844698165742536e-05,
"loss": 4.6713,
"step": 6145
},
{
"epoch": 1.41,
"learning_rate": 4.8275449070240854e-05,
"loss": 4.6837,
"step": 6150
},
{
"epoch": 1.41,
"learning_rate": 4.810412401390487e-05,
"loss": 4.7168,
"step": 6155
},
{
"epoch": 1.41,
"learning_rate": 4.793300717581308e-05,
"loss": 4.632,
"step": 6160
},
{
"epoch": 1.41,
"learning_rate": 4.7762099242525847e-05,
"loss": 4.6783,
"step": 6165
},
{
"epoch": 1.42,
"learning_rate": 4.7591400899765234e-05,
"loss": 4.6229,
"step": 6170
},
{
"epoch": 1.42,
"learning_rate": 4.7420912832412445e-05,
"loss": 4.6727,
"step": 6175
},
{
"epoch": 1.42,
"learning_rate": 4.7250635724505e-05,
"loss": 4.655,
"step": 6180
},
{
"epoch": 1.42,
"learning_rate": 4.7080570259233935e-05,
"loss": 4.6868,
"step": 6185
},
{
"epoch": 1.42,
"learning_rate": 4.6910717118941286e-05,
"loss": 4.6991,
"step": 6190
},
{
"epoch": 1.42,
"learning_rate": 4.674107698511704e-05,
"loss": 4.6705,
"step": 6195
},
{
"epoch": 1.42,
"learning_rate": 4.6571650538396615e-05,
"loss": 4.6316,
"step": 6200
},
{
"epoch": 1.42,
"learning_rate": 4.640243845855806e-05,
"loss": 4.6259,
"step": 6205
},
{
"epoch": 1.43,
"learning_rate": 4.6233441424519295e-05,
"loss": 4.717,
"step": 6210
},
{
"epoch": 1.43,
"learning_rate": 4.606466011433557e-05,
"loss": 4.6154,
"step": 6215
},
{
"epoch": 1.43,
"learning_rate": 4.5896095205196356e-05,
"loss": 4.6612,
"step": 6220
},
{
"epoch": 1.43,
"learning_rate": 4.5727747373423116e-05,
"loss": 4.6936,
"step": 6225
},
{
"epoch": 1.43,
"learning_rate": 4.5559617294466176e-05,
"loss": 4.6148,
"step": 6230
},
{
"epoch": 1.43,
"learning_rate": 4.539170564290237e-05,
"loss": 4.6156,
"step": 6235
},
{
"epoch": 1.43,
"learning_rate": 4.522401309243193e-05,
"loss": 4.6261,
"step": 6240
},
{
"epoch": 1.43,
"learning_rate": 4.50565403158761e-05,
"loss": 4.699,
"step": 6245
},
{
"epoch": 1.43,
"learning_rate": 4.488928798517442e-05,
"loss": 4.6464,
"step": 6250
},
{
"epoch": 1.44,
"learning_rate": 4.472225677138186e-05,
"loss": 4.6681,
"step": 6255
},
{
"epoch": 1.44,
"learning_rate": 4.455544734466624e-05,
"loss": 4.6342,
"step": 6260
},
{
"epoch": 1.44,
"learning_rate": 4.4388860374305474e-05,
"loss": 4.6214,
"step": 6265
},
{
"epoch": 1.44,
"learning_rate": 4.422249652868506e-05,
"loss": 4.6874,
"step": 6270
},
{
"epoch": 1.44,
"learning_rate": 4.405635647529514e-05,
"loss": 4.6813,
"step": 6275
},
{
"epoch": 1.44,
"learning_rate": 4.389044088072798e-05,
"loss": 4.6827,
"step": 6280
},
{
"epoch": 1.44,
"learning_rate": 4.3724750410675287e-05,
"loss": 4.6237,
"step": 6285
},
{
"epoch": 1.44,
"learning_rate": 4.355928572992547e-05,
"loss": 4.7075,
"step": 6290
},
{
"epoch": 1.44,
"learning_rate": 4.339404750236117e-05,
"loss": 4.6709,
"step": 6295
},
{
"epoch": 1.45,
"learning_rate": 4.322903639095619e-05,
"loss": 4.6309,
"step": 6300
},
{
"epoch": 1.45,
"learning_rate": 4.306425305777333e-05,
"loss": 4.6,
"step": 6305
},
{
"epoch": 1.45,
"learning_rate": 4.289969816396132e-05,
"loss": 4.6527,
"step": 6310
},
{
"epoch": 1.45,
"learning_rate": 4.2735372369752535e-05,
"loss": 4.6906,
"step": 6315
},
{
"epoch": 1.45,
"learning_rate": 4.2571276334459895e-05,
"loss": 4.6251,
"step": 6320
},
{
"epoch": 1.45,
"learning_rate": 4.240741071647464e-05,
"loss": 4.6286,
"step": 6325
},
{
"epoch": 1.45,
"learning_rate": 4.224377617326353e-05,
"loss": 4.6605,
"step": 6330
},
{
"epoch": 1.45,
"learning_rate": 4.208037336136612e-05,
"loss": 4.6472,
"step": 6335
},
{
"epoch": 1.45,
"learning_rate": 4.1917202936392265e-05,
"loss": 4.6194,
"step": 6340
},
{
"epoch": 1.46,
"learning_rate": 4.175426555301937e-05,
"loss": 4.6127,
"step": 6345
},
{
"epoch": 1.46,
"learning_rate": 4.15915618649899e-05,
"loss": 4.6649,
"step": 6350
},
{
"epoch": 1.46,
"learning_rate": 4.142909252510866e-05,
"loss": 4.6137,
"step": 6355
},
{
"epoch": 1.46,
"learning_rate": 4.126685818524013e-05,
"loss": 4.6434,
"step": 6360
},
{
"epoch": 1.46,
"learning_rate": 4.1104859496305994e-05,
"loss": 4.6443,
"step": 6365
},
{
"epoch": 1.46,
"learning_rate": 4.094309710828236e-05,
"loss": 4.6514,
"step": 6370
},
{
"epoch": 1.46,
"learning_rate": 4.078157167019738e-05,
"loss": 4.6482,
"step": 6375
},
{
"epoch": 1.46,
"learning_rate": 4.0620283830128414e-05,
"loss": 4.6226,
"step": 6380
},
{
"epoch": 1.47,
"learning_rate": 4.0459234235199515e-05,
"loss": 4.6699,
"step": 6385
},
{
"epoch": 1.47,
"learning_rate": 4.029842353157888e-05,
"loss": 4.6841,
"step": 6390
},
{
"epoch": 1.47,
"learning_rate": 4.013785236447616e-05,
"loss": 4.5908,
"step": 6395
},
{
"epoch": 1.47,
"learning_rate": 3.9977521378140084e-05,
"loss": 4.6057,
"step": 6400
},
{
"epoch": 1.47,
"learning_rate": 3.981743121585547e-05,
"loss": 4.623,
"step": 6405
},
{
"epoch": 1.47,
"learning_rate": 3.965758251994115e-05,
"loss": 4.6461,
"step": 6410
},
{
"epoch": 1.47,
"learning_rate": 3.949797593174698e-05,
"loss": 4.6257,
"step": 6415
},
{
"epoch": 1.47,
"learning_rate": 3.933861209165146e-05,
"loss": 4.626,
"step": 6420
},
{
"epoch": 1.47,
"learning_rate": 3.917949163905914e-05,
"loss": 4.6798,
"step": 6425
},
{
"epoch": 1.48,
"learning_rate": 3.9020615212398016e-05,
"loss": 4.6667,
"step": 6430
},
{
"epoch": 1.48,
"learning_rate": 3.886198344911707e-05,
"loss": 4.64,
"step": 6435
},
{
"epoch": 1.48,
"learning_rate": 3.8703596985683556e-05,
"loss": 4.5999,
"step": 6440
},
{
"epoch": 1.48,
"learning_rate": 3.8545456457580566e-05,
"loss": 4.6126,
"step": 6445
},
{
"epoch": 1.48,
"learning_rate": 3.838756249930439e-05,
"loss": 4.6735,
"step": 6450
},
{
"epoch": 1.48,
"learning_rate": 3.822991574436213e-05,
"loss": 4.661,
"step": 6455
},
{
"epoch": 1.48,
"learning_rate": 3.807251682526902e-05,
"loss": 4.6394,
"step": 6460
},
{
"epoch": 1.48,
"learning_rate": 3.791536637354576e-05,
"loss": 4.593,
"step": 6465
},
{
"epoch": 1.48,
"learning_rate": 3.775846501971636e-05,
"loss": 4.5347,
"step": 6470
},
{
"epoch": 1.49,
"learning_rate": 3.760181339330526e-05,
"loss": 4.5798,
"step": 6475
},
{
"epoch": 1.49,
"learning_rate": 3.7445412122835077e-05,
"loss": 4.6295,
"step": 6480
},
{
"epoch": 1.49,
"learning_rate": 3.7289261835823695e-05,
"loss": 4.6271,
"step": 6485
},
{
"epoch": 1.49,
"learning_rate": 3.713336315878224e-05,
"loss": 4.6211,
"step": 6490
},
{
"epoch": 1.49,
"learning_rate": 3.6977716717212165e-05,
"loss": 4.6093,
"step": 6495
},
{
"epoch": 1.49,
"learning_rate": 3.6822323135603054e-05,
"loss": 4.6382,
"step": 6500
},
{
"epoch": 1.49,
"learning_rate": 3.6667183037429756e-05,
"loss": 4.6428,
"step": 6505
},
{
"epoch": 1.49,
"learning_rate": 3.651229704515018e-05,
"loss": 4.6137,
"step": 6510
},
{
"epoch": 1.5,
"learning_rate": 3.635766578020279e-05,
"loss": 4.5749,
"step": 6515
},
{
"epoch": 1.5,
"learning_rate": 3.6203289863003905e-05,
"loss": 4.5919,
"step": 6520
},
{
"epoch": 1.5,
"learning_rate": 3.604916991294537e-05,
"loss": 4.6232,
"step": 6525
},
{
"epoch": 1.5,
"learning_rate": 3.5895306548392005e-05,
"loss": 4.6407,
"step": 6530
},
{
"epoch": 1.5,
"learning_rate": 3.574170038667923e-05,
"loss": 4.5777,
"step": 6535
},
{
"epoch": 1.5,
"learning_rate": 3.558835204411044e-05,
"loss": 4.6254,
"step": 6540
},
{
"epoch": 1.5,
"learning_rate": 3.543526213595461e-05,
"loss": 4.6482,
"step": 6545
},
{
"epoch": 1.5,
"learning_rate": 3.52824312764438e-05,
"loss": 4.615,
"step": 6550
},
{
"epoch": 1.5,
"learning_rate": 3.512986007877072e-05,
"loss": 4.646,
"step": 6555
},
{
"epoch": 1.51,
"learning_rate": 3.497754915508632e-05,
"loss": 4.6678,
"step": 6560
},
{
"epoch": 1.51,
"learning_rate": 3.4825499116497176e-05,
"loss": 4.6985,
"step": 6565
},
{
"epoch": 1.51,
"learning_rate": 3.467371057306318e-05,
"loss": 4.6124,
"step": 6570
},
{
"epoch": 1.51,
"learning_rate": 3.452218413379504e-05,
"loss": 4.6164,
"step": 6575
},
{
"epoch": 1.51,
"learning_rate": 3.437092040665183e-05,
"loss": 4.6304,
"step": 6580
},
{
"epoch": 1.51,
"learning_rate": 3.421991999853857e-05,
"loss": 4.625,
"step": 6585
},
{
"epoch": 1.51,
"learning_rate": 3.406918351530376e-05,
"loss": 4.5795,
"step": 6590
},
{
"epoch": 1.51,
"learning_rate": 3.3918711561737046e-05,
"loss": 4.6565,
"step": 6595
},
{
"epoch": 1.51,
"learning_rate": 3.3768504741566664e-05,
"loss": 4.5725,
"step": 6600
},
{
"epoch": 1.52,
"learning_rate": 3.361856365745705e-05,
"loss": 4.5355,
"step": 6605
},
{
"epoch": 1.52,
"learning_rate": 3.346888891100649e-05,
"loss": 4.62,
"step": 6610
},
{
"epoch": 1.52,
"learning_rate": 3.331948110274462e-05,
"loss": 4.6074,
"step": 6615
},
{
"epoch": 1.52,
"learning_rate": 3.3170340832130134e-05,
"loss": 4.6097,
"step": 6620
},
{
"epoch": 1.52,
"learning_rate": 3.302146869754823e-05,
"loss": 4.6278,
"step": 6625
},
{
"epoch": 1.52,
"learning_rate": 3.287286529630832e-05,
"loss": 4.6484,
"step": 6630
},
{
"epoch": 1.52,
"learning_rate": 3.272453122464152e-05,
"loss": 4.5728,
"step": 6635
},
{
"epoch": 1.52,
"learning_rate": 3.2576467077698493e-05,
"loss": 4.6126,
"step": 6640
},
{
"epoch": 1.52,
"learning_rate": 3.242867344954674e-05,
"loss": 4.6321,
"step": 6645
},
{
"epoch": 1.53,
"learning_rate": 3.228115093316848e-05,
"loss": 4.627,
"step": 6650
},
{
"epoch": 1.53,
"learning_rate": 3.213390012045811e-05,
"loss": 4.6281,
"step": 6655
},
{
"epoch": 1.53,
"learning_rate": 3.198692160221987e-05,
"loss": 4.5743,
"step": 6660
},
{
"epoch": 1.53,
"learning_rate": 3.184021596816563e-05,
"loss": 4.5734,
"step": 6665
},
{
"epoch": 1.53,
"learning_rate": 3.169378380691218e-05,
"loss": 4.5622,
"step": 6670
},
{
"epoch": 1.53,
"learning_rate": 3.1547625705979265e-05,
"loss": 4.5935,
"step": 6675
},
{
"epoch": 1.53,
"learning_rate": 3.140174225178692e-05,
"loss": 4.612,
"step": 6680
},
{
"epoch": 1.53,
"learning_rate": 3.1256134029653275e-05,
"loss": 4.615,
"step": 6685
},
{
"epoch": 1.54,
"learning_rate": 3.111080162379215e-05,
"loss": 4.6019,
"step": 6690
},
{
"epoch": 1.54,
"learning_rate": 3.096574561731072e-05,
"loss": 4.5816,
"step": 6695
},
{
"epoch": 1.54,
"learning_rate": 3.082096659220722e-05,
"loss": 4.5663,
"step": 6700
},
{
"epoch": 1.54,
"learning_rate": 3.0676465129368556e-05,
"loss": 4.5518,
"step": 6705
},
{
"epoch": 1.54,
"learning_rate": 3.0532241808567966e-05,
"loss": 4.6307,
"step": 6710
},
{
"epoch": 1.54,
"learning_rate": 3.0388297208462703e-05,
"loss": 4.6792,
"step": 6715
},
{
"epoch": 1.54,
"learning_rate": 3.0244631906591825e-05,
"loss": 4.5948,
"step": 6720
},
{
"epoch": 1.54,
"learning_rate": 3.01012464793737e-05,
"loss": 4.5964,
"step": 6725
},
{
"epoch": 1.54,
"learning_rate": 2.9958141502103722e-05,
"loss": 4.6444,
"step": 6730
},
{
"epoch": 1.55,
"learning_rate": 2.9815317548952192e-05,
"loss": 4.5379,
"step": 6735
},
{
"epoch": 1.55,
"learning_rate": 2.9672775192961756e-05,
"loss": 4.5627,
"step": 6740
},
{
"epoch": 1.55,
"learning_rate": 2.9530515006045368e-05,
"loss": 4.6281,
"step": 6745
},
{
"epoch": 1.55,
"learning_rate": 2.938853755898364e-05,
"loss": 4.5695,
"step": 6750
},
{
"epoch": 1.55,
"learning_rate": 2.9246843421422998e-05,
"loss": 4.6154,
"step": 6755
},
{
"epoch": 1.55,
"learning_rate": 2.910543316187301e-05,
"loss": 4.5986,
"step": 6760
},
{
"epoch": 1.55,
"learning_rate": 2.896430734770431e-05,
"loss": 4.5794,
"step": 6765
},
{
"epoch": 1.55,
"learning_rate": 2.882346654514627e-05,
"loss": 4.5925,
"step": 6770
},
{
"epoch": 1.55,
"learning_rate": 2.8682911319284712e-05,
"loss": 4.5376,
"step": 6775
},
{
"epoch": 1.56,
"learning_rate": 2.8542642234059725e-05,
"loss": 4.6149,
"step": 6780
},
{
"epoch": 1.56,
"learning_rate": 2.8402659852263257e-05,
"loss": 4.5777,
"step": 6785
},
{
"epoch": 1.56,
"learning_rate": 2.826296473553697e-05,
"loss": 4.532,
"step": 6790
},
{
"epoch": 1.56,
"learning_rate": 2.812355744436993e-05,
"loss": 4.5913,
"step": 6795
},
{
"epoch": 1.56,
"learning_rate": 2.7984438538096392e-05,
"loss": 4.5839,
"step": 6800
},
{
"epoch": 1.56,
"learning_rate": 2.784560857489358e-05,
"loss": 4.6034,
"step": 6805
},
{
"epoch": 1.56,
"learning_rate": 2.7707068111779377e-05,
"loss": 4.501,
"step": 6810
},
{
"epoch": 1.56,
"learning_rate": 2.7568817704610116e-05,
"loss": 4.5865,
"step": 6815
},
{
"epoch": 1.57,
"learning_rate": 2.7430857908078345e-05,
"loss": 4.5889,
"step": 6820
},
{
"epoch": 1.57,
"learning_rate": 2.7293189275710706e-05,
"loss": 4.5401,
"step": 6825
},
{
"epoch": 1.57,
"learning_rate": 2.7155812359865517e-05,
"loss": 4.595,
"step": 6830
},
{
"epoch": 1.57,
"learning_rate": 2.7018727711730706e-05,
"loss": 4.5628,
"step": 6835
},
{
"epoch": 1.57,
"learning_rate": 2.6881935881321563e-05,
"loss": 4.5191,
"step": 6840
},
{
"epoch": 1.57,
"learning_rate": 2.6745437417478502e-05,
"loss": 4.5842,
"step": 6845
},
{
"epoch": 1.57,
"learning_rate": 2.6609232867864896e-05,
"loss": 4.6019,
"step": 6850
},
{
"epoch": 1.57,
"learning_rate": 2.6473322778964847e-05,
"loss": 4.5637,
"step": 6855
},
{
"epoch": 1.57,
"learning_rate": 2.6337707696081094e-05,
"loss": 4.5541,
"step": 6860
},
{
"epoch": 1.58,
"learning_rate": 2.6202388163332637e-05,
"loss": 4.6075,
"step": 6865
},
{
"epoch": 1.58,
"learning_rate": 2.606736472365272e-05,
"loss": 4.5411,
"step": 6870
},
{
"epoch": 1.58,
"learning_rate": 2.5932637918786563e-05,
"loss": 4.5657,
"step": 6875
},
{
"epoch": 1.58,
"learning_rate": 2.5798208289289204e-05,
"loss": 4.5503,
"step": 6880
},
{
"epoch": 1.58,
"learning_rate": 2.566407637452345e-05,
"loss": 4.4874,
"step": 6885
},
{
"epoch": 1.58,
"learning_rate": 2.5530242712657492e-05,
"loss": 4.5598,
"step": 6890
},
{
"epoch": 1.58,
"learning_rate": 2.5396707840662903e-05,
"loss": 4.6072,
"step": 6895
},
{
"epoch": 1.58,
"learning_rate": 2.526347229431242e-05,
"loss": 4.544,
"step": 6900
},
{
"epoch": 1.58,
"learning_rate": 2.513053660817788e-05,
"loss": 4.6272,
"step": 6905
},
{
"epoch": 1.59,
"learning_rate": 2.499790131562797e-05,
"loss": 4.5902,
"step": 6910
},
{
"epoch": 1.59,
"learning_rate": 2.4865566948826048e-05,
"loss": 4.5739,
"step": 6915
},
{
"epoch": 1.59,
"learning_rate": 2.4733534038728257e-05,
"loss": 4.6072,
"step": 6920
},
{
"epoch": 1.59,
"learning_rate": 2.4601803115081068e-05,
"loss": 4.6311,
"step": 6925
},
{
"epoch": 1.59,
"learning_rate": 2.4470374706419485e-05,
"loss": 4.558,
"step": 6930
},
{
"epoch": 1.59,
"learning_rate": 2.4339249340064507e-05,
"loss": 4.634,
"step": 6935
},
{
"epoch": 1.59,
"learning_rate": 2.4208427542121504e-05,
"loss": 4.5723,
"step": 6940
},
{
"epoch": 1.59,
"learning_rate": 2.407790983747773e-05,
"loss": 4.5794,
"step": 6945
},
{
"epoch": 1.59,
"learning_rate": 2.394769674980035e-05,
"loss": 4.5407,
"step": 6950
},
{
"epoch": 1.6,
"learning_rate": 2.3817788801534367e-05,
"loss": 4.5956,
"step": 6955
},
{
"epoch": 1.6,
"learning_rate": 2.3688186513900455e-05,
"loss": 4.6563,
"step": 6960
},
{
"epoch": 1.6,
"learning_rate": 2.3558890406892986e-05,
"loss": 4.6067,
"step": 6965
},
{
"epoch": 1.6,
"learning_rate": 2.34299009992778e-05,
"loss": 4.5887,
"step": 6970
},
{
"epoch": 1.6,
"learning_rate": 2.3301218808590176e-05,
"loss": 4.5513,
"step": 6975
},
{
"epoch": 1.6,
"learning_rate": 2.317284435113278e-05,
"loss": 4.5791,
"step": 6980
},
{
"epoch": 1.6,
"learning_rate": 2.3044778141973655e-05,
"loss": 4.5854,
"step": 6985
},
{
"epoch": 1.6,
"learning_rate": 2.2917020694944023e-05,
"loss": 4.5818,
"step": 6990
},
{
"epoch": 1.61,
"learning_rate": 2.278957252263617e-05,
"loss": 4.5703,
"step": 6995
},
{
"epoch": 1.61,
"learning_rate": 2.2662434136401722e-05,
"loss": 4.5608,
"step": 7000
},
{
"epoch": 1.61,
"learning_rate": 2.2535606046349177e-05,
"loss": 4.5799,
"step": 7005
},
{
"epoch": 1.61,
"learning_rate": 2.2409088761342235e-05,
"loss": 4.578,
"step": 7010
},
{
"epoch": 1.61,
"learning_rate": 2.228288278899735e-05,
"loss": 4.6076,
"step": 7015
},
{
"epoch": 1.61,
"learning_rate": 2.215698863568213e-05,
"loss": 4.5601,
"step": 7020
},
{
"epoch": 1.61,
"learning_rate": 2.203140680651298e-05,
"loss": 4.6048,
"step": 7025
},
{
"epoch": 1.61,
"learning_rate": 2.1906137805353212e-05,
"loss": 4.5915,
"step": 7030
},
{
"epoch": 1.61,
"learning_rate": 2.1781182134810997e-05,
"loss": 4.5576,
"step": 7035
},
{
"epoch": 1.62,
"learning_rate": 2.1656540296237316e-05,
"loss": 4.5468,
"step": 7040
},
{
"epoch": 1.62,
"learning_rate": 2.1532212789724094e-05,
"loss": 4.5916,
"step": 7045
},
{
"epoch": 1.62,
"learning_rate": 2.1408200114101985e-05,
"loss": 4.5717,
"step": 7050
},
{
"epoch": 1.62,
"learning_rate": 2.1284502766938475e-05,
"loss": 4.6184,
"step": 7055
},
{
"epoch": 1.62,
"learning_rate": 2.116112124453592e-05,
"loss": 4.6147,
"step": 7060
},
{
"epoch": 1.62,
"learning_rate": 2.1038056041929456e-05,
"loss": 4.5336,
"step": 7065
},
{
"epoch": 1.62,
"learning_rate": 2.0915307652885164e-05,
"loss": 4.563,
"step": 7070
},
{
"epoch": 1.62,
"learning_rate": 2.0792876569897912e-05,
"loss": 4.5645,
"step": 7075
},
{
"epoch": 1.62,
"learning_rate": 2.067076328418949e-05,
"loss": 4.5716,
"step": 7080
},
{
"epoch": 1.63,
"learning_rate": 2.0548968285706593e-05,
"loss": 4.544,
"step": 7085
},
{
"epoch": 1.63,
"learning_rate": 2.0427492063118935e-05,
"loss": 4.5293,
"step": 7090
},
{
"epoch": 1.63,
"learning_rate": 2.0306335103817208e-05,
"loss": 4.5446,
"step": 7095
},
{
"epoch": 1.63,
"learning_rate": 2.018549789391102e-05,
"loss": 4.6236,
"step": 7100
},
{
"epoch": 1.63,
"learning_rate": 2.006498091822726e-05,
"loss": 4.5056,
"step": 7105
},
{
"epoch": 1.63,
"learning_rate": 1.994478466030787e-05,
"loss": 4.5405,
"step": 7110
},
{
"epoch": 1.63,
"learning_rate": 1.982490960240798e-05,
"loss": 4.584,
"step": 7115
},
{
"epoch": 1.63,
"learning_rate": 1.970535622549401e-05,
"loss": 4.509,
"step": 7120
},
{
"epoch": 1.64,
"learning_rate": 1.9586125009241774e-05,
"loss": 4.5719,
"step": 7125
},
{
"epoch": 1.64,
"learning_rate": 1.946721643203443e-05,
"loss": 4.5562,
"step": 7130
},
{
"epoch": 1.64,
"learning_rate": 1.934863097096067e-05,
"loss": 4.5957,
"step": 7135
},
{
"epoch": 1.64,
"learning_rate": 1.923036910181275e-05,
"loss": 4.5781,
"step": 7140
},
{
"epoch": 1.64,
"learning_rate": 1.9112431299084598e-05,
"loss": 4.5014,
"step": 7145
},
{
"epoch": 1.64,
"learning_rate": 1.8994818035969975e-05,
"loss": 4.573,
"step": 7150
},
{
"epoch": 1.64,
"learning_rate": 1.8877529784360437e-05,
"loss": 4.5279,
"step": 7155
},
{
"epoch": 1.64,
"learning_rate": 1.8760567014843545e-05,
"loss": 4.5326,
"step": 7160
},
{
"epoch": 1.64,
"learning_rate": 1.864393019670092e-05,
"loss": 4.55,
"step": 7165
},
{
"epoch": 1.65,
"learning_rate": 1.8527619797906494e-05,
"loss": 4.5847,
"step": 7170
},
{
"epoch": 1.65,
"learning_rate": 1.8411636285124457e-05,
"loss": 4.6046,
"step": 7175
},
{
"epoch": 1.65,
"learning_rate": 1.8295980123707357e-05,
"loss": 4.5281,
"step": 7180
},
{
"epoch": 1.65,
"learning_rate": 1.8180651777694535e-05,
"loss": 4.5239,
"step": 7185
},
{
"epoch": 1.65,
"learning_rate": 1.8065651709809905e-05,
"loss": 4.5677,
"step": 7190
},
{
"epoch": 1.65,
"learning_rate": 1.795098038146038e-05,
"loss": 4.5765,
"step": 7195
},
{
"epoch": 1.65,
"learning_rate": 1.783663825273372e-05,
"loss": 4.579,
"step": 7200
},
{
"epoch": 1.65,
"learning_rate": 1.772262578239704e-05,
"loss": 4.4754,
"step": 7205
},
{
"epoch": 1.65,
"learning_rate": 1.7608943427894686e-05,
"loss": 4.5839,
"step": 7210
},
{
"epoch": 1.66,
"learning_rate": 1.7495591645346533e-05,
"loss": 4.523,
"step": 7215
},
{
"epoch": 1.66,
"learning_rate": 1.7382570889546124e-05,
"loss": 4.5437,
"step": 7220
},
{
"epoch": 1.66,
"learning_rate": 1.7269881613958805e-05,
"loss": 4.5336,
"step": 7225
},
{
"epoch": 1.66,
"learning_rate": 1.7157524270720036e-05,
"loss": 4.5725,
"step": 7230
},
{
"epoch": 1.66,
"learning_rate": 1.7045499310633428e-05,
"loss": 4.5616,
"step": 7235
},
{
"epoch": 1.66,
"learning_rate": 1.6933807183168994e-05,
"loss": 4.5541,
"step": 7240
},
{
"epoch": 1.66,
"learning_rate": 1.682244833646135e-05,
"loss": 4.5655,
"step": 7245
},
{
"epoch": 1.66,
"learning_rate": 1.6711423217307885e-05,
"loss": 4.537,
"step": 7250
},
{
"epoch": 1.66,
"learning_rate": 1.6600732271167098e-05,
"loss": 4.5957,
"step": 7255
},
{
"epoch": 1.67,
"learning_rate": 1.64903759421566e-05,
"loss": 4.5155,
"step": 7260
},
{
"epoch": 1.67,
"learning_rate": 1.638035467305148e-05,
"loss": 4.5458,
"step": 7265
},
{
"epoch": 1.67,
"learning_rate": 1.627066890528247e-05,
"loss": 4.5722,
"step": 7270
},
{
"epoch": 1.67,
"learning_rate": 1.6161319078934278e-05,
"loss": 4.5482,
"step": 7275
},
{
"epoch": 1.67,
"learning_rate": 1.6052305632743592e-05,
"loss": 4.547,
"step": 7280
},
{
"epoch": 1.67,
"learning_rate": 1.594362900409756e-05,
"loss": 4.5308,
"step": 7285
},
{
"epoch": 1.67,
"learning_rate": 1.583528962903197e-05,
"loss": 4.5775,
"step": 7290
},
{
"epoch": 1.67,
"learning_rate": 1.5727287942229387e-05,
"loss": 4.6227,
"step": 7295
},
{
"epoch": 1.68,
"learning_rate": 1.5619624377017537e-05,
"loss": 4.5775,
"step": 7300
},
{
"epoch": 1.68,
"learning_rate": 1.55122993653675e-05,
"loss": 4.572,
"step": 7305
},
{
"epoch": 1.68,
"learning_rate": 1.540531333789207e-05,
"loss": 4.5922,
"step": 7310
},
{
"epoch": 1.68,
"learning_rate": 1.5298666723843867e-05,
"loss": 4.5424,
"step": 7315
},
{
"epoch": 1.68,
"learning_rate": 1.5192359951113755e-05,
"loss": 4.5955,
"step": 7320
},
{
"epoch": 1.68,
"learning_rate": 1.5086393446229063e-05,
"loss": 4.5597,
"step": 7325
},
{
"epoch": 1.68,
"learning_rate": 1.4980767634351877e-05,
"loss": 4.5101,
"step": 7330
},
{
"epoch": 1.68,
"learning_rate": 1.4875482939277396e-05,
"loss": 4.5899,
"step": 7335
},
{
"epoch": 1.68,
"learning_rate": 1.4770539783432113e-05,
"loss": 4.5811,
"step": 7340
},
{
"epoch": 1.69,
"learning_rate": 1.466593858787223e-05,
"loss": 4.5274,
"step": 7345
},
{
"epoch": 1.69,
"learning_rate": 1.4561679772281877e-05,
"loss": 4.5397,
"step": 7350
},
{
"epoch": 1.69,
"learning_rate": 1.4457763754971553e-05,
"loss": 4.5231,
"step": 7355
},
{
"epoch": 1.69,
"learning_rate": 1.4354190952876334e-05,
"loss": 4.5274,
"step": 7360
},
{
"epoch": 1.69,
"learning_rate": 1.425096178155415e-05,
"loss": 4.4804,
"step": 7365
},
{
"epoch": 1.69,
"learning_rate": 1.4148076655184373e-05,
"loss": 4.5091,
"step": 7370
},
{
"epoch": 1.69,
"learning_rate": 1.404553598656585e-05,
"loss": 4.5516,
"step": 7375
},
{
"epoch": 1.69,
"learning_rate": 1.3943340187115494e-05,
"loss": 4.5279,
"step": 7380
},
{
"epoch": 1.69,
"learning_rate": 1.3841489666866369e-05,
"loss": 4.5114,
"step": 7385
},
{
"epoch": 1.7,
"learning_rate": 1.373998483446638e-05,
"loss": 4.5324,
"step": 7390
},
{
"epoch": 1.7,
"learning_rate": 1.3638826097176328e-05,
"loss": 4.5319,
"step": 7395
},
{
"epoch": 1.7,
"learning_rate": 1.3538013860868436e-05,
"loss": 4.482,
"step": 7400
},
{
"epoch": 1.7,
"learning_rate": 1.3437548530024691e-05,
"loss": 4.5697,
"step": 7405
},
{
"epoch": 1.7,
"learning_rate": 1.3337430507735205e-05,
"loss": 4.5513,
"step": 7410
},
{
"epoch": 1.7,
"learning_rate": 1.3237660195696633e-05,
"loss": 4.5081,
"step": 7415
},
{
"epoch": 1.7,
"learning_rate": 1.313823799421051e-05,
"loss": 4.5395,
"step": 7420
},
{
"epoch": 1.7,
"learning_rate": 1.3039164302181683e-05,
"loss": 4.5547,
"step": 7425
},
{
"epoch": 1.71,
"learning_rate": 1.2940439517116676e-05,
"loss": 4.5383,
"step": 7430
},
{
"epoch": 1.71,
"learning_rate": 1.2842064035122125e-05,
"loss": 4.5505,
"step": 7435
},
{
"epoch": 1.71,
"learning_rate": 1.2744038250903267e-05,
"loss": 4.5571,
"step": 7440
},
{
"epoch": 1.71,
"learning_rate": 1.264636255776208e-05,
"loss": 4.551,
"step": 7445
},
{
"epoch": 1.71,
"learning_rate": 1.2549037347596115e-05,
"loss": 4.5631,
"step": 7450
},
{
"epoch": 1.71,
"learning_rate": 1.245206301089652e-05,
"loss": 4.5251,
"step": 7455
},
{
"epoch": 1.71,
"learning_rate": 1.2355439936746827e-05,
"loss": 4.5173,
"step": 7460
},
{
"epoch": 1.71,
"learning_rate": 1.2259168512821062e-05,
"loss": 4.4905,
"step": 7465
},
{
"epoch": 1.71,
"learning_rate": 1.2163249125382426e-05,
"loss": 4.5231,
"step": 7470
},
{
"epoch": 1.72,
"learning_rate": 1.206768215928169e-05,
"loss": 4.5016,
"step": 7475
},
{
"epoch": 1.72,
"learning_rate": 1.1972467997955595e-05,
"loss": 4.5838,
"step": 7480
},
{
"epoch": 1.72,
"learning_rate": 1.187760702342534e-05,
"loss": 4.5593,
"step": 7485
},
{
"epoch": 1.72,
"learning_rate": 1.1783099616295056e-05,
"loss": 4.5412,
"step": 7490
},
{
"epoch": 1.72,
"learning_rate": 1.1688946155750347e-05,
"loss": 4.5768,
"step": 7495
},
{
"epoch": 1.72,
"learning_rate": 1.1595147019556607e-05,
"loss": 4.5214,
"step": 7500
},
{
"epoch": 1.72,
"learning_rate": 1.1501702584057661e-05,
"loss": 4.5133,
"step": 7505
},
{
"epoch": 1.72,
"learning_rate": 1.140861322417417e-05,
"loss": 4.5332,
"step": 7510
},
{
"epoch": 1.72,
"learning_rate": 1.1315879313402123e-05,
"loss": 4.5513,
"step": 7515
},
{
"epoch": 1.73,
"learning_rate": 1.1223501223811451e-05,
"loss": 4.5954,
"step": 7520
},
{
"epoch": 1.73,
"learning_rate": 1.1131479326044348e-05,
"loss": 4.4921,
"step": 7525
},
{
"epoch": 1.73,
"learning_rate": 1.1039813989313951e-05,
"loss": 4.5349,
"step": 7530
},
{
"epoch": 1.73,
"learning_rate": 1.0948505581402735e-05,
"loss": 4.5111,
"step": 7535
},
{
"epoch": 1.73,
"learning_rate": 1.085755446866119e-05,
"loss": 4.5016,
"step": 7540
},
{
"epoch": 1.73,
"learning_rate": 1.076696101600615e-05,
"loss": 4.4748,
"step": 7545
},
{
"epoch": 1.73,
"learning_rate": 1.0676725586919457e-05,
"loss": 4.5399,
"step": 7550
},
{
"epoch": 1.73,
"learning_rate": 1.0586848543446537e-05,
"loss": 4.5744,
"step": 7555
},
{
"epoch": 1.73,
"learning_rate": 1.0497330246194848e-05,
"loss": 4.4877,
"step": 7560
},
{
"epoch": 1.74,
"learning_rate": 1.0408171054332483e-05,
"loss": 4.5206,
"step": 7565
},
{
"epoch": 1.74,
"learning_rate": 1.031937132558668e-05,
"loss": 4.5463,
"step": 7570
},
{
"epoch": 1.74,
"learning_rate": 1.0230931416242518e-05,
"loss": 4.553,
"step": 7575
},
{
"epoch": 1.74,
"learning_rate": 1.014285168114133e-05,
"loss": 4.4749,
"step": 7580
},
{
"epoch": 1.74,
"learning_rate": 1.0055132473679363e-05,
"loss": 4.4981,
"step": 7585
},
{
"epoch": 1.74,
"learning_rate": 9.96777414580633e-06,
"loss": 4.5109,
"step": 7590
},
{
"epoch": 1.74,
"learning_rate": 9.880777048024014e-06,
"loss": 4.5291,
"step": 7595
},
{
"epoch": 1.74,
"learning_rate": 9.794141529384915e-06,
"loss": 4.5821,
"step": 7600
},
{
"epoch": 1.75,
"learning_rate": 9.707867937490722e-06,
"loss": 4.5805,
"step": 7605
},
{
"epoch": 1.75,
"learning_rate": 9.621956618491024e-06,
"loss": 4.4649,
"step": 7610
},
{
"epoch": 1.75,
"learning_rate": 9.536407917081869e-06,
"loss": 4.5653,
"step": 7615
},
{
"epoch": 1.75,
"learning_rate": 9.451222176504414e-06,
"loss": 4.5206,
"step": 7620
},
{
"epoch": 1.75,
"learning_rate": 9.366399738543574e-06,
"loss": 4.5356,
"step": 7625
},
{
"epoch": 1.75,
"learning_rate": 9.281940943526491e-06,
"loss": 4.5063,
"step": 7630
},
{
"epoch": 1.75,
"learning_rate": 9.197846130321419e-06,
"loss": 4.5086,
"step": 7635
},
{
"epoch": 1.75,
"learning_rate": 9.114115636336152e-06,
"loss": 4.5602,
"step": 7640
},
{
"epoch": 1.75,
"learning_rate": 9.030749797516825e-06,
"loss": 4.4951,
"step": 7645
},
{
"epoch": 1.76,
"learning_rate": 8.947748948346357e-06,
"loss": 4.5339,
"step": 7650
},
{
"epoch": 1.76,
"learning_rate": 8.865113421843407e-06,
"loss": 4.4816,
"step": 7655
},
{
"epoch": 1.76,
"learning_rate": 8.782843549560771e-06,
"loss": 4.5468,
"step": 7660
},
{
"epoch": 1.76,
"learning_rate": 8.700939661584184e-06,
"loss": 4.5583,
"step": 7665
},
{
"epoch": 1.76,
"learning_rate": 8.61940208653097e-06,
"loss": 4.5727,
"step": 7670
},
{
"epoch": 1.76,
"learning_rate": 8.538231151548693e-06,
"loss": 4.4903,
"step": 7675
},
{
"epoch": 1.76,
"learning_rate": 8.457427182313937e-06,
"loss": 4.5253,
"step": 7680
},
{
"epoch": 1.76,
"learning_rate": 8.37699050303089e-06,
"loss": 4.5662,
"step": 7685
},
{
"epoch": 1.76,
"learning_rate": 8.296921436430071e-06,
"loss": 4.5034,
"step": 7690
},
{
"epoch": 1.77,
"learning_rate": 8.217220303767092e-06,
"loss": 4.5178,
"step": 7695
},
{
"epoch": 1.77,
"learning_rate": 8.137887424821277e-06,
"loss": 4.5397,
"step": 7700
},
{
"epoch": 1.77,
"learning_rate": 8.058923117894534e-06,
"loss": 4.5795,
"step": 7705
},
{
"epoch": 1.77,
"learning_rate": 7.980327699809832e-06,
"loss": 4.5315,
"step": 7710
},
{
"epoch": 1.77,
"learning_rate": 7.902101485910185e-06,
"loss": 4.5321,
"step": 7715
},
{
"epoch": 1.77,
"learning_rate": 7.824244790057223e-06,
"loss": 4.5856,
"step": 7720
},
{
"epoch": 1.77,
"learning_rate": 7.746757924630033e-06,
"loss": 4.553,
"step": 7725
},
{
"epoch": 1.77,
"learning_rate": 7.66964120052377e-06,
"loss": 4.4908,
"step": 7730
},
{
"epoch": 1.78,
"learning_rate": 7.592894927148553e-06,
"loss": 4.5423,
"step": 7735
},
{
"epoch": 1.78,
"learning_rate": 7.516519412428203e-06,
"loss": 4.533,
"step": 7740
},
{
"epoch": 1.78,
"learning_rate": 7.440514962798905e-06,
"loss": 4.498,
"step": 7745
},
{
"epoch": 1.78,
"learning_rate": 7.3648818832080745e-06,
"loss": 4.5032,
"step": 7750
},
{
"epoch": 1.78,
"learning_rate": 7.289620477113068e-06,
"loss": 4.537,
"step": 7755
},
{
"epoch": 1.78,
"learning_rate": 7.214731046480094e-06,
"loss": 4.5323,
"step": 7760
},
{
"epoch": 1.78,
"learning_rate": 7.140213891782821e-06,
"loss": 4.4721,
"step": 7765
},
{
"epoch": 1.78,
"learning_rate": 7.066069312001289e-06,
"loss": 4.4735,
"step": 7770
},
{
"epoch": 1.78,
"learning_rate": 6.992297604620679e-06,
"loss": 4.4806,
"step": 7775
},
{
"epoch": 1.79,
"learning_rate": 6.918899065630113e-06,
"loss": 4.5053,
"step": 7780
},
{
"epoch": 1.79,
"learning_rate": 6.845873989521523e-06,
"loss": 4.5612,
"step": 7785
},
{
"epoch": 1.79,
"learning_rate": 6.773222669288359e-06,
"loss": 4.5158,
"step": 7790
},
{
"epoch": 1.79,
"learning_rate": 6.700945396424518e-06,
"loss": 4.506,
"step": 7795
},
{
"epoch": 1.79,
"learning_rate": 6.629042460923096e-06,
"loss": 4.474,
"step": 7800
},
{
"epoch": 1.79,
"learning_rate": 6.5575141512753015e-06,
"loss": 4.539,
"step": 7805
},
{
"epoch": 1.79,
"learning_rate": 6.486360754469234e-06,
"loss": 4.511,
"step": 7810
},
{
"epoch": 1.79,
"learning_rate": 6.415582555988742e-06,
"loss": 4.4885,
"step": 7815
},
{
"epoch": 1.79,
"learning_rate": 6.345179839812343e-06,
"loss": 4.5612,
"step": 7820
},
{
"epoch": 1.8,
"learning_rate": 6.275152888411984e-06,
"loss": 4.5791,
"step": 7825
},
{
"epoch": 1.8,
"learning_rate": 6.205501982751971e-06,
"loss": 4.5407,
"step": 7830
},
{
"epoch": 1.8,
"learning_rate": 6.136227402287809e-06,
"loss": 4.539,
"step": 7835
},
{
"epoch": 1.8,
"learning_rate": 6.067329424965162e-06,
"loss": 4.5695,
"step": 7840
},
{
"epoch": 1.8,
"learning_rate": 5.998808327218619e-06,
"loss": 4.5741,
"step": 7845
},
{
"epoch": 1.8,
"learning_rate": 5.930664383970641e-06,
"loss": 4.5079,
"step": 7850
},
{
"epoch": 1.8,
"learning_rate": 5.862897868630468e-06,
"loss": 4.5102,
"step": 7855
},
{
"epoch": 1.8,
"learning_rate": 5.795509053093029e-06,
"loss": 4.4488,
"step": 7860
},
{
"epoch": 1.8,
"learning_rate": 5.72849820773782e-06,
"loss": 4.4671,
"step": 7865
},
{
"epoch": 1.81,
"learning_rate": 5.6618656014278406e-06,
"loss": 4.5261,
"step": 7870
},
{
"epoch": 1.81,
"learning_rate": 5.595611501508491e-06,
"loss": 4.5491,
"step": 7875
},
{
"epoch": 1.81,
"learning_rate": 5.5297361738065325e-06,
"loss": 4.5605,
"step": 7880
},
{
"epoch": 1.81,
"learning_rate": 5.464239882628985e-06,
"loss": 4.5522,
"step": 7885
},
{
"epoch": 1.81,
"learning_rate": 5.399122890762143e-06,
"loss": 4.5584,
"step": 7890
},
{
"epoch": 1.81,
"learning_rate": 5.334385459470359e-06,
"loss": 4.521,
"step": 7895
},
{
"epoch": 1.81,
"learning_rate": 5.270027848495207e-06,
"loss": 4.5363,
"step": 7900
},
{
"epoch": 1.81,
"learning_rate": 5.2060503160542785e-06,
"loss": 4.5035,
"step": 7905
},
{
"epoch": 1.82,
"learning_rate": 5.1424531188402405e-06,
"loss": 4.495,
"step": 7910
},
{
"epoch": 1.82,
"learning_rate": 5.079236512019703e-06,
"loss": 4.4942,
"step": 7915
},
{
"epoch": 1.82,
"learning_rate": 5.016400749232297e-06,
"loss": 4.5203,
"step": 7920
},
{
"epoch": 1.82,
"learning_rate": 4.953946082589655e-06,
"loss": 4.5076,
"step": 7925
},
{
"epoch": 1.82,
"learning_rate": 4.89187276267431e-06,
"loss": 4.514,
"step": 7930
},
{
"epoch": 1.82,
"learning_rate": 4.83018103853875e-06,
"loss": 4.5452,
"step": 7935
},
{
"epoch": 1.82,
"learning_rate": 4.7688711577044354e-06,
"loss": 4.4271,
"step": 7940
},
{
"epoch": 1.82,
"learning_rate": 4.707943366160794e-06,
"loss": 4.5472,
"step": 7945
},
{
"epoch": 1.82,
"learning_rate": 4.647397908364182e-06,
"loss": 4.5236,
"step": 7950
},
{
"epoch": 1.83,
"learning_rate": 4.587235027236958e-06,
"loss": 4.5212,
"step": 7955
},
{
"epoch": 1.83,
"learning_rate": 4.5274549641665105e-06,
"loss": 4.5034,
"step": 7960
},
{
"epoch": 1.83,
"learning_rate": 4.468057959004246e-06,
"loss": 4.5204,
"step": 7965
},
{
"epoch": 1.83,
"learning_rate": 4.40904425006472e-06,
"loss": 4.4921,
"step": 7970
},
{
"epoch": 1.83,
"learning_rate": 4.3504140741245095e-06,
"loss": 4.5376,
"step": 7975
},
{
"epoch": 1.83,
"learning_rate": 4.2921676664214535e-06,
"loss": 4.5167,
"step": 7980
},
{
"epoch": 1.83,
"learning_rate": 4.234305260653604e-06,
"loss": 4.4914,
"step": 7985
},
{
"epoch": 1.83,
"learning_rate": 4.176827088978297e-06,
"loss": 4.5076,
"step": 7990
},
{
"epoch": 1.83,
"learning_rate": 4.119733382011215e-06,
"loss": 4.5555,
"step": 7995
},
{
"epoch": 1.84,
"learning_rate": 4.0630243688255185e-06,
"loss": 4.5328,
"step": 8000
},
{
"epoch": 1.84,
"learning_rate": 4.00670027695087e-06,
"loss": 4.5124,
"step": 8005
},
{
"epoch": 1.84,
"learning_rate": 3.950761332372543e-06,
"loss": 4.533,
"step": 8010
},
{
"epoch": 1.84,
"learning_rate": 3.8952077595305055e-06,
"loss": 4.4785,
"step": 8015
},
{
"epoch": 1.84,
"learning_rate": 3.8400397813185054e-06,
"loss": 4.4634,
"step": 8020
},
{
"epoch": 1.84,
"learning_rate": 3.785257619083249e-06,
"loss": 4.5603,
"step": 8025
},
{
"epoch": 1.84,
"learning_rate": 3.7308614926234165e-06,
"loss": 4.5241,
"step": 8030
},
{
"epoch": 1.84,
"learning_rate": 3.676851620188826e-06,
"loss": 4.5561,
"step": 8035
},
{
"epoch": 1.85,
"learning_rate": 3.6232282184795794e-06,
"loss": 4.4843,
"step": 8040
},
{
"epoch": 1.85,
"learning_rate": 3.56999150264512e-06,
"loss": 4.5225,
"step": 8045
},
{
"epoch": 1.85,
"learning_rate": 3.517141686283498e-06,
"loss": 4.5195,
"step": 8050
},
{
"epoch": 1.85,
"learning_rate": 3.4646789814403503e-06,
"loss": 4.5384,
"step": 8055
},
{
"epoch": 1.85,
"learning_rate": 3.412603598608188e-06,
"loss": 4.5282,
"step": 8060
},
{
"epoch": 1.85,
"learning_rate": 3.360915746725479e-06,
"loss": 4.4846,
"step": 8065
},
{
"epoch": 1.85,
"learning_rate": 3.3096156331758e-06,
"loss": 4.5073,
"step": 8070
},
{
"epoch": 1.85,
"learning_rate": 3.258703463787105e-06,
"loss": 4.5283,
"step": 8075
},
{
"epoch": 1.85,
"learning_rate": 3.2081794428307278e-06,
"loss": 4.6029,
"step": 8080
},
{
"epoch": 1.86,
"learning_rate": 3.1580437730207578e-06,
"loss": 4.5324,
"step": 8085
},
{
"epoch": 1.86,
"learning_rate": 3.1082966555130654e-06,
"loss": 4.4754,
"step": 8090
},
{
"epoch": 1.86,
"learning_rate": 3.058938289904578e-06,
"loss": 4.4994,
"step": 8095
},
{
"epoch": 1.86,
"learning_rate": 3.0099688742324715e-06,
"loss": 4.4886,
"step": 8100
},
{
"epoch": 1.86,
"learning_rate": 2.9613886049733365e-06,
"loss": 4.4838,
"step": 8105
},
{
"epoch": 1.86,
"learning_rate": 2.913197677042456e-06,
"loss": 4.5552,
"step": 8110
},
{
"epoch": 1.86,
"learning_rate": 2.865396283792965e-06,
"loss": 4.4864,
"step": 8115
},
{
"epoch": 1.86,
"learning_rate": 2.8179846170150903e-06,
"loss": 4.4679,
"step": 8120
},
{
"epoch": 1.86,
"learning_rate": 2.7709628669353895e-06,
"loss": 4.5362,
"step": 8125
},
{
"epoch": 1.87,
"learning_rate": 2.7243312222159924e-06,
"loss": 4.5041,
"step": 8130
},
{
"epoch": 1.87,
"learning_rate": 2.6780898699538483e-06,
"loss": 4.5783,
"step": 8135
},
{
"epoch": 1.87,
"learning_rate": 2.6322389956799143e-06,
"loss": 4.5313,
"step": 8140
},
{
"epoch": 1.87,
"learning_rate": 2.5867787833585124e-06,
"loss": 4.51,
"step": 8145
},
{
"epoch": 1.87,
"learning_rate": 2.541709415386495e-06,
"loss": 4.5084,
"step": 8150
},
{
"epoch": 1.87,
"learning_rate": 2.4970310725926148e-06,
"loss": 4.4684,
"step": 8155
},
{
"epoch": 1.87,
"learning_rate": 2.4527439342366785e-06,
"loss": 4.4974,
"step": 8160
},
{
"epoch": 1.87,
"learning_rate": 2.4088481780089267e-06,
"loss": 4.528,
"step": 8165
},
{
"epoch": 1.87,
"learning_rate": 2.3653439800292556e-06,
"loss": 4.5452,
"step": 8170
},
{
"epoch": 1.88,
"learning_rate": 2.3222315148465956e-06,
"loss": 4.4839,
"step": 8175
},
{
"epoch": 1.88,
"learning_rate": 2.2795109554381024e-06,
"loss": 4.5123,
"step": 8180
},
{
"epoch": 1.88,
"learning_rate": 2.237182473208499e-06,
"loss": 4.5087,
"step": 8185
},
{
"epoch": 1.88,
"learning_rate": 2.195246237989479e-06,
"loss": 4.4865,
"step": 8190
},
{
"epoch": 1.88,
"learning_rate": 2.1537024180388834e-06,
"loss": 4.4883,
"step": 8195
},
{
"epoch": 1.88,
"learning_rate": 2.1125511800401234e-06,
"loss": 4.4671,
"step": 8200
},
{
"epoch": 1.88,
"learning_rate": 2.0717926891014706e-06,
"loss": 4.5324,
"step": 8205
},
{
"epoch": 1.88,
"learning_rate": 2.0314271087554126e-06,
"loss": 4.4919,
"step": 8210
},
{
"epoch": 1.89,
"learning_rate": 1.991454600957976e-06,
"loss": 4.56,
"step": 8215
},
{
"epoch": 1.89,
"learning_rate": 1.951875326088104e-06,
"loss": 4.4642,
"step": 8220
},
{
"epoch": 1.89,
"learning_rate": 1.9126894429469912e-06,
"loss": 4.5828,
"step": 8225
},
{
"epoch": 1.89,
"learning_rate": 1.8738971087574275e-06,
"loss": 4.5374,
"step": 8230
},
{
"epoch": 1.89,
"learning_rate": 1.8354984791632778e-06,
"loss": 4.4741,
"step": 8235
},
{
"epoch": 1.89,
"learning_rate": 1.797493708228659e-06,
"loss": 4.4987,
"step": 8240
},
{
"epoch": 1.89,
"learning_rate": 1.759882948437519e-06,
"loss": 4.4883,
"step": 8245
},
{
"epoch": 1.89,
"learning_rate": 1.7226663506929142e-06,
"loss": 4.4875,
"step": 8250
},
{
"epoch": 1.89,
"learning_rate": 1.685844064316433e-06,
"loss": 4.5228,
"step": 8255
},
{
"epoch": 1.9,
"learning_rate": 1.6494162370475852e-06,
"loss": 4.5291,
"step": 8260
},
{
"epoch": 1.9,
"learning_rate": 1.6133830150432349e-06,
"loss": 4.53,
"step": 8265
},
{
"epoch": 1.9,
"learning_rate": 1.5777445428770022e-06,
"loss": 4.524,
"step": 8270
},
{
"epoch": 1.9,
"learning_rate": 1.5425009635386622e-06,
"loss": 4.5094,
"step": 8275
},
{
"epoch": 1.9,
"learning_rate": 1.5076524184336027e-06,
"loss": 4.5161,
"step": 8280
},
{
"epoch": 1.9,
"learning_rate": 1.4731990473822454e-06,
"loss": 4.4785,
"step": 8285
},
{
"epoch": 1.9,
"learning_rate": 1.4391409886194474e-06,
"loss": 4.5361,
"step": 8290
},
{
"epoch": 1.9,
"learning_rate": 1.405478378794034e-06,
"loss": 4.5615,
"step": 8295
},
{
"epoch": 1.9,
"learning_rate": 1.3722113529681668e-06,
"loss": 4.5426,
"step": 8300
},
{
"epoch": 1.91,
"learning_rate": 1.3393400446168435e-06,
"loss": 4.5407,
"step": 8305
},
{
"epoch": 1.91,
"learning_rate": 1.306864585627332e-06,
"loss": 4.5364,
"step": 8310
},
{
"epoch": 1.91,
"learning_rate": 1.2747851062986926e-06,
"loss": 4.5093,
"step": 8315
},
{
"epoch": 1.91,
"learning_rate": 1.2431017353412233e-06,
"loss": 4.5303,
"step": 8320
},
{
"epoch": 1.91,
"learning_rate": 1.2118145998758824e-06,
"loss": 4.4821,
"step": 8325
},
{
"epoch": 1.91,
"learning_rate": 1.1809238254339105e-06,
"loss": 4.5601,
"step": 8330
},
{
"epoch": 1.91,
"learning_rate": 1.1504295359562434e-06,
"loss": 4.4995,
"step": 8335
},
{
"epoch": 1.91,
"learning_rate": 1.1203318537929996e-06,
"loss": 4.5452,
"step": 8340
},
{
"epoch": 1.92,
"learning_rate": 1.090630899703038e-06,
"loss": 4.5717,
"step": 8345
},
{
"epoch": 1.92,
"learning_rate": 1.0613267928534453e-06,
"loss": 4.5007,
"step": 8350
},
{
"epoch": 1.92,
"learning_rate": 1.0324196508190832e-06,
"loss": 4.4803,
"step": 8355
},
{
"epoch": 1.92,
"learning_rate": 1.0039095895820639e-06,
"loss": 4.5672,
"step": 8360
},
{
"epoch": 1.92,
"learning_rate": 9.757967235313526e-07,
"loss": 4.5471,
"step": 8365
},
{
"epoch": 1.92,
"learning_rate": 9.480811654622557e-07,
"loss": 4.5204,
"step": 8370
},
{
"epoch": 1.92,
"learning_rate": 9.207630265760103e-07,
"loss": 4.4771,
"step": 8375
},
{
"epoch": 1.92,
"learning_rate": 8.938424164792736e-07,
"loss": 4.4564,
"step": 8380
},
{
"epoch": 1.92,
"learning_rate": 8.673194431837784e-07,
"loss": 4.514,
"step": 8385
},
{
"epoch": 1.93,
"learning_rate": 8.411942131058115e-07,
"loss": 4.5511,
"step": 8390
},
{
"epoch": 1.93,
"learning_rate": 8.154668310658253e-07,
"loss": 4.5572,
"step": 8395
},
{
"epoch": 1.93,
"learning_rate": 7.90137400288049e-07,
"loss": 4.4605,
"step": 8400
},
{
"epoch": 1.93,
"learning_rate": 7.652060223999669e-07,
"loss": 4.5842,
"step": 8405
},
{
"epoch": 1.93,
"learning_rate": 7.406727974320627e-07,
"loss": 4.4843,
"step": 8410
},
{
"epoch": 1.93,
"learning_rate": 7.16537823817276e-07,
"loss": 4.4995,
"step": 8415
},
{
"epoch": 1.93,
"learning_rate": 6.928011983907245e-07,
"loss": 4.5065,
"step": 8420
},
{
"epoch": 1.93,
"learning_rate": 6.694630163892046e-07,
"loss": 4.5232,
"step": 8425
},
{
"epoch": 1.93,
"learning_rate": 6.465233714509245e-07,
"loss": 4.5572,
"step": 8430
},
{
"epoch": 1.94,
"learning_rate": 6.239823556150159e-07,
"loss": 4.4493,
"step": 8435
},
{
"epoch": 1.94,
"learning_rate": 6.01840059321257e-07,
"loss": 4.4967,
"step": 8440
},
{
"epoch": 1.94,
"learning_rate": 5.800965714096496e-07,
"loss": 4.5267,
"step": 8445
},
{
"epoch": 1.94,
"learning_rate": 5.587519791200869e-07,
"loss": 4.5056,
"step": 8450
},
{
"epoch": 1.94,
"learning_rate": 5.378063680920087e-07,
"loss": 4.4833,
"step": 8455
},
{
"epoch": 1.94,
"learning_rate": 5.172598223640468e-07,
"loss": 4.5048,
"step": 8460
},
{
"epoch": 1.94,
"learning_rate": 4.971124243736913e-07,
"loss": 4.503,
"step": 8465
},
{
"epoch": 1.94,
"learning_rate": 4.773642549569579e-07,
"loss": 4.5161,
"step": 8470
},
{
"epoch": 1.94,
"learning_rate": 4.5801539334805466e-07,
"loss": 4.532,
"step": 8475
},
{
"epoch": 1.95,
"learning_rate": 4.390659171790934e-07,
"loss": 4.5352,
"step": 8480
},
{
"epoch": 1.95,
"learning_rate": 4.205159024797456e-07,
"loss": 4.497,
"step": 8485
},
{
"epoch": 1.95,
"learning_rate": 4.023654236769647e-07,
"loss": 4.54,
"step": 8490
},
{
"epoch": 1.95,
"learning_rate": 3.8461455359466435e-07,
"loss": 4.4851,
"step": 8495
},
{
"epoch": 1.95,
"learning_rate": 3.672633634534295e-07,
"loss": 4.4889,
"step": 8500
},
{
"epoch": 1.95,
"learning_rate": 3.5031192287023895e-07,
"loss": 4.4845,
"step": 8505
},
{
"epoch": 1.95,
"learning_rate": 3.3376029985819903e-07,
"loss": 4.5011,
"step": 8510
},
{
"epoch": 1.95,
"learning_rate": 3.176085608262436e-07,
"loss": 4.524,
"step": 8515
},
{
"epoch": 1.96,
"learning_rate": 3.0185677057887885e-07,
"loss": 4.5358,
"step": 8520
},
{
"epoch": 1.96,
"learning_rate": 2.8650499231591685e-07,
"loss": 4.4509,
"step": 8525
},
{
"epoch": 1.96,
"learning_rate": 2.715532876322646e-07,
"loss": 4.4907,
"step": 8530
},
{
"epoch": 1.96,
"learning_rate": 2.570017165175909e-07,
"loss": 4.5124,
"step": 8535
},
{
"epoch": 1.96,
"learning_rate": 2.428503373561708e-07,
"loss": 4.5268,
"step": 8540
},
{
"epoch": 1.96,
"learning_rate": 2.2909920692660847e-07,
"loss": 4.5055,
"step": 8545
},
{
"epoch": 1.96,
"learning_rate": 2.1574838040161473e-07,
"loss": 4.5079,
"step": 8550
},
{
"epoch": 1.96,
"learning_rate": 2.0279791134778515e-07,
"loss": 4.5324,
"step": 8555
},
{
"epoch": 1.96,
"learning_rate": 1.9024785172541136e-07,
"loss": 4.5082,
"step": 8560
},
{
"epoch": 1.97,
"learning_rate": 1.780982518882035e-07,
"loss": 4.515,
"step": 8565
},
{
"epoch": 1.97,
"learning_rate": 1.6634916058319018e-07,
"loss": 4.5209,
"step": 8570
},
{
"epoch": 1.97,
"learning_rate": 1.5500062495041878e-07,
"loss": 4.5149,
"step": 8575
},
{
"epoch": 1.97,
"learning_rate": 1.4405269052284455e-07,
"loss": 4.4695,
"step": 8580
},
{
"epoch": 1.97,
"learning_rate": 1.3350540122611942e-07,
"loss": 4.5516,
"step": 8585
},
{
"epoch": 1.97,
"learning_rate": 1.2335879937839246e-07,
"loss": 4.5657,
"step": 8590
},
{
"epoch": 1.97,
"learning_rate": 1.1361292569017635e-07,
"loss": 4.5608,
"step": 8595
},
{
"epoch": 1.97,
"learning_rate": 1.0426781926416996e-07,
"loss": 4.5156,
"step": 8600
},
{
"epoch": 1.97,
"learning_rate": 9.532351759510283e-08,
"loss": 4.5054,
"step": 8605
},
{
"epoch": 1.98,
"learning_rate": 8.678005656957977e-08,
"loss": 4.5362,
"step": 8610
},
{
"epoch": 1.98,
"learning_rate": 7.86374704659254e-08,
"loss": 4.5645,
"step": 8615
},
{
"epoch": 1.98,
"learning_rate": 7.089579195409534e-08,
"loss": 4.5103,
"step": 8620
},
{
"epoch": 1.98,
"learning_rate": 6.355505209548751e-08,
"loss": 4.4785,
"step": 8625
},
{
"epoch": 1.98,
"learning_rate": 5.6615280342842135e-08,
"loss": 4.5025,
"step": 8630
},
{
"epoch": 1.98,
"learning_rate": 5.00765045401197e-08,
"loss": 4.4908,
"step": 8635
},
{
"epoch": 1.98,
"learning_rate": 4.3938750922412064e-08,
"loss": 4.5106,
"step": 8640
},
{
"epoch": 1.98,
"learning_rate": 3.82020441158093e-08,
"loss": 4.5141,
"step": 8645
},
{
"epoch": 1.99,
"learning_rate": 3.286640713727751e-08,
"loss": 4.5108,
"step": 8650
},
{
"epoch": 1.99,
"learning_rate": 2.7931861394658865e-08,
"loss": 4.4968,
"step": 8655
},
{
"epoch": 1.99,
"learning_rate": 2.3398426686471743e-08,
"loss": 4.5065,
"step": 8660
},
{
"epoch": 1.99,
"learning_rate": 1.9266121201899634e-08,
"loss": 4.4638,
"step": 8665
},
{
"epoch": 1.99,
"learning_rate": 1.5534961520724533e-08,
"loss": 4.5557,
"step": 8670
},
{
"epoch": 1.99,
"learning_rate": 1.2204962613204807e-08,
"loss": 4.5437,
"step": 8675
},
{
"epoch": 1.99,
"learning_rate": 9.276137840075194e-09,
"loss": 4.5022,
"step": 8680
},
{
"epoch": 1.99,
"learning_rate": 6.748498952446891e-09,
"loss": 4.4982,
"step": 8685
},
{
"epoch": 1.99,
"learning_rate": 4.6220560918075474e-09,
"loss": 4.5106,
"step": 8690
},
{
"epoch": 2.0,
"learning_rate": 2.8968177899213465e-09,
"loss": 4.5028,
"step": 8695
},
{
"epoch": 2.0,
"learning_rate": 1.572790968851212e-09,
"loss": 4.5242,
"step": 8700
},
{
"epoch": 2.0,
"learning_rate": 6.49980940892192e-10,
"loss": 4.4505,
"step": 8705
},
{
"epoch": 2.0,
"learning_rate": 1.2839140858256215e-10,
"loss": 4.5056,
"step": 8710
},
{
"epoch": 2.0,
"eval_loss": 4.468899726867676,
"eval_runtime": 1139.6781,
"eval_samples_per_second": 13.54,
"eval_steps_per_second": 0.424,
"step": 8714
},
{
"epoch": 2.0,
"step": 8714,
"total_flos": 6.917398977105101e+16,
"train_loss": 4.463997247680382,
"train_runtime": 72530.8249,
"train_samples_per_second": 3.845,
"train_steps_per_second": 0.12
}
],
"logging_steps": 5,
"max_steps": 8714,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 6.917398977105101e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}