opt-peter-2.7B / trainer_state.json
pszemraj's picture
update model
b8010b6
raw
history blame
120 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"global_step": 4944,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.0161290322580645e-07,
"loss": 1.7344,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 4.032258064516129e-07,
"loss": 1.6586,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 6.048387096774194e-07,
"loss": 1.6465,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 8.064516129032258e-07,
"loss": 1.5832,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 1.0080645161290323e-06,
"loss": 1.5574,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 1.2096774193548388e-06,
"loss": 1.4381,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 1.4112903225806455e-06,
"loss": 1.5215,
"step": 35
},
{
"epoch": 0.03,
"learning_rate": 1.6129032258064516e-06,
"loss": 1.5566,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 1.8145161290322583e-06,
"loss": 1.5641,
"step": 45
},
{
"epoch": 0.04,
"learning_rate": 2.0161290322580646e-06,
"loss": 1.4707,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 2.217741935483871e-06,
"loss": 1.4484,
"step": 55
},
{
"epoch": 0.05,
"learning_rate": 2.4193548387096776e-06,
"loss": 1.4656,
"step": 60
},
{
"epoch": 0.05,
"learning_rate": 2.620967741935484e-06,
"loss": 1.3938,
"step": 65
},
{
"epoch": 0.06,
"learning_rate": 2.822580645161291e-06,
"loss": 1.4152,
"step": 70
},
{
"epoch": 0.06,
"learning_rate": 3.024193548387097e-06,
"loss": 1.4182,
"step": 75
},
{
"epoch": 0.06,
"learning_rate": 3.225806451612903e-06,
"loss": 1.5051,
"step": 80
},
{
"epoch": 0.07,
"learning_rate": 3.4274193548387097e-06,
"loss": 1.4918,
"step": 85
},
{
"epoch": 0.07,
"learning_rate": 3.6290322580645166e-06,
"loss": 1.4738,
"step": 90
},
{
"epoch": 0.08,
"learning_rate": 3.830645161290323e-06,
"loss": 1.4035,
"step": 95
},
{
"epoch": 0.08,
"learning_rate": 4.032258064516129e-06,
"loss": 1.4367,
"step": 100
},
{
"epoch": 0.08,
"learning_rate": 4.233870967741936e-06,
"loss": 1.4076,
"step": 105
},
{
"epoch": 0.09,
"learning_rate": 4.435483870967742e-06,
"loss": 1.4902,
"step": 110
},
{
"epoch": 0.09,
"learning_rate": 4.637096774193548e-06,
"loss": 1.3578,
"step": 115
},
{
"epoch": 0.1,
"learning_rate": 4.838709677419355e-06,
"loss": 1.4467,
"step": 120
},
{
"epoch": 0.1,
"learning_rate": 5.040322580645161e-06,
"loss": 1.4766,
"step": 125
},
{
"epoch": 0.11,
"learning_rate": 5.241935483870968e-06,
"loss": 1.4828,
"step": 130
},
{
"epoch": 0.11,
"learning_rate": 5.443548387096774e-06,
"loss": 1.4258,
"step": 135
},
{
"epoch": 0.11,
"learning_rate": 5.645161290322582e-06,
"loss": 1.4602,
"step": 140
},
{
"epoch": 0.12,
"learning_rate": 5.846774193548388e-06,
"loss": 1.4902,
"step": 145
},
{
"epoch": 0.12,
"learning_rate": 6.048387096774194e-06,
"loss": 1.3729,
"step": 150
},
{
"epoch": 0.13,
"learning_rate": 6.25e-06,
"loss": 1.4902,
"step": 155
},
{
"epoch": 0.13,
"learning_rate": 6.451612903225806e-06,
"loss": 1.435,
"step": 160
},
{
"epoch": 0.13,
"learning_rate": 6.653225806451613e-06,
"loss": 1.4096,
"step": 165
},
{
"epoch": 0.14,
"learning_rate": 6.854838709677419e-06,
"loss": 1.4508,
"step": 170
},
{
"epoch": 0.14,
"learning_rate": 7.056451612903227e-06,
"loss": 1.4266,
"step": 175
},
{
"epoch": 0.15,
"learning_rate": 7.258064516129033e-06,
"loss": 1.4648,
"step": 180
},
{
"epoch": 0.15,
"learning_rate": 7.459677419354839e-06,
"loss": 1.3996,
"step": 185
},
{
"epoch": 0.15,
"learning_rate": 7.661290322580646e-06,
"loss": 1.4152,
"step": 190
},
{
"epoch": 0.16,
"learning_rate": 7.862903225806451e-06,
"loss": 1.4648,
"step": 195
},
{
"epoch": 0.16,
"learning_rate": 8.064516129032258e-06,
"loss": 1.3128,
"step": 200
},
{
"epoch": 0.17,
"learning_rate": 8.266129032258065e-06,
"loss": 1.4242,
"step": 205
},
{
"epoch": 0.17,
"learning_rate": 8.467741935483872e-06,
"loss": 1.4648,
"step": 210
},
{
"epoch": 0.17,
"learning_rate": 8.669354838709677e-06,
"loss": 1.4527,
"step": 215
},
{
"epoch": 0.18,
"learning_rate": 8.870967741935484e-06,
"loss": 1.4,
"step": 220
},
{
"epoch": 0.18,
"learning_rate": 9.072580645161291e-06,
"loss": 1.4984,
"step": 225
},
{
"epoch": 0.19,
"learning_rate": 9.274193548387097e-06,
"loss": 1.4297,
"step": 230
},
{
"epoch": 0.19,
"learning_rate": 9.475806451612905e-06,
"loss": 1.4645,
"step": 235
},
{
"epoch": 0.19,
"learning_rate": 9.67741935483871e-06,
"loss": 1.4107,
"step": 240
},
{
"epoch": 0.2,
"learning_rate": 9.879032258064517e-06,
"loss": 1.4328,
"step": 245
},
{
"epoch": 0.2,
"learning_rate": 9.999995524479982e-06,
"loss": 1.4279,
"step": 250
},
{
"epoch": 0.21,
"learning_rate": 9.999945174971776e-06,
"loss": 1.4738,
"step": 255
},
{
"epoch": 0.21,
"learning_rate": 9.999838882120566e-06,
"loss": 1.3898,
"step": 260
},
{
"epoch": 0.21,
"learning_rate": 9.999676647115646e-06,
"loss": 1.3346,
"step": 265
},
{
"epoch": 0.22,
"learning_rate": 9.999458471772225e-06,
"loss": 1.5164,
"step": 270
},
{
"epoch": 0.22,
"learning_rate": 9.999184358531422e-06,
"loss": 1.4148,
"step": 275
},
{
"epoch": 0.23,
"learning_rate": 9.998854310460233e-06,
"loss": 1.4273,
"step": 280
},
{
"epoch": 0.23,
"learning_rate": 9.998468331251499e-06,
"loss": 1.4592,
"step": 285
},
{
"epoch": 0.23,
"learning_rate": 9.998026425223858e-06,
"loss": 1.4504,
"step": 290
},
{
"epoch": 0.24,
"learning_rate": 9.997528597321704e-06,
"loss": 1.448,
"step": 295
},
{
"epoch": 0.24,
"learning_rate": 9.996974853115132e-06,
"loss": 1.4523,
"step": 300
},
{
"epoch": 0.25,
"learning_rate": 9.996365198799868e-06,
"loss": 1.5063,
"step": 305
},
{
"epoch": 0.25,
"learning_rate": 9.99569964119721e-06,
"loss": 1.4051,
"step": 310
},
{
"epoch": 0.25,
"learning_rate": 9.99497818775394e-06,
"loss": 1.4312,
"step": 315
},
{
"epoch": 0.26,
"learning_rate": 9.994200846542251e-06,
"loss": 1.4758,
"step": 320
},
{
"epoch": 0.26,
"learning_rate": 9.993367626259652e-06,
"loss": 1.468,
"step": 325
},
{
"epoch": 0.27,
"learning_rate": 9.99247853622887e-06,
"loss": 1.527,
"step": 330
},
{
"epoch": 0.27,
"learning_rate": 9.991533586397751e-06,
"loss": 1.4969,
"step": 335
},
{
"epoch": 0.28,
"learning_rate": 9.990532787339137e-06,
"loss": 1.4996,
"step": 340
},
{
"epoch": 0.28,
"learning_rate": 9.98947615025076e-06,
"loss": 1.5262,
"step": 345
},
{
"epoch": 0.28,
"learning_rate": 9.988363686955116e-06,
"loss": 1.5992,
"step": 350
},
{
"epoch": 0.29,
"learning_rate": 9.987195409899322e-06,
"loss": 1.4711,
"step": 355
},
{
"epoch": 0.29,
"learning_rate": 9.985971332154985e-06,
"loss": 1.45,
"step": 360
},
{
"epoch": 0.3,
"learning_rate": 9.984691467418057e-06,
"loss": 1.4863,
"step": 365
},
{
"epoch": 0.3,
"learning_rate": 9.983355830008678e-06,
"loss": 1.5219,
"step": 370
},
{
"epoch": 0.3,
"learning_rate": 9.981964434871015e-06,
"loss": 1.5977,
"step": 375
},
{
"epoch": 0.31,
"learning_rate": 9.980517297573097e-06,
"loss": 1.4539,
"step": 380
},
{
"epoch": 0.31,
"learning_rate": 9.979014434306642e-06,
"loss": 1.3713,
"step": 385
},
{
"epoch": 0.32,
"learning_rate": 9.977455861886874e-06,
"loss": 1.4434,
"step": 390
},
{
"epoch": 0.32,
"learning_rate": 9.975841597752334e-06,
"loss": 1.5469,
"step": 395
},
{
"epoch": 0.32,
"learning_rate": 9.974171659964688e-06,
"loss": 1.4531,
"step": 400
},
{
"epoch": 0.33,
"learning_rate": 9.972446067208519e-06,
"loss": 1.4828,
"step": 405
},
{
"epoch": 0.33,
"learning_rate": 9.970664838791126e-06,
"loss": 1.4512,
"step": 410
},
{
"epoch": 0.34,
"learning_rate": 9.9688279946423e-06,
"loss": 1.4076,
"step": 415
},
{
"epoch": 0.34,
"learning_rate": 9.966935555314107e-06,
"loss": 1.4969,
"step": 420
},
{
"epoch": 0.34,
"learning_rate": 9.96498754198066e-06,
"loss": 1.498,
"step": 425
},
{
"epoch": 0.35,
"learning_rate": 9.962983976437868e-06,
"loss": 1.4393,
"step": 430
},
{
"epoch": 0.35,
"learning_rate": 9.96092488110321e-06,
"loss": 1.5219,
"step": 435
},
{
"epoch": 0.36,
"learning_rate": 9.958810279015474e-06,
"loss": 1.4484,
"step": 440
},
{
"epoch": 0.36,
"learning_rate": 9.956640193834501e-06,
"loss": 1.4805,
"step": 445
},
{
"epoch": 0.36,
"learning_rate": 9.954414649840922e-06,
"loss": 1.4832,
"step": 450
},
{
"epoch": 0.37,
"learning_rate": 9.952133671935885e-06,
"loss": 1.3789,
"step": 455
},
{
"epoch": 0.37,
"learning_rate": 9.949797285640771e-06,
"loss": 1.4773,
"step": 460
},
{
"epoch": 0.38,
"learning_rate": 9.947405517096927e-06,
"loss": 1.5246,
"step": 465
},
{
"epoch": 0.38,
"learning_rate": 9.944958393065343e-06,
"loss": 1.5094,
"step": 470
},
{
"epoch": 0.38,
"learning_rate": 9.942455940926384e-06,
"loss": 1.4253,
"step": 475
},
{
"epoch": 0.39,
"learning_rate": 9.939898188679465e-06,
"loss": 1.4396,
"step": 480
},
{
"epoch": 0.39,
"learning_rate": 9.93728516494274e-06,
"loss": 1.3863,
"step": 485
},
{
"epoch": 0.4,
"learning_rate": 9.934616898952787e-06,
"loss": 1.4965,
"step": 490
},
{
"epoch": 0.4,
"learning_rate": 9.931893420564277e-06,
"loss": 1.5633,
"step": 495
},
{
"epoch": 0.4,
"learning_rate": 9.929114760249642e-06,
"loss": 1.3689,
"step": 500
},
{
"epoch": 0.41,
"learning_rate": 9.926280949098732e-06,
"loss": 1.4434,
"step": 505
},
{
"epoch": 0.41,
"learning_rate": 9.923392018818467e-06,
"loss": 1.4758,
"step": 510
},
{
"epoch": 0.42,
"learning_rate": 9.92044800173249e-06,
"loss": 1.4133,
"step": 515
},
{
"epoch": 0.42,
"learning_rate": 9.917448930780786e-06,
"loss": 1.451,
"step": 520
},
{
"epoch": 0.42,
"learning_rate": 9.91439483951934e-06,
"loss": 1.5117,
"step": 525
},
{
"epoch": 0.43,
"learning_rate": 9.91128576211974e-06,
"loss": 1.4949,
"step": 530
},
{
"epoch": 0.43,
"learning_rate": 9.908121733368803e-06,
"loss": 1.3984,
"step": 535
},
{
"epoch": 0.44,
"learning_rate": 9.904902788668187e-06,
"loss": 1.6094,
"step": 540
},
{
"epoch": 0.44,
"learning_rate": 9.901628964033993e-06,
"loss": 1.452,
"step": 545
},
{
"epoch": 0.44,
"learning_rate": 9.89830029609636e-06,
"loss": 1.4293,
"step": 550
},
{
"epoch": 0.45,
"learning_rate": 9.894916822099062e-06,
"loss": 1.4957,
"step": 555
},
{
"epoch": 0.45,
"learning_rate": 9.89147857989908e-06,
"loss": 1.507,
"step": 560
},
{
"epoch": 0.46,
"learning_rate": 9.887985607966188e-06,
"loss": 1.4379,
"step": 565
},
{
"epoch": 0.46,
"learning_rate": 9.884437945382523e-06,
"loss": 1.4812,
"step": 570
},
{
"epoch": 0.47,
"learning_rate": 9.880835631842141e-06,
"loss": 1.4477,
"step": 575
},
{
"epoch": 0.47,
"learning_rate": 9.877178707650573e-06,
"loss": 1.4484,
"step": 580
},
{
"epoch": 0.47,
"learning_rate": 9.873467213724384e-06,
"loss": 1.4897,
"step": 585
},
{
"epoch": 0.48,
"learning_rate": 9.869701191590703e-06,
"loss": 1.4641,
"step": 590
},
{
"epoch": 0.48,
"learning_rate": 9.865880683386766e-06,
"loss": 1.4809,
"step": 595
},
{
"epoch": 0.49,
"learning_rate": 9.862005731859443e-06,
"loss": 1.4742,
"step": 600
},
{
"epoch": 0.49,
"learning_rate": 9.858076380364757e-06,
"loss": 1.4902,
"step": 605
},
{
"epoch": 0.49,
"learning_rate": 9.854092672867399e-06,
"loss": 1.5898,
"step": 610
},
{
"epoch": 0.5,
"learning_rate": 9.850054653940247e-06,
"loss": 1.4984,
"step": 615
},
{
"epoch": 0.5,
"learning_rate": 9.845962368763847e-06,
"loss": 1.4898,
"step": 620
},
{
"epoch": 0.51,
"learning_rate": 9.841815863125923e-06,
"loss": 1.5082,
"step": 625
},
{
"epoch": 0.51,
"learning_rate": 9.837615183420866e-06,
"loss": 1.4992,
"step": 630
},
{
"epoch": 0.51,
"learning_rate": 9.8333603766492e-06,
"loss": 1.3744,
"step": 635
},
{
"epoch": 0.52,
"learning_rate": 9.829051490417074e-06,
"loss": 1.527,
"step": 640
},
{
"epoch": 0.52,
"learning_rate": 9.824688572935713e-06,
"loss": 1.5008,
"step": 645
},
{
"epoch": 0.53,
"learning_rate": 9.820271673020891e-06,
"loss": 1.4721,
"step": 650
},
{
"epoch": 0.53,
"learning_rate": 9.81580084009238e-06,
"loss": 1.5555,
"step": 655
},
{
"epoch": 0.53,
"learning_rate": 9.811276124173395e-06,
"loss": 1.5285,
"step": 660
},
{
"epoch": 0.54,
"learning_rate": 9.806697575890043e-06,
"loss": 1.4777,
"step": 665
},
{
"epoch": 0.54,
"learning_rate": 9.802065246470738e-06,
"loss": 1.4322,
"step": 670
},
{
"epoch": 0.55,
"learning_rate": 9.797379187745652e-06,
"loss": 1.5197,
"step": 675
},
{
"epoch": 0.55,
"learning_rate": 9.792639452146116e-06,
"loss": 1.4703,
"step": 680
},
{
"epoch": 0.55,
"learning_rate": 9.787846092704043e-06,
"loss": 1.4539,
"step": 685
},
{
"epoch": 0.56,
"learning_rate": 9.782999163051332e-06,
"loss": 1.4326,
"step": 690
},
{
"epoch": 0.56,
"learning_rate": 9.778098717419266e-06,
"loss": 1.3992,
"step": 695
},
{
"epoch": 0.57,
"learning_rate": 9.773144810637908e-06,
"loss": 1.457,
"step": 700
},
{
"epoch": 0.57,
"learning_rate": 9.768137498135489e-06,
"loss": 1.477,
"step": 705
},
{
"epoch": 0.57,
"learning_rate": 9.763076835937782e-06,
"loss": 1.4455,
"step": 710
},
{
"epoch": 0.58,
"learning_rate": 9.75796288066748e-06,
"loss": 1.442,
"step": 715
},
{
"epoch": 0.58,
"learning_rate": 9.752795689543563e-06,
"loss": 1.5156,
"step": 720
},
{
"epoch": 0.59,
"learning_rate": 9.747575320380652e-06,
"loss": 1.5129,
"step": 725
},
{
"epoch": 0.59,
"learning_rate": 9.742301831588368e-06,
"loss": 1.3873,
"step": 730
},
{
"epoch": 0.59,
"learning_rate": 9.736975282170678e-06,
"loss": 1.448,
"step": 735
},
{
"epoch": 0.6,
"learning_rate": 9.731595731725232e-06,
"loss": 1.4695,
"step": 740
},
{
"epoch": 0.6,
"learning_rate": 9.726163240442695e-06,
"loss": 1.4898,
"step": 745
},
{
"epoch": 0.61,
"learning_rate": 9.720677869106077e-06,
"loss": 1.4619,
"step": 750
},
{
"epoch": 0.61,
"learning_rate": 9.715139679090057e-06,
"loss": 1.5293,
"step": 755
},
{
"epoch": 0.61,
"learning_rate": 9.709548732360286e-06,
"loss": 1.4535,
"step": 760
},
{
"epoch": 0.62,
"learning_rate": 9.703905091472698e-06,
"loss": 1.3992,
"step": 765
},
{
"epoch": 0.62,
"learning_rate": 9.698208819572815e-06,
"loss": 1.5254,
"step": 770
},
{
"epoch": 0.63,
"learning_rate": 9.692459980395034e-06,
"loss": 1.351,
"step": 775
},
{
"epoch": 0.63,
"learning_rate": 9.686658638261916e-06,
"loss": 1.3639,
"step": 780
},
{
"epoch": 0.64,
"learning_rate": 9.680804858083468e-06,
"loss": 1.3813,
"step": 785
},
{
"epoch": 0.64,
"learning_rate": 9.674898705356413e-06,
"loss": 1.4016,
"step": 790
},
{
"epoch": 0.64,
"learning_rate": 9.668940246163464e-06,
"loss": 1.4611,
"step": 795
},
{
"epoch": 0.65,
"learning_rate": 9.662929547172575e-06,
"loss": 1.4902,
"step": 800
},
{
"epoch": 0.65,
"learning_rate": 9.656866675636204e-06,
"loss": 1.5445,
"step": 805
},
{
"epoch": 0.66,
"learning_rate": 9.650751699390554e-06,
"loss": 1.5363,
"step": 810
},
{
"epoch": 0.66,
"learning_rate": 9.64458468685482e-06,
"loss": 1.4508,
"step": 815
},
{
"epoch": 0.66,
"learning_rate": 9.638365707030415e-06,
"loss": 1.4367,
"step": 820
},
{
"epoch": 0.67,
"learning_rate": 9.632094829500206e-06,
"loss": 1.4594,
"step": 825
},
{
"epoch": 0.67,
"learning_rate": 9.62577212442774e-06,
"loss": 1.498,
"step": 830
},
{
"epoch": 0.68,
"learning_rate": 9.619397662556434e-06,
"loss": 1.4816,
"step": 835
},
{
"epoch": 0.68,
"learning_rate": 9.61297151520882e-06,
"loss": 1.4754,
"step": 840
},
{
"epoch": 0.68,
"learning_rate": 9.606493754285712e-06,
"loss": 1.4805,
"step": 845
},
{
"epoch": 0.69,
"learning_rate": 9.599964452265434e-06,
"loss": 1.5539,
"step": 850
},
{
"epoch": 0.69,
"learning_rate": 9.593383682202974e-06,
"loss": 1.4996,
"step": 855
},
{
"epoch": 0.7,
"learning_rate": 9.586751517729203e-06,
"loss": 1.484,
"step": 860
},
{
"epoch": 0.7,
"learning_rate": 9.580068033050019e-06,
"loss": 1.4781,
"step": 865
},
{
"epoch": 0.7,
"learning_rate": 9.57333330294554e-06,
"loss": 1.5156,
"step": 870
},
{
"epoch": 0.71,
"learning_rate": 9.566547402769255e-06,
"loss": 1.4734,
"step": 875
},
{
"epoch": 0.71,
"learning_rate": 9.559710408447185e-06,
"loss": 1.5398,
"step": 880
},
{
"epoch": 0.72,
"learning_rate": 9.55282239647703e-06,
"loss": 1.5621,
"step": 885
},
{
"epoch": 0.72,
"learning_rate": 9.545883443927325e-06,
"loss": 1.4793,
"step": 890
},
{
"epoch": 0.72,
"learning_rate": 9.538893628436554e-06,
"loss": 1.3898,
"step": 895
},
{
"epoch": 0.73,
"learning_rate": 9.531853028212308e-06,
"loss": 1.4719,
"step": 900
},
{
"epoch": 0.73,
"learning_rate": 9.52476172203039e-06,
"loss": 1.5215,
"step": 905
},
{
"epoch": 0.74,
"learning_rate": 9.51761978923395e-06,
"loss": 1.4264,
"step": 910
},
{
"epoch": 0.74,
"learning_rate": 9.51042730973258e-06,
"loss": 1.502,
"step": 915
},
{
"epoch": 0.74,
"learning_rate": 9.503184364001432e-06,
"loss": 1.4287,
"step": 920
},
{
"epoch": 0.75,
"learning_rate": 9.495891033080315e-06,
"loss": 1.4693,
"step": 925
},
{
"epoch": 0.75,
"learning_rate": 9.488547398572787e-06,
"loss": 1.5344,
"step": 930
},
{
"epoch": 0.76,
"learning_rate": 9.48115354264524e-06,
"loss": 1.3703,
"step": 935
},
{
"epoch": 0.76,
"learning_rate": 9.473709548025987e-06,
"loss": 1.6094,
"step": 940
},
{
"epoch": 0.76,
"learning_rate": 9.466215498004328e-06,
"loss": 1.4773,
"step": 945
},
{
"epoch": 0.77,
"learning_rate": 9.458671476429624e-06,
"loss": 1.4602,
"step": 950
},
{
"epoch": 0.77,
"learning_rate": 9.45107756771036e-06,
"loss": 1.4798,
"step": 955
},
{
"epoch": 0.78,
"learning_rate": 9.443433856813197e-06,
"loss": 1.516,
"step": 960
},
{
"epoch": 0.78,
"learning_rate": 9.435740429262016e-06,
"loss": 1.4941,
"step": 965
},
{
"epoch": 0.78,
"learning_rate": 9.427997371136976e-06,
"loss": 1.402,
"step": 970
},
{
"epoch": 0.79,
"learning_rate": 9.420204769073538e-06,
"loss": 1.5305,
"step": 975
},
{
"epoch": 0.79,
"learning_rate": 9.4123627102615e-06,
"loss": 1.4604,
"step": 980
},
{
"epoch": 0.8,
"learning_rate": 9.404471282444019e-06,
"loss": 1.5129,
"step": 985
},
{
"epoch": 0.8,
"learning_rate": 9.396530573916636e-06,
"loss": 1.5453,
"step": 990
},
{
"epoch": 0.81,
"learning_rate": 9.38854067352628e-06,
"loss": 1.4834,
"step": 995
},
{
"epoch": 0.81,
"learning_rate": 9.38050167067028e-06,
"loss": 1.4387,
"step": 1000
},
{
"epoch": 0.81,
"learning_rate": 9.372413655295362e-06,
"loss": 1.4383,
"step": 1005
},
{
"epoch": 0.82,
"learning_rate": 9.364276717896639e-06,
"loss": 1.602,
"step": 1010
},
{
"epoch": 0.82,
"learning_rate": 9.356090949516608e-06,
"loss": 1.4193,
"step": 1015
},
{
"epoch": 0.83,
"learning_rate": 9.347856441744122e-06,
"loss": 1.5031,
"step": 1020
},
{
"epoch": 0.83,
"learning_rate": 9.339573286713369e-06,
"loss": 1.4828,
"step": 1025
},
{
"epoch": 0.83,
"learning_rate": 9.331241577102841e-06,
"loss": 1.5191,
"step": 1030
},
{
"epoch": 0.84,
"learning_rate": 9.322861406134302e-06,
"loss": 1.4305,
"step": 1035
},
{
"epoch": 0.84,
"learning_rate": 9.314432867571732e-06,
"loss": 1.4625,
"step": 1040
},
{
"epoch": 0.85,
"learning_rate": 9.30595605572029e-06,
"loss": 1.5246,
"step": 1045
},
{
"epoch": 0.85,
"learning_rate": 9.297431065425257e-06,
"loss": 1.5227,
"step": 1050
},
{
"epoch": 0.85,
"learning_rate": 9.28885799207097e-06,
"loss": 1.5367,
"step": 1055
},
{
"epoch": 0.86,
"learning_rate": 9.280236931579754e-06,
"loss": 1.4879,
"step": 1060
},
{
"epoch": 0.86,
"learning_rate": 9.271567980410859e-06,
"loss": 1.5137,
"step": 1065
},
{
"epoch": 0.87,
"learning_rate": 9.26285123555937e-06,
"loss": 1.4449,
"step": 1070
},
{
"epoch": 0.87,
"learning_rate": 9.254086794555121e-06,
"loss": 1.4602,
"step": 1075
},
{
"epoch": 0.87,
"learning_rate": 9.245274755461621e-06,
"loss": 1.4187,
"step": 1080
},
{
"epoch": 0.88,
"learning_rate": 9.23641521687493e-06,
"loss": 1.5391,
"step": 1085
},
{
"epoch": 0.88,
"learning_rate": 9.227508277922579e-06,
"loss": 1.3988,
"step": 1090
},
{
"epoch": 0.89,
"learning_rate": 9.218554038262448e-06,
"loss": 1.5984,
"step": 1095
},
{
"epoch": 0.89,
"learning_rate": 9.209552598081657e-06,
"loss": 1.5109,
"step": 1100
},
{
"epoch": 0.89,
"learning_rate": 9.200504058095439e-06,
"loss": 1.5418,
"step": 1105
},
{
"epoch": 0.9,
"learning_rate": 9.191408519546022e-06,
"loss": 1.4275,
"step": 1110
},
{
"epoch": 0.9,
"learning_rate": 9.182266084201486e-06,
"loss": 1.4074,
"step": 1115
},
{
"epoch": 0.91,
"learning_rate": 9.173076854354634e-06,
"loss": 1.5016,
"step": 1120
},
{
"epoch": 0.91,
"learning_rate": 9.16384093282184e-06,
"loss": 1.5188,
"step": 1125
},
{
"epoch": 0.91,
"learning_rate": 9.154558422941901e-06,
"loss": 1.4738,
"step": 1130
},
{
"epoch": 0.92,
"learning_rate": 9.145229428574886e-06,
"loss": 1.4049,
"step": 1135
},
{
"epoch": 0.92,
"learning_rate": 9.135854054100961e-06,
"loss": 1.4363,
"step": 1140
},
{
"epoch": 0.93,
"learning_rate": 9.126432404419239e-06,
"loss": 1.5211,
"step": 1145
},
{
"epoch": 0.93,
"learning_rate": 9.11696458494659e-06,
"loss": 1.457,
"step": 1150
},
{
"epoch": 0.93,
"learning_rate": 9.107450701616469e-06,
"loss": 1.4543,
"step": 1155
},
{
"epoch": 0.94,
"learning_rate": 9.097890860877732e-06,
"loss": 1.534,
"step": 1160
},
{
"epoch": 0.94,
"learning_rate": 9.088285169693442e-06,
"loss": 1.5254,
"step": 1165
},
{
"epoch": 0.95,
"learning_rate": 9.078633735539673e-06,
"loss": 1.5371,
"step": 1170
},
{
"epoch": 0.95,
"learning_rate": 9.068936666404307e-06,
"loss": 1.475,
"step": 1175
},
{
"epoch": 0.95,
"learning_rate": 9.059194070785823e-06,
"loss": 1.457,
"step": 1180
},
{
"epoch": 0.96,
"learning_rate": 9.049406057692097e-06,
"loss": 1.4891,
"step": 1185
},
{
"epoch": 0.96,
"learning_rate": 9.03957273663916e-06,
"loss": 1.5648,
"step": 1190
},
{
"epoch": 0.97,
"learning_rate": 9.02969421764999e-06,
"loss": 1.5855,
"step": 1195
},
{
"epoch": 0.97,
"learning_rate": 9.019770611253272e-06,
"loss": 1.4734,
"step": 1200
},
{
"epoch": 0.97,
"learning_rate": 9.009802028482169e-06,
"loss": 1.4867,
"step": 1205
},
{
"epoch": 0.98,
"learning_rate": 8.999788580873074e-06,
"loss": 1.5094,
"step": 1210
},
{
"epoch": 0.98,
"learning_rate": 8.989730380464362e-06,
"loss": 1.4965,
"step": 1215
},
{
"epoch": 0.99,
"learning_rate": 8.979627539795136e-06,
"loss": 1.4887,
"step": 1220
},
{
"epoch": 0.99,
"learning_rate": 8.969480171903973e-06,
"loss": 1.4398,
"step": 1225
},
{
"epoch": 1.0,
"learning_rate": 8.959288390327656e-06,
"loss": 1.4301,
"step": 1230
},
{
"epoch": 1.0,
"learning_rate": 8.949052309099897e-06,
"loss": 1.5309,
"step": 1235
},
{
"epoch": 1.0,
"learning_rate": 8.938772042750078e-06,
"loss": 1.3054,
"step": 1240
},
{
"epoch": 1.01,
"learning_rate": 8.928447706301951e-06,
"loss": 1.2152,
"step": 1245
},
{
"epoch": 1.01,
"learning_rate": 8.91807941527236e-06,
"loss": 1.2238,
"step": 1250
},
{
"epoch": 1.02,
"learning_rate": 8.907667285669955e-06,
"loss": 1.1881,
"step": 1255
},
{
"epoch": 1.02,
"learning_rate": 8.897211433993873e-06,
"loss": 1.234,
"step": 1260
},
{
"epoch": 1.02,
"learning_rate": 8.886711977232463e-06,
"loss": 1.1724,
"step": 1265
},
{
"epoch": 1.03,
"learning_rate": 8.87616903286195e-06,
"loss": 1.3313,
"step": 1270
},
{
"epoch": 1.03,
"learning_rate": 8.865582718845142e-06,
"loss": 1.2277,
"step": 1275
},
{
"epoch": 1.04,
"learning_rate": 8.854953153630097e-06,
"loss": 1.2145,
"step": 1280
},
{
"epoch": 1.04,
"learning_rate": 8.844280456148799e-06,
"loss": 1.243,
"step": 1285
},
{
"epoch": 1.04,
"learning_rate": 8.833564745815835e-06,
"loss": 1.1816,
"step": 1290
},
{
"epoch": 1.05,
"learning_rate": 8.82280614252705e-06,
"loss": 1.1965,
"step": 1295
},
{
"epoch": 1.05,
"learning_rate": 8.81200476665821e-06,
"loss": 1.2035,
"step": 1300
},
{
"epoch": 1.06,
"learning_rate": 8.801160739063657e-06,
"loss": 1.2477,
"step": 1305
},
{
"epoch": 1.06,
"learning_rate": 8.790274181074951e-06,
"loss": 1.1686,
"step": 1310
},
{
"epoch": 1.06,
"learning_rate": 8.779345214499517e-06,
"loss": 1.1877,
"step": 1315
},
{
"epoch": 1.07,
"learning_rate": 8.768373961619283e-06,
"loss": 1.2209,
"step": 1320
},
{
"epoch": 1.07,
"learning_rate": 8.757360545189308e-06,
"loss": 1.2066,
"step": 1325
},
{
"epoch": 1.08,
"learning_rate": 8.746305088436406e-06,
"loss": 1.2484,
"step": 1330
},
{
"epoch": 1.08,
"learning_rate": 8.735207715057779e-06,
"loss": 1.2068,
"step": 1335
},
{
"epoch": 1.08,
"learning_rate": 8.724068549219618e-06,
"loss": 1.1803,
"step": 1340
},
{
"epoch": 1.09,
"learning_rate": 8.712887715555728e-06,
"loss": 1.234,
"step": 1345
},
{
"epoch": 1.09,
"learning_rate": 8.701665339166122e-06,
"loss": 1.2441,
"step": 1350
},
{
"epoch": 1.1,
"learning_rate": 8.690401545615626e-06,
"loss": 1.2082,
"step": 1355
},
{
"epoch": 1.1,
"learning_rate": 8.679096460932477e-06,
"loss": 1.2176,
"step": 1360
},
{
"epoch": 1.1,
"learning_rate": 8.667750211606906e-06,
"loss": 1.2516,
"step": 1365
},
{
"epoch": 1.11,
"learning_rate": 8.65636292458973e-06,
"loss": 1.1766,
"step": 1370
},
{
"epoch": 1.11,
"learning_rate": 8.644934727290927e-06,
"loss": 1.2277,
"step": 1375
},
{
"epoch": 1.12,
"learning_rate": 8.63346574757821e-06,
"loss": 1.2773,
"step": 1380
},
{
"epoch": 1.12,
"learning_rate": 8.621956113775601e-06,
"loss": 1.2162,
"step": 1385
},
{
"epoch": 1.12,
"learning_rate": 8.610405954661988e-06,
"loss": 1.2551,
"step": 1390
},
{
"epoch": 1.13,
"learning_rate": 8.598815399469694e-06,
"loss": 1.2625,
"step": 1395
},
{
"epoch": 1.13,
"learning_rate": 8.587184577883018e-06,
"loss": 1.2465,
"step": 1400
},
{
"epoch": 1.14,
"learning_rate": 8.5755136200368e-06,
"loss": 1.2008,
"step": 1405
},
{
"epoch": 1.14,
"learning_rate": 8.563802656514946e-06,
"loss": 1.1623,
"step": 1410
},
{
"epoch": 1.14,
"learning_rate": 8.552051818348986e-06,
"loss": 1.1625,
"step": 1415
},
{
"epoch": 1.15,
"learning_rate": 8.540261237016597e-06,
"loss": 1.1723,
"step": 1420
},
{
"epoch": 1.15,
"learning_rate": 8.528431044440127e-06,
"loss": 1.268,
"step": 1425
},
{
"epoch": 1.16,
"learning_rate": 8.516561372985137e-06,
"loss": 1.2488,
"step": 1430
},
{
"epoch": 1.16,
"learning_rate": 8.504652355458901e-06,
"loss": 1.298,
"step": 1435
},
{
"epoch": 1.17,
"learning_rate": 8.492704125108933e-06,
"loss": 1.2168,
"step": 1440
},
{
"epoch": 1.17,
"learning_rate": 8.480716815621486e-06,
"loss": 1.2166,
"step": 1445
},
{
"epoch": 1.17,
"learning_rate": 8.468690561120064e-06,
"loss": 1.201,
"step": 1450
},
{
"epoch": 1.18,
"learning_rate": 8.456625496163921e-06,
"loss": 1.2266,
"step": 1455
},
{
"epoch": 1.18,
"learning_rate": 8.444521755746547e-06,
"loss": 1.1812,
"step": 1460
},
{
"epoch": 1.19,
"learning_rate": 8.43237947529417e-06,
"loss": 1.1762,
"step": 1465
},
{
"epoch": 1.19,
"learning_rate": 8.420198790664232e-06,
"loss": 1.2473,
"step": 1470
},
{
"epoch": 1.19,
"learning_rate": 8.407979838143869e-06,
"loss": 1.1887,
"step": 1475
},
{
"epoch": 1.2,
"learning_rate": 8.395722754448392e-06,
"loss": 1.2277,
"step": 1480
},
{
"epoch": 1.2,
"learning_rate": 8.38342767671975e-06,
"loss": 1.2418,
"step": 1485
},
{
"epoch": 1.21,
"learning_rate": 8.371094742525006e-06,
"loss": 1.2081,
"step": 1490
},
{
"epoch": 1.21,
"learning_rate": 8.358724089854784e-06,
"loss": 1.3461,
"step": 1495
},
{
"epoch": 1.21,
"learning_rate": 8.346315857121732e-06,
"loss": 1.1977,
"step": 1500
},
{
"epoch": 1.22,
"learning_rate": 8.33387018315898e-06,
"loss": 1.2336,
"step": 1505
},
{
"epoch": 1.22,
"learning_rate": 8.321387207218578e-06,
"loss": 1.249,
"step": 1510
},
{
"epoch": 1.23,
"learning_rate": 8.308867068969933e-06,
"loss": 1.2188,
"step": 1515
},
{
"epoch": 1.23,
"learning_rate": 8.296309908498264e-06,
"loss": 1.1823,
"step": 1520
},
{
"epoch": 1.23,
"learning_rate": 8.283715866303016e-06,
"loss": 1.2462,
"step": 1525
},
{
"epoch": 1.24,
"learning_rate": 8.271085083296295e-06,
"loss": 1.218,
"step": 1530
},
{
"epoch": 1.24,
"learning_rate": 8.258417700801301e-06,
"loss": 1.249,
"step": 1535
},
{
"epoch": 1.25,
"learning_rate": 8.245713860550734e-06,
"loss": 1.2629,
"step": 1540
},
{
"epoch": 1.25,
"learning_rate": 8.232973704685208e-06,
"loss": 1.2605,
"step": 1545
},
{
"epoch": 1.25,
"learning_rate": 8.220197375751667e-06,
"loss": 1.2232,
"step": 1550
},
{
"epoch": 1.26,
"learning_rate": 8.207385016701792e-06,
"loss": 1.2242,
"step": 1555
},
{
"epoch": 1.26,
"learning_rate": 8.194536770890392e-06,
"loss": 1.1824,
"step": 1560
},
{
"epoch": 1.27,
"learning_rate": 8.181652782073808e-06,
"loss": 1.275,
"step": 1565
},
{
"epoch": 1.27,
"learning_rate": 8.168733194408302e-06,
"loss": 1.2164,
"step": 1570
},
{
"epoch": 1.27,
"learning_rate": 8.155778152448443e-06,
"loss": 1.2207,
"step": 1575
},
{
"epoch": 1.28,
"learning_rate": 8.142787801145495e-06,
"loss": 1.2266,
"step": 1580
},
{
"epoch": 1.28,
"learning_rate": 8.129762285845784e-06,
"loss": 1.1971,
"step": 1585
},
{
"epoch": 1.29,
"learning_rate": 8.116701752289084e-06,
"loss": 1.2107,
"step": 1590
},
{
"epoch": 1.29,
"learning_rate": 8.103606346606978e-06,
"loss": 1.1904,
"step": 1595
},
{
"epoch": 1.29,
"learning_rate": 8.090476215321226e-06,
"loss": 1.2895,
"step": 1600
},
{
"epoch": 1.3,
"learning_rate": 8.07731150534213e-06,
"loss": 1.235,
"step": 1605
},
{
"epoch": 1.3,
"learning_rate": 8.064112363966877e-06,
"loss": 1.2238,
"step": 1610
},
{
"epoch": 1.31,
"learning_rate": 8.050878938877908e-06,
"loss": 1.2535,
"step": 1615
},
{
"epoch": 1.31,
"learning_rate": 8.037611378141257e-06,
"loss": 1.2336,
"step": 1620
},
{
"epoch": 1.31,
"learning_rate": 8.024309830204888e-06,
"loss": 1.2285,
"step": 1625
},
{
"epoch": 1.32,
"learning_rate": 8.010974443897046e-06,
"loss": 1.25,
"step": 1630
},
{
"epoch": 1.32,
"learning_rate": 7.997605368424585e-06,
"loss": 1.2492,
"step": 1635
},
{
"epoch": 1.33,
"learning_rate": 7.9842027533713e-06,
"loss": 1.1783,
"step": 1640
},
{
"epoch": 1.33,
"learning_rate": 7.970766748696254e-06,
"loss": 1.171,
"step": 1645
},
{
"epoch": 1.33,
"learning_rate": 7.9572975047321e-06,
"loss": 1.2336,
"step": 1650
},
{
"epoch": 1.34,
"learning_rate": 7.943795172183394e-06,
"loss": 1.204,
"step": 1655
},
{
"epoch": 1.34,
"learning_rate": 7.93025990212492e-06,
"loss": 1.2342,
"step": 1660
},
{
"epoch": 1.35,
"learning_rate": 7.916691845999986e-06,
"loss": 1.1936,
"step": 1665
},
{
"epoch": 1.35,
"learning_rate": 7.903091155618747e-06,
"loss": 1.1992,
"step": 1670
},
{
"epoch": 1.36,
"learning_rate": 7.889457983156484e-06,
"loss": 1.1707,
"step": 1675
},
{
"epoch": 1.36,
"learning_rate": 7.875792481151916e-06,
"loss": 1.1835,
"step": 1680
},
{
"epoch": 1.36,
"learning_rate": 7.862094802505498e-06,
"loss": 1.2359,
"step": 1685
},
{
"epoch": 1.37,
"learning_rate": 7.848365100477695e-06,
"loss": 1.2754,
"step": 1690
},
{
"epoch": 1.37,
"learning_rate": 7.834603528687277e-06,
"loss": 1.2664,
"step": 1695
},
{
"epoch": 1.38,
"learning_rate": 7.8208102411096e-06,
"loss": 1.2535,
"step": 1700
},
{
"epoch": 1.38,
"learning_rate": 7.806985392074877e-06,
"loss": 1.2158,
"step": 1705
},
{
"epoch": 1.38,
"learning_rate": 7.793129136266464e-06,
"loss": 1.2504,
"step": 1710
},
{
"epoch": 1.39,
"learning_rate": 7.779241628719108e-06,
"loss": 1.1505,
"step": 1715
},
{
"epoch": 1.39,
"learning_rate": 7.765323024817237e-06,
"loss": 1.2695,
"step": 1720
},
{
"epoch": 1.4,
"learning_rate": 7.751373480293205e-06,
"loss": 1.2059,
"step": 1725
},
{
"epoch": 1.4,
"learning_rate": 7.737393151225555e-06,
"loss": 1.2547,
"step": 1730
},
{
"epoch": 1.4,
"learning_rate": 7.723382194037266e-06,
"loss": 1.2127,
"step": 1735
},
{
"epoch": 1.41,
"learning_rate": 7.709340765494017e-06,
"loss": 1.1734,
"step": 1740
},
{
"epoch": 1.41,
"learning_rate": 7.695269022702425e-06,
"loss": 1.2037,
"step": 1745
},
{
"epoch": 1.42,
"learning_rate": 7.681167123108277e-06,
"loss": 1.2891,
"step": 1750
},
{
"epoch": 1.42,
"learning_rate": 7.667035224494787e-06,
"loss": 1.2285,
"step": 1755
},
{
"epoch": 1.42,
"learning_rate": 7.65287348498082e-06,
"loss": 1.217,
"step": 1760
},
{
"epoch": 1.43,
"learning_rate": 7.63868206301912e-06,
"loss": 1.1856,
"step": 1765
},
{
"epoch": 1.43,
"learning_rate": 7.62446111739455e-06,
"loss": 1.2613,
"step": 1770
},
{
"epoch": 1.44,
"learning_rate": 7.6102108072223e-06,
"loss": 1.1617,
"step": 1775
},
{
"epoch": 1.44,
"learning_rate": 7.595931291946116e-06,
"loss": 1.2006,
"step": 1780
},
{
"epoch": 1.44,
"learning_rate": 7.581622731336515e-06,
"loss": 1.2543,
"step": 1785
},
{
"epoch": 1.45,
"learning_rate": 7.567285285488994e-06,
"loss": 1.2498,
"step": 1790
},
{
"epoch": 1.45,
"learning_rate": 7.552919114822246e-06,
"loss": 1.2484,
"step": 1795
},
{
"epoch": 1.46,
"learning_rate": 7.5385243800763505e-06,
"loss": 1.2543,
"step": 1800
},
{
"epoch": 1.46,
"learning_rate": 7.524101242310993e-06,
"loss": 1.2621,
"step": 1805
},
{
"epoch": 1.46,
"learning_rate": 7.509649862903652e-06,
"loss": 1.2176,
"step": 1810
},
{
"epoch": 1.47,
"learning_rate": 7.495170403547797e-06,
"loss": 1.2189,
"step": 1815
},
{
"epoch": 1.47,
"learning_rate": 7.480663026251073e-06,
"loss": 1.2503,
"step": 1820
},
{
"epoch": 1.48,
"learning_rate": 7.466127893333498e-06,
"loss": 1.2186,
"step": 1825
},
{
"epoch": 1.48,
"learning_rate": 7.451565167425642e-06,
"loss": 1.2805,
"step": 1830
},
{
"epoch": 1.48,
"learning_rate": 7.436975011466805e-06,
"loss": 1.2347,
"step": 1835
},
{
"epoch": 1.49,
"learning_rate": 7.422357588703195e-06,
"loss": 1.266,
"step": 1840
},
{
"epoch": 1.49,
"learning_rate": 7.407713062686107e-06,
"loss": 1.2496,
"step": 1845
},
{
"epoch": 1.5,
"learning_rate": 7.393041597270085e-06,
"loss": 1.2902,
"step": 1850
},
{
"epoch": 1.5,
"learning_rate": 7.378343356611093e-06,
"loss": 1.2367,
"step": 1855
},
{
"epoch": 1.5,
"learning_rate": 7.363618505164678e-06,
"loss": 1.274,
"step": 1860
},
{
"epoch": 1.51,
"learning_rate": 7.348867207684132e-06,
"loss": 1.2242,
"step": 1865
},
{
"epoch": 1.51,
"learning_rate": 7.334089629218639e-06,
"loss": 1.2844,
"step": 1870
},
{
"epoch": 1.52,
"learning_rate": 7.319285935111444e-06,
"loss": 1.2672,
"step": 1875
},
{
"epoch": 1.52,
"learning_rate": 7.304456290997991e-06,
"loss": 1.1542,
"step": 1880
},
{
"epoch": 1.53,
"learning_rate": 7.289600862804069e-06,
"loss": 1.15,
"step": 1885
},
{
"epoch": 1.53,
"learning_rate": 7.274719816743967e-06,
"loss": 1.2385,
"step": 1890
},
{
"epoch": 1.53,
"learning_rate": 7.259813319318601e-06,
"loss": 1.2348,
"step": 1895
},
{
"epoch": 1.54,
"learning_rate": 7.244881537313664e-06,
"loss": 1.2578,
"step": 1900
},
{
"epoch": 1.54,
"learning_rate": 7.229924637797742e-06,
"loss": 1.2191,
"step": 1905
},
{
"epoch": 1.55,
"learning_rate": 7.214942788120466e-06,
"loss": 1.251,
"step": 1910
},
{
"epoch": 1.55,
"learning_rate": 7.1999361559106225e-06,
"loss": 1.2031,
"step": 1915
},
{
"epoch": 1.55,
"learning_rate": 7.184904909074293e-06,
"loss": 1.2766,
"step": 1920
},
{
"epoch": 1.56,
"learning_rate": 7.169849215792955e-06,
"loss": 1.2299,
"step": 1925
},
{
"epoch": 1.56,
"learning_rate": 7.15476924452162e-06,
"loss": 1.2355,
"step": 1930
},
{
"epoch": 1.57,
"learning_rate": 7.139665163986938e-06,
"loss": 1.2336,
"step": 1935
},
{
"epoch": 1.57,
"learning_rate": 7.124537143185317e-06,
"loss": 1.3566,
"step": 1940
},
{
"epoch": 1.57,
"learning_rate": 7.109385351381022e-06,
"loss": 1.1423,
"step": 1945
},
{
"epoch": 1.58,
"learning_rate": 7.09420995810429e-06,
"loss": 1.2576,
"step": 1950
},
{
"epoch": 1.58,
"learning_rate": 7.079011133149427e-06,
"loss": 1.2563,
"step": 1955
},
{
"epoch": 1.59,
"learning_rate": 7.0637890465729165e-06,
"loss": 1.2695,
"step": 1960
},
{
"epoch": 1.59,
"learning_rate": 7.048543868691506e-06,
"loss": 1.1986,
"step": 1965
},
{
"epoch": 1.59,
"learning_rate": 7.033275770080309e-06,
"loss": 1.25,
"step": 1970
},
{
"epoch": 1.6,
"learning_rate": 7.017984921570895e-06,
"loss": 1.2025,
"step": 1975
},
{
"epoch": 1.6,
"learning_rate": 7.002671494249376e-06,
"loss": 1.2465,
"step": 1980
},
{
"epoch": 1.61,
"learning_rate": 6.987335659454493e-06,
"loss": 1.2336,
"step": 1985
},
{
"epoch": 1.61,
"learning_rate": 6.971977588775703e-06,
"loss": 1.2436,
"step": 1990
},
{
"epoch": 1.61,
"learning_rate": 6.956597454051253e-06,
"loss": 1.2429,
"step": 1995
},
{
"epoch": 1.62,
"learning_rate": 6.941195427366259e-06,
"loss": 1.2574,
"step": 2000
},
{
"epoch": 1.62,
"learning_rate": 6.925771681050784e-06,
"loss": 1.2465,
"step": 2005
},
{
"epoch": 1.63,
"learning_rate": 6.910326387677906e-06,
"loss": 1.2805,
"step": 2010
},
{
"epoch": 1.63,
"learning_rate": 6.89485972006179e-06,
"loss": 1.2664,
"step": 2015
},
{
"epoch": 1.63,
"learning_rate": 6.879371851255747e-06,
"loss": 1.1826,
"step": 2020
},
{
"epoch": 1.64,
"learning_rate": 6.863862954550315e-06,
"loss": 1.2441,
"step": 2025
},
{
"epoch": 1.64,
"learning_rate": 6.8483332034713006e-06,
"loss": 1.191,
"step": 2030
},
{
"epoch": 1.65,
"learning_rate": 6.832782771777846e-06,
"loss": 1.2574,
"step": 2035
},
{
"epoch": 1.65,
"learning_rate": 6.817211833460484e-06,
"loss": 1.2865,
"step": 2040
},
{
"epoch": 1.65,
"learning_rate": 6.801620562739197e-06,
"loss": 1.2504,
"step": 2045
},
{
"epoch": 1.66,
"learning_rate": 6.7860091340614575e-06,
"loss": 1.2084,
"step": 2050
},
{
"epoch": 1.66,
"learning_rate": 6.770377722100284e-06,
"loss": 1.2609,
"step": 2055
},
{
"epoch": 1.67,
"learning_rate": 6.75472650175228e-06,
"loss": 1.2723,
"step": 2060
},
{
"epoch": 1.67,
"learning_rate": 6.739055648135685e-06,
"loss": 1.1243,
"step": 2065
},
{
"epoch": 1.67,
"learning_rate": 6.723365336588409e-06,
"loss": 1.2529,
"step": 2070
},
{
"epoch": 1.68,
"learning_rate": 6.707655742666074e-06,
"loss": 1.3047,
"step": 2075
},
{
"epoch": 1.68,
"learning_rate": 6.691927042140044e-06,
"loss": 1.257,
"step": 2080
},
{
"epoch": 1.69,
"learning_rate": 6.6761794109954714e-06,
"loss": 1.2086,
"step": 2085
},
{
"epoch": 1.69,
"learning_rate": 6.660413025429312e-06,
"loss": 1.2711,
"step": 2090
},
{
"epoch": 1.69,
"learning_rate": 6.644628061848363e-06,
"loss": 1.1157,
"step": 2095
},
{
"epoch": 1.7,
"learning_rate": 6.628824696867286e-06,
"loss": 1.2309,
"step": 2100
},
{
"epoch": 1.7,
"learning_rate": 6.613003107306637e-06,
"loss": 1.2363,
"step": 2105
},
{
"epoch": 1.71,
"learning_rate": 6.597163470190877e-06,
"loss": 1.207,
"step": 2110
},
{
"epoch": 1.71,
"learning_rate": 6.5813059627464e-06,
"loss": 1.2641,
"step": 2115
},
{
"epoch": 1.72,
"learning_rate": 6.565430762399546e-06,
"loss": 1.252,
"step": 2120
},
{
"epoch": 1.72,
"learning_rate": 6.549538046774621e-06,
"loss": 1.2586,
"step": 2125
},
{
"epoch": 1.72,
"learning_rate": 6.533627993691901e-06,
"loss": 1.3012,
"step": 2130
},
{
"epoch": 1.73,
"learning_rate": 6.517700781165649e-06,
"loss": 1.1842,
"step": 2135
},
{
"epoch": 1.73,
"learning_rate": 6.501756587402124e-06,
"loss": 1.2016,
"step": 2140
},
{
"epoch": 1.74,
"learning_rate": 6.485795590797579e-06,
"loss": 1.2988,
"step": 2145
},
{
"epoch": 1.74,
"learning_rate": 6.469817969936277e-06,
"loss": 1.2547,
"step": 2150
},
{
"epoch": 1.74,
"learning_rate": 6.453823903588481e-06,
"loss": 1.2309,
"step": 2155
},
{
"epoch": 1.75,
"learning_rate": 6.437813570708463e-06,
"loss": 1.2855,
"step": 2160
},
{
"epoch": 1.75,
"learning_rate": 6.421787150432493e-06,
"loss": 1.1488,
"step": 2165
},
{
"epoch": 1.76,
"learning_rate": 6.405744822076845e-06,
"loss": 1.2115,
"step": 2170
},
{
"epoch": 1.76,
"learning_rate": 6.389686765135782e-06,
"loss": 1.2336,
"step": 2175
},
{
"epoch": 1.76,
"learning_rate": 6.3736131592795525e-06,
"loss": 1.2746,
"step": 2180
},
{
"epoch": 1.77,
"learning_rate": 6.357524184352375e-06,
"loss": 1.201,
"step": 2185
},
{
"epoch": 1.77,
"learning_rate": 6.341420020370435e-06,
"loss": 1.2703,
"step": 2190
},
{
"epoch": 1.78,
"learning_rate": 6.325300847519859e-06,
"loss": 1.2441,
"step": 2195
},
{
"epoch": 1.78,
"learning_rate": 6.309166846154713e-06,
"loss": 1.2684,
"step": 2200
},
{
"epoch": 1.78,
"learning_rate": 6.293018196794964e-06,
"loss": 1.2449,
"step": 2205
},
{
"epoch": 1.79,
"learning_rate": 6.276855080124483e-06,
"loss": 1.268,
"step": 2210
},
{
"epoch": 1.79,
"learning_rate": 6.260677676989008e-06,
"loss": 1.2906,
"step": 2215
},
{
"epoch": 1.8,
"learning_rate": 6.24448616839413e-06,
"loss": 1.307,
"step": 2220
},
{
"epoch": 1.8,
"learning_rate": 6.228280735503254e-06,
"loss": 1.2699,
"step": 2225
},
{
"epoch": 1.8,
"learning_rate": 6.212061559635588e-06,
"loss": 1.24,
"step": 2230
},
{
"epoch": 1.81,
"learning_rate": 6.195828822264107e-06,
"loss": 1.2605,
"step": 2235
},
{
"epoch": 1.81,
"learning_rate": 6.179582705013519e-06,
"loss": 1.3457,
"step": 2240
},
{
"epoch": 1.82,
"learning_rate": 6.163323389658242e-06,
"loss": 1.2301,
"step": 2245
},
{
"epoch": 1.82,
"learning_rate": 6.147051058120359e-06,
"loss": 1.257,
"step": 2250
},
{
"epoch": 1.82,
"learning_rate": 6.130765892467595e-06,
"loss": 1.1584,
"step": 2255
},
{
"epoch": 1.83,
"learning_rate": 6.114468074911265e-06,
"loss": 1.2537,
"step": 2260
},
{
"epoch": 1.83,
"learning_rate": 6.098157787804252e-06,
"loss": 1.2559,
"step": 2265
},
{
"epoch": 1.84,
"learning_rate": 6.081835213638951e-06,
"loss": 1.2307,
"step": 2270
},
{
"epoch": 1.84,
"learning_rate": 6.0655005350452414e-06,
"loss": 1.1664,
"step": 2275
},
{
"epoch": 1.84,
"learning_rate": 6.049153934788429e-06,
"loss": 1.2146,
"step": 2280
},
{
"epoch": 1.85,
"learning_rate": 6.032795595767214e-06,
"loss": 1.2498,
"step": 2285
},
{
"epoch": 1.85,
"learning_rate": 6.016425701011637e-06,
"loss": 1.2379,
"step": 2290
},
{
"epoch": 1.86,
"learning_rate": 6.000044433681034e-06,
"loss": 1.2584,
"step": 2295
},
{
"epoch": 1.86,
"learning_rate": 5.9836519770619865e-06,
"loss": 1.2805,
"step": 2300
},
{
"epoch": 1.86,
"learning_rate": 5.967248514566271e-06,
"loss": 1.2348,
"step": 2305
},
{
"epoch": 1.87,
"learning_rate": 5.9508342297288035e-06,
"loss": 1.2572,
"step": 2310
},
{
"epoch": 1.87,
"learning_rate": 5.934409306205593e-06,
"loss": 1.2018,
"step": 2315
},
{
"epoch": 1.88,
"learning_rate": 5.917973927771678e-06,
"loss": 1.2641,
"step": 2320
},
{
"epoch": 1.88,
"learning_rate": 5.901528278319083e-06,
"loss": 1.2293,
"step": 2325
},
{
"epoch": 1.89,
"learning_rate": 5.885072541854742e-06,
"loss": 1.3113,
"step": 2330
},
{
"epoch": 1.89,
"learning_rate": 5.868606902498457e-06,
"loss": 1.26,
"step": 2335
},
{
"epoch": 1.89,
"learning_rate": 5.852131544480831e-06,
"loss": 1.2092,
"step": 2340
},
{
"epoch": 1.9,
"learning_rate": 5.835646652141208e-06,
"loss": 1.2535,
"step": 2345
},
{
"epoch": 1.9,
"learning_rate": 5.8191524099256035e-06,
"loss": 1.2535,
"step": 2350
},
{
"epoch": 1.91,
"learning_rate": 5.802649002384655e-06,
"loss": 1.2629,
"step": 2355
},
{
"epoch": 1.91,
"learning_rate": 5.786136614171542e-06,
"loss": 1.233,
"step": 2360
},
{
"epoch": 1.91,
"learning_rate": 5.769615430039931e-06,
"loss": 1.2375,
"step": 2365
},
{
"epoch": 1.92,
"learning_rate": 5.753085634841903e-06,
"loss": 1.2312,
"step": 2370
},
{
"epoch": 1.92,
"learning_rate": 5.736547413525888e-06,
"loss": 1.1715,
"step": 2375
},
{
"epoch": 1.93,
"learning_rate": 5.72000095113459e-06,
"loss": 1.2695,
"step": 2380
},
{
"epoch": 1.93,
"learning_rate": 5.703446432802924e-06,
"loss": 1.1672,
"step": 2385
},
{
"epoch": 1.93,
"learning_rate": 5.686884043755942e-06,
"loss": 1.2637,
"step": 2390
},
{
"epoch": 1.94,
"learning_rate": 5.6703139693067554e-06,
"loss": 1.1591,
"step": 2395
},
{
"epoch": 1.94,
"learning_rate": 5.653736394854471e-06,
"loss": 1.2343,
"step": 2400
},
{
"epoch": 1.95,
"learning_rate": 5.637151505882109e-06,
"loss": 1.2172,
"step": 2405
},
{
"epoch": 1.95,
"learning_rate": 5.620559487954531e-06,
"loss": 1.2121,
"step": 2410
},
{
"epoch": 1.95,
"learning_rate": 5.603960526716361e-06,
"loss": 1.178,
"step": 2415
},
{
"epoch": 1.96,
"learning_rate": 5.587354807889913e-06,
"loss": 1.2256,
"step": 2420
},
{
"epoch": 1.96,
"learning_rate": 5.570742517273109e-06,
"loss": 1.241,
"step": 2425
},
{
"epoch": 1.97,
"learning_rate": 5.554123840737402e-06,
"loss": 1.2773,
"step": 2430
},
{
"epoch": 1.97,
"learning_rate": 5.537498964225694e-06,
"loss": 1.2383,
"step": 2435
},
{
"epoch": 1.97,
"learning_rate": 5.520868073750261e-06,
"loss": 1.177,
"step": 2440
},
{
"epoch": 1.98,
"learning_rate": 5.50423135539066e-06,
"loss": 1.1607,
"step": 2445
},
{
"epoch": 1.98,
"learning_rate": 5.487588995291666e-06,
"loss": 1.2957,
"step": 2450
},
{
"epoch": 1.99,
"learning_rate": 5.47094117966117e-06,
"loss": 1.164,
"step": 2455
},
{
"epoch": 1.99,
"learning_rate": 5.454288094768108e-06,
"loss": 1.225,
"step": 2460
},
{
"epoch": 1.99,
"learning_rate": 5.437629926940367e-06,
"loss": 1.2602,
"step": 2465
},
{
"epoch": 2.0,
"learning_rate": 5.420966862562718e-06,
"loss": 1.2434,
"step": 2470
},
{
"epoch": 2.0,
"learning_rate": 5.404299088074702e-06,
"loss": 1.0836,
"step": 2475
},
{
"epoch": 2.01,
"learning_rate": 5.387626789968574e-06,
"loss": 1.0635,
"step": 2480
},
{
"epoch": 2.01,
"learning_rate": 5.370950154787195e-06,
"loss": 1.032,
"step": 2485
},
{
"epoch": 2.01,
"learning_rate": 5.354269369121958e-06,
"loss": 1.0236,
"step": 2490
},
{
"epoch": 2.02,
"learning_rate": 5.337584619610691e-06,
"loss": 1.0402,
"step": 2495
},
{
"epoch": 2.02,
"learning_rate": 5.320896092935575e-06,
"loss": 1.0713,
"step": 2500
},
{
"epoch": 2.03,
"learning_rate": 5.304203975821048e-06,
"loss": 1.0443,
"step": 2505
},
{
"epoch": 2.03,
"learning_rate": 5.287508455031729e-06,
"loss": 1.0523,
"step": 2510
},
{
"epoch": 2.03,
"learning_rate": 5.270809717370314e-06,
"loss": 1.0072,
"step": 2515
},
{
"epoch": 2.04,
"learning_rate": 5.254107949675493e-06,
"loss": 1.0473,
"step": 2520
},
{
"epoch": 2.04,
"learning_rate": 5.237403338819859e-06,
"loss": 1.0189,
"step": 2525
},
{
"epoch": 2.05,
"learning_rate": 5.220696071707816e-06,
"loss": 1.027,
"step": 2530
},
{
"epoch": 2.05,
"learning_rate": 5.20398633527349e-06,
"loss": 0.9773,
"step": 2535
},
{
"epoch": 2.06,
"learning_rate": 5.187274316478632e-06,
"loss": 0.9916,
"step": 2540
},
{
"epoch": 2.06,
"learning_rate": 5.170560202310536e-06,
"loss": 1.0252,
"step": 2545
},
{
"epoch": 2.06,
"learning_rate": 5.153844179779932e-06,
"loss": 1.0508,
"step": 2550
},
{
"epoch": 2.07,
"learning_rate": 5.137126435918912e-06,
"loss": 1.0217,
"step": 2555
},
{
"epoch": 2.07,
"learning_rate": 5.12040715777882e-06,
"loss": 1.0367,
"step": 2560
},
{
"epoch": 2.08,
"learning_rate": 5.1036865324281716e-06,
"loss": 1.0121,
"step": 2565
},
{
"epoch": 2.08,
"learning_rate": 5.08696474695055e-06,
"loss": 0.9992,
"step": 2570
},
{
"epoch": 2.08,
"learning_rate": 5.070241988442528e-06,
"loss": 1.0778,
"step": 2575
},
{
"epoch": 2.09,
"learning_rate": 5.053518444011557e-06,
"loss": 1.0703,
"step": 2580
},
{
"epoch": 2.09,
"learning_rate": 5.036794300773887e-06,
"loss": 1.017,
"step": 2585
},
{
"epoch": 2.1,
"learning_rate": 5.020069745852463e-06,
"loss": 0.9813,
"step": 2590
},
{
"epoch": 2.1,
"learning_rate": 5.003344966374843e-06,
"loss": 1.0287,
"step": 2595
},
{
"epoch": 2.1,
"learning_rate": 4.9866201494710934e-06,
"loss": 1.0617,
"step": 2600
},
{
"epoch": 2.11,
"learning_rate": 4.969895482271695e-06,
"loss": 1.1227,
"step": 2605
},
{
"epoch": 2.11,
"learning_rate": 4.953171151905466e-06,
"loss": 1.0496,
"step": 2610
},
{
"epoch": 2.12,
"learning_rate": 4.936447345497443e-06,
"loss": 1.0287,
"step": 2615
},
{
"epoch": 2.12,
"learning_rate": 4.919724250166808e-06,
"loss": 1.0656,
"step": 2620
},
{
"epoch": 2.12,
"learning_rate": 4.903002053024782e-06,
"loss": 1.0287,
"step": 2625
},
{
"epoch": 2.13,
"learning_rate": 4.886280941172539e-06,
"loss": 1.0293,
"step": 2630
},
{
"epoch": 2.13,
"learning_rate": 4.869561101699113e-06,
"loss": 1.0805,
"step": 2635
},
{
"epoch": 2.14,
"learning_rate": 4.852842721679293e-06,
"loss": 1.0068,
"step": 2640
},
{
"epoch": 2.14,
"learning_rate": 4.836125988171547e-06,
"loss": 1.0056,
"step": 2645
},
{
"epoch": 2.14,
"learning_rate": 4.8194110882159175e-06,
"loss": 1.0256,
"step": 2650
},
{
"epoch": 2.15,
"learning_rate": 4.802698208831929e-06,
"loss": 1.0551,
"step": 2655
},
{
"epoch": 2.15,
"learning_rate": 4.785987537016504e-06,
"loss": 1.002,
"step": 2660
},
{
"epoch": 2.16,
"learning_rate": 4.769279259741858e-06,
"loss": 1.0378,
"step": 2665
},
{
"epoch": 2.16,
"learning_rate": 4.752573563953422e-06,
"loss": 1.0088,
"step": 2670
},
{
"epoch": 2.16,
"learning_rate": 4.735870636567736e-06,
"loss": 0.9963,
"step": 2675
},
{
"epoch": 2.17,
"learning_rate": 4.719170664470371e-06,
"loss": 0.9977,
"step": 2680
},
{
"epoch": 2.17,
"learning_rate": 4.702473834513826e-06,
"loss": 1.0533,
"step": 2685
},
{
"epoch": 2.18,
"learning_rate": 4.685780333515449e-06,
"loss": 1.0148,
"step": 2690
},
{
"epoch": 2.18,
"learning_rate": 4.669090348255338e-06,
"loss": 1.0023,
"step": 2695
},
{
"epoch": 2.18,
"learning_rate": 4.652404065474257e-06,
"loss": 1.0227,
"step": 2700
},
{
"epoch": 2.19,
"learning_rate": 4.6357216718715375e-06,
"loss": 1.0236,
"step": 2705
},
{
"epoch": 2.19,
"learning_rate": 4.619043354103002e-06,
"loss": 1.01,
"step": 2710
},
{
"epoch": 2.2,
"learning_rate": 4.602369298778866e-06,
"loss": 1.0625,
"step": 2715
},
{
"epoch": 2.2,
"learning_rate": 4.585699692461655e-06,
"loss": 1.0154,
"step": 2720
},
{
"epoch": 2.2,
"learning_rate": 4.569034721664114e-06,
"loss": 1.0547,
"step": 2725
},
{
"epoch": 2.21,
"learning_rate": 4.552374572847122e-06,
"loss": 0.981,
"step": 2730
},
{
"epoch": 2.21,
"learning_rate": 4.535719432417612e-06,
"loss": 1.0691,
"step": 2735
},
{
"epoch": 2.22,
"learning_rate": 4.519069486726468e-06,
"loss": 1.0451,
"step": 2740
},
{
"epoch": 2.22,
"learning_rate": 4.502424922066462e-06,
"loss": 0.9773,
"step": 2745
},
{
"epoch": 2.22,
"learning_rate": 4.485785924670151e-06,
"loss": 0.9898,
"step": 2750
},
{
"epoch": 2.23,
"learning_rate": 4.469152680707804e-06,
"loss": 1.0496,
"step": 2755
},
{
"epoch": 2.23,
"learning_rate": 4.452525376285319e-06,
"loss": 1.0211,
"step": 2760
},
{
"epoch": 2.24,
"learning_rate": 4.435904197442131e-06,
"loss": 1.0961,
"step": 2765
},
{
"epoch": 2.24,
"learning_rate": 4.419289330149145e-06,
"loss": 1.0279,
"step": 2770
},
{
"epoch": 2.25,
"learning_rate": 4.4026809603066375e-06,
"loss": 1.0081,
"step": 2775
},
{
"epoch": 2.25,
"learning_rate": 4.386079273742199e-06,
"loss": 1.0764,
"step": 2780
},
{
"epoch": 2.25,
"learning_rate": 4.3694844562086325e-06,
"loss": 1.0342,
"step": 2785
},
{
"epoch": 2.26,
"learning_rate": 4.3528966933818865e-06,
"loss": 1.0707,
"step": 2790
},
{
"epoch": 2.26,
"learning_rate": 4.33631617085898e-06,
"loss": 1.0127,
"step": 2795
},
{
"epoch": 2.27,
"learning_rate": 4.319743074155916e-06,
"loss": 1.0658,
"step": 2800
},
{
"epoch": 2.27,
"learning_rate": 4.3031775887056176e-06,
"loss": 1.0881,
"step": 2805
},
{
"epoch": 2.27,
"learning_rate": 4.2866198998558404e-06,
"loss": 1.043,
"step": 2810
},
{
"epoch": 2.28,
"learning_rate": 4.2700701928671105e-06,
"loss": 1.008,
"step": 2815
},
{
"epoch": 2.28,
"learning_rate": 4.253528652910647e-06,
"loss": 1.0571,
"step": 2820
},
{
"epoch": 2.29,
"learning_rate": 4.236995465066287e-06,
"loss": 1.0859,
"step": 2825
},
{
"epoch": 2.29,
"learning_rate": 4.220470814320417e-06,
"loss": 1.0085,
"step": 2830
},
{
"epoch": 2.29,
"learning_rate": 4.203954885563909e-06,
"loss": 1.0146,
"step": 2835
},
{
"epoch": 2.3,
"learning_rate": 4.187447863590039e-06,
"loss": 1.0562,
"step": 2840
},
{
"epoch": 2.3,
"learning_rate": 4.170949933092432e-06,
"loss": 1.1096,
"step": 2845
},
{
"epoch": 2.31,
"learning_rate": 4.154461278662989e-06,
"loss": 1.0555,
"step": 2850
},
{
"epoch": 2.31,
"learning_rate": 4.137982084789823e-06,
"loss": 0.9902,
"step": 2855
},
{
"epoch": 2.31,
"learning_rate": 4.121512535855193e-06,
"loss": 1.06,
"step": 2860
},
{
"epoch": 2.32,
"learning_rate": 4.105052816133448e-06,
"loss": 1.0412,
"step": 2865
},
{
"epoch": 2.32,
"learning_rate": 4.0886031097889556e-06,
"loss": 1.0354,
"step": 2870
},
{
"epoch": 2.33,
"learning_rate": 4.072163600874045e-06,
"loss": 1.0928,
"step": 2875
},
{
"epoch": 2.33,
"learning_rate": 4.0557344733269505e-06,
"loss": 1.0645,
"step": 2880
},
{
"epoch": 2.33,
"learning_rate": 4.039315910969754e-06,
"loss": 0.9994,
"step": 2885
},
{
"epoch": 2.34,
"learning_rate": 4.02290809750632e-06,
"loss": 1.003,
"step": 2890
},
{
"epoch": 2.34,
"learning_rate": 4.006511216520251e-06,
"loss": 1.0512,
"step": 2895
},
{
"epoch": 2.35,
"learning_rate": 3.9901254514728225e-06,
"loss": 1.06,
"step": 2900
},
{
"epoch": 2.35,
"learning_rate": 3.973750985700943e-06,
"loss": 1.0541,
"step": 2905
},
{
"epoch": 2.35,
"learning_rate": 3.957388002415093e-06,
"loss": 1.0078,
"step": 2910
},
{
"epoch": 2.36,
"learning_rate": 3.941036684697274e-06,
"loss": 1.0104,
"step": 2915
},
{
"epoch": 2.36,
"learning_rate": 3.924697215498971e-06,
"loss": 1.0465,
"step": 2920
},
{
"epoch": 2.37,
"learning_rate": 3.908369777639091e-06,
"loss": 0.9527,
"step": 2925
},
{
"epoch": 2.37,
"learning_rate": 3.892054553801931e-06,
"loss": 1.0559,
"step": 2930
},
{
"epoch": 2.37,
"learning_rate": 3.875751726535124e-06,
"loss": 1.041,
"step": 2935
},
{
"epoch": 2.38,
"learning_rate": 3.8594614782476024e-06,
"loss": 1.0352,
"step": 2940
},
{
"epoch": 2.38,
"learning_rate": 3.843183991207551e-06,
"loss": 1.0175,
"step": 2945
},
{
"epoch": 2.39,
"learning_rate": 3.82691944754038e-06,
"loss": 0.9959,
"step": 2950
},
{
"epoch": 2.39,
"learning_rate": 3.8106680292266717e-06,
"loss": 1.0094,
"step": 2955
},
{
"epoch": 2.39,
"learning_rate": 3.7944299181001544e-06,
"loss": 1.0367,
"step": 2960
},
{
"epoch": 2.4,
"learning_rate": 3.778205295845663e-06,
"loss": 1.0443,
"step": 2965
},
{
"epoch": 2.4,
"learning_rate": 3.7619943439971107e-06,
"loss": 1.0074,
"step": 2970
},
{
"epoch": 2.41,
"learning_rate": 3.7457972439354526e-06,
"loss": 1.0396,
"step": 2975
},
{
"epoch": 2.41,
"learning_rate": 3.7296141768866635e-06,
"loss": 1.0506,
"step": 2980
},
{
"epoch": 2.42,
"learning_rate": 3.7134453239196987e-06,
"loss": 1.0268,
"step": 2985
},
{
"epoch": 2.42,
"learning_rate": 3.6972908659444828e-06,
"loss": 1.0101,
"step": 2990
},
{
"epoch": 2.42,
"learning_rate": 3.6811509837098756e-06,
"loss": 1.0076,
"step": 2995
},
{
"epoch": 2.43,
"learning_rate": 3.6650258578016474e-06,
"loss": 1.0602,
"step": 3000
},
{
"epoch": 2.43,
"learning_rate": 3.6489156686404683e-06,
"loss": 1.0418,
"step": 3005
},
{
"epoch": 2.44,
"learning_rate": 3.6328205964798822e-06,
"loss": 1.0498,
"step": 3010
},
{
"epoch": 2.44,
"learning_rate": 3.616740821404292e-06,
"loss": 1.0277,
"step": 3015
},
{
"epoch": 2.44,
"learning_rate": 3.600676523326946e-06,
"loss": 1.0979,
"step": 3020
},
{
"epoch": 2.45,
"learning_rate": 3.5846278819879197e-06,
"loss": 1.0467,
"step": 3025
},
{
"epoch": 2.45,
"learning_rate": 3.568595076952113e-06,
"loss": 1.0344,
"step": 3030
},
{
"epoch": 2.46,
"learning_rate": 3.552578287607237e-06,
"loss": 0.9874,
"step": 3035
},
{
"epoch": 2.46,
"learning_rate": 3.536577693161801e-06,
"loss": 1.0688,
"step": 3040
},
{
"epoch": 2.46,
"learning_rate": 3.520593472643122e-06,
"loss": 1.0023,
"step": 3045
},
{
"epoch": 2.47,
"learning_rate": 3.504625804895302e-06,
"loss": 1.0315,
"step": 3050
},
{
"epoch": 2.47,
"learning_rate": 3.488674868577246e-06,
"loss": 1.0318,
"step": 3055
},
{
"epoch": 2.48,
"learning_rate": 3.472740842160649e-06,
"loss": 1.057,
"step": 3060
},
{
"epoch": 2.48,
"learning_rate": 3.4568239039280094e-06,
"loss": 1.041,
"step": 3065
},
{
"epoch": 2.48,
"learning_rate": 3.4409242319706225e-06,
"loss": 1.126,
"step": 3070
},
{
"epoch": 2.49,
"learning_rate": 3.4250420041866057e-06,
"loss": 1.0151,
"step": 3075
},
{
"epoch": 2.49,
"learning_rate": 3.4091773982788867e-06,
"loss": 1.0395,
"step": 3080
},
{
"epoch": 2.5,
"learning_rate": 3.393330591753231e-06,
"loss": 1.0207,
"step": 3085
},
{
"epoch": 2.5,
"learning_rate": 3.377501761916249e-06,
"loss": 0.9663,
"step": 3090
},
{
"epoch": 2.5,
"learning_rate": 3.3616910858734143e-06,
"loss": 1.055,
"step": 3095
},
{
"epoch": 2.51,
"learning_rate": 3.3458987405270803e-06,
"loss": 1.017,
"step": 3100
},
{
"epoch": 2.51,
"learning_rate": 3.330124902574505e-06,
"loss": 1.0034,
"step": 3105
},
{
"epoch": 2.52,
"learning_rate": 3.3143697485058666e-06,
"loss": 1.0262,
"step": 3110
},
{
"epoch": 2.52,
"learning_rate": 3.2986334546022964e-06,
"loss": 1.0723,
"step": 3115
},
{
"epoch": 2.52,
"learning_rate": 3.282916196933904e-06,
"loss": 1.0314,
"step": 3120
},
{
"epoch": 2.53,
"learning_rate": 3.2672181513578038e-06,
"loss": 1.0613,
"step": 3125
},
{
"epoch": 2.53,
"learning_rate": 3.251539493516152e-06,
"loss": 1.0641,
"step": 3130
},
{
"epoch": 2.54,
"learning_rate": 3.2358803988341776e-06,
"loss": 1.0283,
"step": 3135
},
{
"epoch": 2.54,
"learning_rate": 3.220241042518223e-06,
"loss": 1.0502,
"step": 3140
},
{
"epoch": 2.54,
"learning_rate": 3.2046215995537837e-06,
"loss": 1.0416,
"step": 3145
},
{
"epoch": 2.55,
"learning_rate": 3.1890222447035444e-06,
"loss": 1.0549,
"step": 3150
},
{
"epoch": 2.55,
"learning_rate": 3.173443152505431e-06,
"loss": 1.034,
"step": 3155
},
{
"epoch": 2.56,
"learning_rate": 3.157884497270658e-06,
"loss": 1.0594,
"step": 3160
},
{
"epoch": 2.56,
"learning_rate": 3.1423464530817673e-06,
"loss": 1.0637,
"step": 3165
},
{
"epoch": 2.56,
"learning_rate": 3.1268291937906957e-06,
"loss": 1.0402,
"step": 3170
},
{
"epoch": 2.57,
"learning_rate": 3.1113328930168153e-06,
"loss": 1.0236,
"step": 3175
},
{
"epoch": 2.57,
"learning_rate": 3.095857724145004e-06,
"loss": 1.0414,
"step": 3180
},
{
"epoch": 2.58,
"learning_rate": 3.0804038603236943e-06,
"loss": 1.0465,
"step": 3185
},
{
"epoch": 2.58,
"learning_rate": 3.0649714744629454e-06,
"loss": 1.0561,
"step": 3190
},
{
"epoch": 2.58,
"learning_rate": 3.0495607392324987e-06,
"loss": 1.0414,
"step": 3195
},
{
"epoch": 2.59,
"learning_rate": 3.0341718270598557e-06,
"loss": 1.0492,
"step": 3200
},
{
"epoch": 2.59,
"learning_rate": 3.0188049101283433e-06,
"loss": 1.0053,
"step": 3205
},
{
"epoch": 2.6,
"learning_rate": 3.003460160375189e-06,
"loss": 1.0193,
"step": 3210
},
{
"epoch": 2.6,
"learning_rate": 2.9881377494895925e-06,
"loss": 1.093,
"step": 3215
},
{
"epoch": 2.61,
"learning_rate": 2.9728378489108135e-06,
"loss": 1.0285,
"step": 3220
},
{
"epoch": 2.61,
"learning_rate": 2.957560629826244e-06,
"loss": 1.0982,
"step": 3225
},
{
"epoch": 2.61,
"learning_rate": 2.942306263169502e-06,
"loss": 1.0438,
"step": 3230
},
{
"epoch": 2.62,
"learning_rate": 2.9270749196185095e-06,
"loss": 1.0695,
"step": 3235
},
{
"epoch": 2.62,
"learning_rate": 2.911866769593592e-06,
"loss": 1.0139,
"step": 3240
},
{
"epoch": 2.63,
"learning_rate": 2.896681983255565e-06,
"loss": 1.1477,
"step": 3245
},
{
"epoch": 2.63,
"learning_rate": 2.881520730503837e-06,
"loss": 1.0437,
"step": 3250
},
{
"epoch": 2.63,
"learning_rate": 2.866383180974498e-06,
"loss": 1.0455,
"step": 3255
},
{
"epoch": 2.64,
"learning_rate": 2.8512695040384287e-06,
"loss": 1.0014,
"step": 3260
},
{
"epoch": 2.64,
"learning_rate": 2.8361798687994097e-06,
"loss": 1.0016,
"step": 3265
},
{
"epoch": 2.65,
"learning_rate": 2.8211144440922176e-06,
"loss": 0.9983,
"step": 3270
},
{
"epoch": 2.65,
"learning_rate": 2.8060733984807466e-06,
"loss": 1.0927,
"step": 3275
},
{
"epoch": 2.65,
"learning_rate": 2.7910569002561137e-06,
"loss": 1.0424,
"step": 3280
},
{
"epoch": 2.66,
"learning_rate": 2.7760651174347854e-06,
"loss": 1.0555,
"step": 3285
},
{
"epoch": 2.66,
"learning_rate": 2.7610982177566926e-06,
"loss": 0.983,
"step": 3290
},
{
"epoch": 2.67,
"learning_rate": 2.7461563686833504e-06,
"loss": 0.9712,
"step": 3295
},
{
"epoch": 2.67,
"learning_rate": 2.7312397373959894e-06,
"loss": 1.04,
"step": 3300
},
{
"epoch": 2.67,
"learning_rate": 2.716348490793681e-06,
"loss": 1.092,
"step": 3305
},
{
"epoch": 2.68,
"learning_rate": 2.7014827954914814e-06,
"loss": 0.9855,
"step": 3310
},
{
"epoch": 2.68,
"learning_rate": 2.686642817818548e-06,
"loss": 1.0319,
"step": 3315
},
{
"epoch": 2.69,
"learning_rate": 2.6718287238162963e-06,
"loss": 0.9938,
"step": 3320
},
{
"epoch": 2.69,
"learning_rate": 2.6570406792365268e-06,
"loss": 1.0662,
"step": 3325
},
{
"epoch": 2.69,
"learning_rate": 2.6422788495395912e-06,
"loss": 1.0263,
"step": 3330
},
{
"epoch": 2.7,
"learning_rate": 2.6275433998925176e-06,
"loss": 1.0584,
"step": 3335
},
{
"epoch": 2.7,
"learning_rate": 2.612834495167177e-06,
"loss": 1.0334,
"step": 3340
},
{
"epoch": 2.71,
"learning_rate": 2.5981522999384323e-06,
"loss": 1.0426,
"step": 3345
},
{
"epoch": 2.71,
"learning_rate": 2.583496978482305e-06,
"loss": 1.0199,
"step": 3350
},
{
"epoch": 2.71,
"learning_rate": 2.568868694774127e-06,
"loss": 1.0363,
"step": 3355
},
{
"epoch": 2.72,
"learning_rate": 2.5542676124867103e-06,
"loss": 0.9959,
"step": 3360
},
{
"epoch": 2.72,
"learning_rate": 2.5396938949885163e-06,
"loss": 1.0357,
"step": 3365
},
{
"epoch": 2.73,
"learning_rate": 2.52514770534183e-06,
"loss": 1.0444,
"step": 3370
},
{
"epoch": 2.73,
"learning_rate": 2.510629206300933e-06,
"loss": 1.0627,
"step": 3375
},
{
"epoch": 2.73,
"learning_rate": 2.4961385603102794e-06,
"loss": 1.0535,
"step": 3380
},
{
"epoch": 2.74,
"learning_rate": 2.481675929502682e-06,
"loss": 1.0276,
"step": 3385
},
{
"epoch": 2.74,
"learning_rate": 2.467241475697498e-06,
"loss": 1.0057,
"step": 3390
},
{
"epoch": 2.75,
"learning_rate": 2.45283536039882e-06,
"loss": 1.0055,
"step": 3395
},
{
"epoch": 2.75,
"learning_rate": 2.438457744793665e-06,
"loss": 1.0001,
"step": 3400
},
{
"epoch": 2.75,
"learning_rate": 2.4241087897501703e-06,
"loss": 1.1129,
"step": 3405
},
{
"epoch": 2.76,
"learning_rate": 2.409788655815802e-06,
"loss": 0.9816,
"step": 3410
},
{
"epoch": 2.76,
"learning_rate": 2.395497503215551e-06,
"loss": 1.008,
"step": 3415
},
{
"epoch": 2.77,
"learning_rate": 2.3812354918501397e-06,
"loss": 1.0068,
"step": 3420
},
{
"epoch": 2.77,
"learning_rate": 2.3670027812942353e-06,
"loss": 1.0779,
"step": 3425
},
{
"epoch": 2.78,
"learning_rate": 2.3527995307946655e-06,
"loss": 1.0264,
"step": 3430
},
{
"epoch": 2.78,
"learning_rate": 2.338625899268638e-06,
"loss": 1.0395,
"step": 3435
},
{
"epoch": 2.78,
"learning_rate": 2.3244820453019566e-06,
"loss": 1.0604,
"step": 3440
},
{
"epoch": 2.79,
"learning_rate": 2.3103681271472516e-06,
"loss": 1.0236,
"step": 3445
},
{
"epoch": 2.79,
"learning_rate": 2.296284302722205e-06,
"loss": 1.0918,
"step": 3450
},
{
"epoch": 2.8,
"learning_rate": 2.28223072960779e-06,
"loss": 1.0504,
"step": 3455
},
{
"epoch": 2.8,
"learning_rate": 2.2682075650465063e-06,
"loss": 1.0361,
"step": 3460
},
{
"epoch": 2.8,
"learning_rate": 2.2542149659406126e-06,
"loss": 1.0268,
"step": 3465
},
{
"epoch": 2.81,
"learning_rate": 2.2402530888503783e-06,
"loss": 1.0434,
"step": 3470
},
{
"epoch": 2.81,
"learning_rate": 2.226322089992336e-06,
"loss": 1.0348,
"step": 3475
},
{
"epoch": 2.82,
"learning_rate": 2.2124221252375215e-06,
"loss": 1.0135,
"step": 3480
},
{
"epoch": 2.82,
"learning_rate": 2.1985533501097407e-06,
"loss": 1.0488,
"step": 3485
},
{
"epoch": 2.82,
"learning_rate": 2.1847159197838213e-06,
"loss": 0.9809,
"step": 3490
},
{
"epoch": 2.83,
"learning_rate": 2.1709099890838846e-06,
"loss": 1.0627,
"step": 3495
},
{
"epoch": 2.83,
"learning_rate": 2.1571357124816107e-06,
"loss": 1.0373,
"step": 3500
},
{
"epoch": 2.84,
"learning_rate": 2.1433932440945028e-06,
"loss": 1.0068,
"step": 3505
},
{
"epoch": 2.84,
"learning_rate": 2.129682737684171e-06,
"loss": 1.0604,
"step": 3510
},
{
"epoch": 2.84,
"learning_rate": 2.11600434665461e-06,
"loss": 1.0337,
"step": 3515
},
{
"epoch": 2.85,
"learning_rate": 2.1023582240504836e-06,
"loss": 1.0668,
"step": 3520
},
{
"epoch": 2.85,
"learning_rate": 2.088744522555409e-06,
"loss": 1.0088,
"step": 3525
},
{
"epoch": 2.86,
"learning_rate": 2.0751633944902487e-06,
"loss": 1.0436,
"step": 3530
},
{
"epoch": 2.86,
"learning_rate": 2.061614991811414e-06,
"loss": 1.0138,
"step": 3535
},
{
"epoch": 2.86,
"learning_rate": 2.0480994661091507e-06,
"loss": 1.1406,
"step": 3540
},
{
"epoch": 2.87,
"learning_rate": 2.0346169686058586e-06,
"loss": 1.0391,
"step": 3545
},
{
"epoch": 2.87,
"learning_rate": 2.0211676501543866e-06,
"loss": 1.0592,
"step": 3550
},
{
"epoch": 2.88,
"learning_rate": 2.00775166123635e-06,
"loss": 0.9783,
"step": 3555
},
{
"epoch": 2.88,
"learning_rate": 1.9943691519604523e-06,
"loss": 1.0473,
"step": 3560
},
{
"epoch": 2.88,
"learning_rate": 1.9810202720607945e-06,
"loss": 1.0555,
"step": 3565
},
{
"epoch": 2.89,
"learning_rate": 1.967705170895208e-06,
"loss": 1.0691,
"step": 3570
},
{
"epoch": 2.89,
"learning_rate": 1.9544239974435797e-06,
"loss": 1.026,
"step": 3575
},
{
"epoch": 2.9,
"learning_rate": 1.9411769003061874e-06,
"loss": 1.0588,
"step": 3580
},
{
"epoch": 2.9,
"learning_rate": 1.9279640277020396e-06,
"loss": 1.0635,
"step": 3585
},
{
"epoch": 2.9,
"learning_rate": 1.9147855274672073e-06,
"loss": 0.9919,
"step": 3590
},
{
"epoch": 2.91,
"learning_rate": 1.9016415470531773e-06,
"loss": 1.0053,
"step": 3595
},
{
"epoch": 2.91,
"learning_rate": 1.8885322335252076e-06,
"loss": 1.0461,
"step": 3600
},
{
"epoch": 2.92,
"learning_rate": 1.8754577335606689e-06,
"loss": 1.0051,
"step": 3605
},
{
"epoch": 2.92,
"learning_rate": 1.8624181934474117e-06,
"loss": 1.0521,
"step": 3610
},
{
"epoch": 2.92,
"learning_rate": 1.8494137590821282e-06,
"loss": 0.9926,
"step": 3615
},
{
"epoch": 2.93,
"learning_rate": 1.8364445759687233e-06,
"loss": 1.0264,
"step": 3620
},
{
"epoch": 2.93,
"learning_rate": 1.823510789216676e-06,
"loss": 1.0475,
"step": 3625
},
{
"epoch": 2.94,
"learning_rate": 1.8106125435394312e-06,
"loss": 1.012,
"step": 3630
},
{
"epoch": 2.94,
"learning_rate": 1.7977499832527655e-06,
"loss": 1.0269,
"step": 3635
},
{
"epoch": 2.94,
"learning_rate": 1.7849232522731797e-06,
"loss": 1.0463,
"step": 3640
},
{
"epoch": 2.95,
"learning_rate": 1.7721324941162933e-06,
"loss": 1.025,
"step": 3645
},
{
"epoch": 2.95,
"learning_rate": 1.7593778518952275e-06,
"loss": 1.0326,
"step": 3650
},
{
"epoch": 2.96,
"learning_rate": 1.7466594683190107e-06,
"loss": 1.0389,
"step": 3655
},
{
"epoch": 2.96,
"learning_rate": 1.7339774856909851e-06,
"loss": 1.0609,
"step": 3660
},
{
"epoch": 2.97,
"learning_rate": 1.7213320459072047e-06,
"loss": 0.9949,
"step": 3665
},
{
"epoch": 2.97,
"learning_rate": 1.7087232904548595e-06,
"loss": 1.0083,
"step": 3670
},
{
"epoch": 2.97,
"learning_rate": 1.69615136041068e-06,
"loss": 1.0377,
"step": 3675
},
{
"epoch": 2.98,
"learning_rate": 1.6836163964393664e-06,
"loss": 1.0514,
"step": 3680
},
{
"epoch": 2.98,
"learning_rate": 1.6711185387920176e-06,
"loss": 0.99,
"step": 3685
},
{
"epoch": 2.99,
"learning_rate": 1.6586579273045529e-06,
"loss": 1.0146,
"step": 3690
},
{
"epoch": 2.99,
"learning_rate": 1.6462347013961526e-06,
"loss": 1.0445,
"step": 3695
},
{
"epoch": 2.99,
"learning_rate": 1.6338490000676987e-06,
"loss": 1.0674,
"step": 3700
},
{
"epoch": 3.0,
"learning_rate": 1.6215009619002197e-06,
"loss": 1.0215,
"step": 3705
},
{
"epoch": 3.0,
"learning_rate": 1.609190725053335e-06,
"loss": 0.9832,
"step": 3710
},
{
"epoch": 3.01,
"learning_rate": 1.5969184272637184e-06,
"loss": 0.9313,
"step": 3715
},
{
"epoch": 3.01,
"learning_rate": 1.5846842058435457e-06,
"loss": 1.0244,
"step": 3720
},
{
"epoch": 3.01,
"learning_rate": 1.5724881976789696e-06,
"loss": 0.9002,
"step": 3725
},
{
"epoch": 3.02,
"learning_rate": 1.5603305392285785e-06,
"loss": 0.957,
"step": 3730
},
{
"epoch": 3.02,
"learning_rate": 1.548211366521875e-06,
"loss": 0.9404,
"step": 3735
},
{
"epoch": 3.03,
"learning_rate": 1.5361308151577526e-06,
"loss": 0.9199,
"step": 3740
},
{
"epoch": 3.03,
"learning_rate": 1.5240890203029813e-06,
"loss": 0.9224,
"step": 3745
},
{
"epoch": 3.03,
"learning_rate": 1.5120861166906869e-06,
"loss": 0.9822,
"step": 3750
},
{
"epoch": 3.04,
"learning_rate": 1.5001222386188573e-06,
"loss": 0.9063,
"step": 3755
},
{
"epoch": 3.04,
"learning_rate": 1.4881975199488247e-06,
"loss": 0.9455,
"step": 3760
},
{
"epoch": 3.05,
"learning_rate": 1.4763120941037757e-06,
"loss": 0.8986,
"step": 3765
},
{
"epoch": 3.05,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.9297,
"step": 3770
},
{
"epoch": 3.05,
"learning_rate": 1.4526596523817066e-06,
"loss": 0.9889,
"step": 3775
},
{
"epoch": 3.06,
"learning_rate": 1.4408929011469175e-06,
"loss": 0.9387,
"step": 3780
},
{
"epoch": 3.06,
"learning_rate": 1.4291659720186218e-06,
"loss": 0.8889,
"step": 3785
},
{
"epoch": 3.07,
"learning_rate": 1.4174789962069808e-06,
"loss": 0.9965,
"step": 3790
},
{
"epoch": 3.07,
"learning_rate": 1.4058321044751255e-06,
"loss": 0.9279,
"step": 3795
},
{
"epoch": 3.07,
"learning_rate": 1.3942254271377004e-06,
"loss": 0.9621,
"step": 3800
},
{
"epoch": 3.08,
"learning_rate": 1.3826590940593926e-06,
"loss": 0.9081,
"step": 3805
},
{
"epoch": 3.08,
"learning_rate": 1.3711332346534916e-06,
"loss": 0.9201,
"step": 3810
},
{
"epoch": 3.09,
"learning_rate": 1.3596479778804312e-06,
"loss": 0.9013,
"step": 3815
},
{
"epoch": 3.09,
"learning_rate": 1.3482034522463522e-06,
"loss": 0.9255,
"step": 3820
},
{
"epoch": 3.09,
"learning_rate": 1.3367997858016619e-06,
"loss": 0.9678,
"step": 3825
},
{
"epoch": 3.1,
"learning_rate": 1.325437106139607e-06,
"loss": 0.9334,
"step": 3830
},
{
"epoch": 3.1,
"learning_rate": 1.3141155403948358e-06,
"loss": 0.9455,
"step": 3835
},
{
"epoch": 3.11,
"learning_rate": 1.3028352152419876e-06,
"loss": 0.9025,
"step": 3840
},
{
"epoch": 3.11,
"learning_rate": 1.291596256894263e-06,
"loss": 0.8933,
"step": 3845
},
{
"epoch": 3.11,
"learning_rate": 1.2803987911020239e-06,
"loss": 0.999,
"step": 3850
},
{
"epoch": 3.12,
"learning_rate": 1.269242943151377e-06,
"loss": 0.8996,
"step": 3855
},
{
"epoch": 3.12,
"learning_rate": 1.2581288378627759e-06,
"loss": 0.9594,
"step": 3860
},
{
"epoch": 3.13,
"learning_rate": 1.2470565995896244e-06,
"loss": 0.9385,
"step": 3865
},
{
"epoch": 3.13,
"learning_rate": 1.236026352216888e-06,
"loss": 0.9508,
"step": 3870
},
{
"epoch": 3.14,
"learning_rate": 1.2250382191597015e-06,
"loss": 0.9479,
"step": 3875
},
{
"epoch": 3.14,
"learning_rate": 1.21409232336199e-06,
"loss": 0.8861,
"step": 3880
},
{
"epoch": 3.14,
"learning_rate": 1.2031887872951004e-06,
"loss": 0.9539,
"step": 3885
},
{
"epoch": 3.15,
"learning_rate": 1.1923277329564192e-06,
"loss": 0.8969,
"step": 3890
},
{
"epoch": 3.15,
"learning_rate": 1.181509281868019e-06,
"loss": 0.9248,
"step": 3895
},
{
"epoch": 3.16,
"learning_rate": 1.1707335550752901e-06,
"loss": 0.8923,
"step": 3900
},
{
"epoch": 3.16,
"learning_rate": 1.1600006731455888e-06,
"loss": 0.8534,
"step": 3905
},
{
"epoch": 3.16,
"learning_rate": 1.1493107561668943e-06,
"loss": 0.9193,
"step": 3910
},
{
"epoch": 3.17,
"learning_rate": 1.1386639237464542e-06,
"loss": 0.9688,
"step": 3915
},
{
"epoch": 3.17,
"learning_rate": 1.1280602950094532e-06,
"loss": 0.8982,
"step": 3920
},
{
"epoch": 3.18,
"learning_rate": 1.1174999885976834e-06,
"loss": 0.9001,
"step": 3925
},
{
"epoch": 3.18,
"learning_rate": 1.106983122668206e-06,
"loss": 0.9189,
"step": 3930
},
{
"epoch": 3.18,
"learning_rate": 1.0965098148920422e-06,
"loss": 0.9842,
"step": 3935
},
{
"epoch": 3.19,
"learning_rate": 1.0860801824528443e-06,
"loss": 0.9438,
"step": 3940
},
{
"epoch": 3.19,
"learning_rate": 1.0756943420455934e-06,
"loss": 0.9412,
"step": 3945
},
{
"epoch": 3.2,
"learning_rate": 1.0653524098752894e-06,
"loss": 0.9695,
"step": 3950
},
{
"epoch": 3.2,
"learning_rate": 1.055054501655654e-06,
"loss": 0.9145,
"step": 3955
},
{
"epoch": 3.2,
"learning_rate": 1.0448007326078336e-06,
"loss": 0.9602,
"step": 3960
},
{
"epoch": 3.21,
"learning_rate": 1.0345912174591071e-06,
"loss": 0.9009,
"step": 3965
},
{
"epoch": 3.21,
"learning_rate": 1.0244260704416104e-06,
"loss": 0.9375,
"step": 3970
},
{
"epoch": 3.22,
"learning_rate": 1.0143054052910534e-06,
"loss": 0.9402,
"step": 3975
},
{
"epoch": 3.22,
"learning_rate": 1.0042293352454446e-06,
"loss": 0.9182,
"step": 3980
},
{
"epoch": 3.22,
"learning_rate": 9.94197973043829e-07,
"loss": 0.909,
"step": 3985
},
{
"epoch": 3.23,
"learning_rate": 9.842114309250222e-07,
"loss": 0.9285,
"step": 3990
},
{
"epoch": 3.23,
"learning_rate": 9.74269820626364e-07,
"loss": 0.9264,
"step": 3995
},
{
"epoch": 3.24,
"learning_rate": 9.643732533824545e-07,
"loss": 0.9205,
"step": 4000
},
{
"epoch": 3.24,
"learning_rate": 9.545218399239186e-07,
"loss": 0.96,
"step": 4005
},
{
"epoch": 3.24,
"learning_rate": 9.447156904761668e-07,
"loss": 0.9473,
"step": 4010
},
{
"epoch": 3.25,
"learning_rate": 9.349549147581571e-07,
"loss": 0.9281,
"step": 4015
},
{
"epoch": 3.25,
"learning_rate": 9.252396219811737e-07,
"loss": 0.9311,
"step": 4020
},
{
"epoch": 3.26,
"learning_rate": 9.155699208475988e-07,
"loss": 0.9789,
"step": 4025
},
{
"epoch": 3.26,
"learning_rate": 9.059459195496989e-07,
"loss": 0.8984,
"step": 4030
},
{
"epoch": 3.26,
"learning_rate": 8.963677257684184e-07,
"loss": 0.9564,
"step": 4035
},
{
"epoch": 3.27,
"learning_rate": 8.868354466721668e-07,
"loss": 0.9293,
"step": 4040
},
{
"epoch": 3.27,
"learning_rate": 8.773491889156254e-07,
"loss": 0.9678,
"step": 4045
},
{
"epoch": 3.28,
"learning_rate": 8.679090586385519e-07,
"loss": 0.9275,
"step": 4050
},
{
"epoch": 3.28,
"learning_rate": 8.585151614645942e-07,
"loss": 0.966,
"step": 4055
},
{
"epoch": 3.28,
"learning_rate": 8.491676025001083e-07,
"loss": 0.9049,
"step": 4060
},
{
"epoch": 3.29,
"learning_rate": 8.398664863329792e-07,
"loss": 0.9385,
"step": 4065
},
{
"epoch": 3.29,
"learning_rate": 8.306119170314553e-07,
"loss": 0.9529,
"step": 4070
},
{
"epoch": 3.3,
"learning_rate": 8.214039981429789e-07,
"loss": 0.9412,
"step": 4075
},
{
"epoch": 3.3,
"learning_rate": 8.122428326930348e-07,
"loss": 0.9852,
"step": 4080
},
{
"epoch": 3.31,
"learning_rate": 8.031285231839908e-07,
"loss": 0.9223,
"step": 4085
},
{
"epoch": 3.31,
"learning_rate": 7.940611715939522e-07,
"loss": 0.9592,
"step": 4090
},
{
"epoch": 3.31,
"learning_rate": 7.850408793756242e-07,
"loss": 0.9758,
"step": 4095
},
{
"epoch": 3.32,
"learning_rate": 7.760677474551759e-07,
"loss": 0.842,
"step": 4100
},
{
"epoch": 3.32,
"learning_rate": 7.67141876231105e-07,
"loss": 0.9406,
"step": 4105
},
{
"epoch": 3.33,
"learning_rate": 7.582633655731231e-07,
"loss": 0.9397,
"step": 4110
},
{
"epoch": 3.33,
"learning_rate": 7.494323148210303e-07,
"loss": 0.9193,
"step": 4115
},
{
"epoch": 3.33,
"learning_rate": 7.406488227836139e-07,
"loss": 0.9529,
"step": 4120
},
{
"epoch": 3.34,
"learning_rate": 7.319129877375314e-07,
"loss": 0.973,
"step": 4125
},
{
"epoch": 3.34,
"learning_rate": 7.232249074262176e-07,
"loss": 0.9596,
"step": 4130
},
{
"epoch": 3.35,
"learning_rate": 7.145846790587891e-07,
"loss": 0.9477,
"step": 4135
},
{
"epoch": 3.35,
"learning_rate": 7.059923993089585e-07,
"loss": 0.9809,
"step": 4140
},
{
"epoch": 3.35,
"learning_rate": 6.974481643139514e-07,
"loss": 0.9863,
"step": 4145
},
{
"epoch": 3.36,
"learning_rate": 6.889520696734297e-07,
"loss": 0.9666,
"step": 4150
},
{
"epoch": 3.36,
"learning_rate": 6.805042104484216e-07,
"loss": 0.9328,
"step": 4155
},
{
"epoch": 3.37,
"learning_rate": 6.721046811602622e-07,
"loss": 0.8867,
"step": 4160
},
{
"epoch": 3.37,
"learning_rate": 6.63753575789532e-07,
"loss": 0.9635,
"step": 4165
},
{
"epoch": 3.37,
"learning_rate": 6.554509877750042e-07,
"loss": 0.9605,
"step": 4170
},
{
"epoch": 3.38,
"learning_rate": 6.471970100126035e-07,
"loss": 0.989,
"step": 4175
},
{
"epoch": 3.38,
"learning_rate": 6.389917348543651e-07,
"loss": 0.9393,
"step": 4180
},
{
"epoch": 3.39,
"learning_rate": 6.308352541074014e-07,
"loss": 0.9385,
"step": 4185
},
{
"epoch": 3.39,
"learning_rate": 6.227276590328713e-07,
"loss": 0.9325,
"step": 4190
},
{
"epoch": 3.39,
"learning_rate": 6.146690403449646e-07,
"loss": 0.9801,
"step": 4195
},
{
"epoch": 3.4,
"learning_rate": 6.066594882098831e-07,
"loss": 0.976,
"step": 4200
},
{
"epoch": 3.4,
"learning_rate": 5.98699092244835e-07,
"loss": 0.9523,
"step": 4205
},
{
"epoch": 3.41,
"learning_rate": 5.907879415170287e-07,
"loss": 0.8773,
"step": 4210
},
{
"epoch": 3.41,
"learning_rate": 5.829261245426793e-07,
"loss": 0.8939,
"step": 4215
},
{
"epoch": 3.41,
"learning_rate": 5.751137292860126e-07,
"loss": 0.9383,
"step": 4220
},
{
"epoch": 3.42,
"learning_rate": 5.673508431582936e-07,
"loss": 0.9797,
"step": 4225
},
{
"epoch": 3.42,
"learning_rate": 5.596375530168329e-07,
"loss": 0.932,
"step": 4230
},
{
"epoch": 3.43,
"learning_rate": 5.519739451640238e-07,
"loss": 0.9015,
"step": 4235
},
{
"epoch": 3.43,
"learning_rate": 5.443601053463743e-07,
"loss": 0.966,
"step": 4240
},
{
"epoch": 3.43,
"learning_rate": 5.367961187535504e-07,
"loss": 0.9252,
"step": 4245
},
{
"epoch": 3.44,
"learning_rate": 5.292820700174189e-07,
"loss": 0.925,
"step": 4250
},
{
"epoch": 3.44,
"learning_rate": 5.218180432111026e-07,
"loss": 0.9445,
"step": 4255
},
{
"epoch": 3.45,
"learning_rate": 5.144041218480389e-07,
"loss": 0.9461,
"step": 4260
},
{
"epoch": 3.45,
"learning_rate": 5.070403888810471e-07,
"loss": 0.926,
"step": 4265
},
{
"epoch": 3.45,
"learning_rate": 4.997269267013993e-07,
"loss": 0.9242,
"step": 4270
},
{
"epoch": 3.46,
"learning_rate": 4.924638171378976e-07,
"loss": 0.9514,
"step": 4275
},
{
"epoch": 3.46,
"learning_rate": 4.852511414559575e-07,
"loss": 0.9877,
"step": 4280
},
{
"epoch": 3.47,
"learning_rate": 4.780889803567018e-07,
"loss": 0.9541,
"step": 4285
},
{
"epoch": 3.47,
"learning_rate": 4.7097741397605754e-07,
"loss": 0.9449,
"step": 4290
},
{
"epoch": 3.47,
"learning_rate": 4.639165218838559e-07,
"loss": 0.9361,
"step": 4295
},
{
"epoch": 3.48,
"learning_rate": 4.569063830829445e-07,
"loss": 0.9908,
"step": 4300
},
{
"epoch": 3.48,
"learning_rate": 4.49947076008303e-07,
"loss": 0.9355,
"step": 4305
},
{
"epoch": 3.49,
"learning_rate": 4.4303867852616755e-07,
"loss": 0.9096,
"step": 4310
},
{
"epoch": 3.49,
"learning_rate": 4.361812679331551e-07,
"loss": 0.9555,
"step": 4315
},
{
"epoch": 3.5,
"learning_rate": 4.2937492095540043e-07,
"loss": 0.9221,
"step": 4320
},
{
"epoch": 3.5,
"learning_rate": 4.2261971374769893e-07,
"loss": 0.9594,
"step": 4325
},
{
"epoch": 3.5,
"learning_rate": 4.159157218926557e-07,
"loss": 0.914,
"step": 4330
},
{
"epoch": 3.51,
"learning_rate": 4.09263020399836e-07,
"loss": 0.9935,
"step": 4335
},
{
"epoch": 3.51,
"learning_rate": 4.02661683704928e-07,
"loss": 0.9467,
"step": 4340
},
{
"epoch": 3.52,
"learning_rate": 3.9611178566890894e-07,
"loss": 0.943,
"step": 4345
},
{
"epoch": 3.52,
"learning_rate": 3.896133995772233e-07,
"loss": 0.9232,
"step": 4350
},
{
"epoch": 3.52,
"learning_rate": 3.8316659813895597e-07,
"loss": 0.9545,
"step": 4355
},
{
"epoch": 3.53,
"learning_rate": 3.767714534860223e-07,
"loss": 0.9242,
"step": 4360
},
{
"epoch": 3.53,
"learning_rate": 3.704280371723601e-07,
"loss": 0.9379,
"step": 4365
},
{
"epoch": 3.54,
"learning_rate": 3.6413642017313233e-07,
"loss": 0.9506,
"step": 4370
},
{
"epoch": 3.54,
"learning_rate": 3.5789667288392784e-07,
"loss": 0.9465,
"step": 4375
},
{
"epoch": 3.54,
"learning_rate": 3.517088651199768e-07,
"loss": 0.9365,
"step": 4380
},
{
"epoch": 3.55,
"learning_rate": 3.455730661153672e-07,
"loss": 0.9195,
"step": 4385
},
{
"epoch": 3.55,
"learning_rate": 3.394893445222752e-07,
"loss": 0.9746,
"step": 4390
},
{
"epoch": 3.56,
"learning_rate": 3.334577684101925e-07,
"loss": 0.9289,
"step": 4395
},
{
"epoch": 3.56,
"learning_rate": 3.2747840526516414e-07,
"loss": 0.9038,
"step": 4400
},
{
"epoch": 3.56,
"learning_rate": 3.215513219890365e-07,
"loss": 0.9098,
"step": 4405
},
{
"epoch": 3.57,
"learning_rate": 3.15676584898707e-07,
"loss": 0.9435,
"step": 4410
},
{
"epoch": 3.57,
"learning_rate": 3.0985425972538343e-07,
"loss": 0.9098,
"step": 4415
},
{
"epoch": 3.58,
"learning_rate": 3.040844116138475e-07,
"loss": 0.9318,
"step": 4420
},
{
"epoch": 3.58,
"learning_rate": 2.9836710512172353e-07,
"loss": 0.9592,
"step": 4425
},
{
"epoch": 3.58,
"learning_rate": 2.9270240421876204e-07,
"loss": 0.9756,
"step": 4430
},
{
"epoch": 3.59,
"learning_rate": 2.8709037228611903e-07,
"loss": 0.9189,
"step": 4435
},
{
"epoch": 3.59,
"learning_rate": 2.815310721156489e-07,
"loss": 0.9139,
"step": 4440
},
{
"epoch": 3.6,
"learning_rate": 2.7602456590920034e-07,
"loss": 0.9127,
"step": 4445
},
{
"epoch": 3.6,
"learning_rate": 2.7057091527792125e-07,
"loss": 0.9602,
"step": 4450
},
{
"epoch": 3.6,
"learning_rate": 2.6517018124157137e-07,
"loss": 0.9787,
"step": 4455
},
{
"epoch": 3.61,
"learning_rate": 2.598224242278369e-07,
"loss": 0.916,
"step": 4460
},
{
"epoch": 3.61,
"learning_rate": 2.545277040716537e-07,
"loss": 0.9846,
"step": 4465
},
{
"epoch": 3.62,
"learning_rate": 2.492860800145408e-07,
"loss": 0.9484,
"step": 4470
},
{
"epoch": 3.62,
"learning_rate": 2.4409761070393614e-07,
"loss": 0.9191,
"step": 4475
},
{
"epoch": 3.62,
"learning_rate": 2.389623541925407e-07,
"loss": 0.9266,
"step": 4480
},
{
"epoch": 3.63,
"learning_rate": 2.3388036793766723e-07,
"loss": 0.9034,
"step": 4485
},
{
"epoch": 3.63,
"learning_rate": 2.2885170880059758e-07,
"loss": 0.896,
"step": 4490
},
{
"epoch": 3.64,
"learning_rate": 2.2387643304595196e-07,
"loss": 0.9574,
"step": 4495
},
{
"epoch": 3.64,
"learning_rate": 2.189545963410511e-07,
"loss": 0.9387,
"step": 4500
},
{
"epoch": 3.64,
"learning_rate": 2.1408625375529845e-07,
"loss": 0.9322,
"step": 4505
},
{
"epoch": 3.65,
"learning_rate": 2.0927145975956297e-07,
"loss": 0.9088,
"step": 4510
},
{
"epoch": 3.65,
"learning_rate": 2.0451026822556952e-07,
"loss": 0.9168,
"step": 4515
},
{
"epoch": 3.66,
"learning_rate": 1.9980273242529825e-07,
"loss": 0.951,
"step": 4520
},
{
"epoch": 3.66,
"learning_rate": 1.951489050303834e-07,
"loss": 0.916,
"step": 4525
},
{
"epoch": 3.67,
"learning_rate": 1.9054883811152837e-07,
"loss": 0.8936,
"step": 4530
},
{
"epoch": 3.67,
"learning_rate": 1.8600258313792142e-07,
"loss": 0.9279,
"step": 4535
},
{
"epoch": 3.67,
"learning_rate": 1.8151019097666146e-07,
"loss": 0.9666,
"step": 4540
},
{
"epoch": 3.68,
"learning_rate": 1.7707171189218663e-07,
"loss": 0.9555,
"step": 4545
},
{
"epoch": 3.68,
"learning_rate": 1.7268719554571157e-07,
"loss": 0.945,
"step": 4550
},
{
"epoch": 3.69,
"learning_rate": 1.683566909946771e-07,
"loss": 0.9357,
"step": 4555
},
{
"epoch": 3.69,
"learning_rate": 1.640802466921926e-07,
"loss": 0.9528,
"step": 4560
},
{
"epoch": 3.69,
"learning_rate": 1.5985791048650223e-07,
"loss": 0.8418,
"step": 4565
},
{
"epoch": 3.7,
"learning_rate": 1.5568972962044405e-07,
"loss": 0.9797,
"step": 4570
},
{
"epoch": 3.7,
"learning_rate": 1.515757507309229e-07,
"loss": 0.9197,
"step": 4575
},
{
"epoch": 3.71,
"learning_rate": 1.4751601984839159e-07,
"loss": 1.0133,
"step": 4580
},
{
"epoch": 3.71,
"learning_rate": 1.4351058239633065e-07,
"loss": 0.9518,
"step": 4585
},
{
"epoch": 3.71,
"learning_rate": 1.3955948319074374e-07,
"loss": 0.881,
"step": 4590
},
{
"epoch": 3.72,
"learning_rate": 1.3566276643965538e-07,
"loss": 0.9238,
"step": 4595
},
{
"epoch": 3.72,
"learning_rate": 1.3182047574261557e-07,
"loss": 0.9002,
"step": 4600
},
{
"epoch": 3.73,
"learning_rate": 1.2803265409021436e-07,
"loss": 0.948,
"step": 4605
},
{
"epoch": 3.73,
"learning_rate": 1.2429934386359643e-07,
"loss": 0.9025,
"step": 4610
},
{
"epoch": 3.73,
"learning_rate": 1.2062058683399048e-07,
"loss": 0.9354,
"step": 4615
},
{
"epoch": 3.74,
"learning_rate": 1.1699642416224233e-07,
"loss": 0.9582,
"step": 4620
},
{
"epoch": 3.74,
"learning_rate": 1.1342689639835036e-07,
"loss": 0.9734,
"step": 4625
},
{
"epoch": 3.75,
"learning_rate": 1.0991204348101692e-07,
"loss": 0.9267,
"step": 4630
},
{
"epoch": 3.75,
"learning_rate": 1.0645190473719647e-07,
"loss": 0.9705,
"step": 4635
},
{
"epoch": 3.75,
"learning_rate": 1.0304651888166039e-07,
"loss": 0.9285,
"step": 4640
},
{
"epoch": 3.76,
"learning_rate": 9.969592401655903e-08,
"loss": 0.9494,
"step": 4645
},
{
"epoch": 3.76,
"learning_rate": 9.640015763100031e-08,
"loss": 0.8965,
"step": 4650
},
{
"epoch": 3.77,
"learning_rate": 9.315925660062619e-08,
"loss": 0.9922,
"step": 4655
},
{
"epoch": 3.77,
"learning_rate": 8.997325718720085e-08,
"loss": 0.9295,
"step": 4660
},
{
"epoch": 3.77,
"learning_rate": 8.684219503820756e-08,
"loss": 0.9564,
"step": 4665
},
{
"epoch": 3.78,
"learning_rate": 8.376610518644746e-08,
"loss": 0.9201,
"step": 4670
},
{
"epoch": 3.78,
"learning_rate": 8.074502204964696e-08,
"loss": 0.9303,
"step": 4675
},
{
"epoch": 3.79,
"learning_rate": 7.777897943007595e-08,
"loss": 0.9636,
"step": 4680
},
{
"epoch": 3.79,
"learning_rate": 7.486801051416525e-08,
"loss": 0.9542,
"step": 4685
},
{
"epoch": 3.79,
"learning_rate": 7.201214787213862e-08,
"loss": 0.9684,
"step": 4690
},
{
"epoch": 3.8,
"learning_rate": 6.921142345764798e-08,
"loss": 0.924,
"step": 4695
},
{
"epoch": 3.8,
"learning_rate": 6.646586860741322e-08,
"loss": 0.9271,
"step": 4700
},
{
"epoch": 3.81,
"learning_rate": 6.377551404087467e-08,
"loss": 0.9333,
"step": 4705
},
{
"epoch": 3.81,
"learning_rate": 6.114038985984894e-08,
"loss": 0.9413,
"step": 4710
},
{
"epoch": 3.81,
"learning_rate": 5.856052554818969e-08,
"loss": 0.9223,
"step": 4715
},
{
"epoch": 3.82,
"learning_rate": 5.603594997145967e-08,
"loss": 0.9301,
"step": 4720
},
{
"epoch": 3.82,
"learning_rate": 5.3566691376609744e-08,
"loss": 0.9072,
"step": 4725
},
{
"epoch": 3.83,
"learning_rate": 5.115277739165703e-08,
"loss": 0.9152,
"step": 4730
},
{
"epoch": 3.83,
"learning_rate": 4.8794235025383386e-08,
"loss": 0.9234,
"step": 4735
},
{
"epoch": 3.83,
"learning_rate": 4.6491090667025176e-08,
"loss": 0.943,
"step": 4740
},
{
"epoch": 3.84,
"learning_rate": 4.4243370085985114e-08,
"loss": 0.8847,
"step": 4745
},
{
"epoch": 3.84,
"learning_rate": 4.2051098431539764e-08,
"loss": 1.0156,
"step": 4750
},
{
"epoch": 3.85,
"learning_rate": 3.991430023255804e-08,
"loss": 0.866,
"step": 4755
},
{
"epoch": 3.85,
"learning_rate": 3.783299939722984e-08,
"loss": 0.9083,
"step": 4760
},
{
"epoch": 3.86,
"learning_rate": 3.580721921279562e-08,
"loss": 0.9077,
"step": 4765
},
{
"epoch": 3.86,
"learning_rate": 3.383698234528665e-08,
"loss": 0.9351,
"step": 4770
},
{
"epoch": 3.86,
"learning_rate": 3.1922310839272444e-08,
"loss": 0.9322,
"step": 4775
},
{
"epoch": 3.87,
"learning_rate": 3.006322611761314e-08,
"loss": 0.9379,
"step": 4780
},
{
"epoch": 3.87,
"learning_rate": 2.8259748981219194e-08,
"loss": 0.9136,
"step": 4785
},
{
"epoch": 3.88,
"learning_rate": 2.651189960882039e-08,
"loss": 0.9764,
"step": 4790
},
{
"epoch": 3.88,
"learning_rate": 2.4819697556737742e-08,
"loss": 0.9348,
"step": 4795
},
{
"epoch": 3.88,
"learning_rate": 2.318316175866697e-08,
"loss": 0.9345,
"step": 4800
},
{
"epoch": 3.89,
"learning_rate": 2.1602310525466464e-08,
"loss": 0.8879,
"step": 4805
},
{
"epoch": 3.89,
"learning_rate": 2.007716154494965e-08,
"loss": 0.9619,
"step": 4810
},
{
"epoch": 3.9,
"learning_rate": 1.8607731881690737e-08,
"loss": 0.9516,
"step": 4815
},
{
"epoch": 3.9,
"learning_rate": 1.7194037976831502e-08,
"loss": 0.9471,
"step": 4820
},
{
"epoch": 3.9,
"learning_rate": 1.583609564789812e-08,
"loss": 0.9197,
"step": 4825
},
{
"epoch": 3.91,
"learning_rate": 1.4533920088623533e-08,
"loss": 0.8611,
"step": 4830
},
{
"epoch": 3.91,
"learning_rate": 1.3287525868778128e-08,
"loss": 0.9449,
"step": 4835
},
{
"epoch": 3.92,
"learning_rate": 1.2096926934007103e-08,
"loss": 0.9418,
"step": 4840
},
{
"epoch": 3.92,
"learning_rate": 1.0962136605673357e-08,
"loss": 0.9337,
"step": 4845
},
{
"epoch": 3.92,
"learning_rate": 9.883167580709285e-09,
"loss": 0.9118,
"step": 4850
},
{
"epoch": 3.93,
"learning_rate": 8.860031931473555e-09,
"loss": 0.9563,
"step": 4855
},
{
"epoch": 3.93,
"learning_rate": 7.892741105617329e-09,
"loss": 0.9342,
"step": 4860
},
{
"epoch": 3.94,
"learning_rate": 6.981305925956583e-09,
"loss": 0.9553,
"step": 4865
},
{
"epoch": 3.94,
"learning_rate": 6.1257365903488745e-09,
"loss": 0.9455,
"step": 4870
},
{
"epoch": 3.94,
"learning_rate": 5.326042671580655e-09,
"loss": 0.8813,
"step": 4875
},
{
"epoch": 3.95,
"learning_rate": 4.582233117260693e-09,
"loss": 0.8929,
"step": 4880
},
{
"epoch": 3.95,
"learning_rate": 3.894316249717922e-09,
"loss": 0.9463,
"step": 4885
},
{
"epoch": 3.96,
"learning_rate": 3.2622997659120802e-09,
"loss": 0.9428,
"step": 4890
},
{
"epoch": 3.96,
"learning_rate": 2.6861907373432193e-09,
"loss": 0.866,
"step": 4895
},
{
"epoch": 3.96,
"learning_rate": 2.165995609973992e-09,
"loss": 0.94,
"step": 4900
},
{
"epoch": 3.97,
"learning_rate": 1.7017202041602621e-09,
"loss": 0.9525,
"step": 4905
},
{
"epoch": 3.97,
"learning_rate": 1.293369714582271e-09,
"loss": 0.9548,
"step": 4910
},
{
"epoch": 3.98,
"learning_rate": 9.409487101880167e-10,
"loss": 0.9668,
"step": 4915
},
{
"epoch": 3.98,
"learning_rate": 6.444611341432927e-10,
"loss": 0.9349,
"step": 4920
},
{
"epoch": 3.98,
"learning_rate": 4.0391030378561513e-10,
"loss": 0.974,
"step": 4925
},
{
"epoch": 3.99,
"learning_rate": 2.1929891058758424e-10,
"loss": 0.9563,
"step": 4930
},
{
"epoch": 3.99,
"learning_rate": 9.0629020127464e-11,
"loss": 0.9373,
"step": 4935
},
{
"epoch": 4.0,
"learning_rate": 1.790207206586736e-11,
"loss": 0.9326,
"step": 4940
},
{
"epoch": 4.0,
"step": 4944,
"total_flos": 2.446826463366742e+18,
"train_loss": 1.1695684537918436,
"train_runtime": 57751.42,
"train_samples_per_second": 5.478,
"train_steps_per_second": 0.086
}
],
"max_steps": 4944,
"num_train_epochs": 4,
"total_flos": 2.446826463366742e+18,
"trial_name": null,
"trial_params": null
}