SUS-Chat-34B-roleplay-lora / trainer_state.json
zgce's picture
Upload 11 files
6d6a1ac
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 5184,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0002999993113941634,
"loss": 1.412,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 0.00029999724558297603,
"loss": 1.2243,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 0.0002999946013584945,
"loss": 1.6929,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 0.00029999005663396985,
"loss": 2.0793,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 0.00029998413478906613,
"loss": 2.4653,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 0.00029997683587815427,
"loss": 2.1023,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.00029996815996824846,
"loss": 1.4915,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 0.000299958107139006,
"loss": 1.6685,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00029995370049014505,
"loss": 2.2307,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 0.0002999490735552899,
"loss": 1.7744,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 0.0002999365425123301,
"loss": 2.0726,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 0.00029992263484032635,
"loss": 1.5018,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 0.0002999073506669708,
"loss": 1.887,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 0.00029989069013259374,
"loss": 1.4459,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 0.00029987265339016236,
"loss": 1.906,
"step": 75
},
{
"epoch": 0.02,
"learning_rate": 0.0002998532406052794,
"loss": 1.8526,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 0.00029983245195618156,
"loss": 2.3401,
"step": 85
},
{
"epoch": 0.02,
"learning_rate": 0.000299810287633738,
"loss": 1.7854,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 0.00029978674784144856,
"loss": 2.0329,
"step": 95
},
{
"epoch": 0.02,
"learning_rate": 0.00029976183279544166,
"loss": 1.5686,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 0.0002997355427244727,
"loss": 1.3467,
"step": 105
},
{
"epoch": 0.02,
"learning_rate": 0.00029970787786992147,
"loss": 1.7594,
"step": 110
},
{
"epoch": 0.02,
"learning_rate": 0.0002996788384857905,
"loss": 1.1309,
"step": 115
},
{
"epoch": 0.02,
"learning_rate": 0.00029964842483870226,
"loss": 1.845,
"step": 120
},
{
"epoch": 0.02,
"learning_rate": 0.00029961663720789696,
"loss": 1.7158,
"step": 125
},
{
"epoch": 0.03,
"learning_rate": 0.00029958347588522984,
"loss": 1.6382,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 0.0002995489411751688,
"loss": 1.2599,
"step": 135
},
{
"epoch": 0.03,
"learning_rate": 0.00029951303339479106,
"loss": 1.4989,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 0.0002994757528737808,
"loss": 2.1127,
"step": 145
},
{
"epoch": 0.03,
"learning_rate": 0.00029943709995442576,
"loss": 1.2286,
"step": 150
},
{
"epoch": 0.03,
"learning_rate": 0.0002993970749916143,
"loss": 1.719,
"step": 155
},
{
"epoch": 0.03,
"learning_rate": 0.00029935567835283203,
"loss": 1.3937,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 0.00029931291041815865,
"loss": 1.3153,
"step": 165
},
{
"epoch": 0.03,
"learning_rate": 0.000299268771580264,
"loss": 1.673,
"step": 170
},
{
"epoch": 0.03,
"learning_rate": 0.00029922326224440495,
"loss": 1.2415,
"step": 175
},
{
"epoch": 0.03,
"learning_rate": 0.0002991763828284214,
"loss": 1.9943,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 0.0002991281337627326,
"loss": 1.383,
"step": 185
},
{
"epoch": 0.04,
"learning_rate": 0.000299078515490333,
"loss": 1.5981,
"step": 190
},
{
"epoch": 0.04,
"learning_rate": 0.0002990275284667884,
"loss": 1.1489,
"step": 195
},
{
"epoch": 0.04,
"learning_rate": 0.0002989751731602316,
"loss": 1.6789,
"step": 200
},
{
"epoch": 0.04,
"learning_rate": 0.0002989214500513582,
"loss": 1.9486,
"step": 205
},
{
"epoch": 0.04,
"learning_rate": 0.00029886635963342217,
"loss": 1.4526,
"step": 210
},
{
"epoch": 0.04,
"learning_rate": 0.0002988099024122313,
"loss": 1.7287,
"step": 215
},
{
"epoch": 0.04,
"learning_rate": 0.0002987520789061425,
"loss": 1.1696,
"step": 220
},
{
"epoch": 0.04,
"learning_rate": 0.0002986928896460572,
"loss": 1.4302,
"step": 225
},
{
"epoch": 0.04,
"learning_rate": 0.00029863233517541635,
"loss": 2.153,
"step": 230
},
{
"epoch": 0.05,
"learning_rate": 0.00029857041605019534,
"loss": 1.6862,
"step": 235
},
{
"epoch": 0.05,
"learning_rate": 0.0002985071328388992,
"loss": 1.3822,
"step": 240
},
{
"epoch": 0.05,
"learning_rate": 0.0002984424861225571,
"loss": 1.858,
"step": 245
},
{
"epoch": 0.05,
"learning_rate": 0.00029837647649471715,
"loss": 1.6337,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 0.0002983091045614408,
"loss": 2.4304,
"step": 255
},
{
"epoch": 0.05,
"learning_rate": 0.0002982403709412975,
"loss": 1.7602,
"step": 260
},
{
"epoch": 0.05,
"learning_rate": 0.00029817027626535895,
"loss": 1.3822,
"step": 265
},
{
"epoch": 0.05,
"learning_rate": 0.00029809882117719314,
"loss": 1.3085,
"step": 270
},
{
"epoch": 0.05,
"learning_rate": 0.0002980260063328586,
"loss": 1.6656,
"step": 275
},
{
"epoch": 0.05,
"learning_rate": 0.00029795183240089834,
"loss": 1.3397,
"step": 280
},
{
"epoch": 0.05,
"learning_rate": 0.00029787630006233374,
"loss": 1.8768,
"step": 285
},
{
"epoch": 0.06,
"learning_rate": 0.0002977994100106583,
"loss": 1.3506,
"step": 290
},
{
"epoch": 0.06,
"learning_rate": 0.0002977211629518312,
"loss": 1.976,
"step": 295
},
{
"epoch": 0.06,
"learning_rate": 0.00029764155960427076,
"loss": 1.5258,
"step": 300
},
{
"epoch": 0.06,
"learning_rate": 0.00029756060069884813,
"loss": 1.1524,
"step": 305
},
{
"epoch": 0.06,
"learning_rate": 0.0002974782869788803,
"loss": 1.7161,
"step": 310
},
{
"epoch": 0.06,
"learning_rate": 0.00029739461920012334,
"loss": 2.1702,
"step": 315
},
{
"epoch": 0.06,
"learning_rate": 0.0002973095981307655,
"loss": 2.1052,
"step": 320
},
{
"epoch": 0.06,
"learning_rate": 0.0002972232245514203,
"loss": 1.429,
"step": 325
},
{
"epoch": 0.06,
"learning_rate": 0.000297135499255119,
"loss": 1.3791,
"step": 330
},
{
"epoch": 0.06,
"learning_rate": 0.0002970464230473036,
"loss": 1.6739,
"step": 335
},
{
"epoch": 0.07,
"learning_rate": 0.0002969559967458194,
"loss": 1.7643,
"step": 340
},
{
"epoch": 0.07,
"learning_rate": 0.00029686422118090745,
"loss": 1.7344,
"step": 345
},
{
"epoch": 0.07,
"learning_rate": 0.00029677109719519697,
"loss": 1.6696,
"step": 350
},
{
"epoch": 0.07,
"learning_rate": 0.00029667662564369755,
"loss": 1.7149,
"step": 355
},
{
"epoch": 0.07,
"learning_rate": 0.0002965808073937914,
"loss": 0.9939,
"step": 360
},
{
"epoch": 0.07,
"learning_rate": 0.0002964836433252251,
"loss": 1.7887,
"step": 365
},
{
"epoch": 0.07,
"learning_rate": 0.00029638513433010214,
"loss": 1.5627,
"step": 370
},
{
"epoch": 0.07,
"learning_rate": 0.00029628528131287396,
"loss": 1.4555,
"step": 375
},
{
"epoch": 0.07,
"learning_rate": 0.0002961840851903322,
"loss": 1.8208,
"step": 380
},
{
"epoch": 0.07,
"learning_rate": 0.0002960815468916,
"loss": 1.4685,
"step": 385
},
{
"epoch": 0.08,
"learning_rate": 0.00029597766735812373,
"loss": 1.3059,
"step": 390
},
{
"epoch": 0.08,
"learning_rate": 0.00029587244754366416,
"loss": 1.9683,
"step": 395
},
{
"epoch": 0.08,
"learning_rate": 0.00029576588841428753,
"loss": 1.6673,
"step": 400
},
{
"epoch": 0.08,
"learning_rate": 0.00029565799094835707,
"loss": 1.9922,
"step": 405
},
{
"epoch": 0.08,
"learning_rate": 0.00029554875613652375,
"loss": 1.4377,
"step": 410
},
{
"epoch": 0.08,
"learning_rate": 0.00029543818498171736,
"loss": 2.0199,
"step": 415
},
{
"epoch": 0.08,
"learning_rate": 0.0002953262784991371,
"loss": 1.3798,
"step": 420
},
{
"epoch": 0.08,
"learning_rate": 0.0002952130377162424,
"loss": 1.466,
"step": 425
},
{
"epoch": 0.08,
"learning_rate": 0.00029509846367274336,
"loss": 1.548,
"step": 430
},
{
"epoch": 0.08,
"learning_rate": 0.00029498255742059157,
"loss": 1.7984,
"step": 435
},
{
"epoch": 0.08,
"learning_rate": 0.0002948653200239698,
"loss": 1.4208,
"step": 440
},
{
"epoch": 0.09,
"learning_rate": 0.0002947467525592829,
"loss": 1.6134,
"step": 445
},
{
"epoch": 0.09,
"learning_rate": 0.00029462685611514745,
"loss": 1.4112,
"step": 450
},
{
"epoch": 0.09,
"learning_rate": 0.000294505631792382,
"loss": 2.0775,
"step": 455
},
{
"epoch": 0.09,
"learning_rate": 0.000294383080703997,
"loss": 1.5725,
"step": 460
},
{
"epoch": 0.09,
"learning_rate": 0.0002942592039751843,
"loss": 1.5205,
"step": 465
},
{
"epoch": 0.09,
"learning_rate": 0.00029413400274330703,
"loss": 2.3163,
"step": 470
},
{
"epoch": 0.09,
"learning_rate": 0.0002940074781578893,
"loss": 1.8447,
"step": 475
},
{
"epoch": 0.09,
"learning_rate": 0.0002938796313806052,
"loss": 1.903,
"step": 480
},
{
"epoch": 0.09,
"learning_rate": 0.0002937504635852687,
"loss": 1.4324,
"step": 485
},
{
"epoch": 0.09,
"learning_rate": 0.0002936199759578224,
"loss": 1.2384,
"step": 490
},
{
"epoch": 0.1,
"learning_rate": 0.0002934881696963268,
"loss": 1.9072,
"step": 495
},
{
"epoch": 0.1,
"learning_rate": 0.00029335504601094947,
"loss": 1.5876,
"step": 500
},
{
"epoch": 0.1,
"learning_rate": 0.00029322060612395356,
"loss": 2.3766,
"step": 505
},
{
"epoch": 0.1,
"learning_rate": 0.0002930848512696871,
"loss": 1.5821,
"step": 510
},
{
"epoch": 0.1,
"learning_rate": 0.0002929477826945711,
"loss": 1.9367,
"step": 515
},
{
"epoch": 0.1,
"learning_rate": 0.0002928094016570886,
"loss": 2.2331,
"step": 520
},
{
"epoch": 0.1,
"learning_rate": 0.0002926697094277728,
"loss": 1.7781,
"step": 525
},
{
"epoch": 0.1,
"learning_rate": 0.0002925287072891954,
"loss": 1.7931,
"step": 530
},
{
"epoch": 0.1,
"learning_rate": 0.00029238639653595506,
"loss": 1.7712,
"step": 535
},
{
"epoch": 0.1,
"learning_rate": 0.00029224277847466533,
"loss": 2.0733,
"step": 540
},
{
"epoch": 0.11,
"learning_rate": 0.0002920978544239427,
"loss": 1.6243,
"step": 545
},
{
"epoch": 0.11,
"learning_rate": 0.0002919516257143944,
"loss": 1.831,
"step": 550
},
{
"epoch": 0.11,
"learning_rate": 0.00029180409368860635,
"loss": 1.9509,
"step": 555
},
{
"epoch": 0.11,
"learning_rate": 0.0002916552597011308,
"loss": 1.1314,
"step": 560
},
{
"epoch": 0.11,
"learning_rate": 0.00029150512511847375,
"loss": 1.4511,
"step": 565
},
{
"epoch": 0.11,
"learning_rate": 0.00029135369131908255,
"loss": 1.6977,
"step": 570
},
{
"epoch": 0.11,
"learning_rate": 0.0002912009596933331,
"loss": 1.3039,
"step": 575
},
{
"epoch": 0.11,
"learning_rate": 0.00029104693164351726,
"loss": 1.2062,
"step": 580
},
{
"epoch": 0.11,
"learning_rate": 0.00029089160858382997,
"loss": 1.5104,
"step": 585
},
{
"epoch": 0.11,
"learning_rate": 0.0002907349919403561,
"loss": 2.2833,
"step": 590
},
{
"epoch": 0.11,
"learning_rate": 0.0002905770831510573,
"loss": 1.5332,
"step": 595
},
{
"epoch": 0.12,
"learning_rate": 0.00029041788366575933,
"loss": 1.9756,
"step": 600
},
{
"epoch": 0.12,
"learning_rate": 0.0002902573949461379,
"loss": 1.6587,
"step": 605
},
{
"epoch": 0.12,
"learning_rate": 0.00029009561846570604,
"loss": 1.5348,
"step": 610
},
{
"epoch": 0.12,
"learning_rate": 0.0002899325557098001,
"loss": 1.9013,
"step": 615
},
{
"epoch": 0.12,
"learning_rate": 0.00028976820817556623,
"loss": 1.3699,
"step": 620
},
{
"epoch": 0.12,
"learning_rate": 0.0002896025773719469,
"loss": 1.9326,
"step": 625
},
{
"epoch": 0.12,
"learning_rate": 0.0002894356648196664,
"loss": 1.6738,
"step": 630
},
{
"epoch": 0.12,
"learning_rate": 0.00028926747205121757,
"loss": 1.9836,
"step": 635
},
{
"epoch": 0.12,
"learning_rate": 0.0002890980006108475,
"loss": 2.0351,
"step": 640
},
{
"epoch": 0.12,
"learning_rate": 0.00028892725205454295,
"loss": 2.0746,
"step": 645
},
{
"epoch": 0.13,
"learning_rate": 0.0002887552279500168,
"loss": 1.9237,
"step": 650
},
{
"epoch": 0.13,
"learning_rate": 0.000288581929876693,
"loss": 1.6076,
"step": 655
},
{
"epoch": 0.13,
"learning_rate": 0.0002884073594256924,
"loss": 1.5154,
"step": 660
},
{
"epoch": 0.13,
"learning_rate": 0.00028823151819981814,
"loss": 2.1754,
"step": 665
},
{
"epoch": 0.13,
"learning_rate": 0.00028805440781354074,
"loss": 1.6397,
"step": 670
},
{
"epoch": 0.13,
"learning_rate": 0.00028787602989298355,
"loss": 1.6161,
"step": 675
},
{
"epoch": 0.13,
"learning_rate": 0.00028769638607590756,
"loss": 1.8723,
"step": 680
},
{
"epoch": 0.13,
"learning_rate": 0.0002875154780116965,
"loss": 2.3238,
"step": 685
},
{
"epoch": 0.13,
"learning_rate": 0.00028733330736134175,
"loss": 1.5394,
"step": 690
},
{
"epoch": 0.13,
"learning_rate": 0.0002871498757974269,
"loss": 1.5529,
"step": 695
},
{
"epoch": 0.14,
"learning_rate": 0.00028696518500411254,
"loss": 1.3396,
"step": 700
},
{
"epoch": 0.14,
"learning_rate": 0.00028677923667712085,
"loss": 1.4043,
"step": 705
},
{
"epoch": 0.14,
"learning_rate": 0.0002865920325237198,
"loss": 2.2666,
"step": 710
},
{
"epoch": 0.14,
"learning_rate": 0.00028640357426270777,
"loss": 1.4279,
"step": 715
},
{
"epoch": 0.14,
"learning_rate": 0.0002862138636243974,
"loss": 1.2806,
"step": 720
},
{
"epoch": 0.14,
"learning_rate": 0.00028602290235060025,
"loss": 1.6162,
"step": 725
},
{
"epoch": 0.14,
"learning_rate": 0.00028583069219461016,
"loss": 1.4642,
"step": 730
},
{
"epoch": 0.14,
"learning_rate": 0.00028563723492118764,
"loss": 1.639,
"step": 735
},
{
"epoch": 0.14,
"learning_rate": 0.0002854425323065435,
"loss": 1.9078,
"step": 740
},
{
"epoch": 0.14,
"learning_rate": 0.0002852465861383224,
"loss": 1.5942,
"step": 745
},
{
"epoch": 0.14,
"learning_rate": 0.0002850493982155867,
"loss": 1.6403,
"step": 750
},
{
"epoch": 0.15,
"learning_rate": 0.0002848509703487999,
"loss": 1.7215,
"step": 755
},
{
"epoch": 0.15,
"learning_rate": 0.0002846513043598097,
"loss": 0.7587,
"step": 760
},
{
"epoch": 0.15,
"learning_rate": 0.00028445040208183164,
"loss": 1.7533,
"step": 765
},
{
"epoch": 0.15,
"learning_rate": 0.00028424826535943226,
"loss": 1.5318,
"step": 770
},
{
"epoch": 0.15,
"learning_rate": 0.0002840448960485118,
"loss": 1.5995,
"step": 775
},
{
"epoch": 0.15,
"learning_rate": 0.0002838402960162876,
"loss": 1.7403,
"step": 780
},
{
"epoch": 0.15,
"learning_rate": 0.0002836344671412766,
"loss": 1.3032,
"step": 785
},
{
"epoch": 0.15,
"learning_rate": 0.0002834274113132784,
"loss": 1.2807,
"step": 790
},
{
"epoch": 0.15,
"learning_rate": 0.0002832191304333576,
"loss": 1.5225,
"step": 795
},
{
"epoch": 0.15,
"learning_rate": 0.0002830096264138267,
"loss": 1.3622,
"step": 800
},
{
"epoch": 0.16,
"learning_rate": 0.0002827989011782282,
"loss": 1.4411,
"step": 805
},
{
"epoch": 0.16,
"learning_rate": 0.0002825869566613171,
"loss": 1.5546,
"step": 810
},
{
"epoch": 0.16,
"learning_rate": 0.00028237379480904317,
"loss": 1.7998,
"step": 815
},
{
"epoch": 0.16,
"learning_rate": 0.00028215941757853304,
"loss": 1.5628,
"step": 820
},
{
"epoch": 0.16,
"learning_rate": 0.00028194382693807216,
"loss": 1.8731,
"step": 825
},
{
"epoch": 0.16,
"learning_rate": 0.0002817270248670869,
"loss": 1.6162,
"step": 830
},
{
"epoch": 0.16,
"learning_rate": 0.00028150901335612615,
"loss": 1.2305,
"step": 835
},
{
"epoch": 0.16,
"learning_rate": 0.00028128979440684326,
"loss": 1.1375,
"step": 840
},
{
"epoch": 0.16,
"learning_rate": 0.0002810693700319775,
"loss": 1.5766,
"step": 845
},
{
"epoch": 0.16,
"learning_rate": 0.0002808477422553358,
"loss": 1.7187,
"step": 850
},
{
"epoch": 0.16,
"learning_rate": 0.0002806249131117738,
"loss": 1.471,
"step": 855
},
{
"epoch": 0.17,
"learning_rate": 0.0002804008846471775,
"loss": 1.5664,
"step": 860
},
{
"epoch": 0.17,
"learning_rate": 0.00028017565891844435,
"loss": 1.1007,
"step": 865
},
{
"epoch": 0.17,
"learning_rate": 0.00027994923799346433,
"loss": 1.8754,
"step": 870
},
{
"epoch": 0.17,
"learning_rate": 0.0002797216239511011,
"loss": 1.4075,
"step": 875
},
{
"epoch": 0.17,
"learning_rate": 0.0002794928188811727,
"loss": 1.9204,
"step": 880
},
{
"epoch": 0.17,
"learning_rate": 0.0002792628248844327,
"loss": 1.5619,
"step": 885
},
{
"epoch": 0.17,
"learning_rate": 0.0002790316440725504,
"loss": 1.0522,
"step": 890
},
{
"epoch": 0.17,
"learning_rate": 0.00027879927856809197,
"loss": 1.0902,
"step": 895
},
{
"epoch": 0.17,
"learning_rate": 0.0002785657305045005,
"loss": 1.1806,
"step": 900
},
{
"epoch": 0.17,
"learning_rate": 0.000278331002026077,
"loss": 1.4444,
"step": 905
},
{
"epoch": 0.18,
"learning_rate": 0.00027809509528796,
"loss": 1.9458,
"step": 910
},
{
"epoch": 0.18,
"learning_rate": 0.0002778580124561063,
"loss": 1.3607,
"step": 915
},
{
"epoch": 0.18,
"learning_rate": 0.00027761975570727085,
"loss": 1.4185,
"step": 920
},
{
"epoch": 0.18,
"learning_rate": 0.00027738032722898683,
"loss": 1.6605,
"step": 925
},
{
"epoch": 0.18,
"learning_rate": 0.0002771397292195455,
"loss": 1.6314,
"step": 930
},
{
"epoch": 0.18,
"learning_rate": 0.0002768979638879761,
"loss": 1.5514,
"step": 935
},
{
"epoch": 0.18,
"learning_rate": 0.0002766550334540257,
"loss": 1.9447,
"step": 940
},
{
"epoch": 0.18,
"learning_rate": 0.00027641094014813826,
"loss": 2.1207,
"step": 945
},
{
"epoch": 0.18,
"learning_rate": 0.00027616568621143493,
"loss": 1.5794,
"step": 950
},
{
"epoch": 0.18,
"learning_rate": 0.000275919273895693,
"loss": 1.5328,
"step": 955
},
{
"epoch": 0.19,
"learning_rate": 0.00027567170546332503,
"loss": 1.6665,
"step": 960
},
{
"epoch": 0.19,
"learning_rate": 0.0002754229831873588,
"loss": 2.2289,
"step": 965
},
{
"epoch": 0.19,
"learning_rate": 0.00027517310935141565,
"loss": 1.4299,
"step": 970
},
{
"epoch": 0.19,
"learning_rate": 0.0002749220862496901,
"loss": 1.7164,
"step": 975
},
{
"epoch": 0.19,
"learning_rate": 0.00027466991618692836,
"loss": 1.1377,
"step": 980
},
{
"epoch": 0.19,
"learning_rate": 0.00027441660147840754,
"loss": 1.8367,
"step": 985
},
{
"epoch": 0.19,
"learning_rate": 0.00027416214444991416,
"loss": 1.7409,
"step": 990
},
{
"epoch": 0.19,
"learning_rate": 0.00027390654743772273,
"loss": 1.5072,
"step": 995
},
{
"epoch": 0.19,
"learning_rate": 0.0002736498127885745,
"loss": 1.8343,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 0.0002733919428596559,
"loss": 1.6706,
"step": 1005
},
{
"epoch": 0.19,
"learning_rate": 0.00027313294001857685,
"loss": 2.049,
"step": 1010
},
{
"epoch": 0.2,
"learning_rate": 0.00027287280664334875,
"loss": 1.992,
"step": 1015
},
{
"epoch": 0.2,
"learning_rate": 0.0002726115451223632,
"loss": 1.3719,
"step": 1020
},
{
"epoch": 0.2,
"learning_rate": 0.0002723491578543695,
"loss": 1.1513,
"step": 1025
},
{
"epoch": 0.2,
"learning_rate": 0.0002720856472484533,
"loss": 1.0665,
"step": 1030
},
{
"epoch": 0.2,
"learning_rate": 0.0002718210157240135,
"loss": 1.9594,
"step": 1035
},
{
"epoch": 0.2,
"learning_rate": 0.0002715552657107411,
"loss": 1.6081,
"step": 1040
},
{
"epoch": 0.2,
"learning_rate": 0.0002712883996485963,
"loss": 1.9058,
"step": 1045
},
{
"epoch": 0.2,
"learning_rate": 0.000271020419987786,
"loss": 1.5741,
"step": 1050
},
{
"epoch": 0.2,
"learning_rate": 0.00027075132918874176,
"loss": 1.0194,
"step": 1055
},
{
"epoch": 0.2,
"learning_rate": 0.0002704811297220967,
"loss": 1.4918,
"step": 1060
},
{
"epoch": 0.21,
"learning_rate": 0.00027020982406866334,
"loss": 1.7175,
"step": 1065
},
{
"epoch": 0.21,
"learning_rate": 0.00026993741471941044,
"loss": 1.6855,
"step": 1070
},
{
"epoch": 0.21,
"learning_rate": 0.0002696639041754402,
"loss": 1.6377,
"step": 1075
},
{
"epoch": 0.21,
"learning_rate": 0.00026938929494796534,
"loss": 1.7234,
"step": 1080
},
{
"epoch": 0.21,
"learning_rate": 0.0002691135895582861,
"loss": 1.9078,
"step": 1085
},
{
"epoch": 0.21,
"learning_rate": 0.0002688367905377671,
"loss": 1.5927,
"step": 1090
},
{
"epoch": 0.21,
"learning_rate": 0.00026855890042781387,
"loss": 1.8701,
"step": 1095
},
{
"epoch": 0.21,
"learning_rate": 0.0002682799217798498,
"loss": 1.8161,
"step": 1100
},
{
"epoch": 0.21,
"learning_rate": 0.0002679998571552925,
"loss": 1.5415,
"step": 1105
},
{
"epoch": 0.21,
"learning_rate": 0.00026771870912553055,
"loss": 1.3043,
"step": 1110
},
{
"epoch": 0.22,
"learning_rate": 0.0002674364802718995,
"loss": 1.2023,
"step": 1115
},
{
"epoch": 0.22,
"learning_rate": 0.0002671531731856586,
"loss": 1.648,
"step": 1120
},
{
"epoch": 0.22,
"learning_rate": 0.0002668687904679666,
"loss": 1.5379,
"step": 1125
},
{
"epoch": 0.22,
"learning_rate": 0.0002665833347298582,
"loss": 2.1148,
"step": 1130
},
{
"epoch": 0.22,
"learning_rate": 0.00026629680859221983,
"loss": 1.5271,
"step": 1135
},
{
"epoch": 0.22,
"learning_rate": 0.0002660092146857659,
"loss": 1.6819,
"step": 1140
},
{
"epoch": 0.22,
"learning_rate": 0.0002657205556510142,
"loss": 2.1589,
"step": 1145
},
{
"epoch": 0.22,
"learning_rate": 0.00026543083413826203,
"loss": 1.2536,
"step": 1150
},
{
"epoch": 0.22,
"learning_rate": 0.00026514005280756174,
"loss": 1.2755,
"step": 1155
},
{
"epoch": 0.22,
"learning_rate": 0.0002648482143286963,
"loss": 2.0277,
"step": 1160
},
{
"epoch": 0.22,
"learning_rate": 0.00026455532138115474,
"loss": 2.1261,
"step": 1165
},
{
"epoch": 0.23,
"learning_rate": 0.00026426137665410765,
"loss": 1.479,
"step": 1170
},
{
"epoch": 0.23,
"learning_rate": 0.0002639663828463824,
"loss": 1.6757,
"step": 1175
},
{
"epoch": 0.23,
"learning_rate": 0.00026367034266643844,
"loss": 1.6926,
"step": 1180
},
{
"epoch": 0.23,
"learning_rate": 0.0002633732588323424,
"loss": 1.7553,
"step": 1185
},
{
"epoch": 0.23,
"learning_rate": 0.0002630751340717431,
"loss": 2.3215,
"step": 1190
},
{
"epoch": 0.23,
"learning_rate": 0.0002627759711218466,
"loss": 1.505,
"step": 1195
},
{
"epoch": 0.23,
"learning_rate": 0.0002624757727293908,
"loss": 1.5585,
"step": 1200
},
{
"epoch": 0.23,
"learning_rate": 0.00026217454165062073,
"loss": 2.3911,
"step": 1205
},
{
"epoch": 0.23,
"learning_rate": 0.00026187228065126266,
"loss": 1.3217,
"step": 1210
},
{
"epoch": 0.23,
"learning_rate": 0.00026156899250649913,
"loss": 1.0692,
"step": 1215
},
{
"epoch": 0.24,
"learning_rate": 0.00026126468000094334,
"loss": 1.784,
"step": 1220
},
{
"epoch": 0.24,
"learning_rate": 0.0002609593459286135,
"loss": 1.3396,
"step": 1225
},
{
"epoch": 0.24,
"learning_rate": 0.0002606529930929072,
"loss": 1.8426,
"step": 1230
},
{
"epoch": 0.24,
"learning_rate": 0.0002603456243065759,
"loss": 1.7322,
"step": 1235
},
{
"epoch": 0.24,
"learning_rate": 0.00026003724239169874,
"loss": 1.5849,
"step": 1240
},
{
"epoch": 0.24,
"learning_rate": 0.00025972785017965683,
"loss": 1.853,
"step": 1245
},
{
"epoch": 0.24,
"learning_rate": 0.0002594174505111074,
"loss": 1.3056,
"step": 1250
},
{
"epoch": 0.24,
"learning_rate": 0.0002591060462359573,
"loss": 1.7144,
"step": 1255
},
{
"epoch": 0.24,
"learning_rate": 0.00025879364021333726,
"loss": 1.5749,
"step": 1260
},
{
"epoch": 0.24,
"learning_rate": 0.00025848023531157546,
"loss": 1.575,
"step": 1265
},
{
"epoch": 0.24,
"learning_rate": 0.0002581658344081711,
"loss": 1.4125,
"step": 1270
},
{
"epoch": 0.25,
"learning_rate": 0.0002578504403897682,
"loss": 1.7587,
"step": 1275
},
{
"epoch": 0.25,
"learning_rate": 0.0002575340561521289,
"loss": 1.6839,
"step": 1280
},
{
"epoch": 0.25,
"learning_rate": 0.00025721668460010696,
"loss": 1.3159,
"step": 1285
},
{
"epoch": 0.25,
"learning_rate": 0.00025689832864762103,
"loss": 1.4817,
"step": 1290
},
{
"epoch": 0.25,
"learning_rate": 0.00025657899121762814,
"loss": 1.4595,
"step": 1295
},
{
"epoch": 0.25,
"learning_rate": 0.0002562586752420964,
"loss": 1.5914,
"step": 1300
},
{
"epoch": 0.25,
"learning_rate": 0.00025593738366197857,
"loss": 1.3689,
"step": 1305
},
{
"epoch": 0.25,
"learning_rate": 0.00025561511942718467,
"loss": 1.3941,
"step": 1310
},
{
"epoch": 0.25,
"learning_rate": 0.0002552918854965552,
"loss": 1.3645,
"step": 1315
},
{
"epoch": 0.25,
"learning_rate": 0.0002549676848378338,
"loss": 1.8918,
"step": 1320
},
{
"epoch": 0.26,
"learning_rate": 0.0002546425204276399,
"loss": 1.5738,
"step": 1325
},
{
"epoch": 0.26,
"learning_rate": 0.00025431639525144175,
"loss": 1.4829,
"step": 1330
},
{
"epoch": 0.26,
"learning_rate": 0.0002539893123035286,
"loss": 1.9539,
"step": 1335
},
{
"epoch": 0.26,
"learning_rate": 0.0002536612745869836,
"loss": 1.619,
"step": 1340
},
{
"epoch": 0.26,
"learning_rate": 0.00025333228511365577,
"loss": 2.1926,
"step": 1345
},
{
"epoch": 0.26,
"learning_rate": 0.0002530023469041328,
"loss": 1.0672,
"step": 1350
},
{
"epoch": 0.26,
"learning_rate": 0.000252671462987713,
"loss": 1.3778,
"step": 1355
},
{
"epoch": 0.26,
"learning_rate": 0.0002523396364023777,
"loss": 2.2418,
"step": 1360
},
{
"epoch": 0.26,
"learning_rate": 0.00025200687019476313,
"loss": 1.3776,
"step": 1365
},
{
"epoch": 0.26,
"learning_rate": 0.0002516731674201328,
"loss": 1.7229,
"step": 1370
},
{
"epoch": 0.27,
"learning_rate": 0.00025133853114234905,
"loss": 1.8493,
"step": 1375
},
{
"epoch": 0.27,
"learning_rate": 0.000251002964433845,
"loss": 1.7188,
"step": 1380
},
{
"epoch": 0.27,
"learning_rate": 0.0002506664703755967,
"loss": 1.5356,
"step": 1385
},
{
"epoch": 0.27,
"learning_rate": 0.0002503290520570944,
"loss": 2.0891,
"step": 1390
},
{
"epoch": 0.27,
"learning_rate": 0.0002499907125763144,
"loss": 1.3363,
"step": 1395
},
{
"epoch": 0.27,
"learning_rate": 0.00024965145503969065,
"loss": 0.9084,
"step": 1400
},
{
"epoch": 0.27,
"learning_rate": 0.00024931128256208603,
"loss": 1.4176,
"step": 1405
},
{
"epoch": 0.27,
"learning_rate": 0.00024897019826676395,
"loss": 2.092,
"step": 1410
},
{
"epoch": 0.27,
"learning_rate": 0.00024862820528535954,
"loss": 1.302,
"step": 1415
},
{
"epoch": 0.27,
"learning_rate": 0.00024828530675785094,
"loss": 1.4547,
"step": 1420
},
{
"epoch": 0.27,
"learning_rate": 0.0002479415058325306,
"loss": 1.571,
"step": 1425
},
{
"epoch": 0.28,
"learning_rate": 0.00024759680566597604,
"loss": 1.6497,
"step": 1430
},
{
"epoch": 0.28,
"learning_rate": 0.0002472512094230214,
"loss": 2.3622,
"step": 1435
},
{
"epoch": 0.28,
"learning_rate": 0.00024690472027672774,
"loss": 1.6807,
"step": 1440
},
{
"epoch": 0.28,
"learning_rate": 0.00024655734140835445,
"loss": 1.3086,
"step": 1445
},
{
"epoch": 0.28,
"learning_rate": 0.0002462090760073296,
"loss": 1.4113,
"step": 1450
},
{
"epoch": 0.28,
"learning_rate": 0.0002458599272712212,
"loss": 1.9112,
"step": 1455
},
{
"epoch": 0.28,
"learning_rate": 0.0002455098984057073,
"loss": 1.6202,
"step": 1460
},
{
"epoch": 0.28,
"learning_rate": 0.0002451589926245468,
"loss": 1.955,
"step": 1465
},
{
"epoch": 0.28,
"learning_rate": 0.00024480721314955,
"loss": 0.958,
"step": 1470
},
{
"epoch": 0.28,
"learning_rate": 0.00024445456321054885,
"loss": 1.2851,
"step": 1475
},
{
"epoch": 0.29,
"learning_rate": 0.0002441010460453674,
"loss": 1.5792,
"step": 1480
},
{
"epoch": 0.29,
"learning_rate": 0.0002437466648997922,
"loss": 1.6548,
"step": 1485
},
{
"epoch": 0.29,
"learning_rate": 0.00024339142302754214,
"loss": 1.9109,
"step": 1490
},
{
"epoch": 0.29,
"learning_rate": 0.00024303532369023898,
"loss": 1.5052,
"step": 1495
},
{
"epoch": 0.29,
"learning_rate": 0.00024267837015737716,
"loss": 1.5422,
"step": 1500
},
{
"epoch": 0.29,
"learning_rate": 0.0002423205657062938,
"loss": 1.2181,
"step": 1505
},
{
"epoch": 0.29,
"learning_rate": 0.00024196191362213862,
"loss": 1.3582,
"step": 1510
},
{
"epoch": 0.29,
"learning_rate": 0.00024160241719784398,
"loss": 1.6467,
"step": 1515
},
{
"epoch": 0.29,
"learning_rate": 0.0002412420797340943,
"loss": 1.1192,
"step": 1520
},
{
"epoch": 0.29,
"learning_rate": 0.00024088090453929596,
"loss": 1.5344,
"step": 1525
},
{
"epoch": 0.3,
"learning_rate": 0.000240518894929547,
"loss": 1.3969,
"step": 1530
},
{
"epoch": 0.3,
"learning_rate": 0.0002401560542286064,
"loss": 1.8009,
"step": 1535
},
{
"epoch": 0.3,
"learning_rate": 0.00023979238576786387,
"loss": 1.2987,
"step": 1540
},
{
"epoch": 0.3,
"learning_rate": 0.0002394278928863091,
"loss": 2.0219,
"step": 1545
},
{
"epoch": 0.3,
"learning_rate": 0.00023906257893050104,
"loss": 1.8617,
"step": 1550
},
{
"epoch": 0.3,
"learning_rate": 0.00023869644725453735,
"loss": 1.5987,
"step": 1555
},
{
"epoch": 0.3,
"learning_rate": 0.00023832950122002345,
"loss": 2.2982,
"step": 1560
},
{
"epoch": 0.3,
"learning_rate": 0.0002379617441960418,
"loss": 1.9818,
"step": 1565
},
{
"epoch": 0.3,
"learning_rate": 0.00023759317955912083,
"loss": 1.1432,
"step": 1570
},
{
"epoch": 0.3,
"learning_rate": 0.00023722381069320398,
"loss": 1.7589,
"step": 1575
},
{
"epoch": 0.3,
"learning_rate": 0.0002368536409896187,
"loss": 1.4148,
"step": 1580
},
{
"epoch": 0.31,
"learning_rate": 0.00023648267384704526,
"loss": 1.278,
"step": 1585
},
{
"epoch": 0.31,
"learning_rate": 0.00023611091267148547,
"loss": 1.9946,
"step": 1590
},
{
"epoch": 0.31,
"learning_rate": 0.00023573836087623155,
"loss": 2.0949,
"step": 1595
},
{
"epoch": 0.31,
"learning_rate": 0.00023536502188183472,
"loss": 1.7023,
"step": 1600
},
{
"epoch": 0.31,
"learning_rate": 0.00023499089911607377,
"loss": 2.1282,
"step": 1605
},
{
"epoch": 0.31,
"learning_rate": 0.00023461599601392368,
"loss": 0.9914,
"step": 1610
},
{
"epoch": 0.31,
"learning_rate": 0.0002342403160175239,
"loss": 1.5996,
"step": 1615
},
{
"epoch": 0.31,
"learning_rate": 0.00023386386257614705,
"loss": 1.6788,
"step": 1620
},
{
"epoch": 0.31,
"learning_rate": 0.0002334866391461668,
"loss": 0.9417,
"step": 1625
},
{
"epoch": 0.31,
"learning_rate": 0.00023310864919102667,
"loss": 1.8813,
"step": 1630
},
{
"epoch": 0.32,
"learning_rate": 0.00023272989618120787,
"loss": 1.2746,
"step": 1635
},
{
"epoch": 0.32,
"learning_rate": 0.00023235038359419743,
"loss": 1.8115,
"step": 1640
},
{
"epoch": 0.32,
"learning_rate": 0.0002319701149144565,
"loss": 1.8113,
"step": 1645
},
{
"epoch": 0.32,
"learning_rate": 0.00023158909363338816,
"loss": 1.7773,
"step": 1650
},
{
"epoch": 0.32,
"learning_rate": 0.00023120732324930543,
"loss": 1.5997,
"step": 1655
},
{
"epoch": 0.32,
"learning_rate": 0.00023082480726739922,
"loss": 1.5149,
"step": 1660
},
{
"epoch": 0.32,
"learning_rate": 0.00023044154919970596,
"loss": 1.732,
"step": 1665
},
{
"epoch": 0.32,
"learning_rate": 0.00023005755256507563,
"loss": 1.3192,
"step": 1670
},
{
"epoch": 0.32,
"learning_rate": 0.0002296728208891392,
"loss": 1.529,
"step": 1675
},
{
"epoch": 0.32,
"learning_rate": 0.00022928735770427633,
"loss": 1.9453,
"step": 1680
},
{
"epoch": 0.33,
"learning_rate": 0.000228901166549583,
"loss": 1.6557,
"step": 1685
},
{
"epoch": 0.33,
"learning_rate": 0.00022851425097083906,
"loss": 1.5921,
"step": 1690
},
{
"epoch": 0.33,
"learning_rate": 0.00022812661452047546,
"loss": 1.3226,
"step": 1695
},
{
"epoch": 0.33,
"learning_rate": 0.00022773826075754181,
"loss": 1.8023,
"step": 1700
},
{
"epoch": 0.33,
"learning_rate": 0.0002273491932476737,
"loss": 1.6419,
"step": 1705
},
{
"epoch": 0.33,
"learning_rate": 0.00022695941556305998,
"loss": 0.9914,
"step": 1710
},
{
"epoch": 0.33,
"learning_rate": 0.0002265689312824097,
"loss": 1.3565,
"step": 1715
},
{
"epoch": 0.33,
"learning_rate": 0.0002261777439909197,
"loss": 1.6539,
"step": 1720
},
{
"epoch": 0.33,
"learning_rate": 0.0002257858572802413,
"loss": 1.5568,
"step": 1725
},
{
"epoch": 0.33,
"learning_rate": 0.00022539327474844752,
"loss": 1.2279,
"step": 1730
},
{
"epoch": 0.33,
"learning_rate": 0.000225,
"loss": 1.8043,
"step": 1735
},
{
"epoch": 0.34,
"learning_rate": 0.00022460603664571586,
"loss": 1.633,
"step": 1740
},
{
"epoch": 0.34,
"learning_rate": 0.0002242113883027347,
"loss": 1.3194,
"step": 1745
},
{
"epoch": 0.34,
"learning_rate": 0.00022381605859448514,
"loss": 2.1916,
"step": 1750
},
{
"epoch": 0.34,
"learning_rate": 0.00022342005115065186,
"loss": 1.4717,
"step": 1755
},
{
"epoch": 0.34,
"learning_rate": 0.00022302336960714202,
"loss": 1.6223,
"step": 1760
},
{
"epoch": 0.34,
"learning_rate": 0.0002226260176060519,
"loss": 1.1155,
"step": 1765
},
{
"epoch": 0.34,
"learning_rate": 0.00022222799879563357,
"loss": 1.9552,
"step": 1770
},
{
"epoch": 0.34,
"learning_rate": 0.00022182931683026148,
"loss": 1.6145,
"step": 1775
},
{
"epoch": 0.34,
"learning_rate": 0.0002214299753703987,
"loss": 1.4544,
"step": 1780
},
{
"epoch": 0.34,
"learning_rate": 0.00022102997808256318,
"loss": 1.4521,
"step": 1785
},
{
"epoch": 0.35,
"learning_rate": 0.00022062932863929464,
"loss": 1.895,
"step": 1790
},
{
"epoch": 0.35,
"learning_rate": 0.00022022803071912027,
"loss": 1.5163,
"step": 1795
},
{
"epoch": 0.35,
"learning_rate": 0.00021982608800652132,
"loss": 1.9087,
"step": 1800
},
{
"epoch": 0.35,
"learning_rate": 0.00021942350419189912,
"loss": 1.3489,
"step": 1805
},
{
"epoch": 0.35,
"learning_rate": 0.0002190202829715411,
"loss": 1.4871,
"step": 1810
},
{
"epoch": 0.35,
"learning_rate": 0.0002186164280475871,
"loss": 1.0417,
"step": 1815
},
{
"epoch": 0.35,
"learning_rate": 0.00021821194312799527,
"loss": 1.1155,
"step": 1820
},
{
"epoch": 0.35,
"learning_rate": 0.00021780683192650796,
"loss": 1.8977,
"step": 1825
},
{
"epoch": 0.35,
"learning_rate": 0.0002174010981626176,
"loss": 1.6845,
"step": 1830
},
{
"epoch": 0.35,
"learning_rate": 0.00021699474556153276,
"loss": 1.7154,
"step": 1835
},
{
"epoch": 0.35,
"learning_rate": 0.00021658777785414375,
"loss": 1.8151,
"step": 1840
},
{
"epoch": 0.36,
"learning_rate": 0.00021618019877698842,
"loss": 1.7573,
"step": 1845
},
{
"epoch": 0.36,
"learning_rate": 0.0002157720120722178,
"loss": 1.4932,
"step": 1850
},
{
"epoch": 0.36,
"learning_rate": 0.0002153632214875619,
"loss": 1.4163,
"step": 1855
},
{
"epoch": 0.36,
"learning_rate": 0.00021495383077629513,
"loss": 1.4138,
"step": 1860
},
{
"epoch": 0.36,
"learning_rate": 0.00021454384369720195,
"loss": 1.7746,
"step": 1865
},
{
"epoch": 0.36,
"learning_rate": 0.0002141332640145423,
"loss": 1.9383,
"step": 1870
},
{
"epoch": 0.36,
"learning_rate": 0.00021372209549801706,
"loss": 1.58,
"step": 1875
},
{
"epoch": 0.36,
"learning_rate": 0.00021331034192273343,
"loss": 1.7546,
"step": 1880
},
{
"epoch": 0.36,
"learning_rate": 0.00021289800706917025,
"loss": 1.5329,
"step": 1885
},
{
"epoch": 0.36,
"learning_rate": 0.0002124850947231434,
"loss": 1.1873,
"step": 1890
},
{
"epoch": 0.37,
"learning_rate": 0.00021207160867577087,
"loss": 1.7837,
"step": 1895
},
{
"epoch": 0.37,
"learning_rate": 0.00021165755272343806,
"loss": 1.4512,
"step": 1900
},
{
"epoch": 0.37,
"learning_rate": 0.0002112429306677629,
"loss": 1.7442,
"step": 1905
},
{
"epoch": 0.37,
"learning_rate": 0.00021082774631556102,
"loss": 1.5365,
"step": 1910
},
{
"epoch": 0.37,
"learning_rate": 0.00021041200347881057,
"loss": 1.8142,
"step": 1915
},
{
"epoch": 0.37,
"learning_rate": 0.0002099957059746175,
"loss": 1.9143,
"step": 1920
},
{
"epoch": 0.37,
"learning_rate": 0.00020957885762518043,
"loss": 2.0534,
"step": 1925
},
{
"epoch": 0.37,
"learning_rate": 0.0002091614622577554,
"loss": 1.7375,
"step": 1930
},
{
"epoch": 0.37,
"learning_rate": 0.00020874352370462085,
"loss": 1.6084,
"step": 1935
},
{
"epoch": 0.37,
"learning_rate": 0.00020832504580304247,
"loss": 1.1369,
"step": 1940
},
{
"epoch": 0.38,
"learning_rate": 0.000207906032395238,
"loss": 1.473,
"step": 1945
},
{
"epoch": 0.38,
"learning_rate": 0.00020748648732834182,
"loss": 1.5715,
"step": 1950
},
{
"epoch": 0.38,
"learning_rate": 0.0002070664144543696,
"loss": 1.5102,
"step": 1955
},
{
"epoch": 0.38,
"learning_rate": 0.00020664581763018324,
"loss": 1.2595,
"step": 1960
},
{
"epoch": 0.38,
"learning_rate": 0.00020622470071745498,
"loss": 1.7648,
"step": 1965
},
{
"epoch": 0.38,
"learning_rate": 0.0002058030675826324,
"loss": 1.5256,
"step": 1970
},
{
"epoch": 0.38,
"learning_rate": 0.00020538092209690265,
"loss": 1.5349,
"step": 1975
},
{
"epoch": 0.38,
"learning_rate": 0.00020495826813615706,
"loss": 1.1114,
"step": 1980
},
{
"epoch": 0.38,
"learning_rate": 0.0002045351095809553,
"loss": 1.5913,
"step": 1985
},
{
"epoch": 0.38,
"learning_rate": 0.0002041114503164902,
"loss": 1.7158,
"step": 1990
},
{
"epoch": 0.38,
"learning_rate": 0.00020368729423255155,
"loss": 1.6397,
"step": 1995
},
{
"epoch": 0.39,
"learning_rate": 0.0002032626452234908,
"loss": 1.7161,
"step": 2000
},
{
"epoch": 0.39,
"learning_rate": 0.00020283750718818501,
"loss": 1.4809,
"step": 2005
},
{
"epoch": 0.39,
"learning_rate": 0.00020241188403000138,
"loss": 1.8065,
"step": 2010
},
{
"epoch": 0.39,
"learning_rate": 0.0002019857796567611,
"loss": 1.5828,
"step": 2015
},
{
"epoch": 0.39,
"learning_rate": 0.00020155919798070366,
"loss": 1.5952,
"step": 2020
},
{
"epoch": 0.39,
"learning_rate": 0.00020113214291845067,
"loss": 1.3225,
"step": 2025
},
{
"epoch": 0.39,
"learning_rate": 0.00020070461839097038,
"loss": 1.2879,
"step": 2030
},
{
"epoch": 0.39,
"learning_rate": 0.00020027662832354122,
"loss": 1.4086,
"step": 2035
},
{
"epoch": 0.39,
"learning_rate": 0.00019984817664571596,
"loss": 2.7161,
"step": 2040
},
{
"epoch": 0.39,
"learning_rate": 0.00019941926729128565,
"loss": 1.661,
"step": 2045
},
{
"epoch": 0.4,
"learning_rate": 0.00019898990419824333,
"loss": 1.1885,
"step": 2050
},
{
"epoch": 0.4,
"learning_rate": 0.00019856009130874816,
"loss": 1.3895,
"step": 2055
},
{
"epoch": 0.4,
"learning_rate": 0.00019812983256908893,
"loss": 1.6416,
"step": 2060
},
{
"epoch": 0.4,
"learning_rate": 0.0001976991319296482,
"loss": 1.2385,
"step": 2065
},
{
"epoch": 0.4,
"learning_rate": 0.00019726799334486545,
"loss": 1.658,
"step": 2070
},
{
"epoch": 0.4,
"learning_rate": 0.00019683642077320134,
"loss": 1.2902,
"step": 2075
},
{
"epoch": 0.4,
"learning_rate": 0.00019640441817710117,
"loss": 1.8334,
"step": 2080
},
{
"epoch": 0.4,
"learning_rate": 0.0001959719895229583,
"loss": 1.1194,
"step": 2085
},
{
"epoch": 0.4,
"learning_rate": 0.000195539138781078,
"loss": 1.3623,
"step": 2090
},
{
"epoch": 0.4,
"learning_rate": 0.00019510586992564093,
"loss": 1.6465,
"step": 2095
},
{
"epoch": 0.41,
"learning_rate": 0.00019467218693466665,
"loss": 1.2476,
"step": 2100
},
{
"epoch": 0.41,
"learning_rate": 0.00019423809378997696,
"loss": 1.6968,
"step": 2105
},
{
"epoch": 0.41,
"learning_rate": 0.00019380359447715944,
"loss": 1.379,
"step": 2110
},
{
"epoch": 0.41,
"learning_rate": 0.000193368692985531,
"loss": 1.4896,
"step": 2115
},
{
"epoch": 0.41,
"learning_rate": 0.000192933393308101,
"loss": 1.5245,
"step": 2120
},
{
"epoch": 0.41,
"learning_rate": 0.0001924976994415348,
"loss": 1.6641,
"step": 2125
},
{
"epoch": 0.41,
"learning_rate": 0.00019206161538611692,
"loss": 1.4637,
"step": 2130
},
{
"epoch": 0.41,
"learning_rate": 0.00019162514514571433,
"loss": 1.4069,
"step": 2135
},
{
"epoch": 0.41,
"learning_rate": 0.00019118829272773985,
"loss": 1.3833,
"step": 2140
},
{
"epoch": 0.41,
"learning_rate": 0.00019075106214311503,
"loss": 1.519,
"step": 2145
},
{
"epoch": 0.41,
"learning_rate": 0.00019031345740623372,
"loss": 1.5278,
"step": 2150
},
{
"epoch": 0.42,
"learning_rate": 0.0001898754825349249,
"loss": 1.4175,
"step": 2155
},
{
"epoch": 0.42,
"learning_rate": 0.00018943714155041593,
"loss": 2.3051,
"step": 2160
},
{
"epoch": 0.42,
"learning_rate": 0.0001889984384772957,
"loss": 1.9164,
"step": 2165
},
{
"epoch": 0.42,
"learning_rate": 0.00018855937734347736,
"loss": 1.6033,
"step": 2170
},
{
"epoch": 0.42,
"learning_rate": 0.00018811996218016176,
"loss": 1.5784,
"step": 2175
},
{
"epoch": 0.42,
"learning_rate": 0.0001876801970218002,
"loss": 1.6539,
"step": 2180
},
{
"epoch": 0.42,
"learning_rate": 0.00018724008590605742,
"loss": 1.3472,
"step": 2185
},
{
"epoch": 0.42,
"learning_rate": 0.00018679963287377452,
"loss": 1.577,
"step": 2190
},
{
"epoch": 0.42,
"learning_rate": 0.00018635884196893188,
"loss": 1.4079,
"step": 2195
},
{
"epoch": 0.42,
"learning_rate": 0.00018591771723861197,
"loss": 1.2238,
"step": 2200
},
{
"epoch": 0.43,
"learning_rate": 0.00018547626273296238,
"loss": 2.0988,
"step": 2205
},
{
"epoch": 0.43,
"learning_rate": 0.00018503448250515843,
"loss": 1.7247,
"step": 2210
},
{
"epoch": 0.43,
"learning_rate": 0.00018459238061136602,
"loss": 1.7499,
"step": 2215
},
{
"epoch": 0.43,
"learning_rate": 0.0001841499611107044,
"loss": 1.3963,
"step": 2220
},
{
"epoch": 0.43,
"learning_rate": 0.0001837072280652089,
"loss": 1.68,
"step": 2225
},
{
"epoch": 0.43,
"learning_rate": 0.00018326418553979367,
"loss": 1.5986,
"step": 2230
},
{
"epoch": 0.43,
"learning_rate": 0.0001828208376022143,
"loss": 1.817,
"step": 2235
},
{
"epoch": 0.43,
"learning_rate": 0.00018237718832303043,
"loss": 2.1561,
"step": 2240
},
{
"epoch": 0.43,
"learning_rate": 0.00018193324177556854,
"loss": 1.5795,
"step": 2245
},
{
"epoch": 0.43,
"learning_rate": 0.00018148900203588444,
"loss": 1.3028,
"step": 2250
},
{
"epoch": 0.43,
"learning_rate": 0.00018104447318272583,
"loss": 1.516,
"step": 2255
},
{
"epoch": 0.44,
"learning_rate": 0.0001805996592974948,
"loss": 1.6465,
"step": 2260
},
{
"epoch": 0.44,
"learning_rate": 0.00018015456446421059,
"loss": 1.3915,
"step": 2265
},
{
"epoch": 0.44,
"learning_rate": 0.0001797091927694718,
"loss": 1.2381,
"step": 2270
},
{
"epoch": 0.44,
"learning_rate": 0.00017926354830241924,
"loss": 1.4619,
"step": 2275
},
{
"epoch": 0.44,
"learning_rate": 0.00017881763515469783,
"loss": 2.0805,
"step": 2280
},
{
"epoch": 0.44,
"learning_rate": 0.00017837145742041955,
"loss": 1.2531,
"step": 2285
},
{
"epoch": 0.44,
"learning_rate": 0.0001779250191961256,
"loss": 1.3414,
"step": 2290
},
{
"epoch": 0.44,
"learning_rate": 0.00017747832458074898,
"loss": 1.9254,
"step": 2295
},
{
"epoch": 0.44,
"learning_rate": 0.00017703137767557652,
"loss": 1.2747,
"step": 2300
},
{
"epoch": 0.44,
"learning_rate": 0.0001765841825842116,
"loss": 1.7596,
"step": 2305
},
{
"epoch": 0.45,
"learning_rate": 0.00017613674341253606,
"loss": 1.7383,
"step": 2310
},
{
"epoch": 0.45,
"learning_rate": 0.00017568906426867312,
"loss": 2.1931,
"step": 2315
},
{
"epoch": 0.45,
"learning_rate": 0.00017524114926294887,
"loss": 0.9475,
"step": 2320
},
{
"epoch": 0.45,
"learning_rate": 0.0001747930025078553,
"loss": 1.419,
"step": 2325
},
{
"epoch": 0.45,
"learning_rate": 0.0001743446281180119,
"loss": 1.4287,
"step": 2330
},
{
"epoch": 0.45,
"learning_rate": 0.0001738960302101284,
"loss": 1.8408,
"step": 2335
},
{
"epoch": 0.45,
"learning_rate": 0.00017344721290296655,
"loss": 1.7793,
"step": 2340
},
{
"epoch": 0.45,
"learning_rate": 0.0001729981803173026,
"loss": 1.3913,
"step": 2345
},
{
"epoch": 0.45,
"learning_rate": 0.00017254893657588934,
"loss": 1.6219,
"step": 2350
},
{
"epoch": 0.45,
"learning_rate": 0.0001720994858034183,
"loss": 0.8477,
"step": 2355
},
{
"epoch": 0.46,
"learning_rate": 0.00017164983212648177,
"loss": 2.008,
"step": 2360
},
{
"epoch": 0.46,
"learning_rate": 0.00017119997967353514,
"loss": 1.2596,
"step": 2365
},
{
"epoch": 0.46,
"learning_rate": 0.0001707499325748586,
"loss": 2.0161,
"step": 2370
},
{
"epoch": 0.46,
"learning_rate": 0.00017029969496251966,
"loss": 1.4243,
"step": 2375
},
{
"epoch": 0.46,
"learning_rate": 0.00016984927097033495,
"loss": 1.6643,
"step": 2380
},
{
"epoch": 0.46,
"learning_rate": 0.00016939866473383237,
"loss": 1.4719,
"step": 2385
},
{
"epoch": 0.46,
"learning_rate": 0.00016894788039021304,
"loss": 1.4121,
"step": 2390
},
{
"epoch": 0.46,
"learning_rate": 0.0001684969220783133,
"loss": 2.2479,
"step": 2395
},
{
"epoch": 0.46,
"learning_rate": 0.00016804579393856688,
"loss": 1.4241,
"step": 2400
},
{
"epoch": 0.46,
"learning_rate": 0.00016759450011296674,
"loss": 1.6946,
"step": 2405
},
{
"epoch": 0.46,
"learning_rate": 0.00016714304474502696,
"loss": 1.6951,
"step": 2410
},
{
"epoch": 0.47,
"learning_rate": 0.00016669143197974495,
"loss": 1.6273,
"step": 2415
},
{
"epoch": 0.47,
"learning_rate": 0.00016623966596356315,
"loss": 1.8207,
"step": 2420
},
{
"epoch": 0.47,
"learning_rate": 0.00016578775084433116,
"loss": 1.781,
"step": 2425
},
{
"epoch": 0.47,
"learning_rate": 0.00016533569077126749,
"loss": 1.2038,
"step": 2430
},
{
"epoch": 0.47,
"learning_rate": 0.00016488348989492145,
"loss": 1.4287,
"step": 2435
},
{
"epoch": 0.47,
"learning_rate": 0.00016443115236713528,
"loss": 1.4447,
"step": 2440
},
{
"epoch": 0.47,
"learning_rate": 0.00016397868234100584,
"loss": 1.1601,
"step": 2445
},
{
"epoch": 0.47,
"learning_rate": 0.0001635260839708464,
"loss": 1.4291,
"step": 2450
},
{
"epoch": 0.47,
"learning_rate": 0.00016307336141214873,
"loss": 1.4518,
"step": 2455
},
{
"epoch": 0.47,
"learning_rate": 0.00016262051882154477,
"loss": 1.6841,
"step": 2460
},
{
"epoch": 0.48,
"learning_rate": 0.00016216756035676845,
"loss": 1.4064,
"step": 2465
},
{
"epoch": 0.48,
"learning_rate": 0.0001617144901766178,
"loss": 0.8423,
"step": 2470
},
{
"epoch": 0.48,
"learning_rate": 0.00016126131244091643,
"loss": 1.6949,
"step": 2475
},
{
"epoch": 0.48,
"learning_rate": 0.00016080803131047534,
"loss": 1.3659,
"step": 2480
},
{
"epoch": 0.48,
"learning_rate": 0.00016035465094705502,
"loss": 1.547,
"step": 2485
},
{
"epoch": 0.48,
"learning_rate": 0.00015990117551332702,
"loss": 1.4655,
"step": 2490
},
{
"epoch": 0.48,
"learning_rate": 0.0001594476091728357,
"loss": 1.5663,
"step": 2495
},
{
"epoch": 0.48,
"learning_rate": 0.00015899395608996015,
"loss": 1.1099,
"step": 2500
},
{
"epoch": 0.48,
"learning_rate": 0.0001585402204298758,
"loss": 1.6187,
"step": 2505
},
{
"epoch": 0.48,
"learning_rate": 0.00015808640635851638,
"loss": 1.8124,
"step": 2510
},
{
"epoch": 0.49,
"learning_rate": 0.00015763251804253538,
"loss": 1.1152,
"step": 2515
},
{
"epoch": 0.49,
"learning_rate": 0.00015717855964926812,
"loss": 2.091,
"step": 2520
},
{
"epoch": 0.49,
"learning_rate": 0.00015672453534669318,
"loss": 1.3521,
"step": 2525
},
{
"epoch": 0.49,
"learning_rate": 0.00015627044930339442,
"loss": 1.3461,
"step": 2530
},
{
"epoch": 0.49,
"learning_rate": 0.0001558163056885225,
"loss": 1.7911,
"step": 2535
},
{
"epoch": 0.49,
"learning_rate": 0.00015536210867175666,
"loss": 1.4027,
"step": 2540
},
{
"epoch": 0.49,
"learning_rate": 0.00015490786242326643,
"loss": 1.1494,
"step": 2545
},
{
"epoch": 0.49,
"learning_rate": 0.00015445357111367337,
"loss": 2.7537,
"step": 2550
},
{
"epoch": 0.49,
"learning_rate": 0.00015399923891401286,
"loss": 1.2787,
"step": 2555
},
{
"epoch": 0.49,
"learning_rate": 0.00015354486999569558,
"loss": 1.7394,
"step": 2560
},
{
"epoch": 0.49,
"learning_rate": 0.00015309046853046937,
"loss": 1.5708,
"step": 2565
},
{
"epoch": 0.5,
"learning_rate": 0.00015263603869038086,
"loss": 1.9391,
"step": 2570
},
{
"epoch": 0.5,
"learning_rate": 0.0001521815846477374,
"loss": 1.6462,
"step": 2575
},
{
"epoch": 0.5,
"learning_rate": 0.00015172711057506827,
"loss": 1.1655,
"step": 2580
},
{
"epoch": 0.5,
"learning_rate": 0.00015127262064508688,
"loss": 1.6431,
"step": 2585
},
{
"epoch": 0.5,
"learning_rate": 0.00015081811903065205,
"loss": 1.4204,
"step": 2590
},
{
"epoch": 0.5,
"learning_rate": 0.00015036360990473008,
"loss": 1.3463,
"step": 2595
},
{
"epoch": 0.5,
"learning_rate": 0.000149909097440356,
"loss": 1.807,
"step": 2600
},
{
"epoch": 0.5,
"learning_rate": 0.0001494545858105958,
"loss": 1.1753,
"step": 2605
},
{
"epoch": 0.5,
"learning_rate": 0.0001490000791885075,
"loss": 1.5372,
"step": 2610
},
{
"epoch": 0.5,
"learning_rate": 0.00014854558174710322,
"loss": 1.9133,
"step": 2615
},
{
"epoch": 0.51,
"learning_rate": 0.00014809109765931097,
"loss": 1.6995,
"step": 2620
},
{
"epoch": 0.51,
"learning_rate": 0.00014763663109793596,
"loss": 2.2809,
"step": 2625
},
{
"epoch": 0.51,
"learning_rate": 0.00014718218623562256,
"loss": 1.2495,
"step": 2630
},
{
"epoch": 0.51,
"learning_rate": 0.00014672776724481584,
"loss": 1.4304,
"step": 2635
},
{
"epoch": 0.51,
"learning_rate": 0.00014627337829772346,
"loss": 1.4201,
"step": 2640
},
{
"epoch": 0.51,
"learning_rate": 0.00014581902356627718,
"loss": 1.6049,
"step": 2645
},
{
"epoch": 0.51,
"learning_rate": 0.00014536470722209449,
"loss": 1.6997,
"step": 2650
},
{
"epoch": 0.51,
"learning_rate": 0.00014491043343644065,
"loss": 1.5557,
"step": 2655
},
{
"epoch": 0.51,
"learning_rate": 0.00014445620638019004,
"loss": 1.5758,
"step": 2660
},
{
"epoch": 0.51,
"learning_rate": 0.000144002030223788,
"loss": 0.9978,
"step": 2665
},
{
"epoch": 0.52,
"learning_rate": 0.0001435479091372126,
"loss": 1.3099,
"step": 2670
},
{
"epoch": 0.52,
"learning_rate": 0.0001430938472899362,
"loss": 1.6165,
"step": 2675
},
{
"epoch": 0.52,
"learning_rate": 0.0001426398488508873,
"loss": 1.7321,
"step": 2680
},
{
"epoch": 0.52,
"learning_rate": 0.00014218591798841228,
"loss": 1.5079,
"step": 2685
},
{
"epoch": 0.52,
"learning_rate": 0.00014173205887023702,
"loss": 1.5661,
"step": 2690
},
{
"epoch": 0.52,
"learning_rate": 0.00014127827566342863,
"loss": 1.4923,
"step": 2695
},
{
"epoch": 0.52,
"learning_rate": 0.00014082457253435739,
"loss": 1.2609,
"step": 2700
},
{
"epoch": 0.52,
"learning_rate": 0.0001403709536486582,
"loss": 1.7925,
"step": 2705
},
{
"epoch": 0.52,
"learning_rate": 0.00013991742317119257,
"loss": 1.6333,
"step": 2710
},
{
"epoch": 0.52,
"learning_rate": 0.00013946398526601034,
"loss": 1.649,
"step": 2715
},
{
"epoch": 0.52,
"learning_rate": 0.00013901064409631132,
"loss": 1.6406,
"step": 2720
},
{
"epoch": 0.53,
"learning_rate": 0.00013855740382440714,
"loss": 1.6813,
"step": 2725
},
{
"epoch": 0.53,
"learning_rate": 0.00013810426861168314,
"loss": 2.0483,
"step": 2730
},
{
"epoch": 0.53,
"learning_rate": 0.00013765124261856,
"loss": 1.2954,
"step": 2735
},
{
"epoch": 0.53,
"learning_rate": 0.00013719833000445564,
"loss": 1.8643,
"step": 2740
},
{
"epoch": 0.53,
"learning_rate": 0.00013674553492774697,
"loss": 1.0872,
"step": 2745
},
{
"epoch": 0.53,
"learning_rate": 0.00013629286154573174,
"loss": 1.8002,
"step": 2750
},
{
"epoch": 0.53,
"learning_rate": 0.00013584031401459045,
"loss": 1.2758,
"step": 2755
},
{
"epoch": 0.53,
"learning_rate": 0.00013538789648934795,
"loss": 1.6906,
"step": 2760
},
{
"epoch": 0.53,
"learning_rate": 0.00013493561312383565,
"loss": 1.6229,
"step": 2765
},
{
"epoch": 0.53,
"learning_rate": 0.000134483468070653,
"loss": 1.5937,
"step": 2770
},
{
"epoch": 0.54,
"learning_rate": 0.00013403146548112975,
"loss": 1.6342,
"step": 2775
},
{
"epoch": 0.54,
"learning_rate": 0.00013357960950528743,
"loss": 1.5929,
"step": 2780
},
{
"epoch": 0.54,
"learning_rate": 0.00013312790429180154,
"loss": 1.9885,
"step": 2785
},
{
"epoch": 0.54,
"learning_rate": 0.0001326763539879634,
"loss": 1.8865,
"step": 2790
},
{
"epoch": 0.54,
"learning_rate": 0.00013222496273964204,
"loss": 1.1842,
"step": 2795
},
{
"epoch": 0.54,
"learning_rate": 0.000131773734691246,
"loss": 1.088,
"step": 2800
},
{
"epoch": 0.54,
"learning_rate": 0.00013132267398568555,
"loss": 1.5499,
"step": 2805
},
{
"epoch": 0.54,
"learning_rate": 0.00013087178476433456,
"loss": 1.6725,
"step": 2810
},
{
"epoch": 0.54,
"learning_rate": 0.00013042107116699228,
"loss": 1.3022,
"step": 2815
},
{
"epoch": 0.54,
"learning_rate": 0.00012997053733184558,
"loss": 1.1577,
"step": 2820
},
{
"epoch": 0.54,
"learning_rate": 0.0001295201873954308,
"loss": 1.751,
"step": 2825
},
{
"epoch": 0.55,
"learning_rate": 0.0001290700254925959,
"loss": 1.4562,
"step": 2830
},
{
"epoch": 0.55,
"learning_rate": 0.00012862005575646246,
"loss": 1.5687,
"step": 2835
},
{
"epoch": 0.55,
"learning_rate": 0.00012817028231838757,
"loss": 1.9392,
"step": 2840
},
{
"epoch": 0.55,
"learning_rate": 0.00012772070930792612,
"loss": 1.5673,
"step": 2845
},
{
"epoch": 0.55,
"learning_rate": 0.0001272713408527927,
"loss": 1.2749,
"step": 2850
},
{
"epoch": 0.55,
"learning_rate": 0.00012682218107882393,
"loss": 1.5739,
"step": 2855
},
{
"epoch": 0.55,
"learning_rate": 0.00012637323410994033,
"loss": 1.4778,
"step": 2860
},
{
"epoch": 0.55,
"learning_rate": 0.00012592450406810862,
"loss": 1.5713,
"step": 2865
},
{
"epoch": 0.55,
"learning_rate": 0.00012547599507330379,
"loss": 1.2776,
"step": 2870
},
{
"epoch": 0.55,
"learning_rate": 0.00012502771124347137,
"loss": 1.8842,
"step": 2875
},
{
"epoch": 0.56,
"learning_rate": 0.0001245796566944895,
"loss": 2.065,
"step": 2880
},
{
"epoch": 0.56,
"learning_rate": 0.00012413183554013112,
"loss": 1.5904,
"step": 2885
},
{
"epoch": 0.56,
"learning_rate": 0.00012368425189202654,
"loss": 1.8088,
"step": 2890
},
{
"epoch": 0.56,
"learning_rate": 0.00012323690985962515,
"loss": 1.8503,
"step": 2895
},
{
"epoch": 0.56,
"learning_rate": 0.00012278981355015811,
"loss": 1.211,
"step": 2900
},
{
"epoch": 0.56,
"learning_rate": 0.0001223429670686005,
"loss": 1.4097,
"step": 2905
},
{
"epoch": 0.56,
"learning_rate": 0.0001218963745176335,
"loss": 1.3921,
"step": 2910
},
{
"epoch": 0.56,
"learning_rate": 0.00012145003999760698,
"loss": 1.6914,
"step": 2915
},
{
"epoch": 0.56,
"learning_rate": 0.00012100396760650171,
"loss": 2.0817,
"step": 2920
},
{
"epoch": 0.56,
"learning_rate": 0.0001205581614398917,
"loss": 1.4066,
"step": 2925
},
{
"epoch": 0.57,
"learning_rate": 0.00012011262559090664,
"loss": 1.9319,
"step": 2930
},
{
"epoch": 0.57,
"learning_rate": 0.00011966736415019432,
"loss": 1.3314,
"step": 2935
},
{
"epoch": 0.57,
"learning_rate": 0.00011922238120588319,
"loss": 1.2883,
"step": 2940
},
{
"epoch": 0.57,
"learning_rate": 0.00011877768084354448,
"loss": 1.9158,
"step": 2945
},
{
"epoch": 0.57,
"learning_rate": 0.00011833326714615522,
"loss": 1.7002,
"step": 2950
},
{
"epoch": 0.57,
"learning_rate": 0.0001178891441940602,
"loss": 1.1873,
"step": 2955
},
{
"epoch": 0.57,
"learning_rate": 0.00011744531606493482,
"loss": 1.4304,
"step": 2960
},
{
"epoch": 0.57,
"learning_rate": 0.00011700178683374769,
"loss": 1.5897,
"step": 2965
},
{
"epoch": 0.57,
"learning_rate": 0.000116558560572723,
"loss": 1.7278,
"step": 2970
},
{
"epoch": 0.57,
"learning_rate": 0.00011611564135130329,
"loss": 1.8035,
"step": 2975
},
{
"epoch": 0.57,
"learning_rate": 0.00011567303323611203,
"loss": 1.7011,
"step": 2980
},
{
"epoch": 0.58,
"learning_rate": 0.00011523074029091633,
"loss": 1.3019,
"step": 2985
},
{
"epoch": 0.58,
"learning_rate": 0.00011478876657658953,
"loss": 1.5944,
"step": 2990
},
{
"epoch": 0.58,
"learning_rate": 0.00011434711615107404,
"loss": 1.1372,
"step": 2995
},
{
"epoch": 0.58,
"learning_rate": 0.00011390579306934402,
"loss": 1.4772,
"step": 3000
},
{
"epoch": 0.58,
"learning_rate": 0.00011346480138336811,
"loss": 1.2776,
"step": 3005
},
{
"epoch": 0.58,
"learning_rate": 0.00011302414514207233,
"loss": 1.9572,
"step": 3010
},
{
"epoch": 0.58,
"learning_rate": 0.00011258382839130281,
"loss": 1.4264,
"step": 3015
},
{
"epoch": 0.58,
"learning_rate": 0.00011214385517378864,
"loss": 2.216,
"step": 3020
},
{
"epoch": 0.58,
"learning_rate": 0.00011170422952910482,
"loss": 1.4154,
"step": 3025
},
{
"epoch": 0.58,
"learning_rate": 0.0001112649554936352,
"loss": 1.5587,
"step": 3030
},
{
"epoch": 0.59,
"learning_rate": 0.00011082603710053528,
"loss": 1.9853,
"step": 3035
},
{
"epoch": 0.59,
"learning_rate": 0.00011038747837969526,
"loss": 1.2621,
"step": 3040
},
{
"epoch": 0.59,
"learning_rate": 0.00010994928335770312,
"loss": 1.5037,
"step": 3045
},
{
"epoch": 0.59,
"learning_rate": 0.00010951145605780751,
"loss": 1.6375,
"step": 3050
},
{
"epoch": 0.59,
"learning_rate": 0.00010907400049988088,
"loss": 1.3973,
"step": 3055
},
{
"epoch": 0.59,
"learning_rate": 0.0001086369207003825,
"loss": 1.5211,
"step": 3060
},
{
"epoch": 0.59,
"learning_rate": 0.00010820022067232186,
"loss": 1.1739,
"step": 3065
},
{
"epoch": 0.59,
"learning_rate": 0.00010776390442522137,
"loss": 1.7858,
"step": 3070
},
{
"epoch": 0.59,
"learning_rate": 0.00010732797596507992,
"loss": 1.4643,
"step": 3075
},
{
"epoch": 0.59,
"learning_rate": 0.00010689243929433593,
"loss": 1.5519,
"step": 3080
},
{
"epoch": 0.6,
"learning_rate": 0.00010645729841183066,
"loss": 1.2495,
"step": 3085
},
{
"epoch": 0.6,
"learning_rate": 0.00010602255731277142,
"loss": 1.3118,
"step": 3090
},
{
"epoch": 0.6,
"learning_rate": 0.00010558821998869503,
"loss": 0.9586,
"step": 3095
},
{
"epoch": 0.6,
"learning_rate": 0.00010515429042743104,
"loss": 1.6216,
"step": 3100
},
{
"epoch": 0.6,
"learning_rate": 0.00010472077261306509,
"loss": 1.2725,
"step": 3105
},
{
"epoch": 0.6,
"learning_rate": 0.00010428767052590258,
"loss": 1.3954,
"step": 3110
},
{
"epoch": 0.6,
"learning_rate": 0.00010385498814243176,
"loss": 1.3674,
"step": 3115
},
{
"epoch": 0.6,
"learning_rate": 0.00010342272943528756,
"loss": 1.7398,
"step": 3120
},
{
"epoch": 0.6,
"learning_rate": 0.00010299089837321489,
"loss": 1.4755,
"step": 3125
},
{
"epoch": 0.6,
"learning_rate": 0.00010255949892103225,
"loss": 1.7468,
"step": 3130
},
{
"epoch": 0.6,
"learning_rate": 0.00010212853503959535,
"loss": 1.2841,
"step": 3135
},
{
"epoch": 0.61,
"learning_rate": 0.00010169801068576088,
"loss": 1.3969,
"step": 3140
},
{
"epoch": 0.61,
"learning_rate": 0.00010126792981234985,
"loss": 1.4495,
"step": 3145
},
{
"epoch": 0.61,
"learning_rate": 0.00010083829636811161,
"loss": 1.7657,
"step": 3150
},
{
"epoch": 0.61,
"learning_rate": 0.00010040911429768748,
"loss": 1.6876,
"step": 3155
},
{
"epoch": 0.61,
"learning_rate": 9.998038754157452e-05,
"loss": 1.1386,
"step": 3160
},
{
"epoch": 0.61,
"learning_rate": 9.95521200360893e-05,
"loss": 1.2276,
"step": 3165
},
{
"epoch": 0.61,
"learning_rate": 9.91243157133319e-05,
"loss": 1.4063,
"step": 3170
},
{
"epoch": 0.61,
"learning_rate": 9.869697850114969e-05,
"loss": 1.1884,
"step": 3175
},
{
"epoch": 0.61,
"learning_rate": 9.827011232310132e-05,
"loss": 1.0424,
"step": 3180
},
{
"epoch": 0.61,
"learning_rate": 9.784372109842071e-05,
"loss": 1.5932,
"step": 3185
},
{
"epoch": 0.62,
"learning_rate": 9.7417808741981e-05,
"loss": 1.5054,
"step": 3190
},
{
"epoch": 0.62,
"learning_rate": 9.699237916425861e-05,
"loss": 1.648,
"step": 3195
},
{
"epoch": 0.62,
"learning_rate": 9.656743627129745e-05,
"loss": 1.5159,
"step": 3200
},
{
"epoch": 0.62,
"learning_rate": 9.614298396467291e-05,
"loss": 1.6202,
"step": 3205
},
{
"epoch": 0.62,
"learning_rate": 9.571902614145616e-05,
"loss": 1.4917,
"step": 3210
},
{
"epoch": 0.62,
"learning_rate": 9.529556669417823e-05,
"loss": 1.4311,
"step": 3215
},
{
"epoch": 0.62,
"learning_rate": 9.487260951079448e-05,
"loss": 1.4103,
"step": 3220
},
{
"epoch": 0.62,
"learning_rate": 9.445015847464868e-05,
"loss": 1.4298,
"step": 3225
},
{
"epoch": 0.62,
"learning_rate": 9.402821746443744e-05,
"loss": 1.3865,
"step": 3230
},
{
"epoch": 0.62,
"learning_rate": 9.360679035417479e-05,
"loss": 1.4313,
"step": 3235
},
{
"epoch": 0.62,
"learning_rate": 9.318588101315616e-05,
"loss": 1.5334,
"step": 3240
},
{
"epoch": 0.63,
"learning_rate": 9.276549330592334e-05,
"loss": 1.0339,
"step": 3245
},
{
"epoch": 0.63,
"learning_rate": 9.234563109222874e-05,
"loss": 1.4073,
"step": 3250
},
{
"epoch": 0.63,
"learning_rate": 9.192629822699993e-05,
"loss": 1.2445,
"step": 3255
},
{
"epoch": 0.63,
"learning_rate": 9.150749856030429e-05,
"loss": 1.3676,
"step": 3260
},
{
"epoch": 0.63,
"learning_rate": 9.10892359373139e-05,
"loss": 0.9688,
"step": 3265
},
{
"epoch": 0.63,
"learning_rate": 9.067151419826976e-05,
"loss": 1.3644,
"step": 3270
},
{
"epoch": 0.63,
"learning_rate": 9.025433717844695e-05,
"loss": 1.648,
"step": 3275
},
{
"epoch": 0.63,
"learning_rate": 8.983770870811918e-05,
"loss": 1.5491,
"step": 3280
},
{
"epoch": 0.63,
"learning_rate": 8.942163261252378e-05,
"loss": 1.1729,
"step": 3285
},
{
"epoch": 0.63,
"learning_rate": 8.900611271182636e-05,
"loss": 1.6664,
"step": 3290
},
{
"epoch": 0.64,
"learning_rate": 8.85911528210861e-05,
"loss": 1.4335,
"step": 3295
},
{
"epoch": 0.64,
"learning_rate": 8.817675675022034e-05,
"loss": 1.6384,
"step": 3300
},
{
"epoch": 0.64,
"learning_rate": 8.77629283039697e-05,
"loss": 1.5178,
"step": 3305
},
{
"epoch": 0.64,
"learning_rate": 8.734967128186338e-05,
"loss": 0.9798,
"step": 3310
},
{
"epoch": 0.64,
"learning_rate": 8.693698947818397e-05,
"loss": 1.7922,
"step": 3315
},
{
"epoch": 0.64,
"learning_rate": 8.652488668193286e-05,
"loss": 1.8276,
"step": 3320
},
{
"epoch": 0.64,
"learning_rate": 8.611336667679511e-05,
"loss": 1.3232,
"step": 3325
},
{
"epoch": 0.64,
"learning_rate": 8.570243324110529e-05,
"loss": 1.4273,
"step": 3330
},
{
"epoch": 0.64,
"learning_rate": 8.529209014781201e-05,
"loss": 1.5293,
"step": 3335
},
{
"epoch": 0.64,
"learning_rate": 8.488234116444404e-05,
"loss": 1.2581,
"step": 3340
},
{
"epoch": 0.65,
"learning_rate": 8.447319005307532e-05,
"loss": 1.3106,
"step": 3345
},
{
"epoch": 0.65,
"learning_rate": 8.406464057029023e-05,
"loss": 0.7905,
"step": 3350
},
{
"epoch": 0.65,
"learning_rate": 8.365669646714983e-05,
"loss": 1.51,
"step": 3355
},
{
"epoch": 0.65,
"learning_rate": 8.324936148915641e-05,
"loss": 1.6895,
"step": 3360
},
{
"epoch": 0.65,
"learning_rate": 8.284263937622001e-05,
"loss": 1.5375,
"step": 3365
},
{
"epoch": 0.65,
"learning_rate": 8.243653386262362e-05,
"loss": 2.1622,
"step": 3370
},
{
"epoch": 0.65,
"learning_rate": 8.203104867698876e-05,
"loss": 1.4393,
"step": 3375
},
{
"epoch": 0.65,
"learning_rate": 8.162618754224177e-05,
"loss": 1.5568,
"step": 3380
},
{
"epoch": 0.65,
"learning_rate": 8.122195417557919e-05,
"loss": 1.1689,
"step": 3385
},
{
"epoch": 0.65,
"learning_rate": 8.081835228843367e-05,
"loss": 1.4658,
"step": 3390
},
{
"epoch": 0.65,
"learning_rate": 8.041538558644015e-05,
"loss": 2.065,
"step": 3395
},
{
"epoch": 0.66,
"learning_rate": 8.001305776940163e-05,
"loss": 1.5075,
"step": 3400
},
{
"epoch": 0.66,
"learning_rate": 7.961137253125504e-05,
"loss": 1.2665,
"step": 3405
},
{
"epoch": 0.66,
"learning_rate": 7.921033356003797e-05,
"loss": 1.4331,
"step": 3410
},
{
"epoch": 0.66,
"learning_rate": 7.880994453785393e-05,
"loss": 1.2724,
"step": 3415
},
{
"epoch": 0.66,
"learning_rate": 7.841020914083923e-05,
"loss": 1.4093,
"step": 3420
},
{
"epoch": 0.66,
"learning_rate": 7.801113103912882e-05,
"loss": 1.1195,
"step": 3425
},
{
"epoch": 0.66,
"learning_rate": 7.76127138968229e-05,
"loss": 1.741,
"step": 3430
},
{
"epoch": 0.66,
"learning_rate": 7.721496137195308e-05,
"loss": 1.4885,
"step": 3435
},
{
"epoch": 0.66,
"learning_rate": 7.681787711644877e-05,
"loss": 1.4523,
"step": 3440
},
{
"epoch": 0.66,
"learning_rate": 7.64214647761038e-05,
"loss": 1.3673,
"step": 3445
},
{
"epoch": 0.67,
"learning_rate": 7.602572799054291e-05,
"loss": 1.4642,
"step": 3450
},
{
"epoch": 0.67,
"learning_rate": 7.563067039318815e-05,
"loss": 1.6162,
"step": 3455
},
{
"epoch": 0.67,
"learning_rate": 7.523629561122577e-05,
"loss": 1.6921,
"step": 3460
},
{
"epoch": 0.67,
"learning_rate": 7.484260726557283e-05,
"loss": 1.8285,
"step": 3465
},
{
"epoch": 0.67,
"learning_rate": 7.444960897084388e-05,
"loss": 1.6991,
"step": 3470
},
{
"epoch": 0.67,
"learning_rate": 7.405730433531792e-05,
"loss": 1.282,
"step": 3475
},
{
"epoch": 0.67,
"learning_rate": 7.366569696090496e-05,
"loss": 1.7964,
"step": 3480
},
{
"epoch": 0.67,
"learning_rate": 7.327479044311346e-05,
"loss": 1.7076,
"step": 3485
},
{
"epoch": 0.67,
"learning_rate": 7.288458837101675e-05,
"loss": 1.4987,
"step": 3490
},
{
"epoch": 0.67,
"learning_rate": 7.249509432722056e-05,
"loss": 1.7264,
"step": 3495
},
{
"epoch": 0.68,
"learning_rate": 7.210631188782988e-05,
"loss": 1.5205,
"step": 3500
},
{
"epoch": 0.68,
"learning_rate": 7.171824462241607e-05,
"loss": 1.3453,
"step": 3505
},
{
"epoch": 0.68,
"learning_rate": 7.133089609398425e-05,
"loss": 1.6853,
"step": 3510
},
{
"epoch": 0.68,
"learning_rate": 7.094426985894059e-05,
"loss": 1.5102,
"step": 3515
},
{
"epoch": 0.68,
"learning_rate": 7.055836946705939e-05,
"loss": 1.773,
"step": 3520
},
{
"epoch": 0.68,
"learning_rate": 7.017319846145099e-05,
"loss": 1.6526,
"step": 3525
},
{
"epoch": 0.68,
"learning_rate": 6.978876037852862e-05,
"loss": 1.4008,
"step": 3530
},
{
"epoch": 0.68,
"learning_rate": 6.940505874797639e-05,
"loss": 1.64,
"step": 3535
},
{
"epoch": 0.68,
"learning_rate": 6.902209709271685e-05,
"loss": 1.6294,
"step": 3540
},
{
"epoch": 0.68,
"learning_rate": 6.863987892887826e-05,
"loss": 1.656,
"step": 3545
},
{
"epoch": 0.68,
"learning_rate": 6.825840776576282e-05,
"loss": 1.35,
"step": 3550
},
{
"epoch": 0.69,
"learning_rate": 6.787768710581414e-05,
"loss": 1.1362,
"step": 3555
},
{
"epoch": 0.69,
"learning_rate": 6.749772044458505e-05,
"loss": 1.6395,
"step": 3560
},
{
"epoch": 0.69,
"learning_rate": 6.711851127070579e-05,
"loss": 1.6092,
"step": 3565
},
{
"epoch": 0.69,
"learning_rate": 6.67400630658516e-05,
"loss": 1.4654,
"step": 3570
},
{
"epoch": 0.69,
"learning_rate": 6.636237930471108e-05,
"loss": 1.5386,
"step": 3575
},
{
"epoch": 0.69,
"learning_rate": 6.598546345495417e-05,
"loss": 1.5661,
"step": 3580
},
{
"epoch": 0.69,
"learning_rate": 6.56093189772002e-05,
"loss": 1.4736,
"step": 3585
},
{
"epoch": 0.69,
"learning_rate": 6.52339493249864e-05,
"loss": 1.3499,
"step": 3590
},
{
"epoch": 0.69,
"learning_rate": 6.485935794473571e-05,
"loss": 0.8297,
"step": 3595
},
{
"epoch": 0.69,
"learning_rate": 6.448554827572567e-05,
"loss": 1.4855,
"step": 3600
},
{
"epoch": 0.7,
"learning_rate": 6.411252375005663e-05,
"loss": 1.6239,
"step": 3605
},
{
"epoch": 0.7,
"learning_rate": 6.374028779262003e-05,
"loss": 1.4557,
"step": 3610
},
{
"epoch": 0.7,
"learning_rate": 6.336884382106723e-05,
"loss": 1.5754,
"step": 3615
},
{
"epoch": 0.7,
"learning_rate": 6.299819524577817e-05,
"loss": 1.3462,
"step": 3620
},
{
"epoch": 0.7,
"learning_rate": 6.262834546982969e-05,
"loss": 1.528,
"step": 3625
},
{
"epoch": 0.7,
"learning_rate": 6.225929788896473e-05,
"loss": 0.67,
"step": 3630
},
{
"epoch": 0.7,
"learning_rate": 6.189105589156076e-05,
"loss": 1.187,
"step": 3635
},
{
"epoch": 0.7,
"learning_rate": 6.152362285859906e-05,
"loss": 1.5347,
"step": 3640
},
{
"epoch": 0.7,
"learning_rate": 6.115700216363342e-05,
"loss": 1.5134,
"step": 3645
},
{
"epoch": 0.7,
"learning_rate": 6.079119717275905e-05,
"loss": 1.1493,
"step": 3650
},
{
"epoch": 0.71,
"learning_rate": 6.04262112445821e-05,
"loss": 0.9713,
"step": 3655
},
{
"epoch": 0.71,
"learning_rate": 6.006204773018831e-05,
"loss": 1.4658,
"step": 3660
},
{
"epoch": 0.71,
"learning_rate": 5.96987099731127e-05,
"loss": 1.8118,
"step": 3665
},
{
"epoch": 0.71,
"learning_rate": 5.933620130930867e-05,
"loss": 1.8907,
"step": 3670
},
{
"epoch": 0.71,
"learning_rate": 5.897452506711719e-05,
"loss": 1.2703,
"step": 3675
},
{
"epoch": 0.71,
"learning_rate": 5.86136845672366e-05,
"loss": 1.4875,
"step": 3680
},
{
"epoch": 0.71,
"learning_rate": 5.8253683122691944e-05,
"loss": 1.6493,
"step": 3685
},
{
"epoch": 0.71,
"learning_rate": 5.7894524038804373e-05,
"loss": 1.481,
"step": 3690
},
{
"epoch": 0.71,
"learning_rate": 5.75362106131612e-05,
"loss": 1.376,
"step": 3695
},
{
"epoch": 0.71,
"learning_rate": 5.7178746135585276e-05,
"loss": 1.4796,
"step": 3700
},
{
"epoch": 0.71,
"learning_rate": 5.682213388810497e-05,
"loss": 2.0087,
"step": 3705
},
{
"epoch": 0.72,
"learning_rate": 5.6466377144923985e-05,
"loss": 1.8715,
"step": 3710
},
{
"epoch": 0.72,
"learning_rate": 5.6111479172391136e-05,
"loss": 1.7617,
"step": 3715
},
{
"epoch": 0.72,
"learning_rate": 5.575744322897074e-05,
"loss": 1.5077,
"step": 3720
},
{
"epoch": 0.72,
"learning_rate": 5.540427256521226e-05,
"loss": 1.6198,
"step": 3725
},
{
"epoch": 0.72,
"learning_rate": 5.505197042372082e-05,
"loss": 1.6346,
"step": 3730
},
{
"epoch": 0.72,
"learning_rate": 5.470054003912727e-05,
"loss": 1.7283,
"step": 3735
},
{
"epoch": 0.72,
"learning_rate": 5.434998463805837e-05,
"loss": 0.9581,
"step": 3740
},
{
"epoch": 0.72,
"learning_rate": 5.4000307439107434e-05,
"loss": 1.6349,
"step": 3745
},
{
"epoch": 0.72,
"learning_rate": 5.3651511652804663e-05,
"loss": 1.8797,
"step": 3750
},
{
"epoch": 0.72,
"learning_rate": 5.330360048158741e-05,
"loss": 1.2728,
"step": 3755
},
{
"epoch": 0.73,
"learning_rate": 5.2956577119771405e-05,
"loss": 1.306,
"step": 3760
},
{
"epoch": 0.73,
"learning_rate": 5.261044475352064e-05,
"loss": 1.7036,
"step": 3765
},
{
"epoch": 0.73,
"learning_rate": 5.226520656081875e-05,
"loss": 1.3375,
"step": 3770
},
{
"epoch": 0.73,
"learning_rate": 5.192086571143956e-05,
"loss": 1.1267,
"step": 3775
},
{
"epoch": 0.73,
"learning_rate": 5.1577425366917915e-05,
"loss": 1.4579,
"step": 3780
},
{
"epoch": 0.73,
"learning_rate": 5.12348886805209e-05,
"loss": 0.9636,
"step": 3785
},
{
"epoch": 0.73,
"learning_rate": 5.089325879721856e-05,
"loss": 1.844,
"step": 3790
},
{
"epoch": 0.73,
"learning_rate": 5.05525388536554e-05,
"loss": 1.3017,
"step": 3795
},
{
"epoch": 0.73,
"learning_rate": 5.021273197812134e-05,
"loss": 1.299,
"step": 3800
},
{
"epoch": 0.73,
"learning_rate": 4.987384129052291e-05,
"loss": 1.3614,
"step": 3805
},
{
"epoch": 0.73,
"learning_rate": 4.953586990235492e-05,
"loss": 1.7793,
"step": 3810
},
{
"epoch": 0.74,
"learning_rate": 4.9198820916671634e-05,
"loss": 1.561,
"step": 3815
},
{
"epoch": 0.74,
"learning_rate": 4.886269742805836e-05,
"loss": 1.7795,
"step": 3820
},
{
"epoch": 0.74,
"learning_rate": 4.8527502522603104e-05,
"loss": 0.8945,
"step": 3825
},
{
"epoch": 0.74,
"learning_rate": 4.8193239277868004e-05,
"loss": 1.5663,
"step": 3830
},
{
"epoch": 0.74,
"learning_rate": 4.7859910762861406e-05,
"loss": 1.4586,
"step": 3835
},
{
"epoch": 0.74,
"learning_rate": 4.752752003800951e-05,
"loss": 1.4256,
"step": 3840
},
{
"epoch": 0.74,
"learning_rate": 4.719607015512809e-05,
"loss": 1.6858,
"step": 3845
},
{
"epoch": 0.74,
"learning_rate": 4.686556415739488e-05,
"loss": 1.5589,
"step": 3850
},
{
"epoch": 0.74,
"learning_rate": 4.653600507932137e-05,
"loss": 1.6629,
"step": 3855
},
{
"epoch": 0.74,
"learning_rate": 4.620739594672488e-05,
"loss": 1.7728,
"step": 3860
},
{
"epoch": 0.75,
"learning_rate": 4.587973977670103e-05,
"loss": 1.8571,
"step": 3865
},
{
"epoch": 0.75,
"learning_rate": 4.555303957759569e-05,
"loss": 0.9403,
"step": 3870
},
{
"epoch": 0.75,
"learning_rate": 4.522729834897795e-05,
"loss": 1.4019,
"step": 3875
},
{
"epoch": 0.75,
"learning_rate": 4.490251908161175e-05,
"loss": 1.4581,
"step": 3880
},
{
"epoch": 0.75,
"learning_rate": 4.4578704757429186e-05,
"loss": 1.4361,
"step": 3885
},
{
"epoch": 0.75,
"learning_rate": 4.4255858349502724e-05,
"loss": 1.587,
"step": 3890
},
{
"epoch": 0.75,
"learning_rate": 4.3933982822017876e-05,
"loss": 2.0539,
"step": 3895
},
{
"epoch": 0.75,
"learning_rate": 4.361308113024626e-05,
"loss": 1.2896,
"step": 3900
},
{
"epoch": 0.75,
"learning_rate": 4.32931562205183e-05,
"loss": 1.1724,
"step": 3905
},
{
"epoch": 0.75,
"learning_rate": 4.2974211030196e-05,
"loss": 1.6518,
"step": 3910
},
{
"epoch": 0.76,
"learning_rate": 4.265624848764639e-05,
"loss": 1.6734,
"step": 3915
},
{
"epoch": 0.76,
"learning_rate": 4.233927151221432e-05,
"loss": 1.8174,
"step": 3920
},
{
"epoch": 0.76,
"learning_rate": 4.2023283014195596e-05,
"loss": 1.1279,
"step": 3925
},
{
"epoch": 0.76,
"learning_rate": 4.170828589481071e-05,
"loss": 1.5869,
"step": 3930
},
{
"epoch": 0.76,
"learning_rate": 4.1394283046177605e-05,
"loss": 1.2266,
"step": 3935
},
{
"epoch": 0.76,
"learning_rate": 4.108127735128561e-05,
"loss": 1.4214,
"step": 3940
},
{
"epoch": 0.76,
"learning_rate": 4.076927168396865e-05,
"loss": 1.0697,
"step": 3945
},
{
"epoch": 0.76,
"learning_rate": 4.0458268908879056e-05,
"loss": 1.6432,
"step": 3950
},
{
"epoch": 0.76,
"learning_rate": 4.014827188146122e-05,
"loss": 1.6831,
"step": 3955
},
{
"epoch": 0.76,
"learning_rate": 3.983928344792523e-05,
"loss": 1.1097,
"step": 3960
},
{
"epoch": 0.76,
"learning_rate": 3.953130644522096e-05,
"loss": 1.6208,
"step": 3965
},
{
"epoch": 0.77,
"learning_rate": 3.922434370101198e-05,
"loss": 1.7125,
"step": 3970
},
{
"epoch": 0.77,
"learning_rate": 3.891839803364934e-05,
"loss": 1.145,
"step": 3975
},
{
"epoch": 0.77,
"learning_rate": 3.861347225214603e-05,
"loss": 1.1312,
"step": 3980
},
{
"epoch": 0.77,
"learning_rate": 3.830956915615106e-05,
"loss": 1.5828,
"step": 3985
},
{
"epoch": 0.77,
"learning_rate": 3.8006691535923673e-05,
"loss": 1.1374,
"step": 3990
},
{
"epoch": 0.77,
"learning_rate": 3.7704842172307894e-05,
"loss": 1.1668,
"step": 3995
},
{
"epoch": 0.77,
"learning_rate": 3.740402383670674e-05,
"loss": 1.6879,
"step": 4000
},
{
"epoch": 0.77,
"learning_rate": 3.7104239291057085e-05,
"loss": 1.3145,
"step": 4005
},
{
"epoch": 0.77,
"learning_rate": 3.6805491287804144e-05,
"loss": 1.4949,
"step": 4010
},
{
"epoch": 0.77,
"learning_rate": 3.650778256987607e-05,
"loss": 1.916,
"step": 4015
},
{
"epoch": 0.78,
"learning_rate": 3.62111158706591e-05,
"loss": 1.3152,
"step": 4020
},
{
"epoch": 0.78,
"learning_rate": 3.591549391397208e-05,
"loss": 1.6343,
"step": 4025
},
{
"epoch": 0.78,
"learning_rate": 3.562091941404179e-05,
"loss": 1.4102,
"step": 4030
},
{
"epoch": 0.78,
"learning_rate": 3.532739507547788e-05,
"loss": 1.6651,
"step": 4035
},
{
"epoch": 0.78,
"learning_rate": 3.5034923593247844e-05,
"loss": 1.7953,
"step": 4040
},
{
"epoch": 0.78,
"learning_rate": 3.474350765265279e-05,
"loss": 1.1191,
"step": 4045
},
{
"epoch": 0.78,
"learning_rate": 3.4453149929302145e-05,
"loss": 2.0854,
"step": 4050
},
{
"epoch": 0.78,
"learning_rate": 3.416385308908962e-05,
"loss": 1.0797,
"step": 4055
},
{
"epoch": 0.78,
"learning_rate": 3.387561978816847e-05,
"loss": 2.0707,
"step": 4060
},
{
"epoch": 0.78,
"learning_rate": 3.358845267292706e-05,
"loss": 1.244,
"step": 4065
},
{
"epoch": 0.79,
"learning_rate": 3.330235437996478e-05,
"loss": 1.3131,
"step": 4070
},
{
"epoch": 0.79,
"learning_rate": 3.301732753606776e-05,
"loss": 1.6666,
"step": 4075
},
{
"epoch": 0.79,
"learning_rate": 3.2733374758184513e-05,
"loss": 1.7402,
"step": 4080
},
{
"epoch": 0.79,
"learning_rate": 3.245049865340236e-05,
"loss": 1.4406,
"step": 4085
},
{
"epoch": 0.79,
"learning_rate": 3.216870181892303e-05,
"loss": 1.5123,
"step": 4090
},
{
"epoch": 0.79,
"learning_rate": 3.1887986842039165e-05,
"loss": 1.5698,
"step": 4095
},
{
"epoch": 0.79,
"learning_rate": 3.160835630011038e-05,
"loss": 1.3733,
"step": 4100
},
{
"epoch": 0.79,
"learning_rate": 3.132981276053966e-05,
"loss": 1.6571,
"step": 4105
},
{
"epoch": 0.79,
"learning_rate": 3.105235878074981e-05,
"loss": 1.7105,
"step": 4110
},
{
"epoch": 0.79,
"learning_rate": 3.077599690815981e-05,
"loss": 1.6224,
"step": 4115
},
{
"epoch": 0.79,
"learning_rate": 3.0500729680161663e-05,
"loss": 1.4937,
"step": 4120
},
{
"epoch": 0.8,
"learning_rate": 3.0226559624097034e-05,
"loss": 1.7792,
"step": 4125
},
{
"epoch": 0.8,
"learning_rate": 2.9953489257233844e-05,
"loss": 1.3071,
"step": 4130
},
{
"epoch": 0.8,
"learning_rate": 2.9681521086743422e-05,
"loss": 1.4754,
"step": 4135
},
{
"epoch": 0.8,
"learning_rate": 2.9410657609677434e-05,
"loss": 1.7724,
"step": 4140
},
{
"epoch": 0.8,
"learning_rate": 2.914090131294475e-05,
"loss": 1.4711,
"step": 4145
},
{
"epoch": 0.8,
"learning_rate": 2.8872254673288904e-05,
"loss": 1.4936,
"step": 4150
},
{
"epoch": 0.8,
"learning_rate": 2.860472015726506e-05,
"loss": 1.7267,
"step": 4155
},
{
"epoch": 0.8,
"learning_rate": 2.8338300221217646e-05,
"loss": 1.6548,
"step": 4160
},
{
"epoch": 0.8,
"learning_rate": 2.807299731125773e-05,
"loss": 1.3744,
"step": 4165
},
{
"epoch": 0.8,
"learning_rate": 2.780881386324027e-05,
"loss": 1.6085,
"step": 4170
},
{
"epoch": 0.81,
"learning_rate": 2.7545752302742245e-05,
"loss": 1.5895,
"step": 4175
},
{
"epoch": 0.81,
"learning_rate": 2.7283815045039885e-05,
"loss": 1.3282,
"step": 4180
},
{
"epoch": 0.81,
"learning_rate": 2.702300449508691e-05,
"loss": 1.6033,
"step": 4185
},
{
"epoch": 0.81,
"learning_rate": 2.6763323047492258e-05,
"loss": 1.6469,
"step": 4190
},
{
"epoch": 0.81,
"learning_rate": 2.650477308649797e-05,
"loss": 1.727,
"step": 4195
},
{
"epoch": 0.81,
"learning_rate": 2.624735698595758e-05,
"loss": 1.4403,
"step": 4200
},
{
"epoch": 0.81,
"learning_rate": 2.5991077109314206e-05,
"loss": 1.6997,
"step": 4205
},
{
"epoch": 0.81,
"learning_rate": 2.5735935809578656e-05,
"loss": 1.7687,
"step": 4210
},
{
"epoch": 0.81,
"learning_rate": 2.5481935429308176e-05,
"loss": 1.6448,
"step": 4215
},
{
"epoch": 0.81,
"learning_rate": 2.5229078300584693e-05,
"loss": 1.3343,
"step": 4220
},
{
"epoch": 0.82,
"learning_rate": 2.4977366744993432e-05,
"loss": 1.6547,
"step": 4225
},
{
"epoch": 0.82,
"learning_rate": 2.472680307360175e-05,
"loss": 1.7316,
"step": 4230
},
{
"epoch": 0.82,
"learning_rate": 2.447738958693764e-05,
"loss": 1.7601,
"step": 4235
},
{
"epoch": 0.82,
"learning_rate": 2.422912857496894e-05,
"loss": 1.7816,
"step": 4240
},
{
"epoch": 0.82,
"learning_rate": 2.3982022317082032e-05,
"loss": 1.8284,
"step": 4245
},
{
"epoch": 0.82,
"learning_rate": 2.3736073082061074e-05,
"loss": 1.5585,
"step": 4250
},
{
"epoch": 0.82,
"learning_rate": 2.3491283128067174e-05,
"loss": 1.5799,
"step": 4255
},
{
"epoch": 0.82,
"learning_rate": 2.3247654702617454e-05,
"loss": 1.3991,
"step": 4260
},
{
"epoch": 0.82,
"learning_rate": 2.3005190042564697e-05,
"loss": 1.2329,
"step": 4265
},
{
"epoch": 0.82,
"learning_rate": 2.276389137407669e-05,
"loss": 1.7101,
"step": 4270
},
{
"epoch": 0.82,
"learning_rate": 2.25237609126156e-05,
"loss": 1.6476,
"step": 4275
},
{
"epoch": 0.83,
"learning_rate": 2.2284800862918068e-05,
"loss": 1.5978,
"step": 4280
},
{
"epoch": 0.83,
"learning_rate": 2.2047013418974446e-05,
"loss": 2.0835,
"step": 4285
},
{
"epoch": 0.83,
"learning_rate": 2.1810400764009072e-05,
"loss": 1.1277,
"step": 4290
},
{
"epoch": 0.83,
"learning_rate": 2.1574965070460043e-05,
"loss": 1.24,
"step": 4295
},
{
"epoch": 0.83,
"learning_rate": 2.1340708499959197e-05,
"loss": 1.5577,
"step": 4300
},
{
"epoch": 0.83,
"learning_rate": 2.1107633203312452e-05,
"loss": 1.1969,
"step": 4305
},
{
"epoch": 0.83,
"learning_rate": 2.0875741320479967e-05,
"loss": 1.3219,
"step": 4310
},
{
"epoch": 0.83,
"learning_rate": 2.0645034980556408e-05,
"loss": 1.548,
"step": 4315
},
{
"epoch": 0.83,
"learning_rate": 2.04155163017516e-05,
"loss": 1.3474,
"step": 4320
},
{
"epoch": 0.83,
"learning_rate": 2.0187187391370824e-05,
"loss": 1.349,
"step": 4325
},
{
"epoch": 0.84,
"learning_rate": 1.996005034579573e-05,
"loss": 1.2657,
"step": 4330
},
{
"epoch": 0.84,
"learning_rate": 1.9734107250464926e-05,
"loss": 0.997,
"step": 4335
},
{
"epoch": 0.84,
"learning_rate": 1.950936017985485e-05,
"loss": 1.8957,
"step": 4340
},
{
"epoch": 0.84,
"learning_rate": 1.928581119746081e-05,
"loss": 1.589,
"step": 4345
},
{
"epoch": 0.84,
"learning_rate": 1.9063462355777847e-05,
"loss": 1.077,
"step": 4350
},
{
"epoch": 0.84,
"learning_rate": 1.8842315696282152e-05,
"loss": 1.7227,
"step": 4355
},
{
"epoch": 0.84,
"learning_rate": 1.8622373249412137e-05,
"loss": 2.0054,
"step": 4360
},
{
"epoch": 0.84,
"learning_rate": 1.8403637034549785e-05,
"loss": 1.3692,
"step": 4365
},
{
"epoch": 0.84,
"learning_rate": 1.8186109060002262e-05,
"loss": 1.7883,
"step": 4370
},
{
"epoch": 0.84,
"learning_rate": 1.796979132298335e-05,
"loss": 1.2446,
"step": 4375
},
{
"epoch": 0.84,
"learning_rate": 1.775468580959508e-05,
"loss": 1.3009,
"step": 4380
},
{
"epoch": 0.85,
"learning_rate": 1.754079449480969e-05,
"loss": 1.6341,
"step": 4385
},
{
"epoch": 0.85,
"learning_rate": 1.7328119342451165e-05,
"loss": 1.6593,
"step": 4390
},
{
"epoch": 0.85,
"learning_rate": 1.7116662305177654e-05,
"loss": 1.9338,
"step": 4395
},
{
"epoch": 0.85,
"learning_rate": 1.6906425324463058e-05,
"loss": 1.6533,
"step": 4400
},
{
"epoch": 0.85,
"learning_rate": 1.6697410330579586e-05,
"loss": 1.5444,
"step": 4405
},
{
"epoch": 0.85,
"learning_rate": 1.6489619242579856e-05,
"loss": 1.1762,
"step": 4410
},
{
"epoch": 0.85,
"learning_rate": 1.628305396827922e-05,
"loss": 1.4165,
"step": 4415
},
{
"epoch": 0.85,
"learning_rate": 1.607771640423842e-05,
"loss": 1.4529,
"step": 4420
},
{
"epoch": 0.85,
"learning_rate": 1.587360843574609e-05,
"loss": 1.3064,
"step": 4425
},
{
"epoch": 0.85,
"learning_rate": 1.567073193680134e-05,
"loss": 0.9361,
"step": 4430
},
{
"epoch": 0.86,
"learning_rate": 1.546908877009676e-05,
"loss": 1.3433,
"step": 4435
},
{
"epoch": 0.86,
"learning_rate": 1.526868078700119e-05,
"loss": 1.2926,
"step": 4440
},
{
"epoch": 0.86,
"learning_rate": 1.5069509827542648e-05,
"loss": 0.8124,
"step": 4445
},
{
"epoch": 0.86,
"learning_rate": 1.4871577720391708e-05,
"loss": 1.6786,
"step": 4450
},
{
"epoch": 0.86,
"learning_rate": 1.467488628284434e-05,
"loss": 1.9403,
"step": 4455
},
{
"epoch": 0.86,
"learning_rate": 1.4479437320805548e-05,
"loss": 1.5124,
"step": 4460
},
{
"epoch": 0.86,
"learning_rate": 1.4285232628772602e-05,
"loss": 1.4309,
"step": 4465
},
{
"epoch": 0.86,
"learning_rate": 1.4092273989818626e-05,
"loss": 1.5283,
"step": 4470
},
{
"epoch": 0.86,
"learning_rate": 1.3900563175576224e-05,
"loss": 1.753,
"step": 4475
},
{
"epoch": 0.86,
"learning_rate": 1.371010194622117e-05,
"loss": 1.3044,
"step": 4480
},
{
"epoch": 0.87,
"learning_rate": 1.3520892050456361e-05,
"loss": 1.6555,
"step": 4485
},
{
"epoch": 0.87,
"learning_rate": 1.3332935225495656e-05,
"loss": 1.189,
"step": 4490
},
{
"epoch": 0.87,
"learning_rate": 1.3146233197047895e-05,
"loss": 2.1216,
"step": 4495
},
{
"epoch": 0.87,
"learning_rate": 1.2960787679301171e-05,
"loss": 1.1811,
"step": 4500
},
{
"epoch": 0.87,
"learning_rate": 1.277660037490707e-05,
"loss": 1.8617,
"step": 4505
},
{
"epoch": 0.87,
"learning_rate": 1.2593672974964936e-05,
"loss": 1.828,
"step": 4510
},
{
"epoch": 0.87,
"learning_rate": 1.2412007159006465e-05,
"loss": 1.8121,
"step": 4515
},
{
"epoch": 0.87,
"learning_rate": 1.2231604594980155e-05,
"loss": 1.3953,
"step": 4520
},
{
"epoch": 0.87,
"learning_rate": 1.205246693923616e-05,
"loss": 1.4313,
"step": 4525
},
{
"epoch": 0.87,
"learning_rate": 1.1874595836510964e-05,
"loss": 1.1439,
"step": 4530
},
{
"epoch": 0.87,
"learning_rate": 1.1697992919912247e-05,
"loss": 1.5366,
"step": 4535
},
{
"epoch": 0.88,
"learning_rate": 1.152265981090404e-05,
"loss": 1.6374,
"step": 4540
},
{
"epoch": 0.88,
"learning_rate": 1.1348598119291646e-05,
"loss": 1.5402,
"step": 4545
},
{
"epoch": 0.88,
"learning_rate": 1.117580944320708e-05,
"loss": 1.7178,
"step": 4550
},
{
"epoch": 0.88,
"learning_rate": 1.100429536909423e-05,
"loss": 1.5968,
"step": 4555
},
{
"epoch": 0.88,
"learning_rate": 1.0834057471694269e-05,
"loss": 1.0687,
"step": 4560
},
{
"epoch": 0.88,
"learning_rate": 1.0665097314031357e-05,
"loss": 1.731,
"step": 4565
},
{
"epoch": 0.88,
"learning_rate": 1.0497416447398187e-05,
"loss": 1.4297,
"step": 4570
},
{
"epoch": 0.88,
"learning_rate": 1.0331016411341703e-05,
"loss": 1.8428,
"step": 4575
},
{
"epoch": 0.88,
"learning_rate": 1.0165898733649113e-05,
"loss": 1.9671,
"step": 4580
},
{
"epoch": 0.88,
"learning_rate": 1.0002064930333637e-05,
"loss": 1.6723,
"step": 4585
},
{
"epoch": 0.89,
"learning_rate": 9.839516505620815e-06,
"loss": 0.9007,
"step": 4590
},
{
"epoch": 0.89,
"learning_rate": 9.678254951934584e-06,
"loss": 1.5605,
"step": 4595
},
{
"epoch": 0.89,
"learning_rate": 9.518281749883528e-06,
"loss": 1.8201,
"step": 4600
},
{
"epoch": 0.89,
"learning_rate": 9.359598368247429e-06,
"loss": 1.4871,
"step": 4605
},
{
"epoch": 0.89,
"learning_rate": 9.202206263963625e-06,
"loss": 1.2976,
"step": 4610
},
{
"epoch": 0.89,
"learning_rate": 9.046106882113751e-06,
"loss": 1.5821,
"step": 4615
},
{
"epoch": 0.89,
"learning_rate": 8.89130165591041e-06,
"loss": 0.9298,
"step": 4620
},
{
"epoch": 0.89,
"learning_rate": 8.737792006684052e-06,
"loss": 1.3903,
"step": 4625
},
{
"epoch": 0.89,
"learning_rate": 8.585579343869881e-06,
"loss": 1.7596,
"step": 4630
},
{
"epoch": 0.89,
"learning_rate": 8.434665064994905e-06,
"loss": 1.2298,
"step": 4635
},
{
"epoch": 0.9,
"learning_rate": 8.28505055566519e-06,
"loss": 1.5793,
"step": 4640
},
{
"epoch": 0.9,
"learning_rate": 8.136737189553077e-06,
"loss": 1.0533,
"step": 4645
},
{
"epoch": 0.9,
"learning_rate": 7.989726328384516e-06,
"loss": 1.7101,
"step": 4650
},
{
"epoch": 0.9,
"learning_rate": 7.844019321926686e-06,
"loss": 1.5813,
"step": 4655
},
{
"epoch": 0.9,
"learning_rate": 7.699617507975563e-06,
"loss": 0.8989,
"step": 4660
},
{
"epoch": 0.9,
"learning_rate": 7.5565222123435075e-06,
"loss": 1.0196,
"step": 4665
},
{
"epoch": 0.9,
"learning_rate": 7.414734748847306e-06,
"loss": 1.1217,
"step": 4670
},
{
"epoch": 0.9,
"learning_rate": 7.274256419295954e-06,
"loss": 1.6232,
"step": 4675
},
{
"epoch": 0.9,
"learning_rate": 7.135088513478693e-06,
"loss": 1.6138,
"step": 4680
},
{
"epoch": 0.9,
"learning_rate": 6.997232309153389e-06,
"loss": 1.4079,
"step": 4685
},
{
"epoch": 0.9,
"learning_rate": 6.8606890720344585e-06,
"loss": 1.36,
"step": 4690
},
{
"epoch": 0.91,
"learning_rate": 6.725460055781545e-06,
"loss": 1.5277,
"step": 4695
},
{
"epoch": 0.91,
"learning_rate": 6.59154650198781e-06,
"loss": 1.4765,
"step": 4700
},
{
"epoch": 0.91,
"learning_rate": 6.458949640168675e-06,
"loss": 2.1942,
"step": 4705
},
{
"epoch": 0.91,
"learning_rate": 6.327670687750436e-06,
"loss": 1.602,
"step": 4710
},
{
"epoch": 0.91,
"learning_rate": 6.197710850059129e-06,
"loss": 1.4992,
"step": 4715
},
{
"epoch": 0.91,
"learning_rate": 6.0690713203094484e-06,
"loss": 1.692,
"step": 4720
},
{
"epoch": 0.91,
"learning_rate": 5.941753279593852e-06,
"loss": 1.3642,
"step": 4725
},
{
"epoch": 0.91,
"learning_rate": 5.81575789687157e-06,
"loss": 1.6257,
"step": 4730
},
{
"epoch": 0.91,
"learning_rate": 5.691086328958028e-06,
"loss": 1.1301,
"step": 4735
},
{
"epoch": 0.91,
"learning_rate": 5.5677397205141415e-06,
"loss": 1.5586,
"step": 4740
},
{
"epoch": 0.92,
"learning_rate": 5.445719204035875e-06,
"loss": 1.4131,
"step": 4745
},
{
"epoch": 0.92,
"learning_rate": 5.325025899843732e-06,
"loss": 1.2939,
"step": 4750
},
{
"epoch": 0.92,
"learning_rate": 5.205660916072546e-06,
"loss": 1.2161,
"step": 4755
},
{
"epoch": 0.92,
"learning_rate": 5.087625348661339e-06,
"loss": 1.6379,
"step": 4760
},
{
"epoch": 0.92,
"learning_rate": 4.9709202813431994e-06,
"loss": 1.2947,
"step": 4765
},
{
"epoch": 0.92,
"learning_rate": 4.855546785635283e-06,
"loss": 1.5706,
"step": 4770
},
{
"epoch": 0.92,
"learning_rate": 4.741505920829131e-06,
"loss": 1.5105,
"step": 4775
},
{
"epoch": 0.92,
"learning_rate": 4.628798733980782e-06,
"loss": 1.4684,
"step": 4780
},
{
"epoch": 0.92,
"learning_rate": 4.517426259901291e-06,
"loss": 1.6168,
"step": 4785
},
{
"epoch": 0.92,
"learning_rate": 4.407389521147114e-06,
"loss": 1.5196,
"step": 4790
},
{
"epoch": 0.92,
"learning_rate": 4.298689528010785e-06,
"loss": 1.3683,
"step": 4795
},
{
"epoch": 0.93,
"learning_rate": 4.191327278511653e-06,
"loss": 0.9307,
"step": 4800
},
{
"epoch": 0.93,
"learning_rate": 4.08530375838666e-06,
"loss": 1.8983,
"step": 4805
},
{
"epoch": 0.93,
"learning_rate": 3.980619941081348e-06,
"loss": 1.7198,
"step": 4810
},
{
"epoch": 0.93,
"learning_rate": 3.877276787740879e-06,
"loss": 0.8416,
"step": 4815
},
{
"epoch": 0.93,
"learning_rate": 3.7752752472012315e-06,
"loss": 0.979,
"step": 4820
},
{
"epoch": 0.93,
"learning_rate": 3.674616255980467e-06,
"loss": 1.647,
"step": 4825
},
{
"epoch": 0.93,
"learning_rate": 3.5753007382702258e-06,
"loss": 1.5859,
"step": 4830
},
{
"epoch": 0.93,
"learning_rate": 3.477329605927048e-06,
"loss": 1.505,
"step": 4835
},
{
"epoch": 0.93,
"learning_rate": 3.3807037584642316e-06,
"loss": 1.6099,
"step": 4840
},
{
"epoch": 0.93,
"learning_rate": 3.28542408304337e-06,
"loss": 1.8445,
"step": 4845
},
{
"epoch": 0.94,
"learning_rate": 3.1914914544663437e-06,
"loss": 0.9932,
"step": 4850
},
{
"epoch": 0.94,
"learning_rate": 3.098906735167278e-06,
"loss": 1.5865,
"step": 4855
},
{
"epoch": 0.94,
"learning_rate": 3.0076707752044958e-06,
"loss": 1.4505,
"step": 4860
},
{
"epoch": 0.94,
"learning_rate": 2.9177844122528936e-06,
"loss": 1.7117,
"step": 4865
},
{
"epoch": 0.94,
"learning_rate": 2.8292484715961127e-06,
"loss": 1.1955,
"step": 4870
},
{
"epoch": 0.94,
"learning_rate": 2.7420637661190124e-06,
"loss": 1.1857,
"step": 4875
},
{
"epoch": 0.94,
"learning_rate": 2.656231096300243e-06,
"loss": 1.4263,
"step": 4880
},
{
"epoch": 0.94,
"learning_rate": 2.5717512502048342e-06,
"loss": 1.2868,
"step": 4885
},
{
"epoch": 0.94,
"learning_rate": 2.488625003476952e-06,
"loss": 1.0146,
"step": 4890
},
{
"epoch": 0.94,
"learning_rate": 2.4068531193329022e-06,
"loss": 1.3149,
"step": 4895
},
{
"epoch": 0.95,
"learning_rate": 2.3264363485539396e-06,
"loss": 1.6428,
"step": 4900
},
{
"epoch": 0.95,
"learning_rate": 2.2473754294795032e-06,
"loss": 1.2771,
"step": 4905
},
{
"epoch": 0.95,
"learning_rate": 2.169671088000391e-06,
"loss": 1.087,
"step": 4910
},
{
"epoch": 0.95,
"learning_rate": 2.0933240375521297e-06,
"loss": 1.8595,
"step": 4915
},
{
"epoch": 0.95,
"learning_rate": 2.0183349791083827e-06,
"loss": 1.2098,
"step": 4920
},
{
"epoch": 0.95,
"learning_rate": 1.944704601174485e-06,
"loss": 1.5938,
"step": 4925
},
{
"epoch": 0.95,
"learning_rate": 1.8724335797812685e-06,
"loss": 1.446,
"step": 4930
},
{
"epoch": 0.95,
"learning_rate": 1.801522578478648e-06,
"loss": 1.4064,
"step": 4935
},
{
"epoch": 0.95,
"learning_rate": 1.7319722483297106e-06,
"loss": 1.1977,
"step": 4940
},
{
"epoch": 0.95,
"learning_rate": 1.663783227904636e-06,
"loss": 1.4832,
"step": 4945
},
{
"epoch": 0.95,
"learning_rate": 1.596956143274819e-06,
"loss": 1.4754,
"step": 4950
},
{
"epoch": 0.96,
"learning_rate": 1.5314916080072404e-06,
"loss": 1.8633,
"step": 4955
},
{
"epoch": 0.96,
"learning_rate": 1.4673902231586875e-06,
"loss": 1.3627,
"step": 4960
},
{
"epoch": 0.96,
"learning_rate": 1.4046525772703432e-06,
"loss": 1.028,
"step": 4965
},
{
"epoch": 0.96,
"learning_rate": 1.3432792463623387e-06,
"loss": 1.4373,
"step": 4970
},
{
"epoch": 0.96,
"learning_rate": 1.2832707939284427e-06,
"loss": 1.643,
"step": 4975
},
{
"epoch": 0.96,
"learning_rate": 1.2246277709309305e-06,
"loss": 1.1771,
"step": 4980
},
{
"epoch": 0.96,
"learning_rate": 1.167350715795523e-06,
"loss": 1.3419,
"step": 4985
},
{
"epoch": 0.96,
"learning_rate": 1.11144015440639e-06,
"loss": 1.5908,
"step": 4990
},
{
"epoch": 0.96,
"learning_rate": 1.0568966001014033e-06,
"loss": 1.6198,
"step": 4995
},
{
"epoch": 0.96,
"learning_rate": 1.0037205536673588e-06,
"loss": 1.6799,
"step": 5000
},
{
"epoch": 0.97,
"learning_rate": 9.51912503335378e-07,
"loss": 1.7136,
"step": 5005
},
{
"epoch": 0.97,
"learning_rate": 9.01472924776514e-07,
"loss": 1.3827,
"step": 5010
},
{
"epoch": 0.97,
"learning_rate": 8.524022810972364e-07,
"loss": 1.3498,
"step": 5015
},
{
"epoch": 0.97,
"learning_rate": 8.04701022835319e-07,
"loss": 1.1361,
"step": 5020
},
{
"epoch": 0.97,
"learning_rate": 7.583695879556262e-07,
"loss": 1.4652,
"step": 5025
},
{
"epoch": 0.97,
"learning_rate": 7.134084018461161e-07,
"loss": 1.9276,
"step": 5030
},
{
"epoch": 0.97,
"learning_rate": 6.698178773139107e-07,
"loss": 1.3343,
"step": 5035
},
{
"epoch": 0.97,
"learning_rate": 6.275984145815483e-07,
"loss": 1.414,
"step": 5040
},
{
"epoch": 0.97,
"learning_rate": 5.867504012832703e-07,
"loss": 1.7457,
"step": 5045
},
{
"epoch": 0.97,
"learning_rate": 5.472742124614904e-07,
"loss": 1.3583,
"step": 5050
},
{
"epoch": 0.98,
"learning_rate": 5.091702105633145e-07,
"loss": 1.5982,
"step": 5055
},
{
"epoch": 0.98,
"learning_rate": 4.724387454372591e-07,
"loss": 1.4183,
"step": 5060
},
{
"epoch": 0.98,
"learning_rate": 4.370801543300051e-07,
"loss": 1.6305,
"step": 5065
},
{
"epoch": 0.98,
"learning_rate": 4.030947618833158e-07,
"loss": 1.4188,
"step": 5070
},
{
"epoch": 0.98,
"learning_rate": 3.7048288013105663e-07,
"loss": 1.4879,
"step": 5075
},
{
"epoch": 0.98,
"learning_rate": 3.3924480849633064e-07,
"loss": 0.6158,
"step": 5080
},
{
"epoch": 0.98,
"learning_rate": 3.093808337886805e-07,
"loss": 1.4235,
"step": 5085
},
{
"epoch": 0.98,
"learning_rate": 2.8089123020157424e-07,
"loss": 0.8873,
"step": 5090
},
{
"epoch": 0.98,
"learning_rate": 2.5377625930977363e-07,
"loss": 1.2172,
"step": 5095
},
{
"epoch": 0.98,
"learning_rate": 2.2803617006695308e-07,
"loss": 1.4942,
"step": 5100
},
{
"epoch": 0.98,
"learning_rate": 2.0367119880346782e-07,
"loss": 1.514,
"step": 5105
},
{
"epoch": 0.99,
"learning_rate": 1.8068156922413924e-07,
"loss": 1.7758,
"step": 5110
},
{
"epoch": 0.99,
"learning_rate": 1.590674924062063e-07,
"loss": 1.7397,
"step": 5115
},
{
"epoch": 0.99,
"learning_rate": 1.3882916679739398e-07,
"loss": 1.3743,
"step": 5120
},
{
"epoch": 0.99,
"learning_rate": 1.1996677821409784e-07,
"loss": 1.4851,
"step": 5125
},
{
"epoch": 0.99,
"learning_rate": 1.0248049983965223e-07,
"loss": 1.5487,
"step": 5130
},
{
"epoch": 0.99,
"learning_rate": 8.637049222276482e-08,
"loss": 0.9175,
"step": 5135
},
{
"epoch": 0.99,
"learning_rate": 7.16369032760511e-08,
"loss": 1.3961,
"step": 5140
},
{
"epoch": 0.99,
"learning_rate": 5.827986827463549e-08,
"loss": 1.3167,
"step": 5145
},
{
"epoch": 0.99,
"learning_rate": 4.6299509854968994e-08,
"loss": 1.7824,
"step": 5150
},
{
"epoch": 0.99,
"learning_rate": 3.5695938013630134e-08,
"loss": 1.6007,
"step": 5155
},
{
"epoch": 1.0,
"learning_rate": 2.6469250106392338e-08,
"loss": 1.2018,
"step": 5160
},
{
"epoch": 1.0,
"learning_rate": 1.8619530847241436e-08,
"loss": 1.6535,
"step": 5165
},
{
"epoch": 1.0,
"learning_rate": 1.2146852307692856e-08,
"loss": 1.4676,
"step": 5170
},
{
"epoch": 1.0,
"learning_rate": 7.0512739160755176e-09,
"loss": 1.3325,
"step": 5175
},
{
"epoch": 1.0,
"learning_rate": 3.3328424569822876e-09,
"loss": 1.1266,
"step": 5180
},
{
"epoch": 1.0,
"step": 5184,
"total_flos": 1.830374044626125e+16,
"train_loss": 1.552536604029161,
"train_runtime": 23883.8454,
"train_samples_per_second": 0.217,
"train_steps_per_second": 0.217
}
],
"logging_steps": 5,
"max_steps": 5184,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3000,
"total_flos": 1.830374044626125e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}