bob-dylan / trainer_state.json
AlekseyKorshuk's picture
huggingartists
0a935f8
{
"best_metric": 1.0779144763946533,
"best_model_checkpoint": "output/bob-dylan/checkpoint-3542",
"epoch": 11.0,
"global_step": 3542,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 0.00013711788223044424,
"loss": 3.6967,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 0.0001368717255202631,
"loss": 3.7692,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 0.00013646211919423798,
"loss": 3.4916,
"step": 15
},
{
"epoch": 0.06,
"learning_rate": 0.00013589004389254062,
"loss": 3.6186,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 0.00013515686922297834,
"loss": 3.5756,
"step": 25
},
{
"epoch": 0.09,
"learning_rate": 0.00013426435048201062,
"loss": 3.236,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 0.0001332146244523866,
"loss": 3.3956,
"step": 35
},
{
"epoch": 0.12,
"learning_rate": 0.00013201020428746477,
"loss": 3.3277,
"step": 40
},
{
"epoch": 0.14,
"learning_rate": 0.0001306539734944624,
"loss": 3.2617,
"step": 45
},
{
"epoch": 0.16,
"learning_rate": 0.00012914917903103908,
"loss": 3.554,
"step": 50
},
{
"epoch": 0.17,
"learning_rate": 0.00012749942353174222,
"loss": 3.2625,
"step": 55
},
{
"epoch": 0.19,
"learning_rate": 0.00012570865668292503,
"loss": 3.2986,
"step": 60
},
{
"epoch": 0.2,
"learning_rate": 0.0001237811657667863,
"loss": 3.264,
"step": 65
},
{
"epoch": 0.22,
"learning_rate": 0.00012172156539717071,
"loss": 3.0437,
"step": 70
},
{
"epoch": 0.23,
"learning_rate": 0.00011953478647170303,
"loss": 3.0527,
"step": 75
},
{
"epoch": 0.25,
"learning_rate": 0.000117226064366706,
"loss": 3.3623,
"step": 80
},
{
"epoch": 0.26,
"learning_rate": 0.0001148009264031647,
"loss": 3.0801,
"step": 85
},
{
"epoch": 0.28,
"learning_rate": 0.0001122651786137447,
"loss": 3.0471,
"step": 90
},
{
"epoch": 0.3,
"learning_rate": 0.00010962489184254581,
"loss": 3.051,
"step": 95
},
{
"epoch": 0.31,
"learning_rate": 0.00010688638721086951,
"loss": 3.2191,
"step": 100
},
{
"epoch": 0.33,
"learning_rate": 0.0001040562209837965,
"loss": 2.9343,
"step": 105
},
{
"epoch": 0.34,
"learning_rate": 0.00010114116887380613,
"loss": 2.9893,
"step": 110
},
{
"epoch": 0.36,
"learning_rate": 9.814820981901533e-05,
"loss": 3.1068,
"step": 115
},
{
"epoch": 0.37,
"learning_rate": 9.508450927487455e-05,
"loss": 3.1636,
"step": 120
},
{
"epoch": 0.39,
"learning_rate": 9.195740205932179e-05,
"loss": 3.1864,
"step": 125
},
{
"epoch": 0.4,
"learning_rate": 8.877437479246497e-05,
"loss": 2.9116,
"step": 130
},
{
"epoch": 0.42,
"learning_rate": 8.554304797283438e-05,
"loss": 3.1795,
"step": 135
},
{
"epoch": 0.44,
"learning_rate": 8.227115773311617e-05,
"loss": 2.9074,
"step": 140
},
{
"epoch": 0.45,
"learning_rate": 7.896653731904552e-05,
"loss": 3.1763,
"step": 145
},
{
"epoch": 0.47,
"learning_rate": 7.56370983358012e-05,
"loss": 3.0588,
"step": 150
},
{
"epoch": 0.48,
"learning_rate": 7.229081180679942e-05,
"loss": 2.9373,
"step": 155
},
{
"epoch": 0.5,
"learning_rate": 6.893568909023427e-05,
"loss": 3.092,
"step": 160
},
{
"epoch": 0.51,
"learning_rate": 6.557976269905237e-05,
"loss": 3.0733,
"step": 165
},
{
"epoch": 0.53,
"learning_rate": 6.223106707028106e-05,
"loss": 2.9187,
"step": 170
},
{
"epoch": 0.55,
"learning_rate": 5.889761932974993e-05,
"loss": 2.9659,
"step": 175
},
{
"epoch": 0.56,
"learning_rate": 5.5587400098257335e-05,
"loss": 3.1799,
"step": 180
},
{
"epoch": 0.58,
"learning_rate": 5.230833438513365e-05,
"loss": 2.9959,
"step": 185
},
{
"epoch": 0.59,
"learning_rate": 4.9068272614944106e-05,
"loss": 2.8529,
"step": 190
},
{
"epoch": 0.61,
"learning_rate": 4.58749718327555e-05,
"loss": 3.0337,
"step": 195
},
{
"epoch": 0.62,
"learning_rate": 4.2736077132963006e-05,
"loss": 3.0827,
"step": 200
},
{
"epoch": 0.64,
"learning_rate": 3.9659103356138536e-05,
"loss": 2.8453,
"step": 205
},
{
"epoch": 0.65,
"learning_rate": 3.6651417097720435e-05,
"loss": 2.9619,
"step": 210
},
{
"epoch": 0.67,
"learning_rate": 3.372021907161731e-05,
"loss": 3.0061,
"step": 215
},
{
"epoch": 0.69,
"learning_rate": 3.0872526870949537e-05,
"loss": 2.8687,
"step": 220
},
{
"epoch": 0.7,
"learning_rate": 2.8115158167201102e-05,
"loss": 2.9913,
"step": 225
},
{
"epoch": 0.72,
"learning_rate": 2.5454714388004492e-05,
"loss": 2.9778,
"step": 230
},
{
"epoch": 0.73,
"learning_rate": 2.289756491263597e-05,
"loss": 3.0748,
"step": 235
},
{
"epoch": 0.75,
"learning_rate": 2.0449831823058788e-05,
"loss": 2.7658,
"step": 240
},
{
"epoch": 0.76,
"learning_rate": 1.8117375247021725e-05,
"loss": 2.9792,
"step": 245
},
{
"epoch": 0.78,
"learning_rate": 1.5905779328303487e-05,
"loss": 2.7718,
"step": 250
},
{
"epoch": 0.79,
"learning_rate": 1.3820338857691364e-05,
"loss": 2.9954,
"step": 255
},
{
"epoch": 0.81,
"learning_rate": 1.1866046596701035e-05,
"loss": 2.9246,
"step": 260
},
{
"epoch": 0.83,
"learning_rate": 1.0047581324385938e-05,
"loss": 2.9875,
"step": 265
},
{
"epoch": 0.84,
"learning_rate": 8.36929663585326e-06,
"loss": 2.673,
"step": 270
},
{
"epoch": 0.86,
"learning_rate": 6.835210519304257e-06,
"loss": 2.8515,
"step": 275
},
{
"epoch": 0.87,
"learning_rate": 5.448995736552248e-06,
"loss": 2.9416,
"step": 280
},
{
"epoch": 0.89,
"learning_rate": 4.213971030048682e-06,
"loss": 2.8573,
"step": 285
},
{
"epoch": 0.9,
"learning_rate": 3.133093177468323e-06,
"loss": 2.6045,
"step": 290
},
{
"epoch": 0.92,
"learning_rate": 2.208949912875789e-06,
"loss": 3.0414,
"step": 295
},
{
"epoch": 0.93,
"learning_rate": 1.4437537314208725e-06,
"loss": 2.7666,
"step": 300
},
{
"epoch": 0.95,
"learning_rate": 8.39336592394954e-07,
"loss": 2.7331,
"step": 305
},
{
"epoch": 0.97,
"learning_rate": 3.971455333297437e-07,
"loss": 2.847,
"step": 310
},
{
"epoch": 0.98,
"learning_rate": 1.1823920563887646e-07,
"loss": 2.7128,
"step": 315
},
{
"epoch": 1.0,
"learning_rate": 3.2853400962779e-09,
"loss": 2.6074,
"step": 320
},
{
"epoch": 1.0,
"eval_loss": 2.670438766479492,
"eval_runtime": 18.6137,
"eval_samples_per_second": 22.833,
"eval_steps_per_second": 2.901,
"step": 321
},
{
"epoch": 1.02,
"learning_rate": 1.6396932901668288e-07,
"loss": 2.7644,
"step": 325
},
{
"epoch": 1.04,
"learning_rate": 4.814968134727319e-07,
"loss": 2.6609,
"step": 330
},
{
"epoch": 1.05,
"learning_rate": 9.651980029285235e-07,
"loss": 2.681,
"step": 335
},
{
"epoch": 1.07,
"learning_rate": 1.6138929181565879e-06,
"loss": 2.6512,
"step": 340
},
{
"epoch": 1.08,
"learning_rate": 2.4259990810865978e-06,
"loss": 2.6379,
"step": 345
},
{
"epoch": 1.1,
"learning_rate": 3.3995353752283744e-06,
"loss": 2.9539,
"step": 350
},
{
"epoch": 1.12,
"learning_rate": 4.532126878565439e-06,
"loss": 2.7261,
"step": 355
},
{
"epoch": 1.13,
"learning_rate": 5.821010657128926e-06,
"loss": 2.8164,
"step": 360
},
{
"epoch": 1.15,
"learning_rate": 7.26304250511898e-06,
"loss": 2.8454,
"step": 365
},
{
"epoch": 1.16,
"learning_rate": 8.854704615130857e-06,
"loss": 2.7863,
"step": 370
},
{
"epoch": 1.18,
"learning_rate": 1.0592114159774732e-05,
"loss": 2.8572,
"step": 375
},
{
"epoch": 1.19,
"learning_rate": 1.2471032763754147e-05,
"loss": 2.8415,
"step": 380
},
{
"epoch": 1.21,
"learning_rate": 1.4486876843296586e-05,
"loss": 2.6313,
"step": 385
},
{
"epoch": 1.23,
"learning_rate": 1.663472878771285e-05,
"loss": 2.7228,
"step": 390
},
{
"epoch": 1.24,
"learning_rate": 1.8909348955808624e-05,
"loss": 2.5958,
"step": 395
},
{
"epoch": 1.26,
"learning_rate": 2.1305188457882567e-05,
"loss": 2.7318,
"step": 400
},
{
"epoch": 1.27,
"learning_rate": 2.3816402692130242e-05,
"loss": 2.7112,
"step": 405
},
{
"epoch": 1.29,
"learning_rate": 2.6436865602431172e-05,
"loss": 2.7692,
"step": 410
},
{
"epoch": 1.31,
"learning_rate": 2.9160184622738906e-05,
"loss": 2.7993,
"step": 415
},
{
"epoch": 1.32,
"learning_rate": 3.197971627161534e-05,
"loss": 2.8784,
"step": 420
},
{
"epoch": 1.34,
"learning_rate": 3.4888582358869375e-05,
"loss": 2.7869,
"step": 425
},
{
"epoch": 1.35,
"learning_rate": 3.7879686764761624e-05,
"loss": 2.8056,
"step": 430
},
{
"epoch": 1.37,
"learning_rate": 4.09457327508451e-05,
"loss": 2.553,
"step": 435
},
{
"epoch": 1.38,
"learning_rate": 4.407924076020965e-05,
"loss": 3.1824,
"step": 440
},
{
"epoch": 1.4,
"learning_rate": 4.7272566663709714e-05,
"loss": 2.8178,
"step": 445
},
{
"epoch": 1.42,
"learning_rate": 5.0517920407661915e-05,
"loss": 2.6659,
"step": 450
},
{
"epoch": 1.43,
"learning_rate": 5.3807385017522816e-05,
"loss": 2.5079,
"step": 455
},
{
"epoch": 1.45,
"learning_rate": 5.7132935911187364e-05,
"loss": 3.0069,
"step": 460
},
{
"epoch": 1.46,
"learning_rate": 6.048646047479427e-05,
"loss": 2.7084,
"step": 465
},
{
"epoch": 1.48,
"learning_rate": 6.385977785328252e-05,
"loss": 2.738,
"step": 470
},
{
"epoch": 1.49,
"learning_rate": 6.724465890742127e-05,
"loss": 2.7037,
"step": 475
},
{
"epoch": 1.51,
"learning_rate": 7.063284628862763e-05,
"loss": 2.7949,
"step": 480
},
{
"epoch": 1.53,
"learning_rate": 7.401607458260099e-05,
"loss": 2.8525,
"step": 485
},
{
"epoch": 1.54,
"learning_rate": 7.738609047263364e-05,
"loss": 2.7132,
"step": 490
},
{
"epoch": 1.56,
"learning_rate": 8.07346728734085e-05,
"loss": 2.7785,
"step": 495
},
{
"epoch": 1.57,
"learning_rate": 8.405365298617048e-05,
"loss": 2.7346,
"step": 500
},
{
"epoch": 1.59,
"learning_rate": 8.733493422634478e-05,
"loss": 2.8556,
"step": 505
},
{
"epoch": 1.6,
"learning_rate": 9.057051197499066e-05,
"loss": 2.7322,
"step": 510
},
{
"epoch": 1.62,
"learning_rate": 9.375249310590655e-05,
"loss": 2.6462,
"step": 515
},
{
"epoch": 1.64,
"learning_rate": 9.687311524075135e-05,
"loss": 2.7953,
"step": 520
},
{
"epoch": 1.65,
"learning_rate": 9.992476568520869e-05,
"loss": 2.7156,
"step": 525
},
{
"epoch": 1.67,
"learning_rate": 0.00010290000000000001,
"loss": 2.8443,
"step": 530
},
{
"epoch": 1.68,
"learning_rate": 0.00010579156016144291,
"loss": 2.73,
"step": 535
},
{
"epoch": 1.7,
"learning_rate": 0.00010859239226725277,
"loss": 2.6283,
"step": 540
},
{
"epoch": 1.71,
"learning_rate": 0.00011129566374439388,
"loss": 2.7128,
"step": 545
},
{
"epoch": 1.73,
"learning_rate": 0.00011389478001700271,
"loss": 2.848,
"step": 550
},
{
"epoch": 1.75,
"learning_rate": 0.00011638340059372158,
"loss": 2.6623,
"step": 555
},
{
"epoch": 1.76,
"learning_rate": 0.00011875545453519892,
"loss": 2.5335,
"step": 560
},
{
"epoch": 1.78,
"learning_rate": 0.00012100515526402216,
"loss": 2.7063,
"step": 565
},
{
"epoch": 1.79,
"learning_rate": 0.00012312701468095605,
"loss": 2.5592,
"step": 570
},
{
"epoch": 1.81,
"learning_rate": 0.00012511585655304897,
"loss": 2.658,
"step": 575
},
{
"epoch": 1.82,
"learning_rate": 0.00012696682914094848,
"loss": 2.7849,
"step": 580
},
{
"epoch": 1.84,
"learning_rate": 0.00012867541703462067,
"loss": 2.4521,
"step": 585
},
{
"epoch": 1.86,
"learning_rate": 0.0001302374521686013,
"loss": 2.7116,
"step": 590
},
{
"epoch": 1.87,
"learning_rate": 0.00013164912398990668,
"loss": 2.5754,
"step": 595
},
{
"epoch": 1.89,
"learning_rate": 0.0001329069887538007,
"loss": 2.6935,
"step": 600
},
{
"epoch": 1.9,
"learning_rate": 0.00013400797792473965,
"loss": 2.4856,
"step": 605
},
{
"epoch": 1.92,
"learning_rate": 0.0001349494056620028,
"loss": 2.7162,
"step": 610
},
{
"epoch": 1.93,
"learning_rate": 0.000135728975371746,
"loss": 2.8196,
"step": 615
},
{
"epoch": 1.95,
"learning_rate": 0.0001363447853094957,
"loss": 2.6532,
"step": 620
},
{
"epoch": 1.97,
"learning_rate": 0.00013679533321941633,
"loss": 2.5941,
"step": 625
},
{
"epoch": 1.98,
"learning_rate": 0.00013707951999903246,
"loss": 2.4365,
"step": 630
},
{
"epoch": 2.0,
"learning_rate": 0.00013719665238046719,
"loss": 2.6559,
"step": 635
},
{
"epoch": 2.0,
"eval_loss": 2.319049835205078,
"eval_runtime": 20.2442,
"eval_samples_per_second": 22.179,
"eval_steps_per_second": 2.816,
"step": 636
},
{
"epoch": 2.01,
"learning_rate": 0.00013714644462165502,
"loss": 2.3683,
"step": 640
},
{
"epoch": 2.03,
"learning_rate": 0.00013692901920340388,
"loss": 2.3765,
"step": 645
},
{
"epoch": 2.04,
"learning_rate": 0.00013654490653060555,
"loss": 2.7581,
"step": 650
},
{
"epoch": 2.06,
"learning_rate": 0.00013599504363832372,
"loss": 2.5057,
"step": 655
},
{
"epoch": 2.08,
"learning_rate": 0.00013528077190591619,
"loss": 2.5706,
"step": 660
},
{
"epoch": 2.09,
"learning_rate": 0.00013440383378476688,
"loss": 2.5847,
"step": 665
},
{
"epoch": 2.11,
"learning_rate": 0.00013336636854761118,
"loss": 2.3126,
"step": 670
},
{
"epoch": 2.12,
"learning_rate": 0.00013217090706982377,
"loss": 2.309,
"step": 675
},
{
"epoch": 2.14,
"learning_rate": 0.00013082036565539919,
"loss": 2.5847,
"step": 680
},
{
"epoch": 2.15,
"learning_rate": 0.00012931803892268768,
"loss": 2.4275,
"step": 685
},
{
"epoch": 2.17,
"learning_rate": 0.00012766759176724058,
"loss": 2.3388,
"step": 690
},
{
"epoch": 2.19,
"learning_rate": 0.00012587305042137224,
"loss": 2.4289,
"step": 695
},
{
"epoch": 2.2,
"learning_rate": 0.00012393879263224768,
"loss": 2.3432,
"step": 700
},
{
"epoch": 2.22,
"learning_rate": 0.00012186953698245773,
"loss": 2.1438,
"step": 705
},
{
"epoch": 2.23,
"learning_rate": 0.00011967033137913228,
"loss": 2.4482,
"step": 710
},
{
"epoch": 2.25,
"learning_rate": 0.00011734654073967362,
"loss": 2.596,
"step": 715
},
{
"epoch": 2.26,
"learning_rate": 0.00011490383390414927,
"loss": 2.3528,
"step": 720
},
{
"epoch": 2.28,
"learning_rate": 0.00011234816980627178,
"loss": 2.2922,
"step": 725
},
{
"epoch": 2.3,
"learning_rate": 0.0001096857829367009,
"loss": 2.2967,
"step": 730
},
{
"epoch": 2.31,
"learning_rate": 0.00010692316813413058,
"loss": 2.3893,
"step": 735
},
{
"epoch": 2.33,
"learning_rate": 0.0001040670647412614,
"loss": 2.2958,
"step": 740
},
{
"epoch": 2.34,
"learning_rate": 0.00010112444016431127,
"loss": 2.3878,
"step": 745
},
{
"epoch": 2.36,
"learning_rate": 9.810247287616931e-05,
"loss": 2.0135,
"step": 750
},
{
"epoch": 2.37,
"learning_rate": 9.500853490465723e-05,
"loss": 2.5128,
"step": 755
},
{
"epoch": 2.39,
"learning_rate": 9.185017384861694e-05,
"loss": 2.2281,
"step": 760
},
{
"epoch": 2.41,
"learning_rate": 8.86350944656966e-05,
"loss": 2.2168,
"step": 765
},
{
"epoch": 2.42,
"learning_rate": 8.537113987675077e-05,
"loss": 2.2228,
"step": 770
},
{
"epoch": 2.44,
"learning_rate": 8.206627243270665e-05,
"loss": 2.5163,
"step": 775
},
{
"epoch": 2.45,
"learning_rate": 7.872855429057025e-05,
"loss": 2.5593,
"step": 780
},
{
"epoch": 2.47,
"learning_rate": 7.536612774595843e-05,
"loss": 1.8992,
"step": 785
},
{
"epoch": 2.48,
"learning_rate": 7.198719537013403e-05,
"loss": 2.2331,
"step": 790
},
{
"epoch": 2.5,
"learning_rate": 6.860000000000001e-05,
"loss": 2.2157,
"step": 795
},
{
"epoch": 2.52,
"learning_rate": 6.521280462986602e-05,
"loss": 2.2611,
"step": 800
},
{
"epoch": 2.53,
"learning_rate": 6.183387225404161e-05,
"loss": 2.2053,
"step": 805
},
{
"epoch": 2.55,
"learning_rate": 5.8471445709429735e-05,
"loss": 2.1826,
"step": 810
},
{
"epoch": 2.56,
"learning_rate": 5.513372756729345e-05,
"loss": 1.995,
"step": 815
},
{
"epoch": 2.58,
"learning_rate": 5.182886012324932e-05,
"loss": 2.3863,
"step": 820
},
{
"epoch": 2.59,
"learning_rate": 4.856490553430346e-05,
"loss": 2.3232,
"step": 825
},
{
"epoch": 2.61,
"learning_rate": 4.534982615138311e-05,
"loss": 2.3315,
"step": 830
},
{
"epoch": 2.63,
"learning_rate": 4.2191465095342816e-05,
"loss": 2.3571,
"step": 835
},
{
"epoch": 2.64,
"learning_rate": 3.909752712383074e-05,
"loss": 2.1077,
"step": 840
},
{
"epoch": 2.66,
"learning_rate": 3.607555983568871e-05,
"loss": 2.1479,
"step": 845
},
{
"epoch": 2.67,
"learning_rate": 3.313293525873858e-05,
"loss": 2.2508,
"step": 850
},
{
"epoch": 2.69,
"learning_rate": 3.027683186586951e-05,
"loss": 2.5729,
"step": 855
},
{
"epoch": 2.7,
"learning_rate": 2.7514217063299187e-05,
"loss": 2.4242,
"step": 860
},
{
"epoch": 2.72,
"learning_rate": 2.485183019372827e-05,
"loss": 2.2222,
"step": 865
},
{
"epoch": 2.74,
"learning_rate": 2.2296166095850762e-05,
"loss": 2.3744,
"step": 870
},
{
"epoch": 2.75,
"learning_rate": 1.9853459260326405e-05,
"loss": 2.2229,
"step": 875
},
{
"epoch": 2.77,
"learning_rate": 1.752966862086776e-05,
"loss": 2.4542,
"step": 880
},
{
"epoch": 2.78,
"learning_rate": 1.5330463017542246e-05,
"loss": 2.253,
"step": 885
},
{
"epoch": 2.8,
"learning_rate": 1.3261207367752365e-05,
"loss": 2.2606,
"step": 890
},
{
"epoch": 2.81,
"learning_rate": 1.1326949578627828e-05,
"loss": 2.535,
"step": 895
},
{
"epoch": 2.83,
"learning_rate": 9.532408232759462e-06,
"loss": 2.3032,
"step": 900
},
{
"epoch": 2.85,
"learning_rate": 7.881961077312348e-06,
"loss": 2.2779,
"step": 905
},
{
"epoch": 2.86,
"learning_rate": 6.379634344600831e-06,
"loss": 2.2839,
"step": 910
},
{
"epoch": 2.88,
"learning_rate": 5.029092930176238e-06,
"loss": 2.0958,
"step": 915
},
{
"epoch": 2.89,
"learning_rate": 3.833631452388814e-06,
"loss": 2.1064,
"step": 920
},
{
"epoch": 2.91,
"learning_rate": 2.7961662152331326e-06,
"loss": 2.3978,
"step": 925
},
{
"epoch": 2.92,
"learning_rate": 1.919228094083838e-06,
"loss": 2.2206,
"step": 930
},
{
"epoch": 2.94,
"learning_rate": 1.204956361676291e-06,
"loss": 2.2185,
"step": 935
},
{
"epoch": 2.96,
"learning_rate": 6.550934693944858e-07,
"loss": 2.4076,
"step": 940
},
{
"epoch": 2.97,
"learning_rate": 2.709807965961209e-07,
"loss": 2.2124,
"step": 945
},
{
"epoch": 2.99,
"learning_rate": 5.355537834497188e-08,
"loss": 2.6099,
"step": 950
},
{
"epoch": 3.0,
"eval_loss": 2.0997118949890137,
"eval_runtime": 20.2461,
"eval_samples_per_second": 22.177,
"eval_steps_per_second": 2.815,
"step": 954
},
{
"epoch": 3.0,
"learning_rate": 3.347619532822632e-09,
"loss": 2.1903,
"step": 955
},
{
"epoch": 3.02,
"learning_rate": 1.2048000096755528e-07,
"loss": 2.2037,
"step": 960
},
{
"epoch": 3.03,
"learning_rate": 4.0466678058365933e-07,
"loss": 1.8544,
"step": 965
},
{
"epoch": 3.05,
"learning_rate": 8.552146905042755e-07,
"loss": 2.1987,
"step": 970
},
{
"epoch": 3.07,
"learning_rate": 1.4710246282540082e-06,
"loss": 2.1019,
"step": 975
},
{
"epoch": 3.08,
"learning_rate": 2.250594337997185e-06,
"loss": 2.1517,
"step": 980
},
{
"epoch": 3.1,
"learning_rate": 3.192022075260327e-06,
"loss": 2.1346,
"step": 985
},
{
"epoch": 3.11,
"learning_rate": 4.293011246199299e-06,
"loss": 2.0847,
"step": 990
},
{
"epoch": 3.13,
"learning_rate": 5.550876010093297e-06,
"loss": 1.9358,
"step": 995
},
{
"epoch": 3.14,
"learning_rate": 6.962547831398709e-06,
"loss": 2.2579,
"step": 1000
},
{
"epoch": 3.16,
"learning_rate": 8.524582965379288e-06,
"loss": 2.0876,
"step": 1005
},
{
"epoch": 3.18,
"learning_rate": 1.0233170859051466e-05,
"loss": 1.8737,
"step": 1010
},
{
"epoch": 3.19,
"learning_rate": 1.2084143446950978e-05,
"loss": 2.1,
"step": 1015
},
{
"epoch": 3.21,
"learning_rate": 1.4072985319043973e-05,
"loss": 2.0016,
"step": 1020
},
{
"epoch": 3.22,
"learning_rate": 1.619484473597781e-05,
"loss": 2.1936,
"step": 1025
},
{
"epoch": 3.24,
"learning_rate": 1.844454546480105e-05,
"loss": 2.2236,
"step": 1030
},
{
"epoch": 3.25,
"learning_rate": 2.081659940627838e-05,
"loss": 1.94,
"step": 1035
},
{
"epoch": 3.27,
"learning_rate": 2.330521998299727e-05,
"loss": 2.103,
"step": 1040
},
{
"epoch": 3.29,
"learning_rate": 2.5904336255606053e-05,
"loss": 2.0909,
"step": 1045
},
{
"epoch": 3.3,
"learning_rate": 2.860760773274715e-05,
"loss": 2.1943,
"step": 1050
},
{
"epoch": 3.32,
"learning_rate": 3.1408439838557e-05,
"loss": 2.3713,
"step": 1055
},
{
"epoch": 3.33,
"learning_rate": 3.4300000000000014e-05,
"loss": 2.1534,
"step": 1060
},
{
"epoch": 3.35,
"learning_rate": 3.727523431479128e-05,
"loss": 2.1973,
"step": 1065
},
{
"epoch": 3.36,
"learning_rate": 4.0326884759248605e-05,
"loss": 2.1412,
"step": 1070
},
{
"epoch": 3.38,
"learning_rate": 4.3447506894093424e-05,
"loss": 2.1419,
"step": 1075
},
{
"epoch": 3.4,
"learning_rate": 4.662948802500929e-05,
"loss": 2.179,
"step": 1080
},
{
"epoch": 3.41,
"learning_rate": 4.986506577365512e-05,
"loss": 2.0405,
"step": 1085
},
{
"epoch": 3.43,
"learning_rate": 5.314634701382942e-05,
"loss": 2.1397,
"step": 1090
},
{
"epoch": 3.44,
"learning_rate": 5.646532712659141e-05,
"loss": 2.0391,
"step": 1095
},
{
"epoch": 3.46,
"learning_rate": 5.9813909527366384e-05,
"loss": 2.2312,
"step": 1100
},
{
"epoch": 3.47,
"learning_rate": 6.318392541739896e-05,
"loss": 1.8413,
"step": 1105
},
{
"epoch": 3.49,
"learning_rate": 6.656715371137238e-05,
"loss": 2.2519,
"step": 1110
},
{
"epoch": 3.51,
"learning_rate": 6.995534109257869e-05,
"loss": 1.871,
"step": 1115
},
{
"epoch": 3.52,
"learning_rate": 7.334022214671738e-05,
"loss": 1.9633,
"step": 1120
},
{
"epoch": 3.54,
"learning_rate": 7.671353952520564e-05,
"loss": 2.3068,
"step": 1125
},
{
"epoch": 3.55,
"learning_rate": 8.006706408881254e-05,
"loss": 2.083,
"step": 1130
},
{
"epoch": 3.57,
"learning_rate": 8.33926149824772e-05,
"loss": 2.1478,
"step": 1135
},
{
"epoch": 3.58,
"learning_rate": 8.668207959233806e-05,
"loss": 2.1284,
"step": 1140
},
{
"epoch": 3.6,
"learning_rate": 8.992743333629024e-05,
"loss": 2.1958,
"step": 1145
},
{
"epoch": 3.62,
"learning_rate": 9.312075923979032e-05,
"loss": 1.9476,
"step": 1150
},
{
"epoch": 3.63,
"learning_rate": 9.625426724915486e-05,
"loss": 2.1252,
"step": 1155
},
{
"epoch": 3.65,
"learning_rate": 9.932031323523828e-05,
"loss": 1.921,
"step": 1160
},
{
"epoch": 3.66,
"learning_rate": 0.00010231141764113059,
"loss": 2.1346,
"step": 1165
},
{
"epoch": 3.68,
"learning_rate": 0.00010522028372838456,
"loss": 2.1729,
"step": 1170
},
{
"epoch": 3.69,
"learning_rate": 0.00010803981537726111,
"loss": 1.7231,
"step": 1175
},
{
"epoch": 3.71,
"learning_rate": 0.0001107631343975688,
"loss": 1.9233,
"step": 1180
},
{
"epoch": 3.73,
"learning_rate": 0.00011338359730786977,
"loss": 2.2671,
"step": 1185
},
{
"epoch": 3.74,
"learning_rate": 0.0001158948115421174,
"loss": 1.9335,
"step": 1190
},
{
"epoch": 3.76,
"learning_rate": 0.00011829065104419134,
"loss": 2.0918,
"step": 1195
},
{
"epoch": 3.77,
"learning_rate": 0.00012056527121228711,
"loss": 2.2418,
"step": 1200
},
{
"epoch": 3.79,
"learning_rate": 0.00012271312315670338,
"loss": 2.1403,
"step": 1205
},
{
"epoch": 3.81,
"learning_rate": 0.0001247289672362458,
"loss": 2.1248,
"step": 1210
},
{
"epoch": 3.82,
"learning_rate": 0.00012660788584022527,
"loss": 2.0255,
"step": 1215
},
{
"epoch": 3.84,
"learning_rate": 0.00012834529538486916,
"loss": 2.1805,
"step": 1220
},
{
"epoch": 3.85,
"learning_rate": 0.000129936957494881,
"loss": 1.9559,
"step": 1225
},
{
"epoch": 3.87,
"learning_rate": 0.00013137898934287106,
"loss": 2.1316,
"step": 1230
},
{
"epoch": 3.88,
"learning_rate": 0.00013266787312143455,
"loss": 2.4223,
"step": 1235
},
{
"epoch": 3.9,
"learning_rate": 0.0001338004646247716,
"loss": 2.0369,
"step": 1240
},
{
"epoch": 3.92,
"learning_rate": 0.00013477400091891338,
"loss": 2.0681,
"step": 1245
},
{
"epoch": 3.93,
"learning_rate": 0.00013558610708184338,
"loss": 2.1821,
"step": 1250
},
{
"epoch": 3.95,
"learning_rate": 0.00013623480199707148,
"loss": 2.2183,
"step": 1255
},
{
"epoch": 3.96,
"learning_rate": 0.00013671850318652728,
"loss": 2.0026,
"step": 1260
},
{
"epoch": 3.98,
"learning_rate": 0.00013703603067098332,
"loss": 1.9368,
"step": 1265
},
{
"epoch": 3.99,
"learning_rate": 0.0001371866098485905,
"loss": 2.1825,
"step": 1270
},
{
"epoch": 4.0,
"eval_loss": 2.0486814975738525,
"eval_runtime": 20.2715,
"eval_samples_per_second": 22.149,
"eval_steps_per_second": 2.812,
"step": 1272
},
{
"epoch": 4.01,
"learning_rate": 0.0001371698733845033,
"loss": 2.0223,
"step": 1275
},
{
"epoch": 4.03,
"learning_rate": 0.000136985862106986,
"loss": 2.0536,
"step": 1280
},
{
"epoch": 4.04,
"learning_rate": 0.0001366350249078127,
"loss": 1.962,
"step": 1285
},
{
"epoch": 4.06,
"learning_rate": 0.00013611821764720515,
"loss": 2.0683,
"step": 1290
},
{
"epoch": 4.07,
"learning_rate": 0.00013543670106597888,
"loss": 1.9271,
"step": 1295
},
{
"epoch": 4.09,
"learning_rate": 0.00013459213770999188,
"loss": 2.0152,
"step": 1300
},
{
"epoch": 4.1,
"learning_rate": 0.00013358658787439754,
"loss": 1.7685,
"step": 1305
},
{
"epoch": 4.12,
"learning_rate": 0.0001324225045775965,
"loss": 2.2589,
"step": 1310
},
{
"epoch": 4.14,
"learning_rate": 0.00013110272757714818,
"loss": 1.9458,
"step": 1315
},
{
"epoch": 4.15,
"learning_rate": 0.00012963047644223968,
"loss": 2.0727,
"step": 1320
},
{
"epoch": 4.17,
"learning_rate": 0.00012800934269961248,
"loss": 2.0622,
"step": 1325
},
{
"epoch": 4.18,
"learning_rate": 0.0001262432810721057,
"loss": 1.7668,
"step": 1330
},
{
"epoch": 4.2,
"learning_rate": 0.00012433659983118975,
"loss": 1.6953,
"step": 1335
},
{
"epoch": 4.21,
"learning_rate": 0.00012229395028702625,
"loss": 2.1107,
"step": 1340
},
{
"epoch": 4.23,
"learning_rate": 0.00012012031544169091,
"loss": 1.7084,
"step": 1345
},
{
"epoch": 4.25,
"learning_rate": 0.00011782099783324114,
"loss": 2.281,
"step": 1350
},
{
"epoch": 4.26,
"learning_rate": 0.00011540160660028247,
"loss": 2.0195,
"step": 1355
},
{
"epoch": 4.28,
"learning_rate": 0.0001128680437985883,
"loss": 1.9339,
"step": 1360
},
{
"epoch": 4.29,
"learning_rate": 0.00011022649000315548,
"loss": 1.8057,
"step": 1365
},
{
"epoch": 4.31,
"learning_rate": 0.00010748338923081677,
"loss": 1.9412,
"step": 1370
},
{
"epoch": 4.32,
"learning_rate": 0.00010464543322019205,
"loss": 1.6871,
"step": 1375
},
{
"epoch": 4.34,
"learning_rate": 0.0001017195451073291,
"loss": 1.8352,
"step": 1380
},
{
"epoch": 4.36,
"learning_rate": 9.871286253685277e-05,
"loss": 1.8437,
"step": 1385
},
{
"epoch": 4.37,
"learning_rate": 9.56327202498264e-05,
"loss": 2.0612,
"step": 1390
},
{
"epoch": 4.39,
"learning_rate": 9.248663219079991e-05,
"loss": 1.9911,
"step": 1395
},
{
"epoch": 4.4,
"learning_rate": 8.928227317769423e-05,
"loss": 1.8443,
"step": 1400
},
{
"epoch": 4.42,
"learning_rate": 8.602746017923975e-05,
"loss": 1.9599,
"step": 1405
},
{
"epoch": 4.43,
"learning_rate": 8.273013324563943e-05,
"loss": 1.8763,
"step": 1410
},
{
"epoch": 4.45,
"learning_rate": 7.939833613897684e-05,
"loss": 1.7859,
"step": 1415
},
{
"epoch": 4.47,
"learning_rate": 7.60401967106234e-05,
"loss": 2.1938,
"step": 1420
},
{
"epoch": 4.48,
"learning_rate": 7.266390707350893e-05,
"loss": 1.554,
"step": 1425
},
{
"epoch": 4.5,
"learning_rate": 6.927770361762858e-05,
"loss": 1.9093,
"step": 1430
},
{
"epoch": 4.51,
"learning_rate": 6.588984691753754e-05,
"loss": 1.6916,
"step": 1435
},
{
"epoch": 4.53,
"learning_rate": 6.250860158084673e-05,
"loss": 1.9688,
"step": 1440
},
{
"epoch": 4.54,
"learning_rate": 5.91422160868796e-05,
"loss": 1.9263,
"step": 1445
},
{
"epoch": 4.56,
"learning_rate": 5.579890266467603e-05,
"loss": 1.6181,
"step": 1450
},
{
"epoch": 4.58,
"learning_rate": 5.248681725942451e-05,
"loss": 2.0187,
"step": 1455
},
{
"epoch": 4.59,
"learning_rate": 4.921403963620125e-05,
"loss": 1.6975,
"step": 1460
},
{
"epoch": 4.61,
"learning_rate": 4.598855366954619e-05,
"loss": 1.995,
"step": 1465
},
{
"epoch": 4.62,
"learning_rate": 4.28182278669633e-05,
"loss": 1.9041,
"step": 1470
},
{
"epoch": 4.64,
"learning_rate": 3.9710796173857146e-05,
"loss": 1.8769,
"step": 1475
},
{
"epoch": 4.65,
"learning_rate": 3.667383910672967e-05,
"loss": 1.6473,
"step": 1480
},
{
"epoch": 4.67,
"learning_rate": 3.371476526066344e-05,
"loss": 1.5014,
"step": 1485
},
{
"epoch": 4.69,
"learning_rate": 3.084079323620596e-05,
"loss": 1.8409,
"step": 1490
},
{
"epoch": 4.7,
"learning_rate": 2.805893402973855e-05,
"loss": 1.8548,
"step": 1495
},
{
"epoch": 4.72,
"learning_rate": 2.5375973930294628e-05,
"loss": 1.8673,
"step": 1500
},
{
"epoch": 4.73,
"learning_rate": 2.2798457964544384e-05,
"loss": 1.8588,
"step": 1505
},
{
"epoch": 4.75,
"learning_rate": 2.0332673930335255e-05,
"loss": 2.0708,
"step": 1510
},
{
"epoch": 4.76,
"learning_rate": 1.7984637057737467e-05,
"loss": 1.782,
"step": 1515
},
{
"epoch": 4.78,
"learning_rate": 1.5760075335011966e-05,
"loss": 1.9664,
"step": 1520
},
{
"epoch": 4.8,
"learning_rate": 1.3664415535298983e-05,
"loss": 1.9151,
"step": 1525
},
{
"epoch": 4.81,
"learning_rate": 1.1702769978116211e-05,
"loss": 1.9315,
"step": 1530
},
{
"epoch": 4.83,
"learning_rate": 9.879924057958108e-06,
"loss": 1.8344,
"step": 1535
},
{
"epoch": 4.84,
"learning_rate": 8.20032457042391e-06,
"loss": 1.8142,
"step": 1540
},
{
"epoch": 4.86,
"learning_rate": 6.6680688643500544e-06,
"loss": 1.7706,
"step": 1545
},
{
"epoch": 4.87,
"learning_rate": 5.2868948464103564e-06,
"loss": 2.0751,
"step": 1550
},
{
"epoch": 4.89,
"learning_rate": 4.060171862569283e-06,
"loss": 2.0826,
"step": 1555
},
{
"epoch": 4.91,
"learning_rate": 2.9908924786305843e-06,
"loss": 1.9677,
"step": 1560
},
{
"epoch": 4.92,
"learning_rate": 2.0816651799335755e-06,
"loss": 1.7251,
"step": 1565
},
{
"epoch": 4.94,
"learning_rate": 1.3347080080066644e-06,
"loss": 1.9202,
"step": 1570
},
{
"epoch": 4.95,
"learning_rate": 7.518431496995702e-07,
"loss": 1.833,
"step": 1575
},
{
"epoch": 4.97,
"learning_rate": 3.34492491995841e-07,
"loss": 1.8957,
"step": 1580
},
{
"epoch": 4.98,
"learning_rate": 8.367415334837176e-08,
"loss": 1.7695,
"step": 1585
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 1.9115,
"step": 1590
},
{
"epoch": 5.0,
"eval_loss": 1.906249761581421,
"eval_runtime": 20.2347,
"eval_samples_per_second": 22.19,
"eval_steps_per_second": 2.817,
"step": 1590
},
{
"epoch": 5.02,
"learning_rate": 8.367415334837176e-08,
"loss": 1.6316,
"step": 1595
},
{
"epoch": 5.03,
"learning_rate": 3.3449249199583335e-07,
"loss": 1.7125,
"step": 1600
},
{
"epoch": 5.05,
"learning_rate": 7.51843149699555e-07,
"loss": 1.631,
"step": 1605
},
{
"epoch": 5.06,
"learning_rate": 1.3347080080066491e-06,
"loss": 1.5559,
"step": 1610
},
{
"epoch": 5.08,
"learning_rate": 2.0816651799335526e-06,
"loss": 1.7693,
"step": 1615
},
{
"epoch": 5.09,
"learning_rate": 2.990892478630592e-06,
"loss": 1.5954,
"step": 1620
},
{
"epoch": 5.11,
"learning_rate": 4.060171862569298e-06,
"loss": 1.9519,
"step": 1625
},
{
"epoch": 5.13,
"learning_rate": 5.2868948464102726e-06,
"loss": 1.5427,
"step": 1630
},
{
"epoch": 5.14,
"learning_rate": 6.668068864349971e-06,
"loss": 1.5142,
"step": 1635
},
{
"epoch": 5.16,
"learning_rate": 8.200324570423812e-06,
"loss": 1.4704,
"step": 1640
},
{
"epoch": 5.17,
"learning_rate": 9.879924057958001e-06,
"loss": 1.724,
"step": 1645
},
{
"epoch": 5.19,
"learning_rate": 1.1702769978116166e-05,
"loss": 1.7845,
"step": 1650
},
{
"epoch": 5.2,
"learning_rate": 1.366441553529893e-05,
"loss": 1.5651,
"step": 1655
},
{
"epoch": 5.22,
"learning_rate": 1.5760075335011912e-05,
"loss": 1.5684,
"step": 1660
},
{
"epoch": 5.24,
"learning_rate": 1.7984637057737406e-05,
"loss": 1.8357,
"step": 1665
},
{
"epoch": 5.25,
"learning_rate": 2.0332673930335194e-05,
"loss": 1.6561,
"step": 1670
},
{
"epoch": 5.27,
"learning_rate": 2.2798457964544323e-05,
"loss": 1.7467,
"step": 1675
},
{
"epoch": 5.28,
"learning_rate": 2.5375973930294556e-05,
"loss": 1.9728,
"step": 1680
},
{
"epoch": 5.3,
"learning_rate": 2.805893402973858e-05,
"loss": 1.813,
"step": 1685
},
{
"epoch": 5.31,
"learning_rate": 3.084079323620599e-05,
"loss": 1.7629,
"step": 1690
},
{
"epoch": 5.33,
"learning_rate": 3.3714765260663475e-05,
"loss": 1.7309,
"step": 1695
},
{
"epoch": 5.35,
"learning_rate": 3.66738391067297e-05,
"loss": 1.5675,
"step": 1700
},
{
"epoch": 5.36,
"learning_rate": 3.971079617385719e-05,
"loss": 1.4041,
"step": 1705
},
{
"epoch": 5.38,
"learning_rate": 4.28182278669631e-05,
"loss": 1.5323,
"step": 1710
},
{
"epoch": 5.39,
"learning_rate": 4.5988553669545996e-05,
"loss": 1.7705,
"step": 1715
},
{
"epoch": 5.41,
"learning_rate": 4.921403963620105e-05,
"loss": 1.7652,
"step": 1720
},
{
"epoch": 5.42,
"learning_rate": 5.2486817259424427e-05,
"loss": 1.8281,
"step": 1725
},
{
"epoch": 5.44,
"learning_rate": 5.579890266467595e-05,
"loss": 1.6899,
"step": 1730
},
{
"epoch": 5.46,
"learning_rate": 5.914221608687952e-05,
"loss": 1.6888,
"step": 1735
},
{
"epoch": 5.47,
"learning_rate": 6.250860158084665e-05,
"loss": 1.7589,
"step": 1740
},
{
"epoch": 5.49,
"learning_rate": 6.588984691753746e-05,
"loss": 1.6297,
"step": 1745
},
{
"epoch": 5.5,
"learning_rate": 6.92777036176285e-05,
"loss": 1.8036,
"step": 1750
},
{
"epoch": 5.52,
"learning_rate": 7.266390707350887e-05,
"loss": 1.8385,
"step": 1755
},
{
"epoch": 5.53,
"learning_rate": 7.604019671062332e-05,
"loss": 1.7527,
"step": 1760
},
{
"epoch": 5.55,
"learning_rate": 7.939833613897687e-05,
"loss": 1.5839,
"step": 1765
},
{
"epoch": 5.57,
"learning_rate": 8.273013324563949e-05,
"loss": 1.9817,
"step": 1770
},
{
"epoch": 5.58,
"learning_rate": 8.602746017923979e-05,
"loss": 2.1168,
"step": 1775
},
{
"epoch": 5.6,
"learning_rate": 8.928227317769404e-05,
"loss": 1.8069,
"step": 1780
},
{
"epoch": 5.61,
"learning_rate": 9.248663219079973e-05,
"loss": 1.886,
"step": 1785
},
{
"epoch": 5.63,
"learning_rate": 9.56327202498262e-05,
"loss": 1.8172,
"step": 1790
},
{
"epoch": 5.64,
"learning_rate": 9.871286253685269e-05,
"loss": 1.6818,
"step": 1795
},
{
"epoch": 5.66,
"learning_rate": 0.00010171954510732892,
"loss": 1.4689,
"step": 1800
},
{
"epoch": 5.68,
"learning_rate": 0.00010464543322019198,
"loss": 1.3972,
"step": 1805
},
{
"epoch": 5.69,
"learning_rate": 0.00010748338923081671,
"loss": 1.8636,
"step": 1810
},
{
"epoch": 5.71,
"learning_rate": 0.0001102264900031554,
"loss": 1.5656,
"step": 1815
},
{
"epoch": 5.72,
"learning_rate": 0.00011286804379858823,
"loss": 1.834,
"step": 1820
},
{
"epoch": 5.74,
"learning_rate": 0.00011540160660028242,
"loss": 1.6874,
"step": 1825
},
{
"epoch": 5.75,
"learning_rate": 0.00011782099783324108,
"loss": 1.7932,
"step": 1830
},
{
"epoch": 5.77,
"learning_rate": 0.00012012031544169092,
"loss": 1.8093,
"step": 1835
},
{
"epoch": 5.79,
"learning_rate": 0.00012229395028702622,
"loss": 1.4865,
"step": 1840
},
{
"epoch": 5.8,
"learning_rate": 0.00012433659983118975,
"loss": 1.7283,
"step": 1845
},
{
"epoch": 5.82,
"learning_rate": 0.0001262432810721057,
"loss": 1.5611,
"step": 1850
},
{
"epoch": 5.83,
"learning_rate": 0.00012800934269961248,
"loss": 1.7974,
"step": 1855
},
{
"epoch": 5.85,
"learning_rate": 0.0001296304764422396,
"loss": 1.6754,
"step": 1860
},
{
"epoch": 5.86,
"learning_rate": 0.0001311027275771481,
"loss": 1.5481,
"step": 1865
},
{
"epoch": 5.88,
"learning_rate": 0.0001324225045775964,
"loss": 1.7931,
"step": 1870
},
{
"epoch": 5.9,
"learning_rate": 0.0001335865878743975,
"loss": 1.9985,
"step": 1875
},
{
"epoch": 5.91,
"learning_rate": 0.00013459213770999182,
"loss": 1.9837,
"step": 1880
},
{
"epoch": 5.93,
"learning_rate": 0.00013543670106597888,
"loss": 1.7165,
"step": 1885
},
{
"epoch": 5.94,
"learning_rate": 0.00013611821764720515,
"loss": 1.9364,
"step": 1890
},
{
"epoch": 5.96,
"learning_rate": 0.0001366350249078127,
"loss": 1.6662,
"step": 1895
},
{
"epoch": 5.97,
"learning_rate": 0.000136985862106986,
"loss": 1.8671,
"step": 1900
},
{
"epoch": 5.99,
"learning_rate": 0.0001371698733845033,
"loss": 1.7105,
"step": 1905
},
{
"epoch": 6.0,
"eval_loss": 1.9032058715820312,
"eval_runtime": 20.2363,
"eval_samples_per_second": 22.188,
"eval_steps_per_second": 2.817,
"step": 1908
},
{
"epoch": 6.01,
"learning_rate": 0.0001371866098485905,
"loss": 1.4646,
"step": 1910
},
{
"epoch": 6.02,
"learning_rate": 0.00013703603067098332,
"loss": 1.572,
"step": 1915
},
{
"epoch": 6.04,
"learning_rate": 0.00013671850318652725,
"loss": 1.6881,
"step": 1920
},
{
"epoch": 6.05,
"learning_rate": 0.00013623480199707148,
"loss": 1.8478,
"step": 1925
},
{
"epoch": 6.07,
"learning_rate": 0.00013558610708184343,
"loss": 1.6836,
"step": 1930
},
{
"epoch": 6.08,
"learning_rate": 0.0001347740009189134,
"loss": 1.7053,
"step": 1935
},
{
"epoch": 6.1,
"learning_rate": 0.00013380046462477165,
"loss": 1.5923,
"step": 1940
},
{
"epoch": 6.12,
"learning_rate": 0.00013266787312143458,
"loss": 1.5511,
"step": 1945
},
{
"epoch": 6.13,
"learning_rate": 0.00013137898934287114,
"loss": 1.6041,
"step": 1950
},
{
"epoch": 6.15,
"learning_rate": 0.00012993695749488105,
"loss": 1.5811,
"step": 1955
},
{
"epoch": 6.16,
"learning_rate": 0.0001283452953848691,
"loss": 1.6477,
"step": 1960
},
{
"epoch": 6.18,
"learning_rate": 0.00012660788584022533,
"loss": 1.6821,
"step": 1965
},
{
"epoch": 6.19,
"learning_rate": 0.00012472896723624585,
"loss": 1.9101,
"step": 1970
},
{
"epoch": 6.21,
"learning_rate": 0.00012271312315670352,
"loss": 1.7961,
"step": 1975
},
{
"epoch": 6.23,
"learning_rate": 0.00012056527121228716,
"loss": 1.7949,
"step": 1980
},
{
"epoch": 6.24,
"learning_rate": 0.00011829065104419132,
"loss": 1.4401,
"step": 1985
},
{
"epoch": 6.26,
"learning_rate": 0.00011589481154211747,
"loss": 1.6164,
"step": 1990
},
{
"epoch": 6.27,
"learning_rate": 0.00011338359730786976,
"loss": 1.8185,
"step": 1995
},
{
"epoch": 6.29,
"learning_rate": 0.00011076313439756885,
"loss": 1.9472,
"step": 2000
},
{
"epoch": 6.31,
"learning_rate": 0.00010803981537726108,
"loss": 1.5825,
"step": 2005
},
{
"epoch": 6.32,
"learning_rate": 0.00010522028372838475,
"loss": 1.6182,
"step": 2010
},
{
"epoch": 6.34,
"learning_rate": 0.00010231141764113088,
"loss": 1.6272,
"step": 2015
},
{
"epoch": 6.35,
"learning_rate": 9.932031323523847e-05,
"loss": 1.4251,
"step": 2020
},
{
"epoch": 6.37,
"learning_rate": 9.625426724915494e-05,
"loss": 1.5946,
"step": 2025
},
{
"epoch": 6.38,
"learning_rate": 9.31207592397905e-05,
"loss": 1.5311,
"step": 2030
},
{
"epoch": 6.4,
"learning_rate": 8.992743333629033e-05,
"loss": 1.7549,
"step": 2035
},
{
"epoch": 6.42,
"learning_rate": 8.6682079592338e-05,
"loss": 1.7651,
"step": 2040
},
{
"epoch": 6.43,
"learning_rate": 8.339261498247729e-05,
"loss": 1.7894,
"step": 2045
},
{
"epoch": 6.45,
"learning_rate": 8.006706408881262e-05,
"loss": 1.2016,
"step": 2050
},
{
"epoch": 6.46,
"learning_rate": 7.671353952520582e-05,
"loss": 1.5423,
"step": 2055
},
{
"epoch": 6.48,
"learning_rate": 7.334022214671746e-05,
"loss": 1.2711,
"step": 2060
},
{
"epoch": 6.49,
"learning_rate": 6.995534109257865e-05,
"loss": 1.582,
"step": 2065
},
{
"epoch": 6.51,
"learning_rate": 6.656715371137246e-05,
"loss": 1.55,
"step": 2070
},
{
"epoch": 6.53,
"learning_rate": 6.318392541739893e-05,
"loss": 1.5744,
"step": 2075
},
{
"epoch": 6.54,
"learning_rate": 5.9813909527366465e-05,
"loss": 1.7316,
"step": 2080
},
{
"epoch": 6.56,
"learning_rate": 5.646532712659148e-05,
"loss": 1.5926,
"step": 2085
},
{
"epoch": 6.57,
"learning_rate": 5.314634701382963e-05,
"loss": 1.3707,
"step": 2090
},
{
"epoch": 6.59,
"learning_rate": 4.98650657736552e-05,
"loss": 1.3874,
"step": 2095
},
{
"epoch": 6.6,
"learning_rate": 4.6629488025009487e-05,
"loss": 1.6198,
"step": 2100
},
{
"epoch": 6.62,
"learning_rate": 4.3447506894093505e-05,
"loss": 1.6056,
"step": 2105
},
{
"epoch": 6.64,
"learning_rate": 4.0326884759248795e-05,
"loss": 1.3487,
"step": 2110
},
{
"epoch": 6.65,
"learning_rate": 3.7275234314791357e-05,
"loss": 1.5276,
"step": 2115
},
{
"epoch": 6.67,
"learning_rate": 3.429999999999998e-05,
"loss": 1.6535,
"step": 2120
},
{
"epoch": 6.68,
"learning_rate": 3.140843983855718e-05,
"loss": 1.8044,
"step": 2125
},
{
"epoch": 6.7,
"learning_rate": 2.860760773274722e-05,
"loss": 1.705,
"step": 2130
},
{
"epoch": 6.71,
"learning_rate": 2.5904336255606023e-05,
"loss": 1.4356,
"step": 2135
},
{
"epoch": 6.73,
"learning_rate": 2.3305219982997338e-05,
"loss": 1.4672,
"step": 2140
},
{
"epoch": 6.75,
"learning_rate": 2.0816599406278358e-05,
"loss": 1.5759,
"step": 2145
},
{
"epoch": 6.76,
"learning_rate": 1.8444545464801106e-05,
"loss": 1.655,
"step": 2150
},
{
"epoch": 6.78,
"learning_rate": 1.6194844735977787e-05,
"loss": 1.5595,
"step": 2155
},
{
"epoch": 6.79,
"learning_rate": 1.4072985319044027e-05,
"loss": 1.5155,
"step": 2160
},
{
"epoch": 6.81,
"learning_rate": 1.208414344695116e-05,
"loss": 1.6561,
"step": 2165
},
{
"epoch": 6.82,
"learning_rate": 1.0233170859051572e-05,
"loss": 1.4243,
"step": 2170
},
{
"epoch": 6.84,
"learning_rate": 8.524582965379327e-06,
"loss": 1.3178,
"step": 2175
},
{
"epoch": 6.86,
"learning_rate": 6.9625478313988e-06,
"loss": 1.2895,
"step": 2180
},
{
"epoch": 6.87,
"learning_rate": 5.5508760100933275e-06,
"loss": 1.8021,
"step": 2185
},
{
"epoch": 6.89,
"learning_rate": 4.293011246199375e-06,
"loss": 1.2922,
"step": 2190
},
{
"epoch": 6.9,
"learning_rate": 3.19202207526035e-06,
"loss": 1.514,
"step": 2195
},
{
"epoch": 6.92,
"learning_rate": 2.2505943379971774e-06,
"loss": 1.7064,
"step": 2200
},
{
"epoch": 6.93,
"learning_rate": 1.4710246282540463e-06,
"loss": 1.7752,
"step": 2205
},
{
"epoch": 6.95,
"learning_rate": 8.552146905042831e-07,
"loss": 1.4982,
"step": 2210
},
{
"epoch": 6.97,
"learning_rate": 4.0466678058365933e-07,
"loss": 1.4699,
"step": 2215
},
{
"epoch": 6.98,
"learning_rate": 1.2048000096755528e-07,
"loss": 1.7008,
"step": 2220
},
{
"epoch": 7.0,
"learning_rate": 3.347619532822632e-09,
"loss": 1.5587,
"step": 2225
},
{
"epoch": 7.0,
"eval_loss": 1.8221737146377563,
"eval_runtime": 20.3992,
"eval_samples_per_second": 22.011,
"eval_steps_per_second": 2.794,
"step": 2226
},
{
"epoch": 7.01,
"learning_rate": 5.355537834497188e-08,
"loss": 1.248,
"step": 2230
},
{
"epoch": 7.03,
"learning_rate": 2.709807965961209e-07,
"loss": 1.3298,
"step": 2235
},
{
"epoch": 7.04,
"learning_rate": 6.550934693944553e-07,
"loss": 1.4704,
"step": 2240
},
{
"epoch": 7.06,
"learning_rate": 1.2049563616762301e-06,
"loss": 1.4347,
"step": 2245
},
{
"epoch": 7.08,
"learning_rate": 1.919228094083792e-06,
"loss": 1.3554,
"step": 2250
},
{
"epoch": 7.09,
"learning_rate": 2.7961662152331403e-06,
"loss": 1.428,
"step": 2255
},
{
"epoch": 7.11,
"learning_rate": 3.833631452388745e-06,
"loss": 1.4177,
"step": 2260
},
{
"epoch": 7.12,
"learning_rate": 5.029092930176208e-06,
"loss": 1.3861,
"step": 2265
},
{
"epoch": 7.14,
"learning_rate": 6.379634344600846e-06,
"loss": 1.3699,
"step": 2270
},
{
"epoch": 7.15,
"learning_rate": 7.88196107731231e-06,
"loss": 1.4377,
"step": 2275
},
{
"epoch": 7.17,
"learning_rate": 9.532408232759425e-06,
"loss": 1.4078,
"step": 2280
},
{
"epoch": 7.19,
"learning_rate": 1.1326949578627714e-05,
"loss": 1.4196,
"step": 2285
},
{
"epoch": 7.2,
"learning_rate": 1.3261207367752312e-05,
"loss": 1.4826,
"step": 2290
},
{
"epoch": 7.22,
"learning_rate": 1.533046301754235e-05,
"loss": 1.4701,
"step": 2295
},
{
"epoch": 7.23,
"learning_rate": 1.7529668620867698e-05,
"loss": 1.4641,
"step": 2300
},
{
"epoch": 7.25,
"learning_rate": 1.985345926032643e-05,
"loss": 1.4294,
"step": 2305
},
{
"epoch": 7.26,
"learning_rate": 2.22961660958507e-05,
"loss": 1.4558,
"step": 2310
},
{
"epoch": 7.28,
"learning_rate": 2.4851830193728118e-05,
"loss": 1.5906,
"step": 2315
},
{
"epoch": 7.3,
"learning_rate": 2.751421706329902e-05,
"loss": 1.6085,
"step": 2320
},
{
"epoch": 7.31,
"learning_rate": 3.027683186586924e-05,
"loss": 1.2953,
"step": 2325
},
{
"epoch": 7.33,
"learning_rate": 3.313293525873851e-05,
"loss": 1.4112,
"step": 2330
},
{
"epoch": 7.34,
"learning_rate": 3.607555983568874e-05,
"loss": 1.3934,
"step": 2335
},
{
"epoch": 7.36,
"learning_rate": 3.909752712383054e-05,
"loss": 1.3835,
"step": 2340
},
{
"epoch": 7.37,
"learning_rate": 4.219146509534274e-05,
"loss": 1.4961,
"step": 2345
},
{
"epoch": 7.39,
"learning_rate": 4.5349826151383146e-05,
"loss": 1.2224,
"step": 2350
},
{
"epoch": 7.41,
"learning_rate": 4.856490553430337e-05,
"loss": 1.4432,
"step": 2355
},
{
"epoch": 7.42,
"learning_rate": 5.182886012324924e-05,
"loss": 1.5696,
"step": 2360
},
{
"epoch": 7.44,
"learning_rate": 5.5133727567293246e-05,
"loss": 1.6135,
"step": 2365
},
{
"epoch": 7.45,
"learning_rate": 5.8471445709429775e-05,
"loss": 1.3836,
"step": 2370
},
{
"epoch": 7.47,
"learning_rate": 6.18338722540417e-05,
"loss": 1.0351,
"step": 2375
},
{
"epoch": 7.48,
"learning_rate": 6.521280462986592e-05,
"loss": 1.5105,
"step": 2380
},
{
"epoch": 7.5,
"learning_rate": 6.859999999999982e-05,
"loss": 1.6121,
"step": 2385
},
{
"epoch": 7.52,
"learning_rate": 7.198719537013396e-05,
"loss": 1.6241,
"step": 2390
},
{
"epoch": 7.53,
"learning_rate": 7.536612774595818e-05,
"loss": 1.522,
"step": 2395
},
{
"epoch": 7.55,
"learning_rate": 7.872855429057012e-05,
"loss": 1.4888,
"step": 2400
},
{
"epoch": 7.56,
"learning_rate": 8.206627243270664e-05,
"loss": 1.4955,
"step": 2405
},
{
"epoch": 7.58,
"learning_rate": 8.537113987675064e-05,
"loss": 1.4161,
"step": 2410
},
{
"epoch": 7.59,
"learning_rate": 8.86350944656965e-05,
"loss": 1.8936,
"step": 2415
},
{
"epoch": 7.61,
"learning_rate": 9.185017384861673e-05,
"loss": 1.6053,
"step": 2420
},
{
"epoch": 7.63,
"learning_rate": 9.500853490465716e-05,
"loss": 1.5626,
"step": 2425
},
{
"epoch": 7.64,
"learning_rate": 9.810247287616934e-05,
"loss": 1.1654,
"step": 2430
},
{
"epoch": 7.66,
"learning_rate": 0.00010112444016431114,
"loss": 1.353,
"step": 2435
},
{
"epoch": 7.67,
"learning_rate": 0.00010406706474126137,
"loss": 1.5396,
"step": 2440
},
{
"epoch": 7.69,
"learning_rate": 0.00010692316813413065,
"loss": 1.3699,
"step": 2445
},
{
"epoch": 7.7,
"learning_rate": 0.0001096857829367009,
"loss": 1.3597,
"step": 2450
},
{
"epoch": 7.72,
"learning_rate": 0.0001123481698062718,
"loss": 1.5768,
"step": 2455
},
{
"epoch": 7.74,
"learning_rate": 0.00011490383390414922,
"loss": 1.3661,
"step": 2460
},
{
"epoch": 7.75,
"learning_rate": 0.00011734654073967348,
"loss": 1.4304,
"step": 2465
},
{
"epoch": 7.77,
"learning_rate": 0.00011967033137913221,
"loss": 1.4475,
"step": 2470
},
{
"epoch": 7.78,
"learning_rate": 0.00012186953698245757,
"loss": 1.5644,
"step": 2475
},
{
"epoch": 7.8,
"learning_rate": 0.00012393879263224763,
"loss": 1.2184,
"step": 2480
},
{
"epoch": 7.81,
"learning_rate": 0.00012587305042137222,
"loss": 1.55,
"step": 2485
},
{
"epoch": 7.83,
"learning_rate": 0.00012766759176724053,
"loss": 1.5614,
"step": 2490
},
{
"epoch": 7.85,
"learning_rate": 0.00012931803892268765,
"loss": 1.4788,
"step": 2495
},
{
"epoch": 7.86,
"learning_rate": 0.0001308203656553991,
"loss": 1.3325,
"step": 2500
},
{
"epoch": 7.88,
"learning_rate": 0.00013217090706982374,
"loss": 1.6197,
"step": 2505
},
{
"epoch": 7.89,
"learning_rate": 0.0001333663685476112,
"loss": 1.5014,
"step": 2510
},
{
"epoch": 7.91,
"learning_rate": 0.00013440383378476682,
"loss": 1.4764,
"step": 2515
},
{
"epoch": 7.92,
"learning_rate": 0.00013528077190591619,
"loss": 1.8615,
"step": 2520
},
{
"epoch": 7.94,
"learning_rate": 0.00013599504363832375,
"loss": 1.2448,
"step": 2525
},
{
"epoch": 7.96,
"learning_rate": 0.00013654490653060552,
"loss": 1.3242,
"step": 2530
},
{
"epoch": 7.97,
"learning_rate": 0.00013692901920340386,
"loss": 1.3224,
"step": 2535
},
{
"epoch": 7.99,
"learning_rate": 0.00013714644462165502,
"loss": 1.4513,
"step": 2540
},
{
"epoch": 8.0,
"eval_loss": 1.8965427875518799,
"eval_runtime": 20.4074,
"eval_samples_per_second": 22.002,
"eval_steps_per_second": 2.793,
"step": 2544
},
{
"epoch": 8.0,
"learning_rate": 0.00013719665238046719,
"loss": 1.3932,
"step": 2545
},
{
"epoch": 8.02,
"learning_rate": 0.00013707951999903246,
"loss": 1.2432,
"step": 2550
},
{
"epoch": 8.03,
"learning_rate": 0.00013679533321941633,
"loss": 1.2907,
"step": 2555
},
{
"epoch": 8.05,
"learning_rate": 0.00013634478530949573,
"loss": 1.2857,
"step": 2560
},
{
"epoch": 8.07,
"learning_rate": 0.00013572897537174604,
"loss": 1.3178,
"step": 2565
},
{
"epoch": 8.08,
"learning_rate": 0.00013494940566200278,
"loss": 1.3444,
"step": 2570
},
{
"epoch": 8.1,
"learning_rate": 0.0001340079779247397,
"loss": 1.3778,
"step": 2575
},
{
"epoch": 8.11,
"learning_rate": 0.00013290698875380075,
"loss": 1.5204,
"step": 2580
},
{
"epoch": 8.13,
"learning_rate": 0.00013164912398990663,
"loss": 1.196,
"step": 2585
},
{
"epoch": 8.14,
"learning_rate": 0.00013023745216860123,
"loss": 1.5108,
"step": 2590
},
{
"epoch": 8.16,
"learning_rate": 0.00012867541703462073,
"loss": 1.414,
"step": 2595
},
{
"epoch": 8.18,
"learning_rate": 0.00012696682914094848,
"loss": 1.299,
"step": 2600
},
{
"epoch": 8.19,
"learning_rate": 0.00012511585655304892,
"loss": 1.5168,
"step": 2605
},
{
"epoch": 8.21,
"learning_rate": 0.00012312701468095605,
"loss": 1.4117,
"step": 2610
},
{
"epoch": 8.22,
"learning_rate": 0.0001210051552640223,
"loss": 1.4762,
"step": 2615
},
{
"epoch": 8.24,
"learning_rate": 0.00011875545453519897,
"loss": 1.3127,
"step": 2620
},
{
"epoch": 8.25,
"learning_rate": 0.00011638340059372173,
"loss": 1.5447,
"step": 2625
},
{
"epoch": 8.27,
"learning_rate": 0.00011389478001700295,
"loss": 1.3891,
"step": 2630
},
{
"epoch": 8.29,
"learning_rate": 0.00011129566374439389,
"loss": 1.7022,
"step": 2635
},
{
"epoch": 8.3,
"learning_rate": 0.00010859239226725287,
"loss": 1.2989,
"step": 2640
},
{
"epoch": 8.32,
"learning_rate": 0.00010579156016144313,
"loss": 1.4672,
"step": 2645
},
{
"epoch": 8.33,
"learning_rate": 0.00010289999999999993,
"loss": 1.4019,
"step": 2650
},
{
"epoch": 8.35,
"learning_rate": 9.992476568520875e-05,
"loss": 1.3174,
"step": 2655
},
{
"epoch": 8.36,
"learning_rate": 9.687311524075153e-05,
"loss": 1.4993,
"step": 2660
},
{
"epoch": 8.38,
"learning_rate": 9.375249310590639e-05,
"loss": 1.4268,
"step": 2665
},
{
"epoch": 8.4,
"learning_rate": 9.057051197499064e-05,
"loss": 1.3762,
"step": 2670
},
{
"epoch": 8.41,
"learning_rate": 8.733493422634493e-05,
"loss": 1.6376,
"step": 2675
},
{
"epoch": 8.43,
"learning_rate": 8.405365298617051e-05,
"loss": 1.3535,
"step": 2680
},
{
"epoch": 8.44,
"learning_rate": 8.073467287340865e-05,
"loss": 1.4565,
"step": 2685
},
{
"epoch": 8.46,
"learning_rate": 7.738609047263366e-05,
"loss": 1.4825,
"step": 2690
},
{
"epoch": 8.47,
"learning_rate": 7.401607458260121e-05,
"loss": 1.2098,
"step": 2695
},
{
"epoch": 8.49,
"learning_rate": 7.063284628862766e-05,
"loss": 1.114,
"step": 2700
},
{
"epoch": 8.51,
"learning_rate": 6.724465890742147e-05,
"loss": 1.1928,
"step": 2705
},
{
"epoch": 8.52,
"learning_rate": 6.385977785328291e-05,
"loss": 1.2321,
"step": 2710
},
{
"epoch": 8.54,
"learning_rate": 6.048646047479429e-05,
"loss": 1.4018,
"step": 2715
},
{
"epoch": 8.55,
"learning_rate": 5.71329359111875e-05,
"loss": 1.4829,
"step": 2720
},
{
"epoch": 8.57,
"learning_rate": 5.3807385017523074e-05,
"loss": 1.3501,
"step": 2725
},
{
"epoch": 8.58,
"learning_rate": 5.051792040766187e-05,
"loss": 1.344,
"step": 2730
},
{
"epoch": 8.6,
"learning_rate": 4.7272566663709795e-05,
"loss": 1.0449,
"step": 2735
},
{
"epoch": 8.62,
"learning_rate": 4.407924076020983e-05,
"loss": 1.3851,
"step": 2740
},
{
"epoch": 8.63,
"learning_rate": 4.0945732750844954e-05,
"loss": 1.3359,
"step": 2745
},
{
"epoch": 8.65,
"learning_rate": 3.787968676476165e-05,
"loss": 1.2086,
"step": 2750
},
{
"epoch": 8.66,
"learning_rate": 3.488858235886944e-05,
"loss": 1.2969,
"step": 2755
},
{
"epoch": 8.68,
"learning_rate": 3.1979716271615364e-05,
"loss": 1.1869,
"step": 2760
},
{
"epoch": 8.69,
"learning_rate": 2.916018462273902e-05,
"loss": 1.4603,
"step": 2765
},
{
"epoch": 8.71,
"learning_rate": 2.643686560243124e-05,
"loss": 1.2891,
"step": 2770
},
{
"epoch": 8.73,
"learning_rate": 2.3816402692130164e-05,
"loss": 1.4753,
"step": 2775
},
{
"epoch": 8.74,
"learning_rate": 2.1305188457882628e-05,
"loss": 1.4468,
"step": 2780
},
{
"epoch": 8.76,
"learning_rate": 1.890934895580877e-05,
"loss": 1.2143,
"step": 2785
},
{
"epoch": 8.77,
"learning_rate": 1.6634728787713087e-05,
"loss": 1.4829,
"step": 2790
},
{
"epoch": 8.79,
"learning_rate": 1.4486876843296578e-05,
"loss": 1.5124,
"step": 2795
},
{
"epoch": 8.81,
"learning_rate": 1.247103276375423e-05,
"loss": 1.1507,
"step": 2800
},
{
"epoch": 8.82,
"learning_rate": 1.0592114159774876e-05,
"loss": 1.2259,
"step": 2805
},
{
"epoch": 8.84,
"learning_rate": 8.854704615130826e-06,
"loss": 1.3434,
"step": 2810
},
{
"epoch": 8.85,
"learning_rate": 7.263042505119003e-06,
"loss": 1.4906,
"step": 2815
},
{
"epoch": 8.87,
"learning_rate": 5.82101065712901e-06,
"loss": 1.2034,
"step": 2820
},
{
"epoch": 8.88,
"learning_rate": 4.532126878565386e-06,
"loss": 1.3118,
"step": 2825
},
{
"epoch": 8.9,
"learning_rate": 3.3995353752283744e-06,
"loss": 1.4476,
"step": 2830
},
{
"epoch": 8.92,
"learning_rate": 2.4259990810866283e-06,
"loss": 1.5325,
"step": 2835
},
{
"epoch": 8.93,
"learning_rate": 1.6138929181565955e-06,
"loss": 1.3443,
"step": 2840
},
{
"epoch": 8.95,
"learning_rate": 9.651980029285464e-07,
"loss": 1.665,
"step": 2845
},
{
"epoch": 8.96,
"learning_rate": 4.814968134727699e-07,
"loss": 1.3313,
"step": 2850
},
{
"epoch": 8.98,
"learning_rate": 1.6396932901667525e-07,
"loss": 1.504,
"step": 2855
},
{
"epoch": 8.99,
"learning_rate": 1.339015140952895e-08,
"loss": 1.2679,
"step": 2860
},
{
"epoch": 9.0,
"eval_loss": 1.7906934022903442,
"eval_runtime": 20.4159,
"eval_samples_per_second": 21.993,
"eval_steps_per_second": 2.792,
"step": 2862
},
{
"epoch": 9.01,
"learning_rate": 3.01266154967001e-08,
"loss": 1.4496,
"step": 2865
},
{
"epoch": 9.03,
"learning_rate": 2.1413789301401304e-07,
"loss": 1.2338,
"step": 2870
},
{
"epoch": 9.04,
"learning_rate": 5.649750921872831e-07,
"loss": 1.1185,
"step": 2875
},
{
"epoch": 9.06,
"learning_rate": 1.0817823527948346e-06,
"loss": 1.0349,
"step": 2880
},
{
"epoch": 9.07,
"learning_rate": 1.7632989340210505e-06,
"loss": 1.1285,
"step": 2885
},
{
"epoch": 9.09,
"learning_rate": 2.6078622900081364e-06,
"loss": 1.1348,
"step": 2890
},
{
"epoch": 9.1,
"learning_rate": 3.613412125602455e-06,
"loss": 1.0489,
"step": 2895
},
{
"epoch": 9.12,
"learning_rate": 4.777495422403447e-06,
"loss": 1.1893,
"step": 2900
},
{
"epoch": 9.14,
"learning_rate": 6.0972724228519425e-06,
"loss": 1.3905,
"step": 2905
},
{
"epoch": 9.15,
"learning_rate": 7.569523557760349e-06,
"loss": 1.2451,
"step": 2910
},
{
"epoch": 9.17,
"learning_rate": 9.190657300387452e-06,
"loss": 1.2443,
"step": 2915
},
{
"epoch": 9.18,
"learning_rate": 1.0956718927894355e-05,
"loss": 1.2626,
"step": 2920
},
{
"epoch": 9.2,
"learning_rate": 1.2863400168810178e-05,
"loss": 1.0339,
"step": 2925
},
{
"epoch": 9.21,
"learning_rate": 1.4906049712973553e-05,
"loss": 1.1284,
"step": 2930
},
{
"epoch": 9.23,
"learning_rate": 1.7079684558309144e-05,
"loss": 1.2964,
"step": 2935
},
{
"epoch": 9.25,
"learning_rate": 1.9379002166758836e-05,
"loss": 0.9701,
"step": 2940
},
{
"epoch": 9.26,
"learning_rate": 2.1798393399717496e-05,
"loss": 1.2379,
"step": 2945
},
{
"epoch": 9.28,
"learning_rate": 2.4331956201411865e-05,
"loss": 1.2294,
"step": 2950
},
{
"epoch": 9.29,
"learning_rate": 2.69735099968445e-05,
"loss": 1.2279,
"step": 2955
},
{
"epoch": 9.31,
"learning_rate": 2.9716610769183196e-05,
"loss": 1.1694,
"step": 2960
},
{
"epoch": 9.32,
"learning_rate": 3.255456677980771e-05,
"loss": 1.3376,
"step": 2965
},
{
"epoch": 9.34,
"learning_rate": 3.548045489267097e-05,
"loss": 1.1979,
"step": 2970
},
{
"epoch": 9.36,
"learning_rate": 3.848713746314718e-05,
"loss": 1.0518,
"step": 2975
},
{
"epoch": 9.37,
"learning_rate": 4.1567279750173456e-05,
"loss": 1.1192,
"step": 2980
},
{
"epoch": 9.39,
"learning_rate": 4.471336780920016e-05,
"loss": 1.258,
"step": 2985
},
{
"epoch": 9.4,
"learning_rate": 4.791772682230585e-05,
"loss": 1.1698,
"step": 2990
},
{
"epoch": 9.42,
"learning_rate": 5.1172539820760084e-05,
"loss": 1.2648,
"step": 2995
},
{
"epoch": 9.43,
"learning_rate": 5.4469866754360636e-05,
"loss": 1.1929,
"step": 3000
},
{
"epoch": 9.45,
"learning_rate": 5.7801663861022995e-05,
"loss": 1.1723,
"step": 3005
},
{
"epoch": 9.47,
"learning_rate": 6.115980328937633e-05,
"loss": 1.225,
"step": 3010
},
{
"epoch": 9.48,
"learning_rate": 6.453609292649126e-05,
"loss": 1.281,
"step": 3015
},
{
"epoch": 9.5,
"learning_rate": 6.792229638237138e-05,
"loss": 1.1719,
"step": 3020
},
{
"epoch": 9.51,
"learning_rate": 7.131015308246217e-05,
"loss": 1.3758,
"step": 3025
},
{
"epoch": 9.53,
"learning_rate": 7.469139841915347e-05,
"loss": 1.3241,
"step": 3030
},
{
"epoch": 9.54,
"learning_rate": 7.805778391312036e-05,
"loss": 1.1407,
"step": 3035
},
{
"epoch": 9.56,
"learning_rate": 8.140109733532393e-05,
"loss": 1.2411,
"step": 3040
},
{
"epoch": 9.58,
"learning_rate": 8.471318274057568e-05,
"loss": 1.413,
"step": 3045
},
{
"epoch": 9.59,
"learning_rate": 8.798596036379883e-05,
"loss": 1.222,
"step": 3050
},
{
"epoch": 9.61,
"learning_rate": 9.121144633045388e-05,
"loss": 1.3584,
"step": 3055
},
{
"epoch": 9.62,
"learning_rate": 9.438177213303655e-05,
"loss": 1.1437,
"step": 3060
},
{
"epoch": 9.64,
"learning_rate": 9.74892038261427e-05,
"loss": 1.2668,
"step": 3065
},
{
"epoch": 9.65,
"learning_rate": 0.00010052616089327018,
"loss": 1.3637,
"step": 3070
},
{
"epoch": 9.67,
"learning_rate": 0.00010348523473933642,
"loss": 1.1909,
"step": 3075
},
{
"epoch": 9.69,
"learning_rate": 0.0001063592067637941,
"loss": 1.3763,
"step": 3080
},
{
"epoch": 9.7,
"learning_rate": 0.00010914106597026132,
"loss": 1.4548,
"step": 3085
},
{
"epoch": 9.72,
"learning_rate": 0.00011182402606970514,
"loss": 1.3378,
"step": 3090
},
{
"epoch": 9.73,
"learning_rate": 0.00011440154203545576,
"loss": 1.3677,
"step": 3095
},
{
"epoch": 9.75,
"learning_rate": 0.00011686732606966472,
"loss": 1.2485,
"step": 3100
},
{
"epoch": 9.76,
"learning_rate": 0.00011921536294226233,
"loss": 1.3363,
"step": 3105
},
{
"epoch": 9.78,
"learning_rate": 0.00012143992466498816,
"loss": 1.1575,
"step": 3110
},
{
"epoch": 9.8,
"learning_rate": 0.00012353558446470098,
"loss": 1.1208,
"step": 3115
},
{
"epoch": 9.81,
"learning_rate": 0.00012549723002188375,
"loss": 1.4087,
"step": 3120
},
{
"epoch": 9.83,
"learning_rate": 0.00012732007594204206,
"loss": 1.3399,
"step": 3125
},
{
"epoch": 9.84,
"learning_rate": 0.00012899967542957612,
"loss": 1.4988,
"step": 3130
},
{
"epoch": 9.86,
"learning_rate": 0.00013053193113564998,
"loss": 0.989,
"step": 3135
},
{
"epoch": 9.87,
"learning_rate": 0.00013191310515358958,
"loss": 1.349,
"step": 3140
},
{
"epoch": 9.89,
"learning_rate": 0.00013313982813743067,
"loss": 1.1874,
"step": 3145
},
{
"epoch": 9.91,
"learning_rate": 0.00013420910752136937,
"loss": 1.2978,
"step": 3150
},
{
"epoch": 9.92,
"learning_rate": 0.00013511833482006638,
"loss": 1.5572,
"step": 3155
},
{
"epoch": 9.94,
"learning_rate": 0.00013586529199199334,
"loss": 1.3689,
"step": 3160
},
{
"epoch": 9.95,
"learning_rate": 0.00013644815685030044,
"loss": 1.1297,
"step": 3165
},
{
"epoch": 9.97,
"learning_rate": 0.00013686550750800414,
"loss": 1.3856,
"step": 3170
},
{
"epoch": 9.98,
"learning_rate": 0.00013711632584665164,
"loss": 1.2732,
"step": 3175
},
{
"epoch": 10.0,
"learning_rate": 0.0001372,
"loss": 1.5975,
"step": 3180
},
{
"epoch": 10.0,
"eval_loss": 1.7831153869628906,
"eval_runtime": 20.3518,
"eval_samples_per_second": 22.062,
"eval_steps_per_second": 2.801,
"step": 3180
},
{
"epoch": 9.95,
"learning_rate": 0.00013645750858358395,
"loss": 1.2433,
"step": 3185
},
{
"epoch": 9.97,
"learning_rate": 0.0001368696722497127,
"loss": 1.547,
"step": 3190
},
{
"epoch": 9.98,
"learning_rate": 0.00013711736829567482,
"loss": 1.4594,
"step": 3195
},
{
"epoch": 10.0,
"learning_rate": 0.0001372,
"loss": 1.3407,
"step": 3200
},
{
"epoch": 10.0,
"eval_loss": 1.139600157737732,
"eval_runtime": 5.2723,
"eval_samples_per_second": 82.317,
"eval_steps_per_second": 10.432,
"step": 3200
},
{
"epoch": 10.02,
"learning_rate": 0.00013711736829567482,
"loss": 1.4415,
"step": 3205
},
{
"epoch": 10.03,
"learning_rate": 0.00013686967224971273,
"loss": 1.2348,
"step": 3210
},
{
"epoch": 10.05,
"learning_rate": 0.00013645750858358398,
"loss": 1.4623,
"step": 3215
},
{
"epoch": 10.06,
"learning_rate": 0.00013588187023566163,
"loss": 1.437,
"step": 3220
},
{
"epoch": 10.08,
"learning_rate": 0.00013514414396914573,
"loss": 1.6916,
"step": 3225
},
{
"epoch": 10.09,
"learning_rate": 0.00013424610703122958,
"loss": 1.7023,
"step": 3230
},
{
"epoch": 10.11,
"learning_rate": 0.00013318992287155525,
"loss": 1.3172,
"step": 3235
},
{
"epoch": 10.12,
"learning_rate": 0.00013197813593027435,
"loss": 1.2053,
"step": 3240
},
{
"epoch": 10.14,
"learning_rate": 0.00013061366550826825,
"loss": 1.1869,
"step": 3245
},
{
"epoch": 10.16,
"learning_rate": 0.00012909979873429724,
"loss": 1.2981,
"step": 3250
},
{
"epoch": 10.17,
"learning_rate": 0.0001274401826460187,
"loss": 1.6608,
"step": 3255
},
{
"epoch": 10.19,
"learning_rate": 0.00012563881540395474,
"loss": 1.3115,
"step": 3260
},
{
"epoch": 10.2,
"learning_rate": 0.00012370003665957216,
"loss": 1.2824,
"step": 3265
},
{
"epoch": 10.22,
"learning_rate": 0.00012162851710068375,
"loss": 1.4082,
"step": 3270
},
{
"epoch": 10.23,
"learning_rate": 0.00011942924719935029,
"loss": 1.3048,
"step": 3275
},
{
"epoch": 10.25,
"learning_rate": 0.00011710752518939736,
"loss": 1.3276,
"step": 3280
},
{
"epoch": 10.27,
"learning_rate": 0.0001146689443025054,
"loss": 1.4064,
"step": 3285
},
{
"epoch": 10.28,
"learning_rate": 0.00011211937929362613,
"loss": 1.2408,
"step": 3290
},
{
"epoch": 10.3,
"learning_rate": 0.00010946497228818107,
"loss": 1.3932,
"step": 3295
},
{
"epoch": 10.31,
"learning_rate": 0.00010671211798514499,
"loss": 1.4576,
"step": 3300
},
{
"epoch": 10.33,
"learning_rate": 0.00010386744825165496,
"loss": 1.455,
"step": 3305
},
{
"epoch": 10.34,
"learning_rate": 0.00010093781614626351,
"loss": 1.3289,
"step": 3310
},
{
"epoch": 10.36,
"learning_rate": 9.793027940931756e-05,
"loss": 1.2645,
"step": 3315
},
{
"epoch": 10.38,
"learning_rate": 9.485208346024504e-05,
"loss": 1.39,
"step": 3320
},
{
"epoch": 10.39,
"learning_rate": 9.17106439427063e-05,
"loss": 1.3945,
"step": 3325
},
{
"epoch": 10.41,
"learning_rate": 8.851352885965625e-05,
"loss": 1.5375,
"step": 3330
},
{
"epoch": 10.42,
"learning_rate": 8.526844034136417e-05,
"loss": 1.4077,
"step": 3335
},
{
"epoch": 10.44,
"learning_rate": 8.198319609030632e-05,
"loss": 1.4331,
"step": 3340
},
{
"epoch": 10.45,
"learning_rate": 7.866571054763788e-05,
"loss": 1.8602,
"step": 3345
},
{
"epoch": 10.47,
"learning_rate": 7.532397582660805e-05,
"loss": 1.4865,
"step": 3350
},
{
"epoch": 10.48,
"learning_rate": 7.19660424588612e-05,
"loss": 1.2815,
"step": 3355
},
{
"epoch": 10.5,
"learning_rate": 6.859999999999997e-05,
"loss": 1.4705,
"step": 3360
},
{
"epoch": 10.52,
"learning_rate": 6.523395754113922e-05,
"loss": 1.1969,
"step": 3365
},
{
"epoch": 10.53,
"learning_rate": 6.187602417339237e-05,
"loss": 1.4564,
"step": 3370
},
{
"epoch": 10.55,
"learning_rate": 5.853428945236207e-05,
"loss": 1.4113,
"step": 3375
},
{
"epoch": 10.56,
"learning_rate": 5.521680390969362e-05,
"loss": 1.4642,
"step": 3380
},
{
"epoch": 10.58,
"learning_rate": 5.193155965863624e-05,
"loss": 1.4196,
"step": 3385
},
{
"epoch": 10.59,
"learning_rate": 4.8686471140344147e-05,
"loss": 1.3666,
"step": 3390
},
{
"epoch": 10.61,
"learning_rate": 4.548935605729363e-05,
"loss": 1.3908,
"step": 3395
},
{
"epoch": 10.62,
"learning_rate": 4.23479165397549e-05,
"loss": 1.4785,
"step": 3400
},
{
"epoch": 10.64,
"learning_rate": 3.926972059068282e-05,
"loss": 1.4775,
"step": 3405
},
{
"epoch": 10.66,
"learning_rate": 3.626218385373685e-05,
"loss": 1.4841,
"step": 3410
},
{
"epoch": 10.67,
"learning_rate": 3.333255174834496e-05,
"loss": 1.4263,
"step": 3415
},
{
"epoch": 10.69,
"learning_rate": 3.0487882014855373e-05,
"loss": 1.4815,
"step": 3420
},
{
"epoch": 10.7,
"learning_rate": 2.7735027711819264e-05,
"loss": 1.3612,
"step": 3425
},
{
"epoch": 10.72,
"learning_rate": 2.508062070637383e-05,
"loss": 1.3586,
"step": 3430
},
{
"epoch": 10.73,
"learning_rate": 2.253105569749455e-05,
"loss": 1.4036,
"step": 3435
},
{
"epoch": 10.75,
"learning_rate": 2.0092474810602945e-05,
"loss": 1.2455,
"step": 3440
},
{
"epoch": 10.77,
"learning_rate": 1.7770752800649997e-05,
"loss": 1.3747,
"step": 3445
},
{
"epoch": 10.78,
"learning_rate": 1.5571482899316204e-05,
"loss": 1.2848,
"step": 3450
},
{
"epoch": 10.8,
"learning_rate": 1.3499963340427795e-05,
"loss": 1.5623,
"step": 3455
},
{
"epoch": 10.81,
"learning_rate": 1.1561184596045504e-05,
"loss": 1.4704,
"step": 3460
},
{
"epoch": 10.83,
"learning_rate": 9.759817353981509e-06,
"loss": 1.3271,
"step": 3465
},
{
"epoch": 10.84,
"learning_rate": 8.100201265702836e-06,
"loss": 1.2696,
"step": 3470
},
{
"epoch": 10.86,
"learning_rate": 6.586334491731833e-06,
"loss": 1.5138,
"step": 3475
},
{
"epoch": 10.88,
"learning_rate": 5.221864069725821e-06,
"loss": 1.344,
"step": 3480
},
{
"epoch": 10.89,
"learning_rate": 4.010077128444735e-06,
"loss": 1.3544,
"step": 3485
},
{
"epoch": 10.91,
"learning_rate": 2.9538929687704825e-06,
"loss": 1.6602,
"step": 3490
},
{
"epoch": 10.92,
"learning_rate": 2.0558560308543213e-06,
"loss": 1.3761,
"step": 3495
},
{
"epoch": 10.94,
"learning_rate": 1.3181297643384459e-06,
"loss": 1.3709,
"step": 3500
},
{
"epoch": 10.95,
"learning_rate": 7.424914164160148e-07,
"loss": 1.3595,
"step": 3505
},
{
"epoch": 10.97,
"learning_rate": 3.303277502872983e-07,
"loss": 1.4077,
"step": 3510
},
{
"epoch": 10.98,
"learning_rate": 8.263170432518063e-08,
"loss": 1.4356,
"step": 3515
},
{
"epoch": 11.0,
"learning_rate": 0.0,
"loss": 1.7243,
"step": 3520
},
{
"epoch": 11.0,
"eval_loss": 1.1156859397888184,
"eval_runtime": 5.2715,
"eval_samples_per_second": 82.33,
"eval_steps_per_second": 10.433,
"step": 3520
},
{
"epoch": 10.95,
"learning_rate": 9.414215321223168e-07,
"loss": 1.3031,
"step": 3525
},
{
"epoch": 10.96,
"learning_rate": 4.696220449804098e-07,
"loss": 1.0973,
"step": 3530
},
{
"epoch": 10.98,
"learning_rate": 1.5992243352901425e-07,
"loss": 1.1205,
"step": 3535
},
{
"epoch": 10.99,
"learning_rate": 1.3059553632214649e-08,
"loss": 1.5828,
"step": 3540
},
{
"epoch": 11.0,
"eval_loss": 1.0779144763946533,
"eval_runtime": 8.8469,
"eval_samples_per_second": 46.57,
"eval_steps_per_second": 5.878,
"step": 3542
}
],
"max_steps": 3864,
"num_train_epochs": 12,
"total_flos": 3691011244032000.0,
"trial_name": null,
"trial_params": null
}