{ "best_metric": 1.0779144763946533, "best_model_checkpoint": "output/bob-dylan/checkpoint-3542", "epoch": 11.0, "global_step": 3542, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00013711788223044424, "loss": 3.6967, "step": 5 }, { "epoch": 0.03, "learning_rate": 0.0001368717255202631, "loss": 3.7692, "step": 10 }, { "epoch": 0.05, "learning_rate": 0.00013646211919423798, "loss": 3.4916, "step": 15 }, { "epoch": 0.06, "learning_rate": 0.00013589004389254062, "loss": 3.6186, "step": 20 }, { "epoch": 0.08, "learning_rate": 0.00013515686922297834, "loss": 3.5756, "step": 25 }, { "epoch": 0.09, "learning_rate": 0.00013426435048201062, "loss": 3.236, "step": 30 }, { "epoch": 0.11, "learning_rate": 0.0001332146244523866, "loss": 3.3956, "step": 35 }, { "epoch": 0.12, "learning_rate": 0.00013201020428746477, "loss": 3.3277, "step": 40 }, { "epoch": 0.14, "learning_rate": 0.0001306539734944624, "loss": 3.2617, "step": 45 }, { "epoch": 0.16, "learning_rate": 0.00012914917903103908, "loss": 3.554, "step": 50 }, { "epoch": 0.17, "learning_rate": 0.00012749942353174222, "loss": 3.2625, "step": 55 }, { "epoch": 0.19, "learning_rate": 0.00012570865668292503, "loss": 3.2986, "step": 60 }, { "epoch": 0.2, "learning_rate": 0.0001237811657667863, "loss": 3.264, "step": 65 }, { "epoch": 0.22, "learning_rate": 0.00012172156539717071, "loss": 3.0437, "step": 70 }, { "epoch": 0.23, "learning_rate": 0.00011953478647170303, "loss": 3.0527, "step": 75 }, { "epoch": 0.25, "learning_rate": 0.000117226064366706, "loss": 3.3623, "step": 80 }, { "epoch": 0.26, "learning_rate": 0.0001148009264031647, "loss": 3.0801, "step": 85 }, { "epoch": 0.28, "learning_rate": 0.0001122651786137447, "loss": 3.0471, "step": 90 }, { "epoch": 0.3, "learning_rate": 0.00010962489184254581, "loss": 3.051, "step": 95 }, { "epoch": 0.31, "learning_rate": 0.00010688638721086951, "loss": 3.2191, "step": 100 }, { "epoch": 0.33, "learning_rate": 0.0001040562209837965, "loss": 2.9343, "step": 105 }, { "epoch": 0.34, "learning_rate": 0.00010114116887380613, "loss": 2.9893, "step": 110 }, { "epoch": 0.36, "learning_rate": 9.814820981901533e-05, "loss": 3.1068, "step": 115 }, { "epoch": 0.37, "learning_rate": 9.508450927487455e-05, "loss": 3.1636, "step": 120 }, { "epoch": 0.39, "learning_rate": 9.195740205932179e-05, "loss": 3.1864, "step": 125 }, { "epoch": 0.4, "learning_rate": 8.877437479246497e-05, "loss": 2.9116, "step": 130 }, { "epoch": 0.42, "learning_rate": 8.554304797283438e-05, "loss": 3.1795, "step": 135 }, { "epoch": 0.44, "learning_rate": 8.227115773311617e-05, "loss": 2.9074, "step": 140 }, { "epoch": 0.45, "learning_rate": 7.896653731904552e-05, "loss": 3.1763, "step": 145 }, { "epoch": 0.47, "learning_rate": 7.56370983358012e-05, "loss": 3.0588, "step": 150 }, { "epoch": 0.48, "learning_rate": 7.229081180679942e-05, "loss": 2.9373, "step": 155 }, { "epoch": 0.5, "learning_rate": 6.893568909023427e-05, "loss": 3.092, "step": 160 }, { "epoch": 0.51, "learning_rate": 6.557976269905237e-05, "loss": 3.0733, "step": 165 }, { "epoch": 0.53, "learning_rate": 6.223106707028106e-05, "loss": 2.9187, "step": 170 }, { "epoch": 0.55, "learning_rate": 5.889761932974993e-05, "loss": 2.9659, "step": 175 }, { "epoch": 0.56, "learning_rate": 5.5587400098257335e-05, "loss": 3.1799, "step": 180 }, { "epoch": 0.58, "learning_rate": 5.230833438513365e-05, "loss": 2.9959, "step": 185 }, { "epoch": 0.59, "learning_rate": 4.9068272614944106e-05, "loss": 2.8529, "step": 190 }, { "epoch": 0.61, "learning_rate": 4.58749718327555e-05, "loss": 3.0337, "step": 195 }, { "epoch": 0.62, "learning_rate": 4.2736077132963006e-05, "loss": 3.0827, "step": 200 }, { "epoch": 0.64, "learning_rate": 3.9659103356138536e-05, "loss": 2.8453, "step": 205 }, { "epoch": 0.65, "learning_rate": 3.6651417097720435e-05, "loss": 2.9619, "step": 210 }, { "epoch": 0.67, "learning_rate": 3.372021907161731e-05, "loss": 3.0061, "step": 215 }, { "epoch": 0.69, "learning_rate": 3.0872526870949537e-05, "loss": 2.8687, "step": 220 }, { "epoch": 0.7, "learning_rate": 2.8115158167201102e-05, "loss": 2.9913, "step": 225 }, { "epoch": 0.72, "learning_rate": 2.5454714388004492e-05, "loss": 2.9778, "step": 230 }, { "epoch": 0.73, "learning_rate": 2.289756491263597e-05, "loss": 3.0748, "step": 235 }, { "epoch": 0.75, "learning_rate": 2.0449831823058788e-05, "loss": 2.7658, "step": 240 }, { "epoch": 0.76, "learning_rate": 1.8117375247021725e-05, "loss": 2.9792, "step": 245 }, { "epoch": 0.78, "learning_rate": 1.5905779328303487e-05, "loss": 2.7718, "step": 250 }, { "epoch": 0.79, "learning_rate": 1.3820338857691364e-05, "loss": 2.9954, "step": 255 }, { "epoch": 0.81, "learning_rate": 1.1866046596701035e-05, "loss": 2.9246, "step": 260 }, { "epoch": 0.83, "learning_rate": 1.0047581324385938e-05, "loss": 2.9875, "step": 265 }, { "epoch": 0.84, "learning_rate": 8.36929663585326e-06, "loss": 2.673, "step": 270 }, { "epoch": 0.86, "learning_rate": 6.835210519304257e-06, "loss": 2.8515, "step": 275 }, { "epoch": 0.87, "learning_rate": 5.448995736552248e-06, "loss": 2.9416, "step": 280 }, { "epoch": 0.89, "learning_rate": 4.213971030048682e-06, "loss": 2.8573, "step": 285 }, { "epoch": 0.9, "learning_rate": 3.133093177468323e-06, "loss": 2.6045, "step": 290 }, { "epoch": 0.92, "learning_rate": 2.208949912875789e-06, "loss": 3.0414, "step": 295 }, { "epoch": 0.93, "learning_rate": 1.4437537314208725e-06, "loss": 2.7666, "step": 300 }, { "epoch": 0.95, "learning_rate": 8.39336592394954e-07, "loss": 2.7331, "step": 305 }, { "epoch": 0.97, "learning_rate": 3.971455333297437e-07, "loss": 2.847, "step": 310 }, { "epoch": 0.98, "learning_rate": 1.1823920563887646e-07, "loss": 2.7128, "step": 315 }, { "epoch": 1.0, "learning_rate": 3.2853400962779e-09, "loss": 2.6074, "step": 320 }, { "epoch": 1.0, "eval_loss": 2.670438766479492, "eval_runtime": 18.6137, "eval_samples_per_second": 22.833, "eval_steps_per_second": 2.901, "step": 321 }, { "epoch": 1.02, "learning_rate": 1.6396932901668288e-07, "loss": 2.7644, "step": 325 }, { "epoch": 1.04, "learning_rate": 4.814968134727319e-07, "loss": 2.6609, "step": 330 }, { "epoch": 1.05, "learning_rate": 9.651980029285235e-07, "loss": 2.681, "step": 335 }, { "epoch": 1.07, "learning_rate": 1.6138929181565879e-06, "loss": 2.6512, "step": 340 }, { "epoch": 1.08, "learning_rate": 2.4259990810865978e-06, "loss": 2.6379, "step": 345 }, { "epoch": 1.1, "learning_rate": 3.3995353752283744e-06, "loss": 2.9539, "step": 350 }, { "epoch": 1.12, "learning_rate": 4.532126878565439e-06, "loss": 2.7261, "step": 355 }, { "epoch": 1.13, "learning_rate": 5.821010657128926e-06, "loss": 2.8164, "step": 360 }, { "epoch": 1.15, "learning_rate": 7.26304250511898e-06, "loss": 2.8454, "step": 365 }, { "epoch": 1.16, "learning_rate": 8.854704615130857e-06, "loss": 2.7863, "step": 370 }, { "epoch": 1.18, "learning_rate": 1.0592114159774732e-05, "loss": 2.8572, "step": 375 }, { "epoch": 1.19, "learning_rate": 1.2471032763754147e-05, "loss": 2.8415, "step": 380 }, { "epoch": 1.21, "learning_rate": 1.4486876843296586e-05, "loss": 2.6313, "step": 385 }, { "epoch": 1.23, "learning_rate": 1.663472878771285e-05, "loss": 2.7228, "step": 390 }, { "epoch": 1.24, "learning_rate": 1.8909348955808624e-05, "loss": 2.5958, "step": 395 }, { "epoch": 1.26, "learning_rate": 2.1305188457882567e-05, "loss": 2.7318, "step": 400 }, { "epoch": 1.27, "learning_rate": 2.3816402692130242e-05, "loss": 2.7112, "step": 405 }, { "epoch": 1.29, "learning_rate": 2.6436865602431172e-05, "loss": 2.7692, "step": 410 }, { "epoch": 1.31, "learning_rate": 2.9160184622738906e-05, "loss": 2.7993, "step": 415 }, { "epoch": 1.32, "learning_rate": 3.197971627161534e-05, "loss": 2.8784, "step": 420 }, { "epoch": 1.34, "learning_rate": 3.4888582358869375e-05, "loss": 2.7869, "step": 425 }, { "epoch": 1.35, "learning_rate": 3.7879686764761624e-05, "loss": 2.8056, "step": 430 }, { "epoch": 1.37, "learning_rate": 4.09457327508451e-05, "loss": 2.553, "step": 435 }, { "epoch": 1.38, "learning_rate": 4.407924076020965e-05, "loss": 3.1824, "step": 440 }, { "epoch": 1.4, "learning_rate": 4.7272566663709714e-05, "loss": 2.8178, "step": 445 }, { "epoch": 1.42, "learning_rate": 5.0517920407661915e-05, "loss": 2.6659, "step": 450 }, { "epoch": 1.43, "learning_rate": 5.3807385017522816e-05, "loss": 2.5079, "step": 455 }, { "epoch": 1.45, "learning_rate": 5.7132935911187364e-05, "loss": 3.0069, "step": 460 }, { "epoch": 1.46, "learning_rate": 6.048646047479427e-05, "loss": 2.7084, "step": 465 }, { "epoch": 1.48, "learning_rate": 6.385977785328252e-05, "loss": 2.738, "step": 470 }, { "epoch": 1.49, "learning_rate": 6.724465890742127e-05, "loss": 2.7037, "step": 475 }, { "epoch": 1.51, "learning_rate": 7.063284628862763e-05, "loss": 2.7949, "step": 480 }, { "epoch": 1.53, "learning_rate": 7.401607458260099e-05, "loss": 2.8525, "step": 485 }, { "epoch": 1.54, "learning_rate": 7.738609047263364e-05, "loss": 2.7132, "step": 490 }, { "epoch": 1.56, "learning_rate": 8.07346728734085e-05, "loss": 2.7785, "step": 495 }, { "epoch": 1.57, "learning_rate": 8.405365298617048e-05, "loss": 2.7346, "step": 500 }, { "epoch": 1.59, "learning_rate": 8.733493422634478e-05, "loss": 2.8556, "step": 505 }, { "epoch": 1.6, "learning_rate": 9.057051197499066e-05, "loss": 2.7322, "step": 510 }, { "epoch": 1.62, "learning_rate": 9.375249310590655e-05, "loss": 2.6462, "step": 515 }, { "epoch": 1.64, "learning_rate": 9.687311524075135e-05, "loss": 2.7953, "step": 520 }, { "epoch": 1.65, "learning_rate": 9.992476568520869e-05, "loss": 2.7156, "step": 525 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.8443, "step": 530 }, { "epoch": 1.68, "learning_rate": 0.00010579156016144291, "loss": 2.73, "step": 535 }, { "epoch": 1.7, "learning_rate": 0.00010859239226725277, "loss": 2.6283, "step": 540 }, { "epoch": 1.71, "learning_rate": 0.00011129566374439388, "loss": 2.7128, "step": 545 }, { "epoch": 1.73, "learning_rate": 0.00011389478001700271, "loss": 2.848, "step": 550 }, { "epoch": 1.75, "learning_rate": 0.00011638340059372158, "loss": 2.6623, "step": 555 }, { "epoch": 1.76, "learning_rate": 0.00011875545453519892, "loss": 2.5335, "step": 560 }, { "epoch": 1.78, "learning_rate": 0.00012100515526402216, "loss": 2.7063, "step": 565 }, { "epoch": 1.79, "learning_rate": 0.00012312701468095605, "loss": 2.5592, "step": 570 }, { "epoch": 1.81, "learning_rate": 0.00012511585655304897, "loss": 2.658, "step": 575 }, { "epoch": 1.82, "learning_rate": 0.00012696682914094848, "loss": 2.7849, "step": 580 }, { "epoch": 1.84, "learning_rate": 0.00012867541703462067, "loss": 2.4521, "step": 585 }, { "epoch": 1.86, "learning_rate": 0.0001302374521686013, "loss": 2.7116, "step": 590 }, { "epoch": 1.87, "learning_rate": 0.00013164912398990668, "loss": 2.5754, "step": 595 }, { "epoch": 1.89, "learning_rate": 0.0001329069887538007, "loss": 2.6935, "step": 600 }, { "epoch": 1.9, "learning_rate": 0.00013400797792473965, "loss": 2.4856, "step": 605 }, { "epoch": 1.92, "learning_rate": 0.0001349494056620028, "loss": 2.7162, "step": 610 }, { "epoch": 1.93, "learning_rate": 0.000135728975371746, "loss": 2.8196, "step": 615 }, { "epoch": 1.95, "learning_rate": 0.0001363447853094957, "loss": 2.6532, "step": 620 }, { "epoch": 1.97, "learning_rate": 0.00013679533321941633, "loss": 2.5941, "step": 625 }, { "epoch": 1.98, "learning_rate": 0.00013707951999903246, "loss": 2.4365, "step": 630 }, { "epoch": 2.0, "learning_rate": 0.00013719665238046719, "loss": 2.6559, "step": 635 }, { "epoch": 2.0, "eval_loss": 2.319049835205078, "eval_runtime": 20.2442, "eval_samples_per_second": 22.179, "eval_steps_per_second": 2.816, "step": 636 }, { "epoch": 2.01, "learning_rate": 0.00013714644462165502, "loss": 2.3683, "step": 640 }, { "epoch": 2.03, "learning_rate": 0.00013692901920340388, "loss": 2.3765, "step": 645 }, { "epoch": 2.04, "learning_rate": 0.00013654490653060555, "loss": 2.7581, "step": 650 }, { "epoch": 2.06, "learning_rate": 0.00013599504363832372, "loss": 2.5057, "step": 655 }, { "epoch": 2.08, "learning_rate": 0.00013528077190591619, "loss": 2.5706, "step": 660 }, { "epoch": 2.09, "learning_rate": 0.00013440383378476688, "loss": 2.5847, "step": 665 }, { "epoch": 2.11, "learning_rate": 0.00013336636854761118, "loss": 2.3126, "step": 670 }, { "epoch": 2.12, "learning_rate": 0.00013217090706982377, "loss": 2.309, "step": 675 }, { "epoch": 2.14, "learning_rate": 0.00013082036565539919, "loss": 2.5847, "step": 680 }, { "epoch": 2.15, "learning_rate": 0.00012931803892268768, "loss": 2.4275, "step": 685 }, { "epoch": 2.17, "learning_rate": 0.00012766759176724058, "loss": 2.3388, "step": 690 }, { "epoch": 2.19, "learning_rate": 0.00012587305042137224, "loss": 2.4289, "step": 695 }, { "epoch": 2.2, "learning_rate": 0.00012393879263224768, "loss": 2.3432, "step": 700 }, { "epoch": 2.22, "learning_rate": 0.00012186953698245773, "loss": 2.1438, "step": 705 }, { "epoch": 2.23, "learning_rate": 0.00011967033137913228, "loss": 2.4482, "step": 710 }, { "epoch": 2.25, "learning_rate": 0.00011734654073967362, "loss": 2.596, "step": 715 }, { "epoch": 2.26, "learning_rate": 0.00011490383390414927, "loss": 2.3528, "step": 720 }, { "epoch": 2.28, "learning_rate": 0.00011234816980627178, "loss": 2.2922, "step": 725 }, { "epoch": 2.3, "learning_rate": 0.0001096857829367009, "loss": 2.2967, "step": 730 }, { "epoch": 2.31, "learning_rate": 0.00010692316813413058, "loss": 2.3893, "step": 735 }, { "epoch": 2.33, "learning_rate": 0.0001040670647412614, "loss": 2.2958, "step": 740 }, { "epoch": 2.34, "learning_rate": 0.00010112444016431127, "loss": 2.3878, "step": 745 }, { "epoch": 2.36, "learning_rate": 9.810247287616931e-05, "loss": 2.0135, "step": 750 }, { "epoch": 2.37, "learning_rate": 9.500853490465723e-05, "loss": 2.5128, "step": 755 }, { "epoch": 2.39, "learning_rate": 9.185017384861694e-05, "loss": 2.2281, "step": 760 }, { "epoch": 2.41, "learning_rate": 8.86350944656966e-05, "loss": 2.2168, "step": 765 }, { "epoch": 2.42, "learning_rate": 8.537113987675077e-05, "loss": 2.2228, "step": 770 }, { "epoch": 2.44, "learning_rate": 8.206627243270665e-05, "loss": 2.5163, "step": 775 }, { "epoch": 2.45, "learning_rate": 7.872855429057025e-05, "loss": 2.5593, "step": 780 }, { "epoch": 2.47, "learning_rate": 7.536612774595843e-05, "loss": 1.8992, "step": 785 }, { "epoch": 2.48, "learning_rate": 7.198719537013403e-05, "loss": 2.2331, "step": 790 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 2.2157, "step": 795 }, { "epoch": 2.52, "learning_rate": 6.521280462986602e-05, "loss": 2.2611, "step": 800 }, { "epoch": 2.53, "learning_rate": 6.183387225404161e-05, "loss": 2.2053, "step": 805 }, { "epoch": 2.55, "learning_rate": 5.8471445709429735e-05, "loss": 2.1826, "step": 810 }, { "epoch": 2.56, "learning_rate": 5.513372756729345e-05, "loss": 1.995, "step": 815 }, { "epoch": 2.58, "learning_rate": 5.182886012324932e-05, "loss": 2.3863, "step": 820 }, { "epoch": 2.59, "learning_rate": 4.856490553430346e-05, "loss": 2.3232, "step": 825 }, { "epoch": 2.61, "learning_rate": 4.534982615138311e-05, "loss": 2.3315, "step": 830 }, { "epoch": 2.63, "learning_rate": 4.2191465095342816e-05, "loss": 2.3571, "step": 835 }, { "epoch": 2.64, "learning_rate": 3.909752712383074e-05, "loss": 2.1077, "step": 840 }, { "epoch": 2.66, "learning_rate": 3.607555983568871e-05, "loss": 2.1479, "step": 845 }, { "epoch": 2.67, "learning_rate": 3.313293525873858e-05, "loss": 2.2508, "step": 850 }, { "epoch": 2.69, "learning_rate": 3.027683186586951e-05, "loss": 2.5729, "step": 855 }, { "epoch": 2.7, "learning_rate": 2.7514217063299187e-05, "loss": 2.4242, "step": 860 }, { "epoch": 2.72, "learning_rate": 2.485183019372827e-05, "loss": 2.2222, "step": 865 }, { "epoch": 2.74, "learning_rate": 2.2296166095850762e-05, "loss": 2.3744, "step": 870 }, { "epoch": 2.75, "learning_rate": 1.9853459260326405e-05, "loss": 2.2229, "step": 875 }, { "epoch": 2.77, "learning_rate": 1.752966862086776e-05, "loss": 2.4542, "step": 880 }, { "epoch": 2.78, "learning_rate": 1.5330463017542246e-05, "loss": 2.253, "step": 885 }, { "epoch": 2.8, "learning_rate": 1.3261207367752365e-05, "loss": 2.2606, "step": 890 }, { "epoch": 2.81, "learning_rate": 1.1326949578627828e-05, "loss": 2.535, "step": 895 }, { "epoch": 2.83, "learning_rate": 9.532408232759462e-06, "loss": 2.3032, "step": 900 }, { "epoch": 2.85, "learning_rate": 7.881961077312348e-06, "loss": 2.2779, "step": 905 }, { "epoch": 2.86, "learning_rate": 6.379634344600831e-06, "loss": 2.2839, "step": 910 }, { "epoch": 2.88, "learning_rate": 5.029092930176238e-06, "loss": 2.0958, "step": 915 }, { "epoch": 2.89, "learning_rate": 3.833631452388814e-06, "loss": 2.1064, "step": 920 }, { "epoch": 2.91, "learning_rate": 2.7961662152331326e-06, "loss": 2.3978, "step": 925 }, { "epoch": 2.92, "learning_rate": 1.919228094083838e-06, "loss": 2.2206, "step": 930 }, { "epoch": 2.94, "learning_rate": 1.204956361676291e-06, "loss": 2.2185, "step": 935 }, { "epoch": 2.96, "learning_rate": 6.550934693944858e-07, "loss": 2.4076, "step": 940 }, { "epoch": 2.97, "learning_rate": 2.709807965961209e-07, "loss": 2.2124, "step": 945 }, { "epoch": 2.99, "learning_rate": 5.355537834497188e-08, "loss": 2.6099, "step": 950 }, { "epoch": 3.0, "eval_loss": 2.0997118949890137, "eval_runtime": 20.2461, "eval_samples_per_second": 22.177, "eval_steps_per_second": 2.815, "step": 954 }, { "epoch": 3.0, "learning_rate": 3.347619532822632e-09, "loss": 2.1903, "step": 955 }, { "epoch": 3.02, "learning_rate": 1.2048000096755528e-07, "loss": 2.2037, "step": 960 }, { "epoch": 3.03, "learning_rate": 4.0466678058365933e-07, "loss": 1.8544, "step": 965 }, { "epoch": 3.05, "learning_rate": 8.552146905042755e-07, "loss": 2.1987, "step": 970 }, { "epoch": 3.07, "learning_rate": 1.4710246282540082e-06, "loss": 2.1019, "step": 975 }, { "epoch": 3.08, "learning_rate": 2.250594337997185e-06, "loss": 2.1517, "step": 980 }, { "epoch": 3.1, "learning_rate": 3.192022075260327e-06, "loss": 2.1346, "step": 985 }, { "epoch": 3.11, "learning_rate": 4.293011246199299e-06, "loss": 2.0847, "step": 990 }, { "epoch": 3.13, "learning_rate": 5.550876010093297e-06, "loss": 1.9358, "step": 995 }, { "epoch": 3.14, "learning_rate": 6.962547831398709e-06, "loss": 2.2579, "step": 1000 }, { "epoch": 3.16, "learning_rate": 8.524582965379288e-06, "loss": 2.0876, "step": 1005 }, { "epoch": 3.18, "learning_rate": 1.0233170859051466e-05, "loss": 1.8737, "step": 1010 }, { "epoch": 3.19, "learning_rate": 1.2084143446950978e-05, "loss": 2.1, "step": 1015 }, { "epoch": 3.21, "learning_rate": 1.4072985319043973e-05, "loss": 2.0016, "step": 1020 }, { "epoch": 3.22, "learning_rate": 1.619484473597781e-05, "loss": 2.1936, "step": 1025 }, { "epoch": 3.24, "learning_rate": 1.844454546480105e-05, "loss": 2.2236, "step": 1030 }, { "epoch": 3.25, "learning_rate": 2.081659940627838e-05, "loss": 1.94, "step": 1035 }, { "epoch": 3.27, "learning_rate": 2.330521998299727e-05, "loss": 2.103, "step": 1040 }, { "epoch": 3.29, "learning_rate": 2.5904336255606053e-05, "loss": 2.0909, "step": 1045 }, { "epoch": 3.3, "learning_rate": 2.860760773274715e-05, "loss": 2.1943, "step": 1050 }, { "epoch": 3.32, "learning_rate": 3.1408439838557e-05, "loss": 2.3713, "step": 1055 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 2.1534, "step": 1060 }, { "epoch": 3.35, "learning_rate": 3.727523431479128e-05, "loss": 2.1973, "step": 1065 }, { "epoch": 3.36, "learning_rate": 4.0326884759248605e-05, "loss": 2.1412, "step": 1070 }, { "epoch": 3.38, "learning_rate": 4.3447506894093424e-05, "loss": 2.1419, "step": 1075 }, { "epoch": 3.4, "learning_rate": 4.662948802500929e-05, "loss": 2.179, "step": 1080 }, { "epoch": 3.41, "learning_rate": 4.986506577365512e-05, "loss": 2.0405, "step": 1085 }, { "epoch": 3.43, "learning_rate": 5.314634701382942e-05, "loss": 2.1397, "step": 1090 }, { "epoch": 3.44, "learning_rate": 5.646532712659141e-05, "loss": 2.0391, "step": 1095 }, { "epoch": 3.46, "learning_rate": 5.9813909527366384e-05, "loss": 2.2312, "step": 1100 }, { "epoch": 3.47, "learning_rate": 6.318392541739896e-05, "loss": 1.8413, "step": 1105 }, { "epoch": 3.49, "learning_rate": 6.656715371137238e-05, "loss": 2.2519, "step": 1110 }, { "epoch": 3.51, "learning_rate": 6.995534109257869e-05, "loss": 1.871, "step": 1115 }, { "epoch": 3.52, "learning_rate": 7.334022214671738e-05, "loss": 1.9633, "step": 1120 }, { "epoch": 3.54, "learning_rate": 7.671353952520564e-05, "loss": 2.3068, "step": 1125 }, { "epoch": 3.55, "learning_rate": 8.006706408881254e-05, "loss": 2.083, "step": 1130 }, { "epoch": 3.57, "learning_rate": 8.33926149824772e-05, "loss": 2.1478, "step": 1135 }, { "epoch": 3.58, "learning_rate": 8.668207959233806e-05, "loss": 2.1284, "step": 1140 }, { "epoch": 3.6, "learning_rate": 8.992743333629024e-05, "loss": 2.1958, "step": 1145 }, { "epoch": 3.62, "learning_rate": 9.312075923979032e-05, "loss": 1.9476, "step": 1150 }, { "epoch": 3.63, "learning_rate": 9.625426724915486e-05, "loss": 2.1252, "step": 1155 }, { "epoch": 3.65, "learning_rate": 9.932031323523828e-05, "loss": 1.921, "step": 1160 }, { "epoch": 3.66, "learning_rate": 0.00010231141764113059, "loss": 2.1346, "step": 1165 }, { "epoch": 3.68, "learning_rate": 0.00010522028372838456, "loss": 2.1729, "step": 1170 }, { "epoch": 3.69, "learning_rate": 0.00010803981537726111, "loss": 1.7231, "step": 1175 }, { "epoch": 3.71, "learning_rate": 0.0001107631343975688, "loss": 1.9233, "step": 1180 }, { "epoch": 3.73, "learning_rate": 0.00011338359730786977, "loss": 2.2671, "step": 1185 }, { "epoch": 3.74, "learning_rate": 0.0001158948115421174, "loss": 1.9335, "step": 1190 }, { "epoch": 3.76, "learning_rate": 0.00011829065104419134, "loss": 2.0918, "step": 1195 }, { "epoch": 3.77, "learning_rate": 0.00012056527121228711, "loss": 2.2418, "step": 1200 }, { "epoch": 3.79, "learning_rate": 0.00012271312315670338, "loss": 2.1403, "step": 1205 }, { "epoch": 3.81, "learning_rate": 0.0001247289672362458, "loss": 2.1248, "step": 1210 }, { "epoch": 3.82, "learning_rate": 0.00012660788584022527, "loss": 2.0255, "step": 1215 }, { "epoch": 3.84, "learning_rate": 0.00012834529538486916, "loss": 2.1805, "step": 1220 }, { "epoch": 3.85, "learning_rate": 0.000129936957494881, "loss": 1.9559, "step": 1225 }, { "epoch": 3.87, "learning_rate": 0.00013137898934287106, "loss": 2.1316, "step": 1230 }, { "epoch": 3.88, "learning_rate": 0.00013266787312143455, "loss": 2.4223, "step": 1235 }, { "epoch": 3.9, "learning_rate": 0.0001338004646247716, "loss": 2.0369, "step": 1240 }, { "epoch": 3.92, "learning_rate": 0.00013477400091891338, "loss": 2.0681, "step": 1245 }, { "epoch": 3.93, "learning_rate": 0.00013558610708184338, "loss": 2.1821, "step": 1250 }, { "epoch": 3.95, "learning_rate": 0.00013623480199707148, "loss": 2.2183, "step": 1255 }, { "epoch": 3.96, "learning_rate": 0.00013671850318652728, "loss": 2.0026, "step": 1260 }, { "epoch": 3.98, "learning_rate": 0.00013703603067098332, "loss": 1.9368, "step": 1265 }, { "epoch": 3.99, "learning_rate": 0.0001371866098485905, "loss": 2.1825, "step": 1270 }, { "epoch": 4.0, "eval_loss": 2.0486814975738525, "eval_runtime": 20.2715, "eval_samples_per_second": 22.149, "eval_steps_per_second": 2.812, "step": 1272 }, { "epoch": 4.01, "learning_rate": 0.0001371698733845033, "loss": 2.0223, "step": 1275 }, { "epoch": 4.03, "learning_rate": 0.000136985862106986, "loss": 2.0536, "step": 1280 }, { "epoch": 4.04, "learning_rate": 0.0001366350249078127, "loss": 1.962, "step": 1285 }, { "epoch": 4.06, "learning_rate": 0.00013611821764720515, "loss": 2.0683, "step": 1290 }, { "epoch": 4.07, "learning_rate": 0.00013543670106597888, "loss": 1.9271, "step": 1295 }, { "epoch": 4.09, "learning_rate": 0.00013459213770999188, "loss": 2.0152, "step": 1300 }, { "epoch": 4.1, "learning_rate": 0.00013358658787439754, "loss": 1.7685, "step": 1305 }, { "epoch": 4.12, "learning_rate": 0.0001324225045775965, "loss": 2.2589, "step": 1310 }, { "epoch": 4.14, "learning_rate": 0.00013110272757714818, "loss": 1.9458, "step": 1315 }, { "epoch": 4.15, "learning_rate": 0.00012963047644223968, "loss": 2.0727, "step": 1320 }, { "epoch": 4.17, "learning_rate": 0.00012800934269961248, "loss": 2.0622, "step": 1325 }, { "epoch": 4.18, "learning_rate": 0.0001262432810721057, "loss": 1.7668, "step": 1330 }, { "epoch": 4.2, "learning_rate": 0.00012433659983118975, "loss": 1.6953, "step": 1335 }, { "epoch": 4.21, "learning_rate": 0.00012229395028702625, "loss": 2.1107, "step": 1340 }, { "epoch": 4.23, "learning_rate": 0.00012012031544169091, "loss": 1.7084, "step": 1345 }, { "epoch": 4.25, "learning_rate": 0.00011782099783324114, "loss": 2.281, "step": 1350 }, { "epoch": 4.26, "learning_rate": 0.00011540160660028247, "loss": 2.0195, "step": 1355 }, { "epoch": 4.28, "learning_rate": 0.0001128680437985883, "loss": 1.9339, "step": 1360 }, { "epoch": 4.29, "learning_rate": 0.00011022649000315548, "loss": 1.8057, "step": 1365 }, { "epoch": 4.31, "learning_rate": 0.00010748338923081677, "loss": 1.9412, "step": 1370 }, { "epoch": 4.32, "learning_rate": 0.00010464543322019205, "loss": 1.6871, "step": 1375 }, { "epoch": 4.34, "learning_rate": 0.0001017195451073291, "loss": 1.8352, "step": 1380 }, { "epoch": 4.36, "learning_rate": 9.871286253685277e-05, "loss": 1.8437, "step": 1385 }, { "epoch": 4.37, "learning_rate": 9.56327202498264e-05, "loss": 2.0612, "step": 1390 }, { "epoch": 4.39, "learning_rate": 9.248663219079991e-05, "loss": 1.9911, "step": 1395 }, { "epoch": 4.4, "learning_rate": 8.928227317769423e-05, "loss": 1.8443, "step": 1400 }, { "epoch": 4.42, "learning_rate": 8.602746017923975e-05, "loss": 1.9599, "step": 1405 }, { "epoch": 4.43, "learning_rate": 8.273013324563943e-05, "loss": 1.8763, "step": 1410 }, { "epoch": 4.45, "learning_rate": 7.939833613897684e-05, "loss": 1.7859, "step": 1415 }, { "epoch": 4.47, "learning_rate": 7.60401967106234e-05, "loss": 2.1938, "step": 1420 }, { "epoch": 4.48, "learning_rate": 7.266390707350893e-05, "loss": 1.554, "step": 1425 }, { "epoch": 4.5, "learning_rate": 6.927770361762858e-05, "loss": 1.9093, "step": 1430 }, { "epoch": 4.51, "learning_rate": 6.588984691753754e-05, "loss": 1.6916, "step": 1435 }, { "epoch": 4.53, "learning_rate": 6.250860158084673e-05, "loss": 1.9688, "step": 1440 }, { "epoch": 4.54, "learning_rate": 5.91422160868796e-05, "loss": 1.9263, "step": 1445 }, { "epoch": 4.56, "learning_rate": 5.579890266467603e-05, "loss": 1.6181, "step": 1450 }, { "epoch": 4.58, "learning_rate": 5.248681725942451e-05, "loss": 2.0187, "step": 1455 }, { "epoch": 4.59, "learning_rate": 4.921403963620125e-05, "loss": 1.6975, "step": 1460 }, { "epoch": 4.61, "learning_rate": 4.598855366954619e-05, "loss": 1.995, "step": 1465 }, { "epoch": 4.62, "learning_rate": 4.28182278669633e-05, "loss": 1.9041, "step": 1470 }, { "epoch": 4.64, "learning_rate": 3.9710796173857146e-05, "loss": 1.8769, "step": 1475 }, { "epoch": 4.65, "learning_rate": 3.667383910672967e-05, "loss": 1.6473, "step": 1480 }, { "epoch": 4.67, "learning_rate": 3.371476526066344e-05, "loss": 1.5014, "step": 1485 }, { "epoch": 4.69, "learning_rate": 3.084079323620596e-05, "loss": 1.8409, "step": 1490 }, { "epoch": 4.7, "learning_rate": 2.805893402973855e-05, "loss": 1.8548, "step": 1495 }, { "epoch": 4.72, "learning_rate": 2.5375973930294628e-05, "loss": 1.8673, "step": 1500 }, { "epoch": 4.73, "learning_rate": 2.2798457964544384e-05, "loss": 1.8588, "step": 1505 }, { "epoch": 4.75, "learning_rate": 2.0332673930335255e-05, "loss": 2.0708, "step": 1510 }, { "epoch": 4.76, "learning_rate": 1.7984637057737467e-05, "loss": 1.782, "step": 1515 }, { "epoch": 4.78, "learning_rate": 1.5760075335011966e-05, "loss": 1.9664, "step": 1520 }, { "epoch": 4.8, "learning_rate": 1.3664415535298983e-05, "loss": 1.9151, "step": 1525 }, { "epoch": 4.81, "learning_rate": 1.1702769978116211e-05, "loss": 1.9315, "step": 1530 }, { "epoch": 4.83, "learning_rate": 9.879924057958108e-06, "loss": 1.8344, "step": 1535 }, { "epoch": 4.84, "learning_rate": 8.20032457042391e-06, "loss": 1.8142, "step": 1540 }, { "epoch": 4.86, "learning_rate": 6.6680688643500544e-06, "loss": 1.7706, "step": 1545 }, { "epoch": 4.87, "learning_rate": 5.2868948464103564e-06, "loss": 2.0751, "step": 1550 }, { "epoch": 4.89, "learning_rate": 4.060171862569283e-06, "loss": 2.0826, "step": 1555 }, { "epoch": 4.91, "learning_rate": 2.9908924786305843e-06, "loss": 1.9677, "step": 1560 }, { "epoch": 4.92, "learning_rate": 2.0816651799335755e-06, "loss": 1.7251, "step": 1565 }, { "epoch": 4.94, "learning_rate": 1.3347080080066644e-06, "loss": 1.9202, "step": 1570 }, { "epoch": 4.95, "learning_rate": 7.518431496995702e-07, "loss": 1.833, "step": 1575 }, { "epoch": 4.97, "learning_rate": 3.34492491995841e-07, "loss": 1.8957, "step": 1580 }, { "epoch": 4.98, "learning_rate": 8.367415334837176e-08, "loss": 1.7695, "step": 1585 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.9115, "step": 1590 }, { "epoch": 5.0, "eval_loss": 1.906249761581421, "eval_runtime": 20.2347, "eval_samples_per_second": 22.19, "eval_steps_per_second": 2.817, "step": 1590 }, { "epoch": 5.02, "learning_rate": 8.367415334837176e-08, "loss": 1.6316, "step": 1595 }, { "epoch": 5.03, "learning_rate": 3.3449249199583335e-07, "loss": 1.7125, "step": 1600 }, { "epoch": 5.05, "learning_rate": 7.51843149699555e-07, "loss": 1.631, "step": 1605 }, { "epoch": 5.06, "learning_rate": 1.3347080080066491e-06, "loss": 1.5559, "step": 1610 }, { "epoch": 5.08, "learning_rate": 2.0816651799335526e-06, "loss": 1.7693, "step": 1615 }, { "epoch": 5.09, "learning_rate": 2.990892478630592e-06, "loss": 1.5954, "step": 1620 }, { "epoch": 5.11, "learning_rate": 4.060171862569298e-06, "loss": 1.9519, "step": 1625 }, { "epoch": 5.13, "learning_rate": 5.2868948464102726e-06, "loss": 1.5427, "step": 1630 }, { "epoch": 5.14, "learning_rate": 6.668068864349971e-06, "loss": 1.5142, "step": 1635 }, { "epoch": 5.16, "learning_rate": 8.200324570423812e-06, "loss": 1.4704, "step": 1640 }, { "epoch": 5.17, "learning_rate": 9.879924057958001e-06, "loss": 1.724, "step": 1645 }, { "epoch": 5.19, "learning_rate": 1.1702769978116166e-05, "loss": 1.7845, "step": 1650 }, { "epoch": 5.2, "learning_rate": 1.366441553529893e-05, "loss": 1.5651, "step": 1655 }, { "epoch": 5.22, "learning_rate": 1.5760075335011912e-05, "loss": 1.5684, "step": 1660 }, { "epoch": 5.24, "learning_rate": 1.7984637057737406e-05, "loss": 1.8357, "step": 1665 }, { "epoch": 5.25, "learning_rate": 2.0332673930335194e-05, "loss": 1.6561, "step": 1670 }, { "epoch": 5.27, "learning_rate": 2.2798457964544323e-05, "loss": 1.7467, "step": 1675 }, { "epoch": 5.28, "learning_rate": 2.5375973930294556e-05, "loss": 1.9728, "step": 1680 }, { "epoch": 5.3, "learning_rate": 2.805893402973858e-05, "loss": 1.813, "step": 1685 }, { "epoch": 5.31, "learning_rate": 3.084079323620599e-05, "loss": 1.7629, "step": 1690 }, { "epoch": 5.33, "learning_rate": 3.3714765260663475e-05, "loss": 1.7309, "step": 1695 }, { "epoch": 5.35, "learning_rate": 3.66738391067297e-05, "loss": 1.5675, "step": 1700 }, { "epoch": 5.36, "learning_rate": 3.971079617385719e-05, "loss": 1.4041, "step": 1705 }, { "epoch": 5.38, "learning_rate": 4.28182278669631e-05, "loss": 1.5323, "step": 1710 }, { "epoch": 5.39, "learning_rate": 4.5988553669545996e-05, "loss": 1.7705, "step": 1715 }, { "epoch": 5.41, "learning_rate": 4.921403963620105e-05, "loss": 1.7652, "step": 1720 }, { "epoch": 5.42, "learning_rate": 5.2486817259424427e-05, "loss": 1.8281, "step": 1725 }, { "epoch": 5.44, "learning_rate": 5.579890266467595e-05, "loss": 1.6899, "step": 1730 }, { "epoch": 5.46, "learning_rate": 5.914221608687952e-05, "loss": 1.6888, "step": 1735 }, { "epoch": 5.47, "learning_rate": 6.250860158084665e-05, "loss": 1.7589, "step": 1740 }, { "epoch": 5.49, "learning_rate": 6.588984691753746e-05, "loss": 1.6297, "step": 1745 }, { "epoch": 5.5, "learning_rate": 6.92777036176285e-05, "loss": 1.8036, "step": 1750 }, { "epoch": 5.52, "learning_rate": 7.266390707350887e-05, "loss": 1.8385, "step": 1755 }, { "epoch": 5.53, "learning_rate": 7.604019671062332e-05, "loss": 1.7527, "step": 1760 }, { "epoch": 5.55, "learning_rate": 7.939833613897687e-05, "loss": 1.5839, "step": 1765 }, { "epoch": 5.57, "learning_rate": 8.273013324563949e-05, "loss": 1.9817, "step": 1770 }, { "epoch": 5.58, "learning_rate": 8.602746017923979e-05, "loss": 2.1168, "step": 1775 }, { "epoch": 5.6, "learning_rate": 8.928227317769404e-05, "loss": 1.8069, "step": 1780 }, { "epoch": 5.61, "learning_rate": 9.248663219079973e-05, "loss": 1.886, "step": 1785 }, { "epoch": 5.63, "learning_rate": 9.56327202498262e-05, "loss": 1.8172, "step": 1790 }, { "epoch": 5.64, "learning_rate": 9.871286253685269e-05, "loss": 1.6818, "step": 1795 }, { "epoch": 5.66, "learning_rate": 0.00010171954510732892, "loss": 1.4689, "step": 1800 }, { "epoch": 5.68, "learning_rate": 0.00010464543322019198, "loss": 1.3972, "step": 1805 }, { "epoch": 5.69, "learning_rate": 0.00010748338923081671, "loss": 1.8636, "step": 1810 }, { "epoch": 5.71, "learning_rate": 0.0001102264900031554, "loss": 1.5656, "step": 1815 }, { "epoch": 5.72, "learning_rate": 0.00011286804379858823, "loss": 1.834, "step": 1820 }, { "epoch": 5.74, "learning_rate": 0.00011540160660028242, "loss": 1.6874, "step": 1825 }, { "epoch": 5.75, "learning_rate": 0.00011782099783324108, "loss": 1.7932, "step": 1830 }, { "epoch": 5.77, "learning_rate": 0.00012012031544169092, "loss": 1.8093, "step": 1835 }, { "epoch": 5.79, "learning_rate": 0.00012229395028702622, "loss": 1.4865, "step": 1840 }, { "epoch": 5.8, "learning_rate": 0.00012433659983118975, "loss": 1.7283, "step": 1845 }, { "epoch": 5.82, "learning_rate": 0.0001262432810721057, "loss": 1.5611, "step": 1850 }, { "epoch": 5.83, "learning_rate": 0.00012800934269961248, "loss": 1.7974, "step": 1855 }, { "epoch": 5.85, "learning_rate": 0.0001296304764422396, "loss": 1.6754, "step": 1860 }, { "epoch": 5.86, "learning_rate": 0.0001311027275771481, "loss": 1.5481, "step": 1865 }, { "epoch": 5.88, "learning_rate": 0.0001324225045775964, "loss": 1.7931, "step": 1870 }, { "epoch": 5.9, "learning_rate": 0.0001335865878743975, "loss": 1.9985, "step": 1875 }, { "epoch": 5.91, "learning_rate": 0.00013459213770999182, "loss": 1.9837, "step": 1880 }, { "epoch": 5.93, "learning_rate": 0.00013543670106597888, "loss": 1.7165, "step": 1885 }, { "epoch": 5.94, "learning_rate": 0.00013611821764720515, "loss": 1.9364, "step": 1890 }, { "epoch": 5.96, "learning_rate": 0.0001366350249078127, "loss": 1.6662, "step": 1895 }, { "epoch": 5.97, "learning_rate": 0.000136985862106986, "loss": 1.8671, "step": 1900 }, { "epoch": 5.99, "learning_rate": 0.0001371698733845033, "loss": 1.7105, "step": 1905 }, { "epoch": 6.0, "eval_loss": 1.9032058715820312, "eval_runtime": 20.2363, "eval_samples_per_second": 22.188, "eval_steps_per_second": 2.817, "step": 1908 }, { "epoch": 6.01, "learning_rate": 0.0001371866098485905, "loss": 1.4646, "step": 1910 }, { "epoch": 6.02, "learning_rate": 0.00013703603067098332, "loss": 1.572, "step": 1915 }, { "epoch": 6.04, "learning_rate": 0.00013671850318652725, "loss": 1.6881, "step": 1920 }, { "epoch": 6.05, "learning_rate": 0.00013623480199707148, "loss": 1.8478, "step": 1925 }, { "epoch": 6.07, "learning_rate": 0.00013558610708184343, "loss": 1.6836, "step": 1930 }, { "epoch": 6.08, "learning_rate": 0.0001347740009189134, "loss": 1.7053, "step": 1935 }, { "epoch": 6.1, "learning_rate": 0.00013380046462477165, "loss": 1.5923, "step": 1940 }, { "epoch": 6.12, "learning_rate": 0.00013266787312143458, "loss": 1.5511, "step": 1945 }, { "epoch": 6.13, "learning_rate": 0.00013137898934287114, "loss": 1.6041, "step": 1950 }, { "epoch": 6.15, "learning_rate": 0.00012993695749488105, "loss": 1.5811, "step": 1955 }, { "epoch": 6.16, "learning_rate": 0.0001283452953848691, "loss": 1.6477, "step": 1960 }, { "epoch": 6.18, "learning_rate": 0.00012660788584022533, "loss": 1.6821, "step": 1965 }, { "epoch": 6.19, "learning_rate": 0.00012472896723624585, "loss": 1.9101, "step": 1970 }, { "epoch": 6.21, "learning_rate": 0.00012271312315670352, "loss": 1.7961, "step": 1975 }, { "epoch": 6.23, "learning_rate": 0.00012056527121228716, "loss": 1.7949, "step": 1980 }, { "epoch": 6.24, "learning_rate": 0.00011829065104419132, "loss": 1.4401, "step": 1985 }, { "epoch": 6.26, "learning_rate": 0.00011589481154211747, "loss": 1.6164, "step": 1990 }, { "epoch": 6.27, "learning_rate": 0.00011338359730786976, "loss": 1.8185, "step": 1995 }, { "epoch": 6.29, "learning_rate": 0.00011076313439756885, "loss": 1.9472, "step": 2000 }, { "epoch": 6.31, "learning_rate": 0.00010803981537726108, "loss": 1.5825, "step": 2005 }, { "epoch": 6.32, "learning_rate": 0.00010522028372838475, "loss": 1.6182, "step": 2010 }, { "epoch": 6.34, "learning_rate": 0.00010231141764113088, "loss": 1.6272, "step": 2015 }, { "epoch": 6.35, "learning_rate": 9.932031323523847e-05, "loss": 1.4251, "step": 2020 }, { "epoch": 6.37, "learning_rate": 9.625426724915494e-05, "loss": 1.5946, "step": 2025 }, { "epoch": 6.38, "learning_rate": 9.31207592397905e-05, "loss": 1.5311, "step": 2030 }, { "epoch": 6.4, "learning_rate": 8.992743333629033e-05, "loss": 1.7549, "step": 2035 }, { "epoch": 6.42, "learning_rate": 8.6682079592338e-05, "loss": 1.7651, "step": 2040 }, { "epoch": 6.43, "learning_rate": 8.339261498247729e-05, "loss": 1.7894, "step": 2045 }, { "epoch": 6.45, "learning_rate": 8.006706408881262e-05, "loss": 1.2016, "step": 2050 }, { "epoch": 6.46, "learning_rate": 7.671353952520582e-05, "loss": 1.5423, "step": 2055 }, { "epoch": 6.48, "learning_rate": 7.334022214671746e-05, "loss": 1.2711, "step": 2060 }, { "epoch": 6.49, "learning_rate": 6.995534109257865e-05, "loss": 1.582, "step": 2065 }, { "epoch": 6.51, "learning_rate": 6.656715371137246e-05, "loss": 1.55, "step": 2070 }, { "epoch": 6.53, "learning_rate": 6.318392541739893e-05, "loss": 1.5744, "step": 2075 }, { "epoch": 6.54, "learning_rate": 5.9813909527366465e-05, "loss": 1.7316, "step": 2080 }, { "epoch": 6.56, "learning_rate": 5.646532712659148e-05, "loss": 1.5926, "step": 2085 }, { "epoch": 6.57, "learning_rate": 5.314634701382963e-05, "loss": 1.3707, "step": 2090 }, { "epoch": 6.59, "learning_rate": 4.98650657736552e-05, "loss": 1.3874, "step": 2095 }, { "epoch": 6.6, "learning_rate": 4.6629488025009487e-05, "loss": 1.6198, "step": 2100 }, { "epoch": 6.62, "learning_rate": 4.3447506894093505e-05, "loss": 1.6056, "step": 2105 }, { "epoch": 6.64, "learning_rate": 4.0326884759248795e-05, "loss": 1.3487, "step": 2110 }, { "epoch": 6.65, "learning_rate": 3.7275234314791357e-05, "loss": 1.5276, "step": 2115 }, { "epoch": 6.67, "learning_rate": 3.429999999999998e-05, "loss": 1.6535, "step": 2120 }, { "epoch": 6.68, "learning_rate": 3.140843983855718e-05, "loss": 1.8044, "step": 2125 }, { "epoch": 6.7, "learning_rate": 2.860760773274722e-05, "loss": 1.705, "step": 2130 }, { "epoch": 6.71, "learning_rate": 2.5904336255606023e-05, "loss": 1.4356, "step": 2135 }, { "epoch": 6.73, "learning_rate": 2.3305219982997338e-05, "loss": 1.4672, "step": 2140 }, { "epoch": 6.75, "learning_rate": 2.0816599406278358e-05, "loss": 1.5759, "step": 2145 }, { "epoch": 6.76, "learning_rate": 1.8444545464801106e-05, "loss": 1.655, "step": 2150 }, { "epoch": 6.78, "learning_rate": 1.6194844735977787e-05, "loss": 1.5595, "step": 2155 }, { "epoch": 6.79, "learning_rate": 1.4072985319044027e-05, "loss": 1.5155, "step": 2160 }, { "epoch": 6.81, "learning_rate": 1.208414344695116e-05, "loss": 1.6561, "step": 2165 }, { "epoch": 6.82, "learning_rate": 1.0233170859051572e-05, "loss": 1.4243, "step": 2170 }, { "epoch": 6.84, "learning_rate": 8.524582965379327e-06, "loss": 1.3178, "step": 2175 }, { "epoch": 6.86, "learning_rate": 6.9625478313988e-06, "loss": 1.2895, "step": 2180 }, { "epoch": 6.87, "learning_rate": 5.5508760100933275e-06, "loss": 1.8021, "step": 2185 }, { "epoch": 6.89, "learning_rate": 4.293011246199375e-06, "loss": 1.2922, "step": 2190 }, { "epoch": 6.9, "learning_rate": 3.19202207526035e-06, "loss": 1.514, "step": 2195 }, { "epoch": 6.92, "learning_rate": 2.2505943379971774e-06, "loss": 1.7064, "step": 2200 }, { "epoch": 6.93, "learning_rate": 1.4710246282540463e-06, "loss": 1.7752, "step": 2205 }, { "epoch": 6.95, "learning_rate": 8.552146905042831e-07, "loss": 1.4982, "step": 2210 }, { "epoch": 6.97, "learning_rate": 4.0466678058365933e-07, "loss": 1.4699, "step": 2215 }, { "epoch": 6.98, "learning_rate": 1.2048000096755528e-07, "loss": 1.7008, "step": 2220 }, { "epoch": 7.0, "learning_rate": 3.347619532822632e-09, "loss": 1.5587, "step": 2225 }, { "epoch": 7.0, "eval_loss": 1.8221737146377563, "eval_runtime": 20.3992, "eval_samples_per_second": 22.011, "eval_steps_per_second": 2.794, "step": 2226 }, { "epoch": 7.01, "learning_rate": 5.355537834497188e-08, "loss": 1.248, "step": 2230 }, { "epoch": 7.03, "learning_rate": 2.709807965961209e-07, "loss": 1.3298, "step": 2235 }, { "epoch": 7.04, "learning_rate": 6.550934693944553e-07, "loss": 1.4704, "step": 2240 }, { "epoch": 7.06, "learning_rate": 1.2049563616762301e-06, "loss": 1.4347, "step": 2245 }, { "epoch": 7.08, "learning_rate": 1.919228094083792e-06, "loss": 1.3554, "step": 2250 }, { "epoch": 7.09, "learning_rate": 2.7961662152331403e-06, "loss": 1.428, "step": 2255 }, { "epoch": 7.11, "learning_rate": 3.833631452388745e-06, "loss": 1.4177, "step": 2260 }, { "epoch": 7.12, "learning_rate": 5.029092930176208e-06, "loss": 1.3861, "step": 2265 }, { "epoch": 7.14, "learning_rate": 6.379634344600846e-06, "loss": 1.3699, "step": 2270 }, { "epoch": 7.15, "learning_rate": 7.88196107731231e-06, "loss": 1.4377, "step": 2275 }, { "epoch": 7.17, "learning_rate": 9.532408232759425e-06, "loss": 1.4078, "step": 2280 }, { "epoch": 7.19, "learning_rate": 1.1326949578627714e-05, "loss": 1.4196, "step": 2285 }, { "epoch": 7.2, "learning_rate": 1.3261207367752312e-05, "loss": 1.4826, "step": 2290 }, { "epoch": 7.22, "learning_rate": 1.533046301754235e-05, "loss": 1.4701, "step": 2295 }, { "epoch": 7.23, "learning_rate": 1.7529668620867698e-05, "loss": 1.4641, "step": 2300 }, { "epoch": 7.25, "learning_rate": 1.985345926032643e-05, "loss": 1.4294, "step": 2305 }, { "epoch": 7.26, "learning_rate": 2.22961660958507e-05, "loss": 1.4558, "step": 2310 }, { "epoch": 7.28, "learning_rate": 2.4851830193728118e-05, "loss": 1.5906, "step": 2315 }, { "epoch": 7.3, "learning_rate": 2.751421706329902e-05, "loss": 1.6085, "step": 2320 }, { "epoch": 7.31, "learning_rate": 3.027683186586924e-05, "loss": 1.2953, "step": 2325 }, { "epoch": 7.33, "learning_rate": 3.313293525873851e-05, "loss": 1.4112, "step": 2330 }, { "epoch": 7.34, "learning_rate": 3.607555983568874e-05, "loss": 1.3934, "step": 2335 }, { "epoch": 7.36, "learning_rate": 3.909752712383054e-05, "loss": 1.3835, "step": 2340 }, { "epoch": 7.37, "learning_rate": 4.219146509534274e-05, "loss": 1.4961, "step": 2345 }, { "epoch": 7.39, "learning_rate": 4.5349826151383146e-05, "loss": 1.2224, "step": 2350 }, { "epoch": 7.41, "learning_rate": 4.856490553430337e-05, "loss": 1.4432, "step": 2355 }, { "epoch": 7.42, "learning_rate": 5.182886012324924e-05, "loss": 1.5696, "step": 2360 }, { "epoch": 7.44, "learning_rate": 5.5133727567293246e-05, "loss": 1.6135, "step": 2365 }, { "epoch": 7.45, "learning_rate": 5.8471445709429775e-05, "loss": 1.3836, "step": 2370 }, { "epoch": 7.47, "learning_rate": 6.18338722540417e-05, "loss": 1.0351, "step": 2375 }, { "epoch": 7.48, "learning_rate": 6.521280462986592e-05, "loss": 1.5105, "step": 2380 }, { "epoch": 7.5, "learning_rate": 6.859999999999982e-05, "loss": 1.6121, "step": 2385 }, { "epoch": 7.52, "learning_rate": 7.198719537013396e-05, "loss": 1.6241, "step": 2390 }, { "epoch": 7.53, "learning_rate": 7.536612774595818e-05, "loss": 1.522, "step": 2395 }, { "epoch": 7.55, "learning_rate": 7.872855429057012e-05, "loss": 1.4888, "step": 2400 }, { "epoch": 7.56, "learning_rate": 8.206627243270664e-05, "loss": 1.4955, "step": 2405 }, { "epoch": 7.58, "learning_rate": 8.537113987675064e-05, "loss": 1.4161, "step": 2410 }, { "epoch": 7.59, "learning_rate": 8.86350944656965e-05, "loss": 1.8936, "step": 2415 }, { "epoch": 7.61, "learning_rate": 9.185017384861673e-05, "loss": 1.6053, "step": 2420 }, { "epoch": 7.63, "learning_rate": 9.500853490465716e-05, "loss": 1.5626, "step": 2425 }, { "epoch": 7.64, "learning_rate": 9.810247287616934e-05, "loss": 1.1654, "step": 2430 }, { "epoch": 7.66, "learning_rate": 0.00010112444016431114, "loss": 1.353, "step": 2435 }, { "epoch": 7.67, "learning_rate": 0.00010406706474126137, "loss": 1.5396, "step": 2440 }, { "epoch": 7.69, "learning_rate": 0.00010692316813413065, "loss": 1.3699, "step": 2445 }, { "epoch": 7.7, "learning_rate": 0.0001096857829367009, "loss": 1.3597, "step": 2450 }, { "epoch": 7.72, "learning_rate": 0.0001123481698062718, "loss": 1.5768, "step": 2455 }, { "epoch": 7.74, "learning_rate": 0.00011490383390414922, "loss": 1.3661, "step": 2460 }, { "epoch": 7.75, "learning_rate": 0.00011734654073967348, "loss": 1.4304, "step": 2465 }, { "epoch": 7.77, "learning_rate": 0.00011967033137913221, "loss": 1.4475, "step": 2470 }, { "epoch": 7.78, "learning_rate": 0.00012186953698245757, "loss": 1.5644, "step": 2475 }, { "epoch": 7.8, "learning_rate": 0.00012393879263224763, "loss": 1.2184, "step": 2480 }, { "epoch": 7.81, "learning_rate": 0.00012587305042137222, "loss": 1.55, "step": 2485 }, { "epoch": 7.83, "learning_rate": 0.00012766759176724053, "loss": 1.5614, "step": 2490 }, { "epoch": 7.85, "learning_rate": 0.00012931803892268765, "loss": 1.4788, "step": 2495 }, { "epoch": 7.86, "learning_rate": 0.0001308203656553991, "loss": 1.3325, "step": 2500 }, { "epoch": 7.88, "learning_rate": 0.00013217090706982374, "loss": 1.6197, "step": 2505 }, { "epoch": 7.89, "learning_rate": 0.0001333663685476112, "loss": 1.5014, "step": 2510 }, { "epoch": 7.91, "learning_rate": 0.00013440383378476682, "loss": 1.4764, "step": 2515 }, { "epoch": 7.92, "learning_rate": 0.00013528077190591619, "loss": 1.8615, "step": 2520 }, { "epoch": 7.94, "learning_rate": 0.00013599504363832375, "loss": 1.2448, "step": 2525 }, { "epoch": 7.96, "learning_rate": 0.00013654490653060552, "loss": 1.3242, "step": 2530 }, { "epoch": 7.97, "learning_rate": 0.00013692901920340386, "loss": 1.3224, "step": 2535 }, { "epoch": 7.99, "learning_rate": 0.00013714644462165502, "loss": 1.4513, "step": 2540 }, { "epoch": 8.0, "eval_loss": 1.8965427875518799, "eval_runtime": 20.4074, "eval_samples_per_second": 22.002, "eval_steps_per_second": 2.793, "step": 2544 }, { "epoch": 8.0, "learning_rate": 0.00013719665238046719, "loss": 1.3932, "step": 2545 }, { "epoch": 8.02, "learning_rate": 0.00013707951999903246, "loss": 1.2432, "step": 2550 }, { "epoch": 8.03, "learning_rate": 0.00013679533321941633, "loss": 1.2907, "step": 2555 }, { "epoch": 8.05, "learning_rate": 0.00013634478530949573, "loss": 1.2857, "step": 2560 }, { "epoch": 8.07, "learning_rate": 0.00013572897537174604, "loss": 1.3178, "step": 2565 }, { "epoch": 8.08, "learning_rate": 0.00013494940566200278, "loss": 1.3444, "step": 2570 }, { "epoch": 8.1, "learning_rate": 0.0001340079779247397, "loss": 1.3778, "step": 2575 }, { "epoch": 8.11, "learning_rate": 0.00013290698875380075, "loss": 1.5204, "step": 2580 }, { "epoch": 8.13, "learning_rate": 0.00013164912398990663, "loss": 1.196, "step": 2585 }, { "epoch": 8.14, "learning_rate": 0.00013023745216860123, "loss": 1.5108, "step": 2590 }, { "epoch": 8.16, "learning_rate": 0.00012867541703462073, "loss": 1.414, "step": 2595 }, { "epoch": 8.18, "learning_rate": 0.00012696682914094848, "loss": 1.299, "step": 2600 }, { "epoch": 8.19, "learning_rate": 0.00012511585655304892, "loss": 1.5168, "step": 2605 }, { "epoch": 8.21, "learning_rate": 0.00012312701468095605, "loss": 1.4117, "step": 2610 }, { "epoch": 8.22, "learning_rate": 0.0001210051552640223, "loss": 1.4762, "step": 2615 }, { "epoch": 8.24, "learning_rate": 0.00011875545453519897, "loss": 1.3127, "step": 2620 }, { "epoch": 8.25, "learning_rate": 0.00011638340059372173, "loss": 1.5447, "step": 2625 }, { "epoch": 8.27, "learning_rate": 0.00011389478001700295, "loss": 1.3891, "step": 2630 }, { "epoch": 8.29, "learning_rate": 0.00011129566374439389, "loss": 1.7022, "step": 2635 }, { "epoch": 8.3, "learning_rate": 0.00010859239226725287, "loss": 1.2989, "step": 2640 }, { "epoch": 8.32, "learning_rate": 0.00010579156016144313, "loss": 1.4672, "step": 2645 }, { "epoch": 8.33, "learning_rate": 0.00010289999999999993, "loss": 1.4019, "step": 2650 }, { "epoch": 8.35, "learning_rate": 9.992476568520875e-05, "loss": 1.3174, "step": 2655 }, { "epoch": 8.36, "learning_rate": 9.687311524075153e-05, "loss": 1.4993, "step": 2660 }, { "epoch": 8.38, "learning_rate": 9.375249310590639e-05, "loss": 1.4268, "step": 2665 }, { "epoch": 8.4, "learning_rate": 9.057051197499064e-05, "loss": 1.3762, "step": 2670 }, { "epoch": 8.41, "learning_rate": 8.733493422634493e-05, "loss": 1.6376, "step": 2675 }, { "epoch": 8.43, "learning_rate": 8.405365298617051e-05, "loss": 1.3535, "step": 2680 }, { "epoch": 8.44, "learning_rate": 8.073467287340865e-05, "loss": 1.4565, "step": 2685 }, { "epoch": 8.46, "learning_rate": 7.738609047263366e-05, "loss": 1.4825, "step": 2690 }, { "epoch": 8.47, "learning_rate": 7.401607458260121e-05, "loss": 1.2098, "step": 2695 }, { "epoch": 8.49, "learning_rate": 7.063284628862766e-05, "loss": 1.114, "step": 2700 }, { "epoch": 8.51, "learning_rate": 6.724465890742147e-05, "loss": 1.1928, "step": 2705 }, { "epoch": 8.52, "learning_rate": 6.385977785328291e-05, "loss": 1.2321, "step": 2710 }, { "epoch": 8.54, "learning_rate": 6.048646047479429e-05, "loss": 1.4018, "step": 2715 }, { "epoch": 8.55, "learning_rate": 5.71329359111875e-05, "loss": 1.4829, "step": 2720 }, { "epoch": 8.57, "learning_rate": 5.3807385017523074e-05, "loss": 1.3501, "step": 2725 }, { "epoch": 8.58, "learning_rate": 5.051792040766187e-05, "loss": 1.344, "step": 2730 }, { "epoch": 8.6, "learning_rate": 4.7272566663709795e-05, "loss": 1.0449, "step": 2735 }, { "epoch": 8.62, "learning_rate": 4.407924076020983e-05, "loss": 1.3851, "step": 2740 }, { "epoch": 8.63, "learning_rate": 4.0945732750844954e-05, "loss": 1.3359, "step": 2745 }, { "epoch": 8.65, "learning_rate": 3.787968676476165e-05, "loss": 1.2086, "step": 2750 }, { "epoch": 8.66, "learning_rate": 3.488858235886944e-05, "loss": 1.2969, "step": 2755 }, { "epoch": 8.68, "learning_rate": 3.1979716271615364e-05, "loss": 1.1869, "step": 2760 }, { "epoch": 8.69, "learning_rate": 2.916018462273902e-05, "loss": 1.4603, "step": 2765 }, { "epoch": 8.71, "learning_rate": 2.643686560243124e-05, "loss": 1.2891, "step": 2770 }, { "epoch": 8.73, "learning_rate": 2.3816402692130164e-05, "loss": 1.4753, "step": 2775 }, { "epoch": 8.74, "learning_rate": 2.1305188457882628e-05, "loss": 1.4468, "step": 2780 }, { "epoch": 8.76, "learning_rate": 1.890934895580877e-05, "loss": 1.2143, "step": 2785 }, { "epoch": 8.77, "learning_rate": 1.6634728787713087e-05, "loss": 1.4829, "step": 2790 }, { "epoch": 8.79, "learning_rate": 1.4486876843296578e-05, "loss": 1.5124, "step": 2795 }, { "epoch": 8.81, "learning_rate": 1.247103276375423e-05, "loss": 1.1507, "step": 2800 }, { "epoch": 8.82, "learning_rate": 1.0592114159774876e-05, "loss": 1.2259, "step": 2805 }, { "epoch": 8.84, "learning_rate": 8.854704615130826e-06, "loss": 1.3434, "step": 2810 }, { "epoch": 8.85, "learning_rate": 7.263042505119003e-06, "loss": 1.4906, "step": 2815 }, { "epoch": 8.87, "learning_rate": 5.82101065712901e-06, "loss": 1.2034, "step": 2820 }, { "epoch": 8.88, "learning_rate": 4.532126878565386e-06, "loss": 1.3118, "step": 2825 }, { "epoch": 8.9, "learning_rate": 3.3995353752283744e-06, "loss": 1.4476, "step": 2830 }, { "epoch": 8.92, "learning_rate": 2.4259990810866283e-06, "loss": 1.5325, "step": 2835 }, { "epoch": 8.93, "learning_rate": 1.6138929181565955e-06, "loss": 1.3443, "step": 2840 }, { "epoch": 8.95, "learning_rate": 9.651980029285464e-07, "loss": 1.665, "step": 2845 }, { "epoch": 8.96, "learning_rate": 4.814968134727699e-07, "loss": 1.3313, "step": 2850 }, { "epoch": 8.98, "learning_rate": 1.6396932901667525e-07, "loss": 1.504, "step": 2855 }, { "epoch": 8.99, "learning_rate": 1.339015140952895e-08, "loss": 1.2679, "step": 2860 }, { "epoch": 9.0, "eval_loss": 1.7906934022903442, "eval_runtime": 20.4159, "eval_samples_per_second": 21.993, "eval_steps_per_second": 2.792, "step": 2862 }, { "epoch": 9.01, "learning_rate": 3.01266154967001e-08, "loss": 1.4496, "step": 2865 }, { "epoch": 9.03, "learning_rate": 2.1413789301401304e-07, "loss": 1.2338, "step": 2870 }, { "epoch": 9.04, "learning_rate": 5.649750921872831e-07, "loss": 1.1185, "step": 2875 }, { "epoch": 9.06, "learning_rate": 1.0817823527948346e-06, "loss": 1.0349, "step": 2880 }, { "epoch": 9.07, "learning_rate": 1.7632989340210505e-06, "loss": 1.1285, "step": 2885 }, { "epoch": 9.09, "learning_rate": 2.6078622900081364e-06, "loss": 1.1348, "step": 2890 }, { "epoch": 9.1, "learning_rate": 3.613412125602455e-06, "loss": 1.0489, "step": 2895 }, { "epoch": 9.12, "learning_rate": 4.777495422403447e-06, "loss": 1.1893, "step": 2900 }, { "epoch": 9.14, "learning_rate": 6.0972724228519425e-06, "loss": 1.3905, "step": 2905 }, { "epoch": 9.15, "learning_rate": 7.569523557760349e-06, "loss": 1.2451, "step": 2910 }, { "epoch": 9.17, "learning_rate": 9.190657300387452e-06, "loss": 1.2443, "step": 2915 }, { "epoch": 9.18, "learning_rate": 1.0956718927894355e-05, "loss": 1.2626, "step": 2920 }, { "epoch": 9.2, "learning_rate": 1.2863400168810178e-05, "loss": 1.0339, "step": 2925 }, { "epoch": 9.21, "learning_rate": 1.4906049712973553e-05, "loss": 1.1284, "step": 2930 }, { "epoch": 9.23, "learning_rate": 1.7079684558309144e-05, "loss": 1.2964, "step": 2935 }, { "epoch": 9.25, "learning_rate": 1.9379002166758836e-05, "loss": 0.9701, "step": 2940 }, { "epoch": 9.26, "learning_rate": 2.1798393399717496e-05, "loss": 1.2379, "step": 2945 }, { "epoch": 9.28, "learning_rate": 2.4331956201411865e-05, "loss": 1.2294, "step": 2950 }, { "epoch": 9.29, "learning_rate": 2.69735099968445e-05, "loss": 1.2279, "step": 2955 }, { "epoch": 9.31, "learning_rate": 2.9716610769183196e-05, "loss": 1.1694, "step": 2960 }, { "epoch": 9.32, "learning_rate": 3.255456677980771e-05, "loss": 1.3376, "step": 2965 }, { "epoch": 9.34, "learning_rate": 3.548045489267097e-05, "loss": 1.1979, "step": 2970 }, { "epoch": 9.36, "learning_rate": 3.848713746314718e-05, "loss": 1.0518, "step": 2975 }, { "epoch": 9.37, "learning_rate": 4.1567279750173456e-05, "loss": 1.1192, "step": 2980 }, { "epoch": 9.39, "learning_rate": 4.471336780920016e-05, "loss": 1.258, "step": 2985 }, { "epoch": 9.4, "learning_rate": 4.791772682230585e-05, "loss": 1.1698, "step": 2990 }, { "epoch": 9.42, "learning_rate": 5.1172539820760084e-05, "loss": 1.2648, "step": 2995 }, { "epoch": 9.43, "learning_rate": 5.4469866754360636e-05, "loss": 1.1929, "step": 3000 }, { "epoch": 9.45, "learning_rate": 5.7801663861022995e-05, "loss": 1.1723, "step": 3005 }, { "epoch": 9.47, "learning_rate": 6.115980328937633e-05, "loss": 1.225, "step": 3010 }, { "epoch": 9.48, "learning_rate": 6.453609292649126e-05, "loss": 1.281, "step": 3015 }, { "epoch": 9.5, "learning_rate": 6.792229638237138e-05, "loss": 1.1719, "step": 3020 }, { "epoch": 9.51, "learning_rate": 7.131015308246217e-05, "loss": 1.3758, "step": 3025 }, { "epoch": 9.53, "learning_rate": 7.469139841915347e-05, "loss": 1.3241, "step": 3030 }, { "epoch": 9.54, "learning_rate": 7.805778391312036e-05, "loss": 1.1407, "step": 3035 }, { "epoch": 9.56, "learning_rate": 8.140109733532393e-05, "loss": 1.2411, "step": 3040 }, { "epoch": 9.58, "learning_rate": 8.471318274057568e-05, "loss": 1.413, "step": 3045 }, { "epoch": 9.59, "learning_rate": 8.798596036379883e-05, "loss": 1.222, "step": 3050 }, { "epoch": 9.61, "learning_rate": 9.121144633045388e-05, "loss": 1.3584, "step": 3055 }, { "epoch": 9.62, "learning_rate": 9.438177213303655e-05, "loss": 1.1437, "step": 3060 }, { "epoch": 9.64, "learning_rate": 9.74892038261427e-05, "loss": 1.2668, "step": 3065 }, { "epoch": 9.65, "learning_rate": 0.00010052616089327018, "loss": 1.3637, "step": 3070 }, { "epoch": 9.67, "learning_rate": 0.00010348523473933642, "loss": 1.1909, "step": 3075 }, { "epoch": 9.69, "learning_rate": 0.0001063592067637941, "loss": 1.3763, "step": 3080 }, { "epoch": 9.7, "learning_rate": 0.00010914106597026132, "loss": 1.4548, "step": 3085 }, { "epoch": 9.72, "learning_rate": 0.00011182402606970514, "loss": 1.3378, "step": 3090 }, { "epoch": 9.73, "learning_rate": 0.00011440154203545576, "loss": 1.3677, "step": 3095 }, { "epoch": 9.75, "learning_rate": 0.00011686732606966472, "loss": 1.2485, "step": 3100 }, { "epoch": 9.76, "learning_rate": 0.00011921536294226233, "loss": 1.3363, "step": 3105 }, { "epoch": 9.78, "learning_rate": 0.00012143992466498816, "loss": 1.1575, "step": 3110 }, { "epoch": 9.8, "learning_rate": 0.00012353558446470098, "loss": 1.1208, "step": 3115 }, { "epoch": 9.81, "learning_rate": 0.00012549723002188375, "loss": 1.4087, "step": 3120 }, { "epoch": 9.83, "learning_rate": 0.00012732007594204206, "loss": 1.3399, "step": 3125 }, { "epoch": 9.84, "learning_rate": 0.00012899967542957612, "loss": 1.4988, "step": 3130 }, { "epoch": 9.86, "learning_rate": 0.00013053193113564998, "loss": 0.989, "step": 3135 }, { "epoch": 9.87, "learning_rate": 0.00013191310515358958, "loss": 1.349, "step": 3140 }, { "epoch": 9.89, "learning_rate": 0.00013313982813743067, "loss": 1.1874, "step": 3145 }, { "epoch": 9.91, "learning_rate": 0.00013420910752136937, "loss": 1.2978, "step": 3150 }, { "epoch": 9.92, "learning_rate": 0.00013511833482006638, "loss": 1.5572, "step": 3155 }, { "epoch": 9.94, "learning_rate": 0.00013586529199199334, "loss": 1.3689, "step": 3160 }, { "epoch": 9.95, "learning_rate": 0.00013644815685030044, "loss": 1.1297, "step": 3165 }, { "epoch": 9.97, "learning_rate": 0.00013686550750800414, "loss": 1.3856, "step": 3170 }, { "epoch": 9.98, "learning_rate": 0.00013711632584665164, "loss": 1.2732, "step": 3175 }, { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 1.5975, "step": 3180 }, { "epoch": 10.0, "eval_loss": 1.7831153869628906, "eval_runtime": 20.3518, "eval_samples_per_second": 22.062, "eval_steps_per_second": 2.801, "step": 3180 }, { "epoch": 9.95, "learning_rate": 0.00013645750858358395, "loss": 1.2433, "step": 3185 }, { "epoch": 9.97, "learning_rate": 0.0001368696722497127, "loss": 1.547, "step": 3190 }, { "epoch": 9.98, "learning_rate": 0.00013711736829567482, "loss": 1.4594, "step": 3195 }, { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 1.3407, "step": 3200 }, { "epoch": 10.0, "eval_loss": 1.139600157737732, "eval_runtime": 5.2723, "eval_samples_per_second": 82.317, "eval_steps_per_second": 10.432, "step": 3200 }, { "epoch": 10.02, "learning_rate": 0.00013711736829567482, "loss": 1.4415, "step": 3205 }, { "epoch": 10.03, "learning_rate": 0.00013686967224971273, "loss": 1.2348, "step": 3210 }, { "epoch": 10.05, "learning_rate": 0.00013645750858358398, "loss": 1.4623, "step": 3215 }, { "epoch": 10.06, "learning_rate": 0.00013588187023566163, "loss": 1.437, "step": 3220 }, { "epoch": 10.08, "learning_rate": 0.00013514414396914573, "loss": 1.6916, "step": 3225 }, { "epoch": 10.09, "learning_rate": 0.00013424610703122958, "loss": 1.7023, "step": 3230 }, { "epoch": 10.11, "learning_rate": 0.00013318992287155525, "loss": 1.3172, "step": 3235 }, { "epoch": 10.12, "learning_rate": 0.00013197813593027435, "loss": 1.2053, "step": 3240 }, { "epoch": 10.14, "learning_rate": 0.00013061366550826825, "loss": 1.1869, "step": 3245 }, { "epoch": 10.16, "learning_rate": 0.00012909979873429724, "loss": 1.2981, "step": 3250 }, { "epoch": 10.17, "learning_rate": 0.0001274401826460187, "loss": 1.6608, "step": 3255 }, { "epoch": 10.19, "learning_rate": 0.00012563881540395474, "loss": 1.3115, "step": 3260 }, { "epoch": 10.2, "learning_rate": 0.00012370003665957216, "loss": 1.2824, "step": 3265 }, { "epoch": 10.22, "learning_rate": 0.00012162851710068375, "loss": 1.4082, "step": 3270 }, { "epoch": 10.23, "learning_rate": 0.00011942924719935029, "loss": 1.3048, "step": 3275 }, { "epoch": 10.25, "learning_rate": 0.00011710752518939736, "loss": 1.3276, "step": 3280 }, { "epoch": 10.27, "learning_rate": 0.0001146689443025054, "loss": 1.4064, "step": 3285 }, { "epoch": 10.28, "learning_rate": 0.00011211937929362613, "loss": 1.2408, "step": 3290 }, { "epoch": 10.3, "learning_rate": 0.00010946497228818107, "loss": 1.3932, "step": 3295 }, { "epoch": 10.31, "learning_rate": 0.00010671211798514499, "loss": 1.4576, "step": 3300 }, { "epoch": 10.33, "learning_rate": 0.00010386744825165496, "loss": 1.455, "step": 3305 }, { "epoch": 10.34, "learning_rate": 0.00010093781614626351, "loss": 1.3289, "step": 3310 }, { "epoch": 10.36, "learning_rate": 9.793027940931756e-05, "loss": 1.2645, "step": 3315 }, { "epoch": 10.38, "learning_rate": 9.485208346024504e-05, "loss": 1.39, "step": 3320 }, { "epoch": 10.39, "learning_rate": 9.17106439427063e-05, "loss": 1.3945, "step": 3325 }, { "epoch": 10.41, "learning_rate": 8.851352885965625e-05, "loss": 1.5375, "step": 3330 }, { "epoch": 10.42, "learning_rate": 8.526844034136417e-05, "loss": 1.4077, "step": 3335 }, { "epoch": 10.44, "learning_rate": 8.198319609030632e-05, "loss": 1.4331, "step": 3340 }, { "epoch": 10.45, "learning_rate": 7.866571054763788e-05, "loss": 1.8602, "step": 3345 }, { "epoch": 10.47, "learning_rate": 7.532397582660805e-05, "loss": 1.4865, "step": 3350 }, { "epoch": 10.48, "learning_rate": 7.19660424588612e-05, "loss": 1.2815, "step": 3355 }, { "epoch": 10.5, "learning_rate": 6.859999999999997e-05, "loss": 1.4705, "step": 3360 }, { "epoch": 10.52, "learning_rate": 6.523395754113922e-05, "loss": 1.1969, "step": 3365 }, { "epoch": 10.53, "learning_rate": 6.187602417339237e-05, "loss": 1.4564, "step": 3370 }, { "epoch": 10.55, "learning_rate": 5.853428945236207e-05, "loss": 1.4113, "step": 3375 }, { "epoch": 10.56, "learning_rate": 5.521680390969362e-05, "loss": 1.4642, "step": 3380 }, { "epoch": 10.58, "learning_rate": 5.193155965863624e-05, "loss": 1.4196, "step": 3385 }, { "epoch": 10.59, "learning_rate": 4.8686471140344147e-05, "loss": 1.3666, "step": 3390 }, { "epoch": 10.61, "learning_rate": 4.548935605729363e-05, "loss": 1.3908, "step": 3395 }, { "epoch": 10.62, "learning_rate": 4.23479165397549e-05, "loss": 1.4785, "step": 3400 }, { "epoch": 10.64, "learning_rate": 3.926972059068282e-05, "loss": 1.4775, "step": 3405 }, { "epoch": 10.66, "learning_rate": 3.626218385373685e-05, "loss": 1.4841, "step": 3410 }, { "epoch": 10.67, "learning_rate": 3.333255174834496e-05, "loss": 1.4263, "step": 3415 }, { "epoch": 10.69, "learning_rate": 3.0487882014855373e-05, "loss": 1.4815, "step": 3420 }, { "epoch": 10.7, "learning_rate": 2.7735027711819264e-05, "loss": 1.3612, "step": 3425 }, { "epoch": 10.72, "learning_rate": 2.508062070637383e-05, "loss": 1.3586, "step": 3430 }, { "epoch": 10.73, "learning_rate": 2.253105569749455e-05, "loss": 1.4036, "step": 3435 }, { "epoch": 10.75, "learning_rate": 2.0092474810602945e-05, "loss": 1.2455, "step": 3440 }, { "epoch": 10.77, "learning_rate": 1.7770752800649997e-05, "loss": 1.3747, "step": 3445 }, { "epoch": 10.78, "learning_rate": 1.5571482899316204e-05, "loss": 1.2848, "step": 3450 }, { "epoch": 10.8, "learning_rate": 1.3499963340427795e-05, "loss": 1.5623, "step": 3455 }, { "epoch": 10.81, "learning_rate": 1.1561184596045504e-05, "loss": 1.4704, "step": 3460 }, { "epoch": 10.83, "learning_rate": 9.759817353981509e-06, "loss": 1.3271, "step": 3465 }, { "epoch": 10.84, "learning_rate": 8.100201265702836e-06, "loss": 1.2696, "step": 3470 }, { "epoch": 10.86, "learning_rate": 6.586334491731833e-06, "loss": 1.5138, "step": 3475 }, { "epoch": 10.88, "learning_rate": 5.221864069725821e-06, "loss": 1.344, "step": 3480 }, { "epoch": 10.89, "learning_rate": 4.010077128444735e-06, "loss": 1.3544, "step": 3485 }, { "epoch": 10.91, "learning_rate": 2.9538929687704825e-06, "loss": 1.6602, "step": 3490 }, { "epoch": 10.92, "learning_rate": 2.0558560308543213e-06, "loss": 1.3761, "step": 3495 }, { "epoch": 10.94, "learning_rate": 1.3181297643384459e-06, "loss": 1.3709, "step": 3500 }, { "epoch": 10.95, "learning_rate": 7.424914164160148e-07, "loss": 1.3595, "step": 3505 }, { "epoch": 10.97, "learning_rate": 3.303277502872983e-07, "loss": 1.4077, "step": 3510 }, { "epoch": 10.98, "learning_rate": 8.263170432518063e-08, "loss": 1.4356, "step": 3515 }, { "epoch": 11.0, "learning_rate": 0.0, "loss": 1.7243, "step": 3520 }, { "epoch": 11.0, "eval_loss": 1.1156859397888184, "eval_runtime": 5.2715, "eval_samples_per_second": 82.33, "eval_steps_per_second": 10.433, "step": 3520 }, { "epoch": 10.95, "learning_rate": 9.414215321223168e-07, "loss": 1.3031, "step": 3525 }, { "epoch": 10.96, "learning_rate": 4.696220449804098e-07, "loss": 1.0973, "step": 3530 }, { "epoch": 10.98, "learning_rate": 1.5992243352901425e-07, "loss": 1.1205, "step": 3535 }, { "epoch": 10.99, "learning_rate": 1.3059553632214649e-08, "loss": 1.5828, "step": 3540 }, { "epoch": 11.0, "eval_loss": 1.0779144763946533, "eval_runtime": 8.8469, "eval_samples_per_second": 46.57, "eval_steps_per_second": 5.878, "step": 3542 } ], "max_steps": 3864, "num_train_epochs": 12, "total_flos": 3691011244032000.0, "trial_name": null, "trial_params": null }