{ "best_metric": 1.2792030572891235, "best_model_checkpoint": "output/oxxxymiron/checkpoint-4807", "epoch": 19.0, "global_step": 4807, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00013707387498762246, "loss": 2.6224, "step": 5 }, { "epoch": 0.04, "learning_rate": 0.00013669596372654658, "loss": 2.5403, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00013606765583958522, "loss": 2.4692, "step": 15 }, { "epoch": 0.08, "learning_rate": 0.00013519126168650572, "loss": 2.3553, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.00013407000386857356, "loss": 2.2478, "step": 25 }, { "epoch": 0.12, "learning_rate": 0.0001327080053786798, "loss": 2.4781, "step": 30 }, { "epoch": 0.14, "learning_rate": 0.0001311102744406261, "loss": 2.2512, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.00012928268609331444, "loss": 2.2522, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.00012723196058755901, "loss": 2.4658, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.00012496563867495734, "loss": 2.2714, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.0001224920538796866, "loss": 2.2569, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.00011982030185518478, "loss": 2.2897, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00011696020693839531, "loss": 2.1927, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.00011392228602455959, "loss": 2.1867, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00011071770989539364, "loss": 2.1953, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.00010735826214284955, "loss": 2.2485, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00010385629583950372, "loss": 2.1775, "step": 85 }, { "epoch": 0.35, "learning_rate": 0.00010022468811490008, "loss": 2.2653, "step": 90 }, { "epoch": 0.37, "learning_rate": 9.647679280487579e-05, "loss": 2.2253, "step": 95 }, { "epoch": 0.39, "learning_rate": 9.262639134798382e-05, "loss": 2.2196, "step": 100 }, { "epoch": 0.41, "learning_rate": 8.868764210957135e-05, "loss": 2.1454, "step": 105 }, { "epoch": 0.42, "learning_rate": 8.467502831985555e-05, "loss": 2.1683, "step": 110 }, { "epoch": 0.44, "learning_rate": 8.060330481743385e-05, "loss": 2.1135, "step": 115 }, { "epoch": 0.46, "learning_rate": 7.648744379405968e-05, "loss": 2.2169, "step": 120 }, { "epoch": 0.48, "learning_rate": 7.234257974018543e-05, "loss": 2.0504, "step": 125 }, { "epoch": 0.5, "learning_rate": 6.818395379371482e-05, "loss": 2.1956, "step": 130 }, { "epoch": 0.52, "learning_rate": 6.40268576966004e-05, "loss": 2.1556, "step": 135 }, { "epoch": 0.54, "learning_rate": 5.9886577565364115e-05, "loss": 2.1258, "step": 140 }, { "epoch": 0.56, "learning_rate": 5.577833768230335e-05, "loss": 2.0906, "step": 145 }, { "epoch": 0.58, "learning_rate": 5.171724451406823e-05, "loss": 2.0648, "step": 150 }, { "epoch": 0.6, "learning_rate": 4.7718231163460816e-05, "loss": 2.1031, "step": 155 }, { "epoch": 0.62, "learning_rate": 4.379600245871268e-05, "loss": 2.456, "step": 160 }, { "epoch": 0.64, "learning_rate": 3.99649808821543e-05, "loss": 2.1486, "step": 165 }, { "epoch": 0.66, "learning_rate": 3.6239253537102286e-05, "loss": 2.1327, "step": 170 }, { "epoch": 0.68, "learning_rate": 3.263252034797391e-05, "loss": 2.1147, "step": 175 }, { "epoch": 0.69, "learning_rate": 2.9158043684102103e-05, "loss": 2.0989, "step": 180 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.082, "step": 185 }, { "epoch": 0.73, "learning_rate": 2.265643081883308e-05, "loss": 2.0394, "step": 190 }, { "epoch": 0.75, "learning_rate": 1.9653201789538038e-05, "loss": 2.0346, "step": 195 }, { "epoch": 0.77, "learning_rate": 1.682995572030529e-05, "loss": 2.1082, "step": 200 }, { "epoch": 0.79, "learning_rate": 1.419707400896067e-05, "loss": 2.1204, "step": 205 }, { "epoch": 0.81, "learning_rate": 1.1764238061872434e-05, "loss": 1.976, "step": 210 }, { "epoch": 0.83, "learning_rate": 9.540393694315812e-06, "loss": 1.9811, "step": 215 }, { "epoch": 0.85, "learning_rate": 7.533718235689727e-06, "loss": 2.0259, "step": 220 }, { "epoch": 0.87, "learning_rate": 5.751590460543944e-06, "loss": 2.0717, "step": 225 }, { "epoch": 0.89, "learning_rate": 4.2005634559834046e-06, "loss": 2.0042, "step": 230 }, { "epoch": 0.91, "learning_rate": 2.8863405252193965e-06, "loss": 2.0301, "step": 235 }, { "epoch": 0.93, "learning_rate": 1.8137542158731914e-06, "loss": 2.0554, "step": 240 }, { "epoch": 0.95, "learning_rate": 9.867485501471999e-07, "loss": 2.0167, "step": 245 }, { "epoch": 0.97, "learning_rate": 4.083645222054329e-07, "loss": 2.0252, "step": 250 }, { "epoch": 0.98, "learning_rate": 8.072891609113784e-08, "loss": 2.0128, "step": 255 }, { "epoch": 1.0, "learning_rate": 5.046485299251069e-09, "loss": 2.1545, "step": 260 }, { "epoch": 1.02, "learning_rate": 1.815955227603999e-07, "loss": 2.1436, "step": 265 }, { "epoch": 1.04, "learning_rate": 6.097268375260298e-07, "loss": 2.055, "step": 270 }, { "epoch": 1.06, "learning_rate": 1.2878661419176275e-06, "loss": 2.0446, "step": 275 }, { "epoch": 1.08, "learning_rate": 2.2135198403619775e-06, "loss": 2.0962, "step": 280 }, { "epoch": 1.1, "learning_rate": 3.3832841986266328e-06, "loss": 1.9858, "step": 285 }, { "epoch": 1.12, "learning_rate": 4.7928578597388414e-06, "loss": 2.0331, "step": 290 }, { "epoch": 1.14, "learning_rate": 6.437057660565834e-06, "loss": 2.0861, "step": 295 }, { "epoch": 1.16, "learning_rate": 8.309837690896773e-06, "loss": 2.0373, "step": 300 }, { "epoch": 1.18, "learning_rate": 1.0404311524944352e-05, "loss": 2.0409, "step": 305 }, { "epoch": 1.2, "learning_rate": 1.271277754351776e-05, "loss": 2.1014, "step": 310 }, { "epoch": 1.22, "learning_rate": 1.5226747253755048e-05, "loss": 2.0414, "step": 315 }, { "epoch": 1.24, "learning_rate": 1.79369765022795e-05, "loss": 2.1258, "step": 320 }, { "epoch": 1.25, "learning_rate": 2.083349946700612e-05, "loss": 2.0382, "step": 325 }, { "epoch": 1.27, "learning_rate": 2.3905665302606086e-05, "loss": 1.9499, "step": 330 }, { "epoch": 1.29, "learning_rate": 2.7142177304879985e-05, "loss": 2.1053, "step": 335 }, { "epoch": 1.31, "learning_rate": 3.05311344500276e-05, "loss": 2.0373, "step": 340 }, { "epoch": 1.33, "learning_rate": 3.4060075156069894e-05, "loss": 1.9227, "step": 345 }, { "epoch": 1.35, "learning_rate": 3.771602310550759e-05, "loss": 2.0504, "step": 350 }, { "epoch": 1.37, "learning_rate": 4.148553496072039e-05, "loss": 2.0583, "step": 355 }, { "epoch": 1.39, "learning_rate": 4.5354749796652995e-05, "loss": 2.0166, "step": 360 }, { "epoch": 1.41, "learning_rate": 4.930944006901758e-05, "loss": 2.0524, "step": 365 }, { "epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 2.0846, "step": 370 }, { "epoch": 1.45, "learning_rate": 5.741681870327513e-05, "loss": 2.0408, "step": 375 }, { "epoch": 1.47, "learning_rate": 6.153969530917408e-05, "loss": 1.9538, "step": 380 }, { "epoch": 1.49, "learning_rate": 6.56885334607442e-05, "loss": 2.1089, "step": 385 }, { "epoch": 1.51, "learning_rate": 6.984807740687121e-05, "loss": 2.0182, "step": 390 }, { "epoch": 1.53, "learning_rate": 7.400303203001308e-05, "loss": 1.9797, "step": 395 }, { "epoch": 1.54, "learning_rate": 7.813811908809188e-05, "loss": 2.0459, "step": 400 }, { "epoch": 1.56, "learning_rate": 8.22381333943327e-05, "loss": 2.0528, "step": 405 }, { "epoch": 1.58, "learning_rate": 8.628799872846947e-05, "loss": 2.0164, "step": 410 }, { "epoch": 1.6, "learning_rate": 9.027282327372695e-05, "loss": 1.973, "step": 415 }, { "epoch": 1.62, "learning_rate": 9.417795437572915e-05, "loss": 2.1012, "step": 420 }, { "epoch": 1.64, "learning_rate": 9.798903242198118e-05, "loss": 1.9342, "step": 425 }, { "epoch": 1.66, "learning_rate": 0.00010169204364380207, "loss": 1.9812, "step": 430 }, { "epoch": 1.68, "learning_rate": 0.0001052733716465509, "loss": 2.1037, "step": 435 }, { "epoch": 1.7, "learning_rate": 0.0001087198474786628, "loss": 2.0833, "step": 440 }, { "epoch": 1.72, "learning_rate": 0.00011201879805538593, "loss": 2.088, "step": 445 }, { "epoch": 1.74, "learning_rate": 0.00011515809275916009, "loss": 2.0211, "step": 450 }, { "epoch": 1.76, "learning_rate": 0.00011812618804528036, "loss": 1.9749, "step": 455 }, { "epoch": 1.78, "learning_rate": 0.00012091216988882859, "loss": 2.1269, "step": 460 }, { "epoch": 1.8, "learning_rate": 0.0001235057939167871, "loss": 2.1407, "step": 465 }, { "epoch": 1.81, "learning_rate": 0.0001258975230777674, "loss": 1.9911, "step": 470 }, { "epoch": 1.83, "learning_rate": 0.00012807856271083559, "loss": 1.8852, "step": 475 }, { "epoch": 1.85, "learning_rate": 0.00013004089288448385, "loss": 2.084, "step": 480 }, { "epoch": 1.87, "learning_rate": 0.00013177729788683344, "loss": 2.0223, "step": 485 }, { "epoch": 1.89, "learning_rate": 0.00013328139275863037, "loss": 1.8615, "step": 490 }, { "epoch": 1.91, "learning_rate": 0.00013454764677146868, "loss": 1.9669, "step": 495 }, { "epoch": 1.93, "learning_rate": 0.00013557140376490993, "loss": 1.997, "step": 500 }, { "epoch": 1.95, "learning_rate": 0.00013634889926771582, "loss": 2.0132, "step": 505 }, { "epoch": 1.97, "learning_rate": 0.00013687727434023874, "loss": 2.1001, "step": 510 }, { "epoch": 1.99, "learning_rate": 0.00013715458608706872, "loss": 1.9605, "step": 515 }, { "epoch": 2.01, "learning_rate": 0.00013717981480128154, "loss": 1.9686, "step": 520 }, { "epoch": 2.03, "learning_rate": 0.0001369528677140173, "loss": 1.9054, "step": 525 }, { "epoch": 2.05, "learning_rate": 0.00013647457933560234, "loss": 1.9621, "step": 530 }, { "epoch": 2.07, "learning_rate": 0.00013574670838695924, "loss": 1.9001, "step": 535 }, { "epoch": 2.08, "learning_rate": 0.00013477193133258972, "loss": 1.9553, "step": 540 }, { "epoch": 2.1, "learning_rate": 0.0001335538325389091, "loss": 1.9963, "step": 545 }, { "epoch": 2.12, "learning_rate": 0.00013209689109412243, "loss": 1.9554, "step": 550 }, { "epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 2.0022, "step": 555 }, { "epoch": 2.16, "learning_rate": 0.00012848876816285752, "loss": 1.8702, "step": 560 }, { "epoch": 2.18, "learning_rate": 0.00012635085415595263, "loss": 1.9121, "step": 565 }, { "epoch": 2.2, "learning_rate": 0.00012400058367105258, "loss": 2.0007, "step": 570 }, { "epoch": 2.22, "learning_rate": 0.0001214465989208104, "loss": 2.0254, "step": 575 }, { "epoch": 2.24, "learning_rate": 0.00011869829119846914, "loss": 1.9571, "step": 580 }, { "epoch": 2.26, "learning_rate": 0.00011576576634500534, "loss": 1.9448, "step": 585 }, { "epoch": 2.28, "learning_rate": 0.00011265980758879933, "loss": 2.0852, "step": 590 }, { "epoch": 2.3, "learning_rate": 0.00010939183589447406, "loss": 1.9757, "step": 595 }, { "epoch": 2.32, "learning_rate": 0.00010597386796670587, "loss": 2.0051, "step": 600 }, { "epoch": 2.34, "learning_rate": 0.0001024184720634304, "loss": 2.015, "step": 605 }, { "epoch": 2.36, "learning_rate": 9.873872178092492e-05, "loss": 1.9478, "step": 610 }, { "epoch": 2.37, "learning_rate": 9.494814798070336e-05, "loss": 1.8666, "step": 615 }, { "epoch": 2.39, "learning_rate": 9.106068903499513e-05, "loss": 1.9268, "step": 620 }, { "epoch": 2.41, "learning_rate": 8.709063957376078e-05, "loss": 1.8729, "step": 625 }, { "epoch": 2.43, "learning_rate": 8.305259792170682e-05, "loss": 1.9046, "step": 630 }, { "epoch": 2.45, "learning_rate": 7.89614124185811e-05, "loss": 1.8255, "step": 635 }, { "epoch": 2.47, "learning_rate": 7.48321268201335e-05, "loss": 1.9063, "step": 640 }, { "epoch": 2.49, "learning_rate": 7.06799249805101e-05, "loss": 1.9487, "step": 645 }, { "epoch": 2.51, "learning_rate": 6.652007501948994e-05, "loss": 1.9612, "step": 650 }, { "epoch": 2.53, "learning_rate": 6.236787317986654e-05, "loss": 1.829, "step": 655 }, { "epoch": 2.55, "learning_rate": 5.8238587581418946e-05, "loss": 1.8833, "step": 660 }, { "epoch": 2.57, "learning_rate": 5.414740207829316e-05, "loss": 1.9743, "step": 665 }, { "epoch": 2.59, "learning_rate": 5.010936042623931e-05, "loss": 1.8592, "step": 670 }, { "epoch": 2.61, "learning_rate": 4.6139310965004967e-05, "loss": 1.8143, "step": 675 }, { "epoch": 2.63, "learning_rate": 4.225185201929667e-05, "loss": 1.9597, "step": 680 }, { "epoch": 2.64, "learning_rate": 3.8461278219075114e-05, "loss": 1.9673, "step": 685 }, { "epoch": 2.66, "learning_rate": 3.478152793656968e-05, "loss": 1.9703, "step": 690 }, { "epoch": 2.68, "learning_rate": 3.1226132033294165e-05, "loss": 1.8479, "step": 695 }, { "epoch": 2.7, "learning_rate": 2.7808164105525978e-05, "loss": 1.8916, "step": 700 }, { "epoch": 2.72, "learning_rate": 2.454019241120065e-05, "loss": 1.9233, "step": 705 }, { "epoch": 2.74, "learning_rate": 2.1434233654994646e-05, "loss": 1.8525, "step": 710 }, { "epoch": 2.76, "learning_rate": 1.850170880153093e-05, "loss": 1.9783, "step": 715 }, { "epoch": 2.78, "learning_rate": 1.5753401079189615e-05, "loss": 1.8898, "step": 720 }, { "epoch": 2.8, "learning_rate": 1.3199416328947464e-05, "loss": 1.9018, "step": 725 }, { "epoch": 2.82, "learning_rate": 1.0849145844047363e-05, "loss": 1.9754, "step": 730 }, { "epoch": 2.84, "learning_rate": 8.711231837142545e-06, "loss": 1.9263, "step": 735 }, { "epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 1.836, "step": 740 }, { "epoch": 2.88, "learning_rate": 5.103108905877591e-06, "loss": 1.9056, "step": 745 }, { "epoch": 2.9, "learning_rate": 3.6461674610908713e-06, "loss": 1.9471, "step": 750 }, { "epoch": 2.92, "learning_rate": 2.4280686674102973e-06, "loss": 1.7961, "step": 755 }, { "epoch": 2.93, "learning_rate": 1.453291613040777e-06, "loss": 1.9143, "step": 760 }, { "epoch": 2.95, "learning_rate": 7.254206643976737e-07, "loss": 1.9633, "step": 765 }, { "epoch": 2.97, "learning_rate": 2.4713228598268823e-07, "loss": 1.9096, "step": 770 }, { "epoch": 2.99, "learning_rate": 2.018519871846962e-08, "loss": 1.7972, "step": 775 }, { "epoch": 3.01, "learning_rate": 4.5413912931266996e-08, "loss": 1.8293, "step": 780 }, { "epoch": 3.03, "learning_rate": 3.2272565976124403e-07, "loss": 1.9095, "step": 785 }, { "epoch": 3.05, "learning_rate": 8.511007322841792e-07, "loss": 1.8653, "step": 790 }, { "epoch": 3.07, "learning_rate": 1.628596235090069e-06, "loss": 1.7876, "step": 795 }, { "epoch": 3.09, "learning_rate": 2.652353228531267e-06, "loss": 1.8415, "step": 800 }, { "epoch": 3.11, "learning_rate": 3.918607241369593e-06, "loss": 1.9383, "step": 805 }, { "epoch": 3.13, "learning_rate": 5.4227021131665505e-06, "loss": 1.8484, "step": 810 }, { "epoch": 3.15, "learning_rate": 7.159107115516193e-06, "loss": 1.8539, "step": 815 }, { "epoch": 3.17, "learning_rate": 9.121437289164363e-06, "loss": 1.8604, "step": 820 }, { "epoch": 3.19, "learning_rate": 1.1302476922232561e-05, "loss": 1.857, "step": 825 }, { "epoch": 3.2, "learning_rate": 1.3694206083212835e-05, "loss": 1.8542, "step": 830 }, { "epoch": 3.22, "learning_rate": 1.6287830111171433e-05, "loss": 1.8419, "step": 835 }, { "epoch": 3.24, "learning_rate": 1.90738119547196e-05, "loss": 1.853, "step": 840 }, { "epoch": 3.26, "learning_rate": 2.204190724083989e-05, "loss": 1.7492, "step": 845 }, { "epoch": 3.28, "learning_rate": 2.5181201944614038e-05, "loss": 1.8399, "step": 850 }, { "epoch": 3.3, "learning_rate": 2.8480152521337186e-05, "loss": 1.8052, "step": 855 }, { "epoch": 3.32, "learning_rate": 3.192662835344901e-05, "loss": 1.9257, "step": 860 }, { "epoch": 3.34, "learning_rate": 3.550795635619789e-05, "loss": 1.8094, "step": 865 }, { "epoch": 3.36, "learning_rate": 3.921096757801878e-05, "loss": 1.806, "step": 870 }, { "epoch": 3.38, "learning_rate": 4.302204562427086e-05, "loss": 1.8443, "step": 875 }, { "epoch": 3.4, "learning_rate": 4.692717672627302e-05, "loss": 1.892, "step": 880 }, { "epoch": 3.42, "learning_rate": 5.091200127153043e-05, "loss": 1.8859, "step": 885 }, { "epoch": 3.44, "learning_rate": 5.496186660566721e-05, "loss": 1.8386, "step": 890 }, { "epoch": 3.46, "learning_rate": 5.906188091190809e-05, "loss": 1.7457, "step": 895 }, { "epoch": 3.47, "learning_rate": 6.3196967969987e-05, "loss": 1.8338, "step": 900 }, { "epoch": 3.49, "learning_rate": 6.73519225931287e-05, "loss": 1.9389, "step": 905 }, { "epoch": 3.51, "learning_rate": 7.151146653925576e-05, "loss": 1.8629, "step": 910 }, { "epoch": 3.53, "learning_rate": 7.566030469082582e-05, "loss": 1.8406, "step": 915 }, { "epoch": 3.55, "learning_rate": 7.978318129672488e-05, "loss": 1.8205, "step": 920 }, { "epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 1.9223, "step": 925 }, { "epoch": 3.59, "learning_rate": 8.789055993098239e-05, "loss": 1.9075, "step": 930 }, { "epoch": 3.61, "learning_rate": 9.184525020334701e-05, "loss": 1.9361, "step": 935 }, { "epoch": 3.63, "learning_rate": 9.571446503927958e-05, "loss": 1.9117, "step": 940 }, { "epoch": 3.65, "learning_rate": 9.948397689449231e-05, "loss": 1.8154, "step": 945 }, { "epoch": 3.67, "learning_rate": 0.00010313992484393007, "loss": 1.7656, "step": 950 }, { "epoch": 3.69, "learning_rate": 0.00010666886554997237, "loss": 1.8084, "step": 955 }, { "epoch": 3.71, "learning_rate": 0.00011005782269512003, "loss": 1.8921, "step": 960 }, { "epoch": 3.73, "learning_rate": 0.00011329433469739388, "loss": 1.8615, "step": 965 }, { "epoch": 3.75, "learning_rate": 0.00011636650053299383, "loss": 1.9111, "step": 970 }, { "epoch": 3.76, "learning_rate": 0.00011926302349772043, "loss": 1.8464, "step": 975 }, { "epoch": 3.78, "learning_rate": 0.00012197325274624493, "loss": 1.8594, "step": 980 }, { "epoch": 3.8, "learning_rate": 0.00012448722245648227, "loss": 1.9001, "step": 985 }, { "epoch": 3.82, "learning_rate": 0.00012679568847505558, "loss": 1.9249, "step": 990 }, { "epoch": 3.84, "learning_rate": 0.00012889016230910322, "loss": 1.8819, "step": 995 }, { "epoch": 3.86, "learning_rate": 0.00013076294233943414, "loss": 1.8397, "step": 1000 }, { "epoch": 3.88, "learning_rate": 0.00013240714214026114, "loss": 1.8645, "step": 1005 }, { "epoch": 3.9, "learning_rate": 0.00013381671580137337, "loss": 1.8523, "step": 1010 }, { "epoch": 3.92, "learning_rate": 0.00013498648015963801, "loss": 1.8243, "step": 1015 }, { "epoch": 3.94, "learning_rate": 0.00013591213385808236, "loss": 1.7541, "step": 1020 }, { "epoch": 3.96, "learning_rate": 0.00013659027316247397, "loss": 1.8299, "step": 1025 }, { "epoch": 3.98, "learning_rate": 0.00013701840447723958, "loss": 1.8042, "step": 1030 }, { "epoch": 4.0, "learning_rate": 0.00013719495351470075, "loss": 1.8895, "step": 1035 }, { "epoch": 4.02, "learning_rate": 0.00013711927108390887, "loss": 1.7727, "step": 1040 }, { "epoch": 4.03, "learning_rate": 0.00013679163547779458, "loss": 1.7147, "step": 1045 }, { "epoch": 4.05, "learning_rate": 0.0001362132514498528, "loss": 1.7369, "step": 1050 }, { "epoch": 4.07, "learning_rate": 0.00013538624578412684, "loss": 1.8461, "step": 1055 }, { "epoch": 4.09, "learning_rate": 0.00013431365947478064, "loss": 1.8544, "step": 1060 }, { "epoch": 4.11, "learning_rate": 0.00013299943654401656, "loss": 1.7556, "step": 1065 }, { "epoch": 4.13, "learning_rate": 0.00013144840953945602, "loss": 1.8978, "step": 1070 }, { "epoch": 4.15, "learning_rate": 0.00012966628176431033, "loss": 1.7574, "step": 1075 }, { "epoch": 4.17, "learning_rate": 0.00012765960630568425, "loss": 1.8181, "step": 1080 }, { "epoch": 4.19, "learning_rate": 0.00012543576193812755, "loss": 1.8873, "step": 1085 }, { "epoch": 4.21, "learning_rate": 0.00012300292599103934, "loss": 1.8158, "step": 1090 }, { "epoch": 4.23, "learning_rate": 0.00012037004427969473, "loss": 1.7751, "step": 1095 }, { "epoch": 4.25, "learning_rate": 0.00011754679821046194, "loss": 1.8045, "step": 1100 }, { "epoch": 4.27, "learning_rate": 0.00011454356918116694, "loss": 1.8204, "step": 1105 }, { "epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 1.7895, "step": 1110 }, { "epoch": 4.31, "learning_rate": 0.00010804195631589795, "loss": 1.8798, "step": 1115 }, { "epoch": 4.32, "learning_rate": 0.0001045674796520261, "loss": 1.8371, "step": 1120 }, { "epoch": 4.34, "learning_rate": 0.00010096074646289774, "loss": 1.8113, "step": 1125 }, { "epoch": 4.36, "learning_rate": 9.72350191178458e-05, "loss": 1.7679, "step": 1130 }, { "epoch": 4.38, "learning_rate": 9.340399754128733e-05, "loss": 1.8109, "step": 1135 }, { "epoch": 4.4, "learning_rate": 8.948176883653924e-05, "loss": 1.791, "step": 1140 }, { "epoch": 4.42, "learning_rate": 8.548275548593188e-05, "loss": 1.7472, "step": 1145 }, { "epoch": 4.44, "learning_rate": 8.14216623176968e-05, "loss": 1.7982, "step": 1150 }, { "epoch": 4.46, "learning_rate": 7.731342243463583e-05, "loss": 1.8122, "step": 1155 }, { "epoch": 4.48, "learning_rate": 7.317314230339972e-05, "loss": 1.8021, "step": 1160 }, { "epoch": 4.5, "learning_rate": 6.901604620628534e-05, "loss": 1.767, "step": 1165 }, { "epoch": 4.52, "learning_rate": 6.485742025981452e-05, "loss": 1.773, "step": 1170 }, { "epoch": 4.54, "learning_rate": 6.0712556205940305e-05, "loss": 1.7419, "step": 1175 }, { "epoch": 4.56, "learning_rate": 5.6596695182566174e-05, "loss": 1.7812, "step": 1180 }, { "epoch": 4.58, "learning_rate": 5.2524971680144414e-05, "loss": 1.7998, "step": 1185 }, { "epoch": 4.59, "learning_rate": 4.8512357890428636e-05, "loss": 1.8107, "step": 1190 }, { "epoch": 4.61, "learning_rate": 4.4573608652016233e-05, "loss": 1.8343, "step": 1195 }, { "epoch": 4.63, "learning_rate": 4.0723207195124294e-05, "loss": 1.7872, "step": 1200 }, { "epoch": 4.65, "learning_rate": 3.697531188509992e-05, "loss": 1.8365, "step": 1205 }, { "epoch": 4.67, "learning_rate": 3.334370416049629e-05, "loss": 1.7498, "step": 1210 }, { "epoch": 4.69, "learning_rate": 2.9841737857150516e-05, "loss": 1.7864, "step": 1215 }, { "epoch": 4.71, "learning_rate": 2.6482290104606358e-05, "loss": 1.7891, "step": 1220 }, { "epoch": 4.73, "learning_rate": 2.327771397544045e-05, "loss": 1.821, "step": 1225 }, { "epoch": 4.75, "learning_rate": 2.0239793061604753e-05, "loss": 1.6957, "step": 1230 }, { "epoch": 4.77, "learning_rate": 1.7379698144815295e-05, "loss": 1.7592, "step": 1235 }, { "epoch": 4.79, "learning_rate": 1.4707946120313422e-05, "loss": 1.7464, "step": 1240 }, { "epoch": 4.81, "learning_rate": 1.2234361325042733e-05, "loss": 1.7876, "step": 1245 }, { "epoch": 4.83, "learning_rate": 9.968039412441069e-06, "loss": 1.7211, "step": 1250 }, { "epoch": 4.85, "learning_rate": 7.917313906685515e-06, "loss": 1.7474, "step": 1255 }, { "epoch": 4.86, "learning_rate": 6.089725559373884e-06, "loss": 1.7813, "step": 1260 }, { "epoch": 4.88, "learning_rate": 4.491994621320209e-06, "loss": 1.7099, "step": 1265 }, { "epoch": 4.9, "learning_rate": 3.1299961314264275e-06, "loss": 1.697, "step": 1270 }, { "epoch": 4.92, "learning_rate": 2.0087383134942665e-06, "loss": 1.8274, "step": 1275 }, { "epoch": 4.94, "learning_rate": 1.1323441604147912e-06, "loss": 1.8383, "step": 1280 }, { "epoch": 4.96, "learning_rate": 5.040362734534388e-07, "loss": 1.8429, "step": 1285 }, { "epoch": 4.98, "learning_rate": 1.2612501237755182e-07, "loss": 1.7653, "step": 1290 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.7624, "step": 1295 }, { "epoch": 5.02, "learning_rate": 1.261250123775442e-07, "loss": 1.6889, "step": 1300 }, { "epoch": 5.04, "learning_rate": 5.040362734534236e-07, "loss": 1.6715, "step": 1305 }, { "epoch": 5.06, "learning_rate": 1.132344160414776e-06, "loss": 1.7252, "step": 1310 }, { "epoch": 5.08, "learning_rate": 2.0087383134942512e-06, "loss": 1.7355, "step": 1315 }, { "epoch": 5.1, "learning_rate": 3.1299961314264046e-06, "loss": 1.8099, "step": 1320 }, { "epoch": 5.12, "learning_rate": 4.491994621320179e-06, "loss": 1.7203, "step": 1325 }, { "epoch": 5.14, "learning_rate": 6.089725559373899e-06, "loss": 1.7262, "step": 1330 }, { "epoch": 5.15, "learning_rate": 7.917313906685478e-06, "loss": 1.721, "step": 1335 }, { "epoch": 5.17, "learning_rate": 9.968039412440962e-06, "loss": 1.6592, "step": 1340 }, { "epoch": 5.19, "learning_rate": 1.2234361325042687e-05, "loss": 1.6871, "step": 1345 }, { "epoch": 5.21, "learning_rate": 1.4707946120313293e-05, "loss": 1.7319, "step": 1350 }, { "epoch": 5.23, "learning_rate": 1.737969814481516e-05, "loss": 1.7102, "step": 1355 }, { "epoch": 5.25, "learning_rate": 2.0239793061604692e-05, "loss": 1.6969, "step": 1360 }, { "epoch": 5.27, "learning_rate": 2.3277713975440297e-05, "loss": 1.6891, "step": 1365 }, { "epoch": 5.29, "learning_rate": 2.648229010460629e-05, "loss": 1.7584, "step": 1370 }, { "epoch": 5.31, "learning_rate": 2.9841737857150448e-05, "loss": 1.7074, "step": 1375 }, { "epoch": 5.33, "learning_rate": 3.334370416049612e-05, "loss": 1.6984, "step": 1380 }, { "epoch": 5.35, "learning_rate": 3.697531188509984e-05, "loss": 1.6734, "step": 1385 }, { "epoch": 5.37, "learning_rate": 4.072320719512421e-05, "loss": 1.6728, "step": 1390 }, { "epoch": 5.39, "learning_rate": 4.457360865201626e-05, "loss": 1.7748, "step": 1395 }, { "epoch": 5.41, "learning_rate": 4.8512357890428555e-05, "loss": 1.6899, "step": 1400 }, { "epoch": 5.42, "learning_rate": 5.252497168014445e-05, "loss": 1.6741, "step": 1405 }, { "epoch": 5.44, "learning_rate": 5.659669518256621e-05, "loss": 1.6965, "step": 1410 }, { "epoch": 5.46, "learning_rate": 6.071255620594022e-05, "loss": 1.6641, "step": 1415 }, { "epoch": 5.48, "learning_rate": 6.485742025981456e-05, "loss": 1.7494, "step": 1420 }, { "epoch": 5.5, "learning_rate": 6.901604620628525e-05, "loss": 1.7469, "step": 1425 }, { "epoch": 5.52, "learning_rate": 7.31731423033995e-05, "loss": 1.7503, "step": 1430 }, { "epoch": 5.54, "learning_rate": 7.731342243463585e-05, "loss": 1.7359, "step": 1435 }, { "epoch": 5.56, "learning_rate": 8.14216623176967e-05, "loss": 1.7196, "step": 1440 }, { "epoch": 5.58, "learning_rate": 8.548275548593167e-05, "loss": 1.7352, "step": 1445 }, { "epoch": 5.6, "learning_rate": 8.948176883653917e-05, "loss": 1.7017, "step": 1450 }, { "epoch": 5.62, "learning_rate": 9.340399754128714e-05, "loss": 1.7402, "step": 1455 }, { "epoch": 5.64, "learning_rate": 9.723501911784583e-05, "loss": 1.7463, "step": 1460 }, { "epoch": 5.66, "learning_rate": 0.00010096074646289766, "loss": 1.6842, "step": 1465 }, { "epoch": 5.68, "learning_rate": 0.00010456747965202592, "loss": 1.7311, "step": 1470 }, { "epoch": 5.69, "learning_rate": 0.00010804195631589798, "loss": 1.7528, "step": 1475 }, { "epoch": 5.71, "learning_rate": 0.00011137140040750908, "loss": 1.7338, "step": 1480 }, { "epoch": 5.73, "learning_rate": 0.00011454356918116697, "loss": 1.8454, "step": 1485 }, { "epoch": 5.75, "learning_rate": 0.00011754679821046187, "loss": 1.6556, "step": 1490 }, { "epoch": 5.77, "learning_rate": 0.00012037004427969469, "loss": 1.7088, "step": 1495 }, { "epoch": 5.79, "learning_rate": 0.00012300292599103937, "loss": 1.7949, "step": 1500 }, { "epoch": 5.81, "learning_rate": 0.0001254357619381275, "loss": 1.7168, "step": 1505 }, { "epoch": 5.83, "learning_rate": 0.00012765960630568417, "loss": 1.8008, "step": 1510 }, { "epoch": 5.85, "learning_rate": 0.00012966628176431028, "loss": 1.7668, "step": 1515 }, { "epoch": 5.87, "learning_rate": 0.000131448409539456, "loss": 1.8123, "step": 1520 }, { "epoch": 5.89, "learning_rate": 0.00013299943654401658, "loss": 1.6967, "step": 1525 }, { "epoch": 5.91, "learning_rate": 0.0001343136594747806, "loss": 1.7767, "step": 1530 }, { "epoch": 5.93, "learning_rate": 0.00013538624578412676, "loss": 1.8144, "step": 1535 }, { "epoch": 5.95, "learning_rate": 0.0001362132514498528, "loss": 1.7673, "step": 1540 }, { "epoch": 5.97, "learning_rate": 0.00013679163547779458, "loss": 1.7239, "step": 1545 }, { "epoch": 5.98, "learning_rate": 0.00013711927108390887, "loss": 1.7237, "step": 1550 }, { "epoch": 6.0, "learning_rate": 0.00013719495351470075, "loss": 1.8186, "step": 1555 }, { "epoch": 6.02, "learning_rate": 0.0001370184044772396, "loss": 1.7571, "step": 1560 }, { "epoch": 6.04, "learning_rate": 0.00013659027316247397, "loss": 1.6908, "step": 1565 }, { "epoch": 6.06, "learning_rate": 0.00013591213385808238, "loss": 1.6378, "step": 1570 }, { "epoch": 6.08, "learning_rate": 0.0001349864801596381, "loss": 1.763, "step": 1575 }, { "epoch": 6.1, "learning_rate": 0.00013381671580137334, "loss": 1.8114, "step": 1580 }, { "epoch": 6.12, "learning_rate": 0.00013240714214026117, "loss": 1.7691, "step": 1585 }, { "epoch": 6.14, "learning_rate": 0.00013076294233943417, "loss": 1.7388, "step": 1590 }, { "epoch": 6.16, "learning_rate": 0.0001288901623091032, "loss": 1.6458, "step": 1595 }, { "epoch": 6.18, "learning_rate": 0.00012679568847505571, "loss": 1.6852, "step": 1600 }, { "epoch": 6.2, "learning_rate": 0.00012448722245648225, "loss": 1.7267, "step": 1605 }, { "epoch": 6.22, "learning_rate": 0.00012197325274624507, "loss": 1.7517, "step": 1610 }, { "epoch": 6.24, "learning_rate": 0.00011926302349772057, "loss": 1.7343, "step": 1615 }, { "epoch": 6.25, "learning_rate": 0.0001163665005329939, "loss": 1.6811, "step": 1620 }, { "epoch": 6.27, "learning_rate": 0.00011329433469739406, "loss": 1.7056, "step": 1625 }, { "epoch": 6.29, "learning_rate": 0.00011005782269511991, "loss": 1.7447, "step": 1630 }, { "epoch": 6.31, "learning_rate": 0.00010666886554997244, "loss": 1.6661, "step": 1635 }, { "epoch": 6.33, "learning_rate": 0.00010313992484393024, "loss": 1.723, "step": 1640 }, { "epoch": 6.35, "learning_rate": 9.948397689449228e-05, "loss": 1.6887, "step": 1645 }, { "epoch": 6.37, "learning_rate": 9.571446503927964e-05, "loss": 1.6767, "step": 1650 }, { "epoch": 6.39, "learning_rate": 9.184525020334699e-05, "loss": 1.6593, "step": 1655 }, { "epoch": 6.41, "learning_rate": 8.789055993098258e-05, "loss": 1.6807, "step": 1660 }, { "epoch": 6.43, "learning_rate": 8.386493606940322e-05, "loss": 1.7043, "step": 1665 }, { "epoch": 6.45, "learning_rate": 7.978318129672484e-05, "loss": 1.7188, "step": 1670 }, { "epoch": 6.47, "learning_rate": 7.566030469082603e-05, "loss": 1.6494, "step": 1675 }, { "epoch": 6.49, "learning_rate": 7.151146653925584e-05, "loss": 1.6752, "step": 1680 }, { "epoch": 6.51, "learning_rate": 6.735192259312878e-05, "loss": 1.6569, "step": 1685 }, { "epoch": 6.53, "learning_rate": 6.319696796998709e-05, "loss": 1.6728, "step": 1690 }, { "epoch": 6.54, "learning_rate": 5.906188091190817e-05, "loss": 1.6875, "step": 1695 }, { "epoch": 6.56, "learning_rate": 5.4961866605667284e-05, "loss": 1.6511, "step": 1700 }, { "epoch": 6.58, "learning_rate": 5.091200127153063e-05, "loss": 1.6906, "step": 1705 }, { "epoch": 6.6, "learning_rate": 4.6927176726273094e-05, "loss": 1.6586, "step": 1710 }, { "epoch": 6.62, "learning_rate": 4.302204562427082e-05, "loss": 1.6804, "step": 1715 }, { "epoch": 6.64, "learning_rate": 3.921096757801896e-05, "loss": 1.6353, "step": 1720 }, { "epoch": 6.66, "learning_rate": 3.550795635619796e-05, "loss": 1.6895, "step": 1725 }, { "epoch": 6.68, "learning_rate": 3.192662835344908e-05, "loss": 1.7331, "step": 1730 }, { "epoch": 6.7, "learning_rate": 2.8480152521337155e-05, "loss": 1.6892, "step": 1735 }, { "epoch": 6.72, "learning_rate": 2.51812019446141e-05, "loss": 1.6841, "step": 1740 }, { "epoch": 6.74, "learning_rate": 2.2041907240840133e-05, "loss": 1.7096, "step": 1745 }, { "epoch": 6.76, "learning_rate": 1.907381195471957e-05, "loss": 1.7284, "step": 1750 }, { "epoch": 6.78, "learning_rate": 1.6287830111171488e-05, "loss": 1.6272, "step": 1755 }, { "epoch": 6.8, "learning_rate": 1.3694206083212888e-05, "loss": 1.5783, "step": 1760 }, { "epoch": 6.81, "learning_rate": 1.1302476922232546e-05, "loss": 1.5607, "step": 1765 }, { "epoch": 6.83, "learning_rate": 9.121437289164463e-06, "loss": 1.6762, "step": 1770 }, { "epoch": 6.85, "learning_rate": 7.159107115516178e-06, "loss": 1.6488, "step": 1775 }, { "epoch": 6.87, "learning_rate": 5.422702113166627e-06, "loss": 1.6201, "step": 1780 }, { "epoch": 6.89, "learning_rate": 3.918607241369662e-06, "loss": 1.7022, "step": 1785 }, { "epoch": 6.91, "learning_rate": 2.65235322853129e-06, "loss": 1.632, "step": 1790 }, { "epoch": 6.93, "learning_rate": 1.6285962350901147e-06, "loss": 1.6661, "step": 1795 }, { "epoch": 6.95, "learning_rate": 8.511007322841488e-07, "loss": 1.6079, "step": 1800 }, { "epoch": 6.97, "learning_rate": 3.2272565976125165e-07, "loss": 1.6758, "step": 1805 }, { "epoch": 6.99, "learning_rate": 4.541391293127461e-08, "loss": 1.6987, "step": 1810 }, { "epoch": 7.01, "learning_rate": 2.018519871846962e-08, "loss": 1.5689, "step": 1815 }, { "epoch": 7.03, "learning_rate": 2.471322859826806e-07, "loss": 1.5376, "step": 1820 }, { "epoch": 7.05, "learning_rate": 7.254206643976737e-07, "loss": 1.6088, "step": 1825 }, { "epoch": 7.07, "learning_rate": 1.4532916130407314e-06, "loss": 1.6132, "step": 1830 }, { "epoch": 7.08, "learning_rate": 2.4280686674102744e-06, "loss": 1.6823, "step": 1835 }, { "epoch": 7.1, "learning_rate": 3.6461674610908866e-06, "loss": 1.5818, "step": 1840 }, { "epoch": 7.12, "learning_rate": 5.103108905877507e-06, "loss": 1.5745, "step": 1845 }, { "epoch": 7.14, "learning_rate": 6.793535661894024e-06, "loss": 1.5783, "step": 1850 }, { "epoch": 7.16, "learning_rate": 8.7112318371425e-06, "loss": 1.6224, "step": 1855 }, { "epoch": 7.18, "learning_rate": 1.0849145844047318e-05, "loss": 1.6016, "step": 1860 }, { "epoch": 7.2, "learning_rate": 1.3199416328947412e-05, "loss": 1.5826, "step": 1865 }, { "epoch": 7.22, "learning_rate": 1.5753401079189635e-05, "loss": 1.6424, "step": 1870 }, { "epoch": 7.24, "learning_rate": 1.8501708801530793e-05, "loss": 1.5944, "step": 1875 }, { "epoch": 7.26, "learning_rate": 2.1434233654994585e-05, "loss": 1.5702, "step": 1880 }, { "epoch": 7.28, "learning_rate": 2.454019241120068e-05, "loss": 1.5819, "step": 1885 }, { "epoch": 7.3, "learning_rate": 2.780816410552581e-05, "loss": 1.5461, "step": 1890 }, { "epoch": 7.32, "learning_rate": 3.12261320332941e-05, "loss": 1.6276, "step": 1895 }, { "epoch": 7.34, "learning_rate": 3.4781527936569615e-05, "loss": 1.6333, "step": 1900 }, { "epoch": 7.36, "learning_rate": 3.8461278219075155e-05, "loss": 1.5744, "step": 1905 }, { "epoch": 7.37, "learning_rate": 4.2251852019296586e-05, "loss": 1.601, "step": 1910 }, { "epoch": 7.39, "learning_rate": 4.6139310965004655e-05, "loss": 1.4994, "step": 1915 }, { "epoch": 7.41, "learning_rate": 5.010936042623934e-05, "loss": 1.5667, "step": 1920 }, { "epoch": 7.43, "learning_rate": 5.4147402078293086e-05, "loss": 1.6055, "step": 1925 }, { "epoch": 7.45, "learning_rate": 5.823858758141886e-05, "loss": 1.6403, "step": 1930 }, { "epoch": 7.47, "learning_rate": 6.236787317986658e-05, "loss": 1.5103, "step": 1935 }, { "epoch": 7.49, "learning_rate": 6.65200750194898e-05, "loss": 1.5918, "step": 1940 }, { "epoch": 7.51, "learning_rate": 7.067992498051008e-05, "loss": 1.5905, "step": 1945 }, { "epoch": 7.53, "learning_rate": 7.48321268201333e-05, "loss": 1.6157, "step": 1950 }, { "epoch": 7.55, "learning_rate": 7.896141241858101e-05, "loss": 1.5963, "step": 1955 }, { "epoch": 7.57, "learning_rate": 8.305259792170679e-05, "loss": 1.586, "step": 1960 }, { "epoch": 7.59, "learning_rate": 8.709063957376054e-05, "loss": 1.6324, "step": 1965 }, { "epoch": 7.61, "learning_rate": 9.106068903499522e-05, "loss": 1.6232, "step": 1970 }, { "epoch": 7.63, "learning_rate": 9.494814798070329e-05, "loss": 1.6404, "step": 1975 }, { "epoch": 7.64, "learning_rate": 9.873872178092473e-05, "loss": 1.6215, "step": 1980 }, { "epoch": 7.66, "learning_rate": 0.00010241847206343028, "loss": 1.617, "step": 1985 }, { "epoch": 7.68, "learning_rate": 0.0001059738679667058, "loss": 1.6565, "step": 1990 }, { "epoch": 7.7, "learning_rate": 0.0001093918358944741, "loss": 1.7342, "step": 1995 }, { "epoch": 7.72, "learning_rate": 0.00011265980758879924, "loss": 1.6063, "step": 2000 }, { "epoch": 7.74, "learning_rate": 0.00011576576634500532, "loss": 1.6993, "step": 2005 }, { "epoch": 7.76, "learning_rate": 0.00011869829119846911, "loss": 1.6355, "step": 2010 }, { "epoch": 7.78, "learning_rate": 0.00012144659892081027, "loss": 1.6087, "step": 2015 }, { "epoch": 7.8, "learning_rate": 0.00012400058367105252, "loss": 1.6668, "step": 2020 }, { "epoch": 7.82, "learning_rate": 0.00012635085415595263, "loss": 1.7275, "step": 2025 }, { "epoch": 7.84, "learning_rate": 0.00012848876816285744, "loss": 1.6637, "step": 2030 }, { "epoch": 7.86, "learning_rate": 0.00013040646433810593, "loss": 1.6713, "step": 2035 }, { "epoch": 7.88, "learning_rate": 0.00013209689109412246, "loss": 1.6358, "step": 2040 }, { "epoch": 7.9, "learning_rate": 0.00013355383253890908, "loss": 1.6572, "step": 2045 }, { "epoch": 7.92, "learning_rate": 0.0001347719313325897, "loss": 1.6781, "step": 2050 }, { "epoch": 7.93, "learning_rate": 0.00013574670838695924, "loss": 1.6401, "step": 2055 }, { "epoch": 7.95, "learning_rate": 0.0001364745793356023, "loss": 1.673, "step": 2060 }, { "epoch": 7.97, "learning_rate": 0.0001369528677140173, "loss": 1.7179, "step": 2065 }, { "epoch": 7.99, "learning_rate": 0.00013717981480128154, "loss": 1.7015, "step": 2070 }, { "epoch": 8.01, "learning_rate": 0.00013715458608706872, "loss": 1.6596, "step": 2075 }, { "epoch": 8.03, "learning_rate": 0.00013687727434023877, "loss": 1.6462, "step": 2080 }, { "epoch": 8.05, "learning_rate": 0.00013634889926771588, "loss": 1.6041, "step": 2085 }, { "epoch": 8.07, "learning_rate": 0.00013557140376490998, "loss": 1.5571, "step": 2090 }, { "epoch": 8.09, "learning_rate": 0.00013454764677146882, "loss": 1.5541, "step": 2095 }, { "epoch": 8.11, "learning_rate": 0.00013328139275863037, "loss": 1.6773, "step": 2100 }, { "epoch": 8.13, "learning_rate": 0.00013177729788683341, "loss": 1.6003, "step": 2105 }, { "epoch": 8.15, "learning_rate": 0.00013004089288448387, "loss": 1.5288, "step": 2110 }, { "epoch": 8.17, "learning_rate": 0.0001280785627108356, "loss": 1.6059, "step": 2115 }, { "epoch": 8.19, "learning_rate": 0.00012589752307776752, "loss": 1.6265, "step": 2120 }, { "epoch": 8.2, "learning_rate": 0.0001235057939167872, "loss": 1.6372, "step": 2125 }, { "epoch": 8.22, "learning_rate": 0.00012091216988882845, "loss": 1.7324, "step": 2130 }, { "epoch": 8.24, "learning_rate": 0.00011812618804528034, "loss": 1.5938, "step": 2135 }, { "epoch": 8.26, "learning_rate": 0.00011515809275915997, "loss": 1.5805, "step": 2140 }, { "epoch": 8.28, "learning_rate": 0.00011201879805538599, "loss": 1.62, "step": 2145 }, { "epoch": 8.3, "learning_rate": 0.00010871984747866294, "loss": 1.5884, "step": 2150 }, { "epoch": 8.32, "learning_rate": 0.00010527337164655102, "loss": 1.576, "step": 2155 }, { "epoch": 8.34, "learning_rate": 0.00010169204364380236, "loss": 1.5746, "step": 2160 }, { "epoch": 8.36, "learning_rate": 9.798903242198116e-05, "loss": 1.6259, "step": 2165 }, { "epoch": 8.38, "learning_rate": 9.417795437572906e-05, "loss": 1.6047, "step": 2170 }, { "epoch": 8.4, "learning_rate": 9.027282327372703e-05, "loss": 1.5615, "step": 2175 }, { "epoch": 8.42, "learning_rate": 8.628799872846948e-05, "loss": 1.6294, "step": 2180 }, { "epoch": 8.44, "learning_rate": 8.223813339433283e-05, "loss": 1.5485, "step": 2185 }, { "epoch": 8.46, "learning_rate": 7.813811908809194e-05, "loss": 1.5291, "step": 2190 }, { "epoch": 8.47, "learning_rate": 7.400303203001327e-05, "loss": 1.6178, "step": 2195 }, { "epoch": 8.49, "learning_rate": 6.98480774068711e-05, "loss": 1.6335, "step": 2200 }, { "epoch": 8.51, "learning_rate": 6.568853346074429e-05, "loss": 1.5607, "step": 2205 }, { "epoch": 8.53, "learning_rate": 6.15396953091741e-05, "loss": 1.552, "step": 2210 }, { "epoch": 8.55, "learning_rate": 5.741681870327528e-05, "loss": 1.6358, "step": 2215 }, { "epoch": 8.57, "learning_rate": 5.33350639305969e-05, "loss": 1.6499, "step": 2220 }, { "epoch": 8.59, "learning_rate": 4.930944006901777e-05, "loss": 1.5632, "step": 2225 }, { "epoch": 8.61, "learning_rate": 4.535474979665314e-05, "loss": 1.5825, "step": 2230 }, { "epoch": 8.63, "learning_rate": 4.148553496072023e-05, "loss": 1.6277, "step": 2235 }, { "epoch": 8.65, "learning_rate": 3.7716023105507615e-05, "loss": 1.5497, "step": 2240 }, { "epoch": 8.67, "learning_rate": 3.406007515606987e-05, "loss": 1.5159, "step": 2245 }, { "epoch": 8.69, "learning_rate": 3.0531134450027666e-05, "loss": 1.5683, "step": 2250 }, { "epoch": 8.71, "learning_rate": 2.7142177304880198e-05, "loss": 1.5193, "step": 2255 }, { "epoch": 8.73, "learning_rate": 2.390566530260624e-05, "loss": 1.6145, "step": 2260 }, { "epoch": 8.75, "learning_rate": 2.0833499467006378e-05, "loss": 1.5854, "step": 2265 }, { "epoch": 8.76, "learning_rate": 1.7936976502279525e-05, "loss": 1.5426, "step": 2270 }, { "epoch": 8.78, "learning_rate": 1.5226747253755011e-05, "loss": 1.5862, "step": 2275 }, { "epoch": 8.8, "learning_rate": 1.2712777543517822e-05, "loss": 1.5478, "step": 2280 }, { "epoch": 8.82, "learning_rate": 1.0404311524944368e-05, "loss": 1.6329, "step": 2285 }, { "epoch": 8.84, "learning_rate": 8.309837690896873e-06, "loss": 1.535, "step": 2290 }, { "epoch": 8.86, "learning_rate": 6.43705766056588e-06, "loss": 1.5849, "step": 2295 }, { "epoch": 8.88, "learning_rate": 4.792857859738948e-06, "loss": 1.6253, "step": 2300 }, { "epoch": 8.9, "learning_rate": 3.3832841986266175e-06, "loss": 1.537, "step": 2305 }, { "epoch": 8.92, "learning_rate": 2.213519840361947e-06, "loss": 1.5028, "step": 2310 }, { "epoch": 8.94, "learning_rate": 1.2878661419176351e-06, "loss": 1.5904, "step": 2315 }, { "epoch": 8.96, "learning_rate": 6.097268375260679e-07, "loss": 1.6009, "step": 2320 }, { "epoch": 8.98, "learning_rate": 1.8159552276040752e-07, "loss": 1.5465, "step": 2325 }, { "epoch": 9.0, "learning_rate": 5.046485299251069e-09, "loss": 1.5575, "step": 2330 }, { "epoch": 9.02, "learning_rate": 8.072891609113784e-08, "loss": 1.5791, "step": 2335 }, { "epoch": 9.03, "learning_rate": 4.0836452220544814e-07, "loss": 1.4865, "step": 2340 }, { "epoch": 9.05, "learning_rate": 9.867485501471922e-07, "loss": 1.5316, "step": 2345 }, { "epoch": 9.07, "learning_rate": 1.813754215873199e-06, "loss": 1.5403, "step": 2350 }, { "epoch": 9.09, "learning_rate": 2.8863405252193584e-06, "loss": 1.4183, "step": 2355 }, { "epoch": 9.11, "learning_rate": 4.200563455983382e-06, "loss": 1.5547, "step": 2360 }, { "epoch": 9.13, "learning_rate": 5.75159046054386e-06, "loss": 1.5414, "step": 2365 }, { "epoch": 9.15, "learning_rate": 7.5337182356897725e-06, "loss": 1.5445, "step": 2370 }, { "epoch": 9.17, "learning_rate": 9.540393694315775e-06, "loss": 1.4539, "step": 2375 }, { "epoch": 9.19, "learning_rate": 1.1764238061872442e-05, "loss": 1.4992, "step": 2380 }, { "epoch": 9.21, "learning_rate": 1.4197074008960564e-05, "loss": 1.5203, "step": 2385 }, { "epoch": 9.23, "learning_rate": 1.6829955720305234e-05, "loss": 1.4989, "step": 2390 }, { "epoch": 9.25, "learning_rate": 1.965320178953787e-05, "loss": 1.5128, "step": 2395 }, { "epoch": 9.27, "learning_rate": 2.265643081883295e-05, "loss": 1.5033, "step": 2400 }, { "epoch": 9.29, "learning_rate": 2.582859959249101e-05, "loss": 1.4938, "step": 2405 }, { "epoch": 9.31, "learning_rate": 2.915804368410211e-05, "loss": 1.5157, "step": 2410 }, { "epoch": 9.32, "learning_rate": 3.2632520347973973e-05, "loss": 1.4103, "step": 2415 }, { "epoch": 9.34, "learning_rate": 3.623925353710222e-05, "loss": 1.524, "step": 2420 }, { "epoch": 9.36, "learning_rate": 3.996498088215406e-05, "loss": 1.5389, "step": 2425 }, { "epoch": 9.38, "learning_rate": 4.3796002458712527e-05, "loss": 1.5645, "step": 2430 }, { "epoch": 9.4, "learning_rate": 4.7718231163460484e-05, "loss": 1.5511, "step": 2435 }, { "epoch": 9.42, "learning_rate": 5.1717244514068206e-05, "loss": 1.5406, "step": 2440 }, { "epoch": 9.44, "learning_rate": 5.57783376823034e-05, "loss": 1.567, "step": 2445 }, { "epoch": 9.46, "learning_rate": 5.988657756536402e-05, "loss": 1.602, "step": 2450 }, { "epoch": 9.48, "learning_rate": 6.402685769660036e-05, "loss": 1.4789, "step": 2455 }, { "epoch": 9.5, "learning_rate": 6.818395379371463e-05, "loss": 1.5673, "step": 2460 }, { "epoch": 9.52, "learning_rate": 7.234257974018531e-05, "loss": 1.5527, "step": 2465 }, { "epoch": 9.54, "learning_rate": 7.64874437940594e-05, "loss": 1.4721, "step": 2470 }, { "epoch": 9.56, "learning_rate": 8.060330481743391e-05, "loss": 1.4447, "step": 2475 }, { "epoch": 9.58, "learning_rate": 8.467502831985544e-05, "loss": 1.5768, "step": 2480 }, { "epoch": 9.59, "learning_rate": 8.868764210957132e-05, "loss": 1.4808, "step": 2485 }, { "epoch": 9.61, "learning_rate": 9.262639134798362e-05, "loss": 1.4197, "step": 2490 }, { "epoch": 9.63, "learning_rate": 9.647679280487567e-05, "loss": 1.6109, "step": 2495 }, { "epoch": 9.65, "learning_rate": 0.00010022468811489983, "loss": 1.5907, "step": 2500 }, { "epoch": 9.67, "learning_rate": 0.00010385629583950378, "loss": 1.5902, "step": 2505 }, { "epoch": 9.69, "learning_rate": 0.00010735826214284965, "loss": 1.6053, "step": 2510 }, { "epoch": 9.71, "learning_rate": 0.00011071770989539361, "loss": 1.552, "step": 2515 }, { "epoch": 9.73, "learning_rate": 0.00011392228602455961, "loss": 1.5787, "step": 2520 }, { "epoch": 9.75, "learning_rate": 0.00011696020693839523, "loss": 1.4997, "step": 2525 }, { "epoch": 9.77, "learning_rate": 0.00011982030185518476, "loss": 1.6354, "step": 2530 }, { "epoch": 9.79, "learning_rate": 0.00012249205387968647, "loss": 1.586, "step": 2535 }, { "epoch": 9.81, "learning_rate": 0.0001249656386749574, "loss": 1.511, "step": 2540 }, { "epoch": 9.83, "learning_rate": 0.000127231960587559, "loss": 1.5002, "step": 2545 }, { "epoch": 9.85, "learning_rate": 0.00012928268609331444, "loss": 1.5829, "step": 2550 }, { "epoch": 9.86, "learning_rate": 0.00013111027444062605, "loss": 1.6407, "step": 2555 }, { "epoch": 9.88, "learning_rate": 0.00013270800537867978, "loss": 1.5058, "step": 2560 }, { "epoch": 9.9, "learning_rate": 0.00013407000386857348, "loss": 1.4854, "step": 2565 }, { "epoch": 9.92, "learning_rate": 0.0001351912616865057, "loss": 1.4912, "step": 2570 }, { "epoch": 9.94, "learning_rate": 0.00013606765583958525, "loss": 1.5218, "step": 2575 }, { "epoch": 9.96, "learning_rate": 0.00013669596372654658, "loss": 1.5828, "step": 2580 }, { "epoch": 9.98, "learning_rate": 0.00013707387498762246, "loss": 1.5816, "step": 2585 }, { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 1.4944, "step": 2590 }, { "epoch": 10.02, "learning_rate": 0.00013707387498762246, "loss": 1.5296, "step": 2595 }, { "epoch": 10.04, "learning_rate": 0.0001366959637265466, "loss": 1.5509, "step": 2600 }, { "epoch": 10.06, "learning_rate": 0.00013606765583958527, "loss": 1.5305, "step": 2605 }, { "epoch": 10.08, "learning_rate": 0.00013519126168650574, "loss": 1.4872, "step": 2610 }, { "epoch": 10.1, "learning_rate": 0.00013407000386857353, "loss": 1.544, "step": 2615 }, { "epoch": 10.12, "learning_rate": 0.00013270800537867983, "loss": 1.5421, "step": 2620 }, { "epoch": 10.14, "learning_rate": 0.0001311102744406261, "loss": 1.5468, "step": 2625 }, { "epoch": 10.15, "learning_rate": 0.00012928268609331455, "loss": 1.5529, "step": 2630 }, { "epoch": 10.17, "learning_rate": 0.00012723196058755907, "loss": 1.5357, "step": 2635 }, { "epoch": 10.19, "learning_rate": 0.00012496563867495748, "loss": 1.5077, "step": 2640 }, { "epoch": 10.21, "learning_rate": 0.00012249205387968658, "loss": 1.6099, "step": 2645 }, { "epoch": 10.23, "learning_rate": 0.00011982030185518488, "loss": 1.5701, "step": 2650 }, { "epoch": 10.25, "learning_rate": 0.0001169602069383955, "loss": 1.5479, "step": 2655 }, { "epoch": 10.27, "learning_rate": 0.00011392228602455956, "loss": 1.5486, "step": 2660 }, { "epoch": 10.29, "learning_rate": 0.00011071770989539373, "loss": 1.5367, "step": 2665 }, { "epoch": 10.31, "learning_rate": 0.0001073582621428498, "loss": 1.5361, "step": 2670 }, { "epoch": 10.33, "learning_rate": 0.00010385629583950413, "loss": 1.3755, "step": 2675 }, { "epoch": 10.35, "learning_rate": 0.00010022468811490019, "loss": 1.464, "step": 2680 }, { "epoch": 10.37, "learning_rate": 9.64767928048756e-05, "loss": 1.5142, "step": 2685 }, { "epoch": 10.39, "learning_rate": 9.262639134798378e-05, "loss": 1.5207, "step": 2690 }, { "epoch": 10.41, "learning_rate": 8.868764210957149e-05, "loss": 1.5357, "step": 2695 }, { "epoch": 10.42, "learning_rate": 8.467502831985583e-05, "loss": 1.454, "step": 2700 }, { "epoch": 10.44, "learning_rate": 8.06033048174343e-05, "loss": 1.5096, "step": 2705 }, { "epoch": 10.46, "learning_rate": 7.648744379405981e-05, "loss": 1.5628, "step": 2710 }, { "epoch": 10.48, "learning_rate": 7.234257974018524e-05, "loss": 1.468, "step": 2715 }, { "epoch": 10.5, "learning_rate": 6.818395379371479e-05, "loss": 1.4858, "step": 2720 }, { "epoch": 10.52, "learning_rate": 6.402685769660054e-05, "loss": 1.4885, "step": 2725 }, { "epoch": 10.54, "learning_rate": 5.988657756536443e-05, "loss": 1.4577, "step": 2730 }, { "epoch": 10.56, "learning_rate": 5.577833768230333e-05, "loss": 1.5513, "step": 2735 }, { "epoch": 10.58, "learning_rate": 5.171724451406837e-05, "loss": 1.4957, "step": 2740 }, { "epoch": 10.6, "learning_rate": 4.7718231163460647e-05, "loss": 1.5075, "step": 2745 }, { "epoch": 10.62, "learning_rate": 4.379600245871268e-05, "loss": 1.448, "step": 2750 }, { "epoch": 10.64, "learning_rate": 3.996498088215443e-05, "loss": 1.53, "step": 2755 }, { "epoch": 10.66, "learning_rate": 3.623925353710258e-05, "loss": 1.5454, "step": 2760 }, { "epoch": 10.68, "learning_rate": 3.2632520347973906e-05, "loss": 1.5139, "step": 2765 }, { "epoch": 10.69, "learning_rate": 2.915804368410225e-05, "loss": 1.5848, "step": 2770 }, { "epoch": 10.71, "learning_rate": 2.5828599592491143e-05, "loss": 1.514, "step": 2775 }, { "epoch": 10.73, "learning_rate": 2.2656430818833073e-05, "loss": 1.4666, "step": 2780 }, { "epoch": 10.75, "learning_rate": 1.965320178953816e-05, "loss": 1.4546, "step": 2785 }, { "epoch": 10.77, "learning_rate": 1.682995572030518e-05, "loss": 1.4552, "step": 2790 }, { "epoch": 10.79, "learning_rate": 1.4197074008960664e-05, "loss": 1.482, "step": 2795 }, { "epoch": 10.81, "learning_rate": 1.1764238061872534e-05, "loss": 1.5422, "step": 2800 }, { "epoch": 10.83, "learning_rate": 9.54039369431598e-06, "loss": 1.492, "step": 2805 }, { "epoch": 10.85, "learning_rate": 7.5337182356897344e-06, "loss": 1.4219, "step": 2810 }, { "epoch": 10.87, "learning_rate": 5.75159046054383e-06, "loss": 1.4972, "step": 2815 }, { "epoch": 10.89, "learning_rate": 4.200563455983359e-06, "loss": 1.4525, "step": 2820 }, { "epoch": 10.91, "learning_rate": 2.886340525219404e-06, "loss": 1.4337, "step": 2825 }, { "epoch": 10.93, "learning_rate": 1.8137542158732371e-06, "loss": 1.5066, "step": 2830 }, { "epoch": 10.95, "learning_rate": 9.867485501472609e-07, "loss": 1.4053, "step": 2835 }, { "epoch": 10.97, "learning_rate": 4.083645222054405e-07, "loss": 1.4861, "step": 2840 }, { "epoch": 10.98, "learning_rate": 8.072891609114545e-08, "loss": 1.4625, "step": 2845 }, { "epoch": 11.0, "learning_rate": 5.046485299251069e-09, "loss": 1.4993, "step": 2850 }, { "epoch": 11.02, "learning_rate": 1.8159552276039227e-07, "loss": 1.4475, "step": 2855 }, { "epoch": 11.04, "learning_rate": 6.097268375260069e-07, "loss": 1.4133, "step": 2860 }, { "epoch": 11.06, "learning_rate": 1.2878661419176504e-06, "loss": 1.4672, "step": 2865 }, { "epoch": 11.08, "learning_rate": 2.21351984036197e-06, "loss": 1.4601, "step": 2870 }, { "epoch": 11.1, "learning_rate": 3.383284198626564e-06, "loss": 1.4559, "step": 2875 }, { "epoch": 11.12, "learning_rate": 4.792857859738887e-06, "loss": 1.3926, "step": 2880 }, { "epoch": 11.14, "learning_rate": 6.437057660565811e-06, "loss": 1.3658, "step": 2885 }, { "epoch": 11.16, "learning_rate": 8.309837690896675e-06, "loss": 1.3814, "step": 2890 }, { "epoch": 11.18, "learning_rate": 1.0404311524944405e-05, "loss": 1.4756, "step": 2895 }, { "epoch": 11.2, "learning_rate": 1.271277754351773e-05, "loss": 1.3856, "step": 2900 }, { "epoch": 11.22, "learning_rate": 1.5226747253754904e-05, "loss": 1.3775, "step": 2905 }, { "epoch": 11.24, "learning_rate": 1.7936976502279244e-05, "loss": 1.4091, "step": 2910 }, { "epoch": 11.25, "learning_rate": 2.083349946700608e-05, "loss": 1.4543, "step": 2915 }, { "epoch": 11.27, "learning_rate": 2.39056653026063e-05, "loss": 1.4107, "step": 2920 }, { "epoch": 11.29, "learning_rate": 2.714217730488006e-05, "loss": 1.4381, "step": 2925 }, { "epoch": 11.31, "learning_rate": 3.053113445002753e-05, "loss": 1.4025, "step": 2930 }, { "epoch": 11.33, "learning_rate": 3.4060075156069725e-05, "loss": 1.3656, "step": 2935 }, { "epoch": 11.35, "learning_rate": 3.771602310550724e-05, "loss": 1.4167, "step": 2940 }, { "epoch": 11.37, "learning_rate": 4.148553496072031e-05, "loss": 1.4686, "step": 2945 }, { "epoch": 11.39, "learning_rate": 4.5354749796653205e-05, "loss": 1.3998, "step": 2950 }, { "epoch": 11.41, "learning_rate": 4.9309440069017615e-05, "loss": 1.4714, "step": 2955 }, { "epoch": 11.43, "learning_rate": 5.333506393059674e-05, "loss": 1.3963, "step": 2960 }, { "epoch": 11.45, "learning_rate": 5.7416818703274866e-05, "loss": 1.5068, "step": 2965 }, { "epoch": 11.47, "learning_rate": 6.153969530917418e-05, "loss": 1.4338, "step": 2970 }, { "epoch": 11.49, "learning_rate": 6.568853346074412e-05, "loss": 1.3606, "step": 2975 }, { "epoch": 11.51, "learning_rate": 6.984807740687094e-05, "loss": 1.4016, "step": 2980 }, { "epoch": 11.53, "learning_rate": 7.400303203001311e-05, "loss": 1.4269, "step": 2985 }, { "epoch": 11.54, "learning_rate": 7.813811908809178e-05, "loss": 1.44, "step": 2990 }, { "epoch": 11.56, "learning_rate": 8.223813339433243e-05, "loss": 1.4977, "step": 2995 }, { "epoch": 11.58, "learning_rate": 8.628799872846956e-05, "loss": 1.436, "step": 3000 }, { "epoch": 11.6, "learning_rate": 9.027282327372687e-05, "loss": 1.513, "step": 3005 }, { "epoch": 11.62, "learning_rate": 9.417795437572891e-05, "loss": 1.4691, "step": 3010 }, { "epoch": 11.64, "learning_rate": 9.798903242198079e-05, "loss": 1.5133, "step": 3015 }, { "epoch": 11.66, "learning_rate": 0.000101692043643802, "loss": 1.5047, "step": 3020 }, { "epoch": 11.68, "learning_rate": 0.00010527337164655109, "loss": 1.4302, "step": 3025 }, { "epoch": 11.7, "learning_rate": 0.00010871984747866282, "loss": 1.5886, "step": 3030 }, { "epoch": 11.72, "learning_rate": 0.00011201879805538586, "loss": 1.4925, "step": 3035 }, { "epoch": 11.74, "learning_rate": 0.00011515809275915985, "loss": 1.4445, "step": 3040 }, { "epoch": 11.76, "learning_rate": 0.00011812618804528006, "loss": 1.4577, "step": 3045 }, { "epoch": 11.78, "learning_rate": 0.00012091216988882848, "loss": 1.4792, "step": 3050 }, { "epoch": 11.8, "learning_rate": 0.00012350579391678723, "loss": 1.4425, "step": 3055 }, { "epoch": 11.81, "learning_rate": 0.00012589752307776744, "loss": 1.4294, "step": 3060 }, { "epoch": 11.83, "learning_rate": 0.0001280785627108355, "loss": 1.4138, "step": 3065 }, { "epoch": 11.85, "learning_rate": 0.0001300408928844837, "loss": 1.5429, "step": 3070 }, { "epoch": 11.87, "learning_rate": 0.00013177729788683344, "loss": 1.5001, "step": 3075 }, { "epoch": 11.89, "learning_rate": 0.00013328139275863032, "loss": 1.444, "step": 3080 }, { "epoch": 11.91, "learning_rate": 0.00013454764677146876, "loss": 1.4294, "step": 3085 }, { "epoch": 11.93, "learning_rate": 0.00013557140376490993, "loss": 1.5483, "step": 3090 }, { "epoch": 11.95, "learning_rate": 0.0001363488992677158, "loss": 1.5026, "step": 3095 }, { "epoch": 11.97, "learning_rate": 0.00013687727434023872, "loss": 1.5176, "step": 3100 }, { "epoch": 11.99, "learning_rate": 0.00013715458608706872, "loss": 1.4418, "step": 3105 }, { "epoch": 12.01, "learning_rate": 0.00013717981480128154, "loss": 1.441, "step": 3110 }, { "epoch": 12.03, "learning_rate": 0.00013695286771401734, "loss": 1.3854, "step": 3115 }, { "epoch": 12.05, "learning_rate": 0.00013647457933560234, "loss": 1.4397, "step": 3120 }, { "epoch": 12.07, "learning_rate": 0.00013574670838695926, "loss": 1.4672, "step": 3125 }, { "epoch": 12.08, "learning_rate": 0.0001347719313325897, "loss": 1.4525, "step": 3130 }, { "epoch": 12.1, "learning_rate": 0.00013355383253890914, "loss": 1.4068, "step": 3135 }, { "epoch": 12.12, "learning_rate": 0.0001320968910941225, "loss": 1.4855, "step": 3140 }, { "epoch": 12.14, "learning_rate": 0.0001304064643381061, "loss": 1.4212, "step": 3145 }, { "epoch": 12.16, "learning_rate": 0.00012848876816285777, "loss": 1.4849, "step": 3150 }, { "epoch": 12.18, "learning_rate": 0.00012635085415595244, "loss": 1.3912, "step": 3155 }, { "epoch": 12.2, "learning_rate": 0.00012400058367105247, "loss": 1.483, "step": 3160 }, { "epoch": 12.22, "learning_rate": 0.00012144659892081038, "loss": 1.3818, "step": 3165 }, { "epoch": 12.24, "learning_rate": 0.00011869829119846924, "loss": 1.4634, "step": 3170 }, { "epoch": 12.26, "learning_rate": 0.00011576576634500562, "loss": 1.4034, "step": 3175 }, { "epoch": 12.28, "learning_rate": 0.00011265980758879936, "loss": 1.4014, "step": 3180 }, { "epoch": 12.3, "learning_rate": 0.00010939183589447423, "loss": 1.4222, "step": 3185 }, { "epoch": 12.32, "learning_rate": 0.00010597386796670575, "loss": 1.4854, "step": 3190 }, { "epoch": 12.34, "learning_rate": 0.00010241847206343044, "loss": 1.4472, "step": 3195 }, { "epoch": 12.36, "learning_rate": 9.87387217809251e-05, "loss": 1.5271, "step": 3200 }, { "epoch": 12.37, "learning_rate": 9.494814798070321e-05, "loss": 1.401, "step": 3205 }, { "epoch": 12.39, "learning_rate": 9.106068903499514e-05, "loss": 1.5122, "step": 3210 }, { "epoch": 12.41, "learning_rate": 8.709063957376094e-05, "loss": 1.4755, "step": 3215 }, { "epoch": 12.43, "learning_rate": 8.30525979217072e-05, "loss": 1.4605, "step": 3220 }, { "epoch": 12.45, "learning_rate": 7.896141241858118e-05, "loss": 1.3958, "step": 3225 }, { "epoch": 12.47, "learning_rate": 7.48321268201337e-05, "loss": 1.4285, "step": 3230 }, { "epoch": 12.49, "learning_rate": 7.067992498051e-05, "loss": 1.4276, "step": 3235 }, { "epoch": 12.51, "learning_rate": 6.652007501948996e-05, "loss": 1.4174, "step": 3240 }, { "epoch": 12.53, "learning_rate": 6.236787317986674e-05, "loss": 1.4845, "step": 3245 }, { "epoch": 12.55, "learning_rate": 5.823858758141927e-05, "loss": 1.4357, "step": 3250 }, { "epoch": 12.57, "learning_rate": 5.414740207829325e-05, "loss": 1.4382, "step": 3255 }, { "epoch": 12.59, "learning_rate": 5.010936042623904e-05, "loss": 1.3395, "step": 3260 }, { "epoch": 12.61, "learning_rate": 4.6139310965004824e-05, "loss": 1.3898, "step": 3265 }, { "epoch": 12.63, "learning_rate": 4.225185201929675e-05, "loss": 1.4521, "step": 3270 }, { "epoch": 12.64, "learning_rate": 3.8461278219075304e-05, "loss": 1.4757, "step": 3275 }, { "epoch": 12.66, "learning_rate": 3.478152793656996e-05, "loss": 1.4128, "step": 3280 }, { "epoch": 12.68, "learning_rate": 3.122613203329423e-05, "loss": 1.4424, "step": 3285 }, { "epoch": 12.7, "learning_rate": 2.780816410552575e-05, "loss": 1.4287, "step": 3290 }, { "epoch": 12.72, "learning_rate": 2.454019241120062e-05, "loss": 1.3953, "step": 3295 }, { "epoch": 12.74, "learning_rate": 2.1434233654994707e-05, "loss": 1.3275, "step": 3300 }, { "epoch": 12.76, "learning_rate": 1.8501708801531077e-05, "loss": 1.3897, "step": 3305 }, { "epoch": 12.78, "learning_rate": 1.575340107918959e-05, "loss": 1.3657, "step": 3310 }, { "epoch": 12.8, "learning_rate": 1.319941632894751e-05, "loss": 1.3897, "step": 3315 }, { "epoch": 12.82, "learning_rate": 1.0849145844047538e-05, "loss": 1.4783, "step": 3320 }, { "epoch": 12.84, "learning_rate": 8.711231837142462e-06, "loss": 1.4102, "step": 3325 }, { "epoch": 12.86, "learning_rate": 6.793535661894092e-06, "loss": 1.4442, "step": 3330 }, { "epoch": 12.88, "learning_rate": 5.1031089058776675e-06, "loss": 1.3875, "step": 3335 }, { "epoch": 12.9, "learning_rate": 3.6461674610908637e-06, "loss": 1.4228, "step": 3340 }, { "epoch": 12.92, "learning_rate": 2.42806866741032e-06, "loss": 1.4015, "step": 3345 }, { "epoch": 12.93, "learning_rate": 1.453291613040815e-06, "loss": 1.3937, "step": 3350 }, { "epoch": 12.95, "learning_rate": 7.254206643977347e-07, "loss": 1.4905, "step": 3355 }, { "epoch": 12.97, "learning_rate": 2.4713228598269586e-07, "loss": 1.4419, "step": 3360 }, { "epoch": 12.99, "learning_rate": 2.0185198718462007e-08, "loss": 1.4331, "step": 3365 }, { "epoch": 13.01, "learning_rate": 4.5413912931266996e-08, "loss": 1.4014, "step": 3370 }, { "epoch": 13.03, "learning_rate": 3.227256597612364e-07, "loss": 1.3146, "step": 3375 }, { "epoch": 13.05, "learning_rate": 8.51100732284126e-07, "loss": 1.3623, "step": 3380 }, { "epoch": 13.07, "learning_rate": 1.62859623508997e-06, "loss": 1.3259, "step": 3385 }, { "epoch": 13.09, "learning_rate": 2.652353228531244e-06, "loss": 1.3975, "step": 3390 }, { "epoch": 13.11, "learning_rate": 3.9186072413696845e-06, "loss": 1.3585, "step": 3395 }, { "epoch": 13.13, "learning_rate": 5.422702113166566e-06, "loss": 1.3596, "step": 3400 }, { "epoch": 13.15, "learning_rate": 7.159107115516102e-06, "loss": 1.4021, "step": 3405 }, { "epoch": 13.17, "learning_rate": 9.121437289164265e-06, "loss": 1.4666, "step": 3410 }, { "epoch": 13.19, "learning_rate": 1.1302476922232583e-05, "loss": 1.327, "step": 3415 }, { "epoch": 13.2, "learning_rate": 1.3694206083212781e-05, "loss": 1.2798, "step": 3420 }, { "epoch": 13.22, "learning_rate": 1.628783011117153e-05, "loss": 1.3184, "step": 3425 }, { "epoch": 13.24, "learning_rate": 1.9073811954719624e-05, "loss": 1.3236, "step": 3430 }, { "epoch": 13.26, "learning_rate": 2.2041907240839828e-05, "loss": 1.3766, "step": 3435 }, { "epoch": 13.28, "learning_rate": 2.518120194461378e-05, "loss": 1.2779, "step": 3440 }, { "epoch": 13.3, "learning_rate": 2.8480152521337216e-05, "loss": 1.3743, "step": 3445 }, { "epoch": 13.32, "learning_rate": 3.1926628353448936e-05, "loss": 1.336, "step": 3450 }, { "epoch": 13.34, "learning_rate": 3.5507956356197615e-05, "loss": 1.3522, "step": 3455 }, { "epoch": 13.36, "learning_rate": 3.9210967578018804e-05, "loss": 1.3693, "step": 3460 }, { "epoch": 13.38, "learning_rate": 4.302204562427067e-05, "loss": 1.3374, "step": 3465 }, { "epoch": 13.4, "learning_rate": 4.692717672627317e-05, "loss": 1.3881, "step": 3470 }, { "epoch": 13.42, "learning_rate": 5.091200127153047e-05, "loss": 1.2653, "step": 3475 }, { "epoch": 13.44, "learning_rate": 5.496186660566713e-05, "loss": 1.3907, "step": 3480 }, { "epoch": 13.46, "learning_rate": 5.906188091190777e-05, "loss": 1.3586, "step": 3485 }, { "epoch": 13.47, "learning_rate": 6.319696796998643e-05, "loss": 1.3102, "step": 3490 }, { "epoch": 13.49, "learning_rate": 6.735192259312862e-05, "loss": 1.3599, "step": 3495 }, { "epoch": 13.51, "learning_rate": 7.151146653925592e-05, "loss": 1.3715, "step": 3500 }, { "epoch": 13.53, "learning_rate": 7.566030469082585e-05, "loss": 1.4274, "step": 3505 }, { "epoch": 13.55, "learning_rate": 7.978318129672468e-05, "loss": 1.2634, "step": 3510 }, { "epoch": 13.57, "learning_rate": 8.386493606940281e-05, "loss": 1.3939, "step": 3515 }, { "epoch": 13.59, "learning_rate": 8.789055993098241e-05, "loss": 1.4498, "step": 3520 }, { "epoch": 13.61, "learning_rate": 9.184525020334682e-05, "loss": 1.4425, "step": 3525 }, { "epoch": 13.63, "learning_rate": 9.571446503927972e-05, "loss": 1.3688, "step": 3530 }, { "epoch": 13.65, "learning_rate": 9.948397689449235e-05, "loss": 1.3409, "step": 3535 }, { "epoch": 13.67, "learning_rate": 0.00010313992484392988, "loss": 1.4686, "step": 3540 }, { "epoch": 13.69, "learning_rate": 0.00010666886554997249, "loss": 1.3646, "step": 3545 }, { "epoch": 13.71, "learning_rate": 0.00011005782269511996, "loss": 1.411, "step": 3550 }, { "epoch": 13.73, "learning_rate": 0.00011329433469739373, "loss": 1.3738, "step": 3555 }, { "epoch": 13.75, "learning_rate": 0.0001163665005329936, "loss": 1.3912, "step": 3560 }, { "epoch": 13.76, "learning_rate": 0.00011926302349772045, "loss": 1.3728, "step": 3565 }, { "epoch": 13.78, "learning_rate": 0.00012197325274624481, "loss": 1.3925, "step": 3570 }, { "epoch": 13.8, "learning_rate": 0.0001244872224564823, "loss": 1.3735, "step": 3575 }, { "epoch": 13.82, "learning_rate": 0.0001267956884750556, "loss": 1.4361, "step": 3580 }, { "epoch": 13.84, "learning_rate": 0.0001288901623091031, "loss": 1.4661, "step": 3585 }, { "epoch": 13.86, "learning_rate": 0.000130762942339434, "loss": 1.4177, "step": 3590 }, { "epoch": 13.88, "learning_rate": 0.00013240714214026112, "loss": 1.4351, "step": 3595 }, { "epoch": 13.9, "learning_rate": 0.00013381671580137345, "loss": 1.4243, "step": 3600 }, { "epoch": 13.92, "learning_rate": 0.00013498648015963804, "loss": 1.3258, "step": 3605 }, { "epoch": 13.94, "learning_rate": 0.00013591213385808236, "loss": 1.3917, "step": 3610 }, { "epoch": 13.96, "learning_rate": 0.00013659027316247394, "loss": 1.3626, "step": 3615 }, { "epoch": 13.98, "learning_rate": 0.00013701840447723958, "loss": 1.505, "step": 3620 }, { "epoch": 14.0, "learning_rate": 0.00013719495351470075, "loss": 1.3238, "step": 3625 }, { "epoch": 14.02, "learning_rate": 0.00013711927108390887, "loss": 1.3589, "step": 3630 }, { "epoch": 14.03, "learning_rate": 0.00013679163547779456, "loss": 1.4241, "step": 3635 }, { "epoch": 14.05, "learning_rate": 0.00013621325144985282, "loss": 1.4179, "step": 3640 }, { "epoch": 14.07, "learning_rate": 0.00013538624578412686, "loss": 1.3404, "step": 3645 }, { "epoch": 14.09, "learning_rate": 0.00013431365947478058, "loss": 1.3758, "step": 3650 }, { "epoch": 14.11, "learning_rate": 0.00013299943654401664, "loss": 1.4247, "step": 3655 }, { "epoch": 14.13, "learning_rate": 0.00013144840953945616, "loss": 1.3701, "step": 3660 }, { "epoch": 14.15, "learning_rate": 0.00012966628176431025, "loss": 1.3553, "step": 3665 }, { "epoch": 14.17, "learning_rate": 0.00012765960630568425, "loss": 1.381, "step": 3670 }, { "epoch": 14.19, "learning_rate": 0.00012543576193812774, "loss": 1.442, "step": 3675 }, { "epoch": 14.21, "learning_rate": 0.0001230029259910393, "loss": 1.3873, "step": 3680 }, { "epoch": 14.23, "learning_rate": 0.0001203700442796948, "loss": 1.3884, "step": 3685 }, { "epoch": 14.25, "learning_rate": 0.00011754679821046217, "loss": 1.3278, "step": 3690 }, { "epoch": 14.27, "learning_rate": 0.00011454356918116728, "loss": 1.3606, "step": 3695 }, { "epoch": 14.29, "learning_rate": 0.00011137140040750922, "loss": 1.2409, "step": 3700 }, { "epoch": 14.31, "learning_rate": 0.00010804195631589772, "loss": 1.411, "step": 3705 }, { "epoch": 14.32, "learning_rate": 0.00010456747965202607, "loss": 1.38, "step": 3710 }, { "epoch": 14.34, "learning_rate": 0.00010096074646289782, "loss": 1.3982, "step": 3715 }, { "epoch": 14.36, "learning_rate": 9.723501911784598e-05, "loss": 1.3883, "step": 3720 }, { "epoch": 14.38, "learning_rate": 9.340399754128775e-05, "loss": 1.3611, "step": 3725 }, { "epoch": 14.4, "learning_rate": 8.948176883653932e-05, "loss": 1.4344, "step": 3730 }, { "epoch": 14.42, "learning_rate": 8.548275548593159e-05, "loss": 1.2783, "step": 3735 }, { "epoch": 14.44, "learning_rate": 8.142166231769664e-05, "loss": 1.335, "step": 3740 }, { "epoch": 14.46, "learning_rate": 7.731342243463601e-05, "loss": 1.3506, "step": 3745 }, { "epoch": 14.48, "learning_rate": 7.317314230339991e-05, "loss": 1.4243, "step": 3750 }, { "epoch": 14.5, "learning_rate": 6.901604620628517e-05, "loss": 1.3969, "step": 3755 }, { "epoch": 14.52, "learning_rate": 6.485742025981473e-05, "loss": 1.3597, "step": 3760 }, { "epoch": 14.54, "learning_rate": 6.071255620594063e-05, "loss": 1.4289, "step": 3765 }, { "epoch": 14.56, "learning_rate": 5.659669518256613e-05, "loss": 1.3466, "step": 3770 }, { "epoch": 14.58, "learning_rate": 5.252497168014461e-05, "loss": 1.279, "step": 3775 }, { "epoch": 14.59, "learning_rate": 4.8512357890428955e-05, "loss": 1.3786, "step": 3780 }, { "epoch": 14.61, "learning_rate": 4.457360865201619e-05, "loss": 1.2442, "step": 3785 }, { "epoch": 14.63, "learning_rate": 4.072320719512437e-05, "loss": 1.2467, "step": 3790 }, { "epoch": 14.65, "learning_rate": 3.697531188510021e-05, "loss": 1.326, "step": 3795 }, { "epoch": 14.67, "learning_rate": 3.3343704160496265e-05, "loss": 1.3049, "step": 3800 }, { "epoch": 14.69, "learning_rate": 2.9841737857150583e-05, "loss": 1.3741, "step": 3805 }, { "epoch": 14.71, "learning_rate": 2.648229010460623e-05, "loss": 1.3036, "step": 3810 }, { "epoch": 14.73, "learning_rate": 2.3277713975440426e-05, "loss": 1.3118, "step": 3815 }, { "epoch": 14.75, "learning_rate": 2.0239793061604814e-05, "loss": 1.3541, "step": 3820 }, { "epoch": 14.77, "learning_rate": 1.7379698144815434e-05, "loss": 1.3646, "step": 3825 }, { "epoch": 14.79, "learning_rate": 1.4707946120313696e-05, "loss": 1.3313, "step": 3830 }, { "epoch": 14.81, "learning_rate": 1.2234361325042786e-05, "loss": 1.3923, "step": 3835 }, { "epoch": 14.83, "learning_rate": 9.968039412440925e-06, "loss": 1.2976, "step": 3840 }, { "epoch": 14.85, "learning_rate": 7.917313906685554e-06, "loss": 1.3127, "step": 3845 }, { "epoch": 14.86, "learning_rate": 6.089725559373968e-06, "loss": 1.3699, "step": 3850 }, { "epoch": 14.88, "learning_rate": 4.4919946213203235e-06, "loss": 1.2705, "step": 3855 }, { "epoch": 14.9, "learning_rate": 3.129996131426458e-06, "loss": 1.3474, "step": 3860 }, { "epoch": 14.92, "learning_rate": 2.00873831349432e-06, "loss": 1.3704, "step": 3865 }, { "epoch": 14.94, "learning_rate": 1.1323441604147607e-06, "loss": 1.3555, "step": 3870 }, { "epoch": 14.96, "learning_rate": 5.040362734534312e-07, "loss": 1.3937, "step": 3875 }, { "epoch": 14.98, "learning_rate": 1.2612501237755945e-07, "loss": 1.425, "step": 3880 }, { "epoch": 15.0, "learning_rate": 0.0, "loss": 1.42, "step": 3885 }, { "epoch": 15.02, "learning_rate": 1.261250123775442e-07, "loss": 1.4818, "step": 3890 }, { "epoch": 15.04, "learning_rate": 5.040362734534007e-07, "loss": 1.4719, "step": 3895 }, { "epoch": 15.06, "learning_rate": 1.132344160414715e-06, "loss": 1.3957, "step": 3900 }, { "epoch": 15.08, "learning_rate": 2.008738313494259e-06, "loss": 1.4262, "step": 3905 }, { "epoch": 15.1, "learning_rate": 3.1299961314263817e-06, "loss": 1.3789, "step": 3910 }, { "epoch": 15.12, "learning_rate": 4.491994621320232e-06, "loss": 1.4566, "step": 3915 }, { "epoch": 15.14, "learning_rate": 6.089725559373869e-06, "loss": 1.4144, "step": 3920 }, { "epoch": 15.15, "learning_rate": 7.91731390668544e-06, "loss": 1.4525, "step": 3925 }, { "epoch": 15.17, "learning_rate": 9.968039412440788e-06, "loss": 1.4557, "step": 3930 }, { "epoch": 15.19, "learning_rate": 1.2234361325042642e-05, "loss": 1.4918, "step": 3935 }, { "epoch": 15.21, "learning_rate": 1.4707946120313543e-05, "loss": 1.5974, "step": 3940 }, { "epoch": 15.23, "learning_rate": 1.7379698144815265e-05, "loss": 1.3931, "step": 3945 }, { "epoch": 15.25, "learning_rate": 2.0239793061604638e-05, "loss": 1.3826, "step": 3950 }, { "epoch": 15.27, "learning_rate": 2.3277713975440236e-05, "loss": 1.4445, "step": 3955 }, { "epoch": 15.29, "learning_rate": 2.6482290104606033e-05, "loss": 1.4149, "step": 3960 }, { "epoch": 15.31, "learning_rate": 2.984173785715038e-05, "loss": 1.3804, "step": 3965 }, { "epoch": 15.33, "learning_rate": 3.334370416049605e-05, "loss": 1.3937, "step": 3970 }, { "epoch": 15.35, "learning_rate": 3.697531188509998e-05, "loss": 1.4221, "step": 3975 }, { "epoch": 15.37, "learning_rate": 4.072320719512414e-05, "loss": 1.4485, "step": 3980 }, { "epoch": 15.39, "learning_rate": 4.4573608652015956e-05, "loss": 1.5171, "step": 3985 }, { "epoch": 15.41, "learning_rate": 4.851235789042871e-05, "loss": 1.4849, "step": 3990 }, { "epoch": 15.42, "learning_rate": 5.2524971680144367e-05, "loss": 1.4614, "step": 3995 }, { "epoch": 15.44, "learning_rate": 5.659669518256589e-05, "loss": 1.413, "step": 4000 }, { "epoch": 15.46, "learning_rate": 6.071255620594038e-05, "loss": 1.4743, "step": 4005 }, { "epoch": 15.48, "learning_rate": 6.485742025981448e-05, "loss": 1.4832, "step": 4010 }, { "epoch": 15.5, "learning_rate": 6.901604620628492e-05, "loss": 1.5146, "step": 4015 }, { "epoch": 15.52, "learning_rate": 7.317314230339967e-05, "loss": 1.5513, "step": 4020 }, { "epoch": 15.54, "learning_rate": 7.731342243463577e-05, "loss": 1.5379, "step": 4025 }, { "epoch": 15.56, "learning_rate": 8.142166231769639e-05, "loss": 1.4753, "step": 4030 }, { "epoch": 15.58, "learning_rate": 8.548275548593135e-05, "loss": 1.5384, "step": 4035 }, { "epoch": 15.6, "learning_rate": 8.948176883653908e-05, "loss": 1.5967, "step": 4040 }, { "epoch": 15.62, "learning_rate": 9.340399754128752e-05, "loss": 1.4906, "step": 4045 }, { "epoch": 15.64, "learning_rate": 9.723501911784575e-05, "loss": 1.5322, "step": 4050 }, { "epoch": 15.66, "learning_rate": 0.0001009607464628976, "loss": 1.5223, "step": 4055 }, { "epoch": 15.68, "learning_rate": 0.00010456747965202585, "loss": 1.4992, "step": 4060 }, { "epoch": 15.69, "learning_rate": 0.00010804195631589752, "loss": 1.5217, "step": 4065 }, { "epoch": 15.71, "learning_rate": 0.00011137140040750902, "loss": 1.4526, "step": 4070 }, { "epoch": 15.73, "learning_rate": 0.00011454356918116707, "loss": 1.5553, "step": 4075 }, { "epoch": 15.75, "learning_rate": 0.00011754679821046198, "loss": 1.4297, "step": 4080 }, { "epoch": 15.77, "learning_rate": 0.00012037004427969463, "loss": 1.4843, "step": 4085 }, { "epoch": 15.79, "learning_rate": 0.00012300292599103915, "loss": 1.4636, "step": 4090 }, { "epoch": 15.81, "learning_rate": 0.00012543576193812758, "loss": 1.4776, "step": 4095 }, { "epoch": 15.83, "learning_rate": 0.00012765960630568412, "loss": 1.514, "step": 4100 }, { "epoch": 15.85, "learning_rate": 0.00012966628176431014, "loss": 1.4759, "step": 4105 }, { "epoch": 15.87, "learning_rate": 0.00013144840953945605, "loss": 1.396, "step": 4110 }, { "epoch": 15.89, "learning_rate": 0.00013299943654401656, "loss": 1.5696, "step": 4115 }, { "epoch": 15.91, "learning_rate": 0.0001343136594747805, "loss": 1.5059, "step": 4120 }, { "epoch": 15.93, "learning_rate": 0.0001353862457841268, "loss": 1.4919, "step": 4125 }, { "epoch": 15.95, "learning_rate": 0.00013621325144985277, "loss": 1.515, "step": 4130 }, { "epoch": 15.97, "learning_rate": 0.00013679163547779453, "loss": 1.4448, "step": 4135 }, { "epoch": 15.98, "learning_rate": 0.00013711927108390882, "loss": 1.5143, "step": 4140 }, { "epoch": 16.0, "eval_loss": 1.3626197576522827, "eval_runtime": 22.0604, "eval_samples_per_second": 20.897, "eval_steps_per_second": 2.629, "step": 4144 }, { "epoch": 15.41, "learning_rate": 4.923820788333643e-05, "loss": 1.4417, "step": 4145 }, { "epoch": 15.43, "learning_rate": 5.311198428226757e-05, "loss": 1.5224, "step": 4150 }, { "epoch": 15.45, "learning_rate": 5.7038557476801184e-05, "loss": 1.4984, "step": 4155 }, { "epoch": 15.46, "learning_rate": 6.100454224793001e-05, "loss": 1.4427, "step": 4160 }, { "epoch": 15.48, "learning_rate": 6.49964190272892e-05, "loss": 1.4789, "step": 4165 }, { "epoch": 15.5, "learning_rate": 6.900057998375254e-05, "loss": 1.5665, "step": 4170 }, { "epoch": 15.52, "learning_rate": 7.300337541089789e-05, "loss": 1.5002, "step": 4175 }, { "epoch": 15.54, "learning_rate": 7.699116025723293e-05, "loss": 1.4668, "step": 4180 }, { "epoch": 15.56, "learning_rate": 8.09503406405399e-05, "loss": 1.3757, "step": 4185 }, { "epoch": 15.58, "learning_rate": 8.48674201878012e-05, "loss": 1.4722, "step": 4190 }, { "epoch": 15.59, "learning_rate": 8.872904604271726e-05, "loss": 1.4961, "step": 4195 }, { "epoch": 15.61, "learning_rate": 9.252205438400528e-05, "loss": 1.4798, "step": 4200 }, { "epoch": 15.63, "learning_rate": 9.623351529928802e-05, "loss": 1.5392, "step": 4205 }, { "epoch": 15.65, "learning_rate": 9.985077686162523e-05, "loss": 1.5653, "step": 4210 }, { "epoch": 15.67, "learning_rate": 0.00010336150825841603, "loss": 1.4743, "step": 4215 }, { "epoch": 15.69, "learning_rate": 0.00010675374182567242, "loss": 1.4201, "step": 4220 }, { "epoch": 15.71, "learning_rate": 0.00011001591384435138, "loss": 1.3889, "step": 4225 }, { "epoch": 15.72, "learning_rate": 0.00011313690395969416, "loss": 1.4913, "step": 4230 }, { "epoch": 15.74, "learning_rate": 0.00011610607308918656, "loss": 1.3722, "step": 4235 }, { "epoch": 15.76, "learning_rate": 0.00011891329968992182, "loss": 1.4133, "step": 4240 }, { "epoch": 15.78, "learning_rate": 0.0001215490142617292, "loss": 1.36, "step": 4245 }, { "epoch": 15.8, "learning_rate": 0.00012400423196845864, "loss": 1.361, "step": 4250 }, { "epoch": 15.82, "learning_rate": 0.00012627058326621316, "loss": 1.542, "step": 4255 }, { "epoch": 15.84, "learning_rate": 0.0001283403424341258, "loss": 1.4983, "step": 4260 }, { "epoch": 15.86, "learning_rate": 0.00013020645391041629, "loss": 1.4985, "step": 4265 }, { "epoch": 15.87, "learning_rate": 0.00013186255634396195, "loss": 1.4767, "step": 4270 }, { "epoch": 15.89, "learning_rate": 0.00013330300427938103, "loss": 1.4258, "step": 4275 }, { "epoch": 15.91, "learning_rate": 0.00013452288740171763, "loss": 1.4773, "step": 4280 }, { "epoch": 15.93, "learning_rate": 0.00013551804727511717, "loss": 1.462, "step": 4285 }, { "epoch": 15.95, "learning_rate": 0.0001362850915184393, "loss": 1.4688, "step": 4290 }, { "epoch": 15.97, "learning_rate": 0.00013682140536947865, "loss": 1.5146, "step": 4295 }, { "epoch": 15.99, "learning_rate": 0.00013712516059837763, "loss": 1.5462, "step": 4300 }, { "epoch": 16.0, "eval_loss": 1.3411859273910522, "eval_runtime": 18.5136, "eval_samples_per_second": 20.85, "eval_steps_per_second": 2.647, "step": 4304 }, { "epoch": 16.0, "learning_rate": 0.00013719532173984305, "loss": 1.3395, "step": 4305 }, { "epoch": 16.02, "learning_rate": 0.00013703164962292424, "loss": 1.3995, "step": 4310 }, { "epoch": 16.04, "learning_rate": 0.00013663470218631772, "loss": 1.4118, "step": 4315 }, { "epoch": 16.06, "learning_rate": 0.00013600583257642132, "loss": 1.3778, "step": 4320 }, { "epoch": 16.08, "learning_rate": 0.00013514718453461912, "loss": 1.4416, "step": 4325 }, { "epoch": 16.1, "learning_rate": 0.0001340616850895236, "loss": 1.4926, "step": 4330 }, { "epoch": 16.12, "learning_rate": 0.00013275303457908525, "loss": 1.4668, "step": 4335 }, { "epoch": 16.13, "learning_rate": 0.00013122569403658038, "loss": 1.3931, "step": 4340 }, { "epoch": 16.15, "learning_rate": 0.00012948486998348453, "loss": 1.403, "step": 4345 }, { "epoch": 16.17, "learning_rate": 0.0001275364966810606, "loss": 1.3802, "step": 4350 }, { "epoch": 16.19, "learning_rate": 0.00012538721590117088, "loss": 1.429, "step": 4355 }, { "epoch": 16.21, "learning_rate": 0.00012304435428527134, "loss": 1.4773, "step": 4360 }, { "epoch": 16.23, "learning_rate": 0.00012051589836876666, "loss": 1.3717, "step": 4365 }, { "epoch": 16.25, "learning_rate": 0.00011781046735586077, "loss": 1.4166, "step": 4370 }, { "epoch": 16.26, "learning_rate": 0.00011493728373772612, "loss": 1.432, "step": 4375 }, { "epoch": 16.28, "learning_rate": 0.00011190614185412497, "loss": 1.4722, "step": 4380 }, { "epoch": 16.3, "learning_rate": 0.00010872737450568259, "loss": 1.3411, "step": 4385 }, { "epoch": 16.32, "learning_rate": 0.00010541181773059928, "loss": 1.4268, "step": 4390 }, { "epoch": 16.34, "learning_rate": 0.00010197077386589103, "loss": 1.4257, "step": 4395 }, { "epoch": 16.36, "learning_rate": 9.841597301907411e-05, "loss": 1.4367, "step": 4400 }, { "epoch": 16.38, "learning_rate": 9.475953308163089e-05, "loss": 1.388, "step": 4405 }, { "epoch": 16.39, "learning_rate": 9.101391842055883e-05, "loss": 1.4486, "step": 4410 }, { "epoch": 16.41, "learning_rate": 8.719189738884117e-05, "loss": 1.3824, "step": 4415 }, { "epoch": 16.43, "learning_rate": 8.330649879965051e-05, "loss": 1.4313, "step": 4420 }, { "epoch": 16.45, "learning_rate": 7.937096751268169e-05, "loss": 1.3933, "step": 4425 }, { "epoch": 16.47, "learning_rate": 7.539871928400956e-05, "loss": 1.4352, "step": 4430 }, { "epoch": 16.49, "learning_rate": 7.140329503337758e-05, "loss": 1.4244, "step": 4435 }, { "epoch": 16.51, "learning_rate": 6.739831468481779e-05, "loss": 1.4062, "step": 4440 }, { "epoch": 16.52, "learning_rate": 6.33974307379626e-05, "loss": 1.3753, "step": 4445 }, { "epoch": 16.54, "learning_rate": 5.94142817282949e-05, "loss": 1.3918, "step": 4450 }, { "epoch": 16.56, "learning_rate": 5.546244573501996e-05, "loss": 1.423, "step": 4455 }, { "epoch": 16.58, "learning_rate": 5.155539409500841e-05, "loss": 1.4141, "step": 4460 }, { "epoch": 16.6, "learning_rate": 4.7706445480618974e-05, "loss": 1.4364, "step": 4465 }, { "epoch": 16.62, "learning_rate": 4.3928720497937174e-05, "loss": 1.405, "step": 4470 }, { "epoch": 16.64, "learning_rate": 4.02350969601972e-05, "loss": 1.4752, "step": 4475 }, { "epoch": 16.65, "learning_rate": 3.663816598884848e-05, "loss": 1.4515, "step": 4480 }, { "epoch": 16.67, "learning_rate": 3.315018909193563e-05, "loss": 1.4503, "step": 4485 }, { "epoch": 16.69, "learning_rate": 2.9783056366075814e-05, "loss": 1.3878, "step": 4490 }, { "epoch": 16.71, "learning_rate": 2.6548245964540616e-05, "loss": 1.3826, "step": 4495 }, { "epoch": 16.73, "learning_rate": 2.345678496960497e-05, "loss": 1.3709, "step": 4500 }, { "epoch": 16.75, "learning_rate": 2.051921180253764e-05, "loss": 1.4434, "step": 4505 }, { "epoch": 16.77, "learning_rate": 1.774554029938429e-05, "loss": 1.4217, "step": 4510 }, { "epoch": 16.78, "learning_rate": 1.5145225574996895e-05, "loss": 1.3259, "step": 4515 }, { "epoch": 16.8, "learning_rate": 1.272713179167218e-05, "loss": 1.3681, "step": 4520 }, { "epoch": 16.82, "learning_rate": 1.0499501942287456e-05, "loss": 1.3708, "step": 4525 }, { "epoch": 16.84, "learning_rate": 8.469929750918058e-06, "loss": 1.4352, "step": 4530 }, { "epoch": 16.86, "learning_rate": 6.6453337867398825e-06, "loss": 1.4355, "step": 4535 }, { "epoch": 16.88, "learning_rate": 5.031933879454651e-06, "loss": 1.4338, "step": 4540 }, { "epoch": 16.9, "learning_rate": 3.6352299166325223e-06, "loss": 1.3822, "step": 4545 }, { "epoch": 16.91, "learning_rate": 2.459983095251791e-06, "loss": 1.3442, "step": 4550 }, { "epoch": 16.93, "learning_rate": 1.5101996913488535e-06, "loss": 1.356, "step": 4555 }, { "epoch": 16.95, "learning_rate": 7.891174030992353e-07, "loss": 1.3681, "step": 4560 }, { "epoch": 16.97, "learning_rate": 2.991943138937121e-07, "loss": 1.3964, "step": 4565 }, { "epoch": 16.99, "learning_rate": 4.2100513024036057e-08, "loss": 1.4004, "step": 4570 }, { "epoch": 17.0, "eval_loss": 1.3110859394073486, "eval_runtime": 18.5084, "eval_samples_per_second": 20.855, "eval_steps_per_second": 2.647, "step": 4573 }, { "epoch": 18.08, "learning_rate": 0.000134880848712477, "loss": 1.3191, "step": 4575 }, { "epoch": 18.1, "learning_rate": 0.00013365575351388775, "loss": 1.4082, "step": 4580 }, { "epoch": 18.12, "learning_rate": 0.00013217996375537754, "loss": 1.381, "step": 4585 }, { "epoch": 18.14, "learning_rate": 0.0001304591664429994, "loss": 1.3937, "step": 4590 }, { "epoch": 18.16, "learning_rate": 0.00012849999272775362, "loss": 1.3955, "step": 4595 }, { "epoch": 18.18, "learning_rate": 0.000126309992352219, "loss": 1.3851, "step": 4600 }, { "epoch": 18.2, "learning_rate": 0.00012389760455736593, "loss": 1.3328, "step": 4605 }, { "epoch": 18.22, "learning_rate": 0.00012127212556165209, "loss": 1.3809, "step": 4610 }, { "epoch": 18.24, "learning_rate": 0.00011844367273772787, "loss": 1.2981, "step": 4615 }, { "epoch": 18.26, "learning_rate": 0.00011542314562479984, "loss": 1.4094, "step": 4620 }, { "epoch": 18.28, "learning_rate": 0.00011222218392688052, "loss": 1.4044, "step": 4625 }, { "epoch": 18.3, "learning_rate": 0.0001088531226587985, "loss": 1.4849, "step": 4630 }, { "epoch": 18.32, "learning_rate": 0.00010532894461279404, "loss": 1.4488, "step": 4635 }, { "epoch": 18.34, "learning_rate": 0.00010166323032888931, "loss": 1.4335, "step": 4640 }, { "epoch": 18.36, "learning_rate": 9.78701057618181e-05, "loss": 1.3215, "step": 4645 }, { "epoch": 18.38, "learning_rate": 9.396418784617256e-05, "loss": 1.4931, "step": 4650 }, { "epoch": 18.4, "learning_rate": 8.996052816955526e-05, "loss": 1.4301, "step": 4655 }, { "epoch": 18.42, "learning_rate": 8.587455497076757e-05, "loss": 1.3555, "step": 4660 }, { "epoch": 18.44, "learning_rate": 8.172201368657088e-05, "loss": 1.3862, "step": 4665 }, { "epoch": 18.46, "learning_rate": 7.751890627611039e-05, "loss": 1.3795, "step": 4670 }, { "epoch": 18.48, "learning_rate": 7.328142955681618e-05, "loss": 1.4168, "step": 4675 }, { "epoch": 18.5, "learning_rate": 6.902591278942331e-05, "loss": 1.4594, "step": 4680 }, { "epoch": 18.52, "learning_rate": 6.47687547526032e-05, "loss": 1.4803, "step": 4685 }, { "epoch": 18.54, "learning_rate": 6.0526360549714816e-05, "loss": 1.4239, "step": 4690 }, { "epoch": 18.56, "learning_rate": 5.6315078391183605e-05, "loss": 1.3304, "step": 4695 }, { "epoch": 18.58, "learning_rate": 5.21511365961095e-05, "loss": 1.3828, "step": 4700 }, { "epoch": 18.6, "learning_rate": 4.80505810558948e-05, "loss": 1.3273, "step": 4705 }, { "epoch": 18.62, "learning_rate": 4.402921340084794e-05, "loss": 1.3661, "step": 4710 }, { "epoch": 18.64, "learning_rate": 4.0102530108070474e-05, "loss": 1.287, "step": 4715 }, { "epoch": 18.66, "learning_rate": 3.6285662785250574e-05, "loss": 1.3865, "step": 4720 }, { "epoch": 18.68, "learning_rate": 3.2593319860498044e-05, "loss": 1.428, "step": 4725 }, { "epoch": 18.7, "learning_rate": 2.9039729902920295e-05, "loss": 1.2403, "step": 4730 }, { "epoch": 18.72, "learning_rate": 2.5638586792340877e-05, "loss": 1.4223, "step": 4735 }, { "epoch": 18.74, "learning_rate": 2.2402996949474048e-05, "loss": 1.3913, "step": 4740 }, { "epoch": 18.75, "learning_rate": 1.9345428829881034e-05, "loss": 1.3764, "step": 4745 }, { "epoch": 18.77, "learning_rate": 1.647766487635479e-05, "loss": 1.4167, "step": 4750 }, { "epoch": 18.79, "learning_rate": 1.3810756114877466e-05, "loss": 1.3081, "step": 4755 }, { "epoch": 18.81, "learning_rate": 1.1354979569111334e-05, "loss": 1.3206, "step": 4760 }, { "epoch": 18.83, "learning_rate": 9.119798657542995e-06, "loss": 1.3369, "step": 4765 }, { "epoch": 18.85, "learning_rate": 7.113826725875128e-06, "loss": 1.3328, "step": 4770 }, { "epoch": 18.87, "learning_rate": 5.344793855206173e-06, "loss": 1.4008, "step": 4775 }, { "epoch": 18.89, "learning_rate": 3.819517073901737e-06, "loss": 1.41, "step": 4780 }, { "epoch": 18.91, "learning_rate": 2.5438740879408957e-06, "loss": 1.1899, "step": 4785 }, { "epoch": 18.93, "learning_rate": 1.522780630978951e-06, "loss": 1.3401, "step": 4790 }, { "epoch": 18.95, "learning_rate": 7.601715213983543e-07, "loss": 1.3232, "step": 4795 }, { "epoch": 18.97, "learning_rate": 2.5898549935329754e-07, "loss": 1.3369, "step": 4800 }, { "epoch": 18.99, "learning_rate": 2.1153902234608112e-08, "loss": 1.4171, "step": 4805 }, { "epoch": 19.0, "eval_loss": 1.2792030572891235, "eval_runtime": 11.7078, "eval_samples_per_second": 43.902, "eval_steps_per_second": 5.552, "step": 4807 } ], "max_steps": 6831, "num_train_epochs": 27, "total_flos": 5020465102848000.0, "trial_name": null, "trial_params": null }