{ "best_metric": 1.5761640071868896, "best_model_checkpoint": "hsb_baichuan/checkpoint-12000", "epoch": 2.8293545534924847, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999992378674973e-05, "loss": 1.8463, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.999969514746361e-05, "loss": 1.7893, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.999931408353566e-05, "loss": 1.7552, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999878059728925e-05, "loss": 1.7823, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999809469197708e-05, "loss": 1.7552, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.9997256371781154e-05, "loss": 1.7256, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.999626564181277e-05, "loss": 1.7518, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.9995122508112445e-05, "loss": 1.6769, "step": 80 }, { "epoch": 0.02, "learning_rate": 4.9993826977649954e-05, "loss": 1.6256, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.999237905832422e-05, "loss": 1.7261, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.999077875896329e-05, "loss": 1.678, "step": 110 }, { "epoch": 0.03, "learning_rate": 4.998902608932429e-05, "loss": 1.6767, "step": 120 }, { "epoch": 0.03, "learning_rate": 4.998712106009335e-05, "loss": 1.6371, "step": 130 }, { "epoch": 0.03, "learning_rate": 4.9985063682885534e-05, "loss": 1.7198, "step": 140 }, { "epoch": 0.04, "learning_rate": 4.9982853970244816e-05, "loss": 1.6614, "step": 150 }, { "epoch": 0.04, "learning_rate": 4.998049193564394e-05, "loss": 1.6369, "step": 160 }, { "epoch": 0.04, "learning_rate": 4.9977977593484373e-05, "loss": 1.6423, "step": 170 }, { "epoch": 0.04, "learning_rate": 4.99753109590962e-05, "loss": 1.7224, "step": 180 }, { "epoch": 0.04, "learning_rate": 4.997249204873807e-05, "loss": 1.633, "step": 190 }, { "epoch": 0.05, "learning_rate": 4.9969520879597025e-05, "loss": 1.6923, "step": 200 }, { "epoch": 0.05, "learning_rate": 4.996639746978848e-05, "loss": 1.6915, "step": 210 }, { "epoch": 0.05, "learning_rate": 4.996312183835605e-05, "loss": 1.6234, "step": 220 }, { "epoch": 0.05, "learning_rate": 4.995969400527144e-05, "loss": 1.6664, "step": 230 }, { "epoch": 0.06, "learning_rate": 4.9956113991434375e-05, "loss": 1.7018, "step": 240 }, { "epoch": 0.06, "learning_rate": 4.995238181867241e-05, "loss": 1.6782, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.994849750974081e-05, "loss": 1.6428, "step": 260 }, { "epoch": 0.06, "learning_rate": 4.994446108832246e-05, "loss": 1.6444, "step": 270 }, { "epoch": 0.07, "learning_rate": 4.994027257902766e-05, "loss": 1.7016, "step": 280 }, { "epoch": 0.07, "learning_rate": 4.9935932007393986e-05, "loss": 1.6771, "step": 290 }, { "epoch": 0.07, "learning_rate": 4.993143939988618e-05, "loss": 1.6449, "step": 300 }, { "epoch": 0.07, "learning_rate": 4.992679478389593e-05, "loss": 1.6916, "step": 310 }, { "epoch": 0.08, "learning_rate": 4.992199818774176e-05, "loss": 1.6751, "step": 320 }, { "epoch": 0.08, "learning_rate": 4.9917049640668776e-05, "loss": 1.6137, "step": 330 }, { "epoch": 0.08, "learning_rate": 4.9911949172848585e-05, "loss": 1.67, "step": 340 }, { "epoch": 0.08, "learning_rate": 4.990669681537903e-05, "loss": 1.6393, "step": 350 }, { "epoch": 0.08, "learning_rate": 4.9901292600284065e-05, "loss": 1.5925, "step": 360 }, { "epoch": 0.09, "learning_rate": 4.989573656051351e-05, "loss": 1.6982, "step": 370 }, { "epoch": 0.09, "learning_rate": 4.9890028729942875e-05, "loss": 1.6566, "step": 380 }, { "epoch": 0.09, "learning_rate": 4.9884169143373135e-05, "loss": 1.6258, "step": 390 }, { "epoch": 0.09, "learning_rate": 4.987815783653055e-05, "loss": 1.6007, "step": 400 }, { "epoch": 0.1, "learning_rate": 4.9871994846066405e-05, "loss": 1.6167, "step": 410 }, { "epoch": 0.1, "learning_rate": 4.986568020955685e-05, "loss": 1.6593, "step": 420 }, { "epoch": 0.1, "learning_rate": 4.9859213965502574e-05, "loss": 1.6396, "step": 430 }, { "epoch": 0.1, "learning_rate": 4.985259615332868e-05, "loss": 1.6635, "step": 440 }, { "epoch": 0.11, "learning_rate": 4.984582681338435e-05, "loss": 1.6402, "step": 450 }, { "epoch": 0.11, "learning_rate": 4.983890598694265e-05, "loss": 1.6745, "step": 460 }, { "epoch": 0.11, "learning_rate": 4.9831833716200296e-05, "loss": 1.7139, "step": 470 }, { "epoch": 0.11, "learning_rate": 4.982461004427733e-05, "loss": 1.6294, "step": 480 }, { "epoch": 0.12, "learning_rate": 4.981723501521692e-05, "loss": 1.6174, "step": 490 }, { "epoch": 0.12, "learning_rate": 4.980970867398506e-05, "loss": 1.5851, "step": 500 }, { "epoch": 0.12, "learning_rate": 4.98020310664703e-05, "loss": 1.6651, "step": 510 }, { "epoch": 0.12, "learning_rate": 4.979420223948348e-05, "loss": 1.5993, "step": 520 }, { "epoch": 0.12, "learning_rate": 4.978622224075742e-05, "loss": 1.6467, "step": 530 }, { "epoch": 0.13, "learning_rate": 4.977809111894667e-05, "loss": 1.6865, "step": 540 }, { "epoch": 0.13, "learning_rate": 4.9769808923627136e-05, "loss": 1.6948, "step": 550 }, { "epoch": 0.13, "learning_rate": 4.9761375705295894e-05, "loss": 1.6155, "step": 560 }, { "epoch": 0.13, "learning_rate": 4.975279151537076e-05, "loss": 1.6964, "step": 570 }, { "epoch": 0.14, "learning_rate": 4.9744056406190066e-05, "loss": 1.6332, "step": 580 }, { "epoch": 0.14, "learning_rate": 4.973517043101229e-05, "loss": 1.6989, "step": 590 }, { "epoch": 0.14, "learning_rate": 4.9726133644015753e-05, "loss": 1.6102, "step": 600 }, { "epoch": 0.14, "learning_rate": 4.97169461002983e-05, "loss": 1.7337, "step": 610 }, { "epoch": 0.15, "learning_rate": 4.970760785587693e-05, "loss": 1.6642, "step": 620 }, { "epoch": 0.15, "learning_rate": 4.969811896768748e-05, "loss": 1.6126, "step": 630 }, { "epoch": 0.15, "learning_rate": 4.968847949358427e-05, "loss": 1.6332, "step": 640 }, { "epoch": 0.15, "learning_rate": 4.967868949233975e-05, "loss": 1.644, "step": 650 }, { "epoch": 0.16, "learning_rate": 4.9668749023644154e-05, "loss": 1.6937, "step": 660 }, { "epoch": 0.16, "learning_rate": 4.965865814810511e-05, "loss": 1.5925, "step": 670 }, { "epoch": 0.16, "learning_rate": 4.964841692724729e-05, "loss": 1.646, "step": 680 }, { "epoch": 0.16, "learning_rate": 4.963802542351203e-05, "loss": 1.646, "step": 690 }, { "epoch": 0.17, "learning_rate": 4.962748370025696e-05, "loss": 1.6175, "step": 700 }, { "epoch": 0.17, "learning_rate": 4.961679182175559e-05, "loss": 1.6747, "step": 710 }, { "epoch": 0.17, "learning_rate": 4.960594985319696e-05, "loss": 1.5985, "step": 720 }, { "epoch": 0.17, "learning_rate": 4.959495786068519e-05, "loss": 1.6171, "step": 730 }, { "epoch": 0.17, "learning_rate": 4.958381591123912e-05, "loss": 1.6319, "step": 740 }, { "epoch": 0.18, "learning_rate": 4.957252407279189e-05, "loss": 1.6629, "step": 750 }, { "epoch": 0.18, "learning_rate": 4.956108241419052e-05, "loss": 1.6572, "step": 760 }, { "epoch": 0.18, "learning_rate": 4.954949100519547e-05, "loss": 1.6324, "step": 770 }, { "epoch": 0.18, "learning_rate": 4.953774991648027e-05, "loss": 1.7111, "step": 780 }, { "epoch": 0.19, "learning_rate": 4.952585921963104e-05, "loss": 1.69, "step": 790 }, { "epoch": 0.19, "learning_rate": 4.951381898714609e-05, "loss": 1.6322, "step": 800 }, { "epoch": 0.19, "learning_rate": 4.95016292924354e-05, "loss": 1.6527, "step": 810 }, { "epoch": 0.19, "learning_rate": 4.9489290209820313e-05, "loss": 1.6626, "step": 820 }, { "epoch": 0.2, "learning_rate": 4.947680181453293e-05, "loss": 1.675, "step": 830 }, { "epoch": 0.2, "learning_rate": 4.9464164182715755e-05, "loss": 1.5939, "step": 840 }, { "epoch": 0.2, "learning_rate": 4.945137739142119e-05, "loss": 1.5833, "step": 850 }, { "epoch": 0.2, "learning_rate": 4.943844151861106e-05, "loss": 1.6186, "step": 860 }, { "epoch": 0.21, "learning_rate": 4.9425356643156165e-05, "loss": 1.6172, "step": 870 }, { "epoch": 0.21, "learning_rate": 4.941212284483578e-05, "loss": 1.5903, "step": 880 }, { "epoch": 0.21, "learning_rate": 4.939874020433716e-05, "loss": 1.6082, "step": 890 }, { "epoch": 0.21, "learning_rate": 4.938520880325507e-05, "loss": 1.6156, "step": 900 }, { "epoch": 0.21, "learning_rate": 4.9371528724091275e-05, "loss": 1.6322, "step": 910 }, { "epoch": 0.22, "learning_rate": 4.935770005025403e-05, "loss": 1.5638, "step": 920 }, { "epoch": 0.22, "learning_rate": 4.9343722866057605e-05, "loss": 1.6491, "step": 930 }, { "epoch": 0.22, "learning_rate": 4.932959725672173e-05, "loss": 1.6502, "step": 940 }, { "epoch": 0.22, "learning_rate": 4.9315323308371074e-05, "loss": 1.6091, "step": 950 }, { "epoch": 0.23, "learning_rate": 4.930090110803478e-05, "loss": 1.6447, "step": 960 }, { "epoch": 0.23, "learning_rate": 4.9286330743645845e-05, "loss": 1.5945, "step": 970 }, { "epoch": 0.23, "learning_rate": 4.9271612304040685e-05, "loss": 1.6577, "step": 980 }, { "epoch": 0.23, "learning_rate": 4.925674587895848e-05, "loss": 1.5934, "step": 990 }, { "epoch": 0.24, "learning_rate": 4.924173155904074e-05, "loss": 1.5242, "step": 1000 }, { "epoch": 0.24, "eval_loss": 1.6374789476394653, "eval_runtime": 120.8306, "eval_samples_per_second": 11.346, "eval_steps_per_second": 2.839, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.922656943583066e-05, "loss": 1.6052, "step": 1010 }, { "epoch": 0.24, "learning_rate": 4.9211259601772615e-05, "loss": 1.6244, "step": 1020 }, { "epoch": 0.24, "learning_rate": 4.919580215021159e-05, "loss": 1.5851, "step": 1030 }, { "epoch": 0.25, "learning_rate": 4.91801971753926e-05, "loss": 1.6342, "step": 1040 }, { "epoch": 0.25, "learning_rate": 4.9164444772460085e-05, "loss": 1.5859, "step": 1050 }, { "epoch": 0.25, "learning_rate": 4.9148545037457425e-05, "loss": 1.6124, "step": 1060 }, { "epoch": 0.25, "learning_rate": 4.9132498067326236e-05, "loss": 1.602, "step": 1070 }, { "epoch": 0.25, "learning_rate": 4.911630395990587e-05, "loss": 1.6564, "step": 1080 }, { "epoch": 0.26, "learning_rate": 4.9099962813932774e-05, "loss": 1.5995, "step": 1090 }, { "epoch": 0.26, "learning_rate": 4.908347472903989e-05, "loss": 1.5934, "step": 1100 }, { "epoch": 0.26, "learning_rate": 4.906683980575606e-05, "loss": 1.676, "step": 1110 }, { "epoch": 0.26, "learning_rate": 4.9050058145505405e-05, "loss": 1.6273, "step": 1120 }, { "epoch": 0.27, "learning_rate": 4.9033129850606724e-05, "loss": 1.6725, "step": 1130 }, { "epoch": 0.27, "learning_rate": 4.9016055024272844e-05, "loss": 1.5949, "step": 1140 }, { "epoch": 0.27, "learning_rate": 4.899883377061001e-05, "loss": 1.6016, "step": 1150 }, { "epoch": 0.27, "learning_rate": 4.898146619461723e-05, "loss": 1.6214, "step": 1160 }, { "epoch": 0.28, "learning_rate": 4.8963952402185666e-05, "loss": 1.6544, "step": 1170 }, { "epoch": 0.28, "learning_rate": 4.8946292500097956e-05, "loss": 1.6928, "step": 1180 }, { "epoch": 0.28, "learning_rate": 4.892848659602759e-05, "loss": 1.617, "step": 1190 }, { "epoch": 0.28, "learning_rate": 4.891053479853822e-05, "loss": 1.5759, "step": 1200 }, { "epoch": 0.29, "learning_rate": 4.8892437217083046e-05, "loss": 1.5391, "step": 1210 }, { "epoch": 0.29, "learning_rate": 4.8874193962004105e-05, "loss": 1.6787, "step": 1220 }, { "epoch": 0.29, "learning_rate": 4.885580514453162e-05, "loss": 1.623, "step": 1230 }, { "epoch": 0.29, "learning_rate": 4.883727087678331e-05, "loss": 1.5873, "step": 1240 }, { "epoch": 0.29, "learning_rate": 4.8818591271763714e-05, "loss": 1.6066, "step": 1250 }, { "epoch": 0.3, "learning_rate": 4.879976644336352e-05, "loss": 1.6801, "step": 1260 }, { "epoch": 0.3, "learning_rate": 4.8780796506358825e-05, "loss": 1.5833, "step": 1270 }, { "epoch": 0.3, "learning_rate": 4.876168157641048e-05, "loss": 1.6128, "step": 1280 }, { "epoch": 0.3, "learning_rate": 4.874242177006335e-05, "loss": 1.6176, "step": 1290 }, { "epoch": 0.31, "learning_rate": 4.872301720474564e-05, "loss": 1.575, "step": 1300 }, { "epoch": 0.31, "learning_rate": 4.8703467998768134e-05, "loss": 1.595, "step": 1310 }, { "epoch": 0.31, "learning_rate": 4.8683774271323544e-05, "loss": 1.6355, "step": 1320 }, { "epoch": 0.31, "learning_rate": 4.8663936142485685e-05, "loss": 1.5456, "step": 1330 }, { "epoch": 0.32, "learning_rate": 4.8643953733208824e-05, "loss": 1.6025, "step": 1340 }, { "epoch": 0.32, "learning_rate": 4.862382716532691e-05, "loss": 1.6777, "step": 1350 }, { "epoch": 0.32, "learning_rate": 4.8603556561552835e-05, "loss": 1.5932, "step": 1360 }, { "epoch": 0.32, "learning_rate": 4.8583142045477694e-05, "loss": 1.6043, "step": 1370 }, { "epoch": 0.33, "learning_rate": 4.856258374157e-05, "loss": 1.594, "step": 1380 }, { "epoch": 0.33, "learning_rate": 4.854188177517499e-05, "loss": 1.605, "step": 1390 }, { "epoch": 0.33, "learning_rate": 4.852103627251377e-05, "loss": 1.6241, "step": 1400 }, { "epoch": 0.33, "learning_rate": 4.8500047360682636e-05, "loss": 1.6771, "step": 1410 }, { "epoch": 0.33, "learning_rate": 4.8478915167652244e-05, "loss": 1.5895, "step": 1420 }, { "epoch": 0.34, "learning_rate": 4.8457639822266844e-05, "loss": 1.6305, "step": 1430 }, { "epoch": 0.34, "learning_rate": 4.843622145424348e-05, "loss": 1.6484, "step": 1440 }, { "epoch": 0.34, "learning_rate": 4.8414660194171244e-05, "loss": 1.5931, "step": 1450 }, { "epoch": 0.34, "learning_rate": 4.839295617351042e-05, "loss": 1.571, "step": 1460 }, { "epoch": 0.35, "learning_rate": 4.837110952459173e-05, "loss": 1.6011, "step": 1470 }, { "epoch": 0.35, "learning_rate": 4.834912038061551e-05, "loss": 1.588, "step": 1480 }, { "epoch": 0.35, "learning_rate": 4.832698887565088e-05, "loss": 1.6041, "step": 1490 }, { "epoch": 0.35, "learning_rate": 4.830471514463496e-05, "loss": 1.5719, "step": 1500 }, { "epoch": 0.36, "learning_rate": 4.8282299323372027e-05, "loss": 1.5701, "step": 1510 }, { "epoch": 0.36, "learning_rate": 4.8259741548532675e-05, "loss": 1.5389, "step": 1520 }, { "epoch": 0.36, "learning_rate": 4.823704195765303e-05, "loss": 1.5716, "step": 1530 }, { "epoch": 0.36, "learning_rate": 4.8214200689133846e-05, "loss": 1.6128, "step": 1540 }, { "epoch": 0.37, "learning_rate": 4.819121788223972e-05, "loss": 1.585, "step": 1550 }, { "epoch": 0.37, "learning_rate": 4.81680936770982e-05, "loss": 1.6296, "step": 1560 }, { "epoch": 0.37, "learning_rate": 4.814482821469895e-05, "loss": 1.6498, "step": 1570 }, { "epoch": 0.37, "learning_rate": 4.8121421636892896e-05, "loss": 1.636, "step": 1580 }, { "epoch": 0.37, "learning_rate": 4.809787408639133e-05, "loss": 1.5585, "step": 1590 }, { "epoch": 0.38, "learning_rate": 4.8074185706765105e-05, "loss": 1.56, "step": 1600 }, { "epoch": 0.38, "learning_rate": 4.805035664244368e-05, "loss": 1.5903, "step": 1610 }, { "epoch": 0.38, "learning_rate": 4.8026387038714294e-05, "loss": 1.6253, "step": 1620 }, { "epoch": 0.38, "learning_rate": 4.800227704172106e-05, "loss": 1.6222, "step": 1630 }, { "epoch": 0.39, "learning_rate": 4.797802679846408e-05, "loss": 1.5612, "step": 1640 }, { "epoch": 0.39, "learning_rate": 4.795363645679853e-05, "loss": 1.6505, "step": 1650 }, { "epoch": 0.39, "learning_rate": 4.79291061654338e-05, "loss": 1.574, "step": 1660 }, { "epoch": 0.39, "learning_rate": 4.7904436073932546e-05, "loss": 1.6852, "step": 1670 }, { "epoch": 0.4, "learning_rate": 4.787962633270979e-05, "loss": 1.6222, "step": 1680 }, { "epoch": 0.4, "learning_rate": 4.785467709303203e-05, "loss": 1.6398, "step": 1690 }, { "epoch": 0.4, "learning_rate": 4.782958850701626e-05, "loss": 1.6125, "step": 1700 }, { "epoch": 0.4, "learning_rate": 4.7804360727629094e-05, "loss": 1.5746, "step": 1710 }, { "epoch": 0.41, "learning_rate": 4.777899390868583e-05, "loss": 1.6119, "step": 1720 }, { "epoch": 0.41, "learning_rate": 4.7753488204849474e-05, "loss": 1.6468, "step": 1730 }, { "epoch": 0.41, "learning_rate": 4.772784377162984e-05, "loss": 1.6201, "step": 1740 }, { "epoch": 0.41, "learning_rate": 4.7702060765382585e-05, "loss": 1.6671, "step": 1750 }, { "epoch": 0.41, "learning_rate": 4.7676139343308236e-05, "loss": 1.6204, "step": 1760 }, { "epoch": 0.42, "learning_rate": 4.765007966345125e-05, "loss": 1.6959, "step": 1770 }, { "epoch": 0.42, "learning_rate": 4.762388188469907e-05, "loss": 1.6466, "step": 1780 }, { "epoch": 0.42, "learning_rate": 4.7597546166781125e-05, "loss": 1.5926, "step": 1790 }, { "epoch": 0.42, "learning_rate": 4.757107267026787e-05, "loss": 1.6089, "step": 1800 }, { "epoch": 0.43, "learning_rate": 4.75444615565698e-05, "loss": 1.5725, "step": 1810 }, { "epoch": 0.43, "learning_rate": 4.751771298793647e-05, "loss": 1.6761, "step": 1820 }, { "epoch": 0.43, "learning_rate": 4.7490827127455504e-05, "loss": 1.5831, "step": 1830 }, { "epoch": 0.43, "learning_rate": 4.746380413905162e-05, "loss": 1.5859, "step": 1840 }, { "epoch": 0.44, "learning_rate": 4.743664418748559e-05, "loss": 1.5971, "step": 1850 }, { "epoch": 0.44, "learning_rate": 4.740934743835328e-05, "loss": 1.5726, "step": 1860 }, { "epoch": 0.44, "learning_rate": 4.7381914058084586e-05, "loss": 1.5331, "step": 1870 }, { "epoch": 0.44, "learning_rate": 4.7354344213942506e-05, "loss": 1.5822, "step": 1880 }, { "epoch": 0.45, "learning_rate": 4.7326638074022e-05, "loss": 1.5794, "step": 1890 }, { "epoch": 0.45, "learning_rate": 4.7298795807249085e-05, "loss": 1.5759, "step": 1900 }, { "epoch": 0.45, "learning_rate": 4.727081758337974e-05, "loss": 1.6224, "step": 1910 }, { "epoch": 0.45, "learning_rate": 4.724270357299886e-05, "loss": 1.5642, "step": 1920 }, { "epoch": 0.46, "learning_rate": 4.7214453947519256e-05, "loss": 1.556, "step": 1930 }, { "epoch": 0.46, "learning_rate": 4.71860688791806e-05, "loss": 1.585, "step": 1940 }, { "epoch": 0.46, "learning_rate": 4.715754854104835e-05, "loss": 1.6414, "step": 1950 }, { "epoch": 0.46, "learning_rate": 4.7128893107012716e-05, "loss": 1.5784, "step": 1960 }, { "epoch": 0.46, "learning_rate": 4.71001027517876e-05, "loss": 1.6047, "step": 1970 }, { "epoch": 0.47, "learning_rate": 4.707117765090954e-05, "loss": 1.648, "step": 1980 }, { "epoch": 0.47, "learning_rate": 4.704211798073659e-05, "loss": 1.6223, "step": 1990 }, { "epoch": 0.47, "learning_rate": 4.7012923918447326e-05, "loss": 1.6081, "step": 2000 }, { "epoch": 0.47, "eval_loss": 1.6162242889404297, "eval_runtime": 120.2644, "eval_samples_per_second": 11.4, "eval_steps_per_second": 2.852, "step": 2000 }, { "epoch": 0.47, "learning_rate": 4.698359564203968e-05, "loss": 1.6442, "step": 2010 }, { "epoch": 0.48, "learning_rate": 4.695413333032992e-05, "loss": 1.5722, "step": 2020 }, { "epoch": 0.48, "learning_rate": 4.692453716295153e-05, "loss": 1.6816, "step": 2030 }, { "epoch": 0.48, "learning_rate": 4.6894807320354125e-05, "loss": 1.5378, "step": 2040 }, { "epoch": 0.48, "learning_rate": 4.6864943983802324e-05, "loss": 1.6699, "step": 2050 }, { "epoch": 0.49, "learning_rate": 4.6834947335374696e-05, "loss": 1.5855, "step": 2060 }, { "epoch": 0.49, "learning_rate": 4.68048175579626e-05, "loss": 1.6015, "step": 2070 }, { "epoch": 0.49, "learning_rate": 4.67745548352691e-05, "loss": 1.5387, "step": 2080 }, { "epoch": 0.49, "learning_rate": 4.6744159351807837e-05, "loss": 1.6405, "step": 2090 }, { "epoch": 0.5, "learning_rate": 4.671363129290188e-05, "loss": 1.6547, "step": 2100 }, { "epoch": 0.5, "learning_rate": 4.668297084468266e-05, "loss": 1.6246, "step": 2110 }, { "epoch": 0.5, "learning_rate": 4.665217819408876e-05, "loss": 1.5588, "step": 2120 }, { "epoch": 0.5, "learning_rate": 4.662125352886482e-05, "loss": 1.6331, "step": 2130 }, { "epoch": 0.5, "learning_rate": 4.6590197037560367e-05, "loss": 1.6697, "step": 2140 }, { "epoch": 0.51, "learning_rate": 4.655900890952872e-05, "loss": 1.6041, "step": 2150 }, { "epoch": 0.51, "learning_rate": 4.652768933492574e-05, "loss": 1.6565, "step": 2160 }, { "epoch": 0.51, "learning_rate": 4.6496238504708764e-05, "loss": 1.6182, "step": 2170 }, { "epoch": 0.51, "learning_rate": 4.6464656610635405e-05, "loss": 1.6574, "step": 2180 }, { "epoch": 0.52, "learning_rate": 4.643294384526234e-05, "loss": 1.5828, "step": 2190 }, { "epoch": 0.52, "learning_rate": 4.640110040194423e-05, "loss": 1.5964, "step": 2200 }, { "epoch": 0.52, "learning_rate": 4.6369126474832434e-05, "loss": 1.6485, "step": 2210 }, { "epoch": 0.52, "learning_rate": 4.633702225887393e-05, "loss": 1.647, "step": 2220 }, { "epoch": 0.53, "learning_rate": 4.6304787949810037e-05, "loss": 1.6325, "step": 2230 }, { "epoch": 0.53, "learning_rate": 4.627242374417527e-05, "loss": 1.5784, "step": 2240 }, { "epoch": 0.53, "learning_rate": 4.6239929839296125e-05, "loss": 1.6343, "step": 2250 }, { "epoch": 0.53, "learning_rate": 4.6207306433289916e-05, "loss": 1.6395, "step": 2260 }, { "epoch": 0.54, "learning_rate": 4.6174553725063484e-05, "loss": 1.6122, "step": 2270 }, { "epoch": 0.54, "learning_rate": 4.6141671914312076e-05, "loss": 1.5881, "step": 2280 }, { "epoch": 0.54, "learning_rate": 4.610866120151805e-05, "loss": 1.6092, "step": 2290 }, { "epoch": 0.54, "learning_rate": 4.60755217879497e-05, "loss": 1.6278, "step": 2300 }, { "epoch": 0.54, "learning_rate": 4.604225387566005e-05, "loss": 1.5755, "step": 2310 }, { "epoch": 0.55, "learning_rate": 4.600885766748552e-05, "loss": 1.6634, "step": 2320 }, { "epoch": 0.55, "learning_rate": 4.597533336704482e-05, "loss": 1.5873, "step": 2330 }, { "epoch": 0.55, "learning_rate": 4.594168117873761e-05, "loss": 1.616, "step": 2340 }, { "epoch": 0.55, "learning_rate": 4.59079013077433e-05, "loss": 1.6475, "step": 2350 }, { "epoch": 0.56, "learning_rate": 4.58739939600198e-05, "loss": 1.6932, "step": 2360 }, { "epoch": 0.56, "learning_rate": 4.583995934230225e-05, "loss": 1.5927, "step": 2370 }, { "epoch": 0.56, "learning_rate": 4.580579766210175e-05, "loss": 1.5839, "step": 2380 }, { "epoch": 0.56, "learning_rate": 4.57715091277041e-05, "loss": 1.704, "step": 2390 }, { "epoch": 0.57, "learning_rate": 4.5737093948168566e-05, "loss": 1.6202, "step": 2400 }, { "epoch": 0.57, "learning_rate": 4.5702552333326574e-05, "loss": 1.5782, "step": 2410 }, { "epoch": 0.57, "learning_rate": 4.56678844937804e-05, "loss": 1.6271, "step": 2420 }, { "epoch": 0.57, "learning_rate": 4.5633090640901965e-05, "loss": 1.6253, "step": 2430 }, { "epoch": 0.58, "learning_rate": 4.559817098683146e-05, "loss": 1.6356, "step": 2440 }, { "epoch": 0.58, "learning_rate": 4.556312574447612e-05, "loss": 1.5635, "step": 2450 }, { "epoch": 0.58, "learning_rate": 4.552795512750889e-05, "loss": 1.6473, "step": 2460 }, { "epoch": 0.58, "learning_rate": 4.549265935036714e-05, "loss": 1.6278, "step": 2470 }, { "epoch": 0.58, "learning_rate": 4.545723862825133e-05, "loss": 1.6253, "step": 2480 }, { "epoch": 0.59, "learning_rate": 4.5421693177123724e-05, "loss": 1.6483, "step": 2490 }, { "epoch": 0.59, "learning_rate": 4.5386023213707095e-05, "loss": 1.6938, "step": 2500 }, { "epoch": 0.59, "learning_rate": 4.5350228955483334e-05, "loss": 1.6065, "step": 2510 }, { "epoch": 0.59, "learning_rate": 4.531431062069217e-05, "loss": 1.6266, "step": 2520 }, { "epoch": 0.6, "learning_rate": 4.527826842832987e-05, "loss": 1.5903, "step": 2530 }, { "epoch": 0.6, "learning_rate": 4.524210259814784e-05, "loss": 1.6314, "step": 2540 }, { "epoch": 0.6, "learning_rate": 4.520581335065131e-05, "loss": 1.5874, "step": 2550 }, { "epoch": 0.6, "learning_rate": 4.516940090709799e-05, "loss": 1.6006, "step": 2560 }, { "epoch": 0.61, "learning_rate": 4.5132865489496756e-05, "loss": 1.5898, "step": 2570 }, { "epoch": 0.61, "learning_rate": 4.509620732060623e-05, "loss": 1.6183, "step": 2580 }, { "epoch": 0.61, "learning_rate": 4.505942662393346e-05, "loss": 1.5927, "step": 2590 }, { "epoch": 0.61, "learning_rate": 4.5022523623732586e-05, "loss": 1.62, "step": 2600 }, { "epoch": 0.62, "learning_rate": 4.498549854500339e-05, "loss": 1.6127, "step": 2610 }, { "epoch": 0.62, "learning_rate": 4.4948351613490017e-05, "loss": 1.6201, "step": 2620 }, { "epoch": 0.62, "learning_rate": 4.4911083055679526e-05, "loss": 1.5915, "step": 2630 }, { "epoch": 0.62, "learning_rate": 4.4873693098800564e-05, "loss": 1.5617, "step": 2640 }, { "epoch": 0.62, "learning_rate": 4.4836181970821924e-05, "loss": 1.6041, "step": 2650 }, { "epoch": 0.63, "learning_rate": 4.479854990045121e-05, "loss": 1.5829, "step": 2660 }, { "epoch": 0.63, "learning_rate": 4.476079711713343e-05, "loss": 1.5854, "step": 2670 }, { "epoch": 0.63, "learning_rate": 4.4722923851049545e-05, "loss": 1.6142, "step": 2680 }, { "epoch": 0.63, "learning_rate": 4.468493033311515e-05, "loss": 1.603, "step": 2690 }, { "epoch": 0.64, "learning_rate": 4.464681679497901e-05, "loss": 1.5794, "step": 2700 }, { "epoch": 0.64, "learning_rate": 4.460858346902162e-05, "loss": 1.5865, "step": 2710 }, { "epoch": 0.64, "learning_rate": 4.4570230588353914e-05, "loss": 1.616, "step": 2720 }, { "epoch": 0.64, "learning_rate": 4.4531758386815665e-05, "loss": 1.63, "step": 2730 }, { "epoch": 0.65, "learning_rate": 4.449316709897421e-05, "loss": 1.576, "step": 2740 }, { "epoch": 0.65, "learning_rate": 4.445445696012295e-05, "loss": 1.6069, "step": 2750 }, { "epoch": 0.65, "learning_rate": 4.441562820627991e-05, "loss": 1.6056, "step": 2760 }, { "epoch": 0.65, "learning_rate": 4.4376681074186364e-05, "loss": 1.5949, "step": 2770 }, { "epoch": 0.66, "learning_rate": 4.4337615801305286e-05, "loss": 1.6223, "step": 2780 }, { "epoch": 0.66, "learning_rate": 4.429843262582e-05, "loss": 1.6109, "step": 2790 }, { "epoch": 0.66, "learning_rate": 4.425913178663268e-05, "loss": 1.5932, "step": 2800 }, { "epoch": 0.66, "learning_rate": 4.421971352336289e-05, "loss": 1.6713, "step": 2810 }, { "epoch": 0.66, "learning_rate": 4.418017807634616e-05, "loss": 1.64, "step": 2820 }, { "epoch": 0.67, "learning_rate": 4.414052568663248e-05, "loss": 1.662, "step": 2830 }, { "epoch": 0.67, "learning_rate": 4.4100756595984846e-05, "loss": 1.5613, "step": 2840 }, { "epoch": 0.67, "learning_rate": 4.40608710468778e-05, "loss": 1.6049, "step": 2850 }, { "epoch": 0.67, "learning_rate": 4.4020869282495916e-05, "loss": 1.5997, "step": 2860 }, { "epoch": 0.68, "learning_rate": 4.398075154673237e-05, "loss": 1.6365, "step": 2870 }, { "epoch": 0.68, "learning_rate": 4.3940518084187384e-05, "loss": 1.6177, "step": 2880 }, { "epoch": 0.68, "learning_rate": 4.39001691401668e-05, "loss": 1.6507, "step": 2890 }, { "epoch": 0.68, "learning_rate": 4.385970496068057e-05, "loss": 1.5873, "step": 2900 }, { "epoch": 0.69, "learning_rate": 4.38191257924412e-05, "loss": 1.5996, "step": 2910 }, { "epoch": 0.69, "learning_rate": 4.377843188286233e-05, "loss": 1.665, "step": 2920 }, { "epoch": 0.69, "learning_rate": 4.3737623480057165e-05, "loss": 1.6183, "step": 2930 }, { "epoch": 0.69, "learning_rate": 4.369670083283698e-05, "loss": 1.6413, "step": 2940 }, { "epoch": 0.7, "learning_rate": 4.365566419070962e-05, "loss": 1.5564, "step": 2950 }, { "epoch": 0.7, "learning_rate": 4.3614513803877956e-05, "loss": 1.6456, "step": 2960 }, { "epoch": 0.7, "learning_rate": 4.357324992323836e-05, "loss": 1.589, "step": 2970 }, { "epoch": 0.7, "learning_rate": 4.353187280037918e-05, "loss": 1.5581, "step": 2980 }, { "epoch": 0.7, "learning_rate": 4.349038268757924e-05, "loss": 1.6063, "step": 2990 }, { "epoch": 0.71, "learning_rate": 4.344877983780624e-05, "loss": 1.611, "step": 3000 }, { "epoch": 0.71, "eval_loss": 1.6054201126098633, "eval_runtime": 120.533, "eval_samples_per_second": 11.374, "eval_steps_per_second": 2.846, "step": 3000 }, { "epoch": 0.71, "learning_rate": 4.340706450471524e-05, "loss": 1.6163, "step": 3010 }, { "epoch": 0.71, "learning_rate": 4.3365236942647146e-05, "loss": 1.6384, "step": 3020 }, { "epoch": 0.71, "learning_rate": 4.33232974066271e-05, "loss": 1.5703, "step": 3030 }, { "epoch": 0.72, "learning_rate": 4.3281246152362986e-05, "loss": 1.5774, "step": 3040 }, { "epoch": 0.72, "learning_rate": 4.323908343624381e-05, "loss": 1.6061, "step": 3050 }, { "epoch": 0.72, "learning_rate": 4.319680951533819e-05, "loss": 1.5721, "step": 3060 }, { "epoch": 0.72, "learning_rate": 4.315442464739276e-05, "loss": 1.5836, "step": 3070 }, { "epoch": 0.73, "learning_rate": 4.3111929090830605e-05, "loss": 1.6175, "step": 3080 }, { "epoch": 0.73, "learning_rate": 4.306932310474968e-05, "loss": 1.595, "step": 3090 }, { "epoch": 0.73, "learning_rate": 4.302660694892124e-05, "loss": 1.5982, "step": 3100 }, { "epoch": 0.73, "learning_rate": 4.2983780883788247e-05, "loss": 1.5907, "step": 3110 }, { "epoch": 0.74, "learning_rate": 4.2940845170463806e-05, "loss": 1.6695, "step": 3120 }, { "epoch": 0.74, "learning_rate": 4.289780007072952e-05, "loss": 1.5913, "step": 3130 }, { "epoch": 0.74, "learning_rate": 4.285464584703396e-05, "loss": 1.553, "step": 3140 }, { "epoch": 0.74, "learning_rate": 4.2811382762491e-05, "loss": 1.618, "step": 3150 }, { "epoch": 0.75, "learning_rate": 4.276801108087829e-05, "loss": 1.4985, "step": 3160 }, { "epoch": 0.75, "learning_rate": 4.272453106663555e-05, "loss": 1.6046, "step": 3170 }, { "epoch": 0.75, "learning_rate": 4.268094298486305e-05, "loss": 1.6321, "step": 3180 }, { "epoch": 0.75, "learning_rate": 4.263724710131994e-05, "loss": 1.5503, "step": 3190 }, { "epoch": 0.75, "learning_rate": 4.259344368242264e-05, "loss": 1.573, "step": 3200 }, { "epoch": 0.76, "learning_rate": 4.254953299524323e-05, "loss": 1.528, "step": 3210 }, { "epoch": 0.76, "learning_rate": 4.250551530750779e-05, "loss": 1.5518, "step": 3220 }, { "epoch": 0.76, "learning_rate": 4.246139088759483e-05, "loss": 1.6624, "step": 3230 }, { "epoch": 0.76, "learning_rate": 4.241716000453357e-05, "loss": 1.5525, "step": 3240 }, { "epoch": 0.77, "learning_rate": 4.237282292800237e-05, "loss": 1.559, "step": 3250 }, { "epoch": 0.77, "learning_rate": 4.2328379928327025e-05, "loss": 1.5948, "step": 3260 }, { "epoch": 0.77, "learning_rate": 4.2283831276479185e-05, "loss": 1.6198, "step": 3270 }, { "epoch": 0.77, "learning_rate": 4.2239177244074655e-05, "loss": 1.5895, "step": 3280 }, { "epoch": 0.78, "learning_rate": 4.219441810337176e-05, "loss": 1.6092, "step": 3290 }, { "epoch": 0.78, "learning_rate": 4.214955412726965e-05, "loss": 1.5791, "step": 3300 }, { "epoch": 0.78, "learning_rate": 4.210458558930668e-05, "loss": 1.6058, "step": 3310 }, { "epoch": 0.78, "learning_rate": 4.205951276365875e-05, "loss": 1.5717, "step": 3320 }, { "epoch": 0.79, "learning_rate": 4.201433592513755e-05, "loss": 1.5903, "step": 3330 }, { "epoch": 0.79, "learning_rate": 4.1969055349189e-05, "loss": 1.6179, "step": 3340 }, { "epoch": 0.79, "learning_rate": 4.192367131189148e-05, "loss": 1.6235, "step": 3350 }, { "epoch": 0.79, "learning_rate": 4.1878184089954185e-05, "loss": 1.5712, "step": 3360 }, { "epoch": 0.79, "learning_rate": 4.183259396071545e-05, "loss": 1.6418, "step": 3370 }, { "epoch": 0.8, "learning_rate": 4.178690120214102e-05, "loss": 1.5748, "step": 3380 }, { "epoch": 0.8, "learning_rate": 4.1741106092822386e-05, "loss": 1.5349, "step": 3390 }, { "epoch": 0.8, "learning_rate": 4.169520891197508e-05, "loss": 1.6124, "step": 3400 }, { "epoch": 0.8, "learning_rate": 4.164920993943697e-05, "loss": 1.5648, "step": 3410 }, { "epoch": 0.81, "learning_rate": 4.1603109455666564e-05, "loss": 1.6162, "step": 3420 }, { "epoch": 0.81, "learning_rate": 4.1556907741741244e-05, "loss": 1.5948, "step": 3430 }, { "epoch": 0.81, "learning_rate": 4.151060507935568e-05, "loss": 1.6071, "step": 3440 }, { "epoch": 0.81, "learning_rate": 4.146420175081995e-05, "loss": 1.612, "step": 3450 }, { "epoch": 0.82, "learning_rate": 4.141769803905793e-05, "loss": 1.6257, "step": 3460 }, { "epoch": 0.82, "learning_rate": 4.1371094227605564e-05, "loss": 1.5877, "step": 3470 }, { "epoch": 0.82, "learning_rate": 4.132439060060908e-05, "loss": 1.5676, "step": 3480 }, { "epoch": 0.82, "learning_rate": 4.127758744282329e-05, "loss": 1.5605, "step": 3490 }, { "epoch": 0.83, "learning_rate": 4.123068503960986e-05, "loss": 1.6394, "step": 3500 }, { "epoch": 0.83, "learning_rate": 4.1183683676935555e-05, "loss": 1.6232, "step": 3510 }, { "epoch": 0.83, "learning_rate": 4.113658364137051e-05, "loss": 1.5559, "step": 3520 }, { "epoch": 0.83, "learning_rate": 4.108938522008646e-05, "loss": 1.5552, "step": 3530 }, { "epoch": 0.83, "learning_rate": 4.104208870085502e-05, "loss": 1.5411, "step": 3540 }, { "epoch": 0.84, "learning_rate": 4.0994694372045906e-05, "loss": 1.6087, "step": 3550 }, { "epoch": 0.84, "learning_rate": 4.0947202522625175e-05, "loss": 1.5833, "step": 3560 }, { "epoch": 0.84, "learning_rate": 4.08996134421535e-05, "loss": 1.5919, "step": 3570 }, { "epoch": 0.84, "learning_rate": 4.0851927420784353e-05, "loss": 1.5449, "step": 3580 }, { "epoch": 0.85, "learning_rate": 4.080414474926226e-05, "loss": 1.6524, "step": 3590 }, { "epoch": 0.85, "learning_rate": 4.075626571892105e-05, "loss": 1.5543, "step": 3600 }, { "epoch": 0.85, "learning_rate": 4.0708290621682045e-05, "loss": 1.565, "step": 3610 }, { "epoch": 0.85, "learning_rate": 4.066021975005228e-05, "loss": 1.6035, "step": 3620 }, { "epoch": 0.86, "learning_rate": 4.061205339712275e-05, "loss": 1.537, "step": 3630 }, { "epoch": 0.86, "learning_rate": 4.0563791856566616e-05, "loss": 1.5907, "step": 3640 }, { "epoch": 0.86, "learning_rate": 4.051543542263736e-05, "loss": 1.558, "step": 3650 }, { "epoch": 0.86, "learning_rate": 4.046698439016708e-05, "loss": 1.6066, "step": 3660 }, { "epoch": 0.87, "learning_rate": 4.0418439054564615e-05, "loss": 1.6129, "step": 3670 }, { "epoch": 0.87, "learning_rate": 4.036979971181382e-05, "loss": 1.6739, "step": 3680 }, { "epoch": 0.87, "learning_rate": 4.0321066658471646e-05, "loss": 1.5924, "step": 3690 }, { "epoch": 0.87, "learning_rate": 4.027224019166648e-05, "loss": 1.5308, "step": 3700 }, { "epoch": 0.87, "learning_rate": 4.0223320609096195e-05, "loss": 1.6009, "step": 3710 }, { "epoch": 0.88, "learning_rate": 4.0174308209026435e-05, "loss": 1.6631, "step": 3720 }, { "epoch": 0.88, "learning_rate": 4.012520329028874e-05, "loss": 1.5813, "step": 3730 }, { "epoch": 0.88, "learning_rate": 4.007600615227876e-05, "loss": 1.619, "step": 3740 }, { "epoch": 0.88, "learning_rate": 4.002671709495438e-05, "loss": 1.5654, "step": 3750 }, { "epoch": 0.89, "learning_rate": 3.997733641883395e-05, "loss": 1.5944, "step": 3760 }, { "epoch": 0.89, "learning_rate": 3.992786442499442e-05, "loss": 1.654, "step": 3770 }, { "epoch": 0.89, "learning_rate": 3.98783014150695e-05, "loss": 1.5784, "step": 3780 }, { "epoch": 0.89, "learning_rate": 3.9828647691247836e-05, "loss": 1.5812, "step": 3790 }, { "epoch": 0.9, "learning_rate": 3.977890355627116e-05, "loss": 1.5983, "step": 3800 }, { "epoch": 0.9, "learning_rate": 3.9729069313432454e-05, "loss": 1.633, "step": 3810 }, { "epoch": 0.9, "learning_rate": 3.967914526657408e-05, "loss": 1.639, "step": 3820 }, { "epoch": 0.9, "learning_rate": 3.9629131720085966e-05, "loss": 1.5822, "step": 3830 }, { "epoch": 0.91, "learning_rate": 3.957902897890369e-05, "loss": 1.6079, "step": 3840 }, { "epoch": 0.91, "learning_rate": 3.952883734850667e-05, "loss": 1.6206, "step": 3850 }, { "epoch": 0.91, "learning_rate": 3.947855713491631e-05, "loss": 1.5733, "step": 3860 }, { "epoch": 0.91, "learning_rate": 3.942818864469407e-05, "loss": 1.6351, "step": 3870 }, { "epoch": 0.91, "learning_rate": 3.9377732184939664e-05, "loss": 1.6283, "step": 3880 }, { "epoch": 0.92, "learning_rate": 3.9327188063289156e-05, "loss": 1.6428, "step": 3890 }, { "epoch": 0.92, "learning_rate": 3.9276556587913096e-05, "loss": 1.6037, "step": 3900 }, { "epoch": 0.92, "learning_rate": 3.922583806751461e-05, "loss": 1.5311, "step": 3910 }, { "epoch": 0.92, "learning_rate": 3.917503281132758e-05, "loss": 1.5731, "step": 3920 }, { "epoch": 0.93, "learning_rate": 3.9124141129114695e-05, "loss": 1.6292, "step": 3930 }, { "epoch": 0.93, "learning_rate": 3.90731633311656e-05, "loss": 1.5781, "step": 3940 }, { "epoch": 0.93, "learning_rate": 3.902209972829498e-05, "loss": 1.5801, "step": 3950 }, { "epoch": 0.93, "learning_rate": 3.897095063184069e-05, "loss": 1.642, "step": 3960 }, { "epoch": 0.94, "learning_rate": 3.8919716353661846e-05, "loss": 1.5843, "step": 3970 }, { "epoch": 0.94, "learning_rate": 3.886839720613691e-05, "loss": 1.6273, "step": 3980 }, { "epoch": 0.94, "learning_rate": 3.8816993502161815e-05, "loss": 1.675, "step": 3990 }, { "epoch": 0.94, "learning_rate": 3.876550555514802e-05, "loss": 1.6156, "step": 4000 }, { "epoch": 0.94, "eval_loss": 1.5981309413909912, "eval_runtime": 120.2627, "eval_samples_per_second": 11.4, "eval_steps_per_second": 2.852, "step": 4000 }, { "epoch": 0.95, "learning_rate": 3.8713933679020634e-05, "loss": 1.6232, "step": 4010 }, { "epoch": 0.95, "learning_rate": 3.8662278188216485e-05, "loss": 1.5513, "step": 4020 }, { "epoch": 0.95, "learning_rate": 3.861053939768218e-05, "loss": 1.5522, "step": 4030 }, { "epoch": 0.95, "learning_rate": 3.855871762287225e-05, "loss": 1.6226, "step": 4040 }, { "epoch": 0.95, "learning_rate": 3.8506813179747165e-05, "loss": 1.6186, "step": 4050 }, { "epoch": 0.96, "learning_rate": 3.8454826384771426e-05, "loss": 1.5325, "step": 4060 }, { "epoch": 0.96, "learning_rate": 3.840275755491164e-05, "loss": 1.5972, "step": 4070 }, { "epoch": 0.96, "learning_rate": 3.83506070076346e-05, "loss": 1.603, "step": 4080 }, { "epoch": 0.96, "learning_rate": 3.82983750609053e-05, "loss": 1.517, "step": 4090 }, { "epoch": 0.97, "learning_rate": 3.824606203318507e-05, "loss": 1.6564, "step": 4100 }, { "epoch": 0.97, "learning_rate": 3.819366824342959e-05, "loss": 1.656, "step": 4110 }, { "epoch": 0.97, "learning_rate": 3.814119401108692e-05, "loss": 1.6301, "step": 4120 }, { "epoch": 0.97, "learning_rate": 3.8088639656095614e-05, "loss": 1.5871, "step": 4130 }, { "epoch": 0.98, "learning_rate": 3.803600549888273e-05, "loss": 1.6384, "step": 4140 }, { "epoch": 0.98, "learning_rate": 3.7983291860361866e-05, "loss": 1.559, "step": 4150 }, { "epoch": 0.98, "learning_rate": 3.793049906193127e-05, "loss": 1.5624, "step": 4160 }, { "epoch": 0.98, "learning_rate": 3.78776274254718e-05, "loss": 1.5147, "step": 4170 }, { "epoch": 0.99, "learning_rate": 3.782467727334496e-05, "loss": 1.6521, "step": 4180 }, { "epoch": 0.99, "learning_rate": 3.7771648928391045e-05, "loss": 1.6049, "step": 4190 }, { "epoch": 0.99, "learning_rate": 3.771854271392703e-05, "loss": 1.6414, "step": 4200 }, { "epoch": 0.99, "learning_rate": 3.766535895374472e-05, "loss": 1.5619, "step": 4210 }, { "epoch": 0.99, "learning_rate": 3.761209797210866e-05, "loss": 1.5992, "step": 4220 }, { "epoch": 1.0, "learning_rate": 3.755876009375428e-05, "loss": 1.5478, "step": 4230 }, { "epoch": 1.0, "learning_rate": 3.750534564388582e-05, "loss": 1.6274, "step": 4240 }, { "epoch": 1.0, "learning_rate": 3.745185494817438e-05, "loss": 1.5732, "step": 4250 }, { "epoch": 1.0, "learning_rate": 3.7398288332755936e-05, "loss": 1.5372, "step": 4260 }, { "epoch": 1.01, "learning_rate": 3.7344646124229376e-05, "loss": 1.5875, "step": 4270 }, { "epoch": 1.01, "learning_rate": 3.7290928649654446e-05, "loss": 1.642, "step": 4280 }, { "epoch": 1.01, "learning_rate": 3.723713623654983e-05, "loss": 1.5793, "step": 4290 }, { "epoch": 1.01, "learning_rate": 3.718326921289108e-05, "loss": 1.644, "step": 4300 }, { "epoch": 1.02, "learning_rate": 3.712932790710869e-05, "loss": 1.5677, "step": 4310 }, { "epoch": 1.02, "learning_rate": 3.7075312648086036e-05, "loss": 1.5733, "step": 4320 }, { "epoch": 1.02, "learning_rate": 3.702122376515739e-05, "loss": 1.5445, "step": 4330 }, { "epoch": 1.02, "learning_rate": 3.696706158810591e-05, "loss": 1.6533, "step": 4340 }, { "epoch": 1.03, "learning_rate": 3.691282644716165e-05, "loss": 1.5779, "step": 4350 }, { "epoch": 1.03, "learning_rate": 3.685851867299953e-05, "loss": 1.5356, "step": 4360 }, { "epoch": 1.03, "learning_rate": 3.680413859673728e-05, "loss": 1.6036, "step": 4370 }, { "epoch": 1.03, "learning_rate": 3.674968654993352e-05, "loss": 1.5826, "step": 4380 }, { "epoch": 1.04, "learning_rate": 3.669516286458562e-05, "loss": 1.5973, "step": 4390 }, { "epoch": 1.04, "learning_rate": 3.664056787312779e-05, "loss": 1.5661, "step": 4400 }, { "epoch": 1.04, "learning_rate": 3.6585901908428946e-05, "loss": 1.5681, "step": 4410 }, { "epoch": 1.04, "learning_rate": 3.653116530379077e-05, "loss": 1.565, "step": 4420 }, { "epoch": 1.04, "learning_rate": 3.647635839294561e-05, "loss": 1.5517, "step": 4430 }, { "epoch": 1.05, "learning_rate": 3.642148151005452e-05, "loss": 1.588, "step": 4440 }, { "epoch": 1.05, "learning_rate": 3.636653498970512e-05, "loss": 1.592, "step": 4450 }, { "epoch": 1.05, "learning_rate": 3.6311519166909656e-05, "loss": 1.5633, "step": 4460 }, { "epoch": 1.05, "learning_rate": 3.62564343771029e-05, "loss": 1.6122, "step": 4470 }, { "epoch": 1.06, "learning_rate": 3.620128095614012e-05, "loss": 1.6046, "step": 4480 }, { "epoch": 1.06, "learning_rate": 3.614605924029504e-05, "loss": 1.5932, "step": 4490 }, { "epoch": 1.06, "learning_rate": 3.6090769566257767e-05, "loss": 1.6165, "step": 4500 }, { "epoch": 1.06, "learning_rate": 3.603541227113276e-05, "loss": 1.5227, "step": 4510 }, { "epoch": 1.07, "learning_rate": 3.597998769243678e-05, "loss": 1.5661, "step": 4520 }, { "epoch": 1.07, "learning_rate": 3.592449616809681e-05, "loss": 1.5499, "step": 4530 }, { "epoch": 1.07, "learning_rate": 3.5868938036448e-05, "loss": 1.5931, "step": 4540 }, { "epoch": 1.07, "learning_rate": 3.581331363623161e-05, "loss": 1.5451, "step": 4550 }, { "epoch": 1.08, "learning_rate": 3.5757623306592955e-05, "loss": 1.5564, "step": 4560 }, { "epoch": 1.08, "learning_rate": 3.570186738707931e-05, "loss": 1.5816, "step": 4570 }, { "epoch": 1.08, "learning_rate": 3.564604621763786e-05, "loss": 1.6446, "step": 4580 }, { "epoch": 1.08, "learning_rate": 3.559016013861364e-05, "loss": 1.594, "step": 4590 }, { "epoch": 1.08, "learning_rate": 3.553420949074742e-05, "loss": 1.6004, "step": 4600 }, { "epoch": 1.09, "learning_rate": 3.5478194615173655e-05, "loss": 1.5862, "step": 4610 }, { "epoch": 1.09, "learning_rate": 3.5422115853418405e-05, "loss": 1.6648, "step": 4620 }, { "epoch": 1.09, "learning_rate": 3.536597354739725e-05, "loss": 1.6372, "step": 4630 }, { "epoch": 1.09, "learning_rate": 3.530976803941319e-05, "loss": 1.5812, "step": 4640 }, { "epoch": 1.1, "learning_rate": 3.52534996721546e-05, "loss": 1.5302, "step": 4650 }, { "epoch": 1.1, "learning_rate": 3.519716878869308e-05, "loss": 1.5731, "step": 4660 }, { "epoch": 1.1, "learning_rate": 3.51407757324814e-05, "loss": 1.6117, "step": 4670 }, { "epoch": 1.1, "learning_rate": 3.508432084735142e-05, "loss": 1.5817, "step": 4680 }, { "epoch": 1.11, "learning_rate": 3.502780447751196e-05, "loss": 1.5911, "step": 4690 }, { "epoch": 1.11, "learning_rate": 3.4971226967546714e-05, "loss": 1.5234, "step": 4700 }, { "epoch": 1.11, "learning_rate": 3.491458866241217e-05, "loss": 1.5837, "step": 4710 }, { "epoch": 1.11, "learning_rate": 3.485788990743546e-05, "loss": 1.5487, "step": 4720 }, { "epoch": 1.12, "learning_rate": 3.48011310483123e-05, "loss": 1.5276, "step": 4730 }, { "epoch": 1.12, "learning_rate": 3.474431243110486e-05, "loss": 1.5954, "step": 4740 }, { "epoch": 1.12, "learning_rate": 3.468743440223966e-05, "loss": 1.5444, "step": 4750 }, { "epoch": 1.12, "learning_rate": 3.463049730850546e-05, "loss": 1.5836, "step": 4760 }, { "epoch": 1.12, "learning_rate": 3.457350149705113e-05, "loss": 1.547, "step": 4770 }, { "epoch": 1.13, "learning_rate": 3.451644731538357e-05, "loss": 1.6454, "step": 4780 }, { "epoch": 1.13, "learning_rate": 3.4459335111365533e-05, "loss": 1.5334, "step": 4790 }, { "epoch": 1.13, "learning_rate": 3.440216523321356e-05, "loss": 1.5022, "step": 4800 }, { "epoch": 1.13, "learning_rate": 3.434493802949582e-05, "loss": 1.5177, "step": 4810 }, { "epoch": 1.14, "learning_rate": 3.428765384913004e-05, "loss": 1.5837, "step": 4820 }, { "epoch": 1.14, "learning_rate": 3.4230313041381265e-05, "loss": 1.5765, "step": 4830 }, { "epoch": 1.14, "learning_rate": 3.417291595585987e-05, "loss": 1.5551, "step": 4840 }, { "epoch": 1.14, "learning_rate": 3.411546294251932e-05, "loss": 1.6088, "step": 4850 }, { "epoch": 1.15, "learning_rate": 3.405795435165409e-05, "loss": 1.5787, "step": 4860 }, { "epoch": 1.15, "learning_rate": 3.400039053389751e-05, "loss": 1.6321, "step": 4870 }, { "epoch": 1.15, "learning_rate": 3.394277184021962e-05, "loss": 1.5318, "step": 4880 }, { "epoch": 1.15, "learning_rate": 3.388509862192507e-05, "loss": 1.4413, "step": 4890 }, { "epoch": 1.16, "learning_rate": 3.382737123065092e-05, "loss": 1.5207, "step": 4900 }, { "epoch": 1.16, "learning_rate": 3.3769590018364564e-05, "loss": 1.6031, "step": 4910 }, { "epoch": 1.16, "learning_rate": 3.371175533736148e-05, "loss": 1.6052, "step": 4920 }, { "epoch": 1.16, "learning_rate": 3.365386754026323e-05, "loss": 1.5122, "step": 4930 }, { "epoch": 1.16, "learning_rate": 3.359592698001516e-05, "loss": 1.6031, "step": 4940 }, { "epoch": 1.17, "learning_rate": 3.353793400988436e-05, "loss": 1.5539, "step": 4950 }, { "epoch": 1.17, "learning_rate": 3.3479888983457454e-05, "loss": 1.5709, "step": 4960 }, { "epoch": 1.17, "learning_rate": 3.342179225463843e-05, "loss": 1.5619, "step": 4970 }, { "epoch": 1.17, "learning_rate": 3.336364417764654e-05, "loss": 1.5903, "step": 4980 }, { "epoch": 1.18, "learning_rate": 3.330544510701411e-05, "loss": 1.5481, "step": 4990 }, { "epoch": 1.18, "learning_rate": 3.324719539758435e-05, "loss": 1.5959, "step": 5000 }, { "epoch": 1.18, "eval_loss": 1.592111349105835, "eval_runtime": 120.2035, "eval_samples_per_second": 11.406, "eval_steps_per_second": 2.853, "step": 5000 }, { "epoch": 1.18, "learning_rate": 3.3188895404509254e-05, "loss": 1.5371, "step": 5010 }, { "epoch": 1.18, "learning_rate": 3.313054548324737e-05, "loss": 1.6071, "step": 5020 }, { "epoch": 1.19, "learning_rate": 3.307214598956165e-05, "loss": 1.5601, "step": 5030 }, { "epoch": 1.19, "learning_rate": 3.3013697279517346e-05, "loss": 1.4761, "step": 5040 }, { "epoch": 1.19, "learning_rate": 3.295519970947973e-05, "loss": 1.5097, "step": 5050 }, { "epoch": 1.19, "learning_rate": 3.289665363611201e-05, "loss": 1.6271, "step": 5060 }, { "epoch": 1.2, "learning_rate": 3.2838059416373094e-05, "loss": 1.6295, "step": 5070 }, { "epoch": 1.2, "learning_rate": 3.277941740751548e-05, "loss": 1.5371, "step": 5080 }, { "epoch": 1.2, "learning_rate": 3.272072796708299e-05, "loss": 1.641, "step": 5090 }, { "epoch": 1.2, "learning_rate": 3.266199145290868e-05, "loss": 1.5497, "step": 5100 }, { "epoch": 1.2, "learning_rate": 3.260320822311259e-05, "loss": 1.5779, "step": 5110 }, { "epoch": 1.21, "learning_rate": 3.2544378636099625e-05, "loss": 1.6502, "step": 5120 }, { "epoch": 1.21, "learning_rate": 3.248550305055728e-05, "loss": 1.636, "step": 5130 }, { "epoch": 1.21, "learning_rate": 3.242658182545356e-05, "loss": 1.5893, "step": 5140 }, { "epoch": 1.21, "learning_rate": 3.2367615320034675e-05, "loss": 1.5649, "step": 5150 }, { "epoch": 1.22, "learning_rate": 3.2308603893822985e-05, "loss": 1.6139, "step": 5160 }, { "epoch": 1.22, "learning_rate": 3.224954790661469e-05, "loss": 1.4683, "step": 5170 }, { "epoch": 1.22, "learning_rate": 3.219044771847767e-05, "loss": 1.5228, "step": 5180 }, { "epoch": 1.22, "learning_rate": 3.2131303689749334e-05, "loss": 1.5809, "step": 5190 }, { "epoch": 1.23, "learning_rate": 3.2072116181034364e-05, "loss": 1.5908, "step": 5200 }, { "epoch": 1.23, "learning_rate": 3.201288555320256e-05, "loss": 1.5981, "step": 5210 }, { "epoch": 1.23, "learning_rate": 3.1953612167386624e-05, "loss": 1.566, "step": 5220 }, { "epoch": 1.23, "learning_rate": 3.189429638497994e-05, "loss": 1.6265, "step": 5230 }, { "epoch": 1.24, "learning_rate": 3.183493856763438e-05, "loss": 1.5968, "step": 5240 }, { "epoch": 1.24, "learning_rate": 3.177553907725814e-05, "loss": 1.5487, "step": 5250 }, { "epoch": 1.24, "learning_rate": 3.171609827601347e-05, "loss": 1.5437, "step": 5260 }, { "epoch": 1.24, "learning_rate": 3.16566165263145e-05, "loss": 1.6033, "step": 5270 }, { "epoch": 1.24, "learning_rate": 3.159709419082503e-05, "loss": 1.5619, "step": 5280 }, { "epoch": 1.25, "learning_rate": 3.153753163245632e-05, "loss": 1.595, "step": 5290 }, { "epoch": 1.25, "learning_rate": 3.147792921436484e-05, "loss": 1.5967, "step": 5300 }, { "epoch": 1.25, "learning_rate": 3.1418287299950136e-05, "loss": 1.6131, "step": 5310 }, { "epoch": 1.25, "learning_rate": 3.1358606252852526e-05, "loss": 1.564, "step": 5320 }, { "epoch": 1.26, "learning_rate": 3.1298886436950946e-05, "loss": 1.5854, "step": 5330 }, { "epoch": 1.26, "learning_rate": 3.1239128216360696e-05, "loss": 1.5676, "step": 5340 }, { "epoch": 1.26, "learning_rate": 3.117933195543122e-05, "loss": 1.5799, "step": 5350 }, { "epoch": 1.26, "learning_rate": 3.111949801874393e-05, "loss": 1.5614, "step": 5360 }, { "epoch": 1.27, "learning_rate": 3.105962677110991e-05, "loss": 1.5621, "step": 5370 }, { "epoch": 1.27, "learning_rate": 3.099971857756777e-05, "loss": 1.64, "step": 5380 }, { "epoch": 1.27, "learning_rate": 3.093977380338134e-05, "loss": 1.5687, "step": 5390 }, { "epoch": 1.27, "learning_rate": 3.0879792814037524e-05, "loss": 1.6168, "step": 5400 }, { "epoch": 1.28, "learning_rate": 3.0819775975244005e-05, "loss": 1.5049, "step": 5410 }, { "epoch": 1.28, "learning_rate": 3.075972365292706e-05, "loss": 1.6166, "step": 5420 }, { "epoch": 1.28, "learning_rate": 3.0699636213229294e-05, "loss": 1.5757, "step": 5430 }, { "epoch": 1.28, "learning_rate": 3.0639514022507436e-05, "loss": 1.5442, "step": 5440 }, { "epoch": 1.28, "learning_rate": 3.057935744733009e-05, "loss": 1.5417, "step": 5450 }, { "epoch": 1.29, "learning_rate": 3.051916685447551e-05, "loss": 1.556, "step": 5460 }, { "epoch": 1.29, "learning_rate": 3.0458942610929353e-05, "loss": 1.5974, "step": 5470 }, { "epoch": 1.29, "learning_rate": 3.0398685083882438e-05, "loss": 1.5775, "step": 5480 }, { "epoch": 1.29, "learning_rate": 3.0338394640728533e-05, "loss": 1.5323, "step": 5490 }, { "epoch": 1.3, "learning_rate": 3.027807164906209e-05, "loss": 1.6194, "step": 5500 }, { "epoch": 1.3, "learning_rate": 3.0217716476676005e-05, "loss": 1.5799, "step": 5510 }, { "epoch": 1.3, "learning_rate": 3.0157329491559382e-05, "loss": 1.5676, "step": 5520 }, { "epoch": 1.3, "learning_rate": 3.0096911061895306e-05, "loss": 1.5538, "step": 5530 }, { "epoch": 1.31, "learning_rate": 3.0036461556058552e-05, "loss": 1.5787, "step": 5540 }, { "epoch": 1.31, "learning_rate": 2.9975981342613406e-05, "loss": 1.5615, "step": 5550 }, { "epoch": 1.31, "learning_rate": 2.9915470790311338e-05, "loss": 1.5831, "step": 5560 }, { "epoch": 1.31, "learning_rate": 2.9854930268088845e-05, "loss": 1.6376, "step": 5570 }, { "epoch": 1.32, "learning_rate": 2.9794360145065093e-05, "loss": 1.5673, "step": 5580 }, { "epoch": 1.32, "learning_rate": 2.9733760790539784e-05, "loss": 1.5465, "step": 5590 }, { "epoch": 1.32, "learning_rate": 2.9673132573990796e-05, "loss": 1.5375, "step": 5600 }, { "epoch": 1.32, "learning_rate": 2.961247586507203e-05, "loss": 1.6384, "step": 5610 }, { "epoch": 1.33, "learning_rate": 2.955179103361106e-05, "loss": 1.608, "step": 5620 }, { "epoch": 1.33, "learning_rate": 2.9491078449606958e-05, "loss": 1.6231, "step": 5630 }, { "epoch": 1.33, "learning_rate": 2.9430338483227982e-05, "loss": 1.5672, "step": 5640 }, { "epoch": 1.33, "learning_rate": 2.9369571504809368e-05, "loss": 1.5708, "step": 5650 }, { "epoch": 1.33, "learning_rate": 2.9308777884851013e-05, "loss": 1.5704, "step": 5660 }, { "epoch": 1.34, "learning_rate": 2.924795799401528e-05, "loss": 1.5602, "step": 5670 }, { "epoch": 1.34, "learning_rate": 2.9187112203124687e-05, "loss": 1.5754, "step": 5680 }, { "epoch": 1.34, "learning_rate": 2.9126240883159684e-05, "loss": 1.5619, "step": 5690 }, { "epoch": 1.34, "learning_rate": 2.9065344405256345e-05, "loss": 1.5708, "step": 5700 }, { "epoch": 1.35, "learning_rate": 2.9004423140704162e-05, "loss": 1.6248, "step": 5710 }, { "epoch": 1.35, "learning_rate": 2.894347746094374e-05, "loss": 1.5462, "step": 5720 }, { "epoch": 1.35, "learning_rate": 2.8882507737564546e-05, "loss": 1.523, "step": 5730 }, { "epoch": 1.35, "learning_rate": 2.8821514342302646e-05, "loss": 1.5787, "step": 5740 }, { "epoch": 1.36, "learning_rate": 2.876049764703842e-05, "loss": 1.5542, "step": 5750 }, { "epoch": 1.36, "learning_rate": 2.8699458023794342e-05, "loss": 1.503, "step": 5760 }, { "epoch": 1.36, "learning_rate": 2.8638395844732636e-05, "loss": 1.554, "step": 5770 }, { "epoch": 1.36, "learning_rate": 2.857731148215309e-05, "loss": 1.5576, "step": 5780 }, { "epoch": 1.37, "learning_rate": 2.8516205308490718e-05, "loss": 1.5663, "step": 5790 }, { "epoch": 1.37, "learning_rate": 2.8455077696313536e-05, "loss": 1.5718, "step": 5800 }, { "epoch": 1.37, "learning_rate": 2.8393929018320264e-05, "loss": 1.5952, "step": 5810 }, { "epoch": 1.37, "learning_rate": 2.8332759647338047e-05, "loss": 1.6014, "step": 5820 }, { "epoch": 1.37, "learning_rate": 2.827156995632024e-05, "loss": 1.5607, "step": 5830 }, { "epoch": 1.38, "learning_rate": 2.8210360318344032e-05, "loss": 1.6279, "step": 5840 }, { "epoch": 1.38, "learning_rate": 2.8149131106608284e-05, "loss": 1.5182, "step": 5850 }, { "epoch": 1.38, "learning_rate": 2.8087882694431156e-05, "loss": 1.6032, "step": 5860 }, { "epoch": 1.38, "learning_rate": 2.80266154552479e-05, "loss": 1.5395, "step": 5870 }, { "epoch": 1.39, "learning_rate": 2.796532976260856e-05, "loss": 1.5687, "step": 5880 }, { "epoch": 1.39, "learning_rate": 2.7904025990175675e-05, "loss": 1.5735, "step": 5890 }, { "epoch": 1.39, "learning_rate": 2.7842704511722017e-05, "loss": 1.5653, "step": 5900 }, { "epoch": 1.39, "learning_rate": 2.7781365701128333e-05, "loss": 1.5791, "step": 5910 }, { "epoch": 1.4, "learning_rate": 2.7720009932381024e-05, "loss": 1.5829, "step": 5920 }, { "epoch": 1.4, "learning_rate": 2.76586375795699e-05, "loss": 1.5581, "step": 5930 }, { "epoch": 1.4, "learning_rate": 2.7597249016885878e-05, "loss": 1.6085, "step": 5940 }, { "epoch": 1.4, "learning_rate": 2.753584461861871e-05, "loss": 1.5713, "step": 5950 }, { "epoch": 1.41, "learning_rate": 2.74744247591547e-05, "loss": 1.5618, "step": 5960 }, { "epoch": 1.41, "learning_rate": 2.7412989812974416e-05, "loss": 1.6267, "step": 5970 }, { "epoch": 1.41, "learning_rate": 2.7351540154650408e-05, "loss": 1.6045, "step": 5980 }, { "epoch": 1.41, "learning_rate": 2.7290076158844935e-05, "loss": 1.5682, "step": 5990 }, { "epoch": 1.41, "learning_rate": 2.7228598200307666e-05, "loss": 1.5716, "step": 6000 }, { "epoch": 1.41, "eval_loss": 1.5865955352783203, "eval_runtime": 120.5077, "eval_samples_per_second": 11.377, "eval_steps_per_second": 2.846, "step": 6000 }, { "epoch": 1.42, "learning_rate": 2.716710665387341e-05, "loss": 1.5948, "step": 6010 }, { "epoch": 1.42, "learning_rate": 2.710560189445981e-05, "loss": 1.5397, "step": 6020 }, { "epoch": 1.42, "learning_rate": 2.704408429706508e-05, "loss": 1.5307, "step": 6030 }, { "epoch": 1.42, "learning_rate": 2.6982554236765704e-05, "loss": 1.5215, "step": 6040 }, { "epoch": 1.43, "learning_rate": 2.692101208871415e-05, "loss": 1.6056, "step": 6050 }, { "epoch": 1.43, "learning_rate": 2.6859458228136592e-05, "loss": 1.5652, "step": 6060 }, { "epoch": 1.43, "learning_rate": 2.6797893030330607e-05, "loss": 1.5155, "step": 6070 }, { "epoch": 1.43, "learning_rate": 2.6736316870662904e-05, "loss": 1.6073, "step": 6080 }, { "epoch": 1.44, "learning_rate": 2.6674730124567023e-05, "loss": 1.5765, "step": 6090 }, { "epoch": 1.44, "learning_rate": 2.6613133167541055e-05, "loss": 1.601, "step": 6100 }, { "epoch": 1.44, "learning_rate": 2.6551526375145342e-05, "loss": 1.6192, "step": 6110 }, { "epoch": 1.44, "learning_rate": 2.6489910123000195e-05, "loss": 1.5599, "step": 6120 }, { "epoch": 1.45, "learning_rate": 2.6428284786783597e-05, "loss": 1.5574, "step": 6130 }, { "epoch": 1.45, "learning_rate": 2.6366650742228937e-05, "loss": 1.609, "step": 6140 }, { "epoch": 1.45, "learning_rate": 2.6305008365122664e-05, "loss": 1.5537, "step": 6150 }, { "epoch": 1.45, "learning_rate": 2.6243358031302067e-05, "loss": 1.4887, "step": 6160 }, { "epoch": 1.45, "learning_rate": 2.6181700116652917e-05, "loss": 1.604, "step": 6170 }, { "epoch": 1.46, "learning_rate": 2.612003499710724e-05, "loss": 1.5129, "step": 6180 }, { "epoch": 1.46, "learning_rate": 2.6058363048640948e-05, "loss": 1.5484, "step": 6190 }, { "epoch": 1.46, "learning_rate": 2.5996684647271635e-05, "loss": 1.5315, "step": 6200 }, { "epoch": 1.46, "learning_rate": 2.59350001690562e-05, "loss": 1.5443, "step": 6210 }, { "epoch": 1.47, "learning_rate": 2.5873309990088612e-05, "loss": 1.5461, "step": 6220 }, { "epoch": 1.47, "learning_rate": 2.5811614486497605e-05, "loss": 1.5906, "step": 6230 }, { "epoch": 1.47, "learning_rate": 2.574991403444435e-05, "loss": 1.5412, "step": 6240 }, { "epoch": 1.47, "learning_rate": 2.5688209010120225e-05, "loss": 1.5833, "step": 6250 }, { "epoch": 1.48, "learning_rate": 2.562649978974445e-05, "loss": 1.5442, "step": 6260 }, { "epoch": 1.48, "learning_rate": 2.556478674956186e-05, "loss": 1.5377, "step": 6270 }, { "epoch": 1.48, "learning_rate": 2.5503070265840556e-05, "loss": 1.5521, "step": 6280 }, { "epoch": 1.48, "learning_rate": 2.5441350714869644e-05, "loss": 1.5607, "step": 6290 }, { "epoch": 1.49, "learning_rate": 2.5379628472956933e-05, "loss": 1.5642, "step": 6300 }, { "epoch": 1.49, "learning_rate": 2.5317903916426645e-05, "loss": 1.5422, "step": 6310 }, { "epoch": 1.49, "learning_rate": 2.5256177421617088e-05, "loss": 1.5729, "step": 6320 }, { "epoch": 1.49, "learning_rate": 2.519444936487842e-05, "loss": 1.5666, "step": 6330 }, { "epoch": 1.49, "learning_rate": 2.5132720122570298e-05, "loss": 1.5788, "step": 6340 }, { "epoch": 1.5, "learning_rate": 2.507099007105963e-05, "loss": 1.4995, "step": 6350 }, { "epoch": 1.5, "learning_rate": 2.500925958671823e-05, "loss": 1.6433, "step": 6360 }, { "epoch": 1.5, "learning_rate": 2.494752904592058e-05, "loss": 1.5099, "step": 6370 }, { "epoch": 1.5, "learning_rate": 2.4885798825041488e-05, "loss": 1.6121, "step": 6380 }, { "epoch": 1.51, "learning_rate": 2.4824069300453815e-05, "loss": 1.551, "step": 6390 }, { "epoch": 1.51, "learning_rate": 2.4762340848526162e-05, "loss": 1.5144, "step": 6400 }, { "epoch": 1.51, "learning_rate": 2.4700613845620632e-05, "loss": 1.5084, "step": 6410 }, { "epoch": 1.51, "learning_rate": 2.4638888668090457e-05, "loss": 1.5896, "step": 6420 }, { "epoch": 1.52, "learning_rate": 2.4577165692277744e-05, "loss": 1.5006, "step": 6430 }, { "epoch": 1.52, "learning_rate": 2.4515445294511176e-05, "loss": 1.5435, "step": 6440 }, { "epoch": 1.52, "learning_rate": 2.445372785110374e-05, "loss": 1.5812, "step": 6450 }, { "epoch": 1.52, "learning_rate": 2.439201373835039e-05, "loss": 1.5867, "step": 6460 }, { "epoch": 1.53, "learning_rate": 2.433030333252576e-05, "loss": 1.5748, "step": 6470 }, { "epoch": 1.53, "learning_rate": 2.42685970098819e-05, "loss": 1.5986, "step": 6480 }, { "epoch": 1.53, "learning_rate": 2.42130651215743e-05, "loss": 1.5599, "step": 6490 }, { "epoch": 1.53, "learning_rate": 2.4151367593457314e-05, "loss": 1.549, "step": 6500 }, { "epoch": 1.53, "learning_rate": 2.4089675239503044e-05, "loss": 1.5109, "step": 6510 }, { "epoch": 1.54, "learning_rate": 2.4027988435853466e-05, "loss": 1.5712, "step": 6520 }, { "epoch": 1.54, "learning_rate": 2.3966307558616745e-05, "loss": 1.5581, "step": 6530 }, { "epoch": 1.54, "learning_rate": 2.3904632983864885e-05, "loss": 1.5607, "step": 6540 }, { "epoch": 1.54, "learning_rate": 2.384296508763147e-05, "loss": 1.598, "step": 6550 }, { "epoch": 1.55, "learning_rate": 2.378130424590935e-05, "loss": 1.5464, "step": 6560 }, { "epoch": 1.55, "learning_rate": 2.37196508346484e-05, "loss": 1.5242, "step": 6570 }, { "epoch": 1.55, "learning_rate": 2.365800522975316e-05, "loss": 1.4973, "step": 6580 }, { "epoch": 1.55, "learning_rate": 2.359636780708058e-05, "loss": 1.6024, "step": 6590 }, { "epoch": 1.56, "learning_rate": 2.353473894243772e-05, "loss": 1.5798, "step": 6600 }, { "epoch": 1.56, "learning_rate": 2.3473119011579485e-05, "loss": 1.5767, "step": 6610 }, { "epoch": 1.56, "learning_rate": 2.3411508390206286e-05, "loss": 1.5836, "step": 6620 }, { "epoch": 1.56, "learning_rate": 2.334990745396177e-05, "loss": 1.5451, "step": 6630 }, { "epoch": 1.57, "learning_rate": 2.328831657843054e-05, "loss": 1.5379, "step": 6640 }, { "epoch": 1.57, "learning_rate": 2.3226736139135876e-05, "loss": 1.591, "step": 6650 }, { "epoch": 1.57, "learning_rate": 2.316516651153741e-05, "loss": 1.5998, "step": 6660 }, { "epoch": 1.57, "learning_rate": 2.3103608071028848e-05, "loss": 1.6216, "step": 6670 }, { "epoch": 1.58, "learning_rate": 2.3042061192935705e-05, "loss": 1.5791, "step": 6680 }, { "epoch": 1.58, "learning_rate": 2.2980526252512972e-05, "loss": 1.6064, "step": 6690 }, { "epoch": 1.58, "learning_rate": 2.29190036249429e-05, "loss": 1.6265, "step": 6700 }, { "epoch": 1.58, "learning_rate": 2.2857493685332633e-05, "loss": 1.5985, "step": 6710 }, { "epoch": 1.58, "learning_rate": 2.2795996808711963e-05, "loss": 1.591, "step": 6720 }, { "epoch": 1.59, "learning_rate": 2.2734513370031025e-05, "loss": 1.5104, "step": 6730 }, { "epoch": 1.59, "learning_rate": 2.2673043744158057e-05, "loss": 1.5631, "step": 6740 }, { "epoch": 1.59, "learning_rate": 2.261158830587705e-05, "loss": 1.5249, "step": 6750 }, { "epoch": 1.59, "learning_rate": 2.25501474298855e-05, "loss": 1.6217, "step": 6760 }, { "epoch": 1.6, "learning_rate": 2.2488721490792104e-05, "loss": 1.5736, "step": 6770 }, { "epoch": 1.6, "learning_rate": 2.2427310863114513e-05, "loss": 1.5733, "step": 6780 }, { "epoch": 1.6, "learning_rate": 2.2365915921277004e-05, "loss": 1.56, "step": 6790 }, { "epoch": 1.6, "learning_rate": 2.2304537039608224e-05, "loss": 1.5668, "step": 6800 }, { "epoch": 1.61, "learning_rate": 2.22431745923389e-05, "loss": 1.5134, "step": 6810 }, { "epoch": 1.61, "learning_rate": 2.2181828953599556e-05, "loss": 1.5995, "step": 6820 }, { "epoch": 1.61, "learning_rate": 2.2120500497418238e-05, "loss": 1.644, "step": 6830 }, { "epoch": 1.61, "learning_rate": 2.2059189597718205e-05, "loss": 1.5736, "step": 6840 }, { "epoch": 1.62, "learning_rate": 2.200402510774106e-05, "loss": 1.449, "step": 6850 }, { "epoch": 1.62, "learning_rate": 2.194274859512892e-05, "loss": 1.5933, "step": 6860 }, { "epoch": 1.62, "learning_rate": 2.18814907227621e-05, "loss": 1.5343, "step": 6870 }, { "epoch": 1.62, "learning_rate": 2.182025186413352e-05, "loss": 1.4848, "step": 6880 }, { "epoch": 1.62, "learning_rate": 2.175903239262017e-05, "loss": 1.5866, "step": 6890 }, { "epoch": 1.63, "learning_rate": 2.1697832681480858e-05, "loss": 1.5759, "step": 6900 }, { "epoch": 1.63, "learning_rate": 2.1636653103853887e-05, "loss": 1.5649, "step": 6910 }, { "epoch": 1.63, "learning_rate": 2.157549403275481e-05, "loss": 1.5768, "step": 6920 }, { "epoch": 1.63, "learning_rate": 2.1514355841074157e-05, "loss": 1.5945, "step": 6930 }, { "epoch": 1.64, "learning_rate": 2.1453238901575158e-05, "loss": 1.5329, "step": 6940 }, { "epoch": 1.64, "learning_rate": 2.139214358689146e-05, "loss": 1.5272, "step": 6950 }, { "epoch": 1.64, "learning_rate": 2.1331070269524858e-05, "loss": 1.6365, "step": 6960 }, { "epoch": 1.64, "learning_rate": 2.1270019321843033e-05, "loss": 1.5395, "step": 6970 }, { "epoch": 1.65, "learning_rate": 2.120899111607728e-05, "loss": 1.5281, "step": 6980 }, { "epoch": 1.65, "learning_rate": 2.114798602432024e-05, "loss": 1.6174, "step": 6990 }, { "epoch": 1.65, "learning_rate": 2.108700441852361e-05, "loss": 1.6281, "step": 7000 }, { "epoch": 1.65, "eval_loss": 1.5833255052566528, "eval_runtime": 120.1395, "eval_samples_per_second": 11.412, "eval_steps_per_second": 2.855, "step": 7000 }, { "epoch": 1.65, "learning_rate": 2.1026046670495906e-05, "loss": 1.6199, "step": 7010 }, { "epoch": 1.66, "learning_rate": 2.0965113151900166e-05, "loss": 1.6042, "step": 7020 }, { "epoch": 1.66, "learning_rate": 2.090420423425172e-05, "loss": 1.5406, "step": 7030 }, { "epoch": 1.66, "learning_rate": 2.0843320288915903e-05, "loss": 1.5542, "step": 7040 }, { "epoch": 1.66, "learning_rate": 2.078246168710577e-05, "loss": 1.5691, "step": 7050 }, { "epoch": 1.66, "learning_rate": 2.072162879987986e-05, "loss": 1.5629, "step": 7060 }, { "epoch": 1.67, "learning_rate": 2.066082199813996e-05, "loss": 1.6191, "step": 7070 }, { "epoch": 1.67, "learning_rate": 2.0600041652628787e-05, "loss": 1.6127, "step": 7080 }, { "epoch": 1.67, "learning_rate": 2.0539288133927746e-05, "loss": 1.5847, "step": 7090 }, { "epoch": 1.67, "learning_rate": 2.0478561812454678e-05, "loss": 1.591, "step": 7100 }, { "epoch": 1.68, "learning_rate": 2.0417863058461633e-05, "loss": 1.5478, "step": 7110 }, { "epoch": 1.68, "learning_rate": 2.0357192242032547e-05, "loss": 1.5991, "step": 7120 }, { "epoch": 1.68, "learning_rate": 2.0296549733081027e-05, "loss": 1.5795, "step": 7130 }, { "epoch": 1.68, "learning_rate": 2.0235935901348098e-05, "loss": 1.5478, "step": 7140 }, { "epoch": 1.69, "learning_rate": 2.0175351116399904e-05, "loss": 1.641, "step": 7150 }, { "epoch": 1.69, "learning_rate": 2.011479574762555e-05, "loss": 1.5214, "step": 7160 }, { "epoch": 1.69, "learning_rate": 2.005427016423474e-05, "loss": 1.5606, "step": 7170 }, { "epoch": 1.69, "learning_rate": 1.9993774735255587e-05, "loss": 1.5445, "step": 7180 }, { "epoch": 1.7, "learning_rate": 1.9933309829532344e-05, "loss": 1.6013, "step": 7190 }, { "epoch": 1.7, "learning_rate": 1.9872875815723187e-05, "loss": 1.61, "step": 7200 }, { "epoch": 1.7, "learning_rate": 1.981247306229792e-05, "loss": 1.5263, "step": 7210 }, { "epoch": 1.7, "learning_rate": 1.9752101937535754e-05, "loss": 1.5198, "step": 7220 }, { "epoch": 1.7, "learning_rate": 1.9691762809523055e-05, "loss": 1.5959, "step": 7230 }, { "epoch": 1.71, "learning_rate": 1.963145604615112e-05, "loss": 1.6017, "step": 7240 }, { "epoch": 1.71, "learning_rate": 1.9571182015113894e-05, "loss": 1.5784, "step": 7250 }, { "epoch": 1.71, "learning_rate": 1.9510941083905775e-05, "loss": 1.5273, "step": 7260 }, { "epoch": 1.71, "learning_rate": 1.9450733619819317e-05, "loss": 1.5608, "step": 7270 }, { "epoch": 1.72, "learning_rate": 1.939055998994306e-05, "loss": 1.5707, "step": 7280 }, { "epoch": 1.72, "learning_rate": 1.9330420561159224e-05, "loss": 1.5873, "step": 7290 }, { "epoch": 1.72, "learning_rate": 1.9270315700141532e-05, "loss": 1.5831, "step": 7300 }, { "epoch": 1.72, "learning_rate": 1.9210245773352913e-05, "loss": 1.5502, "step": 7310 }, { "epoch": 1.73, "learning_rate": 1.915021114704332e-05, "loss": 1.5395, "step": 7320 }, { "epoch": 1.73, "learning_rate": 1.909021218724748e-05, "loss": 1.6205, "step": 7330 }, { "epoch": 1.73, "learning_rate": 1.9030249259782647e-05, "loss": 1.6269, "step": 7340 }, { "epoch": 1.73, "learning_rate": 1.8970322730246386e-05, "loss": 1.6254, "step": 7350 }, { "epoch": 1.74, "learning_rate": 1.891043296401435e-05, "loss": 1.6078, "step": 7360 }, { "epoch": 1.74, "learning_rate": 1.8850580326238037e-05, "loss": 1.5952, "step": 7370 }, { "epoch": 1.74, "learning_rate": 1.8790765181842572e-05, "loss": 1.6439, "step": 7380 }, { "epoch": 1.74, "learning_rate": 1.873098789552448e-05, "loss": 1.5527, "step": 7390 }, { "epoch": 1.74, "learning_rate": 1.8671248831749454e-05, "loss": 1.5974, "step": 7400 }, { "epoch": 1.75, "learning_rate": 1.8611548354750176e-05, "loss": 1.5194, "step": 7410 }, { "epoch": 1.75, "learning_rate": 1.8551886828524013e-05, "loss": 1.5647, "step": 7420 }, { "epoch": 1.75, "learning_rate": 1.8492264616830884e-05, "loss": 1.6324, "step": 7430 }, { "epoch": 1.75, "learning_rate": 1.843268208319098e-05, "loss": 1.5751, "step": 7440 }, { "epoch": 1.76, "learning_rate": 1.8373139590882603e-05, "loss": 1.5693, "step": 7450 }, { "epoch": 1.76, "learning_rate": 1.8313637502939895e-05, "loss": 1.562, "step": 7460 }, { "epoch": 1.76, "learning_rate": 1.8254176182150654e-05, "loss": 1.5584, "step": 7470 }, { "epoch": 1.76, "learning_rate": 1.8194755991054123e-05, "loss": 1.5866, "step": 7480 }, { "epoch": 1.77, "learning_rate": 1.8135377291938765e-05, "loss": 1.6487, "step": 7490 }, { "epoch": 1.77, "learning_rate": 1.8076040446840092e-05, "loss": 1.5458, "step": 7500 }, { "epoch": 1.77, "learning_rate": 1.80167458175384e-05, "loss": 1.5688, "step": 7510 }, { "epoch": 1.77, "learning_rate": 1.79574937655566e-05, "loss": 1.5377, "step": 7520 }, { "epoch": 1.78, "learning_rate": 1.7898284652158006e-05, "loss": 1.6038, "step": 7530 }, { "epoch": 1.78, "learning_rate": 1.783911883834415e-05, "loss": 1.5159, "step": 7540 }, { "epoch": 1.78, "learning_rate": 1.777999668485254e-05, "loss": 1.575, "step": 7550 }, { "epoch": 1.78, "learning_rate": 1.7720918552154498e-05, "loss": 1.6133, "step": 7560 }, { "epoch": 1.78, "learning_rate": 1.7661884800452932e-05, "loss": 1.5649, "step": 7570 }, { "epoch": 1.79, "learning_rate": 1.7602895789680194e-05, "loss": 1.5856, "step": 7580 }, { "epoch": 1.79, "learning_rate": 1.7543951879495806e-05, "loss": 1.5763, "step": 7590 }, { "epoch": 1.79, "learning_rate": 1.7485053429284335e-05, "loss": 1.5841, "step": 7600 }, { "epoch": 1.79, "learning_rate": 1.7426200798153152e-05, "loss": 1.6031, "step": 7610 }, { "epoch": 1.8, "learning_rate": 1.7367394344930298e-05, "loss": 1.5723, "step": 7620 }, { "epoch": 1.8, "learning_rate": 1.7308634428162245e-05, "loss": 1.5619, "step": 7630 }, { "epoch": 1.8, "learning_rate": 1.724992140611173e-05, "loss": 1.5642, "step": 7640 }, { "epoch": 1.8, "learning_rate": 1.719125563675557e-05, "loss": 1.5634, "step": 7650 }, { "epoch": 1.81, "learning_rate": 1.7132637477782477e-05, "loss": 1.5896, "step": 7660 }, { "epoch": 1.81, "learning_rate": 1.7074067286590897e-05, "loss": 1.5564, "step": 7670 }, { "epoch": 1.81, "learning_rate": 1.7015545420286798e-05, "loss": 1.533, "step": 7680 }, { "epoch": 1.81, "learning_rate": 1.695707223568151e-05, "loss": 1.5789, "step": 7690 }, { "epoch": 1.82, "learning_rate": 1.689864808928954e-05, "loss": 1.573, "step": 7700 }, { "epoch": 1.82, "learning_rate": 1.6840273337326424e-05, "loss": 1.6167, "step": 7710 }, { "epoch": 1.82, "learning_rate": 1.6781948335706534e-05, "loss": 1.5644, "step": 7720 }, { "epoch": 1.82, "learning_rate": 1.67236734400409e-05, "loss": 1.5827, "step": 7730 }, { "epoch": 1.82, "learning_rate": 1.666544900563505e-05, "loss": 1.5427, "step": 7740 }, { "epoch": 1.83, "learning_rate": 1.660727538748687e-05, "loss": 1.5782, "step": 7750 }, { "epoch": 1.83, "learning_rate": 1.654915294028439e-05, "loss": 1.5257, "step": 7760 }, { "epoch": 1.83, "learning_rate": 1.649108201840367e-05, "loss": 1.5747, "step": 7770 }, { "epoch": 1.83, "learning_rate": 1.6433062975906594e-05, "loss": 1.5598, "step": 7780 }, { "epoch": 1.84, "learning_rate": 1.6375096166538757e-05, "loss": 1.5349, "step": 7790 }, { "epoch": 1.84, "learning_rate": 1.6317181943727272e-05, "loss": 1.5958, "step": 7800 }, { "epoch": 1.84, "learning_rate": 1.6259320660578627e-05, "loss": 1.5406, "step": 7810 }, { "epoch": 1.84, "learning_rate": 1.620151266987654e-05, "loss": 1.4676, "step": 7820 }, { "epoch": 1.85, "learning_rate": 1.61437583240798e-05, "loss": 1.5561, "step": 7830 }, { "epoch": 1.85, "learning_rate": 1.608605797532013e-05, "loss": 1.5527, "step": 7840 }, { "epoch": 1.85, "learning_rate": 1.6028411975400005e-05, "loss": 1.6027, "step": 7850 }, { "epoch": 1.85, "learning_rate": 1.5970820675790554e-05, "loss": 1.5452, "step": 7860 }, { "epoch": 1.86, "learning_rate": 1.5913284427629376e-05, "loss": 1.5342, "step": 7870 }, { "epoch": 1.86, "learning_rate": 1.585580358171845e-05, "loss": 1.6369, "step": 7880 }, { "epoch": 1.86, "learning_rate": 1.5798378488521937e-05, "loss": 1.6002, "step": 7890 }, { "epoch": 1.86, "learning_rate": 1.5741009498164066e-05, "loss": 1.5132, "step": 7900 }, { "epoch": 1.87, "learning_rate": 1.5683696960427012e-05, "loss": 1.6326, "step": 7910 }, { "epoch": 1.87, "learning_rate": 1.5626441224748784e-05, "loss": 1.5737, "step": 7920 }, { "epoch": 1.87, "learning_rate": 1.5569242640221015e-05, "loss": 1.6005, "step": 7930 }, { "epoch": 1.87, "learning_rate": 1.5512101555586918e-05, "loss": 1.5976, "step": 7940 }, { "epoch": 1.87, "learning_rate": 1.54550183192391e-05, "loss": 1.6039, "step": 7950 }, { "epoch": 1.88, "learning_rate": 1.5397993279217504e-05, "loss": 1.5774, "step": 7960 }, { "epoch": 1.88, "learning_rate": 1.5341026783207208e-05, "loss": 1.5339, "step": 7970 }, { "epoch": 1.88, "learning_rate": 1.528411917853636e-05, "loss": 1.5806, "step": 7980 }, { "epoch": 1.88, "learning_rate": 1.5227270812174033e-05, "loss": 1.5673, "step": 7990 }, { "epoch": 1.89, "learning_rate": 1.5170482030728142e-05, "loss": 1.6091, "step": 8000 }, { "epoch": 1.89, "eval_loss": 1.5800806283950806, "eval_runtime": 121.3887, "eval_samples_per_second": 11.294, "eval_steps_per_second": 2.826, "step": 8000 }, { "epoch": 1.89, "learning_rate": 1.511375318044329e-05, "loss": 1.5585, "step": 8010 }, { "epoch": 1.89, "learning_rate": 1.5057084607198685e-05, "loss": 1.6185, "step": 8020 }, { "epoch": 1.89, "learning_rate": 1.5000476656506019e-05, "loss": 1.5859, "step": 8030 }, { "epoch": 1.9, "learning_rate": 1.4943929673507345e-05, "loss": 1.5645, "step": 8040 }, { "epoch": 1.9, "learning_rate": 1.4887444002973048e-05, "loss": 1.5036, "step": 8050 }, { "epoch": 1.9, "learning_rate": 1.483101998929963e-05, "loss": 1.6188, "step": 8060 }, { "epoch": 1.9, "learning_rate": 1.4774657976507695e-05, "loss": 1.5842, "step": 8070 }, { "epoch": 1.91, "learning_rate": 1.4718358308239799e-05, "loss": 1.5984, "step": 8080 }, { "epoch": 1.91, "learning_rate": 1.4662121327758432e-05, "loss": 1.5114, "step": 8090 }, { "epoch": 1.91, "learning_rate": 1.4605947377943818e-05, "loss": 1.5658, "step": 8100 }, { "epoch": 1.91, "learning_rate": 1.454983680129191e-05, "loss": 1.5323, "step": 8110 }, { "epoch": 1.91, "learning_rate": 1.4493789939912244e-05, "loss": 1.6191, "step": 8120 }, { "epoch": 1.92, "learning_rate": 1.4437807135525922e-05, "loss": 1.5712, "step": 8130 }, { "epoch": 1.92, "learning_rate": 1.438188872946345e-05, "loss": 1.5304, "step": 8140 }, { "epoch": 1.92, "learning_rate": 1.4326035062662707e-05, "loss": 1.5967, "step": 8150 }, { "epoch": 1.92, "learning_rate": 1.4270246475666846e-05, "loss": 1.5486, "step": 8160 }, { "epoch": 1.93, "learning_rate": 1.4214523308622243e-05, "loss": 1.6059, "step": 8170 }, { "epoch": 1.93, "learning_rate": 1.4158865901276385e-05, "loss": 1.593, "step": 8180 }, { "epoch": 1.93, "learning_rate": 1.410327459297583e-05, "loss": 1.5811, "step": 8190 }, { "epoch": 1.93, "learning_rate": 1.4047749722664116e-05, "loss": 1.5334, "step": 8200 }, { "epoch": 1.94, "learning_rate": 1.39922916288797e-05, "loss": 1.5781, "step": 8210 }, { "epoch": 1.94, "learning_rate": 1.3936900649753931e-05, "loss": 1.6089, "step": 8220 }, { "epoch": 1.94, "learning_rate": 1.3881577123008921e-05, "loss": 1.5119, "step": 8230 }, { "epoch": 1.94, "learning_rate": 1.3826321385955535e-05, "loss": 1.5515, "step": 8240 }, { "epoch": 1.95, "learning_rate": 1.3771133775491307e-05, "loss": 1.586, "step": 8250 }, { "epoch": 1.95, "learning_rate": 1.3716014628098431e-05, "loss": 1.6166, "step": 8260 }, { "epoch": 1.95, "learning_rate": 1.3660964279841647e-05, "loss": 1.5123, "step": 8270 }, { "epoch": 1.95, "learning_rate": 1.3605983066366234e-05, "loss": 1.5726, "step": 8280 }, { "epoch": 1.95, "learning_rate": 1.3551071322895936e-05, "loss": 1.5723, "step": 8290 }, { "epoch": 1.96, "learning_rate": 1.3496229384230974e-05, "loss": 1.5756, "step": 8300 }, { "epoch": 1.96, "learning_rate": 1.3441457584745928e-05, "loss": 1.5795, "step": 8310 }, { "epoch": 1.96, "learning_rate": 1.3386756258387744e-05, "loss": 1.5917, "step": 8320 }, { "epoch": 1.96, "learning_rate": 1.33321257386737e-05, "loss": 1.5951, "step": 8330 }, { "epoch": 1.97, "learning_rate": 1.3277566358689336e-05, "loss": 1.5424, "step": 8340 }, { "epoch": 1.97, "learning_rate": 1.3223078451086487e-05, "loss": 1.548, "step": 8350 }, { "epoch": 1.97, "learning_rate": 1.316866234808119e-05, "loss": 1.5404, "step": 8360 }, { "epoch": 1.97, "learning_rate": 1.3114318381451688e-05, "loss": 1.5472, "step": 8370 }, { "epoch": 1.98, "learning_rate": 1.3060046882536409e-05, "loss": 1.5692, "step": 8380 }, { "epoch": 1.98, "learning_rate": 1.3005848182231939e-05, "loss": 1.4966, "step": 8390 }, { "epoch": 1.98, "learning_rate": 1.2951722610990993e-05, "loss": 1.564, "step": 8400 }, { "epoch": 1.98, "learning_rate": 1.2897670498820455e-05, "loss": 1.5788, "step": 8410 }, { "epoch": 1.99, "learning_rate": 1.284369217527928e-05, "loss": 1.5353, "step": 8420 }, { "epoch": 1.99, "learning_rate": 1.2789787969476554e-05, "loss": 1.5966, "step": 8430 }, { "epoch": 1.99, "learning_rate": 1.2735958210069448e-05, "loss": 1.5634, "step": 8440 }, { "epoch": 1.99, "learning_rate": 1.268220322526123e-05, "loss": 1.5649, "step": 8450 }, { "epoch": 1.99, "learning_rate": 1.262852334279929e-05, "loss": 1.4958, "step": 8460 }, { "epoch": 2.0, "learning_rate": 1.257491888997308e-05, "loss": 1.5192, "step": 8470 }, { "epoch": 2.0, "learning_rate": 1.2521390193612165e-05, "loss": 1.5598, "step": 8480 }, { "epoch": 2.0, "learning_rate": 1.2467937580084225e-05, "loss": 1.5079, "step": 8490 }, { "epoch": 2.0, "learning_rate": 1.2414561375293038e-05, "loss": 1.514, "step": 8500 }, { "epoch": 2.01, "learning_rate": 1.236126190467655e-05, "loss": 1.5451, "step": 8510 }, { "epoch": 2.01, "learning_rate": 1.2308039493204823e-05, "loss": 1.5526, "step": 8520 }, { "epoch": 2.01, "learning_rate": 1.2254894465378094e-05, "loss": 1.4948, "step": 8530 }, { "epoch": 2.01, "learning_rate": 1.220182714522479e-05, "loss": 1.6119, "step": 8540 }, { "epoch": 2.02, "learning_rate": 1.2148837856299533e-05, "loss": 1.5818, "step": 8550 }, { "epoch": 2.02, "learning_rate": 1.2095926921681219e-05, "loss": 1.5446, "step": 8560 }, { "epoch": 2.02, "learning_rate": 1.2043094663970982e-05, "loss": 1.5348, "step": 8570 }, { "epoch": 2.02, "learning_rate": 1.1990341405290271e-05, "loss": 1.5595, "step": 8580 }, { "epoch": 2.03, "learning_rate": 1.193766746727886e-05, "loss": 1.5402, "step": 8590 }, { "epoch": 2.03, "learning_rate": 1.1885073171092926e-05, "loss": 1.5021, "step": 8600 }, { "epoch": 2.03, "learning_rate": 1.1832558837403043e-05, "loss": 1.5309, "step": 8610 }, { "epoch": 2.03, "learning_rate": 1.1780124786392258e-05, "loss": 1.6031, "step": 8620 }, { "epoch": 2.03, "learning_rate": 1.1727771337754112e-05, "loss": 1.6009, "step": 8630 }, { "epoch": 2.04, "learning_rate": 1.167549881069075e-05, "loss": 1.5555, "step": 8640 }, { "epoch": 2.04, "learning_rate": 1.162330752391089e-05, "loss": 1.5342, "step": 8650 }, { "epoch": 2.04, "learning_rate": 1.1571197795627941e-05, "loss": 1.5715, "step": 8660 }, { "epoch": 2.04, "learning_rate": 1.1519169943558042e-05, "loss": 1.5763, "step": 8670 }, { "epoch": 2.05, "learning_rate": 1.1467224284918141e-05, "loss": 1.5585, "step": 8680 }, { "epoch": 2.05, "learning_rate": 1.141536113642403e-05, "loss": 1.5248, "step": 8690 }, { "epoch": 2.05, "learning_rate": 1.1363580814288435e-05, "loss": 1.5985, "step": 8700 }, { "epoch": 2.05, "learning_rate": 1.1311883634219095e-05, "loss": 1.5718, "step": 8710 }, { "epoch": 2.06, "learning_rate": 1.1260269911416807e-05, "loss": 1.5899, "step": 8720 }, { "epoch": 2.06, "learning_rate": 1.1208739960573553e-05, "loss": 1.5258, "step": 8730 }, { "epoch": 2.06, "learning_rate": 1.1157294095870527e-05, "loss": 1.517, "step": 8740 }, { "epoch": 2.06, "learning_rate": 1.110593263097626e-05, "loss": 1.4968, "step": 8750 }, { "epoch": 2.07, "learning_rate": 1.105465587904467e-05, "loss": 1.538, "step": 8760 }, { "epoch": 2.07, "learning_rate": 1.100346415271321e-05, "loss": 1.5363, "step": 8770 }, { "epoch": 2.07, "learning_rate": 1.0952357764100906e-05, "loss": 1.5474, "step": 8780 }, { "epoch": 2.07, "learning_rate": 1.090133702480647e-05, "loss": 1.5999, "step": 8790 }, { "epoch": 2.07, "learning_rate": 1.0850402245906408e-05, "loss": 1.538, "step": 8800 }, { "epoch": 2.08, "learning_rate": 1.0799553737953136e-05, "loss": 1.5791, "step": 8810 }, { "epoch": 2.08, "learning_rate": 1.0748791810973052e-05, "loss": 1.6128, "step": 8820 }, { "epoch": 2.08, "learning_rate": 1.0698116774464676e-05, "loss": 1.5819, "step": 8830 }, { "epoch": 2.08, "learning_rate": 1.064752893739673e-05, "loss": 1.4816, "step": 8840 }, { "epoch": 2.09, "learning_rate": 1.059702860820632e-05, "loss": 1.5091, "step": 8850 }, { "epoch": 2.09, "learning_rate": 1.0546616094796968e-05, "loss": 1.5383, "step": 8860 }, { "epoch": 2.09, "learning_rate": 1.0496291704536798e-05, "loss": 1.5577, "step": 8870 }, { "epoch": 2.09, "learning_rate": 1.044605574425664e-05, "loss": 1.5483, "step": 8880 }, { "epoch": 2.1, "learning_rate": 1.0395908520248143e-05, "loss": 1.5387, "step": 8890 }, { "epoch": 2.1, "learning_rate": 1.0345850338261964e-05, "loss": 1.5891, "step": 8900 }, { "epoch": 2.1, "learning_rate": 1.0295881503505836e-05, "loss": 1.565, "step": 8910 }, { "epoch": 2.1, "learning_rate": 1.0246002320642742e-05, "loss": 1.5359, "step": 8920 }, { "epoch": 2.11, "learning_rate": 1.0196213093789042e-05, "loss": 1.5579, "step": 8930 }, { "epoch": 2.11, "learning_rate": 1.0146514126512663e-05, "loss": 1.563, "step": 8940 }, { "epoch": 2.11, "learning_rate": 1.0096905721831176e-05, "loss": 1.5762, "step": 8950 }, { "epoch": 2.11, "learning_rate": 1.004738818221001e-05, "loss": 1.4976, "step": 8960 }, { "epoch": 2.11, "learning_rate": 9.997961809560564e-06, "loss": 1.5758, "step": 8970 }, { "epoch": 2.12, "learning_rate": 9.948626905238415e-06, "loss": 1.5827, "step": 8980 }, { "epoch": 2.12, "learning_rate": 9.899383770041426e-06, "loss": 1.5686, "step": 8990 }, { "epoch": 2.12, "learning_rate": 9.850232704207951e-06, "loss": 1.5696, "step": 9000 }, { "epoch": 2.12, "eval_loss": 1.5784997940063477, "eval_runtime": 121.4622, "eval_samples_per_second": 11.287, "eval_steps_per_second": 2.824, "step": 9000 }, { "epoch": 2.12, "learning_rate": 9.801174007414978e-06, "loss": 1.5198, "step": 9010 }, { "epoch": 2.13, "learning_rate": 9.752207978776346e-06, "loss": 1.4989, "step": 9020 }, { "epoch": 2.13, "learning_rate": 9.703334916840856e-06, "loss": 1.5645, "step": 9030 }, { "epoch": 2.13, "learning_rate": 9.654555119590506e-06, "loss": 1.5655, "step": 9040 }, { "epoch": 2.13, "learning_rate": 9.605868884438645e-06, "loss": 1.5699, "step": 9050 }, { "epoch": 2.14, "learning_rate": 9.557276508228164e-06, "loss": 1.532, "step": 9060 }, { "epoch": 2.14, "learning_rate": 9.508778287229714e-06, "loss": 1.5158, "step": 9070 }, { "epoch": 2.14, "learning_rate": 9.460374517139848e-06, "loss": 1.5939, "step": 9080 }, { "epoch": 2.14, "learning_rate": 9.412065493079261e-06, "loss": 1.4778, "step": 9090 }, { "epoch": 2.15, "learning_rate": 9.363851509590962e-06, "loss": 1.5716, "step": 9100 }, { "epoch": 2.15, "learning_rate": 9.315732860638518e-06, "loss": 1.5349, "step": 9110 }, { "epoch": 2.15, "learning_rate": 9.267709839604217e-06, "loss": 1.5646, "step": 9120 }, { "epoch": 2.15, "learning_rate": 9.219782739287292e-06, "loss": 1.5573, "step": 9130 }, { "epoch": 2.16, "learning_rate": 9.171951851902149e-06, "loss": 1.5657, "step": 9140 }, { "epoch": 2.16, "learning_rate": 9.124217469076593e-06, "loss": 1.5415, "step": 9150 }, { "epoch": 2.16, "learning_rate": 9.076579881850011e-06, "loss": 1.5735, "step": 9160 }, { "epoch": 2.16, "learning_rate": 9.029039380671636e-06, "loss": 1.6238, "step": 9170 }, { "epoch": 2.16, "learning_rate": 8.981596255398756e-06, "loss": 1.612, "step": 9180 }, { "epoch": 2.17, "learning_rate": 8.934250795294943e-06, "loss": 1.4949, "step": 9190 }, { "epoch": 2.17, "learning_rate": 8.887003289028326e-06, "loss": 1.5107, "step": 9200 }, { "epoch": 2.17, "learning_rate": 8.839854024669781e-06, "loss": 1.534, "step": 9210 }, { "epoch": 2.17, "learning_rate": 8.792803289691199e-06, "loss": 1.538, "step": 9220 }, { "epoch": 2.18, "learning_rate": 8.745851370963737e-06, "loss": 1.5493, "step": 9230 }, { "epoch": 2.18, "learning_rate": 8.698998554756052e-06, "loss": 1.5706, "step": 9240 }, { "epoch": 2.18, "learning_rate": 8.652245126732595e-06, "loss": 1.5403, "step": 9250 }, { "epoch": 2.18, "learning_rate": 8.605591371951815e-06, "loss": 1.5141, "step": 9260 }, { "epoch": 2.19, "learning_rate": 8.559037574864453e-06, "loss": 1.59, "step": 9270 }, { "epoch": 2.19, "learning_rate": 8.512584019311806e-06, "loss": 1.5207, "step": 9280 }, { "epoch": 2.19, "learning_rate": 8.466230988523988e-06, "loss": 1.5303, "step": 9290 }, { "epoch": 2.19, "learning_rate": 8.419978765118206e-06, "loss": 1.5287, "step": 9300 }, { "epoch": 2.2, "learning_rate": 8.373827631097052e-06, "loss": 1.5204, "step": 9310 }, { "epoch": 2.2, "learning_rate": 8.327777867846758e-06, "loss": 1.5644, "step": 9320 }, { "epoch": 2.2, "learning_rate": 8.281829756135492e-06, "loss": 1.5745, "step": 9330 }, { "epoch": 2.2, "learning_rate": 8.23598357611165e-06, "loss": 1.5933, "step": 9340 }, { "epoch": 2.2, "learning_rate": 8.190239607302133e-06, "loss": 1.5505, "step": 9350 }, { "epoch": 2.21, "learning_rate": 8.144598128610684e-06, "loss": 1.5541, "step": 9360 }, { "epoch": 2.21, "learning_rate": 8.099059418316126e-06, "loss": 1.6338, "step": 9370 }, { "epoch": 2.21, "learning_rate": 8.053623754070714e-06, "loss": 1.5897, "step": 9380 }, { "epoch": 2.21, "learning_rate": 8.008291412898414e-06, "loss": 1.5704, "step": 9390 }, { "epoch": 2.22, "learning_rate": 7.963062671193225e-06, "loss": 1.5133, "step": 9400 }, { "epoch": 2.22, "learning_rate": 7.917937804717521e-06, "loss": 1.6135, "step": 9410 }, { "epoch": 2.22, "learning_rate": 7.872917088600307e-06, "loss": 1.4678, "step": 9420 }, { "epoch": 2.22, "learning_rate": 7.828000797335593e-06, "loss": 1.5418, "step": 9430 }, { "epoch": 2.23, "learning_rate": 7.783189204780696e-06, "loss": 1.6363, "step": 9440 }, { "epoch": 2.23, "learning_rate": 7.738482584154601e-06, "loss": 1.5124, "step": 9450 }, { "epoch": 2.23, "learning_rate": 7.693881208036253e-06, "loss": 1.5569, "step": 9460 }, { "epoch": 2.23, "learning_rate": 7.649385348362912e-06, "loss": 1.567, "step": 9470 }, { "epoch": 2.24, "learning_rate": 7.604995276428501e-06, "loss": 1.5967, "step": 9480 }, { "epoch": 2.24, "learning_rate": 7.560711262881967e-06, "loss": 1.5462, "step": 9490 }, { "epoch": 2.24, "learning_rate": 7.516533577725593e-06, "loss": 1.5963, "step": 9500 }, { "epoch": 2.24, "learning_rate": 7.472462490313379e-06, "loss": 1.5272, "step": 9510 }, { "epoch": 2.24, "learning_rate": 7.428498269349376e-06, "loss": 1.6033, "step": 9520 }, { "epoch": 2.25, "learning_rate": 7.384641182886098e-06, "loss": 1.5305, "step": 9530 }, { "epoch": 2.25, "learning_rate": 7.340891498322824e-06, "loss": 1.5018, "step": 9540 }, { "epoch": 2.25, "learning_rate": 7.297249482404009e-06, "loss": 1.5483, "step": 9550 }, { "epoch": 2.25, "learning_rate": 7.2537154012176425e-06, "loss": 1.5751, "step": 9560 }, { "epoch": 2.26, "learning_rate": 7.210289520193619e-06, "loss": 1.5893, "step": 9570 }, { "epoch": 2.26, "learning_rate": 7.166972104102163e-06, "loss": 1.5296, "step": 9580 }, { "epoch": 2.26, "learning_rate": 7.123763417052151e-06, "loss": 1.4995, "step": 9590 }, { "epoch": 2.26, "learning_rate": 7.080663722489536e-06, "loss": 1.5799, "step": 9600 }, { "epoch": 2.27, "learning_rate": 7.037673283195742e-06, "loss": 1.596, "step": 9610 }, { "epoch": 2.27, "learning_rate": 6.99479236128607e-06, "loss": 1.4874, "step": 9620 }, { "epoch": 2.27, "learning_rate": 6.952021218208069e-06, "loss": 1.6123, "step": 9630 }, { "epoch": 2.27, "learning_rate": 6.909360114739963e-06, "loss": 1.5308, "step": 9640 }, { "epoch": 2.28, "learning_rate": 6.866809310989053e-06, "loss": 1.5896, "step": 9650 }, { "epoch": 2.28, "learning_rate": 6.824369066390157e-06, "loss": 1.5295, "step": 9660 }, { "epoch": 2.28, "learning_rate": 6.782039639703991e-06, "loss": 1.5922, "step": 9670 }, { "epoch": 2.28, "learning_rate": 6.739821289015607e-06, "loss": 1.5989, "step": 9680 }, { "epoch": 2.28, "learning_rate": 6.6977142717328165e-06, "loss": 1.5635, "step": 9690 }, { "epoch": 2.29, "learning_rate": 6.6557188445846465e-06, "loss": 1.5945, "step": 9700 }, { "epoch": 2.29, "learning_rate": 6.613835263619727e-06, "loss": 1.5264, "step": 9710 }, { "epoch": 2.29, "learning_rate": 6.572063784204769e-06, "loss": 1.5457, "step": 9720 }, { "epoch": 2.29, "learning_rate": 6.530404661022984e-06, "loss": 1.549, "step": 9730 }, { "epoch": 2.3, "learning_rate": 6.488858148072547e-06, "loss": 1.6442, "step": 9740 }, { "epoch": 2.3, "learning_rate": 6.44742449866505e-06, "loss": 1.5169, "step": 9750 }, { "epoch": 2.3, "learning_rate": 6.406103965423932e-06, "loss": 1.6114, "step": 9760 }, { "epoch": 2.3, "learning_rate": 6.364896800282968e-06, "loss": 1.519, "step": 9770 }, { "epoch": 2.31, "learning_rate": 6.323803254484712e-06, "loss": 1.5455, "step": 9780 }, { "epoch": 2.31, "learning_rate": 6.282823578578986e-06, "loss": 1.587, "step": 9790 }, { "epoch": 2.31, "learning_rate": 6.241958022421332e-06, "loss": 1.5676, "step": 9800 }, { "epoch": 2.31, "learning_rate": 6.201206835171497e-06, "loss": 1.5286, "step": 9810 }, { "epoch": 2.32, "learning_rate": 6.1605702652919095e-06, "loss": 1.5097, "step": 9820 }, { "epoch": 2.32, "learning_rate": 6.12004856054619e-06, "loss": 1.5699, "step": 9830 }, { "epoch": 2.32, "learning_rate": 6.079641967997596e-06, "loss": 1.5225, "step": 9840 }, { "epoch": 2.32, "learning_rate": 6.039350734007546e-06, "loss": 1.5637, "step": 9850 }, { "epoch": 2.32, "learning_rate": 5.9991751042341085e-06, "loss": 1.5933, "step": 9860 }, { "epoch": 2.33, "learning_rate": 5.959115323630521e-06, "loss": 1.6083, "step": 9870 }, { "epoch": 2.33, "learning_rate": 5.919171636443663e-06, "loss": 1.5347, "step": 9880 }, { "epoch": 2.33, "learning_rate": 5.879344286212596e-06, "loss": 1.5161, "step": 9890 }, { "epoch": 2.33, "learning_rate": 5.8396335157670625e-06, "loss": 1.5147, "step": 9900 }, { "epoch": 2.34, "learning_rate": 5.800039567226004e-06, "loss": 1.4942, "step": 9910 }, { "epoch": 2.34, "learning_rate": 5.760562681996121e-06, "loss": 1.5687, "step": 9920 }, { "epoch": 2.34, "learning_rate": 5.721203100770339e-06, "loss": 1.5276, "step": 9930 }, { "epoch": 2.34, "learning_rate": 5.681961063526392e-06, "loss": 1.5139, "step": 9940 }, { "epoch": 2.35, "learning_rate": 5.6428368095253286e-06, "loss": 1.6345, "step": 9950 }, { "epoch": 2.35, "learning_rate": 5.603830577310084e-06, "loss": 1.5481, "step": 9960 }, { "epoch": 2.35, "learning_rate": 5.564942604703996e-06, "loss": 1.5523, "step": 9970 }, { "epoch": 2.35, "learning_rate": 5.526173128809362e-06, "loss": 1.5385, "step": 9980 }, { "epoch": 2.36, "learning_rate": 5.487522386006e-06, "loss": 1.5857, "step": 9990 }, { "epoch": 2.36, "learning_rate": 5.448990611949823e-06, "loss": 1.5659, "step": 10000 }, { "epoch": 2.36, "eval_loss": 1.5774571895599365, "eval_runtime": 120.2987, "eval_samples_per_second": 11.397, "eval_steps_per_second": 2.851, "step": 10000 }, { "epoch": 2.36, "learning_rate": 5.41057804157136e-06, "loss": 1.5826, "step": 10010 }, { "epoch": 2.36, "learning_rate": 5.372284909074362e-06, "loss": 1.5019, "step": 10020 }, { "epoch": 2.36, "learning_rate": 5.334111447934348e-06, "loss": 1.5691, "step": 10030 }, { "epoch": 2.37, "learning_rate": 5.296057890897213e-06, "loss": 1.524, "step": 10040 }, { "epoch": 2.37, "learning_rate": 5.258124469977776e-06, "loss": 1.5359, "step": 10050 }, { "epoch": 2.37, "learning_rate": 5.220311416458376e-06, "loss": 1.631, "step": 10060 }, { "epoch": 2.37, "learning_rate": 5.182618960887476e-06, "loss": 1.5473, "step": 10070 }, { "epoch": 2.38, "learning_rate": 5.145047333078235e-06, "loss": 1.5423, "step": 10080 }, { "epoch": 2.38, "learning_rate": 5.1075967621071166e-06, "loss": 1.5166, "step": 10090 }, { "epoch": 2.38, "learning_rate": 5.070267476312515e-06, "loss": 1.6095, "step": 10100 }, { "epoch": 2.38, "learning_rate": 5.033059703293319e-06, "loss": 1.552, "step": 10110 }, { "epoch": 2.39, "learning_rate": 4.995973669907553e-06, "loss": 1.5961, "step": 10120 }, { "epoch": 2.39, "learning_rate": 4.959009602270989e-06, "loss": 1.548, "step": 10130 }, { "epoch": 2.39, "learning_rate": 4.922167725755761e-06, "loss": 1.6018, "step": 10140 }, { "epoch": 2.39, "learning_rate": 4.885448264989015e-06, "loss": 1.4976, "step": 10150 }, { "epoch": 2.4, "learning_rate": 4.8488514438514955e-06, "loss": 1.5935, "step": 10160 }, { "epoch": 2.4, "learning_rate": 4.812377485476224e-06, "loss": 1.5797, "step": 10170 }, { "epoch": 2.4, "learning_rate": 4.776026612247108e-06, "loss": 1.5587, "step": 10180 }, { "epoch": 2.4, "learning_rate": 4.739799045797611e-06, "loss": 1.5267, "step": 10190 }, { "epoch": 2.4, "learning_rate": 4.7036950070093645e-06, "loss": 1.5398, "step": 10200 }, { "epoch": 2.41, "learning_rate": 4.667714716010882e-06, "loss": 1.6229, "step": 10210 }, { "epoch": 2.41, "learning_rate": 4.631858392176142e-06, "loss": 1.5529, "step": 10220 }, { "epoch": 2.41, "learning_rate": 4.596126254123309e-06, "loss": 1.6194, "step": 10230 }, { "epoch": 2.41, "learning_rate": 4.560518519713372e-06, "loss": 1.5792, "step": 10240 }, { "epoch": 2.42, "learning_rate": 4.525035406048819e-06, "loss": 1.5083, "step": 10250 }, { "epoch": 2.42, "learning_rate": 4.4896771294723334e-06, "loss": 1.5629, "step": 10260 }, { "epoch": 2.42, "learning_rate": 4.4544439055654474e-06, "loss": 1.464, "step": 10270 }, { "epoch": 2.42, "learning_rate": 4.419335949147241e-06, "loss": 1.5578, "step": 10280 }, { "epoch": 2.43, "learning_rate": 4.384353474273023e-06, "loss": 1.5438, "step": 10290 }, { "epoch": 2.43, "learning_rate": 4.349496694233057e-06, "loss": 1.5336, "step": 10300 }, { "epoch": 2.43, "learning_rate": 4.3147658215512196e-06, "loss": 1.5478, "step": 10310 }, { "epoch": 2.43, "learning_rate": 4.280161067983721e-06, "loss": 1.5084, "step": 10320 }, { "epoch": 2.44, "learning_rate": 4.245682644517815e-06, "loss": 1.6072, "step": 10330 }, { "epoch": 2.44, "learning_rate": 4.211330761370533e-06, "loss": 1.5532, "step": 10340 }, { "epoch": 2.44, "learning_rate": 4.177105627987363e-06, "loss": 1.5815, "step": 10350 }, { "epoch": 2.44, "learning_rate": 4.143007453040995e-06, "loss": 1.5864, "step": 10360 }, { "epoch": 2.45, "learning_rate": 4.109036444430045e-06, "loss": 1.4932, "step": 10370 }, { "epoch": 2.45, "learning_rate": 4.075192809277803e-06, "loss": 1.4614, "step": 10380 }, { "epoch": 2.45, "learning_rate": 4.041476753930937e-06, "loss": 1.5828, "step": 10390 }, { "epoch": 2.45, "learning_rate": 4.007888483958258e-06, "loss": 1.5167, "step": 10400 }, { "epoch": 2.45, "learning_rate": 3.974428204149469e-06, "loss": 1.5448, "step": 10410 }, { "epoch": 2.46, "learning_rate": 3.941096118513893e-06, "loss": 1.5393, "step": 10420 }, { "epoch": 2.46, "learning_rate": 3.907892430279272e-06, "loss": 1.5387, "step": 10430 }, { "epoch": 2.46, "learning_rate": 3.87481734189048e-06, "loss": 1.5227, "step": 10440 }, { "epoch": 2.46, "learning_rate": 3.841871055008317e-06, "loss": 1.6027, "step": 10450 }, { "epoch": 2.47, "learning_rate": 3.809053770508261e-06, "loss": 1.6111, "step": 10460 }, { "epoch": 2.47, "learning_rate": 3.776365688479283e-06, "loss": 1.5452, "step": 10470 }, { "epoch": 2.47, "learning_rate": 3.743807008222572e-06, "loss": 1.5782, "step": 10480 }, { "epoch": 2.47, "learning_rate": 3.7113779282503564e-06, "loss": 1.5405, "step": 10490 }, { "epoch": 2.48, "learning_rate": 3.6790786462846783e-06, "loss": 1.6353, "step": 10500 }, { "epoch": 2.48, "learning_rate": 3.6469093592562066e-06, "loss": 1.4493, "step": 10510 }, { "epoch": 2.48, "learning_rate": 3.6148702633030135e-06, "loss": 1.6325, "step": 10520 }, { "epoch": 2.48, "learning_rate": 3.582961553769387e-06, "loss": 1.5489, "step": 10530 }, { "epoch": 2.49, "learning_rate": 3.5511834252046435e-06, "loss": 1.6267, "step": 10540 }, { "epoch": 2.49, "learning_rate": 3.5195360713619452e-06, "loss": 1.5385, "step": 10550 }, { "epoch": 2.49, "learning_rate": 3.4880196851971055e-06, "loss": 1.5823, "step": 10560 }, { "epoch": 2.49, "learning_rate": 3.4566344588674248e-06, "loss": 1.5976, "step": 10570 }, { "epoch": 2.49, "learning_rate": 3.425380583730506e-06, "loss": 1.5544, "step": 10580 }, { "epoch": 2.5, "learning_rate": 3.394258250343102e-06, "loss": 1.5691, "step": 10590 }, { "epoch": 2.5, "learning_rate": 3.363267648459956e-06, "loss": 1.4777, "step": 10600 }, { "epoch": 2.5, "learning_rate": 3.3324089670326185e-06, "loss": 1.5837, "step": 10610 }, { "epoch": 2.5, "learning_rate": 3.3016823942083303e-06, "loss": 1.5463, "step": 10620 }, { "epoch": 2.51, "learning_rate": 3.2710881173288384e-06, "loss": 1.5895, "step": 10630 }, { "epoch": 2.51, "learning_rate": 3.2406263229292992e-06, "loss": 1.5495, "step": 10640 }, { "epoch": 2.51, "learning_rate": 3.2102971967370944e-06, "loss": 1.6608, "step": 10650 }, { "epoch": 2.51, "learning_rate": 3.1801009236707285e-06, "loss": 1.5773, "step": 10660 }, { "epoch": 2.52, "learning_rate": 3.1500376878386832e-06, "loss": 1.5229, "step": 10670 }, { "epoch": 2.52, "learning_rate": 3.120107672538325e-06, "loss": 1.5475, "step": 10680 }, { "epoch": 2.52, "learning_rate": 3.090311060254747e-06, "loss": 1.5161, "step": 10690 }, { "epoch": 2.52, "learning_rate": 3.0606480326596825e-06, "loss": 1.5614, "step": 10700 }, { "epoch": 2.53, "learning_rate": 3.031118770610386e-06, "loss": 1.5324, "step": 10710 }, { "epoch": 2.53, "learning_rate": 3.0017234541485503e-06, "loss": 1.6035, "step": 10720 }, { "epoch": 2.53, "learning_rate": 2.9724622624991815e-06, "loss": 1.5388, "step": 10730 }, { "epoch": 2.53, "learning_rate": 2.94333537406952e-06, "loss": 1.5093, "step": 10740 }, { "epoch": 2.53, "learning_rate": 2.9143429664479525e-06, "loss": 1.5894, "step": 10750 }, { "epoch": 2.54, "learning_rate": 2.8854852164029225e-06, "loss": 1.5607, "step": 10760 }, { "epoch": 2.54, "learning_rate": 2.8567622998818765e-06, "loss": 1.5648, "step": 10770 }, { "epoch": 2.54, "learning_rate": 2.8281743920101523e-06, "loss": 1.565, "step": 10780 }, { "epoch": 2.54, "learning_rate": 2.799721667089944e-06, "loss": 1.5372, "step": 10790 }, { "epoch": 2.55, "learning_rate": 2.7714042985992144e-06, "loss": 1.5587, "step": 10800 }, { "epoch": 2.55, "learning_rate": 2.7432224591906698e-06, "loss": 1.5361, "step": 10810 }, { "epoch": 2.55, "learning_rate": 2.715176320690674e-06, "loss": 1.5489, "step": 10820 }, { "epoch": 2.55, "learning_rate": 2.687266054098217e-06, "loss": 1.5527, "step": 10830 }, { "epoch": 2.56, "learning_rate": 2.65949182958386e-06, "loss": 1.5471, "step": 10840 }, { "epoch": 2.56, "learning_rate": 2.6318538164887303e-06, "loss": 1.528, "step": 10850 }, { "epoch": 2.56, "learning_rate": 2.604352183323447e-06, "loss": 1.5848, "step": 10860 }, { "epoch": 2.56, "learning_rate": 2.576987097767117e-06, "loss": 1.5875, "step": 10870 }, { "epoch": 2.57, "learning_rate": 2.549758726666307e-06, "loss": 1.5908, "step": 10880 }, { "epoch": 2.57, "learning_rate": 2.5226672360340373e-06, "loss": 1.5164, "step": 10890 }, { "epoch": 2.57, "learning_rate": 2.4984020638028378e-06, "loss": 1.54, "step": 10900 }, { "epoch": 2.57, "learning_rate": 2.4715711004368204e-06, "loss": 1.4219, "step": 10910 }, { "epoch": 2.57, "learning_rate": 2.4448774942539832e-06, "loss": 1.527, "step": 10920 }, { "epoch": 2.58, "learning_rate": 2.418321408006857e-06, "loss": 1.5304, "step": 10930 }, { "epoch": 2.58, "learning_rate": 2.391903003609486e-06, "loss": 1.5367, "step": 10940 }, { "epoch": 2.58, "learning_rate": 2.3656224421364724e-06, "loss": 1.5717, "step": 10950 }, { "epoch": 2.58, "learning_rate": 2.339479883821968e-06, "loss": 1.5522, "step": 10960 }, { "epoch": 2.59, "learning_rate": 2.3134754880587307e-06, "loss": 1.5375, "step": 10970 }, { "epoch": 2.59, "learning_rate": 2.2876094133971154e-06, "loss": 1.5232, "step": 10980 }, { "epoch": 2.59, "learning_rate": 2.261881817544137e-06, "loss": 1.5888, "step": 10990 }, { "epoch": 2.59, "learning_rate": 2.2362928573624877e-06, "loss": 1.5925, "step": 11000 }, { "epoch": 2.59, "eval_loss": 1.576684594154358, "eval_runtime": 121.7819, "eval_samples_per_second": 11.258, "eval_steps_per_second": 2.817, "step": 11000 }, { "epoch": 2.6, "learning_rate": 2.210842688869591e-06, "loss": 1.6087, "step": 11010 }, { "epoch": 2.6, "learning_rate": 2.1855314672366568e-06, "loss": 1.5736, "step": 11020 }, { "epoch": 2.6, "learning_rate": 2.1603593467877243e-06, "loss": 1.5425, "step": 11030 }, { "epoch": 2.6, "learning_rate": 2.135326480998717e-06, "loss": 1.5811, "step": 11040 }, { "epoch": 2.61, "learning_rate": 2.1104330224965247e-06, "loss": 1.5116, "step": 11050 }, { "epoch": 2.61, "learning_rate": 2.0856791230580484e-06, "loss": 1.5301, "step": 11060 }, { "epoch": 2.61, "learning_rate": 2.0610649336093134e-06, "loss": 1.5275, "step": 11070 }, { "epoch": 2.61, "learning_rate": 2.036590604224503e-06, "loss": 1.5363, "step": 11080 }, { "epoch": 2.61, "learning_rate": 2.012256284125072e-06, "loss": 1.4919, "step": 11090 }, { "epoch": 2.62, "learning_rate": 1.9880621216788298e-06, "loss": 1.627, "step": 11100 }, { "epoch": 2.62, "learning_rate": 1.9640082643990394e-06, "loss": 1.5766, "step": 11110 }, { "epoch": 2.62, "learning_rate": 1.9400948589435088e-06, "loss": 1.5139, "step": 11120 }, { "epoch": 2.62, "learning_rate": 1.9163220511137114e-06, "loss": 1.5513, "step": 11130 }, { "epoch": 2.63, "learning_rate": 1.8926899858538794e-06, "loss": 1.5805, "step": 11140 }, { "epoch": 2.63, "learning_rate": 1.8691988072501359e-06, "loss": 1.5263, "step": 11150 }, { "epoch": 2.63, "learning_rate": 1.845848658529606e-06, "loss": 1.528, "step": 11160 }, { "epoch": 2.63, "learning_rate": 1.8226396820595431e-06, "loss": 1.5566, "step": 11170 }, { "epoch": 2.64, "learning_rate": 1.7995720193464766e-06, "loss": 1.5577, "step": 11180 }, { "epoch": 2.64, "learning_rate": 1.7766458110353297e-06, "loss": 1.5529, "step": 11190 }, { "epoch": 2.64, "learning_rate": 1.753861196908571e-06, "loss": 1.5468, "step": 11200 }, { "epoch": 2.64, "learning_rate": 1.7312183158853524e-06, "loss": 1.568, "step": 11210 }, { "epoch": 2.65, "learning_rate": 1.7087173060206879e-06, "loss": 1.5648, "step": 11220 }, { "epoch": 2.65, "learning_rate": 1.6863583045045816e-06, "loss": 1.5755, "step": 11230 }, { "epoch": 2.65, "learning_rate": 1.6641414476612077e-06, "loss": 1.5526, "step": 11240 }, { "epoch": 2.65, "learning_rate": 1.642066870948078e-06, "loss": 1.5661, "step": 11250 }, { "epoch": 2.65, "learning_rate": 1.6201347089552038e-06, "loss": 1.5303, "step": 11260 }, { "epoch": 2.66, "learning_rate": 1.598345095404305e-06, "loss": 1.5469, "step": 11270 }, { "epoch": 2.66, "learning_rate": 1.576698163147955e-06, "loss": 1.5015, "step": 11280 }, { "epoch": 2.66, "learning_rate": 1.5551940441688034e-06, "loss": 1.6044, "step": 11290 }, { "epoch": 2.66, "learning_rate": 1.5338328695787496e-06, "loss": 1.4745, "step": 11300 }, { "epoch": 2.67, "learning_rate": 1.512614769618162e-06, "loss": 1.5845, "step": 11310 }, { "epoch": 2.67, "learning_rate": 1.491539873655068e-06, "loss": 1.5262, "step": 11320 }, { "epoch": 2.67, "learning_rate": 1.4706083101843737e-06, "loss": 1.5452, "step": 11330 }, { "epoch": 2.67, "learning_rate": 1.449820206827071e-06, "loss": 1.4844, "step": 11340 }, { "epoch": 2.68, "learning_rate": 1.4291756903294845e-06, "loss": 1.5662, "step": 11350 }, { "epoch": 2.68, "learning_rate": 1.4086748865624666e-06, "loss": 1.5769, "step": 11360 }, { "epoch": 2.68, "learning_rate": 1.3883179205206459e-06, "loss": 1.5101, "step": 11370 }, { "epoch": 2.68, "learning_rate": 1.3681049163216664e-06, "loss": 1.6027, "step": 11380 }, { "epoch": 2.69, "learning_rate": 1.3480359972054325e-06, "loss": 1.5928, "step": 11390 }, { "epoch": 2.69, "learning_rate": 1.3281112855333428e-06, "loss": 1.4951, "step": 11400 }, { "epoch": 2.69, "learning_rate": 1.3083309027875663e-06, "loss": 1.5245, "step": 11410 }, { "epoch": 2.69, "learning_rate": 1.2886949695702782e-06, "loss": 1.5047, "step": 11420 }, { "epoch": 2.69, "learning_rate": 1.269203605602942e-06, "loss": 1.5346, "step": 11430 }, { "epoch": 2.7, "learning_rate": 1.249856929725579e-06, "loss": 1.4552, "step": 11440 }, { "epoch": 2.7, "learning_rate": 1.2306550598960298e-06, "loss": 1.6088, "step": 11450 }, { "epoch": 2.7, "learning_rate": 1.2115981131892469e-06, "loss": 1.5466, "step": 11460 }, { "epoch": 2.7, "learning_rate": 1.1926862057965755e-06, "loss": 1.5386, "step": 11470 }, { "epoch": 2.71, "learning_rate": 1.1739194530250574e-06, "loss": 1.6025, "step": 11480 }, { "epoch": 2.71, "learning_rate": 1.1552979692967064e-06, "loss": 1.5199, "step": 11490 }, { "epoch": 2.71, "learning_rate": 1.1368218681478276e-06, "loss": 1.5507, "step": 11500 }, { "epoch": 2.71, "learning_rate": 1.1184912622283133e-06, "loss": 1.5485, "step": 11510 }, { "epoch": 2.72, "learning_rate": 1.1003062633009765e-06, "loss": 1.5408, "step": 11520 }, { "epoch": 2.72, "learning_rate": 1.0822669822408427e-06, "loss": 1.5907, "step": 11530 }, { "epoch": 2.72, "learning_rate": 1.064373529034493e-06, "loss": 1.581, "step": 11540 }, { "epoch": 2.72, "learning_rate": 1.0466260127793808e-06, "loss": 1.5346, "step": 11550 }, { "epoch": 2.73, "learning_rate": 1.0290245416831823e-06, "loss": 1.5924, "step": 11560 }, { "epoch": 2.73, "learning_rate": 1.0115692230631245e-06, "loss": 1.4831, "step": 11570 }, { "epoch": 2.73, "learning_rate": 9.942601633453313e-07, "loss": 1.5182, "step": 11580 }, { "epoch": 2.73, "learning_rate": 9.77097468064178e-07, "loss": 1.5762, "step": 11590 }, { "epoch": 2.74, "learning_rate": 9.600812418616434e-07, "loss": 1.5347, "step": 11600 }, { "epoch": 2.74, "learning_rate": 9.432115884866865e-07, "loss": 1.5561, "step": 11610 }, { "epoch": 2.74, "learning_rate": 9.264886107945986e-07, "loss": 1.5222, "step": 11620 }, { "epoch": 2.74, "learning_rate": 9.099124107463718e-07, "loss": 1.5198, "step": 11630 }, { "epoch": 2.74, "learning_rate": 8.934830894080897e-07, "loss": 1.5235, "step": 11640 }, { "epoch": 2.75, "learning_rate": 8.772007469503241e-07, "loss": 1.547, "step": 11650 }, { "epoch": 2.75, "learning_rate": 8.610654826474828e-07, "loss": 1.5714, "step": 11660 }, { "epoch": 2.75, "learning_rate": 8.450773948772445e-07, "loss": 1.5183, "step": 11670 }, { "epoch": 2.75, "learning_rate": 8.292365811199381e-07, "loss": 1.5648, "step": 11680 }, { "epoch": 2.76, "learning_rate": 8.135431379579589e-07, "loss": 1.6074, "step": 11690 }, { "epoch": 2.76, "learning_rate": 7.979971610751701e-07, "loss": 1.6357, "step": 11700 }, { "epoch": 2.76, "learning_rate": 7.825987452563271e-07, "loss": 1.584, "step": 11710 }, { "epoch": 2.76, "learning_rate": 7.673479843864933e-07, "loss": 1.5438, "step": 11720 }, { "epoch": 2.77, "learning_rate": 7.522449714504748e-07, "loss": 1.5387, "step": 11730 }, { "epoch": 2.77, "learning_rate": 7.37289798532248e-07, "loss": 1.5856, "step": 11740 }, { "epoch": 2.77, "learning_rate": 7.224825568143967e-07, "loss": 1.5303, "step": 11750 }, { "epoch": 2.77, "learning_rate": 7.078233365775677e-07, "loss": 1.5315, "step": 11760 }, { "epoch": 2.78, "learning_rate": 6.933122271998993e-07, "loss": 1.5769, "step": 11770 }, { "epoch": 2.78, "learning_rate": 6.789493171565048e-07, "loss": 1.5182, "step": 11780 }, { "epoch": 2.78, "learning_rate": 6.647346940189037e-07, "loss": 1.5813, "step": 11790 }, { "epoch": 2.78, "learning_rate": 6.50668444454508e-07, "loss": 1.5716, "step": 11800 }, { "epoch": 2.78, "learning_rate": 6.367506542260842e-07, "loss": 1.5617, "step": 11810 }, { "epoch": 2.79, "learning_rate": 6.229814081912366e-07, "loss": 1.6197, "step": 11820 }, { "epoch": 2.79, "learning_rate": 6.093607903018828e-07, "loss": 1.5667, "step": 11830 }, { "epoch": 2.79, "learning_rate": 5.958888836037513e-07, "loss": 1.5824, "step": 11840 }, { "epoch": 2.79, "learning_rate": 5.825657702358572e-07, "loss": 1.5595, "step": 11850 }, { "epoch": 2.8, "learning_rate": 5.69391531430033e-07, "loss": 1.5121, "step": 11860 }, { "epoch": 2.8, "learning_rate": 5.563662475103982e-07, "loss": 1.4909, "step": 11870 }, { "epoch": 2.8, "learning_rate": 5.434899978928904e-07, "loss": 1.5018, "step": 11880 }, { "epoch": 2.8, "learning_rate": 5.307628610847798e-07, "loss": 1.5611, "step": 11890 }, { "epoch": 2.81, "learning_rate": 5.18184914684175e-07, "loss": 1.5067, "step": 11900 }, { "epoch": 2.81, "learning_rate": 5.057562353795813e-07, "loss": 1.5029, "step": 11910 }, { "epoch": 2.81, "learning_rate": 4.934768989493938e-07, "loss": 1.6069, "step": 11920 }, { "epoch": 2.81, "learning_rate": 4.813469802614684e-07, "loss": 1.5995, "step": 11930 }, { "epoch": 2.82, "learning_rate": 4.69366553272646e-07, "loss": 1.5461, "step": 11940 }, { "epoch": 2.82, "learning_rate": 4.5753569102831016e-07, "loss": 1.5528, "step": 11950 }, { "epoch": 2.82, "learning_rate": 4.4585446566193236e-07, "loss": 1.6161, "step": 11960 }, { "epoch": 2.82, "learning_rate": 4.343229483946526e-07, "loss": 1.6181, "step": 11970 }, { "epoch": 2.82, "learning_rate": 4.2294120953482173e-07, "loss": 1.5454, "step": 11980 }, { "epoch": 2.83, "learning_rate": 4.117093184775822e-07, "loss": 1.524, "step": 11990 }, { "epoch": 2.83, "learning_rate": 4.006273437044489e-07, "loss": 1.5892, "step": 12000 }, { "epoch": 2.83, "eval_loss": 1.5761640071868896, "eval_runtime": 120.7457, "eval_samples_per_second": 11.354, "eval_steps_per_second": 2.841, "step": 12000 } ], "max_steps": 12723, "num_train_epochs": 3, "total_flos": 4.293883720613929e+18, "trial_name": null, "trial_params": null }