{ "best_metric": 1.4905033111572266, "best_model_checkpoint": "./checkpoints/mbarthez-davide_articles-copy_enhanced/checkpoint-100656", "epoch": 3.0, "global_step": 100656, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9971685741535528e-05, "loss": 2.9916, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.9941881258941343e-05, "loss": 2.4185, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.9912374821173107e-05, "loss": 2.336, "step": 300 }, { "epoch": 0.01, "learning_rate": 2.9882570338578922e-05, "loss": 2.2868, "step": 400 }, { "epoch": 0.01, "learning_rate": 2.985276585598474e-05, "loss": 2.2529, "step": 500 }, { "epoch": 0.02, "learning_rate": 2.98232594182165e-05, "loss": 2.2467, "step": 600 }, { "epoch": 0.02, "learning_rate": 2.9793454935622317e-05, "loss": 2.1735, "step": 700 }, { "epoch": 0.02, "learning_rate": 2.9763650453028136e-05, "loss": 2.2049, "step": 800 }, { "epoch": 0.03, "learning_rate": 2.9734442060085837e-05, "loss": 2.1168, "step": 900 }, { "epoch": 0.03, "learning_rate": 2.9704637577491656e-05, "loss": 2.1353, "step": 1000 }, { "epoch": 0.03, "learning_rate": 2.9674833094897475e-05, "loss": 2.0458, "step": 1100 }, { "epoch": 0.04, "learning_rate": 2.964502861230329e-05, "loss": 2.1209, "step": 1200 }, { "epoch": 0.04, "learning_rate": 2.961552217453505e-05, "loss": 2.0867, "step": 1300 }, { "epoch": 0.04, "learning_rate": 2.958571769194087e-05, "loss": 2.028, "step": 1400 }, { "epoch": 0.04, "learning_rate": 2.9555913209346685e-05, "loss": 2.0236, "step": 1500 }, { "epoch": 0.05, "learning_rate": 2.9526108726752504e-05, "loss": 2.1211, "step": 1600 }, { "epoch": 0.05, "learning_rate": 2.9496602288984264e-05, "loss": 2.0742, "step": 1700 }, { "epoch": 0.05, "learning_rate": 2.9466797806390083e-05, "loss": 2.0753, "step": 1800 }, { "epoch": 0.06, "learning_rate": 2.94369933237959e-05, "loss": 2.0278, "step": 1900 }, { "epoch": 0.06, "learning_rate": 2.9407188841201717e-05, "loss": 2.0408, "step": 2000 }, { "epoch": 0.06, "learning_rate": 2.9377384358607533e-05, "loss": 2.0124, "step": 2100 }, { "epoch": 0.07, "learning_rate": 2.9347579876013352e-05, "loss": 1.9951, "step": 2200 }, { "epoch": 0.07, "learning_rate": 2.931777539341917e-05, "loss": 1.9751, "step": 2300 }, { "epoch": 0.07, "learning_rate": 2.928826895565093e-05, "loss": 2.0204, "step": 2400 }, { "epoch": 0.07, "learning_rate": 2.925846447305675e-05, "loss": 1.9919, "step": 2500 }, { "epoch": 0.08, "learning_rate": 2.9228659990462566e-05, "loss": 2.0126, "step": 2600 }, { "epoch": 0.08, "learning_rate": 2.919885550786838e-05, "loss": 2.0276, "step": 2700 }, { "epoch": 0.08, "learning_rate": 2.91690510252742e-05, "loss": 2.0272, "step": 2800 }, { "epoch": 0.09, "learning_rate": 2.913924654268002e-05, "loss": 1.9608, "step": 2900 }, { "epoch": 0.09, "learning_rate": 2.910944206008584e-05, "loss": 2.0019, "step": 3000 }, { "epoch": 0.09, "learning_rate": 2.9079637577491654e-05, "loss": 1.9454, "step": 3100 }, { "epoch": 0.1, "learning_rate": 2.9049833094897473e-05, "loss": 1.9716, "step": 3200 }, { "epoch": 0.1, "learning_rate": 2.9020028612303292e-05, "loss": 1.9543, "step": 3300 }, { "epoch": 0.1, "learning_rate": 2.8990224129709108e-05, "loss": 1.9659, "step": 3400 }, { "epoch": 0.1, "learning_rate": 2.8960419647114927e-05, "loss": 1.9729, "step": 3500 }, { "epoch": 0.11, "learning_rate": 2.8930615164520746e-05, "loss": 1.992, "step": 3600 }, { "epoch": 0.11, "learning_rate": 2.8900810681926565e-05, "loss": 1.9147, "step": 3700 }, { "epoch": 0.11, "learning_rate": 2.887100619933238e-05, "loss": 1.8892, "step": 3800 }, { "epoch": 0.12, "learning_rate": 2.88412017167382e-05, "loss": 1.941, "step": 3900 }, { "epoch": 0.12, "learning_rate": 2.881139723414402e-05, "loss": 1.9463, "step": 4000 }, { "epoch": 0.12, "learning_rate": 2.878159275154983e-05, "loss": 1.9645, "step": 4100 }, { "epoch": 0.13, "learning_rate": 2.875178826895565e-05, "loss": 1.9414, "step": 4200 }, { "epoch": 0.13, "learning_rate": 2.872198378636147e-05, "loss": 1.9317, "step": 4300 }, { "epoch": 0.13, "learning_rate": 2.8692179303767288e-05, "loss": 1.8677, "step": 4400 }, { "epoch": 0.13, "learning_rate": 2.8662374821173103e-05, "loss": 1.9439, "step": 4500 }, { "epoch": 0.14, "learning_rate": 2.8632570338578922e-05, "loss": 1.8576, "step": 4600 }, { "epoch": 0.14, "learning_rate": 2.860276585598474e-05, "loss": 1.9448, "step": 4700 }, { "epoch": 0.14, "learning_rate": 2.8572961373390557e-05, "loss": 1.8824, "step": 4800 }, { "epoch": 0.15, "learning_rate": 2.8543156890796376e-05, "loss": 1.9521, "step": 4900 }, { "epoch": 0.15, "learning_rate": 2.8513352408202195e-05, "loss": 1.9182, "step": 5000 }, { "epoch": 0.15, "learning_rate": 2.8483547925608014e-05, "loss": 1.941, "step": 5100 }, { "epoch": 0.15, "learning_rate": 2.845374344301383e-05, "loss": 1.9335, "step": 5200 }, { "epoch": 0.16, "learning_rate": 2.842393896041965e-05, "loss": 1.9182, "step": 5300 }, { "epoch": 0.16, "learning_rate": 2.8394134477825468e-05, "loss": 1.9045, "step": 5400 }, { "epoch": 0.16, "learning_rate": 2.8364329995231287e-05, "loss": 1.8742, "step": 5500 }, { "epoch": 0.17, "learning_rate": 2.83345255126371e-05, "loss": 1.8348, "step": 5600 }, { "epoch": 0.17, "learning_rate": 2.8304721030042918e-05, "loss": 1.8538, "step": 5700 }, { "epoch": 0.17, "learning_rate": 2.8274916547448737e-05, "loss": 1.8849, "step": 5800 }, { "epoch": 0.18, "learning_rate": 2.8245112064854553e-05, "loss": 1.9078, "step": 5900 }, { "epoch": 0.18, "learning_rate": 2.821530758226037e-05, "loss": 1.8603, "step": 6000 }, { "epoch": 0.18, "learning_rate": 2.818550309966619e-05, "loss": 1.8936, "step": 6100 }, { "epoch": 0.18, "learning_rate": 2.815569861707201e-05, "loss": 1.8688, "step": 6200 }, { "epoch": 0.19, "learning_rate": 2.8125894134477825e-05, "loss": 1.8588, "step": 6300 }, { "epoch": 0.19, "learning_rate": 2.8096089651883644e-05, "loss": 1.8781, "step": 6400 }, { "epoch": 0.19, "learning_rate": 2.8066285169289463e-05, "loss": 1.8708, "step": 6500 }, { "epoch": 0.2, "learning_rate": 2.803648068669528e-05, "loss": 1.8755, "step": 6600 }, { "epoch": 0.2, "learning_rate": 2.8006676204101098e-05, "loss": 1.9063, "step": 6700 }, { "epoch": 0.2, "learning_rate": 2.7976871721506917e-05, "loss": 1.8432, "step": 6800 }, { "epoch": 0.21, "learning_rate": 2.7947067238912736e-05, "loss": 1.8905, "step": 6900 }, { "epoch": 0.21, "learning_rate": 2.791726275631855e-05, "loss": 1.8297, "step": 7000 }, { "epoch": 0.21, "learning_rate": 2.7887458273724367e-05, "loss": 1.8125, "step": 7100 }, { "epoch": 0.21, "learning_rate": 2.7857653791130186e-05, "loss": 1.8585, "step": 7200 }, { "epoch": 0.22, "learning_rate": 2.7827849308536002e-05, "loss": 1.8668, "step": 7300 }, { "epoch": 0.22, "learning_rate": 2.779804482594182e-05, "loss": 1.8574, "step": 7400 }, { "epoch": 0.22, "learning_rate": 2.776824034334764e-05, "loss": 1.8183, "step": 7500 }, { "epoch": 0.23, "learning_rate": 2.773843586075346e-05, "loss": 1.8725, "step": 7600 }, { "epoch": 0.23, "learning_rate": 2.7708631378159275e-05, "loss": 1.8529, "step": 7700 }, { "epoch": 0.23, "learning_rate": 2.7678826895565094e-05, "loss": 1.8526, "step": 7800 }, { "epoch": 0.24, "learning_rate": 2.7649022412970913e-05, "loss": 1.8428, "step": 7900 }, { "epoch": 0.24, "learning_rate": 2.7619217930376728e-05, "loss": 1.845, "step": 8000 }, { "epoch": 0.24, "learning_rate": 2.7589711492608488e-05, "loss": 1.8325, "step": 8100 }, { "epoch": 0.24, "learning_rate": 2.7559907010014307e-05, "loss": 1.821, "step": 8200 }, { "epoch": 0.25, "learning_rate": 2.7530400572246067e-05, "loss": 1.9171, "step": 8300 }, { "epoch": 0.25, "learning_rate": 2.7500596089651886e-05, "loss": 1.868, "step": 8400 }, { "epoch": 0.25, "learning_rate": 2.7470791607057705e-05, "loss": 1.8178, "step": 8500 }, { "epoch": 0.26, "learning_rate": 2.7440987124463517e-05, "loss": 1.8377, "step": 8600 }, { "epoch": 0.26, "learning_rate": 2.7411182641869336e-05, "loss": 1.8537, "step": 8700 }, { "epoch": 0.26, "learning_rate": 2.7381378159275155e-05, "loss": 1.845, "step": 8800 }, { "epoch": 0.27, "learning_rate": 2.735157367668097e-05, "loss": 1.8144, "step": 8900 }, { "epoch": 0.27, "learning_rate": 2.732176919408679e-05, "loss": 1.8261, "step": 9000 }, { "epoch": 0.27, "learning_rate": 2.729196471149261e-05, "loss": 1.8569, "step": 9100 }, { "epoch": 0.27, "learning_rate": 2.7262160228898428e-05, "loss": 1.8529, "step": 9200 }, { "epoch": 0.28, "learning_rate": 2.7232355746304244e-05, "loss": 1.7626, "step": 9300 }, { "epoch": 0.28, "learning_rate": 2.7202551263710063e-05, "loss": 1.8715, "step": 9400 }, { "epoch": 0.28, "learning_rate": 2.7172746781115882e-05, "loss": 1.7882, "step": 9500 }, { "epoch": 0.29, "learning_rate": 2.7142942298521697e-05, "loss": 1.8134, "step": 9600 }, { "epoch": 0.29, "learning_rate": 2.7113137815927516e-05, "loss": 1.7804, "step": 9700 }, { "epoch": 0.29, "learning_rate": 2.7083333333333335e-05, "loss": 1.8603, "step": 9800 }, { "epoch": 0.3, "learning_rate": 2.7053826895565092e-05, "loss": 1.7646, "step": 9900 }, { "epoch": 0.3, "learning_rate": 2.702402241297091e-05, "loss": 1.8108, "step": 10000 }, { "epoch": 0.3, "learning_rate": 2.699421793037673e-05, "loss": 1.7624, "step": 10100 }, { "epoch": 0.3, "learning_rate": 2.696441344778255e-05, "loss": 1.746, "step": 10200 }, { "epoch": 0.31, "learning_rate": 2.6934608965188365e-05, "loss": 1.7821, "step": 10300 }, { "epoch": 0.31, "learning_rate": 2.6904804482594184e-05, "loss": 1.8216, "step": 10400 }, { "epoch": 0.31, "learning_rate": 2.6875000000000003e-05, "loss": 1.8033, "step": 10500 }, { "epoch": 0.32, "learning_rate": 2.684519551740582e-05, "loss": 1.7954, "step": 10600 }, { "epoch": 0.32, "learning_rate": 2.6815391034811637e-05, "loss": 1.8692, "step": 10700 }, { "epoch": 0.32, "learning_rate": 2.6785586552217456e-05, "loss": 1.789, "step": 10800 }, { "epoch": 0.32, "learning_rate": 2.6755782069623272e-05, "loss": 1.7989, "step": 10900 }, { "epoch": 0.33, "learning_rate": 2.6726275631855032e-05, "loss": 1.7977, "step": 11000 }, { "epoch": 0.33, "learning_rate": 2.669647114926085e-05, "loss": 1.7815, "step": 11100 }, { "epoch": 0.33, "learning_rate": 2.6666666666666667e-05, "loss": 1.7803, "step": 11200 }, { "epoch": 0.34, "learning_rate": 2.6636862184072486e-05, "loss": 1.7956, "step": 11300 }, { "epoch": 0.34, "learning_rate": 2.6607057701478305e-05, "loss": 1.8058, "step": 11400 }, { "epoch": 0.34, "learning_rate": 2.6577253218884124e-05, "loss": 1.8262, "step": 11500 }, { "epoch": 0.35, "learning_rate": 2.654744873628994e-05, "loss": 1.8004, "step": 11600 }, { "epoch": 0.35, "learning_rate": 2.6517644253695755e-05, "loss": 1.7565, "step": 11700 }, { "epoch": 0.35, "learning_rate": 2.6487839771101574e-05, "loss": 1.8006, "step": 11800 }, { "epoch": 0.35, "learning_rate": 2.645803528850739e-05, "loss": 1.8004, "step": 11900 }, { "epoch": 0.36, "learning_rate": 2.642823080591321e-05, "loss": 1.7985, "step": 12000 }, { "epoch": 0.36, "learning_rate": 2.6398426323319028e-05, "loss": 1.7983, "step": 12100 }, { "epoch": 0.36, "learning_rate": 2.6368621840724847e-05, "loss": 1.7755, "step": 12200 }, { "epoch": 0.37, "learning_rate": 2.6338817358130662e-05, "loss": 1.8261, "step": 12300 }, { "epoch": 0.37, "learning_rate": 2.630901287553648e-05, "loss": 1.8139, "step": 12400 }, { "epoch": 0.37, "learning_rate": 2.62792083929423e-05, "loss": 1.7813, "step": 12500 }, { "epoch": 0.38, "learning_rate": 2.6249403910348116e-05, "loss": 1.7667, "step": 12600 }, { "epoch": 0.38, "learning_rate": 2.6219599427753935e-05, "loss": 1.7622, "step": 12700 }, { "epoch": 0.38, "learning_rate": 2.6189794945159754e-05, "loss": 1.7805, "step": 12800 }, { "epoch": 0.38, "learning_rate": 2.6159990462565573e-05, "loss": 1.7998, "step": 12900 }, { "epoch": 0.39, "learning_rate": 2.613018597997139e-05, "loss": 1.7419, "step": 13000 }, { "epoch": 0.39, "learning_rate": 2.6100381497377204e-05, "loss": 1.7531, "step": 13100 }, { "epoch": 0.39, "learning_rate": 2.6070577014783023e-05, "loss": 1.7852, "step": 13200 }, { "epoch": 0.4, "learning_rate": 2.6040772532188842e-05, "loss": 1.7967, "step": 13300 }, { "epoch": 0.4, "learning_rate": 2.6010968049594658e-05, "loss": 1.7887, "step": 13400 }, { "epoch": 0.4, "learning_rate": 2.5981163567000477e-05, "loss": 1.7465, "step": 13500 }, { "epoch": 0.41, "learning_rate": 2.5951359084406296e-05, "loss": 1.7776, "step": 13600 }, { "epoch": 0.41, "learning_rate": 2.592155460181211e-05, "loss": 1.7584, "step": 13700 }, { "epoch": 0.41, "learning_rate": 2.589175011921793e-05, "loss": 1.7761, "step": 13800 }, { "epoch": 0.41, "learning_rate": 2.586194563662375e-05, "loss": 1.7238, "step": 13900 }, { "epoch": 0.42, "learning_rate": 2.5832439198855506e-05, "loss": 1.7991, "step": 14000 }, { "epoch": 0.42, "learning_rate": 2.5802634716261325e-05, "loss": 1.7551, "step": 14100 }, { "epoch": 0.42, "learning_rate": 2.5772830233667144e-05, "loss": 1.8406, "step": 14200 }, { "epoch": 0.43, "learning_rate": 2.574302575107296e-05, "loss": 1.7478, "step": 14300 }, { "epoch": 0.43, "learning_rate": 2.571322126847878e-05, "loss": 1.8065, "step": 14400 }, { "epoch": 0.43, "learning_rate": 2.5683416785884598e-05, "loss": 1.8075, "step": 14500 }, { "epoch": 0.44, "learning_rate": 2.5653612303290417e-05, "loss": 1.7356, "step": 14600 }, { "epoch": 0.44, "learning_rate": 2.5623807820696232e-05, "loss": 1.7674, "step": 14700 }, { "epoch": 0.44, "learning_rate": 2.559400333810205e-05, "loss": 1.7419, "step": 14800 }, { "epoch": 0.44, "learning_rate": 2.556419885550787e-05, "loss": 1.7297, "step": 14900 }, { "epoch": 0.45, "learning_rate": 2.5534394372913686e-05, "loss": 1.7576, "step": 15000 }, { "epoch": 0.45, "learning_rate": 2.5504589890319505e-05, "loss": 1.8, "step": 15100 }, { "epoch": 0.45, "learning_rate": 2.5474785407725324e-05, "loss": 1.7792, "step": 15200 }, { "epoch": 0.46, "learning_rate": 2.5444980925131143e-05, "loss": 1.739, "step": 15300 }, { "epoch": 0.46, "learning_rate": 2.5415176442536955e-05, "loss": 1.7421, "step": 15400 }, { "epoch": 0.46, "learning_rate": 2.5385371959942774e-05, "loss": 1.7634, "step": 15500 }, { "epoch": 0.46, "learning_rate": 2.5355567477348593e-05, "loss": 1.8138, "step": 15600 }, { "epoch": 0.47, "learning_rate": 2.5325762994754413e-05, "loss": 1.6962, "step": 15700 }, { "epoch": 0.47, "learning_rate": 2.5295958512160228e-05, "loss": 1.7461, "step": 15800 }, { "epoch": 0.47, "learning_rate": 2.5266154029566047e-05, "loss": 1.7672, "step": 15900 }, { "epoch": 0.48, "learning_rate": 2.5236647591797807e-05, "loss": 1.7314, "step": 16000 }, { "epoch": 0.48, "learning_rate": 2.5206843109203626e-05, "loss": 1.7327, "step": 16100 }, { "epoch": 0.48, "learning_rate": 2.5177038626609442e-05, "loss": 1.702, "step": 16200 }, { "epoch": 0.49, "learning_rate": 2.514723414401526e-05, "loss": 1.7768, "step": 16300 }, { "epoch": 0.49, "learning_rate": 2.5117429661421076e-05, "loss": 1.7364, "step": 16400 }, { "epoch": 0.49, "learning_rate": 2.5087625178826895e-05, "loss": 1.7584, "step": 16500 }, { "epoch": 0.49, "learning_rate": 2.5057820696232714e-05, "loss": 1.7383, "step": 16600 }, { "epoch": 0.5, "learning_rate": 2.502801621363853e-05, "loss": 1.7418, "step": 16700 }, { "epoch": 0.5, "learning_rate": 2.499821173104435e-05, "loss": 1.6813, "step": 16800 }, { "epoch": 0.5, "learning_rate": 2.4968407248450168e-05, "loss": 1.7484, "step": 16900 }, { "epoch": 0.51, "learning_rate": 2.4938900810681925e-05, "loss": 1.7323, "step": 17000 }, { "epoch": 0.51, "learning_rate": 2.4909096328087744e-05, "loss": 1.7436, "step": 17100 }, { "epoch": 0.51, "learning_rate": 2.4879291845493563e-05, "loss": 1.7571, "step": 17200 }, { "epoch": 0.52, "learning_rate": 2.4849487362899378e-05, "loss": 1.7422, "step": 17300 }, { "epoch": 0.52, "learning_rate": 2.4819682880305197e-05, "loss": 1.783, "step": 17400 }, { "epoch": 0.52, "learning_rate": 2.4789878397711016e-05, "loss": 1.7675, "step": 17500 }, { "epoch": 0.52, "learning_rate": 2.4760073915116835e-05, "loss": 1.7611, "step": 17600 }, { "epoch": 0.53, "learning_rate": 2.473026943252265e-05, "loss": 1.7964, "step": 17700 }, { "epoch": 0.53, "learning_rate": 2.470046494992847e-05, "loss": 1.681, "step": 17800 }, { "epoch": 0.53, "learning_rate": 2.467066046733429e-05, "loss": 1.6863, "step": 17900 }, { "epoch": 0.54, "learning_rate": 2.4640855984740108e-05, "loss": 1.725, "step": 18000 }, { "epoch": 0.54, "learning_rate": 2.4611051502145924e-05, "loss": 1.7158, "step": 18100 }, { "epoch": 0.54, "learning_rate": 2.4581247019551743e-05, "loss": 1.7756, "step": 18200 }, { "epoch": 0.55, "learning_rate": 2.455144253695756e-05, "loss": 1.7627, "step": 18300 }, { "epoch": 0.55, "learning_rate": 2.4521638054363374e-05, "loss": 1.7381, "step": 18400 }, { "epoch": 0.55, "learning_rate": 2.4491833571769193e-05, "loss": 1.7606, "step": 18500 }, { "epoch": 0.55, "learning_rate": 2.4462029089175012e-05, "loss": 1.7298, "step": 18600 }, { "epoch": 0.56, "learning_rate": 2.443222460658083e-05, "loss": 1.7116, "step": 18700 }, { "epoch": 0.56, "learning_rate": 2.4402420123986647e-05, "loss": 1.775, "step": 18800 }, { "epoch": 0.56, "learning_rate": 2.4372615641392466e-05, "loss": 1.7324, "step": 18900 }, { "epoch": 0.57, "learning_rate": 2.4342811158798285e-05, "loss": 1.7449, "step": 19000 }, { "epoch": 0.57, "learning_rate": 2.43130066762041e-05, "loss": 1.7271, "step": 19100 }, { "epoch": 0.57, "learning_rate": 2.428320219360992e-05, "loss": 1.7374, "step": 19200 }, { "epoch": 0.58, "learning_rate": 2.425339771101574e-05, "loss": 1.7579, "step": 19300 }, { "epoch": 0.58, "learning_rate": 2.4223593228421557e-05, "loss": 1.7903, "step": 19400 }, { "epoch": 0.58, "learning_rate": 2.4193788745827373e-05, "loss": 1.7057, "step": 19500 }, { "epoch": 0.58, "learning_rate": 2.4163984263233192e-05, "loss": 1.7083, "step": 19600 }, { "epoch": 0.59, "learning_rate": 2.413417978063901e-05, "loss": 1.7556, "step": 19700 }, { "epoch": 0.59, "learning_rate": 2.4104375298044827e-05, "loss": 1.6617, "step": 19800 }, { "epoch": 0.59, "learning_rate": 2.4074570815450642e-05, "loss": 1.7244, "step": 19900 }, { "epoch": 0.6, "learning_rate": 2.404476633285646e-05, "loss": 1.7108, "step": 20000 }, { "epoch": 0.6, "learning_rate": 2.401496185026228e-05, "loss": 1.7411, "step": 20100 }, { "epoch": 0.6, "learning_rate": 2.3985157367668096e-05, "loss": 1.735, "step": 20200 }, { "epoch": 0.61, "learning_rate": 2.3955352885073915e-05, "loss": 1.7065, "step": 20300 }, { "epoch": 0.61, "learning_rate": 2.3925548402479734e-05, "loss": 1.7175, "step": 20400 }, { "epoch": 0.61, "learning_rate": 2.389574391988555e-05, "loss": 1.6863, "step": 20500 }, { "epoch": 0.61, "learning_rate": 2.386593943729137e-05, "loss": 1.6851, "step": 20600 }, { "epoch": 0.62, "learning_rate": 2.3836134954697188e-05, "loss": 1.6974, "step": 20700 }, { "epoch": 0.62, "learning_rate": 2.3806330472103007e-05, "loss": 1.7211, "step": 20800 }, { "epoch": 0.62, "learning_rate": 2.3776525989508822e-05, "loss": 1.7042, "step": 20900 }, { "epoch": 0.63, "learning_rate": 2.374672150691464e-05, "loss": 1.7337, "step": 21000 }, { "epoch": 0.63, "learning_rate": 2.371691702432046e-05, "loss": 1.7224, "step": 21100 }, { "epoch": 0.63, "learning_rate": 2.3687112541726276e-05, "loss": 1.6624, "step": 21200 }, { "epoch": 0.63, "learning_rate": 2.3657308059132095e-05, "loss": 1.6704, "step": 21300 }, { "epoch": 0.64, "learning_rate": 2.362750357653791e-05, "loss": 1.6547, "step": 21400 }, { "epoch": 0.64, "learning_rate": 2.359769909394373e-05, "loss": 1.6913, "step": 21500 }, { "epoch": 0.64, "learning_rate": 2.3567894611349545e-05, "loss": 1.7347, "step": 21600 }, { "epoch": 0.65, "learning_rate": 2.3538090128755364e-05, "loss": 1.6806, "step": 21700 }, { "epoch": 0.65, "learning_rate": 2.3508285646161183e-05, "loss": 1.6834, "step": 21800 }, { "epoch": 0.65, "learning_rate": 2.3478481163567002e-05, "loss": 1.7367, "step": 21900 }, { "epoch": 0.66, "learning_rate": 2.3448676680972818e-05, "loss": 1.6755, "step": 22000 }, { "epoch": 0.66, "learning_rate": 2.3418872198378637e-05, "loss": 1.7169, "step": 22100 }, { "epoch": 0.66, "learning_rate": 2.3389067715784456e-05, "loss": 1.7314, "step": 22200 }, { "epoch": 0.66, "learning_rate": 2.335926323319027e-05, "loss": 1.7077, "step": 22300 }, { "epoch": 0.67, "learning_rate": 2.332945875059609e-05, "loss": 1.7225, "step": 22400 }, { "epoch": 0.67, "learning_rate": 2.329965426800191e-05, "loss": 1.6787, "step": 22500 }, { "epoch": 0.67, "learning_rate": 2.326984978540773e-05, "loss": 1.6933, "step": 22600 }, { "epoch": 0.68, "learning_rate": 2.3240045302813544e-05, "loss": 1.7114, "step": 22700 }, { "epoch": 0.68, "learning_rate": 2.3210240820219363e-05, "loss": 1.7037, "step": 22800 }, { "epoch": 0.68, "learning_rate": 2.318043633762518e-05, "loss": 1.7217, "step": 22900 }, { "epoch": 0.69, "learning_rate": 2.3150631855030995e-05, "loss": 1.7062, "step": 23000 }, { "epoch": 0.69, "learning_rate": 2.3120827372436814e-05, "loss": 1.6969, "step": 23100 }, { "epoch": 0.69, "learning_rate": 2.3091022889842633e-05, "loss": 1.693, "step": 23200 }, { "epoch": 0.69, "learning_rate": 2.306121840724845e-05, "loss": 1.6718, "step": 23300 }, { "epoch": 0.7, "learning_rate": 2.3031413924654267e-05, "loss": 1.6782, "step": 23400 }, { "epoch": 0.7, "learning_rate": 2.3001609442060086e-05, "loss": 1.6719, "step": 23500 }, { "epoch": 0.7, "learning_rate": 2.2971804959465905e-05, "loss": 1.6695, "step": 23600 }, { "epoch": 0.71, "learning_rate": 2.294200047687172e-05, "loss": 1.6336, "step": 23700 }, { "epoch": 0.71, "learning_rate": 2.291219599427754e-05, "loss": 1.6949, "step": 23800 }, { "epoch": 0.71, "learning_rate": 2.288239151168336e-05, "loss": 1.6912, "step": 23900 }, { "epoch": 0.72, "learning_rate": 2.2852587029089178e-05, "loss": 1.6374, "step": 24000 }, { "epoch": 0.72, "learning_rate": 2.2822782546494994e-05, "loss": 1.6663, "step": 24100 }, { "epoch": 0.72, "learning_rate": 2.2792978063900813e-05, "loss": 1.7008, "step": 24200 }, { "epoch": 0.72, "learning_rate": 2.276317358130663e-05, "loss": 1.6883, "step": 24300 }, { "epoch": 0.73, "learning_rate": 2.2733369098712444e-05, "loss": 1.6593, "step": 24400 }, { "epoch": 0.73, "learning_rate": 2.2703564616118263e-05, "loss": 1.6847, "step": 24500 }, { "epoch": 0.73, "learning_rate": 2.2673760133524082e-05, "loss": 1.6684, "step": 24600 }, { "epoch": 0.74, "learning_rate": 2.26439556509299e-05, "loss": 1.6365, "step": 24700 }, { "epoch": 0.74, "learning_rate": 2.2614151168335717e-05, "loss": 1.6649, "step": 24800 }, { "epoch": 0.74, "learning_rate": 2.2584346685741536e-05, "loss": 1.6906, "step": 24900 }, { "epoch": 0.75, "learning_rate": 2.2554542203147355e-05, "loss": 1.6493, "step": 25000 }, { "epoch": 0.75, "learning_rate": 2.252473772055317e-05, "loss": 1.6709, "step": 25100 }, { "epoch": 0.75, "learning_rate": 2.249493323795899e-05, "loss": 1.6905, "step": 25200 }, { "epoch": 0.75, "learning_rate": 2.2465128755364808e-05, "loss": 1.6655, "step": 25300 }, { "epoch": 0.76, "learning_rate": 2.2435324272770627e-05, "loss": 1.6957, "step": 25400 }, { "epoch": 0.76, "learning_rate": 2.2405519790176443e-05, "loss": 1.6703, "step": 25500 }, { "epoch": 0.76, "learning_rate": 2.2376013352408203e-05, "loss": 1.7062, "step": 25600 }, { "epoch": 0.77, "learning_rate": 2.2346208869814022e-05, "loss": 1.6806, "step": 25700 }, { "epoch": 0.77, "learning_rate": 2.2316404387219837e-05, "loss": 1.6701, "step": 25800 }, { "epoch": 0.77, "learning_rate": 2.2286599904625657e-05, "loss": 1.6538, "step": 25900 }, { "epoch": 0.77, "learning_rate": 2.2256795422031476e-05, "loss": 1.6635, "step": 26000 }, { "epoch": 0.78, "learning_rate": 2.222699093943729e-05, "loss": 1.6629, "step": 26100 }, { "epoch": 0.78, "learning_rate": 2.219718645684311e-05, "loss": 1.7256, "step": 26200 }, { "epoch": 0.78, "learning_rate": 2.216738197424893e-05, "loss": 1.6586, "step": 26300 }, { "epoch": 0.79, "learning_rate": 2.2137577491654748e-05, "loss": 1.6814, "step": 26400 }, { "epoch": 0.79, "learning_rate": 2.2107773009060564e-05, "loss": 1.6326, "step": 26500 }, { "epoch": 0.79, "learning_rate": 2.207796852646638e-05, "loss": 1.6973, "step": 26600 }, { "epoch": 0.8, "learning_rate": 2.20481640438722e-05, "loss": 1.6486, "step": 26700 }, { "epoch": 0.8, "learning_rate": 2.2018359561278014e-05, "loss": 1.6419, "step": 26800 }, { "epoch": 0.8, "learning_rate": 2.1988555078683833e-05, "loss": 1.665, "step": 26900 }, { "epoch": 0.8, "learning_rate": 2.1958750596089652e-05, "loss": 1.6391, "step": 27000 }, { "epoch": 0.81, "learning_rate": 2.192894611349547e-05, "loss": 1.6599, "step": 27100 }, { "epoch": 0.81, "learning_rate": 2.1899141630901287e-05, "loss": 1.6657, "step": 27200 }, { "epoch": 0.81, "learning_rate": 2.1869337148307106e-05, "loss": 1.651, "step": 27300 }, { "epoch": 0.82, "learning_rate": 2.1839532665712925e-05, "loss": 1.6894, "step": 27400 }, { "epoch": 0.82, "learning_rate": 2.180972818311874e-05, "loss": 1.6259, "step": 27500 }, { "epoch": 0.82, "learning_rate": 2.177992370052456e-05, "loss": 1.6694, "step": 27600 }, { "epoch": 0.83, "learning_rate": 2.175011921793038e-05, "loss": 1.7037, "step": 27700 }, { "epoch": 0.83, "learning_rate": 2.1720314735336198e-05, "loss": 1.6759, "step": 27800 }, { "epoch": 0.83, "learning_rate": 2.1690808297567954e-05, "loss": 1.6561, "step": 27900 }, { "epoch": 0.83, "learning_rate": 2.1661003814973773e-05, "loss": 1.6251, "step": 28000 }, { "epoch": 0.84, "learning_rate": 2.1631199332379592e-05, "loss": 1.6564, "step": 28100 }, { "epoch": 0.84, "learning_rate": 2.1601394849785408e-05, "loss": 1.5827, "step": 28200 }, { "epoch": 0.84, "learning_rate": 2.1571590367191227e-05, "loss": 1.6458, "step": 28300 }, { "epoch": 0.85, "learning_rate": 2.1541785884597046e-05, "loss": 1.6513, "step": 28400 }, { "epoch": 0.85, "learning_rate": 2.151198140200286e-05, "loss": 1.6328, "step": 28500 }, { "epoch": 0.85, "learning_rate": 2.148217691940868e-05, "loss": 1.6608, "step": 28600 }, { "epoch": 0.86, "learning_rate": 2.14523724368145e-05, "loss": 1.6601, "step": 28700 }, { "epoch": 0.86, "learning_rate": 2.142256795422032e-05, "loss": 1.6835, "step": 28800 }, { "epoch": 0.86, "learning_rate": 2.139276347162613e-05, "loss": 1.644, "step": 28900 }, { "epoch": 0.86, "learning_rate": 2.136295898903195e-05, "loss": 1.647, "step": 29000 }, { "epoch": 0.87, "learning_rate": 2.133315450643777e-05, "loss": 1.6461, "step": 29100 }, { "epoch": 0.87, "learning_rate": 2.1303350023843584e-05, "loss": 1.6605, "step": 29200 }, { "epoch": 0.87, "learning_rate": 2.1273545541249403e-05, "loss": 1.693, "step": 29300 }, { "epoch": 0.88, "learning_rate": 2.1243741058655222e-05, "loss": 1.6356, "step": 29400 }, { "epoch": 0.88, "learning_rate": 2.121393657606104e-05, "loss": 1.6449, "step": 29500 }, { "epoch": 0.88, "learning_rate": 2.1184132093466857e-05, "loss": 1.628, "step": 29600 }, { "epoch": 0.89, "learning_rate": 2.1154327610872676e-05, "loss": 1.6563, "step": 29700 }, { "epoch": 0.89, "learning_rate": 2.1124523128278495e-05, "loss": 1.6084, "step": 29800 }, { "epoch": 0.89, "learning_rate": 2.109471864568431e-05, "loss": 1.627, "step": 29900 }, { "epoch": 0.89, "learning_rate": 2.106491416309013e-05, "loss": 1.6689, "step": 30000 }, { "epoch": 0.9, "learning_rate": 2.103510968049595e-05, "loss": 1.6247, "step": 30100 }, { "epoch": 0.9, "learning_rate": 2.1005305197901768e-05, "loss": 1.6514, "step": 30200 }, { "epoch": 0.9, "learning_rate": 2.0975500715307583e-05, "loss": 1.6428, "step": 30300 }, { "epoch": 0.91, "learning_rate": 2.0945994277539343e-05, "loss": 1.6138, "step": 30400 }, { "epoch": 0.91, "learning_rate": 2.0916189794945162e-05, "loss": 1.6405, "step": 30500 }, { "epoch": 0.91, "learning_rate": 2.0886385312350978e-05, "loss": 1.6526, "step": 30600 }, { "epoch": 0.91, "learning_rate": 2.0856580829756797e-05, "loss": 1.6827, "step": 30700 }, { "epoch": 0.92, "learning_rate": 2.0826776347162616e-05, "loss": 1.664, "step": 30800 }, { "epoch": 0.92, "learning_rate": 2.079697186456843e-05, "loss": 1.635, "step": 30900 }, { "epoch": 0.92, "learning_rate": 2.076716738197425e-05, "loss": 1.6359, "step": 31000 }, { "epoch": 0.93, "learning_rate": 2.0737362899380066e-05, "loss": 1.6109, "step": 31100 }, { "epoch": 0.93, "learning_rate": 2.0707558416785885e-05, "loss": 1.5975, "step": 31200 }, { "epoch": 0.93, "learning_rate": 2.06777539341917e-05, "loss": 1.6295, "step": 31300 }, { "epoch": 0.94, "learning_rate": 2.064794945159752e-05, "loss": 1.659, "step": 31400 }, { "epoch": 0.94, "learning_rate": 2.061814496900334e-05, "loss": 1.6064, "step": 31500 }, { "epoch": 0.94, "learning_rate": 2.0588340486409155e-05, "loss": 1.646, "step": 31600 }, { "epoch": 0.94, "learning_rate": 2.0558536003814974e-05, "loss": 1.6404, "step": 31700 }, { "epoch": 0.95, "learning_rate": 2.0528731521220793e-05, "loss": 1.6281, "step": 31800 }, { "epoch": 0.95, "learning_rate": 2.049892703862661e-05, "loss": 1.6216, "step": 31900 }, { "epoch": 0.95, "learning_rate": 2.0469122556032427e-05, "loss": 1.6398, "step": 32000 }, { "epoch": 0.96, "learning_rate": 2.0439318073438246e-05, "loss": 1.67, "step": 32100 }, { "epoch": 0.96, "learning_rate": 2.0409513590844065e-05, "loss": 1.619, "step": 32200 }, { "epoch": 0.96, "learning_rate": 2.037970910824988e-05, "loss": 1.6067, "step": 32300 }, { "epoch": 0.97, "learning_rate": 2.03499046256557e-05, "loss": 1.6079, "step": 32400 }, { "epoch": 0.97, "learning_rate": 2.032010014306152e-05, "loss": 1.6647, "step": 32500 }, { "epoch": 0.97, "learning_rate": 2.0290295660467335e-05, "loss": 1.6308, "step": 32600 }, { "epoch": 0.97, "learning_rate": 2.026049117787315e-05, "loss": 1.6525, "step": 32700 }, { "epoch": 0.98, "learning_rate": 2.023068669527897e-05, "loss": 1.6212, "step": 32800 }, { "epoch": 0.98, "learning_rate": 2.020118025751073e-05, "loss": 1.6845, "step": 32900 }, { "epoch": 0.98, "learning_rate": 2.0171375774916548e-05, "loss": 1.6324, "step": 33000 }, { "epoch": 0.99, "learning_rate": 2.0141571292322367e-05, "loss": 1.6024, "step": 33100 }, { "epoch": 0.99, "learning_rate": 2.0111766809728186e-05, "loss": 1.6687, "step": 33200 }, { "epoch": 0.99, "learning_rate": 2.0081962327134002e-05, "loss": 1.6338, "step": 33300 }, { "epoch": 1.0, "learning_rate": 2.0052157844539818e-05, "loss": 1.6172, "step": 33400 }, { "epoch": 1.0, "learning_rate": 2.0022353361945637e-05, "loss": 1.6706, "step": 33500 }, { "epoch": 1.0, "eval_gen_len": 18.6217, "eval_loss": 1.5689729452133179, "eval_rouge1": 31.2477, "eval_rouge2": 16.5455, "eval_rougeL": 26.9855, "eval_rougeLsum": 26.9754, "eval_runtime": 291.6493, "eval_samples_per_second": 9.299, "eval_steps_per_second": 1.162, "step": 33552 }, { "epoch": 1.0, "learning_rate": 1.9992548879351456e-05, "loss": 1.5373, "step": 33600 }, { "epoch": 1.0, "learning_rate": 1.996274439675727e-05, "loss": 1.5053, "step": 33700 }, { "epoch": 1.01, "learning_rate": 1.993293991416309e-05, "loss": 1.4423, "step": 33800 }, { "epoch": 1.01, "learning_rate": 1.990313543156891e-05, "loss": 1.4924, "step": 33900 }, { "epoch": 1.01, "learning_rate": 1.9873330948974725e-05, "loss": 1.5381, "step": 34000 }, { "epoch": 1.02, "learning_rate": 1.9843526466380544e-05, "loss": 1.4693, "step": 34100 }, { "epoch": 1.02, "learning_rate": 1.9813721983786363e-05, "loss": 1.4748, "step": 34200 }, { "epoch": 1.02, "learning_rate": 1.9783917501192182e-05, "loss": 1.5042, "step": 34300 }, { "epoch": 1.03, "learning_rate": 1.9754113018597998e-05, "loss": 1.4755, "step": 34400 }, { "epoch": 1.03, "learning_rate": 1.9724308536003817e-05, "loss": 1.4685, "step": 34500 }, { "epoch": 1.03, "learning_rate": 1.9694504053409636e-05, "loss": 1.5157, "step": 34600 }, { "epoch": 1.03, "learning_rate": 1.966469957081545e-05, "loss": 1.4362, "step": 34700 }, { "epoch": 1.04, "learning_rate": 1.963489508822127e-05, "loss": 1.4859, "step": 34800 }, { "epoch": 1.04, "learning_rate": 1.9605090605627086e-05, "loss": 1.4786, "step": 34900 }, { "epoch": 1.04, "learning_rate": 1.9575286123032905e-05, "loss": 1.4813, "step": 35000 }, { "epoch": 1.05, "learning_rate": 1.954548164043872e-05, "loss": 1.4775, "step": 35100 }, { "epoch": 1.05, "learning_rate": 1.951567715784454e-05, "loss": 1.5173, "step": 35200 }, { "epoch": 1.05, "learning_rate": 1.948587267525036e-05, "loss": 1.5366, "step": 35300 }, { "epoch": 1.06, "learning_rate": 1.9456068192656174e-05, "loss": 1.4664, "step": 35400 }, { "epoch": 1.06, "learning_rate": 1.9426263710061993e-05, "loss": 1.5118, "step": 35500 }, { "epoch": 1.06, "learning_rate": 1.9396459227467812e-05, "loss": 1.4968, "step": 35600 }, { "epoch": 1.06, "learning_rate": 1.936665474487363e-05, "loss": 1.4721, "step": 35700 }, { "epoch": 1.07, "learning_rate": 1.9336850262279447e-05, "loss": 1.5235, "step": 35800 }, { "epoch": 1.07, "learning_rate": 1.9307045779685266e-05, "loss": 1.4908, "step": 35900 }, { "epoch": 1.07, "learning_rate": 1.9277241297091085e-05, "loss": 1.4354, "step": 36000 }, { "epoch": 1.08, "learning_rate": 1.92474368144969e-05, "loss": 1.517, "step": 36100 }, { "epoch": 1.08, "learning_rate": 1.921763233190272e-05, "loss": 1.4848, "step": 36200 }, { "epoch": 1.08, "learning_rate": 1.9187827849308535e-05, "loss": 1.5129, "step": 36300 }, { "epoch": 1.08, "learning_rate": 1.9158023366714354e-05, "loss": 1.4799, "step": 36400 }, { "epoch": 1.09, "learning_rate": 1.912821888412017e-05, "loss": 1.4383, "step": 36500 }, { "epoch": 1.09, "learning_rate": 1.909841440152599e-05, "loss": 1.4719, "step": 36600 }, { "epoch": 1.09, "learning_rate": 1.9068609918931808e-05, "loss": 1.4657, "step": 36700 }, { "epoch": 1.1, "learning_rate": 1.9038805436337623e-05, "loss": 1.5345, "step": 36800 }, { "epoch": 1.1, "learning_rate": 1.9009000953743443e-05, "loss": 1.4622, "step": 36900 }, { "epoch": 1.1, "learning_rate": 1.897919647114926e-05, "loss": 1.4856, "step": 37000 }, { "epoch": 1.11, "learning_rate": 1.894939198855508e-05, "loss": 1.4691, "step": 37100 }, { "epoch": 1.11, "learning_rate": 1.8919587505960896e-05, "loss": 1.4761, "step": 37200 }, { "epoch": 1.11, "learning_rate": 1.8889783023366715e-05, "loss": 1.5007, "step": 37300 }, { "epoch": 1.11, "learning_rate": 1.8859978540772534e-05, "loss": 1.4755, "step": 37400 }, { "epoch": 1.12, "learning_rate": 1.8830174058178353e-05, "loss": 1.5022, "step": 37500 }, { "epoch": 1.12, "learning_rate": 1.880036957558417e-05, "loss": 1.4647, "step": 37600 }, { "epoch": 1.12, "learning_rate": 1.8770565092989988e-05, "loss": 1.4652, "step": 37700 }, { "epoch": 1.13, "learning_rate": 1.8740760610395804e-05, "loss": 1.4751, "step": 37800 }, { "epoch": 1.13, "learning_rate": 1.871095612780162e-05, "loss": 1.4767, "step": 37900 }, { "epoch": 1.13, "learning_rate": 1.8681151645207438e-05, "loss": 1.5161, "step": 38000 }, { "epoch": 1.14, "learning_rate": 1.8651347162613257e-05, "loss": 1.4369, "step": 38100 }, { "epoch": 1.14, "learning_rate": 1.8621542680019076e-05, "loss": 1.4612, "step": 38200 }, { "epoch": 1.14, "learning_rate": 1.8591738197424892e-05, "loss": 1.4817, "step": 38300 }, { "epoch": 1.14, "learning_rate": 1.856193371483071e-05, "loss": 1.4727, "step": 38400 }, { "epoch": 1.15, "learning_rate": 1.853212923223653e-05, "loss": 1.4477, "step": 38500 }, { "epoch": 1.15, "learning_rate": 1.8502324749642345e-05, "loss": 1.4775, "step": 38600 }, { "epoch": 1.15, "learning_rate": 1.8472520267048165e-05, "loss": 1.484, "step": 38700 }, { "epoch": 1.16, "learning_rate": 1.8442715784453984e-05, "loss": 1.4885, "step": 38800 }, { "epoch": 1.16, "learning_rate": 1.8412911301859803e-05, "loss": 1.5263, "step": 38900 }, { "epoch": 1.16, "learning_rate": 1.8383106819265618e-05, "loss": 1.4864, "step": 39000 }, { "epoch": 1.17, "learning_rate": 1.8353302336671437e-05, "loss": 1.5276, "step": 39100 }, { "epoch": 1.17, "learning_rate": 1.8323497854077256e-05, "loss": 1.5265, "step": 39200 }, { "epoch": 1.17, "learning_rate": 1.829369337148307e-05, "loss": 1.4731, "step": 39300 }, { "epoch": 1.17, "learning_rate": 1.8263888888888887e-05, "loss": 1.4715, "step": 39400 }, { "epoch": 1.18, "learning_rate": 1.8234084406294706e-05, "loss": 1.5026, "step": 39500 }, { "epoch": 1.18, "learning_rate": 1.8204279923700526e-05, "loss": 1.4841, "step": 39600 }, { "epoch": 1.18, "learning_rate": 1.817447544110634e-05, "loss": 1.4688, "step": 39700 }, { "epoch": 1.19, "learning_rate": 1.814467095851216e-05, "loss": 1.4782, "step": 39800 }, { "epoch": 1.19, "learning_rate": 1.811486647591798e-05, "loss": 1.4734, "step": 39900 }, { "epoch": 1.19, "learning_rate": 1.8085061993323795e-05, "loss": 1.4915, "step": 40000 }, { "epoch": 1.2, "learning_rate": 1.8055257510729614e-05, "loss": 1.4493, "step": 40100 }, { "epoch": 1.2, "learning_rate": 1.8025453028135433e-05, "loss": 1.5045, "step": 40200 }, { "epoch": 1.2, "learning_rate": 1.7995648545541252e-05, "loss": 1.4681, "step": 40300 }, { "epoch": 1.2, "learning_rate": 1.7965844062947067e-05, "loss": 1.5133, "step": 40400 }, { "epoch": 1.21, "learning_rate": 1.7936039580352887e-05, "loss": 1.4748, "step": 40500 }, { "epoch": 1.21, "learning_rate": 1.7906235097758706e-05, "loss": 1.5346, "step": 40600 }, { "epoch": 1.21, "learning_rate": 1.787643061516452e-05, "loss": 1.4709, "step": 40700 }, { "epoch": 1.22, "learning_rate": 1.7846626132570337e-05, "loss": 1.4431, "step": 40800 }, { "epoch": 1.22, "learning_rate": 1.7816821649976156e-05, "loss": 1.5095, "step": 40900 }, { "epoch": 1.22, "learning_rate": 1.7787017167381975e-05, "loss": 1.4872, "step": 41000 }, { "epoch": 1.22, "learning_rate": 1.775721268478779e-05, "loss": 1.529, "step": 41100 }, { "epoch": 1.23, "learning_rate": 1.772740820219361e-05, "loss": 1.4687, "step": 41200 }, { "epoch": 1.23, "learning_rate": 1.769760371959943e-05, "loss": 1.4696, "step": 41300 }, { "epoch": 1.23, "learning_rate": 1.7667799237005248e-05, "loss": 1.457, "step": 41400 }, { "epoch": 1.24, "learning_rate": 1.7637994754411063e-05, "loss": 1.5051, "step": 41500 }, { "epoch": 1.24, "learning_rate": 1.7608190271816882e-05, "loss": 1.4694, "step": 41600 }, { "epoch": 1.24, "learning_rate": 1.75783857892227e-05, "loss": 1.4556, "step": 41700 }, { "epoch": 1.25, "learning_rate": 1.7548581306628517e-05, "loss": 1.4872, "step": 41800 }, { "epoch": 1.25, "learning_rate": 1.7518776824034336e-05, "loss": 1.4489, "step": 41900 }, { "epoch": 1.25, "learning_rate": 1.7488972341440155e-05, "loss": 1.4892, "step": 42000 }, { "epoch": 1.25, "learning_rate": 1.7459167858845974e-05, "loss": 1.4869, "step": 42100 }, { "epoch": 1.26, "learning_rate": 1.742936337625179e-05, "loss": 1.4624, "step": 42200 }, { "epoch": 1.26, "learning_rate": 1.7399558893657605e-05, "loss": 1.4492, "step": 42300 }, { "epoch": 1.26, "learning_rate": 1.7369754411063424e-05, "loss": 1.4306, "step": 42400 }, { "epoch": 1.27, "learning_rate": 1.733994992846924e-05, "loss": 1.4595, "step": 42500 }, { "epoch": 1.27, "learning_rate": 1.731014544587506e-05, "loss": 1.4703, "step": 42600 }, { "epoch": 1.27, "learning_rate": 1.7280340963280878e-05, "loss": 1.4962, "step": 42700 }, { "epoch": 1.28, "learning_rate": 1.7250536480686697e-05, "loss": 1.4476, "step": 42800 }, { "epoch": 1.28, "learning_rate": 1.7220731998092512e-05, "loss": 1.4674, "step": 42900 }, { "epoch": 1.28, "learning_rate": 1.719092751549833e-05, "loss": 1.4704, "step": 43000 }, { "epoch": 1.28, "learning_rate": 1.716112303290415e-05, "loss": 1.5435, "step": 43100 }, { "epoch": 1.29, "learning_rate": 1.7131318550309966e-05, "loss": 1.5339, "step": 43200 }, { "epoch": 1.29, "learning_rate": 1.7101514067715785e-05, "loss": 1.4905, "step": 43300 }, { "epoch": 1.29, "learning_rate": 1.7071709585121604e-05, "loss": 1.4978, "step": 43400 }, { "epoch": 1.3, "learning_rate": 1.7041905102527423e-05, "loss": 1.4478, "step": 43500 }, { "epoch": 1.3, "learning_rate": 1.701239866475918e-05, "loss": 1.5059, "step": 43600 }, { "epoch": 1.3, "learning_rate": 1.6982594182165e-05, "loss": 1.4643, "step": 43700 }, { "epoch": 1.31, "learning_rate": 1.6953087744396755e-05, "loss": 1.5011, "step": 43800 }, { "epoch": 1.31, "learning_rate": 1.6923283261802574e-05, "loss": 1.4895, "step": 43900 }, { "epoch": 1.31, "learning_rate": 1.6893478779208393e-05, "loss": 1.4306, "step": 44000 }, { "epoch": 1.31, "learning_rate": 1.686367429661421e-05, "loss": 1.5001, "step": 44100 }, { "epoch": 1.32, "learning_rate": 1.6834167858845972e-05, "loss": 1.4506, "step": 44200 }, { "epoch": 1.32, "learning_rate": 1.680436337625179e-05, "loss": 1.4456, "step": 44300 }, { "epoch": 1.32, "learning_rate": 1.6774558893657607e-05, "loss": 1.5038, "step": 44400 }, { "epoch": 1.33, "learning_rate": 1.6744754411063426e-05, "loss": 1.5082, "step": 44500 }, { "epoch": 1.33, "learning_rate": 1.671494992846924e-05, "loss": 1.5055, "step": 44600 }, { "epoch": 1.33, "learning_rate": 1.6685145445875057e-05, "loss": 1.4697, "step": 44700 }, { "epoch": 1.34, "learning_rate": 1.6655340963280876e-05, "loss": 1.501, "step": 44800 }, { "epoch": 1.34, "learning_rate": 1.6625536480686695e-05, "loss": 1.4994, "step": 44900 }, { "epoch": 1.34, "learning_rate": 1.6595731998092514e-05, "loss": 1.4899, "step": 45000 }, { "epoch": 1.34, "learning_rate": 1.656592751549833e-05, "loss": 1.4819, "step": 45100 }, { "epoch": 1.35, "learning_rate": 1.653612303290415e-05, "loss": 1.4529, "step": 45200 }, { "epoch": 1.35, "learning_rate": 1.6506318550309968e-05, "loss": 1.4849, "step": 45300 }, { "epoch": 1.35, "learning_rate": 1.6476514067715784e-05, "loss": 1.4587, "step": 45400 }, { "epoch": 1.36, "learning_rate": 1.6446709585121603e-05, "loss": 1.4595, "step": 45500 }, { "epoch": 1.36, "learning_rate": 1.641690510252742e-05, "loss": 1.4292, "step": 45600 }, { "epoch": 1.36, "learning_rate": 1.638710061993324e-05, "loss": 1.4624, "step": 45700 }, { "epoch": 1.37, "learning_rate": 1.6357296137339056e-05, "loss": 1.5235, "step": 45800 }, { "epoch": 1.37, "learning_rate": 1.6327491654744875e-05, "loss": 1.5378, "step": 45900 }, { "epoch": 1.37, "learning_rate": 1.6297687172150694e-05, "loss": 1.4633, "step": 46000 }, { "epoch": 1.37, "learning_rate": 1.626788268955651e-05, "loss": 1.4387, "step": 46100 }, { "epoch": 1.38, "learning_rate": 1.6238078206962326e-05, "loss": 1.4335, "step": 46200 }, { "epoch": 1.38, "learning_rate": 1.6208273724368145e-05, "loss": 1.4708, "step": 46300 }, { "epoch": 1.38, "learning_rate": 1.6178469241773964e-05, "loss": 1.4633, "step": 46400 }, { "epoch": 1.39, "learning_rate": 1.6148962804005723e-05, "loss": 1.5035, "step": 46500 }, { "epoch": 1.39, "learning_rate": 1.6119158321411542e-05, "loss": 1.4553, "step": 46600 }, { "epoch": 1.39, "learning_rate": 1.608935383881736e-05, "loss": 1.4737, "step": 46700 }, { "epoch": 1.39, "learning_rate": 1.6059549356223177e-05, "loss": 1.4676, "step": 46800 }, { "epoch": 1.4, "learning_rate": 1.6029744873628993e-05, "loss": 1.4482, "step": 46900 }, { "epoch": 1.4, "learning_rate": 1.5999940391034812e-05, "loss": 1.4381, "step": 47000 }, { "epoch": 1.4, "learning_rate": 1.5970135908440627e-05, "loss": 1.4099, "step": 47100 }, { "epoch": 1.41, "learning_rate": 1.5940331425846446e-05, "loss": 1.4595, "step": 47200 }, { "epoch": 1.41, "learning_rate": 1.5910526943252265e-05, "loss": 1.4551, "step": 47300 }, { "epoch": 1.41, "learning_rate": 1.5880722460658084e-05, "loss": 1.4629, "step": 47400 }, { "epoch": 1.42, "learning_rate": 1.58509179780639e-05, "loss": 1.4616, "step": 47500 }, { "epoch": 1.42, "learning_rate": 1.582111349546972e-05, "loss": 1.4742, "step": 47600 }, { "epoch": 1.42, "learning_rate": 1.5791309012875538e-05, "loss": 1.4735, "step": 47700 }, { "epoch": 1.42, "learning_rate": 1.5761504530281354e-05, "loss": 1.4289, "step": 47800 }, { "epoch": 1.43, "learning_rate": 1.5731700047687173e-05, "loss": 1.4924, "step": 47900 }, { "epoch": 1.43, "learning_rate": 1.5701895565092992e-05, "loss": 1.4643, "step": 48000 }, { "epoch": 1.43, "learning_rate": 1.567209108249881e-05, "loss": 1.4499, "step": 48100 }, { "epoch": 1.44, "learning_rate": 1.5642286599904626e-05, "loss": 1.5153, "step": 48200 }, { "epoch": 1.44, "learning_rate": 1.5612482117310442e-05, "loss": 1.4627, "step": 48300 }, { "epoch": 1.44, "learning_rate": 1.558267763471626e-05, "loss": 1.426, "step": 48400 }, { "epoch": 1.45, "learning_rate": 1.555317119694802e-05, "loss": 1.4213, "step": 48500 }, { "epoch": 1.45, "learning_rate": 1.552336671435384e-05, "loss": 1.4519, "step": 48600 }, { "epoch": 1.45, "learning_rate": 1.549356223175966e-05, "loss": 1.4877, "step": 48700 }, { "epoch": 1.45, "learning_rate": 1.5463757749165475e-05, "loss": 1.488, "step": 48800 }, { "epoch": 1.46, "learning_rate": 1.5433953266571294e-05, "loss": 1.4738, "step": 48900 }, { "epoch": 1.46, "learning_rate": 1.5404148783977113e-05, "loss": 1.4912, "step": 49000 }, { "epoch": 1.46, "learning_rate": 1.537434430138293e-05, "loss": 1.4391, "step": 49100 }, { "epoch": 1.47, "learning_rate": 1.5344539818788744e-05, "loss": 1.4983, "step": 49200 }, { "epoch": 1.47, "learning_rate": 1.5314735336194563e-05, "loss": 1.411, "step": 49300 }, { "epoch": 1.47, "learning_rate": 1.5284930853600382e-05, "loss": 1.5083, "step": 49400 }, { "epoch": 1.48, "learning_rate": 1.5255126371006198e-05, "loss": 1.4399, "step": 49500 }, { "epoch": 1.48, "learning_rate": 1.5225321888412017e-05, "loss": 1.4691, "step": 49600 }, { "epoch": 1.48, "learning_rate": 1.5195517405817836e-05, "loss": 1.4732, "step": 49700 }, { "epoch": 1.48, "learning_rate": 1.5165712923223655e-05, "loss": 1.5002, "step": 49800 }, { "epoch": 1.49, "learning_rate": 1.513590844062947e-05, "loss": 1.4984, "step": 49900 }, { "epoch": 1.49, "learning_rate": 1.510610395803529e-05, "loss": 1.4538, "step": 50000 }, { "epoch": 1.49, "learning_rate": 1.5076299475441108e-05, "loss": 1.4828, "step": 50100 }, { "epoch": 1.5, "learning_rate": 1.5046494992846922e-05, "loss": 1.5021, "step": 50200 }, { "epoch": 1.5, "learning_rate": 1.5016690510252741e-05, "loss": 1.4735, "step": 50300 }, { "epoch": 1.5, "learning_rate": 1.498688602765856e-05, "loss": 1.4508, "step": 50400 }, { "epoch": 1.51, "learning_rate": 1.4957081545064378e-05, "loss": 1.5091, "step": 50500 }, { "epoch": 1.51, "learning_rate": 1.4927277062470197e-05, "loss": 1.4725, "step": 50600 }, { "epoch": 1.51, "learning_rate": 1.4897472579876014e-05, "loss": 1.4495, "step": 50700 }, { "epoch": 1.51, "learning_rate": 1.4867668097281831e-05, "loss": 1.4596, "step": 50800 }, { "epoch": 1.52, "learning_rate": 1.483786361468765e-05, "loss": 1.4447, "step": 50900 }, { "epoch": 1.52, "learning_rate": 1.4808059132093466e-05, "loss": 1.4665, "step": 51000 }, { "epoch": 1.52, "learning_rate": 1.4778254649499285e-05, "loss": 1.4793, "step": 51100 }, { "epoch": 1.53, "learning_rate": 1.4748450166905102e-05, "loss": 1.4722, "step": 51200 }, { "epoch": 1.53, "learning_rate": 1.4718645684310921e-05, "loss": 1.4474, "step": 51300 }, { "epoch": 1.53, "learning_rate": 1.4688841201716739e-05, "loss": 1.4295, "step": 51400 }, { "epoch": 1.53, "learning_rate": 1.4659036719122558e-05, "loss": 1.4441, "step": 51500 }, { "epoch": 1.54, "learning_rate": 1.4629232236528375e-05, "loss": 1.4148, "step": 51600 }, { "epoch": 1.54, "learning_rate": 1.459942775393419e-05, "loss": 1.4457, "step": 51700 }, { "epoch": 1.54, "learning_rate": 1.456962327134001e-05, "loss": 1.4851, "step": 51800 }, { "epoch": 1.55, "learning_rate": 1.4539818788745827e-05, "loss": 1.4864, "step": 51900 }, { "epoch": 1.55, "learning_rate": 1.4510014306151646e-05, "loss": 1.4926, "step": 52000 }, { "epoch": 1.55, "learning_rate": 1.4480209823557463e-05, "loss": 1.4553, "step": 52100 }, { "epoch": 1.56, "learning_rate": 1.4450405340963282e-05, "loss": 1.4744, "step": 52200 }, { "epoch": 1.56, "learning_rate": 1.44206008583691e-05, "loss": 1.4624, "step": 52300 }, { "epoch": 1.56, "learning_rate": 1.4390796375774915e-05, "loss": 1.4483, "step": 52400 }, { "epoch": 1.56, "learning_rate": 1.4360991893180734e-05, "loss": 1.4569, "step": 52500 }, { "epoch": 1.57, "learning_rate": 1.4331187410586552e-05, "loss": 1.4262, "step": 52600 }, { "epoch": 1.57, "learning_rate": 1.430138292799237e-05, "loss": 1.4517, "step": 52700 }, { "epoch": 1.57, "learning_rate": 1.4271578445398188e-05, "loss": 1.4496, "step": 52800 }, { "epoch": 1.58, "learning_rate": 1.4241773962804007e-05, "loss": 1.4519, "step": 52900 }, { "epoch": 1.58, "learning_rate": 1.4211969480209824e-05, "loss": 1.4407, "step": 53000 }, { "epoch": 1.58, "learning_rate": 1.4182164997615643e-05, "loss": 1.4715, "step": 53100 }, { "epoch": 1.59, "learning_rate": 1.4152360515021459e-05, "loss": 1.4313, "step": 53200 }, { "epoch": 1.59, "learning_rate": 1.4122556032427276e-05, "loss": 1.4798, "step": 53300 }, { "epoch": 1.59, "learning_rate": 1.4092751549833095e-05, "loss": 1.4386, "step": 53400 }, { "epoch": 1.59, "learning_rate": 1.4062947067238913e-05, "loss": 1.4592, "step": 53500 }, { "epoch": 1.6, "learning_rate": 1.4033142584644732e-05, "loss": 1.4857, "step": 53600 }, { "epoch": 1.6, "learning_rate": 1.4003338102050549e-05, "loss": 1.463, "step": 53700 }, { "epoch": 1.6, "learning_rate": 1.3973533619456368e-05, "loss": 1.4752, "step": 53800 }, { "epoch": 1.61, "learning_rate": 1.3943729136862184e-05, "loss": 1.4524, "step": 53900 }, { "epoch": 1.61, "learning_rate": 1.3913924654268001e-05, "loss": 1.4677, "step": 54000 }, { "epoch": 1.61, "learning_rate": 1.388412017167382e-05, "loss": 1.48, "step": 54100 }, { "epoch": 1.62, "learning_rate": 1.3854315689079637e-05, "loss": 1.4365, "step": 54200 }, { "epoch": 1.62, "learning_rate": 1.3824511206485456e-05, "loss": 1.485, "step": 54300 }, { "epoch": 1.62, "learning_rate": 1.3794706723891274e-05, "loss": 1.4921, "step": 54400 }, { "epoch": 1.62, "learning_rate": 1.3764902241297093e-05, "loss": 1.4118, "step": 54500 }, { "epoch": 1.63, "learning_rate": 1.373509775870291e-05, "loss": 1.4373, "step": 54600 }, { "epoch": 1.63, "learning_rate": 1.3705293276108726e-05, "loss": 1.4557, "step": 54700 }, { "epoch": 1.63, "learning_rate": 1.3675488793514545e-05, "loss": 1.4653, "step": 54800 }, { "epoch": 1.64, "learning_rate": 1.3645684310920362e-05, "loss": 1.4442, "step": 54900 }, { "epoch": 1.64, "learning_rate": 1.3615879828326181e-05, "loss": 1.4891, "step": 55000 }, { "epoch": 1.64, "learning_rate": 1.3586075345731998e-05, "loss": 1.4398, "step": 55100 }, { "epoch": 1.65, "learning_rate": 1.3556270863137817e-05, "loss": 1.4589, "step": 55200 }, { "epoch": 1.65, "learning_rate": 1.3526466380543635e-05, "loss": 1.4547, "step": 55300 }, { "epoch": 1.65, "learning_rate": 1.3496661897949452e-05, "loss": 1.4411, "step": 55400 }, { "epoch": 1.65, "learning_rate": 1.346685741535527e-05, "loss": 1.4369, "step": 55500 }, { "epoch": 1.66, "learning_rate": 1.3437052932761087e-05, "loss": 1.4799, "step": 55600 }, { "epoch": 1.66, "learning_rate": 1.3407546494992847e-05, "loss": 1.4088, "step": 55700 }, { "epoch": 1.66, "learning_rate": 1.3377742012398666e-05, "loss": 1.5022, "step": 55800 }, { "epoch": 1.67, "learning_rate": 1.3347937529804483e-05, "loss": 1.454, "step": 55900 }, { "epoch": 1.67, "learning_rate": 1.3318133047210302e-05, "loss": 1.4688, "step": 56000 }, { "epoch": 1.67, "learning_rate": 1.328832856461612e-05, "loss": 1.4207, "step": 56100 }, { "epoch": 1.68, "learning_rate": 1.3258524082021937e-05, "loss": 1.4623, "step": 56200 }, { "epoch": 1.68, "learning_rate": 1.3228719599427754e-05, "loss": 1.4111, "step": 56300 }, { "epoch": 1.68, "learning_rate": 1.3198915116833571e-05, "loss": 1.4048, "step": 56400 }, { "epoch": 1.68, "learning_rate": 1.316911063423939e-05, "loss": 1.5198, "step": 56500 }, { "epoch": 1.69, "learning_rate": 1.3139306151645208e-05, "loss": 1.4427, "step": 56600 }, { "epoch": 1.69, "learning_rate": 1.3109501669051027e-05, "loss": 1.451, "step": 56700 }, { "epoch": 1.69, "learning_rate": 1.3079697186456844e-05, "loss": 1.4568, "step": 56800 }, { "epoch": 1.7, "learning_rate": 1.3049892703862661e-05, "loss": 1.4529, "step": 56900 }, { "epoch": 1.7, "learning_rate": 1.3020088221268479e-05, "loss": 1.4692, "step": 57000 }, { "epoch": 1.7, "learning_rate": 1.2990283738674296e-05, "loss": 1.4401, "step": 57100 }, { "epoch": 1.7, "learning_rate": 1.2960479256080115e-05, "loss": 1.4887, "step": 57200 }, { "epoch": 1.71, "learning_rate": 1.2930674773485932e-05, "loss": 1.4467, "step": 57300 }, { "epoch": 1.71, "learning_rate": 1.2900870290891751e-05, "loss": 1.451, "step": 57400 }, { "epoch": 1.71, "learning_rate": 1.2871065808297569e-05, "loss": 1.4036, "step": 57500 }, { "epoch": 1.72, "learning_rate": 1.2841261325703388e-05, "loss": 1.4618, "step": 57600 }, { "epoch": 1.72, "learning_rate": 1.2811456843109203e-05, "loss": 1.4312, "step": 57700 }, { "epoch": 1.72, "learning_rate": 1.2781950405340963e-05, "loss": 1.4498, "step": 57800 }, { "epoch": 1.73, "learning_rate": 1.275214592274678e-05, "loss": 1.4157, "step": 57900 }, { "epoch": 1.73, "learning_rate": 1.27223414401526e-05, "loss": 1.5118, "step": 58000 }, { "epoch": 1.73, "learning_rate": 1.2692536957558417e-05, "loss": 1.3883, "step": 58100 }, { "epoch": 1.73, "learning_rate": 1.2662732474964236e-05, "loss": 1.4585, "step": 58200 }, { "epoch": 1.74, "learning_rate": 1.2632927992370053e-05, "loss": 1.4856, "step": 58300 }, { "epoch": 1.74, "learning_rate": 1.260312350977587e-05, "loss": 1.4275, "step": 58400 }, { "epoch": 1.74, "learning_rate": 1.2573319027181688e-05, "loss": 1.4411, "step": 58500 }, { "epoch": 1.75, "learning_rate": 1.2543514544587505e-05, "loss": 1.415, "step": 58600 }, { "epoch": 1.75, "learning_rate": 1.2513710061993324e-05, "loss": 1.4709, "step": 58700 }, { "epoch": 1.75, "learning_rate": 1.2483905579399141e-05, "loss": 1.4638, "step": 58800 }, { "epoch": 1.76, "learning_rate": 1.245410109680496e-05, "loss": 1.4267, "step": 58900 }, { "epoch": 1.76, "learning_rate": 1.2424296614210778e-05, "loss": 1.4549, "step": 59000 }, { "epoch": 1.76, "learning_rate": 1.2394492131616597e-05, "loss": 1.4498, "step": 59100 }, { "epoch": 1.76, "learning_rate": 1.2364687649022412e-05, "loss": 1.4167, "step": 59200 }, { "epoch": 1.77, "learning_rate": 1.2334883166428231e-05, "loss": 1.4539, "step": 59300 }, { "epoch": 1.77, "learning_rate": 1.2305078683834049e-05, "loss": 1.4552, "step": 59400 }, { "epoch": 1.77, "learning_rate": 1.2275274201239866e-05, "loss": 1.4319, "step": 59500 }, { "epoch": 1.78, "learning_rate": 1.2245469718645685e-05, "loss": 1.4416, "step": 59600 }, { "epoch": 1.78, "learning_rate": 1.2215665236051502e-05, "loss": 1.4767, "step": 59700 }, { "epoch": 1.78, "learning_rate": 1.2185860753457321e-05, "loss": 1.4194, "step": 59800 }, { "epoch": 1.79, "learning_rate": 1.2156056270863137e-05, "loss": 1.4524, "step": 59900 }, { "epoch": 1.79, "learning_rate": 1.2126251788268956e-05, "loss": 1.3962, "step": 60000 }, { "epoch": 1.79, "learning_rate": 1.2096745350500716e-05, "loss": 1.4556, "step": 60100 }, { "epoch": 1.79, "learning_rate": 1.2067238912732474e-05, "loss": 1.453, "step": 60200 }, { "epoch": 1.8, "learning_rate": 1.2037434430138293e-05, "loss": 1.4252, "step": 60300 }, { "epoch": 1.8, "learning_rate": 1.2007927992370053e-05, "loss": 1.4319, "step": 60400 }, { "epoch": 1.8, "learning_rate": 1.197812350977587e-05, "loss": 1.4448, "step": 60500 }, { "epoch": 1.81, "learning_rate": 1.194831902718169e-05, "loss": 1.4311, "step": 60600 }, { "epoch": 1.81, "learning_rate": 1.1918514544587505e-05, "loss": 1.4623, "step": 60700 }, { "epoch": 1.81, "learning_rate": 1.1888710061993324e-05, "loss": 1.408, "step": 60800 }, { "epoch": 1.82, "learning_rate": 1.1858905579399142e-05, "loss": 1.4497, "step": 60900 }, { "epoch": 1.82, "learning_rate": 1.1829101096804959e-05, "loss": 1.4413, "step": 61000 }, { "epoch": 1.82, "learning_rate": 1.1799296614210778e-05, "loss": 1.4201, "step": 61100 }, { "epoch": 1.82, "learning_rate": 1.1769492131616595e-05, "loss": 1.452, "step": 61200 }, { "epoch": 1.83, "learning_rate": 1.1739687649022414e-05, "loss": 1.4851, "step": 61300 }, { "epoch": 1.83, "learning_rate": 1.1709883166428232e-05, "loss": 1.4997, "step": 61400 }, { "epoch": 1.83, "learning_rate": 1.1680078683834049e-05, "loss": 1.461, "step": 61500 }, { "epoch": 1.84, "learning_rate": 1.1650274201239866e-05, "loss": 1.3893, "step": 61600 }, { "epoch": 1.84, "learning_rate": 1.1620767763471626e-05, "loss": 1.48, "step": 61700 }, { "epoch": 1.84, "learning_rate": 1.1590963280877443e-05, "loss": 1.451, "step": 61800 }, { "epoch": 1.84, "learning_rate": 1.1561158798283262e-05, "loss": 1.472, "step": 61900 }, { "epoch": 1.85, "learning_rate": 1.153135431568908e-05, "loss": 1.446, "step": 62000 }, { "epoch": 1.85, "learning_rate": 1.1501549833094899e-05, "loss": 1.4504, "step": 62100 }, { "epoch": 1.85, "learning_rate": 1.1471745350500716e-05, "loss": 1.4677, "step": 62200 }, { "epoch": 1.86, "learning_rate": 1.1441940867906533e-05, "loss": 1.4301, "step": 62300 }, { "epoch": 1.86, "learning_rate": 1.141213638531235e-05, "loss": 1.4207, "step": 62400 }, { "epoch": 1.86, "learning_rate": 1.138233190271817e-05, "loss": 1.4144, "step": 62500 }, { "epoch": 1.87, "learning_rate": 1.1352527420123987e-05, "loss": 1.4468, "step": 62600 }, { "epoch": 1.87, "learning_rate": 1.1322722937529804e-05, "loss": 1.4232, "step": 62700 }, { "epoch": 1.87, "learning_rate": 1.1292918454935623e-05, "loss": 1.4328, "step": 62800 }, { "epoch": 1.87, "learning_rate": 1.126311397234144e-05, "loss": 1.4364, "step": 62900 }, { "epoch": 1.88, "learning_rate": 1.1233309489747258e-05, "loss": 1.4485, "step": 63000 }, { "epoch": 1.88, "learning_rate": 1.1203505007153075e-05, "loss": 1.4054, "step": 63100 }, { "epoch": 1.88, "learning_rate": 1.1173700524558894e-05, "loss": 1.438, "step": 63200 }, { "epoch": 1.89, "learning_rate": 1.1143896041964712e-05, "loss": 1.4417, "step": 63300 }, { "epoch": 1.89, "learning_rate": 1.1114091559370529e-05, "loss": 1.5028, "step": 63400 }, { "epoch": 1.89, "learning_rate": 1.1084287076776348e-05, "loss": 1.424, "step": 63500 }, { "epoch": 1.9, "learning_rate": 1.1054482594182165e-05, "loss": 1.3832, "step": 63600 }, { "epoch": 1.9, "learning_rate": 1.1024678111587983e-05, "loss": 1.413, "step": 63700 }, { "epoch": 1.9, "learning_rate": 1.09948736289938e-05, "loss": 1.4512, "step": 63800 }, { "epoch": 1.9, "learning_rate": 1.0965069146399619e-05, "loss": 1.4262, "step": 63900 }, { "epoch": 1.91, "learning_rate": 1.0935264663805436e-05, "loss": 1.4745, "step": 64000 }, { "epoch": 1.91, "learning_rate": 1.0905460181211254e-05, "loss": 1.4105, "step": 64100 }, { "epoch": 1.91, "learning_rate": 1.0875655698617073e-05, "loss": 1.4641, "step": 64200 }, { "epoch": 1.92, "learning_rate": 1.084585121602289e-05, "loss": 1.4694, "step": 64300 }, { "epoch": 1.92, "learning_rate": 1.0816046733428709e-05, "loss": 1.4276, "step": 64400 }, { "epoch": 1.92, "learning_rate": 1.0786242250834525e-05, "loss": 1.4694, "step": 64500 }, { "epoch": 1.93, "learning_rate": 1.0756437768240344e-05, "loss": 1.4874, "step": 64600 }, { "epoch": 1.93, "learning_rate": 1.0726633285646161e-05, "loss": 1.4515, "step": 64700 }, { "epoch": 1.93, "learning_rate": 1.069682880305198e-05, "loss": 1.4539, "step": 64800 }, { "epoch": 1.93, "learning_rate": 1.0667024320457797e-05, "loss": 1.3871, "step": 64900 }, { "epoch": 1.94, "learning_rate": 1.0637219837863615e-05, "loss": 1.4109, "step": 65000 }, { "epoch": 1.94, "learning_rate": 1.0607415355269434e-05, "loss": 1.4384, "step": 65100 }, { "epoch": 1.94, "learning_rate": 1.057761087267525e-05, "loss": 1.4199, "step": 65200 }, { "epoch": 1.95, "learning_rate": 1.0547806390081068e-05, "loss": 1.46, "step": 65300 }, { "epoch": 1.95, "learning_rate": 1.0518001907486886e-05, "loss": 1.4389, "step": 65400 }, { "epoch": 1.95, "learning_rate": 1.0488197424892705e-05, "loss": 1.425, "step": 65500 }, { "epoch": 1.96, "learning_rate": 1.0458392942298522e-05, "loss": 1.4028, "step": 65600 }, { "epoch": 1.96, "learning_rate": 1.042858845970434e-05, "loss": 1.4536, "step": 65700 }, { "epoch": 1.96, "learning_rate": 1.0398783977110158e-05, "loss": 1.4986, "step": 65800 }, { "epoch": 1.96, "learning_rate": 1.0368979494515976e-05, "loss": 1.4005, "step": 65900 }, { "epoch": 1.97, "learning_rate": 1.0339175011921793e-05, "loss": 1.4286, "step": 66000 }, { "epoch": 1.97, "learning_rate": 1.030937052932761e-05, "loss": 1.4028, "step": 66100 }, { "epoch": 1.97, "learning_rate": 1.027956604673343e-05, "loss": 1.4589, "step": 66200 }, { "epoch": 1.98, "learning_rate": 1.025005960896519e-05, "loss": 1.4169, "step": 66300 }, { "epoch": 1.98, "learning_rate": 1.0220255126371007e-05, "loss": 1.4127, "step": 66400 }, { "epoch": 1.98, "learning_rate": 1.0190450643776824e-05, "loss": 1.4243, "step": 66500 }, { "epoch": 1.98, "learning_rate": 1.0160646161182643e-05, "loss": 1.3892, "step": 66600 }, { "epoch": 1.99, "learning_rate": 1.0130841678588459e-05, "loss": 1.4301, "step": 66700 }, { "epoch": 1.99, "learning_rate": 1.0101037195994278e-05, "loss": 1.4429, "step": 66800 }, { "epoch": 1.99, "learning_rate": 1.0071232713400095e-05, "loss": 1.4028, "step": 66900 }, { "epoch": 2.0, "learning_rate": 1.0041428230805914e-05, "loss": 1.4174, "step": 67000 }, { "epoch": 2.0, "learning_rate": 1.0011623748211731e-05, "loss": 1.3446, "step": 67100 }, { "epoch": 2.0, "eval_gen_len": 18.9115, "eval_loss": 1.5060008764266968, "eval_rouge1": 32.1108, "eval_rouge2": 17.1408, "eval_rougeL": 27.7833, "eval_rougeLsum": 27.7703, "eval_runtime": 292.3222, "eval_samples_per_second": 9.277, "eval_steps_per_second": 1.16, "step": 67104 }, { "epoch": 2.0, "learning_rate": 9.981819265617549e-06, "loss": 1.3076, "step": 67200 }, { "epoch": 2.01, "learning_rate": 9.952014783023368e-06, "loss": 1.3116, "step": 67300 }, { "epoch": 2.01, "learning_rate": 9.922210300429185e-06, "loss": 1.352, "step": 67400 }, { "epoch": 2.01, "learning_rate": 9.892405817835002e-06, "loss": 1.3229, "step": 67500 }, { "epoch": 2.01, "learning_rate": 9.86260133524082e-06, "loss": 1.2791, "step": 67600 }, { "epoch": 2.02, "learning_rate": 9.832796852646639e-06, "loss": 1.3413, "step": 67700 }, { "epoch": 2.02, "learning_rate": 9.802992370052456e-06, "loss": 1.313, "step": 67800 }, { "epoch": 2.02, "learning_rate": 9.773187887458275e-06, "loss": 1.3097, "step": 67900 }, { "epoch": 2.03, "learning_rate": 9.743383404864092e-06, "loss": 1.3202, "step": 68000 }, { "epoch": 2.03, "learning_rate": 9.71357892226991e-06, "loss": 1.3114, "step": 68100 }, { "epoch": 2.03, "learning_rate": 9.683774439675727e-06, "loss": 1.2995, "step": 68200 }, { "epoch": 2.04, "learning_rate": 9.653969957081544e-06, "loss": 1.2941, "step": 68300 }, { "epoch": 2.04, "learning_rate": 9.624165474487363e-06, "loss": 1.3195, "step": 68400 }, { "epoch": 2.04, "learning_rate": 9.59436099189318e-06, "loss": 1.3223, "step": 68500 }, { "epoch": 2.04, "learning_rate": 9.564556509299e-06, "loss": 1.2635, "step": 68600 }, { "epoch": 2.05, "learning_rate": 9.534752026704817e-06, "loss": 1.3237, "step": 68700 }, { "epoch": 2.05, "learning_rate": 9.504947544110634e-06, "loss": 1.3631, "step": 68800 }, { "epoch": 2.05, "learning_rate": 9.475143061516453e-06, "loss": 1.3345, "step": 68900 }, { "epoch": 2.06, "learning_rate": 9.445338578922269e-06, "loss": 1.2867, "step": 69000 }, { "epoch": 2.06, "learning_rate": 9.415534096328088e-06, "loss": 1.2966, "step": 69100 }, { "epoch": 2.06, "learning_rate": 9.385729613733905e-06, "loss": 1.3146, "step": 69200 }, { "epoch": 2.07, "learning_rate": 9.355925131139724e-06, "loss": 1.3152, "step": 69300 }, { "epoch": 2.07, "learning_rate": 9.326120648545542e-06, "loss": 1.2995, "step": 69400 }, { "epoch": 2.07, "learning_rate": 9.29631616595136e-06, "loss": 1.3113, "step": 69500 }, { "epoch": 2.07, "learning_rate": 9.266511683357178e-06, "loss": 1.3072, "step": 69600 }, { "epoch": 2.08, "learning_rate": 9.236707200762994e-06, "loss": 1.2986, "step": 69700 }, { "epoch": 2.08, "learning_rate": 9.206902718168813e-06, "loss": 1.3205, "step": 69800 }, { "epoch": 2.08, "learning_rate": 9.17709823557463e-06, "loss": 1.3499, "step": 69900 }, { "epoch": 2.09, "learning_rate": 9.147293752980449e-06, "loss": 1.3307, "step": 70000 }, { "epoch": 2.09, "learning_rate": 9.117489270386266e-06, "loss": 1.3387, "step": 70100 }, { "epoch": 2.09, "learning_rate": 9.087684787792085e-06, "loss": 1.3261, "step": 70200 }, { "epoch": 2.1, "learning_rate": 9.058178350023845e-06, "loss": 1.2811, "step": 70300 }, { "epoch": 2.1, "learning_rate": 9.028373867429663e-06, "loss": 1.2962, "step": 70400 }, { "epoch": 2.1, "learning_rate": 8.998569384835478e-06, "loss": 1.3124, "step": 70500 }, { "epoch": 2.1, "learning_rate": 8.968764902241297e-06, "loss": 1.3171, "step": 70600 }, { "epoch": 2.11, "learning_rate": 8.938960419647115e-06, "loss": 1.3073, "step": 70700 }, { "epoch": 2.11, "learning_rate": 8.909155937052934e-06, "loss": 1.3202, "step": 70800 }, { "epoch": 2.11, "learning_rate": 8.879351454458751e-06, "loss": 1.3314, "step": 70900 }, { "epoch": 2.12, "learning_rate": 8.84954697186457e-06, "loss": 1.3105, "step": 71000 }, { "epoch": 2.12, "learning_rate": 8.819742489270387e-06, "loss": 1.3241, "step": 71100 }, { "epoch": 2.12, "learning_rate": 8.789938006676203e-06, "loss": 1.3228, "step": 71200 }, { "epoch": 2.13, "learning_rate": 8.760133524082022e-06, "loss": 1.3377, "step": 71300 }, { "epoch": 2.13, "learning_rate": 8.73032904148784e-06, "loss": 1.263, "step": 71400 }, { "epoch": 2.13, "learning_rate": 8.700524558893658e-06, "loss": 1.2543, "step": 71500 }, { "epoch": 2.13, "learning_rate": 8.670720076299476e-06, "loss": 1.3436, "step": 71600 }, { "epoch": 2.14, "learning_rate": 8.640915593705295e-06, "loss": 1.3391, "step": 71700 }, { "epoch": 2.14, "learning_rate": 8.611111111111112e-06, "loss": 1.3108, "step": 71800 }, { "epoch": 2.14, "learning_rate": 8.58130662851693e-06, "loss": 1.2825, "step": 71900 }, { "epoch": 2.15, "learning_rate": 8.551502145922747e-06, "loss": 1.3004, "step": 72000 }, { "epoch": 2.15, "learning_rate": 8.521697663328564e-06, "loss": 1.312, "step": 72100 }, { "epoch": 2.15, "learning_rate": 8.491893180734383e-06, "loss": 1.3004, "step": 72200 }, { "epoch": 2.15, "learning_rate": 8.4620886981402e-06, "loss": 1.3638, "step": 72300 }, { "epoch": 2.16, "learning_rate": 8.43228421554602e-06, "loss": 1.3283, "step": 72400 }, { "epoch": 2.16, "learning_rate": 8.402479732951837e-06, "loss": 1.2835, "step": 72500 }, { "epoch": 2.16, "learning_rate": 8.372675250357656e-06, "loss": 1.3222, "step": 72600 }, { "epoch": 2.17, "learning_rate": 8.342870767763471e-06, "loss": 1.3031, "step": 72700 }, { "epoch": 2.17, "learning_rate": 8.313066285169288e-06, "loss": 1.3239, "step": 72800 }, { "epoch": 2.17, "learning_rate": 8.283261802575108e-06, "loss": 1.3093, "step": 72900 }, { "epoch": 2.18, "learning_rate": 8.253457319980925e-06, "loss": 1.3053, "step": 73000 }, { "epoch": 2.18, "learning_rate": 8.223652837386744e-06, "loss": 1.3409, "step": 73100 }, { "epoch": 2.18, "learning_rate": 8.193848354792561e-06, "loss": 1.3406, "step": 73200 }, { "epoch": 2.18, "learning_rate": 8.16404387219838e-06, "loss": 1.3051, "step": 73300 }, { "epoch": 2.19, "learning_rate": 8.134239389604196e-06, "loss": 1.2662, "step": 73400 }, { "epoch": 2.19, "learning_rate": 8.104434907010013e-06, "loss": 1.3195, "step": 73500 }, { "epoch": 2.19, "learning_rate": 8.074630424415832e-06, "loss": 1.2922, "step": 73600 }, { "epoch": 2.2, "learning_rate": 8.04482594182165e-06, "loss": 1.3208, "step": 73700 }, { "epoch": 2.2, "learning_rate": 8.015021459227469e-06, "loss": 1.3047, "step": 73800 }, { "epoch": 2.2, "learning_rate": 7.985216976633286e-06, "loss": 1.3148, "step": 73900 }, { "epoch": 2.21, "learning_rate": 7.955412494039105e-06, "loss": 1.2982, "step": 74000 }, { "epoch": 2.21, "learning_rate": 7.925608011444922e-06, "loss": 1.323, "step": 74100 }, { "epoch": 2.21, "learning_rate": 7.895803528850738e-06, "loss": 1.2963, "step": 74200 }, { "epoch": 2.21, "learning_rate": 7.865999046256557e-06, "loss": 1.2981, "step": 74300 }, { "epoch": 2.22, "learning_rate": 7.836194563662374e-06, "loss": 1.2817, "step": 74400 }, { "epoch": 2.22, "learning_rate": 7.806390081068193e-06, "loss": 1.3061, "step": 74500 }, { "epoch": 2.22, "learning_rate": 7.77658559847401e-06, "loss": 1.302, "step": 74600 }, { "epoch": 2.23, "learning_rate": 7.74678111587983e-06, "loss": 1.3064, "step": 74700 }, { "epoch": 2.23, "learning_rate": 7.716976633285647e-06, "loss": 1.2704, "step": 74800 }, { "epoch": 2.23, "learning_rate": 7.687172150691464e-06, "loss": 1.2871, "step": 74900 }, { "epoch": 2.24, "learning_rate": 7.657367668097281e-06, "loss": 1.3032, "step": 75000 }, { "epoch": 2.24, "learning_rate": 7.627563185503099e-06, "loss": 1.3276, "step": 75100 }, { "epoch": 2.24, "learning_rate": 7.597758702908918e-06, "loss": 1.2922, "step": 75200 }, { "epoch": 2.24, "learning_rate": 7.567954220314735e-06, "loss": 1.3013, "step": 75300 }, { "epoch": 2.25, "learning_rate": 7.538149737720554e-06, "loss": 1.3174, "step": 75400 }, { "epoch": 2.25, "learning_rate": 7.508345255126371e-06, "loss": 1.3155, "step": 75500 }, { "epoch": 2.25, "learning_rate": 7.478540772532189e-06, "loss": 1.3232, "step": 75600 }, { "epoch": 2.26, "learning_rate": 7.448736289938007e-06, "loss": 1.2449, "step": 75700 }, { "epoch": 2.26, "learning_rate": 7.418931807343825e-06, "loss": 1.297, "step": 75800 }, { "epoch": 2.26, "learning_rate": 7.3891273247496425e-06, "loss": 1.3049, "step": 75900 }, { "epoch": 2.27, "learning_rate": 7.359322842155461e-06, "loss": 1.2848, "step": 76000 }, { "epoch": 2.27, "learning_rate": 7.329816404387221e-06, "loss": 1.2833, "step": 76100 }, { "epoch": 2.27, "learning_rate": 7.300011921793038e-06, "loss": 1.281, "step": 76200 }, { "epoch": 2.27, "learning_rate": 7.270207439198855e-06, "loss": 1.3107, "step": 76300 }, { "epoch": 2.28, "learning_rate": 7.240402956604673e-06, "loss": 1.2745, "step": 76400 }, { "epoch": 2.28, "learning_rate": 7.210598474010492e-06, "loss": 1.3614, "step": 76500 }, { "epoch": 2.28, "learning_rate": 7.180793991416309e-06, "loss": 1.2813, "step": 76600 }, { "epoch": 2.29, "learning_rate": 7.150989508822127e-06, "loss": 1.3131, "step": 76700 }, { "epoch": 2.29, "learning_rate": 7.121185026227945e-06, "loss": 1.2976, "step": 76800 }, { "epoch": 2.29, "learning_rate": 7.091380543633763e-06, "loss": 1.3525, "step": 76900 }, { "epoch": 2.29, "learning_rate": 7.06157606103958e-06, "loss": 1.3217, "step": 77000 }, { "epoch": 2.3, "learning_rate": 7.031771578445398e-06, "loss": 1.2728, "step": 77100 }, { "epoch": 2.3, "learning_rate": 7.001967095851216e-06, "loss": 1.3291, "step": 77200 }, { "epoch": 2.3, "learning_rate": 6.972162613257034e-06, "loss": 1.3162, "step": 77300 }, { "epoch": 2.31, "learning_rate": 6.942358130662852e-06, "loss": 1.273, "step": 77400 }, { "epoch": 2.31, "learning_rate": 6.91255364806867e-06, "loss": 1.2917, "step": 77500 }, { "epoch": 2.31, "learning_rate": 6.882749165474488e-06, "loss": 1.3109, "step": 77600 }, { "epoch": 2.32, "learning_rate": 6.8529446828803046e-06, "loss": 1.3239, "step": 77700 }, { "epoch": 2.32, "learning_rate": 6.823140200286123e-06, "loss": 1.3042, "step": 77800 }, { "epoch": 2.32, "learning_rate": 6.793335717691941e-06, "loss": 1.2799, "step": 77900 }, { "epoch": 2.32, "learning_rate": 6.763531235097759e-06, "loss": 1.3083, "step": 78000 }, { "epoch": 2.33, "learning_rate": 6.733726752503576e-06, "loss": 1.2967, "step": 78100 }, { "epoch": 2.33, "learning_rate": 6.7039222699093946e-06, "loss": 1.3231, "step": 78200 }, { "epoch": 2.33, "learning_rate": 6.674117787315213e-06, "loss": 1.3679, "step": 78300 }, { "epoch": 2.34, "learning_rate": 6.644313304721031e-06, "loss": 1.3178, "step": 78400 }, { "epoch": 2.34, "learning_rate": 6.614508822126847e-06, "loss": 1.305, "step": 78500 }, { "epoch": 2.34, "learning_rate": 6.5847043395326656e-06, "loss": 1.329, "step": 78600 }, { "epoch": 2.35, "learning_rate": 6.554899856938484e-06, "loss": 1.3147, "step": 78700 }, { "epoch": 2.35, "learning_rate": 6.525095374344301e-06, "loss": 1.3129, "step": 78800 }, { "epoch": 2.35, "learning_rate": 6.495290891750119e-06, "loss": 1.295, "step": 78900 }, { "epoch": 2.35, "learning_rate": 6.465486409155937e-06, "loss": 1.3236, "step": 79000 }, { "epoch": 2.36, "learning_rate": 6.4356819265617556e-06, "loss": 1.3012, "step": 79100 }, { "epoch": 2.36, "learning_rate": 6.405877443967573e-06, "loss": 1.2792, "step": 79200 }, { "epoch": 2.36, "learning_rate": 6.37607296137339e-06, "loss": 1.3223, "step": 79300 }, { "epoch": 2.37, "learning_rate": 6.346268478779208e-06, "loss": 1.3346, "step": 79400 }, { "epoch": 2.37, "learning_rate": 6.3164639961850266e-06, "loss": 1.3006, "step": 79500 }, { "epoch": 2.37, "learning_rate": 6.286659513590844e-06, "loss": 1.3093, "step": 79600 }, { "epoch": 2.38, "learning_rate": 6.256855030996662e-06, "loss": 1.33, "step": 79700 }, { "epoch": 2.38, "learning_rate": 6.22705054840248e-06, "loss": 1.3127, "step": 79800 }, { "epoch": 2.38, "learning_rate": 6.197246065808298e-06, "loss": 1.2684, "step": 79900 }, { "epoch": 2.38, "learning_rate": 6.167441583214116e-06, "loss": 1.2938, "step": 80000 }, { "epoch": 2.39, "learning_rate": 6.137637100619933e-06, "loss": 1.3103, "step": 80100 }, { "epoch": 2.39, "learning_rate": 6.107832618025751e-06, "loss": 1.307, "step": 80200 }, { "epoch": 2.39, "learning_rate": 6.0780281354315685e-06, "loss": 1.327, "step": 80300 }, { "epoch": 2.4, "learning_rate": 6.048223652837387e-06, "loss": 1.292, "step": 80400 }, { "epoch": 2.4, "learning_rate": 6.018419170243205e-06, "loss": 1.3067, "step": 80500 }, { "epoch": 2.4, "learning_rate": 5.988614687649023e-06, "loss": 1.2973, "step": 80600 }, { "epoch": 2.41, "learning_rate": 5.95881020505484e-06, "loss": 1.3501, "step": 80700 }, { "epoch": 2.41, "learning_rate": 5.9290057224606586e-06, "loss": 1.2745, "step": 80800 }, { "epoch": 2.41, "learning_rate": 5.899201239866476e-06, "loss": 1.3246, "step": 80900 }, { "epoch": 2.41, "learning_rate": 5.869694802098236e-06, "loss": 1.3207, "step": 81000 }, { "epoch": 2.42, "learning_rate": 5.839890319504053e-06, "loss": 1.2823, "step": 81100 }, { "epoch": 2.42, "learning_rate": 5.810085836909871e-06, "loss": 1.307, "step": 81200 }, { "epoch": 2.42, "learning_rate": 5.7802813543156895e-06, "loss": 1.3289, "step": 81300 }, { "epoch": 2.43, "learning_rate": 5.750476871721508e-06, "loss": 1.271, "step": 81400 }, { "epoch": 2.43, "learning_rate": 5.720672389127325e-06, "loss": 1.3111, "step": 81500 }, { "epoch": 2.43, "learning_rate": 5.690867906533142e-06, "loss": 1.3137, "step": 81600 }, { "epoch": 2.44, "learning_rate": 5.6610634239389605e-06, "loss": 1.3168, "step": 81700 }, { "epoch": 2.44, "learning_rate": 5.631258941344778e-06, "loss": 1.3243, "step": 81800 }, { "epoch": 2.44, "learning_rate": 5.601454458750596e-06, "loss": 1.2846, "step": 81900 }, { "epoch": 2.44, "learning_rate": 5.571649976156414e-06, "loss": 1.3359, "step": 82000 }, { "epoch": 2.45, "learning_rate": 5.541845493562232e-06, "loss": 1.312, "step": 82100 }, { "epoch": 2.45, "learning_rate": 5.51204101096805e-06, "loss": 1.304, "step": 82200 }, { "epoch": 2.45, "learning_rate": 5.482236528373868e-06, "loss": 1.3005, "step": 82300 }, { "epoch": 2.46, "learning_rate": 5.452432045779685e-06, "loss": 1.3196, "step": 82400 }, { "epoch": 2.46, "learning_rate": 5.422925608011445e-06, "loss": 1.325, "step": 82500 }, { "epoch": 2.46, "learning_rate": 5.393121125417262e-06, "loss": 1.2993, "step": 82600 }, { "epoch": 2.46, "learning_rate": 5.3633166428230805e-06, "loss": 1.3078, "step": 82700 }, { "epoch": 2.47, "learning_rate": 5.333512160228899e-06, "loss": 1.2723, "step": 82800 }, { "epoch": 2.47, "learning_rate": 5.303707677634717e-06, "loss": 1.3061, "step": 82900 }, { "epoch": 2.47, "learning_rate": 5.273903195040534e-06, "loss": 1.2831, "step": 83000 }, { "epoch": 2.48, "learning_rate": 5.244098712446352e-06, "loss": 1.3367, "step": 83100 }, { "epoch": 2.48, "learning_rate": 5.21429422985217e-06, "loss": 1.3299, "step": 83200 }, { "epoch": 2.48, "learning_rate": 5.184489747257988e-06, "loss": 1.2967, "step": 83300 }, { "epoch": 2.49, "learning_rate": 5.154685264663805e-06, "loss": 1.2826, "step": 83400 }, { "epoch": 2.49, "learning_rate": 5.124880782069623e-06, "loss": 1.2924, "step": 83500 }, { "epoch": 2.49, "learning_rate": 5.0950762994754415e-06, "loss": 1.287, "step": 83600 }, { "epoch": 2.49, "learning_rate": 5.065271816881259e-06, "loss": 1.3353, "step": 83700 }, { "epoch": 2.5, "learning_rate": 5.035467334287077e-06, "loss": 1.2794, "step": 83800 }, { "epoch": 2.5, "learning_rate": 5.005960896518837e-06, "loss": 1.2811, "step": 83900 }, { "epoch": 2.5, "learning_rate": 4.976156413924654e-06, "loss": 1.2638, "step": 84000 }, { "epoch": 2.51, "learning_rate": 4.946351931330472e-06, "loss": 1.34, "step": 84100 }, { "epoch": 2.51, "learning_rate": 4.91654744873629e-06, "loss": 1.2886, "step": 84200 }, { "epoch": 2.51, "learning_rate": 4.886742966142108e-06, "loss": 1.2936, "step": 84300 }, { "epoch": 2.52, "learning_rate": 4.856938483547926e-06, "loss": 1.2905, "step": 84400 }, { "epoch": 2.52, "learning_rate": 4.8271340009537434e-06, "loss": 1.3039, "step": 84500 }, { "epoch": 2.52, "learning_rate": 4.797329518359562e-06, "loss": 1.2702, "step": 84600 }, { "epoch": 2.52, "learning_rate": 4.76752503576538e-06, "loss": 1.3245, "step": 84700 }, { "epoch": 2.53, "learning_rate": 4.737720553171197e-06, "loss": 1.298, "step": 84800 }, { "epoch": 2.53, "learning_rate": 4.7079160705770144e-06, "loss": 1.3183, "step": 84900 }, { "epoch": 2.53, "learning_rate": 4.678111587982833e-06, "loss": 1.2971, "step": 85000 }, { "epoch": 2.54, "learning_rate": 4.648307105388651e-06, "loss": 1.2877, "step": 85100 }, { "epoch": 2.54, "learning_rate": 4.618502622794468e-06, "loss": 1.2796, "step": 85200 }, { "epoch": 2.54, "learning_rate": 4.588698140200286e-06, "loss": 1.325, "step": 85300 }, { "epoch": 2.55, "learning_rate": 4.5588936576061044e-06, "loss": 1.3102, "step": 85400 }, { "epoch": 2.55, "learning_rate": 4.529089175011923e-06, "loss": 1.3211, "step": 85500 }, { "epoch": 2.55, "learning_rate": 4.499284692417739e-06, "loss": 1.2872, "step": 85600 }, { "epoch": 2.55, "learning_rate": 4.469480209823557e-06, "loss": 1.3031, "step": 85700 }, { "epoch": 2.56, "learning_rate": 4.4396757272293754e-06, "loss": 1.3126, "step": 85800 }, { "epoch": 2.56, "learning_rate": 4.409871244635194e-06, "loss": 1.3138, "step": 85900 }, { "epoch": 2.56, "learning_rate": 4.380066762041011e-06, "loss": 1.3186, "step": 86000 }, { "epoch": 2.57, "learning_rate": 4.350262279446829e-06, "loss": 1.3243, "step": 86100 }, { "epoch": 2.57, "learning_rate": 4.320457796852647e-06, "loss": 1.2839, "step": 86200 }, { "epoch": 2.57, "learning_rate": 4.290653314258465e-06, "loss": 1.266, "step": 86300 }, { "epoch": 2.58, "learning_rate": 4.260848831664282e-06, "loss": 1.2661, "step": 86400 }, { "epoch": 2.58, "learning_rate": 4.2310443490701e-06, "loss": 1.2955, "step": 86500 }, { "epoch": 2.58, "learning_rate": 4.201239866475918e-06, "loss": 1.2725, "step": 86600 }, { "epoch": 2.58, "learning_rate": 4.171435383881736e-06, "loss": 1.3274, "step": 86700 }, { "epoch": 2.59, "learning_rate": 4.141630901287554e-06, "loss": 1.291, "step": 86800 }, { "epoch": 2.59, "learning_rate": 4.111826418693372e-06, "loss": 1.313, "step": 86900 }, { "epoch": 2.59, "learning_rate": 4.08202193609919e-06, "loss": 1.3116, "step": 87000 }, { "epoch": 2.6, "learning_rate": 4.052217453505007e-06, "loss": 1.2802, "step": 87100 }, { "epoch": 2.6, "learning_rate": 4.022412970910825e-06, "loss": 1.3158, "step": 87200 }, { "epoch": 2.6, "learning_rate": 3.992608488316643e-06, "loss": 1.2753, "step": 87300 }, { "epoch": 2.6, "learning_rate": 3.962804005722461e-06, "loss": 1.2926, "step": 87400 }, { "epoch": 2.61, "learning_rate": 3.932999523128278e-06, "loss": 1.2848, "step": 87500 }, { "epoch": 2.61, "learning_rate": 3.903195040534097e-06, "loss": 1.3019, "step": 87600 }, { "epoch": 2.61, "learning_rate": 3.873390557939915e-06, "loss": 1.3003, "step": 87700 }, { "epoch": 2.62, "learning_rate": 3.843586075345732e-06, "loss": 1.2979, "step": 87800 }, { "epoch": 2.62, "learning_rate": 3.8137815927515494e-06, "loss": 1.2929, "step": 87900 }, { "epoch": 2.62, "learning_rate": 3.7839771101573676e-06, "loss": 1.2944, "step": 88000 }, { "epoch": 2.63, "learning_rate": 3.7541726275631853e-06, "loss": 1.2952, "step": 88100 }, { "epoch": 2.63, "learning_rate": 3.7243681449690035e-06, "loss": 1.3206, "step": 88200 }, { "epoch": 2.63, "learning_rate": 3.6945636623748213e-06, "loss": 1.272, "step": 88300 }, { "epoch": 2.63, "learning_rate": 3.6647591797806394e-06, "loss": 1.3305, "step": 88400 }, { "epoch": 2.64, "learning_rate": 3.6349546971864567e-06, "loss": 1.2863, "step": 88500 }, { "epoch": 2.64, "learning_rate": 3.605150214592275e-06, "loss": 1.3274, "step": 88600 }, { "epoch": 2.64, "learning_rate": 3.5753457319980927e-06, "loss": 1.2402, "step": 88700 }, { "epoch": 2.65, "learning_rate": 3.545541249403911e-06, "loss": 1.2966, "step": 88800 }, { "epoch": 2.65, "learning_rate": 3.515736766809728e-06, "loss": 1.2915, "step": 88900 }, { "epoch": 2.65, "learning_rate": 3.485932284215546e-06, "loss": 1.2973, "step": 89000 }, { "epoch": 2.66, "learning_rate": 3.456127801621364e-06, "loss": 1.2655, "step": 89100 }, { "epoch": 2.66, "learning_rate": 3.4263233190271814e-06, "loss": 1.305, "step": 89200 }, { "epoch": 2.66, "learning_rate": 3.3965188364329996e-06, "loss": 1.2942, "step": 89300 }, { "epoch": 2.66, "learning_rate": 3.3667143538388173e-06, "loss": 1.2901, "step": 89400 }, { "epoch": 2.67, "learning_rate": 3.3369098712446355e-06, "loss": 1.2841, "step": 89500 }, { "epoch": 2.67, "learning_rate": 3.307105388650453e-06, "loss": 1.3179, "step": 89600 }, { "epoch": 2.67, "learning_rate": 3.277300906056271e-06, "loss": 1.338, "step": 89700 }, { "epoch": 2.68, "learning_rate": 3.2474964234620887e-06, "loss": 1.2943, "step": 89800 }, { "epoch": 2.68, "learning_rate": 3.217691940867907e-06, "loss": 1.3189, "step": 89900 }, { "epoch": 2.68, "learning_rate": 3.1878874582737242e-06, "loss": 1.3266, "step": 90000 }, { "epoch": 2.69, "learning_rate": 3.1580829756795424e-06, "loss": 1.3228, "step": 90100 }, { "epoch": 2.69, "learning_rate": 3.12827849308536e-06, "loss": 1.297, "step": 90200 }, { "epoch": 2.69, "learning_rate": 3.098474010491178e-06, "loss": 1.2824, "step": 90300 }, { "epoch": 2.69, "learning_rate": 3.0686695278969957e-06, "loss": 1.3217, "step": 90400 }, { "epoch": 2.7, "learning_rate": 3.0388650453028134e-06, "loss": 1.2884, "step": 90500 }, { "epoch": 2.7, "learning_rate": 3.0090605627086316e-06, "loss": 1.2947, "step": 90600 }, { "epoch": 2.7, "learning_rate": 2.9792560801144493e-06, "loss": 1.2985, "step": 90700 }, { "epoch": 2.71, "learning_rate": 2.949451597520267e-06, "loss": 1.3063, "step": 90800 }, { "epoch": 2.71, "learning_rate": 2.919647114926085e-06, "loss": 1.2729, "step": 90900 }, { "epoch": 2.71, "learning_rate": 2.889842632331903e-06, "loss": 1.3206, "step": 91000 }, { "epoch": 2.72, "learning_rate": 2.8600381497377207e-06, "loss": 1.2862, "step": 91100 }, { "epoch": 2.72, "learning_rate": 2.8302336671435385e-06, "loss": 1.3047, "step": 91200 }, { "epoch": 2.72, "learning_rate": 2.8004291845493562e-06, "loss": 1.3283, "step": 91300 }, { "epoch": 2.72, "learning_rate": 2.7706247019551744e-06, "loss": 1.3355, "step": 91400 }, { "epoch": 2.73, "learning_rate": 2.740820219360992e-06, "loss": 1.294, "step": 91500 }, { "epoch": 2.73, "learning_rate": 2.7110157367668095e-06, "loss": 1.2771, "step": 91600 }, { "epoch": 2.73, "learning_rate": 2.6812112541726277e-06, "loss": 1.2671, "step": 91700 }, { "epoch": 2.74, "learning_rate": 2.6514067715784454e-06, "loss": 1.2855, "step": 91800 }, { "epoch": 2.74, "learning_rate": 2.6216022889842636e-06, "loss": 1.2664, "step": 91900 }, { "epoch": 2.74, "learning_rate": 2.5920958512160226e-06, "loss": 1.3065, "step": 92000 }, { "epoch": 2.74, "learning_rate": 2.562291368621841e-06, "loss": 1.2835, "step": 92100 }, { "epoch": 2.75, "learning_rate": 2.5324868860276586e-06, "loss": 1.2752, "step": 92200 }, { "epoch": 2.75, "learning_rate": 2.5026824034334767e-06, "loss": 1.3331, "step": 92300 }, { "epoch": 2.75, "learning_rate": 2.472877920839294e-06, "loss": 1.2784, "step": 92400 }, { "epoch": 2.76, "learning_rate": 2.4430734382451122e-06, "loss": 1.3154, "step": 92500 }, { "epoch": 2.76, "learning_rate": 2.41326895565093e-06, "loss": 1.2989, "step": 92600 }, { "epoch": 2.76, "learning_rate": 2.383464473056748e-06, "loss": 1.2854, "step": 92700 }, { "epoch": 2.77, "learning_rate": 2.3536599904625655e-06, "loss": 1.3367, "step": 92800 }, { "epoch": 2.77, "learning_rate": 2.3238555078683836e-06, "loss": 1.2928, "step": 92900 }, { "epoch": 2.77, "learning_rate": 2.2940510252742014e-06, "loss": 1.2888, "step": 93000 }, { "epoch": 2.77, "learning_rate": 2.264246542680019e-06, "loss": 1.2786, "step": 93100 }, { "epoch": 2.78, "learning_rate": 2.234442060085837e-06, "loss": 1.2806, "step": 93200 }, { "epoch": 2.78, "learning_rate": 2.2046375774916546e-06, "loss": 1.3047, "step": 93300 }, { "epoch": 2.78, "learning_rate": 2.174833094897473e-06, "loss": 1.2596, "step": 93400 }, { "epoch": 2.79, "learning_rate": 2.14502861230329e-06, "loss": 1.3129, "step": 93500 }, { "epoch": 2.79, "learning_rate": 2.1152241297091083e-06, "loss": 1.3005, "step": 93600 }, { "epoch": 2.79, "learning_rate": 2.085419647114926e-06, "loss": 1.2638, "step": 93700 }, { "epoch": 2.8, "learning_rate": 2.0556151645207442e-06, "loss": 1.305, "step": 93800 }, { "epoch": 2.8, "learning_rate": 2.0258106819265615e-06, "loss": 1.2499, "step": 93900 }, { "epoch": 2.8, "learning_rate": 1.9960061993323797e-06, "loss": 1.2803, "step": 94000 }, { "epoch": 2.8, "learning_rate": 1.9662017167381975e-06, "loss": 1.2982, "step": 94100 }, { "epoch": 2.81, "learning_rate": 1.9363972341440156e-06, "loss": 1.2846, "step": 94200 }, { "epoch": 2.81, "learning_rate": 1.906592751549833e-06, "loss": 1.3006, "step": 94300 }, { "epoch": 2.81, "learning_rate": 1.876788268955651e-06, "loss": 1.2996, "step": 94400 }, { "epoch": 2.82, "learning_rate": 1.8472818311874106e-06, "loss": 1.2905, "step": 94500 }, { "epoch": 2.82, "learning_rate": 1.8174773485932284e-06, "loss": 1.2661, "step": 94600 }, { "epoch": 2.82, "learning_rate": 1.7876728659990463e-06, "loss": 1.2848, "step": 94700 }, { "epoch": 2.83, "learning_rate": 1.757868383404864e-06, "loss": 1.2962, "step": 94800 }, { "epoch": 2.83, "learning_rate": 1.728063900810682e-06, "loss": 1.3271, "step": 94900 }, { "epoch": 2.83, "learning_rate": 1.6982594182164998e-06, "loss": 1.2668, "step": 95000 }, { "epoch": 2.83, "learning_rate": 1.6684549356223177e-06, "loss": 1.3104, "step": 95100 }, { "epoch": 2.84, "learning_rate": 1.6386504530281355e-06, "loss": 1.2795, "step": 95200 }, { "epoch": 2.84, "learning_rate": 1.6088459704339535e-06, "loss": 1.2807, "step": 95300 }, { "epoch": 2.84, "learning_rate": 1.5790414878397712e-06, "loss": 1.3023, "step": 95400 }, { "epoch": 2.85, "learning_rate": 1.549237005245589e-06, "loss": 1.2996, "step": 95500 }, { "epoch": 2.85, "learning_rate": 1.5194325226514067e-06, "loss": 1.3163, "step": 95600 }, { "epoch": 2.85, "learning_rate": 1.4896280400572247e-06, "loss": 1.3352, "step": 95700 }, { "epoch": 2.86, "learning_rate": 1.4598235574630424e-06, "loss": 1.2638, "step": 95800 }, { "epoch": 2.86, "learning_rate": 1.4300190748688604e-06, "loss": 1.3498, "step": 95900 }, { "epoch": 2.86, "learning_rate": 1.4002145922746781e-06, "loss": 1.2619, "step": 96000 }, { "epoch": 2.86, "learning_rate": 1.370410109680496e-06, "loss": 1.3227, "step": 96100 }, { "epoch": 2.87, "learning_rate": 1.3406056270863138e-06, "loss": 1.2838, "step": 96200 }, { "epoch": 2.87, "learning_rate": 1.3108011444921318e-06, "loss": 1.2992, "step": 96300 }, { "epoch": 2.87, "learning_rate": 1.2809966618979495e-06, "loss": 1.272, "step": 96400 }, { "epoch": 2.88, "learning_rate": 1.2511921793037675e-06, "loss": 1.2672, "step": 96500 }, { "epoch": 2.88, "learning_rate": 1.2213876967095852e-06, "loss": 1.307, "step": 96600 }, { "epoch": 2.88, "learning_rate": 1.191881258941345e-06, "loss": 1.2649, "step": 96700 }, { "epoch": 2.89, "learning_rate": 1.1620767763471627e-06, "loss": 1.2454, "step": 96800 }, { "epoch": 2.89, "learning_rate": 1.1322722937529807e-06, "loss": 1.2826, "step": 96900 }, { "epoch": 2.89, "learning_rate": 1.1024678111587984e-06, "loss": 1.3654, "step": 97000 }, { "epoch": 2.89, "learning_rate": 1.0726633285646161e-06, "loss": 1.2908, "step": 97100 }, { "epoch": 2.9, "learning_rate": 1.042858845970434e-06, "loss": 1.302, "step": 97200 }, { "epoch": 2.9, "learning_rate": 1.0130543633762516e-06, "loss": 1.2849, "step": 97300 }, { "epoch": 2.9, "learning_rate": 9.832498807820696e-07, "loss": 1.2789, "step": 97400 }, { "epoch": 2.91, "learning_rate": 9.537434430138293e-07, "loss": 1.2867, "step": 97500 }, { "epoch": 2.91, "learning_rate": 9.239389604196472e-07, "loss": 1.2662, "step": 97600 }, { "epoch": 2.91, "learning_rate": 8.94134477825465e-07, "loss": 1.2251, "step": 97700 }, { "epoch": 2.91, "learning_rate": 8.643299952312829e-07, "loss": 1.2888, "step": 97800 }, { "epoch": 2.92, "learning_rate": 8.345255126371006e-07, "loss": 1.2734, "step": 97900 }, { "epoch": 2.92, "learning_rate": 8.047210300429185e-07, "loss": 1.2751, "step": 98000 }, { "epoch": 2.92, "learning_rate": 7.749165474487363e-07, "loss": 1.2835, "step": 98100 }, { "epoch": 2.93, "learning_rate": 7.451120648545542e-07, "loss": 1.2643, "step": 98200 }, { "epoch": 2.93, "learning_rate": 7.15307582260372e-07, "loss": 1.3029, "step": 98300 }, { "epoch": 2.93, "learning_rate": 6.855030996661899e-07, "loss": 1.3015, "step": 98400 }, { "epoch": 2.94, "learning_rate": 6.556986170720076e-07, "loss": 1.2745, "step": 98500 }, { "epoch": 2.94, "learning_rate": 6.261921793037673e-07, "loss": 1.298, "step": 98600 }, { "epoch": 2.94, "learning_rate": 5.963876967095852e-07, "loss": 1.2703, "step": 98700 }, { "epoch": 2.94, "learning_rate": 5.66583214115403e-07, "loss": 1.2758, "step": 98800 }, { "epoch": 2.95, "learning_rate": 5.367787315212208e-07, "loss": 1.2944, "step": 98900 }, { "epoch": 2.95, "learning_rate": 5.069742489270386e-07, "loss": 1.3073, "step": 99000 }, { "epoch": 2.95, "learning_rate": 4.771697663328565e-07, "loss": 1.2741, "step": 99100 }, { "epoch": 2.96, "learning_rate": 4.473652837386743e-07, "loss": 1.2829, "step": 99200 }, { "epoch": 2.96, "learning_rate": 4.1756080114449216e-07, "loss": 1.3196, "step": 99300 }, { "epoch": 2.96, "learning_rate": 3.8775631855030996e-07, "loss": 1.3021, "step": 99400 }, { "epoch": 2.97, "learning_rate": 3.579518359561278e-07, "loss": 1.2743, "step": 99500 }, { "epoch": 2.97, "learning_rate": 3.2814735336194567e-07, "loss": 1.2547, "step": 99600 }, { "epoch": 2.97, "learning_rate": 2.9834287076776347e-07, "loss": 1.2993, "step": 99700 }, { "epoch": 2.97, "learning_rate": 2.685383881735813e-07, "loss": 1.2546, "step": 99800 }, { "epoch": 2.98, "learning_rate": 2.387339055793992e-07, "loss": 1.2743, "step": 99900 }, { "epoch": 2.98, "learning_rate": 2.0892942298521698e-07, "loss": 1.2957, "step": 100000 }, { "epoch": 2.98, "learning_rate": 1.791249403910348e-07, "loss": 1.2468, "step": 100100 }, { "epoch": 2.99, "learning_rate": 1.4932045779685266e-07, "loss": 1.2819, "step": 100200 }, { "epoch": 2.99, "learning_rate": 1.1951597520267048e-07, "loss": 1.2974, "step": 100300 }, { "epoch": 2.99, "learning_rate": 8.971149260848831e-08, "loss": 1.2679, "step": 100400 }, { "epoch": 3.0, "learning_rate": 5.990701001430615e-08, "loss": 1.2761, "step": 100500 }, { "epoch": 3.0, "learning_rate": 3.010252742012399e-08, "loss": 1.3245, "step": 100600 }, { "epoch": 3.0, "eval_gen_len": 18.9801, "eval_loss": 1.4905033111572266, "eval_rouge1": 32.9084, "eval_rouge2": 17.7027, "eval_rougeL": 28.2912, "eval_rougeLsum": 28.2975, "eval_runtime": 291.9809, "eval_samples_per_second": 9.288, "eval_steps_per_second": 1.161, "step": 100656 }, { "epoch": 3.0, "step": 100656, "total_flos": 9.506404988551581e+17, "train_loss": 1.5143268576412658, "train_runtime": 40494.9096, "train_samples_per_second": 19.885, "train_steps_per_second": 2.486 } ], "max_steps": 100656, "num_train_epochs": 3, "total_flos": 9.506404988551581e+17, "trial_name": null, "trial_params": null }