diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -50089,3 +50089,2506 @@ Use FP16 precision: False 12/29/2021 05:10:27 - INFO - codeparrot_training - Step 47499: {'lr': 3.1020040100037393e-06, 'samples': 24320000, 'steps': 47499, 'batch_loss/train': 0.7271024992223829} 12/29/2021 05:10:27 - INFO - codeparrot_training - Evaluating and saving model checkpoint 12/29/2021 05:13:49 - INFO - codeparrot_training - Step 47500: {'loss/eval': 0.7404438257217407, 'perplexity': 2.0968658924102783} +12/29/2021 05:14:08 - WARNING - huggingface_hub.repository - Several commits (17) will be pushed upstream. +12/29/2021 05:14:21 - INFO - codeparrot_training - Step 47500: {'lr': 3.0995290325141757e-06, 'samples': 24320512, 'steps': 47500, 'batch_loss/train': 0.7142994897440076} +12/29/2021 05:14:35 - INFO - codeparrot_training - Step 47501: {'lr': 3.0970550366076766e-06, 'samples': 24321024, 'steps': 47501, 'batch_loss/train': 0.7185069830156863} +12/29/2021 05:14:46 - INFO - codeparrot_training - Step 47502: {'lr': 3.094582022294068e-06, 'samples': 24321536, 'steps': 47502, 'batch_loss/train': 0.7865669149905443} +12/29/2021 05:14:56 - INFO - codeparrot_training - Step 47503: {'lr': 3.092109989583147e-06, 'samples': 24322048, 'steps': 47503, 'batch_loss/train': 0.5161740591283888} +12/29/2021 05:15:07 - INFO - codeparrot_training - Step 47504: {'lr': 3.0896389384847955e-06, 'samples': 24322560, 'steps': 47504, 'batch_loss/train': 0.7239033812074922} +12/29/2021 05:15:19 - INFO - codeparrot_training - Step 47505: {'lr': 3.0871688690087828e-06, 'samples': 24323072, 'steps': 47505, 'batch_loss/train': 0.8683047229424119} +12/29/2021 05:15:30 - INFO - codeparrot_training - Step 47506: {'lr': 3.0846997811649625e-06, 'samples': 24323584, 'steps': 47506, 'batch_loss/train': 0.795848541893065} +12/29/2021 05:15:41 - INFO - codeparrot_training - Step 47507: {'lr': 3.0822316749631597e-06, 'samples': 24324096, 'steps': 47507, 'batch_loss/train': 0.6930774739012122} +12/29/2021 05:15:55 - INFO - codeparrot_training - Step 47508: {'lr': 3.079764550413172e-06, 'samples': 24324608, 'steps': 47508, 'batch_loss/train': 0.9589014956727624} +12/29/2021 05:16:05 - INFO - codeparrot_training - Step 47509: {'lr': 3.077298407524798e-06, 'samples': 24325120, 'steps': 47509, 'batch_loss/train': 0.6889746403321624} +12/29/2021 05:16:16 - INFO - codeparrot_training - Step 47510: {'lr': 3.0748332463078345e-06, 'samples': 24325632, 'steps': 47510, 'batch_loss/train': 0.7055857053492218} +12/29/2021 05:16:28 - INFO - codeparrot_training - Step 47511: {'lr': 3.0723690667721347e-06, 'samples': 24326144, 'steps': 47511, 'batch_loss/train': 0.7254427324514836} +12/29/2021 05:16:39 - INFO - codeparrot_training - Step 47512: {'lr': 3.069905868927414e-06, 'samples': 24326656, 'steps': 47512, 'batch_loss/train': 0.7203088980168104} +12/29/2021 05:16:49 - INFO - codeparrot_training - Step 47513: {'lr': 3.0674436527835525e-06, 'samples': 24327168, 'steps': 47513, 'batch_loss/train': 0.6592502947896719} +12/29/2021 05:17:00 - INFO - codeparrot_training - Step 47514: {'lr': 3.064982418350293e-06, 'samples': 24327680, 'steps': 47514, 'batch_loss/train': 1.3359698895365} +12/29/2021 05:17:12 - INFO - codeparrot_training - Step 47515: {'lr': 3.0625221656374047e-06, 'samples': 24328192, 'steps': 47515, 'batch_loss/train': 0.7179236770607531} +12/29/2021 05:17:23 - INFO - codeparrot_training - Step 47516: {'lr': 3.060062894654686e-06, 'samples': 24328704, 'steps': 47516, 'batch_loss/train': 0.7161964094266295} +12/29/2021 05:17:33 - INFO - codeparrot_training - Step 47517: {'lr': 3.0576046054119344e-06, 'samples': 24329216, 'steps': 47517, 'batch_loss/train': 0.7144722617231309} +12/29/2021 05:17:45 - INFO - codeparrot_training - Step 47518: {'lr': 3.055147297918892e-06, 'samples': 24329728, 'steps': 47518, 'batch_loss/train': 0.712045188061893} +12/29/2021 05:17:56 - INFO - codeparrot_training - Step 47519: {'lr': 3.052690972185357e-06, 'samples': 24330240, 'steps': 47519, 'batch_loss/train': 0.799641733057797} +12/29/2021 05:18:07 - INFO - codeparrot_training - Step 47520: {'lr': 3.0502356282210706e-06, 'samples': 24330752, 'steps': 47520, 'batch_loss/train': 0.5674862313026097} +12/29/2021 05:18:21 - INFO - codeparrot_training - Step 47521: {'lr': 3.047781266035776e-06, 'samples': 24331264, 'steps': 47521, 'batch_loss/train': 0.8363626040518284} +12/29/2021 05:18:32 - INFO - codeparrot_training - Step 47522: {'lr': 3.0453278856392987e-06, 'samples': 24331776, 'steps': 47522, 'batch_loss/train': 0.7164649190381169} +12/29/2021 05:18:42 - INFO - codeparrot_training - Step 47523: {'lr': 3.0428754870413244e-06, 'samples': 24332288, 'steps': 47523, 'batch_loss/train': 0.7283314187079668} +12/29/2021 05:18:53 - INFO - codeparrot_training - Step 47524: {'lr': 3.040424070251652e-06, 'samples': 24332800, 'steps': 47524, 'batch_loss/train': 0.5454908024985343} +12/29/2021 05:19:05 - INFO - codeparrot_training - Step 47525: {'lr': 3.0379736352799945e-06, 'samples': 24333312, 'steps': 47525, 'batch_loss/train': 0.7206964856013656} +12/29/2021 05:19:16 - INFO - codeparrot_training - Step 47526: {'lr': 3.0355241821360956e-06, 'samples': 24333824, 'steps': 47526, 'batch_loss/train': 0.6414483650587499} +12/29/2021 05:19:27 - INFO - codeparrot_training - Step 47527: {'lr': 3.0330757108296967e-06, 'samples': 24334336, 'steps': 47527, 'batch_loss/train': 0.7219134867191315} +12/29/2021 05:19:40 - INFO - codeparrot_training - Step 47528: {'lr': 3.030628221370568e-06, 'samples': 24334848, 'steps': 47528, 'batch_loss/train': 0.7134045243728906} +12/29/2021 05:19:51 - INFO - codeparrot_training - Step 47529: {'lr': 3.0281817137683967e-06, 'samples': 24335360, 'steps': 47529, 'batch_loss/train': 0.7298453412950039} +12/29/2021 05:20:02 - INFO - codeparrot_training - Step 47530: {'lr': 3.025736188032896e-06, 'samples': 24335872, 'steps': 47530, 'batch_loss/train': 0.7523774793371558} +12/29/2021 05:20:14 - INFO - codeparrot_training - Step 47531: {'lr': 3.0232916441738367e-06, 'samples': 24336384, 'steps': 47531, 'batch_loss/train': 0.7076944727450609} +12/29/2021 05:20:25 - INFO - codeparrot_training - Step 47532: {'lr': 3.0208480822009332e-06, 'samples': 24336896, 'steps': 47532, 'batch_loss/train': 0.7527461764402688} +12/29/2021 05:20:35 - INFO - codeparrot_training - Step 47533: {'lr': 3.0184055021238443e-06, 'samples': 24337408, 'steps': 47533, 'batch_loss/train': 0.7105735270306468} +12/29/2021 05:20:47 - INFO - codeparrot_training - Step 47534: {'lr': 3.015963903952368e-06, 'samples': 24337920, 'steps': 47534, 'batch_loss/train': 0.8052504323422909} +12/29/2021 05:20:58 - INFO - codeparrot_training - Step 47535: {'lr': 3.0135232876961073e-06, 'samples': 24338432, 'steps': 47535, 'batch_loss/train': 0.7053061301121488} +12/29/2021 05:21:09 - INFO - codeparrot_training - Step 47536: {'lr': 3.011083653364832e-06, 'samples': 24338944, 'steps': 47536, 'batch_loss/train': 0.6767298295162618} +12/29/2021 05:21:19 - INFO - codeparrot_training - Step 47537: {'lr': 3.0086450009682574e-06, 'samples': 24339456, 'steps': 47537, 'batch_loss/train': 0.7511524488218129} +12/29/2021 05:21:33 - INFO - codeparrot_training - Step 47538: {'lr': 3.0062073305160144e-06, 'samples': 24339968, 'steps': 47538, 'batch_loss/train': 0.7240196322090924} +12/29/2021 05:21:44 - INFO - codeparrot_training - Step 47539: {'lr': 3.0037706420178444e-06, 'samples': 24340480, 'steps': 47539, 'batch_loss/train': 0.7757140975445509} +12/29/2021 05:21:54 - INFO - codeparrot_training - Step 47540: {'lr': 3.0013349354834353e-06, 'samples': 24340992, 'steps': 47540, 'batch_loss/train': 0.6689399461029097} +12/29/2021 05:22:06 - INFO - codeparrot_training - Step 47541: {'lr': 2.998900210922445e-06, 'samples': 24341504, 'steps': 47541, 'batch_loss/train': 0.6913521122187376} +12/29/2021 05:22:17 - INFO - codeparrot_training - Step 47542: {'lr': 2.9964664683445607e-06, 'samples': 24342016, 'steps': 47542, 'batch_loss/train': 0.7930256607942283} +12/29/2021 05:22:28 - INFO - codeparrot_training - Step 47543: {'lr': 2.9940337077594693e-06, 'samples': 24342528, 'steps': 47543, 'batch_loss/train': 0.6803514705970883} +12/29/2021 05:22:40 - INFO - codeparrot_training - Step 47544: {'lr': 2.991601929176829e-06, 'samples': 24343040, 'steps': 47544, 'batch_loss/train': 0.712479799054563} +12/29/2021 05:22:50 - INFO - codeparrot_training - Step 47545: {'lr': 2.9891711326063e-06, 'samples': 24343552, 'steps': 47545, 'batch_loss/train': 0.5496618575416505} +12/29/2021 05:23:01 - INFO - codeparrot_training - Step 47546: {'lr': 2.9867413180575676e-06, 'samples': 24344064, 'steps': 47546, 'batch_loss/train': 0.7113267029635608} +12/29/2021 05:23:12 - INFO - codeparrot_training - Step 47547: {'lr': 2.9843124855402916e-06, 'samples': 24344576, 'steps': 47547, 'batch_loss/train': 0.7492923235986382} +12/29/2021 05:23:26 - INFO - codeparrot_training - Step 47548: {'lr': 2.9818846350641027e-06, 'samples': 24345088, 'steps': 47548, 'batch_loss/train': 0.7225815574056469} +12/29/2021 05:23:36 - INFO - codeparrot_training - Step 47549: {'lr': 2.9794577666386877e-06, 'samples': 24345600, 'steps': 47549, 'batch_loss/train': 0.7759031439200044} +12/29/2021 05:23:47 - INFO - codeparrot_training - Step 47550: {'lr': 2.9770318802736785e-06, 'samples': 24346112, 'steps': 47550, 'batch_loss/train': 0.7414350416511297} +12/29/2021 05:23:59 - INFO - codeparrot_training - Step 47551: {'lr': 2.9746069759787053e-06, 'samples': 24346624, 'steps': 47551, 'batch_loss/train': 0.7995961112901568} +12/29/2021 05:24:10 - INFO - codeparrot_training - Step 47552: {'lr': 2.9721830537634278e-06, 'samples': 24347136, 'steps': 47552, 'batch_loss/train': 0.768515539704822} +12/29/2021 05:24:20 - INFO - codeparrot_training - Step 47553: {'lr': 2.9697601136374486e-06, 'samples': 24347648, 'steps': 47553, 'batch_loss/train': 0.7128591237124056} +12/29/2021 05:24:32 - INFO - codeparrot_training - Step 47554: {'lr': 2.967338155610483e-06, 'samples': 24348160, 'steps': 47554, 'batch_loss/train': 1.51214156486094} +12/29/2021 05:24:43 - INFO - codeparrot_training - Step 47555: {'lr': 2.9649171796920784e-06, 'samples': 24348672, 'steps': 47555, 'batch_loss/train': 0.6563785686157644} +12/29/2021 05:24:54 - INFO - codeparrot_training - Step 47556: {'lr': 2.962497185891866e-06, 'samples': 24349184, 'steps': 47556, 'batch_loss/train': 0.6941132512874901} +12/29/2021 05:25:04 - INFO - codeparrot_training - Step 47557: {'lr': 2.960078174219505e-06, 'samples': 24349696, 'steps': 47557, 'batch_loss/train': 0.6421837820671499} +12/29/2021 05:25:16 - INFO - codeparrot_training - Step 47558: {'lr': 2.9576601446845986e-06, 'samples': 24350208, 'steps': 47558, 'batch_loss/train': 0.6534938751719892} +12/29/2021 05:25:27 - INFO - codeparrot_training - Step 47559: {'lr': 2.9552430972967504e-06, 'samples': 24350720, 'steps': 47559, 'batch_loss/train': 0.6902362452819943} +12/29/2021 05:25:38 - INFO - codeparrot_training - Step 47560: {'lr': 2.9528270320655914e-06, 'samples': 24351232, 'steps': 47560, 'batch_loss/train': 0.7508368259295821} +12/29/2021 05:25:51 - INFO - codeparrot_training - Step 47561: {'lr': 2.9504119490006698e-06, 'samples': 24351744, 'steps': 47561, 'batch_loss/train': 0.6900778498966247} +12/29/2021 05:26:02 - INFO - codeparrot_training - Step 47562: {'lr': 2.947997848111672e-06, 'samples': 24352256, 'steps': 47562, 'batch_loss/train': 0.7777202734723687} +12/29/2021 05:26:13 - INFO - codeparrot_training - Step 47563: {'lr': 2.945584729408146e-06, 'samples': 24352768, 'steps': 47563, 'batch_loss/train': 0.7027611620724201} +12/29/2021 05:26:25 - INFO - codeparrot_training - Step 47564: {'lr': 2.9431725928996677e-06, 'samples': 24353280, 'steps': 47564, 'batch_loss/train': 0.6615166908595711} +12/29/2021 05:26:35 - INFO - codeparrot_training - Step 47565: {'lr': 2.94076143859584e-06, 'samples': 24353792, 'steps': 47565, 'batch_loss/train': 0.7248236564919353} +12/29/2021 05:26:46 - INFO - codeparrot_training - Step 47566: {'lr': 2.938351266506295e-06, 'samples': 24354304, 'steps': 47566, 'batch_loss/train': 0.6819373071193695} +12/29/2021 05:26:57 - INFO - codeparrot_training - Step 47567: {'lr': 2.93594207664058e-06, 'samples': 24354816, 'steps': 47567, 'batch_loss/train': 0.7767293106298894} +12/29/2021 05:27:10 - INFO - codeparrot_training - Step 47568: {'lr': 2.9335338690082424e-06, 'samples': 24355328, 'steps': 47568, 'batch_loss/train': 0.7146223345771432} +12/29/2021 05:27:21 - INFO - codeparrot_training - Step 47569: {'lr': 2.9311266436189142e-06, 'samples': 24355840, 'steps': 47569, 'batch_loss/train': 0.7852404867298901} +12/29/2021 05:27:32 - INFO - codeparrot_training - Step 47570: {'lr': 2.9287204004821154e-06, 'samples': 24356352, 'steps': 47570, 'batch_loss/train': 0.7412413572892547} +12/29/2021 05:27:44 - INFO - codeparrot_training - Step 47571: {'lr': 2.9263151396074215e-06, 'samples': 24356864, 'steps': 47571, 'batch_loss/train': 0.6066284934058785} +12/29/2021 05:27:55 - INFO - codeparrot_training - Step 47572: {'lr': 2.9239108610044363e-06, 'samples': 24357376, 'steps': 47572, 'batch_loss/train': 0.7324116248637438} +12/29/2021 05:28:05 - INFO - codeparrot_training - Step 47573: {'lr': 2.9215075646826796e-06, 'samples': 24357888, 'steps': 47573, 'batch_loss/train': 0.805313247255981} +12/29/2021 05:28:17 - INFO - codeparrot_training - Step 47574: {'lr': 2.919105250651699e-06, 'samples': 24358400, 'steps': 47574, 'batch_loss/train': 0.7013810598291457} +12/29/2021 05:28:28 - INFO - codeparrot_training - Step 47575: {'lr': 2.916703918921071e-06, 'samples': 24358912, 'steps': 47575, 'batch_loss/train': 0.6886053271591663} +12/29/2021 05:28:38 - INFO - codeparrot_training - Step 47576: {'lr': 2.9143035695003427e-06, 'samples': 24359424, 'steps': 47576, 'batch_loss/train': 0.7911646105349064} +12/29/2021 05:28:51 - INFO - codeparrot_training - Step 47577: {'lr': 2.911904202399035e-06, 'samples': 24359936, 'steps': 47577, 'batch_loss/train': 0.6676319246180356} +12/29/2021 05:29:02 - INFO - codeparrot_training - Step 47578: {'lr': 2.909505817626723e-06, 'samples': 24360448, 'steps': 47578, 'batch_loss/train': 0.8029229594394565} +12/29/2021 05:29:13 - INFO - codeparrot_training - Step 47579: {'lr': 2.9071084151928993e-06, 'samples': 24360960, 'steps': 47579, 'batch_loss/train': 0.7217835438204929} +12/29/2021 05:29:23 - INFO - codeparrot_training - Step 47580: {'lr': 2.90471199510714e-06, 'samples': 24361472, 'steps': 47580, 'batch_loss/train': 0.6408159178681672} +12/29/2021 05:29:36 - INFO - codeparrot_training - Step 47581: {'lr': 2.902316557378909e-06, 'samples': 24361984, 'steps': 47581, 'batch_loss/train': 0.6327755721285939} +12/29/2021 05:29:46 - INFO - codeparrot_training - Step 47582: {'lr': 2.8999221020177825e-06, 'samples': 24362496, 'steps': 47582, 'batch_loss/train': 0.7237575743347406} +12/29/2021 05:29:57 - INFO - codeparrot_training - Step 47583: {'lr': 2.8975286290332526e-06, 'samples': 24363008, 'steps': 47583, 'batch_loss/train': 0.6425970960408449} +12/29/2021 05:30:10 - INFO - codeparrot_training - Step 47584: {'lr': 2.895136138434867e-06, 'samples': 24363520, 'steps': 47584, 'batch_loss/train': 0.655895286356099} +12/29/2021 05:30:20 - INFO - codeparrot_training - Step 47585: {'lr': 2.8927446302321194e-06, 'samples': 24364032, 'steps': 47585, 'batch_loss/train': 0.6426265079062432} +12/29/2021 05:30:31 - INFO - codeparrot_training - Step 47586: {'lr': 2.8903541044345006e-06, 'samples': 24364544, 'steps': 47586, 'batch_loss/train': 0.7029325887560844} +12/29/2021 05:30:43 - INFO - codeparrot_training - Step 47587: {'lr': 2.8879645610515316e-06, 'samples': 24365056, 'steps': 47587, 'batch_loss/train': 0.7005817154422402} +12/29/2021 05:30:54 - INFO - codeparrot_training - Step 47588: {'lr': 2.8855760000927045e-06, 'samples': 24365568, 'steps': 47588, 'batch_loss/train': 0.6343494607135653} +12/29/2021 05:31:04 - INFO - codeparrot_training - Step 47589: {'lr': 2.8831884215675396e-06, 'samples': 24366080, 'steps': 47589, 'batch_loss/train': 0.6758362301625311} +12/29/2021 05:31:15 - INFO - codeparrot_training - Step 47590: {'lr': 2.880801825485474e-06, 'samples': 24366592, 'steps': 47590, 'batch_loss/train': 0.6774015170522034} +12/29/2021 05:31:27 - INFO - codeparrot_training - Step 47591: {'lr': 2.8784162118560554e-06, 'samples': 24367104, 'steps': 47591, 'batch_loss/train': 0.6734797954559326} +12/29/2021 05:31:38 - INFO - codeparrot_training - Step 47592: {'lr': 2.876031580688748e-06, 'samples': 24367616, 'steps': 47592, 'batch_loss/train': 0.6646332318487111} +12/29/2021 05:31:48 - INFO - codeparrot_training - Step 47593: {'lr': 2.8736479319930176e-06, 'samples': 24368128, 'steps': 47593, 'batch_loss/train': 0.7130439332686365} +12/29/2021 05:32:00 - INFO - codeparrot_training - Step 47594: {'lr': 2.8712652657783556e-06, 'samples': 24368640, 'steps': 47594, 'batch_loss/train': 0.6969249453395605} +12/29/2021 05:32:11 - INFO - codeparrot_training - Step 47595: {'lr': 2.8688835820542268e-06, 'samples': 24369152, 'steps': 47595, 'batch_loss/train': 0.6873803690541536} +12/29/2021 05:32:22 - INFO - codeparrot_training - Step 47596: {'lr': 2.866502880830124e-06, 'samples': 24369664, 'steps': 47596, 'batch_loss/train': 0.7662013052031398} +12/29/2021 05:32:35 - INFO - codeparrot_training - Step 47597: {'lr': 2.8641231621154563e-06, 'samples': 24370176, 'steps': 47597, 'batch_loss/train': 0.7173230298794806} +12/29/2021 05:32:46 - INFO - codeparrot_training - Step 47598: {'lr': 2.8617444259197433e-06, 'samples': 24370688, 'steps': 47598, 'batch_loss/train': 0.8044251287356019} +12/29/2021 05:32:56 - INFO - codeparrot_training - Step 47599: {'lr': 2.8593666722524225e-06, 'samples': 24371200, 'steps': 47599, 'batch_loss/train': 0.4655052630114369} +12/29/2021 05:33:07 - INFO - codeparrot_training - Step 47600: {'lr': 2.8569899011229308e-06, 'samples': 24371712, 'steps': 47600, 'batch_loss/train': 0.7130703059956431} +12/29/2021 05:33:19 - INFO - codeparrot_training - Step 47601: {'lr': 2.854614112540732e-06, 'samples': 24372224, 'steps': 47601, 'batch_loss/train': 0.8088556858710945} +12/29/2021 05:33:30 - INFO - codeparrot_training - Step 47602: {'lr': 2.8522393065152917e-06, 'samples': 24372736, 'steps': 47602, 'batch_loss/train': 0.7219401078764349} +12/29/2021 05:33:40 - INFO - codeparrot_training - Step 47603: {'lr': 2.8498654830559913e-06, 'samples': 24373248, 'steps': 47603, 'batch_loss/train': 0.7552243331447244} +12/29/2021 05:33:52 - INFO - codeparrot_training - Step 47604: {'lr': 2.84749264217235e-06, 'samples': 24373760, 'steps': 47604, 'batch_loss/train': 0.6813800157979131} +12/29/2021 05:34:03 - INFO - codeparrot_training - Step 47605: {'lr': 2.8451207838737227e-06, 'samples': 24374272, 'steps': 47605, 'batch_loss/train': 0.5835721300682053} +12/29/2021 05:34:14 - INFO - codeparrot_training - Step 47606: {'lr': 2.8427499081695727e-06, 'samples': 24374784, 'steps': 47606, 'batch_loss/train': 0.7135355175705627} +12/29/2021 05:34:26 - INFO - codeparrot_training - Step 47607: {'lr': 2.840380015069338e-06, 'samples': 24375296, 'steps': 47607, 'batch_loss/train': 0.545073562010657} +12/29/2021 05:34:37 - INFO - codeparrot_training - Step 47608: {'lr': 2.8380111045824277e-06, 'samples': 24375808, 'steps': 47608, 'batch_loss/train': 0.6871775141917169} +12/29/2021 05:34:48 - INFO - codeparrot_training - Step 47609: {'lr': 2.83564317671825e-06, 'samples': 24376320, 'steps': 47609, 'batch_loss/train': 0.7157067721709609} +12/29/2021 05:35:00 - INFO - codeparrot_training - Step 47610: {'lr': 2.8332762314862426e-06, 'samples': 24376832, 'steps': 47610, 'batch_loss/train': 0.6660990985110402} +12/29/2021 05:35:10 - INFO - codeparrot_training - Step 47611: {'lr': 2.8309102688957867e-06, 'samples': 24377344, 'steps': 47611, 'batch_loss/train': 0.6684398036450148} +12/29/2021 05:35:21 - INFO - codeparrot_training - Step 47612: {'lr': 2.828545288956319e-06, 'samples': 24377856, 'steps': 47612, 'batch_loss/train': 0.730004059150815} +12/29/2021 05:35:32 - INFO - codeparrot_training - Step 47613: {'lr': 2.826181291677221e-06, 'samples': 24378368, 'steps': 47613, 'batch_loss/train': 0.6507564564235508} +12/29/2021 05:35:44 - INFO - codeparrot_training - Step 47614: {'lr': 2.823818277067902e-06, 'samples': 24378880, 'steps': 47614, 'batch_loss/train': 0.704701678827405} +12/29/2021 05:35:55 - INFO - codeparrot_training - Step 47615: {'lr': 2.821456245137771e-06, 'samples': 24379392, 'steps': 47615, 'batch_loss/train': 0.6446958649903536} +12/29/2021 05:36:06 - INFO - codeparrot_training - Step 47616: {'lr': 2.819095195896154e-06, 'samples': 24379904, 'steps': 47616, 'batch_loss/train': 0.7165529052726924} +12/29/2021 05:36:18 - INFO - codeparrot_training - Step 47617: {'lr': 2.816735129352488e-06, 'samples': 24380416, 'steps': 47617, 'batch_loss/train': 0.740035904571414} +12/29/2021 05:36:28 - INFO - codeparrot_training - Step 47618: {'lr': 2.8143760455161814e-06, 'samples': 24380928, 'steps': 47618, 'batch_loss/train': 0.7639142920961604} +12/29/2021 05:36:39 - INFO - codeparrot_training - Step 47619: {'lr': 2.8120179443965887e-06, 'samples': 24381440, 'steps': 47619, 'batch_loss/train': 0.7542732837609947} +12/29/2021 05:36:51 - INFO - codeparrot_training - Step 47620: {'lr': 2.8096608260030353e-06, 'samples': 24381952, 'steps': 47620, 'batch_loss/train': 0.7188503104262054} +12/29/2021 05:37:02 - INFO - codeparrot_training - Step 47621: {'lr': 2.8073046903449585e-06, 'samples': 24382464, 'steps': 47621, 'batch_loss/train': 0.6594489415874705} +12/29/2021 05:37:13 - INFO - codeparrot_training - Step 47622: {'lr': 2.804949537431711e-06, 'samples': 24382976, 'steps': 47622, 'batch_loss/train': 0.8239318141713738} +12/29/2021 05:37:23 - INFO - codeparrot_training - Step 47623: {'lr': 2.80259536727262e-06, 'samples': 24383488, 'steps': 47623, 'batch_loss/train': 0.7400148864835501} +12/29/2021 05:37:36 - INFO - codeparrot_training - Step 47624: {'lr': 2.800242179877094e-06, 'samples': 24384000, 'steps': 47624, 'batch_loss/train': 0.6772532882168889} +12/29/2021 05:37:47 - INFO - codeparrot_training - Step 47625: {'lr': 2.7978899752544585e-06, 'samples': 24384512, 'steps': 47625, 'batch_loss/train': 0.667577319778502} +12/29/2021 05:37:57 - INFO - codeparrot_training - Step 47626: {'lr': 2.7955387534140674e-06, 'samples': 24385024, 'steps': 47626, 'batch_loss/train': 0.675511727691628} +12/29/2021 05:38:09 - INFO - codeparrot_training - Step 47627: {'lr': 2.7931885143652747e-06, 'samples': 24385536, 'steps': 47627, 'batch_loss/train': 0.7411810359917581} +12/29/2021 05:38:20 - INFO - codeparrot_training - Step 47628: {'lr': 2.7908392581174334e-06, 'samples': 24386048, 'steps': 47628, 'batch_loss/train': 0.6429246850311756} +12/29/2021 05:38:31 - INFO - codeparrot_training - Step 47629: {'lr': 2.78849098467987e-06, 'samples': 24386560, 'steps': 47629, 'batch_loss/train': 0.6558556822128594} +12/29/2021 05:38:43 - INFO - codeparrot_training - Step 47630: {'lr': 2.786143694061938e-06, 'samples': 24387072, 'steps': 47630, 'batch_loss/train': 0.5568773967388552} +12/29/2021 05:38:54 - INFO - codeparrot_training - Step 47631: {'lr': 2.7837973862729072e-06, 'samples': 24387584, 'steps': 47631, 'batch_loss/train': 0.6804157891310751} +12/29/2021 05:39:04 - INFO - codeparrot_training - Step 47632: {'lr': 2.7814520613221872e-06, 'samples': 24388096, 'steps': 47632, 'batch_loss/train': 0.7965161171741784} +12/29/2021 05:39:16 - INFO - codeparrot_training - Step 47633: {'lr': 2.779107719219076e-06, 'samples': 24388608, 'steps': 47633, 'batch_loss/train': 0.7072410219116136} +12/29/2021 05:39:27 - INFO - codeparrot_training - Step 47634: {'lr': 2.7767643599728443e-06, 'samples': 24389120, 'steps': 47634, 'batch_loss/train': 0.7364150499925017} +12/29/2021 05:39:37 - INFO - codeparrot_training - Step 47635: {'lr': 2.774421983592901e-06, 'samples': 24389632, 'steps': 47635, 'batch_loss/train': 0.8120554867200553} +12/29/2021 05:39:48 - INFO - codeparrot_training - Step 47636: {'lr': 2.772080590088488e-06, 'samples': 24390144, 'steps': 47636, 'batch_loss/train': 0.7128264459315687} +12/29/2021 05:40:02 - INFO - codeparrot_training - Step 47637: {'lr': 2.7697401794689324e-06, 'samples': 24390656, 'steps': 47637, 'batch_loss/train': 0.49858250422403216} +12/29/2021 05:40:12 - INFO - codeparrot_training - Step 47638: {'lr': 2.7674007517435317e-06, 'samples': 24391168, 'steps': 47638, 'batch_loss/train': 0.6818771832622588} +12/29/2021 05:40:23 - INFO - codeparrot_training - Step 47639: {'lr': 2.765062306921584e-06, 'samples': 24391680, 'steps': 47639, 'batch_loss/train': 0.6963473975192755} +12/29/2021 05:40:35 - INFO - codeparrot_training - Step 47640: {'lr': 2.7627248450124154e-06, 'samples': 24392192, 'steps': 47640, 'batch_loss/train': 0.8828220115974545} +12/29/2021 05:40:46 - INFO - codeparrot_training - Step 47641: {'lr': 2.760388366025296e-06, 'samples': 24392704, 'steps': 47641, 'batch_loss/train': 0.7388540196698159} +12/29/2021 05:40:57 - INFO - codeparrot_training - Step 47642: {'lr': 2.758052869969496e-06, 'samples': 24393216, 'steps': 47642, 'batch_loss/train': 0.7158956532366574} +12/29/2021 05:41:09 - INFO - codeparrot_training - Step 47643: {'lr': 2.7557183568543422e-06, 'samples': 24393728, 'steps': 47643, 'batch_loss/train': 0.5124149362090975} +12/29/2021 05:41:20 - INFO - codeparrot_training - Step 47644: {'lr': 2.7533848266891316e-06, 'samples': 24394240, 'steps': 47644, 'batch_loss/train': 0.7286157519556582} +12/29/2021 05:41:31 - INFO - codeparrot_training - Step 47645: {'lr': 2.751052279483052e-06, 'samples': 24394752, 'steps': 47645, 'batch_loss/train': 0.7098006107844412} +12/29/2021 05:41:41 - INFO - codeparrot_training - Step 47646: {'lr': 2.7487207152454565e-06, 'samples': 24395264, 'steps': 47646, 'batch_loss/train': 0.7338326014578342} +12/29/2021 05:41:54 - INFO - codeparrot_training - Step 47647: {'lr': 2.746390133985588e-06, 'samples': 24395776, 'steps': 47647, 'batch_loss/train': 0.6624634996987879} +12/29/2021 05:42:05 - INFO - codeparrot_training - Step 47648: {'lr': 2.744060535712717e-06, 'samples': 24396288, 'steps': 47648, 'batch_loss/train': 0.643579167779535} +12/29/2021 05:42:16 - INFO - codeparrot_training - Step 47649: {'lr': 2.741731920436058e-06, 'samples': 24396800, 'steps': 47649, 'batch_loss/train': 0.6745621864683926} +12/29/2021 05:42:28 - INFO - codeparrot_training - Step 47650: {'lr': 2.739404288164965e-06, 'samples': 24397312, 'steps': 47650, 'batch_loss/train': 1.4278827114030719} +12/29/2021 05:42:39 - INFO - codeparrot_training - Step 47651: {'lr': 2.737077638908625e-06, 'samples': 24397824, 'steps': 47651, 'batch_loss/train': 1.4574260907247663} +12/29/2021 05:42:49 - INFO - codeparrot_training - Step 47652: {'lr': 2.7347519726763083e-06, 'samples': 24398336, 'steps': 47652, 'batch_loss/train': 0.7458389014936984} +12/29/2021 05:43:00 - INFO - codeparrot_training - Step 47653: {'lr': 2.7324272894772295e-06, 'samples': 24398848, 'steps': 47653, 'batch_loss/train': 0.6280627138912678} +12/29/2021 05:43:13 - INFO - codeparrot_training - Step 47654: {'lr': 2.7301035893206593e-06, 'samples': 24399360, 'steps': 47654, 'batch_loss/train': 0.8101213546469808} +12/29/2021 05:43:23 - INFO - codeparrot_training - Step 47655: {'lr': 2.7277808722158405e-06, 'samples': 24399872, 'steps': 47655, 'batch_loss/train': 0.8412392577156425} +12/29/2021 05:43:34 - INFO - codeparrot_training - Step 47656: {'lr': 2.7254591381720152e-06, 'samples': 24400384, 'steps': 47656, 'batch_loss/train': 0.6545535447075963} +12/29/2021 05:43:46 - INFO - codeparrot_training - Step 47657: {'lr': 2.7231383871983707e-06, 'samples': 24400896, 'steps': 47657, 'batch_loss/train': 0.6051602603401989} +12/29/2021 05:43:57 - INFO - codeparrot_training - Step 47658: {'lr': 2.7208186193041494e-06, 'samples': 24401408, 'steps': 47658, 'batch_loss/train': 0.7897325940430164} +12/29/2021 05:44:07 - INFO - codeparrot_training - Step 47659: {'lr': 2.7184998344986224e-06, 'samples': 24401920, 'steps': 47659, 'batch_loss/train': 0.6984283630445134} +12/29/2021 05:44:20 - INFO - codeparrot_training - Step 47660: {'lr': 2.716182032790948e-06, 'samples': 24402432, 'steps': 47660, 'batch_loss/train': 0.6879128087311983} +12/29/2021 05:44:31 - INFO - codeparrot_training - Step 47661: {'lr': 2.7138652141903696e-06, 'samples': 24402944, 'steps': 47661, 'batch_loss/train': 0.7455329755321145} +12/29/2021 05:44:41 - INFO - codeparrot_training - Step 47662: {'lr': 2.711549378706102e-06, 'samples': 24403456, 'steps': 47662, 'batch_loss/train': 0.6691089926753193} +12/29/2021 05:44:52 - INFO - codeparrot_training - Step 47663: {'lr': 2.7092345263473317e-06, 'samples': 24403968, 'steps': 47663, 'batch_loss/train': 0.7134580994024873} +12/29/2021 05:45:05 - INFO - codeparrot_training - Step 47664: {'lr': 2.7069206571232464e-06, 'samples': 24404480, 'steps': 47664, 'batch_loss/train': 0.5992817102523986} +12/29/2021 05:45:15 - INFO - codeparrot_training - Step 47665: {'lr': 2.704607771043116e-06, 'samples': 24404992, 'steps': 47665, 'batch_loss/train': 1.2940046517178416} +12/29/2021 05:45:26 - INFO - codeparrot_training - Step 47666: {'lr': 2.702295868116045e-06, 'samples': 24405504, 'steps': 47666, 'batch_loss/train': 0.6928439093753695} +12/29/2021 05:45:38 - INFO - codeparrot_training - Step 47667: {'lr': 2.6999849483513027e-06, 'samples': 24406016, 'steps': 47667, 'batch_loss/train': 0.7364609576761723} +12/29/2021 05:45:49 - INFO - codeparrot_training - Step 47668: {'lr': 2.6976750117580218e-06, 'samples': 24406528, 'steps': 47668, 'batch_loss/train': 0.7101045162416995} +12/29/2021 05:45:59 - INFO - codeparrot_training - Step 47669: {'lr': 2.6953660583454166e-06, 'samples': 24407040, 'steps': 47669, 'batch_loss/train': 0.691777684725821} +12/29/2021 05:46:11 - INFO - codeparrot_training - Step 47670: {'lr': 2.693058088122674e-06, 'samples': 24407552, 'steps': 47670, 'batch_loss/train': 0.5931711997836828} +12/29/2021 05:46:22 - INFO - codeparrot_training - Step 47671: {'lr': 2.690751101098926e-06, 'samples': 24408064, 'steps': 47671, 'batch_loss/train': 0.6356811947189271} +12/29/2021 05:46:33 - INFO - codeparrot_training - Step 47672: {'lr': 2.6884450972833875e-06, 'samples': 24408576, 'steps': 47672, 'batch_loss/train': 0.7824193052947521} +12/29/2021 05:46:43 - INFO - codeparrot_training - Step 47673: {'lr': 2.686140076685217e-06, 'samples': 24409088, 'steps': 47673, 'batch_loss/train': 0.7002097070217133} +12/29/2021 05:46:56 - INFO - codeparrot_training - Step 47674: {'lr': 2.6838360393135475e-06, 'samples': 24409600, 'steps': 47674, 'batch_loss/train': 0.7601842237636447} +12/29/2021 05:47:07 - INFO - codeparrot_training - Step 47675: {'lr': 2.681532985177565e-06, 'samples': 24410112, 'steps': 47675, 'batch_loss/train': 0.7213630815385841} +12/29/2021 05:47:17 - INFO - codeparrot_training - Step 47676: {'lr': 2.6792309142864566e-06, 'samples': 24410624, 'steps': 47676, 'batch_loss/train': 0.716326053137891} +12/29/2021 05:47:29 - INFO - codeparrot_training - Step 47677: {'lr': 2.676929826649299e-06, 'samples': 24411136, 'steps': 47677, 'batch_loss/train': 0.7314335932023823} +12/29/2021 05:47:40 - INFO - codeparrot_training - Step 47678: {'lr': 2.6746297222753067e-06, 'samples': 24411648, 'steps': 47678, 'batch_loss/train': 0.6779692266136408} +12/29/2021 05:47:51 - INFO - codeparrot_training - Step 47679: {'lr': 2.672330601173584e-06, 'samples': 24412160, 'steps': 47679, 'batch_loss/train': 0.6458872593939304} +12/29/2021 05:48:03 - INFO - codeparrot_training - Step 47680: {'lr': 2.670032463353317e-06, 'samples': 24412672, 'steps': 47680, 'batch_loss/train': 0.7357852566055954} +12/29/2021 05:48:13 - INFO - codeparrot_training - Step 47681: {'lr': 2.6677353088235825e-06, 'samples': 24413184, 'steps': 47681, 'batch_loss/train': 0.6947172600775957} +12/29/2021 05:48:24 - INFO - codeparrot_training - Step 47682: {'lr': 2.665439137593567e-06, 'samples': 24413696, 'steps': 47682, 'batch_loss/train': 0.6709090601652861} +12/29/2021 05:48:37 - INFO - codeparrot_training - Step 47683: {'lr': 2.6631439496723477e-06, 'samples': 24414208, 'steps': 47683, 'batch_loss/train': 0.6313450834713876} +12/29/2021 05:48:47 - INFO - codeparrot_training - Step 47684: {'lr': 2.6608497450691106e-06, 'samples': 24414720, 'steps': 47684, 'batch_loss/train': 0.6791051959153265} +12/29/2021 05:48:58 - INFO - codeparrot_training - Step 47685: {'lr': 2.6585565237929323e-06, 'samples': 24415232, 'steps': 47685, 'batch_loss/train': 0.7347337007522583} +12/29/2021 05:49:09 - INFO - codeparrot_training - Step 47686: {'lr': 2.6562642858529162e-06, 'samples': 24415744, 'steps': 47686, 'batch_loss/train': 0.6793211402837187} +12/29/2021 05:49:21 - INFO - codeparrot_training - Step 47687: {'lr': 2.6539730312582223e-06, 'samples': 24416256, 'steps': 47687, 'batch_loss/train': 0.544526330602821} +12/29/2021 05:49:31 - INFO - codeparrot_training - Step 47688: {'lr': 2.651682760017954e-06, 'samples': 24416768, 'steps': 47688, 'batch_loss/train': 0.7955884588882327} +12/29/2021 05:49:42 - INFO - codeparrot_training - Step 47689: {'lr': 2.6493934721411593e-06, 'samples': 24417280, 'steps': 47689, 'batch_loss/train': 0.6970440991572104} +12/29/2021 05:49:54 - INFO - codeparrot_training - Step 47690: {'lr': 2.6471051676369982e-06, 'samples': 24417792, 'steps': 47690, 'batch_loss/train': 0.6857084180228412} +12/29/2021 05:50:05 - INFO - codeparrot_training - Step 47691: {'lr': 2.644817846514547e-06, 'samples': 24418304, 'steps': 47691, 'batch_loss/train': 0.7099656816571951} +12/29/2021 05:50:15 - INFO - codeparrot_training - Step 47692: {'lr': 2.6425315087829084e-06, 'samples': 24418816, 'steps': 47692, 'batch_loss/train': 0.7062044506892562} +12/29/2021 05:50:28 - INFO - codeparrot_training - Step 47693: {'lr': 2.640246154451187e-06, 'samples': 24419328, 'steps': 47693, 'batch_loss/train': 0.7461221646517515} +12/29/2021 05:50:39 - INFO - codeparrot_training - Step 47694: {'lr': 2.6379617835284032e-06, 'samples': 24419840, 'steps': 47694, 'batch_loss/train': 0.6903555886819959} +12/29/2021 05:50:49 - INFO - codeparrot_training - Step 47695: {'lr': 2.635678396023716e-06, 'samples': 24420352, 'steps': 47695, 'batch_loss/train': 0.8151137242093682} +12/29/2021 05:51:01 - INFO - codeparrot_training - Step 47696: {'lr': 2.633395991946147e-06, 'samples': 24420864, 'steps': 47696, 'batch_loss/train': 0.6811312790960073} +12/29/2021 05:51:12 - INFO - codeparrot_training - Step 47697: {'lr': 2.631114571304827e-06, 'samples': 24421376, 'steps': 47697, 'batch_loss/train': 0.7902692509815097} +12/29/2021 05:51:23 - INFO - codeparrot_training - Step 47698: {'lr': 2.6288341341087485e-06, 'samples': 24421888, 'steps': 47698, 'batch_loss/train': 0.7035880438052118} +12/29/2021 05:51:33 - INFO - codeparrot_training - Step 47699: {'lr': 2.6265546803670714e-06, 'samples': 24422400, 'steps': 47699, 'batch_loss/train': 0.6933302120305598} +12/29/2021 05:51:46 - INFO - codeparrot_training - Step 47700: {'lr': 2.6242762100887884e-06, 'samples': 24422912, 'steps': 47700, 'batch_loss/train': 0.7562898825854063} +12/29/2021 05:51:57 - INFO - codeparrot_training - Step 47701: {'lr': 2.6219987232829477e-06, 'samples': 24423424, 'steps': 47701, 'batch_loss/train': 0.7579228293616325} +12/29/2021 05:52:07 - INFO - codeparrot_training - Step 47702: {'lr': 2.619722219958681e-06, 'samples': 24423936, 'steps': 47702, 'batch_loss/train': 0.671227537561208} +12/29/2021 05:52:20 - INFO - codeparrot_training - Step 47703: {'lr': 2.6174467001249536e-06, 'samples': 24424448, 'steps': 47703, 'batch_loss/train': 0.7554085785523057} +12/29/2021 05:52:31 - INFO - codeparrot_training - Step 47704: {'lr': 2.6151721637908686e-06, 'samples': 24424960, 'steps': 47704, 'batch_loss/train': 0.7275888514705002} +12/29/2021 05:52:42 - INFO - codeparrot_training - Step 47705: {'lr': 2.612898610965447e-06, 'samples': 24425472, 'steps': 47705, 'batch_loss/train': 0.8493280680850148} +12/29/2021 05:52:52 - INFO - codeparrot_training - Step 47706: {'lr': 2.6106260416577376e-06, 'samples': 24425984, 'steps': 47706, 'batch_loss/train': 0.9366579549387097} +12/29/2021 05:53:04 - INFO - codeparrot_training - Step 47707: {'lr': 2.6083544558767604e-06, 'samples': 24426496, 'steps': 47707, 'batch_loss/train': 0.8141442453488708} +12/29/2021 05:53:15 - INFO - codeparrot_training - Step 47708: {'lr': 2.6060838536315356e-06, 'samples': 24427008, 'steps': 47708, 'batch_loss/train': 0.6405846420675516} +12/29/2021 05:53:26 - INFO - codeparrot_training - Step 47709: {'lr': 2.60381423493114e-06, 'samples': 24427520, 'steps': 47709, 'batch_loss/train': 0.659193079918623} +12/29/2021 05:53:38 - INFO - codeparrot_training - Step 47710: {'lr': 2.6015455997845383e-06, 'samples': 24428032, 'steps': 47710, 'batch_loss/train': 0.8405475742183626} +12/29/2021 05:53:48 - INFO - codeparrot_training - Step 47711: {'lr': 2.599277948200807e-06, 'samples': 24428544, 'steps': 47711, 'batch_loss/train': 0.6792366425506771} +12/29/2021 05:53:59 - INFO - codeparrot_training - Step 47712: {'lr': 2.59701128018891e-06, 'samples': 24429056, 'steps': 47712, 'batch_loss/train': 0.648960332153365} +12/29/2021 05:54:12 - INFO - codeparrot_training - Step 47713: {'lr': 2.594745595757897e-06, 'samples': 24429568, 'steps': 47713, 'batch_loss/train': 0.7796358547639102} +12/29/2021 05:54:22 - INFO - codeparrot_training - Step 47714: {'lr': 2.59248089491676e-06, 'samples': 24430080, 'steps': 47714, 'batch_loss/train': 0.7033442137762904} +12/29/2021 05:54:33 - INFO - codeparrot_training - Step 47715: {'lr': 2.590217177674492e-06, 'samples': 24430592, 'steps': 47715, 'batch_loss/train': 0.6876225797459483} +12/29/2021 05:54:43 - INFO - codeparrot_training - Step 47716: {'lr': 2.587954444040086e-06, 'samples': 24431104, 'steps': 47716, 'batch_loss/train': 0.6709520540898666} +12/29/2021 05:54:56 - INFO - codeparrot_training - Step 47717: {'lr': 2.58569269402259e-06, 'samples': 24431616, 'steps': 47717, 'batch_loss/train': 0.7230179845355451} +12/29/2021 05:55:06 - INFO - codeparrot_training - Step 47718: {'lr': 2.5834319276309413e-06, 'samples': 24432128, 'steps': 47718, 'batch_loss/train': 0.843428622931242} +12/29/2021 05:55:17 - INFO - codeparrot_training - Step 47719: {'lr': 2.581172144874133e-06, 'samples': 24432640, 'steps': 47719, 'batch_loss/train': 0.6772095162887126} +12/29/2021 05:55:29 - INFO - codeparrot_training - Step 47720: {'lr': 2.578913345761158e-06, 'samples': 24433152, 'steps': 47720, 'batch_loss/train': 0.6929710061522201} +12/29/2021 05:55:40 - INFO - codeparrot_training - Step 47721: {'lr': 2.5766555303010364e-06, 'samples': 24433664, 'steps': 47721, 'batch_loss/train': 0.7335815359838307} +12/29/2021 05:55:51 - INFO - codeparrot_training - Step 47722: {'lr': 2.574398698502678e-06, 'samples': 24434176, 'steps': 47722, 'batch_loss/train': 0.48765923114842735} +12/29/2021 05:56:04 - INFO - codeparrot_training - Step 47723: {'lr': 2.572142850375103e-06, 'samples': 24434688, 'steps': 47723, 'batch_loss/train': 0.6697178075555712} +12/29/2021 05:56:14 - INFO - codeparrot_training - Step 47724: {'lr': 2.5698879859272496e-06, 'samples': 24435200, 'steps': 47724, 'batch_loss/train': 0.6996753485873342} +12/29/2021 05:56:25 - INFO - codeparrot_training - Step 47725: {'lr': 2.5676341051681375e-06, 'samples': 24435712, 'steps': 47725, 'batch_loss/train': 0.7300521805882454} +12/29/2021 05:56:37 - INFO - codeparrot_training - Step 47726: {'lr': 2.5653812081066487e-06, 'samples': 24436224, 'steps': 47726, 'batch_loss/train': 0.9597101458348334} +12/29/2021 05:56:48 - INFO - codeparrot_training - Step 47727: {'lr': 2.5631292947517483e-06, 'samples': 24436736, 'steps': 47727, 'batch_loss/train': 0.5616514887660742} +12/29/2021 05:56:59 - INFO - codeparrot_training - Step 47728: {'lr': 2.560878365112457e-06, 'samples': 24437248, 'steps': 47728, 'batch_loss/train': 0.7955249072983861} +12/29/2021 05:57:09 - INFO - codeparrot_training - Step 47729: {'lr': 2.5586284191976837e-06, 'samples': 24437760, 'steps': 47729, 'batch_loss/train': 0.7172778057865798} +12/29/2021 05:57:22 - INFO - codeparrot_training - Step 47730: {'lr': 2.556379457016367e-06, 'samples': 24438272, 'steps': 47730, 'batch_loss/train': 0.6778391457628459} +12/29/2021 05:57:33 - INFO - codeparrot_training - Step 47731: {'lr': 2.5541314785774426e-06, 'samples': 24438784, 'steps': 47731, 'batch_loss/train': 0.7416002778336406} +12/29/2021 05:57:43 - INFO - codeparrot_training - Step 47732: {'lr': 2.551884483889877e-06, 'samples': 24439296, 'steps': 47732, 'batch_loss/train': 0.802486521191895} +12/29/2021 05:57:55 - INFO - codeparrot_training - Step 47733: {'lr': 2.5496384729625787e-06, 'samples': 24439808, 'steps': 47733, 'batch_loss/train': 0.7423054231330752} +12/29/2021 05:58:06 - INFO - codeparrot_training - Step 47734: {'lr': 2.5473934458044854e-06, 'samples': 24440320, 'steps': 47734, 'batch_loss/train': 0.7351760109886527} +12/29/2021 05:58:17 - INFO - codeparrot_training - Step 47735: {'lr': 2.545149402424507e-06, 'samples': 24440832, 'steps': 47735, 'batch_loss/train': 0.7284182007424533} +12/29/2021 05:58:29 - INFO - codeparrot_training - Step 47736: {'lr': 2.542906342831608e-06, 'samples': 24441344, 'steps': 47736, 'batch_loss/train': 0.6890797272790223} +12/29/2021 05:58:39 - INFO - codeparrot_training - Step 47737: {'lr': 2.54066426703467e-06, 'samples': 24441856, 'steps': 47737, 'batch_loss/train': 0.70770706795156} +12/29/2021 05:58:50 - INFO - codeparrot_training - Step 47738: {'lr': 2.5384231750425757e-06, 'samples': 24442368, 'steps': 47738, 'batch_loss/train': 0.7050310482736677} +12/29/2021 05:59:01 - INFO - codeparrot_training - Step 47739: {'lr': 2.5361830668643172e-06, 'samples': 24442880, 'steps': 47739, 'batch_loss/train': 0.6243020398542285} +12/29/2021 05:59:13 - INFO - codeparrot_training - Step 47740: {'lr': 2.5339439425087487e-06, 'samples': 24443392, 'steps': 47740, 'batch_loss/train': 0.7244626749306917} +12/29/2021 05:59:24 - INFO - codeparrot_training - Step 47741: {'lr': 2.5317058019847796e-06, 'samples': 24443904, 'steps': 47741, 'batch_loss/train': 0.7579465033486485} +12/29/2021 05:59:35 - INFO - codeparrot_training - Step 47742: {'lr': 2.5294686453012915e-06, 'samples': 24444416, 'steps': 47742, 'batch_loss/train': 0.7090219967067242} +12/29/2021 05:59:47 - INFO - codeparrot_training - Step 47743: {'lr': 2.5272324724671948e-06, 'samples': 24444928, 'steps': 47743, 'batch_loss/train': 0.6628467694390565} +12/29/2021 05:59:58 - INFO - codeparrot_training - Step 47744: {'lr': 2.524997283491398e-06, 'samples': 24445440, 'steps': 47744, 'batch_loss/train': 0.6778377252630889} +12/29/2021 06:00:08 - INFO - codeparrot_training - Step 47745: {'lr': 2.522763078382756e-06, 'samples': 24445952, 'steps': 47745, 'batch_loss/train': 0.7339647747576237} +12/29/2021 06:00:21 - INFO - codeparrot_training - Step 47746: {'lr': 2.5205298571501777e-06, 'samples': 24446464, 'steps': 47746, 'batch_loss/train': 0.7638788828044198} +12/29/2021 06:00:31 - INFO - codeparrot_training - Step 47747: {'lr': 2.5182976198025176e-06, 'samples': 24446976, 'steps': 47747, 'batch_loss/train': 0.6069710231386125} +12/29/2021 06:00:42 - INFO - codeparrot_training - Step 47748: {'lr': 2.516066366348657e-06, 'samples': 24447488, 'steps': 47748, 'batch_loss/train': 0.7012198213487864} +12/29/2021 06:00:53 - INFO - codeparrot_training - Step 47749: {'lr': 2.513836096797478e-06, 'samples': 24448000, 'steps': 47749, 'batch_loss/train': 0.6629310528514907} +12/29/2021 06:01:05 - INFO - codeparrot_training - Step 47750: {'lr': 2.511606811157835e-06, 'samples': 24448512, 'steps': 47750, 'batch_loss/train': 0.6230931570753455} +12/29/2021 06:01:15 - INFO - codeparrot_training - Step 47751: {'lr': 2.509378509438609e-06, 'samples': 24449024, 'steps': 47751, 'batch_loss/train': 0.6466726534999907} +12/29/2021 06:01:26 - INFO - codeparrot_training - Step 47752: {'lr': 2.507151191648627e-06, 'samples': 24449536, 'steps': 47752, 'batch_loss/train': 0.7356327336165123} +12/29/2021 06:01:39 - INFO - codeparrot_training - Step 47753: {'lr': 2.504924857796742e-06, 'samples': 24450048, 'steps': 47753, 'batch_loss/train': 0.6760245691984892} +12/29/2021 06:01:49 - INFO - codeparrot_training - Step 47754: {'lr': 2.5026995078918645e-06, 'samples': 24450560, 'steps': 47754, 'batch_loss/train': 0.62885895796353} +12/29/2021 06:02:00 - INFO - codeparrot_training - Step 47755: {'lr': 2.500475141942793e-06, 'samples': 24451072, 'steps': 47755, 'batch_loss/train': 0.6601232378743589} +12/29/2021 06:02:12 - INFO - codeparrot_training - Step 47756: {'lr': 2.498251759958381e-06, 'samples': 24451584, 'steps': 47756, 'batch_loss/train': 0.8212485425174236} +12/29/2021 06:02:23 - INFO - codeparrot_training - Step 47757: {'lr': 2.496029361947455e-06, 'samples': 24452096, 'steps': 47757, 'batch_loss/train': 0.6505339927971363} +12/29/2021 06:02:33 - INFO - codeparrot_training - Step 47758: {'lr': 2.4938079479188693e-06, 'samples': 24452608, 'steps': 47758, 'batch_loss/train': 0.7442509746178985} +12/29/2021 06:02:47 - INFO - codeparrot_training - Step 47759: {'lr': 2.4915875178814495e-06, 'samples': 24453120, 'steps': 47759, 'batch_loss/train': 0.7725750021636486} +12/29/2021 06:02:58 - INFO - codeparrot_training - Step 47760: {'lr': 2.4893680718440228e-06, 'samples': 24453632, 'steps': 47760, 'batch_loss/train': 1.1017093257978559} +12/29/2021 06:03:09 - INFO - codeparrot_training - Step 47761: {'lr': 2.4871496098154146e-06, 'samples': 24454144, 'steps': 47761, 'batch_loss/train': 0.7323428322561085} +12/29/2021 06:03:19 - INFO - codeparrot_training - Step 47762: {'lr': 2.4849321318044237e-06, 'samples': 24454656, 'steps': 47762, 'batch_loss/train': 0.690772014670074} +12/29/2021 06:03:32 - INFO - codeparrot_training - Step 47763: {'lr': 2.4827156378199324e-06, 'samples': 24455168, 'steps': 47763, 'batch_loss/train': 0.5770046599209309} +12/29/2021 06:03:42 - INFO - codeparrot_training - Step 47764: {'lr': 2.480500127870655e-06, 'samples': 24455680, 'steps': 47764, 'batch_loss/train': 0.6983143100515008} +12/29/2021 06:03:53 - INFO - codeparrot_training - Step 47765: {'lr': 2.4782856019654742e-06, 'samples': 24456192, 'steps': 47765, 'batch_loss/train': 0.608113358495757} +12/29/2021 06:04:05 - INFO - codeparrot_training - Step 47766: {'lr': 2.4760720601131604e-06, 'samples': 24456704, 'steps': 47766, 'batch_loss/train': 0.7204752609832212} +12/29/2021 06:04:16 - INFO - codeparrot_training - Step 47767: {'lr': 2.473859502322512e-06, 'samples': 24457216, 'steps': 47767, 'batch_loss/train': 0.7346412697806954} +12/29/2021 06:04:26 - INFO - codeparrot_training - Step 47768: {'lr': 2.471647928602355e-06, 'samples': 24457728, 'steps': 47768, 'batch_loss/train': 0.8320850385352969} +12/29/2021 06:04:37 - INFO - codeparrot_training - Step 47769: {'lr': 2.469437338961461e-06, 'samples': 24458240, 'steps': 47769, 'batch_loss/train': 0.7043535097036511} +12/29/2021 06:04:50 - INFO - codeparrot_training - Step 47770: {'lr': 2.4672277334086e-06, 'samples': 24458752, 'steps': 47770, 'batch_loss/train': 0.7450750067364424} +12/29/2021 06:05:00 - INFO - codeparrot_training - Step 47771: {'lr': 2.4650191119525988e-06, 'samples': 24459264, 'steps': 47771, 'batch_loss/train': 0.68956726975739} +12/29/2021 06:05:11 - INFO - codeparrot_training - Step 47772: {'lr': 2.4628114746022e-06, 'samples': 24459776, 'steps': 47772, 'batch_loss/train': 0.7496702512726188} +12/29/2021 06:05:23 - INFO - codeparrot_training - Step 47773: {'lr': 2.4606048213662026e-06, 'samples': 24460288, 'steps': 47773, 'batch_loss/train': 0.8086654795333743} +12/29/2021 06:05:34 - INFO - codeparrot_training - Step 47774: {'lr': 2.4583991522533767e-06, 'samples': 24460800, 'steps': 47774, 'batch_loss/train': 0.8106328977737576} +12/29/2021 06:05:44 - INFO - codeparrot_training - Step 47775: {'lr': 2.4561944672724933e-06, 'samples': 24461312, 'steps': 47775, 'batch_loss/train': 0.6869902145117521} +12/29/2021 06:05:56 - INFO - codeparrot_training - Step 47776: {'lr': 2.453990766432296e-06, 'samples': 24461824, 'steps': 47776, 'batch_loss/train': 0.6611487192567438} +12/29/2021 06:06:07 - INFO - codeparrot_training - Step 47777: {'lr': 2.451788049741582e-06, 'samples': 24462336, 'steps': 47777, 'batch_loss/train': 0.6591453799046576} +12/29/2021 06:06:18 - INFO - codeparrot_training - Step 47778: {'lr': 2.449586317209096e-06, 'samples': 24462848, 'steps': 47778, 'batch_loss/train': 0.6820595473982394} +12/29/2021 06:06:28 - INFO - codeparrot_training - Step 47779: {'lr': 2.447385568843552e-06, 'samples': 24463360, 'steps': 47779, 'batch_loss/train': 0.7436712952330709} +12/29/2021 06:06:40 - INFO - codeparrot_training - Step 47780: {'lr': 2.445185804653749e-06, 'samples': 24463872, 'steps': 47780, 'batch_loss/train': 0.7649499531835318} +12/29/2021 06:06:51 - INFO - codeparrot_training - Step 47781: {'lr': 2.442987024648402e-06, 'samples': 24464384, 'steps': 47781, 'batch_loss/train': 0.6062158061831724} +12/29/2021 06:07:02 - INFO - codeparrot_training - Step 47782: {'lr': 2.4407892288362543e-06, 'samples': 24464896, 'steps': 47782, 'batch_loss/train': 0.6878988966345787} +12/29/2021 06:07:15 - INFO - codeparrot_training - Step 47783: {'lr': 2.438592417226104e-06, 'samples': 24465408, 'steps': 47783, 'batch_loss/train': 0.7011427953839302} +12/29/2021 06:07:25 - INFO - codeparrot_training - Step 47784: {'lr': 2.4363965898265838e-06, 'samples': 24465920, 'steps': 47784, 'batch_loss/train': 0.6100686518475413} +12/29/2021 06:07:36 - INFO - codeparrot_training - Step 47785: {'lr': 2.4342017466464917e-06, 'samples': 24466432, 'steps': 47785, 'batch_loss/train': 0.7271692119538784} +12/29/2021 06:07:48 - INFO - codeparrot_training - Step 47786: {'lr': 2.432007887694543e-06, 'samples': 24466944, 'steps': 47786, 'batch_loss/train': 0.47528575686737895} +12/29/2021 06:07:59 - INFO - codeparrot_training - Step 47787: {'lr': 2.429815012979453e-06, 'samples': 24467456, 'steps': 47787, 'batch_loss/train': 0.44579508039169014} +12/29/2021 06:08:10 - INFO - codeparrot_training - Step 47788: {'lr': 2.427623122509909e-06, 'samples': 24467968, 'steps': 47788, 'batch_loss/train': 0.7765347734093666} +12/29/2021 06:08:22 - INFO - codeparrot_training - Step 47789: {'lr': 2.4254322162947095e-06, 'samples': 24468480, 'steps': 47789, 'batch_loss/train': 0.783193071372807} +12/29/2021 06:08:33 - INFO - codeparrot_training - Step 47790: {'lr': 2.423242294342459e-06, 'samples': 24468992, 'steps': 47790, 'batch_loss/train': 0.6993343327194452} +12/29/2021 06:08:43 - INFO - codeparrot_training - Step 47791: {'lr': 2.4210533566619285e-06, 'samples': 24469504, 'steps': 47791, 'batch_loss/train': 0.7143426039256155} +12/29/2021 06:08:54 - INFO - codeparrot_training - Step 47792: {'lr': 2.418865403261833e-06, 'samples': 24470016, 'steps': 47792, 'batch_loss/train': 0.7130467051174492} +12/29/2021 06:09:06 - INFO - codeparrot_training - Step 47793: {'lr': 2.416678434150832e-06, 'samples': 24470528, 'steps': 47793, 'batch_loss/train': 0.6840829190332443} +12/29/2021 06:09:17 - INFO - codeparrot_training - Step 47794: {'lr': 2.414492449337613e-06, 'samples': 24471040, 'steps': 47794, 'batch_loss/train': 0.7941497825086117} +12/29/2021 06:09:27 - INFO - codeparrot_training - Step 47795: {'lr': 2.4123074488309195e-06, 'samples': 24471552, 'steps': 47795, 'batch_loss/train': 0.7259944165125489} +12/29/2021 06:09:39 - INFO - codeparrot_training - Step 47796: {'lr': 2.4101234326394107e-06, 'samples': 24472064, 'steps': 47796, 'batch_loss/train': 0.6532089905813336} +12/29/2021 06:09:50 - INFO - codeparrot_training - Step 47797: {'lr': 2.4079404007717464e-06, 'samples': 24472576, 'steps': 47797, 'batch_loss/train': 0.7172740655951202} +12/29/2021 06:10:01 - INFO - codeparrot_training - Step 47798: {'lr': 2.4057583532366143e-06, 'samples': 24473088, 'steps': 47798, 'batch_loss/train': 0.698839204153046} +12/29/2021 06:10:13 - INFO - codeparrot_training - Step 47799: {'lr': 2.403577290042702e-06, 'samples': 24473600, 'steps': 47799, 'batch_loss/train': 0.6689703653100878} +12/29/2021 06:10:24 - INFO - codeparrot_training - Step 47800: {'lr': 2.401397211198697e-06, 'samples': 24474112, 'steps': 47800, 'batch_loss/train': 0.6834412661846727} +12/29/2021 06:10:35 - INFO - codeparrot_training - Step 47801: {'lr': 2.399218116713231e-06, 'samples': 24474624, 'steps': 47801, 'batch_loss/train': 0.8007857422344387} +12/29/2021 06:10:45 - INFO - codeparrot_training - Step 47802: {'lr': 2.3970400065949917e-06, 'samples': 24475136, 'steps': 47802, 'batch_loss/train': 0.8229571064002812} +12/29/2021 06:10:57 - INFO - codeparrot_training - Step 47803: {'lr': 2.394862880852611e-06, 'samples': 24475648, 'steps': 47803, 'batch_loss/train': 0.6322764959186316} +12/29/2021 06:11:08 - INFO - codeparrot_training - Step 47804: {'lr': 2.392686739494804e-06, 'samples': 24476160, 'steps': 47804, 'batch_loss/train': 0.627389463596046} +12/29/2021 06:11:19 - INFO - codeparrot_training - Step 47805: {'lr': 2.3905115825301195e-06, 'samples': 24476672, 'steps': 47805, 'batch_loss/train': 0.6302654803730547} +12/29/2021 06:11:31 - INFO - codeparrot_training - Step 47806: {'lr': 2.3883374099673283e-06, 'samples': 24477184, 'steps': 47806, 'batch_loss/train': 0.678322727442719} +12/29/2021 06:11:41 - INFO - codeparrot_training - Step 47807: {'lr': 2.3861642218149794e-06, 'samples': 24477696, 'steps': 47807, 'batch_loss/train': 0.7042055055499077} +12/29/2021 06:11:52 - INFO - codeparrot_training - Step 47808: {'lr': 2.383992018081732e-06, 'samples': 24478208, 'steps': 47808, 'batch_loss/train': 0.6342944544740021} +12/29/2021 06:12:05 - INFO - codeparrot_training - Step 47809: {'lr': 2.381820798776274e-06, 'samples': 24478720, 'steps': 47809, 'batch_loss/train': 0.7180003626272082} +12/29/2021 06:12:16 - INFO - codeparrot_training - Step 47810: {'lr': 2.379650563907182e-06, 'samples': 24479232, 'steps': 47810, 'batch_loss/train': 0.6949202520772815} +12/29/2021 06:12:26 - INFO - codeparrot_training - Step 47811: {'lr': 2.3774813134831154e-06, 'samples': 24479744, 'steps': 47811, 'batch_loss/train': 0.7674744483083487} +12/29/2021 06:12:38 - INFO - codeparrot_training - Step 47812: {'lr': 2.3753130475126785e-06, 'samples': 24480256, 'steps': 47812, 'batch_loss/train': 0.7141416458180174} +12/29/2021 06:12:49 - INFO - codeparrot_training - Step 47813: {'lr': 2.3731457660044753e-06, 'samples': 24480768, 'steps': 47813, 'batch_loss/train': 0.7450713801663369} +12/29/2021 06:12:59 - INFO - codeparrot_training - Step 47814: {'lr': 2.370979468967166e-06, 'samples': 24481280, 'steps': 47814, 'batch_loss/train': 0.6574523556046188} +12/29/2021 06:13:10 - INFO - codeparrot_training - Step 47815: {'lr': 2.3688141564093547e-06, 'samples': 24481792, 'steps': 47815, 'batch_loss/train': 0.9067096458747983} +12/29/2021 06:13:23 - INFO - codeparrot_training - Step 47816: {'lr': 2.3666498283395897e-06, 'samples': 24482304, 'steps': 47816, 'batch_loss/train': 0.7486199312843382} +12/29/2021 06:13:34 - INFO - codeparrot_training - Step 47817: {'lr': 2.3644864847665594e-06, 'samples': 24482816, 'steps': 47817, 'batch_loss/train': 1.2960069170221686} +12/29/2021 06:13:44 - INFO - codeparrot_training - Step 47818: {'lr': 2.3623241256988394e-06, 'samples': 24483328, 'steps': 47818, 'batch_loss/train': 0.6385038332082331} +12/29/2021 06:13:57 - INFO - codeparrot_training - Step 47819: {'lr': 2.3601627511449785e-06, 'samples': 24483840, 'steps': 47819, 'batch_loss/train': 0.6355394159909338} +12/29/2021 06:14:08 - INFO - codeparrot_training - Step 47820: {'lr': 2.3580023611136093e-06, 'samples': 24484352, 'steps': 47820, 'batch_loss/train': 0.7159617925062776} +12/29/2021 06:14:18 - INFO - codeparrot_training - Step 47821: {'lr': 2.3558429556133353e-06, 'samples': 24484864, 'steps': 47821, 'batch_loss/train': 0.6682819621637464} +12/29/2021 06:14:29 - INFO - codeparrot_training - Step 47822: {'lr': 2.3536845346527338e-06, 'samples': 24485376, 'steps': 47822, 'batch_loss/train': 0.7039201455190778} +12/29/2021 06:14:41 - INFO - codeparrot_training - Step 47823: {'lr': 2.3515270982403245e-06, 'samples': 24485888, 'steps': 47823, 'batch_loss/train': 0.7213034280575812} +12/29/2021 06:14:52 - INFO - codeparrot_training - Step 47824: {'lr': 2.3493706463847964e-06, 'samples': 24486400, 'steps': 47824, 'batch_loss/train': 0.6896284276153892} +12/29/2021 06:15:03 - INFO - codeparrot_training - Step 47825: {'lr': 2.347215179094614e-06, 'samples': 24486912, 'steps': 47825, 'batch_loss/train': 0.7968774721957743} +12/29/2021 06:15:15 - INFO - codeparrot_training - Step 47826: {'lr': 2.3450606963784094e-06, 'samples': 24487424, 'steps': 47826, 'batch_loss/train': 0.8080652141943574} +12/29/2021 06:15:26 - INFO - codeparrot_training - Step 47827: {'lr': 2.3429071982447312e-06, 'samples': 24487936, 'steps': 47827, 'batch_loss/train': 0.6893614670261741} +12/29/2021 06:15:36 - INFO - codeparrot_training - Step 47828: {'lr': 2.3407546847021565e-06, 'samples': 24488448, 'steps': 47828, 'batch_loss/train': 0.6407864019274712} +12/29/2021 06:15:49 - INFO - codeparrot_training - Step 47829: {'lr': 2.338603155759206e-06, 'samples': 24488960, 'steps': 47829, 'batch_loss/train': 0.7129602746572345} +12/29/2021 06:16:00 - INFO - codeparrot_training - Step 47830: {'lr': 2.336452611424483e-06, 'samples': 24489472, 'steps': 47830, 'batch_loss/train': 0.7244433359010145} +12/29/2021 06:16:10 - INFO - codeparrot_training - Step 47831: {'lr': 2.3343030517064822e-06, 'samples': 24489984, 'steps': 47831, 'batch_loss/train': 0.6931854300200939} +12/29/2021 06:16:21 - INFO - codeparrot_training - Step 47832: {'lr': 2.332154476613779e-06, 'samples': 24490496, 'steps': 47832, 'batch_loss/train': 0.6419736444950104} +12/29/2021 06:16:33 - INFO - codeparrot_training - Step 47833: {'lr': 2.3300068861549506e-06, 'samples': 24491008, 'steps': 47833, 'batch_loss/train': 0.7541031839791685} +12/29/2021 06:16:43 - INFO - codeparrot_training - Step 47834: {'lr': 2.3278602803384618e-06, 'samples': 24491520, 'steps': 47834, 'batch_loss/train': 0.6964752525091171} +12/29/2021 06:16:54 - INFO - codeparrot_training - Step 47835: {'lr': 2.325714659172917e-06, 'samples': 24492032, 'steps': 47835, 'batch_loss/train': 0.6345683941617608} +12/29/2021 06:17:06 - INFO - codeparrot_training - Step 47836: {'lr': 2.323570022666782e-06, 'samples': 24492544, 'steps': 47836, 'batch_loss/train': 0.7004695096984506} +12/29/2021 06:17:17 - INFO - codeparrot_training - Step 47837: {'lr': 2.3214263708286054e-06, 'samples': 24493056, 'steps': 47837, 'batch_loss/train': 0.72265171026811} +12/29/2021 06:17:28 - INFO - codeparrot_training - Step 47838: {'lr': 2.3192837036669358e-06, 'samples': 24493568, 'steps': 47838, 'batch_loss/train': 0.8289039712399244} +12/29/2021 06:17:40 - INFO - codeparrot_training - Step 47839: {'lr': 2.3171420211902937e-06, 'samples': 24494080, 'steps': 47839, 'batch_loss/train': 0.6797897759824991} +12/29/2021 06:17:51 - INFO - codeparrot_training - Step 47840: {'lr': 2.3150013234071455e-06, 'samples': 24494592, 'steps': 47840, 'batch_loss/train': 0.6285850868443958} +12/29/2021 06:18:02 - INFO - codeparrot_training - Step 47841: {'lr': 2.3128616103260393e-06, 'samples': 24495104, 'steps': 47841, 'batch_loss/train': 0.6437177821062505} +12/29/2021 06:18:12 - INFO - codeparrot_training - Step 47842: {'lr': 2.310722881955468e-06, 'samples': 24495616, 'steps': 47842, 'batch_loss/train': 0.6936373198404908} +12/29/2021 06:18:24 - INFO - codeparrot_training - Step 47843: {'lr': 2.308585138303926e-06, 'samples': 24496128, 'steps': 47843, 'batch_loss/train': 0.7533102855086327} +12/29/2021 06:18:35 - INFO - codeparrot_training - Step 47844: {'lr': 2.3064483793799607e-06, 'samples': 24496640, 'steps': 47844, 'batch_loss/train': 0.7033873999025673} +12/29/2021 06:18:46 - INFO - codeparrot_training - Step 47845: {'lr': 2.3043126051919826e-06, 'samples': 24497152, 'steps': 47845, 'batch_loss/train': 0.6875504488125443} +12/29/2021 06:18:59 - INFO - codeparrot_training - Step 47846: {'lr': 2.3021778157485686e-06, 'samples': 24497664, 'steps': 47846, 'batch_loss/train': 0.6664745942689478} +12/29/2021 06:19:10 - INFO - codeparrot_training - Step 47847: {'lr': 2.3000440110581557e-06, 'samples': 24498176, 'steps': 47847, 'batch_loss/train': 0.7204827666282654} +12/29/2021 06:19:20 - INFO - codeparrot_training - Step 47848: {'lr': 2.297911191129237e-06, 'samples': 24498688, 'steps': 47848, 'batch_loss/train': 0.6455901218578219} +12/29/2021 06:19:32 - INFO - codeparrot_training - Step 47849: {'lr': 2.2957793559703066e-06, 'samples': 24499200, 'steps': 47849, 'batch_loss/train': 0.6844102372415364} +12/29/2021 06:19:43 - INFO - codeparrot_training - Step 47850: {'lr': 2.2936485055898293e-06, 'samples': 24499712, 'steps': 47850, 'batch_loss/train': 0.8301321649923921} +12/29/2021 06:19:54 - INFO - codeparrot_training - Step 47851: {'lr': 2.291518639996271e-06, 'samples': 24500224, 'steps': 47851, 'batch_loss/train': 0.5045414548367262} +12/29/2021 06:20:06 - INFO - codeparrot_training - Step 47852: {'lr': 2.2893897591980963e-06, 'samples': 24500736, 'steps': 47852, 'batch_loss/train': 0.6484007276594639} +12/29/2021 06:20:17 - INFO - codeparrot_training - Step 47853: {'lr': 2.287261863203771e-06, 'samples': 24501248, 'steps': 47853, 'batch_loss/train': 0.6711104116402566} +12/29/2021 06:20:28 - INFO - codeparrot_training - Step 47854: {'lr': 2.2851349520217612e-06, 'samples': 24501760, 'steps': 47854, 'batch_loss/train': 0.5759068724000826} +12/29/2021 06:20:38 - INFO - codeparrot_training - Step 47855: {'lr': 2.283009025660532e-06, 'samples': 24502272, 'steps': 47855, 'batch_loss/train': 0.6104316973360255} +12/29/2021 06:20:50 - INFO - codeparrot_training - Step 47856: {'lr': 2.2808840841285206e-06, 'samples': 24502784, 'steps': 47856, 'batch_loss/train': 0.8049186486750841} +12/29/2021 06:21:01 - INFO - codeparrot_training - Step 47857: {'lr': 2.2787601274341654e-06, 'samples': 24503296, 'steps': 47857, 'batch_loss/train': 0.6145197271835059} +12/29/2021 06:21:12 - INFO - codeparrot_training - Step 47858: {'lr': 2.276637155585959e-06, 'samples': 24503808, 'steps': 47858, 'batch_loss/train': 0.6890281559899449} +12/29/2021 06:21:25 - INFO - codeparrot_training - Step 47859: {'lr': 2.274515168592284e-06, 'samples': 24504320, 'steps': 47859, 'batch_loss/train': 0.7608055535238236} +12/29/2021 06:21:35 - INFO - codeparrot_training - Step 47860: {'lr': 2.2723941664615777e-06, 'samples': 24504832, 'steps': 47860, 'batch_loss/train': 0.791534059215337} +12/29/2021 06:21:46 - INFO - codeparrot_training - Step 47861: {'lr': 2.270274149202334e-06, 'samples': 24505344, 'steps': 47861, 'batch_loss/train': 0.6785058863461018} +12/29/2021 06:21:58 - INFO - codeparrot_training - Step 47862: {'lr': 2.268155116822934e-06, 'samples': 24505856, 'steps': 47862, 'batch_loss/train': 0.5995357261272147} +12/29/2021 06:22:09 - INFO - codeparrot_training - Step 47863: {'lr': 2.266037069331789e-06, 'samples': 24506368, 'steps': 47863, 'batch_loss/train': 0.5127602485008538} +12/29/2021 06:22:20 - INFO - codeparrot_training - Step 47864: {'lr': 2.2639200067373633e-06, 'samples': 24506880, 'steps': 47864, 'batch_loss/train': 0.7284260438755155} +12/29/2021 06:22:30 - INFO - codeparrot_training - Step 47865: {'lr': 2.2618039290480397e-06, 'samples': 24507392, 'steps': 47865, 'batch_loss/train': 0.7373072346672416} +12/29/2021 06:22:42 - INFO - codeparrot_training - Step 47866: {'lr': 2.259688836272228e-06, 'samples': 24507904, 'steps': 47866, 'batch_loss/train': 0.8581383644486777} +12/29/2021 06:22:53 - INFO - codeparrot_training - Step 47867: {'lr': 2.2575747284183657e-06, 'samples': 24508416, 'steps': 47867, 'batch_loss/train': 0.7941754311323166} +12/29/2021 06:23:04 - INFO - codeparrot_training - Step 47868: {'lr': 2.255461605494835e-06, 'samples': 24508928, 'steps': 47868, 'batch_loss/train': 0.7597108571790159} +12/29/2021 06:23:17 - INFO - codeparrot_training - Step 47869: {'lr': 2.2533494675100465e-06, 'samples': 24509440, 'steps': 47869, 'batch_loss/train': 0.7391164479777217} +12/29/2021 06:23:27 - INFO - codeparrot_training - Step 47870: {'lr': 2.251238314472409e-06, 'samples': 24509952, 'steps': 47870, 'batch_loss/train': 0.6826137411408126} +12/29/2021 06:23:38 - INFO - codeparrot_training - Step 47871: {'lr': 2.249128146390278e-06, 'samples': 24510464, 'steps': 47871, 'batch_loss/train': 0.7039156402461231} +12/29/2021 06:23:50 - INFO - codeparrot_training - Step 47872: {'lr': 2.247018963272063e-06, 'samples': 24510976, 'steps': 47872, 'batch_loss/train': 0.6884315544739366} +12/29/2021 06:24:01 - INFO - codeparrot_training - Step 47873: {'lr': 2.2449107651261735e-06, 'samples': 24511488, 'steps': 47873, 'batch_loss/train': 0.7502632727846503} +12/29/2021 06:24:11 - INFO - codeparrot_training - Step 47874: {'lr': 2.2428035519609646e-06, 'samples': 24512000, 'steps': 47874, 'batch_loss/train': 0.6908773314207792} +12/29/2021 06:24:22 - INFO - codeparrot_training - Step 47875: {'lr': 2.240697323784818e-06, 'samples': 24512512, 'steps': 47875, 'batch_loss/train': 0.7422056854702532} +12/29/2021 06:24:34 - INFO - codeparrot_training - Step 47876: {'lr': 2.2385920806061166e-06, 'samples': 24513024, 'steps': 47876, 'batch_loss/train': 0.7409256836399436} +12/29/2021 06:24:45 - INFO - codeparrot_training - Step 47877: {'lr': 2.2364878224332143e-06, 'samples': 24513536, 'steps': 47877, 'batch_loss/train': 0.8285517133772373} +12/29/2021 06:24:55 - INFO - codeparrot_training - Step 47878: {'lr': 2.2343845492744653e-06, 'samples': 24514048, 'steps': 47878, 'batch_loss/train': 0.7571839489974082} +12/29/2021 06:25:08 - INFO - codeparrot_training - Step 47879: {'lr': 2.23228226113828e-06, 'samples': 24514560, 'steps': 47879, 'batch_loss/train': 0.7456127610057592} +12/29/2021 06:25:18 - INFO - codeparrot_training - Step 47880: {'lr': 2.2301809580329847e-06, 'samples': 24515072, 'steps': 47880, 'batch_loss/train': 0.7023007487878203} +12/29/2021 06:25:29 - INFO - codeparrot_training - Step 47881: {'lr': 2.2280806399669064e-06, 'samples': 24515584, 'steps': 47881, 'batch_loss/train': 0.6421002629213035} +12/29/2021 06:25:43 - INFO - codeparrot_training - Step 47882: {'lr': 2.2259813069484547e-06, 'samples': 24516096, 'steps': 47882, 'batch_loss/train': 0.7897145618335344} +12/29/2021 06:25:54 - INFO - codeparrot_training - Step 47883: {'lr': 2.223882958985929e-06, 'samples': 24516608, 'steps': 47883, 'batch_loss/train': 0.7543263314291835} +12/29/2021 06:26:04 - INFO - codeparrot_training - Step 47884: {'lr': 2.221785596087711e-06, 'samples': 24517120, 'steps': 47884, 'batch_loss/train': 0.6905619292519987} +12/29/2021 06:26:16 - INFO - codeparrot_training - Step 47885: {'lr': 2.2196892182620997e-06, 'samples': 24517632, 'steps': 47885, 'batch_loss/train': 0.6670072916895151} +12/29/2021 06:26:27 - INFO - codeparrot_training - Step 47886: {'lr': 2.217593825517422e-06, 'samples': 24518144, 'steps': 47886, 'batch_loss/train': 0.9089707564562559} +12/29/2021 06:26:38 - INFO - codeparrot_training - Step 47887: {'lr': 2.215499417862088e-06, 'samples': 24518656, 'steps': 47887, 'batch_loss/train': 0.7068099705502391} +12/29/2021 06:26:48 - INFO - codeparrot_training - Step 47888: {'lr': 2.2134059953043407e-06, 'samples': 24519168, 'steps': 47888, 'batch_loss/train': 0.7282369993627071} +12/29/2021 06:27:02 - INFO - codeparrot_training - Step 47889: {'lr': 2.2113135578525067e-06, 'samples': 24519680, 'steps': 47889, 'batch_loss/train': 0.7334133408148773} +12/29/2021 06:27:13 - INFO - codeparrot_training - Step 47890: {'lr': 2.209222105514941e-06, 'samples': 24520192, 'steps': 47890, 'batch_loss/train': 0.7546438836725429} +12/29/2021 06:27:24 - INFO - codeparrot_training - Step 47891: {'lr': 2.2071316382999697e-06, 'samples': 24520704, 'steps': 47891, 'batch_loss/train': 0.7505682432092726} +12/29/2021 06:27:36 - INFO - codeparrot_training - Step 47892: {'lr': 2.205042156215864e-06, 'samples': 24521216, 'steps': 47892, 'batch_loss/train': 0.773704444989562} +12/29/2021 06:27:46 - INFO - codeparrot_training - Step 47893: {'lr': 2.202953659270951e-06, 'samples': 24521728, 'steps': 47893, 'batch_loss/train': 0.7857165029272437} +12/29/2021 06:27:57 - INFO - codeparrot_training - Step 47894: {'lr': 2.2008661474735013e-06, 'samples': 24522240, 'steps': 47894, 'batch_loss/train': 0.7316697533242404} +12/29/2021 06:28:09 - INFO - codeparrot_training - Step 47895: {'lr': 2.19877962083187e-06, 'samples': 24522752, 'steps': 47895, 'batch_loss/train': 0.6408313601277769} +12/29/2021 06:28:20 - INFO - codeparrot_training - Step 47896: {'lr': 2.1966940793543557e-06, 'samples': 24523264, 'steps': 47896, 'batch_loss/train': 0.7382016410119832} +12/29/2021 06:28:30 - INFO - codeparrot_training - Step 47897: {'lr': 2.1946095230491458e-06, 'samples': 24523776, 'steps': 47897, 'batch_loss/train': 0.7256084163673222} +12/29/2021 06:28:41 - INFO - codeparrot_training - Step 47898: {'lr': 2.192525951924651e-06, 'samples': 24524288, 'steps': 47898, 'batch_loss/train': 0.7448982112109661} +12/29/2021 06:28:55 - INFO - codeparrot_training - Step 47899: {'lr': 2.190443365989114e-06, 'samples': 24524800, 'steps': 47899, 'batch_loss/train': 0.7306762882508337} +12/29/2021 06:29:06 - INFO - codeparrot_training - Step 47900: {'lr': 2.188361765250779e-06, 'samples': 24525312, 'steps': 47900, 'batch_loss/train': 0.7215521247126162} +12/29/2021 06:29:16 - INFO - codeparrot_training - Step 47901: {'lr': 2.1862811497179445e-06, 'samples': 24525824, 'steps': 47901, 'batch_loss/train': 0.7322764871641994} +12/29/2021 06:29:28 - INFO - codeparrot_training - Step 47902: {'lr': 2.1842015193989096e-06, 'samples': 24526336, 'steps': 47902, 'batch_loss/train': 0.7431391477584839} +12/29/2021 06:29:39 - INFO - codeparrot_training - Step 47903: {'lr': 2.1821228743018893e-06, 'samples': 24526848, 'steps': 47903, 'batch_loss/train': 0.7064082727301866} +12/29/2021 06:29:50 - INFO - codeparrot_training - Step 47904: {'lr': 2.1800452144351833e-06, 'samples': 24527360, 'steps': 47904, 'batch_loss/train': 0.7723795315250754} +12/29/2021 06:30:04 - INFO - codeparrot_training - Step 47905: {'lr': 2.1779685398070624e-06, 'samples': 24527872, 'steps': 47905, 'batch_loss/train': 0.7595129008404911} +12/29/2021 06:30:14 - INFO - codeparrot_training - Step 47906: {'lr': 2.1758928504257424e-06, 'samples': 24528384, 'steps': 47906, 'batch_loss/train': 0.6903949514962733} +12/29/2021 06:30:25 - INFO - codeparrot_training - Step 47907: {'lr': 2.1738181462994946e-06, 'samples': 24528896, 'steps': 47907, 'batch_loss/train': 0.7762304584030062} +12/29/2021 06:30:36 - INFO - codeparrot_training - Step 47908: {'lr': 2.1717444274365894e-06, 'samples': 24529408, 'steps': 47908, 'batch_loss/train': 0.7794095831923187} +12/29/2021 06:30:48 - INFO - codeparrot_training - Step 47909: {'lr': 2.169671693845243e-06, 'samples': 24529920, 'steps': 47909, 'batch_loss/train': 0.7844363618642092} +12/29/2021 06:30:58 - INFO - codeparrot_training - Step 47910: {'lr': 2.1675999455336993e-06, 'samples': 24530432, 'steps': 47910, 'batch_loss/train': 0.7002643287414685} +12/29/2021 06:31:09 - INFO - codeparrot_training - Step 47911: {'lr': 2.1655291825102285e-06, 'samples': 24530944, 'steps': 47911, 'batch_loss/train': 0.8517661551013589} +12/29/2021 06:31:21 - INFO - codeparrot_training - Step 47912: {'lr': 2.1634594047829916e-06, 'samples': 24531456, 'steps': 47912, 'batch_loss/train': 0.8358540418557823} +12/29/2021 06:31:31 - INFO - codeparrot_training - Step 47913: {'lr': 2.1613906123602867e-06, 'samples': 24531968, 'steps': 47913, 'batch_loss/train': 0.856759219430387} +12/29/2021 06:31:42 - INFO - codeparrot_training - Step 47914: {'lr': 2.15932280525033e-06, 'samples': 24532480, 'steps': 47914, 'batch_loss/train': 0.6985803490970284} +12/29/2021 06:31:56 - INFO - codeparrot_training - Step 47915: {'lr': 2.157255983461309e-06, 'samples': 24532992, 'steps': 47915, 'batch_loss/train': 0.7264803480356932} +12/29/2021 06:32:07 - INFO - codeparrot_training - Step 47916: {'lr': 2.15519014700144e-06, 'samples': 24533504, 'steps': 47916, 'batch_loss/train': 0.7377708128187805} +12/29/2021 06:32:17 - INFO - codeparrot_training - Step 47917: {'lr': 2.153125295878966e-06, 'samples': 24534016, 'steps': 47917, 'batch_loss/train': 0.7568432204425335} +12/29/2021 06:32:28 - INFO - codeparrot_training - Step 47918: {'lr': 2.1510614301020748e-06, 'samples': 24534528, 'steps': 47918, 'batch_loss/train': 0.7442411966621876} +12/29/2021 06:32:40 - INFO - codeparrot_training - Step 47919: {'lr': 2.1489985496789823e-06, 'samples': 24535040, 'steps': 47919, 'batch_loss/train': 0.7407852933974937} +12/29/2021 06:32:51 - INFO - codeparrot_training - Step 47920: {'lr': 2.1469366546179037e-06, 'samples': 24535552, 'steps': 47920, 'batch_loss/train': 0.6903624809347093} +12/29/2021 06:33:01 - INFO - codeparrot_training - Step 47921: {'lr': 2.1448757449269997e-06, 'samples': 24536064, 'steps': 47921, 'batch_loss/train': 0.7028262121602893} +12/29/2021 06:33:13 - INFO - codeparrot_training - Step 47922: {'lr': 2.1428158206144854e-06, 'samples': 24536576, 'steps': 47922, 'batch_loss/train': 0.7090919800102711} +12/29/2021 06:33:24 - INFO - codeparrot_training - Step 47923: {'lr': 2.1407568816885494e-06, 'samples': 24537088, 'steps': 47923, 'batch_loss/train': 0.6566650392487645} +12/29/2021 06:33:35 - INFO - codeparrot_training - Step 47924: {'lr': 2.1386989281573786e-06, 'samples': 24537600, 'steps': 47924, 'batch_loss/train': 0.7146233811508864} +12/29/2021 06:33:47 - INFO - codeparrot_training - Step 47925: {'lr': 2.136641960029162e-06, 'samples': 24538112, 'steps': 47925, 'batch_loss/train': 0.7254000310786068} +12/29/2021 06:33:57 - INFO - codeparrot_training - Step 47926: {'lr': 2.134585977312059e-06, 'samples': 24538624, 'steps': 47926, 'batch_loss/train': 0.7337144007906318} +12/29/2021 06:34:08 - INFO - codeparrot_training - Step 47927: {'lr': 2.1325309800142302e-06, 'samples': 24539136, 'steps': 47927, 'batch_loss/train': 0.6854514796286821} +12/29/2021 06:34:19 - INFO - codeparrot_training - Step 47928: {'lr': 2.1304769681439186e-06, 'samples': 24539648, 'steps': 47928, 'batch_loss/train': 0.6536952988244593} +12/29/2021 06:34:33 - INFO - codeparrot_training - Step 47929: {'lr': 2.128423941709201e-06, 'samples': 24540160, 'steps': 47929, 'batch_loss/train': 0.8195595825091004} +12/29/2021 06:34:43 - INFO - codeparrot_training - Step 47930: {'lr': 2.126371900718266e-06, 'samples': 24540672, 'steps': 47930, 'batch_loss/train': 0.6176537563442253} +12/29/2021 06:34:54 - INFO - codeparrot_training - Step 47931: {'lr': 2.1243208451793006e-06, 'samples': 24541184, 'steps': 47931, 'batch_loss/train': 0.6961613731982652} +12/29/2021 06:35:06 - INFO - codeparrot_training - Step 47932: {'lr': 2.1222707751004377e-06, 'samples': 24541696, 'steps': 47932, 'batch_loss/train': 0.7802633559331298} +12/29/2021 06:35:17 - INFO - codeparrot_training - Step 47933: {'lr': 2.1202216904898376e-06, 'samples': 24542208, 'steps': 47933, 'batch_loss/train': 0.7711006684694439} +12/29/2021 06:35:27 - INFO - codeparrot_training - Step 47934: {'lr': 2.118173591355632e-06, 'samples': 24542720, 'steps': 47934, 'batch_loss/train': 0.6856756739725824} +12/29/2021 06:35:39 - INFO - codeparrot_training - Step 47935: {'lr': 2.116126477705982e-06, 'samples': 24543232, 'steps': 47935, 'batch_loss/train': 0.63967256777687} +12/29/2021 06:35:50 - INFO - codeparrot_training - Step 47936: {'lr': 2.1140803495490192e-06, 'samples': 24543744, 'steps': 47936, 'batch_loss/train': 1.6197147639468312} +12/29/2021 06:36:01 - INFO - codeparrot_training - Step 47937: {'lr': 2.112035206892876e-06, 'samples': 24544256, 'steps': 47937, 'batch_loss/train': 0.7598372390493751} +12/29/2021 06:36:15 - INFO - codeparrot_training - Step 47938: {'lr': 2.1099910497456577e-06, 'samples': 24544768, 'steps': 47938, 'batch_loss/train': 0.8338267714716494} +12/29/2021 06:36:25 - INFO - codeparrot_training - Step 47939: {'lr': 2.107947878115524e-06, 'samples': 24545280, 'steps': 47939, 'batch_loss/train': 0.7817827546969056} +12/29/2021 06:36:36 - INFO - codeparrot_training - Step 47940: {'lr': 2.105905692010607e-06, 'samples': 24545792, 'steps': 47940, 'batch_loss/train': 0.8721248647198081} +12/29/2021 06:36:47 - INFO - codeparrot_training - Step 47941: {'lr': 2.1038644914390116e-06, 'samples': 24546304, 'steps': 47941, 'batch_loss/train': 0.7227296666242182} +12/29/2021 06:36:59 - INFO - codeparrot_training - Step 47942: {'lr': 2.101824276408815e-06, 'samples': 24546816, 'steps': 47942, 'batch_loss/train': 0.7711533436086029} +12/29/2021 06:37:09 - INFO - codeparrot_training - Step 47943: {'lr': 2.0997850469281765e-06, 'samples': 24547328, 'steps': 47943, 'batch_loss/train': 0.7052369425073266} +12/29/2021 06:37:20 - INFO - codeparrot_training - Step 47944: {'lr': 2.0977468030052015e-06, 'samples': 24547840, 'steps': 47944, 'batch_loss/train': 0.780565925873816} +12/29/2021 06:37:34 - INFO - codeparrot_training - Step 47945: {'lr': 2.0957095446479667e-06, 'samples': 24548352, 'steps': 47945, 'batch_loss/train': 0.6318064937368035} +12/29/2021 06:37:45 - INFO - codeparrot_training - Step 47946: {'lr': 2.0936732718646047e-06, 'samples': 24548864, 'steps': 47946, 'batch_loss/train': 1.2046016016975045} +12/29/2021 06:37:56 - INFO - codeparrot_training - Step 47947: {'lr': 2.091637984663164e-06, 'samples': 24549376, 'steps': 47947, 'batch_loss/train': 0.780244879424572} +12/29/2021 06:38:06 - INFO - codeparrot_training - Step 47948: {'lr': 2.089603683051777e-06, 'samples': 24549888, 'steps': 47948, 'batch_loss/train': 0.7842404972761869} +12/29/2021 06:38:18 - INFO - codeparrot_training - Step 47949: {'lr': 2.0875703670385217e-06, 'samples': 24550400, 'steps': 47949, 'batch_loss/train': 0.7903517936356366} +12/29/2021 06:38:29 - INFO - codeparrot_training - Step 47950: {'lr': 2.0855380366314738e-06, 'samples': 24550912, 'steps': 47950, 'batch_loss/train': 1.0494525833055377} +12/29/2021 06:38:40 - INFO - codeparrot_training - Step 47951: {'lr': 2.083506691838738e-06, 'samples': 24551424, 'steps': 47951, 'batch_loss/train': 0.7268383298069239} +12/29/2021 06:38:52 - INFO - codeparrot_training - Step 47952: {'lr': 2.081476332668336e-06, 'samples': 24551936, 'steps': 47952, 'batch_loss/train': 0.840443829074502} +12/29/2021 06:39:02 - INFO - codeparrot_training - Step 47953: {'lr': 2.0794469591284006e-06, 'samples': 24552448, 'steps': 47953, 'batch_loss/train': 0.7439680309034884} +12/29/2021 06:39:13 - INFO - codeparrot_training - Step 47954: {'lr': 2.0774185712269522e-06, 'samples': 24552960, 'steps': 47954, 'batch_loss/train': 0.8458955371752381} +12/29/2021 06:39:27 - INFO - codeparrot_training - Step 47955: {'lr': 2.0753911689720963e-06, 'samples': 24553472, 'steps': 47955, 'batch_loss/train': 0.6839080397039652} +12/29/2021 06:39:38 - INFO - codeparrot_training - Step 47956: {'lr': 2.0733647523718534e-06, 'samples': 24553984, 'steps': 47956, 'batch_loss/train': 0.7057524916017428} +12/29/2021 06:39:49 - INFO - codeparrot_training - Step 47957: {'lr': 2.071339321434329e-06, 'samples': 24554496, 'steps': 47957, 'batch_loss/train': 0.45943693770095706} +12/29/2021 06:39:59 - INFO - codeparrot_training - Step 47958: {'lr': 2.0693148761675164e-06, 'samples': 24555008, 'steps': 47958, 'batch_loss/train': 0.7266401117667556} +12/29/2021 06:40:11 - INFO - codeparrot_training - Step 47959: {'lr': 2.067291416579492e-06, 'samples': 24555520, 'steps': 47959, 'batch_loss/train': 0.7219248928595334} +12/29/2021 06:40:22 - INFO - codeparrot_training - Step 47960: {'lr': 2.0652689426783054e-06, 'samples': 24556032, 'steps': 47960, 'batch_loss/train': 0.6184686257038265} +12/29/2021 06:40:33 - INFO - codeparrot_training - Step 47961: {'lr': 2.0632474544720057e-06, 'samples': 24556544, 'steps': 47961, 'batch_loss/train': 0.6686196364462376} +12/29/2021 06:40:45 - INFO - codeparrot_training - Step 47962: {'lr': 2.0612269519686142e-06, 'samples': 24557056, 'steps': 47962, 'batch_loss/train': 0.7263323166407645} +12/29/2021 06:40:55 - INFO - codeparrot_training - Step 47963: {'lr': 2.0592074351761525e-06, 'samples': 24557568, 'steps': 47963, 'batch_loss/train': 0.650898611987941} +12/29/2021 06:41:06 - INFO - codeparrot_training - Step 47964: {'lr': 2.0571889041026692e-06, 'samples': 24558080, 'steps': 47964, 'batch_loss/train': 0.7199436374939978} +12/29/2021 06:41:18 - INFO - codeparrot_training - Step 47965: {'lr': 2.055171358756186e-06, 'samples': 24558592, 'steps': 47965, 'batch_loss/train': 0.725175061263144} +12/29/2021 06:41:29 - INFO - codeparrot_training - Step 47966: {'lr': 2.053154799144724e-06, 'samples': 24559104, 'steps': 47966, 'batch_loss/train': 0.7287663898896426} +12/29/2021 06:41:39 - INFO - codeparrot_training - Step 47967: {'lr': 2.051139225276277e-06, 'samples': 24559616, 'steps': 47967, 'batch_loss/train': 0.7415033169090748} +12/29/2021 06:41:50 - INFO - codeparrot_training - Step 47968: {'lr': 2.0491246371589223e-06, 'samples': 24560128, 'steps': 47968, 'batch_loss/train': 0.6499728966155089} +12/29/2021 06:42:04 - INFO - codeparrot_training - Step 47969: {'lr': 2.047111034800597e-06, 'samples': 24560640, 'steps': 47969, 'batch_loss/train': 0.7792909536510706} +12/29/2021 06:42:15 - INFO - codeparrot_training - Step 47970: {'lr': 2.0450984182093235e-06, 'samples': 24561152, 'steps': 47970, 'batch_loss/train': 0.7300239037722349} +12/29/2021 06:42:25 - INFO - codeparrot_training - Step 47971: {'lr': 2.0430867873931224e-06, 'samples': 24561664, 'steps': 47971, 'batch_loss/train': 0.8059534290805459} +12/29/2021 06:42:38 - INFO - codeparrot_training - Step 47972: {'lr': 2.0410761423599876e-06, 'samples': 24562176, 'steps': 47972, 'batch_loss/train': 0.655590872745961} +12/29/2021 06:42:48 - INFO - codeparrot_training - Step 47973: {'lr': 2.039066483117913e-06, 'samples': 24562688, 'steps': 47973, 'batch_loss/train': 0.6465772328083403} +12/29/2021 06:42:59 - INFO - codeparrot_training - Step 47974: {'lr': 2.0370578096748914e-06, 'samples': 24563200, 'steps': 47974, 'batch_loss/train': 0.7409800002351403} +12/29/2021 06:43:13 - INFO - codeparrot_training - Step 47975: {'lr': 2.0350501220388893e-06, 'samples': 24563712, 'steps': 47975, 'batch_loss/train': 0.7156143020838499} +12/29/2021 06:43:23 - INFO - codeparrot_training - Step 47976: {'lr': 2.0330434202179003e-06, 'samples': 24564224, 'steps': 47976, 'batch_loss/train': 0.6799608441069722} +12/29/2021 06:43:34 - INFO - codeparrot_training - Step 47977: {'lr': 2.031037704219918e-06, 'samples': 24564736, 'steps': 47977, 'batch_loss/train': 0.806843354832381} +12/29/2021 06:43:46 - INFO - codeparrot_training - Step 47978: {'lr': 2.02903297405288e-06, 'samples': 24565248, 'steps': 47978, 'batch_loss/train': 0.7523118769749999} +12/29/2021 06:43:57 - INFO - codeparrot_training - Step 47979: {'lr': 2.0270292297247526e-06, 'samples': 24565760, 'steps': 47979, 'batch_loss/train': 0.6329756050836295} +12/29/2021 06:44:07 - INFO - codeparrot_training - Step 47980: {'lr': 2.025026471243557e-06, 'samples': 24566272, 'steps': 47980, 'batch_loss/train': 0.7355766603723168} +12/29/2021 06:44:18 - INFO - codeparrot_training - Step 47981: {'lr': 2.023024698617232e-06, 'samples': 24566784, 'steps': 47981, 'batch_loss/train': 0.8123969566076994} +12/29/2021 06:44:30 - INFO - codeparrot_training - Step 47982: {'lr': 2.021023911853714e-06, 'samples': 24567296, 'steps': 47982, 'batch_loss/train': 0.5849059915635735} +12/29/2021 06:44:41 - INFO - codeparrot_training - Step 47983: {'lr': 2.019024110960971e-06, 'samples': 24567808, 'steps': 47983, 'batch_loss/train': 0.7647467700298876} +12/29/2021 06:44:52 - INFO - codeparrot_training - Step 47984: {'lr': 2.017025295946967e-06, 'samples': 24568320, 'steps': 47984, 'batch_loss/train': 0.7604167857207358} +12/29/2021 06:45:06 - INFO - codeparrot_training - Step 47985: {'lr': 2.0150274668196133e-06, 'samples': 24568832, 'steps': 47985, 'batch_loss/train': 0.7573279021307826} +12/29/2021 06:45:16 - INFO - codeparrot_training - Step 47986: {'lr': 2.0130306235869035e-06, 'samples': 24569344, 'steps': 47986, 'batch_loss/train': 0.6647088606841862} +12/29/2021 06:45:27 - INFO - codeparrot_training - Step 47987: {'lr': 2.0110347662567196e-06, 'samples': 24569856, 'steps': 47987, 'batch_loss/train': 0.695778498891741} +12/29/2021 06:45:39 - INFO - codeparrot_training - Step 47988: {'lr': 2.009039894837056e-06, 'samples': 24570368, 'steps': 47988, 'batch_loss/train': 0.7912974543869495} +12/29/2021 06:45:50 - INFO - codeparrot_training - Step 47989: {'lr': 2.0070460093357947e-06, 'samples': 24570880, 'steps': 47989, 'batch_loss/train': 0.7681646076962352} +12/29/2021 06:46:00 - INFO - codeparrot_training - Step 47990: {'lr': 2.0050531097608737e-06, 'samples': 24571392, 'steps': 47990, 'batch_loss/train': 0.6802233913913369} +12/29/2021 06:46:11 - INFO - codeparrot_training - Step 47991: {'lr': 2.0030611961202314e-06, 'samples': 24571904, 'steps': 47991, 'batch_loss/train': 0.7815374378114939} +12/29/2021 06:46:25 - INFO - codeparrot_training - Step 47992: {'lr': 2.0010702684217787e-06, 'samples': 24572416, 'steps': 47992, 'batch_loss/train': 0.6038015400990844} +12/29/2021 06:46:35 - INFO - codeparrot_training - Step 47993: {'lr': 1.999080326673397e-06, 'samples': 24572928, 'steps': 47993, 'batch_loss/train': 0.8322336133569479} +12/29/2021 06:46:46 - INFO - codeparrot_training - Step 47994: {'lr': 1.9970913708830807e-06, 'samples': 24573440, 'steps': 47994, 'batch_loss/train': 0.7059669164009392} +12/29/2021 06:46:58 - INFO - codeparrot_training - Step 47995: {'lr': 1.9951034010586567e-06, 'samples': 24573952, 'steps': 47995, 'batch_loss/train': 0.7483965782448649} +12/29/2021 06:47:09 - INFO - codeparrot_training - Step 47996: {'lr': 1.993116417208063e-06, 'samples': 24574464, 'steps': 47996, 'batch_loss/train': 0.862807254306972} +12/29/2021 06:47:19 - INFO - codeparrot_training - Step 47997: {'lr': 1.99113041933921e-06, 'samples': 24574976, 'steps': 47997, 'batch_loss/train': 0.7481928197667003} +12/29/2021 06:47:31 - INFO - codeparrot_training - Step 47998: {'lr': 1.9891454074599522e-06, 'samples': 24575488, 'steps': 47998, 'batch_loss/train': 0.6729669240303338} +12/29/2021 06:47:42 - INFO - codeparrot_training - Step 47999: {'lr': 1.9871613815782285e-06, 'samples': 24576000, 'steps': 47999, 'batch_loss/train': 0.7039976473897696} +12/29/2021 06:47:53 - INFO - codeparrot_training - Step 48000: {'lr': 1.985178341701893e-06, 'samples': 24576512, 'steps': 48000, 'batch_loss/train': 0.7029458270408213} +12/29/2021 06:48:03 - INFO - codeparrot_training - Step 48001: {'lr': 1.9831962878388566e-06, 'samples': 24577024, 'steps': 48001, 'batch_loss/train': 0.7524541299790144} +12/29/2021 06:48:16 - INFO - codeparrot_training - Step 48002: {'lr': 1.9812152199969735e-06, 'samples': 24577536, 'steps': 48002, 'batch_loss/train': 0.7777227442711592} +12/29/2021 06:48:26 - INFO - codeparrot_training - Step 48003: {'lr': 1.9792351381841266e-06, 'samples': 24578048, 'steps': 48003, 'batch_loss/train': 0.7907410357147455} +12/29/2021 06:48:37 - INFO - codeparrot_training - Step 48004: {'lr': 1.977256042408199e-06, 'samples': 24578560, 'steps': 48004, 'batch_loss/train': 0.673704874701798} +12/29/2021 06:48:51 - INFO - codeparrot_training - Step 48005: {'lr': 1.9752779326770444e-06, 'samples': 24579072, 'steps': 48005, 'batch_loss/train': 0.742690552957356} +12/29/2021 06:49:01 - INFO - codeparrot_training - Step 48006: {'lr': 1.973300808998546e-06, 'samples': 24579584, 'steps': 48006, 'batch_loss/train': 0.6570942191174254} +12/29/2021 06:49:12 - INFO - codeparrot_training - Step 48007: {'lr': 1.9713246713805587e-06, 'samples': 24580096, 'steps': 48007, 'batch_loss/train': 0.7511862255632877} +12/29/2021 06:49:24 - INFO - codeparrot_training - Step 48008: {'lr': 1.9693495198309098e-06, 'samples': 24580608, 'steps': 48008, 'batch_loss/train': 0.6833125650882721} +12/29/2021 06:49:35 - INFO - codeparrot_training - Step 48009: {'lr': 1.967375354357481e-06, 'samples': 24581120, 'steps': 48009, 'batch_loss/train': 0.7613055668771267} +12/29/2021 06:49:45 - INFO - codeparrot_training - Step 48010: {'lr': 1.9654021749681285e-06, 'samples': 24581632, 'steps': 48010, 'batch_loss/train': 0.7975763976573944} +12/29/2021 06:49:56 - INFO - codeparrot_training - Step 48011: {'lr': 1.96342998167065e-06, 'samples': 24582144, 'steps': 48011, 'batch_loss/train': 0.6888624876737595} +12/29/2021 06:50:08 - INFO - codeparrot_training - Step 48012: {'lr': 1.9614587744729573e-06, 'samples': 24582656, 'steps': 48012, 'batch_loss/train': 0.7389739067293704} +12/29/2021 06:50:19 - INFO - codeparrot_training - Step 48013: {'lr': 1.959488553382821e-06, 'samples': 24583168, 'steps': 48013, 'batch_loss/train': 0.7848729073884897} +12/29/2021 06:50:29 - INFO - codeparrot_training - Step 48014: {'lr': 1.957519318408124e-06, 'samples': 24583680, 'steps': 48014, 'batch_loss/train': 0.66167862189468} +12/29/2021 06:50:44 - INFO - codeparrot_training - Step 48015: {'lr': 1.9555510695566656e-06, 'samples': 24584192, 'steps': 48015, 'batch_loss/train': 0.7210561446845531} +12/29/2021 06:50:54 - INFO - codeparrot_training - Step 48016: {'lr': 1.9535838068362453e-06, 'samples': 24584704, 'steps': 48016, 'batch_loss/train': 0.7742695715278387} +12/29/2021 06:51:05 - INFO - codeparrot_training - Step 48017: {'lr': 1.9516175302547455e-06, 'samples': 24585216, 'steps': 48017, 'batch_loss/train': 0.7428563832072541} +12/29/2021 06:51:17 - INFO - codeparrot_training - Step 48018: {'lr': 1.949652239819966e-06, 'samples': 24585728, 'steps': 48018, 'batch_loss/train': 0.5295855798758566} +12/29/2021 06:51:28 - INFO - codeparrot_training - Step 48019: {'lr': 1.947687935539705e-06, 'samples': 24586240, 'steps': 48019, 'batch_loss/train': 0.509128155419603} +12/29/2021 06:51:38 - INFO - codeparrot_training - Step 48020: {'lr': 1.9457246174217624e-06, 'samples': 24586752, 'steps': 48020, 'batch_loss/train': 0.7032130118459463} +12/29/2021 06:51:49 - INFO - codeparrot_training - Step 48021: {'lr': 1.9437622854739656e-06, 'samples': 24587264, 'steps': 48021, 'batch_loss/train': 0.7128009248990566} +12/29/2021 06:52:03 - INFO - codeparrot_training - Step 48022: {'lr': 1.941800939704114e-06, 'samples': 24587776, 'steps': 48022, 'batch_loss/train': 0.7007823293097317} +12/29/2021 06:52:14 - INFO - codeparrot_training - Step 48023: {'lr': 1.9398405801199783e-06, 'samples': 24588288, 'steps': 48023, 'batch_loss/train': 0.7069714251556434} +12/29/2021 06:52:24 - INFO - codeparrot_training - Step 48024: {'lr': 1.937881206729386e-06, 'samples': 24588800, 'steps': 48024, 'batch_loss/train': 0.7836095774546266} +12/29/2021 06:52:36 - INFO - codeparrot_training - Step 48025: {'lr': 1.935922819540109e-06, 'samples': 24589312, 'steps': 48025, 'batch_loss/train': 0.686197841539979} +12/29/2021 06:52:47 - INFO - codeparrot_training - Step 48026: {'lr': 1.9339654185599465e-06, 'samples': 24589824, 'steps': 48026, 'batch_loss/train': 0.7704199128784239} +12/29/2021 06:52:58 - INFO - codeparrot_training - Step 48027: {'lr': 1.9320090037966697e-06, 'samples': 24590336, 'steps': 48027, 'batch_loss/train': 0.6565401146945078} +12/29/2021 06:53:10 - INFO - codeparrot_training - Step 48028: {'lr': 1.9300535752580505e-06, 'samples': 24590848, 'steps': 48028, 'batch_loss/train': 0.7476347894407809} +12/29/2021 06:53:21 - INFO - codeparrot_training - Step 48029: {'lr': 1.9280991329518605e-06, 'samples': 24591360, 'steps': 48029, 'batch_loss/train': 1.186897175386548} +12/29/2021 06:53:32 - INFO - codeparrot_training - Step 48030: {'lr': 1.926145676885899e-06, 'samples': 24591872, 'steps': 48030, 'batch_loss/train': 0.5946983993053436} +12/29/2021 06:53:42 - INFO - codeparrot_training - Step 48031: {'lr': 1.924193207067909e-06, 'samples': 24592384, 'steps': 48031, 'batch_loss/train': 0.7761753033846617} +12/29/2021 06:53:56 - INFO - codeparrot_training - Step 48032: {'lr': 1.9222417235056633e-06, 'samples': 24592896, 'steps': 48032, 'batch_loss/train': 0.7811786448583007} +12/29/2021 06:54:07 - INFO - codeparrot_training - Step 48033: {'lr': 1.920291226206905e-06, 'samples': 24593408, 'steps': 48033, 'batch_loss/train': 0.7282462911680341} +12/29/2021 06:54:17 - INFO - codeparrot_training - Step 48034: {'lr': 1.918341715179406e-06, 'samples': 24593920, 'steps': 48034, 'batch_loss/train': 0.7829730128869414} +12/29/2021 06:54:30 - INFO - codeparrot_training - Step 48035: {'lr': 1.9163931904309094e-06, 'samples': 24594432, 'steps': 48035, 'batch_loss/train': 0.7534253350459039} +12/29/2021 06:54:40 - INFO - codeparrot_training - Step 48036: {'lr': 1.9144456519691323e-06, 'samples': 24594944, 'steps': 48036, 'batch_loss/train': 0.7320773182436824} +12/29/2021 06:54:51 - INFO - codeparrot_training - Step 48037: {'lr': 1.912499099801873e-06, 'samples': 24595456, 'steps': 48037, 'batch_loss/train': 0.7369944136589766} +12/29/2021 06:55:01 - INFO - codeparrot_training - Step 48038: {'lr': 1.9105535339368484e-06, 'samples': 24595968, 'steps': 48038, 'batch_loss/train': 0.7668224675580859} +12/29/2021 06:55:14 - INFO - codeparrot_training - Step 48039: {'lr': 1.908608954381774e-06, 'samples': 24596480, 'steps': 48039, 'batch_loss/train': 0.7547698458656669} +12/29/2021 06:55:24 - INFO - codeparrot_training - Step 48040: {'lr': 1.906665361144394e-06, 'samples': 24596992, 'steps': 48040, 'batch_loss/train': 0.6114537864923477} +12/29/2021 06:55:35 - INFO - codeparrot_training - Step 48041: {'lr': 1.904722754232424e-06, 'samples': 24597504, 'steps': 48041, 'batch_loss/train': 0.9166889805346727} +12/29/2021 06:55:49 - INFO - codeparrot_training - Step 48042: {'lr': 1.902781133653636e-06, 'samples': 24598016, 'steps': 48042, 'batch_loss/train': 0.7188019130844623} +12/29/2021 06:56:00 - INFO - codeparrot_training - Step 48043: {'lr': 1.9008404994156902e-06, 'samples': 24598528, 'steps': 48043, 'batch_loss/train': 0.71328095626086} +12/29/2021 06:56:10 - INFO - codeparrot_training - Step 48044: {'lr': 1.8989008515263306e-06, 'samples': 24599040, 'steps': 48044, 'batch_loss/train': 0.8136280914768577} +12/29/2021 06:56:22 - INFO - codeparrot_training - Step 48045: {'lr': 1.8969621899932454e-06, 'samples': 24599552, 'steps': 48045, 'batch_loss/train': 0.7626346936449409} +12/29/2021 06:56:33 - INFO - codeparrot_training - Step 48046: {'lr': 1.8950245148241508e-06, 'samples': 24600064, 'steps': 48046, 'batch_loss/train': 0.7438567336648703} +12/29/2021 06:56:43 - INFO - codeparrot_training - Step 48047: {'lr': 1.8930878260267904e-06, 'samples': 24600576, 'steps': 48047, 'batch_loss/train': 0.7712875853758305} +12/29/2021 06:56:56 - INFO - codeparrot_training - Step 48048: {'lr': 1.8911521236088247e-06, 'samples': 24601088, 'steps': 48048, 'batch_loss/train': 0.7253146813600324} +12/29/2021 06:57:07 - INFO - codeparrot_training - Step 48049: {'lr': 1.8892174075779145e-06, 'samples': 24601600, 'steps': 48049, 'batch_loss/train': 0.7432173443958163} +12/29/2021 06:57:17 - INFO - codeparrot_training - Step 48050: {'lr': 1.887283677941859e-06, 'samples': 24602112, 'steps': 48050, 'batch_loss/train': 0.7301389849744737} +12/29/2021 06:57:28 - INFO - codeparrot_training - Step 48051: {'lr': 1.8853509347082355e-06, 'samples': 24602624, 'steps': 48051, 'batch_loss/train': 0.7482617853675038} +12/29/2021 06:57:40 - INFO - codeparrot_training - Step 48052: {'lr': 1.8834191778847876e-06, 'samples': 24603136, 'steps': 48052, 'batch_loss/train': 0.7735665997024626} +12/29/2021 06:57:51 - INFO - codeparrot_training - Step 48053: {'lr': 1.881488407479176e-06, 'samples': 24603648, 'steps': 48053, 'batch_loss/train': 1.098793406621553} +12/29/2021 06:58:02 - INFO - codeparrot_training - Step 48054: {'lr': 1.879558623499089e-06, 'samples': 24604160, 'steps': 48054, 'batch_loss/train': 0.723070815205574} +12/29/2021 06:58:15 - INFO - codeparrot_training - Step 48055: {'lr': 1.8776298259521596e-06, 'samples': 24604672, 'steps': 48055, 'batch_loss/train': 0.7969305836595595} +12/29/2021 06:58:26 - INFO - codeparrot_training - Step 48056: {'lr': 1.8757020148461035e-06, 'samples': 24605184, 'steps': 48056, 'batch_loss/train': 0.66534993465757} +12/29/2021 06:58:37 - INFO - codeparrot_training - Step 48057: {'lr': 1.8737751901885813e-06, 'samples': 24605696, 'steps': 48057, 'batch_loss/train': 0.747165369684808} +12/29/2021 06:58:47 - INFO - codeparrot_training - Step 48058: {'lr': 1.8718493519872537e-06, 'samples': 24606208, 'steps': 48058, 'batch_loss/train': 0.7317967140115798} +12/29/2021 06:58:59 - INFO - codeparrot_training - Step 48059: {'lr': 1.8699245002497257e-06, 'samples': 24606720, 'steps': 48059, 'batch_loss/train': 0.5710605320055038} +12/29/2021 06:59:10 - INFO - codeparrot_training - Step 48060: {'lr': 1.868000634983713e-06, 'samples': 24607232, 'steps': 48060, 'batch_loss/train': 0.6724818598013371} +12/29/2021 06:59:21 - INFO - codeparrot_training - Step 48061: {'lr': 1.8660777561968212e-06, 'samples': 24607744, 'steps': 48061, 'batch_loss/train': 0.7476045023649931} +12/29/2021 06:59:35 - INFO - codeparrot_training - Step 48062: {'lr': 1.8641558638967383e-06, 'samples': 24608256, 'steps': 48062, 'batch_loss/train': 0.674887839704752} +12/29/2021 06:59:45 - INFO - codeparrot_training - Step 48063: {'lr': 1.8622349580910413e-06, 'samples': 24608768, 'steps': 48063, 'batch_loss/train': 0.7020787661895156} +12/29/2021 06:59:56 - INFO - codeparrot_training - Step 48064: {'lr': 1.8603150387874469e-06, 'samples': 24609280, 'steps': 48064, 'batch_loss/train': 0.7314933703746647} +12/29/2021 07:00:08 - INFO - codeparrot_training - Step 48065: {'lr': 1.858396105993504e-06, 'samples': 24609792, 'steps': 48065, 'batch_loss/train': 0.8184639634564519} +12/29/2021 07:00:19 - INFO - codeparrot_training - Step 48066: {'lr': 1.8564781597169012e-06, 'samples': 24610304, 'steps': 48066, 'batch_loss/train': 0.7241315161809325} +12/29/2021 07:00:29 - INFO - codeparrot_training - Step 48067: {'lr': 1.854561199965271e-06, 'samples': 24610816, 'steps': 48067, 'batch_loss/train': 0.6708775060251355} +12/29/2021 07:00:40 - INFO - codeparrot_training - Step 48068: {'lr': 1.8526452267461636e-06, 'samples': 24611328, 'steps': 48068, 'batch_loss/train': 0.8180672032758594} +12/29/2021 07:00:52 - INFO - codeparrot_training - Step 48069: {'lr': 1.8507302400672666e-06, 'samples': 24611840, 'steps': 48069, 'batch_loss/train': 0.6604659301228821} +12/29/2021 07:01:03 - INFO - codeparrot_training - Step 48070: {'lr': 1.8488162399361852e-06, 'samples': 24612352, 'steps': 48070, 'batch_loss/train': 0.8188786851242185} +12/29/2021 07:01:14 - INFO - codeparrot_training - Step 48071: {'lr': 1.8469032263604967e-06, 'samples': 24612864, 'steps': 48071, 'batch_loss/train': 0.7908947048708797} +12/29/2021 07:01:28 - INFO - codeparrot_training - Step 48072: {'lr': 1.8449911993478064e-06, 'samples': 24613376, 'steps': 48072, 'batch_loss/train': 0.7366464098449796} +12/29/2021 07:01:38 - INFO - codeparrot_training - Step 48073: {'lr': 1.8430801589057467e-06, 'samples': 24613888, 'steps': 48073, 'batch_loss/train': 0.4550447320798412} +12/29/2021 07:01:49 - INFO - codeparrot_training - Step 48074: {'lr': 1.841170105041895e-06, 'samples': 24614400, 'steps': 48074, 'batch_loss/train': 0.7960298303514719} +12/29/2021 07:02:01 - INFO - codeparrot_training - Step 48075: {'lr': 1.8392610377638563e-06, 'samples': 24614912, 'steps': 48075, 'batch_loss/train': 0.6386082605458796} +12/29/2021 07:02:12 - INFO - codeparrot_training - Step 48076: {'lr': 1.837352957079208e-06, 'samples': 24615424, 'steps': 48076, 'batch_loss/train': 0.6185552791575901} +12/29/2021 07:02:22 - INFO - codeparrot_training - Step 48077: {'lr': 1.835445862995555e-06, 'samples': 24615936, 'steps': 48077, 'batch_loss/train': 0.7482722809072584} +12/29/2021 07:02:33 - INFO - codeparrot_training - Step 48078: {'lr': 1.833539755520447e-06, 'samples': 24616448, 'steps': 48078, 'batch_loss/train': 0.7160250288434327} +12/29/2021 07:02:45 - INFO - codeparrot_training - Step 48079: {'lr': 1.8316346346614887e-06, 'samples': 24616960, 'steps': 48079, 'batch_loss/train': 0.8042460763826966} +12/29/2021 07:02:56 - INFO - codeparrot_training - Step 48080: {'lr': 1.8297305004262576e-06, 'samples': 24617472, 'steps': 48080, 'batch_loss/train': 0.7407947642495856} +12/29/2021 07:03:07 - INFO - codeparrot_training - Step 48081: {'lr': 1.8278273528223032e-06, 'samples': 24617984, 'steps': 48081, 'batch_loss/train': 0.7185848960652947} +12/29/2021 07:03:19 - INFO - codeparrot_training - Step 48082: {'lr': 1.8259251918572028e-06, 'samples': 24618496, 'steps': 48082, 'batch_loss/train': 0.9031730759888887} +12/29/2021 07:03:29 - INFO - codeparrot_training - Step 48083: {'lr': 1.8240240175385059e-06, 'samples': 24619008, 'steps': 48083, 'batch_loss/train': 0.7384878965094686} +12/29/2021 07:03:40 - INFO - codeparrot_training - Step 48084: {'lr': 1.8221238298737897e-06, 'samples': 24619520, 'steps': 48084, 'batch_loss/train': 0.7060397979803383} +12/29/2021 07:03:54 - INFO - codeparrot_training - Step 48085: {'lr': 1.8202246288706037e-06, 'samples': 24620032, 'steps': 48085, 'batch_loss/train': 0.776437824126333} +12/29/2021 07:04:05 - INFO - codeparrot_training - Step 48086: {'lr': 1.8183264145364697e-06, 'samples': 24620544, 'steps': 48086, 'batch_loss/train': 0.7327298955060542} +12/29/2021 07:04:15 - INFO - codeparrot_training - Step 48087: {'lr': 1.8164291868790206e-06, 'samples': 24621056, 'steps': 48087, 'batch_loss/train': 0.6201968500972725} +12/29/2021 07:04:26 - INFO - codeparrot_training - Step 48088: {'lr': 1.8145329459056948e-06, 'samples': 24621568, 'steps': 48088, 'batch_loss/train': 0.7860184612218291} +12/29/2021 07:04:38 - INFO - codeparrot_training - Step 48089: {'lr': 1.8126376916240694e-06, 'samples': 24622080, 'steps': 48089, 'batch_loss/train': 0.7368827832397074} +12/29/2021 07:04:49 - INFO - codeparrot_training - Step 48090: {'lr': 1.8107434240416942e-06, 'samples': 24622592, 'steps': 48090, 'batch_loss/train': 0.7567741395905614} +12/29/2021 07:04:59 - INFO - codeparrot_training - Step 48091: {'lr': 1.808850143166091e-06, 'samples': 24623104, 'steps': 48091, 'batch_loss/train': 0.7656571017578244} +12/29/2021 07:05:14 - INFO - codeparrot_training - Step 48092: {'lr': 1.8069578490047812e-06, 'samples': 24623616, 'steps': 48092, 'batch_loss/train': 0.6644940879195929} +12/29/2021 07:05:24 - INFO - codeparrot_training - Step 48093: {'lr': 1.8050665415653144e-06, 'samples': 24624128, 'steps': 48093, 'batch_loss/train': 0.6905902717262506} +12/29/2021 07:05:35 - INFO - codeparrot_training - Step 48094: {'lr': 1.8031762208551571e-06, 'samples': 24624640, 'steps': 48094, 'batch_loss/train': 0.6945527605712414} +12/29/2021 07:05:47 - INFO - codeparrot_training - Step 48095: {'lr': 1.8012868868818865e-06, 'samples': 24625152, 'steps': 48095, 'batch_loss/train': 0.6087212348356843} +12/29/2021 07:05:58 - INFO - codeparrot_training - Step 48096: {'lr': 1.7993985396529688e-06, 'samples': 24625664, 'steps': 48096, 'batch_loss/train': 0.7627649027854204} +12/29/2021 07:06:09 - INFO - codeparrot_training - Step 48097: {'lr': 1.7975111791759257e-06, 'samples': 24626176, 'steps': 48097, 'batch_loss/train': 0.8179545197635889} +12/29/2021 07:06:19 - INFO - codeparrot_training - Step 48098: {'lr': 1.7956248054582513e-06, 'samples': 24626688, 'steps': 48098, 'batch_loss/train': 0.7152895163744688} +12/29/2021 07:06:32 - INFO - codeparrot_training - Step 48099: {'lr': 1.7937394185074673e-06, 'samples': 24627200, 'steps': 48099, 'batch_loss/train': 0.7016514586284757} +12/29/2021 07:06:43 - INFO - codeparrot_training - Step 48100: {'lr': 1.7918550183310677e-06, 'samples': 24627712, 'steps': 48100, 'batch_loss/train': 0.7437138739041984} +12/29/2021 07:06:53 - INFO - codeparrot_training - Step 48101: {'lr': 1.7899716049364912e-06, 'samples': 24628224, 'steps': 48101, 'batch_loss/train': 0.7979004764929414} +12/29/2021 07:07:08 - INFO - codeparrot_training - Step 48102: {'lr': 1.7880891783313146e-06, 'samples': 24628736, 'steps': 48102, 'batch_loss/train': 0.7488568918779492} +12/29/2021 07:07:18 - INFO - codeparrot_training - Step 48103: {'lr': 1.786207738522949e-06, 'samples': 24629248, 'steps': 48103, 'batch_loss/train': 0.6508500841446221} +12/29/2021 07:07:29 - INFO - codeparrot_training - Step 48104: {'lr': 1.7843272855189163e-06, 'samples': 24629760, 'steps': 48104, 'batch_loss/train': 0.8456008424982429} +12/29/2021 07:07:41 - INFO - codeparrot_training - Step 48105: {'lr': 1.7824478193266547e-06, 'samples': 24630272, 'steps': 48105, 'batch_loss/train': 0.734898654744029} +12/29/2021 07:07:52 - INFO - codeparrot_training - Step 48106: {'lr': 1.7805693399536583e-06, 'samples': 24630784, 'steps': 48106, 'batch_loss/train': 0.6994468932971358} +12/29/2021 07:08:03 - INFO - codeparrot_training - Step 48107: {'lr': 1.7786918474073932e-06, 'samples': 24631296, 'steps': 48107, 'batch_loss/train': 0.7011915906332433} +12/29/2021 07:08:13 - INFO - codeparrot_training - Step 48108: {'lr': 1.7768153416953536e-06, 'samples': 24631808, 'steps': 48108, 'batch_loss/train': 0.7531995642930269} +12/29/2021 07:08:27 - INFO - codeparrot_training - Step 48109: {'lr': 1.7749398228249503e-06, 'samples': 24632320, 'steps': 48109, 'batch_loss/train': 0.7528270841576159} +12/29/2021 07:08:38 - INFO - codeparrot_training - Step 48110: {'lr': 1.7730652908036493e-06, 'samples': 24632832, 'steps': 48110, 'batch_loss/train': 0.6991562554612756} +12/29/2021 07:08:48 - INFO - codeparrot_training - Step 48111: {'lr': 1.7711917456389448e-06, 'samples': 24633344, 'steps': 48111, 'batch_loss/train': 0.7362428614869714} +12/29/2021 07:09:01 - INFO - codeparrot_training - Step 48112: {'lr': 1.7693191873382198e-06, 'samples': 24633856, 'steps': 48112, 'batch_loss/train': 0.7232697084546089} +12/29/2021 07:09:12 - INFO - codeparrot_training - Step 48113: {'lr': 1.767447615908968e-06, 'samples': 24634368, 'steps': 48113, 'batch_loss/train': 0.586669534444809} +12/29/2021 07:09:22 - INFO - codeparrot_training - Step 48114: {'lr': 1.7655770313586283e-06, 'samples': 24634880, 'steps': 48114, 'batch_loss/train': 0.7345542582916096} +12/29/2021 07:09:34 - INFO - codeparrot_training - Step 48115: {'lr': 1.7637074336945834e-06, 'samples': 24635392, 'steps': 48115, 'batch_loss/train': 0.7531706746667624} +12/29/2021 07:09:45 - INFO - codeparrot_training - Step 48116: {'lr': 1.7618388229243554e-06, 'samples': 24635904, 'steps': 48116, 'batch_loss/train': 0.7217569686472416} +12/29/2021 07:09:55 - INFO - codeparrot_training - Step 48117: {'lr': 1.759971199055299e-06, 'samples': 24636416, 'steps': 48117, 'batch_loss/train': 0.6955771539360285} +12/29/2021 07:10:06 - INFO - codeparrot_training - Step 48118: {'lr': 1.758104562094881e-06, 'samples': 24636928, 'steps': 48118, 'batch_loss/train': 0.7765219230204821} +12/29/2021 07:10:18 - INFO - codeparrot_training - Step 48119: {'lr': 1.756238912050484e-06, 'samples': 24637440, 'steps': 48119, 'batch_loss/train': 0.8070937562733889} +12/29/2021 07:10:29 - INFO - codeparrot_training - Step 48120: {'lr': 1.7543742489295745e-06, 'samples': 24637952, 'steps': 48120, 'batch_loss/train': 0.6995236072689295} +12/29/2021 07:10:40 - INFO - codeparrot_training - Step 48121: {'lr': 1.7525105727395074e-06, 'samples': 24638464, 'steps': 48121, 'batch_loss/train': 0.7725237607955933} +12/29/2021 07:10:53 - INFO - codeparrot_training - Step 48122: {'lr': 1.7506478834877493e-06, 'samples': 24638976, 'steps': 48122, 'batch_loss/train': 0.730504245031625} +12/29/2021 07:11:04 - INFO - codeparrot_training - Step 48123: {'lr': 1.7487861811816552e-06, 'samples': 24639488, 'steps': 48123, 'batch_loss/train': 0.7343668836401775} +12/29/2021 07:11:15 - INFO - codeparrot_training - Step 48124: {'lr': 1.7469254658286914e-06, 'samples': 24640000, 'steps': 48124, 'batch_loss/train': 0.7464088359847665} +12/29/2021 07:11:27 - INFO - codeparrot_training - Step 48125: {'lr': 1.7450657374361855e-06, 'samples': 24640512, 'steps': 48125, 'batch_loss/train': 0.6406079921871424} +12/29/2021 07:11:38 - INFO - codeparrot_training - Step 48126: {'lr': 1.743206996011576e-06, 'samples': 24641024, 'steps': 48126, 'batch_loss/train': 0.7150219837203622} +12/29/2021 07:11:48 - INFO - codeparrot_training - Step 48127: {'lr': 1.7413492415622179e-06, 'samples': 24641536, 'steps': 48127, 'batch_loss/train': 0.6654278634232469} +12/29/2021 07:11:59 - INFO - codeparrot_training - Step 48128: {'lr': 1.7394924740955498e-06, 'samples': 24642048, 'steps': 48128, 'batch_loss/train': 0.6904932525940239} +12/29/2021 07:12:11 - INFO - codeparrot_training - Step 48129: {'lr': 1.7376366936188714e-06, 'samples': 24642560, 'steps': 48129, 'batch_loss/train': 0.7809798894450068} +12/29/2021 07:12:22 - INFO - codeparrot_training - Step 48130: {'lr': 1.7357819001396213e-06, 'samples': 24643072, 'steps': 48130, 'batch_loss/train': 0.6804253533482552} +12/29/2021 07:12:33 - INFO - codeparrot_training - Step 48131: {'lr': 1.73392809366521e-06, 'samples': 24643584, 'steps': 48131, 'batch_loss/train': 0.6500578029081225} +12/29/2021 07:12:47 - INFO - codeparrot_training - Step 48132: {'lr': 1.73207527420291e-06, 'samples': 24644096, 'steps': 48132, 'batch_loss/train': 0.8001793790608644} +12/29/2021 07:12:57 - INFO - codeparrot_training - Step 48133: {'lr': 1.7302234417601593e-06, 'samples': 24644608, 'steps': 48133, 'batch_loss/train': 0.71995262010023} +12/29/2021 07:13:08 - INFO - codeparrot_training - Step 48134: {'lr': 1.7283725963442854e-06, 'samples': 24645120, 'steps': 48134, 'batch_loss/train': 0.7882889942266047} +12/29/2021 07:13:20 - INFO - codeparrot_training - Step 48135: {'lr': 1.7265227379626713e-06, 'samples': 24645632, 'steps': 48135, 'batch_loss/train': 0.7124798013828695} +12/29/2021 07:13:30 - INFO - codeparrot_training - Step 48136: {'lr': 1.7246738666226446e-06, 'samples': 24646144, 'steps': 48136, 'batch_loss/train': 0.7809601151384413} +12/29/2021 07:13:41 - INFO - codeparrot_training - Step 48137: {'lr': 1.7228259823315885e-06, 'samples': 24646656, 'steps': 48137, 'batch_loss/train': 0.7051076762145385} +12/29/2021 07:13:52 - INFO - codeparrot_training - Step 48138: {'lr': 1.720979085096802e-06, 'samples': 24647168, 'steps': 48138, 'batch_loss/train': 0.6286134077236056} +12/29/2021 07:14:06 - INFO - codeparrot_training - Step 48139: {'lr': 1.719133174925669e-06, 'samples': 24647680, 'steps': 48139, 'batch_loss/train': 0.7242271043360233} +12/29/2021 07:14:16 - INFO - codeparrot_training - Step 48140: {'lr': 1.717288251825516e-06, 'samples': 24648192, 'steps': 48140, 'batch_loss/train': 0.7374797107186168} +12/29/2021 07:14:27 - INFO - codeparrot_training - Step 48141: {'lr': 1.7154443158036715e-06, 'samples': 24648704, 'steps': 48141, 'batch_loss/train': 0.6240693160798401} +12/29/2021 07:14:39 - INFO - codeparrot_training - Step 48142: {'lr': 1.71360136686749e-06, 'samples': 24649216, 'steps': 48142, 'batch_loss/train': 0.6566130956634879} +12/29/2021 07:14:50 - INFO - codeparrot_training - Step 48143: {'lr': 1.7117594050242714e-06, 'samples': 24649728, 'steps': 48143, 'batch_loss/train': 0.7275932729244232} +12/29/2021 07:15:00 - INFO - codeparrot_training - Step 48144: {'lr': 1.7099184302813431e-06, 'samples': 24650240, 'steps': 48144, 'batch_loss/train': 0.7329357411945239} +12/29/2021 07:15:12 - INFO - codeparrot_training - Step 48145: {'lr': 1.708078442646005e-06, 'samples': 24650752, 'steps': 48145, 'batch_loss/train': 0.7914088771212846} +12/29/2021 07:15:23 - INFO - codeparrot_training - Step 48146: {'lr': 1.70623944212564e-06, 'samples': 24651264, 'steps': 48146, 'batch_loss/train': 0.7520077931694686} +12/29/2021 07:15:34 - INFO - codeparrot_training - Step 48147: {'lr': 1.7044014287274923e-06, 'samples': 24651776, 'steps': 48147, 'batch_loss/train': 0.7054177051177248} +12/29/2021 07:15:44 - INFO - codeparrot_training - Step 48148: {'lr': 1.7025644024588893e-06, 'samples': 24652288, 'steps': 48148, 'batch_loss/train': 0.5888504860922694} +12/29/2021 07:15:58 - INFO - codeparrot_training - Step 48149: {'lr': 1.7007283633271308e-06, 'samples': 24652800, 'steps': 48149, 'batch_loss/train': 0.7287151651107706} +12/29/2021 07:16:09 - INFO - codeparrot_training - Step 48150: {'lr': 1.6988933113395443e-06, 'samples': 24653312, 'steps': 48150, 'batch_loss/train': 0.820059600751847} +12/29/2021 07:16:19 - INFO - codeparrot_training - Step 48151: {'lr': 1.6970592465034018e-06, 'samples': 24653824, 'steps': 48151, 'batch_loss/train': 0.7766981655731797} +12/29/2021 07:16:32 - INFO - codeparrot_training - Step 48152: {'lr': 1.6952261688259752e-06, 'samples': 24654336, 'steps': 48152, 'batch_loss/train': 0.6850547580979764} +12/29/2021 07:16:42 - INFO - codeparrot_training - Step 48153: {'lr': 1.693394078314564e-06, 'samples': 24654848, 'steps': 48153, 'batch_loss/train': 0.6093862220877782} +12/29/2021 07:16:53 - INFO - codeparrot_training - Step 48154: {'lr': 1.691562974976496e-06, 'samples': 24655360, 'steps': 48154, 'batch_loss/train': 0.8791398564353585} +12/29/2021 07:17:05 - INFO - codeparrot_training - Step 48155: {'lr': 1.6897328588190153e-06, 'samples': 24655872, 'steps': 48155, 'batch_loss/train': 0.7502018045634031} +12/29/2021 07:17:16 - INFO - codeparrot_training - Step 48156: {'lr': 1.687903729849366e-06, 'samples': 24656384, 'steps': 48156, 'batch_loss/train': 0.6492606499232352} +12/29/2021 07:17:26 - INFO - codeparrot_training - Step 48157: {'lr': 1.6860755880748758e-06, 'samples': 24656896, 'steps': 48157, 'batch_loss/train': 0.8497391319833696} +12/29/2021 07:17:37 - INFO - codeparrot_training - Step 48158: {'lr': 1.6842484335027885e-06, 'samples': 24657408, 'steps': 48158, 'batch_loss/train': 0.70508743962273} +12/29/2021 07:17:49 - INFO - codeparrot_training - Step 48159: {'lr': 1.6824222661403765e-06, 'samples': 24657920, 'steps': 48159, 'batch_loss/train': 0.7412471234565601} +12/29/2021 07:18:00 - INFO - codeparrot_training - Step 48160: {'lr': 1.680597085994856e-06, 'samples': 24658432, 'steps': 48160, 'batch_loss/train': 0.7554279399919324} +12/29/2021 07:18:10 - INFO - codeparrot_training - Step 48161: {'lr': 1.6787728930735546e-06, 'samples': 24658944, 'steps': 48161, 'batch_loss/train': 0.6838875566609204} +12/29/2021 07:18:25 - INFO - codeparrot_training - Step 48162: {'lr': 1.6769496873836887e-06, 'samples': 24659456, 'steps': 48162, 'batch_loss/train': 0.7663269150070846} +12/29/2021 07:18:35 - INFO - codeparrot_training - Step 48163: {'lr': 1.6751274689325023e-06, 'samples': 24659968, 'steps': 48163, 'batch_loss/train': 0.8021709127351642} +12/29/2021 07:18:46 - INFO - codeparrot_training - Step 48164: {'lr': 1.67330623772724e-06, 'samples': 24660480, 'steps': 48164, 'batch_loss/train': 0.6666919912677258} +12/29/2021 07:18:58 - INFO - codeparrot_training - Step 48165: {'lr': 1.6714859937751458e-06, 'samples': 24660992, 'steps': 48165, 'batch_loss/train': 0.8366995179094374} +12/29/2021 07:19:09 - INFO - codeparrot_training - Step 48166: {'lr': 1.6696667370834639e-06, 'samples': 24661504, 'steps': 48166, 'batch_loss/train': 0.6596614615991712} +12/29/2021 07:19:19 - INFO - codeparrot_training - Step 48167: {'lr': 1.6678484676594108e-06, 'samples': 24662016, 'steps': 48167, 'batch_loss/train': 0.7566618146374822} +12/29/2021 07:19:30 - INFO - codeparrot_training - Step 48168: {'lr': 1.6660311855102306e-06, 'samples': 24662528, 'steps': 48168, 'batch_loss/train': 0.5424173781648278} +12/29/2021 07:19:44 - INFO - codeparrot_training - Step 48169: {'lr': 1.6642148906431397e-06, 'samples': 24663040, 'steps': 48169, 'batch_loss/train': 0.7895241249352694} +12/29/2021 07:19:54 - INFO - codeparrot_training - Step 48170: {'lr': 1.6623995830653827e-06, 'samples': 24663552, 'steps': 48170, 'batch_loss/train': 0.7455487502738833} +12/29/2021 07:20:05 - INFO - codeparrot_training - Step 48171: {'lr': 1.6605852627841479e-06, 'samples': 24664064, 'steps': 48171, 'batch_loss/train': 0.7901531532406807} +12/29/2021 07:20:18 - INFO - codeparrot_training - Step 48172: {'lr': 1.6587719298066517e-06, 'samples': 24664576, 'steps': 48172, 'batch_loss/train': 0.6604746568482369} +12/29/2021 07:20:29 - INFO - codeparrot_training - Step 48173: {'lr': 1.6569595841401108e-06, 'samples': 24665088, 'steps': 48173, 'batch_loss/train': 0.6175311300903559} +12/29/2021 07:20:39 - INFO - codeparrot_training - Step 48174: {'lr': 1.6551482257917138e-06, 'samples': 24665600, 'steps': 48174, 'batch_loss/train': 0.6144176460802555} +12/29/2021 07:20:52 - INFO - codeparrot_training - Step 48175: {'lr': 1.6533378547686773e-06, 'samples': 24666112, 'steps': 48175, 'batch_loss/train': 0.7256717374548316} +12/29/2021 07:21:02 - INFO - codeparrot_training - Step 48176: {'lr': 1.6515284710782174e-06, 'samples': 24666624, 'steps': 48176, 'batch_loss/train': 0.5508596799336374} +12/29/2021 07:21:13 - INFO - codeparrot_training - Step 48177: {'lr': 1.6497200747274955e-06, 'samples': 24667136, 'steps': 48177, 'batch_loss/train': 0.6801609003450722} +12/29/2021 07:21:24 - INFO - codeparrot_training - Step 48178: {'lr': 1.6479126657236999e-06, 'samples': 24667648, 'steps': 48178, 'batch_loss/train': 0.8307559610693716} +12/29/2021 07:21:37 - INFO - codeparrot_training - Step 48179: {'lr': 1.646106244074047e-06, 'samples': 24668160, 'steps': 48179, 'batch_loss/train': 0.6932832628954202} +12/29/2021 07:21:48 - INFO - codeparrot_training - Step 48180: {'lr': 1.644300809785726e-06, 'samples': 24668672, 'steps': 48180, 'batch_loss/train': 0.7858306504786015} +12/29/2021 07:21:59 - INFO - codeparrot_training - Step 48181: {'lr': 1.6424963628658418e-06, 'samples': 24669184, 'steps': 48181, 'batch_loss/train': 0.8019935907796025} +12/29/2021 07:22:11 - INFO - codeparrot_training - Step 48182: {'lr': 1.640692903321639e-06, 'samples': 24669696, 'steps': 48182, 'batch_loss/train': 0.7642236454412341} +12/29/2021 07:22:22 - INFO - codeparrot_training - Step 48183: {'lr': 1.6388904311602781e-06, 'samples': 24670208, 'steps': 48183, 'batch_loss/train': 0.7852072897367179} +12/29/2021 07:22:32 - INFO - codeparrot_training - Step 48184: {'lr': 1.6370889463888926e-06, 'samples': 24670720, 'steps': 48184, 'batch_loss/train': 0.7285976447165012} +12/29/2021 07:22:44 - INFO - codeparrot_training - Step 48185: {'lr': 1.6352884490146437e-06, 'samples': 24671232, 'steps': 48185, 'batch_loss/train': 0.629280521592591} +12/29/2021 07:22:55 - INFO - codeparrot_training - Step 48186: {'lr': 1.6334889390447472e-06, 'samples': 24671744, 'steps': 48186, 'batch_loss/train': 0.6870396775193512} +12/29/2021 07:23:06 - INFO - codeparrot_training - Step 48187: {'lr': 1.6316904164863089e-06, 'samples': 24672256, 'steps': 48187, 'batch_loss/train': 0.6488667484372854} +12/29/2021 07:23:16 - INFO - codeparrot_training - Step 48188: {'lr': 1.629892881346462e-06, 'samples': 24672768, 'steps': 48188, 'batch_loss/train': 0.7234355872496963} +12/29/2021 07:23:30 - INFO - codeparrot_training - Step 48189: {'lr': 1.628096333632395e-06, 'samples': 24673280, 'steps': 48189, 'batch_loss/train': 0.6827297499403358} +12/29/2021 07:23:41 - INFO - codeparrot_training - Step 48190: {'lr': 1.6263007733512414e-06, 'samples': 24673792, 'steps': 48190, 'batch_loss/train': 0.7489448417909443} +12/29/2021 07:23:51 - INFO - codeparrot_training - Step 48191: {'lr': 1.6245062005101342e-06, 'samples': 24674304, 'steps': 48191, 'batch_loss/train': 0.7726914072409272} +12/29/2021 07:24:03 - INFO - codeparrot_training - Step 48192: {'lr': 1.6227126151162342e-06, 'samples': 24674816, 'steps': 48192, 'batch_loss/train': 0.8047918062657118} +12/29/2021 07:24:14 - INFO - codeparrot_training - Step 48193: {'lr': 1.6209200171765914e-06, 'samples': 24675328, 'steps': 48193, 'batch_loss/train': 0.6873145885765553} +12/29/2021 07:24:25 - INFO - codeparrot_training - Step 48194: {'lr': 1.6191284066984225e-06, 'samples': 24675840, 'steps': 48194, 'batch_loss/train': 0.7272727412637323} +12/29/2021 07:24:38 - INFO - codeparrot_training - Step 48195: {'lr': 1.6173377836888048e-06, 'samples': 24676352, 'steps': 48195, 'batch_loss/train': 0.8587202739436179} +12/29/2021 07:24:48 - INFO - codeparrot_training - Step 48196: {'lr': 1.6155481481548717e-06, 'samples': 24676864, 'steps': 48196, 'batch_loss/train': 0.7729220651090145} +12/29/2021 07:24:59 - INFO - codeparrot_training - Step 48197: {'lr': 1.6137595001037287e-06, 'samples': 24677376, 'steps': 48197, 'batch_loss/train': 0.6919299056753516} +12/29/2021 07:25:10 - INFO - codeparrot_training - Step 48198: {'lr': 1.611971839542481e-06, 'samples': 24677888, 'steps': 48198, 'batch_loss/train': 0.7874929991085082} +12/29/2021 07:25:22 - INFO - codeparrot_training - Step 48199: {'lr': 1.610185166478262e-06, 'samples': 24678400, 'steps': 48199, 'batch_loss/train': 0.9447293234989047} +12/29/2021 07:25:32 - INFO - codeparrot_training - Step 48200: {'lr': 1.608399480918149e-06, 'samples': 24678912, 'steps': 48200, 'batch_loss/train': 0.7097507989965379} +12/29/2021 07:25:43 - INFO - codeparrot_training - Step 48201: {'lr': 1.606614782869248e-06, 'samples': 24679424, 'steps': 48201, 'batch_loss/train': 0.7684809304773808} +12/29/2021 07:25:57 - INFO - codeparrot_training - Step 48202: {'lr': 1.6048310723386638e-06, 'samples': 24679936, 'steps': 48202, 'batch_loss/train': 0.658043249277398} +12/29/2021 07:26:08 - INFO - codeparrot_training - Step 48203: {'lr': 1.6030483493335024e-06, 'samples': 24680448, 'steps': 48203, 'batch_loss/train': 0.9065038897097111} +12/29/2021 07:26:18 - INFO - codeparrot_training - Step 48204: {'lr': 1.6012666138607857e-06, 'samples': 24680960, 'steps': 48204, 'batch_loss/train': 0.6839203235576861} +12/29/2021 07:26:30 - INFO - codeparrot_training - Step 48205: {'lr': 1.5994858659276467e-06, 'samples': 24681472, 'steps': 48205, 'batch_loss/train': 0.8184057120233774} +12/29/2021 07:26:41 - INFO - codeparrot_training - Step 48206: {'lr': 1.5977061055411911e-06, 'samples': 24681984, 'steps': 48206, 'batch_loss/train': 0.7237437216099352} +12/29/2021 07:26:52 - INFO - codeparrot_training - Step 48207: {'lr': 1.595927332708441e-06, 'samples': 24682496, 'steps': 48207, 'batch_loss/train': 0.6192921584006399} +12/29/2021 07:27:02 - INFO - codeparrot_training - Step 48208: {'lr': 1.5941495474364741e-06, 'samples': 24683008, 'steps': 48208, 'batch_loss/train': 0.7777470396831632} +12/29/2021 07:27:16 - INFO - codeparrot_training - Step 48209: {'lr': 1.5923727497324236e-06, 'samples': 24683520, 'steps': 48209, 'batch_loss/train': 0.7746319742873311} +12/29/2021 07:27:27 - INFO - codeparrot_training - Step 48210: {'lr': 1.5905969396032838e-06, 'samples': 24684032, 'steps': 48210, 'batch_loss/train': 0.8070311993360519} +12/29/2021 07:27:37 - INFO - codeparrot_training - Step 48211: {'lr': 1.5888221170561045e-06, 'samples': 24684544, 'steps': 48211, 'batch_loss/train': 0.771657676436007} +12/29/2021 07:27:50 - INFO - codeparrot_training - Step 48212: {'lr': 1.5870482820979914e-06, 'samples': 24685056, 'steps': 48212, 'batch_loss/train': 0.7567898379638791} +12/29/2021 07:28:00 - INFO - codeparrot_training - Step 48213: {'lr': 1.5852754347359943e-06, 'samples': 24685568, 'steps': 48213, 'batch_loss/train': 0.5925871810177341} +12/29/2021 07:28:11 - INFO - codeparrot_training - Step 48214: {'lr': 1.5835035749771353e-06, 'samples': 24686080, 'steps': 48214, 'batch_loss/train': 0.7962235789746046} +12/29/2021 07:28:23 - INFO - codeparrot_training - Step 48215: {'lr': 1.5817327028284366e-06, 'samples': 24686592, 'steps': 48215, 'batch_loss/train': 0.7711011897772551} +12/29/2021 07:28:34 - INFO - codeparrot_training - Step 48216: {'lr': 1.5799628182970038e-06, 'samples': 24687104, 'steps': 48216, 'batch_loss/train': 0.7158457040786743} +12/29/2021 07:28:44 - INFO - codeparrot_training - Step 48217: {'lr': 1.578193921389831e-06, 'samples': 24687616, 'steps': 48217, 'batch_loss/train': 0.7201679656282067} +12/29/2021 07:28:55 - INFO - codeparrot_training - Step 48218: {'lr': 1.5764260121139685e-06, 'samples': 24688128, 'steps': 48218, 'batch_loss/train': 0.7700069062411785} +12/29/2021 07:29:09 - INFO - codeparrot_training - Step 48219: {'lr': 1.5746590904764103e-06, 'samples': 24688640, 'steps': 48219, 'batch_loss/train': 0.7412732918746769} +12/29/2021 07:29:19 - INFO - codeparrot_training - Step 48220: {'lr': 1.572893156484234e-06, 'samples': 24689152, 'steps': 48220, 'batch_loss/train': 0.7925430051982403} +12/29/2021 07:29:30 - INFO - codeparrot_training - Step 48221: {'lr': 1.571128210144407e-06, 'samples': 24689664, 'steps': 48221, 'batch_loss/train': 0.8810338666662574} +12/29/2021 07:29:42 - INFO - codeparrot_training - Step 48222: {'lr': 1.5693642514639784e-06, 'samples': 24690176, 'steps': 48222, 'batch_loss/train': 0.8060699691995978} +12/29/2021 07:29:53 - INFO - codeparrot_training - Step 48223: {'lr': 1.5676012804499706e-06, 'samples': 24690688, 'steps': 48223, 'batch_loss/train': 1.0453801434487104} +12/29/2021 07:30:04 - INFO - codeparrot_training - Step 48224: {'lr': 1.5658392971093505e-06, 'samples': 24691200, 'steps': 48224, 'batch_loss/train': 0.7890575733035803} +12/29/2021 07:30:18 - INFO - codeparrot_training - Step 48225: {'lr': 1.5640783014491677e-06, 'samples': 24691712, 'steps': 48225, 'batch_loss/train': 0.7574287499301136} +12/29/2021 07:30:28 - INFO - codeparrot_training - Step 48226: {'lr': 1.5623182934763891e-06, 'samples': 24692224, 'steps': 48226, 'batch_loss/train': 0.6642492287792265} +12/29/2021 07:30:39 - INFO - codeparrot_training - Step 48227: {'lr': 1.5605592731980366e-06, 'samples': 24692736, 'steps': 48227, 'batch_loss/train': 0.7483199732378125} +12/29/2021 07:30:50 - INFO - codeparrot_training - Step 48228: {'lr': 1.558801240621105e-06, 'samples': 24693248, 'steps': 48228, 'batch_loss/train': 0.7956437724642456} +12/29/2021 07:31:02 - INFO - codeparrot_training - Step 48229: {'lr': 1.5570441957525606e-06, 'samples': 24693760, 'steps': 48229, 'batch_loss/train': 0.7218323717825115} +12/29/2021 07:31:13 - INFO - codeparrot_training - Step 48230: {'lr': 1.55528813859937e-06, 'samples': 24694272, 'steps': 48230, 'batch_loss/train': 0.9711312246508896} +12/29/2021 07:31:23 - INFO - codeparrot_training - Step 48231: {'lr': 1.5535330691685833e-06, 'samples': 24694784, 'steps': 48231, 'batch_loss/train': 0.7441495088860393} +12/29/2021 07:31:35 - INFO - codeparrot_training - Step 48232: {'lr': 1.5517789874671395e-06, 'samples': 24695296, 'steps': 48232, 'batch_loss/train': 0.7370923552662134} +12/29/2021 07:31:46 - INFO - codeparrot_training - Step 48233: {'lr': 1.550025893502005e-06, 'samples': 24695808, 'steps': 48233, 'batch_loss/train': 0.7016873233951628} +12/29/2021 07:31:56 - INFO - codeparrot_training - Step 48234: {'lr': 1.5482737872801466e-06, 'samples': 24696320, 'steps': 48234, 'batch_loss/train': 0.7903619729913771} +12/29/2021 07:32:08 - INFO - codeparrot_training - Step 48235: {'lr': 1.546522668808531e-06, 'samples': 24696832, 'steps': 48235, 'batch_loss/train': 0.7791981835616753} +12/29/2021 07:32:19 - INFO - codeparrot_training - Step 48236: {'lr': 1.5447725380941525e-06, 'samples': 24697344, 'steps': 48236, 'batch_loss/train': 0.730864513432607} +12/29/2021 07:32:30 - INFO - codeparrot_training - Step 48237: {'lr': 1.5430233951439498e-06, 'samples': 24697856, 'steps': 48237, 'batch_loss/train': 0.7760451000649482} +12/29/2021 07:32:40 - INFO - codeparrot_training - Step 48238: {'lr': 1.5412752399648345e-06, 'samples': 24698368, 'steps': 48238, 'batch_loss/train': 0.7368810987100005} +12/29/2021 07:32:54 - INFO - codeparrot_training - Step 48239: {'lr': 1.5395280725638283e-06, 'samples': 24698880, 'steps': 48239, 'batch_loss/train': 0.9168851473368704} +12/29/2021 07:33:05 - INFO - codeparrot_training - Step 48240: {'lr': 1.5377818929478427e-06, 'samples': 24699392, 'steps': 48240, 'batch_loss/train': 0.6702624009922147} +12/29/2021 07:33:16 - INFO - codeparrot_training - Step 48241: {'lr': 1.5360367011237886e-06, 'samples': 24699904, 'steps': 48241, 'batch_loss/train': 0.8245811541564763} +12/29/2021 07:33:28 - INFO - codeparrot_training - Step 48242: {'lr': 1.5342924970986606e-06, 'samples': 24700416, 'steps': 48242, 'batch_loss/train': 0.8021484054625034} +12/29/2021 07:33:39 - INFO - codeparrot_training - Step 48243: {'lr': 1.5325492808793695e-06, 'samples': 24700928, 'steps': 48243, 'batch_loss/train': 0.8325053631269839} +12/29/2021 07:33:49 - INFO - codeparrot_training - Step 48244: {'lr': 1.5308070524728546e-06, 'samples': 24701440, 'steps': 48244, 'batch_loss/train': 0.8899757429026067} +12/29/2021 07:34:01 - INFO - codeparrot_training - Step 48245: {'lr': 1.5290658118860268e-06, 'samples': 24701952, 'steps': 48245, 'batch_loss/train': 0.7626663232222199} +12/29/2021 07:34:12 - INFO - codeparrot_training - Step 48246: {'lr': 1.5273255591257974e-06, 'samples': 24702464, 'steps': 48246, 'batch_loss/train': 0.753651668317616} +12/29/2021 07:34:23 - INFO - codeparrot_training - Step 48247: {'lr': 1.5255862941991327e-06, 'samples': 24702976, 'steps': 48247, 'batch_loss/train': 0.7901764786802232} +12/29/2021 07:34:33 - INFO - codeparrot_training - Step 48248: {'lr': 1.5238480171128889e-06, 'samples': 24703488, 'steps': 48248, 'batch_loss/train': 0.7014881114009768} +12/29/2021 07:34:47 - INFO - codeparrot_training - Step 48249: {'lr': 1.5221107278740042e-06, 'samples': 24704000, 'steps': 48249, 'batch_loss/train': 0.8044718410819769} +12/29/2021 07:34:58 - INFO - codeparrot_training - Step 48250: {'lr': 1.5203744264893903e-06, 'samples': 24704512, 'steps': 48250, 'batch_loss/train': 1.0524268511217088} +12/29/2021 07:35:08 - INFO - codeparrot_training - Step 48251: {'lr': 1.5186391129659582e-06, 'samples': 24705024, 'steps': 48251, 'batch_loss/train': 0.7124572098255157} +12/29/2021 07:35:20 - INFO - codeparrot_training - Step 48252: {'lr': 1.5169047873105635e-06, 'samples': 24705536, 'steps': 48252, 'batch_loss/train': 0.7833931567147374} +12/29/2021 07:35:31 - INFO - codeparrot_training - Step 48253: {'lr': 1.5151714495301727e-06, 'samples': 24706048, 'steps': 48253, 'batch_loss/train': 0.6667253852356225} +12/29/2021 07:35:42 - INFO - codeparrot_training - Step 48254: {'lr': 1.5134390996316139e-06, 'samples': 24706560, 'steps': 48254, 'batch_loss/train': 0.7051604609005153} +12/29/2021 07:35:56 - INFO - codeparrot_training - Step 48255: {'lr': 1.511707737621798e-06, 'samples': 24707072, 'steps': 48255, 'batch_loss/train': 0.7479234719648957} +12/29/2021 07:36:06 - INFO - codeparrot_training - Step 48256: {'lr': 1.5099773635076085e-06, 'samples': 24707584, 'steps': 48256, 'batch_loss/train': 0.6333778141997755} +12/29/2021 07:36:17 - INFO - codeparrot_training - Step 48257: {'lr': 1.5082479772959013e-06, 'samples': 24708096, 'steps': 48257, 'batch_loss/train': 0.7995695890858769} +12/29/2021 07:36:27 - INFO - codeparrot_training - Step 48258: {'lr': 1.5065195789936147e-06, 'samples': 24708608, 'steps': 48258, 'batch_loss/train': 0.7685348689556122} +12/29/2021 07:36:40 - INFO - codeparrot_training - Step 48259: {'lr': 1.5047921686075494e-06, 'samples': 24709120, 'steps': 48259, 'batch_loss/train': 0.7430287110619247} +12/29/2021 07:36:50 - INFO - codeparrot_training - Step 48260: {'lr': 1.5030657461446162e-06, 'samples': 24709632, 'steps': 48260, 'batch_loss/train': 0.6216779361711815} +12/29/2021 07:37:01 - INFO - codeparrot_training - Step 48261: {'lr': 1.501340311611643e-06, 'samples': 24710144, 'steps': 48261, 'batch_loss/train': 0.7372178230434656} +12/29/2021 07:37:14 - INFO - codeparrot_training - Step 48262: {'lr': 1.4996158650155412e-06, 'samples': 24710656, 'steps': 48262, 'batch_loss/train': 0.7693818537518382} +12/29/2021 07:37:24 - INFO - codeparrot_training - Step 48263: {'lr': 1.4978924063631106e-06, 'samples': 24711168, 'steps': 48263, 'batch_loss/train': 0.9723462555557489} +12/29/2021 07:37:35 - INFO - codeparrot_training - Step 48264: {'lr': 1.4961699356612624e-06, 'samples': 24711680, 'steps': 48264, 'batch_loss/train': 0.9777362337335944} +12/29/2021 07:37:49 - INFO - codeparrot_training - Step 48265: {'lr': 1.494448452916769e-06, 'samples': 24712192, 'steps': 48265, 'batch_loss/train': 0.7579189105890691} +12/29/2021 07:37:59 - INFO - codeparrot_training - Step 48266: {'lr': 1.4927279581365416e-06, 'samples': 24712704, 'steps': 48266, 'batch_loss/train': 0.7901882063597441} +12/29/2021 07:38:10 - INFO - codeparrot_training - Step 48267: {'lr': 1.4910084513273802e-06, 'samples': 24713216, 'steps': 48267, 'batch_loss/train': 0.7047493904829025} +12/29/2021 07:38:21 - INFO - codeparrot_training - Step 48268: {'lr': 1.4892899324961684e-06, 'samples': 24713728, 'steps': 48268, 'batch_loss/train': 0.7168518714606762} +12/29/2021 07:38:33 - INFO - codeparrot_training - Step 48269: {'lr': 1.4875724016496784e-06, 'samples': 24714240, 'steps': 48269, 'batch_loss/train': 0.7050167159177363} +12/29/2021 07:38:43 - INFO - codeparrot_training - Step 48270: {'lr': 1.4858558587947657e-06, 'samples': 24714752, 'steps': 48270, 'batch_loss/train': 0.6576751479879022} +12/29/2021 07:38:54 - INFO - codeparrot_training - Step 48271: {'lr': 1.4841403039382585e-06, 'samples': 24715264, 'steps': 48271, 'batch_loss/train': 0.7278319969773293} +12/29/2021 07:39:06 - INFO - codeparrot_training - Step 48272: {'lr': 1.4824257370869842e-06, 'samples': 24715776, 'steps': 48272, 'batch_loss/train': 0.7794435303658247} +12/29/2021 07:39:17 - INFO - codeparrot_training - Step 48273: {'lr': 1.4807121582477435e-06, 'samples': 24716288, 'steps': 48273, 'batch_loss/train': 0.7716294508427382} +12/29/2021 07:39:27 - INFO - codeparrot_training - Step 48274: {'lr': 1.478999567427336e-06, 'samples': 24716800, 'steps': 48274, 'batch_loss/train': 0.7118595764040947} +12/29/2021 07:39:39 - INFO - codeparrot_training - Step 48275: {'lr': 1.4772879646325898e-06, 'samples': 24717312, 'steps': 48275, 'batch_loss/train': 0.6764001687988639} +12/29/2021 07:39:50 - INFO - codeparrot_training - Step 48276: {'lr': 1.475577349870333e-06, 'samples': 24717824, 'steps': 48276, 'batch_loss/train': 0.7823693761602044} +12/29/2021 07:40:01 - INFO - codeparrot_training - Step 48277: {'lr': 1.4738677231473375e-06, 'samples': 24718336, 'steps': 48277, 'batch_loss/train': 0.7777043264359236} +12/29/2021 07:40:11 - INFO - codeparrot_training - Step 48278: {'lr': 1.4721590844704036e-06, 'samples': 24718848, 'steps': 48278, 'batch_loss/train': 0.6659440686926246} +12/29/2021 07:40:25 - INFO - codeparrot_training - Step 48279: {'lr': 1.4704514338463037e-06, 'samples': 24719360, 'steps': 48279, 'batch_loss/train': 0.703183067496866} +12/29/2021 07:40:36 - INFO - codeparrot_training - Step 48280: {'lr': 1.4687447712818658e-06, 'samples': 24719872, 'steps': 48280, 'batch_loss/train': 0.7566351585555822} +12/29/2021 07:40:46 - INFO - codeparrot_training - Step 48281: {'lr': 1.4670390967838621e-06, 'samples': 24720384, 'steps': 48281, 'batch_loss/train': 0.788023242726922} +12/29/2021 07:40:59 - INFO - codeparrot_training - Step 48282: {'lr': 1.4653344103590372e-06, 'samples': 24720896, 'steps': 48282, 'batch_loss/train': 0.6608758866786957} +12/29/2021 07:41:09 - INFO - codeparrot_training - Step 48283: {'lr': 1.463630712014219e-06, 'samples': 24721408, 'steps': 48283, 'batch_loss/train': 0.7582284267991781} +12/29/2021 07:41:20 - INFO - codeparrot_training - Step 48284: {'lr': 1.4619280017561797e-06, 'samples': 24721920, 'steps': 48284, 'batch_loss/train': 0.8348948629572988} +12/29/2021 07:41:32 - INFO - codeparrot_training - Step 48285: {'lr': 1.4602262795916643e-06, 'samples': 24722432, 'steps': 48285, 'batch_loss/train': 0.6664507173700258} +12/29/2021 07:41:42 - INFO - codeparrot_training - Step 48286: {'lr': 1.458525545527417e-06, 'samples': 24722944, 'steps': 48286, 'batch_loss/train': 0.720650817733258} +12/29/2021 07:41:53 - INFO - codeparrot_training - Step 48287: {'lr': 1.456825799570266e-06, 'samples': 24723456, 'steps': 48287, 'batch_loss/train': 0.7933264216408134} +12/29/2021 07:42:04 - INFO - codeparrot_training - Step 48288: {'lr': 1.4551270417269002e-06, 'samples': 24723968, 'steps': 48288, 'batch_loss/train': 0.6814692134503275} +12/29/2021 07:42:17 - INFO - codeparrot_training - Step 48289: {'lr': 1.453429272004092e-06, 'samples': 24724480, 'steps': 48289, 'batch_loss/train': 0.8703194865956903} +12/29/2021 07:42:28 - INFO - codeparrot_training - Step 48290: {'lr': 1.4517324904086137e-06, 'samples': 24724992, 'steps': 48290, 'batch_loss/train': 0.7061907821334898} +12/29/2021 07:42:39 - INFO - codeparrot_training - Step 48291: {'lr': 1.45003669694721e-06, 'samples': 24725504, 'steps': 48291, 'batch_loss/train': 0.6793144010007381} +12/29/2021 07:42:51 - INFO - codeparrot_training - Step 48292: {'lr': 1.4483418916265977e-06, 'samples': 24726016, 'steps': 48292, 'batch_loss/train': 0.6976538700982928} +12/29/2021 07:43:02 - INFO - codeparrot_training - Step 48293: {'lr': 1.4466480744535492e-06, 'samples': 24726528, 'steps': 48293, 'batch_loss/train': 0.7629133488517255} +12/29/2021 07:43:12 - INFO - codeparrot_training - Step 48294: {'lr': 1.4449552454347537e-06, 'samples': 24727040, 'steps': 48294, 'batch_loss/train': 0.7626666137948632} +12/29/2021 07:43:26 - INFO - codeparrot_training - Step 48295: {'lr': 1.4432634045769832e-06, 'samples': 24727552, 'steps': 48295, 'batch_loss/train': 0.8231230525998399} +12/29/2021 07:43:37 - INFO - codeparrot_training - Step 48296: {'lr': 1.4415725518869548e-06, 'samples': 24728064, 'steps': 48296, 'batch_loss/train': 0.7420778272207826} +12/29/2021 07:43:47 - INFO - codeparrot_training - Step 48297: {'lr': 1.4398826873713578e-06, 'samples': 24728576, 'steps': 48297, 'batch_loss/train': 0.7854085825383663} +12/29/2021 07:43:58 - INFO - codeparrot_training - Step 48298: {'lr': 1.4381938110369363e-06, 'samples': 24729088, 'steps': 48298, 'batch_loss/train': 0.6781833155546337} +12/29/2021 07:44:11 - INFO - codeparrot_training - Step 48299: {'lr': 1.4365059228904076e-06, 'samples': 24729600, 'steps': 48299, 'batch_loss/train': 0.811085018562153} +12/29/2021 07:44:21 - INFO - codeparrot_training - Step 48300: {'lr': 1.4348190229384883e-06, 'samples': 24730112, 'steps': 48300, 'batch_loss/train': 0.6457379146013409} +12/29/2021 07:44:32 - INFO - codeparrot_training - Step 48301: {'lr': 1.4331331111878677e-06, 'samples': 24730624, 'steps': 48301, 'batch_loss/train': 1.1233734120614827} +12/29/2021 07:44:44 - INFO - codeparrot_training - Step 48302: {'lr': 1.4314481876452622e-06, 'samples': 24731136, 'steps': 48302, 'batch_loss/train': 0.7315197018906474} +12/29/2021 07:44:54 - INFO - codeparrot_training - Step 48303: {'lr': 1.4297642523173337e-06, 'samples': 24731648, 'steps': 48303, 'batch_loss/train': 0.6481932229362428} +12/29/2021 07:45:05 - INFO - codeparrot_training - Step 48304: {'lr': 1.4280813052108265e-06, 'samples': 24732160, 'steps': 48304, 'batch_loss/train': 0.7640979560092092} +12/29/2021 07:45:19 - INFO - codeparrot_training - Step 48305: {'lr': 1.4263993463324022e-06, 'samples': 24732672, 'steps': 48305, 'batch_loss/train': 0.6905289152637124} +12/29/2021 07:45:30 - INFO - codeparrot_training - Step 48306: {'lr': 1.424718375688777e-06, 'samples': 24733184, 'steps': 48306, 'batch_loss/train': 0.6883647856302559} +12/29/2021 07:45:40 - INFO - codeparrot_training - Step 48307: {'lr': 1.4230383932865853e-06, 'samples': 24733696, 'steps': 48307, 'batch_loss/train': 0.8516200203448534} +12/29/2021 07:45:52 - INFO - codeparrot_training - Step 48308: {'lr': 1.4213593991325713e-06, 'samples': 24734208, 'steps': 48308, 'batch_loss/train': 0.5814020487014204} +12/29/2021 07:46:03 - INFO - codeparrot_training - Step 48309: {'lr': 1.4196813932333407e-06, 'samples': 24734720, 'steps': 48309, 'batch_loss/train': 0.6679227380082011} +12/29/2021 07:46:14 - INFO - codeparrot_training - Step 48310: {'lr': 1.4180043755956106e-06, 'samples': 24735232, 'steps': 48310, 'batch_loss/train': 0.7526637623086572} +12/29/2021 07:46:24 - INFO - codeparrot_training - Step 48311: {'lr': 1.4163283462260424e-06, 'samples': 24735744, 'steps': 48311, 'batch_loss/train': 0.7322018044069409} +12/29/2021 07:46:39 - INFO - codeparrot_training - Step 48312: {'lr': 1.4146533051312693e-06, 'samples': 24736256, 'steps': 48312, 'batch_loss/train': 0.7565625687129796} +12/29/2021 07:46:49 - INFO - codeparrot_training - Step 48313: {'lr': 1.4129792523179808e-06, 'samples': 24736768, 'steps': 48313, 'batch_loss/train': 0.7929803244769573} +12/29/2021 07:47:00 - INFO - codeparrot_training - Step 48314: {'lr': 1.4113061877928102e-06, 'samples': 24737280, 'steps': 48314, 'batch_loss/train': 0.7263390182051808} +12/29/2021 07:47:12 - INFO - codeparrot_training - Step 48315: {'lr': 1.4096341115624466e-06, 'samples': 24737792, 'steps': 48315, 'batch_loss/train': 0.6773296864703298} +12/29/2021 07:47:23 - INFO - codeparrot_training - Step 48316: {'lr': 1.407963023633496e-06, 'samples': 24738304, 'steps': 48316, 'batch_loss/train': 1.4831803699489683} +12/29/2021 07:47:33 - INFO - codeparrot_training - Step 48317: {'lr': 1.4062929240126476e-06, 'samples': 24738816, 'steps': 48317, 'batch_loss/train': 0.7898713551112451} +12/29/2021 07:47:45 - INFO - codeparrot_training - Step 48318: {'lr': 1.404623812706479e-06, 'samples': 24739328, 'steps': 48318, 'batch_loss/train': 0.7682273415848613} +12/29/2021 07:47:56 - INFO - codeparrot_training - Step 48319: {'lr': 1.40295568972168e-06, 'samples': 24739840, 'steps': 48319, 'batch_loss/train': 0.7105168015696108} +12/29/2021 07:48:07 - INFO - codeparrot_training - Step 48320: {'lr': 1.4012885550648557e-06, 'samples': 24740352, 'steps': 48320, 'batch_loss/train': 0.686689589638263} +12/29/2021 07:48:17 - INFO - codeparrot_training - Step 48321: {'lr': 1.39962240874264e-06, 'samples': 24740864, 'steps': 48321, 'batch_loss/train': 0.7765085264109075} +12/29/2021 07:48:31 - INFO - codeparrot_training - Step 48322: {'lr': 1.3979572507616668e-06, 'samples': 24741376, 'steps': 48322, 'batch_loss/train': 0.7694974197074771} +12/29/2021 07:48:42 - INFO - codeparrot_training - Step 48323: {'lr': 1.3962930811285414e-06, 'samples': 24741888, 'steps': 48323, 'batch_loss/train': 0.813974445220083} +12/29/2021 07:48:53 - INFO - codeparrot_training - Step 48324: {'lr': 1.3946298998498698e-06, 'samples': 24742400, 'steps': 48324, 'batch_loss/train': 0.6812630547210574} +12/29/2021 07:49:05 - INFO - codeparrot_training - Step 48325: {'lr': 1.3929677069323133e-06, 'samples': 24742912, 'steps': 48325, 'batch_loss/train': 0.7260599206201732} +12/29/2021 07:49:15 - INFO - codeparrot_training - Step 48326: {'lr': 1.3913065023824222e-06, 'samples': 24743424, 'steps': 48326, 'batch_loss/train': 0.7108564113732427} +12/29/2021 07:49:26 - INFO - codeparrot_training - Step 48327: {'lr': 1.3896462862068304e-06, 'samples': 24743936, 'steps': 48327, 'batch_loss/train': 0.6855632485821843} +12/29/2021 07:49:40 - INFO - codeparrot_training - Step 48328: {'lr': 1.3879870584121435e-06, 'samples': 24744448, 'steps': 48328, 'batch_loss/train': 0.7783438442274928} +12/29/2021 07:49:51 - INFO - codeparrot_training - Step 48329: {'lr': 1.3863288190049395e-06, 'samples': 24744960, 'steps': 48329, 'batch_loss/train': 0.6785634178668261} +12/29/2021 07:50:01 - INFO - codeparrot_training - Step 48330: {'lr': 1.3846715679918243e-06, 'samples': 24745472, 'steps': 48330, 'batch_loss/train': 0.751599489711225} +12/29/2021 07:50:12 - INFO - codeparrot_training - Step 48331: {'lr': 1.3830153053793482e-06, 'samples': 24745984, 'steps': 48331, 'batch_loss/train': 0.681262903381139} +12/29/2021 07:50:24 - INFO - codeparrot_training - Step 48332: {'lr': 1.3813600311741726e-06, 'samples': 24746496, 'steps': 48332, 'batch_loss/train': 0.6270428323186934} +12/29/2021 07:50:35 - INFO - codeparrot_training - Step 48333: {'lr': 1.3797057453828198e-06, 'samples': 24747008, 'steps': 48333, 'batch_loss/train': 0.7414101869799197} +12/29/2021 07:50:46 - INFO - codeparrot_training - Step 48334: {'lr': 1.3780524480118683e-06, 'samples': 24747520, 'steps': 48334, 'batch_loss/train': 0.6077470146119595} +12/29/2021 07:50:58 - INFO - codeparrot_training - Step 48335: {'lr': 1.3764001390679236e-06, 'samples': 24748032, 'steps': 48335, 'batch_loss/train': 0.8227198915556073} +12/29/2021 07:51:08 - INFO - codeparrot_training - Step 48336: {'lr': 1.3747488185575364e-06, 'samples': 24748544, 'steps': 48336, 'batch_loss/train': 0.830544670112431} +12/29/2021 07:51:19 - INFO - codeparrot_training - Step 48337: {'lr': 1.3730984864872564e-06, 'samples': 24749056, 'steps': 48337, 'batch_loss/train': 0.8459116145968437} +12/29/2021 07:51:33 - INFO - codeparrot_training - Step 48338: {'lr': 1.3714491428636622e-06, 'samples': 24749568, 'steps': 48338, 'batch_loss/train': 0.7757006352767348} +12/29/2021 07:51:44 - INFO - codeparrot_training - Step 48339: {'lr': 1.3698007876933316e-06, 'samples': 24750080, 'steps': 48339, 'batch_loss/train': 0.731410413980484} +12/29/2021 07:51:54 - INFO - codeparrot_training - Step 48340: {'lr': 1.3681534209827595e-06, 'samples': 24750592, 'steps': 48340, 'batch_loss/train': 0.755577951669693} +12/29/2021 07:52:05 - INFO - codeparrot_training - Step 48341: {'lr': 1.366507042738524e-06, 'samples': 24751104, 'steps': 48341, 'batch_loss/train': 0.682768399361521} +12/29/2021 07:52:17 - INFO - codeparrot_training - Step 48342: {'lr': 1.364861652967203e-06, 'samples': 24751616, 'steps': 48342, 'batch_loss/train': 0.7180885728448629} +12/29/2021 07:52:28 - INFO - codeparrot_training - Step 48343: {'lr': 1.3632172516753194e-06, 'samples': 24752128, 'steps': 48343, 'batch_loss/train': 0.6535550812259316} +12/29/2021 07:52:38 - INFO - codeparrot_training - Step 48344: {'lr': 1.3615738388693677e-06, 'samples': 24752640, 'steps': 48344, 'batch_loss/train': 0.5847929192241281} +12/29/2021 07:52:51 - INFO - codeparrot_training - Step 48345: {'lr': 1.3599314145559538e-06, 'samples': 24753152, 'steps': 48345, 'batch_loss/train': 0.8063998622819781} +12/29/2021 07:53:01 - INFO - codeparrot_training - Step 48346: {'lr': 1.3582899787415448e-06, 'samples': 24753664, 'steps': 48346, 'batch_loss/train': 0.7642436539754272} +12/29/2021 07:53:12 - INFO - codeparrot_training - Step 48347: {'lr': 1.356649531432691e-06, 'samples': 24754176, 'steps': 48347, 'batch_loss/train': 0.7952477266080678} +12/29/2021 07:53:24 - INFO - codeparrot_training - Step 48348: {'lr': 1.3550100726359427e-06, 'samples': 24754688, 'steps': 48348, 'batch_loss/train': 0.7984708901494741} +12/29/2021 07:53:35 - INFO - codeparrot_training - Step 48349: {'lr': 1.3533716023577392e-06, 'samples': 24755200, 'steps': 48349, 'batch_loss/train': 0.7034047534689307} +12/29/2021 07:53:45 - INFO - codeparrot_training - Step 48350: {'lr': 1.3517341206046862e-06, 'samples': 24755712, 'steps': 48350, 'batch_loss/train': 0.757475861813873} +12/29/2021 07:53:56 - INFO - codeparrot_training - Step 48351: {'lr': 1.350097627383251e-06, 'samples': 24756224, 'steps': 48351, 'batch_loss/train': 0.766131647862494} +12/29/2021 07:54:10 - INFO - codeparrot_training - Step 48352: {'lr': 1.3484621226999005e-06, 'samples': 24756736, 'steps': 48352, 'batch_loss/train': 0.7951430999673903} +12/29/2021 07:54:20 - INFO - codeparrot_training - Step 48353: {'lr': 1.3468276065612128e-06, 'samples': 24757248, 'steps': 48353, 'batch_loss/train': 0.8504976984113455} +12/29/2021 07:54:31 - INFO - codeparrot_training - Step 48354: {'lr': 1.345194078973655e-06, 'samples': 24757760, 'steps': 48354, 'batch_loss/train': 0.7441824208945036} +12/29/2021 07:54:43 - INFO - codeparrot_training - Step 48355: {'lr': 1.3435615399437218e-06, 'samples': 24758272, 'steps': 48355, 'batch_loss/train': 0.7454209367278963} +12/29/2021 07:54:54 - INFO - codeparrot_training - Step 48356: {'lr': 1.3419299894778524e-06, 'samples': 24758784, 'steps': 48356, 'batch_loss/train': 0.7641761638224125} +12/29/2021 07:55:04 - INFO - codeparrot_training - Step 48357: {'lr': 1.3402994275826253e-06, 'samples': 24759296, 'steps': 48357, 'batch_loss/train': 0.6544999569887295} +12/29/2021 07:55:18 - INFO - codeparrot_training - Step 48358: {'lr': 1.3386698542644794e-06, 'samples': 24759808, 'steps': 48358, 'batch_loss/train': 0.789144825655967} +12/29/2021 07:55:29 - INFO - codeparrot_training - Step 48359: {'lr': 1.337041269529854e-06, 'samples': 24760320, 'steps': 48359, 'batch_loss/train': 0.7301239427179098} +12/29/2021 07:55:39 - INFO - codeparrot_training - Step 48360: {'lr': 1.3354136733852995e-06, 'samples': 24760832, 'steps': 48360, 'batch_loss/train': 0.7309691281989217} +12/29/2021 07:55:50 - INFO - codeparrot_training - Step 48361: {'lr': 1.3337870658372275e-06, 'samples': 24761344, 'steps': 48361, 'batch_loss/train': 0.6699147685430944} +12/29/2021 07:56:02 - INFO - codeparrot_training - Step 48362: {'lr': 1.3321614468921328e-06, 'samples': 24761856, 'steps': 48362, 'batch_loss/train': 0.8130527622997761} +12/29/2021 07:56:13 - INFO - codeparrot_training - Step 48363: {'lr': 1.3305368165564546e-06, 'samples': 24762368, 'steps': 48363, 'batch_loss/train': 0.6877323784865439} +12/29/2021 07:56:23 - INFO - codeparrot_training - Step 48364: {'lr': 1.3289131748366877e-06, 'samples': 24762880, 'steps': 48364, 'batch_loss/train': 0.7447910718619823} +12/29/2021 07:56:35 - INFO - codeparrot_training - Step 48365: {'lr': 1.3272905217392717e-06, 'samples': 24763392, 'steps': 48365, 'batch_loss/train': 0.7695676125586033} +12/29/2021 07:56:46 - INFO - codeparrot_training - Step 48366: {'lr': 1.3256688572706455e-06, 'samples': 24763904, 'steps': 48366, 'batch_loss/train': 0.7750978979747742} +12/29/2021 07:56:57 - INFO - codeparrot_training - Step 48367: {'lr': 1.3240481814372485e-06, 'samples': 24764416, 'steps': 48367, 'batch_loss/train': 0.7117932117544115} +12/29/2021 07:57:11 - INFO - codeparrot_training - Step 48368: {'lr': 1.3224284942455477e-06, 'samples': 24764928, 'steps': 48368, 'batch_loss/train': 0.753887745551765} +12/29/2021 07:57:21 - INFO - codeparrot_training - Step 48369: {'lr': 1.3208097957019828e-06, 'samples': 24765440, 'steps': 48369, 'batch_loss/train': 0.697342571336776} +12/29/2021 07:57:32 - INFO - codeparrot_training - Step 48370: {'lr': 1.3191920858129646e-06, 'samples': 24765952, 'steps': 48370, 'batch_loss/train': 0.7225215816870332} +12/29/2021 07:57:44 - INFO - codeparrot_training - Step 48371: {'lr': 1.3175753645849608e-06, 'samples': 24766464, 'steps': 48371, 'batch_loss/train': 0.7664311295375228} +12/29/2021 07:57:55 - INFO - codeparrot_training - Step 48372: {'lr': 1.315959632024355e-06, 'samples': 24766976, 'steps': 48372, 'batch_loss/train': 0.640465781558305} +12/29/2021 07:58:05 - INFO - codeparrot_training - Step 48373: {'lr': 1.3143448881376141e-06, 'samples': 24767488, 'steps': 48373, 'batch_loss/train': 0.7907677702605724} +12/29/2021 07:58:16 - INFO - codeparrot_training - Step 48374: {'lr': 1.312731132931122e-06, 'samples': 24768000, 'steps': 48374, 'batch_loss/train': 0.7052297564223409} +12/29/2021 07:58:30 - INFO - codeparrot_training - Step 48375: {'lr': 1.3111183664113458e-06, 'samples': 24768512, 'steps': 48375, 'batch_loss/train': 0.7069549718871713} +12/29/2021 07:58:41 - INFO - codeparrot_training - Step 48376: {'lr': 1.3095065885846136e-06, 'samples': 24769024, 'steps': 48376, 'batch_loss/train': 0.7069404271896929} +12/29/2021 07:58:51 - INFO - codeparrot_training - Step 48377: {'lr': 1.3078957994574204e-06, 'samples': 24769536, 'steps': 48377, 'batch_loss/train': 0.7767243904527277} +12/29/2021 07:59:03 - INFO - codeparrot_training - Step 48378: {'lr': 1.3062859990360942e-06, 'samples': 24770048, 'steps': 48378, 'batch_loss/train': 0.7403825595974922} +12/29/2021 07:59:14 - INFO - codeparrot_training - Step 48379: {'lr': 1.3046771873271024e-06, 'samples': 24770560, 'steps': 48379, 'batch_loss/train': 0.7367295935400762} +12/29/2021 07:59:25 - INFO - codeparrot_training - Step 48380: {'lr': 1.3030693643368007e-06, 'samples': 24771072, 'steps': 48380, 'batch_loss/train': 0.7203783085569739} +12/29/2021 07:59:37 - INFO - codeparrot_training - Step 48381: {'lr': 1.301462530071601e-06, 'samples': 24771584, 'steps': 48381, 'batch_loss/train': 0.6729747159406543} +12/29/2021 07:59:47 - INFO - codeparrot_training - Step 48382: {'lr': 1.299856684537859e-06, 'samples': 24772096, 'steps': 48382, 'batch_loss/train': 0.7170378995360807} +12/29/2021 07:59:58 - INFO - codeparrot_training - Step 48383: {'lr': 1.2982518277420142e-06, 'samples': 24772608, 'steps': 48383, 'batch_loss/train': 0.6754393093287945} +12/29/2021 08:00:09 - INFO - codeparrot_training - Step 48384: {'lr': 1.2966479596903945e-06, 'samples': 24773120, 'steps': 48384, 'batch_loss/train': 0.7748871613293886} +12/29/2021 08:00:23 - INFO - codeparrot_training - Step 48385: {'lr': 1.2950450803893844e-06, 'samples': 24773632, 'steps': 48385, 'batch_loss/train': 0.627560997963883} +12/29/2021 08:00:33 - INFO - codeparrot_training - Step 48386: {'lr': 1.2934431898453669e-06, 'samples': 24774144, 'steps': 48386, 'batch_loss/train': 0.7131344326771796} +12/29/2021 08:00:44 - INFO - codeparrot_training - Step 48387: {'lr': 1.2918422880647262e-06, 'samples': 24774656, 'steps': 48387, 'batch_loss/train': 0.7049403298879042} +12/29/2021 08:00:56 - INFO - codeparrot_training - Step 48388: {'lr': 1.2902423750538184e-06, 'samples': 24775168, 'steps': 48388, 'batch_loss/train': 0.7661587987095118} +12/29/2021 08:01:07 - INFO - codeparrot_training - Step 48389: {'lr': 1.2886434508189715e-06, 'samples': 24775680, 'steps': 48389, 'batch_loss/train': 0.5923097315244377} +12/29/2021 08:01:18 - INFO - codeparrot_training - Step 48390: {'lr': 1.2870455153665973e-06, 'samples': 24776192, 'steps': 48390, 'batch_loss/train': 0.7105920561589301} +12/29/2021 08:01:30 - INFO - codeparrot_training - Step 48391: {'lr': 1.285448568702996e-06, 'samples': 24776704, 'steps': 48391, 'batch_loss/train': 0.7596692591905594} +12/29/2021 08:01:40 - INFO - codeparrot_training - Step 48392: {'lr': 1.2838526108345516e-06, 'samples': 24777216, 'steps': 48392, 'batch_loss/train': 0.6416232972405851} +12/29/2021 08:01:51 - INFO - codeparrot_training - Step 48393: {'lr': 1.2822576417675924e-06, 'samples': 24777728, 'steps': 48393, 'batch_loss/train': 0.7251927978359163} +12/29/2021 08:02:01 - INFO - codeparrot_training - Step 48394: {'lr': 1.2806636615084744e-06, 'samples': 24778240, 'steps': 48394, 'batch_loss/train': 0.8520309766754508} +12/29/2021 08:02:14 - INFO - codeparrot_training - Step 48395: {'lr': 1.2790706700635256e-06, 'samples': 24778752, 'steps': 48395, 'batch_loss/train': 0.4951871468219906} +12/29/2021 08:02:25 - INFO - codeparrot_training - Step 48396: {'lr': 1.2774786674390748e-06, 'samples': 24779264, 'steps': 48396, 'batch_loss/train': 0.7165587125346065} +12/29/2021 08:02:35 - INFO - codeparrot_training - Step 48397: {'lr': 1.27588765364145e-06, 'samples': 24779776, 'steps': 48397, 'batch_loss/train': 0.7043344527482986} +12/29/2021 08:02:49 - INFO - codeparrot_training - Step 48398: {'lr': 1.2742976286769791e-06, 'samples': 24780288, 'steps': 48398, 'batch_loss/train': 0.695586503483355} +12/29/2021 08:03:00 - INFO - codeparrot_training - Step 48399: {'lr': 1.272708592551991e-06, 'samples': 24780800, 'steps': 48399, 'batch_loss/train': 0.7104108795174398} +12/29/2021 08:03:10 - INFO - codeparrot_training - Step 48400: {'lr': 1.2711205452727858e-06, 'samples': 24781312, 'steps': 48400, 'batch_loss/train': 0.7604880351573229} +12/29/2021 08:03:22 - INFO - codeparrot_training - Step 48401: {'lr': 1.2695334868457198e-06, 'samples': 24781824, 'steps': 48401, 'batch_loss/train': 0.8538858015090227} +12/29/2021 08:03:33 - INFO - codeparrot_training - Step 48402: {'lr': 1.2679474172770377e-06, 'samples': 24782336, 'steps': 48402, 'batch_loss/train': 0.7371403761208057} +12/29/2021 08:03:44 - INFO - codeparrot_training - Step 48403: {'lr': 1.2663623365731237e-06, 'samples': 24782848, 'steps': 48403, 'batch_loss/train': 0.737374366261065} +12/29/2021 08:03:54 - INFO - codeparrot_training - Step 48404: {'lr': 1.2647782447401668e-06, 'samples': 24783360, 'steps': 48404, 'batch_loss/train': 0.7109576016664505} +12/29/2021 08:04:16 - INFO - codeparrot_training - Step 48405: {'lr': 1.2631951417845788e-06, 'samples': 24783872, 'steps': 48405, 'batch_loss/train': 0.7567721763625741} +12/29/2021 08:04:27 - INFO - codeparrot_training - Step 48406: {'lr': 1.2616130277126048e-06, 'samples': 24784384, 'steps': 48406, 'batch_loss/train': 0.7032041332568042} +12/29/2021 08:04:38 - INFO - codeparrot_training - Step 48407: {'lr': 1.2600319025305451e-06, 'samples': 24784896, 'steps': 48407, 'batch_loss/train': 0.7646683044731617} +12/29/2021 08:04:50 - INFO - codeparrot_training - Step 48408: {'lr': 1.2584517662446448e-06, 'samples': 24785408, 'steps': 48408, 'batch_loss/train': 0.6285332971892785} +12/29/2021 08:05:01 - INFO - codeparrot_training - Step 48409: {'lr': 1.2568726188612601e-06, 'samples': 24785920, 'steps': 48409, 'batch_loss/train': 0.770296195987612} +12/29/2021 08:05:11 - INFO - codeparrot_training - Step 48410: {'lr': 1.255294460386608e-06, 'samples': 24786432, 'steps': 48410, 'batch_loss/train': 0.6892436040798202} +12/29/2021 08:05:24 - INFO - codeparrot_training - Step 48411: {'lr': 1.2537172908269612e-06, 'samples': 24786944, 'steps': 48411, 'batch_loss/train': 0.5546290490310639} +12/29/2021 08:05:34 - INFO - codeparrot_training - Step 48412: {'lr': 1.252141110188648e-06, 'samples': 24787456, 'steps': 48412, 'batch_loss/train': 0.6291042685043067} +12/29/2021 08:05:45 - INFO - codeparrot_training - Step 48413: {'lr': 1.2505659184778862e-06, 'samples': 24787968, 'steps': 48413, 'batch_loss/train': 0.7825110624544322} +12/29/2021 08:05:56 - INFO - codeparrot_training - Step 48414: {'lr': 1.2489917157009479e-06, 'samples': 24788480, 'steps': 48414, 'batch_loss/train': 0.7435341477394104} +12/29/2021 08:06:10 - INFO - codeparrot_training - Step 48415: {'lr': 1.2474185018640782e-06, 'samples': 24788992, 'steps': 48415, 'batch_loss/train': 0.793290154542774} +12/29/2021 08:06:20 - INFO - codeparrot_training - Step 48416: {'lr': 1.2458462769735502e-06, 'samples': 24789504, 'steps': 48416, 'batch_loss/train': 0.6262157299788669} +12/29/2021 08:06:31 - INFO - codeparrot_training - Step 48417: {'lr': 1.2442750410356362e-06, 'samples': 24790016, 'steps': 48417, 'batch_loss/train': 0.6821163957938552} +12/29/2021 08:06:43 - INFO - codeparrot_training - Step 48418: {'lr': 1.2427047940565539e-06, 'samples': 24790528, 'steps': 48418, 'batch_loss/train': 0.728728105314076} +12/29/2021 08:06:54 - INFO - codeparrot_training - Step 48419: {'lr': 1.2411355360425202e-06, 'samples': 24791040, 'steps': 48419, 'batch_loss/train': 0.7081649294123054} +12/29/2021 08:07:04 - INFO - codeparrot_training - Step 48420: {'lr': 1.239567266999836e-06, 'samples': 24791552, 'steps': 48420, 'batch_loss/train': 0.7705680699436925} +12/29/2021 08:07:17 - INFO - codeparrot_training - Step 48421: {'lr': 1.2379999869346903e-06, 'samples': 24792064, 'steps': 48421, 'batch_loss/train': 0.7733589340932667} +12/29/2021 08:07:27 - INFO - codeparrot_training - Step 48422: {'lr': 1.2364336958533284e-06, 'samples': 24792576, 'steps': 48422, 'batch_loss/train': 0.6676266237627715} +12/29/2021 08:07:38 - INFO - codeparrot_training - Step 48423: {'lr': 1.2348683937619953e-06, 'samples': 24793088, 'steps': 48423, 'batch_loss/train': 0.7463565738871694} +12/29/2021 08:07:48 - INFO - codeparrot_training - Step 48424: {'lr': 1.2333040806668806e-06, 'samples': 24793600, 'steps': 48424, 'batch_loss/train': 0.6843833988532424} +12/29/2021 08:08:01 - INFO - codeparrot_training - Step 48425: {'lr': 1.2317407565742011e-06, 'samples': 24794112, 'steps': 48425, 'batch_loss/train': 0.7916124926414341} +12/29/2021 08:08:11 - INFO - codeparrot_training - Step 48426: {'lr': 1.2301784214902024e-06, 'samples': 24794624, 'steps': 48426, 'batch_loss/train': 0.757781570777297} +12/29/2021 08:08:22 - INFO - codeparrot_training - Step 48427: {'lr': 1.2286170754211011e-06, 'samples': 24795136, 'steps': 48427, 'batch_loss/train': 0.7498543113470078} +12/29/2021 08:08:35 - INFO - codeparrot_training - Step 48428: {'lr': 1.2270567183730596e-06, 'samples': 24795648, 'steps': 48428, 'batch_loss/train': 0.7049633641727269} +12/29/2021 08:08:46 - INFO - codeparrot_training - Step 48429: {'lr': 1.2254973503523226e-06, 'samples': 24796160, 'steps': 48429, 'batch_loss/train': 0.609237025026232} +12/29/2021 08:08:57 - INFO - codeparrot_training - Step 48430: {'lr': 1.2239389713650518e-06, 'samples': 24796672, 'steps': 48430, 'batch_loss/train': 0.769994453061372} +12/29/2021 08:09:09 - INFO - codeparrot_training - Step 48431: {'lr': 1.222381581417492e-06, 'samples': 24797184, 'steps': 48431, 'batch_loss/train': 0.7489937014179304} +12/29/2021 08:09:19 - INFO - codeparrot_training - Step 48432: {'lr': 1.2208251805158055e-06, 'samples': 24797696, 'steps': 48432, 'batch_loss/train': 0.7694673342630267} +12/29/2021 08:09:30 - INFO - codeparrot_training - Step 48433: {'lr': 1.2192697686661814e-06, 'samples': 24798208, 'steps': 48433, 'batch_loss/train': 0.8460509795695543} +12/29/2021 08:09:41 - INFO - codeparrot_training - Step 48434: {'lr': 1.2177153458747814e-06, 'samples': 24798720, 'steps': 48434, 'batch_loss/train': 0.6541902946773916} +12/29/2021 08:09:53 - INFO - codeparrot_training - Step 48435: {'lr': 1.2161619121478507e-06, 'samples': 24799232, 'steps': 48435, 'batch_loss/train': 0.742153542349115} +12/29/2021 08:10:03 - INFO - codeparrot_training - Step 48436: {'lr': 1.2146094674914954e-06, 'samples': 24799744, 'steps': 48436, 'batch_loss/train': 0.7135434029623866} +12/29/2021 08:10:14 - INFO - codeparrot_training - Step 48437: {'lr': 1.213058011911905e-06, 'samples': 24800256, 'steps': 48437, 'batch_loss/train': 0.8060080586001277} +12/29/2021 08:10:28 - INFO - codeparrot_training - Step 48438: {'lr': 1.211507545415269e-06, 'samples': 24800768, 'steps': 48438, 'batch_loss/train': 0.6649038188625127} +12/29/2021 08:10:38 - INFO - codeparrot_training - Step 48439: {'lr': 1.2099580680077493e-06, 'samples': 24801280, 'steps': 48439, 'batch_loss/train': 0.6762106278911233} +12/29/2021 08:10:49 - INFO - codeparrot_training - Step 48440: {'lr': 1.2084095796954798e-06, 'samples': 24801792, 'steps': 48440, 'batch_loss/train': 0.7631034078076482} +12/29/2021 08:11:02 - INFO - codeparrot_training - Step 48441: {'lr': 1.2068620804846497e-06, 'samples': 24802304, 'steps': 48441, 'batch_loss/train': 0.6662178400438279} +12/29/2021 08:11:12 - INFO - codeparrot_training - Step 48442: {'lr': 1.2053155703813934e-06, 'samples': 24802816, 'steps': 48442, 'batch_loss/train': 0.4122544068377465} +12/29/2021 08:11:23 - INFO - codeparrot_training - Step 48443: {'lr': 1.2037700493918448e-06, 'samples': 24803328, 'steps': 48443, 'batch_loss/train': 0.7314733038656414} +12/29/2021 08:11:33 - INFO - codeparrot_training - Step 48444: {'lr': 1.202225517522193e-06, 'samples': 24803840, 'steps': 48444, 'batch_loss/train': 0.7197836423292756} +12/29/2021 08:11:47 - INFO - codeparrot_training - Step 48445: {'lr': 1.200681974778517e-06, 'samples': 24804352, 'steps': 48445, 'batch_loss/train': 0.6160486289300025} +12/29/2021 08:11:58 - INFO - codeparrot_training - Step 48446: {'lr': 1.199139421167006e-06, 'samples': 24804864, 'steps': 48446, 'batch_loss/train': 0.8360894564539194} +12/29/2021 08:12:08 - INFO - codeparrot_training - Step 48447: {'lr': 1.197597856693794e-06, 'samples': 24805376, 'steps': 48447, 'batch_loss/train': 0.715793059207499} +12/29/2021 08:12:21 - INFO - codeparrot_training - Step 48448: {'lr': 1.1960572813649594e-06, 'samples': 24805888, 'steps': 48448, 'batch_loss/train': 0.8647571867331862} +12/29/2021 08:12:31 - INFO - codeparrot_training - Step 48449: {'lr': 1.1945176951866643e-06, 'samples': 24806400, 'steps': 48449, 'batch_loss/train': 0.6995444912463427} +12/29/2021 08:12:42 - INFO - codeparrot_training - Step 48450: {'lr': 1.1929790981650146e-06, 'samples': 24806912, 'steps': 48450, 'batch_loss/train': 0.6992223288398236} +12/29/2021 08:12:54 - INFO - codeparrot_training - Step 48451: {'lr': 1.1914414903061443e-06, 'samples': 24807424, 'steps': 48451, 'batch_loss/train': 0.7445502057671547} +12/29/2021 08:13:05 - INFO - codeparrot_training - Step 48452: {'lr': 1.189904871616132e-06, 'samples': 24807936, 'steps': 48452, 'batch_loss/train': 0.7235536546795629} +12/29/2021 08:13:15 - INFO - codeparrot_training - Step 48453: {'lr': 1.1883692421011395e-06, 'samples': 24808448, 'steps': 48453, 'batch_loss/train': 0.7976557709043846} +12/29/2021 08:13:26 - INFO - codeparrot_training - Step 48454: {'lr': 1.1868346017672448e-06, 'samples': 24808960, 'steps': 48454, 'batch_loss/train': 0.8561710985377431} +12/29/2021 08:13:40 - INFO - codeparrot_training - Step 48455: {'lr': 1.185300950620527e-06, 'samples': 24809472, 'steps': 48455, 'batch_loss/train': 0.7468202573945746} +12/29/2021 08:13:50 - INFO - codeparrot_training - Step 48456: {'lr': 1.1837682886671197e-06, 'samples': 24809984, 'steps': 48456, 'batch_loss/train': 0.647159157320857} +12/29/2021 08:14:01 - INFO - codeparrot_training - Step 48457: {'lr': 1.1822366159131016e-06, 'samples': 24810496, 'steps': 48457, 'batch_loss/train': 0.6220426277723163} +12/29/2021 08:14:13 - INFO - codeparrot_training - Step 48458: {'lr': 1.1807059323645508e-06, 'samples': 24811008, 'steps': 48458, 'batch_loss/train': 0.73942200478632} +12/29/2021 08:14:24 - INFO - codeparrot_training - Step 48459: {'lr': 1.179176238027574e-06, 'samples': 24811520, 'steps': 48459, 'batch_loss/train': 0.7641506958752871} +12/29/2021 08:14:34 - INFO - codeparrot_training - Step 48460: {'lr': 1.1776475329082216e-06, 'samples': 24812032, 'steps': 48460, 'batch_loss/train': 0.799106125254184} +12/29/2021 08:14:45 - INFO - codeparrot_training - Step 48461: {'lr': 1.1761198170125997e-06, 'samples': 24812544, 'steps': 48461, 'batch_loss/train': 0.7352613266557455} +12/29/2021 08:14:57 - INFO - codeparrot_training - Step 48462: {'lr': 1.1745930903467872e-06, 'samples': 24813056, 'steps': 48462, 'batch_loss/train': 0.8249935712665319} +12/29/2021 08:15:08 - INFO - codeparrot_training - Step 48463: {'lr': 1.1730673529168068e-06, 'samples': 24813568, 'steps': 48463, 'batch_loss/train': 0.7624987829476595} +12/29/2021 08:15:18 - INFO - codeparrot_training - Step 48464: {'lr': 1.1715426047287925e-06, 'samples': 24814080, 'steps': 48464, 'batch_loss/train': 0.7175484298495576} +12/29/2021 08:15:32 - INFO - codeparrot_training - Step 48465: {'lr': 1.1700188457887396e-06, 'samples': 24814592, 'steps': 48465, 'batch_loss/train': 0.7626031562685966} +12/29/2021 08:15:43 - INFO - codeparrot_training - Step 48466: {'lr': 1.168496076102754e-06, 'samples': 24815104, 'steps': 48466, 'batch_loss/train': 0.7666340665891767} +12/29/2021 08:15:54 - INFO - codeparrot_training - Step 48467: {'lr': 1.166974295676887e-06, 'samples': 24815616, 'steps': 48467, 'batch_loss/train': 0.6822058754041791} +12/29/2021 08:16:06 - INFO - codeparrot_training - Step 48468: {'lr': 1.165453504517161e-06, 'samples': 24816128, 'steps': 48468, 'batch_loss/train': 0.717208482674323} +12/29/2021 08:16:17 - INFO - codeparrot_training - Step 48469: {'lr': 1.163933702629627e-06, 'samples': 24816640, 'steps': 48469, 'batch_loss/train': 0.8012021207250655} +12/29/2021 08:16:27 - INFO - codeparrot_training - Step 48470: {'lr': 1.1624148900203634e-06, 'samples': 24817152, 'steps': 48470, 'batch_loss/train': 0.7049840996041894} +12/29/2021 08:16:39 - INFO - codeparrot_training - Step 48471: {'lr': 1.1608970666953377e-06, 'samples': 24817664, 'steps': 48471, 'batch_loss/train': 0.7208565541077405} +12/29/2021 08:16:50 - INFO - codeparrot_training - Step 48472: {'lr': 1.1593802326606562e-06, 'samples': 24818176, 'steps': 48472, 'batch_loss/train': 1.016653761267662} +12/29/2021 08:17:01 - INFO - codeparrot_training - Step 48473: {'lr': 1.157864387922314e-06, 'samples': 24818688, 'steps': 48473, 'batch_loss/train': 0.7890771538950503} +12/29/2021 08:17:11 - INFO - codeparrot_training - Step 48474: {'lr': 1.1563495324863339e-06, 'samples': 24819200, 'steps': 48474, 'batch_loss/train': 0.7398597132414579} +12/29/2021 08:17:26 - INFO - codeparrot_training - Step 48475: {'lr': 1.1548356663587667e-06, 'samples': 24819712, 'steps': 48475, 'batch_loss/train': 0.705831415951252} +12/29/2021 08:17:36 - INFO - codeparrot_training - Step 48476: {'lr': 1.1533227895456078e-06, 'samples': 24820224, 'steps': 48476, 'batch_loss/train': 0.5455376369063742} +12/29/2021 08:17:47 - INFO - codeparrot_training - Step 48477: {'lr': 1.15181090205288e-06, 'samples': 24820736, 'steps': 48477, 'batch_loss/train': 0.5533592522260733} +12/29/2021 08:18:00 - INFO - codeparrot_training - Step 48478: {'lr': 1.1503000038865785e-06, 'samples': 24821248, 'steps': 48478, 'batch_loss/train': 0.6899294182658195} +12/29/2021 08:18:11 - INFO - codeparrot_training - Step 48479: {'lr': 1.1487900950527264e-06, 'samples': 24821760, 'steps': 48479, 'batch_loss/train': 0.7070214605191723} +12/29/2021 08:18:21 - INFO - codeparrot_training - Step 48480: {'lr': 1.1472811755573464e-06, 'samples': 24822272, 'steps': 48480, 'batch_loss/train': 0.720940881408751} +12/29/2021 08:18:32 - INFO - codeparrot_training - Step 48481: {'lr': 1.1457732454063786e-06, 'samples': 24822784, 'steps': 48481, 'batch_loss/train': 1.1828015614300966} +12/29/2021 08:18:44 - INFO - codeparrot_training - Step 48482: {'lr': 1.1442663046058733e-06, 'samples': 24823296, 'steps': 48482, 'batch_loss/train': 0.7021662839688361} +12/29/2021 08:18:55 - INFO - codeparrot_training - Step 48483: {'lr': 1.1427603531617703e-06, 'samples': 24823808, 'steps': 48483, 'batch_loss/train': 0.6296142798382789} +12/29/2021 08:19:05 - INFO - codeparrot_training - Step 48484: {'lr': 1.1412553910801205e-06, 'samples': 24824320, 'steps': 48484, 'batch_loss/train': 0.7536686769453809} +12/29/2021 08:19:19 - INFO - codeparrot_training - Step 48485: {'lr': 1.1397514183668633e-06, 'samples': 24824832, 'steps': 48485, 'batch_loss/train': 0.8047069273889065} +12/29/2021 08:19:30 - INFO - codeparrot_training - Step 48486: {'lr': 1.1382484350279942e-06, 'samples': 24825344, 'steps': 48486, 'batch_loss/train': 0.8874855795875192} +12/29/2021 08:19:40 - INFO - codeparrot_training - Step 48487: {'lr': 1.1367464410694805e-06, 'samples': 24825856, 'steps': 48487, 'batch_loss/train': 0.709813219960779} +12/29/2021 08:19:53 - INFO - codeparrot_training - Step 48488: {'lr': 1.1352454364972897e-06, 'samples': 24826368, 'steps': 48488, 'batch_loss/train': 0.5844313162378967} +12/29/2021 08:20:03 - INFO - codeparrot_training - Step 48489: {'lr': 1.1337454213173891e-06, 'samples': 24826880, 'steps': 48489, 'batch_loss/train': 0.7675598608329892} +12/29/2021 08:20:14 - INFO - codeparrot_training - Step 48490: {'lr': 1.1322463955357742e-06, 'samples': 24827392, 'steps': 48490, 'batch_loss/train': 0.7862128564156592} +12/29/2021 08:20:24 - INFO - codeparrot_training - Step 48491: {'lr': 1.1307483591583566e-06, 'samples': 24827904, 'steps': 48491, 'batch_loss/train': 0.6646546619012952} +12/29/2021 08:20:39 - INFO - codeparrot_training - Step 48492: {'lr': 1.129251312191104e-06, 'samples': 24828416, 'steps': 48492, 'batch_loss/train': 0.7549090199172497} +12/29/2021 08:20:50 - INFO - codeparrot_training - Step 48493: {'lr': 1.1277552546400117e-06, 'samples': 24828928, 'steps': 48493, 'batch_loss/train': 0.594290318666026} +12/29/2021 08:21:00 - INFO - codeparrot_training - Step 48494: {'lr': 1.1262601865109634e-06, 'samples': 24829440, 'steps': 48494, 'batch_loss/train': 0.7371112979017198} +12/29/2021 08:21:12 - INFO - codeparrot_training - Step 48495: {'lr': 1.1247661078099547e-06, 'samples': 24829952, 'steps': 48495, 'batch_loss/train': 0.7855017129331827} +12/29/2021 08:21:23 - INFO - codeparrot_training - Step 48496: {'lr': 1.1232730185428974e-06, 'samples': 24830464, 'steps': 48496, 'batch_loss/train': 0.7808919893577695} +12/29/2021 08:21:33 - INFO - codeparrot_training - Step 48497: {'lr': 1.1217809187157312e-06, 'samples': 24830976, 'steps': 48497, 'batch_loss/train': 0.888947494328022} +12/29/2021 08:21:46 - INFO - codeparrot_training - Step 48498: {'lr': 1.1202898083343959e-06, 'samples': 24831488, 'steps': 48498, 'batch_loss/train': 0.6735588423907757} +12/29/2021 08:21:56 - INFO - codeparrot_training - Step 48499: {'lr': 1.1187996874048034e-06, 'samples': 24832000, 'steps': 48499, 'batch_loss/train': 0.7007964579388499} +12/29/2021 08:22:07 - INFO - codeparrot_training - Step 48500: {'lr': 1.1173105559328932e-06, 'samples': 24832512, 'steps': 48500, 'batch_loss/train': 0.7291326355189085} +12/29/2021 08:22:19 - INFO - codeparrot_training - Step 48501: {'lr': 1.1158224139246053e-06, 'samples': 24833024, 'steps': 48501, 'batch_loss/train': 0.7215812960639596} +12/29/2021 08:22:30 - INFO - codeparrot_training - Step 48502: {'lr': 1.114335261385796e-06, 'samples': 24833536, 'steps': 48502, 'batch_loss/train': 0.6646559634245932} +12/29/2021 08:22:40 - INFO - codeparrot_training - Step 48503: {'lr': 1.1128490983224326e-06, 'samples': 24834048, 'steps': 48503, 'batch_loss/train': 0.7805347959510982} +12/29/2021 08:22:51 - INFO - codeparrot_training - Step 48504: {'lr': 1.1113639247403994e-06, 'samples': 24834560, 'steps': 48504, 'batch_loss/train': 0.7698882340919226} +12/29/2021 08:23:05 - INFO - codeparrot_training - Step 48505: {'lr': 1.1098797406456362e-06, 'samples': 24835072, 'steps': 48505, 'batch_loss/train': 0.7785197957418859} +12/29/2021 08:23:16 - INFO - codeparrot_training - Step 48506: {'lr': 1.1083965460439716e-06, 'samples': 24835584, 'steps': 48506, 'batch_loss/train': 0.44907059567049146} +12/29/2021 08:23:26 - INFO - codeparrot_training - Step 48507: {'lr': 1.1069143409413728e-06, 'samples': 24836096, 'steps': 48507, 'batch_loss/train': 0.7110481918789446} +12/29/2021 08:23:39 - INFO - codeparrot_training - Step 48508: {'lr': 1.1054331253436966e-06, 'samples': 24836608, 'steps': 48508, 'batch_loss/train': 0.8214517384767532} +12/29/2021 08:23:50 - INFO - codeparrot_training - Step 48509: {'lr': 1.103952899256827e-06, 'samples': 24837120, 'steps': 48509, 'batch_loss/train': 0.6753914784640074} +12/29/2021 08:24:00 - INFO - codeparrot_training - Step 48510: {'lr': 1.102473662686676e-06, 'samples': 24837632, 'steps': 48510, 'batch_loss/train': 0.7568382853642106} +12/29/2021 08:24:12 - INFO - codeparrot_training - Step 48511: {'lr': 1.1009954156391e-06, 'samples': 24838144, 'steps': 48511, 'batch_loss/train': 0.6917667454108596} +12/29/2021 08:24:23 - INFO - codeparrot_training - Step 48512: {'lr': 1.099518158119983e-06, 'samples': 24838656, 'steps': 48512, 'batch_loss/train': 0.7764375600963831} +12/29/2021 08:24:34 - INFO - codeparrot_training - Step 48513: {'lr': 1.0980418901352096e-06, 'samples': 24839168, 'steps': 48513, 'batch_loss/train': 0.738814102485776} +12/29/2021 08:24:44 - INFO - codeparrot_training - Step 48514: {'lr': 1.0965666116906359e-06, 'samples': 24839680, 'steps': 48514, 'batch_loss/train': 0.7425660295411944} +12/29/2021 08:24:59 - INFO - codeparrot_training - Step 48515: {'lr': 1.0950923227921183e-06, 'samples': 24840192, 'steps': 48515, 'batch_loss/train': 0.7043882557190955} +12/29/2021 08:25:09 - INFO - codeparrot_training - Step 48516: {'lr': 1.093619023445569e-06, 'samples': 24840704, 'steps': 48516, 'batch_loss/train': 0.7840326228761114} +12/29/2021 08:25:20 - INFO - codeparrot_training - Step 48517: {'lr': 1.0921467136567885e-06, 'samples': 24841216, 'steps': 48517, 'batch_loss/train': 0.6454784472007304} +12/29/2021 08:25:32 - INFO - codeparrot_training - Step 48518: {'lr': 1.0906753934316338e-06, 'samples': 24841728, 'steps': 48518, 'batch_loss/train': 0.7990711415186524} +12/29/2021 08:25:43 - INFO - codeparrot_training - Step 48519: {'lr': 1.0892050627759887e-06, 'samples': 24842240, 'steps': 48519, 'batch_loss/train': 0.7505232151597738} +12/29/2021 08:25:53 - INFO - codeparrot_training - Step 48520: {'lr': 1.087735721695654e-06, 'samples': 24842752, 'steps': 48520, 'batch_loss/train': 0.7840838967822492} +12/29/2021 08:26:07 - INFO - codeparrot_training - Step 48521: {'lr': 1.0862673701965143e-06, 'samples': 24843264, 'steps': 48521, 'batch_loss/train': 0.7368218968622386} +12/29/2021 08:26:18 - INFO - codeparrot_training - Step 48522: {'lr': 1.0848000082843979e-06, 'samples': 24843776, 'steps': 48522, 'batch_loss/train': 0.7472532042302191} +12/29/2021 08:26:29 - INFO - codeparrot_training - Step 48523: {'lr': 1.0833336359651057e-06, 'samples': 24844288, 'steps': 48523, 'batch_loss/train': 0.5902836271561682} +12/29/2021 08:26:39 - INFO - codeparrot_training - Step 48524: {'lr': 1.0818682532444945e-06, 'samples': 24844800, 'steps': 48524, 'batch_loss/train': 0.7609494049102068} +12/29/2021 08:26:52 - INFO - codeparrot_training - Step 48525: {'lr': 1.0804038601284205e-06, 'samples': 24845312, 'steps': 48525, 'batch_loss/train': 0.5195902225095779} +12/29/2021 08:27:02 - INFO - codeparrot_training - Step 48526: {'lr': 1.078940456622629e-06, 'samples': 24845824, 'steps': 48526, 'batch_loss/train': 0.6622261321172118} +12/29/2021 08:27:13 - INFO - codeparrot_training - Step 48527: {'lr': 1.0774780427330043e-06, 'samples': 24846336, 'steps': 48527, 'batch_loss/train': 0.3589968211017549} +12/29/2021 08:27:25 - INFO - codeparrot_training - Step 48528: {'lr': 1.0760166184653475e-06, 'samples': 24846848, 'steps': 48528, 'batch_loss/train': 0.6986249163746834} +12/29/2021 08:27:36 - INFO - codeparrot_training - Step 48529: {'lr': 1.0745561838254593e-06, 'samples': 24847360, 'steps': 48529, 'batch_loss/train': 0.7857569847255945} +12/29/2021 08:27:46 - INFO - codeparrot_training - Step 48530: {'lr': 1.0730967388191126e-06, 'samples': 24847872, 'steps': 48530, 'batch_loss/train': 0.8061559586785734} +12/29/2021 08:27:58 - INFO - codeparrot_training - Step 48531: {'lr': 1.0716382834521643e-06, 'samples': 24848384, 'steps': 48531, 'batch_loss/train': 0.6204854459501803} +12/29/2021 08:28:09 - INFO - codeparrot_training - Step 48532: {'lr': 1.0701808177303873e-06, 'samples': 24848896, 'steps': 48532, 'batch_loss/train': 0.7048764135688543} +12/29/2021 08:28:20 - INFO - codeparrot_training - Step 48533: {'lr': 1.0687243416595826e-06, 'samples': 24849408, 'steps': 48533, 'batch_loss/train': 0.7576726591214538} +12/29/2021 08:28:30 - INFO - codeparrot_training - Step 48534: {'lr': 1.0672688552455234e-06, 'samples': 24849920, 'steps': 48534, 'batch_loss/train': 0.7152283226605505} +12/29/2021 08:28:44 - INFO - codeparrot_training - Step 48535: {'lr': 1.0658143584940383e-06, 'samples': 24850432, 'steps': 48535, 'batch_loss/train': 0.7231877110898495} +12/29/2021 08:28:55 - INFO - codeparrot_training - Step 48536: {'lr': 1.064360851410845e-06, 'samples': 24850944, 'steps': 48536, 'batch_loss/train': 0.8127067033201456} +12/29/2021 08:29:06 - INFO - codeparrot_training - Step 48537: {'lr': 1.0629083340017997e-06, 'samples': 24851456, 'steps': 48537, 'batch_loss/train': 0.7122430019080639} +12/29/2021 08:29:18 - INFO - codeparrot_training - Step 48538: {'lr': 1.0614568062725927e-06, 'samples': 24851968, 'steps': 48538, 'batch_loss/train': 0.6015415922738612} +12/29/2021 08:29:28 - INFO - codeparrot_training - Step 48539: {'lr': 1.0600062682290802e-06, 'samples': 24852480, 'steps': 48539, 'batch_loss/train': 0.7557220421731472} +12/29/2021 08:29:39 - INFO - codeparrot_training - Step 48540: {'lr': 1.058556719876952e-06, 'samples': 24852992, 'steps': 48540, 'batch_loss/train': 0.724391839466989} +12/29/2021 08:29:51 - INFO - codeparrot_training - Step 48541: {'lr': 1.057108161222009e-06, 'samples': 24853504, 'steps': 48541, 'batch_loss/train': 0.7973825172521174} +12/29/2021 08:30:02 - INFO - codeparrot_training - Step 48542: {'lr': 1.0556605922700247e-06, 'samples': 24854016, 'steps': 48542, 'batch_loss/train': 0.6580570896621794} +12/29/2021 08:30:12 - INFO - codeparrot_training - Step 48543: {'lr': 1.0542140130267441e-06, 'samples': 24854528, 'steps': 48543, 'batch_loss/train': 0.789022873621434} +12/29/2021 08:30:23 - INFO - codeparrot_training - Step 48544: {'lr': 1.0527684234978851e-06, 'samples': 24855040, 'steps': 48544, 'batch_loss/train': 0.7833910943008959} +12/29/2021 08:30:37 - INFO - codeparrot_training - Step 48545: {'lr': 1.0513238236892209e-06, 'samples': 24855552, 'steps': 48545, 'batch_loss/train': 0.7277247223537415} +12/29/2021 08:30:48 - INFO - codeparrot_training - Step 48546: {'lr': 1.0498802136064966e-06, 'samples': 24856064, 'steps': 48546, 'batch_loss/train': 0.7217359342612326} +12/29/2021 08:30:58 - INFO - codeparrot_training - Step 48547: {'lr': 1.048437593255458e-06, 'samples': 24856576, 'steps': 48547, 'batch_loss/train': 0.8598299324512482} +12/29/2021 08:31:10 - INFO - codeparrot_training - Step 48548: {'lr': 1.0469959626418223e-06, 'samples': 24857088, 'steps': 48548, 'batch_loss/train': 0.8399775652214885} +12/29/2021 08:31:21 - INFO - codeparrot_training - Step 48549: {'lr': 1.0455553217713353e-06, 'samples': 24857600, 'steps': 48549, 'batch_loss/train': 0.7997096912004054} +12/29/2021 08:31:32 - INFO - codeparrot_training - Step 48550: {'lr': 1.0441156706497145e-06, 'samples': 24858112, 'steps': 48550, 'batch_loss/train': 0.7729160971939564} +12/29/2021 08:31:46 - INFO - codeparrot_training - Step 48551: {'lr': 1.0426770092827054e-06, 'samples': 24858624, 'steps': 48551, 'batch_loss/train': 0.7204874765593559} +12/29/2021 08:31:56 - INFO - codeparrot_training - Step 48552: {'lr': 1.0412393376759977e-06, 'samples': 24859136, 'steps': 48552, 'batch_loss/train': 0.7225810483796522} +12/29/2021 08:32:07 - INFO - codeparrot_training - Step 48553: {'lr': 1.0398026558353091e-06, 'samples': 24859648, 'steps': 48553, 'batch_loss/train': 0.7902199737727642} +12/29/2021 08:32:19 - INFO - codeparrot_training - Step 48554: {'lr': 1.038366963766385e-06, 'samples': 24860160, 'steps': 48554, 'batch_loss/train': 0.7287180623970926} +12/29/2021 08:32:29 - INFO - codeparrot_training - Step 48555: {'lr': 1.0369322614748878e-06, 'samples': 24860672, 'steps': 48555, 'batch_loss/train': 0.735390470654238} +12/29/2021 08:32:40 - INFO - codeparrot_training - Step 48556: {'lr': 1.0354985489665348e-06, 'samples': 24861184, 'steps': 48556, 'batch_loss/train': 0.6952435970306396} +12/29/2021 08:32:51 - INFO - codeparrot_training - Step 48557: {'lr': 1.0340658262470436e-06, 'samples': 24861696, 'steps': 48557, 'batch_loss/train': 0.8061623140238225} +12/29/2021 08:33:03 - INFO - codeparrot_training - Step 48558: {'lr': 1.0326340933221323e-06, 'samples': 24862208, 'steps': 48558, 'batch_loss/train': 0.7469576857984066} +12/29/2021 08:33:14 - INFO - codeparrot_training - Step 48559: {'lr': 1.0312033501974072e-06, 'samples': 24862720, 'steps': 48559, 'batch_loss/train': 0.6433977561537176} +12/29/2021 08:33:24 - INFO - codeparrot_training - Step 48560: {'lr': 1.0297735968786415e-06, 'samples': 24863232, 'steps': 48560, 'batch_loss/train': 1.673827555263415} +12/29/2021 08:33:39 - INFO - codeparrot_training - Step 48561: {'lr': 1.0283448333714973e-06, 'samples': 24863744, 'steps': 48561, 'batch_loss/train': 0.7454966679215431} +12/29/2021 08:33:49 - INFO - codeparrot_training - Step 48562: {'lr': 1.0269170596816369e-06, 'samples': 24864256, 'steps': 48562, 'batch_loss/train': 0.6009691627696157} +12/29/2021 08:34:00 - INFO - codeparrot_training - Step 48563: {'lr': 1.0254902758147222e-06, 'samples': 24864768, 'steps': 48563, 'batch_loss/train': 0.777558333822526} +12/29/2021 08:34:13 - INFO - codeparrot_training - Step 48564: {'lr': 1.024064481776471e-06, 'samples': 24865280, 'steps': 48564, 'batch_loss/train': 0.7053399726282805} +12/29/2021 08:34:23 - INFO - codeparrot_training - Step 48565: {'lr': 1.0226396775725456e-06, 'samples': 24865792, 'steps': 48565, 'batch_loss/train': 0.7795084761455655} +12/29/2021 08:34:34 - INFO - codeparrot_training - Step 48566: {'lr': 1.0212158632085799e-06, 'samples': 24866304, 'steps': 48566, 'batch_loss/train': 0.7720216276939027} +12/29/2021 08:34:45 - INFO - codeparrot_training - Step 48567: {'lr': 1.0197930386902365e-06, 'samples': 24866816, 'steps': 48567, 'batch_loss/train': 0.848394968546927} +12/29/2021 08:34:57 - INFO - codeparrot_training - Step 48568: {'lr': 1.0183712040232051e-06, 'samples': 24867328, 'steps': 48568, 'batch_loss/train': 0.7482132229488343} +12/29/2021 08:35:07 - INFO - codeparrot_training - Step 48569: {'lr': 1.0169503592130925e-06, 'samples': 24867840, 'steps': 48569, 'batch_loss/train': 0.7794571444392204} +12/29/2021 08:35:18 - INFO - codeparrot_training - Step 48570: {'lr': 1.0155305042655882e-06, 'samples': 24868352, 'steps': 48570, 'batch_loss/train': 0.7041214155033231} +12/29/2021 08:35:30 - INFO - codeparrot_training - Step 48571: {'lr': 1.014111639186327e-06, 'samples': 24868864, 'steps': 48571, 'batch_loss/train': 0.7346699258778244} +12/29/2021 08:35:41 - INFO - codeparrot_training - Step 48572: {'lr': 1.0126937639809431e-06, 'samples': 24869376, 'steps': 48572, 'batch_loss/train': 0.6000151622574776} +12/29/2021 08:35:51 - INFO - codeparrot_training - Step 48573: {'lr': 1.0112768786550709e-06, 'samples': 24869888, 'steps': 48573, 'batch_loss/train': 0.7170677995309234} +12/29/2021 08:36:05 - INFO - codeparrot_training - Step 48574: {'lr': 1.0098609832143445e-06, 'samples': 24870400, 'steps': 48574, 'batch_loss/train': 0.6193961646640673} +12/29/2021 08:36:16 - INFO - codeparrot_training - Step 48575: {'lr': 1.0084460776643988e-06, 'samples': 24870912, 'steps': 48575, 'batch_loss/train': 0.7896543345414102} +12/29/2021 08:36:27 - INFO - codeparrot_training - Step 48576: {'lr': 1.007032162010868e-06, 'samples': 24871424, 'steps': 48576, 'batch_loss/train': 0.7222659243270755} +12/29/2021 08:36:37 - INFO - codeparrot_training - Step 48577: {'lr': 1.0056192362593586e-06, 'samples': 24871936, 'steps': 48577, 'batch_loss/train': 0.679504944011569} +12/29/2021 08:36:50 - INFO - codeparrot_training - Step 48578: {'lr': 1.0042073004154494e-06, 'samples': 24872448, 'steps': 48578, 'batch_loss/train': 0.8789702898357064} +12/29/2021 08:37:01 - INFO - codeparrot_training - Step 48579: {'lr': 1.0027963544848307e-06, 'samples': 24872960, 'steps': 48579, 'batch_loss/train': 0.7026134198531508} +12/29/2021 08:37:11 - INFO - codeparrot_training - Step 48580: {'lr': 1.0013863984730809e-06, 'samples': 24873472, 'steps': 48580, 'batch_loss/train': 1.190974340774119} +12/29/2021 08:37:23 - INFO - codeparrot_training - Step 48581: {'lr': 9.999774323858068e-07, 'samples': 24873984, 'steps': 48581, 'batch_loss/train': 0.7712983712553978} +12/29/2021 08:37:34 - INFO - codeparrot_training - Step 48582: {'lr': 9.985694562285596e-07, 'samples': 24874496, 'steps': 48582, 'batch_loss/train': 0.8475746307522058} +12/29/2021 08:37:45 - INFO - codeparrot_training - Step 48583: {'lr': 9.971624700070293e-07, 'samples': 24875008, 'steps': 48583, 'batch_loss/train': 0.7984580653719604} +12/29/2021 08:37:59 - INFO - codeparrot_training - Step 48584: {'lr': 9.957564737267388e-07, 'samples': 24875520, 'steps': 48584, 'batch_loss/train': 0.7931341398507357} +12/29/2021 08:38:09 - INFO - codeparrot_training - Step 48585: {'lr': 9.943514673932952e-07, 'samples': 24876032, 'steps': 48585, 'batch_loss/train': 0.6610731020336971} +12/29/2021 08:38:20 - INFO - codeparrot_training - Step 48586: {'lr': 9.92947451012305e-07, 'samples': 24876544, 'steps': 48586, 'batch_loss/train': 0.7232476896606386} +12/29/2021 08:38:31 - INFO - codeparrot_training - Step 48587: {'lr': 9.91544424589319e-07, 'samples': 24877056, 'steps': 48587, 'batch_loss/train': 0.8035904942080379} +12/29/2021 08:38:43 - INFO - codeparrot_training - Step 48588: {'lr': 9.901423881299443e-07, 'samples': 24877568, 'steps': 48588, 'batch_loss/train': 0.7841789927333593} +12/29/2021 08:38:53 - INFO - codeparrot_training - Step 48589: {'lr': 9.887413416397317e-07, 'samples': 24878080, 'steps': 48589, 'batch_loss/train': 0.7594958720728755} +12/29/2021 08:39:04 - INFO - codeparrot_training - Step 48590: {'lr': 9.873412851242602e-07, 'samples': 24878592, 'steps': 48590, 'batch_loss/train': 0.7685517151840031} +12/29/2021 08:39:18 - INFO - codeparrot_training - Step 48591: {'lr': 9.859422185891087e-07, 'samples': 24879104, 'steps': 48591, 'batch_loss/train': 0.7306290385313332} +12/29/2021 08:39:28 - INFO - codeparrot_training - Step 48592: {'lr': 9.845441420398282e-07, 'samples': 24879616, 'steps': 48592, 'batch_loss/train': 0.7059139925986528} +12/29/2021 08:39:39 - INFO - codeparrot_training - Step 48593: {'lr': 9.831470554819421e-07, 'samples': 24880128, 'steps': 48593, 'batch_loss/train': 0.6903827330097556} +12/29/2021 08:39:51 - INFO - codeparrot_training - Step 48594: {'lr': 9.817509589210571e-07, 'samples': 24880640, 'steps': 48594, 'batch_loss/train': 0.6801237622275949} +12/29/2021 08:40:02 - INFO - codeparrot_training - Step 48595: {'lr': 9.803558523627242e-07, 'samples': 24881152, 'steps': 48595, 'batch_loss/train': 0.744922504061833} +12/29/2021 08:40:13 - INFO - codeparrot_training - Step 48596: {'lr': 9.789617358124391e-07, 'samples': 24881664, 'steps': 48596, 'batch_loss/train': 0.7182741602882743} +12/29/2021 08:40:23 - INFO - codeparrot_training - Step 48597: {'lr': 9.775686092758085e-07, 'samples': 24882176, 'steps': 48597, 'batch_loss/train': 0.7012136811390519} +12/29/2021 08:40:35 - INFO - codeparrot_training - Step 48598: {'lr': 9.761764727583e-07, 'samples': 24882688, 'steps': 48598, 'batch_loss/train': 0.8239004844799638} +12/29/2021 08:40:46 - INFO - codeparrot_training - Step 48599: {'lr': 9.747853262655204e-07, 'samples': 24883200, 'steps': 48599, 'batch_loss/train': 0.7102717068046331} +12/29/2021 08:40:57 - INFO - codeparrot_training - Step 48600: {'lr': 9.733951698029375e-07, 'samples': 24883712, 'steps': 48600, 'batch_loss/train': 0.7646710082190111} +12/29/2021 08:41:11 - INFO - codeparrot_training - Step 48601: {'lr': 9.720060033761303e-07, 'samples': 24884224, 'steps': 48601, 'batch_loss/train': 0.39342423586640507} +12/29/2021 08:41:21 - INFO - codeparrot_training - Step 48602: {'lr': 9.706178269905942e-07, 'samples': 24884736, 'steps': 48602, 'batch_loss/train': 0.6874048803001642} +12/29/2021 08:41:32 - INFO - codeparrot_training - Step 48603: {'lr': 9.692306406518526e-07, 'samples': 24885248, 'steps': 48603, 'batch_loss/train': 0.7679233672097325} +12/29/2021 08:41:44 - INFO - codeparrot_training - Step 48604: {'lr': 9.67844444365401e-07, 'samples': 24885760, 'steps': 48604, 'batch_loss/train': 0.7991809104569256} +12/29/2021 08:41:55 - INFO - codeparrot_training - Step 48605: {'lr': 9.664592381367909e-07, 'samples': 24886272, 'steps': 48605, 'batch_loss/train': 0.6421773261390626} +12/29/2021 08:42:05 - INFO - codeparrot_training - Step 48606: {'lr': 9.650750219714898e-07, 'samples': 24886784, 'steps': 48606, 'batch_loss/train': 0.8496884135529399} +12/29/2021 08:42:16 - INFO - codeparrot_training - Step 48607: {'lr': 9.636917958750212e-07, 'samples': 24887296, 'steps': 48607, 'batch_loss/train': 0.5822006111266091} +12/29/2021 08:42:28 - INFO - codeparrot_training - Step 48608: {'lr': 9.623095598528808e-07, 'samples': 24887808, 'steps': 48608, 'batch_loss/train': 0.6475925603881478} +12/29/2021 08:42:39 - INFO - codeparrot_training - Step 48609: {'lr': 9.609283139105918e-07, 'samples': 24888320, 'steps': 48609, 'batch_loss/train': 0.8459054594859481} +12/29/2021 08:42:50 - INFO - codeparrot_training - Step 48610: {'lr': 9.595480580535942e-07, 'samples': 24888832, 'steps': 48610, 'batch_loss/train': 0.7388561828993261} +12/29/2021 08:43:02 - INFO - codeparrot_training - Step 48611: {'lr': 9.58168792287384e-07, 'samples': 24889344, 'steps': 48611, 'batch_loss/train': 0.6766986076254398} +12/29/2021 08:43:12 - INFO - codeparrot_training - Step 48612: {'lr': 9.56790516617484e-07, 'samples': 24889856, 'steps': 48612, 'batch_loss/train': 0.771481916308403} +12/29/2021 08:43:23 - INFO - codeparrot_training - Step 48613: {'lr': 9.554132310493624e-07, 'samples': 24890368, 'steps': 48613, 'batch_loss/train': 0.699134879745543} +12/29/2021 08:43:37 - INFO - codeparrot_training - Step 48614: {'lr': 9.540369355884593e-07, 'samples': 24890880, 'steps': 48614, 'batch_loss/train': 0.758044458925724} +12/29/2021 08:43:48 - INFO - codeparrot_training - Step 48615: {'lr': 9.526616302402702e-07, 'samples': 24891392, 'steps': 48615, 'batch_loss/train': 0.8641630262136459} +12/29/2021 08:43:58 - INFO - codeparrot_training - Step 48616: {'lr': 9.51287315010263e-07, 'samples': 24891904, 'steps': 48616, 'batch_loss/train': 0.6079866837244481} +12/29/2021 08:44:10 - INFO - codeparrot_training - Step 48617: {'lr': 9.499139899039055e-07, 'samples': 24892416, 'steps': 48617, 'batch_loss/train': 0.6295154807157815} +12/29/2021 08:44:21 - INFO - codeparrot_training - Step 48618: {'lr': 9.485416549266657e-07, 'samples': 24892928, 'steps': 48618, 'batch_loss/train': 0.6151126902550459} +12/29/2021 08:44:32 - INFO - codeparrot_training - Step 48619: {'lr': 9.471703100839557e-07, 'samples': 24893440, 'steps': 48619, 'batch_loss/train': 0.8443085364997387} +12/29/2021 08:44:42 - INFO - codeparrot_training - Step 48620: {'lr': 9.457999553812713e-07, 'samples': 24893952, 'steps': 48620, 'batch_loss/train': 0.7165454463101923} +12/29/2021 08:44:56 - INFO - codeparrot_training - Step 48621: {'lr': 9.444305908240524e-07, 'samples': 24894464, 'steps': 48621, 'batch_loss/train': 0.6608729930594563} +12/29/2021 08:45:07 - INFO - codeparrot_training - Step 48622: {'lr': 9.430622164177116e-07, 'samples': 24894976, 'steps': 48622, 'batch_loss/train': 0.7303130775690079} +12/29/2021 08:45:17 - INFO - codeparrot_training - Step 48623: {'lr': 9.416948321677443e-07, 'samples': 24895488, 'steps': 48623, 'batch_loss/train': 0.7437559422105551} +12/29/2021 08:45:29 - INFO - codeparrot_training - Step 48624: {'lr': 9.403284380795629e-07, 'samples': 24896000, 'steps': 48624, 'batch_loss/train': 0.7964381487108767} +12/29/2021 08:45:40 - INFO - codeparrot_training - Step 48625: {'lr': 9.389630341585798e-07, 'samples': 24896512, 'steps': 48625, 'batch_loss/train': 0.7016892237588763} +12/29/2021 08:45:51 - INFO - codeparrot_training - Step 48626: {'lr': 9.375986204102349e-07, 'samples': 24897024, 'steps': 48626, 'batch_loss/train': 0.7615249729715288} +12/29/2021 08:46:03 - INFO - codeparrot_training - Step 48627: {'lr': 9.362351968399685e-07, 'samples': 24897536, 'steps': 48627, 'batch_loss/train': 0.7217223192565143} +12/29/2021 08:46:14 - INFO - codeparrot_training - Step 48628: {'lr': 9.348727634531651e-07, 'samples': 24898048, 'steps': 48628, 'batch_loss/train': 0.7066185348667204} +12/29/2021 08:46:24 - INFO - codeparrot_training - Step 48629: {'lr': 9.335113202552925e-07, 'samples': 24898560, 'steps': 48629, 'batch_loss/train': 0.815540736541152} +12/29/2021 08:46:35 - INFO - codeparrot_training - Step 48630: {'lr': 9.321508672517076e-07, 'samples': 24899072, 'steps': 48630, 'batch_loss/train': 0.7352399383671582} +12/29/2021 08:46:49 - INFO - codeparrot_training - Step 48631: {'lr': 9.307914044478782e-07, 'samples': 24899584, 'steps': 48631, 'batch_loss/train': 0.5849880164896604} +12/29/2021 08:47:00 - INFO - codeparrot_training - Step 48632: {'lr': 9.294329318491612e-07, 'samples': 24900096, 'steps': 48632, 'batch_loss/train': 0.620469794201199} +12/29/2021 08:47:10 - INFO - codeparrot_training - Step 48633: {'lr': 9.280754494609689e-07, 'samples': 24900608, 'steps': 48633, 'batch_loss/train': 0.6783293196931481} +12/29/2021 08:47:23 - INFO - codeparrot_training - Step 48634: {'lr': 9.267189572886859e-07, 'samples': 24901120, 'steps': 48634, 'batch_loss/train': 0.7780977771617472} +12/29/2021 08:47:33 - INFO - codeparrot_training - Step 48635: {'lr': 9.253634553377521e-07, 'samples': 24901632, 'steps': 48635, 'batch_loss/train': 0.9015526380389929} +12/29/2021 08:47:44 - INFO - codeparrot_training - Step 48636: {'lr': 9.240089436135246e-07, 'samples': 24902144, 'steps': 48636, 'batch_loss/train': 0.7086311001330614} +12/29/2021 08:47:55 - INFO - codeparrot_training - Step 48637: {'lr': 9.226554221213879e-07, 'samples': 24902656, 'steps': 48637, 'batch_loss/train': 0.7651178888045251} +12/29/2021 08:48:08 - INFO - codeparrot_training - Step 48638: {'lr': 9.213028908667264e-07, 'samples': 24903168, 'steps': 48638, 'batch_loss/train': 1.4465452815056778} +12/29/2021 08:48:19 - INFO - codeparrot_training - Step 48639: {'lr': 9.199513498548972e-07, 'samples': 24903680, 'steps': 48639, 'batch_loss/train': 0.9082850287668407} +12/29/2021 08:48:30 - INFO - codeparrot_training - Step 48640: {'lr': 9.186007990913126e-07, 'samples': 24904192, 'steps': 48640, 'batch_loss/train': 0.6887285998091102} +12/29/2021 08:48:42 - INFO - codeparrot_training - Step 48641: {'lr': 9.172512385813015e-07, 'samples': 24904704, 'steps': 48641, 'batch_loss/train': 0.67157664289698} +12/29/2021 08:48:52 - INFO - codeparrot_training - Step 48642: {'lr': 9.159026683302484e-07, 'samples': 24905216, 'steps': 48642, 'batch_loss/train': 0.7430353418458253} +12/29/2021 08:49:03 - INFO - codeparrot_training - Step 48643: {'lr': 9.145550883435383e-07, 'samples': 24905728, 'steps': 48643, 'batch_loss/train': 0.7442113072611392} +12/29/2021 08:49:15 - INFO - codeparrot_training - Step 48644: {'lr': 9.132084986264999e-07, 'samples': 24906240, 'steps': 48644, 'batch_loss/train': 0.7351208890322596} +12/29/2021 08:49:26 - INFO - codeparrot_training - Step 48645: {'lr': 9.118628991844901e-07, 'samples': 24906752, 'steps': 48645, 'batch_loss/train': 0.7596267983317375} +12/29/2021 08:49:36 - INFO - codeparrot_training - Step 48646: {'lr': 9.105182900228659e-07, 'samples': 24907264, 'steps': 48646, 'batch_loss/train': 0.7072691009379923} +12/29/2021 08:49:47 - INFO - codeparrot_training - Step 48647: {'lr': 9.091746711469562e-07, 'samples': 24907776, 'steps': 48647, 'batch_loss/train': 0.5967182889580727} +12/29/2021 08:49:59 - INFO - codeparrot_training - Step 48648: {'lr': 9.078320425621178e-07, 'samples': 24908288, 'steps': 48648, 'batch_loss/train': 0.7411892339587212} +12/29/2021 08:50:10 - INFO - codeparrot_training - Step 48649: {'lr': 9.064904042737076e-07, 'samples': 24908800, 'steps': 48649, 'batch_loss/train': 0.7552098957821727} +12/29/2021 08:50:21 - INFO - codeparrot_training - Step 48650: {'lr': 9.051497562869992e-07, 'samples': 24909312, 'steps': 48650, 'batch_loss/train': 0.6565972551470622} +12/29/2021 08:50:34 - INFO - codeparrot_training - Step 48651: {'lr': 9.038100986074049e-07, 'samples': 24909824, 'steps': 48651, 'batch_loss/train': 0.6927963839843869} +12/29/2021 08:50:45 - INFO - codeparrot_training - Step 48652: {'lr': 9.024714312401705e-07, 'samples': 24910336, 'steps': 48652, 'batch_loss/train': 0.695217679720372} +12/29/2021 08:50:56 - INFO - codeparrot_training - Step 48653: {'lr': 9.011337541906528e-07, 'samples': 24910848, 'steps': 48653, 'batch_loss/train': 0.6073239556280896} +12/29/2021 08:51:08 - INFO - codeparrot_training - Step 48654: {'lr': 8.997970674642087e-07, 'samples': 24911360, 'steps': 48654, 'batch_loss/train': 0.7599999764934182} +12/29/2021 08:51:19 - INFO - codeparrot_training - Step 48655: {'lr': 8.984613710660838e-07, 'samples': 24911872, 'steps': 48655, 'batch_loss/train': 0.7806160293985158} +12/29/2021 08:51:29 - INFO - codeparrot_training - Step 48656: {'lr': 8.971266650016075e-07, 'samples': 24912384, 'steps': 48656, 'batch_loss/train': 0.7418660968542099} +12/29/2021 08:51:41 - INFO - codeparrot_training - Step 48657: {'lr': 8.957929492761363e-07, 'samples': 24912896, 'steps': 48657, 'batch_loss/train': 0.8182906545698643} +12/29/2021 08:51:52 - INFO - codeparrot_training - Step 48658: {'lr': 8.944602238949162e-07, 'samples': 24913408, 'steps': 48658, 'batch_loss/train': 0.9327503703534603} +12/29/2021 08:52:03 - INFO - codeparrot_training - Step 48659: {'lr': 8.931284888632485e-07, 'samples': 24913920, 'steps': 48659, 'batch_loss/train': 0.7551333119627088} +12/29/2021 08:52:13 - INFO - codeparrot_training - Step 48660: {'lr': 8.917977441864622e-07, 'samples': 24914432, 'steps': 48660, 'batch_loss/train': 0.6779233273118734} +12/29/2021 08:52:27 - INFO - codeparrot_training - Step 48661: {'lr': 8.904679898698309e-07, 'samples': 24914944, 'steps': 48661, 'batch_loss/train': 0.708798878127709} +12/29/2021 08:52:38 - INFO - codeparrot_training - Step 48662: {'lr': 8.891392259186282e-07, 'samples': 24915456, 'steps': 48662, 'batch_loss/train': 0.7106085065752268} +12/29/2021 08:52:48 - INFO - codeparrot_training - Step 48663: {'lr': 8.878114523381553e-07, 'samples': 24915968, 'steps': 48663, 'batch_loss/train': 0.7127290097996593} +12/29/2021 08:53:00 - INFO - codeparrot_training - Step 48664: {'lr': 8.864846691336858e-07, 'samples': 24916480, 'steps': 48664, 'batch_loss/train': 0.5904231565073133} +12/29/2021 08:53:11 - INFO - codeparrot_training - Step 48665: {'lr': 8.851588763104657e-07, 'samples': 24916992, 'steps': 48665, 'batch_loss/train': 0.6505648037418723} +12/29/2021 08:53:22 - INFO - codeparrot_training - Step 48666: {'lr': 8.838340738738238e-07, 'samples': 24917504, 'steps': 48666, 'batch_loss/train': 0.7047189115546644} +12/29/2021 08:53:34 - INFO - codeparrot_training - Step 48667: {'lr': 8.825102618289782e-07, 'samples': 24918016, 'steps': 48667, 'batch_loss/train': 0.9177870582789183} +12/29/2021 08:53:45 - INFO - codeparrot_training - Step 48668: {'lr': 8.811874401812026e-07, 'samples': 24918528, 'steps': 48668, 'batch_loss/train': 0.6137466478394344} +12/29/2021 08:53:55 - INFO - codeparrot_training - Step 48669: {'lr': 8.798656089357703e-07, 'samples': 24919040, 'steps': 48669, 'batch_loss/train': 0.9498469214886427} +12/29/2021 08:54:06 - INFO - codeparrot_training - Step 48670: {'lr': 8.785447680979275e-07, 'samples': 24919552, 'steps': 48670, 'batch_loss/train': 0.8602556674741209} +12/29/2021 08:54:20 - INFO - codeparrot_training - Step 48671: {'lr': 8.772249176729197e-07, 'samples': 24920064, 'steps': 48671, 'batch_loss/train': 0.8354627210646868} +12/29/2021 08:54:31 - INFO - codeparrot_training - Step 48672: {'lr': 8.759060576660205e-07, 'samples': 24920576, 'steps': 48672, 'batch_loss/train': 0.7659877380356193} +12/29/2021 08:54:41 - INFO - codeparrot_training - Step 48673: {'lr': 8.74588188082448e-07, 'samples': 24921088, 'steps': 48673, 'batch_loss/train': 0.6812296148855239} +12/29/2021 08:54:54 - INFO - codeparrot_training - Step 48674: {'lr': 8.732713089274203e-07, 'samples': 24921600, 'steps': 48674, 'batch_loss/train': 0.7708143714116886} +12/29/2021 08:55:04 - INFO - codeparrot_training - Step 48675: {'lr': 8.719554202062385e-07, 'samples': 24922112, 'steps': 48675, 'batch_loss/train': 0.9913691172841936} +12/29/2021 08:55:15 - INFO - codeparrot_training - Step 48676: {'lr': 8.706405219240654e-07, 'samples': 24922624, 'steps': 48676, 'batch_loss/train': 0.8100067051127553} +12/29/2021 08:55:26 - INFO - codeparrot_training - Step 48677: {'lr': 8.693266140861744e-07, 'samples': 24923136, 'steps': 48677, 'batch_loss/train': 0.7215272719040513} +12/29/2021 08:55:40 - INFO - codeparrot_training - Step 48678: {'lr': 8.680136966977559e-07, 'samples': 24923648, 'steps': 48678, 'batch_loss/train': 0.5708432587562129} +12/29/2021 08:55:50 - INFO - codeparrot_training - Step 48679: {'lr': 8.667017697640555e-07, 'samples': 24924160, 'steps': 48679, 'batch_loss/train': 0.7539646066725254} +12/29/2021 08:56:01 - INFO - codeparrot_training - Step 48680: {'lr': 8.653908332902915e-07, 'samples': 24924672, 'steps': 48680, 'batch_loss/train': 0.7668366925790906} +12/29/2021 08:56:13 - INFO - codeparrot_training - Step 48681: {'lr': 8.640808872816542e-07, 'samples': 24925184, 'steps': 48681, 'batch_loss/train': 0.7495654402300715} +12/29/2021 08:56:24 - INFO - codeparrot_training - Step 48682: {'lr': 8.627719317433614e-07, 'samples': 24925696, 'steps': 48682, 'batch_loss/train': 0.6784750989172608} +12/29/2021 08:56:35 - INFO - codeparrot_training - Step 48683: {'lr': 8.614639666806034e-07, 'samples': 24926208, 'steps': 48683, 'batch_loss/train': 0.7564849406480789} +12/29/2021 08:56:47 - INFO - codeparrot_training - Step 48684: {'lr': 8.601569920986263e-07, 'samples': 24926720, 'steps': 48684, 'batch_loss/train': 1.1258407905697823} +12/29/2021 08:56:57 - INFO - codeparrot_training - Step 48685: {'lr': 8.588510080025647e-07, 'samples': 24927232, 'steps': 48685, 'batch_loss/train': 0.725710969301872} +12/29/2021 08:57:08 - INFO - codeparrot_training - Step 48686: {'lr': 8.575460143976365e-07, 'samples': 24927744, 'steps': 48686, 'batch_loss/train': 0.6409611033741385} +12/29/2021 08:57:19 - INFO - codeparrot_training - Step 48687: {'lr': 8.562420112890601e-07, 'samples': 24928256, 'steps': 48687, 'batch_loss/train': 0.7651011077687144} +12/29/2021 08:57:31 - INFO - codeparrot_training - Step 48688: {'lr': 8.5493899868197e-07, 'samples': 24928768, 'steps': 48688, 'batch_loss/train': 0.6332490706117824} +12/29/2021 08:57:42 - INFO - codeparrot_training - Step 48689: {'lr': 8.536369765815843e-07, 'samples': 24929280, 'steps': 48689, 'batch_loss/train': 0.7939290339127183} +12/29/2021 08:57:52 - INFO - codeparrot_training - Step 48690: {'lr': 8.52335944993038e-07, 'samples': 24929792, 'steps': 48690, 'batch_loss/train': 0.7993008978664875} +12/29/2021 08:58:06 - INFO - codeparrot_training - Step 48691: {'lr': 8.510359039215488e-07, 'samples': 24930304, 'steps': 48691, 'batch_loss/train': 0.6817129561677575} +12/29/2021 08:58:17 - INFO - codeparrot_training - Step 48692: {'lr': 8.497368533722794e-07, 'samples': 24930816, 'steps': 48692, 'batch_loss/train': 0.7624572515487671} +12/29/2021 08:58:28 - INFO - codeparrot_training - Step 48693: {'lr': 8.484387933503646e-07, 'samples': 24931328, 'steps': 48693, 'batch_loss/train': 0.7163457595743239} +12/29/2021 08:58:40 - INFO - codeparrot_training - Step 48694: {'lr': 8.471417238609669e-07, 'samples': 24931840, 'steps': 48694, 'batch_loss/train': 0.7789714364334941} +12/29/2021 08:58:50 - INFO - codeparrot_training - Step 48695: {'lr': 8.458456449092766e-07, 'samples': 24932352, 'steps': 48695, 'batch_loss/train': 0.7602567672729492} +12/29/2021 08:59:01 - INFO - codeparrot_training - Step 48696: {'lr': 8.445505565004286e-07, 'samples': 24932864, 'steps': 48696, 'batch_loss/train': 0.8034594338387251} +12/29/2021 08:59:14 - INFO - codeparrot_training - Step 48697: {'lr': 8.432564586395575e-07, 'samples': 24933376, 'steps': 48697, 'batch_loss/train': 0.7039846661500633} +12/29/2021 08:59:24 - INFO - codeparrot_training - Step 48698: {'lr': 8.419633513318259e-07, 'samples': 24933888, 'steps': 48698, 'batch_loss/train': 0.9419950628653169} +12/29/2021 08:59:35 - INFO - codeparrot_training - Step 48699: {'lr': 8.406712345823686e-07, 'samples': 24934400, 'steps': 48699, 'batch_loss/train': 0.9449430089443922} +12/29/2021 08:59:45 - INFO - codeparrot_training - Step 48700: {'lr': 8.393801083963204e-07, 'samples': 24934912, 'steps': 48700, 'batch_loss/train': 0.8113251309841871} +12/29/2021 08:59:59 - INFO - codeparrot_training - Step 48701: {'lr': 8.380899727788438e-07, 'samples': 24935424, 'steps': 48701, 'batch_loss/train': 0.7458582513500005} +12/29/2021 09:00:10 - INFO - codeparrot_training - Step 48702: {'lr': 8.36800827735018e-07, 'samples': 24935936, 'steps': 48702, 'batch_loss/train': 0.7325570799293928} +12/29/2021 09:00:21 - INFO - codeparrot_training - Step 48703: {'lr': 8.355126732700058e-07, 'samples': 24936448, 'steps': 48703, 'batch_loss/train': 0.7059121299535036} +12/29/2021 09:00:33 - INFO - codeparrot_training - Step 48704: {'lr': 8.342255093888862e-07, 'samples': 24936960, 'steps': 48704, 'batch_loss/train': 0.6875384841114283} +12/29/2021 09:00:43 - INFO - codeparrot_training - Step 48705: {'lr': 8.329393360968219e-07, 'samples': 24937472, 'steps': 48705, 'batch_loss/train': 0.7907391637563705} +12/29/2021 09:00:54 - INFO - codeparrot_training - Step 48706: {'lr': 8.316541533989197e-07, 'samples': 24937984, 'steps': 48706, 'batch_loss/train': 0.6102371462620795} +12/29/2021 09:01:08 - INFO - codeparrot_training - Step 48707: {'lr': 8.303699613002591e-07, 'samples': 24938496, 'steps': 48707, 'batch_loss/train': 0.7138471463695168} +12/29/2021 09:01:19 - INFO - codeparrot_training - Step 48708: {'lr': 8.290867598059748e-07, 'samples': 24939008, 'steps': 48708, 'batch_loss/train': 0.7853964185342193} +12/29/2021 09:01:30 - INFO - codeparrot_training - Step 48709: {'lr': 8.278045489211461e-07, 'samples': 24939520, 'steps': 48709, 'batch_loss/train': 0.7601233671884984} +12/29/2021 09:01:42 - INFO - codeparrot_training - Step 48710: {'lr': 8.265233286509077e-07, 'samples': 24940032, 'steps': 48710, 'batch_loss/train': 0.8032672051340342} +12/29/2021 09:01:53 - INFO - codeparrot_training - Step 48711: {'lr': 8.252430990003113e-07, 'samples': 24940544, 'steps': 48711, 'batch_loss/train': 0.8036401253193617} +12/29/2021 09:02:03 - INFO - codeparrot_training - Step 48712: {'lr': 8.239638599744637e-07, 'samples': 24941056, 'steps': 48712, 'batch_loss/train': 0.7205640412867069} +12/29/2021 09:02:14 - INFO - codeparrot_training - Step 48713: {'lr': 8.226856115784443e-07, 'samples': 24941568, 'steps': 48713, 'batch_loss/train': 0.7711657835170627} +12/29/2021 09:02:27 - INFO - codeparrot_training - Step 48714: {'lr': 8.2140835381736e-07, 'samples': 24942080, 'steps': 48714, 'batch_loss/train': 0.6729691782966256} +12/29/2021 09:02:37 - INFO - codeparrot_training - Step 48715: {'lr': 8.201320866962347e-07, 'samples': 24942592, 'steps': 48715, 'batch_loss/train': 0.7192195560783148} +12/29/2021 09:02:48 - INFO - codeparrot_training - Step 48716: {'lr': 8.188568102202309e-07, 'samples': 24943104, 'steps': 48716, 'batch_loss/train': 0.5559419498313218} +12/29/2021 09:03:02 - INFO - codeparrot_training - Step 48717: {'lr': 8.175825243943169e-07, 'samples': 24943616, 'steps': 48717, 'batch_loss/train': 1.2972456408897415} +12/29/2021 09:03:12 - INFO - codeparrot_training - Step 48718: {'lr': 8.163092292236273e-07, 'samples': 24944128, 'steps': 48718, 'batch_loss/train': 0.7632480678148568} +12/29/2021 09:03:23 - INFO - codeparrot_training - Step 48719: {'lr': 8.150369247132138e-07, 'samples': 24944640, 'steps': 48719, 'batch_loss/train': 0.7198485806584358} +12/29/2021 09:03:34 - INFO - codeparrot_training - Step 48720: {'lr': 8.137656108681279e-07, 'samples': 24945152, 'steps': 48720, 'batch_loss/train': 0.7110892413184047} +12/29/2021 09:03:46 - INFO - codeparrot_training - Step 48721: {'lr': 8.124952876933934e-07, 'samples': 24945664, 'steps': 48721, 'batch_loss/train': 0.7884154571220279} +12/29/2021 09:03:56 - INFO - codeparrot_training - Step 48722: {'lr': 8.112259551940892e-07, 'samples': 24946176, 'steps': 48722, 'batch_loss/train': 0.7110206210054457} +12/29/2021 09:04:07 - INFO - codeparrot_training - Step 48723: {'lr': 8.09957613375295e-07, 'samples': 24946688, 'steps': 48723, 'batch_loss/train': 0.7789900209754705} +12/29/2021 09:04:21 - INFO - codeparrot_training - Step 48724: {'lr': 8.086902622419789e-07, 'samples': 24947200, 'steps': 48724, 'batch_loss/train': 0.6328423414379358} +12/29/2021 09:04:31 - INFO - codeparrot_training - Step 48725: {'lr': 8.07423901799248e-07, 'samples': 24947712, 'steps': 48725, 'batch_loss/train': 0.7608653670176864} +12/29/2021 09:04:42 - INFO - codeparrot_training - Step 48726: {'lr': 8.061585320520981e-07, 'samples': 24948224, 'steps': 48726, 'batch_loss/train': 0.7599989781156182} +12/29/2021 09:04:55 - INFO - codeparrot_training - Step 48727: {'lr': 8.048941530055809e-07, 'samples': 24948736, 'steps': 48727, 'batch_loss/train': 0.6184286626521498} +12/29/2021 09:05:05 - INFO - codeparrot_training - Step 48728: {'lr': 8.036307646646923e-07, 'samples': 24949248, 'steps': 48728, 'batch_loss/train': 0.5588716887868941} +12/29/2021 09:05:16 - INFO - codeparrot_training - Step 48729: {'lr': 8.023683670344839e-07, 'samples': 24949760, 'steps': 48729, 'batch_loss/train': 0.725596786942333} +12/29/2021 09:05:28 - INFO - codeparrot_training - Step 48730: {'lr': 8.011069601199795e-07, 'samples': 24950272, 'steps': 48730, 'batch_loss/train': 0.7602634934009984} +12/29/2021 09:05:39 - INFO - codeparrot_training - Step 48731: {'lr': 7.998465439261471e-07, 'samples': 24950784, 'steps': 48731, 'batch_loss/train': 0.7681636083871126} +12/29/2021 09:05:50 - INFO - codeparrot_training - Step 48732: {'lr': 7.985871184580662e-07, 'samples': 24951296, 'steps': 48732, 'batch_loss/train': 1.253639408154413} +12/29/2021 09:06:00 - INFO - codeparrot_training - Step 48733: {'lr': 7.973286837206773e-07, 'samples': 24951808, 'steps': 48733, 'batch_loss/train': 0.7974913055077195} +12/29/2021 09:06:13 - INFO - codeparrot_training - Step 48734: {'lr': 7.960712397190317e-07, 'samples': 24952320, 'steps': 48734, 'batch_loss/train': 0.793164144270122} +12/29/2021 09:06:23 - INFO - codeparrot_training - Step 48735: {'lr': 7.948147864580979e-07, 'samples': 24952832, 'steps': 48735, 'batch_loss/train': 0.7302760076709092} +12/29/2021 09:06:34 - INFO - codeparrot_training - Step 48736: {'lr': 7.935593239428996e-07, 'samples': 24953344, 'steps': 48736, 'batch_loss/train': 0.644252966158092} +12/29/2021 09:06:48 - INFO - codeparrot_training - Step 48737: {'lr': 7.923048521784049e-07, 'samples': 24953856, 'steps': 48737, 'batch_loss/train': 0.7047855663113296} +12/29/2021 09:06:59 - INFO - codeparrot_training - Step 48738: {'lr': 7.910513711696099e-07, 'samples': 24954368, 'steps': 48738, 'batch_loss/train': 0.6460906891152263} +12/29/2021 09:07:09 - INFO - codeparrot_training - Step 48739: {'lr': 7.897988809215107e-07, 'samples': 24954880, 'steps': 48739, 'batch_loss/train': 0.6344204163178802} +12/29/2021 09:07:20 - INFO - codeparrot_training - Step 48740: {'lr': 7.885473814390475e-07, 'samples': 24955392, 'steps': 48740, 'batch_loss/train': 0.6919299517758191} +12/29/2021 09:07:32 - INFO - codeparrot_training - Step 48741: {'lr': 7.872968727272444e-07, 'samples': 24955904, 'steps': 48741, 'batch_loss/train': 0.8086422244086862} +12/29/2021 09:07:43 - INFO - codeparrot_training - Step 48742: {'lr': 7.860473547910419e-07, 'samples': 24956416, 'steps': 48742, 'batch_loss/train': 0.7309944604057819} +12/29/2021 09:07:53 - INFO - codeparrot_training - Step 48743: {'lr': 7.847988276354079e-07, 'samples': 24956928, 'steps': 48743, 'batch_loss/train': 0.6576087202411145} +12/29/2021 09:08:08 - INFO - codeparrot_training - Step 48744: {'lr': 7.835512912653109e-07, 'samples': 24957440, 'steps': 48744, 'batch_loss/train': 0.7123788883909583} +12/29/2021 09:08:18 - INFO - codeparrot_training - Step 48745: {'lr': 7.823047456857468e-07, 'samples': 24957952, 'steps': 48745, 'batch_loss/train': 1.5034438529110048} +12/29/2021 09:08:29 - INFO - codeparrot_training - Step 48746: {'lr': 7.810591909016007e-07, 'samples': 24958464, 'steps': 48746, 'batch_loss/train': 0.7209292566403747} +12/29/2021 09:08:41 - INFO - codeparrot_training - Step 48747: {'lr': 7.798146269178686e-07, 'samples': 24958976, 'steps': 48747, 'batch_loss/train': 1.5469704447314143} +12/29/2021 09:08:52 - INFO - codeparrot_training - Step 48748: {'lr': 7.785710537394908e-07, 'samples': 24959488, 'steps': 48748, 'batch_loss/train': 0.7673013550229371} +12/29/2021 09:09:02 - INFO - codeparrot_training - Step 48749: {'lr': 7.773284713714357e-07, 'samples': 24960000, 'steps': 48749, 'batch_loss/train': 0.679816777817905} +12/29/2021 09:09:13 - INFO - codeparrot_training - Step 48750: {'lr': 7.760868798185883e-07, 'samples': 24960512, 'steps': 48750, 'batch_loss/train': 0.6307709007523954} +12/29/2021 09:09:25 - INFO - codeparrot_training - Step 48751: {'lr': 7.748462790859446e-07, 'samples': 24961024, 'steps': 48751, 'batch_loss/train': 0.7183348340913653} +12/29/2021 09:09:36 - INFO - codeparrot_training - Step 48752: {'lr': 7.736066691783894e-07, 'samples': 24961536, 'steps': 48752, 'batch_loss/train': 0.7199084311723709} +12/29/2021 09:09:46 - INFO - codeparrot_training - Step 48753: {'lr': 7.723680501008635e-07, 'samples': 24962048, 'steps': 48753, 'batch_loss/train': 0.7279483694583178} +12/29/2021 09:10:01 - INFO - codeparrot_training - Step 48754: {'lr': 7.711304218583348e-07, 'samples': 24962560, 'steps': 48754, 'batch_loss/train': 0.6746753124753013} +12/29/2021 09:10:11 - INFO - codeparrot_training - Step 48755: {'lr': 7.698937844556609e-07, 'samples': 24963072, 'steps': 48755, 'batch_loss/train': 0.7003693040460348} +12/29/2021 09:10:22 - INFO - codeparrot_training - Step 48756: {'lr': 7.686581378977819e-07, 'samples': 24963584, 'steps': 48756, 'batch_loss/train': 0.6837002790998667} +12/29/2021 09:10:34 - INFO - codeparrot_training - Step 48757: {'lr': 7.674234821896109e-07, 'samples': 24964096, 'steps': 48757, 'batch_loss/train': 0.7398240939946845} +12/29/2021 09:10:45 - INFO - codeparrot_training - Step 48758: {'lr': 7.661898173360604e-07, 'samples': 24964608, 'steps': 48758, 'batch_loss/train': 0.7259067445993423} +12/29/2021 09:10:56 - INFO - codeparrot_training - Step 48759: {'lr': 7.649571433420154e-07, 'samples': 24965120, 'steps': 48759, 'batch_loss/train': 0.6619223922025412} +12/29/2021 09:11:06 - INFO - codeparrot_training - Step 48760: {'lr': 7.637254602124166e-07, 'samples': 24965632, 'steps': 48760, 'batch_loss/train': 0.6922811921685934} +12/29/2021 09:11:18 - INFO - codeparrot_training - Step 48761: {'lr': 7.62494767952121e-07, 'samples': 24966144, 'steps': 48761, 'batch_loss/train': 0.7052688321564347} +12/29/2021 09:11:29 - INFO - codeparrot_training - Step 48762: {'lr': 7.612650665660692e-07, 'samples': 24966656, 'steps': 48762, 'batch_loss/train': 0.6577281490899622} +12/29/2021 09:11:39 - INFO - codeparrot_training - Step 48763: {'lr': 7.600363560590906e-07, 'samples': 24967168, 'steps': 48763, 'batch_loss/train': 0.6793251060880721} +12/29/2021 09:11:53 - INFO - codeparrot_training - Step 48764: {'lr': 7.58808636436098e-07, 'samples': 24967680, 'steps': 48764, 'batch_loss/train': 0.6957404003478587} +12/29/2021 09:12:04 - INFO - codeparrot_training - Step 48765: {'lr': 7.575819077019763e-07, 'samples': 24968192, 'steps': 48765, 'batch_loss/train': 0.6973333992063999} +12/29/2021 09:12:15 - INFO - codeparrot_training - Step 48766: {'lr': 7.563561698616106e-07, 'samples': 24968704, 'steps': 48766, 'batch_loss/train': 0.7463909301441163} +12/29/2021 09:12:27 - INFO - codeparrot_training - Step 48767: {'lr': 7.55131422919858e-07, 'samples': 24969216, 'steps': 48767, 'batch_loss/train': 0.7568834647536278} +12/29/2021 09:12:37 - INFO - codeparrot_training - Step 48768: {'lr': 7.539076668815759e-07, 'samples': 24969728, 'steps': 48768, 'batch_loss/train': 0.7162639200687408} +12/29/2021 09:12:48 - INFO - codeparrot_training - Step 48769: {'lr': 7.526849017516491e-07, 'samples': 24970240, 'steps': 48769, 'batch_loss/train': 0.7451809151098132} +12/29/2021 09:12:59 - INFO - codeparrot_training - Step 48770: {'lr': 7.514631275349349e-07, 'samples': 24970752, 'steps': 48770, 'batch_loss/train': 0.7148760298732668} +12/29/2021 09:13:11 - INFO - codeparrot_training - Step 48771: {'lr': 7.502423442363182e-07, 'samples': 24971264, 'steps': 48771, 'batch_loss/train': 0.4977401622454636} +12/29/2021 09:13:22 - INFO - codeparrot_training - Step 48772: {'lr': 7.490225518606008e-07, 'samples': 24971776, 'steps': 48772, 'batch_loss/train': 0.4575455150625203} +12/29/2021 09:13:33 - INFO - codeparrot_training - Step 48773: {'lr': 7.478037504126678e-07, 'samples': 24972288, 'steps': 48773, 'batch_loss/train': 0.7136164684779942} +12/29/2021 09:13:47 - INFO - codeparrot_training - Step 48774: {'lr': 7.465859398973207e-07, 'samples': 24972800, 'steps': 48774, 'batch_loss/train': 0.6418462365400046} +12/29/2021 09:13:57 - INFO - codeparrot_training - Step 48775: {'lr': 7.453691203194723e-07, 'samples': 24973312, 'steps': 48775, 'batch_loss/train': 0.6238155757309869} +12/29/2021 09:14:08 - INFO - codeparrot_training - Step 48776: {'lr': 7.441532916838967e-07, 'samples': 24973824, 'steps': 48776, 'batch_loss/train': 0.6297505546826869} +12/29/2021 09:14:20 - INFO - codeparrot_training - Step 48777: {'lr': 7.429384539954787e-07, 'samples': 24974336, 'steps': 48777, 'batch_loss/train': 0.6341127105988562} +12/29/2021 09:14:30 - INFO - codeparrot_training - Step 48778: {'lr': 7.417246072589922e-07, 'samples': 24974848, 'steps': 48778, 'batch_loss/train': 0.6437413231469691} +12/29/2021 09:14:41 - INFO - codeparrot_training - Step 48779: {'lr': 7.405117514792948e-07, 'samples': 24975360, 'steps': 48779, 'batch_loss/train': 0.6916636563837528} +12/29/2021 09:14:52 - INFO - codeparrot_training - Step 48780: {'lr': 7.392998866612432e-07, 'samples': 24975872, 'steps': 48780, 'batch_loss/train': 0.7483610985800624} +12/29/2021 09:15:04 - INFO - codeparrot_training - Step 48781: {'lr': 7.380890128095841e-07, 'samples': 24976384, 'steps': 48781, 'batch_loss/train': 0.6280333122704178} +12/29/2021 09:15:15 - INFO - codeparrot_training - Step 48782: {'lr': 7.368791299291744e-07, 'samples': 24976896, 'steps': 48782, 'batch_loss/train': 0.7392048480687663} +12/29/2021 09:15:25 - INFO - codeparrot_training - Step 48783: {'lr': 7.356702380248159e-07, 'samples': 24977408, 'steps': 48783, 'batch_loss/train': 0.6774957748129964} +12/29/2021 09:15:40 - INFO - codeparrot_training - Step 48784: {'lr': 7.344623371013104e-07, 'samples': 24977920, 'steps': 48784, 'batch_loss/train': 0.7396395253017545} +12/29/2021 09:15:50 - INFO - codeparrot_training - Step 48785: {'lr': 7.332554271634595e-07, 'samples': 24978432, 'steps': 48785, 'batch_loss/train': 0.4900262322917115} +12/29/2021 09:16:01 - INFO - codeparrot_training - Step 48786: {'lr': 7.320495082160649e-07, 'samples': 24978944, 'steps': 48786, 'batch_loss/train': 0.7460703272372484} +12/29/2021 09:16:13 - INFO - codeparrot_training - Step 48787: {'lr': 7.308445802639286e-07, 'samples': 24979456, 'steps': 48787, 'batch_loss/train': 0.6081140507012606} +12/29/2021 09:16:24 - INFO - codeparrot_training - Step 48788: {'lr': 7.296406433118241e-07, 'samples': 24979968, 'steps': 48788, 'batch_loss/train': 0.7274126706179231} +12/29/2021 09:16:34 - INFO - codeparrot_training - Step 48789: {'lr': 7.284376973645535e-07, 'samples': 24980480, 'steps': 48789, 'batch_loss/train': 0.6497601978480816} +12/29/2021 09:16:45 - INFO - codeparrot_training - Step 48790: {'lr': 7.272357424269182e-07, 'samples': 24980992, 'steps': 48790, 'batch_loss/train': 0.7368303656112403} +12/29/2021 09:16:59 - INFO - codeparrot_training - Step 48791: {'lr': 7.26034778503637e-07, 'samples': 24981504, 'steps': 48791, 'batch_loss/train': 0.7259293519891798} +12/29/2021 09:17:09 - INFO - codeparrot_training - Step 48792: {'lr': 7.248348055995391e-07, 'samples': 24982016, 'steps': 48792, 'batch_loss/train': 0.6356371740112081} +12/29/2021 09:17:20 - INFO - codeparrot_training - Step 48793: {'lr': 7.236358237193708e-07, 'samples': 24982528, 'steps': 48793, 'batch_loss/train': 0.7820037314668298} +12/29/2021 09:17:32 - INFO - codeparrot_training - Step 48794: {'lr': 7.22437832867906e-07, 'samples': 24983040, 'steps': 48794, 'batch_loss/train': 0.6285573169589043} +12/29/2021 09:17:43 - INFO - codeparrot_training - Step 48795: {'lr': 7.21240833049891e-07, 'samples': 24983552, 'steps': 48795, 'batch_loss/train': 0.7050336794927716} +12/29/2021 09:17:53 - INFO - codeparrot_training - Step 48796: {'lr': 7.200448242700996e-07, 'samples': 24984064, 'steps': 48796, 'batch_loss/train': 0.7093717600218952} +12/29/2021 09:18:05 - INFO - codeparrot_training - Step 48797: {'lr': 7.18849806533306e-07, 'samples': 24984576, 'steps': 48797, 'batch_loss/train': 0.7676428882405162} +12/29/2021 09:18:16 - INFO - codeparrot_training - Step 48798: {'lr': 7.176557798442284e-07, 'samples': 24985088, 'steps': 48798, 'batch_loss/train': 0.8073788830079138} +12/29/2021 09:18:27 - INFO - codeparrot_training - Step 48799: {'lr': 7.164627442076133e-07, 'samples': 24985600, 'steps': 48799, 'batch_loss/train': 0.6808305319282226} +12/29/2021 09:18:37 - INFO - codeparrot_training - Step 48800: {'lr': 7.152706996282344e-07, 'samples': 24986112, 'steps': 48800, 'batch_loss/train': 0.7415898153558373} +12/29/2021 09:18:51 - INFO - codeparrot_training - Step 48801: {'lr': 7.140796461108102e-07, 'samples': 24986624, 'steps': 48801, 'batch_loss/train': 0.6786441693548113} +12/29/2021 09:19:02 - INFO - codeparrot_training - Step 48802: {'lr': 7.128895836600591e-07, 'samples': 24987136, 'steps': 48802, 'batch_loss/train': 0.7061250824481249} +12/29/2021 09:19:12 - INFO - codeparrot_training - Step 48803: {'lr': 7.117005122807552e-07, 'samples': 24987648, 'steps': 48803, 'batch_loss/train': 0.6491048275493085} +12/29/2021 09:19:24 - INFO - codeparrot_training - Step 48804: {'lr': 7.105124319775891e-07, 'samples': 24988160, 'steps': 48804, 'batch_loss/train': 0.612169440370053} +12/29/2021 09:19:35 - INFO - codeparrot_training - Step 48805: {'lr': 7.093253427553071e-07, 'samples': 24988672, 'steps': 48805, 'batch_loss/train': 0.7583474800921977} +12/29/2021 09:19:45 - INFO - codeparrot_training - Step 48806: {'lr': 7.081392446186274e-07, 'samples': 24989184, 'steps': 48806, 'batch_loss/train': 0.609788880450651} +12/29/2021 09:19:57 - INFO - codeparrot_training - Step 48807: {'lr': 7.06954137572241e-07, 'samples': 24989696, 'steps': 48807, 'batch_loss/train': 0.7618729677051306} +12/29/2021 09:20:08 - INFO - codeparrot_training - Step 48808: {'lr': 7.05770021620894e-07, 'samples': 24990208, 'steps': 48808, 'batch_loss/train': 0.7419911482720636} +12/29/2021 09:20:19 - INFO - codeparrot_training - Step 48809: {'lr': 7.045868967692493e-07, 'samples': 24990720, 'steps': 48809, 'batch_loss/train': 0.7459198497235775} +12/29/2021 09:20:29 - INFO - codeparrot_training - Step 48810: {'lr': 7.034047630220531e-07, 'samples': 24991232, 'steps': 48810, 'batch_loss/train': 0.8244185596704483} +12/29/2021 09:20:41 - INFO - codeparrot_training - Step 48811: {'lr': 7.022236203839682e-07, 'samples': 24991744, 'steps': 48811, 'batch_loss/train': 0.8088690002914518} +12/29/2021 09:20:52 - INFO - codeparrot_training - Step 48812: {'lr': 7.010434688597412e-07, 'samples': 24992256, 'steps': 48812, 'batch_loss/train': 0.6826389413326979} +12/29/2021 09:21:03 - INFO - codeparrot_training - Step 48813: {'lr': 6.998643084540069e-07, 'samples': 24992768, 'steps': 48813, 'batch_loss/train': 0.6341687858221121} +12/29/2021 09:21:17 - INFO - codeparrot_training - Step 48814: {'lr': 6.986861391715116e-07, 'samples': 24993280, 'steps': 48814, 'batch_loss/train': 2.0584837840870023} +12/29/2021 09:21:28 - INFO - codeparrot_training - Step 48815: {'lr': 6.975089610168906e-07, 'samples': 24993792, 'steps': 48815, 'batch_loss/train': 0.7124211041373201} +12/29/2021 09:21:39 - INFO - codeparrot_training - Step 48816: {'lr': 6.963327739948344e-07, 'samples': 24994304, 'steps': 48816, 'batch_loss/train': 0.6872486462816596} +12/29/2021 09:21:51 - INFO - codeparrot_training - Step 48817: {'lr': 6.951575781100339e-07, 'samples': 24994816, 'steps': 48817, 'batch_loss/train': 0.6996333601418883} +12/29/2021 09:22:01 - INFO - codeparrot_training - Step 48818: {'lr': 6.939833733671242e-07, 'samples': 24995328, 'steps': 48818, 'batch_loss/train': 0.8318786122836173} +12/29/2021 09:22:12 - INFO - codeparrot_training - Step 48819: {'lr': 6.928101597708514e-07, 'samples': 24995840, 'steps': 48819, 'batch_loss/train': 0.717551918933168} +12/29/2021 09:22:23 - INFO - codeparrot_training - Step 48820: {'lr': 6.916379373257953e-07, 'samples': 24996352, 'steps': 48820, 'batch_loss/train': 0.7650472931563854} +12/29/2021 09:22:35 - INFO - codeparrot_training - Step 48821: {'lr': 6.904667060366742e-07, 'samples': 24996864, 'steps': 48821, 'batch_loss/train': 0.5577662864234298} +12/29/2021 09:22:45 - INFO - codeparrot_training - Step 48822: {'lr': 6.892964659080958e-07, 'samples': 24997376, 'steps': 48822, 'batch_loss/train': 0.7781009525060654} +12/29/2021 09:22:56 - INFO - codeparrot_training - Step 48823: {'lr': 6.881272169447783e-07, 'samples': 24997888, 'steps': 48823, 'batch_loss/train': 0.6508955452591181} +12/29/2021 09:23:10 - INFO - codeparrot_training - Step 48824: {'lr': 6.869589591513015e-07, 'samples': 24998400, 'steps': 48824, 'batch_loss/train': 0.7557126423344016} +12/29/2021 09:23:21 - INFO - codeparrot_training - Step 48825: {'lr': 6.857916925323282e-07, 'samples': 24998912, 'steps': 48825, 'batch_loss/train': 0.7219321299344301} +12/29/2021 09:23:31 - INFO - codeparrot_training - Step 48826: {'lr': 6.846254170925215e-07, 'samples': 24999424, 'steps': 48826, 'batch_loss/train': 0.7416200982406735} +12/29/2021 09:23:43 - INFO - codeparrot_training - Step 48827: {'lr': 6.834601328365164e-07, 'samples': 24999936, 'steps': 48827, 'batch_loss/train': 0.6771259258966893} +12/29/2021 09:23:54 - INFO - codeparrot_training - Step 48828: {'lr': 6.822958397689205e-07, 'samples': 25000448, 'steps': 48828, 'batch_loss/train': 0.7544237598776817} +12/29/2021 09:24:05 - INFO - codeparrot_training - Step 48829: {'lr': 6.811325378943967e-07, 'samples': 25000960, 'steps': 48829, 'batch_loss/train': 0.7495616676751524} +12/29/2021 09:24:18 - INFO - codeparrot_training - Step 48830: {'lr': 6.799702272175523e-07, 'samples': 25001472, 'steps': 48830, 'batch_loss/train': 0.7950403997674584} +12/29/2021 09:24:29 - INFO - codeparrot_training - Step 48831: {'lr': 6.788089077429948e-07, 'samples': 25001984, 'steps': 48831, 'batch_loss/train': 0.7570103872567415} +12/29/2021 09:24:40 - INFO - codeparrot_training - Step 48832: {'lr': 6.776485794753596e-07, 'samples': 25002496, 'steps': 48832, 'batch_loss/train': 1.6159433210268617} +12/29/2021 09:24:50 - INFO - codeparrot_training - Step 48833: {'lr': 6.76489242419226e-07, 'samples': 25003008, 'steps': 48833, 'batch_loss/train': 0.7155724344775081} +12/29/2021 09:25:02 - INFO - codeparrot_training - Step 48834: {'lr': 6.753308965792571e-07, 'samples': 25003520, 'steps': 48834, 'batch_loss/train': 0.732963602989912} +12/29/2021 09:25:13 - INFO - codeparrot_training - Step 48835: {'lr': 6.741735419600325e-07, 'samples': 25004032, 'steps': 48835, 'batch_loss/train': 0.693383161444217} +12/29/2021 09:25:24 - INFO - codeparrot_training - Step 48836: {'lr': 6.730171785661321e-07, 'samples': 25004544, 'steps': 48836, 'batch_loss/train': 0.7305371053516865} +12/29/2021 09:25:36 - INFO - codeparrot_training - Step 48837: {'lr': 6.718618064021909e-07, 'samples': 25005056, 'steps': 48837, 'batch_loss/train': 0.649102296680212} +12/29/2021 09:25:47 - INFO - codeparrot_training - Step 48838: {'lr': 6.707074254727885e-07, 'samples': 25005568, 'steps': 48838, 'batch_loss/train': 0.7540359671693295} +12/29/2021 09:25:57 - INFO - codeparrot_training - Step 48839: {'lr': 6.695540357825047e-07, 'samples': 25006080, 'steps': 48839, 'batch_loss/train': 0.7033105101436377} +12/29/2021 09:26:08 - INFO - codeparrot_training - Step 48840: {'lr': 6.684016373359192e-07, 'samples': 25006592, 'steps': 48840, 'batch_loss/train': 0.7785658873617649} +12/29/2021 09:26:23 - INFO - codeparrot_training - Step 48841: {'lr': 6.672502301376393e-07, 'samples': 25007104, 'steps': 48841, 'batch_loss/train': 0.7394254696555436} +12/29/2021 09:26:33 - INFO - codeparrot_training - Step 48842: {'lr': 6.66099814192217e-07, 'samples': 25007616, 'steps': 48842, 'batch_loss/train': 0.6814299924299121} +12/29/2021 09:26:44 - INFO - codeparrot_training - Step 48843: {'lr': 6.64950389504232e-07, 'samples': 25008128, 'steps': 48843, 'batch_loss/train': 0.7948983409442008} +12/29/2021 09:26:56 - INFO - codeparrot_training - Step 48844: {'lr': 6.638019560782637e-07, 'samples': 25008640, 'steps': 48844, 'batch_loss/train': 0.8356736861169338} +12/29/2021 09:27:07 - INFO - codeparrot_training - Step 48845: {'lr': 6.626545139188922e-07, 'samples': 25009152, 'steps': 48845, 'batch_loss/train': 0.7290749736130238} +12/29/2021 09:27:18 - INFO - codeparrot_training - Step 48846: {'lr': 6.615080630306414e-07, 'samples': 25009664, 'steps': 48846, 'batch_loss/train': 0.6402737017488107} +12/29/2021 09:27:31 - INFO - codeparrot_training - Step 48847: {'lr': 6.60362603418091e-07, 'samples': 25010176, 'steps': 48847, 'batch_loss/train': 0.6332275015302002} +12/29/2021 09:27:42 - INFO - codeparrot_training - Step 48848: {'lr': 6.592181350857929e-07, 'samples': 25010688, 'steps': 48848, 'batch_loss/train': 0.8104922724887729} +12/29/2021 09:27:53 - INFO - codeparrot_training - Step 48849: {'lr': 6.58074658038299e-07, 'samples': 25011200, 'steps': 48849, 'batch_loss/train': 0.7121848100796342} +12/29/2021 09:28:05 - INFO - codeparrot_training - Step 48850: {'lr': 6.569321722801614e-07, 'samples': 25011712, 'steps': 48850, 'batch_loss/train': 0.7479889541864395} +12/29/2021 09:28:16 - INFO - codeparrot_training - Step 48851: {'lr': 6.557906778158762e-07, 'samples': 25012224, 'steps': 48851, 'batch_loss/train': 0.8109472473151982} +12/29/2021 09:28:26 - INFO - codeparrot_training - Step 48852: {'lr': 6.54650174650051e-07, 'samples': 25012736, 'steps': 48852, 'batch_loss/train': 0.6466767704114318} +12/29/2021 09:28:37 - INFO - codeparrot_training - Step 48853: {'lr': 6.535106627871823e-07, 'samples': 25013248, 'steps': 48853, 'batch_loss/train': 0.8521840954199433} +12/29/2021 09:28:49 - INFO - codeparrot_training - Step 48854: {'lr': 6.523721422317941e-07, 'samples': 25013760, 'steps': 48854, 'batch_loss/train': 0.7281014518812299} +12/29/2021 09:29:00 - INFO - codeparrot_training - Step 48855: {'lr': 6.512346129884383e-07, 'samples': 25014272, 'steps': 48855, 'batch_loss/train': 0.6789198340848088} +12/29/2021 09:29:10 - INFO - codeparrot_training - Step 48856: {'lr': 6.500980750616392e-07, 'samples': 25014784, 'steps': 48856, 'batch_loss/train': 0.7173955873586237} +12/29/2021 09:29:22 - INFO - codeparrot_training - Step 48857: {'lr': 6.489625284558654e-07, 'samples': 25015296, 'steps': 48857, 'batch_loss/train': 0.6887541189789772} +12/29/2021 09:29:33 - INFO - codeparrot_training - Step 48858: {'lr': 6.478279731756964e-07, 'samples': 25015808, 'steps': 48858, 'batch_loss/train': 0.9343058164231479} +12/29/2021 09:29:44 - INFO - codeparrot_training - Step 48859: {'lr': 6.466944092255733e-07, 'samples': 25016320, 'steps': 48859, 'batch_loss/train': 0.8377226110897027} +12/29/2021 09:29:58 - INFO - codeparrot_training - Step 48860: {'lr': 6.455618366100758e-07, 'samples': 25016832, 'steps': 48860, 'batch_loss/train': 0.6087450215127319} +12/29/2021 09:30:08 - INFO - codeparrot_training - Step 48861: {'lr': 6.444302553336446e-07, 'samples': 25017344, 'steps': 48861, 'batch_loss/train': 0.692066324991174} +12/29/2021 09:30:19 - INFO - codeparrot_training - Step 48862: {'lr': 6.432996654008317e-07, 'samples': 25017856, 'steps': 48862, 'batch_loss/train': 0.7410721685737371} +12/29/2021 09:30:30 - INFO - codeparrot_training - Step 48863: {'lr': 6.421700668160779e-07, 'samples': 25018368, 'steps': 48863, 'batch_loss/train': 0.7776189735159278} +12/29/2021 09:30:42 - INFO - codeparrot_training - Step 48864: {'lr': 6.410414595839353e-07, 'samples': 25018880, 'steps': 48864, 'batch_loss/train': 0.6663085492327809} +12/29/2021 09:30:53 - INFO - codeparrot_training - Step 48865: {'lr': 6.399138437088447e-07, 'samples': 25019392, 'steps': 48865, 'batch_loss/train': 0.6996879875659943} +12/29/2021 09:31:03 - INFO - codeparrot_training - Step 48866: {'lr': 6.387872191952748e-07, 'samples': 25019904, 'steps': 48866, 'batch_loss/train': 0.8030692329630256} +12/29/2021 09:31:16 - INFO - codeparrot_training - Step 48867: {'lr': 6.376615860477497e-07, 'samples': 25020416, 'steps': 48867, 'batch_loss/train': 0.654015158303082} +12/29/2021 09:31:26 - INFO - codeparrot_training - Step 48868: {'lr': 6.36536944270738e-07, 'samples': 25020928, 'steps': 48868, 'batch_loss/train': 0.7573057264089584} +12/29/2021 09:31:37 - INFO - codeparrot_training - Step 48869: {'lr': 6.354132938687084e-07, 'samples': 25021440, 'steps': 48869, 'batch_loss/train': 0.693883020314388} +12/29/2021 09:31:51 - INFO - codeparrot_training - Step 48870: {'lr': 6.34290634846102e-07, 'samples': 25021952, 'steps': 48870, 'batch_loss/train': 0.5801023283565883} +12/29/2021 09:32:02 - INFO - codeparrot_training - Step 48871: {'lr': 6.33168967207387e-07, 'samples': 25022464, 'steps': 48871, 'batch_loss/train': 0.712272166274488} +12/29/2021 09:32:12 - INFO - codeparrot_training - Step 48872: {'lr': 6.320482909570602e-07, 'samples': 25022976, 'steps': 48872, 'batch_loss/train': 0.6868470660410821} +12/29/2021 09:32:23 - INFO - codeparrot_training - Step 48873: {'lr': 6.309286060995345e-07, 'samples': 25023488, 'steps': 48873, 'batch_loss/train': 0.6217823978513479} +12/29/2021 09:32:35 - INFO - codeparrot_training - Step 48874: {'lr': 6.298099126392787e-07, 'samples': 25024000, 'steps': 48874, 'batch_loss/train': 0.9340907135047019} +12/29/2021 09:32:46 - INFO - codeparrot_training - Step 48875: {'lr': 6.286922105807613e-07, 'samples': 25024512, 'steps': 48875, 'batch_loss/train': 0.7835420505143702} +12/29/2021 09:32:56 - INFO - codeparrot_training - Step 48876: {'lr': 6.275754999283678e-07, 'samples': 25025024, 'steps': 48876, 'batch_loss/train': 0.5405367047060281} +12/29/2021 09:33:10 - INFO - codeparrot_training - Step 48877: {'lr': 6.264597806865946e-07, 'samples': 25025536, 'steps': 48877, 'batch_loss/train': 0.7632604003883898} +12/29/2021 09:33:21 - INFO - codeparrot_training - Step 48878: {'lr': 6.253450528598826e-07, 'samples': 25026048, 'steps': 48878, 'batch_loss/train': 0.7373282788321376} +12/29/2021 09:33:32 - INFO - codeparrot_training - Step 48879: {'lr': 6.242313164525893e-07, 'samples': 25026560, 'steps': 48879, 'batch_loss/train': 0.6165566976414993} +12/29/2021 09:33:44 - INFO - codeparrot_training - Step 48880: {'lr': 6.231185714692389e-07, 'samples': 25027072, 'steps': 48880, 'batch_loss/train': 0.7750975601375103} +12/29/2021 09:33:54 - INFO - codeparrot_training - Step 48881: {'lr': 6.220068179141614e-07, 'samples': 25027584, 'steps': 48881, 'batch_loss/train': 0.7830075239762664} +12/29/2021 09:34:05 - INFO - codeparrot_training - Step 48882: {'lr': 6.20896055791853e-07, 'samples': 25028096, 'steps': 48882, 'batch_loss/train': 0.667327500297688} +12/29/2021 09:34:16 - INFO - codeparrot_training - Step 48883: {'lr': 6.197862851066994e-07, 'samples': 25028608, 'steps': 48883, 'batch_loss/train': 0.7471308152889833} +12/29/2021 09:34:28 - INFO - codeparrot_training - Step 48884: {'lr': 6.186775058631134e-07, 'samples': 25029120, 'steps': 48884, 'batch_loss/train': 0.8313580721151084} +12/29/2021 09:34:38 - INFO - codeparrot_training - Step 48885: {'lr': 6.175697180654805e-07, 'samples': 25029632, 'steps': 48885, 'batch_loss/train': 0.7295168510172516} +12/29/2021 09:34:49 - INFO - codeparrot_training - Step 48886: {'lr': 6.164629217182693e-07, 'samples': 25030144, 'steps': 48886, 'batch_loss/train': 0.7559919254854321} +12/29/2021 09:35:03 - INFO - codeparrot_training - Step 48887: {'lr': 6.153571168258098e-07, 'samples': 25030656, 'steps': 48887, 'batch_loss/train': 0.7717156335711479} +12/29/2021 09:35:14 - INFO - codeparrot_training - Step 48888: {'lr': 6.142523033925151e-07, 'samples': 25031168, 'steps': 48888, 'batch_loss/train': 0.7897591448854655} +12/29/2021 09:35:24 - INFO - codeparrot_training - Step 48889: {'lr': 6.131484814227983e-07, 'samples': 25031680, 'steps': 48889, 'batch_loss/train': 0.691709749866277} +12/29/2021 09:35:36 - INFO - codeparrot_training - Step 48890: {'lr': 6.120456509210725e-07, 'samples': 25032192, 'steps': 48890, 'batch_loss/train': 0.7271308330819011} +12/29/2021 09:35:47 - INFO - codeparrot_training - Step 48891: {'lr': 6.1094381189164e-07, 'samples': 25032704, 'steps': 48891, 'batch_loss/train': 0.757993720471859} +12/29/2021 09:35:58 - INFO - codeparrot_training - Step 48892: {'lr': 6.098429643389691e-07, 'samples': 25033216, 'steps': 48892, 'batch_loss/train': 0.7668351056054235} +12/29/2021 09:36:10 - INFO - codeparrot_training - Step 48893: {'lr': 6.087431082673622e-07, 'samples': 25033728, 'steps': 48893, 'batch_loss/train': 0.7739393077790737} +12/29/2021 09:36:21 - INFO - codeparrot_training - Step 48894: {'lr': 6.076442436812324e-07, 'samples': 25034240, 'steps': 48894, 'batch_loss/train': 1.045581630896777} +12/29/2021 09:36:31 - INFO - codeparrot_training - Step 48895: {'lr': 6.06546370584965e-07, 'samples': 25034752, 'steps': 48895, 'batch_loss/train': 0.7492457311600447} +12/29/2021 09:36:42 - INFO - codeparrot_training - Step 48896: {'lr': 6.054494889828898e-07, 'samples': 25035264, 'steps': 48896, 'batch_loss/train': 0.6679469447117299} +12/29/2021 09:36:54 - INFO - codeparrot_training - Step 48897: {'lr': 6.043535988793647e-07, 'samples': 25035776, 'steps': 48897, 'batch_loss/train': 0.7685776180587709} +12/29/2021 09:37:05 - INFO - codeparrot_training - Step 48898: {'lr': 6.032587002787749e-07, 'samples': 25036288, 'steps': 48898, 'batch_loss/train': 0.6453145085833967} +12/29/2021 09:37:15 - INFO - codeparrot_training - Step 48899: {'lr': 6.021647931854501e-07, 'samples': 25036800, 'steps': 48899, 'batch_loss/train': 0.7158243649173528} +12/29/2021 09:37:29 - INFO - codeparrot_training - Step 48900: {'lr': 6.010718776037483e-07, 'samples': 25037312, 'steps': 48900, 'batch_loss/train': 0.6880078380927444} +12/29/2021 09:37:40 - INFO - codeparrot_training - Step 48901: {'lr': 5.999799535380546e-07, 'samples': 25037824, 'steps': 48901, 'batch_loss/train': 0.7727544531226158} +12/29/2021 09:37:51 - INFO - codeparrot_training - Step 48902: {'lr': 5.988890209926434e-07, 'samples': 25038336, 'steps': 48902, 'batch_loss/train': 0.7209668091963977} +12/29/2021 09:38:01 - INFO - codeparrot_training - Step 48903: {'lr': 5.977990799718724e-07, 'samples': 25038848, 'steps': 48903, 'batch_loss/train': 0.6939069638028741} +12/29/2021 09:38:13 - INFO - codeparrot_training - Step 48904: {'lr': 5.967101304800715e-07, 'samples': 25039360, 'steps': 48904, 'batch_loss/train': 0.7892586393281817} +12/29/2021 09:38:24 - INFO - codeparrot_training - Step 48905: {'lr': 5.956221725215983e-07, 'samples': 25039872, 'steps': 48905, 'batch_loss/train': 0.7240345245227218} +12/29/2021 09:38:35 - INFO - codeparrot_training - Step 48906: {'lr': 5.945352061007548e-07, 'samples': 25040384, 'steps': 48906, 'batch_loss/train': 0.7765799574553967} +12/29/2021 09:38:47 - INFO - codeparrot_training - Step 48907: {'lr': 5.934492312218709e-07, 'samples': 25040896, 'steps': 48907, 'batch_loss/train': 0.6455612946301699} +12/29/2021 09:38:57 - INFO - codeparrot_training - Step 48908: {'lr': 5.923642478892488e-07, 'samples': 25041408, 'steps': 48908, 'batch_loss/train': 0.7105989530682564} +12/29/2021 09:39:08 - INFO - codeparrot_training - Step 48909: {'lr': 5.912802561072184e-07, 'samples': 25041920, 'steps': 48909, 'batch_loss/train': 0.6434816832188517} +12/29/2021 09:39:22 - INFO - codeparrot_training - Step 48910: {'lr': 5.901972558801095e-07, 'samples': 25042432, 'steps': 48910, 'batch_loss/train': 0.7365147406235337} +12/29/2021 09:39:33 - INFO - codeparrot_training - Step 48911: {'lr': 5.891152472121686e-07, 'samples': 25042944, 'steps': 48911, 'batch_loss/train': 0.8781493827700615} +12/29/2021 09:39:43 - INFO - codeparrot_training - Step 48912: {'lr': 5.880342301077535e-07, 'samples': 25043456, 'steps': 48912, 'batch_loss/train': 0.6845367692876607} +12/29/2021 09:39:55 - INFO - codeparrot_training - Step 48913: {'lr': 5.869542045711385e-07, 'samples': 25043968, 'steps': 48913, 'batch_loss/train': 0.6920244578504935} +12/29/2021 09:40:06 - INFO - codeparrot_training - Step 48914: {'lr': 5.858751706065979e-07, 'samples': 25044480, 'steps': 48914, 'batch_loss/train': 0.7443080209195614} +12/29/2021 09:40:17 - INFO - codeparrot_training - Step 48915: {'lr': 5.847971282184616e-07, 'samples': 25044992, 'steps': 48915, 'batch_loss/train': 0.7384258466772735} +12/29/2021 09:40:27 - INFO - codeparrot_training - Step 48916: {'lr': 5.837200774110041e-07, 'samples': 25045504, 'steps': 48916, 'batch_loss/train': 0.8044142378494143} +12/29/2021 09:40:41 - INFO - codeparrot_training - Step 48917: {'lr': 5.826440181884718e-07, 'samples': 25046016, 'steps': 48917, 'batch_loss/train': 0.7890607309527695} +12/29/2021 09:40:52 - INFO - codeparrot_training - Step 48918: {'lr': 5.815689505551946e-07, 'samples': 25046528, 'steps': 48918, 'batch_loss/train': 0.7668475350365043} +12/29/2021 09:41:02 - INFO - codeparrot_training - Step 48919: {'lr': 5.804948745154193e-07, 'samples': 25047040, 'steps': 48919, 'batch_loss/train': 0.7050375966355205} +12/29/2021 09:41:15 - INFO - codeparrot_training - Step 48920: {'lr': 5.7942179007342e-07, 'samples': 25047552, 'steps': 48920, 'batch_loss/train': 0.6383198075927794} +12/29/2021 09:41:25 - INFO - codeparrot_training - Step 48921: {'lr': 5.783496972334434e-07, 'samples': 25048064, 'steps': 48921, 'batch_loss/train': 0.6666105738840997} +12/29/2021 09:41:36 - INFO - codeparrot_training - Step 48922: {'lr': 5.772785959997917e-07, 'samples': 25048576, 'steps': 48922, 'batch_loss/train': 0.7800071574747562} +12/29/2021 09:41:48 - INFO - codeparrot_training - Step 48923: {'lr': 5.762084863766837e-07, 'samples': 25049088, 'steps': 48923, 'batch_loss/train': 0.6463871211744845} +12/29/2021 09:41:58 - INFO - codeparrot_training - Step 48924: {'lr': 5.751393683683937e-07, 'samples': 25049600, 'steps': 48924, 'batch_loss/train': 0.6266887285746634} +12/29/2021 09:42:09 - INFO - codeparrot_training - Step 48925: {'lr': 5.74071241979196e-07, 'samples': 25050112, 'steps': 48925, 'batch_loss/train': 0.7247328441590071} +12/29/2021 09:42:23 - INFO - codeparrot_training - Step 48926: {'lr': 5.730041072132819e-07, 'samples': 25050624, 'steps': 48926, 'batch_loss/train': 0.647919581271708} +12/29/2021 09:42:34 - INFO - codeparrot_training - Step 48927: {'lr': 5.719379640749534e-07, 'samples': 25051136, 'steps': 48927, 'batch_loss/train': 0.6683847745880485} +12/29/2021 09:42:44 - INFO - codeparrot_training - Step 48928: {'lr': 5.708728125684015e-07, 'samples': 25051648, 'steps': 48928, 'batch_loss/train': 0.7747236748691648} +12/29/2021 09:42:55 - INFO - codeparrot_training - Step 48929: {'lr': 5.69808652697873e-07, 'samples': 25052160, 'steps': 48929, 'batch_loss/train': 0.5373793067410588} +12/29/2021 09:43:07 - INFO - codeparrot_training - Step 48930: {'lr': 5.687454844676143e-07, 'samples': 25052672, 'steps': 48930, 'batch_loss/train': 0.7900753582362086} +12/29/2021 09:43:18 - INFO - codeparrot_training - Step 48931: {'lr': 5.676833078818167e-07, 'samples': 25053184, 'steps': 48931, 'batch_loss/train': 1.020990014076233} +12/29/2021 09:43:28 - INFO - codeparrot_training - Step 48932: {'lr': 5.666221229447544e-07, 'samples': 25053696, 'steps': 48932, 'batch_loss/train': 0.6931632792111486} +12/29/2021 09:43:41 - INFO - codeparrot_training - Step 48933: {'lr': 5.655619296606185e-07, 'samples': 25054208, 'steps': 48933, 'batch_loss/train': 0.7689267238602042} +12/29/2021 09:43:52 - INFO - codeparrot_training - Step 48934: {'lr': 5.64502728033628e-07, 'samples': 25054720, 'steps': 48934, 'batch_loss/train': 0.7825247151777148} +12/29/2021 09:44:03 - INFO - codeparrot_training - Step 48935: {'lr': 5.634445180679737e-07, 'samples': 25055232, 'steps': 48935, 'batch_loss/train': 0.6439625115017407} +12/29/2021 09:44:15 - INFO - codeparrot_training - Step 48936: {'lr': 5.623872997679025e-07, 'samples': 25055744, 'steps': 48936, 'batch_loss/train': 0.5233240514062345} +12/29/2021 09:44:25 - INFO - codeparrot_training - Step 48937: {'lr': 5.613310731375776e-07, 'samples': 25056256, 'steps': 48937, 'batch_loss/train': 0.7463370240293443} +12/29/2021 09:44:36 - INFO - codeparrot_training - Step 48938: {'lr': 5.602758381812178e-07, 'samples': 25056768, 'steps': 48938, 'batch_loss/train': 0.7039226666092873} +12/29/2021 09:44:47 - INFO - codeparrot_training - Step 48939: {'lr': 5.592215949030421e-07, 'samples': 25057280, 'steps': 48939, 'batch_loss/train': 0.7410867484286427} +12/29/2021 09:45:00 - INFO - codeparrot_training - Step 48940: {'lr': 5.58168343307186e-07, 'samples': 25057792, 'steps': 48940, 'batch_loss/train': 0.7739759730175138} +12/29/2021 09:45:11 - INFO - codeparrot_training - Step 48941: {'lr': 5.571160833978683e-07, 'samples': 25058304, 'steps': 48941, 'batch_loss/train': 0.6225163680501282} +12/29/2021 09:45:22 - INFO - codeparrot_training - Step 48942: {'lr': 5.560648151792803e-07, 'samples': 25058816, 'steps': 48942, 'batch_loss/train': 0.7419935841462575} +12/29/2021 09:45:34 - INFO - codeparrot_training - Step 48943: {'lr': 5.550145386556127e-07, 'samples': 25059328, 'steps': 48943, 'batch_loss/train': 0.6854568959679455} +12/29/2021 09:45:44 - INFO - codeparrot_training - Step 48944: {'lr': 5.539652538309736e-07, 'samples': 25059840, 'steps': 48944, 'batch_loss/train': 0.7021562356385402} +12/29/2021 09:45:55 - INFO - codeparrot_training - Step 48945: {'lr': 5.529169607096096e-07, 'samples': 25060352, 'steps': 48945, 'batch_loss/train': 0.7177411783486605} +12/29/2021 09:46:09 - INFO - codeparrot_training - Step 48946: {'lr': 5.518696592956562e-07, 'samples': 25060864, 'steps': 48946, 'batch_loss/train': 0.7424995042383671} +12/29/2021 09:46:20 - INFO - codeparrot_training - Step 48947: {'lr': 5.508233495933046e-07, 'samples': 25061376, 'steps': 48947, 'batch_loss/train': 0.7589584197849035} +12/29/2021 09:46:30 - INFO - codeparrot_training - Step 48948: {'lr': 5.497780316066625e-07, 'samples': 25061888, 'steps': 48948, 'batch_loss/train': 0.5495677087455988} +12/29/2021 09:46:42 - INFO - codeparrot_training - Step 48949: {'lr': 5.487337053399211e-07, 'samples': 25062400, 'steps': 48949, 'batch_loss/train': 0.653334473259747} +12/29/2021 09:46:53 - INFO - codeparrot_training - Step 48950: {'lr': 5.476903707972158e-07, 'samples': 25062912, 'steps': 48950, 'batch_loss/train': 1.2045988405589014} +12/29/2021 09:47:04 - INFO - codeparrot_training - Step 48951: {'lr': 5.466480279827102e-07, 'samples': 25063424, 'steps': 48951, 'batch_loss/train': 0.9731959319906309} +12/29/2021 09:47:14 - INFO - codeparrot_training - Step 48952: {'lr': 5.456066769005397e-07, 'samples': 25063936, 'steps': 48952, 'batch_loss/train': 0.6896392540074885} +12/29/2021 09:47:27 - INFO - codeparrot_training - Step 48953: {'lr': 5.445663175548676e-07, 'samples': 25064448, 'steps': 48953, 'batch_loss/train': 0.6768562919460237} +12/29/2021 09:47:37 - INFO - codeparrot_training - Step 48954: {'lr': 5.435269499498019e-07, 'samples': 25064960, 'steps': 48954, 'batch_loss/train': 0.650777698494494} +12/29/2021 09:47:48 - INFO - codeparrot_training - Step 48955: {'lr': 5.424885740894503e-07, 'samples': 25065472, 'steps': 48955, 'batch_loss/train': 0.6262945138732903} +12/29/2021 09:48:02 - INFO - codeparrot_training - Step 48956: {'lr': 5.414511899780039e-07, 'samples': 25065984, 'steps': 48956, 'batch_loss/train': 0.7299549096496776} +12/29/2021 09:48:13 - INFO - codeparrot_training - Step 48957: {'lr': 5.404147976195428e-07, 'samples': 25066496, 'steps': 48957, 'batch_loss/train': 1.7583172984886914} +12/29/2021 09:48:23 - INFO - codeparrot_training - Step 48958: {'lr': 5.393793970182304e-07, 'samples': 25067008, 'steps': 48958, 'batch_loss/train': 0.6729537502396852} +12/29/2021 09:48:34 - INFO - codeparrot_training - Step 48959: {'lr': 5.383449881781189e-07, 'samples': 25067520, 'steps': 48959, 'batch_loss/train': 0.8681339318864048} +12/29/2021 09:48:46 - INFO - codeparrot_training - Step 48960: {'lr': 5.373115711033715e-07, 'samples': 25068032, 'steps': 48960, 'batch_loss/train': 0.7277547167614102} +12/29/2021 09:48:57 - INFO - codeparrot_training - Step 48961: {'lr': 5.362791457980687e-07, 'samples': 25068544, 'steps': 48961, 'batch_loss/train': 0.7473162687383592} +12/29/2021 09:49:08 - INFO - codeparrot_training - Step 48962: {'lr': 5.35247712266318e-07, 'samples': 25069056, 'steps': 48962, 'batch_loss/train': 0.7590807313099504} +12/29/2021 09:49:22 - INFO - codeparrot_training - Step 48963: {'lr': 5.342172705122273e-07, 'samples': 25069568, 'steps': 48963, 'batch_loss/train': 0.7457298655062914} +12/29/2021 09:49:32 - INFO - codeparrot_training - Step 48964: {'lr': 5.33187820539932e-07, 'samples': 25070080, 'steps': 48964, 'batch_loss/train': 0.7431729180389084} +12/29/2021 09:49:43 - INFO - codeparrot_training - Step 48965: {'lr': 5.32159362353457e-07, 'samples': 25070592, 'steps': 48965, 'batch_loss/train': 0.512013071012916} +12/29/2021 09:49:55 - INFO - codeparrot_training - Step 48966: {'lr': 5.3113189595691e-07, 'samples': 25071104, 'steps': 48966, 'batch_loss/train': 0.778100369963795} +12/29/2021 09:50:06 - INFO - codeparrot_training - Step 48967: {'lr': 5.301054213543988e-07, 'samples': 25071616, 'steps': 48967, 'batch_loss/train': 0.6450455943122506} +12/29/2021 09:50:16 - INFO - codeparrot_training - Step 48968: {'lr': 5.290799385500034e-07, 'samples': 25072128, 'steps': 48968, 'batch_loss/train': 0.6830332351382822} +12/29/2021 09:50:28 - INFO - codeparrot_training - Step 48969: {'lr': 5.280554475477761e-07, 'samples': 25072640, 'steps': 48969, 'batch_loss/train': 0.8864935070741922} +12/29/2021 09:50:39 - INFO - codeparrot_training - Step 48970: {'lr': 5.270319483518249e-07, 'samples': 25073152, 'steps': 48970, 'batch_loss/train': 0.8150467071682215} +12/29/2021 09:50:50 - INFO - codeparrot_training - Step 48971: {'lr': 5.260094409661742e-07, 'samples': 25073664, 'steps': 48971, 'batch_loss/train': 0.7543496275320649} +12/29/2021 09:51:00 - INFO - codeparrot_training - Step 48972: {'lr': 5.249879253949318e-07, 'samples': 25074176, 'steps': 48972, 'batch_loss/train': 0.790680349804461} +12/29/2021 09:51:14 - INFO - codeparrot_training - Step 48973: {'lr': 5.239674016421225e-07, 'samples': 25074688, 'steps': 48973, 'batch_loss/train': 0.6523028337396681} +12/29/2021 09:51:25 - INFO - codeparrot_training - Step 48974: {'lr': 5.229478697118262e-07, 'samples': 25075200, 'steps': 48974, 'batch_loss/train': 0.7706664691213518} +12/29/2021 09:51:35 - INFO - codeparrot_training - Step 48975: {'lr': 5.219293296080674e-07, 'samples': 25075712, 'steps': 48975, 'batch_loss/train': 0.7192023540847003} +12/29/2021 09:51:47 - INFO - codeparrot_training - Step 48976: {'lr': 5.20911781334954e-07, 'samples': 25076224, 'steps': 48976, 'batch_loss/train': 0.7826396888121963} +12/29/2021 09:51:58 - INFO - codeparrot_training - Step 48977: {'lr': 5.198952248964828e-07, 'samples': 25076736, 'steps': 48977, 'batch_loss/train': 0.7117941873148084} +12/29/2021 09:52:09 - INFO - codeparrot_training - Step 48978: {'lr': 5.18879660296706e-07, 'samples': 25077248, 'steps': 48978, 'batch_loss/train': 0.7586915157735348} +12/29/2021 09:52:21 - INFO - codeparrot_training - Step 48979: {'lr': 5.178650875396484e-07, 'samples': 25077760, 'steps': 48979, 'batch_loss/train': 0.6741468133404851} +12/29/2021 09:52:31 - INFO - codeparrot_training - Step 48980: {'lr': 5.1685150662939e-07, 'samples': 25078272, 'steps': 48980, 'batch_loss/train': 0.8124618334695697} +12/29/2021 09:52:42 - INFO - codeparrot_training - Step 48981: {'lr': 5.158389175698997e-07, 'samples': 25078784, 'steps': 48981, 'batch_loss/train': 0.7667684714542702} +12/29/2021 09:52:54 - INFO - codeparrot_training - Step 48982: {'lr': 5.148273203652576e-07, 'samples': 25079296, 'steps': 48982, 'batch_loss/train': 0.6875861305743456} +12/29/2021 09:53:05 - INFO - codeparrot_training - Step 48983: {'lr': 5.138167150194606e-07, 'samples': 25079808, 'steps': 48983, 'batch_loss/train': 0.7932756398804486} +12/29/2021 09:53:15 - INFO - codeparrot_training - Step 48984: {'lr': 5.128071015365055e-07, 'samples': 25080320, 'steps': 48984, 'batch_loss/train': 0.6933925587218255} +12/29/2021 09:53:26 - INFO - codeparrot_training - Step 48985: {'lr': 5.117984799204445e-07, 'samples': 25080832, 'steps': 48985, 'batch_loss/train': 0.7352213398553431} +12/29/2021 09:53:40 - INFO - codeparrot_training - Step 48986: {'lr': 5.107908501752468e-07, 'samples': 25081344, 'steps': 48986, 'batch_loss/train': 0.7393137149047107} +12/29/2021 09:53:51 - INFO - codeparrot_training - Step 48987: {'lr': 5.097842123049646e-07, 'samples': 25081856, 'steps': 48987, 'batch_loss/train': 0.757974112406373} +12/29/2021 09:54:01 - INFO - codeparrot_training - Step 48988: {'lr': 5.087785663135669e-07, 'samples': 25082368, 'steps': 48988, 'batch_loss/train': 0.7012648256495595} +12/29/2021 09:54:13 - INFO - codeparrot_training - Step 48989: {'lr': 5.077739122050506e-07, 'samples': 25082880, 'steps': 48989, 'batch_loss/train': 0.7630800916813314} +12/29/2021 09:54:24 - INFO - codeparrot_training - Step 48990: {'lr': 5.067702499834403e-07, 'samples': 25083392, 'steps': 48990, 'batch_loss/train': 0.7607673639431596} +12/29/2021 09:54:35 - INFO - codeparrot_training - Step 48991: {'lr': 5.057675796526772e-07, 'samples': 25083904, 'steps': 48991, 'batch_loss/train': 0.7305626654997468} +12/29/2021 09:54:47 - INFO - codeparrot_training - Step 48992: {'lr': 5.047659012168137e-07, 'samples': 25084416, 'steps': 48992, 'batch_loss/train': 0.7371665546670556} +12/29/2021 09:54:57 - INFO - codeparrot_training - Step 48993: {'lr': 5.037652146797633e-07, 'samples': 25084928, 'steps': 48993, 'batch_loss/train': 0.739754285197705} +12/29/2021 09:55:08 - INFO - codeparrot_training - Step 48994: {'lr': 5.027655200455505e-07, 'samples': 25085440, 'steps': 48994, 'batch_loss/train': 0.6848665052093565} +12/29/2021 09:55:19 - INFO - codeparrot_training - Step 48995: {'lr': 5.017668173181444e-07, 'samples': 25085952, 'steps': 48995, 'batch_loss/train': 0.6830455912277102} +12/29/2021 09:55:33 - INFO - codeparrot_training - Step 48996: {'lr': 5.007691065014864e-07, 'samples': 25086464, 'steps': 48996, 'batch_loss/train': 0.6678712638095021} +12/29/2021 09:55:43 - INFO - codeparrot_training - Step 48997: {'lr': 4.99772387599573e-07, 'samples': 25086976, 'steps': 48997, 'batch_loss/train': 0.7488440992310643} +12/29/2021 09:55:54 - INFO - codeparrot_training - Step 48998: {'lr': 4.987766606163735e-07, 'samples': 25087488, 'steps': 48998, 'batch_loss/train': 0.6991433394141495} +12/29/2021 09:56:06 - INFO - codeparrot_training - Step 48999: {'lr': 4.977819255558014e-07, 'samples': 25088000, 'steps': 48999, 'batch_loss/train': 0.7589141167700291} +12/29/2021 09:56:17 - INFO - codeparrot_training - Step 49000: {'lr': 4.967881824218534e-07, 'samples': 25088512, 'steps': 49000, 'batch_loss/train': 0.7194710369221866} +12/29/2021 09:56:27 - INFO - codeparrot_training - Step 49001: {'lr': 4.957954312184709e-07, 'samples': 25089024, 'steps': 49001, 'batch_loss/train': 0.7360256058163941} +12/29/2021 09:56:41 - INFO - codeparrot_training - Step 49002: {'lr': 4.948036719495952e-07, 'samples': 25089536, 'steps': 49002, 'batch_loss/train': 0.6414452578756027} +12/29/2021 09:56:52 - INFO - codeparrot_training - Step 49003: {'lr': 4.938129046191675e-07, 'samples': 25090048, 'steps': 49003, 'batch_loss/train': 0.6980158314108849} +12/29/2021 09:57:02 - INFO - codeparrot_training - Step 49004: {'lr': 4.928231292311291e-07, 'samples': 25090560, 'steps': 49004, 'batch_loss/train': 0.7427234780043364} +12/29/2021 09:57:15 - INFO - codeparrot_training - Step 49005: {'lr': 4.918343457894214e-07, 'samples': 25091072, 'steps': 49005, 'batch_loss/train': 0.8008470768108964} +12/29/2021 09:57:25 - INFO - codeparrot_training - Step 49006: {'lr': 4.908465542979857e-07, 'samples': 25091584, 'steps': 49006, 'batch_loss/train': 0.7039990541525185} +12/29/2021 09:57:36 - INFO - codeparrot_training - Step 49007: {'lr': 4.898597547607076e-07, 'samples': 25092096, 'steps': 49007, 'batch_loss/train': 0.7061966413166374} +12/29/2021 09:57:47 - INFO - codeparrot_training - Step 49008: {'lr': 4.888739471815284e-07, 'samples': 25092608, 'steps': 49008, 'batch_loss/train': 0.7150225141085684} +12/29/2021 09:57:59 - INFO - codeparrot_training - Step 49009: {'lr': 4.878891315643897e-07, 'samples': 25093120, 'steps': 49009, 'batch_loss/train': 0.6312555089825764} +12/29/2021 09:58:09 - INFO - codeparrot_training - Step 49010: {'lr': 4.869053079132047e-07, 'samples': 25093632, 'steps': 49010, 'batch_loss/train': 0.7535852990113199} +12/29/2021 09:58:20 - INFO - codeparrot_training - Step 49011: {'lr': 4.859224762318315e-07, 'samples': 25094144, 'steps': 49011, 'batch_loss/train': 1.0651730904355645} +12/29/2021 09:58:34 - INFO - codeparrot_training - Step 49012: {'lr': 4.84940636524267e-07, 'samples': 25094656, 'steps': 49012, 'batch_loss/train': 0.7481730910949409} +12/29/2021 09:58:45 - INFO - codeparrot_training - Step 49013: {'lr': 4.839597887943414e-07, 'samples': 25095168, 'steps': 49013, 'batch_loss/train': 0.6990538267418742} +12/29/2021 09:58:55 - INFO - codeparrot_training - Step 49014: {'lr': 4.829799330459683e-07, 'samples': 25095680, 'steps': 49014, 'batch_loss/train': 0.74390979250893} +12/29/2021 09:59:07 - INFO - codeparrot_training - Step 49015: {'lr': 4.820010692830612e-07, 'samples': 25096192, 'steps': 49015, 'batch_loss/train': 0.7757652285508811} +12/29/2021 09:59:18 - INFO - codeparrot_training - Step 49016: {'lr': 4.810231975095059e-07, 'samples': 25096704, 'steps': 49016, 'batch_loss/train': 0.9794818083755672} +12/29/2021 09:59:29 - INFO - codeparrot_training - Step 49017: {'lr': 4.800463177291881e-07, 'samples': 25097216, 'steps': 49017, 'batch_loss/train': 0.6704307021573186} +12/29/2021 09:59:39 - INFO - codeparrot_training - Step 49018: {'lr': 4.790704299459936e-07, 'samples': 25097728, 'steps': 49018, 'batch_loss/train': 0.8081860356032848} +12/29/2021 09:59:52 - INFO - codeparrot_training - Step 49019: {'lr': 4.780955341637805e-07, 'samples': 25098240, 'steps': 49019, 'batch_loss/train': 0.7090080287307501} +12/29/2021 10:00:03 - INFO - codeparrot_training - Step 49020: {'lr': 4.771216303864622e-07, 'samples': 25098752, 'steps': 49020, 'batch_loss/train': 0.6406249948777258} +12/29/2021 10:00:13 - INFO - codeparrot_training - Step 49021: {'lr': 4.76148718617897e-07, 'samples': 25099264, 'steps': 49021, 'batch_loss/train': 0.6822924633743241} +12/29/2021 10:00:27 - INFO - codeparrot_training - Step 49022: {'lr': 4.7517679886191487e-07, 'samples': 25099776, 'steps': 49022, 'batch_loss/train': 0.7294734297320247} +12/29/2021 10:00:38 - INFO - codeparrot_training - Step 49023: {'lr': 4.7420587112245726e-07, 'samples': 25100288, 'steps': 49023, 'batch_loss/train': 0.6857520069461316} +12/29/2021 10:00:48 - INFO - codeparrot_training - Step 49024: {'lr': 4.7323593540332664e-07, 'samples': 25100800, 'steps': 49024, 'batch_loss/train': 0.7701084865257144} +12/29/2021 10:01:01 - INFO - codeparrot_training - Step 49025: {'lr': 4.7226699170838104e-07, 'samples': 25101312, 'steps': 49025, 'batch_loss/train': 0.671170199289918} +12/29/2021 10:01:12 - INFO - codeparrot_training - Step 49026: {'lr': 4.712990400414785e-07, 'samples': 25101824, 'steps': 49026, 'batch_loss/train': 0.8293714132159948} +12/29/2021 10:01:22 - INFO - codeparrot_training - Step 49027: {'lr': 4.70332080406477e-07, 'samples': 25102336, 'steps': 49027, 'batch_loss/train': 0.826662466628477} +12/29/2021 10:01:33 - INFO - codeparrot_training - Step 49028: {'lr': 4.6936611280723463e-07, 'samples': 25102848, 'steps': 49028, 'batch_loss/train': 0.6335281520150602} +12/29/2021 10:01:45 - INFO - codeparrot_training - Step 49029: {'lr': 4.6840113724755383e-07, 'samples': 25103360, 'steps': 49029, 'batch_loss/train': 0.4495880967588164} +12/29/2021 10:01:56 - INFO - codeparrot_training - Step 49030: {'lr': 4.674371537312927e-07, 'samples': 25103872, 'steps': 49030, 'batch_loss/train': 0.8607345083728433} +12/29/2021 10:02:06 - INFO - codeparrot_training - Step 49031: {'lr': 4.664741622623092e-07, 'samples': 25104384, 'steps': 49031, 'batch_loss/train': 0.6835821634158492} +12/29/2021 10:02:21 - INFO - codeparrot_training - Step 49032: {'lr': 4.6551216284437813e-07, 'samples': 25104896, 'steps': 49032, 'batch_loss/train': 0.6443297588266432} +12/29/2021 10:02:31 - INFO - codeparrot_training - Step 49033: {'lr': 4.645511554813298e-07, 'samples': 25105408, 'steps': 49033, 'batch_loss/train': 0.8140531880781054} +12/29/2021 10:02:42 - INFO - codeparrot_training - Step 49034: {'lr': 4.6359114017704983e-07, 'samples': 25105920, 'steps': 49034, 'batch_loss/train': 0.7046846153680235} +12/29/2021 10:02:54 - INFO - codeparrot_training - Step 49035: {'lr': 4.626321169352854e-07, 'samples': 25106432, 'steps': 49035, 'batch_loss/train': 0.46209819382056594} +12/29/2021 10:03:05 - INFO - codeparrot_training - Step 49036: {'lr': 4.6167408575986667e-07, 'samples': 25106944, 'steps': 49036, 'batch_loss/train': 0.677086777985096} +12/29/2021 10:03:15 - INFO - codeparrot_training - Step 49037: {'lr': 4.60717046654624e-07, 'samples': 25107456, 'steps': 49037, 'batch_loss/train': 0.6888250643387437} +12/29/2021 10:03:26 - INFO - codeparrot_training - Step 49038: {'lr': 4.597609996233598e-07, 'samples': 25107968, 'steps': 49038, 'batch_loss/train': 0.6762382353190333} +12/29/2021 10:03:38 - INFO - codeparrot_training - Step 49039: {'lr': 4.588059446698489e-07, 'samples': 25108480, 'steps': 49039, 'batch_loss/train': 0.7302872836589813} +12/29/2021 10:03:49 - INFO - codeparrot_training - Step 49040: {'lr': 4.5785188179789385e-07, 'samples': 25108992, 'steps': 49040, 'batch_loss/train': 0.6422685054130852} +12/29/2021 10:03:59 - INFO - codeparrot_training - Step 49041: {'lr': 4.5689881101129703e-07, 'samples': 25109504, 'steps': 49041, 'batch_loss/train': 0.649268586654216} +12/29/2021 10:04:13 - INFO - codeparrot_training - Step 49042: {'lr': 4.559467323138611e-07, 'samples': 25110016, 'steps': 49042, 'batch_loss/train': 0.6746835135854781} +12/29/2021 10:04:24 - INFO - codeparrot_training - Step 49043: {'lr': 4.549956457093607e-07, 'samples': 25110528, 'steps': 49043, 'batch_loss/train': 0.9425616096705198} +12/29/2021 10:04:35 - INFO - codeparrot_training - Step 49044: {'lr': 4.540455512015429e-07, 'samples': 25111040, 'steps': 49044, 'batch_loss/train': 0.9766100002452731} +12/29/2021 10:04:45 - INFO - codeparrot_training - Step 49045: {'lr': 4.5309644879423795e-07, 'samples': 25111552, 'steps': 49045, 'batch_loss/train': 0.7486769678071141} +12/29/2021 10:04:58 - INFO - codeparrot_training - Step 49046: {'lr': 4.5214833849119286e-07, 'samples': 25112064, 'steps': 49046, 'batch_loss/train': 0.6809318526647985} +12/29/2021 10:05:08 - INFO - codeparrot_training - Step 49047: {'lr': 4.512012202961824e-07, 'samples': 25112576, 'steps': 49047, 'batch_loss/train': 0.7012458918616176} +12/29/2021 10:05:19 - INFO - codeparrot_training - Step 49048: {'lr': 4.502550942129813e-07, 'samples': 25113088, 'steps': 49048, 'batch_loss/train': 0.7569069089367986} +12/29/2021 10:05:31 - INFO - codeparrot_training - Step 49049: {'lr': 4.493099602453088e-07, 'samples': 25113600, 'steps': 49049, 'batch_loss/train': 0.7018961692228913} +12/29/2021 10:05:42 - INFO - codeparrot_training - Step 49050: {'lr': 4.4836581839696745e-07, 'samples': 25114112, 'steps': 49050, 'batch_loss/train': 0.6999639007262886} +12/29/2021 10:05:52 - INFO - codeparrot_training - Step 49051: {'lr': 4.4742266867170423e-07, 'samples': 25114624, 'steps': 49051, 'batch_loss/train': 0.8239671894116327} +12/29/2021 10:06:04 - INFO - codeparrot_training - Step 49052: {'lr': 4.4648051107326614e-07, 'samples': 25115136, 'steps': 49052, 'batch_loss/train': 0.6764687849208713} +12/29/2021 10:06:15 - INFO - codeparrot_training - Step 49053: {'lr': 4.4553934560537245e-07, 'samples': 25115648, 'steps': 49053, 'batch_loss/train': 0.7741415426135063} +12/29/2021 10:06:26 - INFO - codeparrot_training - Step 49054: {'lr': 4.445991722717979e-07, 'samples': 25116160, 'steps': 49054, 'batch_loss/train': 0.8036465980112553} +12/29/2021 10:06:36 - INFO - codeparrot_training - Step 49055: {'lr': 4.436599910762895e-07, 'samples': 25116672, 'steps': 49055, 'batch_loss/train': 0.7172203473746777} +12/29/2021 10:06:50 - INFO - codeparrot_training - Step 49056: {'lr': 4.4272180202253876e-07, 'samples': 25117184, 'steps': 49056, 'batch_loss/train': 0.8123975191265345} +12/29/2021 10:07:01 - INFO - codeparrot_training - Step 49057: {'lr': 4.4178460511429265e-07, 'samples': 25117696, 'steps': 49057, 'batch_loss/train': 0.6883505424484611} +12/29/2021 10:07:11 - INFO - codeparrot_training - Step 49058: {'lr': 4.408484003552704e-07, 'samples': 25118208, 'steps': 49058, 'batch_loss/train': 0.7648016642779112} +12/29/2021 10:07:23 - INFO - codeparrot_training - Step 49059: {'lr': 4.3991318774921906e-07, 'samples': 25118720, 'steps': 49059, 'batch_loss/train': 0.659437010763213} +12/29/2021 10:07:34 - INFO - codeparrot_training - Step 49060: {'lr': 4.3897896729985786e-07, 'samples': 25119232, 'steps': 49060, 'batch_loss/train': 0.7252837037667632} +12/29/2021 10:07:45 - INFO - codeparrot_training - Step 49061: {'lr': 4.380457390108783e-07, 'samples': 25119744, 'steps': 49061, 'batch_loss/train': 0.7251307442784309} +12/29/2021 10:07:59 - INFO - codeparrot_training - Step 49062: {'lr': 4.3711350288597187e-07, 'samples': 25120256, 'steps': 49062, 'batch_loss/train': 0.727368007414043} +12/29/2021 10:08:09 - INFO - codeparrot_training - Step 49063: {'lr': 4.3618225892891326e-07, 'samples': 25120768, 'steps': 49063, 'batch_loss/train': 0.7519029565155506} +12/29/2021 10:08:20 - INFO - codeparrot_training - Step 49064: {'lr': 4.352520071433386e-07, 'samples': 25121280, 'steps': 49064, 'batch_loss/train': 0.8121998829301447} +12/29/2021 10:08:32 - INFO - codeparrot_training - Step 49065: {'lr': 4.3432274753296695e-07, 'samples': 25121792, 'steps': 49065, 'batch_loss/train': 0.6366829714970663} +12/29/2021 10:08:43 - INFO - codeparrot_training - Step 49066: {'lr': 4.3339448010148994e-07, 'samples': 25122304, 'steps': 49066, 'batch_loss/train': 0.7484378670342267} +12/29/2021 10:08:53 - INFO - codeparrot_training - Step 49067: {'lr': 4.324672048526268e-07, 'samples': 25122816, 'steps': 49067, 'batch_loss/train': 0.6398016256280243} +12/29/2021 10:09:04 - INFO - codeparrot_training - Step 49068: {'lr': 4.3154092179001346e-07, 'samples': 25123328, 'steps': 49068, 'batch_loss/train': 0.771912201307714} +12/29/2021 10:09:16 - INFO - codeparrot_training - Step 49069: {'lr': 4.306156309173692e-07, 'samples': 25123840, 'steps': 49069, 'batch_loss/train': 0.6490618023090065} +12/29/2021 10:09:27 - INFO - codeparrot_training - Step 49070: {'lr': 4.296913322383855e-07, 'samples': 25124352, 'steps': 49070, 'batch_loss/train': 0.7939071110449731} +12/29/2021 10:09:37 - INFO - codeparrot_training - Step 49071: {'lr': 4.287680257566984e-07, 'samples': 25124864, 'steps': 49071, 'batch_loss/train': 0.7258637396153063} +12/29/2021 10:09:51 - INFO - codeparrot_training - Step 49072: {'lr': 4.278457114759993e-07, 'samples': 25125376, 'steps': 49072, 'batch_loss/train': 0.7642985451966524} +12/29/2021 10:10:02 - INFO - codeparrot_training - Step 49073: {'lr': 4.269243893999519e-07, 'samples': 25125888, 'steps': 49073, 'batch_loss/train': 0.7047563539817929} +12/29/2021 10:10:13 - INFO - codeparrot_training - Step 49074: {'lr': 4.260040595321923e-07, 'samples': 25126400, 'steps': 49074, 'batch_loss/train': 0.7070841840468347} +12/29/2021 10:10:25 - INFO - codeparrot_training - Step 49075: {'lr': 4.250847218764398e-07, 'samples': 25126912, 'steps': 49075, 'batch_loss/train': 0.8746702605858445} +12/29/2021 10:10:35 - INFO - codeparrot_training - Step 49076: {'lr': 4.241663764363024e-07, 'samples': 25127424, 'steps': 49076, 'batch_loss/train': 0.7301646331325173} +12/29/2021 10:10:46 - INFO - codeparrot_training - Step 49077: {'lr': 4.2324902321541624e-07, 'samples': 25127936, 'steps': 49077, 'batch_loss/train': 0.7325300415977836} +12/29/2021 10:10:57 - INFO - codeparrot_training - Step 49078: {'lr': 4.2233266221750056e-07, 'samples': 25128448, 'steps': 49078, 'batch_loss/train': 0.7239865995943546} +12/29/2021 10:11:11 - INFO - codeparrot_training - Step 49079: {'lr': 4.214172934461358e-07, 'samples': 25128960, 'steps': 49079, 'batch_loss/train': 0.6769450223073363} +12/29/2021 10:11:21 - INFO - codeparrot_training - Step 49080: {'lr': 4.205029169049579e-07, 'samples': 25129472, 'steps': 49080, 'batch_loss/train': 0.6471193539910018} +12/29/2021 10:11:32 - INFO - codeparrot_training - Step 49081: {'lr': 4.195895325976584e-07, 'samples': 25129984, 'steps': 49081, 'batch_loss/train': 0.6762829683721066} +12/29/2021 10:11:44 - INFO - codeparrot_training - Step 49082: {'lr': 4.186771405278178e-07, 'samples': 25130496, 'steps': 49082, 'batch_loss/train': 0.8584370696917176} +12/29/2021 10:11:55 - INFO - codeparrot_training - Step 49083: {'lr': 4.1776574069907203e-07, 'samples': 25131008, 'steps': 49083, 'batch_loss/train': 0.7354380954056978} +12/29/2021 10:12:05 - INFO - codeparrot_training - Step 49084: {'lr': 4.168553331150571e-07, 'samples': 25131520, 'steps': 49084, 'batch_loss/train': 0.7239827811717987} +12/29/2021 10:12:17 - INFO - codeparrot_training - Step 49085: {'lr': 4.159459177793812e-07, 'samples': 25132032, 'steps': 49085, 'batch_loss/train': 0.7225486128591001} +12/29/2021 10:12:28 - INFO - codeparrot_training - Step 49086: {'lr': 4.1503749469565255e-07, 'samples': 25132544, 'steps': 49086, 'batch_loss/train': 0.6685991304693744} +12/29/2021 10:12:39 - INFO - codeparrot_training - Step 49087: {'lr': 4.1413006386753494e-07, 'samples': 25133056, 'steps': 49087, 'batch_loss/train': 0.7215123688802123} +12/29/2021 10:12:51 - INFO - codeparrot_training - Step 49088: {'lr': 4.132236252985533e-07, 'samples': 25133568, 'steps': 49088, 'batch_loss/train': 0.6746517606079578} +12/29/2021 10:13:01 - INFO - codeparrot_training - Step 49089: {'lr': 4.123181789923991e-07, 'samples': 25134080, 'steps': 49089, 'batch_loss/train': 0.7912526922300458} +12/29/2021 10:13:12 - INFO - codeparrot_training - Step 49090: {'lr': 4.114137249525973e-07, 'samples': 25134592, 'steps': 49090, 'batch_loss/train': 0.7525664652348496} +12/29/2021 10:13:23 - INFO - codeparrot_training - Step 49091: {'lr': 4.105102631827562e-07, 'samples': 25135104, 'steps': 49091, 'batch_loss/train': 0.6564292479306459} +12/29/2021 10:13:37 - INFO - codeparrot_training - Step 49092: {'lr': 4.096077936865117e-07, 'samples': 25135616, 'steps': 49092, 'batch_loss/train': 0.7033978700637817} +12/29/2021 10:13:48 - INFO - codeparrot_training - Step 49093: {'lr': 4.0870631646741654e-07, 'samples': 25136128, 'steps': 49093, 'batch_loss/train': 0.8062050407752395} +12/29/2021 10:13:58 - INFO - codeparrot_training - Step 49094: {'lr': 4.078058315290789e-07, 'samples': 25136640, 'steps': 49094, 'batch_loss/train': 0.7266807602718472} +12/29/2021 10:14:10 - INFO - codeparrot_training - Step 49095: {'lr': 4.0690633887505157e-07, 'samples': 25137152, 'steps': 49095, 'batch_loss/train': 0.6051569650880992} +12/29/2021 10:14:21 - INFO - codeparrot_training - Step 49096: {'lr': 4.06007838508915e-07, 'samples': 25137664, 'steps': 49096, 'batch_loss/train': 0.7484024916775525} +12/29/2021 10:14:32 - INFO - codeparrot_training - Step 49097: {'lr': 4.051103304342496e-07, 'samples': 25138176, 'steps': 49097, 'batch_loss/train': 0.7707415008917451} +12/29/2021 10:14:44 - INFO - codeparrot_training - Step 49098: {'lr': 4.0421381465463593e-07, 'samples': 25138688, 'steps': 49098, 'batch_loss/train': 0.8641883507370949} +12/29/2021 10:14:54 - INFO - codeparrot_training - Step 49099: {'lr': 4.033182911736266e-07, 'samples': 25139200, 'steps': 49099, 'batch_loss/train': 0.7891717478632927} +12/29/2021 10:15:05 - INFO - codeparrot_training - Step 49100: {'lr': 4.0242375999474666e-07, 'samples': 25139712, 'steps': 49100, 'batch_loss/train': 0.7300404906272888} +12/29/2021 10:15:15 - INFO - codeparrot_training - Step 49101: {'lr': 4.015302211216043e-07, 'samples': 25140224, 'steps': 49101, 'batch_loss/train': 0.695763046387583} +12/29/2021 10:15:30 - INFO - codeparrot_training - Step 49102: {'lr': 4.0063767455775224e-07, 'samples': 25140736, 'steps': 49102, 'batch_loss/train': 0.7725979406386614} +12/29/2021 10:15:40 - INFO - codeparrot_training - Step 49103: {'lr': 3.997461203066877e-07, 'samples': 25141248, 'steps': 49103, 'batch_loss/train': 0.6733740859199315} +12/29/2021 10:15:51 - INFO - codeparrot_training - Step 49104: {'lr': 3.988555583720188e-07, 'samples': 25141760, 'steps': 49104, 'batch_loss/train': 0.6654140615137294} +12/29/2021 10:16:03 - INFO - codeparrot_training - Step 49105: {'lr': 3.9796598875724286e-07, 'samples': 25142272, 'steps': 49105, 'batch_loss/train': 0.5952272458234802} +12/29/2021 10:16:14 - INFO - codeparrot_training - Step 49106: {'lr': 3.9707741146588483e-07, 'samples': 25142784, 'steps': 49106, 'batch_loss/train': 0.7205049665644765} +12/29/2021 10:16:25 - INFO - codeparrot_training - Step 49107: {'lr': 3.9618982650152514e-07, 'samples': 25143296, 'steps': 49107, 'batch_loss/train': 0.5455720265163109} +12/29/2021 10:16:38 - INFO - codeparrot_training - Step 49108: {'lr': 3.95303233867661e-07, 'samples': 25143808, 'steps': 49108, 'batch_loss/train': 0.7369818645529449} +12/29/2021 10:16:48 - INFO - codeparrot_training - Step 49109: {'lr': 3.944176335678173e-07, 'samples': 25144320, 'steps': 49109, 'batch_loss/train': 0.6835706406272948} +12/29/2021 10:16:59 - INFO - codeparrot_training - Step 49110: {'lr': 3.9353302560554694e-07, 'samples': 25144832, 'steps': 49110, 'batch_loss/train': 0.7266463651321828} +12/29/2021 10:17:10 - INFO - codeparrot_training - Step 49111: {'lr': 3.926494099843192e-07, 'samples': 25145344, 'steps': 49111, 'batch_loss/train': 0.6893729818984866} +12/29/2021 10:17:22 - INFO - codeparrot_training - Step 49112: {'lr': 3.917667867076591e-07, 'samples': 25145856, 'steps': 49112, 'batch_loss/train': 0.738396670203656} +12/29/2021 10:17:33 - INFO - codeparrot_training - Step 49113: {'lr': 3.908851557791193e-07, 'samples': 25146368, 'steps': 49113, 'batch_loss/train': 0.7996827368624508} +12/29/2021 10:17:43 - INFO - codeparrot_training - Step 49114: {'lr': 3.900045172021416e-07, 'samples': 25146880, 'steps': 49114, 'batch_loss/train': 0.48811734560877085} +12/29/2021 10:17:55 - INFO - codeparrot_training - Step 49115: {'lr': 3.891248709802786e-07, 'samples': 25147392, 'steps': 49115, 'batch_loss/train': 0.7458665333688259} +12/29/2021 10:18:06 - INFO - codeparrot_training - Step 49116: {'lr': 3.882462171169998e-07, 'samples': 25147904, 'steps': 49116, 'batch_loss/train': 0.721932552754879} +12/29/2021 10:18:17 - INFO - codeparrot_training - Step 49117: {'lr': 3.8736855561580244e-07, 'samples': 25148416, 'steps': 49117, 'batch_loss/train': 0.7647102938499302} +12/29/2021 10:18:31 - INFO - codeparrot_training - Step 49118: {'lr': 3.8649188648018364e-07, 'samples': 25148928, 'steps': 49118, 'batch_loss/train': 0.7583031095564365} +12/29/2021 10:18:41 - INFO - codeparrot_training - Step 49119: {'lr': 3.8561620971361286e-07, 'samples': 25149440, 'steps': 49119, 'batch_loss/train': 0.7347408095374703} +12/29/2021 10:18:52 - INFO - codeparrot_training - Step 49120: {'lr': 3.8474152531955966e-07, 'samples': 25149952, 'steps': 49120, 'batch_loss/train': 0.7547663999721408} +12/29/2021 10:19:02 - INFO - codeparrot_training - Step 49121: {'lr': 3.8386783330154885e-07, 'samples': 25150464, 'steps': 49121, 'batch_loss/train': 0.6314167212694883} +12/29/2021 10:19:15 - INFO - codeparrot_training - Step 49122: {'lr': 3.829951336630222e-07, 'samples': 25150976, 'steps': 49122, 'batch_loss/train': 0.7288365252315998} +12/29/2021 10:19:25 - INFO - codeparrot_training - Step 49123: {'lr': 3.8212342640747687e-07, 'samples': 25151488, 'steps': 49123, 'batch_loss/train': 0.6911937615368515} +12/29/2021 10:19:36 - INFO - codeparrot_training - Step 49124: {'lr': 3.8125271153832686e-07, 'samples': 25152000, 'steps': 49124, 'batch_loss/train': 0.7713557332754135} +12/29/2021 10:19:48 - INFO - codeparrot_training - Step 49125: {'lr': 3.803829890590971e-07, 'samples': 25152512, 'steps': 49125, 'batch_loss/train': 0.7420659867930226} +12/29/2021 10:19:59 - INFO - codeparrot_training - Step 49126: {'lr': 3.795142589731737e-07, 'samples': 25153024, 'steps': 49126, 'batch_loss/train': 0.6912539638578892} +12/29/2021 10:20:09 - INFO - codeparrot_training - Step 49127: {'lr': 3.78646521284054e-07, 'samples': 25153536, 'steps': 49127, 'batch_loss/train': 0.737018272280693} +12/29/2021 10:20:22 - INFO - codeparrot_training - Step 49128: {'lr': 3.777797759952073e-07, 'samples': 25154048, 'steps': 49128, 'batch_loss/train': 0.6861158809624612} +12/29/2021 10:20:32 - INFO - codeparrot_training - Step 49129: {'lr': 3.769140231100199e-07, 'samples': 25154560, 'steps': 49129, 'batch_loss/train': 0.6428167349658906} +12/29/2021 10:20:43 - INFO - codeparrot_training - Step 49130: {'lr': 3.7604926263201666e-07, 'samples': 25155072, 'steps': 49130, 'batch_loss/train': 0.7533396519720554} +12/29/2021 10:20:53 - INFO - codeparrot_training - Step 49131: {'lr': 3.7518549456455606e-07, 'samples': 25155584, 'steps': 49131, 'batch_loss/train': 0.9762708605267107} +12/29/2021 10:21:07 - INFO - codeparrot_training - Step 49132: {'lr': 3.743227189111076e-07, 'samples': 25156096, 'steps': 49132, 'batch_loss/train': 0.7363897347822785} +12/29/2021 10:21:18 - INFO - codeparrot_training - Step 49133: {'lr': 3.734609356750851e-07, 'samples': 25156608, 'steps': 49133, 'batch_loss/train': 0.864881937392056} +12/29/2021 10:21:29 - INFO - codeparrot_training - Step 49134: {'lr': 3.7260014485993033e-07, 'samples': 25157120, 'steps': 49134, 'batch_loss/train': 0.8475954402238131} +12/29/2021 10:21:41 - INFO - codeparrot_training - Step 49135: {'lr': 3.7174034646908494e-07, 'samples': 25157632, 'steps': 49135, 'batch_loss/train': 0.7117032099631615} +12/29/2021 10:21:51 - INFO - codeparrot_training - Step 49136: {'lr': 3.7088154050593513e-07, 'samples': 25158144, 'steps': 49136, 'batch_loss/train': 0.7401823531836271} +12/29/2021 10:22:02 - INFO - codeparrot_training - Step 49137: {'lr': 3.7002372697389486e-07, 'samples': 25158656, 'steps': 49137, 'batch_loss/train': 0.7143446480622515} +12/29/2021 10:22:16 - INFO - codeparrot_training - Step 49138: {'lr': 3.6916690587637805e-07, 'samples': 25159168, 'steps': 49138, 'batch_loss/train': 0.7298367647454143} +12/29/2021 10:22:27 - INFO - codeparrot_training - Step 49139: {'lr': 3.683110772167986e-07, 'samples': 25159680, 'steps': 49139, 'batch_loss/train': 0.6985325030982494} +12/29/2021 10:22:37 - INFO - codeparrot_training - Step 49140: {'lr': 3.674562409985704e-07, 'samples': 25160192, 'steps': 49140, 'batch_loss/train': 0.6938665136694908} +12/29/2021 10:22:48 - INFO - codeparrot_training - Step 49141: {'lr': 3.666023972250521e-07, 'samples': 25160704, 'steps': 49141, 'batch_loss/train': 0.755506390996743} +12/29/2021 10:23:00 - INFO - codeparrot_training - Step 49142: {'lr': 3.6574954589968514e-07, 'samples': 25161216, 'steps': 49142, 'batch_loss/train': 0.6226528373663314} +12/29/2021 10:23:11 - INFO - codeparrot_training - Step 49143: {'lr': 3.648976870258558e-07, 'samples': 25161728, 'steps': 49143, 'batch_loss/train': 0.5791327776387334} +12/29/2021 10:23:21 - INFO - codeparrot_training - Step 49144: {'lr': 3.640468206069225e-07, 'samples': 25162240, 'steps': 49144, 'batch_loss/train': 0.7709026131778955} +12/29/2021 10:23:33 - INFO - codeparrot_training - Step 49145: {'lr': 3.6319694664627143e-07, 'samples': 25162752, 'steps': 49145, 'batch_loss/train': 0.6861301327589899} +12/29/2021 10:23:44 - INFO - codeparrot_training - Step 49146: {'lr': 3.623480651472888e-07, 'samples': 25163264, 'steps': 49146, 'batch_loss/train': 0.8146096910350025} +12/29/2021 10:23:55 - INFO - codeparrot_training - Step 49147: {'lr': 3.615001761133607e-07, 'samples': 25163776, 'steps': 49147, 'batch_loss/train': 0.745478315744549} +12/29/2021 10:24:09 - INFO - codeparrot_training - Step 49148: {'lr': 3.6065327954784564e-07, 'samples': 25164288, 'steps': 49148, 'batch_loss/train': 0.7105808625929058} +12/29/2021 10:24:19 - INFO - codeparrot_training - Step 49149: {'lr': 3.5980737545410204e-07, 'samples': 25164800, 'steps': 49149, 'batch_loss/train': 0.6580239967443049} +12/29/2021 10:24:30 - INFO - codeparrot_training - Step 49150: {'lr': 3.58962463835516e-07, 'samples': 25165312, 'steps': 49150, 'batch_loss/train': 0.7553186267614365} +12/29/2021 10:24:41 - INFO - codeparrot_training - Step 49151: {'lr': 3.581185446954183e-07, 'samples': 25165824, 'steps': 49151, 'batch_loss/train': 0.7449400760233402} +12/29/2021 10:24:53 - INFO - codeparrot_training - Step 49152: {'lr': 3.57275618037195e-07, 'samples': 25166336, 'steps': 49152, 'batch_loss/train': 0.6834912430495024} +12/29/2021 10:25:03 - INFO - codeparrot_training - Step 49153: {'lr': 3.5643368386417683e-07, 'samples': 25166848, 'steps': 49153, 'batch_loss/train': 0.6604671664535999} +12/29/2021 10:25:14 - INFO - codeparrot_training - Step 49154: {'lr': 3.5559274217972225e-07, 'samples': 25167360, 'steps': 49154, 'batch_loss/train': 0.6878858170239255} +12/29/2021 10:25:28 - INFO - codeparrot_training - Step 49155: {'lr': 3.547527929871619e-07, 'samples': 25167872, 'steps': 49155, 'batch_loss/train': 0.5376315329922363} +12/29/2021 10:25:39 - INFO - codeparrot_training - Step 49156: {'lr': 3.5391383628985416e-07, 'samples': 25168384, 'steps': 49156, 'batch_loss/train': 0.6500816164771095} +12/29/2021 10:25:49 - INFO - codeparrot_training - Step 49157: {'lr': 3.5307587209110204e-07, 'samples': 25168896, 'steps': 49157, 'batch_loss/train': 0.6982075218111277} +12/29/2021 10:26:01 - INFO - codeparrot_training - Step 49158: {'lr': 3.5223890039429163e-07, 'samples': 25169408, 'steps': 49158, 'batch_loss/train': 0.7440279111033306} +12/29/2021 10:26:12 - INFO - codeparrot_training - Step 49159: {'lr': 3.5140292120267036e-07, 'samples': 25169920, 'steps': 49159, 'batch_loss/train': 0.6723130873870105} +12/29/2021 10:26:23 - INFO - codeparrot_training - Step 49160: {'lr': 3.505679345196522e-07, 'samples': 25170432, 'steps': 49160, 'batch_loss/train': 0.7381194122135639} +12/29/2021 10:26:33 - INFO - codeparrot_training - Step 49161: {'lr': 3.497339403484845e-07, 'samples': 25170944, 'steps': 49161, 'batch_loss/train': 0.6922691110521555} +12/29/2021 10:26:45 - INFO - codeparrot_training - Step 49162: {'lr': 3.4890093869249795e-07, 'samples': 25171456, 'steps': 49162, 'batch_loss/train': 0.6766390189295635} +12/29/2021 10:26:56 - INFO - codeparrot_training - Step 49163: {'lr': 3.48068929555051e-07, 'samples': 25171968, 'steps': 49163, 'batch_loss/train': 0.85291145183146} +12/29/2021 10:27:07 - INFO - codeparrot_training - Step 49164: {'lr': 3.47237912939391e-07, 'samples': 25172480, 'steps': 49164, 'batch_loss/train': 0.7219033065484837} +12/29/2021 10:27:19 - INFO - codeparrot_training - Step 49165: {'lr': 3.464078888488764e-07, 'samples': 25172992, 'steps': 49165, 'batch_loss/train': 0.7586476730066352} +12/29/2021 10:27:30 - INFO - codeparrot_training - Step 49166: {'lr': 3.455788572867546e-07, 'samples': 25173504, 'steps': 49166, 'batch_loss/train': 0.7602196438238025} +12/29/2021 10:27:40 - INFO - codeparrot_training - Step 49167: {'lr': 3.4475081825635634e-07, 'samples': 25174016, 'steps': 49167, 'batch_loss/train': 0.758226063568145} +12/29/2021 10:27:54 - INFO - codeparrot_training - Step 49168: {'lr': 3.4392377176095666e-07, 'samples': 25174528, 'steps': 49168, 'batch_loss/train': 0.842649033293128} +12/29/2021 10:28:05 - INFO - codeparrot_training - Step 49169: {'lr': 3.430977178038308e-07, 'samples': 25175040, 'steps': 49169, 'batch_loss/train': 0.6811456624418497} +12/29/2021 10:28:15 - INFO - codeparrot_training - Step 49170: {'lr': 3.422726563883094e-07, 'samples': 25175552, 'steps': 49170, 'batch_loss/train': 0.6945701590739191} +12/29/2021 10:28:30 - INFO - codeparrot_training - Step 49171: {'lr': 3.4144858751763987e-07, 'samples': 25176064, 'steps': 49171, 'batch_loss/train': 0.6565318093053065} +12/29/2021 10:28:40 - INFO - codeparrot_training - Step 49172: {'lr': 3.4062551119509733e-07, 'samples': 25176576, 'steps': 49172, 'batch_loss/train': 0.7102809716016054} +12/29/2021 10:28:51 - INFO - codeparrot_training - Step 49173: {'lr': 3.39803427423957e-07, 'samples': 25177088, 'steps': 49173, 'batch_loss/train': 0.7312065421720035} +12/29/2021 10:29:02 - INFO - codeparrot_training - Step 49174: {'lr': 3.38982336207494e-07, 'samples': 25177600, 'steps': 49174, 'batch_loss/train': 0.6666083126328886} +12/29/2021 10:29:14 - INFO - codeparrot_training - Step 49175: {'lr': 3.3816223754898346e-07, 'samples': 25178112, 'steps': 49175, 'batch_loss/train': 0.6932624503970146} +12/29/2021 10:29:25 - INFO - codeparrot_training - Step 49176: {'lr': 3.3734313145164507e-07, 'samples': 25178624, 'steps': 49176, 'batch_loss/train': 0.5442811063257977} +12/29/2021 10:29:35 - INFO - codeparrot_training - Step 49177: {'lr': 3.3652501791878176e-07, 'samples': 25179136, 'steps': 49177, 'batch_loss/train': 0.7387994080781937} +12/29/2021 10:29:47 - INFO - codeparrot_training - Step 49178: {'lr': 3.3570789695361317e-07, 'samples': 25179648, 'steps': 49178, 'batch_loss/train': 0.7117399671114981} +12/29/2021 10:29:58 - INFO - codeparrot_training - Step 49179: {'lr': 3.348917685594144e-07, 'samples': 25180160, 'steps': 49179, 'batch_loss/train': 0.6902230558916926} +12/29/2021 10:30:09 - INFO - codeparrot_training - Step 49180: {'lr': 3.340766327394051e-07, 'samples': 25180672, 'steps': 49180, 'batch_loss/train': 0.6700640092603862} +12/29/2021 10:30:23 - INFO - codeparrot_training - Step 49181: {'lr': 3.332624894968328e-07, 'samples': 25181184, 'steps': 49181, 'batch_loss/train': 0.7236184179782867} +12/29/2021 10:30:33 - INFO - codeparrot_training - Step 49182: {'lr': 3.3244933883494475e-07, 'samples': 25181696, 'steps': 49182, 'batch_loss/train': 0.6598679237067699} +12/29/2021 10:30:44 - INFO - codeparrot_training - Step 49183: {'lr': 3.3163718075698843e-07, 'samples': 25182208, 'steps': 49183, 'batch_loss/train': 0.6911373329930939} +12/29/2021 10:30:55 - INFO - codeparrot_training - Step 49184: {'lr': 3.30826015266128e-07, 'samples': 25182720, 'steps': 49184, 'batch_loss/train': 0.6381656827870756} +12/29/2021 10:31:07 - INFO - codeparrot_training - Step 49185: {'lr': 3.300158423656663e-07, 'samples': 25183232, 'steps': 49185, 'batch_loss/train': 0.7213368928059936} +12/29/2021 10:31:17 - INFO - codeparrot_training - Step 49186: {'lr': 3.292066620587675e-07, 'samples': 25183744, 'steps': 49186, 'batch_loss/train': 0.7353638401255012} +12/29/2021 10:31:28 - INFO - codeparrot_training - Step 49187: {'lr': 3.283984743487067e-07, 'samples': 25184256, 'steps': 49187, 'batch_loss/train': 0.7897276552394032} +12/29/2021 10:31:42 - INFO - codeparrot_training - Step 49188: {'lr': 3.275912792386204e-07, 'samples': 25184768, 'steps': 49188, 'batch_loss/train': 0.7410060038673691} +12/29/2021 10:31:52 - INFO - codeparrot_training - Step 49189: {'lr': 3.267850767317837e-07, 'samples': 25185280, 'steps': 49189, 'batch_loss/train': 0.770335235632956} +12/29/2021 10:32:03 - INFO - codeparrot_training - Step 49190: {'lr': 3.259798668313607e-07, 'samples': 25185792, 'steps': 49190, 'batch_loss/train': 0.729620979167521} +12/29/2021 10:32:15 - INFO - codeparrot_training - Step 49191: {'lr': 3.2517564954057113e-07, 'samples': 25186304, 'steps': 49191, 'batch_loss/train': 0.6631822381168604} +12/29/2021 10:32:26 - INFO - codeparrot_training - Step 49192: {'lr': 3.2437242486260675e-07, 'samples': 25186816, 'steps': 49192, 'batch_loss/train': 0.7783093694597483} +12/29/2021 10:32:36 - INFO - codeparrot_training - Step 49193: {'lr': 3.235701928006873e-07, 'samples': 25187328, 'steps': 49193, 'batch_loss/train': 0.7392956716939807} +12/29/2021 10:32:49 - INFO - codeparrot_training - Step 49194: {'lr': 3.227689533579492e-07, 'samples': 25187840, 'steps': 49194, 'batch_loss/train': 0.7746235271915793} +12/29/2021 10:32:59 - INFO - codeparrot_training - Step 49195: {'lr': 3.2196870653763975e-07, 'samples': 25188352, 'steps': 49195, 'batch_loss/train': 0.6115034180693328} +12/29/2021 10:33:10 - INFO - codeparrot_training - Step 49196: {'lr': 3.2116945234286764e-07, 'samples': 25188864, 'steps': 49196, 'batch_loss/train': 0.7674262369982898} +12/29/2021 10:33:20 - INFO - codeparrot_training - Step 49197: {'lr': 3.203711907768803e-07, 'samples': 25189376, 'steps': 49197, 'batch_loss/train': 0.7380993259139359} +12/29/2021 10:33:35 - INFO - codeparrot_training - Step 49198: {'lr': 3.1957392184281396e-07, 'samples': 25189888, 'steps': 49198, 'batch_loss/train': 0.6936133075505495} +12/29/2021 10:33:46 - INFO - codeparrot_training - Step 49199: {'lr': 3.1877764554383293e-07, 'samples': 25190400, 'steps': 49199, 'batch_loss/train': 0.7934108776971698} +12/29/2021 10:33:56 - INFO - codeparrot_training - Step 49200: {'lr': 3.17982361883129e-07, 'samples': 25190912, 'steps': 49200, 'batch_loss/train': 0.7007455305429175} +12/29/2021 10:34:08 - INFO - codeparrot_training - Step 49201: {'lr': 3.171880708638664e-07, 'samples': 25191424, 'steps': 49201, 'batch_loss/train': 0.7225021133199334} +12/29/2021 10:34:19 - INFO - codeparrot_training - Step 49202: {'lr': 3.1639477248915363e-07, 'samples': 25191936, 'steps': 49202, 'batch_loss/train': 0.8050575135275722} +12/29/2021 10:34:30 - INFO - codeparrot_training - Step 49203: {'lr': 3.1560246676221037e-07, 'samples': 25192448, 'steps': 49203, 'batch_loss/train': 0.5898607521085069} +12/29/2021 10:34:42 - INFO - codeparrot_training - Step 49204: {'lr': 3.1481115368614534e-07, 'samples': 25192960, 'steps': 49204, 'batch_loss/train': 0.6869602827355266} +12/29/2021 10:34:52 - INFO - codeparrot_training - Step 49205: {'lr': 3.140208332640948e-07, 'samples': 25193472, 'steps': 49205, 'batch_loss/train': 0.8202020898461342} +12/29/2021 10:35:03 - INFO - codeparrot_training - Step 49206: {'lr': 3.132315054992507e-07, 'samples': 25193984, 'steps': 49206, 'batch_loss/train': 0.6572330272756517} +12/29/2021 10:35:14 - INFO - codeparrot_training - Step 49207: {'lr': 3.124431703946939e-07, 'samples': 25194496, 'steps': 49207, 'batch_loss/train': 0.5333404240664095} +12/29/2021 10:35:26 - INFO - codeparrot_training - Step 49208: {'lr': 3.116558279535886e-07, 'samples': 25195008, 'steps': 49208, 'batch_loss/train': 0.7814849382266402} +12/29/2021 10:35:36 - INFO - codeparrot_training - Step 49209: {'lr': 3.1086947817907107e-07, 'samples': 25195520, 'steps': 49209, 'batch_loss/train': 0.5142152768094093} +12/29/2021 10:35:47 - INFO - codeparrot_training - Step 49210: {'lr': 3.1008412107425e-07, 'samples': 25196032, 'steps': 49210, 'batch_loss/train': 0.7287000394426286} +12/29/2021 10:36:02 - INFO - codeparrot_training - Step 49211: {'lr': 3.0929975664226174e-07, 'samples': 25196544, 'steps': 49211, 'batch_loss/train': 0.7166980365291238} +12/29/2021 10:36:12 - INFO - codeparrot_training - Step 49212: {'lr': 3.085163848862149e-07, 'samples': 25197056, 'steps': 49212, 'batch_loss/train': 0.9861694443970919} +12/29/2021 10:36:23 - INFO - codeparrot_training - Step 49213: {'lr': 3.0773400580921815e-07, 'samples': 25197568, 'steps': 49213, 'batch_loss/train': 0.6760018682107329} +12/29/2021 10:36:35 - INFO - codeparrot_training - Step 49214: {'lr': 3.069526194144079e-07, 'samples': 25198080, 'steps': 49214, 'batch_loss/train': 0.7224558964371681} +12/29/2021 10:36:46 - INFO - codeparrot_training - Step 49215: {'lr': 3.061722257048649e-07, 'samples': 25198592, 'steps': 49215, 'batch_loss/train': 0.7525770491920412} +12/29/2021 10:36:56 - INFO - codeparrot_training - Step 49216: {'lr': 3.053928246836979e-07, 'samples': 25199104, 'steps': 49216, 'batch_loss/train': 0.8123498307541013} +12/29/2021 10:37:07 - INFO - codeparrot_training - Step 49217: {'lr': 3.046144163540154e-07, 'samples': 25199616, 'steps': 49217, 'batch_loss/train': 0.8243666738271713} +12/29/2021 10:37:21 - INFO - codeparrot_training - Step 49218: {'lr': 3.0383700071887064e-07, 'samples': 25200128, 'steps': 49218, 'batch_loss/train': 0.6984492023475468} +12/29/2021 10:37:32 - INFO - codeparrot_training - Step 49219: {'lr': 3.030605777814277e-07, 'samples': 25200640, 'steps': 49219, 'batch_loss/train': 0.7318003862164915} +12/29/2021 10:37:42 - INFO - codeparrot_training - Step 49220: {'lr': 3.0228514754473966e-07, 'samples': 25201152, 'steps': 49220, 'batch_loss/train': 0.7120937635772862} +12/29/2021 10:37:54 - INFO - codeparrot_training - Step 49221: {'lr': 3.015107100118597e-07, 'samples': 25201664, 'steps': 49221, 'batch_loss/train': 0.7843536706641316} +12/29/2021 10:38:05 - INFO - codeparrot_training - Step 49222: {'lr': 3.0073726518589637e-07, 'samples': 25202176, 'steps': 49222, 'batch_loss/train': 0.7488589105196297} +12/29/2021 10:38:16 - INFO - codeparrot_training - Step 49223: {'lr': 2.9996481306990286e-07, 'samples': 25202688, 'steps': 49223, 'batch_loss/train': 0.7004292127676308} +12/29/2021 10:38:28 - INFO - codeparrot_training - Step 49224: {'lr': 2.9919335366698774e-07, 'samples': 25203200, 'steps': 49224, 'batch_loss/train': 0.6958168335258961} +12/29/2021 10:38:38 - INFO - codeparrot_training - Step 49225: {'lr': 2.9842288698020414e-07, 'samples': 25203712, 'steps': 49225, 'batch_loss/train': 0.6828588657081127} +12/29/2021 10:38:49 - INFO - codeparrot_training - Step 49226: {'lr': 2.976534130126052e-07, 'samples': 25204224, 'steps': 49226, 'batch_loss/train': 1.6004905039444566} +12/29/2021 10:39:00 - INFO - codeparrot_training - Step 49227: {'lr': 2.9688493176724397e-07, 'samples': 25204736, 'steps': 49227, 'batch_loss/train': 0.9075669748708606} +12/29/2021 10:39:14 - INFO - codeparrot_training - Step 49228: {'lr': 2.961174432471736e-07, 'samples': 25205248, 'steps': 49228, 'batch_loss/train': 0.7580412719398737} +12/29/2021 10:39:24 - INFO - codeparrot_training - Step 49229: {'lr': 2.95350947455475e-07, 'samples': 25205760, 'steps': 49229, 'batch_loss/train': 0.6714131240732968} +12/29/2021 10:39:35 - INFO - codeparrot_training - Step 49230: {'lr': 2.9458544439514567e-07, 'samples': 25206272, 'steps': 49230, 'batch_loss/train': 0.6983271958306432} +12/29/2021 10:39:47 - INFO - codeparrot_training - Step 49231: {'lr': 2.9382093406929436e-07, 'samples': 25206784, 'steps': 49231, 'batch_loss/train': 0.7813609093427658} +12/29/2021 10:39:58 - INFO - codeparrot_training - Step 49232: {'lr': 2.930574164809185e-07, 'samples': 25207296, 'steps': 49232, 'batch_loss/train': 0.7247022567898966} +12/29/2021 10:40:08 - INFO - codeparrot_training - Step 49233: {'lr': 2.9229489163304366e-07, 'samples': 25207808, 'steps': 49233, 'batch_loss/train': 0.730399392079562} +12/29/2021 10:40:20 - INFO - codeparrot_training - Step 49234: {'lr': 2.9153335952872286e-07, 'samples': 25208320, 'steps': 49234, 'batch_loss/train': 0.7613345049321651} +12/29/2021 10:40:31 - INFO - codeparrot_training - Step 49235: {'lr': 2.907728201709814e-07, 'samples': 25208832, 'steps': 49235, 'batch_loss/train': 0.643864270998165} +12/29/2021 10:40:42 - INFO - codeparrot_training - Step 49236: {'lr': 2.9001327356284466e-07, 'samples': 25209344, 'steps': 49236, 'batch_loss/train': 0.7816912503913045} +12/29/2021 10:40:52 - INFO - codeparrot_training - Step 49237: {'lr': 2.892547197073381e-07, 'samples': 25209856, 'steps': 49237, 'batch_loss/train': 0.7360447756946087} +12/29/2021 10:41:06 - INFO - codeparrot_training - Step 49238: {'lr': 2.884971586074592e-07, 'samples': 25210368, 'steps': 49238, 'batch_loss/train': 0.724791650660336} +12/29/2021 10:41:17 - INFO - codeparrot_training - Step 49239: {'lr': 2.8774059026623334e-07, 'samples': 25210880, 'steps': 49239, 'batch_loss/train': 0.7638902319595218} +12/29/2021 10:41:27 - INFO - codeparrot_training - Step 49240: {'lr': 2.869850146866304e-07, 'samples': 25211392, 'steps': 49240, 'batch_loss/train': 0.7356999353505671} +12/29/2021 10:41:40 - INFO - codeparrot_training - Step 49241: {'lr': 2.862304318717313e-07, 'samples': 25211904, 'steps': 49241, 'batch_loss/train': 0.661766892299056} +12/29/2021 10:41:50 - INFO - codeparrot_training - Step 49242: {'lr': 2.8547684182447796e-07, 'samples': 25212416, 'steps': 49242, 'batch_loss/train': 0.6944723450578749} +12/29/2021 10:42:01 - INFO - codeparrot_training - Step 49243: {'lr': 2.847242445478682e-07, 'samples': 25212928, 'steps': 49243, 'batch_loss/train': 0.9770870776847005} +12/29/2021 10:42:15 - INFO - codeparrot_training - Step 49244: {'lr': 2.8397264004489943e-07, 'samples': 25213440, 'steps': 49244, 'batch_loss/train': 0.9429112332873046} +12/29/2021 10:42:26 - INFO - codeparrot_training - Step 49245: {'lr': 2.832220283185971e-07, 'samples': 25213952, 'steps': 49245, 'batch_loss/train': 0.7777408133260906} +12/29/2021 10:42:36 - INFO - codeparrot_training - Step 49246: {'lr': 2.8247240937190333e-07, 'samples': 25214464, 'steps': 49246, 'batch_loss/train': 0.6572827599011362} +12/29/2021 10:42:48 - INFO - codeparrot_training - Step 49247: {'lr': 2.817237832077879e-07, 'samples': 25214976, 'steps': 49247, 'batch_loss/train': 0.7040327859576792} +12/29/2021 10:42:59 - INFO - codeparrot_training - Step 49248: {'lr': 2.809761498292762e-07, 'samples': 25215488, 'steps': 49248, 'batch_loss/train': 0.778260032646358} +12/29/2021 10:43:09 - INFO - codeparrot_training - Step 49249: {'lr': 2.8022950923931035e-07, 'samples': 25216000, 'steps': 49249, 'batch_loss/train': 0.7751928111538291} +12/29/2021 10:43:20 - INFO - codeparrot_training - Step 49250: {'lr': 2.7948386144083236e-07, 'samples': 25216512, 'steps': 49250, 'batch_loss/train': 0.6849391767755151} +12/29/2021 10:43:32 - INFO - codeparrot_training - Step 49251: {'lr': 2.7873920643686767e-07, 'samples': 25217024, 'steps': 49251, 'batch_loss/train': 0.7940009832382202} +12/29/2021 10:43:43 - INFO - codeparrot_training - Step 49252: {'lr': 2.7799554423033056e-07, 'samples': 25217536, 'steps': 49252, 'batch_loss/train': 0.70244705688674} +12/29/2021 10:43:53 - INFO - codeparrot_training - Step 49253: {'lr': 2.772528748241909e-07, 'samples': 25218048, 'steps': 49253, 'batch_loss/train': 0.7613879164564423} +12/29/2021 10:44:06 - INFO - codeparrot_training - Step 49254: {'lr': 2.765111982214186e-07, 'samples': 25218560, 'steps': 49254, 'batch_loss/train': 0.7488312888890505} +12/29/2021 10:44:17 - INFO - codeparrot_training - Step 49255: {'lr': 2.7577051442492784e-07, 'samples': 25219072, 'steps': 49255, 'batch_loss/train': 0.5664005246944726} +12/29/2021 10:44:27 - INFO - codeparrot_training - Step 49256: {'lr': 2.7503082343768857e-07, 'samples': 25219584, 'steps': 49256, 'batch_loss/train': 0.5277619027765468} +12/29/2021 10:44:41 - INFO - codeparrot_training - Step 49257: {'lr': 2.7429212526261514e-07, 'samples': 25220096, 'steps': 49257, 'batch_loss/train': 0.718966699205339} +12/29/2021 10:44:52 - INFO - codeparrot_training - Step 49258: {'lr': 2.7355441990270514e-07, 'samples': 25220608, 'steps': 49258, 'batch_loss/train': 0.7867319779470563} +12/29/2021 10:45:02 - INFO - codeparrot_training - Step 49259: {'lr': 2.7281770736081737e-07, 'samples': 25221120, 'steps': 49259, 'batch_loss/train': 0.7075098808854818} +12/29/2021 10:45:14 - INFO - codeparrot_training - Step 49260: {'lr': 2.7208198763992163e-07, 'samples': 25221632, 'steps': 49260, 'batch_loss/train': 0.5640979206655174} +12/29/2021 10:45:25 - INFO - codeparrot_training - Step 49261: {'lr': 2.7134726074293235e-07, 'samples': 25222144, 'steps': 49261, 'batch_loss/train': 0.6254623425193131} +12/29/2021 10:45:36 - INFO - codeparrot_training - Step 49262: {'lr': 2.7061352667276385e-07, 'samples': 25222656, 'steps': 49262, 'batch_loss/train': 0.6810957583365962} +12/29/2021 10:45:46 - INFO - codeparrot_training - Step 49263: {'lr': 2.6988078543235814e-07, 'samples': 25223168, 'steps': 49263, 'batch_loss/train': 0.7135852687060833} +12/29/2021 10:45:59 - INFO - codeparrot_training - Step 49264: {'lr': 2.691490370246019e-07, 'samples': 25223680, 'steps': 49264, 'batch_loss/train': 0.7878930941224098} +12/29/2021 10:46:10 - INFO - codeparrot_training - Step 49265: {'lr': 2.6841828145240943e-07, 'samples': 25224192, 'steps': 49265, 'batch_loss/train': 0.9987336611375213} +12/29/2021 10:46:20 - INFO - codeparrot_training - Step 49266: {'lr': 2.67688518718695e-07, 'samples': 25224704, 'steps': 49266, 'batch_loss/train': 0.72668366599828} +12/29/2021 10:46:34 - INFO - codeparrot_training - Step 49267: {'lr': 2.6695974882637306e-07, 'samples': 25225216, 'steps': 49267, 'batch_loss/train': 0.7053287702146918} +12/29/2021 10:46:45 - INFO - codeparrot_training - Step 49268: {'lr': 2.662319717783024e-07, 'samples': 25225728, 'steps': 49268, 'batch_loss/train': 0.6562412097118795} +12/29/2021 10:46:55 - INFO - codeparrot_training - Step 49269: {'lr': 2.65505187577425e-07, 'samples': 25226240, 'steps': 49269, 'batch_loss/train': 0.7504204765427858} +12/29/2021 10:47:07 - INFO - codeparrot_training - Step 49270: {'lr': 2.647793962265721e-07, 'samples': 25226752, 'steps': 49270, 'batch_loss/train': 0.6232676059007645} +12/29/2021 10:47:18 - INFO - codeparrot_training - Step 49271: {'lr': 2.640545977286579e-07, 'samples': 25227264, 'steps': 49271, 'batch_loss/train': 0.6849357062019408} +12/29/2021 10:47:29 - INFO - codeparrot_training - Step 49272: {'lr': 2.633307920865968e-07, 'samples': 25227776, 'steps': 49272, 'batch_loss/train': 0.6735047260299325} +12/29/2021 10:47:39 - INFO - codeparrot_training - Step 49273: {'lr': 2.6260797930321987e-07, 'samples': 25228288, 'steps': 49273, 'batch_loss/train': 0.7496019662357867} +12/29/2021 10:47:53 - INFO - codeparrot_training - Step 49274: {'lr': 2.618861593814137e-07, 'samples': 25228800, 'steps': 49274, 'batch_loss/train': 0.6601896472275257} +12/29/2021 10:48:04 - INFO - codeparrot_training - Step 49275: {'lr': 2.6116533232403707e-07, 'samples': 25229312, 'steps': 49275, 'batch_loss/train': 1.4945657178759575} +12/29/2021 10:48:15 - INFO - codeparrot_training - Step 49276: {'lr': 2.604454981340043e-07, 'samples': 25229824, 'steps': 49276, 'batch_loss/train': 1.5115963511634618} +12/29/2021 10:48:27 - INFO - codeparrot_training - Step 49277: {'lr': 2.597266568141188e-07, 'samples': 25230336, 'steps': 49277, 'batch_loss/train': 0.7336253207176924} +12/29/2021 10:48:37 - INFO - codeparrot_training - Step 49278: {'lr': 2.5900880836726703e-07, 'samples': 25230848, 'steps': 49278, 'batch_loss/train': 0.7400713637471199} +12/29/2021 10:48:48 - INFO - codeparrot_training - Step 49279: {'lr': 2.5829195279628017e-07, 'samples': 25231360, 'steps': 49279, 'batch_loss/train': 0.7697266051545739} +12/29/2021 10:49:00 - INFO - codeparrot_training - Step 49280: {'lr': 2.575760901040447e-07, 'samples': 25231872, 'steps': 49280, 'batch_loss/train': 0.6963088354095817} +12/29/2021 10:49:11 - INFO - codeparrot_training - Step 49281: {'lr': 2.5686122029339174e-07, 'samples': 25232384, 'steps': 49281, 'batch_loss/train': 0.7155153614003211} +12/29/2021 10:49:21 - INFO - codeparrot_training - Step 49282: {'lr': 2.561473433671524e-07, 'samples': 25232896, 'steps': 49282, 'batch_loss/train': 0.7418396342545748} +12/29/2021 10:49:32 - INFO - codeparrot_training - Step 49283: {'lr': 2.554344593281577e-07, 'samples': 25233408, 'steps': 49283, 'batch_loss/train': 0.701126444619149} +12/29/2021 10:49:46 - INFO - codeparrot_training - Step 49284: {'lr': 2.5472256817929416e-07, 'samples': 25233920, 'steps': 49284, 'batch_loss/train': 0.7533069495111704} +12/29/2021 10:49:56 - INFO - codeparrot_training - Step 49285: {'lr': 2.540116699233097e-07, 'samples': 25234432, 'steps': 49285, 'batch_loss/train': 0.8156485296785831} +12/29/2021 10:50:07 - INFO - codeparrot_training - Step 49286: {'lr': 2.5330176456309085e-07, 'samples': 25234944, 'steps': 49286, 'batch_loss/train': 0.7140177116380073} +12/29/2021 10:50:19 - INFO - codeparrot_training - Step 49287: {'lr': 2.5259285210146865e-07, 'samples': 25235456, 'steps': 49287, 'batch_loss/train': 0.7932735979557037} +12/29/2021 10:50:30 - INFO - codeparrot_training - Step 49288: {'lr': 2.5188493254119095e-07, 'samples': 25235968, 'steps': 49288, 'batch_loss/train': 0.6679100533947349} +12/29/2021 10:50:40 - INFO - codeparrot_training - Step 49289: {'lr': 2.5117800588514426e-07, 'samples': 25236480, 'steps': 49289, 'batch_loss/train': 0.6745926523581147} +12/29/2021 10:50:54 - INFO - codeparrot_training - Step 49290: {'lr': 2.504720721361042e-07, 'samples': 25236992, 'steps': 49290, 'batch_loss/train': 0.7179815173149109} +12/29/2021 10:51:05 - INFO - codeparrot_training - Step 49291: {'lr': 2.497671312969019e-07, 'samples': 25237504, 'steps': 49291, 'batch_loss/train': 0.7013215636834502} +12/29/2021 10:51:16 - INFO - codeparrot_training - Step 49292: {'lr': 2.4906318337031273e-07, 'samples': 25238016, 'steps': 49292, 'batch_loss/train': 0.674258538056165} +12/29/2021 10:51:26 - INFO - codeparrot_training - Step 49293: {'lr': 2.483602283591402e-07, 'samples': 25238528, 'steps': 49293, 'batch_loss/train': 0.7030937452800572} +12/29/2021 10:51:38 - INFO - codeparrot_training - Step 49294: {'lr': 2.476582662661875e-07, 'samples': 25239040, 'steps': 49294, 'batch_loss/train': 0.6062643523328006} +12/29/2021 10:51:49 - INFO - codeparrot_training - Step 49295: {'lr': 2.4695729709425796e-07, 'samples': 25239552, 'steps': 49295, 'batch_loss/train': 0.6616833889856935} +12/29/2021 10:52:00 - INFO - codeparrot_training - Step 49296: {'lr': 2.462573208460994e-07, 'samples': 25240064, 'steps': 49296, 'batch_loss/train': 0.7906480251695029} +12/29/2021 10:52:12 - INFO - codeparrot_training - Step 49297: {'lr': 2.455583375245152e-07, 'samples': 25240576, 'steps': 49297, 'batch_loss/train': 0.6707731753122061} +12/29/2021 10:52:22 - INFO - codeparrot_training - Step 49298: {'lr': 2.4486034713230853e-07, 'samples': 25241088, 'steps': 49298, 'batch_loss/train': 0.5845672901778016} +12/29/2021 10:52:33 - INFO - codeparrot_training - Step 49299: {'lr': 2.441633496722273e-07, 'samples': 25241600, 'steps': 49299, 'batch_loss/train': 0.7299083706457168} +12/29/2021 10:52:45 - INFO - codeparrot_training - Step 49300: {'lr': 2.4346734514704704e-07, 'samples': 25242112, 'steps': 49300, 'batch_loss/train': 0.8455735864117742} +12/29/2021 10:52:56 - INFO - codeparrot_training - Step 49301: {'lr': 2.4277233355954334e-07, 'samples': 25242624, 'steps': 49301, 'batch_loss/train': 0.7513341708108783} +12/29/2021 10:53:06 - INFO - codeparrot_training - Step 49302: {'lr': 2.420783149124639e-07, 'samples': 25243136, 'steps': 49302, 'batch_loss/train': 0.8019305691123009} +12/29/2021 10:53:17 - INFO - codeparrot_training - Step 49303: {'lr': 2.4138528920855664e-07, 'samples': 25243648, 'steps': 49303, 'batch_loss/train': 0.6083602583967149} +12/29/2021 10:53:31 - INFO - codeparrot_training - Step 49304: {'lr': 2.406932564506248e-07, 'samples': 25244160, 'steps': 49304, 'batch_loss/train': 0.645183046348393} +12/29/2021 10:53:42 - INFO - codeparrot_training - Step 49305: {'lr': 2.4000221664138846e-07, 'samples': 25244672, 'steps': 49305, 'batch_loss/train': 0.7135252635926008} +12/29/2021 10:53:52 - INFO - codeparrot_training - Step 49306: {'lr': 2.393121697835954e-07, 'samples': 25245184, 'steps': 49306, 'batch_loss/train': 0.7116830563172698} +12/29/2021 10:54:04 - INFO - codeparrot_training - Step 49307: {'lr': 2.3862311587996567e-07, 'samples': 25245696, 'steps': 49307, 'batch_loss/train': 0.7858765637502074} +12/29/2021 10:54:15 - INFO - codeparrot_training - Step 49308: {'lr': 2.379350549333026e-07, 'samples': 25246208, 'steps': 49308, 'batch_loss/train': 0.7091975067742169} +12/29/2021 10:54:26 - INFO - codeparrot_training - Step 49309: {'lr': 2.372479869462707e-07, 'samples': 25246720, 'steps': 49309, 'batch_loss/train': 0.6397831356152892} +12/29/2021 10:54:38 - INFO - codeparrot_training - Step 49310: {'lr': 2.3656191192167332e-07, 'samples': 25247232, 'steps': 49310, 'batch_loss/train': 0.7557029211893678} +12/29/2021 10:54:49 - INFO - codeparrot_training - Step 49311: {'lr': 2.3587682986217496e-07, 'samples': 25247744, 'steps': 49311, 'batch_loss/train': 0.7988558290526271} +12/29/2021 10:54:59 - INFO - codeparrot_training - Step 49312: {'lr': 2.3519274077052344e-07, 'samples': 25248256, 'steps': 49312, 'batch_loss/train': 0.5557177467271686} +12/29/2021 10:55:10 - INFO - codeparrot_training - Step 49313: {'lr': 2.345096446494388e-07, 'samples': 25248768, 'steps': 49313, 'batch_loss/train': 0.7364036468788981} +12/29/2021 10:55:24 - INFO - codeparrot_training - Step 49314: {'lr': 2.3382754150161333e-07, 'samples': 25249280, 'steps': 49314, 'batch_loss/train': 0.7464653346687555} +12/29/2021 10:55:34 - INFO - codeparrot_training - Step 49315: {'lr': 2.331464313298226e-07, 'samples': 25249792, 'steps': 49315, 'batch_loss/train': 0.7437340496107936} +12/29/2021 10:55:45 - INFO - codeparrot_training - Step 49316: {'lr': 2.3246631413670337e-07, 'samples': 25250304, 'steps': 49316, 'batch_loss/train': 0.679720469750464} +12/29/2021 10:55:57 - INFO - codeparrot_training - Step 49317: {'lr': 2.3178718992500348e-07, 'samples': 25250816, 'steps': 49317, 'batch_loss/train': 0.6280224575311877} +12/29/2021 10:56:08 - INFO - codeparrot_training - Step 49318: {'lr': 2.3110905869741517e-07, 'samples': 25251328, 'steps': 49318, 'batch_loss/train': 0.7260826269921381} +12/29/2021 10:56:18 - INFO - codeparrot_training - Step 49319: {'lr': 2.30431920456603e-07, 'samples': 25251840, 'steps': 49319, 'batch_loss/train': 0.7232455210760236} +12/29/2021 10:56:30 - INFO - codeparrot_training - Step 49320: {'lr': 2.2975577520531477e-07, 'samples': 25252352, 'steps': 49320, 'batch_loss/train': 0.7817441076040268} +12/29/2021 10:56:41 - INFO - codeparrot_training - Step 49321: {'lr': 2.2908062294618725e-07, 'samples': 25252864, 'steps': 49321, 'batch_loss/train': 0.5414368548954371} +12/29/2021 10:56:52 - INFO - codeparrot_training - Step 49322: {'lr': 2.284064636819405e-07, 'samples': 25253376, 'steps': 49322, 'batch_loss/train': 0.8277892600744963} +12/29/2021 10:57:06 - INFO - codeparrot_training - Step 49323: {'lr': 2.2773329741521133e-07, 'samples': 25253888, 'steps': 49323, 'batch_loss/train': 0.7802560180425644} +12/29/2021 10:57:16 - INFO - codeparrot_training - Step 49324: {'lr': 2.2706112414874748e-07, 'samples': 25254400, 'steps': 49324, 'batch_loss/train': 0.6301242653280497} +12/29/2021 10:57:27 - INFO - codeparrot_training - Step 49325: {'lr': 2.2638994388513022e-07, 'samples': 25254912, 'steps': 49325, 'batch_loss/train': 0.7690768046304584} +12/29/2021 10:57:38 - INFO - codeparrot_training - Step 49326: {'lr': 2.257197566271074e-07, 'samples': 25255424, 'steps': 49326, 'batch_loss/train': 0.6371289873786736} +12/29/2021 10:57:50 - INFO - codeparrot_training - Step 49327: {'lr': 2.250505623773158e-07, 'samples': 25255936, 'steps': 49327, 'batch_loss/train': 0.7554929596371949} +12/29/2021 10:58:01 - INFO - codeparrot_training - Step 49328: {'lr': 2.2438236113839218e-07, 'samples': 25256448, 'steps': 49328, 'batch_loss/train': 0.7407253817655146} +12/29/2021 10:58:11 - INFO - codeparrot_training - Step 49329: {'lr': 2.2371515291300105e-07, 'samples': 25256960, 'steps': 49329, 'batch_loss/train': 0.6081867134198546} +12/29/2021 10:58:23 - INFO - codeparrot_training - Step 49330: {'lr': 2.2304893770383473e-07, 'samples': 25257472, 'steps': 49330, 'batch_loss/train': 0.6809135158546269} +12/29/2021 10:58:34 - INFO - codeparrot_training - Step 49331: {'lr': 2.2238371551350222e-07, 'samples': 25257984, 'steps': 49331, 'batch_loss/train': 0.7427145945839584} +12/29/2021 10:58:45 - INFO - codeparrot_training - Step 49332: {'lr': 2.217194863446681e-07, 'samples': 25258496, 'steps': 49332, 'batch_loss/train': 0.6463158689439297} +12/29/2021 10:58:59 - INFO - codeparrot_training - Step 49333: {'lr': 2.2105625019996912e-07, 'samples': 25259008, 'steps': 49333, 'batch_loss/train': 0.6948817062657326} +12/29/2021 10:59:09 - INFO - codeparrot_training - Step 49334: {'lr': 2.203940070820143e-07, 'samples': 25259520, 'steps': 49334, 'batch_loss/train': 0.6627803798764944} +12/29/2021 10:59:20 - INFO - codeparrot_training - Step 49335: {'lr': 2.1973275699346818e-07, 'samples': 25260032, 'steps': 49335, 'batch_loss/train': 0.6891114264726639} +12/29/2021 10:59:30 - INFO - codeparrot_training - Step 49336: {'lr': 2.1907249993696755e-07, 'samples': 25260544, 'steps': 49336, 'batch_loss/train': 0.7103944350965321} +12/29/2021 10:59:43 - INFO - codeparrot_training - Step 49337: {'lr': 2.1841323591509365e-07, 'samples': 25261056, 'steps': 49337, 'batch_loss/train': 0.6008953605778515} +12/29/2021 10:59:53 - INFO - codeparrot_training - Step 49338: {'lr': 2.1775496493051104e-07, 'samples': 25261568, 'steps': 49338, 'batch_loss/train': 0.7062697573564947} +12/29/2021 11:00:04 - INFO - codeparrot_training - Step 49339: {'lr': 2.1709768698582877e-07, 'samples': 25262080, 'steps': 49339, 'batch_loss/train': 0.6504008499905467} +12/29/2021 11:00:16 - INFO - codeparrot_training - Step 49340: {'lr': 2.164414020836558e-07, 'samples': 25262592, 'steps': 49340, 'batch_loss/train': 0.6525309160351753} +12/29/2021 11:00:27 - INFO - codeparrot_training - Step 49341: {'lr': 2.1578611022657345e-07, 'samples': 25263104, 'steps': 49341, 'batch_loss/train': 0.6788215446285903} +12/29/2021 11:00:37 - INFO - codeparrot_training - Step 49342: {'lr': 2.151318114172185e-07, 'samples': 25263616, 'steps': 49342, 'batch_loss/train': 0.6675360496155918} +12/29/2021 11:00:51 - INFO - codeparrot_training - Step 49343: {'lr': 2.1447850565819992e-07, 'samples': 25264128, 'steps': 49343, 'batch_loss/train': 0.7304994423175231} +12/29/2021 11:01:02 - INFO - codeparrot_training - Step 49344: {'lr': 2.1382619295207129e-07, 'samples': 25264640, 'steps': 49344, 'batch_loss/train': 0.6795962168835104} +12/29/2021 11:01:13 - INFO - codeparrot_training - Step 49345: {'lr': 2.1317487330146934e-07, 'samples': 25265152, 'steps': 49345, 'batch_loss/train': 0.7338958522304893} +12/29/2021 11:01:23 - INFO - codeparrot_training - Step 49346: {'lr': 2.1252454670897536e-07, 'samples': 25265664, 'steps': 49346, 'batch_loss/train': 0.5990864767809398} +12/29/2021 11:01:35 - INFO - codeparrot_training - Step 49347: {'lr': 2.1187521317717061e-07, 'samples': 25266176, 'steps': 49347, 'batch_loss/train': 0.830209163017571} +12/29/2021 11:01:46 - INFO - codeparrot_training - Step 49348: {'lr': 2.1122687270860863e-07, 'samples': 25266688, 'steps': 49348, 'batch_loss/train': 0.7681824169121683} +12/29/2021 11:01:57 - INFO - codeparrot_training - Step 49349: {'lr': 2.1057952530592617e-07, 'samples': 25267200, 'steps': 49349, 'batch_loss/train': 0.6753078768961132} +12/29/2021 11:02:11 - INFO - codeparrot_training - Step 49350: {'lr': 2.09933170971649e-07, 'samples': 25267712, 'steps': 49350, 'batch_loss/train': 0.7573577058501542} +12/29/2021 11:02:21 - INFO - codeparrot_training - Step 49351: {'lr': 2.0928780970833062e-07, 'samples': 25268224, 'steps': 49351, 'batch_loss/train': 0.7305026557296515} +12/29/2021 11:02:32 - INFO - codeparrot_training - Step 49352: {'lr': 2.0864344151860782e-07, 'samples': 25268736, 'steps': 49352, 'batch_loss/train': 0.7362006194889545} +12/29/2021 11:02:44 - INFO - codeparrot_training - Step 49353: {'lr': 2.0800006640497859e-07, 'samples': 25269248, 'steps': 49353, 'batch_loss/train': 0.7266898583620787} +12/29/2021 11:02:54 - INFO - codeparrot_training - Step 49354: {'lr': 2.0735768437002421e-07, 'samples': 25269760, 'steps': 49354, 'batch_loss/train': 0.705537929199636} +12/29/2021 11:03:05 - INFO - codeparrot_training - Step 49355: {'lr': 2.067162954162982e-07, 'samples': 25270272, 'steps': 49355, 'batch_loss/train': 0.6768641951493919} +12/29/2021 11:03:17 - INFO - codeparrot_training - Step 49356: {'lr': 2.060758995463541e-07, 'samples': 25270784, 'steps': 49356, 'batch_loss/train': 0.6968769326340407} +12/29/2021 11:03:28 - INFO - codeparrot_training - Step 49357: {'lr': 2.0543649676274535e-07, 'samples': 25271296, 'steps': 49357, 'batch_loss/train': 0.7396020339801908} +12/29/2021 11:03:39 - INFO - codeparrot_training - Step 49358: {'lr': 2.0479808706799774e-07, 'samples': 25271808, 'steps': 49358, 'batch_loss/train': 0.6784496619366109} +12/29/2021 11:03:49 - INFO - codeparrot_training - Step 49359: {'lr': 2.0416067046463706e-07, 'samples': 25272320, 'steps': 49359, 'batch_loss/train': 0.6200970550999045} +12/29/2021 11:04:03 - INFO - codeparrot_training - Step 49360: {'lr': 2.0352424695521677e-07, 'samples': 25272832, 'steps': 49360, 'batch_loss/train': 0.6897961043287069} +12/29/2021 11:04:14 - INFO - codeparrot_training - Step 49361: {'lr': 2.028888165422904e-07, 'samples': 25273344, 'steps': 49361, 'batch_loss/train': 0.6508042006753385} +12/29/2021 11:04:24 - INFO - codeparrot_training - Step 49362: {'lr': 2.0225437922832824e-07, 'samples': 25273856, 'steps': 49362, 'batch_loss/train': 0.7220627479255199} +12/29/2021 11:04:37 - INFO - codeparrot_training - Step 49363: {'lr': 2.016209350159115e-07, 'samples': 25274368, 'steps': 49363, 'batch_loss/train': 0.7333598607219756} +12/29/2021 11:04:47 - INFO - codeparrot_training - Step 49364: {'lr': 2.0098848390751044e-07, 'samples': 25274880, 'steps': 49364, 'batch_loss/train': 0.6924068918451667} +12/29/2021 11:04:58 - INFO - codeparrot_training - Step 49365: {'lr': 2.0035702590565087e-07, 'samples': 25275392, 'steps': 49365, 'batch_loss/train': 0.6969940746203065} +12/29/2021 11:05:10 - INFO - codeparrot_training - Step 49366: {'lr': 1.9972656101285846e-07, 'samples': 25275904, 'steps': 49366, 'batch_loss/train': 0.6423132159397937} +12/29/2021 11:05:21 - INFO - codeparrot_training - Step 49367: {'lr': 1.9909708923163128e-07, 'samples': 25276416, 'steps': 49367, 'batch_loss/train': 0.6997628309763968} +12/29/2021 11:05:31 - INFO - codeparrot_training - Step 49368: {'lr': 1.9846861056449507e-07, 'samples': 25276928, 'steps': 49368, 'batch_loss/train': 0.7426755174237769} +12/29/2021 11:05:42 - INFO - codeparrot_training - Step 49369: {'lr': 1.9784112501389227e-07, 'samples': 25277440, 'steps': 49369, 'batch_loss/train': 0.6835236917249858} +12/29/2021 11:05:54 - INFO - codeparrot_training - Step 49370: {'lr': 1.972146325823765e-07, 'samples': 25277952, 'steps': 49370, 'batch_loss/train': 0.747746713925153} +12/29/2021 11:06:05 - INFO - codeparrot_training - Step 49371: {'lr': 1.9658913327239015e-07, 'samples': 25278464, 'steps': 49371, 'batch_loss/train': 0.7267895615659654} +12/29/2021 11:06:15 - INFO - codeparrot_training - Step 49372: {'lr': 1.95964627086459e-07, 'samples': 25278976, 'steps': 49372, 'batch_loss/train': 0.7461863490752876} +12/29/2021 11:06:29 - INFO - codeparrot_training - Step 49373: {'lr': 1.9534111402705335e-07, 'samples': 25279488, 'steps': 49373, 'batch_loss/train': 1.1430549176875502} +12/29/2021 11:06:40 - INFO - codeparrot_training - Step 49374: {'lr': 1.9471859409664338e-07, 'samples': 25280000, 'steps': 49374, 'batch_loss/train': 0.7400333513505757} +12/29/2021 11:06:51 - INFO - codeparrot_training - Step 49375: {'lr': 1.9409706729772713e-07, 'samples': 25280512, 'steps': 49375, 'batch_loss/train': 0.674532572273165} +12/29/2021 11:07:03 - INFO - codeparrot_training - Step 49376: {'lr': 1.9347653363271932e-07, 'samples': 25281024, 'steps': 49376, 'batch_loss/train': 0.7059712819755077} +12/29/2021 11:07:13 - INFO - codeparrot_training - Step 49377: {'lr': 1.9285699310414574e-07, 'samples': 25281536, 'steps': 49377, 'batch_loss/train': 0.7192618338158354} +12/29/2021 11:07:24 - INFO - codeparrot_training - Step 49378: {'lr': 1.9223844571447658e-07, 'samples': 25282048, 'steps': 49378, 'batch_loss/train': 0.7686593541875482} +12/29/2021 11:07:36 - INFO - codeparrot_training - Step 49379: {'lr': 1.9162089146609885e-07, 'samples': 25282560, 'steps': 49379, 'batch_loss/train': 0.6661438813898712} +12/29/2021 11:07:47 - INFO - codeparrot_training - Step 49380: {'lr': 1.910043303615383e-07, 'samples': 25283072, 'steps': 49380, 'batch_loss/train': 0.7433510972186923} +12/29/2021 11:07:57 - INFO - codeparrot_training - Step 49381: {'lr': 1.903887624032097e-07, 'samples': 25283584, 'steps': 49381, 'batch_loss/train': 0.5764368506206665} +12/29/2021 11:08:08 - INFO - codeparrot_training - Step 49382: {'lr': 1.8977418759358323e-07, 'samples': 25284096, 'steps': 49382, 'batch_loss/train': 0.8962613558396697} +12/29/2021 11:08:22 - INFO - codeparrot_training - Step 49383: {'lr': 1.8916060593507366e-07, 'samples': 25284608, 'steps': 49383, 'batch_loss/train': 0.8569598654285073} +12/29/2021 11:08:33 - INFO - codeparrot_training - Step 49384: {'lr': 1.8854801743015127e-07, 'samples': 25285120, 'steps': 49384, 'batch_loss/train': 0.7559631103649735} +12/29/2021 11:08:43 - INFO - codeparrot_training - Step 49385: {'lr': 1.8793642208123074e-07, 'samples': 25285632, 'steps': 49385, 'batch_loss/train': 0.6060180200729519} +12/29/2021 11:08:56 - INFO - codeparrot_training - Step 49386: {'lr': 1.873258198907546e-07, 'samples': 25286144, 'steps': 49386, 'batch_loss/train': 0.6174922990612686} +12/29/2021 11:09:06 - INFO - codeparrot_training - Step 49387: {'lr': 1.8671621086113754e-07, 'samples': 25286656, 'steps': 49387, 'batch_loss/train': 0.6867718230932951} +12/29/2021 11:09:17 - INFO - codeparrot_training - Step 49388: {'lr': 1.8610759499479435e-07, 'samples': 25287168, 'steps': 49388, 'batch_loss/train': 0.6801299334038049} +12/29/2021 11:09:28 - INFO - codeparrot_training - Step 49389: {'lr': 1.8549997229419523e-07, 'samples': 25287680, 'steps': 49389, 'batch_loss/train': 0.7754363999702036} +12/29/2021 11:09:41 - INFO - codeparrot_training - Step 49390: {'lr': 1.8489334276169944e-07, 'samples': 25288192, 'steps': 49390, 'batch_loss/train': 0.7629470793763176} +12/29/2021 11:09:52 - INFO - codeparrot_training - Step 49391: {'lr': 1.8428770639974945e-07, 'samples': 25288704, 'steps': 49391, 'batch_loss/train': 0.7910648547112942} +12/29/2021 11:10:03 - INFO - codeparrot_training - Step 49392: {'lr': 1.8368306321073225e-07, 'samples': 25289216, 'steps': 49392, 'batch_loss/train': 0.7018304192461073} +12/29/2021 11:10:15 - INFO - codeparrot_training - Step 49393: {'lr': 1.830794131970903e-07, 'samples': 25289728, 'steps': 49393, 'batch_loss/train': 0.8221284504979849} +12/29/2021 11:10:26 - INFO - codeparrot_training - Step 49394: {'lr': 1.8247675636118288e-07, 'samples': 25290240, 'steps': 49394, 'batch_loss/train': 0.7325582085177302} +12/29/2021 11:10:36 - INFO - codeparrot_training - Step 49395: {'lr': 1.8187509270542468e-07, 'samples': 25290752, 'steps': 49395, 'batch_loss/train': 0.7634750795550644} +12/29/2021 11:10:48 - INFO - codeparrot_training - Step 49396: {'lr': 1.812744222322027e-07, 'samples': 25291264, 'steps': 49396, 'batch_loss/train': 0.7508445386774838} +12/29/2021 11:10:59 - INFO - codeparrot_training - Step 49397: {'lr': 1.806747449439039e-07, 'samples': 25291776, 'steps': 49397, 'batch_loss/train': 0.7354395147413015} +12/29/2021 11:11:10 - INFO - codeparrot_training - Step 49398: {'lr': 1.800760608429153e-07, 'samples': 25292288, 'steps': 49398, 'batch_loss/train': 0.5461051961174235} +12/29/2021 11:11:23 - INFO - codeparrot_training - Step 49399: {'lr': 1.7947836993159606e-07, 'samples': 25292800, 'steps': 49399, 'batch_loss/train': 0.6066608070395887} +12/29/2021 11:11:34 - INFO - codeparrot_training - Step 49400: {'lr': 1.7888167221236095e-07, 'samples': 25293312, 'steps': 49400, 'batch_loss/train': 0.7587936855852604} +12/29/2021 11:11:45 - INFO - codeparrot_training - Step 49401: {'lr': 1.7828596768756923e-07, 'samples': 25293824, 'steps': 49401, 'batch_loss/train': 0.676149055827409} +12/29/2021 11:11:55 - INFO - codeparrot_training - Step 49402: {'lr': 1.7769125635958005e-07, 'samples': 25294336, 'steps': 49402, 'batch_loss/train': 0.7490142425522208} +12/29/2021 11:12:08 - INFO - codeparrot_training - Step 49403: {'lr': 1.770975382307527e-07, 'samples': 25294848, 'steps': 49403, 'batch_loss/train': 0.7872518766671419} +12/29/2021 11:12:19 - INFO - codeparrot_training - Step 49404: {'lr': 1.7650481330344636e-07, 'samples': 25295360, 'steps': 49404, 'batch_loss/train': 1.3014731798321009} +12/29/2021 11:12:30 - INFO - codeparrot_training - Step 49405: {'lr': 1.7591308158004803e-07, 'samples': 25295872, 'steps': 49405, 'batch_loss/train': 0.7473590220324695} +12/29/2021 11:12:42 - INFO - codeparrot_training - Step 49406: {'lr': 1.753223430628892e-07, 'samples': 25296384, 'steps': 49406, 'batch_loss/train': 0.7609118018299341} +12/29/2021 11:12:52 - INFO - codeparrot_training - Step 49407: {'lr': 1.747325977543013e-07, 'samples': 25296896, 'steps': 49407, 'batch_loss/train': 0.7359719891101122} +12/29/2021 11:13:03 - INFO - codeparrot_training - Step 49408: {'lr': 1.7414384565667129e-07, 'samples': 25297408, 'steps': 49408, 'batch_loss/train': 0.6256393934600055} +12/29/2021 11:13:18 - INFO - codeparrot_training - Step 49409: {'lr': 1.7355608677230295e-07, 'samples': 25297920, 'steps': 49409, 'batch_loss/train': 0.7576334276236594} +12/29/2021 11:13:28 - INFO - codeparrot_training - Step 49410: {'lr': 1.729693211035277e-07, 'samples': 25298432, 'steps': 49410, 'batch_loss/train': 0.7715007183142006} +12/29/2021 11:13:39 - INFO - codeparrot_training - Step 49411: {'lr': 1.7238354865273253e-07, 'samples': 25298944, 'steps': 49411, 'batch_loss/train': 0.770542121026665} +12/29/2021 11:13:50 - INFO - codeparrot_training - Step 49412: {'lr': 1.7179876942216567e-07, 'samples': 25299456, 'steps': 49412, 'batch_loss/train': 0.6909829170908779} +12/29/2021 11:14:02 - INFO - codeparrot_training - Step 49413: {'lr': 1.7121498341421405e-07, 'samples': 25299968, 'steps': 49413, 'batch_loss/train': 0.6839496120810509} +12/29/2021 11:14:13 - INFO - codeparrot_training - Step 49414: {'lr': 1.7063219063118141e-07, 'samples': 25300480, 'steps': 49414, 'batch_loss/train': 0.820237924810499} +12/29/2021 11:14:23 - INFO - codeparrot_training - Step 49415: {'lr': 1.7005039107539923e-07, 'samples': 25300992, 'steps': 49415, 'batch_loss/train': 0.7053587147966027} +12/29/2021 11:14:35 - INFO - codeparrot_training - Step 49416: {'lr': 1.6946958474914344e-07, 'samples': 25301504, 'steps': 49416, 'batch_loss/train': 0.7428724407218397} +12/29/2021 11:14:46 - INFO - codeparrot_training - Step 49417: {'lr': 1.688897716547455e-07, 'samples': 25302016, 'steps': 49417, 'batch_loss/train': 0.7824080670252442} +12/29/2021 11:14:57 - INFO - codeparrot_training - Step 49418: {'lr': 1.6831095179450916e-07, 'samples': 25302528, 'steps': 49418, 'batch_loss/train': 0.783117407001555} +12/29/2021 11:15:07 - INFO - codeparrot_training - Step 49419: {'lr': 1.6773312517076588e-07, 'samples': 25303040, 'steps': 49419, 'batch_loss/train': 0.814361804514192} +12/29/2021 11:15:21 - INFO - codeparrot_training - Step 49420: {'lr': 1.671562917857361e-07, 'samples': 25303552, 'steps': 49420, 'batch_loss/train': 1.0590558536350727} +12/29/2021 11:15:32 - INFO - codeparrot_training - Step 49421: {'lr': 1.6658045164177904e-07, 'samples': 25304064, 'steps': 49421, 'batch_loss/train': 0.659408101812005} +12/29/2021 11:15:42 - INFO - codeparrot_training - Step 49422: {'lr': 1.6600560474117067e-07, 'samples': 25304576, 'steps': 49422, 'batch_loss/train': 0.7132363594137132} +12/29/2021 11:15:54 - INFO - codeparrot_training - Step 49423: {'lr': 1.6543175108618692e-07, 'samples': 25305088, 'steps': 49423, 'batch_loss/train': 0.714714182773605} +12/29/2021 11:16:05 - INFO - codeparrot_training - Step 49424: {'lr': 1.6485889067913151e-07, 'samples': 25305600, 'steps': 49424, 'batch_loss/train': 0.7315037841908634} +12/29/2021 11:16:16 - INFO - codeparrot_training - Step 49425: {'lr': 1.642870235222249e-07, 'samples': 25306112, 'steps': 49425, 'batch_loss/train': 0.7742658648639917} +12/29/2021 11:16:28 - INFO - codeparrot_training - Step 49426: {'lr': 1.6371614961779856e-07, 'samples': 25306624, 'steps': 49426, 'batch_loss/train': 0.8255768232047558} +12/29/2021 11:16:39 - INFO - codeparrot_training - Step 49427: {'lr': 1.631462689681007e-07, 'samples': 25307136, 'steps': 49427, 'batch_loss/train': 0.706240420229733} +12/29/2021 11:16:49 - INFO - codeparrot_training - Step 49428: {'lr': 1.6257738157540724e-07, 'samples': 25307648, 'steps': 49428, 'batch_loss/train': 0.5740001538069919} +12/29/2021 11:17:03 - INFO - codeparrot_training - Step 49429: {'lr': 1.620094874419664e-07, 'samples': 25308160, 'steps': 49429, 'batch_loss/train': 0.7050670827738941} +12/29/2021 11:17:14 - INFO - codeparrot_training - Step 49430: {'lr': 1.6144258657002642e-07, 'samples': 25308672, 'steps': 49430, 'batch_loss/train': 0.7517241379246116} +12/29/2021 11:17:25 - INFO - codeparrot_training - Step 49431: {'lr': 1.6087667896189095e-07, 'samples': 25309184, 'steps': 49431, 'batch_loss/train': 0.8087068067397922} +12/29/2021 11:17:35 - INFO - codeparrot_training - Step 49432: {'lr': 1.6031176461972496e-07, 'samples': 25309696, 'steps': 49432, 'batch_loss/train': 0.5709084101836197} +12/29/2021 11:17:47 - INFO - codeparrot_training - Step 49433: {'lr': 1.5974784354585992e-07, 'samples': 25310208, 'steps': 49433, 'batch_loss/train': 0.6447751699015498} +12/29/2021 11:17:58 - INFO - codeparrot_training - Step 49434: {'lr': 1.591849157424885e-07, 'samples': 25310720, 'steps': 49434, 'batch_loss/train': 0.7710631880909204} +12/29/2021 11:18:09 - INFO - codeparrot_training - Step 49435: {'lr': 1.5862298121185892e-07, 'samples': 25311232, 'steps': 49435, 'batch_loss/train': 0.7722279792651534} +12/29/2021 11:18:21 - INFO - codeparrot_training - Step 49436: {'lr': 1.5806203995621937e-07, 'samples': 25311744, 'steps': 49436, 'batch_loss/train': 0.7546388069167733} +12/29/2021 11:18:31 - INFO - codeparrot_training - Step 49437: {'lr': 1.575020919777903e-07, 'samples': 25312256, 'steps': 49437, 'batch_loss/train': 0.6500430857413448} +12/29/2021 11:18:42 - INFO - codeparrot_training - Step 49438: {'lr': 1.5694313727876442e-07, 'samples': 25312768, 'steps': 49438, 'batch_loss/train': 0.6954149744706228} +12/29/2021 11:18:56 - INFO - codeparrot_training - Step 49439: {'lr': 1.5638517586141764e-07, 'samples': 25313280, 'steps': 49439, 'batch_loss/train': 0.8153699939139187} +12/29/2021 11:19:07 - INFO - codeparrot_training - Step 49440: {'lr': 1.558282077279427e-07, 'samples': 25313792, 'steps': 49440, 'batch_loss/train': 0.6693235406419262} +12/29/2021 11:19:17 - INFO - codeparrot_training - Step 49441: {'lr': 1.5527223288056003e-07, 'samples': 25314304, 'steps': 49441, 'batch_loss/train': 0.7501918622292578} +12/29/2021 11:19:28 - INFO - codeparrot_training - Step 49442: {'lr': 1.5471725132149006e-07, 'samples': 25314816, 'steps': 49442, 'batch_loss/train': 0.7137623592279851} +12/29/2021 11:19:40 - INFO - codeparrot_training - Step 49443: {'lr': 1.541632630529255e-07, 'samples': 25315328, 'steps': 49443, 'batch_loss/train': 0.7006565070478246} +12/29/2021 11:19:51 - INFO - codeparrot_training - Step 49444: {'lr': 1.53610268077059e-07, 'samples': 25315840, 'steps': 49444, 'batch_loss/train': 0.6717297714203596} +12/29/2021 11:20:01 - INFO - codeparrot_training - Step 49445: {'lr': 1.5305826639608333e-07, 'samples': 25316352, 'steps': 49445, 'batch_loss/train': 0.759131315164268} +12/29/2021 11:20:15 - INFO - codeparrot_training - Step 49446: {'lr': 1.5250725801224663e-07, 'samples': 25316864, 'steps': 49446, 'batch_loss/train': 0.6787793486728333} +12/29/2021 11:20:26 - INFO - codeparrot_training - Step 49447: {'lr': 1.5195724292765833e-07, 'samples': 25317376, 'steps': 49447, 'batch_loss/train': 0.6822316944599152} +12/29/2021 11:20:37 - INFO - codeparrot_training - Step 49448: {'lr': 1.5140822114456664e-07, 'samples': 25317888, 'steps': 49448, 'batch_loss/train': 0.6122149908915162} +12/29/2021 11:20:49 - INFO - codeparrot_training - Step 49449: {'lr': 1.5086019266516428e-07, 'samples': 25318400, 'steps': 49449, 'batch_loss/train': 0.6578040085732937} +12/29/2021 11:20:59 - INFO - codeparrot_training - Step 49450: {'lr': 1.503131574915606e-07, 'samples': 25318912, 'steps': 49450, 'batch_loss/train': 0.6534743597730994} +12/29/2021 11:21:10 - INFO - codeparrot_training - Step 49451: {'lr': 1.4976711562600387e-07, 'samples': 25319424, 'steps': 49451, 'batch_loss/train': 0.7604107824154198} +12/29/2021 11:21:21 - INFO - codeparrot_training - Step 49452: {'lr': 1.4922206707063123e-07, 'samples': 25319936, 'steps': 49452, 'batch_loss/train': 0.69407067168504} +12/29/2021 11:21:33 - INFO - codeparrot_training - Step 49453: {'lr': 1.486780118275799e-07, 'samples': 25320448, 'steps': 49453, 'batch_loss/train': 0.6941923167323694} +12/29/2021 11:21:44 - INFO - codeparrot_training - Step 49454: {'lr': 1.4813494989907028e-07, 'samples': 25320960, 'steps': 49454, 'batch_loss/train': 0.7552373534999788} +12/29/2021 11:21:54 - INFO - codeparrot_training - Step 49455: {'lr': 1.4759288128723957e-07, 'samples': 25321472, 'steps': 49455, 'batch_loss/train': 0.6304348530247808} +12/29/2021 11:22:06 - INFO - codeparrot_training - Step 49456: {'lr': 1.4705180599422495e-07, 'samples': 25321984, 'steps': 49456, 'batch_loss/train': 0.7522726939059794} +12/29/2021 11:22:17 - INFO - codeparrot_training - Step 49457: {'lr': 1.4651172402219138e-07, 'samples': 25322496, 'steps': 49457, 'batch_loss/train': 0.7936521027004346} +12/29/2021 11:22:28 - INFO - codeparrot_training - Step 49458: {'lr': 1.4597263537330373e-07, 'samples': 25323008, 'steps': 49458, 'batch_loss/train': 0.8023766214028001} +12/29/2021 11:22:41 - INFO - codeparrot_training - Step 49459: {'lr': 1.454345400496715e-07, 'samples': 25323520, 'steps': 49459, 'batch_loss/train': 0.7183523122221231} +12/29/2021 11:22:52 - INFO - codeparrot_training - Step 49460: {'lr': 1.4489743805345956e-07, 'samples': 25324032, 'steps': 49460, 'batch_loss/train': 0.7130498946644366} +12/29/2021 11:23:03 - INFO - codeparrot_training - Step 49461: {'lr': 1.4436132938680512e-07, 'samples': 25324544, 'steps': 49461, 'batch_loss/train': 0.6563909306423739} +12/29/2021 11:23:15 - INFO - codeparrot_training - Step 49462: {'lr': 1.4382621405178986e-07, 'samples': 25325056, 'steps': 49462, 'batch_loss/train': 0.6723926754202694} +12/29/2021 11:23:25 - INFO - codeparrot_training - Step 49463: {'lr': 1.4329209205060646e-07, 'samples': 25325568, 'steps': 49463, 'batch_loss/train': 0.7357280743890442} +12/29/2021 11:23:36 - INFO - codeparrot_training - Step 49464: {'lr': 1.4275896338536432e-07, 'samples': 25326080, 'steps': 49464, 'batch_loss/train': 0.7774893429595977} +12/29/2021 11:23:47 - INFO - codeparrot_training - Step 49465: {'lr': 1.4222682805814514e-07, 'samples': 25326592, 'steps': 49465, 'batch_loss/train': 0.8593833111226559} +12/29/2021 11:23:59 - INFO - codeparrot_training - Step 49466: {'lr': 1.4169568607108607e-07, 'samples': 25327104, 'steps': 49466, 'batch_loss/train': 0.7449297029525042} +12/29/2021 11:24:09 - INFO - codeparrot_training - Step 49467: {'lr': 1.4116553742632433e-07, 'samples': 25327616, 'steps': 49467, 'batch_loss/train': 0.6647104159928858} +12/29/2021 11:24:20 - INFO - codeparrot_training - Step 49468: {'lr': 1.4063638212594154e-07, 'samples': 25328128, 'steps': 49468, 'batch_loss/train': 0.6319048891309649} +12/29/2021 11:24:34 - INFO - codeparrot_training - Step 49469: {'lr': 1.401082201720194e-07, 'samples': 25328640, 'steps': 49469, 'batch_loss/train': 0.6162313409149647} +12/29/2021 11:24:45 - INFO - codeparrot_training - Step 49470: {'lr': 1.395810515666951e-07, 'samples': 25329152, 'steps': 49470, 'batch_loss/train': 0.6715942923910916} +12/29/2021 11:24:55 - INFO - codeparrot_training - Step 49471: {'lr': 1.3905487631207804e-07, 'samples': 25329664, 'steps': 49471, 'batch_loss/train': 0.6367541195359081} +12/29/2021 11:25:08 - INFO - codeparrot_training - Step 49472: {'lr': 1.3852969441022212e-07, 'samples': 25330176, 'steps': 49472, 'batch_loss/train': 0.7237378912977874} +12/29/2021 11:25:19 - INFO - codeparrot_training - Step 49473: {'lr': 1.3800550586320903e-07, 'samples': 25330688, 'steps': 49473, 'batch_loss/train': 0.5741105217020959} +12/29/2021 11:25:29 - INFO - codeparrot_training - Step 49474: {'lr': 1.374823106731482e-07, 'samples': 25331200, 'steps': 49474, 'batch_loss/train': 0.7197969313710928} +12/29/2021 11:25:43 - INFO - codeparrot_training - Step 49475: {'lr': 1.3696010884214903e-07, 'samples': 25331712, 'steps': 49475, 'batch_loss/train': 0.5326621285639703} +12/29/2021 11:25:54 - INFO - codeparrot_training - Step 49476: {'lr': 1.3643890037220997e-07, 'samples': 25332224, 'steps': 49476, 'batch_loss/train': 0.6955746505409479} +12/29/2021 11:26:04 - INFO - codeparrot_training - Step 49477: {'lr': 1.3591868526546813e-07, 'samples': 25332736, 'steps': 49477, 'batch_loss/train': 0.6451351579162292} +12/29/2021 11:26:15 - INFO - codeparrot_training - Step 49478: {'lr': 1.3539946352394972e-07, 'samples': 25333248, 'steps': 49478, 'batch_loss/train': 0.8061218615621328} +12/29/2021 11:26:27 - INFO - codeparrot_training - Step 49479: {'lr': 1.3488123514973637e-07, 'samples': 25333760, 'steps': 49479, 'batch_loss/train': 0.6636226193513721} +12/29/2021 11:26:38 - INFO - codeparrot_training - Step 49480: {'lr': 1.3436400014490979e-07, 'samples': 25334272, 'steps': 49480, 'batch_loss/train': 0.745873102452606} +12/29/2021 11:26:48 - INFO - codeparrot_training - Step 49481: {'lr': 1.338477585114961e-07, 'samples': 25334784, 'steps': 49481, 'batch_loss/train': 0.7477726712822914} +12/29/2021 11:27:01 - INFO - codeparrot_training - Step 49482: {'lr': 1.3333251025154924e-07, 'samples': 25335296, 'steps': 49482, 'batch_loss/train': 0.6536796437576413} +12/29/2021 11:27:12 - INFO - codeparrot_training - Step 49483: {'lr': 1.3281825536715087e-07, 'samples': 25335808, 'steps': 49483, 'batch_loss/train': 0.6791275087743998} +12/29/2021 11:27:22 - INFO - codeparrot_training - Step 49484: {'lr': 1.3230499386029938e-07, 'samples': 25336320, 'steps': 49484, 'batch_loss/train': 0.7592086037620902} +12/29/2021 11:27:33 - INFO - codeparrot_training - Step 49485: {'lr': 1.3179272573307645e-07, 'samples': 25336832, 'steps': 49485, 'batch_loss/train': 0.6213183160871267} +12/29/2021 11:27:47 - INFO - codeparrot_training - Step 49486: {'lr': 1.3128145098748045e-07, 'samples': 25337344, 'steps': 49486, 'batch_loss/train': 0.7046960042789578} +12/29/2021 11:27:58 - INFO - codeparrot_training - Step 49487: {'lr': 1.3077116962556533e-07, 'samples': 25337856, 'steps': 49487, 'batch_loss/train': 0.7695923852152191} +12/29/2021 11:28:08 - INFO - codeparrot_training - Step 49488: {'lr': 1.3026188164935727e-07, 'samples': 25338368, 'steps': 49488, 'batch_loss/train': 0.6867910162545741} +12/29/2021 11:28:21 - INFO - codeparrot_training - Step 49489: {'lr': 1.2975358706088237e-07, 'samples': 25338880, 'steps': 49489, 'batch_loss/train': 0.8079290213063359} +12/29/2021 11:28:31 - INFO - codeparrot_training - Step 49490: {'lr': 1.292462858621668e-07, 'samples': 25339392, 'steps': 49490, 'batch_loss/train': 0.7379980306141078} +12/29/2021 11:28:42 - INFO - codeparrot_training - Step 49491: {'lr': 1.2873997805520897e-07, 'samples': 25339904, 'steps': 49491, 'batch_loss/train': 0.7196735041216016} +12/29/2021 11:28:54 - INFO - codeparrot_training - Step 49492: {'lr': 1.2823466364203507e-07, 'samples': 25340416, 'steps': 49492, 'batch_loss/train': 0.5771764968521893} +12/29/2021 11:29:05 - INFO - codeparrot_training - Step 49493: {'lr': 1.277303426246712e-07, 'samples': 25340928, 'steps': 49493, 'batch_loss/train': 0.7566359769552946} +12/29/2021 11:29:15 - INFO - codeparrot_training - Step 49494: {'lr': 1.2722701500508805e-07, 'samples': 25341440, 'steps': 49494, 'batch_loss/train': 0.7744531026110053} +12/29/2021 11:29:26 - INFO - codeparrot_training - Step 49495: {'lr': 1.2672468078528398e-07, 'samples': 25341952, 'steps': 49495, 'batch_loss/train': 0.6923135067336261} +12/29/2021 11:29:38 - INFO - codeparrot_training - Step 49496: {'lr': 1.2622333996731294e-07, 'samples': 25342464, 'steps': 49496, 'batch_loss/train': 0.8919157697819173} +12/29/2021 11:29:49 - INFO - codeparrot_training - Step 49497: {'lr': 1.2572299255309006e-07, 'samples': 25342976, 'steps': 49497, 'batch_loss/train': 0.6724886740557849} +12/29/2021 11:29:59 - INFO - codeparrot_training - Step 49498: {'lr': 1.2522363854466923e-07, 'samples': 25343488, 'steps': 49498, 'batch_loss/train': 0.771542236674577} +12/29/2021 11:30:13 - INFO - codeparrot_training - Step 49499: {'lr': 1.247252779440211e-07, 'samples': 25344000, 'steps': 49499, 'batch_loss/train': 0.6994452304206789} +12/29/2021 11:30:24 - INFO - codeparrot_training - Step 49500: {'lr': 1.2422791075308859e-07, 'samples': 25344512, 'steps': 49500, 'batch_loss/train': 0.646261372603476} +12/29/2021 11:30:35 - INFO - codeparrot_training - Step 49501: {'lr': 1.2373153697389783e-07, 'samples': 25345024, 'steps': 49501, 'batch_loss/train': 0.6219487031921744} +12/29/2021 11:30:47 - INFO - codeparrot_training - Step 49502: {'lr': 1.2323615660836396e-07, 'samples': 25345536, 'steps': 49502, 'batch_loss/train': 0.715386288240552} +12/29/2021 11:30:57 - INFO - codeparrot_training - Step 49503: {'lr': 1.227417696585409e-07, 'samples': 25346048, 'steps': 49503, 'batch_loss/train': 0.8089624703861773} +12/29/2021 11:31:08 - INFO - codeparrot_training - Step 49504: {'lr': 1.22248376126316e-07, 'samples': 25346560, 'steps': 49504, 'batch_loss/train': 0.7450744090601802} +12/29/2021 11:31:22 - INFO - codeparrot_training - Step 49505: {'lr': 1.217559760136877e-07, 'samples': 25347072, 'steps': 49505, 'batch_loss/train': 0.7793391146697104} +12/29/2021 11:31:32 - INFO - codeparrot_training - Step 49506: {'lr': 1.212645693225989e-07, 'samples': 25347584, 'steps': 49506, 'batch_loss/train': 0.7417790032923222} +12/29/2021 11:31:43 - INFO - codeparrot_training - Step 49507: {'lr': 1.2077415605502017e-07, 'samples': 25348096, 'steps': 49507, 'batch_loss/train': 0.728390634059906} +12/29/2021 11:31:54 - INFO - codeparrot_training - Step 49508: {'lr': 1.202847362128945e-07, 'samples': 25348608, 'steps': 49508, 'batch_loss/train': 0.6607215607073158} +12/29/2021 11:32:06 - INFO - codeparrot_training - Step 49509: {'lr': 1.1979630979816468e-07, 'samples': 25349120, 'steps': 49509, 'batch_loss/train': 0.7270897105336189} +12/29/2021 11:32:17 - INFO - codeparrot_training - Step 49510: {'lr': 1.1930887681280146e-07, 'samples': 25349632, 'steps': 49510, 'batch_loss/train': 0.7292198287323117} +12/29/2021 11:32:27 - INFO - codeparrot_training - Step 49511: {'lr': 1.1882243725866437e-07, 'samples': 25350144, 'steps': 49511, 'batch_loss/train': 0.7227557888254523} +12/29/2021 11:32:39 - INFO - codeparrot_training - Step 49512: {'lr': 1.1833699113777963e-07, 'samples': 25350656, 'steps': 49512, 'batch_loss/train': 0.7297208178788424} +12/29/2021 11:32:50 - INFO - codeparrot_training - Step 49513: {'lr': 1.1785253845200683e-07, 'samples': 25351168, 'steps': 49513, 'batch_loss/train': 0.8224280881695449} +12/29/2021 11:33:00 - INFO - codeparrot_training - Step 49514: {'lr': 1.1736907920331663e-07, 'samples': 25351680, 'steps': 49514, 'batch_loss/train': 0.7529900409281254} +12/29/2021 11:33:14 - INFO - codeparrot_training - Step 49515: {'lr': 1.168866133935964e-07, 'samples': 25352192, 'steps': 49515, 'batch_loss/train': 0.7552061164751649} +12/29/2021 11:33:25 - INFO - codeparrot_training - Step 49516: {'lr': 1.164051410248168e-07, 'samples': 25352704, 'steps': 49516, 'batch_loss/train': 0.7672306629829109} +12/29/2021 11:33:36 - INFO - codeparrot_training - Step 49517: {'lr': 1.1592466209883745e-07, 'samples': 25353216, 'steps': 49517, 'batch_loss/train': 0.7942891838029027} +12/29/2021 11:33:48 - INFO - codeparrot_training - Step 49518: {'lr': 1.1544517661757347e-07, 'samples': 25353728, 'steps': 49518, 'batch_loss/train': 0.8280387255363166} +12/29/2021 11:33:58 - INFO - codeparrot_training - Step 49519: {'lr': 1.1496668458296777e-07, 'samples': 25354240, 'steps': 49519, 'batch_loss/train': 0.77610575966537} +12/29/2021 11:34:09 - INFO - codeparrot_training - Step 49520: {'lr': 1.1448918599690772e-07, 'samples': 25354752, 'steps': 49520, 'batch_loss/train': 0.7334063942544162} +12/29/2021 11:34:20 - INFO - codeparrot_training - Step 49521: {'lr': 1.1401268086125294e-07, 'samples': 25355264, 'steps': 49521, 'batch_loss/train': 0.6780309833120555} +12/29/2021 11:34:33 - INFO - codeparrot_training - Step 49522: {'lr': 1.1353716917794632e-07, 'samples': 25355776, 'steps': 49522, 'batch_loss/train': 0.6684462444391102} +12/29/2021 11:34:43 - INFO - codeparrot_training - Step 49523: {'lr': 1.1306265094887525e-07, 'samples': 25356288, 'steps': 49523, 'batch_loss/train': 0.8014104254543781} +12/29/2021 11:34:54 - INFO - codeparrot_training - Step 49524: {'lr': 1.1258912617589933e-07, 'samples': 25356800, 'steps': 49524, 'batch_loss/train': 0.73196901823394} +12/29/2021 11:35:08 - INFO - codeparrot_training - Step 49525: {'lr': 1.1211659486093373e-07, 'samples': 25357312, 'steps': 49525, 'batch_loss/train': 0.4339001253247261} +12/29/2021 11:35:19 - INFO - codeparrot_training - Step 49526: {'lr': 1.1164505700581029e-07, 'samples': 25357824, 'steps': 49526, 'batch_loss/train': 0.7407907778397202} +12/29/2021 11:35:29 - INFO - codeparrot_training - Step 49527: {'lr': 1.1117451261247191e-07, 'samples': 25358336, 'steps': 49527, 'batch_loss/train': 0.7401115242391825} +12/29/2021 11:35:40 - INFO - codeparrot_training - Step 49528: {'lr': 1.107049616827227e-07, 'samples': 25358848, 'steps': 49528, 'batch_loss/train': 0.6894469894468784} +12/29/2021 11:35:52 - INFO - codeparrot_training - Step 49529: {'lr': 1.1023640421845005e-07, 'samples': 25359360, 'steps': 49529, 'batch_loss/train': 0.6172813335433602} +12/29/2021 11:36:03 - INFO - codeparrot_training - Step 49530: {'lr': 1.0976884022154132e-07, 'samples': 25359872, 'steps': 49530, 'batch_loss/train': 0.8472817139700055} +12/29/2021 11:36:13 - INFO - codeparrot_training - Step 49531: {'lr': 1.093022696938284e-07, 'samples': 25360384, 'steps': 49531, 'batch_loss/train': 0.766892016865313} +12/29/2021 11:36:25 - INFO - codeparrot_training - Step 49532: {'lr': 1.0883669263717089e-07, 'samples': 25360896, 'steps': 49532, 'batch_loss/train': 0.8292896212078631} +12/29/2021 11:36:36 - INFO - codeparrot_training - Step 49533: {'lr': 1.0837210905342843e-07, 'samples': 25361408, 'steps': 49533, 'batch_loss/train': 0.78414296079427} +12/29/2021 11:36:47 - INFO - codeparrot_training - Step 49534: {'lr': 1.0790851894443287e-07, 'samples': 25361920, 'steps': 49534, 'batch_loss/train': 0.682243843562901} +12/29/2021 11:36:59 - INFO - codeparrot_training - Step 49535: {'lr': 1.0744592231204387e-07, 'samples': 25362432, 'steps': 49535, 'batch_loss/train': 0.742088760714978} +12/29/2021 11:37:10 - INFO - codeparrot_training - Step 49536: {'lr': 1.0698431915809325e-07, 'samples': 25362944, 'steps': 49536, 'batch_loss/train': 0.5523319074127357} +12/29/2021 11:37:20 - INFO - codeparrot_training - Step 49537: {'lr': 1.0652370948441293e-07, 'samples': 25363456, 'steps': 49537, 'batch_loss/train': 0.725419987575151} +12/29/2021 11:37:34 - INFO - codeparrot_training - Step 49538: {'lr': 1.0606409329283473e-07, 'samples': 25363968, 'steps': 49538, 'batch_loss/train': 0.6494230413809419} +12/29/2021 11:37:45 - INFO - codeparrot_training - Step 49539: {'lr': 1.056054705852183e-07, 'samples': 25364480, 'steps': 49539, 'batch_loss/train': 0.6909134443849325} +12/29/2021 11:37:55 - INFO - codeparrot_training - Step 49540: {'lr': 1.0514784136333999e-07, 'samples': 25364992, 'steps': 49540, 'batch_loss/train': 0.6451571751385927} +12/29/2021 11:38:06 - INFO - codeparrot_training - Step 49541: {'lr': 1.0469120562903168e-07, 'samples': 25365504, 'steps': 49541, 'batch_loss/train': 0.6509219277650118} +12/29/2021 11:38:18 - INFO - codeparrot_training - Step 49542: {'lr': 1.042355633841252e-07, 'samples': 25366016, 'steps': 49542, 'batch_loss/train': 0.6547350576147437} +12/29/2021 11:38:29 - INFO - codeparrot_training - Step 49543: {'lr': 1.0378091463039697e-07, 'samples': 25366528, 'steps': 49543, 'batch_loss/train': 0.6795052766101435} +12/29/2021 11:38:39 - INFO - codeparrot_training - Step 49544: {'lr': 1.0332725936970654e-07, 'samples': 25367040, 'steps': 49544, 'batch_loss/train': 0.7256547319702804} +12/29/2021 11:38:53 - INFO - codeparrot_training - Step 49545: {'lr': 1.0287459760380257e-07, 'samples': 25367552, 'steps': 49545, 'batch_loss/train': 0.7089067851193249} +12/29/2021 11:39:04 - INFO - codeparrot_training - Step 49546: {'lr': 1.0242292933454467e-07, 'samples': 25368064, 'steps': 49546, 'batch_loss/train': 0.6988802945706993} +12/29/2021 11:39:15 - INFO - codeparrot_training - Step 49547: {'lr': 1.0197225456368142e-07, 'samples': 25368576, 'steps': 49547, 'batch_loss/train': 0.6660681343637407} +12/29/2021 11:39:27 - INFO - codeparrot_training - Step 49548: {'lr': 1.0152257329301695e-07, 'samples': 25369088, 'steps': 49548, 'batch_loss/train': 0.6504021314904094} +12/29/2021 11:39:38 - INFO - codeparrot_training - Step 49549: {'lr': 1.0107388552432761e-07, 'samples': 25369600, 'steps': 49549, 'batch_loss/train': 0.7751597492024302} +12/29/2021 11:39:48 - INFO - codeparrot_training - Step 49550: {'lr': 1.0062619125941752e-07, 'samples': 25370112, 'steps': 49550, 'batch_loss/train': 0.7980882069095969} +12/29/2021 11:39:59 - INFO - codeparrot_training - Step 49551: {'lr': 1.0017949050006303e-07, 'samples': 25370624, 'steps': 49551, 'batch_loss/train': 0.6961646601557732} +12/29/2021 11:40:11 - INFO - codeparrot_training - Step 49552: {'lr': 9.97337832480405e-08, 'samples': 25371136, 'steps': 49552, 'batch_loss/train': 0.6593493702821434} +12/29/2021 11:40:22 - INFO - codeparrot_training - Step 49553: {'lr': 9.928906950512628e-08, 'samples': 25371648, 'steps': 49553, 'batch_loss/train': 0.739242687355727} +12/29/2021 11:40:32 - INFO - codeparrot_training - Step 49554: {'lr': 9.884534927306899e-08, 'samples': 25372160, 'steps': 49554, 'batch_loss/train': 0.7050925400108099} +12/29/2021 11:40:46 - INFO - codeparrot_training - Step 49555: {'lr': 9.840262255364496e-08, 'samples': 25372672, 'steps': 49555, 'batch_loss/train': 0.89733827393502} +12/29/2021 11:40:57 - INFO - codeparrot_training - Step 49556: {'lr': 9.796088934860281e-08, 'samples': 25373184, 'steps': 49556, 'batch_loss/train': 0.6405396135523915} +12/29/2021 11:41:07 - INFO - codeparrot_training - Step 49557: {'lr': 9.752014965971889e-08, 'samples': 25373696, 'steps': 49557, 'batch_loss/train': 1.6777953170239925} +12/29/2021 11:41:20 - INFO - codeparrot_training - Step 49558: {'lr': 9.708040348874181e-08, 'samples': 25374208, 'steps': 49558, 'batch_loss/train': 0.7302074246108532} +12/29/2021 11:41:30 - INFO - codeparrot_training - Step 49559: {'lr': 9.66416508373924e-08, 'samples': 25374720, 'steps': 49559, 'batch_loss/train': 0.6598636899143457} +12/29/2021 11:41:41 - INFO - codeparrot_training - Step 49560: {'lr': 9.620389170744703e-08, 'samples': 25375232, 'steps': 49560, 'batch_loss/train': 0.6797405779361725} +12/29/2021 11:41:52 - INFO - codeparrot_training - Step 49561: {'lr': 9.57671261006543e-08, 'samples': 25375744, 'steps': 49561, 'batch_loss/train': 0.7071912558749318} +12/29/2021 11:42:06 - INFO - codeparrot_training - Step 49562: {'lr': 9.53313540187073e-08, 'samples': 25376256, 'steps': 49562, 'batch_loss/train': 0.6957278363406658} +12/29/2021 11:42:17 - INFO - codeparrot_training - Step 49563: {'lr': 9.489657546335462e-08, 'samples': 25376768, 'steps': 49563, 'batch_loss/train': 0.7194976601749659} +12/29/2021 11:42:27 - INFO - codeparrot_training - Step 49564: {'lr': 9.446279043634488e-08, 'samples': 25377280, 'steps': 49564, 'batch_loss/train': 0.8517761959228665} +12/29/2021 11:42:39 - INFO - codeparrot_training - Step 49565: {'lr': 9.40299989393989e-08, 'samples': 25377792, 'steps': 49565, 'batch_loss/train': 0.7493004580028355} +12/29/2021 11:42:50 - INFO - codeparrot_training - Step 49566: {'lr': 9.359820097423755e-08, 'samples': 25378304, 'steps': 49566, 'batch_loss/train': 0.6885150247253478} +12/29/2021 11:43:00 - INFO - codeparrot_training - Step 49567: {'lr': 9.31673965425539e-08, 'samples': 25378816, 'steps': 49567, 'batch_loss/train': 0.6080987232271582} +12/29/2021 11:43:13 - INFO - codeparrot_training - Step 49568: {'lr': 9.273758564606882e-08, 'samples': 25379328, 'steps': 49568, 'batch_loss/train': 0.7256855154410005} +12/29/2021 11:43:23 - INFO - codeparrot_training - Step 49569: {'lr': 9.230876828650314e-08, 'samples': 25379840, 'steps': 49569, 'batch_loss/train': 0.6355132830794901} +12/29/2021 11:43:34 - INFO - codeparrot_training - Step 49570: {'lr': 9.188094446554995e-08, 'samples': 25380352, 'steps': 49570, 'batch_loss/train': 0.7238946449942887} +12/29/2021 11:43:45 - INFO - codeparrot_training - Step 49571: {'lr': 9.14541141849301e-08, 'samples': 25380864, 'steps': 49571, 'batch_loss/train': 0.7426099944859743} +12/29/2021 11:43:57 - INFO - codeparrot_training - Step 49572: {'lr': 9.102827744630892e-08, 'samples': 25381376, 'steps': 49572, 'batch_loss/train': 0.7617309633642435} +12/29/2021 11:44:07 - INFO - codeparrot_training - Step 49573: {'lr': 9.060343425140727e-08, 'samples': 25381888, 'steps': 49573, 'batch_loss/train': 0.7322879899293184} +12/29/2021 11:44:18 - INFO - codeparrot_training - Step 49574: {'lr': 9.017958460191821e-08, 'samples': 25382400, 'steps': 49574, 'batch_loss/train': 0.7136316429823637} +12/29/2021 11:44:32 - INFO - codeparrot_training - Step 49575: {'lr': 8.975672849950711e-08, 'samples': 25382912, 'steps': 49575, 'batch_loss/train': 0.679483360494487} +12/29/2021 11:44:42 - INFO - codeparrot_training - Step 49576: {'lr': 8.933486594583929e-08, 'samples': 25383424, 'steps': 49576, 'batch_loss/train': 0.76906053267885} +12/29/2021 11:44:53 - INFO - codeparrot_training - Step 49577: {'lr': 8.891399694263558e-08, 'samples': 25383936, 'steps': 49577, 'batch_loss/train': 0.6558454008772969} +12/29/2021 11:45:05 - INFO - codeparrot_training - Step 49578: {'lr': 8.849412149153358e-08, 'samples': 25384448, 'steps': 49578, 'batch_loss/train': 0.793184407055378} +12/29/2021 11:45:16 - INFO - codeparrot_training - Step 49579: {'lr': 8.807523959422636e-08, 'samples': 25384960, 'steps': 49579, 'batch_loss/train': 0.7962601878680289} +12/29/2021 11:45:27 - INFO - codeparrot_training - Step 49580: {'lr': 8.765735125237928e-08, 'samples': 25385472, 'steps': 49580, 'batch_loss/train': 0.6469507440924644} +12/29/2021 11:45:39 - INFO - codeparrot_training - Step 49581: {'lr': 8.724045646760215e-08, 'samples': 25385984, 'steps': 49581, 'batch_loss/train': 0.7186060715466738} +12/29/2021 11:45:49 - INFO - codeparrot_training - Step 49582: {'lr': 8.682455524164357e-08, 'samples': 25386496, 'steps': 49582, 'batch_loss/train': 0.6766341091133654} +12/29/2021 11:46:00 - INFO - codeparrot_training - Step 49583: {'lr': 8.64096475760856e-08, 'samples': 25387008, 'steps': 49583, 'batch_loss/train': 0.8088371022604406} +12/29/2021 11:46:10 - INFO - codeparrot_training - Step 49584: {'lr': 8.59957334725936e-08, 'samples': 25387520, 'steps': 49584, 'batch_loss/train': 0.7476252615451813} +12/29/2021 11:46:24 - INFO - codeparrot_training - Step 49585: {'lr': 8.558281293280512e-08, 'samples': 25388032, 'steps': 49585, 'batch_loss/train': 0.7336369496770203} +12/29/2021 11:46:35 - INFO - codeparrot_training - Step 49586: {'lr': 8.517088595841328e-08, 'samples': 25388544, 'steps': 49586, 'batch_loss/train': 0.6409126111539081} +12/29/2021 11:46:46 - INFO - codeparrot_training - Step 49587: {'lr': 8.47599525510001e-08, 'samples': 25389056, 'steps': 49587, 'batch_loss/train': 0.7862218976952136} +12/29/2021 11:46:58 - INFO - codeparrot_training - Step 49588: {'lr': 8.435001271220321e-08, 'samples': 25389568, 'steps': 49588, 'batch_loss/train': 0.7286938913166523} +12/29/2021 11:47:09 - INFO - codeparrot_training - Step 49589: {'lr': 8.394106644368793e-08, 'samples': 25390080, 'steps': 49589, 'batch_loss/train': 0.7032261975109577} +12/29/2021 11:47:19 - INFO - codeparrot_training - Step 49590: {'lr': 8.353311374703631e-08, 'samples': 25390592, 'steps': 49590, 'batch_loss/train': 0.8818223725538701} +12/29/2021 11:47:33 - INFO - codeparrot_training - Step 49591: {'lr': 8.312615462391371e-08, 'samples': 25391104, 'steps': 49591, 'batch_loss/train': 0.7237468645907938} +12/29/2021 11:47:44 - INFO - codeparrot_training - Step 49592: {'lr': 8.272018907590217e-08, 'samples': 25391616, 'steps': 49592, 'batch_loss/train': 0.7750374283641577} +12/29/2021 11:47:55 - INFO - codeparrot_training - Step 49593: {'lr': 8.23152171046393e-08, 'samples': 25392128, 'steps': 49593, 'batch_loss/train': 0.43191522103734314} +12/29/2021 11:48:05 - INFO - codeparrot_training - Step 49594: {'lr': 8.191123871170714e-08, 'samples': 25392640, 'steps': 49594, 'batch_loss/train': 0.7795270672068} +12/29/2021 11:48:17 - INFO - codeparrot_training - Step 49595: {'lr': 8.150825389874329e-08, 'samples': 25393152, 'steps': 49595, 'batch_loss/train': 0.7602381691103801} +12/29/2021 11:48:28 - INFO - codeparrot_training - Step 49596: {'lr': 8.11062626673298e-08, 'samples': 25393664, 'steps': 49596, 'batch_loss/train': 0.6088513672584668} +12/29/2021 11:48:39 - INFO - codeparrot_training - Step 49597: {'lr': 8.070526501910425e-08, 'samples': 25394176, 'steps': 49597, 'batch_loss/train': 0.6480611888691783} +12/29/2021 11:48:52 - INFO - codeparrot_training - Step 49598: {'lr': 8.030526095559321e-08, 'samples': 25394688, 'steps': 49598, 'batch_loss/train': 0.7377682197839022} +12/29/2021 11:49:03 - INFO - codeparrot_training - Step 49599: {'lr': 7.990625047843425e-08, 'samples': 25395200, 'steps': 49599, 'batch_loss/train': 0.6021611816831864} +12/29/2021 11:49:13 - INFO - codeparrot_training - Step 49600: {'lr': 7.950823358920944e-08, 'samples': 25395712, 'steps': 49600, 'batch_loss/train': 0.9592564338818192} +12/29/2021 11:49:26 - INFO - codeparrot_training - Step 49601: {'lr': 7.91112102894731e-08, 'samples': 25396224, 'steps': 49601, 'batch_loss/train': 0.9022452780045569} +12/29/2021 11:49:36 - INFO - codeparrot_training - Step 49602: {'lr': 7.871518058083505e-08, 'samples': 25396736, 'steps': 49602, 'batch_loss/train': 0.697508743731305} +12/29/2021 11:49:47 - INFO - codeparrot_training - Step 49603: {'lr': 7.832014446487734e-08, 'samples': 25397248, 'steps': 49603, 'batch_loss/train': 0.7545348154380918} +12/29/2021 11:49:58 - INFO - codeparrot_training - Step 49604: {'lr': 7.792610194312655e-08, 'samples': 25397760, 'steps': 49604, 'batch_loss/train': 0.7953517017886043} +12/29/2021 11:50:12 - INFO - codeparrot_training - Step 49605: {'lr': 7.75330530171925e-08, 'samples': 25398272, 'steps': 49605, 'batch_loss/train': 0.7500478969886899} +12/29/2021 11:50:23 - INFO - codeparrot_training - Step 49606: {'lr': 7.714099768860173e-08, 'samples': 25398784, 'steps': 49606, 'batch_loss/train': 0.7482149042189121} +12/29/2021 11:50:33 - INFO - codeparrot_training - Step 49607: {'lr': 7.674993595893631e-08, 'samples': 25399296, 'steps': 49607, 'batch_loss/train': 0.7361537497490644} +12/29/2021 11:50:46 - INFO - codeparrot_training - Step 49608: {'lr': 7.635986782975057e-08, 'samples': 25399808, 'steps': 49608, 'batch_loss/train': 0.5892592868767679} +12/29/2021 11:50:56 - INFO - codeparrot_training - Step 49609: {'lr': 7.597079330259882e-08, 'samples': 25400320, 'steps': 49609, 'batch_loss/train': 0.6115807008463889} +12/29/2021 11:51:07 - INFO - codeparrot_training - Step 49610: {'lr': 7.55827123790076e-08, 'samples': 25400832, 'steps': 49610, 'batch_loss/train': 0.694832494482398} +12/29/2021 11:51:19 - INFO - codeparrot_training - Step 49611: {'lr': 7.519562506053123e-08, 'samples': 25401344, 'steps': 49611, 'batch_loss/train': 0.8008061992004514} +12/29/2021 11:51:30 - INFO - codeparrot_training - Step 49612: {'lr': 7.480953134872403e-08, 'samples': 25401856, 'steps': 49612, 'batch_loss/train': 0.646824948489666} +12/29/2021 11:51:40 - INFO - codeparrot_training - Step 49613: {'lr': 7.44244312450848e-08, 'samples': 25402368, 'steps': 49613, 'batch_loss/train': 0.7063311755191535} +12/29/2021 11:51:51 - INFO - codeparrot_training - Step 49614: {'lr': 7.404032475116784e-08, 'samples': 25402880, 'steps': 49614, 'batch_loss/train': 0.6999884301330894} +12/29/2021 11:52:05 - INFO - codeparrot_training - Step 49615: {'lr': 7.365721186849972e-08, 'samples': 25403392, 'steps': 49615, 'batch_loss/train': 0.7215190113056451} +12/29/2021 11:52:16 - INFO - codeparrot_training - Step 49616: {'lr': 7.327509259860699e-08, 'samples': 25403904, 'steps': 49616, 'batch_loss/train': 0.5379191164392978} +12/29/2021 11:52:26 - INFO - codeparrot_training - Step 49617: {'lr': 7.289396694301619e-08, 'samples': 25404416, 'steps': 49617, 'batch_loss/train': 0.6745675606653094} +12/29/2021 11:52:39 - INFO - codeparrot_training - Step 49618: {'lr': 7.251383490322616e-08, 'samples': 25404928, 'steps': 49618, 'batch_loss/train': 0.7130672893836163} +12/29/2021 11:52:49 - INFO - codeparrot_training - Step 49619: {'lr': 7.213469648073568e-08, 'samples': 25405440, 'steps': 49619, 'batch_loss/train': 0.8232782595441677} +12/29/2021 11:53:00 - INFO - codeparrot_training - Step 49620: {'lr': 7.17565516770713e-08, 'samples': 25405952, 'steps': 49620, 'batch_loss/train': 0.48499456510762684} +12/29/2021 11:53:14 - INFO - codeparrot_training - Step 49621: {'lr': 7.137940049373181e-08, 'samples': 25406464, 'steps': 49621, 'batch_loss/train': 0.6733599696308374} +12/29/2021 11:53:25 - INFO - codeparrot_training - Step 49622: {'lr': 7.100324293224381e-08, 'samples': 25406976, 'steps': 49622, 'batch_loss/train': 0.6658220151439309} +12/29/2021 11:53:35 - INFO - codeparrot_training - Step 49623: {'lr': 7.062807899405054e-08, 'samples': 25407488, 'steps': 49623, 'batch_loss/train': 0.7648122692480683} +12/29/2021 11:53:46 - INFO - codeparrot_training - Step 49624: {'lr': 7.02539086806786e-08, 'samples': 25408000, 'steps': 49624, 'batch_loss/train': 0.7740770210511982} +12/29/2021 11:53:58 - INFO - codeparrot_training - Step 49625: {'lr': 6.988073199362676e-08, 'samples': 25408512, 'steps': 49625, 'batch_loss/train': 0.7464604061096907} +12/29/2021 11:54:09 - INFO - codeparrot_training - Step 49626: {'lr': 6.950854893433833e-08, 'samples': 25409024, 'steps': 49626, 'batch_loss/train': 0.7284859456121922} +12/29/2021 11:54:19 - INFO - codeparrot_training - Step 49627: {'lr': 6.91373595043121e-08, 'samples': 25409536, 'steps': 49627, 'batch_loss/train': 0.6322208163328469} +12/29/2021 11:54:33 - INFO - codeparrot_training - Step 49628: {'lr': 6.876716370501912e-08, 'samples': 25410048, 'steps': 49628, 'batch_loss/train': 0.7556168041191995} +12/29/2021 11:54:44 - INFO - codeparrot_training - Step 49629: {'lr': 6.839796153795819e-08, 'samples': 25410560, 'steps': 49629, 'batch_loss/train': 0.7303334046155214} +12/29/2021 11:54:54 - INFO - codeparrot_training - Step 49630: {'lr': 6.802975300457259e-08, 'samples': 25411072, 'steps': 49630, 'batch_loss/train': 0.724764708429575} +12/29/2021 11:55:07 - INFO - codeparrot_training - Step 49631: {'lr': 6.766253810630563e-08, 'samples': 25411584, 'steps': 49631, 'batch_loss/train': 0.6999527304433286} +12/29/2021 11:55:17 - INFO - codeparrot_training - Step 49632: {'lr': 6.729631684468385e-08, 'samples': 25412096, 'steps': 49632, 'batch_loss/train': 0.5927771246060729} +12/29/2021 11:55:28 - INFO - codeparrot_training - Step 49633: {'lr': 6.693108922109503e-08, 'samples': 25412608, 'steps': 49633, 'batch_loss/train': 0.7596683679148555} +12/29/2021 11:55:38 - INFO - codeparrot_training - Step 49634: {'lr': 6.656685523701023e-08, 'samples': 25413120, 'steps': 49634, 'batch_loss/train': 0.609788021305576} +12/29/2021 11:55:51 - INFO - codeparrot_training - Step 49635: {'lr': 6.620361489390048e-08, 'samples': 25413632, 'steps': 49635, 'batch_loss/train': 0.8224725777981803} +12/29/2021 11:56:01 - INFO - codeparrot_training - Step 49636: {'lr': 6.584136819318132e-08, 'samples': 25414144, 'steps': 49636, 'batch_loss/train': 0.7134951394982636} +12/29/2021 11:56:12 - INFO - codeparrot_training - Step 49637: {'lr': 6.548011513629604e-08, 'samples': 25414656, 'steps': 49637, 'batch_loss/train': 0.5631029354408383} +12/29/2021 11:56:24 - INFO - codeparrot_training - Step 49638: {'lr': 6.511985572471568e-08, 'samples': 25415168, 'steps': 49638, 'batch_loss/train': 0.8111436204053462} +12/29/2021 11:56:35 - INFO - codeparrot_training - Step 49639: {'lr': 6.476058995982803e-08, 'samples': 25415680, 'steps': 49639, 'batch_loss/train': 0.7610357863595709} +12/29/2021 11:56:46 - INFO - codeparrot_training - Step 49640: {'lr': 6.440231784310413e-08, 'samples': 25416192, 'steps': 49640, 'batch_loss/train': 0.7641291497275233} +12/29/2021 11:56:58 - INFO - codeparrot_training - Step 49641: {'lr': 6.404503937593176e-08, 'samples': 25416704, 'steps': 49641, 'batch_loss/train': 0.7811222318559885} +12/29/2021 11:57:09 - INFO - codeparrot_training - Step 49642: {'lr': 6.368875455972644e-08, 'samples': 25417216, 'steps': 49642, 'batch_loss/train': 0.6142212462145835} +12/29/2021 11:57:19 - INFO - codeparrot_training - Step 49643: {'lr': 6.333346339595925e-08, 'samples': 25417728, 'steps': 49643, 'batch_loss/train': 0.7639573020860553} +12/29/2021 11:57:30 - INFO - codeparrot_training - Step 49644: {'lr': 6.297916588596242e-08, 'samples': 25418240, 'steps': 49644, 'batch_loss/train': 0.7423899965360761} +12/29/2021 11:57:44 - INFO - codeparrot_training - Step 49645: {'lr': 6.262586203120702e-08, 'samples': 25418752, 'steps': 49645, 'batch_loss/train': 0.7411622635554522} +12/29/2021 11:57:55 - INFO - codeparrot_training - Step 49646: {'lr': 6.227355183308081e-08, 'samples': 25419264, 'steps': 49646, 'batch_loss/train': 0.6993074878118932} +12/29/2021 11:58:05 - INFO - codeparrot_training - Step 49647: {'lr': 6.192223529299935e-08, 'samples': 25419776, 'steps': 49647, 'batch_loss/train': 0.7346445652656257} +12/29/2021 11:58:18 - INFO - codeparrot_training - Step 49648: {'lr': 6.157191241232263e-08, 'samples': 25420288, 'steps': 49648, 'batch_loss/train': 0.6586481961421669} +12/29/2021 11:58:28 - INFO - codeparrot_training - Step 49649: {'lr': 6.12225831924662e-08, 'samples': 25420800, 'steps': 49649, 'batch_loss/train': 0.7589275678619742} +12/29/2021 11:58:39 - INFO - codeparrot_training - Step 49650: {'lr': 6.087424763481786e-08, 'samples': 25421312, 'steps': 49650, 'batch_loss/train': 0.667423581937328} +12/29/2021 11:58:53 - INFO - codeparrot_training - Step 49651: {'lr': 6.05269057407376e-08, 'samples': 25421824, 'steps': 49651, 'batch_loss/train': 0.7596051967702806} +12/29/2021 11:59:03 - INFO - codeparrot_training - Step 49652: {'lr': 6.018055751166874e-08, 'samples': 25422336, 'steps': 49652, 'batch_loss/train': 0.6309612318873405} +12/29/2021 11:59:14 - INFO - codeparrot_training - Step 49653: {'lr': 5.983520294891576e-08, 'samples': 25422848, 'steps': 49653, 'batch_loss/train': 0.6898290319368243} +12/29/2021 11:59:26 - INFO - codeparrot_training - Step 49654: {'lr': 5.9490842053894214e-08, 'samples': 25423360, 'steps': 49654, 'batch_loss/train': 0.7469374206848443} +12/29/2021 11:59:37 - INFO - codeparrot_training - Step 49655: {'lr': 5.9147474827964124e-08, 'samples': 25423872, 'steps': 49655, 'batch_loss/train': 0.6061354537378065} +12/29/2021 11:59:48 - INFO - codeparrot_training - Step 49656: {'lr': 5.880510127248551e-08, 'samples': 25424384, 'steps': 49656, 'batch_loss/train': 0.7654200857505202} +12/29/2021 11:59:58 - INFO - codeparrot_training - Step 49657: {'lr': 5.8463721388846145e-08, 'samples': 25424896, 'steps': 49657, 'batch_loss/train': 0.7726735966280103} +12/29/2021 12:00:10 - INFO - codeparrot_training - Step 49658: {'lr': 5.812333517835056e-08, 'samples': 25425408, 'steps': 49658, 'batch_loss/train': 0.6599284969270229} +12/29/2021 12:00:21 - INFO - codeparrot_training - Step 49659: {'lr': 5.778394264241427e-08, 'samples': 25425920, 'steps': 49659, 'batch_loss/train': 0.7511524786241353} +12/29/2021 12:00:32 - INFO - codeparrot_training - Step 49660: {'lr': 5.744554378234179e-08, 'samples': 25426432, 'steps': 49660, 'batch_loss/train': 0.7472877791151404} +12/29/2021 12:00:45 - INFO - codeparrot_training - Step 49661: {'lr': 5.7108138599465395e-08, 'samples': 25426944, 'steps': 49661, 'batch_loss/train': 0.7670057555660605} +12/29/2021 12:00:56 - INFO - codeparrot_training - Step 49662: {'lr': 5.677172709517286e-08, 'samples': 25427456, 'steps': 49662, 'batch_loss/train': 0.7627437822520733} +12/29/2021 12:01:07 - INFO - codeparrot_training - Step 49663: {'lr': 5.643630927079646e-08, 'samples': 25427968, 'steps': 49663, 'batch_loss/train': 0.7260546693578362} +12/29/2021 12:01:19 - INFO - codeparrot_training - Step 49664: {'lr': 5.610188512764069e-08, 'samples': 25428480, 'steps': 49664, 'batch_loss/train': 0.7251288839615881} +12/29/2021 12:01:30 - INFO - codeparrot_training - Step 49665: {'lr': 5.5768454667037836e-08, 'samples': 25428992, 'steps': 49665, 'batch_loss/train': 0.7233811691403389} +12/29/2021 12:01:40 - INFO - codeparrot_training - Step 49666: {'lr': 5.543601789032016e-08, 'samples': 25429504, 'steps': 49666, 'batch_loss/train': 0.8114846516400576} +12/29/2021 12:01:51 - INFO - codeparrot_training - Step 49667: {'lr': 5.510457479881992e-08, 'samples': 25430016, 'steps': 49667, 'batch_loss/train': 0.8869628788670525} +12/29/2021 12:02:05 - INFO - codeparrot_training - Step 49668: {'lr': 5.477412539386939e-08, 'samples': 25430528, 'steps': 49668, 'batch_loss/train': 0.7931468235328794} +12/29/2021 12:02:16 - INFO - codeparrot_training - Step 49669: {'lr': 5.444466967674533e-08, 'samples': 25431040, 'steps': 49669, 'batch_loss/train': 0.7321270368993282} +12/29/2021 12:02:27 - INFO - codeparrot_training - Step 49670: {'lr': 5.4116207648780005e-08, 'samples': 25431552, 'steps': 49670, 'batch_loss/train': 0.6287279720418155} +12/29/2021 12:02:39 - INFO - codeparrot_training - Step 49671: {'lr': 5.378873931125017e-08, 'samples': 25432064, 'steps': 49671, 'batch_loss/train': 0.7724486626684666} +12/29/2021 12:02:49 - INFO - codeparrot_training - Step 49672: {'lr': 5.346226466551585e-08, 'samples': 25432576, 'steps': 49672, 'batch_loss/train': 0.7977384862024337} +12/29/2021 12:03:00 - INFO - codeparrot_training - Step 49673: {'lr': 5.3136783712798284e-08, 'samples': 25433088, 'steps': 49673, 'batch_loss/train': 0.7019165107049048} +12/29/2021 12:03:12 - INFO - codeparrot_training - Step 49674: {'lr': 5.281229645445751e-08, 'samples': 25433600, 'steps': 49674, 'batch_loss/train': 0.7495476147159934} +12/29/2021 12:03:23 - INFO - codeparrot_training - Step 49675: {'lr': 5.2488802891770274e-08, 'samples': 25434112, 'steps': 49675, 'batch_loss/train': 0.6791813438758254} +12/29/2021 12:03:33 - INFO - codeparrot_training - Step 49676: {'lr': 5.2166303025985574e-08, 'samples': 25434624, 'steps': 49676, 'batch_loss/train': 0.7757380966795608} +12/29/2021 12:03:45 - INFO - codeparrot_training - Step 49677: {'lr': 5.1844796858435685e-08, 'samples': 25435136, 'steps': 49677, 'batch_loss/train': 0.849838551832363} +12/29/2021 12:03:56 - INFO - codeparrot_training - Step 49678: {'lr': 5.152428439034185e-08, 'samples': 25435648, 'steps': 49678, 'batch_loss/train': 0.6982199784833938} +12/29/2021 12:04:07 - INFO - codeparrot_training - Step 49679: {'lr': 5.120476562303633e-08, 'samples': 25436160, 'steps': 49679, 'batch_loss/train': 0.7025620574131608} +12/29/2021 12:04:17 - INFO - codeparrot_training - Step 49680: {'lr': 5.088624055776814e-08, 'samples': 25436672, 'steps': 49680, 'batch_loss/train': 0.6807236555032432} +12/29/2021 12:04:32 - INFO - codeparrot_training - Step 49681: {'lr': 5.0568709195786264e-08, 'samples': 25437184, 'steps': 49681, 'batch_loss/train': 0.9202449265867472} +12/29/2021 12:04:42 - INFO - codeparrot_training - Step 49682: {'lr': 5.0252171538367475e-08, 'samples': 25437696, 'steps': 49682, 'batch_loss/train': 0.5928808082244359} +12/29/2021 12:04:53 - INFO - codeparrot_training - Step 49683: {'lr': 4.993662758676076e-08, 'samples': 25438208, 'steps': 49683, 'batch_loss/train': 0.6534469211474061} +12/29/2021 12:05:05 - INFO - codeparrot_training - Step 49684: {'lr': 4.962207734224289e-08, 'samples': 25438720, 'steps': 49684, 'batch_loss/train': 0.6905312980525196} +12/29/2021 12:05:15 - INFO - codeparrot_training - Step 49685: {'lr': 4.93085208060351e-08, 'samples': 25439232, 'steps': 49685, 'batch_loss/train': 0.7639983743429184} +12/29/2021 12:05:26 - INFO - codeparrot_training - Step 49686: {'lr': 4.899595797941414e-08, 'samples': 25439744, 'steps': 49686, 'batch_loss/train': 0.670738557819277} +12/29/2021 12:05:37 - INFO - codeparrot_training - Step 49687: {'lr': 4.868438886360127e-08, 'samples': 25440256, 'steps': 49687, 'batch_loss/train': 0.6533530564047396} +12/29/2021 12:05:49 - INFO - codeparrot_training - Step 49688: {'lr': 4.837381345984548e-08, 'samples': 25440768, 'steps': 49688, 'batch_loss/train': 0.7694872641004622} +12/29/2021 12:06:00 - INFO - codeparrot_training - Step 49689: {'lr': 4.806423176936803e-08, 'samples': 25441280, 'steps': 49689, 'batch_loss/train': 0.5938548998092301} +12/29/2021 12:06:10 - INFO - codeparrot_training - Step 49690: {'lr': 4.77556437934179e-08, 'samples': 25441792, 'steps': 49690, 'batch_loss/train': 0.7277667156886309} +12/29/2021 12:06:24 - INFO - codeparrot_training - Step 49691: {'lr': 4.7448049533216354e-08, 'samples': 25442304, 'steps': 49691, 'batch_loss/train': 0.689346119761467} +12/29/2021 12:06:35 - INFO - codeparrot_training - Step 49692: {'lr': 4.714144898995687e-08, 'samples': 25442816, 'steps': 49692, 'batch_loss/train': 0.7829747591167688} +12/29/2021 12:06:46 - INFO - codeparrot_training - Step 49693: {'lr': 4.683584216491621e-08, 'samples': 25443328, 'steps': 49693, 'batch_loss/train': 0.7128652790561318} +12/29/2021 12:06:58 - INFO - codeparrot_training - Step 49694: {'lr': 4.6531229059260104e-08, 'samples': 25443840, 'steps': 49694, 'batch_loss/train': 0.57119210367091} +12/29/2021 12:07:08 - INFO - codeparrot_training - Step 49695: {'lr': 4.6227609674209804e-08, 'samples': 25444352, 'steps': 49695, 'batch_loss/train': 0.7153498791158199} +12/29/2021 12:07:19 - INFO - codeparrot_training - Step 49696: {'lr': 4.592498401098655e-08, 'samples': 25444864, 'steps': 49696, 'batch_loss/train': 0.7389564514160156} +12/29/2021 12:07:33 - INFO - codeparrot_training - Step 49697: {'lr': 4.562335207081159e-08, 'samples': 25445376, 'steps': 49697, 'batch_loss/train': 0.7426752871833742} +12/29/2021 12:07:44 - INFO - codeparrot_training - Step 49698: {'lr': 4.53227138548229e-08, 'samples': 25445888, 'steps': 49698, 'batch_loss/train': 1.0261255977675319} +12/29/2021 12:07:55 - INFO - codeparrot_training - Step 49699: {'lr': 4.5023069364269475e-08, 'samples': 25446400, 'steps': 49699, 'batch_loss/train': 0.785298490896821} +12/29/2021 12:08:05 - INFO - codeparrot_training - Step 49700: {'lr': 4.4724418600317063e-08, 'samples': 25446912, 'steps': 49700, 'batch_loss/train': 0.879578722640872} +12/29/2021 12:08:17 - INFO - codeparrot_training - Step 49701: {'lr': 4.44267615641869e-08, 'samples': 25447424, 'steps': 49701, 'batch_loss/train': 0.576993859373033} +12/29/2021 12:08:28 - INFO - codeparrot_training - Step 49702: {'lr': 4.413009825701697e-08, 'samples': 25447936, 'steps': 49702, 'batch_loss/train': 0.740995617583394} +12/29/2021 12:08:38 - INFO - codeparrot_training - Step 49703: {'lr': 4.383442868000076e-08, 'samples': 25448448, 'steps': 49703, 'batch_loss/train': 0.5799486782052554} +12/29/2021 12:08:51 - INFO - codeparrot_training - Step 49704: {'lr': 4.353975283433176e-08, 'samples': 25448960, 'steps': 49704, 'batch_loss/train': 0.7129551068646833} +12/29/2021 12:09:02 - INFO - codeparrot_training - Step 49705: {'lr': 4.324607072114795e-08, 'samples': 25449472, 'steps': 49705, 'batch_loss/train': 0.7379651900264435} +12/29/2021 12:09:12 - INFO - codeparrot_training - Step 49706: {'lr': 4.295338234164281e-08, 'samples': 25449984, 'steps': 49706, 'batch_loss/train': 0.7283211858011782} +12/29/2021 12:09:23 - INFO - codeparrot_training - Step 49707: {'lr': 4.266168769698209e-08, 'samples': 25450496, 'steps': 49707, 'batch_loss/train': 0.6966181974858046} +12/29/2021 12:09:37 - INFO - codeparrot_training - Step 49708: {'lr': 4.237098678833151e-08, 'samples': 25451008, 'steps': 49708, 'batch_loss/train': 0.7587734026019461} +12/29/2021 12:09:47 - INFO - codeparrot_training - Step 49709: {'lr': 4.208127961682906e-08, 'samples': 25451520, 'steps': 49709, 'batch_loss/train': 0.7131918541272171} +12/29/2021 12:09:58 - INFO - codeparrot_training - Step 49710: {'lr': 4.179256618361271e-08, 'samples': 25452032, 'steps': 49710, 'batch_loss/train': 0.6033388966461644} +12/29/2021 12:10:10 - INFO - codeparrot_training - Step 49711: {'lr': 4.1504846489848203e-08, 'samples': 25452544, 'steps': 49711, 'batch_loss/train': 0.7061135922558606} +12/29/2021 12:10:21 - INFO - codeparrot_training - Step 49712: {'lr': 4.121812053670126e-08, 'samples': 25453056, 'steps': 49712, 'batch_loss/train': 0.7490996681153774} +12/29/2021 12:10:31 - INFO - codeparrot_training - Step 49713: {'lr': 4.093238832528212e-08, 'samples': 25453568, 'steps': 49713, 'batch_loss/train': 0.5921073062345386} +12/29/2021 12:10:43 - INFO - codeparrot_training - Step 49714: {'lr': 4.064764985672875e-08, 'samples': 25454080, 'steps': 49714, 'batch_loss/train': 0.7108915616408922} +12/29/2021 12:10:54 - INFO - codeparrot_training - Step 49715: {'lr': 4.036390513217914e-08, 'samples': 25454592, 'steps': 49715, 'batch_loss/train': 0.7186199384741485} +12/29/2021 12:11:05 - INFO - codeparrot_training - Step 49716: {'lr': 4.008115415277125e-08, 'samples': 25455104, 'steps': 49716, 'batch_loss/train': 0.7651899391785264} +12/29/2021 12:11:17 - INFO - codeparrot_training - Step 49717: {'lr': 3.979939691961532e-08, 'samples': 25455616, 'steps': 49717, 'batch_loss/train': 0.727060048317071} +12/29/2021 12:11:27 - INFO - codeparrot_training - Step 49718: {'lr': 3.951863343382156e-08, 'samples': 25456128, 'steps': 49718, 'batch_loss/train': 0.8015848263166845} +12/29/2021 12:11:38 - INFO - codeparrot_training - Step 49719: {'lr': 3.9238863696555715e-08, 'samples': 25456640, 'steps': 49719, 'batch_loss/train': 0.711345680989325} +12/29/2021 12:11:49 - INFO - codeparrot_training - Step 49720: {'lr': 3.896008770884474e-08, 'samples': 25457152, 'steps': 49720, 'batch_loss/train': 0.7087754879612476} +12/29/2021 12:12:03 - INFO - codeparrot_training - Step 49721: {'lr': 3.868230547188212e-08, 'samples': 25457664, 'steps': 49721, 'batch_loss/train': 0.7249702184926718} +12/29/2021 12:12:13 - INFO - codeparrot_training - Step 49722: {'lr': 3.840551698672257e-08, 'samples': 25458176, 'steps': 49722, 'batch_loss/train': 0.7261448707431555} +12/29/2021 12:12:24 - INFO - codeparrot_training - Step 49723: {'lr': 3.812972225450406e-08, 'samples': 25458688, 'steps': 49723, 'batch_loss/train': 0.6200693866703659} +12/29/2021 12:12:36 - INFO - codeparrot_training - Step 49724: {'lr': 3.785492127628132e-08, 'samples': 25459200, 'steps': 49724, 'batch_loss/train': 0.6569430567324162} +12/29/2021 12:12:47 - INFO - codeparrot_training - Step 49725: {'lr': 3.758111405316455e-08, 'samples': 25459712, 'steps': 49725, 'batch_loss/train': 0.6670534610748291} +12/29/2021 12:12:57 - INFO - codeparrot_training - Step 49726: {'lr': 3.730830058623624e-08, 'samples': 25460224, 'steps': 49726, 'batch_loss/train': 0.7770039159804583} +12/29/2021 12:13:09 - INFO - codeparrot_training - Step 49727: {'lr': 3.70364808766066e-08, 'samples': 25460736, 'steps': 49727, 'batch_loss/train': 0.6798737952485681} +12/29/2021 12:13:20 - INFO - codeparrot_training - Step 49728: {'lr': 3.676565492533035e-08, 'samples': 25461248, 'steps': 49728, 'batch_loss/train': 0.6673885294003412} +12/29/2021 12:13:30 - INFO - codeparrot_training - Step 49729: {'lr': 3.649582273348995e-08, 'samples': 25461760, 'steps': 49729, 'batch_loss/train': 0.7927574459463358} +12/29/2021 12:13:41 - INFO - codeparrot_training - Step 49730: {'lr': 3.622698430214011e-08, 'samples': 25462272, 'steps': 49730, 'batch_loss/train': 0.7916838591918349} +12/29/2021 12:13:55 - INFO - codeparrot_training - Step 49731: {'lr': 3.595913963239106e-08, 'samples': 25462784, 'steps': 49731, 'batch_loss/train': 0.6169229988008738} +12/29/2021 12:14:06 - INFO - codeparrot_training - Step 49732: {'lr': 3.56922887252975e-08, 'samples': 25463296, 'steps': 49732, 'batch_loss/train': 0.7542591774836183} +12/29/2021 12:14:16 - INFO - codeparrot_training - Step 49733: {'lr': 3.5426431581886407e-08, 'samples': 25463808, 'steps': 49733, 'batch_loss/train': 0.8271201085299253} +12/29/2021 12:14:29 - INFO - codeparrot_training - Step 49734: {'lr': 3.516156820324024e-08, 'samples': 25464320, 'steps': 49734, 'batch_loss/train': 0.7543727857992053} +12/29/2021 12:14:39 - INFO - codeparrot_training - Step 49735: {'lr': 3.48976985904137e-08, 'samples': 25464832, 'steps': 49735, 'batch_loss/train': 0.6904947212897241} +12/29/2021 12:14:50 - INFO - codeparrot_training - Step 49736: {'lr': 3.463482274443375e-08, 'samples': 25465344, 'steps': 49736, 'batch_loss/train': 0.6271153572015464} +12/29/2021 12:15:04 - INFO - codeparrot_training - Step 49737: {'lr': 3.4372940666382856e-08, 'samples': 25465856, 'steps': 49737, 'batch_loss/train': 0.833325375802815} +12/29/2021 12:15:15 - INFO - codeparrot_training - Step 49738: {'lr': 3.4112052357287985e-08, 'samples': 25466368, 'steps': 49738, 'batch_loss/train': 0.736074005253613} +12/29/2021 12:15:25 - INFO - codeparrot_training - Step 49739: {'lr': 3.3852157818176075e-08, 'samples': 25466880, 'steps': 49739, 'batch_loss/train': 0.7487575430423021} +12/29/2021 12:15:36 - INFO - codeparrot_training - Step 49740: {'lr': 3.35932570500741e-08, 'samples': 25467392, 'steps': 49740, 'batch_loss/train': 0.6517553944140673} +12/29/2021 12:15:48 - INFO - codeparrot_training - Step 49741: {'lr': 3.3335350054009004e-08, 'samples': 25467904, 'steps': 49741, 'batch_loss/train': 0.8283215723931789} +12/29/2021 12:15:59 - INFO - codeparrot_training - Step 49742: {'lr': 3.3078436831035506e-08, 'samples': 25468416, 'steps': 49742, 'batch_loss/train': 0.7795705180615187} +12/29/2021 12:16:09 - INFO - codeparrot_training - Step 49743: {'lr': 3.282251738215281e-08, 'samples': 25468928, 'steps': 49743, 'batch_loss/train': 0.6850054804235697} +12/29/2021 12:16:21 - INFO - codeparrot_training - Step 49744: {'lr': 3.256759170838786e-08, 'samples': 25469440, 'steps': 49744, 'batch_loss/train': 0.6723171072080731} +12/29/2021 12:16:32 - INFO - codeparrot_training - Step 49745: {'lr': 3.2313659810767617e-08, 'samples': 25469952, 'steps': 49745, 'batch_loss/train': 0.7322197295725346} +12/29/2021 12:16:42 - INFO - codeparrot_training - Step 49746: {'lr': 3.2060721690263526e-08, 'samples': 25470464, 'steps': 49746, 'batch_loss/train': 0.6839122204110026} +12/29/2021 12:16:56 - INFO - codeparrot_training - Step 49747: {'lr': 3.180877734790255e-08, 'samples': 25470976, 'steps': 49747, 'batch_loss/train': 0.718670372501947} +12/29/2021 12:17:07 - INFO - codeparrot_training - Step 49748: {'lr': 3.1557826784683886e-08, 'samples': 25471488, 'steps': 49748, 'batch_loss/train': 0.6707949216943234} +12/29/2021 12:17:18 - INFO - codeparrot_training - Step 49749: {'lr': 3.130787000160673e-08, 'samples': 25472000, 'steps': 49749, 'batch_loss/train': 0.6456519733183086} +12/29/2021 12:17:30 - INFO - codeparrot_training - Step 49750: {'lr': 3.1058906999670286e-08, 'samples': 25472512, 'steps': 49750, 'batch_loss/train': 0.7694197921082377} +12/29/2021 12:17:41 - INFO - codeparrot_training - Step 49751: {'lr': 3.081093777987376e-08, 'samples': 25473024, 'steps': 49751, 'batch_loss/train': 0.5715851485729218} +12/29/2021 12:17:51 - INFO - codeparrot_training - Step 49752: {'lr': 3.0563962343160836e-08, 'samples': 25473536, 'steps': 49752, 'batch_loss/train': 0.7848498777020723} +12/29/2021 12:18:02 - INFO - codeparrot_training - Step 49753: {'lr': 3.031798069055847e-08, 'samples': 25474048, 'steps': 49753, 'batch_loss/train': 0.7951310742646456} +12/29/2021 12:18:14 - INFO - codeparrot_training - Step 49754: {'lr': 3.007299282301035e-08, 'samples': 25474560, 'steps': 49754, 'batch_loss/train': 0.7028756479267031} +12/29/2021 12:18:25 - INFO - codeparrot_training - Step 49755: {'lr': 2.982899874151568e-08, 'samples': 25475072, 'steps': 49755, 'batch_loss/train': 0.7638527452945709} +12/29/2021 12:18:35 - INFO - codeparrot_training - Step 49756: {'lr': 2.9585998447045904e-08, 'samples': 25475584, 'steps': 49756, 'batch_loss/train': 0.7317563388496637} +12/29/2021 12:18:47 - INFO - codeparrot_training - Step 49757: {'lr': 2.934399194054471e-08, 'samples': 25476096, 'steps': 49757, 'batch_loss/train': 0.6885087681002915} +12/29/2021 12:18:58 - INFO - codeparrot_training - Step 49758: {'lr': 2.9102979223011307e-08, 'samples': 25476608, 'steps': 49758, 'batch_loss/train': 0.6888571493327618} +12/29/2021 12:19:09 - INFO - codeparrot_training - Step 49759: {'lr': 2.886296029536162e-08, 'samples': 25477120, 'steps': 49759, 'batch_loss/train': 0.7585279656341299} +12/29/2021 12:19:23 - INFO - codeparrot_training - Step 49760: {'lr': 2.8623935158539337e-08, 'samples': 25477632, 'steps': 49760, 'batch_loss/train': 0.6793496571481228} +12/29/2021 12:19:33 - INFO - codeparrot_training - Step 49761: {'lr': 2.8385903813571423e-08, 'samples': 25478144, 'steps': 49761, 'batch_loss/train': 0.7567462716251612} +12/29/2021 12:19:44 - INFO - codeparrot_training - Step 49762: {'lr': 2.8148866261318296e-08, 'samples': 25478656, 'steps': 49762, 'batch_loss/train': 0.6802740534767509} +12/29/2021 12:19:55 - INFO - codeparrot_training - Step 49763: {'lr': 2.7912822502779156e-08, 'samples': 25479168, 'steps': 49763, 'batch_loss/train': 0.7262102928943932} +12/29/2021 12:20:07 - INFO - codeparrot_training - Step 49764: {'lr': 2.7677772538869938e-08, 'samples': 25479680, 'steps': 49764, 'batch_loss/train': 0.7425803104415536} +12/29/2021 12:20:17 - INFO - codeparrot_training - Step 49765: {'lr': 2.7443716370534334e-08, 'samples': 25480192, 'steps': 49765, 'batch_loss/train': 0.775152828427963} +12/29/2021 12:20:28 - INFO - codeparrot_training - Step 49766: {'lr': 2.7210653998660517e-08, 'samples': 25480704, 'steps': 49766, 'batch_loss/train': 0.7143416912294924} +12/29/2021 12:20:43 - INFO - codeparrot_training - Step 49767: {'lr': 2.697858542424769e-08, 'samples': 25481216, 'steps': 49767, 'batch_loss/train': 0.7238801941275597} +12/29/2021 12:20:54 - INFO - codeparrot_training - Step 49768: {'lr': 2.6747510648184037e-08, 'samples': 25481728, 'steps': 49768, 'batch_loss/train': 0.8666967460885644} +12/29/2021 12:21:04 - INFO - codeparrot_training - Step 49769: {'lr': 2.6517429671357728e-08, 'samples': 25482240, 'steps': 49769, 'batch_loss/train': 1.2756245932541788} +12/29/2021 12:21:15 - INFO - codeparrot_training - Step 49770: {'lr': 2.6288342494740215e-08, 'samples': 25482752, 'steps': 49770, 'batch_loss/train': 0.5218506024684757} +12/29/2021 12:21:27 - INFO - codeparrot_training - Step 49771: {'lr': 2.6060249119191915e-08, 'samples': 25483264, 'steps': 49771, 'batch_loss/train': 0.7792106028646231} +12/29/2021 12:21:38 - INFO - codeparrot_training - Step 49772: {'lr': 2.583314954565652e-08, 'samples': 25483776, 'steps': 49772, 'batch_loss/train': 0.6263595604104921} +12/29/2021 12:21:48 - INFO - codeparrot_training - Step 49773: {'lr': 2.560704377502221e-08, 'samples': 25484288, 'steps': 49773, 'batch_loss/train': 0.6629399599332828} +12/29/2021 12:22:00 - INFO - codeparrot_training - Step 49774: {'lr': 2.538193180817716e-08, 'samples': 25484800, 'steps': 49774, 'batch_loss/train': 0.6576391342096031} +12/29/2021 12:22:11 - INFO - codeparrot_training - Step 49775: {'lr': 2.515781364603731e-08, 'samples': 25485312, 'steps': 49775, 'batch_loss/train': 1.001668369397521} +12/29/2021 12:22:22 - INFO - codeparrot_training - Step 49776: {'lr': 2.4934689289463075e-08, 'samples': 25485824, 'steps': 49776, 'batch_loss/train': 0.7322180375922471} +12/29/2021 12:22:36 - INFO - codeparrot_training - Step 49777: {'lr': 2.471255873939815e-08, 'samples': 25486336, 'steps': 49777, 'batch_loss/train': 0.7744163451716304} +12/29/2021 12:22:47 - INFO - codeparrot_training - Step 49778: {'lr': 2.4491421996647446e-08, 'samples': 25486848, 'steps': 49778, 'batch_loss/train': 0.6941306000808254} +12/29/2021 12:22:57 - INFO - codeparrot_training - Step 49779: {'lr': 2.427127906215465e-08, 'samples': 25487360, 'steps': 49779, 'batch_loss/train': 1.2193021569401026} +12/29/2021 12:23:08 - INFO - codeparrot_training - Step 49780: {'lr': 2.4052129936780188e-08, 'samples': 25487872, 'steps': 49780, 'batch_loss/train': 0.8616187418811023} +12/29/2021 12:23:20 - INFO - codeparrot_training - Step 49781: {'lr': 2.3833974621356724e-08, 'samples': 25488384, 'steps': 49781, 'batch_loss/train': 0.7220078082755208} +12/29/2021 12:23:31 - INFO - codeparrot_training - Step 49782: {'lr': 2.3616813116800194e-08, 'samples': 25488896, 'steps': 49782, 'batch_loss/train': 0.7451942176558077} +12/29/2021 12:23:41 - INFO - codeparrot_training - Step 49783: {'lr': 2.3400645423971024e-08, 'samples': 25489408, 'steps': 49783, 'batch_loss/train': 0.5317088805604726} +12/29/2021 12:23:55 - INFO - codeparrot_training - Step 49784: {'lr': 2.3185471543701876e-08, 'samples': 25489920, 'steps': 49784, 'batch_loss/train': 0.7454059803858399} +12/29/2021 12:24:06 - INFO - codeparrot_training - Step 49785: {'lr': 2.2971291476853173e-08, 'samples': 25490432, 'steps': 49785, 'batch_loss/train': 0.6687683411873877} +12/29/2021 12:24:17 - INFO - codeparrot_training - Step 49786: {'lr': 2.2758105224285343e-08, 'samples': 25490944, 'steps': 49786, 'batch_loss/train': 0.8331255288794637} +12/29/2021 12:24:29 - INFO - codeparrot_training - Step 49787: {'lr': 2.25459127868588e-08, 'samples': 25491456, 'steps': 49787, 'batch_loss/train': 0.7069140719249845} +12/29/2021 12:24:39 - INFO - codeparrot_training - Step 49788: {'lr': 2.2334714165378467e-08, 'samples': 25491968, 'steps': 49788, 'batch_loss/train': 0.7899830408859998} +12/29/2021 12:24:50 - INFO - codeparrot_training - Step 49789: {'lr': 2.212450936070476e-08, 'samples': 25492480, 'steps': 49789, 'batch_loss/train': 0.7206742335110903} +12/29/2021 12:25:00 - INFO - codeparrot_training - Step 49790: {'lr': 2.19152983736981e-08, 'samples': 25492992, 'steps': 49790, 'batch_loss/train': 0.6773557984270155} +12/29/2021 12:25:13 - INFO - codeparrot_training - Step 49791: {'lr': 2.1707081205163403e-08, 'samples': 25493504, 'steps': 49791, 'batch_loss/train': 0.7208636375144124} +12/29/2021 12:25:23 - INFO - codeparrot_training - Step 49792: {'lr': 2.1499857855933336e-08, 'samples': 25494016, 'steps': 49792, 'batch_loss/train': 0.7639092265162617} +12/29/2021 12:25:34 - INFO - codeparrot_training - Step 49793: {'lr': 2.129362832681281e-08, 'samples': 25494528, 'steps': 49793, 'batch_loss/train': 0.7507009556284174} +12/29/2021 12:25:46 - INFO - codeparrot_training - Step 49794: {'lr': 2.1088392618662243e-08, 'samples': 25495040, 'steps': 49794, 'batch_loss/train': 0.739140747115016} +12/29/2021 12:25:57 - INFO - codeparrot_training - Step 49795: {'lr': 2.08841507322588e-08, 'samples': 25495552, 'steps': 49795, 'batch_loss/train': 0.6845108885318041} +12/29/2021 12:26:07 - INFO - codeparrot_training - Step 49796: {'lr': 2.068090266843514e-08, 'samples': 25496064, 'steps': 49796, 'batch_loss/train': 0.6831740622874349} +12/29/2021 12:26:21 - INFO - codeparrot_training - Step 49797: {'lr': 2.0478648428023938e-08, 'samples': 25496576, 'steps': 49797, 'batch_loss/train': 0.7651861147023737} +12/29/2021 12:26:32 - INFO - codeparrot_training - Step 49798: {'lr': 2.027738801177459e-08, 'samples': 25497088, 'steps': 49798, 'batch_loss/train': 0.7260711374692619} +12/29/2021 12:26:42 - INFO - codeparrot_training - Step 49799: {'lr': 2.007712142051976e-08, 'samples': 25497600, 'steps': 49799, 'batch_loss/train': 0.7384539092890918} +12/29/2021 12:26:55 - INFO - codeparrot_training - Step 49800: {'lr': 1.9877848655036613e-08, 'samples': 25498112, 'steps': 49800, 'batch_loss/train': 0.6336557921022177} +12/29/2021 12:27:05 - INFO - codeparrot_training - Step 49801: {'lr': 1.9679569716130053e-08, 'samples': 25498624, 'steps': 49801, 'batch_loss/train': 0.6944420458748937} +12/29/2021 12:27:16 - INFO - codeparrot_training - Step 49802: {'lr': 1.9482284604604994e-08, 'samples': 25499136, 'steps': 49802, 'batch_loss/train': 0.7718196995556355} +12/29/2021 12:27:27 - INFO - codeparrot_training - Step 49803: {'lr': 1.9285993321238594e-08, 'samples': 25499648, 'steps': 49803, 'batch_loss/train': 0.664853454567492} +12/29/2021 12:27:39 - INFO - codeparrot_training - Step 49804: {'lr': 1.909069586678025e-08, 'samples': 25500160, 'steps': 49804, 'batch_loss/train': 0.5688439593650401} +12/29/2021 12:27:50 - INFO - codeparrot_training - Step 49805: {'lr': 1.8896392242034875e-08, 'samples': 25500672, 'steps': 49805, 'batch_loss/train': 0.702287164516747} +12/29/2021 12:28:00 - INFO - codeparrot_training - Step 49806: {'lr': 1.8703082447751875e-08, 'samples': 25501184, 'steps': 49806, 'batch_loss/train': 0.8333139475435019} +12/29/2021 12:28:14 - INFO - codeparrot_training - Step 49807: {'lr': 1.851076648473615e-08, 'samples': 25501696, 'steps': 49807, 'batch_loss/train': 0.7025415340904146} +12/29/2021 12:28:25 - INFO - codeparrot_training - Step 49808: {'lr': 1.8319444353737115e-08, 'samples': 25502208, 'steps': 49808, 'batch_loss/train': 0.6437043760088272} +12/29/2021 12:28:35 - INFO - codeparrot_training - Step 49809: {'lr': 1.8129116055504157e-08, 'samples': 25502720, 'steps': 49809, 'batch_loss/train': 0.720721585676074} +12/29/2021 12:28:46 - INFO - codeparrot_training - Step 49810: {'lr': 1.7939781590786687e-08, 'samples': 25503232, 'steps': 49810, 'batch_loss/train': 0.6914129708893597} +12/29/2021 12:28:58 - INFO - codeparrot_training - Step 49811: {'lr': 1.7751440960361852e-08, 'samples': 25503744, 'steps': 49811, 'batch_loss/train': 0.7588369161821902} +12/29/2021 12:29:09 - INFO - codeparrot_training - Step 49812: {'lr': 1.756409416497906e-08, 'samples': 25504256, 'steps': 49812, 'batch_loss/train': 0.6611596918664873} +12/29/2021 12:29:20 - INFO - codeparrot_training - Step 49813: {'lr': 1.7377741205359954e-08, 'samples': 25504768, 'steps': 49813, 'batch_loss/train': 0.6679522152990103} +12/29/2021 12:29:34 - INFO - codeparrot_training - Step 49814: {'lr': 1.7192382082253932e-08, 'samples': 25505280, 'steps': 49814, 'batch_loss/train': 0.7427371232770383} +12/29/2021 12:29:44 - INFO - codeparrot_training - Step 49815: {'lr': 1.7008016796410398e-08, 'samples': 25505792, 'steps': 49815, 'batch_loss/train': 0.6895394108723849} +12/29/2021 12:29:55 - INFO - codeparrot_training - Step 49816: {'lr': 1.6824645348550993e-08, 'samples': 25506304, 'steps': 49816, 'batch_loss/train': 0.7652262065093964} +12/29/2021 12:30:07 - INFO - codeparrot_training - Step 49817: {'lr': 1.6642267739397364e-08, 'samples': 25506816, 'steps': 49817, 'batch_loss/train': 0.804439929779619} +12/29/2021 12:30:18 - INFO - codeparrot_training - Step 49818: {'lr': 1.6460883969698915e-08, 'samples': 25507328, 'steps': 49818, 'batch_loss/train': 0.725324543658644} +12/29/2021 12:30:28 - INFO - codeparrot_training - Step 49819: {'lr': 1.628049404014953e-08, 'samples': 25507840, 'steps': 49819, 'batch_loss/train': 0.6955778123810887} +12/29/2021 12:30:40 - INFO - codeparrot_training - Step 49820: {'lr': 1.610109795149861e-08, 'samples': 25508352, 'steps': 49820, 'batch_loss/train': 0.7272875341586769} +12/29/2021 12:30:51 - INFO - codeparrot_training - Step 49821: {'lr': 1.5922695704412294e-08, 'samples': 25508864, 'steps': 49821, 'batch_loss/train': 0.5618532483931631} +12/29/2021 12:31:02 - INFO - codeparrot_training - Step 49822: {'lr': 1.5745287299639975e-08, 'samples': 25509376, 'steps': 49822, 'batch_loss/train': 0.5989105913322419} +12/29/2021 12:31:12 - INFO - codeparrot_training - Step 49823: {'lr': 1.556887273787555e-08, 'samples': 25509888, 'steps': 49823, 'batch_loss/train': 0.6162973502650857} +12/29/2021 12:31:24 - INFO - codeparrot_training - Step 49824: {'lr': 1.5393452019812904e-08, 'samples': 25510400, 'steps': 49824, 'batch_loss/train': 0.811881048779469} +12/29/2021 12:31:35 - INFO - codeparrot_training - Step 49825: {'lr': 1.5219025146145927e-08, 'samples': 25510912, 'steps': 49825, 'batch_loss/train': 0.6841723259421997} +12/29/2021 12:31:46 - INFO - codeparrot_training - Step 49826: {'lr': 1.5045592117596264e-08, 'samples': 25511424, 'steps': 49826, 'batch_loss/train': 0.6634762440808117} +12/29/2021 12:32:00 - INFO - codeparrot_training - Step 49827: {'lr': 1.4873152934830047e-08, 'samples': 25511936, 'steps': 49827, 'batch_loss/train': 0.7171554123051465} +12/29/2021 12:32:10 - INFO - codeparrot_training - Step 49828: {'lr': 1.470170759854117e-08, 'samples': 25512448, 'steps': 49828, 'batch_loss/train': 0.7569931368343532} +12/29/2021 12:32:21 - INFO - codeparrot_training - Step 49829: {'lr': 1.4531256109395763e-08, 'samples': 25512960, 'steps': 49829, 'batch_loss/train': 0.8454291627276689} +12/29/2021 12:32:33 - INFO - codeparrot_training - Step 49830: {'lr': 1.4361798468087716e-08, 'samples': 25513472, 'steps': 49830, 'batch_loss/train': 0.6952474664431065} +12/29/2021 12:32:44 - INFO - codeparrot_training - Step 49831: {'lr': 1.4193334675283164e-08, 'samples': 25513984, 'steps': 49831, 'batch_loss/train': 0.7410897798836231} +12/29/2021 12:32:54 - INFO - codeparrot_training - Step 49832: {'lr': 1.4025864731675997e-08, 'samples': 25514496, 'steps': 49832, 'batch_loss/train': 0.7304212525486946} +12/29/2021 12:33:05 - INFO - codeparrot_training - Step 49833: {'lr': 1.3859388637876835e-08, 'samples': 25515008, 'steps': 49833, 'batch_loss/train': 0.729501839261502} +12/29/2021 12:33:17 - INFO - codeparrot_training - Step 49834: {'lr': 1.3693906394607325e-08, 'samples': 25515520, 'steps': 49834, 'batch_loss/train': 0.6994355034548789} +12/29/2021 12:33:28 - INFO - codeparrot_training - Step 49835: {'lr': 1.3529418002505845e-08, 'samples': 25516032, 'steps': 49835, 'batch_loss/train': 0.78305643517524} +12/29/2021 12:33:38 - INFO - codeparrot_training - Step 49836: {'lr': 1.3365923462183015e-08, 'samples': 25516544, 'steps': 49836, 'batch_loss/train': 0.7357468730770051} +12/29/2021 12:33:52 - INFO - codeparrot_training - Step 49837: {'lr': 1.3203422774360485e-08, 'samples': 25517056, 'steps': 49837, 'batch_loss/train': 0.6766395317390561} +12/29/2021 12:34:03 - INFO - codeparrot_training - Step 49838: {'lr': 1.3041915939648874e-08, 'samples': 25517568, 'steps': 49838, 'batch_loss/train': 0.6528021135600284} +12/29/2021 12:34:13 - INFO - codeparrot_training - Step 49839: {'lr': 1.2881402958686561e-08, 'samples': 25518080, 'steps': 49839, 'batch_loss/train': 0.6304772743023932} +12/29/2021 12:34:26 - INFO - codeparrot_training - Step 49840: {'lr': 1.272188383208417e-08, 'samples': 25518592, 'steps': 49840, 'batch_loss/train': 0.6651781462132931} +12/29/2021 12:34:36 - INFO - codeparrot_training - Step 49841: {'lr': 1.2563358560535587e-08, 'samples': 25519104, 'steps': 49841, 'batch_loss/train': 0.765004638582468} +12/29/2021 12:34:47 - INFO - codeparrot_training - Step 49842: {'lr': 1.2405827144651438e-08, 'samples': 25519616, 'steps': 49842, 'batch_loss/train': 0.7789105498231947} +12/29/2021 12:34:57 - INFO - codeparrot_training - Step 49843: {'lr': 1.2249289585042345e-08, 'samples': 25520128, 'steps': 49843, 'batch_loss/train': 0.7060432913713157} +12/29/2021 12:35:12 - INFO - codeparrot_training - Step 49844: {'lr': 1.2093745882346684e-08, 'samples': 25520640, 'steps': 49844, 'batch_loss/train': 0.6563885288778692} +12/29/2021 12:35:22 - INFO - codeparrot_training - Step 49845: {'lr': 1.1939196037147326e-08, 'samples': 25521152, 'steps': 49845, 'batch_loss/train': 1.6233989698812366} +12/29/2021 12:35:33 - INFO - codeparrot_training - Step 49846: {'lr': 1.1785640050110403e-08, 'samples': 25521664, 'steps': 49846, 'batch_loss/train': 0.7303674668073654} +12/29/2021 12:35:45 - INFO - codeparrot_training - Step 49847: {'lr': 1.163307792181878e-08, 'samples': 25522176, 'steps': 49847, 'batch_loss/train': 0.8477260316722095} +12/29/2021 12:35:56 - INFO - codeparrot_training - Step 49848: {'lr': 1.1481509652883082e-08, 'samples': 25522688, 'steps': 49848, 'batch_loss/train': 0.7645454509183764} +12/29/2021 12:36:06 - INFO - codeparrot_training - Step 49849: {'lr': 1.1330935243886176e-08, 'samples': 25523200, 'steps': 49849, 'batch_loss/train': 0.72147445846349} +12/29/2021 12:36:18 - INFO - codeparrot_training - Step 49850: {'lr': 1.118135469546644e-08, 'samples': 25523712, 'steps': 49850, 'batch_loss/train': 0.7230130634270608} +12/29/2021 12:36:29 - INFO - codeparrot_training - Step 49851: {'lr': 1.1032768008178984e-08, 'samples': 25524224, 'steps': 49851, 'batch_loss/train': 0.6383392107672989} +12/29/2021 12:36:40 - INFO - codeparrot_training - Step 49852: {'lr': 1.0885175182662188e-08, 'samples': 25524736, 'steps': 49852, 'batch_loss/train': 0.7492544691776857} +12/29/2021 12:36:50 - INFO - codeparrot_training - Step 49853: {'lr': 1.0738576219443408e-08, 'samples': 25525248, 'steps': 49853, 'batch_loss/train': 0.6937068431871012} +12/29/2021 12:37:04 - INFO - codeparrot_training - Step 49854: {'lr': 1.0592971119161022e-08, 'samples': 25525760, 'steps': 49854, 'batch_loss/train': 0.7298724427819252} +12/29/2021 12:37:15 - INFO - codeparrot_training - Step 49855: {'lr': 1.0448359882342385e-08, 'samples': 25526272, 'steps': 49855, 'batch_loss/train': 0.7417085864581168} +12/29/2021 12:37:25 - INFO - codeparrot_training - Step 49856: {'lr': 1.030474250959812e-08, 'samples': 25526784, 'steps': 49856, 'batch_loss/train': 0.7198866007383913} +12/29/2021 12:37:37 - INFO - codeparrot_training - Step 49857: {'lr': 1.0162119001511094e-08, 'samples': 25527296, 'steps': 49857, 'batch_loss/train': 0.8586070858873427} +12/29/2021 12:37:48 - INFO - codeparrot_training - Step 49858: {'lr': 1.0020489358608665e-08, 'samples': 25527808, 'steps': 49858, 'batch_loss/train': 0.6739072455093265} +12/29/2021 12:37:59 - INFO - codeparrot_training - Step 49859: {'lr': 9.879853581473696e-09, 'samples': 25528320, 'steps': 49859, 'batch_loss/train': 0.6380928571743425} +12/29/2021 12:38:11 - INFO - codeparrot_training - Step 49860: {'lr': 9.740211670661303e-09, 'samples': 25528832, 'steps': 49860, 'batch_loss/train': 0.6396625279448926} +12/29/2021 12:38:21 - INFO - codeparrot_training - Step 49861: {'lr': 9.601563626726595e-09, 'samples': 25529344, 'steps': 49861, 'batch_loss/train': 0.6533124377019703} +12/29/2021 12:38:32 - INFO - codeparrot_training - Step 49862: {'lr': 9.463909450224683e-09, 'samples': 25529856, 'steps': 49862, 'batch_loss/train': 0.7405005963519216} +12/29/2021 12:38:46 - INFO - codeparrot_training - Step 49863: {'lr': 9.327249141710681e-09, 'samples': 25530368, 'steps': 49863, 'batch_loss/train': 0.6517513596918434} +12/29/2021 12:38:57 - INFO - codeparrot_training - Step 49864: {'lr': 9.191582701711943e-09, 'samples': 25530880, 'steps': 49864, 'batch_loss/train': 0.665884930989705} +12/29/2021 12:39:08 - INFO - codeparrot_training - Step 49865: {'lr': 9.05691013078358e-09, 'samples': 25531392, 'steps': 49865, 'batch_loss/train': 0.6557984943501651} +12/29/2021 12:39:18 - INFO - codeparrot_training - Step 49866: {'lr': 8.92323142945295e-09, 'samples': 25531904, 'steps': 49866, 'batch_loss/train': 0.5791574292816222} +12/29/2021 12:39:31 - INFO - codeparrot_training - Step 49867: {'lr': 8.790546598219651e-09, 'samples': 25532416, 'steps': 49867, 'batch_loss/train': 0.5707398122176528} +12/29/2021 12:39:41 - INFO - codeparrot_training - Step 49868: {'lr': 8.658855637666551e-09, 'samples': 25532928, 'steps': 49868, 'batch_loss/train': 0.7500795789528638} +12/29/2021 12:39:52 - INFO - codeparrot_training - Step 49869: {'lr': 8.528158548293252e-09, 'samples': 25533440, 'steps': 49869, 'batch_loss/train': 0.7108259308151901} +12/29/2021 12:40:04 - INFO - codeparrot_training - Step 49870: {'lr': 8.398455330627108e-09, 'samples': 25533952, 'steps': 49870, 'batch_loss/train': 0.678325857501477} +12/29/2021 12:40:14 - INFO - codeparrot_training - Step 49871: {'lr': 8.269745985167721e-09, 'samples': 25534464, 'steps': 49871, 'batch_loss/train': 0.7411825607996434} +12/29/2021 12:40:25 - INFO - codeparrot_training - Step 49872: {'lr': 8.142030512414688e-09, 'samples': 25534976, 'steps': 49872, 'batch_loss/train': 0.7576148207299411} +12/29/2021 12:40:39 - INFO - codeparrot_training - Step 49873: {'lr': 8.015308912923126e-09, 'samples': 25535488, 'steps': 49873, 'batch_loss/train': 0.6602278682403266} +12/29/2021 12:40:50 - INFO - codeparrot_training - Step 49874: {'lr': 7.889581187164874e-09, 'samples': 25536000, 'steps': 49874, 'batch_loss/train': 0.7005574550712481} +12/29/2021 12:41:00 - INFO - codeparrot_training - Step 49875: {'lr': 7.764847335639536e-09, 'samples': 25536512, 'steps': 49875, 'batch_loss/train': 0.6270767957903445} +12/29/2021 12:41:11 - INFO - codeparrot_training - Step 49876: {'lr': 7.64110735884671e-09, 'samples': 25537024, 'steps': 49876, 'batch_loss/train': 0.7794397185789421} +12/29/2021 12:41:23 - INFO - codeparrot_training - Step 49877: {'lr': 7.518361257258244e-09, 'samples': 25537536, 'steps': 49877, 'batch_loss/train': 0.7171402382664382} +12/29/2021 12:41:34 - INFO - codeparrot_training - Step 49878: {'lr': 7.396609031401491e-09, 'samples': 25538048, 'steps': 49878, 'batch_loss/train': 0.8075303738005459} +12/29/2021 12:41:44 - INFO - codeparrot_training - Step 49879: {'lr': 7.2758506817482975e-09, 'samples': 25538560, 'steps': 49879, 'batch_loss/train': 0.6314128148369491} +12/29/2021 12:41:56 - INFO - codeparrot_training - Step 49880: {'lr': 7.1560862087705066e-09, 'samples': 25539072, 'steps': 49880, 'batch_loss/train': 0.8618857730180025} +12/29/2021 12:42:07 - INFO - codeparrot_training - Step 49881: {'lr': 7.037315612939965e-09, 'samples': 25539584, 'steps': 49881, 'batch_loss/train': 0.7948663551360369} +12/29/2021 12:42:18 - INFO - codeparrot_training - Step 49882: {'lr': 6.9195388947562725e-09, 'samples': 25540096, 'steps': 49882, 'batch_loss/train': 0.5702019366435707} +12/29/2021 12:42:32 - INFO - codeparrot_training - Step 49883: {'lr': 6.8027560546357615e-09, 'samples': 25540608, 'steps': 49883, 'batch_loss/train': 0.7106835730373859} +12/29/2021 12:42:42 - INFO - codeparrot_training - Step 49884: {'lr': 6.686967093105789e-09, 'samples': 25541120, 'steps': 49884, 'batch_loss/train': 0.7161623956053518} +12/29/2021 12:42:53 - INFO - codeparrot_training - Step 49885: {'lr': 6.5721720105826885e-09, 'samples': 25541632, 'steps': 49885, 'batch_loss/train': 0.8213379960507154} +12/29/2021 12:43:03 - INFO - codeparrot_training - Step 49886: {'lr': 6.45837080751055e-09, 'samples': 25542144, 'steps': 49886, 'batch_loss/train': 0.7270708410069346} +12/29/2021 12:43:16 - INFO - codeparrot_training - Step 49887: {'lr': 6.345563484388972e-09, 'samples': 25542656, 'steps': 49887, 'batch_loss/train': 0.5877524325624108} +12/29/2021 12:43:27 - INFO - codeparrot_training - Step 49888: {'lr': 6.233750041634289e-09, 'samples': 25543168, 'steps': 49888, 'batch_loss/train': 0.5724373486591503} +12/29/2021 12:43:37 - INFO - codeparrot_training - Step 49889: {'lr': 6.122930479718347e-09, 'samples': 25543680, 'steps': 49889, 'batch_loss/train': 0.6685664977412671} +12/29/2021 12:43:49 - INFO - codeparrot_training - Step 49890: {'lr': 6.013104799057478e-09, 'samples': 25544192, 'steps': 49890, 'batch_loss/train': 0.6874986565671861} +12/29/2021 12:44:00 - INFO - codeparrot_training - Step 49891: {'lr': 5.904273000095772e-09, 'samples': 25544704, 'steps': 49891, 'batch_loss/train': 0.7923079524189234} +12/29/2021 12:44:11 - INFO - codeparrot_training - Step 49892: {'lr': 5.796435083249563e-09, 'samples': 25545216, 'steps': 49892, 'batch_loss/train': 0.726694033946842} +12/29/2021 12:44:24 - INFO - codeparrot_training - Step 49893: {'lr': 5.689591048962939e-09, 'samples': 25545728, 'steps': 49893, 'batch_loss/train': 0.7698747836984694} +12/29/2021 12:44:35 - INFO - codeparrot_training - Step 49894: {'lr': 5.58374089767999e-09, 'samples': 25546240, 'steps': 49894, 'batch_loss/train': 0.6994294598698616} +12/29/2021 12:44:46 - INFO - codeparrot_training - Step 49895: {'lr': 5.478884629789294e-09, 'samples': 25546752, 'steps': 49895, 'batch_loss/train': 0.6896230096463114} +12/29/2021 12:44:56 - INFO - codeparrot_training - Step 49896: {'lr': 5.375022245707184e-09, 'samples': 25547264, 'steps': 49896, 'batch_loss/train': 0.7120068224612623} +12/29/2021 12:45:09 - INFO - codeparrot_training - Step 49897: {'lr': 5.2721537458777506e-09, 'samples': 25547776, 'steps': 49897, 'batch_loss/train': 0.6731583990622312} +12/29/2021 12:45:19 - INFO - codeparrot_training - Step 49898: {'lr': 5.17027913068957e-09, 'samples': 25548288, 'steps': 49898, 'batch_loss/train': 0.6942173941060901} +12/29/2021 12:45:30 - INFO - codeparrot_training - Step 49899: {'lr': 5.069398400531222e-09, 'samples': 25548800, 'steps': 49899, 'batch_loss/train': 0.7435921290889382} +12/29/2021 12:45:42 - INFO - codeparrot_training - Step 49900: {'lr': 4.969511555846795e-09, 'samples': 25549312, 'steps': 49900, 'batch_loss/train': 0.696451535623055} +12/29/2021 12:45:53 - INFO - codeparrot_training - Step 49901: {'lr': 4.870618596997112e-09, 'samples': 25549824, 'steps': 49901, 'batch_loss/train': 0.9400501023046672} +12/29/2021 12:46:03 - INFO - codeparrot_training - Step 49902: {'lr': 4.772719524398505e-09, 'samples': 25550336, 'steps': 49902, 'batch_loss/train': 0.6924040998565033} +12/29/2021 12:46:16 - INFO - codeparrot_training - Step 49903: {'lr': 4.675814338411799e-09, 'samples': 25550848, 'steps': 49903, 'batch_loss/train': 0.676469320897013} +12/29/2021 12:46:26 - INFO - codeparrot_training - Step 49904: {'lr': 4.579903039453326e-09, 'samples': 25551360, 'steps': 49904, 'batch_loss/train': 1.4724192488065455} +12/29/2021 12:46:37 - INFO - codeparrot_training - Step 49905: {'lr': 4.484985627883908e-09, 'samples': 25551872, 'steps': 49905, 'batch_loss/train': 0.6538780559785664} +12/29/2021 12:46:48 - INFO - codeparrot_training - Step 49906: {'lr': 4.391062104092125e-09, 'samples': 25552384, 'steps': 49906, 'batch_loss/train': 0.7409894210286438} +12/29/2021 12:47:02 - INFO - codeparrot_training - Step 49907: {'lr': 4.298132468438798e-09, 'samples': 25552896, 'steps': 49907, 'batch_loss/train': 0.7304472564719617} +12/29/2021 12:47:12 - INFO - codeparrot_training - Step 49908: {'lr': 4.206196721312505e-09, 'samples': 25553408, 'steps': 49908, 'batch_loss/train': 0.6630393047817051} +12/29/2021 12:47:23 - INFO - codeparrot_training - Step 49909: {'lr': 4.1152548630740695e-09, 'samples': 25553920, 'steps': 49909, 'batch_loss/train': 0.7955454606562853} +12/29/2021 12:47:35 - INFO - codeparrot_training - Step 49910: {'lr': 4.0253068940565574e-09, 'samples': 25554432, 'steps': 49910, 'batch_loss/train': 0.7215407658368349} +12/29/2021 12:47:46 - INFO - codeparrot_training - Step 49911: {'lr': 3.936352814648547e-09, 'samples': 25554944, 'steps': 49911, 'batch_loss/train': 0.8101383065804839} +12/29/2021 12:47:56 - INFO - codeparrot_training - Step 49912: {'lr': 3.848392625210861e-09, 'samples': 25555456, 'steps': 49912, 'batch_loss/train': 0.5941392190870829} +12/29/2021 12:48:11 - INFO - codeparrot_training - Step 49913: {'lr': 3.761426326048812e-09, 'samples': 25555968, 'steps': 49913, 'batch_loss/train': 0.7438274944433942} +12/29/2021 12:48:22 - INFO - codeparrot_training - Step 49914: {'lr': 3.675453917550975e-09, 'samples': 25556480, 'steps': 49914, 'batch_loss/train': 0.655243890825659} +12/29/2021 12:48:32 - INFO - codeparrot_training - Step 49915: {'lr': 3.590475400050419e-09, 'samples': 25556992, 'steps': 49915, 'batch_loss/train': 0.584951470606029} +12/29/2021 12:48:43 - INFO - codeparrot_training - Step 49916: {'lr': 3.506490773880211e-09, 'samples': 25557504, 'steps': 49916, 'batch_loss/train': 0.7396182650700212} +12/29/2021 12:48:55 - INFO - codeparrot_training - Step 49917: {'lr': 3.423500039345662e-09, 'samples': 25558016, 'steps': 49917, 'batch_loss/train': 0.608006093185395} +12/29/2021 12:49:06 - INFO - codeparrot_training - Step 49918: {'lr': 3.341503196835349e-09, 'samples': 25558528, 'steps': 49918, 'batch_loss/train': 0.6754583325237036} +12/29/2021 12:49:16 - INFO - codeparrot_training - Step 49919: {'lr': 3.2605002466268296e-09, 'samples': 25559040, 'steps': 49919, 'batch_loss/train': 0.6148792556487024} +12/29/2021 12:49:29 - INFO - codeparrot_training - Step 49920: {'lr': 3.180491189080925e-09, 'samples': 25559552, 'steps': 49920, 'batch_loss/train': 0.6257413193816319} +12/29/2021 12:49:39 - INFO - codeparrot_training - Step 49921: {'lr': 3.1014760244751914e-09, 'samples': 25560064, 'steps': 49921, 'batch_loss/train': 0.767559562344104} +12/29/2021 12:49:50 - INFO - codeparrot_training - Step 49922: {'lr': 3.023454753142696e-09, 'samples': 25560576, 'steps': 49922, 'batch_loss/train': 0.8239208629820496} +12/29/2021 12:50:04 - INFO - codeparrot_training - Step 49923: {'lr': 2.9464273753887494e-09, 'samples': 25561088, 'steps': 49923, 'batch_loss/train': 0.7353542733471841} +12/29/2021 12:50:15 - INFO - codeparrot_training - Step 49924: {'lr': 2.870393891546419e-09, 'samples': 25561600, 'steps': 49924, 'batch_loss/train': 0.7918819454498589} +12/29/2021 12:50:25 - INFO - codeparrot_training - Step 49925: {'lr': 2.795354301865505e-09, 'samples': 25562112, 'steps': 49925, 'batch_loss/train': 0.7949960669502616} +12/29/2021 12:50:36 - INFO - codeparrot_training - Step 49926: {'lr': 2.721308606706829e-09, 'samples': 25562624, 'steps': 49926, 'batch_loss/train': 0.7303789751604199} +12/29/2021 12:50:48 - INFO - codeparrot_training - Step 49927: {'lr': 2.648256806320193e-09, 'samples': 25563136, 'steps': 49927, 'batch_loss/train': 0.6336272184271365} +12/29/2021 12:50:59 - INFO - codeparrot_training - Step 49928: {'lr': 2.5761989010109064e-09, 'samples': 25563648, 'steps': 49928, 'batch_loss/train': 0.7514077650848776} +12/29/2021 12:51:09 - INFO - codeparrot_training - Step 49929: {'lr': 2.5051348910565264e-09, 'samples': 25564160, 'steps': 49929, 'batch_loss/train': 0.7285669203847647} +12/29/2021 12:51:23 - INFO - codeparrot_training - Step 49930: {'lr': 2.435064776734608e-09, 'samples': 25564672, 'steps': 49930, 'batch_loss/train': 0.7881956762867048} +12/29/2021 12:51:34 - INFO - codeparrot_training - Step 49931: {'lr': 2.3659885583504627e-09, 'samples': 25565184, 'steps': 49931, 'batch_loss/train': 0.6491673714481294} +12/29/2021 12:51:45 - INFO - codeparrot_training - Step 49932: {'lr': 2.297906236181646e-09, 'samples': 25565696, 'steps': 49932, 'batch_loss/train': 0.6345381878782064} +12/29/2021 12:51:57 - INFO - codeparrot_training - Step 49933: {'lr': 2.230817810450203e-09, 'samples': 25566208, 'steps': 49933, 'batch_loss/train': 0.7228429140523076} +12/29/2021 12:52:07 - INFO - codeparrot_training - Step 49934: {'lr': 2.164723281461445e-09, 'samples': 25566720, 'steps': 49934, 'batch_loss/train': 0.7681522604543716} +12/29/2021 12:52:18 - INFO - codeparrot_training - Step 49935: {'lr': 2.099622649492927e-09, 'samples': 25567232, 'steps': 49935, 'batch_loss/train': 0.6954580461606383} +12/29/2021 12:52:29 - INFO - codeparrot_training - Step 49936: {'lr': 2.0355159147666947e-09, 'samples': 25567744, 'steps': 49936, 'batch_loss/train': 0.7016292652115226} +12/29/2021 12:52:41 - INFO - codeparrot_training - Step 49937: {'lr': 1.9724030775325475e-09, 'samples': 25568256, 'steps': 49937, 'batch_loss/train': 0.7271888358518481} +12/29/2021 12:52:51 - INFO - codeparrot_training - Step 49938: {'lr': 1.9102841380680415e-09, 'samples': 25568768, 'steps': 49938, 'batch_loss/train': 0.7010356718674302} +12/29/2021 12:53:02 - INFO - codeparrot_training - Step 49939: {'lr': 1.8491590966229766e-09, 'samples': 25569280, 'steps': 49939, 'batch_loss/train': 0.5926259499974549} +12/29/2021 12:53:14 - INFO - codeparrot_training - Step 49940: {'lr': 1.7890279534471531e-09, 'samples': 25569792, 'steps': 49940, 'batch_loss/train': 0.6718419110402465} +12/29/2021 12:53:25 - INFO - codeparrot_training - Step 49941: {'lr': 1.7298907087348603e-09, 'samples': 25570304, 'steps': 49941, 'batch_loss/train': 0.8373827980831265} +12/29/2021 12:53:36 - INFO - codeparrot_training - Step 49942: {'lr': 1.6717473627636536e-09, 'samples': 25570816, 'steps': 49942, 'batch_loss/train': 0.6776735354214907} +12/29/2021 12:53:50 - INFO - codeparrot_training - Step 49943: {'lr': 1.6145979157278223e-09, 'samples': 25571328, 'steps': 49943, 'batch_loss/train': 0.5027714566385839} +12/29/2021 12:54:00 - INFO - codeparrot_training - Step 49944: {'lr': 1.558442367904922e-09, 'samples': 25571840, 'steps': 49944, 'batch_loss/train': 0.7091701340395957} +12/29/2021 12:54:11 - INFO - codeparrot_training - Step 49945: {'lr': 1.503280719461486e-09, 'samples': 25572352, 'steps': 49945, 'batch_loss/train': 0.7306075245141983} +12/29/2021 12:54:23 - INFO - codeparrot_training - Step 49946: {'lr': 1.4491129706750705e-09, 'samples': 25572864, 'steps': 49946, 'batch_loss/train': 0.7347925333306193} +12/29/2021 12:54:34 - INFO - codeparrot_training - Step 49947: {'lr': 1.3959391217122087e-09, 'samples': 25573376, 'steps': 49947, 'batch_loss/train': 0.6795259951613843} +12/29/2021 12:54:45 - INFO - codeparrot_training - Step 49948: {'lr': 1.343759172794945e-09, 'samples': 25573888, 'steps': 49948, 'batch_loss/train': 0.5930272545083426} +12/29/2021 12:54:55 - INFO - codeparrot_training - Step 49949: {'lr': 1.2925731241730797e-09, 'samples': 25574400, 'steps': 49949, 'batch_loss/train': 0.6930013122037053} +12/29/2021 12:55:08 - INFO - codeparrot_training - Step 49950: {'lr': 1.242380975985391e-09, 'samples': 25574912, 'steps': 49950, 'batch_loss/train': 0.7828089320100844} +12/29/2021 12:55:18 - INFO - codeparrot_training - Step 49951: {'lr': 1.1931827284816787e-09, 'samples': 25575424, 'steps': 49951, 'batch_loss/train': 0.7072200626134872} +12/29/2021 12:55:29 - INFO - codeparrot_training - Step 49952: {'lr': 1.1449783818284765e-09, 'samples': 25575936, 'steps': 49952, 'batch_loss/train': 0.6803266940405592} +12/29/2021 12:55:43 - INFO - codeparrot_training - Step 49953: {'lr': 1.0977679362200732e-09, 'samples': 25576448, 'steps': 49953, 'batch_loss/train': 0.7213703240267932} +12/29/2021 12:55:54 - INFO - codeparrot_training - Step 49954: {'lr': 1.051551391850758e-09, 'samples': 25576960, 'steps': 49954, 'batch_loss/train': 1.5281107331393287} +12/29/2021 12:56:04 - INFO - codeparrot_training - Step 49955: {'lr': 1.0063287489148199e-09, 'samples': 25577472, 'steps': 49955, 'batch_loss/train': 0.5097056819358841} +12/29/2021 12:56:16 - INFO - codeparrot_training - Step 49956: {'lr': 9.621000075787922e-10, 'samples': 25577984, 'steps': 49956, 'batch_loss/train': 0.5891997774597257} +12/29/2021 12:56:27 - INFO - codeparrot_training - Step 49957: {'lr': 9.188651680092086e-10, 'samples': 25578496, 'steps': 49957, 'batch_loss/train': 0.8008020456181839} +12/29/2021 12:56:38 - INFO - codeparrot_training - Step 49958: {'lr': 8.766242304003579e-10, 'samples': 25579008, 'steps': 49958, 'batch_loss/train': 0.7720384951680899} +12/29/2021 12:56:48 - INFO - codeparrot_training - Step 49959: {'lr': 8.353771949187738e-10, 'samples': 25579520, 'steps': 49959, 'batch_loss/train': 0.6761095192632638} +12/29/2021 12:57:01 - INFO - codeparrot_training - Step 49960: {'lr': 7.951240617032341e-10, 'samples': 25580032, 'steps': 49960, 'batch_loss/train': 0.737551839556545} +12/29/2021 12:57:12 - INFO - codeparrot_training - Step 49961: {'lr': 7.558648309480276e-10, 'samples': 25580544, 'steps': 49961, 'batch_loss/train': 0.7837783619761467} +12/29/2021 12:57:22 - INFO - codeparrot_training - Step 49962: {'lr': 7.175995027919324e-10, 'samples': 25581056, 'steps': 49962, 'batch_loss/train': 0.6429206780157983} +12/29/2021 12:57:36 - INFO - codeparrot_training - Step 49963: {'lr': 6.803280773737264e-10, 'samples': 25581568, 'steps': 49963, 'batch_loss/train': 0.6214303034357727} +12/29/2021 12:57:47 - INFO - codeparrot_training - Step 49964: {'lr': 6.440505548599429e-10, 'samples': 25582080, 'steps': 49964, 'batch_loss/train': 0.7495497721247375} +12/29/2021 12:57:57 - INFO - codeparrot_training - Step 49965: {'lr': 6.087669353893599e-10, 'samples': 25582592, 'steps': 49965, 'batch_loss/train': 0.6696416735649109} +12/29/2021 12:58:10 - INFO - codeparrot_training - Step 49966: {'lr': 5.744772191007552e-10, 'samples': 25583104, 'steps': 49966, 'batch_loss/train': 0.68606261536479} +12/29/2021 12:58:20 - INFO - codeparrot_training - Step 49967: {'lr': 5.411814061329068e-10, 'samples': 25583616, 'steps': 49967, 'batch_loss/train': 0.7459427425637841} +12/29/2021 12:58:31 - INFO - codeparrot_training - Step 49968: {'lr': 5.088794965968368e-10, 'samples': 25584128, 'steps': 49968, 'batch_loss/train': 0.743040238507092} +12/29/2021 12:58:45 - INFO - codeparrot_training - Step 49969: {'lr': 4.775714906590789e-10, 'samples': 25584640, 'steps': 49969, 'batch_loss/train': 0.784685343503952} +12/29/2021 12:58:55 - INFO - codeparrot_training - Step 49970: {'lr': 4.4725738843065523e-10, 'samples': 25585152, 'steps': 49970, 'batch_loss/train': 0.8525404529646039} +12/29/2021 12:59:06 - INFO - codeparrot_training - Step 49971: {'lr': 4.1793719002258813e-10, 'samples': 25585664, 'steps': 49971, 'batch_loss/train': 0.7638161415234208} +12/29/2021 12:59:17 - INFO - codeparrot_training - Step 49972: {'lr': 3.8961089557365546e-10, 'samples': 25586176, 'steps': 49972, 'batch_loss/train': 0.7268530630972236} +12/29/2021 12:59:29 - INFO - codeparrot_training - Step 49973: {'lr': 3.62278505167124e-10, 'samples': 25586688, 'steps': 49973, 'batch_loss/train': 0.6703128577210009} +12/29/2021 12:59:40 - INFO - codeparrot_training - Step 49974: {'lr': 3.3594001891401605e-10, 'samples': 25587200, 'steps': 49974, 'batch_loss/train': 0.79679602291435} +12/29/2021 12:59:50 - INFO - codeparrot_training - Step 49975: {'lr': 3.1059543692535384e-10, 'samples': 25587712, 'steps': 49975, 'batch_loss/train': 0.6228625979274511} +12/29/2021 13:00:02 - INFO - codeparrot_training - Step 49976: {'lr': 2.8624475931215976e-10, 'samples': 25588224, 'steps': 49976, 'batch_loss/train': 0.7484005230944604} +12/29/2021 13:00:13 - INFO - codeparrot_training - Step 49977: {'lr': 2.628879861854561e-10, 'samples': 25588736, 'steps': 49977, 'batch_loss/train': 0.7667995793744922} +12/29/2021 13:00:24 - INFO - codeparrot_training - Step 49978: {'lr': 2.405251176007539e-10, 'samples': 25589248, 'steps': 49978, 'batch_loss/train': 0.6338405134156346} +12/29/2021 13:00:36 - INFO - codeparrot_training - Step 49979: {'lr': 2.191561536690756e-10, 'samples': 25589760, 'steps': 49979, 'batch_loss/train': 0.6390025834552944} +12/29/2021 13:00:46 - INFO - codeparrot_training - Step 49980: {'lr': 1.9878109444593228e-10, 'samples': 25590272, 'steps': 49980, 'batch_loss/train': 0.7874329024925828} +12/29/2021 13:00:57 - INFO - codeparrot_training - Step 49981: {'lr': 1.7939994007010186e-10, 'samples': 25590784, 'steps': 49981, 'batch_loss/train': 0.7303680684417486} +12/29/2021 13:01:08 - INFO - codeparrot_training - Step 49982: {'lr': 1.6101269056933987e-10, 'samples': 25591296, 'steps': 49982, 'batch_loss/train': 0.6950306771323085} +12/29/2021 13:01:22 - INFO - codeparrot_training - Step 49983: {'lr': 1.4361934602691306e-10, 'samples': 25591808, 'steps': 49983, 'batch_loss/train': 0.7469236357137561} +12/29/2021 13:01:32 - INFO - codeparrot_training - Step 49984: {'lr': 1.2721990652608817e-10, 'samples': 25592320, 'steps': 49984, 'batch_loss/train': 0.5969591030152515} +12/29/2021 13:01:43 - INFO - codeparrot_training - Step 49985: {'lr': 1.1181437212237633e-10, 'samples': 25592832, 'steps': 49985, 'batch_loss/train': 0.6338297328911722} +12/29/2021 13:01:55 - INFO - codeparrot_training - Step 49986: {'lr': 9.74027428712887e-11, 'samples': 25593344, 'steps': 49986, 'batch_loss/train': 0.7630333877168596} +12/29/2021 13:02:06 - INFO - codeparrot_training - Step 49987: {'lr': 8.398501882833642e-11, 'samples': 25593856, 'steps': 49987, 'batch_loss/train': 0.688161738216877} +12/29/2021 13:02:16 - INFO - codeparrot_training - Step 49988: {'lr': 7.156120007678624e-11, 'samples': 25594368, 'steps': 49988, 'batch_loss/train': 0.7013966045342386} +12/29/2021 13:02:28 - INFO - codeparrot_training - Step 49989: {'lr': 6.013128664439371e-11, 'samples': 25594880, 'steps': 49989, 'batch_loss/train': 0.7887239241972566} +12/29/2021 13:02:39 - INFO - codeparrot_training - Step 49990: {'lr': 4.9695278558914426e-11, 'samples': 25595392, 'steps': 49990, 'batch_loss/train': 0.7387627973221242} +12/29/2021 13:02:50 - INFO - codeparrot_training - Step 49991: {'lr': 4.0253175875859527e-11, 'samples': 25595904, 'steps': 49991, 'batch_loss/train': 0.6628477505873889} +12/29/2021 13:03:00 - INFO - codeparrot_training - Step 49992: {'lr': 3.180497865074017e-11, 'samples': 25596416, 'steps': 49992, 'batch_loss/train': 0.5629864588845521} +12/29/2021 13:03:14 - INFO - codeparrot_training - Step 49993: {'lr': 2.4350686911311925e-11, 'samples': 25596928, 'steps': 49993, 'batch_loss/train': 0.6278233337216079} +12/29/2021 13:03:25 - INFO - codeparrot_training - Step 49994: {'lr': 1.78903006575748e-11, 'samples': 25597440, 'steps': 49994, 'batch_loss/train': 0.6894345912151039} +12/29/2021 13:03:35 - INFO - codeparrot_training - Step 49995: {'lr': 1.2423819945039938e-11, 'samples': 25597952, 'steps': 49995, 'batch_loss/train': 0.7295490756514482} +12/29/2021 13:03:48 - INFO - codeparrot_training - Step 49996: {'lr': 7.951244801462921e-12, 'samples': 25598464, 'steps': 49996, 'batch_loss/train': 0.7133490510750562} +12/29/2021 13:03:58 - INFO - codeparrot_training - Step 49997: {'lr': 4.4725751990881695e-12, 'samples': 25598976, 'steps': 49997, 'batch_loss/train': 0.6320071532391012} +12/29/2021 13:04:09 - INFO - codeparrot_training - Step 49998: {'lr': 1.987811193426836e-12, 'samples': 25599488, 'steps': 49998, 'batch_loss/train': 0.7334505151957273} +12/29/2021 13:04:23 - INFO - codeparrot_training - Step 49999: {'lr': 4.969528122344969e-13, 'samples': 25600000, 'steps': 49999, 'batch_loss/train': 0.7188061801716685} +12/29/2021 13:04:23 - INFO - codeparrot_training - Evaluating and saving model checkpoint +12/29/2021 13:07:47 - INFO - codeparrot_training - Step 50000: {'loss/eval': 0.7400861382484436, 'perplexity': 2.096116065979004}