diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -16397,3 +16397,1009 @@ Use FP16 precision: False 02/24/2022 22:17:36 - INFO - codeparrot_training - Step 15998: {'lr': 0.0004022163183755853, 'samples': 8191488, 'steps': 15998, 'loss/train': 1.9355260133743286} 02/24/2022 22:17:40 - INFO - codeparrot_training - Step 15999: {'lr': 0.0004022033381398781, 'samples': 8192000, 'steps': 15999, 'loss/train': 2.3557024002075195} 02/24/2022 22:17:40 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 22:17:57 - WARNING - huggingface_hub.repository - Several commits (16) will be pushed upstream. +02/24/2022 22:17:57 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 22:18:30 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 751fbfb..704f602 floral-grass-11 -> floral-grass-11 + +02/24/2022 22:18:38 - INFO - codeparrot_training - Step 16000: {'lr': 0.0004021903572521802, 'samples': 8192512, 'steps': 16000, 'loss/train': 1.7201647758483887} +02/24/2022 22:18:41 - INFO - codeparrot_training - Step 16001: {'lr': 0.0004021773757125471, 'samples': 8193024, 'steps': 16001, 'loss/train': 0.508444607257843} +02/24/2022 22:18:47 - INFO - codeparrot_training - Step 16002: {'lr': 0.0004021643935210344, 'samples': 8193536, 'steps': 16002, 'loss/train': 1.3964498043060303} +02/24/2022 22:18:53 - INFO - codeparrot_training - Step 16003: {'lr': 0.0004021514106776978, 'samples': 8194048, 'steps': 16003, 'loss/train': 0.9580065011978149} +02/24/2022 22:18:56 - INFO - codeparrot_training - Step 16004: {'lr': 0.00040213842718259287, 'samples': 8194560, 'steps': 16004, 'loss/train': 1.0566712617874146} +02/24/2022 22:19:02 - INFO - codeparrot_training - Step 16005: {'lr': 0.00040212544303577525, 'samples': 8195072, 'steps': 16005, 'loss/train': 2.0069048404693604} +02/24/2022 22:19:05 - INFO - codeparrot_training - Step 16006: {'lr': 0.00040211245823730047, 'samples': 8195584, 'steps': 16006, 'loss/train': 2.917506217956543} +02/24/2022 22:19:11 - INFO - codeparrot_training - Step 16007: {'lr': 0.00040209947278722425, 'samples': 8196096, 'steps': 16007, 'loss/train': 1.8989200592041016} +02/24/2022 22:19:14 - INFO - codeparrot_training - Step 16008: {'lr': 0.0004020864866856022, 'samples': 8196608, 'steps': 16008, 'loss/train': 1.8675284385681152} +02/24/2022 22:19:20 - INFO - codeparrot_training - Step 16009: {'lr': 0.0004020734999324899, 'samples': 8197120, 'steps': 16009, 'loss/train': 1.8313961029052734} +02/24/2022 22:19:24 - INFO - codeparrot_training - Step 16010: {'lr': 0.0004020605125279431, 'samples': 8197632, 'steps': 16010, 'loss/train': 2.8082687854766846} +02/24/2022 22:19:29 - INFO - codeparrot_training - Step 16011: {'lr': 0.0004020475244720173, 'samples': 8198144, 'steps': 16011, 'loss/train': 1.8834885358810425} +02/24/2022 22:19:33 - INFO - codeparrot_training - Step 16012: {'lr': 0.0004020345357647681, 'samples': 8198656, 'steps': 16012, 'loss/train': 0.8034518361091614} +02/24/2022 22:19:38 - INFO - codeparrot_training - Step 16013: {'lr': 0.0004020215464062513, 'samples': 8199168, 'steps': 16013, 'loss/train': 2.044654607772827} +02/24/2022 22:19:42 - INFO - codeparrot_training - Step 16014: {'lr': 0.0004020085563965226, 'samples': 8199680, 'steps': 16014, 'loss/train': 0.5688008666038513} +02/24/2022 22:19:47 - INFO - codeparrot_training - Step 16015: {'lr': 0.00040199556573563736, 'samples': 8200192, 'steps': 16015, 'loss/train': 2.2517144680023193} +02/24/2022 22:19:51 - INFO - codeparrot_training - Step 16016: {'lr': 0.0004019825744236514, 'samples': 8200704, 'steps': 16016, 'loss/train': 1.289293885231018} +02/24/2022 22:19:56 - INFO - codeparrot_training - Step 16017: {'lr': 0.00040196958246062033, 'samples': 8201216, 'steps': 16017, 'loss/train': 1.4459197521209717} +02/24/2022 22:20:00 - INFO - codeparrot_training - Step 16018: {'lr': 0.00040195658984659987, 'samples': 8201728, 'steps': 16018, 'loss/train': 2.841352701187134} +02/24/2022 22:20:05 - INFO - codeparrot_training - Step 16019: {'lr': 0.0004019435965816456, 'samples': 8202240, 'steps': 16019, 'loss/train': 2.743588924407959} +02/24/2022 22:20:09 - INFO - codeparrot_training - Step 16020: {'lr': 0.0004019306026658132, 'samples': 8202752, 'steps': 16020, 'loss/train': 2.453803777694702} +02/24/2022 22:20:15 - INFO - codeparrot_training - Step 16021: {'lr': 0.00040191760809915833, 'samples': 8203264, 'steps': 16021, 'loss/train': 1.5826425552368164} +02/24/2022 22:20:18 - INFO - codeparrot_training - Step 16022: {'lr': 0.00040190461288173675, 'samples': 8203776, 'steps': 16022, 'loss/train': 2.028092622756958} +02/24/2022 22:20:24 - INFO - codeparrot_training - Step 16023: {'lr': 0.000401891617013604, 'samples': 8204288, 'steps': 16023, 'loss/train': 1.5239996910095215} +02/24/2022 22:20:27 - INFO - codeparrot_training - Step 16024: {'lr': 0.00040187862049481573, 'samples': 8204800, 'steps': 16024, 'loss/train': 2.1686224937438965} +02/24/2022 22:20:33 - INFO - codeparrot_training - Step 16025: {'lr': 0.00040186562332542773, 'samples': 8205312, 'steps': 16025, 'loss/train': 2.1555397510528564} +02/24/2022 22:20:36 - INFO - codeparrot_training - Step 16026: {'lr': 0.0004018526255054956, 'samples': 8205824, 'steps': 16026, 'loss/train': 1.6400172710418701} +02/24/2022 22:20:42 - INFO - codeparrot_training - Step 16027: {'lr': 0.00040183962703507515, 'samples': 8206336, 'steps': 16027, 'loss/train': 1.959862232208252} +02/24/2022 22:20:45 - INFO - codeparrot_training - Step 16028: {'lr': 0.00040182662791422185, 'samples': 8206848, 'steps': 16028, 'loss/train': 1.8384616374969482} +02/24/2022 22:20:51 - INFO - codeparrot_training - Step 16029: {'lr': 0.0004018136281429915, 'samples': 8207360, 'steps': 16029, 'loss/train': 1.9850516319274902} +02/24/2022 22:20:55 - INFO - codeparrot_training - Step 16030: {'lr': 0.0004018006277214398, 'samples': 8207872, 'steps': 16030, 'loss/train': 2.3242154121398926} +02/24/2022 22:21:01 - INFO - codeparrot_training - Step 16031: {'lr': 0.00040178762664962235, 'samples': 8208384, 'steps': 16031, 'loss/train': 1.7208975553512573} +02/24/2022 22:21:04 - INFO - codeparrot_training - Step 16032: {'lr': 0.000401774624927595, 'samples': 8208896, 'steps': 16032, 'loss/train': 1.891169786453247} +02/24/2022 22:21:10 - INFO - codeparrot_training - Step 16033: {'lr': 0.00040176162255541325, 'samples': 8209408, 'steps': 16033, 'loss/train': 3.9602861404418945} +02/24/2022 22:21:13 - INFO - codeparrot_training - Step 16034: {'lr': 0.00040174861953313297, 'samples': 8209920, 'steps': 16034, 'loss/train': 2.547063112258911} +02/24/2022 22:21:19 - INFO - codeparrot_training - Step 16035: {'lr': 0.00040173561586080974, 'samples': 8210432, 'steps': 16035, 'loss/train': 1.5564717054367065} +02/24/2022 22:21:22 - INFO - codeparrot_training - Step 16036: {'lr': 0.0004017226115384994, 'samples': 8210944, 'steps': 16036, 'loss/train': 2.483438014984131} +02/24/2022 22:21:28 - INFO - codeparrot_training - Step 16037: {'lr': 0.00040170960656625744, 'samples': 8211456, 'steps': 16037, 'loss/train': 3.138737916946411} +02/24/2022 22:21:31 - INFO - codeparrot_training - Step 16038: {'lr': 0.00040169660094413977, 'samples': 8211968, 'steps': 16038, 'loss/train': 2.3410651683807373} +02/24/2022 22:21:37 - INFO - codeparrot_training - Step 16039: {'lr': 0.00040168359467220206, 'samples': 8212480, 'steps': 16039, 'loss/train': 2.397145986557007} +02/24/2022 22:21:40 - INFO - codeparrot_training - Step 16040: {'lr': 0.00040167058775049993, 'samples': 8212992, 'steps': 16040, 'loss/train': 2.042656898498535} +02/24/2022 22:21:46 - INFO - codeparrot_training - Step 16041: {'lr': 0.0004016575801790892, 'samples': 8213504, 'steps': 16041, 'loss/train': 1.7949374914169312} +02/24/2022 22:21:49 - INFO - codeparrot_training - Step 16042: {'lr': 0.0004016445719580256, 'samples': 8214016, 'steps': 16042, 'loss/train': 1.7485071420669556} +02/24/2022 22:21:55 - INFO - codeparrot_training - Step 16043: {'lr': 0.0004016315630873647, 'samples': 8214528, 'steps': 16043, 'loss/train': 2.3833954334259033} +02/24/2022 22:21:58 - INFO - codeparrot_training - Step 16044: {'lr': 0.00040161855356716245, 'samples': 8215040, 'steps': 16044, 'loss/train': 1.5439420938491821} +02/24/2022 22:22:05 - INFO - codeparrot_training - Step 16045: {'lr': 0.00040160554339747434, 'samples': 8215552, 'steps': 16045, 'loss/train': 1.802834153175354} +02/24/2022 22:22:08 - INFO - codeparrot_training - Step 16046: {'lr': 0.00040159253257835624, 'samples': 8216064, 'steps': 16046, 'loss/train': 2.036452531814575} +02/24/2022 22:22:14 - INFO - codeparrot_training - Step 16047: {'lr': 0.00040157952110986397, 'samples': 8216576, 'steps': 16047, 'loss/train': 2.3060808181762695} +02/24/2022 22:22:17 - INFO - codeparrot_training - Step 16048: {'lr': 0.00040156650899205305, 'samples': 8217088, 'steps': 16048, 'loss/train': 2.501120090484619} +02/24/2022 22:22:23 - INFO - codeparrot_training - Step 16049: {'lr': 0.00040155349622497937, 'samples': 8217600, 'steps': 16049, 'loss/train': 2.1082980632781982} +02/24/2022 22:22:26 - INFO - codeparrot_training - Step 16050: {'lr': 0.0004015404828086987, 'samples': 8218112, 'steps': 16050, 'loss/train': 2.3823835849761963} +02/24/2022 22:22:32 - INFO - codeparrot_training - Step 16051: {'lr': 0.0004015274687432667, 'samples': 8218624, 'steps': 16051, 'loss/train': 2.436448097229004} +02/24/2022 22:22:35 - INFO - codeparrot_training - Step 16052: {'lr': 0.0004015144540287391, 'samples': 8219136, 'steps': 16052, 'loss/train': 2.071312665939331} +02/24/2022 22:22:41 - INFO - codeparrot_training - Step 16053: {'lr': 0.00040150143866517164, 'samples': 8219648, 'steps': 16053, 'loss/train': 2.40167498588562} +02/24/2022 22:22:47 - INFO - codeparrot_training - Step 16054: {'lr': 0.0004014884226526202, 'samples': 8220160, 'steps': 16054, 'loss/train': 2.357085943222046} +02/24/2022 22:22:50 - INFO - codeparrot_training - Step 16055: {'lr': 0.0004014754059911405, 'samples': 8220672, 'steps': 16055, 'loss/train': 2.284782886505127} +02/24/2022 22:22:56 - INFO - codeparrot_training - Step 16056: {'lr': 0.0004014623886807882, 'samples': 8221184, 'steps': 16056, 'loss/train': 1.9049789905548096} +02/24/2022 22:22:59 - INFO - codeparrot_training - Step 16057: {'lr': 0.0004014493707216191, 'samples': 8221696, 'steps': 16057, 'loss/train': 2.8246161937713623} +02/24/2022 22:23:05 - INFO - codeparrot_training - Step 16058: {'lr': 0.00040143635211368903, 'samples': 8222208, 'steps': 16058, 'loss/train': 3.0233166217803955} +02/24/2022 22:23:08 - INFO - codeparrot_training - Step 16059: {'lr': 0.0004014233328570537, 'samples': 8222720, 'steps': 16059, 'loss/train': 2.371473550796509} +02/24/2022 22:23:12 - INFO - codeparrot_training - Step 16060: {'lr': 0.0004014103129517689, 'samples': 8223232, 'steps': 16060, 'loss/train': 2.972662925720215} +02/24/2022 22:23:18 - INFO - codeparrot_training - Step 16061: {'lr': 0.00040139729239789036, 'samples': 8223744, 'steps': 16061, 'loss/train': 1.79690420627594} +02/24/2022 22:23:21 - INFO - codeparrot_training - Step 16062: {'lr': 0.0004013842711954739, 'samples': 8224256, 'steps': 16062, 'loss/train': 1.8570235967636108} +02/24/2022 22:23:27 - INFO - codeparrot_training - Step 16063: {'lr': 0.0004013712493445753, 'samples': 8224768, 'steps': 16063, 'loss/train': 2.9462969303131104} +02/24/2022 22:23:30 - INFO - codeparrot_training - Step 16064: {'lr': 0.00040135822684525036, 'samples': 8225280, 'steps': 16064, 'loss/train': 1.238988995552063} +02/24/2022 22:23:36 - INFO - codeparrot_training - Step 16065: {'lr': 0.0004013452036975548, 'samples': 8225792, 'steps': 16065, 'loss/train': 2.1266133785247803} +02/24/2022 22:23:39 - INFO - codeparrot_training - Step 16066: {'lr': 0.0004013321799015445, 'samples': 8226304, 'steps': 16066, 'loss/train': 1.759783387184143} +02/24/2022 22:23:46 - INFO - codeparrot_training - Step 16067: {'lr': 0.00040131915545727517, 'samples': 8226816, 'steps': 16067, 'loss/train': 2.3940839767456055} +02/24/2022 22:23:49 - INFO - codeparrot_training - Step 16068: {'lr': 0.00040130613036480265, 'samples': 8227328, 'steps': 16068, 'loss/train': 1.8836443424224854} +02/24/2022 22:23:55 - INFO - codeparrot_training - Step 16069: {'lr': 0.0004012931046241827, 'samples': 8227840, 'steps': 16069, 'loss/train': 0.15590330958366394} +02/24/2022 22:23:58 - INFO - codeparrot_training - Step 16070: {'lr': 0.00040128007823547106, 'samples': 8228352, 'steps': 16070, 'loss/train': 2.9946024417877197} +02/24/2022 22:24:04 - INFO - codeparrot_training - Step 16071: {'lr': 0.00040126705119872367, 'samples': 8228864, 'steps': 16071, 'loss/train': 3.3204965591430664} +02/24/2022 22:24:07 - INFO - codeparrot_training - Step 16072: {'lr': 0.00040125402351399623, 'samples': 8229376, 'steps': 16072, 'loss/train': 2.1243174076080322} +02/24/2022 22:24:13 - INFO - codeparrot_training - Step 16073: {'lr': 0.0004012409951813446, 'samples': 8229888, 'steps': 16073, 'loss/train': 2.5254733562469482} +02/24/2022 22:24:18 - INFO - codeparrot_training - Step 16074: {'lr': 0.0004012279662008246, 'samples': 8230400, 'steps': 16074, 'loss/train': 2.2832231521606445} +02/24/2022 22:24:22 - INFO - codeparrot_training - Step 16075: {'lr': 0.000401214936572492, 'samples': 8230912, 'steps': 16075, 'loss/train': 1.5007426738739014} +02/24/2022 22:24:28 - INFO - codeparrot_training - Step 16076: {'lr': 0.0004012019062964026, 'samples': 8231424, 'steps': 16076, 'loss/train': 1.786483645439148} +02/24/2022 22:24:32 - INFO - codeparrot_training - Step 16077: {'lr': 0.0004011888753726123, 'samples': 8231936, 'steps': 16077, 'loss/train': 2.955906867980957} +02/24/2022 22:24:38 - INFO - codeparrot_training - Step 16078: {'lr': 0.00040117584380117675, 'samples': 8232448, 'steps': 16078, 'loss/train': 3.329847574234009} +02/24/2022 22:24:41 - INFO - codeparrot_training - Step 16079: {'lr': 0.000401162811582152, 'samples': 8232960, 'steps': 16079, 'loss/train': 1.9173682928085327} +02/24/2022 22:24:47 - INFO - codeparrot_training - Step 16080: {'lr': 0.00040114977871559375, 'samples': 8233472, 'steps': 16080, 'loss/train': 1.4451696872711182} +02/24/2022 22:24:50 - INFO - codeparrot_training - Step 16081: {'lr': 0.0004011367452015578, 'samples': 8233984, 'steps': 16081, 'loss/train': 0.7013158202171326} +02/24/2022 22:24:56 - INFO - codeparrot_training - Step 16082: {'lr': 0.00040112371104010004, 'samples': 8234496, 'steps': 16082, 'loss/train': 2.2582297325134277} +02/24/2022 22:24:59 - INFO - codeparrot_training - Step 16083: {'lr': 0.00040111067623127626, 'samples': 8235008, 'steps': 16083, 'loss/train': 1.5379586219787598} +02/24/2022 22:25:05 - INFO - codeparrot_training - Step 16084: {'lr': 0.0004010976407751424, 'samples': 8235520, 'steps': 16084, 'loss/train': 2.447960138320923} +02/24/2022 22:25:08 - INFO - codeparrot_training - Step 16085: {'lr': 0.00040108460467175425, 'samples': 8236032, 'steps': 16085, 'loss/train': 1.7054228782653809} +02/24/2022 22:25:14 - INFO - codeparrot_training - Step 16086: {'lr': 0.00040107156792116753, 'samples': 8236544, 'steps': 16086, 'loss/train': 1.2864183187484741} +02/24/2022 22:25:18 - INFO - codeparrot_training - Step 16087: {'lr': 0.0004010585305234382, 'samples': 8237056, 'steps': 16087, 'loss/train': 1.4124513864517212} +02/24/2022 22:25:21 - INFO - codeparrot_training - Step 16088: {'lr': 0.00040104549247862217, 'samples': 8237568, 'steps': 16088, 'loss/train': 2.1191415786743164} +02/24/2022 22:25:27 - INFO - codeparrot_training - Step 16089: {'lr': 0.0004010324537867751, 'samples': 8238080, 'steps': 16089, 'loss/train': 1.8204383850097656} +02/24/2022 22:25:30 - INFO - codeparrot_training - Step 16090: {'lr': 0.000401019414447953, 'samples': 8238592, 'steps': 16090, 'loss/train': 0.9256582856178284} +02/24/2022 22:25:36 - INFO - codeparrot_training - Step 16091: {'lr': 0.0004010063744622117, 'samples': 8239104, 'steps': 16091, 'loss/train': 1.932460069656372} +02/24/2022 22:25:39 - INFO - codeparrot_training - Step 16092: {'lr': 0.00040099333382960707, 'samples': 8239616, 'steps': 16092, 'loss/train': 1.5542957782745361} +02/24/2022 22:25:45 - INFO - codeparrot_training - Step 16093: {'lr': 0.00040098029255019484, 'samples': 8240128, 'steps': 16093, 'loss/train': 2.8286492824554443} +02/24/2022 22:25:49 - INFO - codeparrot_training - Step 16094: {'lr': 0.0004009672506240311, 'samples': 8240640, 'steps': 16094, 'loss/train': 1.290589451789856} +02/24/2022 22:25:54 - INFO - codeparrot_training - Step 16095: {'lr': 0.00040095420805117153, 'samples': 8241152, 'steps': 16095, 'loss/train': 1.6316267251968384} +02/24/2022 22:26:00 - INFO - codeparrot_training - Step 16096: {'lr': 0.0004009411648316721, 'samples': 8241664, 'steps': 16096, 'loss/train': 2.666778564453125} +02/24/2022 22:26:03 - INFO - codeparrot_training - Step 16097: {'lr': 0.0004009281209655886, 'samples': 8242176, 'steps': 16097, 'loss/train': 3.057589292526245} +02/24/2022 22:26:09 - INFO - codeparrot_training - Step 16098: {'lr': 0.000400915076452977, 'samples': 8242688, 'steps': 16098, 'loss/train': 2.0071325302124023} +02/24/2022 22:26:12 - INFO - codeparrot_training - Step 16099: {'lr': 0.0004009020312938931, 'samples': 8243200, 'steps': 16099, 'loss/train': 1.0506532192230225} +02/24/2022 22:26:16 - INFO - codeparrot_training - Step 16100: {'lr': 0.0004008889854883929, 'samples': 8243712, 'steps': 16100, 'loss/train': 2.901155471801758} +02/24/2022 22:26:22 - INFO - codeparrot_training - Step 16101: {'lr': 0.0004008759390365321, 'samples': 8244224, 'steps': 16101, 'loss/train': 2.186505079269409} +02/24/2022 22:26:25 - INFO - codeparrot_training - Step 16102: {'lr': 0.00040086289193836674, 'samples': 8244736, 'steps': 16102, 'loss/train': 2.6797337532043457} +02/24/2022 22:26:33 - INFO - codeparrot_training - Step 16103: {'lr': 0.00040084984419395264, 'samples': 8245248, 'steps': 16103, 'loss/train': 2.0044150352478027} +02/24/2022 22:26:36 - INFO - codeparrot_training - Step 16104: {'lr': 0.00040083679580334565, 'samples': 8245760, 'steps': 16104, 'loss/train': 2.3971686363220215} +02/24/2022 22:26:42 - INFO - codeparrot_training - Step 16105: {'lr': 0.00040082374676660176, 'samples': 8246272, 'steps': 16105, 'loss/train': 2.8351309299468994} +02/24/2022 22:26:45 - INFO - codeparrot_training - Step 16106: {'lr': 0.00040081069708377686, 'samples': 8246784, 'steps': 16106, 'loss/train': 2.7092063426971436} +02/24/2022 22:26:51 - INFO - codeparrot_training - Step 16107: {'lr': 0.0004007976467549268, 'samples': 8247296, 'steps': 16107, 'loss/train': 3.106229066848755} +02/24/2022 22:26:54 - INFO - codeparrot_training - Step 16108: {'lr': 0.0004007845957801075, 'samples': 8247808, 'steps': 16108, 'loss/train': 2.0391921997070312} +02/24/2022 22:27:00 - INFO - codeparrot_training - Step 16109: {'lr': 0.0004007715441593749, 'samples': 8248320, 'steps': 16109, 'loss/train': 2.3040809631347656} +02/24/2022 22:27:03 - INFO - codeparrot_training - Step 16110: {'lr': 0.0004007584918927849, 'samples': 8248832, 'steps': 16110, 'loss/train': 1.5427764654159546} +02/24/2022 22:27:10 - INFO - codeparrot_training - Step 16111: {'lr': 0.0004007454389803933, 'samples': 8249344, 'steps': 16111, 'loss/train': 2.039019823074341} +02/24/2022 22:27:13 - INFO - codeparrot_training - Step 16112: {'lr': 0.00040073238542225623, 'samples': 8249856, 'steps': 16112, 'loss/train': 1.693688988685608} +02/24/2022 22:27:19 - INFO - codeparrot_training - Step 16113: {'lr': 0.00040071933121842943, 'samples': 8250368, 'steps': 16113, 'loss/train': 2.0680792331695557} +02/24/2022 22:27:23 - INFO - codeparrot_training - Step 16114: {'lr': 0.00040070627636896886, 'samples': 8250880, 'steps': 16114, 'loss/train': 3.236002206802368} +02/24/2022 22:27:26 - INFO - codeparrot_training - Step 16115: {'lr': 0.0004006932208739304, 'samples': 8251392, 'steps': 16115, 'loss/train': 1.1472545862197876} +02/24/2022 22:27:32 - INFO - codeparrot_training - Step 16116: {'lr': 0.0004006801647333701, 'samples': 8251904, 'steps': 16116, 'loss/train': 1.2056994438171387} +02/24/2022 22:27:35 - INFO - codeparrot_training - Step 16117: {'lr': 0.0004006671079473438, 'samples': 8252416, 'steps': 16117, 'loss/train': 0.5380105972290039} +02/24/2022 22:27:41 - INFO - codeparrot_training - Step 16118: {'lr': 0.00040065405051590745, 'samples': 8252928, 'steps': 16118, 'loss/train': 1.1405670642852783} +02/24/2022 22:27:44 - INFO - codeparrot_training - Step 16119: {'lr': 0.000400640992439117, 'samples': 8253440, 'steps': 16119, 'loss/train': 1.7501696348190308} +02/24/2022 22:27:50 - INFO - codeparrot_training - Step 16120: {'lr': 0.0004006279337170283, 'samples': 8253952, 'steps': 16120, 'loss/train': 1.8721909523010254} +02/24/2022 22:27:53 - INFO - codeparrot_training - Step 16121: {'lr': 0.00040061487434969744, 'samples': 8254464, 'steps': 16121, 'loss/train': 0.3439841568470001} +02/24/2022 22:27:59 - INFO - codeparrot_training - Step 16122: {'lr': 0.00040060181433718037, 'samples': 8254976, 'steps': 16122, 'loss/train': 2.461881160736084} +02/24/2022 22:28:02 - INFO - codeparrot_training - Step 16123: {'lr': 0.00040058875367953285, 'samples': 8255488, 'steps': 16123, 'loss/train': 2.0427472591400146} +02/24/2022 22:28:09 - INFO - codeparrot_training - Step 16124: {'lr': 0.0004005756923768109, 'samples': 8256000, 'steps': 16124, 'loss/train': 3.9292702674865723} +02/24/2022 22:28:12 - INFO - codeparrot_training - Step 16125: {'lr': 0.0004005626304290705, 'samples': 8256512, 'steps': 16125, 'loss/train': 2.3789103031158447} +02/24/2022 22:28:18 - INFO - codeparrot_training - Step 16126: {'lr': 0.00040054956783636765, 'samples': 8257024, 'steps': 16126, 'loss/train': 1.660765528678894} +02/24/2022 22:28:21 - INFO - codeparrot_training - Step 16127: {'lr': 0.00040053650459875823, 'samples': 8257536, 'steps': 16127, 'loss/train': 1.7264443635940552} +02/24/2022 22:28:27 - INFO - codeparrot_training - Step 16128: {'lr': 0.0004005234407162982, 'samples': 8258048, 'steps': 16128, 'loss/train': 1.4429055452346802} +02/24/2022 22:28:30 - INFO - codeparrot_training - Step 16129: {'lr': 0.00040051037618904365, 'samples': 8258560, 'steps': 16129, 'loss/train': 0.1482071429491043} +02/24/2022 22:28:36 - INFO - codeparrot_training - Step 16130: {'lr': 0.0004004973110170503, 'samples': 8259072, 'steps': 16130, 'loss/train': 2.3455629348754883} +02/24/2022 22:28:39 - INFO - codeparrot_training - Step 16131: {'lr': 0.0004004842452003743, 'samples': 8259584, 'steps': 16131, 'loss/train': 2.281982660293579} +02/24/2022 22:28:45 - INFO - codeparrot_training - Step 16132: {'lr': 0.0004004711787390716, 'samples': 8260096, 'steps': 16132, 'loss/train': 2.7971956729888916} +02/24/2022 22:28:48 - INFO - codeparrot_training - Step 16133: {'lr': 0.0004004581116331981, 'samples': 8260608, 'steps': 16133, 'loss/train': 1.4828740358352661} +02/24/2022 22:28:54 - INFO - codeparrot_training - Step 16134: {'lr': 0.00040044504388280996, 'samples': 8261120, 'steps': 16134, 'loss/train': 2.1721882820129395} +02/24/2022 22:28:57 - INFO - codeparrot_training - Step 16135: {'lr': 0.00040043197548796295, 'samples': 8261632, 'steps': 16135, 'loss/train': 1.853606104850769} +02/24/2022 22:29:03 - INFO - codeparrot_training - Step 16136: {'lr': 0.0004004189064487131, 'samples': 8262144, 'steps': 16136, 'loss/train': 1.6603351831436157} +02/24/2022 22:29:06 - INFO - codeparrot_training - Step 16137: {'lr': 0.00040040583676511645, 'samples': 8262656, 'steps': 16137, 'loss/train': 2.022278070449829} +02/24/2022 22:29:12 - INFO - codeparrot_training - Step 16138: {'lr': 0.0004003927664372289, 'samples': 8263168, 'steps': 16138, 'loss/train': 2.8447370529174805} +02/24/2022 22:29:15 - INFO - codeparrot_training - Step 16139: {'lr': 0.00040037969546510653, 'samples': 8263680, 'steps': 16139, 'loss/train': 1.3555209636688232} +02/24/2022 22:29:21 - INFO - codeparrot_training - Step 16140: {'lr': 0.0004003666238488053, 'samples': 8264192, 'steps': 16140, 'loss/train': 0.3209831118583679} +02/24/2022 22:29:25 - INFO - codeparrot_training - Step 16141: {'lr': 0.00040035355158838114, 'samples': 8264704, 'steps': 16141, 'loss/train': 1.6687036752700806} +02/24/2022 22:29:30 - INFO - codeparrot_training - Step 16142: {'lr': 0.0004003404786838902, 'samples': 8265216, 'steps': 16142, 'loss/train': 1.3506438732147217} +02/24/2022 22:29:34 - INFO - codeparrot_training - Step 16143: {'lr': 0.0004003274051353884, 'samples': 8265728, 'steps': 16143, 'loss/train': 1.289398431777954} +02/24/2022 22:29:40 - INFO - codeparrot_training - Step 16144: {'lr': 0.00040031433094293167, 'samples': 8266240, 'steps': 16144, 'loss/train': 1.6556278467178345} +02/24/2022 22:29:43 - INFO - codeparrot_training - Step 16145: {'lr': 0.0004003012561065761, 'samples': 8266752, 'steps': 16145, 'loss/train': 2.8677425384521484} +02/24/2022 22:29:49 - INFO - codeparrot_training - Step 16146: {'lr': 0.0004002881806263776, 'samples': 8267264, 'steps': 16146, 'loss/train': 3.008291006088257} +02/24/2022 22:29:52 - INFO - codeparrot_training - Step 16147: {'lr': 0.0004002751045023924, 'samples': 8267776, 'steps': 16147, 'loss/train': 2.081298828125} +02/24/2022 22:29:58 - INFO - codeparrot_training - Step 16148: {'lr': 0.00040026202773467623, 'samples': 8268288, 'steps': 16148, 'loss/train': 3.3766801357269287} +02/24/2022 22:30:01 - INFO - codeparrot_training - Step 16149: {'lr': 0.00040024895032328536, 'samples': 8268800, 'steps': 16149, 'loss/train': 0.7873048186302185} +02/24/2022 22:30:07 - INFO - codeparrot_training - Step 16150: {'lr': 0.0004002358722682756, 'samples': 8269312, 'steps': 16150, 'loss/train': 2.3426740169525146} +02/24/2022 22:30:11 - INFO - codeparrot_training - Step 16151: {'lr': 0.00040022279356970316, 'samples': 8269824, 'steps': 16151, 'loss/train': 1.2303619384765625} +02/24/2022 22:30:16 - INFO - codeparrot_training - Step 16152: {'lr': 0.0004002097142276239, 'samples': 8270336, 'steps': 16152, 'loss/train': 4.20693302154541} +02/24/2022 22:30:20 - INFO - codeparrot_training - Step 16153: {'lr': 0.00040019663424209397, 'samples': 8270848, 'steps': 16153, 'loss/train': 1.566499948501587} +02/24/2022 22:30:25 - INFO - codeparrot_training - Step 16154: {'lr': 0.0004001835536131693, 'samples': 8271360, 'steps': 16154, 'loss/train': 3.4570751190185547} +02/24/2022 22:30:29 - INFO - codeparrot_training - Step 16155: {'lr': 0.00040017047234090596, 'samples': 8271872, 'steps': 16155, 'loss/train': 2.058189630508423} +02/24/2022 22:30:34 - INFO - codeparrot_training - Step 16156: {'lr': 0.00040015739042536, 'samples': 8272384, 'steps': 16156, 'loss/train': 2.1488616466522217} +02/24/2022 22:30:38 - INFO - codeparrot_training - Step 16157: {'lr': 0.00040014430786658754, 'samples': 8272896, 'steps': 16157, 'loss/train': 1.2444789409637451} +02/24/2022 22:30:43 - INFO - codeparrot_training - Step 16158: {'lr': 0.0004001312246646446, 'samples': 8273408, 'steps': 16158, 'loss/train': 0.3768030107021332} +02/24/2022 22:30:47 - INFO - codeparrot_training - Step 16159: {'lr': 0.000400118140819587, 'samples': 8273920, 'steps': 16159, 'loss/train': 2.8664608001708984} +02/24/2022 22:30:53 - INFO - codeparrot_training - Step 16160: {'lr': 0.00040010505633147106, 'samples': 8274432, 'steps': 16160, 'loss/train': 2.529768228530884} +02/24/2022 22:30:57 - INFO - codeparrot_training - Step 16161: {'lr': 0.0004000919712003526, 'samples': 8274944, 'steps': 16161, 'loss/train': 2.0496413707733154} +02/24/2022 22:31:02 - INFO - codeparrot_training - Step 16162: {'lr': 0.0004000788854262879, 'samples': 8275456, 'steps': 16162, 'loss/train': 2.005619764328003} +02/24/2022 22:31:06 - INFO - codeparrot_training - Step 16163: {'lr': 0.00040006579900933294, 'samples': 8275968, 'steps': 16163, 'loss/train': 3.9936861991882324} +02/24/2022 22:31:11 - INFO - codeparrot_training - Step 16164: {'lr': 0.00040005271194954367, 'samples': 8276480, 'steps': 16164, 'loss/train': 2.508627414703369} +02/24/2022 22:31:15 - INFO - codeparrot_training - Step 16165: {'lr': 0.00040003962424697625, 'samples': 8276992, 'steps': 16165, 'loss/train': 2.586475133895874} +02/24/2022 22:31:20 - INFO - codeparrot_training - Step 16166: {'lr': 0.0004000265359016867, 'samples': 8277504, 'steps': 16166, 'loss/train': 1.5947999954223633} +02/24/2022 22:31:24 - INFO - codeparrot_training - Step 16167: {'lr': 0.0004000134469137312, 'samples': 8278016, 'steps': 16167, 'loss/train': 2.631283760070801} +02/24/2022 22:31:30 - INFO - codeparrot_training - Step 16168: {'lr': 0.00040000035728316564, 'samples': 8278528, 'steps': 16168, 'loss/train': 1.6275321245193481} +02/24/2022 22:31:33 - INFO - codeparrot_training - Step 16169: {'lr': 0.0003999872670100462, 'samples': 8279040, 'steps': 16169, 'loss/train': 2.1954967975616455} +02/24/2022 22:31:40 - INFO - codeparrot_training - Step 16170: {'lr': 0.000399974176094429, 'samples': 8279552, 'steps': 16170, 'loss/train': 2.0370078086853027} +02/24/2022 22:31:43 - INFO - codeparrot_training - Step 16171: {'lr': 0.00039996108453637, 'samples': 8280064, 'steps': 16171, 'loss/train': 2.3156166076660156} +02/24/2022 22:31:48 - INFO - codeparrot_training - Step 16172: {'lr': 0.0003999479923359253, 'samples': 8280576, 'steps': 16172, 'loss/train': 1.9278333187103271} +02/24/2022 22:31:52 - INFO - codeparrot_training - Step 16173: {'lr': 0.00039993489949315103, 'samples': 8281088, 'steps': 16173, 'loss/train': 1.853058099746704} +02/24/2022 22:31:57 - INFO - codeparrot_training - Step 16174: {'lr': 0.0003999218060081032, 'samples': 8281600, 'steps': 16174, 'loss/train': 2.7687489986419678} +02/24/2022 22:32:01 - INFO - codeparrot_training - Step 16175: {'lr': 0.0003999087118808381, 'samples': 8282112, 'steps': 16175, 'loss/train': 2.3673040866851807} +02/24/2022 22:32:07 - INFO - codeparrot_training - Step 16176: {'lr': 0.0003998956171114116, 'samples': 8282624, 'steps': 16176, 'loss/train': 1.9622160196304321} +02/24/2022 22:32:10 - INFO - codeparrot_training - Step 16177: {'lr': 0.0003998825216998799, 'samples': 8283136, 'steps': 16177, 'loss/train': 1.7406855821609497} +02/24/2022 22:32:15 - INFO - codeparrot_training - Step 16178: {'lr': 0.00039986942564629904, 'samples': 8283648, 'steps': 16178, 'loss/train': 1.0941768884658813} +02/24/2022 22:32:19 - INFO - codeparrot_training - Step 16179: {'lr': 0.0003998563289507251, 'samples': 8284160, 'steps': 16179, 'loss/train': 1.4929808378219604} +02/24/2022 22:32:25 - INFO - codeparrot_training - Step 16180: {'lr': 0.0003998432316132143, 'samples': 8284672, 'steps': 16180, 'loss/train': 2.450066089630127} +02/24/2022 22:32:28 - INFO - codeparrot_training - Step 16181: {'lr': 0.0003998301336338227, 'samples': 8285184, 'steps': 16181, 'loss/train': 1.1138701438903809} +02/24/2022 22:32:34 - INFO - codeparrot_training - Step 16182: {'lr': 0.0003998170350126064, 'samples': 8285696, 'steps': 16182, 'loss/train': 1.7946586608886719} +02/24/2022 22:32:37 - INFO - codeparrot_training - Step 16183: {'lr': 0.0003998039357496214, 'samples': 8286208, 'steps': 16183, 'loss/train': 1.4287916421890259} +02/24/2022 22:32:43 - INFO - codeparrot_training - Step 16184: {'lr': 0.000399790835844924, 'samples': 8286720, 'steps': 16184, 'loss/train': 1.0363694429397583} +02/24/2022 22:32:46 - INFO - codeparrot_training - Step 16185: {'lr': 0.00039977773529857016, 'samples': 8287232, 'steps': 16185, 'loss/train': 4.024621963500977} +02/24/2022 22:32:53 - INFO - codeparrot_training - Step 16186: {'lr': 0.00039976463411061606, 'samples': 8287744, 'steps': 16186, 'loss/train': 1.800007939338684} +02/24/2022 22:32:56 - INFO - codeparrot_training - Step 16187: {'lr': 0.00039975153228111784, 'samples': 8288256, 'steps': 16187, 'loss/train': 1.5483638048171997} +02/24/2022 22:33:02 - INFO - codeparrot_training - Step 16188: {'lr': 0.0003997384298101316, 'samples': 8288768, 'steps': 16188, 'loss/train': 1.9315979480743408} +02/24/2022 22:33:05 - INFO - codeparrot_training - Step 16189: {'lr': 0.0003997253266977135, 'samples': 8289280, 'steps': 16189, 'loss/train': 2.8159971237182617} +02/24/2022 22:33:11 - INFO - codeparrot_training - Step 16190: {'lr': 0.0003997122229439196, 'samples': 8289792, 'steps': 16190, 'loss/train': 1.053288459777832} +02/24/2022 22:33:14 - INFO - codeparrot_training - Step 16191: {'lr': 0.00039969911854880613, 'samples': 8290304, 'steps': 16191, 'loss/train': 2.3093559741973877} +02/24/2022 22:33:20 - INFO - codeparrot_training - Step 16192: {'lr': 0.0003996860135124292, 'samples': 8290816, 'steps': 16192, 'loss/train': 3.026240825653076} +02/24/2022 22:33:23 - INFO - codeparrot_training - Step 16193: {'lr': 0.00039967290783484485, 'samples': 8291328, 'steps': 16193, 'loss/train': 1.3618230819702148} +02/24/2022 22:33:29 - INFO - codeparrot_training - Step 16194: {'lr': 0.00039965980151610925, 'samples': 8291840, 'steps': 16194, 'loss/train': 0.6006201505661011} +02/24/2022 22:33:32 - INFO - codeparrot_training - Step 16195: {'lr': 0.0003996466945562787, 'samples': 8292352, 'steps': 16195, 'loss/train': 2.187267303466797} +02/24/2022 22:33:38 - INFO - codeparrot_training - Step 16196: {'lr': 0.00039963358695540907, 'samples': 8292864, 'steps': 16196, 'loss/train': 1.5534999370574951} +02/24/2022 22:33:42 - INFO - codeparrot_training - Step 16197: {'lr': 0.00039962047871355686, 'samples': 8293376, 'steps': 16197, 'loss/train': 1.945918083190918} +02/24/2022 22:33:47 - INFO - codeparrot_training - Step 16198: {'lr': 0.00039960736983077783, 'samples': 8293888, 'steps': 16198, 'loss/train': 2.2027645111083984} +02/24/2022 22:33:51 - INFO - codeparrot_training - Step 16199: {'lr': 0.0003995942603071285, 'samples': 8294400, 'steps': 16199, 'loss/train': 2.0365982055664062} +02/24/2022 22:33:56 - INFO - codeparrot_training - Step 16200: {'lr': 0.0003995811501426648, 'samples': 8294912, 'steps': 16200, 'loss/train': 1.5766929388046265} +02/24/2022 22:34:00 - INFO - codeparrot_training - Step 16201: {'lr': 0.0003995680393374429, 'samples': 8295424, 'steps': 16201, 'loss/train': 2.29494309425354} +02/24/2022 22:34:05 - INFO - codeparrot_training - Step 16202: {'lr': 0.00039955492789151904, 'samples': 8295936, 'steps': 16202, 'loss/train': 1.6093108654022217} +02/24/2022 22:34:09 - INFO - codeparrot_training - Step 16203: {'lr': 0.0003995418158049494, 'samples': 8296448, 'steps': 16203, 'loss/train': 1.9988603591918945} +02/24/2022 22:34:14 - INFO - codeparrot_training - Step 16204: {'lr': 0.0003995287030777901, 'samples': 8296960, 'steps': 16204, 'loss/train': 2.9860129356384277} +02/24/2022 22:34:18 - INFO - codeparrot_training - Step 16205: {'lr': 0.0003995155897100973, 'samples': 8297472, 'steps': 16205, 'loss/train': 1.9104325771331787} +02/24/2022 22:34:24 - INFO - codeparrot_training - Step 16206: {'lr': 0.0003995024757019272, 'samples': 8297984, 'steps': 16206, 'loss/train': 2.1127588748931885} +02/24/2022 22:34:27 - INFO - codeparrot_training - Step 16207: {'lr': 0.00039948936105333593, 'samples': 8298496, 'steps': 16207, 'loss/train': 0.9390119314193726} +02/24/2022 22:34:33 - INFO - codeparrot_training - Step 16208: {'lr': 0.0003994762457643797, 'samples': 8299008, 'steps': 16208, 'loss/train': 2.228416681289673} +02/24/2022 22:34:36 - INFO - codeparrot_training - Step 16209: {'lr': 0.0003994631298351148, 'samples': 8299520, 'steps': 16209, 'loss/train': 2.1857070922851562} +02/24/2022 22:34:42 - INFO - codeparrot_training - Step 16210: {'lr': 0.0003994500132655972, 'samples': 8300032, 'steps': 16210, 'loss/train': 1.95086669921875} +02/24/2022 22:34:45 - INFO - codeparrot_training - Step 16211: {'lr': 0.0003994368960558832, 'samples': 8300544, 'steps': 16211, 'loss/train': 2.3637008666992188} +02/24/2022 22:34:51 - INFO - codeparrot_training - Step 16212: {'lr': 0.0003994237782060291, 'samples': 8301056, 'steps': 16212, 'loss/train': 2.4839963912963867} +02/24/2022 22:34:54 - INFO - codeparrot_training - Step 16213: {'lr': 0.00039941065971609084, 'samples': 8301568, 'steps': 16213, 'loss/train': 0.7934821844100952} +02/24/2022 22:35:00 - INFO - codeparrot_training - Step 16214: {'lr': 0.00039939754058612487, 'samples': 8302080, 'steps': 16214, 'loss/train': 2.159456253051758} +02/24/2022 22:35:06 - INFO - codeparrot_training - Step 16215: {'lr': 0.0003993844208161872, 'samples': 8302592, 'steps': 16215, 'loss/train': 2.200657367706299} +02/24/2022 22:35:09 - INFO - codeparrot_training - Step 16216: {'lr': 0.0003993713004063341, 'samples': 8303104, 'steps': 16216, 'loss/train': 0.35117772221565247} +02/24/2022 22:35:15 - INFO - codeparrot_training - Step 16217: {'lr': 0.0003993581793566219, 'samples': 8303616, 'steps': 16217, 'loss/train': 1.3549160957336426} +02/24/2022 22:35:19 - INFO - codeparrot_training - Step 16218: {'lr': 0.00039934505766710656, 'samples': 8304128, 'steps': 16218, 'loss/train': 1.5180988311767578} +02/24/2022 22:35:24 - INFO - codeparrot_training - Step 16219: {'lr': 0.0003993319353378445, 'samples': 8304640, 'steps': 16219, 'loss/train': 2.35263991355896} +02/24/2022 22:35:28 - INFO - codeparrot_training - Step 16220: {'lr': 0.0003993188123688918, 'samples': 8305152, 'steps': 16220, 'loss/train': 1.7665669918060303} +02/24/2022 22:35:33 - INFO - codeparrot_training - Step 16221: {'lr': 0.00039930568876030473, 'samples': 8305664, 'steps': 16221, 'loss/train': 0.6001395583152771} +02/24/2022 22:35:37 - INFO - codeparrot_training - Step 16222: {'lr': 0.0003992925645121395, 'samples': 8306176, 'steps': 16222, 'loss/train': 2.160346269607544} +02/24/2022 22:35:42 - INFO - codeparrot_training - Step 16223: {'lr': 0.00039927943962445234, 'samples': 8306688, 'steps': 16223, 'loss/train': 2.4018616676330566} +02/24/2022 22:35:46 - INFO - codeparrot_training - Step 16224: {'lr': 0.0003992663140972994, 'samples': 8307200, 'steps': 16224, 'loss/train': 2.0112392902374268} +02/24/2022 22:35:51 - INFO - codeparrot_training - Step 16225: {'lr': 0.0003992531879307371, 'samples': 8307712, 'steps': 16225, 'loss/train': 2.5191287994384766} +02/24/2022 22:35:55 - INFO - codeparrot_training - Step 16226: {'lr': 0.0003992400611248214, 'samples': 8308224, 'steps': 16226, 'loss/train': 2.7199790477752686} +02/24/2022 22:36:00 - INFO - codeparrot_training - Step 16227: {'lr': 0.0003992269336796087, 'samples': 8308736, 'steps': 16227, 'loss/train': 1.5472825765609741} +02/24/2022 22:36:04 - INFO - codeparrot_training - Step 16228: {'lr': 0.0003992138055951552, 'samples': 8309248, 'steps': 16228, 'loss/train': 1.000227928161621} +02/24/2022 22:36:09 - INFO - codeparrot_training - Step 16229: {'lr': 0.00039920067687151717, 'samples': 8309760, 'steps': 16229, 'loss/train': 1.5652979612350464} +02/24/2022 22:36:13 - INFO - codeparrot_training - Step 16230: {'lr': 0.0003991875475087508, 'samples': 8310272, 'steps': 16230, 'loss/train': 1.9046932458877563} +02/24/2022 22:36:19 - INFO - codeparrot_training - Step 16231: {'lr': 0.00039917441750691237, 'samples': 8310784, 'steps': 16231, 'loss/train': 2.010688066482544} +02/24/2022 22:36:23 - INFO - codeparrot_training - Step 16232: {'lr': 0.0003991612868660581, 'samples': 8311296, 'steps': 16232, 'loss/train': 1.3565386533737183} +02/24/2022 22:36:29 - INFO - codeparrot_training - Step 16233: {'lr': 0.0003991481555862442, 'samples': 8311808, 'steps': 16233, 'loss/train': 2.3106303215026855} +02/24/2022 22:36:32 - INFO - codeparrot_training - Step 16234: {'lr': 0.00039913502366752704, 'samples': 8312320, 'steps': 16234, 'loss/train': 0.16075341403484344} +02/24/2022 22:36:38 - INFO - codeparrot_training - Step 16235: {'lr': 0.0003991218911099627, 'samples': 8312832, 'steps': 16235, 'loss/train': 1.3604589700698853} +02/24/2022 22:36:41 - INFO - codeparrot_training - Step 16236: {'lr': 0.0003991087579136076, 'samples': 8313344, 'steps': 16236, 'loss/train': 2.4689462184906006} +02/24/2022 22:36:47 - INFO - codeparrot_training - Step 16237: {'lr': 0.00039909562407851784, 'samples': 8313856, 'steps': 16237, 'loss/train': 1.8585337400436401} +02/24/2022 22:36:50 - INFO - codeparrot_training - Step 16238: {'lr': 0.0003990824896047498, 'samples': 8314368, 'steps': 16238, 'loss/train': 2.2633919715881348} +02/24/2022 22:36:56 - INFO - codeparrot_training - Step 16239: {'lr': 0.00039906935449235983, 'samples': 8314880, 'steps': 16239, 'loss/train': 2.590285062789917} +02/24/2022 22:36:59 - INFO - codeparrot_training - Step 16240: {'lr': 0.00039905621874140396, 'samples': 8315392, 'steps': 16240, 'loss/train': 0.5288260579109192} +02/24/2022 22:37:06 - INFO - codeparrot_training - Step 16241: {'lr': 0.00039904308235193866, 'samples': 8315904, 'steps': 16241, 'loss/train': 2.1751840114593506} +02/24/2022 22:37:09 - INFO - codeparrot_training - Step 16242: {'lr': 0.00039902994532402004, 'samples': 8316416, 'steps': 16242, 'loss/train': 2.2145566940307617} +02/24/2022 22:37:15 - INFO - codeparrot_training - Step 16243: {'lr': 0.0003990168076577045, 'samples': 8316928, 'steps': 16243, 'loss/train': 0.974034309387207} +02/24/2022 22:37:18 - INFO - codeparrot_training - Step 16244: {'lr': 0.00039900366935304824, 'samples': 8317440, 'steps': 16244, 'loss/train': 1.8895635604858398} +02/24/2022 22:37:24 - INFO - codeparrot_training - Step 16245: {'lr': 0.00039899053041010765, 'samples': 8317952, 'steps': 16245, 'loss/train': 1.9377793073654175} +02/24/2022 22:37:27 - INFO - codeparrot_training - Step 16246: {'lr': 0.00039897739082893883, 'samples': 8318464, 'steps': 16246, 'loss/train': 2.3315436840057373} +02/24/2022 22:37:33 - INFO - codeparrot_training - Step 16247: {'lr': 0.0003989642506095983, 'samples': 8318976, 'steps': 16247, 'loss/train': 1.5239439010620117} +02/24/2022 22:37:36 - INFO - codeparrot_training - Step 16248: {'lr': 0.0003989511097521421, 'samples': 8319488, 'steps': 16248, 'loss/train': 1.4171808958053589} +02/24/2022 22:37:42 - INFO - codeparrot_training - Step 16249: {'lr': 0.00039893796825662676, 'samples': 8320000, 'steps': 16249, 'loss/train': 3.3668322563171387} +02/24/2022 22:37:45 - INFO - codeparrot_training - Step 16250: {'lr': 0.0003989248261231084, 'samples': 8320512, 'steps': 16250, 'loss/train': 2.0585360527038574} +02/24/2022 22:37:52 - INFO - codeparrot_training - Step 16251: {'lr': 0.0003989116833516433, 'samples': 8321024, 'steps': 16251, 'loss/train': 1.05678391456604} +02/24/2022 22:37:55 - INFO - codeparrot_training - Step 16252: {'lr': 0.000398898539942288, 'samples': 8321536, 'steps': 16252, 'loss/train': 1.8924137353897095} +02/24/2022 22:38:00 - INFO - codeparrot_training - Step 16253: {'lr': 0.0003988853958950984, 'samples': 8322048, 'steps': 16253, 'loss/train': 1.5327723026275635} +02/24/2022 22:38:04 - INFO - codeparrot_training - Step 16254: {'lr': 0.00039887225121013124, 'samples': 8322560, 'steps': 16254, 'loss/train': 2.595731735229492} +02/24/2022 22:38:09 - INFO - codeparrot_training - Step 16255: {'lr': 0.0003988591058874426, 'samples': 8323072, 'steps': 16255, 'loss/train': 1.7835301160812378} +02/24/2022 22:38:13 - INFO - codeparrot_training - Step 16256: {'lr': 0.00039884595992708877, 'samples': 8323584, 'steps': 16256, 'loss/train': 2.2080979347229004} +02/24/2022 22:38:19 - INFO - codeparrot_training - Step 16257: {'lr': 0.0003988328133291261, 'samples': 8324096, 'steps': 16257, 'loss/train': 1.7378042936325073} +02/24/2022 22:38:22 - INFO - codeparrot_training - Step 16258: {'lr': 0.000398819666093611, 'samples': 8324608, 'steps': 16258, 'loss/train': 0.9707382917404175} +02/24/2022 22:38:28 - INFO - codeparrot_training - Step 16259: {'lr': 0.0003988065182205996, 'samples': 8325120, 'steps': 16259, 'loss/train': 2.9446816444396973} +02/24/2022 22:38:31 - INFO - codeparrot_training - Step 16260: {'lr': 0.0003987933697101484, 'samples': 8325632, 'steps': 16260, 'loss/train': 1.6062431335449219} +02/24/2022 22:38:37 - INFO - codeparrot_training - Step 16261: {'lr': 0.0003987802205623136, 'samples': 8326144, 'steps': 16261, 'loss/train': 2.2242660522460938} +02/24/2022 22:38:41 - INFO - codeparrot_training - Step 16262: {'lr': 0.0003987670707771516, 'samples': 8326656, 'steps': 16262, 'loss/train': 3.5205233097076416} +02/24/2022 22:38:46 - INFO - codeparrot_training - Step 16263: {'lr': 0.0003987539203547187, 'samples': 8327168, 'steps': 16263, 'loss/train': 0.9441400766372681} +02/24/2022 22:38:50 - INFO - codeparrot_training - Step 16264: {'lr': 0.00039874076929507124, 'samples': 8327680, 'steps': 16264, 'loss/train': 3.093445301055908} +02/24/2022 22:38:55 - INFO - codeparrot_training - Step 16265: {'lr': 0.0003987276175982656, 'samples': 8328192, 'steps': 16265, 'loss/train': 2.159552812576294} +02/24/2022 22:38:59 - INFO - codeparrot_training - Step 16266: {'lr': 0.00039871446526435806, 'samples': 8328704, 'steps': 16266, 'loss/train': 1.5061544179916382} +02/24/2022 22:39:04 - INFO - codeparrot_training - Step 16267: {'lr': 0.00039870131229340495, 'samples': 8329216, 'steps': 16267, 'loss/train': 1.717469334602356} +02/24/2022 22:39:08 - INFO - codeparrot_training - Step 16268: {'lr': 0.00039868815868546257, 'samples': 8329728, 'steps': 16268, 'loss/train': 1.8577830791473389} +02/24/2022 22:39:13 - INFO - codeparrot_training - Step 16269: {'lr': 0.00039867500444058747, 'samples': 8330240, 'steps': 16269, 'loss/train': 1.3528252840042114} +02/24/2022 22:39:17 - INFO - codeparrot_training - Step 16270: {'lr': 0.0003986618495588358, 'samples': 8330752, 'steps': 16270, 'loss/train': 0.8437669277191162} +02/24/2022 22:39:23 - INFO - codeparrot_training - Step 16271: {'lr': 0.00039864869404026394, 'samples': 8331264, 'steps': 16271, 'loss/train': 0.5787619948387146} +02/24/2022 22:39:26 - INFO - codeparrot_training - Step 16272: {'lr': 0.0003986355378849283, 'samples': 8331776, 'steps': 16272, 'loss/train': 0.3689199388027191} +02/24/2022 22:39:32 - INFO - codeparrot_training - Step 16273: {'lr': 0.00039862238109288523, 'samples': 8332288, 'steps': 16273, 'loss/train': 2.3050448894500732} +02/24/2022 22:39:35 - INFO - codeparrot_training - Step 16274: {'lr': 0.0003986092236641911, 'samples': 8332800, 'steps': 16274, 'loss/train': 2.035551071166992} +02/24/2022 22:39:41 - INFO - codeparrot_training - Step 16275: {'lr': 0.00039859606559890215, 'samples': 8333312, 'steps': 16275, 'loss/train': 1.84000563621521} +02/24/2022 22:39:44 - INFO - codeparrot_training - Step 16276: {'lr': 0.0003985829068970749, 'samples': 8333824, 'steps': 16276, 'loss/train': 1.827824592590332} +02/24/2022 22:39:50 - INFO - codeparrot_training - Step 16277: {'lr': 0.00039856974755876563, 'samples': 8334336, 'steps': 16277, 'loss/train': 0.7145095467567444} +02/24/2022 22:39:54 - INFO - codeparrot_training - Step 16278: {'lr': 0.0003985565875840308, 'samples': 8334848, 'steps': 16278, 'loss/train': 0.6022953987121582} +02/24/2022 22:39:59 - INFO - codeparrot_training - Step 16279: {'lr': 0.0003985434269729267, 'samples': 8335360, 'steps': 16279, 'loss/train': 2.246063470840454} +02/24/2022 22:40:03 - INFO - codeparrot_training - Step 16280: {'lr': 0.00039853026572550965, 'samples': 8335872, 'steps': 16280, 'loss/train': 3.149411916732788} +02/24/2022 22:40:08 - INFO - codeparrot_training - Step 16281: {'lr': 0.00039851710384183615, 'samples': 8336384, 'steps': 16281, 'loss/train': 1.8245059251785278} +02/24/2022 22:40:12 - INFO - codeparrot_training - Step 16282: {'lr': 0.0003985039413219626, 'samples': 8336896, 'steps': 16282, 'loss/train': 1.8205175399780273} +02/24/2022 22:40:17 - INFO - codeparrot_training - Step 16283: {'lr': 0.0003984907781659452, 'samples': 8337408, 'steps': 16283, 'loss/train': 1.714830994606018} +02/24/2022 22:40:21 - INFO - codeparrot_training - Step 16284: {'lr': 0.00039847761437384054, 'samples': 8337920, 'steps': 16284, 'loss/train': 2.2593464851379395} +02/24/2022 22:40:26 - INFO - codeparrot_training - Step 16285: {'lr': 0.0003984644499457049, 'samples': 8338432, 'steps': 16285, 'loss/train': 1.9405473470687866} +02/24/2022 22:40:30 - INFO - codeparrot_training - Step 16286: {'lr': 0.0003984512848815948, 'samples': 8338944, 'steps': 16286, 'loss/train': 0.8835693001747131} +02/24/2022 22:40:36 - INFO - codeparrot_training - Step 16287: {'lr': 0.00039843811918156635, 'samples': 8339456, 'steps': 16287, 'loss/train': 1.8923888206481934} +02/24/2022 22:40:39 - INFO - codeparrot_training - Step 16288: {'lr': 0.0003984249528456762, 'samples': 8339968, 'steps': 16288, 'loss/train': 1.098946213722229} +02/24/2022 22:40:45 - INFO - codeparrot_training - Step 16289: {'lr': 0.00039841178587398074, 'samples': 8340480, 'steps': 16289, 'loss/train': 2.3656797409057617} +02/24/2022 22:40:48 - INFO - codeparrot_training - Step 16290: {'lr': 0.0003983986182665362, 'samples': 8340992, 'steps': 16290, 'loss/train': 1.1832685470581055} +02/24/2022 22:40:54 - INFO - codeparrot_training - Step 16291: {'lr': 0.00039838545002339926, 'samples': 8341504, 'steps': 16291, 'loss/train': 2.0258514881134033} +02/24/2022 22:40:57 - INFO - codeparrot_training - Step 16292: {'lr': 0.0003983722811446261, 'samples': 8342016, 'steps': 16292, 'loss/train': 1.7101701498031616} +02/24/2022 22:41:03 - INFO - codeparrot_training - Step 16293: {'lr': 0.00039835911163027315, 'samples': 8342528, 'steps': 16293, 'loss/train': 2.7284963130950928} +02/24/2022 22:41:06 - INFO - codeparrot_training - Step 16294: {'lr': 0.00039834594148039693, 'samples': 8343040, 'steps': 16294, 'loss/train': 1.4164475202560425} +02/24/2022 22:41:12 - INFO - codeparrot_training - Step 16295: {'lr': 0.0003983327706950538, 'samples': 8343552, 'steps': 16295, 'loss/train': 1.9087039232254028} +02/24/2022 22:41:15 - INFO - codeparrot_training - Step 16296: {'lr': 0.00039831959927430017, 'samples': 8344064, 'steps': 16296, 'loss/train': 2.2220218181610107} +02/24/2022 22:41:21 - INFO - codeparrot_training - Step 16297: {'lr': 0.00039830642721819254, 'samples': 8344576, 'steps': 16297, 'loss/train': 3.1141719818115234} +02/24/2022 22:41:24 - INFO - codeparrot_training - Step 16298: {'lr': 0.0003982932545267872, 'samples': 8345088, 'steps': 16298, 'loss/train': 2.0572309494018555} +02/24/2022 22:41:31 - INFO - codeparrot_training - Step 16299: {'lr': 0.00039828008120014057, 'samples': 8345600, 'steps': 16299, 'loss/train': 2.0216176509857178} +02/24/2022 22:41:34 - INFO - codeparrot_training - Step 16300: {'lr': 0.00039826690723830926, 'samples': 8346112, 'steps': 16300, 'loss/train': 1.9893757104873657} +02/24/2022 22:41:40 - INFO - codeparrot_training - Step 16301: {'lr': 0.00039825373264134955, 'samples': 8346624, 'steps': 16301, 'loss/train': 1.7790496349334717} +02/24/2022 22:41:43 - INFO - codeparrot_training - Step 16302: {'lr': 0.00039824055740931804, 'samples': 8347136, 'steps': 16302, 'loss/train': 2.3769853115081787} +02/24/2022 22:41:49 - INFO - codeparrot_training - Step 16303: {'lr': 0.0003982273815422709, 'samples': 8347648, 'steps': 16303, 'loss/train': 1.9558449983596802} +02/24/2022 22:41:52 - INFO - codeparrot_training - Step 16304: {'lr': 0.00039821420504026486, 'samples': 8348160, 'steps': 16304, 'loss/train': 1.4359934329986572} +02/24/2022 22:41:58 - INFO - codeparrot_training - Step 16305: {'lr': 0.0003982010279033561, 'samples': 8348672, 'steps': 16305, 'loss/train': 2.1049954891204834} +02/24/2022 22:42:01 - INFO - codeparrot_training - Step 16306: {'lr': 0.0003981878501316013, 'samples': 8349184, 'steps': 16306, 'loss/train': 2.4715776443481445} +02/24/2022 22:42:07 - INFO - codeparrot_training - Step 16307: {'lr': 0.0003981746717250567, 'samples': 8349696, 'steps': 16307, 'loss/train': 2.257695436477661} +02/24/2022 22:42:10 - INFO - codeparrot_training - Step 16308: {'lr': 0.000398161492683779, 'samples': 8350208, 'steps': 16308, 'loss/train': 1.9007292985916138} +02/24/2022 22:42:17 - INFO - codeparrot_training - Step 16309: {'lr': 0.0003981483130078244, 'samples': 8350720, 'steps': 16309, 'loss/train': 2.6354665756225586} +02/24/2022 22:42:20 - INFO - codeparrot_training - Step 16310: {'lr': 0.0003981351326972495, 'samples': 8351232, 'steps': 16310, 'loss/train': 1.637117862701416} +02/24/2022 22:42:26 - INFO - codeparrot_training - Step 16311: {'lr': 0.00039812195175211075, 'samples': 8351744, 'steps': 16311, 'loss/train': 2.451132297515869} +02/24/2022 22:42:30 - INFO - codeparrot_training - Step 16312: {'lr': 0.0003981087701724645, 'samples': 8352256, 'steps': 16312, 'loss/train': 3.009580135345459} +02/24/2022 22:42:35 - INFO - codeparrot_training - Step 16313: {'lr': 0.00039809558795836743, 'samples': 8352768, 'steps': 16313, 'loss/train': 1.534165859222412} +02/24/2022 22:42:39 - INFO - codeparrot_training - Step 16314: {'lr': 0.00039808240510987584, 'samples': 8353280, 'steps': 16314, 'loss/train': 1.796891450881958} +02/24/2022 22:42:44 - INFO - codeparrot_training - Step 16315: {'lr': 0.0003980692216270462, 'samples': 8353792, 'steps': 16315, 'loss/train': 1.7552646398544312} +02/24/2022 22:42:47 - INFO - codeparrot_training - Step 16316: {'lr': 0.00039805603750993514, 'samples': 8354304, 'steps': 16316, 'loss/train': 1.9753177165985107} +02/24/2022 22:42:53 - INFO - codeparrot_training - Step 16317: {'lr': 0.0003980428527585989, 'samples': 8354816, 'steps': 16317, 'loss/train': 1.973376989364624} +02/24/2022 22:42:57 - INFO - codeparrot_training - Step 16318: {'lr': 0.0003980296673730942, 'samples': 8355328, 'steps': 16318, 'loss/train': 1.7562350034713745} +02/24/2022 22:43:02 - INFO - codeparrot_training - Step 16319: {'lr': 0.0003980164813534773, 'samples': 8355840, 'steps': 16319, 'loss/train': 2.663815975189209} +02/24/2022 22:43:06 - INFO - codeparrot_training - Step 16320: {'lr': 0.0003980032946998049, 'samples': 8356352, 'steps': 16320, 'loss/train': 2.2288382053375244} +02/24/2022 22:43:11 - INFO - codeparrot_training - Step 16321: {'lr': 0.00039799010741213336, 'samples': 8356864, 'steps': 16321, 'loss/train': 1.9420993328094482} +02/24/2022 22:43:15 - INFO - codeparrot_training - Step 16322: {'lr': 0.0003979769194905192, 'samples': 8357376, 'steps': 16322, 'loss/train': 2.2489728927612305} +02/24/2022 22:43:20 - INFO - codeparrot_training - Step 16323: {'lr': 0.0003979637309350188, 'samples': 8357888, 'steps': 16323, 'loss/train': 0.9923384189605713} +02/24/2022 22:43:24 - INFO - codeparrot_training - Step 16324: {'lr': 0.0003979505417456889, 'samples': 8358400, 'steps': 16324, 'loss/train': 2.8191654682159424} +02/24/2022 22:43:30 - INFO - codeparrot_training - Step 16325: {'lr': 0.00039793735192258575, 'samples': 8358912, 'steps': 16325, 'loss/train': 2.2891783714294434} +02/24/2022 22:43:33 - INFO - codeparrot_training - Step 16326: {'lr': 0.000397924161465766, 'samples': 8359424, 'steps': 16326, 'loss/train': 2.5036966800689697} +02/24/2022 22:43:39 - INFO - codeparrot_training - Step 16327: {'lr': 0.0003979109703752861, 'samples': 8359936, 'steps': 16327, 'loss/train': 1.8108277320861816} +02/24/2022 22:43:42 - INFO - codeparrot_training - Step 16328: {'lr': 0.00039789777865120257, 'samples': 8360448, 'steps': 16328, 'loss/train': 2.2767579555511475} +02/24/2022 22:43:48 - INFO - codeparrot_training - Step 16329: {'lr': 0.00039788458629357195, 'samples': 8360960, 'steps': 16329, 'loss/train': 1.2777466773986816} +02/24/2022 22:43:51 - INFO - codeparrot_training - Step 16330: {'lr': 0.0003978713933024507, 'samples': 8361472, 'steps': 16330, 'loss/train': 1.5918738842010498} +02/24/2022 22:43:57 - INFO - codeparrot_training - Step 16331: {'lr': 0.0003978581996778954, 'samples': 8361984, 'steps': 16331, 'loss/train': 1.9526430368423462} +02/24/2022 22:44:00 - INFO - codeparrot_training - Step 16332: {'lr': 0.0003978450054199625, 'samples': 8362496, 'steps': 16332, 'loss/train': 1.2859208583831787} +02/24/2022 22:44:07 - INFO - codeparrot_training - Step 16333: {'lr': 0.0003978318105287085, 'samples': 8363008, 'steps': 16333, 'loss/train': 2.0269393920898438} +02/24/2022 22:44:10 - INFO - codeparrot_training - Step 16334: {'lr': 0.00039781861500419, 'samples': 8363520, 'steps': 16334, 'loss/train': 2.8162662982940674} +02/24/2022 22:44:16 - INFO - codeparrot_training - Step 16335: {'lr': 0.00039780541884646347, 'samples': 8364032, 'steps': 16335, 'loss/train': 2.0910496711730957} +02/24/2022 22:44:20 - INFO - codeparrot_training - Step 16336: {'lr': 0.0003977922220555855, 'samples': 8364544, 'steps': 16336, 'loss/train': 2.6627039909362793} +02/24/2022 22:44:25 - INFO - codeparrot_training - Step 16337: {'lr': 0.0003977790246316125, 'samples': 8365056, 'steps': 16337, 'loss/train': 1.933947205543518} +02/24/2022 22:44:28 - INFO - codeparrot_training - Step 16338: {'lr': 0.00039776582657460115, 'samples': 8365568, 'steps': 16338, 'loss/train': 2.351294755935669} +02/24/2022 22:44:34 - INFO - codeparrot_training - Step 16339: {'lr': 0.000397752627884608, 'samples': 8366080, 'steps': 16339, 'loss/train': 1.0967384576797485} +02/24/2022 22:44:38 - INFO - codeparrot_training - Step 16340: {'lr': 0.0003977394285616893, 'samples': 8366592, 'steps': 16340, 'loss/train': 2.1124072074890137} +02/24/2022 22:44:43 - INFO - codeparrot_training - Step 16341: {'lr': 0.000397726228605902, 'samples': 8367104, 'steps': 16341, 'loss/train': 2.3344578742980957} +02/24/2022 22:44:47 - INFO - codeparrot_training - Step 16342: {'lr': 0.00039771302801730235, 'samples': 8367616, 'steps': 16342, 'loss/train': 2.6118335723876953} +02/24/2022 22:44:52 - INFO - codeparrot_training - Step 16343: {'lr': 0.00039769982679594703, 'samples': 8368128, 'steps': 16343, 'loss/train': 1.5699336528778076} +02/24/2022 22:44:55 - INFO - codeparrot_training - Step 16344: {'lr': 0.0003976866249418925, 'samples': 8368640, 'steps': 16344, 'loss/train': 2.2197580337524414} +02/24/2022 22:45:02 - INFO - codeparrot_training - Step 16345: {'lr': 0.0003976734224551954, 'samples': 8369152, 'steps': 16345, 'loss/train': 2.216350793838501} +02/24/2022 22:45:05 - INFO - codeparrot_training - Step 16346: {'lr': 0.0003976602193359122, 'samples': 8369664, 'steps': 16346, 'loss/train': 1.852243423461914} +02/24/2022 22:45:11 - INFO - codeparrot_training - Step 16347: {'lr': 0.00039764701558409955, 'samples': 8370176, 'steps': 16347, 'loss/train': 1.8337823152542114} +02/24/2022 22:45:14 - INFO - codeparrot_training - Step 16348: {'lr': 0.000397633811199814, 'samples': 8370688, 'steps': 16348, 'loss/train': 1.6812801361083984} +02/24/2022 22:45:20 - INFO - codeparrot_training - Step 16349: {'lr': 0.000397620606183112, 'samples': 8371200, 'steps': 16349, 'loss/train': 2.2033019065856934} +02/24/2022 22:45:23 - INFO - codeparrot_training - Step 16350: {'lr': 0.00039760740053405033, 'samples': 8371712, 'steps': 16350, 'loss/train': 2.432379961013794} +02/24/2022 22:45:29 - INFO - codeparrot_training - Step 16351: {'lr': 0.00039759419425268526, 'samples': 8372224, 'steps': 16351, 'loss/train': 2.5131478309631348} +02/24/2022 22:45:32 - INFO - codeparrot_training - Step 16352: {'lr': 0.00039758098733907364, 'samples': 8372736, 'steps': 16352, 'loss/train': 1.3854668140411377} +02/24/2022 22:45:38 - INFO - codeparrot_training - Step 16353: {'lr': 0.00039756777979327193, 'samples': 8373248, 'steps': 16353, 'loss/train': 0.9563204050064087} +02/24/2022 22:45:41 - INFO - codeparrot_training - Step 16354: {'lr': 0.0003975545716153367, 'samples': 8373760, 'steps': 16354, 'loss/train': 2.311582565307617} +02/24/2022 22:45:48 - INFO - codeparrot_training - Step 16355: {'lr': 0.0003975413628053245, 'samples': 8374272, 'steps': 16355, 'loss/train': 2.278615951538086} +02/24/2022 22:45:51 - INFO - codeparrot_training - Step 16356: {'lr': 0.000397528153363292, 'samples': 8374784, 'steps': 16356, 'loss/train': 2.7309703826904297} +02/24/2022 22:45:57 - INFO - codeparrot_training - Step 16357: {'lr': 0.00039751494328929565, 'samples': 8375296, 'steps': 16357, 'loss/train': 0.7731642127037048} +02/24/2022 22:46:00 - INFO - codeparrot_training - Step 16358: {'lr': 0.00039750173258339225, 'samples': 8375808, 'steps': 16358, 'loss/train': 1.1144764423370361} +02/24/2022 22:46:04 - INFO - codeparrot_training - Step 16359: {'lr': 0.00039748852124563816, 'samples': 8376320, 'steps': 16359, 'loss/train': 1.6835076808929443} +02/24/2022 22:46:10 - INFO - codeparrot_training - Step 16360: {'lr': 0.0003974753092760901, 'samples': 8376832, 'steps': 16360, 'loss/train': 3.235959053039551} +02/24/2022 22:46:15 - INFO - codeparrot_training - Step 16361: {'lr': 0.00039746209667480473, 'samples': 8377344, 'steps': 16361, 'loss/train': 1.918245553970337} +02/24/2022 22:46:19 - INFO - codeparrot_training - Step 16362: {'lr': 0.00039744888344183846, 'samples': 8377856, 'steps': 16362, 'loss/train': 1.8856500387191772} +02/24/2022 22:46:24 - INFO - codeparrot_training - Step 16363: {'lr': 0.00039743566957724805, 'samples': 8378368, 'steps': 16363, 'loss/train': 1.7472566366195679} +02/24/2022 22:46:27 - INFO - codeparrot_training - Step 16364: {'lr': 0.00039742245508109, 'samples': 8378880, 'steps': 16364, 'loss/train': 1.9533767700195312} +02/24/2022 22:46:33 - INFO - codeparrot_training - Step 16365: {'lr': 0.000397409239953421, 'samples': 8379392, 'steps': 16365, 'loss/train': 1.1026155948638916} +02/24/2022 22:46:37 - INFO - codeparrot_training - Step 16366: {'lr': 0.00039739602419429755, 'samples': 8379904, 'steps': 16366, 'loss/train': 3.1734139919281006} +02/24/2022 22:46:42 - INFO - codeparrot_training - Step 16367: {'lr': 0.00039738280780377645, 'samples': 8380416, 'steps': 16367, 'loss/train': 2.220407724380493} +02/24/2022 22:46:46 - INFO - codeparrot_training - Step 16368: {'lr': 0.0003973695907819141, 'samples': 8380928, 'steps': 16368, 'loss/train': 1.9063369035720825} +02/24/2022 22:46:51 - INFO - codeparrot_training - Step 16369: {'lr': 0.0003973563731287673, 'samples': 8381440, 'steps': 16369, 'loss/train': 2.800663948059082} +02/24/2022 22:46:55 - INFO - codeparrot_training - Step 16370: {'lr': 0.00039734315484439255, 'samples': 8381952, 'steps': 16370, 'loss/train': 2.1165618896484375} +02/24/2022 22:47:01 - INFO - codeparrot_training - Step 16371: {'lr': 0.0003973299359288465, 'samples': 8382464, 'steps': 16371, 'loss/train': 2.0720815658569336} +02/24/2022 22:47:04 - INFO - codeparrot_training - Step 16372: {'lr': 0.0003973167163821858, 'samples': 8382976, 'steps': 16372, 'loss/train': 2.5066123008728027} +02/24/2022 22:47:10 - INFO - codeparrot_training - Step 16373: {'lr': 0.0003973034962044671, 'samples': 8383488, 'steps': 16373, 'loss/train': 0.39379721879959106} +02/24/2022 22:47:13 - INFO - codeparrot_training - Step 16374: {'lr': 0.00039729027539574696, 'samples': 8384000, 'steps': 16374, 'loss/train': 1.7836464643478394} +02/24/2022 22:47:19 - INFO - codeparrot_training - Step 16375: {'lr': 0.00039727705395608203, 'samples': 8384512, 'steps': 16375, 'loss/train': 1.5476856231689453} +02/24/2022 22:47:22 - INFO - codeparrot_training - Step 16376: {'lr': 0.00039726383188552907, 'samples': 8385024, 'steps': 16376, 'loss/train': 1.8719788789749146} +02/24/2022 22:47:28 - INFO - codeparrot_training - Step 16377: {'lr': 0.00039725060918414446, 'samples': 8385536, 'steps': 16377, 'loss/train': 2.55049991607666} +02/24/2022 22:47:31 - INFO - codeparrot_training - Step 16378: {'lr': 0.0003972373858519851, 'samples': 8386048, 'steps': 16378, 'loss/train': 2.5014894008636475} +02/24/2022 22:47:37 - INFO - codeparrot_training - Step 16379: {'lr': 0.00039722416188910754, 'samples': 8386560, 'steps': 16379, 'loss/train': 1.4758388996124268} +02/24/2022 22:47:40 - INFO - codeparrot_training - Step 16380: {'lr': 0.00039721093729556836, 'samples': 8387072, 'steps': 16380, 'loss/train': 1.4191232919692993} +02/24/2022 22:47:46 - INFO - codeparrot_training - Step 16381: {'lr': 0.0003971977120714243, 'samples': 8387584, 'steps': 16381, 'loss/train': 1.8881176710128784} +02/24/2022 22:47:50 - INFO - codeparrot_training - Step 16382: {'lr': 0.000397184486216732, 'samples': 8388096, 'steps': 16382, 'loss/train': 1.9483017921447754} +02/24/2022 22:47:55 - INFO - codeparrot_training - Step 16383: {'lr': 0.0003971712597315481, 'samples': 8388608, 'steps': 16383, 'loss/train': 1.5617362260818481} +02/24/2022 22:47:59 - INFO - codeparrot_training - Step 16384: {'lr': 0.0003971580326159292, 'samples': 8389120, 'steps': 16384, 'loss/train': 2.1248550415039062} +02/24/2022 22:48:04 - INFO - codeparrot_training - Step 16385: {'lr': 0.0003971448048699321, 'samples': 8389632, 'steps': 16385, 'loss/train': 2.250239133834839} +02/24/2022 22:48:08 - INFO - codeparrot_training - Step 16386: {'lr': 0.00039713157649361327, 'samples': 8390144, 'steps': 16386, 'loss/train': 2.240642786026001} +02/24/2022 22:48:13 - INFO - codeparrot_training - Step 16387: {'lr': 0.00039711834748702956, 'samples': 8390656, 'steps': 16387, 'loss/train': 2.2107701301574707} +02/24/2022 22:48:17 - INFO - codeparrot_training - Step 16388: {'lr': 0.0003971051178502375, 'samples': 8391168, 'steps': 16388, 'loss/train': 4.451059818267822} +02/24/2022 22:48:22 - INFO - codeparrot_training - Step 16389: {'lr': 0.00039709188758329394, 'samples': 8391680, 'steps': 16389, 'loss/train': 2.0136067867279053} +02/24/2022 22:48:26 - INFO - codeparrot_training - Step 16390: {'lr': 0.0003970786566862553, 'samples': 8392192, 'steps': 16390, 'loss/train': 1.6645402908325195} +02/24/2022 22:48:32 - INFO - codeparrot_training - Step 16391: {'lr': 0.00039706542515917853, 'samples': 8392704, 'steps': 16391, 'loss/train': 1.0072708129882812} +02/24/2022 22:48:35 - INFO - codeparrot_training - Step 16392: {'lr': 0.00039705219300212015, 'samples': 8393216, 'steps': 16392, 'loss/train': 1.4125769138336182} +02/24/2022 22:48:41 - INFO - codeparrot_training - Step 16393: {'lr': 0.00039703896021513684, 'samples': 8393728, 'steps': 16393, 'loss/train': 0.5865223407745361} +02/24/2022 22:48:44 - INFO - codeparrot_training - Step 16394: {'lr': 0.0003970257267982853, 'samples': 8394240, 'steps': 16394, 'loss/train': 1.7379567623138428} +02/24/2022 22:48:50 - INFO - codeparrot_training - Step 16395: {'lr': 0.0003970124927516222, 'samples': 8394752, 'steps': 16395, 'loss/train': 1.4561570882797241} +02/24/2022 22:48:53 - INFO - codeparrot_training - Step 16396: {'lr': 0.0003969992580752043, 'samples': 8395264, 'steps': 16396, 'loss/train': 1.6364994049072266} +02/24/2022 22:48:59 - INFO - codeparrot_training - Step 16397: {'lr': 0.00039698602276908826, 'samples': 8395776, 'steps': 16397, 'loss/train': 2.2902824878692627} +02/24/2022 22:49:02 - INFO - codeparrot_training - Step 16398: {'lr': 0.0003969727868333308, 'samples': 8396288, 'steps': 16398, 'loss/train': 3.540570020675659} +02/24/2022 22:49:08 - INFO - codeparrot_training - Step 16399: {'lr': 0.00039695955026798857, 'samples': 8396800, 'steps': 16399, 'loss/train': 2.3943612575531006} +02/24/2022 22:49:11 - INFO - codeparrot_training - Step 16400: {'lr': 0.0003969463130731183, 'samples': 8397312, 'steps': 16400, 'loss/train': 0.6846969127655029} +02/24/2022 22:49:18 - INFO - codeparrot_training - Step 16401: {'lr': 0.00039693307524877664, 'samples': 8397824, 'steps': 16401, 'loss/train': 2.4312009811401367} +02/24/2022 22:49:21 - INFO - codeparrot_training - Step 16402: {'lr': 0.0003969198367950204, 'samples': 8398336, 'steps': 16402, 'loss/train': 1.9659732580184937} +02/24/2022 22:49:26 - INFO - codeparrot_training - Step 16403: {'lr': 0.00039690659771190616, 'samples': 8398848, 'steps': 16403, 'loss/train': 2.5383570194244385} +02/24/2022 22:49:30 - INFO - codeparrot_training - Step 16404: {'lr': 0.0003968933579994908, 'samples': 8399360, 'steps': 16404, 'loss/train': 1.7474948167800903} +02/24/2022 22:49:35 - INFO - codeparrot_training - Step 16405: {'lr': 0.0003968801176578309, 'samples': 8399872, 'steps': 16405, 'loss/train': 4.142742156982422} +02/24/2022 22:49:39 - INFO - codeparrot_training - Step 16406: {'lr': 0.00039686687668698316, 'samples': 8400384, 'steps': 16406, 'loss/train': 0.7344520688056946} +02/24/2022 22:49:44 - INFO - codeparrot_training - Step 16407: {'lr': 0.00039685363508700443, 'samples': 8400896, 'steps': 16407, 'loss/train': 3.1351852416992188} +02/24/2022 22:49:48 - INFO - codeparrot_training - Step 16408: {'lr': 0.00039684039285795133, 'samples': 8401408, 'steps': 16408, 'loss/train': 2.0528266429901123} +02/24/2022 22:49:54 - INFO - codeparrot_training - Step 16409: {'lr': 0.0003968271499998806, 'samples': 8401920, 'steps': 16409, 'loss/train': 1.8270974159240723} +02/24/2022 22:49:57 - INFO - codeparrot_training - Step 16410: {'lr': 0.000396813906512849, 'samples': 8402432, 'steps': 16410, 'loss/train': 2.603074312210083} +02/24/2022 22:50:03 - INFO - codeparrot_training - Step 16411: {'lr': 0.00039680066239691325, 'samples': 8402944, 'steps': 16411, 'loss/train': 2.0200417041778564} +02/24/2022 22:50:06 - INFO - codeparrot_training - Step 16412: {'lr': 0.00039678741765213006, 'samples': 8403456, 'steps': 16412, 'loss/train': 2.427785873413086} +02/24/2022 22:50:12 - INFO - codeparrot_training - Step 16413: {'lr': 0.00039677417227855624, 'samples': 8403968, 'steps': 16413, 'loss/train': 0.5865729451179504} +02/24/2022 22:50:15 - INFO - codeparrot_training - Step 16414: {'lr': 0.0003967609262762484, 'samples': 8404480, 'steps': 16414, 'loss/train': 1.9257038831710815} +02/24/2022 22:50:21 - INFO - codeparrot_training - Step 16415: {'lr': 0.0003967476796452634, 'samples': 8404992, 'steps': 16415, 'loss/train': 2.423224925994873} +02/24/2022 22:50:24 - INFO - codeparrot_training - Step 16416: {'lr': 0.00039673443238565786, 'samples': 8405504, 'steps': 16416, 'loss/train': 1.0601638555526733} +02/24/2022 22:50:31 - INFO - codeparrot_training - Step 16417: {'lr': 0.0003967211844974887, 'samples': 8406016, 'steps': 16417, 'loss/train': 0.7265795469284058} +02/24/2022 22:50:35 - INFO - codeparrot_training - Step 16418: {'lr': 0.0003967079359808125, 'samples': 8406528, 'steps': 16418, 'loss/train': 1.439018964767456} +02/24/2022 22:50:40 - INFO - codeparrot_training - Step 16419: {'lr': 0.0003966946868356861, 'samples': 8407040, 'steps': 16419, 'loss/train': 2.1818454265594482} +02/24/2022 22:50:44 - INFO - codeparrot_training - Step 16420: {'lr': 0.0003966814370621663, 'samples': 8407552, 'steps': 16420, 'loss/train': 1.3772715330123901} +02/24/2022 22:50:49 - INFO - codeparrot_training - Step 16421: {'lr': 0.00039666818666030974, 'samples': 8408064, 'steps': 16421, 'loss/train': 1.5109524726867676} +02/24/2022 22:50:53 - INFO - codeparrot_training - Step 16422: {'lr': 0.0003966549356301733, 'samples': 8408576, 'steps': 16422, 'loss/train': 1.0947314500808716} +02/24/2022 22:50:58 - INFO - codeparrot_training - Step 16423: {'lr': 0.0003966416839718136, 'samples': 8409088, 'steps': 16423, 'loss/train': 1.9209057092666626} +02/24/2022 22:51:02 - INFO - codeparrot_training - Step 16424: {'lr': 0.00039662843168528756, 'samples': 8409600, 'steps': 16424, 'loss/train': 1.0642305612564087} +02/24/2022 22:51:07 - INFO - codeparrot_training - Step 16425: {'lr': 0.00039661517877065183, 'samples': 8410112, 'steps': 16425, 'loss/train': 1.898046612739563} +02/24/2022 22:51:11 - INFO - codeparrot_training - Step 16426: {'lr': 0.0003966019252279633, 'samples': 8410624, 'steps': 16426, 'loss/train': 2.0211141109466553} +02/24/2022 22:51:18 - INFO - codeparrot_training - Step 16427: {'lr': 0.00039658867105727856, 'samples': 8411136, 'steps': 16427, 'loss/train': 2.5976674556732178} +02/24/2022 22:51:23 - INFO - codeparrot_training - Step 16428: {'lr': 0.0003965754162586547, 'samples': 8411648, 'steps': 16428, 'loss/train': 1.5950156450271606} +02/24/2022 22:51:27 - INFO - codeparrot_training - Step 16429: {'lr': 0.0003965621608321481, 'samples': 8412160, 'steps': 16429, 'loss/train': 1.2250365018844604} +02/24/2022 22:51:32 - INFO - codeparrot_training - Step 16430: {'lr': 0.0003965489047778158, 'samples': 8412672, 'steps': 16430, 'loss/train': 1.175410509109497} +02/24/2022 22:51:36 - INFO - codeparrot_training - Step 16431: {'lr': 0.0003965356480957145, 'samples': 8413184, 'steps': 16431, 'loss/train': 1.8192424774169922} +02/24/2022 22:51:41 - INFO - codeparrot_training - Step 16432: {'lr': 0.0003965223907859011, 'samples': 8413696, 'steps': 16432, 'loss/train': 2.0306432247161865} +02/24/2022 22:51:45 - INFO - codeparrot_training - Step 16433: {'lr': 0.00039650913284843225, 'samples': 8414208, 'steps': 16433, 'loss/train': 2.4991097450256348} +02/24/2022 22:51:50 - INFO - codeparrot_training - Step 16434: {'lr': 0.00039649587428336474, 'samples': 8414720, 'steps': 16434, 'loss/train': 3.0235767364501953} +02/24/2022 22:51:54 - INFO - codeparrot_training - Step 16435: {'lr': 0.00039648261509075554, 'samples': 8415232, 'steps': 16435, 'loss/train': 1.6170494556427002} +02/24/2022 22:51:59 - INFO - codeparrot_training - Step 16436: {'lr': 0.00039646935527066124, 'samples': 8415744, 'steps': 16436, 'loss/train': 2.5134470462799072} +02/24/2022 22:52:02 - INFO - codeparrot_training - Step 16437: {'lr': 0.0003964560948231388, 'samples': 8416256, 'steps': 16437, 'loss/train': 1.8894404172897339} +02/24/2022 22:52:09 - INFO - codeparrot_training - Step 16438: {'lr': 0.0003964428337482449, 'samples': 8416768, 'steps': 16438, 'loss/train': 1.789795160293579} +02/24/2022 22:52:12 - INFO - codeparrot_training - Step 16439: {'lr': 0.00039642957204603647, 'samples': 8417280, 'steps': 16439, 'loss/train': 2.028066873550415} +02/24/2022 22:52:18 - INFO - codeparrot_training - Step 16440: {'lr': 0.0003964163097165702, 'samples': 8417792, 'steps': 16440, 'loss/train': 2.473353624343872} +02/24/2022 22:52:21 - INFO - codeparrot_training - Step 16441: {'lr': 0.0003964030467599029, 'samples': 8418304, 'steps': 16441, 'loss/train': 0.18668408691883087} +02/24/2022 22:52:27 - INFO - codeparrot_training - Step 16442: {'lr': 0.00039638978317609155, 'samples': 8418816, 'steps': 16442, 'loss/train': 1.9909363985061646} +02/24/2022 22:52:30 - INFO - codeparrot_training - Step 16443: {'lr': 0.0003963765189651928, 'samples': 8419328, 'steps': 16443, 'loss/train': 2.2685911655426025} +02/24/2022 22:52:36 - INFO - codeparrot_training - Step 16444: {'lr': 0.0003963632541272635, 'samples': 8419840, 'steps': 16444, 'loss/train': 2.473036289215088} +02/24/2022 22:52:39 - INFO - codeparrot_training - Step 16445: {'lr': 0.00039634998866236047, 'samples': 8420352, 'steps': 16445, 'loss/train': 1.807316780090332} +02/24/2022 22:52:45 - INFO - codeparrot_training - Step 16446: {'lr': 0.0003963367225705406, 'samples': 8420864, 'steps': 16446, 'loss/train': 2.210996150970459} +02/24/2022 22:52:48 - INFO - codeparrot_training - Step 16447: {'lr': 0.0003963234558518607, 'samples': 8421376, 'steps': 16447, 'loss/train': 2.2673728466033936} +02/24/2022 22:52:54 - INFO - codeparrot_training - Step 16448: {'lr': 0.0003963101885063776, 'samples': 8421888, 'steps': 16448, 'loss/train': 2.0479724407196045} +02/24/2022 22:52:58 - INFO - codeparrot_training - Step 16449: {'lr': 0.000396296920534148, 'samples': 8422400, 'steps': 16449, 'loss/train': 3.3648974895477295} +02/24/2022 22:53:04 - INFO - codeparrot_training - Step 16450: {'lr': 0.000396283651935229, 'samples': 8422912, 'steps': 16450, 'loss/train': 1.3978142738342285} +02/24/2022 22:53:07 - INFO - codeparrot_training - Step 16451: {'lr': 0.0003962703827096771, 'samples': 8423424, 'steps': 16451, 'loss/train': 1.3956341743469238} +02/24/2022 22:53:13 - INFO - codeparrot_training - Step 16452: {'lr': 0.00039625711285754943, 'samples': 8423936, 'steps': 16452, 'loss/train': 1.7806352376937866} +02/24/2022 22:53:16 - INFO - codeparrot_training - Step 16453: {'lr': 0.00039624384237890275, 'samples': 8424448, 'steps': 16453, 'loss/train': 1.9342843294143677} +02/24/2022 22:53:22 - INFO - codeparrot_training - Step 16454: {'lr': 0.00039623057127379386, 'samples': 8424960, 'steps': 16454, 'loss/train': 1.8664958477020264} +02/24/2022 22:53:25 - INFO - codeparrot_training - Step 16455: {'lr': 0.0003962172995422796, 'samples': 8425472, 'steps': 16455, 'loss/train': 2.6650211811065674} +02/24/2022 22:53:30 - INFO - codeparrot_training - Step 16456: {'lr': 0.00039620402718441687, 'samples': 8425984, 'steps': 16456, 'loss/train': 3.2424442768096924} +02/24/2022 22:53:34 - INFO - codeparrot_training - Step 16457: {'lr': 0.0003961907542002626, 'samples': 8426496, 'steps': 16457, 'loss/train': 2.916908025741577} +02/24/2022 22:53:39 - INFO - codeparrot_training - Step 16458: {'lr': 0.00039617748058987345, 'samples': 8427008, 'steps': 16458, 'loss/train': 1.9213870763778687} +02/24/2022 22:53:43 - INFO - codeparrot_training - Step 16459: {'lr': 0.0003961642063533065, 'samples': 8427520, 'steps': 16459, 'loss/train': 1.4892544746398926} +02/24/2022 22:53:49 - INFO - codeparrot_training - Step 16460: {'lr': 0.0003961509314906184, 'samples': 8428032, 'steps': 16460, 'loss/train': 1.9083231687545776} +02/24/2022 22:53:52 - INFO - codeparrot_training - Step 16461: {'lr': 0.0003961376560018662, 'samples': 8428544, 'steps': 16461, 'loss/train': 2.6866848468780518} +02/24/2022 22:53:56 - INFO - codeparrot_training - Step 16462: {'lr': 0.0003961243798871066, 'samples': 8429056, 'steps': 16462, 'loss/train': 0.6899096965789795} +02/24/2022 22:54:02 - INFO - codeparrot_training - Step 16463: {'lr': 0.00039611110314639663, 'samples': 8429568, 'steps': 16463, 'loss/train': 2.3329179286956787} +02/24/2022 22:54:07 - INFO - codeparrot_training - Step 16464: {'lr': 0.00039609782577979306, 'samples': 8430080, 'steps': 16464, 'loss/train': 1.103137731552124} +02/24/2022 22:54:11 - INFO - codeparrot_training - Step 16465: {'lr': 0.0003960845477873528, 'samples': 8430592, 'steps': 16465, 'loss/train': 2.858816385269165} +02/24/2022 22:54:16 - INFO - codeparrot_training - Step 16466: {'lr': 0.00039607126916913274, 'samples': 8431104, 'steps': 16466, 'loss/train': 1.550572156906128} +02/24/2022 22:54:20 - INFO - codeparrot_training - Step 16467: {'lr': 0.00039605798992518973, 'samples': 8431616, 'steps': 16467, 'loss/train': 0.09146829694509506} +02/24/2022 22:54:26 - INFO - codeparrot_training - Step 16468: {'lr': 0.00039604471005558065, 'samples': 8432128, 'steps': 16468, 'loss/train': 2.452885627746582} +02/24/2022 22:54:29 - INFO - codeparrot_training - Step 16469: {'lr': 0.0003960314295603624, 'samples': 8432640, 'steps': 16469, 'loss/train': 0.927303671836853} +02/24/2022 22:54:33 - INFO - codeparrot_training - Step 16470: {'lr': 0.00039601814843959193, 'samples': 8433152, 'steps': 16470, 'loss/train': 2.736640214920044} +02/24/2022 22:54:38 - INFO - codeparrot_training - Step 16471: {'lr': 0.00039600486669332603, 'samples': 8433664, 'steps': 16471, 'loss/train': 2.7113940715789795} +02/24/2022 22:54:41 - INFO - codeparrot_training - Step 16472: {'lr': 0.00039599158432162163, 'samples': 8434176, 'steps': 16472, 'loss/train': 1.2780364751815796} +02/24/2022 22:54:48 - INFO - codeparrot_training - Step 16473: {'lr': 0.0003959783013245357, 'samples': 8434688, 'steps': 16473, 'loss/train': 2.6382501125335693} +02/24/2022 22:54:53 - INFO - codeparrot_training - Step 16474: {'lr': 0.000395965017702125, 'samples': 8435200, 'steps': 16474, 'loss/train': 2.0374083518981934} +02/24/2022 22:54:57 - INFO - codeparrot_training - Step 16475: {'lr': 0.00039595173345444656, 'samples': 8435712, 'steps': 16475, 'loss/train': 1.105425477027893} +02/24/2022 22:55:00 - INFO - codeparrot_training - Step 16476: {'lr': 0.0003959384485815573, 'samples': 8436224, 'steps': 16476, 'loss/train': 1.7489532232284546} +02/24/2022 22:55:06 - INFO - codeparrot_training - Step 16477: {'lr': 0.000395925163083514, 'samples': 8436736, 'steps': 16477, 'loss/train': 1.8258665800094604} +02/24/2022 22:55:11 - INFO - codeparrot_training - Step 16478: {'lr': 0.00039591187696037366, 'samples': 8437248, 'steps': 16478, 'loss/train': 2.2233920097351074} +02/24/2022 22:55:15 - INFO - codeparrot_training - Step 16479: {'lr': 0.0003958985902121931, 'samples': 8437760, 'steps': 16479, 'loss/train': 3.151305913925171} +02/24/2022 22:55:21 - INFO - codeparrot_training - Step 16480: {'lr': 0.00039588530283902936, 'samples': 8438272, 'steps': 16480, 'loss/train': 1.291198968887329} +02/24/2022 22:55:24 - INFO - codeparrot_training - Step 16481: {'lr': 0.00039587201484093937, 'samples': 8438784, 'steps': 16481, 'loss/train': 2.1003127098083496} +02/24/2022 22:55:27 - INFO - codeparrot_training - Step 16482: {'lr': 0.0003958587262179799, 'samples': 8439296, 'steps': 16482, 'loss/train': 1.629981279373169} +02/24/2022 22:55:34 - INFO - codeparrot_training - Step 16483: {'lr': 0.00039584543697020804, 'samples': 8439808, 'steps': 16483, 'loss/train': 2.0426506996154785} +02/24/2022 22:55:37 - INFO - codeparrot_training - Step 16484: {'lr': 0.00039583214709768054, 'samples': 8440320, 'steps': 16484, 'loss/train': 1.9853456020355225} +02/24/2022 22:55:43 - INFO - codeparrot_training - Step 16485: {'lr': 0.00039581885660045445, 'samples': 8440832, 'steps': 16485, 'loss/train': 1.8960797786712646} +02/24/2022 22:55:47 - INFO - codeparrot_training - Step 16486: {'lr': 0.0003958055654785867, 'samples': 8441344, 'steps': 16486, 'loss/train': 2.179119825363159} +02/24/2022 22:55:52 - INFO - codeparrot_training - Step 16487: {'lr': 0.0003957922737321343, 'samples': 8441856, 'steps': 16487, 'loss/train': 2.332688331604004} +02/24/2022 22:55:56 - INFO - codeparrot_training - Step 16488: {'lr': 0.00039577898136115397, 'samples': 8442368, 'steps': 16488, 'loss/train': 2.169513463973999} +02/24/2022 22:56:01 - INFO - codeparrot_training - Step 16489: {'lr': 0.00039576568836570283, 'samples': 8442880, 'steps': 16489, 'loss/train': 2.0098929405212402} +02/24/2022 22:56:05 - INFO - codeparrot_training - Step 16490: {'lr': 0.0003957523947458377, 'samples': 8443392, 'steps': 16490, 'loss/train': 1.5059328079223633} +02/24/2022 22:56:10 - INFO - codeparrot_training - Step 16491: {'lr': 0.00039573910050161564, 'samples': 8443904, 'steps': 16491, 'loss/train': 1.3284860849380493} +02/24/2022 22:56:14 - INFO - codeparrot_training - Step 16492: {'lr': 0.0003957258056330936, 'samples': 8444416, 'steps': 16492, 'loss/train': 2.356466054916382} +02/24/2022 22:56:19 - INFO - codeparrot_training - Step 16493: {'lr': 0.00039571251014032847, 'samples': 8444928, 'steps': 16493, 'loss/train': 4.82999849319458} +02/24/2022 22:56:23 - INFO - codeparrot_training - Step 16494: {'lr': 0.00039569921402337715, 'samples': 8445440, 'steps': 16494, 'loss/train': 2.3106446266174316} +02/24/2022 22:56:28 - INFO - codeparrot_training - Step 16495: {'lr': 0.00039568591728229667, 'samples': 8445952, 'steps': 16495, 'loss/train': 0.6705731749534607} +02/24/2022 22:56:32 - INFO - codeparrot_training - Step 16496: {'lr': 0.00039567261991714406, 'samples': 8446464, 'steps': 16496, 'loss/train': 1.957373023033142} +02/24/2022 22:56:37 - INFO - codeparrot_training - Step 16497: {'lr': 0.0003956593219279761, 'samples': 8446976, 'steps': 16497, 'loss/train': 1.888087272644043} +02/24/2022 22:56:41 - INFO - codeparrot_training - Step 16498: {'lr': 0.00039564602331484993, 'samples': 8447488, 'steps': 16498, 'loss/train': 1.583459496498108} +02/24/2022 22:56:46 - INFO - codeparrot_training - Step 16499: {'lr': 0.0003956327240778224, 'samples': 8448000, 'steps': 16499, 'loss/train': 2.1672847270965576} +02/24/2022 22:56:50 - INFO - codeparrot_training - Step 16500: {'lr': 0.00039561942421695057, 'samples': 8448512, 'steps': 16500, 'loss/train': 1.9693784713745117} +02/24/2022 22:56:56 - INFO - codeparrot_training - Step 16501: {'lr': 0.00039560612373229135, 'samples': 8449024, 'steps': 16501, 'loss/train': 1.5708659887313843} +02/24/2022 22:57:00 - INFO - codeparrot_training - Step 16502: {'lr': 0.0003955928226239017, 'samples': 8449536, 'steps': 16502, 'loss/train': 0.6870248317718506} +02/24/2022 22:57:05 - INFO - codeparrot_training - Step 16503: {'lr': 0.00039557952089183863, 'samples': 8450048, 'steps': 16503, 'loss/train': 2.149991035461426} +02/24/2022 22:57:09 - INFO - codeparrot_training - Step 16504: {'lr': 0.00039556621853615914, 'samples': 8450560, 'steps': 16504, 'loss/train': 2.26275897026062} +02/24/2022 22:57:14 - INFO - codeparrot_training - Step 16505: {'lr': 0.0003955529155569202, 'samples': 8451072, 'steps': 16505, 'loss/train': 2.676116466522217} +02/24/2022 22:57:18 - INFO - codeparrot_training - Step 16506: {'lr': 0.0003955396119541788, 'samples': 8451584, 'steps': 16506, 'loss/train': 0.8906749486923218} +02/24/2022 22:57:23 - INFO - codeparrot_training - Step 16507: {'lr': 0.00039552630772799185, 'samples': 8452096, 'steps': 16507, 'loss/train': 1.9210397005081177} +02/24/2022 22:57:27 - INFO - codeparrot_training - Step 16508: {'lr': 0.0003955130028784165, 'samples': 8452608, 'steps': 16508, 'loss/train': 2.290168523788452} +02/24/2022 22:57:33 - INFO - codeparrot_training - Step 16509: {'lr': 0.00039549969740550954, 'samples': 8453120, 'steps': 16509, 'loss/train': 2.4497992992401123} +02/24/2022 22:57:36 - INFO - codeparrot_training - Step 16510: {'lr': 0.00039548639130932816, 'samples': 8453632, 'steps': 16510, 'loss/train': 2.114773988723755} +02/24/2022 22:57:42 - INFO - codeparrot_training - Step 16511: {'lr': 0.00039547308458992927, 'samples': 8454144, 'steps': 16511, 'loss/train': 1.4165219068527222} +02/24/2022 22:57:45 - INFO - codeparrot_training - Step 16512: {'lr': 0.00039545977724736984, 'samples': 8454656, 'steps': 16512, 'loss/train': 2.0097784996032715} +02/24/2022 22:57:53 - INFO - codeparrot_training - Step 16513: {'lr': 0.00039544646928170695, 'samples': 8455168, 'steps': 16513, 'loss/train': 2.3383729457855225} +02/24/2022 22:57:56 - INFO - codeparrot_training - Step 16514: {'lr': 0.0003954331606929976, 'samples': 8455680, 'steps': 16514, 'loss/train': 0.8184428811073303} +02/24/2022 22:58:02 - INFO - codeparrot_training - Step 16515: {'lr': 0.00039541985148129865, 'samples': 8456192, 'steps': 16515, 'loss/train': 1.6029225587844849} +02/24/2022 22:58:05 - INFO - codeparrot_training - Step 16516: {'lr': 0.00039540654164666735, 'samples': 8456704, 'steps': 16516, 'loss/train': 2.4327926635742188} +02/24/2022 22:58:11 - INFO - codeparrot_training - Step 16517: {'lr': 0.00039539323118916055, 'samples': 8457216, 'steps': 16517, 'loss/train': 2.031630516052246} +02/24/2022 22:58:14 - INFO - codeparrot_training - Step 16518: {'lr': 0.0003953799201088353, 'samples': 8457728, 'steps': 16518, 'loss/train': 1.4740921258926392} +02/24/2022 22:58:20 - INFO - codeparrot_training - Step 16519: {'lr': 0.00039536660840574866, 'samples': 8458240, 'steps': 16519, 'loss/train': 2.8197085857391357} +02/24/2022 22:58:23 - INFO - codeparrot_training - Step 16520: {'lr': 0.0003953532960799577, 'samples': 8458752, 'steps': 16520, 'loss/train': 2.4500954151153564} +02/24/2022 22:58:29 - INFO - codeparrot_training - Step 16521: {'lr': 0.00039533998313151926, 'samples': 8459264, 'steps': 16521, 'loss/train': 2.0887598991394043} +02/24/2022 22:58:32 - INFO - codeparrot_training - Step 16522: {'lr': 0.0003953266695604906, 'samples': 8459776, 'steps': 16522, 'loss/train': 2.133399724960327} +02/24/2022 22:58:38 - INFO - codeparrot_training - Step 16523: {'lr': 0.0003953133553669285, 'samples': 8460288, 'steps': 16523, 'loss/train': 1.4083654880523682} +02/24/2022 22:58:42 - INFO - codeparrot_training - Step 16524: {'lr': 0.0003953000405508902, 'samples': 8460800, 'steps': 16524, 'loss/train': 1.5588939189910889} +02/24/2022 22:58:47 - INFO - codeparrot_training - Step 16525: {'lr': 0.00039528672511243256, 'samples': 8461312, 'steps': 16525, 'loss/train': 1.4627403020858765} +02/24/2022 22:58:51 - INFO - codeparrot_training - Step 16526: {'lr': 0.0003952734090516129, 'samples': 8461824, 'steps': 16526, 'loss/train': 2.596207618713379} +02/24/2022 22:58:56 - INFO - codeparrot_training - Step 16527: {'lr': 0.000395260092368488, 'samples': 8462336, 'steps': 16527, 'loss/train': 2.116630792617798} +02/24/2022 22:59:00 - INFO - codeparrot_training - Step 16528: {'lr': 0.000395246775063115, 'samples': 8462848, 'steps': 16528, 'loss/train': 1.810567021369934} +02/24/2022 22:59:05 - INFO - codeparrot_training - Step 16529: {'lr': 0.0003952334571355509, 'samples': 8463360, 'steps': 16529, 'loss/train': 2.4026710987091064} +02/24/2022 22:59:09 - INFO - codeparrot_training - Step 16530: {'lr': 0.0003952201385858528, 'samples': 8463872, 'steps': 16530, 'loss/train': 1.9620431661605835} +02/24/2022 22:59:14 - INFO - codeparrot_training - Step 16531: {'lr': 0.00039520681941407777, 'samples': 8464384, 'steps': 16531, 'loss/train': 2.0947883129119873} +02/24/2022 22:59:21 - INFO - codeparrot_training - Step 16532: {'lr': 0.00039519349962028276, 'samples': 8464896, 'steps': 16532, 'loss/train': 2.0926616191864014} +02/24/2022 22:59:24 - INFO - codeparrot_training - Step 16533: {'lr': 0.000395180179204525, 'samples': 8465408, 'steps': 16533, 'loss/train': 1.0830365419387817} +02/24/2022 22:59:30 - INFO - codeparrot_training - Step 16534: {'lr': 0.0003951668581668614, 'samples': 8465920, 'steps': 16534, 'loss/train': 2.152033805847168} +02/24/2022 22:59:33 - INFO - codeparrot_training - Step 16535: {'lr': 0.0003951535365073491, 'samples': 8466432, 'steps': 16535, 'loss/train': 1.3243255615234375} +02/24/2022 22:59:37 - INFO - codeparrot_training - Step 16536: {'lr': 0.00039514021422604515, 'samples': 8466944, 'steps': 16536, 'loss/train': 2.636953592300415} +02/24/2022 22:59:42 - INFO - codeparrot_training - Step 16537: {'lr': 0.0003951268913230066, 'samples': 8467456, 'steps': 16537, 'loss/train': 2.2711310386657715} +02/24/2022 22:59:45 - INFO - codeparrot_training - Step 16538: {'lr': 0.0003951135677982904, 'samples': 8467968, 'steps': 16538, 'loss/train': 2.9004974365234375} +02/24/2022 22:59:52 - INFO - codeparrot_training - Step 16539: {'lr': 0.000395100243651954, 'samples': 8468480, 'steps': 16539, 'loss/train': 2.2376620769500732} +02/24/2022 22:59:55 - INFO - codeparrot_training - Step 16540: {'lr': 0.00039508691888405403, 'samples': 8468992, 'steps': 16540, 'loss/train': 8.386177062988281} +02/24/2022 23:00:01 - INFO - codeparrot_training - Step 16541: {'lr': 0.0003950735934946478, 'samples': 8469504, 'steps': 16541, 'loss/train': 2.437643051147461} +02/24/2022 23:00:04 - INFO - codeparrot_training - Step 16542: {'lr': 0.0003950602674837924, 'samples': 8470016, 'steps': 16542, 'loss/train': 1.8111413717269897} +02/24/2022 23:00:09 - INFO - codeparrot_training - Step 16543: {'lr': 0.0003950469408515449, 'samples': 8470528, 'steps': 16543, 'loss/train': 1.3587454557418823} +02/24/2022 23:00:13 - INFO - codeparrot_training - Step 16544: {'lr': 0.00039503361359796235, 'samples': 8471040, 'steps': 16544, 'loss/train': 1.3132683038711548} +02/24/2022 23:00:19 - INFO - codeparrot_training - Step 16545: {'lr': 0.00039502028572310186, 'samples': 8471552, 'steps': 16545, 'loss/train': 1.629294514656067} +02/24/2022 23:00:23 - INFO - codeparrot_training - Step 16546: {'lr': 0.0003950069572270205, 'samples': 8472064, 'steps': 16546, 'loss/train': 2.0858993530273438} +02/24/2022 23:00:28 - INFO - codeparrot_training - Step 16547: {'lr': 0.00039499362810977535, 'samples': 8472576, 'steps': 16547, 'loss/train': 2.7927310466766357} +02/24/2022 23:00:32 - INFO - codeparrot_training - Step 16548: {'lr': 0.00039498029837142356, 'samples': 8473088, 'steps': 16548, 'loss/train': 1.405111312866211} +02/24/2022 23:00:37 - INFO - codeparrot_training - Step 16549: {'lr': 0.0003949669680120223, 'samples': 8473600, 'steps': 16549, 'loss/train': 1.546103596687317} +02/24/2022 23:00:41 - INFO - codeparrot_training - Step 16550: {'lr': 0.00039495363703162843, 'samples': 8474112, 'steps': 16550, 'loss/train': 2.0481925010681152} +02/24/2022 23:00:46 - INFO - codeparrot_training - Step 16551: {'lr': 0.00039494030543029925, 'samples': 8474624, 'steps': 16551, 'loss/train': 1.6093497276306152} +02/24/2022 23:00:50 - INFO - codeparrot_training - Step 16552: {'lr': 0.0003949269732080919, 'samples': 8475136, 'steps': 16552, 'loss/train': 2.665602684020996} +02/24/2022 23:00:55 - INFO - codeparrot_training - Step 16553: {'lr': 0.0003949136403650633, 'samples': 8475648, 'steps': 16553, 'loss/train': 2.1096978187561035} +02/24/2022 23:00:59 - INFO - codeparrot_training - Step 16554: {'lr': 0.0003949003069012708, 'samples': 8476160, 'steps': 16554, 'loss/train': 1.4082001447677612} +02/24/2022 23:01:05 - INFO - codeparrot_training - Step 16555: {'lr': 0.0003948869728167713, 'samples': 8476672, 'steps': 16555, 'loss/train': 2.1503357887268066} +02/24/2022 23:01:09 - INFO - codeparrot_training - Step 16556: {'lr': 0.0003948736381116221, 'samples': 8477184, 'steps': 16556, 'loss/train': 0.7984724640846252} +02/24/2022 23:01:14 - INFO - codeparrot_training - Step 16557: {'lr': 0.0003948603027858802, 'samples': 8477696, 'steps': 16557, 'loss/train': 1.2415456771850586} +02/24/2022 23:01:18 - INFO - codeparrot_training - Step 16558: {'lr': 0.00039484696683960276, 'samples': 8478208, 'steps': 16558, 'loss/train': 1.808021068572998} +02/24/2022 23:01:23 - INFO - codeparrot_training - Step 16559: {'lr': 0.0003948336302728469, 'samples': 8478720, 'steps': 16559, 'loss/train': 1.9360491037368774} +02/24/2022 23:01:27 - INFO - codeparrot_training - Step 16560: {'lr': 0.0003948202930856697, 'samples': 8479232, 'steps': 16560, 'loss/train': 1.6901130676269531} +02/24/2022 23:01:33 - INFO - codeparrot_training - Step 16561: {'lr': 0.0003948069552781285, 'samples': 8479744, 'steps': 16561, 'loss/train': 2.4037675857543945} +02/24/2022 23:01:36 - INFO - codeparrot_training - Step 16562: {'lr': 0.00039479361685028016, 'samples': 8480256, 'steps': 16562, 'loss/train': 2.626187562942505} +02/24/2022 23:01:42 - INFO - codeparrot_training - Step 16563: {'lr': 0.00039478027780218193, 'samples': 8480768, 'steps': 16563, 'loss/train': 2.0008575916290283} +02/24/2022 23:01:45 - INFO - codeparrot_training - Step 16564: {'lr': 0.00039476693813389105, 'samples': 8481280, 'steps': 16564, 'loss/train': 2.60748028755188} +02/24/2022 23:01:51 - INFO - codeparrot_training - Step 16565: {'lr': 0.0003947535978454645, 'samples': 8481792, 'steps': 16565, 'loss/train': 1.9211615324020386} +02/24/2022 23:01:54 - INFO - codeparrot_training - Step 16566: {'lr': 0.0003947402569369596, 'samples': 8482304, 'steps': 16566, 'loss/train': 1.8298922777175903} +02/24/2022 23:02:00 - INFO - codeparrot_training - Step 16567: {'lr': 0.0003947269154084333, 'samples': 8482816, 'steps': 16567, 'loss/train': 2.71044921875} +02/24/2022 23:02:03 - INFO - codeparrot_training - Step 16568: {'lr': 0.0003947135732599428, 'samples': 8483328, 'steps': 16568, 'loss/train': 1.496972918510437} +02/24/2022 23:02:09 - INFO - codeparrot_training - Step 16569: {'lr': 0.00039470023049154544, 'samples': 8483840, 'steps': 16569, 'loss/train': 1.642454743385315} +02/24/2022 23:02:13 - INFO - codeparrot_training - Step 16570: {'lr': 0.00039468688710329826, 'samples': 8484352, 'steps': 16570, 'loss/train': 2.7987024784088135} +02/24/2022 23:02:18 - INFO - codeparrot_training - Step 16571: {'lr': 0.0003946735430952583, 'samples': 8484864, 'steps': 16571, 'loss/train': 1.9255377054214478} +02/24/2022 23:02:22 - INFO - codeparrot_training - Step 16572: {'lr': 0.0003946601984674828, 'samples': 8485376, 'steps': 16572, 'loss/train': 1.349665880203247} +02/24/2022 23:02:27 - INFO - codeparrot_training - Step 16573: {'lr': 0.00039464685322002904, 'samples': 8485888, 'steps': 16573, 'loss/train': 1.7452784776687622} +02/24/2022 23:02:31 - INFO - codeparrot_training - Step 16574: {'lr': 0.000394633507352954, 'samples': 8486400, 'steps': 16574, 'loss/train': 2.2632179260253906} +02/24/2022 23:02:36 - INFO - codeparrot_training - Step 16575: {'lr': 0.00039462016086631505, 'samples': 8486912, 'steps': 16575, 'loss/train': 1.8841798305511475} +02/24/2022 23:02:40 - INFO - codeparrot_training - Step 16576: {'lr': 0.00039460681376016915, 'samples': 8487424, 'steps': 16576, 'loss/train': 1.313531517982483} +02/24/2022 23:02:45 - INFO - codeparrot_training - Step 16577: {'lr': 0.0003945934660345736, 'samples': 8487936, 'steps': 16577, 'loss/train': 2.0716559886932373} +02/24/2022 23:02:49 - INFO - codeparrot_training - Step 16578: {'lr': 0.00039458011768958557, 'samples': 8488448, 'steps': 16578, 'loss/train': 1.9649165868759155} +02/24/2022 23:02:54 - INFO - codeparrot_training - Step 16579: {'lr': 0.00039456676872526227, 'samples': 8488960, 'steps': 16579, 'loss/train': 2.3955788612365723} +02/24/2022 23:02:58 - INFO - codeparrot_training - Step 16580: {'lr': 0.00039455341914166074, 'samples': 8489472, 'steps': 16580, 'loss/train': 0.5866998434066772} +02/24/2022 23:03:04 - INFO - codeparrot_training - Step 16581: {'lr': 0.0003945400689388384, 'samples': 8489984, 'steps': 16581, 'loss/train': 1.8699214458465576} +02/24/2022 23:03:07 - INFO - codeparrot_training - Step 16582: {'lr': 0.00039452671811685214, 'samples': 8490496, 'steps': 16582, 'loss/train': 1.2274113893508911} +02/24/2022 23:03:13 - INFO - codeparrot_training - Step 16583: {'lr': 0.00039451336667575945, 'samples': 8491008, 'steps': 16583, 'loss/train': 1.9552297592163086} +02/24/2022 23:03:16 - INFO - codeparrot_training - Step 16584: {'lr': 0.0003945000146156173, 'samples': 8491520, 'steps': 16584, 'loss/train': 2.1564347743988037} +02/24/2022 23:03:22 - INFO - codeparrot_training - Step 16585: {'lr': 0.00039448666193648305, 'samples': 8492032, 'steps': 16585, 'loss/train': 2.390486717224121} +02/24/2022 23:03:26 - INFO - codeparrot_training - Step 16586: {'lr': 0.0003944733086384137, 'samples': 8492544, 'steps': 16586, 'loss/train': 2.964930772781372} +02/24/2022 23:03:31 - INFO - codeparrot_training - Step 16587: {'lr': 0.00039445995472146665, 'samples': 8493056, 'steps': 16587, 'loss/train': 2.803877592086792} +02/24/2022 23:03:34 - INFO - codeparrot_training - Step 16588: {'lr': 0.000394446600185699, 'samples': 8493568, 'steps': 16588, 'loss/train': 1.9944179058074951} +02/24/2022 23:03:40 - INFO - codeparrot_training - Step 16589: {'lr': 0.000394433245031168, 'samples': 8494080, 'steps': 16589, 'loss/train': 3.2032272815704346} +02/24/2022 23:03:43 - INFO - codeparrot_training - Step 16590: {'lr': 0.0003944198892579309, 'samples': 8494592, 'steps': 16590, 'loss/train': 1.1729189157485962} +02/24/2022 23:03:50 - INFO - codeparrot_training - Step 16591: {'lr': 0.0003944065328660447, 'samples': 8495104, 'steps': 16591, 'loss/train': 2.2717106342315674} +02/24/2022 23:03:53 - INFO - codeparrot_training - Step 16592: {'lr': 0.0003943931758555669, 'samples': 8495616, 'steps': 16592, 'loss/train': 1.2033859491348267} +02/24/2022 23:03:59 - INFO - codeparrot_training - Step 16593: {'lr': 0.00039437981822655453, 'samples': 8496128, 'steps': 16593, 'loss/train': 1.4606666564941406} +02/24/2022 23:04:02 - INFO - codeparrot_training - Step 16594: {'lr': 0.00039436645997906487, 'samples': 8496640, 'steps': 16594, 'loss/train': 3.0710957050323486} +02/24/2022 23:04:08 - INFO - codeparrot_training - Step 16595: {'lr': 0.00039435310111315513, 'samples': 8497152, 'steps': 16595, 'loss/train': 1.5867692232131958} +02/24/2022 23:04:11 - INFO - codeparrot_training - Step 16596: {'lr': 0.00039433974162888266, 'samples': 8497664, 'steps': 16596, 'loss/train': 1.9993149042129517} +02/24/2022 23:04:17 - INFO - codeparrot_training - Step 16597: {'lr': 0.0003943263815263044, 'samples': 8498176, 'steps': 16597, 'loss/train': 0.9557214379310608} +02/24/2022 23:04:20 - INFO - codeparrot_training - Step 16598: {'lr': 0.0003943130208054778, 'samples': 8498688, 'steps': 16598, 'loss/train': 2.130176544189453} +02/24/2022 23:04:26 - INFO - codeparrot_training - Step 16599: {'lr': 0.0003942996594664601, 'samples': 8499200, 'steps': 16599, 'loss/train': 1.9649536609649658} +02/24/2022 23:04:29 - INFO - codeparrot_training - Step 16600: {'lr': 0.00039428629750930846, 'samples': 8499712, 'steps': 16600, 'loss/train': 2.0388505458831787} +02/24/2022 23:04:35 - INFO - codeparrot_training - Step 16601: {'lr': 0.0003942729349340801, 'samples': 8500224, 'steps': 16601, 'loss/train': 1.426533579826355} +02/24/2022 23:04:39 - INFO - codeparrot_training - Step 16602: {'lr': 0.00039425957174083224, 'samples': 8500736, 'steps': 16602, 'loss/train': 2.302284002304077} +02/24/2022 23:04:44 - INFO - codeparrot_training - Step 16603: {'lr': 0.0003942462079296223, 'samples': 8501248, 'steps': 16603, 'loss/train': 2.7168502807617188} +02/24/2022 23:04:48 - INFO - codeparrot_training - Step 16604: {'lr': 0.00039423284350050735, 'samples': 8501760, 'steps': 16604, 'loss/train': 2.181910514831543} +02/24/2022 23:04:54 - INFO - codeparrot_training - Step 16605: {'lr': 0.00039421947845354476, 'samples': 8502272, 'steps': 16605, 'loss/train': 1.5540157556533813} +02/24/2022 23:04:59 - INFO - codeparrot_training - Step 16606: {'lr': 0.0003942061127887916, 'samples': 8502784, 'steps': 16606, 'loss/train': 1.2098674774169922} +02/24/2022 23:05:03 - INFO - codeparrot_training - Step 16607: {'lr': 0.00039419274650630536, 'samples': 8503296, 'steps': 16607, 'loss/train': 1.5457173585891724} +02/24/2022 23:05:08 - INFO - codeparrot_training - Step 16608: {'lr': 0.00039417937960614316, 'samples': 8503808, 'steps': 16608, 'loss/train': 2.610319137573242} +02/24/2022 23:05:12 - INFO - codeparrot_training - Step 16609: {'lr': 0.0003941660120883622, 'samples': 8504320, 'steps': 16609, 'loss/train': 1.9802271127700806} +02/24/2022 23:05:18 - INFO - codeparrot_training - Step 16610: {'lr': 0.0003941526439530199, 'samples': 8504832, 'steps': 16610, 'loss/train': 1.3905479907989502} +02/24/2022 23:05:21 - INFO - codeparrot_training - Step 16611: {'lr': 0.00039413927520017347, 'samples': 8505344, 'steps': 16611, 'loss/train': 2.3664402961730957} +02/24/2022 23:05:25 - INFO - codeparrot_training - Step 16612: {'lr': 0.00039412590582988007, 'samples': 8505856, 'steps': 16612, 'loss/train': 3.08532452583313} +02/24/2022 23:05:30 - INFO - codeparrot_training - Step 16613: {'lr': 0.00039411253584219707, 'samples': 8506368, 'steps': 16613, 'loss/train': 1.6768769025802612} +02/24/2022 23:05:34 - INFO - codeparrot_training - Step 16614: {'lr': 0.0003940991652371818, 'samples': 8506880, 'steps': 16614, 'loss/train': 2.472137451171875} +02/24/2022 23:05:40 - INFO - codeparrot_training - Step 16615: {'lr': 0.0003940857940148914, 'samples': 8507392, 'steps': 16615, 'loss/train': 2.0025970935821533} +02/24/2022 23:05:44 - INFO - codeparrot_training - Step 16616: {'lr': 0.00039407242217538317, 'samples': 8507904, 'steps': 16616, 'loss/train': 0.3123548924922943} +02/24/2022 23:05:49 - INFO - codeparrot_training - Step 16617: {'lr': 0.00039405904971871454, 'samples': 8508416, 'steps': 16617, 'loss/train': 1.2360575199127197} +02/24/2022 23:05:53 - INFO - codeparrot_training - Step 16618: {'lr': 0.00039404567664494264, 'samples': 8508928, 'steps': 16618, 'loss/train': 2.026491641998291} +02/24/2022 23:05:58 - INFO - codeparrot_training - Step 16619: {'lr': 0.0003940323029541248, 'samples': 8509440, 'steps': 16619, 'loss/train': 2.540133476257324} +02/24/2022 23:06:02 - INFO - codeparrot_training - Step 16620: {'lr': 0.00039401892864631826, 'samples': 8509952, 'steps': 16620, 'loss/train': 1.6243343353271484} +02/24/2022 23:06:07 - INFO - codeparrot_training - Step 16621: {'lr': 0.0003940055537215804, 'samples': 8510464, 'steps': 16621, 'loss/train': 1.334533929824829} +02/24/2022 23:06:11 - INFO - codeparrot_training - Step 16622: {'lr': 0.0003939921781799685, 'samples': 8510976, 'steps': 16622, 'loss/train': 2.263381242752075} +02/24/2022 23:06:16 - INFO - codeparrot_training - Step 16623: {'lr': 0.0003939788020215398, 'samples': 8511488, 'steps': 16623, 'loss/train': 1.5070136785507202} +02/24/2022 23:06:20 - INFO - codeparrot_training - Step 16624: {'lr': 0.0003939654252463517, 'samples': 8512000, 'steps': 16624, 'loss/train': 1.554828405380249} +02/24/2022 23:06:26 - INFO - codeparrot_training - Step 16625: {'lr': 0.00039395204785446137, 'samples': 8512512, 'steps': 16625, 'loss/train': 2.508723497390747} +02/24/2022 23:06:29 - INFO - codeparrot_training - Step 16626: {'lr': 0.00039393866984592616, 'samples': 8513024, 'steps': 16626, 'loss/train': 1.5081697702407837} +02/24/2022 23:06:35 - INFO - codeparrot_training - Step 16627: {'lr': 0.00039392529122080343, 'samples': 8513536, 'steps': 16627, 'loss/train': 2.061289072036743} +02/24/2022 23:06:39 - INFO - codeparrot_training - Step 16628: {'lr': 0.0003939119119791504, 'samples': 8514048, 'steps': 16628, 'loss/train': 2.5437963008880615} +02/24/2022 23:06:44 - INFO - codeparrot_training - Step 16629: {'lr': 0.0003938985321210245, 'samples': 8514560, 'steps': 16629, 'loss/train': 1.99045729637146} +02/24/2022 23:06:48 - INFO - codeparrot_training - Step 16630: {'lr': 0.00039388515164648293, 'samples': 8515072, 'steps': 16630, 'loss/train': 2.375284194946289} +02/24/2022 23:06:53 - INFO - codeparrot_training - Step 16631: {'lr': 0.0003938717705555831, 'samples': 8515584, 'steps': 16631, 'loss/train': 2.666696071624756} +02/24/2022 23:06:57 - INFO - codeparrot_training - Step 16632: {'lr': 0.0003938583888483823, 'samples': 8516096, 'steps': 16632, 'loss/train': 1.9341421127319336} +02/24/2022 23:07:02 - INFO - codeparrot_training - Step 16633: {'lr': 0.0003938450065249378, 'samples': 8516608, 'steps': 16633, 'loss/train': 1.1969630718231201} +02/24/2022 23:07:06 - INFO - codeparrot_training - Step 16634: {'lr': 0.00039383162358530696, 'samples': 8517120, 'steps': 16634, 'loss/train': 4.823916912078857} +02/24/2022 23:07:11 - INFO - codeparrot_training - Step 16635: {'lr': 0.0003938182400295471, 'samples': 8517632, 'steps': 16635, 'loss/train': 1.5374633073806763} +02/24/2022 23:07:15 - INFO - codeparrot_training - Step 16636: {'lr': 0.00039380485585771563, 'samples': 8518144, 'steps': 16636, 'loss/train': 2.1145589351654053} +02/24/2022 23:07:22 - INFO - codeparrot_training - Step 16637: {'lr': 0.00039379147106986985, 'samples': 8518656, 'steps': 16637, 'loss/train': 1.8385825157165527} +02/24/2022 23:07:25 - INFO - codeparrot_training - Step 16638: {'lr': 0.00039377808566606697, 'samples': 8519168, 'steps': 16638, 'loss/train': 3.803720235824585} +02/24/2022 23:07:31 - INFO - codeparrot_training - Step 16639: {'lr': 0.00039376469964636445, 'samples': 8519680, 'steps': 16639, 'loss/train': 0.5922945737838745} +02/24/2022 23:07:34 - INFO - codeparrot_training - Step 16640: {'lr': 0.0003937513130108197, 'samples': 8520192, 'steps': 16640, 'loss/train': 1.630444049835205} +02/24/2022 23:07:40 - INFO - codeparrot_training - Step 16641: {'lr': 0.00039373792575948986, 'samples': 8520704, 'steps': 16641, 'loss/train': 0.5477926731109619} +02/24/2022 23:07:43 - INFO - codeparrot_training - Step 16642: {'lr': 0.00039372453789243245, 'samples': 8521216, 'steps': 16642, 'loss/train': 1.720116138458252} +02/24/2022 23:07:49 - INFO - codeparrot_training - Step 16643: {'lr': 0.0003937111494097047, 'samples': 8521728, 'steps': 16643, 'loss/train': 0.5263066291809082} +02/24/2022 23:07:52 - INFO - codeparrot_training - Step 16644: {'lr': 0.0003936977603113641, 'samples': 8522240, 'steps': 16644, 'loss/train': 2.607430934906006} +02/24/2022 23:07:58 - INFO - codeparrot_training - Step 16645: {'lr': 0.00039368437059746785, 'samples': 8522752, 'steps': 16645, 'loss/train': 1.9224461317062378} +02/24/2022 23:08:01 - INFO - codeparrot_training - Step 16646: {'lr': 0.0003936709802680734, 'samples': 8523264, 'steps': 16646, 'loss/train': 2.295910596847534} +02/24/2022 23:08:08 - INFO - codeparrot_training - Step 16647: {'lr': 0.0003936575893232381, 'samples': 8523776, 'steps': 16647, 'loss/train': 1.4181220531463623} +02/24/2022 23:08:11 - INFO - codeparrot_training - Step 16648: {'lr': 0.0003936441977630193, 'samples': 8524288, 'steps': 16648, 'loss/train': 1.8561968803405762} +02/24/2022 23:08:17 - INFO - codeparrot_training - Step 16649: {'lr': 0.0003936308055874744, 'samples': 8524800, 'steps': 16649, 'loss/train': 2.004794120788574} +02/24/2022 23:08:20 - INFO - codeparrot_training - Step 16650: {'lr': 0.00039361741279666065, 'samples': 8525312, 'steps': 16650, 'loss/train': 1.9476447105407715} +02/24/2022 23:08:26 - INFO - codeparrot_training - Step 16651: {'lr': 0.0003936040193906356, 'samples': 8525824, 'steps': 16651, 'loss/train': 1.379144549369812} +02/24/2022 23:08:30 - INFO - codeparrot_training - Step 16652: {'lr': 0.00039359062536945645, 'samples': 8526336, 'steps': 16652, 'loss/train': 2.1489012241363525} +02/24/2022 23:08:33 - INFO - codeparrot_training - Step 16653: {'lr': 0.00039357723073318076, 'samples': 8526848, 'steps': 16653, 'loss/train': 2.451748847961426} +02/24/2022 23:08:39 - INFO - codeparrot_training - Step 16654: {'lr': 0.0003935638354818657, 'samples': 8527360, 'steps': 16654, 'loss/train': 1.9240946769714355} +02/24/2022 23:08:42 - INFO - codeparrot_training - Step 16655: {'lr': 0.0003935504396155688, 'samples': 8527872, 'steps': 16655, 'loss/train': 3.0455963611602783} +02/24/2022 23:08:48 - INFO - codeparrot_training - Step 16656: {'lr': 0.00039353704313434745, 'samples': 8528384, 'steps': 16656, 'loss/train': 2.579360246658325} +02/24/2022 23:08:51 - INFO - codeparrot_training - Step 16657: {'lr': 0.000393523646038259, 'samples': 8528896, 'steps': 16657, 'loss/train': 1.9909262657165527} +02/24/2022 23:08:57 - INFO - codeparrot_training - Step 16658: {'lr': 0.0003935102483273607, 'samples': 8529408, 'steps': 16658, 'loss/train': 1.5683646202087402} +02/24/2022 23:09:01 - INFO - codeparrot_training - Step 16659: {'lr': 0.0003934968500017101, 'samples': 8529920, 'steps': 16659, 'loss/train': 2.350641965866089} +02/24/2022 23:09:07 - INFO - codeparrot_training - Step 16660: {'lr': 0.0003934834510613646, 'samples': 8530432, 'steps': 16660, 'loss/train': 2.930534601211548} +02/24/2022 23:09:10 - INFO - codeparrot_training - Step 16661: {'lr': 0.00039347005150638156, 'samples': 8530944, 'steps': 16661, 'loss/train': 0.17369619011878967} +02/24/2022 23:09:16 - INFO - codeparrot_training - Step 16662: {'lr': 0.0003934566513368183, 'samples': 8531456, 'steps': 16662, 'loss/train': 1.937572717666626} +02/24/2022 23:09:19 - INFO - codeparrot_training - Step 16663: {'lr': 0.00039344325055273236, 'samples': 8531968, 'steps': 16663, 'loss/train': 1.3789302110671997} +02/24/2022 23:09:25 - INFO - codeparrot_training - Step 16664: {'lr': 0.0003934298491541811, 'samples': 8532480, 'steps': 16664, 'loss/train': 1.9850187301635742} +02/24/2022 23:09:29 - INFO - codeparrot_training - Step 16665: {'lr': 0.00039341644714122195, 'samples': 8532992, 'steps': 16665, 'loss/train': 1.5947589874267578} +02/24/2022 23:09:34 - INFO - codeparrot_training - Step 16666: {'lr': 0.00039340304451391216, 'samples': 8533504, 'steps': 16666, 'loss/train': 0.872013509273529} +02/24/2022 23:09:38 - INFO - codeparrot_training - Step 16667: {'lr': 0.00039338964127230935, 'samples': 8534016, 'steps': 16667, 'loss/train': 1.8149328231811523} +02/24/2022 23:09:43 - INFO - codeparrot_training - Step 16668: {'lr': 0.00039337623741647084, 'samples': 8534528, 'steps': 16668, 'loss/train': 2.2334938049316406} +02/24/2022 23:09:47 - INFO - codeparrot_training - Step 16669: {'lr': 0.000393362832946454, 'samples': 8535040, 'steps': 16669, 'loss/train': 2.6417410373687744} +02/24/2022 23:09:52 - INFO - codeparrot_training - Step 16670: {'lr': 0.0003933494278623164, 'samples': 8535552, 'steps': 16670, 'loss/train': 1.5269017219543457} +02/24/2022 23:09:56 - INFO - codeparrot_training - Step 16671: {'lr': 0.0003933360221641153, 'samples': 8536064, 'steps': 16671, 'loss/train': 1.4327607154846191} +02/24/2022 23:10:01 - INFO - codeparrot_training - Step 16672: {'lr': 0.0003933226158519082, 'samples': 8536576, 'steps': 16672, 'loss/train': 1.799267053604126} +02/24/2022 23:10:05 - INFO - codeparrot_training - Step 16673: {'lr': 0.0003933092089257525, 'samples': 8537088, 'steps': 16673, 'loss/train': 2.336268901824951} +02/24/2022 23:10:11 - INFO - codeparrot_training - Step 16674: {'lr': 0.0003932958013857057, 'samples': 8537600, 'steps': 16674, 'loss/train': 1.5832922458648682} +02/24/2022 23:10:14 - INFO - codeparrot_training - Step 16675: {'lr': 0.0003932823932318252, 'samples': 8538112, 'steps': 16675, 'loss/train': 2.7384278774261475} +02/24/2022 23:10:20 - INFO - codeparrot_training - Step 16676: {'lr': 0.0003932689844641684, 'samples': 8538624, 'steps': 16676, 'loss/train': 2.256833553314209} +02/24/2022 23:10:23 - INFO - codeparrot_training - Step 16677: {'lr': 0.00039325557508279276, 'samples': 8539136, 'steps': 16677, 'loss/train': 1.615507960319519} +02/24/2022 23:10:29 - INFO - codeparrot_training - Step 16678: {'lr': 0.00039324216508775567, 'samples': 8539648, 'steps': 16678, 'loss/train': 1.879104733467102} +02/24/2022 23:10:32 - INFO - codeparrot_training - Step 16679: {'lr': 0.0003932287544791148, 'samples': 8540160, 'steps': 16679, 'loss/train': 2.7534518241882324} +02/24/2022 23:10:38 - INFO - codeparrot_training - Step 16680: {'lr': 0.00039321534325692726, 'samples': 8540672, 'steps': 16680, 'loss/train': 2.967466115951538} +02/24/2022 23:10:41 - INFO - codeparrot_training - Step 16681: {'lr': 0.0003932019314212507, 'samples': 8541184, 'steps': 16681, 'loss/train': 2.105475664138794} +02/24/2022 23:10:47 - INFO - codeparrot_training - Step 16682: {'lr': 0.0003931885189721426, 'samples': 8541696, 'steps': 16682, 'loss/train': 1.5834599733352661} +02/24/2022 23:10:50 - INFO - codeparrot_training - Step 16683: {'lr': 0.00039317510590966033, 'samples': 8542208, 'steps': 16683, 'loss/train': 0.7416520714759827} +02/24/2022 23:10:57 - INFO - codeparrot_training - Step 16684: {'lr': 0.0003931616922338613, 'samples': 8542720, 'steps': 16684, 'loss/train': 1.5058794021606445} +02/24/2022 23:11:00 - INFO - codeparrot_training - Step 16685: {'lr': 0.00039314827794480314, 'samples': 8543232, 'steps': 16685, 'loss/train': 2.1089189052581787} +02/24/2022 23:11:06 - INFO - codeparrot_training - Step 16686: {'lr': 0.00039313486304254315, 'samples': 8543744, 'steps': 16686, 'loss/train': 2.7367444038391113} +02/24/2022 23:11:09 - INFO - codeparrot_training - Step 16687: {'lr': 0.00039312144752713885, 'samples': 8544256, 'steps': 16687, 'loss/train': 1.6765453815460205} +02/24/2022 23:11:15 - INFO - codeparrot_training - Step 16688: {'lr': 0.00039310803139864777, 'samples': 8544768, 'steps': 16688, 'loss/train': 1.931235432624817} +02/24/2022 23:11:18 - INFO - codeparrot_training - Step 16689: {'lr': 0.00039309461465712725, 'samples': 8545280, 'steps': 16689, 'loss/train': 1.2274912595748901} +02/24/2022 23:11:24 - INFO - codeparrot_training - Step 16690: {'lr': 0.00039308119730263494, 'samples': 8545792, 'steps': 16690, 'loss/train': 2.0927178859710693} +02/24/2022 23:11:27 - INFO - codeparrot_training - Step 16691: {'lr': 0.00039306777933522806, 'samples': 8546304, 'steps': 16691, 'loss/train': 2.453559398651123} +02/24/2022 23:11:33 - INFO - codeparrot_training - Step 16692: {'lr': 0.00039305436075496436, 'samples': 8546816, 'steps': 16692, 'loss/train': 1.3919786214828491} +02/24/2022 23:11:36 - INFO - codeparrot_training - Step 16693: {'lr': 0.0003930409415619012, 'samples': 8547328, 'steps': 16693, 'loss/train': 1.6564058065414429} +02/24/2022 23:11:42 - INFO - codeparrot_training - Step 16694: {'lr': 0.000393027521756096, 'samples': 8547840, 'steps': 16694, 'loss/train': 1.0032819509506226} +02/24/2022 23:11:46 - INFO - codeparrot_training - Step 16695: {'lr': 0.0003930141013376064, 'samples': 8548352, 'steps': 16695, 'loss/train': 2.062098979949951} +02/24/2022 23:11:51 - INFO - codeparrot_training - Step 16696: {'lr': 0.00039300068030648976, 'samples': 8548864, 'steps': 16696, 'loss/train': 1.3452891111373901} +02/24/2022 23:11:55 - INFO - codeparrot_training - Step 16697: {'lr': 0.0003929872586628036, 'samples': 8549376, 'steps': 16697, 'loss/train': 2.5055952072143555} +02/24/2022 23:12:00 - INFO - codeparrot_training - Step 16698: {'lr': 0.00039297383640660545, 'samples': 8549888, 'steps': 16698, 'loss/train': 1.6175209283828735} +02/24/2022 23:12:04 - INFO - codeparrot_training - Step 16699: {'lr': 0.0003929604135379528, 'samples': 8550400, 'steps': 16699, 'loss/train': 1.2649298906326294} +02/24/2022 23:12:09 - INFO - codeparrot_training - Step 16700: {'lr': 0.000392946990056903, 'samples': 8550912, 'steps': 16700, 'loss/train': 1.211811900138855} +02/24/2022 23:12:13 - INFO - codeparrot_training - Step 16701: {'lr': 0.0003929335659635139, 'samples': 8551424, 'steps': 16701, 'loss/train': 1.6003845930099487} +02/24/2022 23:12:18 - INFO - codeparrot_training - Step 16702: {'lr': 0.00039292014125784266, 'samples': 8551936, 'steps': 16702, 'loss/train': 1.6087394952774048} +02/24/2022 23:12:22 - INFO - codeparrot_training - Step 16703: {'lr': 0.00039290671593994697, 'samples': 8552448, 'steps': 16703, 'loss/train': 1.4104448556900024} +02/24/2022 23:12:28 - INFO - codeparrot_training - Step 16704: {'lr': 0.0003928932900098842, 'samples': 8552960, 'steps': 16704, 'loss/train': 3.354512929916382} +02/24/2022 23:12:31 - INFO - codeparrot_training - Step 16705: {'lr': 0.00039287986346771205, 'samples': 8553472, 'steps': 16705, 'loss/train': 1.54456627368927} +02/24/2022 23:12:37 - INFO - codeparrot_training - Step 16706: {'lr': 0.0003928664363134879, 'samples': 8553984, 'steps': 16706, 'loss/train': 1.5391348600387573} +02/24/2022 23:12:41 - INFO - codeparrot_training - Step 16707: {'lr': 0.00039285300854726926, 'samples': 8554496, 'steps': 16707, 'loss/train': 9.982333183288574} +02/24/2022 23:12:46 - INFO - codeparrot_training - Step 16708: {'lr': 0.00039283958016911373, 'samples': 8555008, 'steps': 16708, 'loss/train': 1.522818922996521} +02/24/2022 23:12:50 - INFO - codeparrot_training - Step 16709: {'lr': 0.00039282615117907884, 'samples': 8555520, 'steps': 16709, 'loss/train': 1.8078899383544922} +02/24/2022 23:12:55 - INFO - codeparrot_training - Step 16710: {'lr': 0.00039281272157722205, 'samples': 8556032, 'steps': 16710, 'loss/train': 2.7611310482025146} +02/24/2022 23:12:59 - INFO - codeparrot_training - Step 16711: {'lr': 0.0003927992913636008, 'samples': 8556544, 'steps': 16711, 'loss/train': 2.0730113983154297} +02/24/2022 23:13:04 - INFO - codeparrot_training - Step 16712: {'lr': 0.0003927858605382728, 'samples': 8557056, 'steps': 16712, 'loss/train': 3.581165075302124} +02/24/2022 23:13:08 - INFO - codeparrot_training - Step 16713: {'lr': 0.0003927724291012955, 'samples': 8557568, 'steps': 16713, 'loss/train': 2.925849199295044} +02/24/2022 23:13:13 - INFO - codeparrot_training - Step 16714: {'lr': 0.00039275899705272656, 'samples': 8558080, 'steps': 16714, 'loss/train': 3.4336538314819336} +02/24/2022 23:13:17 - INFO - codeparrot_training - Step 16715: {'lr': 0.00039274556439262325, 'samples': 8558592, 'steps': 16715, 'loss/train': 2.148698568344116} +02/24/2022 23:13:22 - INFO - codeparrot_training - Step 16716: {'lr': 0.0003927321311210434, 'samples': 8559104, 'steps': 16716, 'loss/train': 2.845339059829712} +02/24/2022 23:13:26 - INFO - codeparrot_training - Step 16717: {'lr': 0.00039271869723804434, 'samples': 8559616, 'steps': 16717, 'loss/train': 0.6309954524040222} +02/24/2022 23:13:31 - INFO - codeparrot_training - Step 16718: {'lr': 0.0003927052627436837, 'samples': 8560128, 'steps': 16718, 'loss/train': 2.1624178886413574} +02/24/2022 23:13:35 - INFO - codeparrot_training - Step 16719: {'lr': 0.000392691827638019, 'samples': 8560640, 'steps': 16719, 'loss/train': 2.5281877517700195} +02/24/2022 23:13:41 - INFO - codeparrot_training - Step 16720: {'lr': 0.000392678391921108, 'samples': 8561152, 'steps': 16720, 'loss/train': 2.620974540710449} +02/24/2022 23:13:45 - INFO - codeparrot_training - Step 16721: {'lr': 0.00039266495559300786, 'samples': 8561664, 'steps': 16721, 'loss/train': 2.650250196456909} +02/24/2022 23:13:50 - INFO - codeparrot_training - Step 16722: {'lr': 0.00039265151865377644, 'samples': 8562176, 'steps': 16722, 'loss/train': 2.0051259994506836} +02/24/2022 23:13:54 - INFO - codeparrot_training - Step 16723: {'lr': 0.0003926380811034712, 'samples': 8562688, 'steps': 16723, 'loss/train': 1.9259918928146362} +02/24/2022 23:13:59 - INFO - codeparrot_training - Step 16724: {'lr': 0.0003926246429421497, 'samples': 8563200, 'steps': 16724, 'loss/train': 2.457704544067383} +02/24/2022 23:14:03 - INFO - codeparrot_training - Step 16725: {'lr': 0.0003926112041698696, 'samples': 8563712, 'steps': 16725, 'loss/train': 3.1315267086029053} +02/24/2022 23:14:08 - INFO - codeparrot_training - Step 16726: {'lr': 0.0003925977647866883, 'samples': 8564224, 'steps': 16726, 'loss/train': 2.3196165561676025} +02/24/2022 23:14:12 - INFO - codeparrot_training - Step 16727: {'lr': 0.0003925843247926635, 'samples': 8564736, 'steps': 16727, 'loss/train': 2.237645387649536} +02/24/2022 23:14:17 - INFO - codeparrot_training - Step 16728: {'lr': 0.00039257088418785267, 'samples': 8565248, 'steps': 16728, 'loss/train': 1.8360320329666138} +02/24/2022 23:14:21 - INFO - codeparrot_training - Step 16729: {'lr': 0.00039255744297231354, 'samples': 8565760, 'steps': 16729, 'loss/train': 2.4263439178466797} +02/24/2022 23:14:27 - INFO - codeparrot_training - Step 16730: {'lr': 0.0003925440011461035, 'samples': 8566272, 'steps': 16730, 'loss/train': 2.2055110931396484} +02/24/2022 23:14:31 - INFO - codeparrot_training - Step 16731: {'lr': 0.0003925305587092802, 'samples': 8566784, 'steps': 16731, 'loss/train': 1.0306373834609985} +02/24/2022 23:14:36 - INFO - codeparrot_training - Step 16732: {'lr': 0.00039251711566190133, 'samples': 8567296, 'steps': 16732, 'loss/train': 1.720516324043274} +02/24/2022 23:14:40 - INFO - codeparrot_training - Step 16733: {'lr': 0.0003925036720040244, 'samples': 8567808, 'steps': 16733, 'loss/train': 2.2145564556121826} +02/24/2022 23:14:45 - INFO - codeparrot_training - Step 16734: {'lr': 0.000392490227735707, 'samples': 8568320, 'steps': 16734, 'loss/train': 1.176186203956604} +02/24/2022 23:14:49 - INFO - codeparrot_training - Step 16735: {'lr': 0.0003924767828570066, 'samples': 8568832, 'steps': 16735, 'loss/train': 0.7137700915336609} +02/24/2022 23:14:54 - INFO - codeparrot_training - Step 16736: {'lr': 0.00039246333736798095, 'samples': 8569344, 'steps': 16736, 'loss/train': 2.636673927307129} +02/24/2022 23:14:57 - INFO - codeparrot_training - Step 16737: {'lr': 0.00039244989126868755, 'samples': 8569856, 'steps': 16737, 'loss/train': 0.8950685858726501} +02/24/2022 23:15:03 - INFO - codeparrot_training - Step 16738: {'lr': 0.0003924364445591842, 'samples': 8570368, 'steps': 16738, 'loss/train': 2.578615188598633} +02/24/2022 23:15:06 - INFO - codeparrot_training - Step 16739: {'lr': 0.0003924229972395282, 'samples': 8570880, 'steps': 16739, 'loss/train': 1.9543473720550537} +02/24/2022 23:15:13 - INFO - codeparrot_training - Step 16740: {'lr': 0.00039240954930977744, 'samples': 8571392, 'steps': 16740, 'loss/train': 1.864445447921753} +02/24/2022 23:15:16 - INFO - codeparrot_training - Step 16741: {'lr': 0.0003923961007699893, 'samples': 8571904, 'steps': 16741, 'loss/train': 2.237811326980591} +02/24/2022 23:15:22 - INFO - codeparrot_training - Step 16742: {'lr': 0.00039238265162022147, 'samples': 8572416, 'steps': 16742, 'loss/train': 1.9068998098373413} +02/24/2022 23:15:25 - INFO - codeparrot_training - Step 16743: {'lr': 0.0003923692018605316, 'samples': 8572928, 'steps': 16743, 'loss/train': 2.144944429397583} +02/24/2022 23:15:31 - INFO - codeparrot_training - Step 16744: {'lr': 0.0003923557514909773, 'samples': 8573440, 'steps': 16744, 'loss/train': 3.1549019813537598} +02/24/2022 23:15:34 - INFO - codeparrot_training - Step 16745: {'lr': 0.00039234230051161614, 'samples': 8573952, 'steps': 16745, 'loss/train': 0.9322651624679565} +02/24/2022 23:15:40 - INFO - codeparrot_training - Step 16746: {'lr': 0.00039232884892250575, 'samples': 8574464, 'steps': 16746, 'loss/train': 2.5181801319122314} +02/24/2022 23:15:43 - INFO - codeparrot_training - Step 16747: {'lr': 0.00039231539672370376, 'samples': 8574976, 'steps': 16747, 'loss/train': 2.181405544281006} +02/24/2022 23:15:49 - INFO - codeparrot_training - Step 16748: {'lr': 0.00039230194391526784, 'samples': 8575488, 'steps': 16748, 'loss/train': 2.0367963314056396} +02/24/2022 23:15:52 - INFO - codeparrot_training - Step 16749: {'lr': 0.0003922884904972556, 'samples': 8576000, 'steps': 16749, 'loss/train': 2.0779237747192383} +02/24/2022 23:15:58 - INFO - codeparrot_training - Step 16750: {'lr': 0.0003922750364697246, 'samples': 8576512, 'steps': 16750, 'loss/train': 3.7160446643829346} +02/24/2022 23:16:02 - INFO - codeparrot_training - Step 16751: {'lr': 0.0003922615818327325, 'samples': 8577024, 'steps': 16751, 'loss/train': 2.6534552574157715} +02/24/2022 23:16:08 - INFO - codeparrot_training - Step 16752: {'lr': 0.000392248126586337, 'samples': 8577536, 'steps': 16752, 'loss/train': 2.5692691802978516} +02/24/2022 23:16:11 - INFO - codeparrot_training - Step 16753: {'lr': 0.0003922346707305957, 'samples': 8578048, 'steps': 16753, 'loss/train': 0.07850679010152817} +02/24/2022 23:16:17 - INFO - codeparrot_training - Step 16754: {'lr': 0.00039222121426556617, 'samples': 8578560, 'steps': 16754, 'loss/train': 2.6701011657714844} +02/24/2022 23:16:21 - INFO - codeparrot_training - Step 16755: {'lr': 0.0003922077571913062, 'samples': 8579072, 'steps': 16755, 'loss/train': 1.1906167268753052} +02/24/2022 23:16:26 - INFO - codeparrot_training - Step 16756: {'lr': 0.00039219429950787326, 'samples': 8579584, 'steps': 16756, 'loss/train': 2.0138866901397705} +02/24/2022 23:16:30 - INFO - codeparrot_training - Step 16757: {'lr': 0.0003921808412153252, 'samples': 8580096, 'steps': 16757, 'loss/train': 2.505519151687622} +02/24/2022 23:16:35 - INFO - codeparrot_training - Step 16758: {'lr': 0.0003921673823137195, 'samples': 8580608, 'steps': 16758, 'loss/train': 2.7859370708465576} +02/24/2022 23:16:39 - INFO - codeparrot_training - Step 16759: {'lr': 0.00039215392280311383, 'samples': 8581120, 'steps': 16759, 'loss/train': 4.237369060516357} +02/24/2022 23:16:44 - INFO - codeparrot_training - Step 16760: {'lr': 0.000392140462683566, 'samples': 8581632, 'steps': 16760, 'loss/train': 1.6873570680618286} +02/24/2022 23:16:48 - INFO - codeparrot_training - Step 16761: {'lr': 0.0003921270019551335, 'samples': 8582144, 'steps': 16761, 'loss/train': 2.5333385467529297} +02/24/2022 23:16:54 - INFO - codeparrot_training - Step 16762: {'lr': 0.00039211354061787407, 'samples': 8582656, 'steps': 16762, 'loss/train': 2.4803080558776855} +02/24/2022 23:16:57 - INFO - codeparrot_training - Step 16763: {'lr': 0.0003921000786718454, 'samples': 8583168, 'steps': 16763, 'loss/train': 2.0843112468719482} +02/24/2022 23:17:03 - INFO - codeparrot_training - Step 16764: {'lr': 0.0003920866161171051, 'samples': 8583680, 'steps': 16764, 'loss/train': 2.050842761993408} +02/24/2022 23:17:06 - INFO - codeparrot_training - Step 16765: {'lr': 0.0003920731529537108, 'samples': 8584192, 'steps': 16765, 'loss/train': 2.262209892272949} +02/24/2022 23:17:12 - INFO - codeparrot_training - Step 16766: {'lr': 0.00039205968918172026, 'samples': 8584704, 'steps': 16766, 'loss/train': 1.8851791620254517} +02/24/2022 23:17:16 - INFO - codeparrot_training - Step 16767: {'lr': 0.00039204622480119107, 'samples': 8585216, 'steps': 16767, 'loss/train': 1.0261403322219849} +02/24/2022 23:17:22 - INFO - codeparrot_training - Step 16768: {'lr': 0.000392032759812181, 'samples': 8585728, 'steps': 16768, 'loss/train': 2.142001152038574} +02/24/2022 23:17:25 - INFO - codeparrot_training - Step 16769: {'lr': 0.0003920192942147477, 'samples': 8586240, 'steps': 16769, 'loss/train': 1.6848701238632202} +02/24/2022 23:17:28 - INFO - codeparrot_training - Step 16770: {'lr': 0.00039200582800894885, 'samples': 8586752, 'steps': 16770, 'loss/train': 1.4346401691436768} +02/24/2022 23:17:34 - INFO - codeparrot_training - Step 16771: {'lr': 0.00039199236119484207, 'samples': 8587264, 'steps': 16771, 'loss/train': 2.4493155479431152} +02/24/2022 23:17:40 - INFO - codeparrot_training - Step 16772: {'lr': 0.0003919788937724852, 'samples': 8587776, 'steps': 16772, 'loss/train': 2.5252976417541504} +02/24/2022 23:17:43 - INFO - codeparrot_training - Step 16773: {'lr': 0.0003919654257419357, 'samples': 8588288, 'steps': 16773, 'loss/train': 2.1317074298858643} +02/24/2022 23:17:49 - INFO - codeparrot_training - Step 16774: {'lr': 0.0003919519571032515, 'samples': 8588800, 'steps': 16774, 'loss/train': 2.12827205657959} +02/24/2022 23:17:52 - INFO - codeparrot_training - Step 16775: {'lr': 0.00039193848785649016, 'samples': 8589312, 'steps': 16775, 'loss/train': 1.1113438606262207} +02/24/2022 23:17:56 - INFO - codeparrot_training - Step 16776: {'lr': 0.0003919250180017094, 'samples': 8589824, 'steps': 16776, 'loss/train': 2.9014785289764404} +02/24/2022 23:18:02 - INFO - codeparrot_training - Step 16777: {'lr': 0.00039191154753896696, 'samples': 8590336, 'steps': 16777, 'loss/train': 1.968258261680603} +02/24/2022 23:18:07 - INFO - codeparrot_training - Step 16778: {'lr': 0.00039189807646832045, 'samples': 8590848, 'steps': 16778, 'loss/train': 2.1374475955963135} +02/24/2022 23:18:11 - INFO - codeparrot_training - Step 16779: {'lr': 0.0003918846047898277, 'samples': 8591360, 'steps': 16779, 'loss/train': 3.5726158618927} +02/24/2022 23:18:14 - INFO - codeparrot_training - Step 16780: {'lr': 0.00039187113250354635, 'samples': 8591872, 'steps': 16780, 'loss/train': 1.8912001848220825} +02/24/2022 23:18:20 - INFO - codeparrot_training - Step 16781: {'lr': 0.00039185765960953405, 'samples': 8592384, 'steps': 16781, 'loss/train': 3.314514398574829} +02/24/2022 23:18:24 - INFO - codeparrot_training - Step 16782: {'lr': 0.0003918441861078486, 'samples': 8592896, 'steps': 16782, 'loss/train': 1.6754136085510254} +02/24/2022 23:18:29 - INFO - codeparrot_training - Step 16783: {'lr': 0.0003918307119985477, 'samples': 8593408, 'steps': 16783, 'loss/train': 2.063513994216919} +02/24/2022 23:18:33 - INFO - codeparrot_training - Step 16784: {'lr': 0.0003918172372816892, 'samples': 8593920, 'steps': 16784, 'loss/train': 1.3096811771392822} +02/24/2022 23:18:38 - INFO - codeparrot_training - Step 16785: {'lr': 0.0003918037619573305, 'samples': 8594432, 'steps': 16785, 'loss/train': 1.8276004791259766} +02/24/2022 23:18:42 - INFO - codeparrot_training - Step 16786: {'lr': 0.0003917902860255296, 'samples': 8594944, 'steps': 16786, 'loss/train': 1.6900595426559448} +02/24/2022 23:18:49 - INFO - codeparrot_training - Step 16787: {'lr': 0.0003917768094863441, 'samples': 8595456, 'steps': 16787, 'loss/train': 1.3743575811386108} +02/24/2022 23:18:52 - INFO - codeparrot_training - Step 16788: {'lr': 0.00039176333233983187, 'samples': 8595968, 'steps': 16788, 'loss/train': 2.1047770977020264} +02/24/2022 23:18:58 - INFO - codeparrot_training - Step 16789: {'lr': 0.0003917498545860504, 'samples': 8596480, 'steps': 16789, 'loss/train': 1.0771292448043823} +02/24/2022 23:19:02 - INFO - codeparrot_training - Step 16790: {'lr': 0.0003917363762250576, 'samples': 8596992, 'steps': 16790, 'loss/train': 3.275146961212158} +02/24/2022 23:19:08 - INFO - codeparrot_training - Step 16791: {'lr': 0.00039172289725691124, 'samples': 8597504, 'steps': 16791, 'loss/train': 1.6128149032592773} +02/24/2022 23:19:11 - INFO - codeparrot_training - Step 16792: {'lr': 0.000391709417681669, 'samples': 8598016, 'steps': 16792, 'loss/train': 1.7847570180892944} +02/24/2022 23:19:14 - INFO - codeparrot_training - Step 16793: {'lr': 0.0003916959374993885, 'samples': 8598528, 'steps': 16793, 'loss/train': 2.2029881477355957} +02/24/2022 23:19:20 - INFO - codeparrot_training - Step 16794: {'lr': 0.0003916824567101277, 'samples': 8599040, 'steps': 16794, 'loss/train': 1.8639984130859375} +02/24/2022 23:19:23 - INFO - codeparrot_training - Step 16795: {'lr': 0.0003916689753139442, 'samples': 8599552, 'steps': 16795, 'loss/train': 2.024332284927368} +02/24/2022 23:19:29 - INFO - codeparrot_training - Step 16796: {'lr': 0.0003916554933108958, 'samples': 8600064, 'steps': 16796, 'loss/train': 2.4026010036468506} +02/24/2022 23:19:32 - INFO - codeparrot_training - Step 16797: {'lr': 0.0003916420107010402, 'samples': 8600576, 'steps': 16797, 'loss/train': 2.0735392570495605} +02/24/2022 23:19:39 - INFO - codeparrot_training - Step 16798: {'lr': 0.0003916285274844353, 'samples': 8601088, 'steps': 16798, 'loss/train': 1.0994086265563965} +02/24/2022 23:19:42 - INFO - codeparrot_training - Step 16799: {'lr': 0.0003916150436611387, 'samples': 8601600, 'steps': 16799, 'loss/train': 2.4959099292755127} +02/24/2022 23:19:48 - INFO - codeparrot_training - Step 16800: {'lr': 0.0003916015592312082, 'samples': 8602112, 'steps': 16800, 'loss/train': 0.2565116286277771} +02/24/2022 23:19:51 - INFO - codeparrot_training - Step 16801: {'lr': 0.00039158807419470166, 'samples': 8602624, 'steps': 16801, 'loss/train': 2.1922967433929443} +02/24/2022 23:19:57 - INFO - codeparrot_training - Step 16802: {'lr': 0.0003915745885516767, 'samples': 8603136, 'steps': 16802, 'loss/train': 1.4452614784240723} +02/24/2022 23:20:00 - INFO - codeparrot_training - Step 16803: {'lr': 0.0003915611023021912, 'samples': 8603648, 'steps': 16803, 'loss/train': 1.3495919704437256} +02/24/2022 23:20:06 - INFO - codeparrot_training - Step 16804: {'lr': 0.00039154761544630287, 'samples': 8604160, 'steps': 16804, 'loss/train': 2.431001901626587} +02/24/2022 23:20:09 - INFO - codeparrot_training - Step 16805: {'lr': 0.0003915341279840695, 'samples': 8604672, 'steps': 16805, 'loss/train': 3.085236072540283} +02/24/2022 23:20:15 - INFO - codeparrot_training - Step 16806: {'lr': 0.00039152063991554885, 'samples': 8605184, 'steps': 16806, 'loss/train': 2.1057345867156982} +02/24/2022 23:20:18 - INFO - codeparrot_training - Step 16807: {'lr': 0.0003915071512407987, 'samples': 8605696, 'steps': 16807, 'loss/train': 1.8922282457351685} +02/24/2022 23:20:25 - INFO - codeparrot_training - Step 16808: {'lr': 0.0003914936619598769, 'samples': 8606208, 'steps': 16808, 'loss/train': 1.7595235109329224} +02/24/2022 23:20:28 - INFO - codeparrot_training - Step 16809: {'lr': 0.00039148017207284115, 'samples': 8606720, 'steps': 16809, 'loss/train': 1.1769860982894897} +02/24/2022 23:20:34 - INFO - codeparrot_training - Step 16810: {'lr': 0.0003914666815797493, 'samples': 8607232, 'steps': 16810, 'loss/train': 0.7896278500556946} +02/24/2022 23:20:37 - INFO - codeparrot_training - Step 16811: {'lr': 0.00039145319048065907, 'samples': 8607744, 'steps': 16811, 'loss/train': 1.8864352703094482} +02/24/2022 23:20:43 - INFO - codeparrot_training - Step 16812: {'lr': 0.00039143969877562833, 'samples': 8608256, 'steps': 16812, 'loss/train': 0.4437282383441925} +02/24/2022 23:20:46 - INFO - codeparrot_training - Step 16813: {'lr': 0.00039142620646471485, 'samples': 8608768, 'steps': 16813, 'loss/train': 1.2218278646469116} +02/24/2022 23:20:52 - INFO - codeparrot_training - Step 16814: {'lr': 0.00039141271354797635, 'samples': 8609280, 'steps': 16814, 'loss/train': 2.371201276779175} +02/24/2022 23:20:55 - INFO - codeparrot_training - Step 16815: {'lr': 0.0003913992200254707, 'samples': 8609792, 'steps': 16815, 'loss/train': 2.3276758193969727} +02/24/2022 23:21:01 - INFO - codeparrot_training - Step 16816: {'lr': 0.0003913857258972557, 'samples': 8610304, 'steps': 16816, 'loss/train': 1.5418903827667236} +02/24/2022 23:21:04 - INFO - codeparrot_training - Step 16817: {'lr': 0.0003913722311633892, 'samples': 8610816, 'steps': 16817, 'loss/train': 2.5481350421905518} +02/24/2022 23:21:10 - INFO - codeparrot_training - Step 16818: {'lr': 0.0003913587358239288, 'samples': 8611328, 'steps': 16818, 'loss/train': 1.9053610563278198} +02/24/2022 23:21:13 - INFO - codeparrot_training - Step 16819: {'lr': 0.0003913452398789326, 'samples': 8611840, 'steps': 16819, 'loss/train': 2.0319366455078125} +02/24/2022 23:21:19 - INFO - codeparrot_training - Step 16820: {'lr': 0.0003913317433284582, 'samples': 8612352, 'steps': 16820, 'loss/train': 1.5430309772491455} +02/24/2022 23:21:22 - INFO - codeparrot_training - Step 16821: {'lr': 0.00039131824617256354, 'samples': 8612864, 'steps': 16821, 'loss/train': 2.3261728286743164} +02/24/2022 23:21:28 - INFO - codeparrot_training - Step 16822: {'lr': 0.0003913047484113064, 'samples': 8613376, 'steps': 16822, 'loss/train': 2.5518836975097656} +02/24/2022 23:21:31 - INFO - codeparrot_training - Step 16823: {'lr': 0.0003912912500447445, 'samples': 8613888, 'steps': 16823, 'loss/train': 1.9733940362930298} +02/24/2022 23:21:37 - INFO - codeparrot_training - Step 16824: {'lr': 0.0003912777510729358, 'samples': 8614400, 'steps': 16824, 'loss/train': 1.2581181526184082} +02/24/2022 23:21:40 - INFO - codeparrot_training - Step 16825: {'lr': 0.0003912642514959381, 'samples': 8614912, 'steps': 16825, 'loss/train': 1.8309385776519775} +02/24/2022 23:21:47 - INFO - codeparrot_training - Step 16826: {'lr': 0.00039125075131380923, 'samples': 8615424, 'steps': 16826, 'loss/train': 1.498382329940796} +02/24/2022 23:21:50 - INFO - codeparrot_training - Step 16827: {'lr': 0.00039123725052660696, 'samples': 8615936, 'steps': 16827, 'loss/train': 1.355548620223999} +02/24/2022 23:21:56 - INFO - codeparrot_training - Step 16828: {'lr': 0.00039122374913438913, 'samples': 8616448, 'steps': 16828, 'loss/train': 1.8002629280090332} +02/24/2022 23:21:59 - INFO - codeparrot_training - Step 16829: {'lr': 0.00039121024713721365, 'samples': 8616960, 'steps': 16829, 'loss/train': 2.014267683029175} +02/24/2022 23:22:05 - INFO - codeparrot_training - Step 16830: {'lr': 0.0003911967445351382, 'samples': 8617472, 'steps': 16830, 'loss/train': 1.4848769903182983} +02/24/2022 23:22:08 - INFO - codeparrot_training - Step 16831: {'lr': 0.00039118324132822083, 'samples': 8617984, 'steps': 16831, 'loss/train': 1.7573505640029907} +02/24/2022 23:22:14 - INFO - codeparrot_training - Step 16832: {'lr': 0.0003911697375165193, 'samples': 8618496, 'steps': 16832, 'loss/train': 2.4055910110473633} +02/24/2022 23:22:17 - INFO - codeparrot_training - Step 16833: {'lr': 0.00039115623310009135, 'samples': 8619008, 'steps': 16833, 'loss/train': 1.5562704801559448} +02/24/2022 23:22:23 - INFO - codeparrot_training - Step 16834: {'lr': 0.00039114272807899496, 'samples': 8619520, 'steps': 16834, 'loss/train': 2.246941566467285} +02/24/2022 23:22:27 - INFO - codeparrot_training - Step 16835: {'lr': 0.000391129222453288, 'samples': 8620032, 'steps': 16835, 'loss/train': 0.9668616056442261} +02/24/2022 23:22:32 - INFO - codeparrot_training - Step 16836: {'lr': 0.00039111571622302824, 'samples': 8620544, 'steps': 16836, 'loss/train': 1.7109888792037964} +02/24/2022 23:22:36 - INFO - codeparrot_training - Step 16837: {'lr': 0.0003911022093882736, 'samples': 8621056, 'steps': 16837, 'loss/train': 0.9387457370758057} +02/24/2022 23:22:41 - INFO - codeparrot_training - Step 16838: {'lr': 0.00039108870194908175, 'samples': 8621568, 'steps': 16838, 'loss/train': 1.5231893062591553} +02/24/2022 23:22:45 - INFO - codeparrot_training - Step 16839: {'lr': 0.00039107519390551085, 'samples': 8622080, 'steps': 16839, 'loss/train': 1.5574750900268555} +02/24/2022 23:22:50 - INFO - codeparrot_training - Step 16840: {'lr': 0.00039106168525761855, 'samples': 8622592, 'steps': 16840, 'loss/train': 2.036545991897583} +02/24/2022 23:22:54 - INFO - codeparrot_training - Step 16841: {'lr': 0.00039104817600546277, 'samples': 8623104, 'steps': 16841, 'loss/train': 2.179760217666626} +02/24/2022 23:22:59 - INFO - codeparrot_training - Step 16842: {'lr': 0.00039103466614910144, 'samples': 8623616, 'steps': 16842, 'loss/train': 2.4726362228393555} +02/24/2022 23:23:03 - INFO - codeparrot_training - Step 16843: {'lr': 0.0003910211556885923, 'samples': 8624128, 'steps': 16843, 'loss/train': 2.223010778427124} +02/24/2022 23:23:10 - INFO - codeparrot_training - Step 16844: {'lr': 0.0003910076446239934, 'samples': 8624640, 'steps': 16844, 'loss/train': 2.6325414180755615} +02/24/2022 23:23:13 - INFO - codeparrot_training - Step 16845: {'lr': 0.00039099413295536246, 'samples': 8625152, 'steps': 16845, 'loss/train': 6.680877208709717} +02/24/2022 23:23:19 - INFO - codeparrot_training - Step 16846: {'lr': 0.0003909806206827575, 'samples': 8625664, 'steps': 16846, 'loss/train': 2.528313636779785} +02/24/2022 23:23:22 - INFO - codeparrot_training - Step 16847: {'lr': 0.00039096710780623625, 'samples': 8626176, 'steps': 16847, 'loss/train': 2.1600160598754883} +02/24/2022 23:23:28 - INFO - codeparrot_training - Step 16848: {'lr': 0.0003909535943258567, 'samples': 8626688, 'steps': 16848, 'loss/train': 2.4478447437286377} +02/24/2022 23:23:31 - INFO - codeparrot_training - Step 16849: {'lr': 0.0003909400802416767, 'samples': 8627200, 'steps': 16849, 'loss/train': 2.6089699268341064} +02/24/2022 23:23:37 - INFO - codeparrot_training - Step 16850: {'lr': 0.00039092656555375416, 'samples': 8627712, 'steps': 16850, 'loss/train': 1.8667513132095337} +02/24/2022 23:23:40 - INFO - codeparrot_training - Step 16851: {'lr': 0.00039091305026214704, 'samples': 8628224, 'steps': 16851, 'loss/train': 2.1120376586914062} +02/24/2022 23:23:46 - INFO - codeparrot_training - Step 16852: {'lr': 0.0003908995343669131, 'samples': 8628736, 'steps': 16852, 'loss/train': 2.670600652694702} +02/24/2022 23:23:49 - INFO - codeparrot_training - Step 16853: {'lr': 0.0003908860178681102, 'samples': 8629248, 'steps': 16853, 'loss/train': 3.1181223392486572} +02/24/2022 23:23:56 - INFO - codeparrot_training - Step 16854: {'lr': 0.0003908725007657964, 'samples': 8629760, 'steps': 16854, 'loss/train': 3.1714067459106445} +02/24/2022 23:23:59 - INFO - codeparrot_training - Step 16855: {'lr': 0.0003908589830600296, 'samples': 8630272, 'steps': 16855, 'loss/train': 0.7268884778022766} +02/24/2022 23:24:05 - INFO - codeparrot_training - Step 16856: {'lr': 0.0003908454647508676, 'samples': 8630784, 'steps': 16856, 'loss/train': 2.485808849334717} +02/24/2022 23:24:09 - INFO - codeparrot_training - Step 16857: {'lr': 0.00039083194583836836, 'samples': 8631296, 'steps': 16857, 'loss/train': 3.775114059448242} +02/24/2022 23:24:12 - INFO - codeparrot_training - Step 16858: {'lr': 0.0003908184263225898, 'samples': 8631808, 'steps': 16858, 'loss/train': 1.8956369161605835} +02/24/2022 23:24:19 - INFO - codeparrot_training - Step 16859: {'lr': 0.0003908049062035898, 'samples': 8632320, 'steps': 16859, 'loss/train': 1.420224905014038} +02/24/2022 23:24:22 - INFO - codeparrot_training - Step 16860: {'lr': 0.0003907913854814262, 'samples': 8632832, 'steps': 16860, 'loss/train': 2.164921998977661} +02/24/2022 23:24:26 - INFO - codeparrot_training - Step 16861: {'lr': 0.00039077786415615714, 'samples': 8633344, 'steps': 16861, 'loss/train': 1.5208487510681152} +02/24/2022 23:24:31 - INFO - codeparrot_training - Step 16862: {'lr': 0.0003907643422278404, 'samples': 8633856, 'steps': 16862, 'loss/train': 2.4336390495300293} +02/24/2022 23:24:35 - INFO - codeparrot_training - Step 16863: {'lr': 0.00039075081969653383, 'samples': 8634368, 'steps': 16863, 'loss/train': 1.6878808736801147} +02/24/2022 23:24:40 - INFO - codeparrot_training - Step 16864: {'lr': 0.0003907372965622955, 'samples': 8634880, 'steps': 16864, 'loss/train': 0.24565070867538452} +02/24/2022 23:24:44 - INFO - codeparrot_training - Step 16865: {'lr': 0.0003907237728251833, 'samples': 8635392, 'steps': 16865, 'loss/train': 2.3726556301116943} +02/24/2022 23:24:49 - INFO - codeparrot_training - Step 16866: {'lr': 0.0003907102484852551, 'samples': 8635904, 'steps': 16866, 'loss/train': 1.6078999042510986} +02/24/2022 23:24:53 - INFO - codeparrot_training - Step 16867: {'lr': 0.0003906967235425689, 'samples': 8636416, 'steps': 16867, 'loss/train': 1.6059931516647339} +02/24/2022 23:24:58 - INFO - codeparrot_training - Step 16868: {'lr': 0.0003906831979971826, 'samples': 8636928, 'steps': 16868, 'loss/train': 4.194704055786133} +02/24/2022 23:25:02 - INFO - codeparrot_training - Step 16869: {'lr': 0.0003906696718491541, 'samples': 8637440, 'steps': 16869, 'loss/train': 2.009641170501709} +02/24/2022 23:25:08 - INFO - codeparrot_training - Step 16870: {'lr': 0.0003906561450985415, 'samples': 8637952, 'steps': 16870, 'loss/train': 1.9054350852966309} +02/24/2022 23:25:11 - INFO - codeparrot_training - Step 16871: {'lr': 0.00039064261774540254, 'samples': 8638464, 'steps': 16871, 'loss/train': 3.1805734634399414} +02/24/2022 23:25:17 - INFO - codeparrot_training - Step 16872: {'lr': 0.0003906290897897953, 'samples': 8638976, 'steps': 16872, 'loss/train': 2.461371660232544} +02/24/2022 23:25:21 - INFO - codeparrot_training - Step 16873: {'lr': 0.00039061556123177777, 'samples': 8639488, 'steps': 16873, 'loss/train': 1.5450149774551392} +02/24/2022 23:25:26 - INFO - codeparrot_training - Step 16874: {'lr': 0.00039060203207140774, 'samples': 8640000, 'steps': 16874, 'loss/train': 2.7042887210845947} +02/24/2022 23:25:30 - INFO - codeparrot_training - Step 16875: {'lr': 0.0003905885023087433, 'samples': 8640512, 'steps': 16875, 'loss/train': 1.750491738319397} +02/24/2022 23:25:35 - INFO - codeparrot_training - Step 16876: {'lr': 0.0003905749719438423, 'samples': 8641024, 'steps': 16876, 'loss/train': 0.6731309294700623} +02/24/2022 23:25:39 - INFO - codeparrot_training - Step 16877: {'lr': 0.00039056144097676285, 'samples': 8641536, 'steps': 16877, 'loss/train': 1.1712673902511597} +02/24/2022 23:25:44 - INFO - codeparrot_training - Step 16878: {'lr': 0.0003905479094075627, 'samples': 8642048, 'steps': 16878, 'loss/train': 2.443943977355957} +02/24/2022 23:25:48 - INFO - codeparrot_training - Step 16879: {'lr': 0.00039053437723630003, 'samples': 8642560, 'steps': 16879, 'loss/train': 1.6826013326644897} +02/24/2022 23:25:53 - INFO - codeparrot_training - Step 16880: {'lr': 0.00039052084446303264, 'samples': 8643072, 'steps': 16880, 'loss/train': 2.573608160018921} +02/24/2022 23:25:57 - INFO - codeparrot_training - Step 16881: {'lr': 0.0003905073110878186, 'samples': 8643584, 'steps': 16881, 'loss/train': 0.9606328010559082} +02/24/2022 23:26:04 - INFO - codeparrot_training - Step 16882: {'lr': 0.00039049377711071595, 'samples': 8644096, 'steps': 16882, 'loss/train': 2.4206340312957764} +02/24/2022 23:26:07 - INFO - codeparrot_training - Step 16883: {'lr': 0.00039048024253178243, 'samples': 8644608, 'steps': 16883, 'loss/train': 1.2557203769683838} +02/24/2022 23:26:13 - INFO - codeparrot_training - Step 16884: {'lr': 0.00039046670735107627, 'samples': 8645120, 'steps': 16884, 'loss/train': 2.5004947185516357} +02/24/2022 23:26:16 - INFO - codeparrot_training - Step 16885: {'lr': 0.00039045317156865525, 'samples': 8645632, 'steps': 16885, 'loss/train': 1.8755720853805542} +02/24/2022 23:26:22 - INFO - codeparrot_training - Step 16886: {'lr': 0.0003904396351845775, 'samples': 8646144, 'steps': 16886, 'loss/train': 1.6499665975570679} +02/24/2022 23:26:25 - INFO - codeparrot_training - Step 16887: {'lr': 0.00039042609819890087, 'samples': 8646656, 'steps': 16887, 'loss/train': 1.7086681127548218} +02/24/2022 23:26:31 - INFO - codeparrot_training - Step 16888: {'lr': 0.0003904125606116835, 'samples': 8647168, 'steps': 16888, 'loss/train': 0.6751857399940491} +02/24/2022 23:26:34 - INFO - codeparrot_training - Step 16889: {'lr': 0.0003903990224229833, 'samples': 8647680, 'steps': 16889, 'loss/train': 1.9928969144821167} +02/24/2022 23:26:40 - INFO - codeparrot_training - Step 16890: {'lr': 0.00039038548363285825, 'samples': 8648192, 'steps': 16890, 'loss/train': 1.6685214042663574} +02/24/2022 23:26:43 - INFO - codeparrot_training - Step 16891: {'lr': 0.00039037194424136634, 'samples': 8648704, 'steps': 16891, 'loss/train': 2.2034332752227783} +02/24/2022 23:26:50 - INFO - codeparrot_training - Step 16892: {'lr': 0.0003903584042485656, 'samples': 8649216, 'steps': 16892, 'loss/train': 1.961581826210022} +02/24/2022 23:26:53 - INFO - codeparrot_training - Step 16893: {'lr': 0.00039034486365451405, 'samples': 8649728, 'steps': 16893, 'loss/train': 2.6328325271606445} +02/24/2022 23:26:59 - INFO - codeparrot_training - Step 16894: {'lr': 0.00039033132245926974, 'samples': 8650240, 'steps': 16894, 'loss/train': 2.49061918258667} +02/24/2022 23:27:02 - INFO - codeparrot_training - Step 16895: {'lr': 0.0003903177806628905, 'samples': 8650752, 'steps': 16895, 'loss/train': 2.2321810722351074} +02/24/2022 23:27:08 - INFO - codeparrot_training - Step 16896: {'lr': 0.00039030423826543446, 'samples': 8651264, 'steps': 16896, 'loss/train': 2.102652072906494} +02/24/2022 23:27:11 - INFO - codeparrot_training - Step 16897: {'lr': 0.0003902906952669596, 'samples': 8651776, 'steps': 16897, 'loss/train': 0.927318274974823} +02/24/2022 23:27:17 - INFO - codeparrot_training - Step 16898: {'lr': 0.000390277151667524, 'samples': 8652288, 'steps': 16898, 'loss/train': 2.801112174987793} +02/24/2022 23:27:20 - INFO - codeparrot_training - Step 16899: {'lr': 0.0003902636074671856, 'samples': 8652800, 'steps': 16899, 'loss/train': 1.952659010887146} +02/24/2022 23:27:26 - INFO - codeparrot_training - Step 16900: {'lr': 0.0003902500626660025, 'samples': 8653312, 'steps': 16900, 'loss/train': 0.9871919751167297} +02/24/2022 23:27:29 - INFO - codeparrot_training - Step 16901: {'lr': 0.00039023651726403263, 'samples': 8653824, 'steps': 16901, 'loss/train': 2.5099470615386963} +02/24/2022 23:27:35 - INFO - codeparrot_training - Step 16902: {'lr': 0.00039022297126133397, 'samples': 8654336, 'steps': 16902, 'loss/train': 2.026329517364502} +02/24/2022 23:27:39 - INFO - codeparrot_training - Step 16903: {'lr': 0.0003902094246579647, 'samples': 8654848, 'steps': 16903, 'loss/train': 1.2148100137710571} +02/24/2022 23:27:44 - INFO - codeparrot_training - Step 16904: {'lr': 0.00039019587745398276, 'samples': 8655360, 'steps': 16904, 'loss/train': 2.444507122039795} +02/24/2022 23:27:48 - INFO - codeparrot_training - Step 16905: {'lr': 0.00039018232964944623, 'samples': 8655872, 'steps': 16905, 'loss/train': 2.254626989364624} +02/24/2022 23:27:53 - INFO - codeparrot_training - Step 16906: {'lr': 0.0003901687812444131, 'samples': 8656384, 'steps': 16906, 'loss/train': 8.72937297821045} +02/24/2022 23:27:57 - INFO - codeparrot_training - Step 16907: {'lr': 0.0003901552322389414, 'samples': 8656896, 'steps': 16907, 'loss/train': 2.806398868560791} +02/24/2022 23:28:02 - INFO - codeparrot_training - Step 16908: {'lr': 0.00039014168263308926, 'samples': 8657408, 'steps': 16908, 'loss/train': 0.8207800984382629} +02/24/2022 23:28:06 - INFO - codeparrot_training - Step 16909: {'lr': 0.00039012813242691454, 'samples': 8657920, 'steps': 16909, 'loss/train': 1.2087421417236328} +02/24/2022 23:28:11 - INFO - codeparrot_training - Step 16910: {'lr': 0.00039011458162047547, 'samples': 8658432, 'steps': 16910, 'loss/train': 2.4208099842071533} +02/24/2022 23:28:15 - INFO - codeparrot_training - Step 16911: {'lr': 0.00039010103021383, 'samples': 8658944, 'steps': 16911, 'loss/train': 1.7641187906265259} +02/24/2022 23:28:20 - INFO - codeparrot_training - Step 16912: {'lr': 0.00039008747820703615, 'samples': 8659456, 'steps': 16912, 'loss/train': 1.685917854309082} +02/24/2022 23:28:24 - INFO - codeparrot_training - Step 16913: {'lr': 0.0003900739256001521, 'samples': 8659968, 'steps': 16913, 'loss/train': 2.2563421726226807} +02/24/2022 23:28:29 - INFO - codeparrot_training - Step 16914: {'lr': 0.00039006037239323584, 'samples': 8660480, 'steps': 16914, 'loss/train': 1.4826017618179321} +02/24/2022 23:28:33 - INFO - codeparrot_training - Step 16915: {'lr': 0.00039004681858634537, 'samples': 8660992, 'steps': 16915, 'loss/train': 0.29030516743659973} +02/24/2022 23:28:38 - INFO - codeparrot_training - Step 16916: {'lr': 0.0003900332641795388, 'samples': 8661504, 'steps': 16916, 'loss/train': 2.1742749214172363} +02/24/2022 23:28:42 - INFO - codeparrot_training - Step 16917: {'lr': 0.0003900197091728742, 'samples': 8662016, 'steps': 16917, 'loss/train': 2.3685264587402344} +02/24/2022 23:28:48 - INFO - codeparrot_training - Step 16918: {'lr': 0.0003900061535664097, 'samples': 8662528, 'steps': 16918, 'loss/train': 1.8964872360229492} +02/24/2022 23:28:51 - INFO - codeparrot_training - Step 16919: {'lr': 0.0003899925973602032, 'samples': 8663040, 'steps': 16919, 'loss/train': 1.3185784816741943} +02/24/2022 23:28:57 - INFO - codeparrot_training - Step 16920: {'lr': 0.0003899790405543129, 'samples': 8663552, 'steps': 16920, 'loss/train': 1.9329650402069092} +02/24/2022 23:29:00 - INFO - codeparrot_training - Step 16921: {'lr': 0.0003899654831487969, 'samples': 8664064, 'steps': 16921, 'loss/train': 2.5319745540618896} +02/24/2022 23:29:06 - INFO - codeparrot_training - Step 16922: {'lr': 0.0003899519251437131, 'samples': 8664576, 'steps': 16922, 'loss/train': 2.0593087673187256} +02/24/2022 23:29:09 - INFO - codeparrot_training - Step 16923: {'lr': 0.00038993836653911974, 'samples': 8665088, 'steps': 16923, 'loss/train': 1.5930911302566528} +02/24/2022 23:29:15 - INFO - codeparrot_training - Step 16924: {'lr': 0.00038992480733507487, 'samples': 8665600, 'steps': 16924, 'loss/train': 2.0232458114624023} +02/24/2022 23:29:18 - INFO - codeparrot_training - Step 16925: {'lr': 0.0003899112475316365, 'samples': 8666112, 'steps': 16925, 'loss/train': 2.715932846069336} +02/24/2022 23:29:24 - INFO - codeparrot_training - Step 16926: {'lr': 0.00038989768712886287, 'samples': 8666624, 'steps': 16926, 'loss/train': 1.8036901950836182} +02/24/2022 23:29:27 - INFO - codeparrot_training - Step 16927: {'lr': 0.0003898841261268119, 'samples': 8667136, 'steps': 16927, 'loss/train': 2.5591657161712646} +02/24/2022 23:29:33 - INFO - codeparrot_training - Step 16928: {'lr': 0.00038987056452554177, 'samples': 8667648, 'steps': 16928, 'loss/train': 1.5738908052444458} +02/24/2022 23:29:37 - INFO - codeparrot_training - Step 16929: {'lr': 0.00038985700232511055, 'samples': 8668160, 'steps': 16929, 'loss/train': 1.8292303085327148} +02/24/2022 23:29:43 - INFO - codeparrot_training - Step 16930: {'lr': 0.0003898434395255763, 'samples': 8668672, 'steps': 16930, 'loss/train': 2.2562856674194336} +02/24/2022 23:29:46 - INFO - codeparrot_training - Step 16931: {'lr': 0.0003898298761269973, 'samples': 8669184, 'steps': 16931, 'loss/train': 1.6968156099319458} +02/24/2022 23:29:52 - INFO - codeparrot_training - Step 16932: {'lr': 0.0003898163121294314, 'samples': 8669696, 'steps': 16932, 'loss/train': 2.213306427001953} +02/24/2022 23:29:55 - INFO - codeparrot_training - Step 16933: {'lr': 0.0003898027475329368, 'samples': 8670208, 'steps': 16933, 'loss/train': 1.7215467691421509} +02/24/2022 23:30:01 - INFO - codeparrot_training - Step 16934: {'lr': 0.00038978918233757167, 'samples': 8670720, 'steps': 16934, 'loss/train': 1.9487850666046143} +02/24/2022 23:30:04 - INFO - codeparrot_training - Step 16935: {'lr': 0.000389775616543394, 'samples': 8671232, 'steps': 16935, 'loss/train': 2.101301431655884} +02/24/2022 23:30:10 - INFO - codeparrot_training - Step 16936: {'lr': 0.00038976205015046206, 'samples': 8671744, 'steps': 16936, 'loss/train': 1.3145732879638672} +02/24/2022 23:30:13 - INFO - codeparrot_training - Step 16937: {'lr': 0.00038974848315883383, 'samples': 8672256, 'steps': 16937, 'loss/train': 2.593108892440796} +02/24/2022 23:30:19 - INFO - codeparrot_training - Step 16938: {'lr': 0.00038973491556856755, 'samples': 8672768, 'steps': 16938, 'loss/train': 2.0943918228149414} +02/24/2022 23:30:23 - INFO - codeparrot_training - Step 16939: {'lr': 0.0003897213473797212, 'samples': 8673280, 'steps': 16939, 'loss/train': 1.4675676822662354} +02/24/2022 23:30:28 - INFO - codeparrot_training - Step 16940: {'lr': 0.0003897077785923529, 'samples': 8673792, 'steps': 16940, 'loss/train': 2.2290542125701904} +02/24/2022 23:30:32 - INFO - codeparrot_training - Step 16941: {'lr': 0.0003896942092065209, 'samples': 8674304, 'steps': 16941, 'loss/train': 1.8714227676391602} +02/24/2022 23:30:37 - INFO - codeparrot_training - Step 16942: {'lr': 0.0003896806392222833, 'samples': 8674816, 'steps': 16942, 'loss/train': 1.1439050436019897} +02/24/2022 23:30:41 - INFO - codeparrot_training - Step 16943: {'lr': 0.00038966706863969815, 'samples': 8675328, 'steps': 16943, 'loss/train': 1.649509310722351} +02/24/2022 23:30:46 - INFO - codeparrot_training - Step 16944: {'lr': 0.00038965349745882365, 'samples': 8675840, 'steps': 16944, 'loss/train': 2.2249338626861572} +02/24/2022 23:30:50 - INFO - codeparrot_training - Step 16945: {'lr': 0.00038963992567971794, 'samples': 8676352, 'steps': 16945, 'loss/train': 1.5210684537887573} +02/24/2022 23:30:55 - INFO - codeparrot_training - Step 16946: {'lr': 0.0003896263533024391, 'samples': 8676864, 'steps': 16946, 'loss/train': 1.8658944368362427} +02/24/2022 23:30:59 - INFO - codeparrot_training - Step 16947: {'lr': 0.0003896127803270453, 'samples': 8677376, 'steps': 16947, 'loss/train': 1.8243488073349} +02/24/2022 23:31:04 - INFO - codeparrot_training - Step 16948: {'lr': 0.0003895992067535946, 'samples': 8677888, 'steps': 16948, 'loss/train': 2.0701653957366943} +02/24/2022 23:31:08 - INFO - codeparrot_training - Step 16949: {'lr': 0.0003895856325821454, 'samples': 8678400, 'steps': 16949, 'loss/train': 1.8434815406799316} +02/24/2022 23:31:14 - INFO - codeparrot_training - Step 16950: {'lr': 0.00038957205781275554, 'samples': 8678912, 'steps': 16950, 'loss/train': 2.7598280906677246} +02/24/2022 23:31:17 - INFO - codeparrot_training - Step 16951: {'lr': 0.00038955848244548333, 'samples': 8679424, 'steps': 16951, 'loss/train': 0.7306210398674011} +02/24/2022 23:31:23 - INFO - codeparrot_training - Step 16952: {'lr': 0.00038954490648038687, 'samples': 8679936, 'steps': 16952, 'loss/train': 2.261073112487793} +02/24/2022 23:31:26 - INFO - codeparrot_training - Step 16953: {'lr': 0.0003895313299175244, 'samples': 8680448, 'steps': 16953, 'loss/train': 2.1445531845092773} +02/24/2022 23:31:32 - INFO - codeparrot_training - Step 16954: {'lr': 0.000389517752756954, 'samples': 8680960, 'steps': 16954, 'loss/train': 1.6433805227279663} +02/24/2022 23:31:36 - INFO - codeparrot_training - Step 16955: {'lr': 0.0003895041749987338, 'samples': 8681472, 'steps': 16955, 'loss/train': 2.915520668029785} +02/24/2022 23:31:41 - INFO - codeparrot_training - Step 16956: {'lr': 0.00038949059664292207, 'samples': 8681984, 'steps': 16956, 'loss/train': 1.8010644912719727} +02/24/2022 23:31:45 - INFO - codeparrot_training - Step 16957: {'lr': 0.0003894770176895769, 'samples': 8682496, 'steps': 16957, 'loss/train': 2.249030590057373} +02/24/2022 23:31:50 - INFO - codeparrot_training - Step 16958: {'lr': 0.0003894634381387565, 'samples': 8683008, 'steps': 16958, 'loss/train': 1.8238641023635864} +02/24/2022 23:31:53 - INFO - codeparrot_training - Step 16959: {'lr': 0.00038944985799051896, 'samples': 8683520, 'steps': 16959, 'loss/train': 1.926099419593811} +02/24/2022 23:31:59 - INFO - codeparrot_training - Step 16960: {'lr': 0.0003894362772449226, 'samples': 8684032, 'steps': 16960, 'loss/train': 1.838199257850647} +02/24/2022 23:32:03 - INFO - codeparrot_training - Step 16961: {'lr': 0.0003894226959020254, 'samples': 8684544, 'steps': 16961, 'loss/train': 1.834027647972107} +02/24/2022 23:32:08 - INFO - codeparrot_training - Step 16962: {'lr': 0.00038940911396188573, 'samples': 8685056, 'steps': 16962, 'loss/train': 2.2430877685546875} +02/24/2022 23:32:15 - INFO - codeparrot_training - Step 16963: {'lr': 0.0003893955314245616, 'samples': 8685568, 'steps': 16963, 'loss/train': 1.9298113584518433} +02/24/2022 23:32:18 - INFO - codeparrot_training - Step 16964: {'lr': 0.0003893819482901113, 'samples': 8686080, 'steps': 16964, 'loss/train': 1.9258403778076172} +02/24/2022 23:32:24 - INFO - codeparrot_training - Step 16965: {'lr': 0.000389368364558593, 'samples': 8686592, 'steps': 16965, 'loss/train': 1.7546473741531372} +02/24/2022 23:32:27 - INFO - codeparrot_training - Step 16966: {'lr': 0.00038935478023006487, 'samples': 8687104, 'steps': 16966, 'loss/train': 2.231564521789551} +02/24/2022 23:32:33 - INFO - codeparrot_training - Step 16967: {'lr': 0.0003893411953045852, 'samples': 8687616, 'steps': 16967, 'loss/train': 1.3445745706558228} +02/24/2022 23:32:36 - INFO - codeparrot_training - Step 16968: {'lr': 0.000389327609782212, 'samples': 8688128, 'steps': 16968, 'loss/train': 1.9698094129562378} +02/24/2022 23:32:42 - INFO - codeparrot_training - Step 16969: {'lr': 0.0003893140236630036, 'samples': 8688640, 'steps': 16969, 'loss/train': 2.612501621246338} +02/24/2022 23:32:45 - INFO - codeparrot_training - Step 16970: {'lr': 0.0003893004369470181, 'samples': 8689152, 'steps': 16970, 'loss/train': 2.2755954265594482} +02/24/2022 23:32:51 - INFO - codeparrot_training - Step 16971: {'lr': 0.00038928684963431383, 'samples': 8689664, 'steps': 16971, 'loss/train': 2.605947256088257} +02/24/2022 23:32:54 - INFO - codeparrot_training - Step 16972: {'lr': 0.00038927326172494894, 'samples': 8690176, 'steps': 16972, 'loss/train': 2.1987826824188232} +02/24/2022 23:33:01 - INFO - codeparrot_training - Step 16973: {'lr': 0.0003892596732189816, 'samples': 8690688, 'steps': 16973, 'loss/train': 1.293381929397583} +02/24/2022 23:33:04 - INFO - codeparrot_training - Step 16974: {'lr': 0.00038924608411647, 'samples': 8691200, 'steps': 16974, 'loss/train': 1.3081204891204834} +02/24/2022 23:33:10 - INFO - codeparrot_training - Step 16975: {'lr': 0.00038923249441747245, 'samples': 8691712, 'steps': 16975, 'loss/train': 2.2672359943389893} +02/24/2022 23:33:13 - INFO - codeparrot_training - Step 16976: {'lr': 0.000389218904122047, 'samples': 8692224, 'steps': 16976, 'loss/train': 1.9623104333877563} +02/24/2022 23:33:19 - INFO - codeparrot_training - Step 16977: {'lr': 0.00038920531323025206, 'samples': 8692736, 'steps': 16977, 'loss/train': 3.1134965419769287} +02/24/2022 23:33:22 - INFO - codeparrot_training - Step 16978: {'lr': 0.0003891917217421458, 'samples': 8693248, 'steps': 16978, 'loss/train': 1.8949763774871826} +02/24/2022 23:33:28 - INFO - codeparrot_training - Step 16979: {'lr': 0.00038917812965778625, 'samples': 8693760, 'steps': 16979, 'loss/train': 1.8277864456176758} +02/24/2022 23:33:31 - INFO - codeparrot_training - Step 16980: {'lr': 0.00038916453697723194, 'samples': 8694272, 'steps': 16980, 'loss/train': 1.6719551086425781} +02/24/2022 23:33:37 - INFO - codeparrot_training - Step 16981: {'lr': 0.00038915094370054083, 'samples': 8694784, 'steps': 16981, 'loss/train': 2.123537302017212} +02/24/2022 23:33:40 - INFO - codeparrot_training - Step 16982: {'lr': 0.00038913734982777136, 'samples': 8695296, 'steps': 16982, 'loss/train': 1.8104923963546753} +02/24/2022 23:33:46 - INFO - codeparrot_training - Step 16983: {'lr': 0.0003891237553589816, 'samples': 8695808, 'steps': 16983, 'loss/train': 1.3323637247085571} +02/24/2022 23:33:49 - INFO - codeparrot_training - Step 16984: {'lr': 0.00038911016029422984, 'samples': 8696320, 'steps': 16984, 'loss/train': 2.28753924369812} +02/24/2022 23:33:55 - INFO - codeparrot_training - Step 16985: {'lr': 0.0003890965646335744, 'samples': 8696832, 'steps': 16985, 'loss/train': 1.9426664113998413} +02/24/2022 23:33:59 - INFO - codeparrot_training - Step 16986: {'lr': 0.0003890829683770734, 'samples': 8697344, 'steps': 16986, 'loss/train': 2.0136423110961914} +02/24/2022 23:34:04 - INFO - codeparrot_training - Step 16987: {'lr': 0.0003890693715247851, 'samples': 8697856, 'steps': 16987, 'loss/train': 2.2473838329315186} +02/24/2022 23:34:08 - INFO - codeparrot_training - Step 16988: {'lr': 0.0003890557740767678, 'samples': 8698368, 'steps': 16988, 'loss/train': 5.835962772369385} +02/24/2022 23:34:13 - INFO - codeparrot_training - Step 16989: {'lr': 0.0003890421760330798, 'samples': 8698880, 'steps': 16989, 'loss/train': 1.5661112070083618} +02/24/2022 23:34:17 - INFO - codeparrot_training - Step 16990: {'lr': 0.0003890285773937792, 'samples': 8699392, 'steps': 16990, 'loss/train': 2.22489595413208} +02/24/2022 23:34:22 - INFO - codeparrot_training - Step 16991: {'lr': 0.0003890149781589243, 'samples': 8699904, 'steps': 16991, 'loss/train': 1.6395831108093262} +02/24/2022 23:34:26 - INFO - codeparrot_training - Step 16992: {'lr': 0.0003890013783285733, 'samples': 8700416, 'steps': 16992, 'loss/train': 2.580901861190796} +02/24/2022 23:34:31 - INFO - codeparrot_training - Step 16993: {'lr': 0.00038898777790278465, 'samples': 8700928, 'steps': 16993, 'loss/train': 2.2957491874694824} +02/24/2022 23:34:35 - INFO - codeparrot_training - Step 16994: {'lr': 0.00038897417688161644, 'samples': 8701440, 'steps': 16994, 'loss/train': 2.2015116214752197} +02/24/2022 23:34:41 - INFO - codeparrot_training - Step 16995: {'lr': 0.0003889605752651271, 'samples': 8701952, 'steps': 16995, 'loss/train': 1.9655791521072388} +02/24/2022 23:34:44 - INFO - codeparrot_training - Step 16996: {'lr': 0.0003889469730533746, 'samples': 8702464, 'steps': 16996, 'loss/train': 1.8086227178573608} +02/24/2022 23:34:50 - INFO - codeparrot_training - Step 16997: {'lr': 0.0003889333702464175, 'samples': 8702976, 'steps': 16997, 'loss/train': 1.8407796621322632} +02/24/2022 23:34:53 - INFO - codeparrot_training - Step 16998: {'lr': 0.00038891976684431395, 'samples': 8703488, 'steps': 16998, 'loss/train': 2.391925096511841} +02/24/2022 23:34:59 - INFO - codeparrot_training - Step 16999: {'lr': 0.0003889061628471222, 'samples': 8704000, 'steps': 16999, 'loss/train': 3.4161200523376465} +02/24/2022 23:34:59 - INFO - codeparrot_training - Evaluating and saving model checkpoint