diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -14385,3 +14385,1009 @@ Use FP16 precision: False 02/24/2022 19:40:59 - INFO - codeparrot_training - Step 13998: {'lr': 0.00042679983379736324, 'samples': 7167488, 'steps': 13998, 'loss/train': 1.7144615650177002} 02/24/2022 19:41:03 - INFO - codeparrot_training - Step 13999: {'lr': 0.0004267882649256525, 'samples': 7168000, 'steps': 13999, 'loss/train': 1.6556921005249023} 02/24/2022 19:41:03 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 19:41:20 - WARNING - huggingface_hub.repository - Several commits (14) will be pushed upstream. +02/24/2022 19:41:20 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 19:41:58 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 7d01f0b..3a61b16 floral-grass-11 -> floral-grass-11 + +02/24/2022 19:42:05 - INFO - codeparrot_training - Step 14000: {'lr': 0.00042677669529663686, 'samples': 7168512, 'steps': 14000, 'loss/train': 2.7728171348571777} +02/24/2022 19:42:08 - INFO - codeparrot_training - Step 14001: {'lr': 0.0004267651249103661, 'samples': 7169024, 'steps': 14001, 'loss/train': 2.645996332168579} +02/24/2022 19:42:14 - INFO - codeparrot_training - Step 14002: {'lr': 0.00042675355376688964, 'samples': 7169536, 'steps': 14002, 'loss/train': 2.4357352256774902} +02/24/2022 19:42:17 - INFO - codeparrot_training - Step 14003: {'lr': 0.000426741981866257, 'samples': 7170048, 'steps': 14003, 'loss/train': 2.1884565353393555} +02/24/2022 19:42:23 - INFO - codeparrot_training - Step 14004: {'lr': 0.00042673040920851793, 'samples': 7170560, 'steps': 14004, 'loss/train': 1.3800054788589478} +02/24/2022 19:42:26 - INFO - codeparrot_training - Step 14005: {'lr': 0.00042671883579372186, 'samples': 7171072, 'steps': 14005, 'loss/train': 2.5828280448913574} +02/24/2022 19:42:32 - INFO - codeparrot_training - Step 14006: {'lr': 0.00042670726162191843, 'samples': 7171584, 'steps': 14006, 'loss/train': 1.8915454149246216} +02/24/2022 19:42:35 - INFO - codeparrot_training - Step 14007: {'lr': 0.0004266956866931572, 'samples': 7172096, 'steps': 14007, 'loss/train': 1.3992984294891357} +02/24/2022 19:42:43 - INFO - codeparrot_training - Step 14008: {'lr': 0.0004266841110074878, 'samples': 7172608, 'steps': 14008, 'loss/train': 1.6871919631958008} +02/24/2022 19:42:46 - INFO - codeparrot_training - Step 14009: {'lr': 0.0004266725345649597, 'samples': 7173120, 'steps': 14009, 'loss/train': 2.5861899852752686} +02/24/2022 19:42:52 - INFO - codeparrot_training - Step 14010: {'lr': 0.0004266609573656226, 'samples': 7173632, 'steps': 14010, 'loss/train': 2.141249656677246} +02/24/2022 19:42:56 - INFO - codeparrot_training - Step 14011: {'lr': 0.000426649379409526, 'samples': 7174144, 'steps': 14011, 'loss/train': 3.1508538722991943} +02/24/2022 19:43:01 - INFO - codeparrot_training - Step 14012: {'lr': 0.00042663780069671965, 'samples': 7174656, 'steps': 14012, 'loss/train': 1.845304250717163} +02/24/2022 19:43:05 - INFO - codeparrot_training - Step 14013: {'lr': 0.000426626221227253, 'samples': 7175168, 'steps': 14013, 'loss/train': 1.7898507118225098} +02/24/2022 19:43:10 - INFO - codeparrot_training - Step 14014: {'lr': 0.00042661464100117566, 'samples': 7175680, 'steps': 14014, 'loss/train': 0.6427545547485352} +02/24/2022 19:43:14 - INFO - codeparrot_training - Step 14015: {'lr': 0.00042660306001853735, 'samples': 7176192, 'steps': 14015, 'loss/train': 1.653831958770752} +02/24/2022 19:43:19 - INFO - codeparrot_training - Step 14016: {'lr': 0.0004265914782793875, 'samples': 7176704, 'steps': 14016, 'loss/train': 0.8801273107528687} +02/24/2022 19:43:23 - INFO - codeparrot_training - Step 14017: {'lr': 0.000426579895783776, 'samples': 7177216, 'steps': 14017, 'loss/train': 2.425933837890625} +02/24/2022 19:43:30 - INFO - codeparrot_training - Step 14018: {'lr': 0.0004265683125317521, 'samples': 7177728, 'steps': 14018, 'loss/train': 1.94931960105896} +02/24/2022 19:43:33 - INFO - codeparrot_training - Step 14019: {'lr': 0.0004265567285233658, 'samples': 7178240, 'steps': 14019, 'loss/train': 2.7595584392547607} +02/24/2022 19:43:39 - INFO - codeparrot_training - Step 14020: {'lr': 0.0004265451437586664, 'samples': 7178752, 'steps': 14020, 'loss/train': 1.0570260286331177} +02/24/2022 19:43:42 - INFO - codeparrot_training - Step 14021: {'lr': 0.0004265335582377038, 'samples': 7179264, 'steps': 14021, 'loss/train': 1.486286997795105} +02/24/2022 19:43:48 - INFO - codeparrot_training - Step 14022: {'lr': 0.0004265219719605273, 'samples': 7179776, 'steps': 14022, 'loss/train': 2.3152973651885986} +02/24/2022 19:43:51 - INFO - codeparrot_training - Step 14023: {'lr': 0.0004265103849271869, 'samples': 7180288, 'steps': 14023, 'loss/train': 2.016923427581787} +02/24/2022 19:43:57 - INFO - codeparrot_training - Step 14024: {'lr': 0.000426498797137732, 'samples': 7180800, 'steps': 14024, 'loss/train': 1.445349097251892} +02/24/2022 19:44:00 - INFO - codeparrot_training - Step 14025: {'lr': 0.0004264872085922122, 'samples': 7181312, 'steps': 14025, 'loss/train': 0.40025123953819275} +02/24/2022 19:44:06 - INFO - codeparrot_training - Step 14026: {'lr': 0.0004264756192906774, 'samples': 7181824, 'steps': 14026, 'loss/train': 2.2487075328826904} +02/24/2022 19:44:09 - INFO - codeparrot_training - Step 14027: {'lr': 0.000426464029233177, 'samples': 7182336, 'steps': 14027, 'loss/train': 1.7559268474578857} +02/24/2022 19:44:17 - INFO - codeparrot_training - Step 14028: {'lr': 0.0004264524384197608, 'samples': 7182848, 'steps': 14028, 'loss/train': 2.0156359672546387} +02/24/2022 19:44:20 - INFO - codeparrot_training - Step 14029: {'lr': 0.0004264408468504783, 'samples': 7183360, 'steps': 14029, 'loss/train': 1.5344573259353638} +02/24/2022 19:44:26 - INFO - codeparrot_training - Step 14030: {'lr': 0.00042642925452537927, 'samples': 7183872, 'steps': 14030, 'loss/train': 2.616739511489868} +02/24/2022 19:44:29 - INFO - codeparrot_training - Step 14031: {'lr': 0.0004264176614445133, 'samples': 7184384, 'steps': 14031, 'loss/train': 2.3235089778900146} +02/24/2022 19:44:35 - INFO - codeparrot_training - Step 14032: {'lr': 0.0004264060676079302, 'samples': 7184896, 'steps': 14032, 'loss/train': 0.9734417200088501} +02/24/2022 19:44:38 - INFO - codeparrot_training - Step 14033: {'lr': 0.00042639447301567944, 'samples': 7185408, 'steps': 14033, 'loss/train': 1.7799558639526367} +02/24/2022 19:44:44 - INFO - codeparrot_training - Step 14034: {'lr': 0.0004263828776678108, 'samples': 7185920, 'steps': 14034, 'loss/train': 3.505218029022217} +02/24/2022 19:44:47 - INFO - codeparrot_training - Step 14035: {'lr': 0.00042637128156437385, 'samples': 7186432, 'steps': 14035, 'loss/train': 1.9882664680480957} +02/24/2022 19:44:53 - INFO - codeparrot_training - Step 14036: {'lr': 0.0004263596847054184, 'samples': 7186944, 'steps': 14036, 'loss/train': 2.313277244567871} +02/24/2022 19:44:56 - INFO - codeparrot_training - Step 14037: {'lr': 0.00042634808709099403, 'samples': 7187456, 'steps': 14037, 'loss/train': 1.884895920753479} +02/24/2022 19:45:02 - INFO - codeparrot_training - Step 14038: {'lr': 0.0004263364887211505, 'samples': 7187968, 'steps': 14038, 'loss/train': 1.0802332162857056} +02/24/2022 19:45:05 - INFO - codeparrot_training - Step 14039: {'lr': 0.0004263248895959374, 'samples': 7188480, 'steps': 14039, 'loss/train': 0.7658008337020874} +02/24/2022 19:45:11 - INFO - codeparrot_training - Step 14040: {'lr': 0.0004263132897154044, 'samples': 7188992, 'steps': 14040, 'loss/train': 0.4843223989009857} +02/24/2022 19:45:14 - INFO - codeparrot_training - Step 14041: {'lr': 0.0004263016890796014, 'samples': 7189504, 'steps': 14041, 'loss/train': 1.9767647981643677} +02/24/2022 19:45:20 - INFO - codeparrot_training - Step 14042: {'lr': 0.0004262900876885778, 'samples': 7190016, 'steps': 14042, 'loss/train': 2.517286777496338} +02/24/2022 19:45:23 - INFO - codeparrot_training - Step 14043: {'lr': 0.0004262784855423836, 'samples': 7190528, 'steps': 14043, 'loss/train': 2.145695924758911} +02/24/2022 19:45:31 - INFO - codeparrot_training - Step 14044: {'lr': 0.00042626688264106816, 'samples': 7191040, 'steps': 14044, 'loss/train': 1.0589336156845093} +02/24/2022 19:45:35 - INFO - codeparrot_training - Step 14045: {'lr': 0.00042625527898468155, 'samples': 7191552, 'steps': 14045, 'loss/train': 2.410911798477173} +02/24/2022 19:45:40 - INFO - codeparrot_training - Step 14046: {'lr': 0.0004262436745732732, 'samples': 7192064, 'steps': 14046, 'loss/train': 2.678050994873047} +02/24/2022 19:45:44 - INFO - codeparrot_training - Step 14047: {'lr': 0.00042623206940689285, 'samples': 7192576, 'steps': 14047, 'loss/train': 2.223356246948242} +02/24/2022 19:45:49 - INFO - codeparrot_training - Step 14048: {'lr': 0.00042622046348559034, 'samples': 7193088, 'steps': 14048, 'loss/train': 1.682914137840271} +02/24/2022 19:45:53 - INFO - codeparrot_training - Step 14049: {'lr': 0.0004262088568094153, 'samples': 7193600, 'steps': 14049, 'loss/train': 1.5549159049987793} +02/24/2022 19:45:58 - INFO - codeparrot_training - Step 14050: {'lr': 0.0004261972493784175, 'samples': 7194112, 'steps': 14050, 'loss/train': 2.370227575302124} +02/24/2022 19:46:02 - INFO - codeparrot_training - Step 14051: {'lr': 0.0004261856411926467, 'samples': 7194624, 'steps': 14051, 'loss/train': 2.1616947650909424} +02/24/2022 19:46:07 - INFO - codeparrot_training - Step 14052: {'lr': 0.0004261740322521525, 'samples': 7195136, 'steps': 14052, 'loss/train': 2.090914011001587} +02/24/2022 19:46:11 - INFO - codeparrot_training - Step 14053: {'lr': 0.00042616242255698463, 'samples': 7195648, 'steps': 14053, 'loss/train': 2.1879055500030518} +02/24/2022 19:46:18 - INFO - codeparrot_training - Step 14054: {'lr': 0.0004261508121071929, 'samples': 7196160, 'steps': 14054, 'loss/train': 1.4653798341751099} +02/24/2022 19:46:22 - INFO - codeparrot_training - Step 14055: {'lr': 0.00042613920090282706, 'samples': 7196672, 'steps': 14055, 'loss/train': 3.5175087451934814} +02/24/2022 19:46:27 - INFO - codeparrot_training - Step 14056: {'lr': 0.0004261275889439368, 'samples': 7197184, 'steps': 14056, 'loss/train': 1.1598094701766968} +02/24/2022 19:46:31 - INFO - codeparrot_training - Step 14057: {'lr': 0.0004261159762305719, 'samples': 7197696, 'steps': 14057, 'loss/train': 2.758617639541626} +02/24/2022 19:46:36 - INFO - codeparrot_training - Step 14058: {'lr': 0.00042610436276278196, 'samples': 7198208, 'steps': 14058, 'loss/train': 2.1537179946899414} +02/24/2022 19:46:40 - INFO - codeparrot_training - Step 14059: {'lr': 0.00042609274854061695, 'samples': 7198720, 'steps': 14059, 'loss/train': 6.597321510314941} +02/24/2022 19:46:45 - INFO - codeparrot_training - Step 14060: {'lr': 0.0004260811335641266, 'samples': 7199232, 'steps': 14060, 'loss/train': 2.0042102336883545} +02/24/2022 19:46:49 - INFO - codeparrot_training - Step 14061: {'lr': 0.00042606951783336045, 'samples': 7199744, 'steps': 14061, 'loss/train': 2.3713490962982178} +02/24/2022 19:46:54 - INFO - codeparrot_training - Step 14062: {'lr': 0.0004260579013483684, 'samples': 7200256, 'steps': 14062, 'loss/train': 0.5766487717628479} +02/24/2022 19:46:58 - INFO - codeparrot_training - Step 14063: {'lr': 0.0004260462841092003, 'samples': 7200768, 'steps': 14063, 'loss/train': 2.5429704189300537} +02/24/2022 19:47:05 - INFO - codeparrot_training - Step 14064: {'lr': 0.00042603466611590575, 'samples': 7201280, 'steps': 14064, 'loss/train': 1.9629775285720825} +02/24/2022 19:47:09 - INFO - codeparrot_training - Step 14065: {'lr': 0.00042602304736853464, 'samples': 7201792, 'steps': 14065, 'loss/train': 2.5161023139953613} +02/24/2022 19:47:14 - INFO - codeparrot_training - Step 14066: {'lr': 0.00042601142786713664, 'samples': 7202304, 'steps': 14066, 'loss/train': 0.9914633631706238} +02/24/2022 19:47:18 - INFO - codeparrot_training - Step 14067: {'lr': 0.0004259998076117616, 'samples': 7202816, 'steps': 14067, 'loss/train': 0.9146500825881958} +02/24/2022 19:47:23 - INFO - codeparrot_training - Step 14068: {'lr': 0.00042598818660245926, 'samples': 7203328, 'steps': 14068, 'loss/train': 2.3664193153381348} +02/24/2022 19:47:27 - INFO - codeparrot_training - Step 14069: {'lr': 0.00042597656483927936, 'samples': 7203840, 'steps': 14069, 'loss/train': 1.3827407360076904} +02/24/2022 19:47:32 - INFO - codeparrot_training - Step 14070: {'lr': 0.0004259649423222718, 'samples': 7204352, 'steps': 14070, 'loss/train': 2.288076639175415} +02/24/2022 19:47:36 - INFO - codeparrot_training - Step 14071: {'lr': 0.0004259533190514863, 'samples': 7204864, 'steps': 14071, 'loss/train': 2.987483263015747} +02/24/2022 19:47:41 - INFO - codeparrot_training - Step 14072: {'lr': 0.00042594169502697265, 'samples': 7205376, 'steps': 14072, 'loss/train': 2.6726582050323486} +02/24/2022 19:47:45 - INFO - codeparrot_training - Step 14073: {'lr': 0.0004259300702487806, 'samples': 7205888, 'steps': 14073, 'loss/train': 1.5380879640579224} +02/24/2022 19:47:52 - INFO - codeparrot_training - Step 14074: {'lr': 0.00042591844471696005, 'samples': 7206400, 'steps': 14074, 'loss/train': 1.6843796968460083} +02/24/2022 19:47:56 - INFO - codeparrot_training - Step 14075: {'lr': 0.00042590681843156073, 'samples': 7206912, 'steps': 14075, 'loss/train': 1.7758569717407227} +02/24/2022 19:48:01 - INFO - codeparrot_training - Step 14076: {'lr': 0.00042589519139263246, 'samples': 7207424, 'steps': 14076, 'loss/train': 1.8768200874328613} +02/24/2022 19:48:05 - INFO - codeparrot_training - Step 14077: {'lr': 0.0004258835636002251, 'samples': 7207936, 'steps': 14077, 'loss/train': 2.116335391998291} +02/24/2022 19:48:10 - INFO - codeparrot_training - Step 14078: {'lr': 0.0004258719350543883, 'samples': 7208448, 'steps': 14078, 'loss/train': 2.2044074535369873} +02/24/2022 19:48:14 - INFO - codeparrot_training - Step 14079: {'lr': 0.00042586030575517196, 'samples': 7208960, 'steps': 14079, 'loss/train': 1.9954184293746948} +02/24/2022 19:48:19 - INFO - codeparrot_training - Step 14080: {'lr': 0.00042584867570262595, 'samples': 7209472, 'steps': 14080, 'loss/train': 2.493478298187256} +02/24/2022 19:48:23 - INFO - codeparrot_training - Step 14081: {'lr': 0.00042583704489680007, 'samples': 7209984, 'steps': 14081, 'loss/train': 1.6662728786468506} +02/24/2022 19:48:29 - INFO - codeparrot_training - Step 14082: {'lr': 0.00042582541333774414, 'samples': 7210496, 'steps': 14082, 'loss/train': 1.7109266519546509} +02/24/2022 19:48:32 - INFO - codeparrot_training - Step 14083: {'lr': 0.0004258137810255079, 'samples': 7211008, 'steps': 14083, 'loss/train': 2.7853987216949463} +02/24/2022 19:48:38 - INFO - codeparrot_training - Step 14084: {'lr': 0.0004258021479601414, 'samples': 7211520, 'steps': 14084, 'loss/train': 1.8407634496688843} +02/24/2022 19:48:41 - INFO - codeparrot_training - Step 14085: {'lr': 0.00042579051414169417, 'samples': 7212032, 'steps': 14085, 'loss/train': 1.635709524154663} +02/24/2022 19:48:47 - INFO - codeparrot_training - Step 14086: {'lr': 0.0004257788795702162, 'samples': 7212544, 'steps': 14086, 'loss/train': 2.503282308578491} +02/24/2022 19:48:50 - INFO - codeparrot_training - Step 14087: {'lr': 0.0004257672442457574, 'samples': 7213056, 'steps': 14087, 'loss/train': 2.4399311542510986} +02/24/2022 19:48:56 - INFO - codeparrot_training - Step 14088: {'lr': 0.00042575560816836755, 'samples': 7213568, 'steps': 14088, 'loss/train': 2.224973201751709} +02/24/2022 19:48:59 - INFO - codeparrot_training - Step 14089: {'lr': 0.00042574397133809646, 'samples': 7214080, 'steps': 14089, 'loss/train': 2.33028244972229} +02/24/2022 19:49:07 - INFO - codeparrot_training - Step 14090: {'lr': 0.000425732333754994, 'samples': 7214592, 'steps': 14090, 'loss/train': 1.267633080482483} +02/24/2022 19:49:10 - INFO - codeparrot_training - Step 14091: {'lr': 0.00042572069541911, 'samples': 7215104, 'steps': 14091, 'loss/train': 1.4393713474273682} +02/24/2022 19:49:16 - INFO - codeparrot_training - Step 14092: {'lr': 0.0004257090563304943, 'samples': 7215616, 'steps': 14092, 'loss/train': 1.8586256504058838} +02/24/2022 19:49:19 - INFO - codeparrot_training - Step 14093: {'lr': 0.0004256974164891969, 'samples': 7216128, 'steps': 14093, 'loss/train': 2.453240156173706} +02/24/2022 19:49:25 - INFO - codeparrot_training - Step 14094: {'lr': 0.00042568577589526744, 'samples': 7216640, 'steps': 14094, 'loss/train': 3.355185031890869} +02/24/2022 19:49:28 - INFO - codeparrot_training - Step 14095: {'lr': 0.00042567413454875605, 'samples': 7217152, 'steps': 14095, 'loss/train': 2.238398790359497} +02/24/2022 19:49:35 - INFO - codeparrot_training - Step 14096: {'lr': 0.00042566249244971235, 'samples': 7217664, 'steps': 14096, 'loss/train': 1.4047503471374512} +02/24/2022 19:49:38 - INFO - codeparrot_training - Step 14097: {'lr': 0.0004256508495981863, 'samples': 7218176, 'steps': 14097, 'loss/train': 1.0151554346084595} +02/24/2022 19:49:41 - INFO - codeparrot_training - Step 14098: {'lr': 0.00042563920599422776, 'samples': 7218688, 'steps': 14098, 'loss/train': 1.2435219287872314} +02/24/2022 19:49:47 - INFO - codeparrot_training - Step 14099: {'lr': 0.00042562756163788673, 'samples': 7219200, 'steps': 14099, 'loss/train': 1.9987332820892334} +02/24/2022 19:49:50 - INFO - codeparrot_training - Step 14100: {'lr': 0.00042561591652921294, 'samples': 7219712, 'steps': 14100, 'loss/train': 1.8893072605133057} +02/24/2022 19:49:58 - INFO - codeparrot_training - Step 14101: {'lr': 0.00042560427066825636, 'samples': 7220224, 'steps': 14101, 'loss/train': 2.0572547912597656} +02/24/2022 19:50:01 - INFO - codeparrot_training - Step 14102: {'lr': 0.0004255926240550668, 'samples': 7220736, 'steps': 14102, 'loss/train': 1.5312286615371704} +02/24/2022 19:50:07 - INFO - codeparrot_training - Step 14103: {'lr': 0.0004255809766896942, 'samples': 7221248, 'steps': 14103, 'loss/train': 2.3437764644622803} +02/24/2022 19:50:10 - INFO - codeparrot_training - Step 14104: {'lr': 0.00042556932857218855, 'samples': 7221760, 'steps': 14104, 'loss/train': 1.6085742712020874} +02/24/2022 19:50:16 - INFO - codeparrot_training - Step 14105: {'lr': 0.0004255576797025995, 'samples': 7222272, 'steps': 14105, 'loss/train': 2.0601792335510254} +02/24/2022 19:50:19 - INFO - codeparrot_training - Step 14106: {'lr': 0.0004255460300809772, 'samples': 7222784, 'steps': 14106, 'loss/train': 2.894132375717163} +02/24/2022 19:50:25 - INFO - codeparrot_training - Step 14107: {'lr': 0.00042553437970737143, 'samples': 7223296, 'steps': 14107, 'loss/train': 1.829621434211731} +02/24/2022 19:50:28 - INFO - codeparrot_training - Step 14108: {'lr': 0.00042552272858183203, 'samples': 7223808, 'steps': 14108, 'loss/train': 1.725304365158081} +02/24/2022 19:50:34 - INFO - codeparrot_training - Step 14109: {'lr': 0.0004255110767044091, 'samples': 7224320, 'steps': 14109, 'loss/train': 2.5196967124938965} +02/24/2022 19:50:37 - INFO - codeparrot_training - Step 14110: {'lr': 0.0004254994240751524, 'samples': 7224832, 'steps': 14110, 'loss/train': 2.227799654006958} +02/24/2022 19:50:45 - INFO - codeparrot_training - Step 14111: {'lr': 0.00042548777069411194, 'samples': 7225344, 'steps': 14111, 'loss/train': 2.149305582046509} +02/24/2022 19:50:48 - INFO - codeparrot_training - Step 14112: {'lr': 0.0004254761165613375, 'samples': 7225856, 'steps': 14112, 'loss/train': 2.230198860168457} +02/24/2022 19:50:54 - INFO - codeparrot_training - Step 14113: {'lr': 0.00042546446167687914, 'samples': 7226368, 'steps': 14113, 'loss/train': 0.24831725656986237} +02/24/2022 19:50:57 - INFO - codeparrot_training - Step 14114: {'lr': 0.00042545280604078673, 'samples': 7226880, 'steps': 14114, 'loss/train': 1.4795970916748047} +02/24/2022 19:51:03 - INFO - codeparrot_training - Step 14115: {'lr': 0.0004254411496531103, 'samples': 7227392, 'steps': 14115, 'loss/train': 2.3489580154418945} +02/24/2022 19:51:08 - INFO - codeparrot_training - Step 14116: {'lr': 0.0004254294925138996, 'samples': 7227904, 'steps': 14116, 'loss/train': 2.704899549484253} +02/24/2022 19:51:12 - INFO - codeparrot_training - Step 14117: {'lr': 0.00042541783462320473, 'samples': 7228416, 'steps': 14117, 'loss/train': 0.9145331978797913} +02/24/2022 19:51:17 - INFO - codeparrot_training - Step 14118: {'lr': 0.00042540617598107544, 'samples': 7228928, 'steps': 14118, 'loss/train': 1.9691981077194214} +02/24/2022 19:51:21 - INFO - codeparrot_training - Step 14119: {'lr': 0.00042539451658756195, 'samples': 7229440, 'steps': 14119, 'loss/train': 1.7203046083450317} +02/24/2022 19:51:26 - INFO - codeparrot_training - Step 14120: {'lr': 0.000425382856442714, 'samples': 7229952, 'steps': 14120, 'loss/train': 1.2800098657608032} +02/24/2022 19:51:30 - INFO - codeparrot_training - Step 14121: {'lr': 0.0004253711955465815, 'samples': 7230464, 'steps': 14121, 'loss/train': 2.357043504714966} +02/24/2022 19:51:37 - INFO - codeparrot_training - Step 14122: {'lr': 0.00042535953389921454, 'samples': 7230976, 'steps': 14122, 'loss/train': 2.3500945568084717} +02/24/2022 19:51:40 - INFO - codeparrot_training - Step 14123: {'lr': 0.000425347871500663, 'samples': 7231488, 'steps': 14123, 'loss/train': 1.2270649671554565} +02/24/2022 19:51:46 - INFO - codeparrot_training - Step 14124: {'lr': 0.0004253362083509769, 'samples': 7232000, 'steps': 14124, 'loss/train': 2.0851705074310303} +02/24/2022 19:51:49 - INFO - codeparrot_training - Step 14125: {'lr': 0.0004253245444502061, 'samples': 7232512, 'steps': 14125, 'loss/train': 1.5507687330245972} +02/24/2022 19:51:55 - INFO - codeparrot_training - Step 14126: {'lr': 0.00042531287979840065, 'samples': 7233024, 'steps': 14126, 'loss/train': 2.677147626876831} +02/24/2022 19:51:58 - INFO - codeparrot_training - Step 14127: {'lr': 0.0004253012143956105, 'samples': 7233536, 'steps': 14127, 'loss/train': 2.2290821075439453} +02/24/2022 19:52:04 - INFO - codeparrot_training - Step 14128: {'lr': 0.0004252895482418856, 'samples': 7234048, 'steps': 14128, 'loss/train': 1.985878348350525} +02/24/2022 19:52:07 - INFO - codeparrot_training - Step 14129: {'lr': 0.00042527788133727595, 'samples': 7234560, 'steps': 14129, 'loss/train': 1.995582938194275} +02/24/2022 19:52:13 - INFO - codeparrot_training - Step 14130: {'lr': 0.0004252662136818315, 'samples': 7235072, 'steps': 14130, 'loss/train': 1.4644620418548584} +02/24/2022 19:52:17 - INFO - codeparrot_training - Step 14131: {'lr': 0.00042525454527560225, 'samples': 7235584, 'steps': 14131, 'loss/train': 1.3274897336959839} +02/24/2022 19:52:20 - INFO - codeparrot_training - Step 14132: {'lr': 0.0004252428761186382, 'samples': 7236096, 'steps': 14132, 'loss/train': 0.8161274194717407} +02/24/2022 19:52:26 - INFO - codeparrot_training - Step 14133: {'lr': 0.00042523120621098924, 'samples': 7236608, 'steps': 14133, 'loss/train': 2.4763269424438477} +02/24/2022 19:52:31 - INFO - codeparrot_training - Step 14134: {'lr': 0.0004252195355527055, 'samples': 7237120, 'steps': 14134, 'loss/train': 2.014841079711914} +02/24/2022 19:52:35 - INFO - codeparrot_training - Step 14135: {'lr': 0.0004252078641438369, 'samples': 7237632, 'steps': 14135, 'loss/train': 1.8089255094528198} +02/24/2022 19:52:42 - INFO - codeparrot_training - Step 14136: {'lr': 0.00042519619198443337, 'samples': 7238144, 'steps': 14136, 'loss/train': 2.2746827602386475} +02/24/2022 19:52:45 - INFO - codeparrot_training - Step 14137: {'lr': 0.0004251845190745451, 'samples': 7238656, 'steps': 14137, 'loss/train': 3.2400434017181396} +02/24/2022 19:52:51 - INFO - codeparrot_training - Step 14138: {'lr': 0.00042517284541422195, 'samples': 7239168, 'steps': 14138, 'loss/train': 2.6276497840881348} +02/24/2022 19:52:54 - INFO - codeparrot_training - Step 14139: {'lr': 0.00042516117100351394, 'samples': 7239680, 'steps': 14139, 'loss/train': 1.9705607891082764} +02/24/2022 19:53:00 - INFO - codeparrot_training - Step 14140: {'lr': 0.0004251494958424711, 'samples': 7240192, 'steps': 14140, 'loss/train': 1.336334466934204} +02/24/2022 19:53:03 - INFO - codeparrot_training - Step 14141: {'lr': 0.0004251378199311434, 'samples': 7240704, 'steps': 14141, 'loss/train': 2.1847548484802246} +02/24/2022 19:53:09 - INFO - codeparrot_training - Step 14142: {'lr': 0.0004251261432695809, 'samples': 7241216, 'steps': 14142, 'loss/train': 2.5474154949188232} +02/24/2022 19:53:12 - INFO - codeparrot_training - Step 14143: {'lr': 0.00042511446585783363, 'samples': 7241728, 'steps': 14143, 'loss/train': 2.114548921585083} +02/24/2022 19:53:18 - INFO - codeparrot_training - Step 14144: {'lr': 0.0004251027876959516, 'samples': 7242240, 'steps': 14144, 'loss/train': 3.132277727127075} +02/24/2022 19:53:21 - INFO - codeparrot_training - Step 14145: {'lr': 0.0004250911087839848, 'samples': 7242752, 'steps': 14145, 'loss/train': 2.365048408508301} +02/24/2022 19:53:29 - INFO - codeparrot_training - Step 14146: {'lr': 0.0004250794291219833, 'samples': 7243264, 'steps': 14146, 'loss/train': 2.6687538623809814} +02/24/2022 19:53:32 - INFO - codeparrot_training - Step 14147: {'lr': 0.00042506774870999716, 'samples': 7243776, 'steps': 14147, 'loss/train': 1.9452260732650757} +02/24/2022 19:53:38 - INFO - codeparrot_training - Step 14148: {'lr': 0.00042505606754807634, 'samples': 7244288, 'steps': 14148, 'loss/train': 2.4273617267608643} +02/24/2022 19:53:41 - INFO - codeparrot_training - Step 14149: {'lr': 0.00042504438563627093, 'samples': 7244800, 'steps': 14149, 'loss/train': 1.8435360193252563} +02/24/2022 19:53:47 - INFO - codeparrot_training - Step 14150: {'lr': 0.0004250327029746309, 'samples': 7245312, 'steps': 14150, 'loss/train': 2.2571325302124023} +02/24/2022 19:53:50 - INFO - codeparrot_training - Step 14151: {'lr': 0.0004250210195632064, 'samples': 7245824, 'steps': 14151, 'loss/train': 1.7857688665390015} +02/24/2022 19:53:56 - INFO - codeparrot_training - Step 14152: {'lr': 0.00042500933540204745, 'samples': 7246336, 'steps': 14152, 'loss/train': 2.543046474456787} +02/24/2022 19:53:59 - INFO - codeparrot_training - Step 14153: {'lr': 0.00042499765049120396, 'samples': 7246848, 'steps': 14153, 'loss/train': 1.796617865562439} +02/24/2022 19:54:05 - INFO - codeparrot_training - Step 14154: {'lr': 0.0004249859648307263, 'samples': 7247360, 'steps': 14154, 'loss/train': 2.202575445175171} +02/24/2022 19:54:08 - INFO - codeparrot_training - Step 14155: {'lr': 0.0004249742784206642, 'samples': 7247872, 'steps': 14155, 'loss/train': 1.7143428325653076} +02/24/2022 19:54:14 - INFO - codeparrot_training - Step 14156: {'lr': 0.00042496259126106786, 'samples': 7248384, 'steps': 14156, 'loss/train': 0.9983693957328796} +02/24/2022 19:54:18 - INFO - codeparrot_training - Step 14157: {'lr': 0.00042495090335198735, 'samples': 7248896, 'steps': 14157, 'loss/train': 3.1260788440704346} +02/24/2022 19:54:25 - INFO - codeparrot_training - Step 14158: {'lr': 0.0004249392146934726, 'samples': 7249408, 'steps': 14158, 'loss/train': 2.291740894317627} +02/24/2022 19:54:28 - INFO - codeparrot_training - Step 14159: {'lr': 0.000424927525285574, 'samples': 7249920, 'steps': 14159, 'loss/train': 2.944366931915283} +02/24/2022 19:54:34 - INFO - codeparrot_training - Step 14160: {'lr': 0.00042491583512834137, 'samples': 7250432, 'steps': 14160, 'loss/train': 2.893218994140625} +02/24/2022 19:54:37 - INFO - codeparrot_training - Step 14161: {'lr': 0.00042490414422182484, 'samples': 7250944, 'steps': 14161, 'loss/train': 2.6081125736236572} +02/24/2022 19:54:43 - INFO - codeparrot_training - Step 14162: {'lr': 0.00042489245256607447, 'samples': 7251456, 'steps': 14162, 'loss/train': 2.4035706520080566} +02/24/2022 19:54:46 - INFO - codeparrot_training - Step 14163: {'lr': 0.0004248807601611404, 'samples': 7251968, 'steps': 14163, 'loss/train': 2.323502779006958} +02/24/2022 19:54:52 - INFO - codeparrot_training - Step 14164: {'lr': 0.0004248690670070726, 'samples': 7252480, 'steps': 14164, 'loss/train': 0.11929647624492645} +02/24/2022 19:54:55 - INFO - codeparrot_training - Step 14165: {'lr': 0.00042485737310392135, 'samples': 7252992, 'steps': 14165, 'loss/train': 1.1046714782714844} +02/24/2022 19:55:01 - INFO - codeparrot_training - Step 14166: {'lr': 0.0004248456784517366, 'samples': 7253504, 'steps': 14166, 'loss/train': 1.9417747259140015} +02/24/2022 19:55:04 - INFO - codeparrot_training - Step 14167: {'lr': 0.00042483398305056847, 'samples': 7254016, 'steps': 14167, 'loss/train': 1.3783197402954102} +02/24/2022 19:55:12 - INFO - codeparrot_training - Step 14168: {'lr': 0.0004248222869004671, 'samples': 7254528, 'steps': 14168, 'loss/train': 2.4155406951904297} +02/24/2022 19:55:15 - INFO - codeparrot_training - Step 14169: {'lr': 0.00042481059000148253, 'samples': 7255040, 'steps': 14169, 'loss/train': 1.2719244956970215} +02/24/2022 19:55:21 - INFO - codeparrot_training - Step 14170: {'lr': 0.00042479889235366486, 'samples': 7255552, 'steps': 14170, 'loss/train': 1.524439811706543} +02/24/2022 19:55:24 - INFO - codeparrot_training - Step 14171: {'lr': 0.0004247871939570643, 'samples': 7256064, 'steps': 14171, 'loss/train': 1.6970051527023315} +02/24/2022 19:55:30 - INFO - codeparrot_training - Step 14172: {'lr': 0.00042477549481173093, 'samples': 7256576, 'steps': 14172, 'loss/train': 0.6546095609664917} +02/24/2022 19:55:34 - INFO - codeparrot_training - Step 14173: {'lr': 0.00042476379491771475, 'samples': 7257088, 'steps': 14173, 'loss/train': 2.554852247238159} +02/24/2022 19:55:37 - INFO - codeparrot_training - Step 14174: {'lr': 0.00042475209427506614, 'samples': 7257600, 'steps': 14174, 'loss/train': 2.718362331390381} +02/24/2022 19:55:43 - INFO - codeparrot_training - Step 14175: {'lr': 0.00042474039288383484, 'samples': 7258112, 'steps': 14175, 'loss/train': 1.319974660873413} +02/24/2022 19:55:46 - INFO - codeparrot_training - Step 14176: {'lr': 0.0004247286907440713, 'samples': 7258624, 'steps': 14176, 'loss/train': 1.9846550226211548} +02/24/2022 19:55:52 - INFO - codeparrot_training - Step 14177: {'lr': 0.00042471698785582546, 'samples': 7259136, 'steps': 14177, 'loss/train': 1.2104663848876953} +02/24/2022 19:55:57 - INFO - codeparrot_training - Step 14178: {'lr': 0.00042470528421914767, 'samples': 7259648, 'steps': 14178, 'loss/train': 1.5844451189041138} +02/24/2022 19:56:01 - INFO - codeparrot_training - Step 14179: {'lr': 0.0004246935798340877, 'samples': 7260160, 'steps': 14179, 'loss/train': 1.9999110698699951} +02/24/2022 19:56:06 - INFO - codeparrot_training - Step 14180: {'lr': 0.0004246818747006961, 'samples': 7260672, 'steps': 14180, 'loss/train': 0.9666487574577332} +02/24/2022 19:56:10 - INFO - codeparrot_training - Step 14181: {'lr': 0.0004246701688190227, 'samples': 7261184, 'steps': 14181, 'loss/train': 2.428150177001953} +02/24/2022 19:56:17 - INFO - codeparrot_training - Step 14182: {'lr': 0.0004246584621891179, 'samples': 7261696, 'steps': 14182, 'loss/train': 2.5712311267852783} +02/24/2022 19:56:21 - INFO - codeparrot_training - Step 14183: {'lr': 0.00042464675481103154, 'samples': 7262208, 'steps': 14183, 'loss/train': 0.8073933720588684} +02/24/2022 19:56:26 - INFO - codeparrot_training - Step 14184: {'lr': 0.00042463504668481403, 'samples': 7262720, 'steps': 14184, 'loss/train': 1.8930400609970093} +02/24/2022 19:56:30 - INFO - codeparrot_training - Step 14185: {'lr': 0.00042462333781051535, 'samples': 7263232, 'steps': 14185, 'loss/train': 1.7692681550979614} +02/24/2022 19:56:33 - INFO - codeparrot_training - Step 14186: {'lr': 0.00042461162818818585, 'samples': 7263744, 'steps': 14186, 'loss/train': 0.5340458750724792} +02/24/2022 19:56:39 - INFO - codeparrot_training - Step 14187: {'lr': 0.0004245999178178755, 'samples': 7264256, 'steps': 14187, 'loss/train': 1.3101933002471924} +02/24/2022 19:56:44 - INFO - codeparrot_training - Step 14188: {'lr': 0.0004245882066996346, 'samples': 7264768, 'steps': 14188, 'loss/train': 2.090367317199707} +02/24/2022 19:56:48 - INFO - codeparrot_training - Step 14189: {'lr': 0.0004245764948335132, 'samples': 7265280, 'steps': 14189, 'loss/train': 7.02206563949585} +02/24/2022 19:56:54 - INFO - codeparrot_training - Step 14190: {'lr': 0.0004245647822195616, 'samples': 7265792, 'steps': 14190, 'loss/train': 2.1099836826324463} +02/24/2022 19:56:57 - INFO - codeparrot_training - Step 14191: {'lr': 0.00042455306885782985, 'samples': 7266304, 'steps': 14191, 'loss/train': 2.362506866455078} +02/24/2022 19:57:02 - INFO - codeparrot_training - Step 14192: {'lr': 0.00042454135474836817, 'samples': 7266816, 'steps': 14192, 'loss/train': 1.3173161745071411} +02/24/2022 19:57:06 - INFO - codeparrot_training - Step 14193: {'lr': 0.00042452963989122685, 'samples': 7267328, 'steps': 14193, 'loss/train': 1.62032151222229} +02/24/2022 19:57:13 - INFO - codeparrot_training - Step 14194: {'lr': 0.00042451792428645587, 'samples': 7267840, 'steps': 14194, 'loss/train': 2.348407030105591} +02/24/2022 19:57:17 - INFO - codeparrot_training - Step 14195: {'lr': 0.0004245062079341055, 'samples': 7268352, 'steps': 14195, 'loss/train': 1.8568233251571655} +02/24/2022 19:57:22 - INFO - codeparrot_training - Step 14196: {'lr': 0.000424494490834226, 'samples': 7268864, 'steps': 14196, 'loss/train': 1.1280829906463623} +02/24/2022 19:57:26 - INFO - codeparrot_training - Step 14197: {'lr': 0.0004244827729868675, 'samples': 7269376, 'steps': 14197, 'loss/train': 0.7390331625938416} +02/24/2022 19:57:31 - INFO - codeparrot_training - Step 14198: {'lr': 0.00042447105439208024, 'samples': 7269888, 'steps': 14198, 'loss/train': 1.3491523265838623} +02/24/2022 19:57:35 - INFO - codeparrot_training - Step 14199: {'lr': 0.0004244593350499143, 'samples': 7270400, 'steps': 14199, 'loss/train': 1.9855657815933228} +02/24/2022 19:57:40 - INFO - codeparrot_training - Step 14200: {'lr': 0.00042444761496042004, 'samples': 7270912, 'steps': 14200, 'loss/train': 2.601789951324463} +02/24/2022 19:57:44 - INFO - codeparrot_training - Step 14201: {'lr': 0.0004244358941236476, 'samples': 7271424, 'steps': 14201, 'loss/train': 1.7302453517913818} +02/24/2022 19:57:49 - INFO - codeparrot_training - Step 14202: {'lr': 0.00042442417253964713, 'samples': 7271936, 'steps': 14202, 'loss/train': 1.6395988464355469} +02/24/2022 19:57:53 - INFO - codeparrot_training - Step 14203: {'lr': 0.00042441245020846885, 'samples': 7272448, 'steps': 14203, 'loss/train': 1.1476298570632935} +02/24/2022 19:58:00 - INFO - codeparrot_training - Step 14204: {'lr': 0.00042440072713016317, 'samples': 7272960, 'steps': 14204, 'loss/train': 1.3235162496566772} +02/24/2022 19:58:04 - INFO - codeparrot_training - Step 14205: {'lr': 0.00042438900330478, 'samples': 7273472, 'steps': 14205, 'loss/train': 0.17766611278057098} +02/24/2022 19:58:09 - INFO - codeparrot_training - Step 14206: {'lr': 0.00042437727873236974, 'samples': 7273984, 'steps': 14206, 'loss/train': 1.7074435949325562} +02/24/2022 19:58:13 - INFO - codeparrot_training - Step 14207: {'lr': 0.00042436555341298266, 'samples': 7274496, 'steps': 14207, 'loss/train': 2.560800790786743} +02/24/2022 19:58:18 - INFO - codeparrot_training - Step 14208: {'lr': 0.0004243538273466689, 'samples': 7275008, 'steps': 14208, 'loss/train': 2.2613449096679688} +02/24/2022 19:58:22 - INFO - codeparrot_training - Step 14209: {'lr': 0.00042434210053347865, 'samples': 7275520, 'steps': 14209, 'loss/train': 1.077513337135315} +02/24/2022 19:58:27 - INFO - codeparrot_training - Step 14210: {'lr': 0.0004243303729734622, 'samples': 7276032, 'steps': 14210, 'loss/train': 0.9633165001869202} +02/24/2022 19:58:31 - INFO - codeparrot_training - Step 14211: {'lr': 0.0004243186446666699, 'samples': 7276544, 'steps': 14211, 'loss/train': 1.5966123342514038} +02/24/2022 19:58:36 - INFO - codeparrot_training - Step 14212: {'lr': 0.00042430691561315176, 'samples': 7277056, 'steps': 14212, 'loss/train': 1.075243592262268} +02/24/2022 19:58:40 - INFO - codeparrot_training - Step 14213: {'lr': 0.0004242951858129582, 'samples': 7277568, 'steps': 14213, 'loss/train': 2.289094924926758} +02/24/2022 19:58:47 - INFO - codeparrot_training - Step 14214: {'lr': 0.0004242834552661394, 'samples': 7278080, 'steps': 14214, 'loss/train': 1.7578402757644653} +02/24/2022 19:58:50 - INFO - codeparrot_training - Step 14215: {'lr': 0.0004242717239727456, 'samples': 7278592, 'steps': 14215, 'loss/train': 1.8033943176269531} +02/24/2022 19:58:56 - INFO - codeparrot_training - Step 14216: {'lr': 0.00042425999193282713, 'samples': 7279104, 'steps': 14216, 'loss/train': 1.9938198328018188} +02/24/2022 19:58:59 - INFO - codeparrot_training - Step 14217: {'lr': 0.0004242482591464342, 'samples': 7279616, 'steps': 14217, 'loss/train': 1.4476081132888794} +02/24/2022 19:59:05 - INFO - codeparrot_training - Step 14218: {'lr': 0.0004242365256136169, 'samples': 7280128, 'steps': 14218, 'loss/train': 1.5051683187484741} +02/24/2022 19:59:09 - INFO - codeparrot_training - Step 14219: {'lr': 0.00042422479133442573, 'samples': 7280640, 'steps': 14219, 'loss/train': 2.4768805503845215} +02/24/2022 19:59:14 - INFO - codeparrot_training - Step 14220: {'lr': 0.00042421305630891093, 'samples': 7281152, 'steps': 14220, 'loss/train': 2.524547576904297} +02/24/2022 19:59:17 - INFO - codeparrot_training - Step 14221: {'lr': 0.0004242013205371227, 'samples': 7281664, 'steps': 14221, 'loss/train': 2.484830856323242} +02/24/2022 19:59:23 - INFO - codeparrot_training - Step 14222: {'lr': 0.00042418958401911134, 'samples': 7282176, 'steps': 14222, 'loss/train': 2.2180001735687256} +02/24/2022 19:59:27 - INFO - codeparrot_training - Step 14223: {'lr': 0.000424177846754927, 'samples': 7282688, 'steps': 14223, 'loss/train': 2.3699991703033447} +02/24/2022 19:59:32 - INFO - codeparrot_training - Step 14224: {'lr': 0.0004241661087446202, 'samples': 7283200, 'steps': 14224, 'loss/train': 1.6239110231399536} +02/24/2022 19:59:36 - INFO - codeparrot_training - Step 14225: {'lr': 0.00042415436998824105, 'samples': 7283712, 'steps': 14225, 'loss/train': 2.4087109565734863} +02/24/2022 19:59:41 - INFO - codeparrot_training - Step 14226: {'lr': 0.0004241426304858399, 'samples': 7284224, 'steps': 14226, 'loss/train': 2.291109561920166} +02/24/2022 19:59:45 - INFO - codeparrot_training - Step 14227: {'lr': 0.00042413089023746696, 'samples': 7284736, 'steps': 14227, 'loss/train': 2.7332041263580322} +02/24/2022 19:59:50 - INFO - codeparrot_training - Step 14228: {'lr': 0.00042411914924317265, 'samples': 7285248, 'steps': 14228, 'loss/train': 0.664466381072998} +02/24/2022 19:59:54 - INFO - codeparrot_training - Step 14229: {'lr': 0.00042410740750300715, 'samples': 7285760, 'steps': 14229, 'loss/train': 1.952970266342163} +02/24/2022 20:00:01 - INFO - codeparrot_training - Step 14230: {'lr': 0.0004240956650170208, 'samples': 7286272, 'steps': 14230, 'loss/train': 2.496864080429077} +02/24/2022 20:00:04 - INFO - codeparrot_training - Step 14231: {'lr': 0.00042408392178526396, 'samples': 7286784, 'steps': 14231, 'loss/train': 2.1443912982940674} +02/24/2022 20:00:10 - INFO - codeparrot_training - Step 14232: {'lr': 0.0004240721778077868, 'samples': 7287296, 'steps': 14232, 'loss/train': 2.285691022872925} +02/24/2022 20:00:13 - INFO - codeparrot_training - Step 14233: {'lr': 0.0004240604330846397, 'samples': 7287808, 'steps': 14233, 'loss/train': 2.256314277648926} +02/24/2022 20:00:19 - INFO - codeparrot_training - Step 14234: {'lr': 0.000424048687615873, 'samples': 7288320, 'steps': 14234, 'loss/train': 2.262995958328247} +02/24/2022 20:00:22 - INFO - codeparrot_training - Step 14235: {'lr': 0.00042403694140153705, 'samples': 7288832, 'steps': 14235, 'loss/train': 1.2854812145233154} +02/24/2022 20:00:28 - INFO - codeparrot_training - Step 14236: {'lr': 0.00042402519444168207, 'samples': 7289344, 'steps': 14236, 'loss/train': 1.9728502035140991} +02/24/2022 20:00:31 - INFO - codeparrot_training - Step 14237: {'lr': 0.00042401344673635846, 'samples': 7289856, 'steps': 14237, 'loss/train': 2.1000592708587646} +02/24/2022 20:00:37 - INFO - codeparrot_training - Step 14238: {'lr': 0.00042400169828561636, 'samples': 7290368, 'steps': 14238, 'loss/train': 0.6835358738899231} +02/24/2022 20:00:40 - INFO - codeparrot_training - Step 14239: {'lr': 0.0004239899490895063, 'samples': 7290880, 'steps': 14239, 'loss/train': 2.3722028732299805} +02/24/2022 20:00:48 - INFO - codeparrot_training - Step 14240: {'lr': 0.00042397819914807855, 'samples': 7291392, 'steps': 14240, 'loss/train': 2.6693899631500244} +02/24/2022 20:00:51 - INFO - codeparrot_training - Step 14241: {'lr': 0.00042396644846138355, 'samples': 7291904, 'steps': 14241, 'loss/train': 1.8931578397750854} +02/24/2022 20:00:57 - INFO - codeparrot_training - Step 14242: {'lr': 0.00042395469702947135, 'samples': 7292416, 'steps': 14242, 'loss/train': 0.32943904399871826} +02/24/2022 20:01:00 - INFO - codeparrot_training - Step 14243: {'lr': 0.0004239429448523925, 'samples': 7292928, 'steps': 14243, 'loss/train': 1.087774634361267} +02/24/2022 20:01:06 - INFO - codeparrot_training - Step 14244: {'lr': 0.00042393119193019743, 'samples': 7293440, 'steps': 14244, 'loss/train': 1.0334482192993164} +02/24/2022 20:01:09 - INFO - codeparrot_training - Step 14245: {'lr': 0.00042391943826293623, 'samples': 7293952, 'steps': 14245, 'loss/train': 0.9405443072319031} +02/24/2022 20:01:15 - INFO - codeparrot_training - Step 14246: {'lr': 0.0004239076838506595, 'samples': 7294464, 'steps': 14246, 'loss/train': 0.5584782958030701} +02/24/2022 20:01:18 - INFO - codeparrot_training - Step 14247: {'lr': 0.0004238959286934174, 'samples': 7294976, 'steps': 14247, 'loss/train': 2.3326168060302734} +02/24/2022 20:01:23 - INFO - codeparrot_training - Step 14248: {'lr': 0.0004238841727912603, 'samples': 7295488, 'steps': 14248, 'loss/train': 1.9053739309310913} +02/24/2022 20:01:27 - INFO - codeparrot_training - Step 14249: {'lr': 0.00042387241614423875, 'samples': 7296000, 'steps': 14249, 'loss/train': 2.0522050857543945} +02/24/2022 20:01:34 - INFO - codeparrot_training - Step 14250: {'lr': 0.0004238606587524029, 'samples': 7296512, 'steps': 14250, 'loss/train': 2.627589464187622} +02/24/2022 20:01:38 - INFO - codeparrot_training - Step 14251: {'lr': 0.0004238489006158033, 'samples': 7297024, 'steps': 14251, 'loss/train': 2.3849539756774902} +02/24/2022 20:01:43 - INFO - codeparrot_training - Step 14252: {'lr': 0.00042383714173449007, 'samples': 7297536, 'steps': 14252, 'loss/train': 1.99263334274292} +02/24/2022 20:01:47 - INFO - codeparrot_training - Step 14253: {'lr': 0.0004238253821085138, 'samples': 7298048, 'steps': 14253, 'loss/train': 1.7893986701965332} +02/24/2022 20:01:52 - INFO - codeparrot_training - Step 14254: {'lr': 0.00042381362173792475, 'samples': 7298560, 'steps': 14254, 'loss/train': 1.9920262098312378} +02/24/2022 20:01:56 - INFO - codeparrot_training - Step 14255: {'lr': 0.00042380186062277337, 'samples': 7299072, 'steps': 14255, 'loss/train': 1.7049756050109863} +02/24/2022 20:02:02 - INFO - codeparrot_training - Step 14256: {'lr': 0.00042379009876311, 'samples': 7299584, 'steps': 14256, 'loss/train': 2.2791190147399902} +02/24/2022 20:02:06 - INFO - codeparrot_training - Step 14257: {'lr': 0.00042377833615898496, 'samples': 7300096, 'steps': 14257, 'loss/train': 0.9907761216163635} +02/24/2022 20:02:09 - INFO - codeparrot_training - Step 14258: {'lr': 0.0004237665728104488, 'samples': 7300608, 'steps': 14258, 'loss/train': 1.5103892087936401} +02/24/2022 20:02:14 - INFO - codeparrot_training - Step 14259: {'lr': 0.0004237548087175518, 'samples': 7301120, 'steps': 14259, 'loss/train': 1.8606855869293213} +02/24/2022 20:02:18 - INFO - codeparrot_training - Step 14260: {'lr': 0.00042374304388034437, 'samples': 7301632, 'steps': 14260, 'loss/train': 1.6345964670181274} +02/24/2022 20:02:25 - INFO - codeparrot_training - Step 14261: {'lr': 0.00042373127829887694, 'samples': 7302144, 'steps': 14261, 'loss/train': 1.6888474225997925} +02/24/2022 20:02:29 - INFO - codeparrot_training - Step 14262: {'lr': 0.0004237195119731998, 'samples': 7302656, 'steps': 14262, 'loss/train': 3.3613123893737793} +02/24/2022 20:02:34 - INFO - codeparrot_training - Step 14263: {'lr': 0.0004237077449033635, 'samples': 7303168, 'steps': 14263, 'loss/train': 1.1451348066329956} +02/24/2022 20:02:38 - INFO - codeparrot_training - Step 14264: {'lr': 0.0004236959770894183, 'samples': 7303680, 'steps': 14264, 'loss/train': 2.098689317703247} +02/24/2022 20:02:43 - INFO - codeparrot_training - Step 14265: {'lr': 0.0004236842085314148, 'samples': 7304192, 'steps': 14265, 'loss/train': 1.7305004596710205} +02/24/2022 20:02:47 - INFO - codeparrot_training - Step 14266: {'lr': 0.0004236724392294032, 'samples': 7304704, 'steps': 14266, 'loss/train': 2.318592071533203} +02/24/2022 20:02:52 - INFO - codeparrot_training - Step 14267: {'lr': 0.0004236606691834341, 'samples': 7305216, 'steps': 14267, 'loss/train': 1.944257378578186} +02/24/2022 20:02:56 - INFO - codeparrot_training - Step 14268: {'lr': 0.0004236488983935578, 'samples': 7305728, 'steps': 14268, 'loss/train': 2.0166215896606445} +02/24/2022 20:03:01 - INFO - codeparrot_training - Step 14269: {'lr': 0.0004236371268598248, 'samples': 7306240, 'steps': 14269, 'loss/train': 2.130969524383545} +02/24/2022 20:03:05 - INFO - codeparrot_training - Step 14270: {'lr': 0.0004236253545822855, 'samples': 7306752, 'steps': 14270, 'loss/train': 1.1413593292236328} +02/24/2022 20:03:10 - INFO - codeparrot_training - Step 14271: {'lr': 0.00042361358156099016, 'samples': 7307264, 'steps': 14271, 'loss/train': 2.35860538482666} +02/24/2022 20:03:14 - INFO - codeparrot_training - Step 14272: {'lr': 0.0004236018077959895, 'samples': 7307776, 'steps': 14272, 'loss/train': 1.9288330078125} +02/24/2022 20:03:19 - INFO - codeparrot_training - Step 14273: {'lr': 0.00042359003328733383, 'samples': 7308288, 'steps': 14273, 'loss/train': 1.3647397756576538} +02/24/2022 20:03:23 - INFO - codeparrot_training - Step 14274: {'lr': 0.0004235782580350734, 'samples': 7308800, 'steps': 14274, 'loss/train': 2.0449132919311523} +02/24/2022 20:03:28 - INFO - codeparrot_training - Step 14275: {'lr': 0.0004235664820392591, 'samples': 7309312, 'steps': 14275, 'loss/train': 1.743349552154541} +02/24/2022 20:03:36 - INFO - codeparrot_training - Step 14276: {'lr': 0.0004235547052999409, 'samples': 7309824, 'steps': 14276, 'loss/train': 1.7291473150253296} +02/24/2022 20:03:39 - INFO - codeparrot_training - Step 14277: {'lr': 0.0004235429278171695, 'samples': 7310336, 'steps': 14277, 'loss/train': 2.043515682220459} +02/24/2022 20:03:43 - INFO - codeparrot_training - Step 14278: {'lr': 0.00042353114959099535, 'samples': 7310848, 'steps': 14278, 'loss/train': 1.9309711456298828} +02/24/2022 20:03:48 - INFO - codeparrot_training - Step 14279: {'lr': 0.0004235193706214688, 'samples': 7311360, 'steps': 14279, 'loss/train': 2.1979947090148926} +02/24/2022 20:03:52 - INFO - codeparrot_training - Step 14280: {'lr': 0.00042350759090864043, 'samples': 7311872, 'steps': 14280, 'loss/train': 1.9417647123336792} +02/24/2022 20:03:57 - INFO - codeparrot_training - Step 14281: {'lr': 0.00042349581045256055, 'samples': 7312384, 'steps': 14281, 'loss/train': 2.2637672424316406} +02/24/2022 20:04:01 - INFO - codeparrot_training - Step 14282: {'lr': 0.00042348402925327977, 'samples': 7312896, 'steps': 14282, 'loss/train': 2.0324196815490723} +02/24/2022 20:04:06 - INFO - codeparrot_training - Step 14283: {'lr': 0.00042347224731084854, 'samples': 7313408, 'steps': 14283, 'loss/train': 2.368499279022217} +02/24/2022 20:04:10 - INFO - codeparrot_training - Step 14284: {'lr': 0.0004234604646253172, 'samples': 7313920, 'steps': 14284, 'loss/train': 2.141674041748047} +02/24/2022 20:04:15 - INFO - codeparrot_training - Step 14285: {'lr': 0.0004234486811967364, 'samples': 7314432, 'steps': 14285, 'loss/train': 1.910029411315918} +02/24/2022 20:04:19 - INFO - codeparrot_training - Step 14286: {'lr': 0.00042343689702515643, 'samples': 7314944, 'steps': 14286, 'loss/train': 2.3125417232513428} +02/24/2022 20:04:26 - INFO - codeparrot_training - Step 14287: {'lr': 0.0004234251121106279, 'samples': 7315456, 'steps': 14287, 'loss/train': 1.4323766231536865} +02/24/2022 20:04:29 - INFO - codeparrot_training - Step 14288: {'lr': 0.00042341332645320126, 'samples': 7315968, 'steps': 14288, 'loss/train': 3.8594212532043457} +02/24/2022 20:04:35 - INFO - codeparrot_training - Step 14289: {'lr': 0.000423401540052927, 'samples': 7316480, 'steps': 14289, 'loss/train': 1.9006260633468628} +02/24/2022 20:04:38 - INFO - codeparrot_training - Step 14290: {'lr': 0.0004233897529098556, 'samples': 7316992, 'steps': 14290, 'loss/train': 1.530927300453186} +02/24/2022 20:04:44 - INFO - codeparrot_training - Step 14291: {'lr': 0.0004233779650240376, 'samples': 7317504, 'steps': 14291, 'loss/train': 0.7660928964614868} +02/24/2022 20:04:47 - INFO - codeparrot_training - Step 14292: {'lr': 0.00042336617639552335, 'samples': 7318016, 'steps': 14292, 'loss/train': 2.562587022781372} +02/24/2022 20:04:53 - INFO - codeparrot_training - Step 14293: {'lr': 0.00042335438702436354, 'samples': 7318528, 'steps': 14293, 'loss/train': 2.1339643001556396} +02/24/2022 20:04:56 - INFO - codeparrot_training - Step 14294: {'lr': 0.0004233425969106085, 'samples': 7319040, 'steps': 14294, 'loss/train': 0.5550722479820251} +02/24/2022 20:05:02 - INFO - codeparrot_training - Step 14295: {'lr': 0.00042333080605430883, 'samples': 7319552, 'steps': 14295, 'loss/train': 2.4108386039733887} +02/24/2022 20:05:10 - INFO - codeparrot_training - Step 14296: {'lr': 0.00042331901445551514, 'samples': 7320064, 'steps': 14296, 'loss/train': 1.6498088836669922} +02/24/2022 20:05:13 - INFO - codeparrot_training - Step 14297: {'lr': 0.00042330722211427775, 'samples': 7320576, 'steps': 14297, 'loss/train': 1.024876594543457} +02/24/2022 20:05:19 - INFO - codeparrot_training - Step 14298: {'lr': 0.00042329542903064724, 'samples': 7321088, 'steps': 14298, 'loss/train': 1.3504054546356201} +02/24/2022 20:05:22 - INFO - codeparrot_training - Step 14299: {'lr': 0.00042328363520467417, 'samples': 7321600, 'steps': 14299, 'loss/train': 1.8318637609481812} +02/24/2022 20:05:28 - INFO - codeparrot_training - Step 14300: {'lr': 0.000423271840636409, 'samples': 7322112, 'steps': 14300, 'loss/train': 1.520402193069458} +02/24/2022 20:05:31 - INFO - codeparrot_training - Step 14301: {'lr': 0.0004232600453259023, 'samples': 7322624, 'steps': 14301, 'loss/train': 2.715498208999634} +02/24/2022 20:05:37 - INFO - codeparrot_training - Step 14302: {'lr': 0.0004232482492732046, 'samples': 7323136, 'steps': 14302, 'loss/train': 2.625418186187744} +02/24/2022 20:05:40 - INFO - codeparrot_training - Step 14303: {'lr': 0.00042323645247836636, 'samples': 7323648, 'steps': 14303, 'loss/train': 1.2860190868377686} +02/24/2022 20:05:46 - INFO - codeparrot_training - Step 14304: {'lr': 0.00042322465494143814, 'samples': 7324160, 'steps': 14304, 'loss/train': 0.4741033613681793} +02/24/2022 20:05:49 - INFO - codeparrot_training - Step 14305: {'lr': 0.00042321285666247063, 'samples': 7324672, 'steps': 14305, 'loss/train': 2.146327257156372} +02/24/2022 20:05:57 - INFO - codeparrot_training - Step 14306: {'lr': 0.0004232010576415141, 'samples': 7325184, 'steps': 14306, 'loss/train': 1.637241005897522} +02/24/2022 20:06:00 - INFO - codeparrot_training - Step 14307: {'lr': 0.00042318925787861937, 'samples': 7325696, 'steps': 14307, 'loss/train': 2.294499158859253} +02/24/2022 20:06:06 - INFO - codeparrot_training - Step 14308: {'lr': 0.0004231774573738367, 'samples': 7326208, 'steps': 14308, 'loss/train': 2.5879604816436768} +02/24/2022 20:06:09 - INFO - codeparrot_training - Step 14309: {'lr': 0.000423165656127217, 'samples': 7326720, 'steps': 14309, 'loss/train': 1.6816283464431763} +02/24/2022 20:06:15 - INFO - codeparrot_training - Step 14310: {'lr': 0.00042315385413881047, 'samples': 7327232, 'steps': 14310, 'loss/train': 1.2102160453796387} +02/24/2022 20:06:18 - INFO - codeparrot_training - Step 14311: {'lr': 0.00042314205140866785, 'samples': 7327744, 'steps': 14311, 'loss/train': 2.049474000930786} +02/24/2022 20:06:24 - INFO - codeparrot_training - Step 14312: {'lr': 0.00042313024793683965, 'samples': 7328256, 'steps': 14312, 'loss/train': 1.9472191333770752} +02/24/2022 20:06:27 - INFO - codeparrot_training - Step 14313: {'lr': 0.0004231184437233765, 'samples': 7328768, 'steps': 14313, 'loss/train': 1.5723084211349487} +02/24/2022 20:06:33 - INFO - codeparrot_training - Step 14314: {'lr': 0.0004231066387683288, 'samples': 7329280, 'steps': 14314, 'loss/train': 1.6288281679153442} +02/24/2022 20:06:36 - INFO - codeparrot_training - Step 14315: {'lr': 0.0004230948330717472, 'samples': 7329792, 'steps': 14315, 'loss/train': 0.5535238981246948} +02/24/2022 20:06:42 - INFO - codeparrot_training - Step 14316: {'lr': 0.0004230830266336825, 'samples': 7330304, 'steps': 14316, 'loss/train': 0.5326436758041382} +02/24/2022 20:06:45 - INFO - codeparrot_training - Step 14317: {'lr': 0.00042307121945418493, 'samples': 7330816, 'steps': 14317, 'loss/train': 2.1080830097198486} +02/24/2022 20:06:51 - INFO - codeparrot_training - Step 14318: {'lr': 0.00042305941153330525, 'samples': 7331328, 'steps': 14318, 'loss/train': 2.036498785018921} +02/24/2022 20:06:54 - INFO - codeparrot_training - Step 14319: {'lr': 0.00042304760287109394, 'samples': 7331840, 'steps': 14319, 'loss/train': 2.572666645050049} +02/24/2022 20:06:59 - INFO - codeparrot_training - Step 14320: {'lr': 0.0004230357934676017, 'samples': 7332352, 'steps': 14320, 'loss/train': 2.471630334854126} +02/24/2022 20:07:03 - INFO - codeparrot_training - Step 14321: {'lr': 0.00042302398332287903, 'samples': 7332864, 'steps': 14321, 'loss/train': 1.4810775518417358} +02/24/2022 20:07:10 - INFO - codeparrot_training - Step 14322: {'lr': 0.00042301217243697665, 'samples': 7333376, 'steps': 14322, 'loss/train': 1.4826350212097168} +02/24/2022 20:07:14 - INFO - codeparrot_training - Step 14323: {'lr': 0.00042300036080994495, 'samples': 7333888, 'steps': 14323, 'loss/train': 2.3870391845703125} +02/24/2022 20:07:19 - INFO - codeparrot_training - Step 14324: {'lr': 0.00042298854844183476, 'samples': 7334400, 'steps': 14324, 'loss/train': 2.327512741088867} +02/24/2022 20:07:23 - INFO - codeparrot_training - Step 14325: {'lr': 0.0004229767353326964, 'samples': 7334912, 'steps': 14325, 'loss/train': 2.4142353534698486} +02/24/2022 20:07:28 - INFO - codeparrot_training - Step 14326: {'lr': 0.0004229649214825808, 'samples': 7335424, 'steps': 14326, 'loss/train': 1.994194746017456} +02/24/2022 20:07:32 - INFO - codeparrot_training - Step 14327: {'lr': 0.0004229531068915383, 'samples': 7335936, 'steps': 14327, 'loss/train': 1.6865442991256714} +02/24/2022 20:07:37 - INFO - codeparrot_training - Step 14328: {'lr': 0.0004229412915596196, 'samples': 7336448, 'steps': 14328, 'loss/train': 1.3904539346694946} +02/24/2022 20:07:41 - INFO - codeparrot_training - Step 14329: {'lr': 0.0004229294754868754, 'samples': 7336960, 'steps': 14329, 'loss/train': 1.4125627279281616} +02/24/2022 20:07:46 - INFO - codeparrot_training - Step 14330: {'lr': 0.0004229176586733562, 'samples': 7337472, 'steps': 14330, 'loss/train': 2.1270740032196045} +02/24/2022 20:07:50 - INFO - codeparrot_training - Step 14331: {'lr': 0.0004229058411191126, 'samples': 7337984, 'steps': 14331, 'loss/train': 2.4757940769195557} +02/24/2022 20:07:58 - INFO - codeparrot_training - Step 14332: {'lr': 0.0004228940228241953, 'samples': 7338496, 'steps': 14332, 'loss/train': 2.683319568634033} +02/24/2022 20:08:01 - INFO - codeparrot_training - Step 14333: {'lr': 0.0004228822037886549, 'samples': 7339008, 'steps': 14333, 'loss/train': 2.4039764404296875} +02/24/2022 20:08:05 - INFO - codeparrot_training - Step 14334: {'lr': 0.00042287038401254214, 'samples': 7339520, 'steps': 14334, 'loss/train': 2.4975473880767822} +02/24/2022 20:08:10 - INFO - codeparrot_training - Step 14335: {'lr': 0.00042285856349590746, 'samples': 7340032, 'steps': 14335, 'loss/train': 1.7661595344543457} +02/24/2022 20:08:14 - INFO - codeparrot_training - Step 14336: {'lr': 0.0004228467422388016, 'samples': 7340544, 'steps': 14336, 'loss/train': 2.0884792804718018} +02/24/2022 20:08:19 - INFO - codeparrot_training - Step 14337: {'lr': 0.00042283492024127524, 'samples': 7341056, 'steps': 14337, 'loss/train': 1.9482609033584595} +02/24/2022 20:08:23 - INFO - codeparrot_training - Step 14338: {'lr': 0.00042282309750337887, 'samples': 7341568, 'steps': 14338, 'loss/train': 2.5940208435058594} +02/24/2022 20:08:28 - INFO - codeparrot_training - Step 14339: {'lr': 0.0004228112740251632, 'samples': 7342080, 'steps': 14339, 'loss/train': 0.18865668773651123} +02/24/2022 20:08:32 - INFO - codeparrot_training - Step 14340: {'lr': 0.00042279944980667906, 'samples': 7342592, 'steps': 14340, 'loss/train': 0.9635124206542969} +02/24/2022 20:08:37 - INFO - codeparrot_training - Step 14341: {'lr': 0.00042278762484797684, 'samples': 7343104, 'steps': 14341, 'loss/train': 1.851822853088379} +02/24/2022 20:08:41 - INFO - codeparrot_training - Step 14342: {'lr': 0.0004227757991491073, 'samples': 7343616, 'steps': 14342, 'loss/train': 2.107936143875122} +02/24/2022 20:08:48 - INFO - codeparrot_training - Step 14343: {'lr': 0.0004227639727101211, 'samples': 7344128, 'steps': 14343, 'loss/train': 2.010317087173462} +02/24/2022 20:08:52 - INFO - codeparrot_training - Step 14344: {'lr': 0.0004227521455310689, 'samples': 7344640, 'steps': 14344, 'loss/train': 2.015282154083252} +02/24/2022 20:08:57 - INFO - codeparrot_training - Step 14345: {'lr': 0.0004227403176120014, 'samples': 7345152, 'steps': 14345, 'loss/train': 1.9161802530288696} +02/24/2022 20:09:01 - INFO - codeparrot_training - Step 14346: {'lr': 0.00042272848895296924, 'samples': 7345664, 'steps': 14346, 'loss/train': 1.7873947620391846} +02/24/2022 20:09:06 - INFO - codeparrot_training - Step 14347: {'lr': 0.000422716659554023, 'samples': 7346176, 'steps': 14347, 'loss/train': 2.7401559352874756} +02/24/2022 20:09:09 - INFO - codeparrot_training - Step 14348: {'lr': 0.00042270482941521347, 'samples': 7346688, 'steps': 14348, 'loss/train': 1.6684261560440063} +02/24/2022 20:09:15 - INFO - codeparrot_training - Step 14349: {'lr': 0.0004226929985365913, 'samples': 7347200, 'steps': 14349, 'loss/train': 2.3258559703826904} +02/24/2022 20:09:21 - INFO - codeparrot_training - Step 14350: {'lr': 0.00042268116691820723, 'samples': 7347712, 'steps': 14350, 'loss/train': 1.7296404838562012} +02/24/2022 20:09:24 - INFO - codeparrot_training - Step 14351: {'lr': 0.00042266933456011174, 'samples': 7348224, 'steps': 14351, 'loss/train': 2.2408125400543213} +02/24/2022 20:09:31 - INFO - codeparrot_training - Step 14352: {'lr': 0.0004226575014623557, 'samples': 7348736, 'steps': 14352, 'loss/train': 2.1970057487487793} +02/24/2022 20:09:35 - INFO - codeparrot_training - Step 14353: {'lr': 0.0004226456676249898, 'samples': 7349248, 'steps': 14353, 'loss/train': 1.3432856798171997} +02/24/2022 20:09:40 - INFO - codeparrot_training - Step 14354: {'lr': 0.0004226338330480646, 'samples': 7349760, 'steps': 14354, 'loss/train': 1.8615167140960693} +02/24/2022 20:09:44 - INFO - codeparrot_training - Step 14355: {'lr': 0.00042262199773163096, 'samples': 7350272, 'steps': 14355, 'loss/train': 0.9743638634681702} +02/24/2022 20:09:49 - INFO - codeparrot_training - Step 14356: {'lr': 0.00042261016167573944, 'samples': 7350784, 'steps': 14356, 'loss/train': 2.3044936656951904} +02/24/2022 20:09:53 - INFO - codeparrot_training - Step 14357: {'lr': 0.0004225983248804408, 'samples': 7351296, 'steps': 14357, 'loss/train': 2.7075257301330566} +02/24/2022 20:09:58 - INFO - codeparrot_training - Step 14358: {'lr': 0.0004225864873457858, 'samples': 7351808, 'steps': 14358, 'loss/train': 2.5473790168762207} +02/24/2022 20:10:02 - INFO - codeparrot_training - Step 14359: {'lr': 0.0004225746490718251, 'samples': 7352320, 'steps': 14359, 'loss/train': 2.723165988922119} +02/24/2022 20:10:07 - INFO - codeparrot_training - Step 14360: {'lr': 0.0004225628100586093, 'samples': 7352832, 'steps': 14360, 'loss/train': 2.216243267059326} +02/24/2022 20:10:11 - INFO - codeparrot_training - Step 14361: {'lr': 0.0004225509703061893, 'samples': 7353344, 'steps': 14361, 'loss/train': 1.7508578300476074} +02/24/2022 20:10:16 - INFO - codeparrot_training - Step 14362: {'lr': 0.0004225391298146157, 'samples': 7353856, 'steps': 14362, 'loss/train': 1.519669532775879} +02/24/2022 20:10:20 - INFO - codeparrot_training - Step 14363: {'lr': 0.0004225272885839392, 'samples': 7354368, 'steps': 14363, 'loss/train': 2.3524415493011475} +02/24/2022 20:10:25 - INFO - codeparrot_training - Step 14364: {'lr': 0.0004225154466142107, 'samples': 7354880, 'steps': 14364, 'loss/train': 1.6151716709136963} +02/24/2022 20:10:29 - INFO - codeparrot_training - Step 14365: {'lr': 0.0004225036039054807, 'samples': 7355392, 'steps': 14365, 'loss/train': 2.6133036613464355} +02/24/2022 20:10:34 - INFO - codeparrot_training - Step 14366: {'lr': 0.00042249176045780013, 'samples': 7355904, 'steps': 14366, 'loss/train': 1.6383662223815918} +02/24/2022 20:10:38 - INFO - codeparrot_training - Step 14367: {'lr': 0.0004224799162712195, 'samples': 7356416, 'steps': 14367, 'loss/train': 2.3020594120025635} +02/24/2022 20:10:46 - INFO - codeparrot_training - Step 14368: {'lr': 0.0004224680713457898, 'samples': 7356928, 'steps': 14368, 'loss/train': 1.793048620223999} +02/24/2022 20:10:49 - INFO - codeparrot_training - Step 14369: {'lr': 0.00042245622568156164, 'samples': 7357440, 'steps': 14369, 'loss/train': 0.45681455731391907} +02/24/2022 20:10:55 - INFO - codeparrot_training - Step 14370: {'lr': 0.0004224443792785857, 'samples': 7357952, 'steps': 14370, 'loss/train': 1.9955453872680664} +02/24/2022 20:10:58 - INFO - codeparrot_training - Step 14371: {'lr': 0.0004224325321369128, 'samples': 7358464, 'steps': 14371, 'loss/train': 1.1860418319702148} +02/24/2022 20:11:04 - INFO - codeparrot_training - Step 14372: {'lr': 0.0004224206842565937, 'samples': 7358976, 'steps': 14372, 'loss/train': 1.9157577753067017} +02/24/2022 20:11:07 - INFO - codeparrot_training - Step 14373: {'lr': 0.00042240883563767916, 'samples': 7359488, 'steps': 14373, 'loss/train': 1.9489727020263672} +02/24/2022 20:11:13 - INFO - codeparrot_training - Step 14374: {'lr': 0.00042239698628021994, 'samples': 7360000, 'steps': 14374, 'loss/train': 2.3002262115478516} +02/24/2022 20:11:16 - INFO - codeparrot_training - Step 14375: {'lr': 0.0004223851361842668, 'samples': 7360512, 'steps': 14375, 'loss/train': 1.7936546802520752} +02/24/2022 20:11:22 - INFO - codeparrot_training - Step 14376: {'lr': 0.00042237328534987034, 'samples': 7361024, 'steps': 14376, 'loss/train': 2.042577028274536} +02/24/2022 20:11:25 - INFO - codeparrot_training - Step 14377: {'lr': 0.0004223614337770816, 'samples': 7361536, 'steps': 14377, 'loss/train': 2.534580707550049} +02/24/2022 20:11:31 - INFO - codeparrot_training - Step 14378: {'lr': 0.0004223495814659511, 'samples': 7362048, 'steps': 14378, 'loss/train': 1.127556562423706} +02/24/2022 20:11:34 - INFO - codeparrot_training - Step 14379: {'lr': 0.00042233772841652974, 'samples': 7362560, 'steps': 14379, 'loss/train': 1.4802942276000977} +02/24/2022 20:11:41 - INFO - codeparrot_training - Step 14380: {'lr': 0.00042232587462886833, 'samples': 7363072, 'steps': 14380, 'loss/train': 2.082639217376709} +02/24/2022 20:11:45 - INFO - codeparrot_training - Step 14381: {'lr': 0.0004223140201030176, 'samples': 7363584, 'steps': 14381, 'loss/train': 1.6701768636703491} +02/24/2022 20:11:50 - INFO - codeparrot_training - Step 14382: {'lr': 0.0004223021648390283, 'samples': 7364096, 'steps': 14382, 'loss/train': 3.672809362411499} +02/24/2022 20:11:54 - INFO - codeparrot_training - Step 14383: {'lr': 0.0004222903088369512, 'samples': 7364608, 'steps': 14383, 'loss/train': 1.7200851440429688} +02/24/2022 20:11:59 - INFO - codeparrot_training - Step 14384: {'lr': 0.0004222784520968371, 'samples': 7365120, 'steps': 14384, 'loss/train': 0.5897279977798462} +02/24/2022 20:12:03 - INFO - codeparrot_training - Step 14385: {'lr': 0.000422266594618737, 'samples': 7365632, 'steps': 14385, 'loss/train': 2.610156536102295} +02/24/2022 20:12:08 - INFO - codeparrot_training - Step 14386: {'lr': 0.0004222547364027013, 'samples': 7366144, 'steps': 14386, 'loss/train': 1.5327962636947632} +02/24/2022 20:12:12 - INFO - codeparrot_training - Step 14387: {'lr': 0.0004222428774487811, 'samples': 7366656, 'steps': 14387, 'loss/train': 2.266096830368042} +02/24/2022 20:12:19 - INFO - codeparrot_training - Step 14388: {'lr': 0.00042223101775702704, 'samples': 7367168, 'steps': 14388, 'loss/train': 1.8165912628173828} +02/24/2022 20:12:23 - INFO - codeparrot_training - Step 14389: {'lr': 0.00042221915732749006, 'samples': 7367680, 'steps': 14389, 'loss/train': 2.7712199687957764} +02/24/2022 20:12:29 - INFO - codeparrot_training - Step 14390: {'lr': 0.0004222072961602209, 'samples': 7368192, 'steps': 14390, 'loss/train': 2.4479289054870605} +02/24/2022 20:12:32 - INFO - codeparrot_training - Step 14391: {'lr': 0.0004221954342552703, 'samples': 7368704, 'steps': 14391, 'loss/train': 1.5435402393341064} +02/24/2022 20:12:35 - INFO - codeparrot_training - Step 14392: {'lr': 0.00042218357161268917, 'samples': 7369216, 'steps': 14392, 'loss/train': 0.8719975352287292} +02/24/2022 20:12:41 - INFO - codeparrot_training - Step 14393: {'lr': 0.0004221717082325283, 'samples': 7369728, 'steps': 14393, 'loss/train': 0.4197543263435364} +02/24/2022 20:12:45 - INFO - codeparrot_training - Step 14394: {'lr': 0.00042215984411483854, 'samples': 7370240, 'steps': 14394, 'loss/train': 1.9302916526794434} +02/24/2022 20:12:50 - INFO - codeparrot_training - Step 14395: {'lr': 0.00042214797925967064, 'samples': 7370752, 'steps': 14395, 'loss/train': 1.0474448204040527} +02/24/2022 20:12:56 - INFO - codeparrot_training - Step 14396: {'lr': 0.00042213611366707547, 'samples': 7371264, 'steps': 14396, 'loss/train': 1.5193946361541748} +02/24/2022 20:12:59 - INFO - codeparrot_training - Step 14397: {'lr': 0.0004221242473371038, 'samples': 7371776, 'steps': 14397, 'loss/train': 2.213353395462036} +02/24/2022 20:13:05 - INFO - codeparrot_training - Step 14398: {'lr': 0.00042211238026980657, 'samples': 7372288, 'steps': 14398, 'loss/train': 2.2594025135040283} +02/24/2022 20:13:08 - INFO - codeparrot_training - Step 14399: {'lr': 0.0004221005124652345, 'samples': 7372800, 'steps': 14399, 'loss/train': 1.2874884605407715} +02/24/2022 20:13:14 - INFO - codeparrot_training - Step 14400: {'lr': 0.0004220886439234385, 'samples': 7373312, 'steps': 14400, 'loss/train': 1.7433128356933594} +02/24/2022 20:13:17 - INFO - codeparrot_training - Step 14401: {'lr': 0.0004220767746444694, 'samples': 7373824, 'steps': 14401, 'loss/train': 1.84574556350708} +02/24/2022 20:13:23 - INFO - codeparrot_training - Step 14402: {'lr': 0.0004220649046283781, 'samples': 7374336, 'steps': 14402, 'loss/train': 2.269367218017578} +02/24/2022 20:13:26 - INFO - codeparrot_training - Step 14403: {'lr': 0.00042205303387521533, 'samples': 7374848, 'steps': 14403, 'loss/train': 2.172515630722046} +02/24/2022 20:13:33 - INFO - codeparrot_training - Step 14404: {'lr': 0.00042204116238503197, 'samples': 7375360, 'steps': 14404, 'loss/train': 2.2918453216552734} +02/24/2022 20:13:37 - INFO - codeparrot_training - Step 14405: {'lr': 0.00042202929015787893, 'samples': 7375872, 'steps': 14405, 'loss/train': 2.335841655731201} +02/24/2022 20:13:42 - INFO - codeparrot_training - Step 14406: {'lr': 0.000422017417193807, 'samples': 7376384, 'steps': 14406, 'loss/train': 2.2437055110931396} +02/24/2022 20:13:45 - INFO - codeparrot_training - Step 14407: {'lr': 0.0004220055434928671, 'samples': 7376896, 'steps': 14407, 'loss/train': 2.7684497833251953} +02/24/2022 20:13:52 - INFO - codeparrot_training - Step 14408: {'lr': 0.0004219936690551101, 'samples': 7377408, 'steps': 14408, 'loss/train': 2.7071282863616943} +02/24/2022 20:13:56 - INFO - codeparrot_training - Step 14409: {'lr': 0.0004219817938805869, 'samples': 7377920, 'steps': 14409, 'loss/train': 1.9697179794311523} +02/24/2022 20:14:01 - INFO - codeparrot_training - Step 14410: {'lr': 0.0004219699179693481, 'samples': 7378432, 'steps': 14410, 'loss/train': 2.549269914627075} +02/24/2022 20:14:05 - INFO - codeparrot_training - Step 14411: {'lr': 0.000421958041321445, 'samples': 7378944, 'steps': 14411, 'loss/train': 0.2498295158147812} +02/24/2022 20:14:10 - INFO - codeparrot_training - Step 14412: {'lr': 0.0004219461639369281, 'samples': 7379456, 'steps': 14412, 'loss/train': 0.8178668022155762} +02/24/2022 20:14:14 - INFO - codeparrot_training - Step 14413: {'lr': 0.0004219342858158485, 'samples': 7379968, 'steps': 14413, 'loss/train': 2.323140859603882} +02/24/2022 20:14:19 - INFO - codeparrot_training - Step 14414: {'lr': 0.000421922406958257, 'samples': 7380480, 'steps': 14414, 'loss/train': 1.9101383686065674} +02/24/2022 20:14:23 - INFO - codeparrot_training - Step 14415: {'lr': 0.00042191052736420445, 'samples': 7380992, 'steps': 14415, 'loss/train': 1.9701310396194458} +02/24/2022 20:14:29 - INFO - codeparrot_training - Step 14416: {'lr': 0.0004218986470337419, 'samples': 7381504, 'steps': 14416, 'loss/train': 2.2721662521362305} +02/24/2022 20:14:32 - INFO - codeparrot_training - Step 14417: {'lr': 0.00042188676596692, 'samples': 7382016, 'steps': 14417, 'loss/train': 2.5398759841918945} +02/24/2022 20:14:40 - INFO - codeparrot_training - Step 14418: {'lr': 0.0004218748841637899, 'samples': 7382528, 'steps': 14418, 'loss/train': 2.5376124382019043} +02/24/2022 20:14:43 - INFO - codeparrot_training - Step 14419: {'lr': 0.0004218630016244023, 'samples': 7383040, 'steps': 14419, 'loss/train': 2.3700544834136963} +02/24/2022 20:14:47 - INFO - codeparrot_training - Step 14420: {'lr': 0.0004218511183488082, 'samples': 7383552, 'steps': 14420, 'loss/train': 1.721614122390747} +02/24/2022 20:14:52 - INFO - codeparrot_training - Step 14421: {'lr': 0.0004218392343370584, 'samples': 7384064, 'steps': 14421, 'loss/train': 1.2554845809936523} +02/24/2022 20:14:55 - INFO - codeparrot_training - Step 14422: {'lr': 0.000421827349589204, 'samples': 7384576, 'steps': 14422, 'loss/train': 1.450324296951294} +02/24/2022 20:15:01 - INFO - codeparrot_training - Step 14423: {'lr': 0.0004218154641052957, 'samples': 7385088, 'steps': 14423, 'loss/train': 1.9200878143310547} +02/24/2022 20:15:04 - INFO - codeparrot_training - Step 14424: {'lr': 0.0004218035778853846, 'samples': 7385600, 'steps': 14424, 'loss/train': 2.5163228511810303} +02/24/2022 20:15:10 - INFO - codeparrot_training - Step 14425: {'lr': 0.0004217916909295215, 'samples': 7386112, 'steps': 14425, 'loss/train': 3.1031835079193115} +02/24/2022 20:15:14 - INFO - codeparrot_training - Step 14426: {'lr': 0.00042177980323775734, 'samples': 7386624, 'steps': 14426, 'loss/train': 2.2497856616973877} +02/24/2022 20:15:19 - INFO - codeparrot_training - Step 14427: {'lr': 0.00042176791481014303, 'samples': 7387136, 'steps': 14427, 'loss/train': 2.626216411590576} +02/24/2022 20:15:23 - INFO - codeparrot_training - Step 14428: {'lr': 0.0004217560256467295, 'samples': 7387648, 'steps': 14428, 'loss/train': 0.8603918552398682} +02/24/2022 20:15:29 - INFO - codeparrot_training - Step 14429: {'lr': 0.00042174413574756775, 'samples': 7388160, 'steps': 14429, 'loss/train': 2.1737658977508545} +02/24/2022 20:15:33 - INFO - codeparrot_training - Step 14430: {'lr': 0.0004217322451127086, 'samples': 7388672, 'steps': 14430, 'loss/train': 2.1812446117401123} +02/24/2022 20:15:38 - INFO - codeparrot_training - Step 14431: {'lr': 0.00042172035374220306, 'samples': 7389184, 'steps': 14431, 'loss/train': 1.7511701583862305} +02/24/2022 20:15:42 - INFO - codeparrot_training - Step 14432: {'lr': 0.0004217084616361021, 'samples': 7389696, 'steps': 14432, 'loss/train': 2.7966084480285645} +02/24/2022 20:15:47 - INFO - codeparrot_training - Step 14433: {'lr': 0.00042169656879445657, 'samples': 7390208, 'steps': 14433, 'loss/train': 2.0498228073120117} +02/24/2022 20:15:51 - INFO - codeparrot_training - Step 14434: {'lr': 0.00042168467521731747, 'samples': 7390720, 'steps': 14434, 'loss/train': 2.652679443359375} +02/24/2022 20:15:56 - INFO - codeparrot_training - Step 14435: {'lr': 0.00042167278090473573, 'samples': 7391232, 'steps': 14435, 'loss/train': 1.6542110443115234} +02/24/2022 20:16:00 - INFO - codeparrot_training - Step 14436: {'lr': 0.0004216608858567623, 'samples': 7391744, 'steps': 14436, 'loss/train': 1.975625991821289} +02/24/2022 20:16:05 - INFO - codeparrot_training - Step 14437: {'lr': 0.00042164899007344814, 'samples': 7392256, 'steps': 14437, 'loss/train': 2.3574142456054688} +02/24/2022 20:16:09 - INFO - codeparrot_training - Step 14438: {'lr': 0.00042163709355484425, 'samples': 7392768, 'steps': 14438, 'loss/train': 2.790147066116333} +02/24/2022 20:16:15 - INFO - codeparrot_training - Step 14439: {'lr': 0.0004216251963010015, 'samples': 7393280, 'steps': 14439, 'loss/train': 2.61586332321167} +02/24/2022 20:16:18 - INFO - codeparrot_training - Step 14440: {'lr': 0.0004216132983119709, 'samples': 7393792, 'steps': 14440, 'loss/train': 2.4092423915863037} +02/24/2022 20:16:24 - INFO - codeparrot_training - Step 14441: {'lr': 0.00042160139958780346, 'samples': 7394304, 'steps': 14441, 'loss/train': 1.951647162437439} +02/24/2022 20:16:27 - INFO - codeparrot_training - Step 14442: {'lr': 0.0004215895001285501, 'samples': 7394816, 'steps': 14442, 'loss/train': 1.4745805263519287} +02/24/2022 20:16:33 - INFO - codeparrot_training - Step 14443: {'lr': 0.0004215775999342618, 'samples': 7395328, 'steps': 14443, 'loss/train': 2.3024535179138184} +02/24/2022 20:16:36 - INFO - codeparrot_training - Step 14444: {'lr': 0.0004215656990049896, 'samples': 7395840, 'steps': 14444, 'loss/train': 2.0168559551239014} +02/24/2022 20:16:42 - INFO - codeparrot_training - Step 14445: {'lr': 0.0004215537973407844, 'samples': 7396352, 'steps': 14445, 'loss/train': 2.563513994216919} +02/24/2022 20:16:45 - INFO - codeparrot_training - Step 14446: {'lr': 0.0004215418949416972, 'samples': 7396864, 'steps': 14446, 'loss/train': 2.3443524837493896} +02/24/2022 20:16:51 - INFO - codeparrot_training - Step 14447: {'lr': 0.00042152999180777894, 'samples': 7397376, 'steps': 14447, 'loss/train': 1.9824228286743164} +02/24/2022 20:16:54 - INFO - codeparrot_training - Step 14448: {'lr': 0.0004215180879390807, 'samples': 7397888, 'steps': 14448, 'loss/train': 2.6413373947143555} +02/24/2022 20:17:01 - INFO - codeparrot_training - Step 14449: {'lr': 0.0004215061833356535, 'samples': 7398400, 'steps': 14449, 'loss/train': 3.082911252975464} +02/24/2022 20:17:04 - INFO - codeparrot_training - Step 14450: {'lr': 0.00042149427799754817, 'samples': 7398912, 'steps': 14450, 'loss/train': 2.255244255065918} +02/24/2022 20:17:10 - INFO - codeparrot_training - Step 14451: {'lr': 0.00042148237192481586, 'samples': 7399424, 'steps': 14451, 'loss/train': 2.464221715927124} +02/24/2022 20:17:13 - INFO - codeparrot_training - Step 14452: {'lr': 0.0004214704651175075, 'samples': 7399936, 'steps': 14452, 'loss/train': 4.910278797149658} +02/24/2022 20:17:19 - INFO - codeparrot_training - Step 14453: {'lr': 0.0004214585575756742, 'samples': 7400448, 'steps': 14453, 'loss/train': 0.29434093832969666} +02/24/2022 20:17:22 - INFO - codeparrot_training - Step 14454: {'lr': 0.0004214466492993668, 'samples': 7400960, 'steps': 14454, 'loss/train': 2.264237880706787} +02/24/2022 20:17:28 - INFO - codeparrot_training - Step 14455: {'lr': 0.00042143474028863637, 'samples': 7401472, 'steps': 14455, 'loss/train': 2.308858633041382} +02/24/2022 20:17:31 - INFO - codeparrot_training - Step 14456: {'lr': 0.000421422830543534, 'samples': 7401984, 'steps': 14456, 'loss/train': 8.902766227722168} +02/24/2022 20:17:37 - INFO - codeparrot_training - Step 14457: {'lr': 0.0004214109200641106, 'samples': 7402496, 'steps': 14457, 'loss/train': 1.2082109451293945} +02/24/2022 20:17:40 - INFO - codeparrot_training - Step 14458: {'lr': 0.00042139900885041734, 'samples': 7403008, 'steps': 14458, 'loss/train': 1.3440091609954834} +02/24/2022 20:17:47 - INFO - codeparrot_training - Step 14459: {'lr': 0.00042138709690250507, 'samples': 7403520, 'steps': 14459, 'loss/train': 0.8292269706726074} +02/24/2022 20:17:50 - INFO - codeparrot_training - Step 14460: {'lr': 0.0004213751842204249, 'samples': 7404032, 'steps': 14460, 'loss/train': 2.31581711769104} +02/24/2022 20:17:56 - INFO - codeparrot_training - Step 14461: {'lr': 0.00042136327080422785, 'samples': 7404544, 'steps': 14461, 'loss/train': 2.3676931858062744} +02/24/2022 20:17:59 - INFO - codeparrot_training - Step 14462: {'lr': 0.0004213513566539651, 'samples': 7405056, 'steps': 14462, 'loss/train': 1.2420704364776611} +02/24/2022 20:18:05 - INFO - codeparrot_training - Step 14463: {'lr': 0.0004213394417696874, 'samples': 7405568, 'steps': 14463, 'loss/train': 2.387732982635498} +02/24/2022 20:18:08 - INFO - codeparrot_training - Step 14464: {'lr': 0.00042132752615144597, 'samples': 7406080, 'steps': 14464, 'loss/train': 2.154527187347412} +02/24/2022 20:18:14 - INFO - codeparrot_training - Step 14465: {'lr': 0.00042131560979929186, 'samples': 7406592, 'steps': 14465, 'loss/train': 1.8868833780288696} +02/24/2022 20:18:17 - INFO - codeparrot_training - Step 14466: {'lr': 0.00042130369271327605, 'samples': 7407104, 'steps': 14466, 'loss/train': 1.738230586051941} +02/24/2022 20:18:23 - INFO - codeparrot_training - Step 14467: {'lr': 0.0004212917748934496, 'samples': 7407616, 'steps': 14467, 'loss/train': 2.4486939907073975} +02/24/2022 20:18:26 - INFO - codeparrot_training - Step 14468: {'lr': 0.00042127985633986365, 'samples': 7408128, 'steps': 14468, 'loss/train': 1.370111346244812} +02/24/2022 20:18:32 - INFO - codeparrot_training - Step 14469: {'lr': 0.00042126793705256913, 'samples': 7408640, 'steps': 14469, 'loss/train': 2.3062503337860107} +02/24/2022 20:18:35 - INFO - codeparrot_training - Step 14470: {'lr': 0.00042125601703161706, 'samples': 7409152, 'steps': 14470, 'loss/train': 1.824141502380371} +02/24/2022 20:18:41 - INFO - codeparrot_training - Step 14471: {'lr': 0.00042124409627705873, 'samples': 7409664, 'steps': 14471, 'loss/train': 3.3604238033294678} +02/24/2022 20:18:44 - INFO - codeparrot_training - Step 14472: {'lr': 0.00042123217478894504, 'samples': 7410176, 'steps': 14472, 'loss/train': 1.6430108547210693} +02/24/2022 20:18:50 - INFO - codeparrot_training - Step 14473: {'lr': 0.0004212202525673271, 'samples': 7410688, 'steps': 14473, 'loss/train': 0.5625841021537781} +02/24/2022 20:18:53 - INFO - codeparrot_training - Step 14474: {'lr': 0.00042120832961225585, 'samples': 7411200, 'steps': 14474, 'loss/train': 1.4384527206420898} +02/24/2022 20:18:59 - INFO - codeparrot_training - Step 14475: {'lr': 0.00042119640592378263, 'samples': 7411712, 'steps': 14475, 'loss/train': 0.25691255927085876} +02/24/2022 20:19:03 - INFO - codeparrot_training - Step 14476: {'lr': 0.00042118448150195827, 'samples': 7412224, 'steps': 14476, 'loss/train': 2.408867359161377} +02/24/2022 20:19:09 - INFO - codeparrot_training - Step 14477: {'lr': 0.000421172556346834, 'samples': 7412736, 'steps': 14477, 'loss/train': 3.917689085006714} +02/24/2022 20:19:12 - INFO - codeparrot_training - Step 14478: {'lr': 0.00042116063045846073, 'samples': 7413248, 'steps': 14478, 'loss/train': 2.466057538986206} +02/24/2022 20:19:16 - INFO - codeparrot_training - Step 14479: {'lr': 0.00042114870383688985, 'samples': 7413760, 'steps': 14479, 'loss/train': 1.545590877532959} +02/24/2022 20:19:21 - INFO - codeparrot_training - Step 14480: {'lr': 0.0004211367764821722, 'samples': 7414272, 'steps': 14480, 'loss/train': 1.8706343173980713} +02/24/2022 20:19:25 - INFO - codeparrot_training - Step 14481: {'lr': 0.00042112484839435893, 'samples': 7414784, 'steps': 14481, 'loss/train': 2.249089241027832} +02/24/2022 20:19:30 - INFO - codeparrot_training - Step 14482: {'lr': 0.00042111291957350113, 'samples': 7415296, 'steps': 14482, 'loss/train': 2.7552359104156494} +02/24/2022 20:19:34 - INFO - codeparrot_training - Step 14483: {'lr': 0.00042110099001964996, 'samples': 7415808, 'steps': 14483, 'loss/train': 1.8637077808380127} +02/24/2022 20:19:40 - INFO - codeparrot_training - Step 14484: {'lr': 0.0004210890597328564, 'samples': 7416320, 'steps': 14484, 'loss/train': 2.5025877952575684} +02/24/2022 20:19:43 - INFO - codeparrot_training - Step 14485: {'lr': 0.0004210771287131717, 'samples': 7416832, 'steps': 14485, 'loss/train': 2.380509614944458} +02/24/2022 20:19:48 - INFO - codeparrot_training - Step 14486: {'lr': 0.00042106519696064694, 'samples': 7417344, 'steps': 14486, 'loss/train': 2.055649518966675} +02/24/2022 20:19:52 - INFO - codeparrot_training - Step 14487: {'lr': 0.0004210532644753331, 'samples': 7417856, 'steps': 14487, 'loss/train': 1.5627260208129883} +02/24/2022 20:19:58 - INFO - codeparrot_training - Step 14488: {'lr': 0.00042104133125728146, 'samples': 7418368, 'steps': 14488, 'loss/train': 2.8318421840667725} +02/24/2022 20:20:02 - INFO - codeparrot_training - Step 14489: {'lr': 0.00042102939730654304, 'samples': 7418880, 'steps': 14489, 'loss/train': 2.5699374675750732} +02/24/2022 20:20:07 - INFO - codeparrot_training - Step 14490: {'lr': 0.000421017462623169, 'samples': 7419392, 'steps': 14490, 'loss/train': 1.6295500993728638} +02/24/2022 20:20:10 - INFO - codeparrot_training - Step 14491: {'lr': 0.0004210055272072104, 'samples': 7419904, 'steps': 14491, 'loss/train': 1.347360610961914} +02/24/2022 20:20:16 - INFO - codeparrot_training - Step 14492: {'lr': 0.00042099359105871856, 'samples': 7420416, 'steps': 14492, 'loss/train': 1.6652920246124268} +02/24/2022 20:20:19 - INFO - codeparrot_training - Step 14493: {'lr': 0.0004209816541777444, 'samples': 7420928, 'steps': 14493, 'loss/train': 2.105761766433716} +02/24/2022 20:20:25 - INFO - codeparrot_training - Step 14494: {'lr': 0.0004209697165643391, 'samples': 7421440, 'steps': 14494, 'loss/train': 2.452751636505127} +02/24/2022 20:20:29 - INFO - codeparrot_training - Step 14495: {'lr': 0.0004209577782185538, 'samples': 7421952, 'steps': 14495, 'loss/train': 2.643202066421509} +02/24/2022 20:20:34 - INFO - codeparrot_training - Step 14496: {'lr': 0.0004209458391404397, 'samples': 7422464, 'steps': 14496, 'loss/train': 1.2692300081253052} +02/24/2022 20:20:38 - INFO - codeparrot_training - Step 14497: {'lr': 0.0004209338993300479, 'samples': 7422976, 'steps': 14497, 'loss/train': 1.755126953125} +02/24/2022 20:20:45 - INFO - codeparrot_training - Step 14498: {'lr': 0.00042092195878742954, 'samples': 7423488, 'steps': 14498, 'loss/train': 1.7072489261627197} +02/24/2022 20:20:48 - INFO - codeparrot_training - Step 14499: {'lr': 0.0004209100175126358, 'samples': 7424000, 'steps': 14499, 'loss/train': 1.4411416053771973} +02/24/2022 20:20:54 - INFO - codeparrot_training - Step 14500: {'lr': 0.0004208980755057178, 'samples': 7424512, 'steps': 14500, 'loss/train': 2.59185528755188} +02/24/2022 20:20:57 - INFO - codeparrot_training - Step 14501: {'lr': 0.0004208861327667268, 'samples': 7425024, 'steps': 14501, 'loss/train': 1.7651935815811157} +02/24/2022 20:21:03 - INFO - codeparrot_training - Step 14502: {'lr': 0.00042087418929571377, 'samples': 7425536, 'steps': 14502, 'loss/train': 1.57146418094635} +02/24/2022 20:21:06 - INFO - codeparrot_training - Step 14503: {'lr': 0.00042086224509272995, 'samples': 7426048, 'steps': 14503, 'loss/train': 1.0619361400604248} +02/24/2022 20:21:11 - INFO - codeparrot_training - Step 14504: {'lr': 0.0004208503001578266, 'samples': 7426560, 'steps': 14504, 'loss/train': 2.572526693344116} +02/24/2022 20:21:15 - INFO - codeparrot_training - Step 14505: {'lr': 0.00042083835449105477, 'samples': 7427072, 'steps': 14505, 'loss/train': 1.7435468435287476} +02/24/2022 20:21:20 - INFO - codeparrot_training - Step 14506: {'lr': 0.00042082640809246576, 'samples': 7427584, 'steps': 14506, 'loss/train': 0.6964829564094543} +02/24/2022 20:21:24 - INFO - codeparrot_training - Step 14507: {'lr': 0.0004208144609621106, 'samples': 7428096, 'steps': 14507, 'loss/train': 3.0581626892089844} +02/24/2022 20:21:31 - INFO - codeparrot_training - Step 14508: {'lr': 0.0004208025131000405, 'samples': 7428608, 'steps': 14508, 'loss/train': 0.207466721534729} +02/24/2022 20:21:36 - INFO - codeparrot_training - Step 14509: {'lr': 0.0004207905645063067, 'samples': 7429120, 'steps': 14509, 'loss/train': 1.0727956295013428} +02/24/2022 20:21:40 - INFO - codeparrot_training - Step 14510: {'lr': 0.00042077861518096033, 'samples': 7429632, 'steps': 14510, 'loss/train': 2.526712656021118} +02/24/2022 20:21:45 - INFO - codeparrot_training - Step 14511: {'lr': 0.0004207666651240526, 'samples': 7430144, 'steps': 14511, 'loss/train': 1.8233929872512817} +02/24/2022 20:21:49 - INFO - codeparrot_training - Step 14512: {'lr': 0.0004207547143356347, 'samples': 7430656, 'steps': 14512, 'loss/train': 2.5123543739318848} +02/24/2022 20:21:54 - INFO - codeparrot_training - Step 14513: {'lr': 0.00042074276281575787, 'samples': 7431168, 'steps': 14513, 'loss/train': 2.1859428882598877} +02/24/2022 20:21:58 - INFO - codeparrot_training - Step 14514: {'lr': 0.00042073081056447325, 'samples': 7431680, 'steps': 14514, 'loss/train': 2.1641855239868164} +02/24/2022 20:22:03 - INFO - codeparrot_training - Step 14515: {'lr': 0.00042071885758183204, 'samples': 7432192, 'steps': 14515, 'loss/train': 1.7970335483551025} +02/24/2022 20:22:07 - INFO - codeparrot_training - Step 14516: {'lr': 0.00042070690386788545, 'samples': 7432704, 'steps': 14516, 'loss/train': 2.465810537338257} +02/24/2022 20:22:12 - INFO - codeparrot_training - Step 14517: {'lr': 0.0004206949494226847, 'samples': 7433216, 'steps': 14517, 'loss/train': 1.938108205795288} +02/24/2022 20:22:16 - INFO - codeparrot_training - Step 14518: {'lr': 0.000420682994246281, 'samples': 7433728, 'steps': 14518, 'loss/train': 2.027783155441284} +02/24/2022 20:22:21 - INFO - codeparrot_training - Step 14519: {'lr': 0.00042067103833872554, 'samples': 7434240, 'steps': 14519, 'loss/train': 1.2425369024276733} +02/24/2022 20:22:25 - INFO - codeparrot_training - Step 14520: {'lr': 0.0004206590817000695, 'samples': 7434752, 'steps': 14520, 'loss/train': 1.8573615550994873} +02/24/2022 20:22:30 - INFO - codeparrot_training - Step 14521: {'lr': 0.0004206471243303642, 'samples': 7435264, 'steps': 14521, 'loss/train': 0.4201807975769043} +02/24/2022 20:22:34 - INFO - codeparrot_training - Step 14522: {'lr': 0.0004206351662296608, 'samples': 7435776, 'steps': 14522, 'loss/train': 1.7703566551208496} +02/24/2022 20:22:41 - INFO - codeparrot_training - Step 14523: {'lr': 0.0004206232073980105, 'samples': 7436288, 'steps': 14523, 'loss/train': 1.8556761741638184} +02/24/2022 20:22:44 - INFO - codeparrot_training - Step 14524: {'lr': 0.00042061124783546454, 'samples': 7436800, 'steps': 14524, 'loss/train': 2.1972217559814453} +02/24/2022 20:22:50 - INFO - codeparrot_training - Step 14525: {'lr': 0.0004205992875420742, 'samples': 7437312, 'steps': 14525, 'loss/train': 1.4722493886947632} +02/24/2022 20:22:53 - INFO - codeparrot_training - Step 14526: {'lr': 0.0004205873265178907, 'samples': 7437824, 'steps': 14526, 'loss/train': 2.1752734184265137} +02/24/2022 20:22:59 - INFO - codeparrot_training - Step 14527: {'lr': 0.0004205753647629653, 'samples': 7438336, 'steps': 14527, 'loss/train': 2.447154998779297} +02/24/2022 20:23:02 - INFO - codeparrot_training - Step 14528: {'lr': 0.0004205634022773491, 'samples': 7438848, 'steps': 14528, 'loss/train': 2.4360098838806152} +02/24/2022 20:23:08 - INFO - codeparrot_training - Step 14529: {'lr': 0.0004205514390610935, 'samples': 7439360, 'steps': 14529, 'loss/train': 2.466944456100464} +02/24/2022 20:23:11 - INFO - codeparrot_training - Step 14530: {'lr': 0.00042053947511424975, 'samples': 7439872, 'steps': 14530, 'loss/train': 1.8803566694259644} +02/24/2022 20:23:17 - INFO - codeparrot_training - Step 14531: {'lr': 0.00042052751043686895, 'samples': 7440384, 'steps': 14531, 'loss/train': 2.230368137359619} +02/24/2022 20:23:20 - INFO - codeparrot_training - Step 14532: {'lr': 0.00042051554502900245, 'samples': 7440896, 'steps': 14532, 'loss/train': 2.1919503211975098} +02/24/2022 20:23:27 - INFO - codeparrot_training - Step 14533: {'lr': 0.0004205035788907015, 'samples': 7441408, 'steps': 14533, 'loss/train': 1.9169282913208008} +02/24/2022 20:23:30 - INFO - codeparrot_training - Step 14534: {'lr': 0.0004204916120220174, 'samples': 7441920, 'steps': 14534, 'loss/train': 0.8139283657073975} +02/24/2022 20:23:36 - INFO - codeparrot_training - Step 14535: {'lr': 0.00042047964442300137, 'samples': 7442432, 'steps': 14535, 'loss/train': 1.701749563217163} +02/24/2022 20:23:39 - INFO - codeparrot_training - Step 14536: {'lr': 0.0004204676760937046, 'samples': 7442944, 'steps': 14536, 'loss/train': 1.6795092821121216} +02/24/2022 20:23:45 - INFO - codeparrot_training - Step 14537: {'lr': 0.00042045570703417857, 'samples': 7443456, 'steps': 14537, 'loss/train': 1.0003081560134888} +02/24/2022 20:23:48 - INFO - codeparrot_training - Step 14538: {'lr': 0.00042044373724447434, 'samples': 7443968, 'steps': 14538, 'loss/train': 1.6481555700302124} +02/24/2022 20:23:54 - INFO - codeparrot_training - Step 14539: {'lr': 0.0004204317667246432, 'samples': 7444480, 'steps': 14539, 'loss/train': 1.1439129114151} +02/24/2022 20:23:57 - INFO - codeparrot_training - Step 14540: {'lr': 0.00042041979547473665, 'samples': 7444992, 'steps': 14540, 'loss/train': 1.3678139448165894} +02/24/2022 20:24:03 - INFO - codeparrot_training - Step 14541: {'lr': 0.0004204078234948057, 'samples': 7445504, 'steps': 14541, 'loss/train': 0.9234797954559326} +02/24/2022 20:24:06 - INFO - codeparrot_training - Step 14542: {'lr': 0.00042039585078490173, 'samples': 7446016, 'steps': 14542, 'loss/train': 2.22986102104187} +02/24/2022 20:24:13 - INFO - codeparrot_training - Step 14543: {'lr': 0.000420383877345076, 'samples': 7446528, 'steps': 14543, 'loss/train': 2.308837890625} +02/24/2022 20:24:16 - INFO - codeparrot_training - Step 14544: {'lr': 0.00042037190317538, 'samples': 7447040, 'steps': 14544, 'loss/train': 2.48694109916687} +02/24/2022 20:24:22 - INFO - codeparrot_training - Step 14545: {'lr': 0.00042035992827586474, 'samples': 7447552, 'steps': 14545, 'loss/train': 1.7496896982192993} +02/24/2022 20:24:25 - INFO - codeparrot_training - Step 14546: {'lr': 0.00042034795264658163, 'samples': 7448064, 'steps': 14546, 'loss/train': 8.975650787353516} +02/24/2022 20:24:31 - INFO - codeparrot_training - Step 14547: {'lr': 0.00042033597628758206, 'samples': 7448576, 'steps': 14547, 'loss/train': 2.4917502403259277} +02/24/2022 20:24:34 - INFO - codeparrot_training - Step 14548: {'lr': 0.00042032399919891724, 'samples': 7449088, 'steps': 14548, 'loss/train': 1.7235925197601318} +02/24/2022 20:24:40 - INFO - codeparrot_training - Step 14549: {'lr': 0.0004203120213806385, 'samples': 7449600, 'steps': 14549, 'loss/train': 1.7175612449645996} +02/24/2022 20:24:43 - INFO - codeparrot_training - Step 14550: {'lr': 0.0004203000428327971, 'samples': 7450112, 'steps': 14550, 'loss/train': 0.7223354578018188} +02/24/2022 20:24:49 - INFO - codeparrot_training - Step 14551: {'lr': 0.00042028806355544443, 'samples': 7450624, 'steps': 14551, 'loss/train': 1.2693114280700684} +02/24/2022 20:24:52 - INFO - codeparrot_training - Step 14552: {'lr': 0.0004202760835486317, 'samples': 7451136, 'steps': 14552, 'loss/train': 3.00299334526062} +02/24/2022 20:24:58 - INFO - codeparrot_training - Step 14553: {'lr': 0.00042026410281241033, 'samples': 7451648, 'steps': 14553, 'loss/train': 2.212014675140381} +02/24/2022 20:25:02 - INFO - codeparrot_training - Step 14554: {'lr': 0.00042025212134683165, 'samples': 7452160, 'steps': 14554, 'loss/train': 1.0328600406646729} +02/24/2022 20:25:07 - INFO - codeparrot_training - Step 14555: {'lr': 0.0004202401391519469, 'samples': 7452672, 'steps': 14555, 'loss/train': 2.4593493938446045} +02/24/2022 20:25:11 - INFO - codeparrot_training - Step 14556: {'lr': 0.0004202281562278075, 'samples': 7453184, 'steps': 14556, 'loss/train': 2.254045009613037} +02/24/2022 20:25:16 - INFO - codeparrot_training - Step 14557: {'lr': 0.0004202161725744647, 'samples': 7453696, 'steps': 14557, 'loss/train': 0.7042149901390076} +02/24/2022 20:25:20 - INFO - codeparrot_training - Step 14558: {'lr': 0.0004202041881919699, 'samples': 7454208, 'steps': 14558, 'loss/train': 1.346352219581604} +02/24/2022 20:25:25 - INFO - codeparrot_training - Step 14559: {'lr': 0.0004201922030803743, 'samples': 7454720, 'steps': 14559, 'loss/train': 1.8507301807403564} +02/24/2022 20:25:29 - INFO - codeparrot_training - Step 14560: {'lr': 0.0004201802172397295, 'samples': 7455232, 'steps': 14560, 'loss/train': 1.4599277973175049} +02/24/2022 20:25:34 - INFO - codeparrot_training - Step 14561: {'lr': 0.0004201682306700866, 'samples': 7455744, 'steps': 14561, 'loss/train': 3.0319459438323975} +02/24/2022 20:25:38 - INFO - codeparrot_training - Step 14562: {'lr': 0.00042015624337149703, 'samples': 7456256, 'steps': 14562, 'loss/train': 2.387321949005127} +02/24/2022 20:25:44 - INFO - codeparrot_training - Step 14563: {'lr': 0.0004201442553440121, 'samples': 7456768, 'steps': 14563, 'loss/train': 1.8804051876068115} +02/24/2022 20:25:47 - INFO - codeparrot_training - Step 14564: {'lr': 0.00042013226658768333, 'samples': 7457280, 'steps': 14564, 'loss/train': 0.9310999512672424} +02/24/2022 20:25:53 - INFO - codeparrot_training - Step 14565: {'lr': 0.0004201202771025618, 'samples': 7457792, 'steps': 14565, 'loss/train': 1.5589817762374878} +02/24/2022 20:25:56 - INFO - codeparrot_training - Step 14566: {'lr': 0.0004201082868886992, 'samples': 7458304, 'steps': 14566, 'loss/train': 2.2237401008605957} +02/24/2022 20:26:02 - INFO - codeparrot_training - Step 14567: {'lr': 0.00042009629594614656, 'samples': 7458816, 'steps': 14567, 'loss/train': 2.1755847930908203} +02/24/2022 20:26:05 - INFO - codeparrot_training - Step 14568: {'lr': 0.0004200843042749555, 'samples': 7459328, 'steps': 14568, 'loss/train': 1.5562310218811035} +02/24/2022 20:26:12 - INFO - codeparrot_training - Step 14569: {'lr': 0.0004200723118751772, 'samples': 7459840, 'steps': 14569, 'loss/train': 2.437448501586914} +02/24/2022 20:26:17 - INFO - codeparrot_training - Step 14570: {'lr': 0.00042006031874686315, 'samples': 7460352, 'steps': 14570, 'loss/train': 1.4461536407470703} +02/24/2022 20:26:21 - INFO - codeparrot_training - Step 14571: {'lr': 0.00042004832489006474, 'samples': 7460864, 'steps': 14571, 'loss/train': 2.2945306301116943} +02/24/2022 20:26:24 - INFO - codeparrot_training - Step 14572: {'lr': 0.0004200363303048332, 'samples': 7461376, 'steps': 14572, 'loss/train': 1.3468681573867798} +02/24/2022 20:26:30 - INFO - codeparrot_training - Step 14573: {'lr': 0.00042002433499122016, 'samples': 7461888, 'steps': 14573, 'loss/train': 0.2137172967195511} +02/24/2022 20:26:33 - INFO - codeparrot_training - Step 14574: {'lr': 0.00042001233894927684, 'samples': 7462400, 'steps': 14574, 'loss/train': 2.0592262744903564} +02/24/2022 20:26:39 - INFO - codeparrot_training - Step 14575: {'lr': 0.0004200003421790546, 'samples': 7462912, 'steps': 14575, 'loss/train': 2.0852370262145996} +02/24/2022 20:26:42 - INFO - codeparrot_training - Step 14576: {'lr': 0.0004199883446806048, 'samples': 7463424, 'steps': 14576, 'loss/train': 2.2405941486358643} +02/24/2022 20:26:48 - INFO - codeparrot_training - Step 14577: {'lr': 0.00041997634645397897, 'samples': 7463936, 'steps': 14577, 'loss/train': 2.425971508026123} +02/24/2022 20:26:52 - INFO - codeparrot_training - Step 14578: {'lr': 0.0004199643474992285, 'samples': 7464448, 'steps': 14578, 'loss/train': 3.7889187335968018} +02/24/2022 20:26:58 - INFO - codeparrot_training - Step 14579: {'lr': 0.00041995234781640466, 'samples': 7464960, 'steps': 14579, 'loss/train': 2.0633435249328613} +02/24/2022 20:27:01 - INFO - codeparrot_training - Step 14580: {'lr': 0.00041994034740555896, 'samples': 7465472, 'steps': 14580, 'loss/train': 2.2397541999816895} +02/24/2022 20:27:07 - INFO - codeparrot_training - Step 14581: {'lr': 0.00041992834626674273, 'samples': 7465984, 'steps': 14581, 'loss/train': 2.570878744125366} +02/24/2022 20:27:10 - INFO - codeparrot_training - Step 14582: {'lr': 0.0004199163444000075, 'samples': 7466496, 'steps': 14582, 'loss/train': 2.2161812782287598} +02/24/2022 20:27:16 - INFO - codeparrot_training - Step 14583: {'lr': 0.00041990434180540453, 'samples': 7467008, 'steps': 14583, 'loss/train': 1.61948823928833} +02/24/2022 20:27:19 - INFO - codeparrot_training - Step 14584: {'lr': 0.00041989233848298534, 'samples': 7467520, 'steps': 14584, 'loss/train': 2.2366573810577393} +02/24/2022 20:27:25 - INFO - codeparrot_training - Step 14585: {'lr': 0.00041988033443280136, 'samples': 7468032, 'steps': 14585, 'loss/train': 1.9982514381408691} +02/24/2022 20:27:28 - INFO - codeparrot_training - Step 14586: {'lr': 0.00041986832965490396, 'samples': 7468544, 'steps': 14586, 'loss/train': 1.779753565788269} +02/24/2022 20:27:34 - INFO - codeparrot_training - Step 14587: {'lr': 0.0004198563241493445, 'samples': 7469056, 'steps': 14587, 'loss/train': 3.2324678897857666} +02/24/2022 20:27:37 - INFO - codeparrot_training - Step 14588: {'lr': 0.00041984431791617456, 'samples': 7469568, 'steps': 14588, 'loss/train': 2.060723066329956} +02/24/2022 20:27:44 - INFO - codeparrot_training - Step 14589: {'lr': 0.00041983231095544545, 'samples': 7470080, 'steps': 14589, 'loss/train': 0.5975165367126465} +02/24/2022 20:27:48 - INFO - codeparrot_training - Step 14590: {'lr': 0.00041982030326720866, 'samples': 7470592, 'steps': 14590, 'loss/train': 2.745650053024292} +02/24/2022 20:27:53 - INFO - codeparrot_training - Step 14591: {'lr': 0.00041980829485151563, 'samples': 7471104, 'steps': 14591, 'loss/train': 1.505582571029663} +02/24/2022 20:27:57 - INFO - codeparrot_training - Step 14592: {'lr': 0.00041979628570841776, 'samples': 7471616, 'steps': 14592, 'loss/train': 2.2861671447753906} +02/24/2022 20:28:02 - INFO - codeparrot_training - Step 14593: {'lr': 0.00041978427583796654, 'samples': 7472128, 'steps': 14593, 'loss/train': 1.7498937845230103} +02/24/2022 20:28:06 - INFO - codeparrot_training - Step 14594: {'lr': 0.00041977226524021337, 'samples': 7472640, 'steps': 14594, 'loss/train': 3.7761788368225098} +02/24/2022 20:28:11 - INFO - codeparrot_training - Step 14595: {'lr': 0.0004197602539152098, 'samples': 7473152, 'steps': 14595, 'loss/train': 1.72860586643219} +02/24/2022 20:28:15 - INFO - codeparrot_training - Step 14596: {'lr': 0.00041974824186300706, 'samples': 7473664, 'steps': 14596, 'loss/train': 1.5018503665924072} +02/24/2022 20:28:20 - INFO - codeparrot_training - Step 14597: {'lr': 0.0004197362290836569, 'samples': 7474176, 'steps': 14597, 'loss/train': 2.6293368339538574} +02/24/2022 20:28:24 - INFO - codeparrot_training - Step 14598: {'lr': 0.00041972421557721055, 'samples': 7474688, 'steps': 14598, 'loss/train': 1.987839698791504} +02/24/2022 20:28:30 - INFO - codeparrot_training - Step 14599: {'lr': 0.00041971220134371957, 'samples': 7475200, 'steps': 14599, 'loss/train': 3.2312142848968506} +02/24/2022 20:28:34 - INFO - codeparrot_training - Step 14600: {'lr': 0.00041970018638323546, 'samples': 7475712, 'steps': 14600, 'loss/train': 0.6105589270591736} +02/24/2022 20:28:39 - INFO - codeparrot_training - Step 14601: {'lr': 0.0004196881706958096, 'samples': 7476224, 'steps': 14601, 'loss/train': 1.999027967453003} +02/24/2022 20:28:43 - INFO - codeparrot_training - Step 14602: {'lr': 0.00041967615428149346, 'samples': 7476736, 'steps': 14602, 'loss/train': 1.6984055042266846} +02/24/2022 20:28:48 - INFO - codeparrot_training - Step 14603: {'lr': 0.0004196641371403386, 'samples': 7477248, 'steps': 14603, 'loss/train': 2.222597360610962} +02/24/2022 20:28:52 - INFO - codeparrot_training - Step 14604: {'lr': 0.00041965211927239644, 'samples': 7477760, 'steps': 14604, 'loss/train': 2.177447557449341} +02/24/2022 20:28:57 - INFO - codeparrot_training - Step 14605: {'lr': 0.0004196401006777185, 'samples': 7478272, 'steps': 14605, 'loss/train': 2.0778374671936035} +02/24/2022 20:29:01 - INFO - codeparrot_training - Step 14606: {'lr': 0.00041962808135635624, 'samples': 7478784, 'steps': 14606, 'loss/train': 1.7459238767623901} +02/24/2022 20:29:06 - INFO - codeparrot_training - Step 14607: {'lr': 0.00041961606130836105, 'samples': 7479296, 'steps': 14607, 'loss/train': 1.6056931018829346} +02/24/2022 20:29:10 - INFO - codeparrot_training - Step 14608: {'lr': 0.0004196040405337845, 'samples': 7479808, 'steps': 14608, 'loss/train': 2.2504143714904785} +02/24/2022 20:29:15 - INFO - codeparrot_training - Step 14609: {'lr': 0.0004195920190326782, 'samples': 7480320, 'steps': 14609, 'loss/train': 2.0761241912841797} +02/24/2022 20:29:19 - INFO - codeparrot_training - Step 14610: {'lr': 0.0004195799968050935, 'samples': 7480832, 'steps': 14610, 'loss/train': 3.2137739658355713} +02/24/2022 20:29:24 - INFO - codeparrot_training - Step 14611: {'lr': 0.000419567973851082, 'samples': 7481344, 'steps': 14611, 'loss/train': 2.1327872276306152} +02/24/2022 20:29:28 - INFO - codeparrot_training - Step 14612: {'lr': 0.0004195559501706951, 'samples': 7481856, 'steps': 14612, 'loss/train': 2.407708168029785} +02/24/2022 20:29:33 - INFO - codeparrot_training - Step 14613: {'lr': 0.00041954392576398433, 'samples': 7482368, 'steps': 14613, 'loss/train': 2.487175703048706} +02/24/2022 20:29:37 - INFO - codeparrot_training - Step 14614: {'lr': 0.0004195319006310012, 'samples': 7482880, 'steps': 14614, 'loss/train': 2.634536027908325} +02/24/2022 20:29:43 - INFO - codeparrot_training - Step 14615: {'lr': 0.0004195198747717973, 'samples': 7483392, 'steps': 14615, 'loss/train': 1.9193918704986572} +02/24/2022 20:29:47 - INFO - codeparrot_training - Step 14616: {'lr': 0.00041950784818642404, 'samples': 7483904, 'steps': 14616, 'loss/train': 2.3051199913024902} +02/24/2022 20:29:52 - INFO - codeparrot_training - Step 14617: {'lr': 0.000419495820874933, 'samples': 7484416, 'steps': 14617, 'loss/train': 2.5814459323883057} +02/24/2022 20:29:56 - INFO - codeparrot_training - Step 14618: {'lr': 0.0004194837928373757, 'samples': 7484928, 'steps': 14618, 'loss/train': 1.7086278200149536} +02/24/2022 20:30:01 - INFO - codeparrot_training - Step 14619: {'lr': 0.0004194717640738036, 'samples': 7485440, 'steps': 14619, 'loss/train': 2.177126884460449} +02/24/2022 20:30:05 - INFO - codeparrot_training - Step 14620: {'lr': 0.0004194597345842683, 'samples': 7485952, 'steps': 14620, 'loss/train': 2.3541409969329834} +02/24/2022 20:30:10 - INFO - codeparrot_training - Step 14621: {'lr': 0.00041944770436882134, 'samples': 7486464, 'steps': 14621, 'loss/train': 1.2967948913574219} +02/24/2022 20:30:14 - INFO - codeparrot_training - Step 14622: {'lr': 0.00041943567342751423, 'samples': 7486976, 'steps': 14622, 'loss/train': 1.6909550428390503} +02/24/2022 20:30:19 - INFO - codeparrot_training - Step 14623: {'lr': 0.0004194236417603985, 'samples': 7487488, 'steps': 14623, 'loss/train': 0.7737172245979309} +02/24/2022 20:30:23 - INFO - codeparrot_training - Step 14624: {'lr': 0.0004194116093675256, 'samples': 7488000, 'steps': 14624, 'loss/train': 0.6357579231262207} +02/24/2022 20:30:29 - INFO - codeparrot_training - Step 14625: {'lr': 0.0004193995762489472, 'samples': 7488512, 'steps': 14625, 'loss/train': 1.3895153999328613} +02/24/2022 20:30:33 - INFO - codeparrot_training - Step 14626: {'lr': 0.0004193875424047148, 'samples': 7489024, 'steps': 14626, 'loss/train': 2.646043062210083} +02/24/2022 20:30:38 - INFO - codeparrot_training - Step 14627: {'lr': 0.00041937550783488, 'samples': 7489536, 'steps': 14627, 'loss/train': 2.2098827362060547} +02/24/2022 20:30:42 - INFO - codeparrot_training - Step 14628: {'lr': 0.00041936347253949426, 'samples': 7490048, 'steps': 14628, 'loss/train': 1.5238178968429565} +02/24/2022 20:30:47 - INFO - codeparrot_training - Step 14629: {'lr': 0.00041935143651860917, 'samples': 7490560, 'steps': 14629, 'loss/train': 2.241729736328125} +02/24/2022 20:30:51 - INFO - codeparrot_training - Step 14630: {'lr': 0.0004193393997722764, 'samples': 7491072, 'steps': 14630, 'loss/train': 2.1297357082366943} +02/24/2022 20:30:56 - INFO - codeparrot_training - Step 14631: {'lr': 0.00041932736230054725, 'samples': 7491584, 'steps': 14631, 'loss/train': 2.4730610847473145} +02/24/2022 20:31:00 - INFO - codeparrot_training - Step 14632: {'lr': 0.0004193153241034736, 'samples': 7492096, 'steps': 14632, 'loss/train': 2.0761828422546387} +02/24/2022 20:31:06 - INFO - codeparrot_training - Step 14633: {'lr': 0.00041930328518110675, 'samples': 7492608, 'steps': 14633, 'loss/train': 2.089456796646118} +02/24/2022 20:31:09 - INFO - codeparrot_training - Step 14634: {'lr': 0.0004192912455334985, 'samples': 7493120, 'steps': 14634, 'loss/train': 2.841273069381714} +02/24/2022 20:31:15 - INFO - codeparrot_training - Step 14635: {'lr': 0.0004192792051607002, 'samples': 7493632, 'steps': 14635, 'loss/train': 2.551028251647949} +02/24/2022 20:31:19 - INFO - codeparrot_training - Step 14636: {'lr': 0.00041926716406276367, 'samples': 7494144, 'steps': 14636, 'loss/train': 1.873466968536377} +02/24/2022 20:31:24 - INFO - codeparrot_training - Step 14637: {'lr': 0.0004192551222397402, 'samples': 7494656, 'steps': 14637, 'loss/train': 1.8465358018875122} +02/24/2022 20:31:28 - INFO - codeparrot_training - Step 14638: {'lr': 0.0004192430796916816, 'samples': 7495168, 'steps': 14638, 'loss/train': 2.47739315032959} +02/24/2022 20:31:33 - INFO - codeparrot_training - Step 14639: {'lr': 0.0004192310364186394, 'samples': 7495680, 'steps': 14639, 'loss/train': 1.9412683248519897} +02/24/2022 20:31:37 - INFO - codeparrot_training - Step 14640: {'lr': 0.0004192189924206652, 'samples': 7496192, 'steps': 14640, 'loss/train': 1.650928258895874} +02/24/2022 20:31:42 - INFO - codeparrot_training - Step 14641: {'lr': 0.0004192069476978105, 'samples': 7496704, 'steps': 14641, 'loss/train': 1.1390290260314941} +02/24/2022 20:31:46 - INFO - codeparrot_training - Step 14642: {'lr': 0.000419194902250127, 'samples': 7497216, 'steps': 14642, 'loss/train': 1.85894775390625} +02/24/2022 20:31:51 - INFO - codeparrot_training - Step 14643: {'lr': 0.0004191828560776663, 'samples': 7497728, 'steps': 14643, 'loss/train': 0.8702483773231506} +02/24/2022 20:31:55 - INFO - codeparrot_training - Step 14644: {'lr': 0.00041917080918047996, 'samples': 7498240, 'steps': 14644, 'loss/train': 2.2353756427764893} +02/24/2022 20:32:00 - INFO - codeparrot_training - Step 14645: {'lr': 0.00041915876155861954, 'samples': 7498752, 'steps': 14645, 'loss/train': 1.6367141008377075} +02/24/2022 20:32:04 - INFO - codeparrot_training - Step 14646: {'lr': 0.0004191467132121367, 'samples': 7499264, 'steps': 14646, 'loss/train': 1.5821138620376587} +02/24/2022 20:32:09 - INFO - codeparrot_training - Step 14647: {'lr': 0.00041913466414108315, 'samples': 7499776, 'steps': 14647, 'loss/train': 2.1365838050842285} +02/24/2022 20:32:13 - INFO - codeparrot_training - Step 14648: {'lr': 0.0004191226143455103, 'samples': 7500288, 'steps': 14648, 'loss/train': 1.8549156188964844} +02/24/2022 20:32:19 - INFO - codeparrot_training - Step 14649: {'lr': 0.00041911056382546997, 'samples': 7500800, 'steps': 14649, 'loss/train': 0.9458596706390381} +02/24/2022 20:32:22 - INFO - codeparrot_training - Step 14650: {'lr': 0.00041909851258101357, 'samples': 7501312, 'steps': 14650, 'loss/train': 1.5656195878982544} +02/24/2022 20:32:26 - INFO - codeparrot_training - Step 14651: {'lr': 0.0004190864606121929, 'samples': 7501824, 'steps': 14651, 'loss/train': 3.424837112426758} +02/24/2022 20:32:33 - INFO - codeparrot_training - Step 14652: {'lr': 0.0004190744079190595, 'samples': 7502336, 'steps': 14652, 'loss/train': 1.837128758430481} +02/24/2022 20:32:38 - INFO - codeparrot_training - Step 14653: {'lr': 0.0004190623545016651, 'samples': 7502848, 'steps': 14653, 'loss/train': 2.3164870738983154} +02/24/2022 20:32:42 - INFO - codeparrot_training - Step 14654: {'lr': 0.00041905030036006106, 'samples': 7503360, 'steps': 14654, 'loss/train': 1.6023658514022827} +02/24/2022 20:32:47 - INFO - codeparrot_training - Step 14655: {'lr': 0.00041903824549429936, 'samples': 7503872, 'steps': 14655, 'loss/train': 0.9231467247009277} +02/24/2022 20:32:51 - INFO - codeparrot_training - Step 14656: {'lr': 0.00041902618990443156, 'samples': 7504384, 'steps': 14656, 'loss/train': 2.1635947227478027} +02/24/2022 20:32:57 - INFO - codeparrot_training - Step 14657: {'lr': 0.0004190141335905091, 'samples': 7504896, 'steps': 14657, 'loss/train': 1.8210846185684204} +02/24/2022 20:33:00 - INFO - codeparrot_training - Step 14658: {'lr': 0.0004190020765525838, 'samples': 7505408, 'steps': 14658, 'loss/train': 2.2828598022460938} +02/24/2022 20:33:06 - INFO - codeparrot_training - Step 14659: {'lr': 0.0004189900187907073, 'samples': 7505920, 'steps': 14659, 'loss/train': 1.8248789310455322} +02/24/2022 20:33:09 - INFO - codeparrot_training - Step 14660: {'lr': 0.0004189779603049312, 'samples': 7506432, 'steps': 14660, 'loss/train': 1.1439238786697388} +02/24/2022 20:33:15 - INFO - codeparrot_training - Step 14661: {'lr': 0.00041896590109530713, 'samples': 7506944, 'steps': 14661, 'loss/train': 1.7079750299453735} +02/24/2022 20:33:19 - INFO - codeparrot_training - Step 14662: {'lr': 0.00041895384116188685, 'samples': 7507456, 'steps': 14662, 'loss/train': 1.5416775941848755} +02/24/2022 20:33:24 - INFO - codeparrot_training - Step 14663: {'lr': 0.000418941780504722, 'samples': 7507968, 'steps': 14663, 'loss/train': 1.5117465257644653} +02/24/2022 20:33:28 - INFO - codeparrot_training - Step 14664: {'lr': 0.00041892971912386415, 'samples': 7508480, 'steps': 14664, 'loss/train': 2.0657784938812256} +02/24/2022 20:33:33 - INFO - codeparrot_training - Step 14665: {'lr': 0.000418917657019365, 'samples': 7508992, 'steps': 14665, 'loss/train': 2.7559168338775635} +02/24/2022 20:33:37 - INFO - codeparrot_training - Step 14666: {'lr': 0.0004189055941912763, 'samples': 7509504, 'steps': 14666, 'loss/train': 2.1266298294067383} +02/24/2022 20:33:40 - INFO - codeparrot_training - Step 14667: {'lr': 0.0004188935306396496, 'samples': 7510016, 'steps': 14667, 'loss/train': 2.2662570476531982} +02/24/2022 20:33:46 - INFO - codeparrot_training - Step 14668: {'lr': 0.00041888146636453674, 'samples': 7510528, 'steps': 14668, 'loss/train': 2.0040929317474365} +02/24/2022 20:33:50 - INFO - codeparrot_training - Step 14669: {'lr': 0.0004188694013659892, 'samples': 7511040, 'steps': 14669, 'loss/train': 2.4362144470214844} +02/24/2022 20:33:55 - INFO - codeparrot_training - Step 14670: {'lr': 0.0004188573356440588, 'samples': 7511552, 'steps': 14670, 'loss/train': 1.0447674989700317} +02/24/2022 20:33:59 - INFO - codeparrot_training - Step 14671: {'lr': 0.0004188452691987973, 'samples': 7512064, 'steps': 14671, 'loss/train': 2.4095516204833984} +02/24/2022 20:34:04 - INFO - codeparrot_training - Step 14672: {'lr': 0.0004188332020302561, 'samples': 7512576, 'steps': 14672, 'loss/train': 2.437413215637207} +02/24/2022 20:34:08 - INFO - codeparrot_training - Step 14673: {'lr': 0.0004188211341384872, 'samples': 7513088, 'steps': 14673, 'loss/train': 1.5840586423873901} +02/24/2022 20:34:14 - INFO - codeparrot_training - Step 14674: {'lr': 0.0004188090655235421, 'samples': 7513600, 'steps': 14674, 'loss/train': 3.0262913703918457} +02/24/2022 20:34:19 - INFO - codeparrot_training - Step 14675: {'lr': 0.00041879699618547263, 'samples': 7514112, 'steps': 14675, 'loss/train': 2.727795362472534} +02/24/2022 20:34:23 - INFO - codeparrot_training - Step 14676: {'lr': 0.0004187849261243304, 'samples': 7514624, 'steps': 14676, 'loss/train': 0.6858757734298706} +02/24/2022 20:34:28 - INFO - codeparrot_training - Step 14677: {'lr': 0.0004187728553401671, 'samples': 7515136, 'steps': 14677, 'loss/train': 2.3487091064453125} +02/24/2022 20:34:32 - INFO - codeparrot_training - Step 14678: {'lr': 0.0004187607838330345, 'samples': 7515648, 'steps': 14678, 'loss/train': 2.055164098739624} +02/24/2022 20:34:37 - INFO - codeparrot_training - Step 14679: {'lr': 0.0004187487116029843, 'samples': 7516160, 'steps': 14679, 'loss/train': 2.552825927734375} +02/24/2022 20:34:41 - INFO - codeparrot_training - Step 14680: {'lr': 0.0004187366386500683, 'samples': 7516672, 'steps': 14680, 'loss/train': 2.208420515060425} +02/24/2022 20:34:46 - INFO - codeparrot_training - Step 14681: {'lr': 0.00041872456497433797, 'samples': 7517184, 'steps': 14681, 'loss/train': 2.609623432159424} +02/24/2022 20:34:50 - INFO - codeparrot_training - Step 14682: {'lr': 0.00041871249057584526, 'samples': 7517696, 'steps': 14682, 'loss/train': 2.8586361408233643} +02/24/2022 20:34:57 - INFO - codeparrot_training - Step 14683: {'lr': 0.00041870041545464176, 'samples': 7518208, 'steps': 14683, 'loss/train': 2.1922357082366943} +02/24/2022 20:35:00 - INFO - codeparrot_training - Step 14684: {'lr': 0.00041868833961077935, 'samples': 7518720, 'steps': 14684, 'loss/train': 2.1248395442962646} +02/24/2022 20:35:06 - INFO - codeparrot_training - Step 14685: {'lr': 0.0004186762630443096, 'samples': 7519232, 'steps': 14685, 'loss/train': 1.9496482610702515} +02/24/2022 20:35:09 - INFO - codeparrot_training - Step 14686: {'lr': 0.0004186641857552842, 'samples': 7519744, 'steps': 14686, 'loss/train': 2.205439805984497} +02/24/2022 20:35:15 - INFO - codeparrot_training - Step 14687: {'lr': 0.0004186521077437551, 'samples': 7520256, 'steps': 14687, 'loss/train': 1.977354884147644} +02/24/2022 20:35:18 - INFO - codeparrot_training - Step 14688: {'lr': 0.00041864002900977393, 'samples': 7520768, 'steps': 14688, 'loss/train': 2.500113010406494} +02/24/2022 20:35:24 - INFO - codeparrot_training - Step 14689: {'lr': 0.0004186279495533923, 'samples': 7521280, 'steps': 14689, 'loss/train': 1.9217275381088257} +02/24/2022 20:35:27 - INFO - codeparrot_training - Step 14690: {'lr': 0.0004186158693746622, 'samples': 7521792, 'steps': 14690, 'loss/train': 2.2815613746643066} +02/24/2022 20:35:33 - INFO - codeparrot_training - Step 14691: {'lr': 0.0004186037884736352, 'samples': 7522304, 'steps': 14691, 'loss/train': 2.5176374912261963} +02/24/2022 20:35:37 - INFO - codeparrot_training - Step 14692: {'lr': 0.0004185917068503632, 'samples': 7522816, 'steps': 14692, 'loss/train': 1.900394320487976} +02/24/2022 20:35:40 - INFO - codeparrot_training - Step 14693: {'lr': 0.00041857962450489786, 'samples': 7523328, 'steps': 14693, 'loss/train': 0.40461859107017517} +02/24/2022 20:35:46 - INFO - codeparrot_training - Step 14694: {'lr': 0.0004185675414372908, 'samples': 7523840, 'steps': 14694, 'loss/train': 2.1908669471740723} +02/24/2022 20:35:52 - INFO - codeparrot_training - Step 14695: {'lr': 0.000418555457647594, 'samples': 7524352, 'steps': 14695, 'loss/train': 2.508284330368042} +02/24/2022 20:35:55 - INFO - codeparrot_training - Step 14696: {'lr': 0.00041854337313585913, 'samples': 7524864, 'steps': 14696, 'loss/train': 0.7164973616600037} +02/24/2022 20:36:01 - INFO - codeparrot_training - Step 14697: {'lr': 0.00041853128790213804, 'samples': 7525376, 'steps': 14697, 'loss/train': 1.2111140489578247} +02/24/2022 20:36:05 - INFO - codeparrot_training - Step 14698: {'lr': 0.0004185192019464823, 'samples': 7525888, 'steps': 14698, 'loss/train': 1.7549132108688354} +02/24/2022 20:36:10 - INFO - codeparrot_training - Step 14699: {'lr': 0.0004185071152689439, 'samples': 7526400, 'steps': 14699, 'loss/train': 1.9711965322494507} +02/24/2022 20:36:14 - INFO - codeparrot_training - Step 14700: {'lr': 0.0004184950278695745, 'samples': 7526912, 'steps': 14700, 'loss/train': 1.5679363012313843} +02/24/2022 20:36:19 - INFO - codeparrot_training - Step 14701: {'lr': 0.0004184829397484259, 'samples': 7527424, 'steps': 14701, 'loss/train': 1.7270411252975464} +02/24/2022 20:36:23 - INFO - codeparrot_training - Step 14702: {'lr': 0.00041847085090554985, 'samples': 7527936, 'steps': 14702, 'loss/train': 2.149169921875} +02/24/2022 20:36:28 - INFO - codeparrot_training - Step 14703: {'lr': 0.00041845876134099825, 'samples': 7528448, 'steps': 14703, 'loss/train': 2.5090620517730713} +02/24/2022 20:36:32 - INFO - codeparrot_training - Step 14704: {'lr': 0.0004184466710548227, 'samples': 7528960, 'steps': 14704, 'loss/train': 2.6662349700927734} +02/24/2022 20:36:37 - INFO - codeparrot_training - Step 14705: {'lr': 0.0004184345800470752, 'samples': 7529472, 'steps': 14705, 'loss/train': 2.8832342624664307} +02/24/2022 20:36:41 - INFO - codeparrot_training - Step 14706: {'lr': 0.00041842248831780736, 'samples': 7529984, 'steps': 14706, 'loss/train': 2.292116165161133} +02/24/2022 20:36:47 - INFO - codeparrot_training - Step 14707: {'lr': 0.0004184103958670712, 'samples': 7530496, 'steps': 14707, 'loss/train': 1.8573864698410034} +02/24/2022 20:36:50 - INFO - codeparrot_training - Step 14708: {'lr': 0.00041839830269491823, 'samples': 7531008, 'steps': 14708, 'loss/train': 2.167811870574951} +02/24/2022 20:36:56 - INFO - codeparrot_training - Step 14709: {'lr': 0.00041838620880140046, 'samples': 7531520, 'steps': 14709, 'loss/train': 1.3617523908615112} +02/24/2022 20:36:59 - INFO - codeparrot_training - Step 14710: {'lr': 0.00041837411418656965, 'samples': 7532032, 'steps': 14710, 'loss/train': 0.7553223371505737} +02/24/2022 20:37:05 - INFO - codeparrot_training - Step 14711: {'lr': 0.0004183620188504776, 'samples': 7532544, 'steps': 14711, 'loss/train': 1.7091599702835083} +02/24/2022 20:37:08 - INFO - codeparrot_training - Step 14712: {'lr': 0.0004183499227931761, 'samples': 7533056, 'steps': 14712, 'loss/train': 1.4914801120758057} +02/24/2022 20:37:14 - INFO - codeparrot_training - Step 14713: {'lr': 0.00041833782601471704, 'samples': 7533568, 'steps': 14713, 'loss/train': 1.6754792928695679} +02/24/2022 20:37:17 - INFO - codeparrot_training - Step 14714: {'lr': 0.0004183257285151521, 'samples': 7534080, 'steps': 14714, 'loss/train': 1.8323981761932373} +02/24/2022 20:37:23 - INFO - codeparrot_training - Step 14715: {'lr': 0.00041831363029453327, 'samples': 7534592, 'steps': 14715, 'loss/train': 2.3271398544311523} +02/24/2022 20:37:26 - INFO - codeparrot_training - Step 14716: {'lr': 0.0004183015313529123, 'samples': 7535104, 'steps': 14716, 'loss/train': 2.863957405090332} +02/24/2022 20:37:33 - INFO - codeparrot_training - Step 14717: {'lr': 0.00041828943169034094, 'samples': 7535616, 'steps': 14717, 'loss/train': 1.5282034873962402} +02/24/2022 20:37:36 - INFO - codeparrot_training - Step 14718: {'lr': 0.0004182773313068711, 'samples': 7536128, 'steps': 14718, 'loss/train': 2.154193878173828} +02/24/2022 20:37:42 - INFO - codeparrot_training - Step 14719: {'lr': 0.00041826523020255463, 'samples': 7536640, 'steps': 14719, 'loss/train': 1.8944965600967407} +02/24/2022 20:37:45 - INFO - codeparrot_training - Step 14720: {'lr': 0.00041825312837744333, 'samples': 7537152, 'steps': 14720, 'loss/train': 1.7460154294967651} +02/24/2022 20:37:51 - INFO - codeparrot_training - Step 14721: {'lr': 0.00041824102583158906, 'samples': 7537664, 'steps': 14721, 'loss/train': 3.624465227127075} +02/24/2022 20:37:54 - INFO - codeparrot_training - Step 14722: {'lr': 0.0004182289225650437, 'samples': 7538176, 'steps': 14722, 'loss/train': 2.5229992866516113} +02/24/2022 20:38:00 - INFO - codeparrot_training - Step 14723: {'lr': 0.00041821681857785904, 'samples': 7538688, 'steps': 14723, 'loss/train': 7.863982200622559} +02/24/2022 20:38:03 - INFO - codeparrot_training - Step 14724: {'lr': 0.0004182047138700869, 'samples': 7539200, 'steps': 14724, 'loss/train': 3.5256266593933105} +02/24/2022 20:38:09 - INFO - codeparrot_training - Step 14725: {'lr': 0.0004181926084417792, 'samples': 7539712, 'steps': 14725, 'loss/train': 1.2098037004470825} +02/24/2022 20:38:12 - INFO - codeparrot_training - Step 14726: {'lr': 0.0004181805022929878, 'samples': 7540224, 'steps': 14726, 'loss/train': 1.508366584777832} +02/24/2022 20:38:18 - INFO - codeparrot_training - Step 14727: {'lr': 0.0004181683954237645, 'samples': 7540736, 'steps': 14727, 'loss/train': 2.637098550796509} +02/24/2022 20:38:22 - INFO - codeparrot_training - Step 14728: {'lr': 0.00041815628783416117, 'samples': 7541248, 'steps': 14728, 'loss/train': 2.6529407501220703} +02/24/2022 20:38:27 - INFO - codeparrot_training - Step 14729: {'lr': 0.00041814417952422975, 'samples': 7541760, 'steps': 14729, 'loss/train': 1.285556674003601} +02/24/2022 20:38:31 - INFO - codeparrot_training - Step 14730: {'lr': 0.000418132070494022, 'samples': 7542272, 'steps': 14730, 'loss/train': 0.8838563561439514} +02/24/2022 20:38:37 - INFO - codeparrot_training - Step 14731: {'lr': 0.00041811996074358993, 'samples': 7542784, 'steps': 14731, 'loss/train': 2.138936996459961} +02/24/2022 20:38:40 - INFO - codeparrot_training - Step 14732: {'lr': 0.00041810785027298524, 'samples': 7543296, 'steps': 14732, 'loss/train': 2.455162763595581} +02/24/2022 20:38:46 - INFO - codeparrot_training - Step 14733: {'lr': 0.00041809573908225997, 'samples': 7543808, 'steps': 14733, 'loss/train': 2.405764579772949} +02/24/2022 20:38:49 - INFO - codeparrot_training - Step 14734: {'lr': 0.00041808362717146594, 'samples': 7544320, 'steps': 14734, 'loss/train': 1.3611860275268555} +02/24/2022 20:38:55 - INFO - codeparrot_training - Step 14735: {'lr': 0.00041807151454065493, 'samples': 7544832, 'steps': 14735, 'loss/train': 1.5316896438598633} +02/24/2022 20:38:58 - INFO - codeparrot_training - Step 14736: {'lr': 0.00041805940118987904, 'samples': 7545344, 'steps': 14736, 'loss/train': 1.4424585103988647} +02/24/2022 20:39:04 - INFO - codeparrot_training - Step 14737: {'lr': 0.0004180472871191899, 'samples': 7545856, 'steps': 14737, 'loss/train': 2.603076219558716} +02/24/2022 20:39:07 - INFO - codeparrot_training - Step 14738: {'lr': 0.0004180351723286396, 'samples': 7546368, 'steps': 14738, 'loss/train': 2.431905508041382} +02/24/2022 20:39:13 - INFO - codeparrot_training - Step 14739: {'lr': 0.00041802305681828007, 'samples': 7546880, 'steps': 14739, 'loss/train': 2.0364010334014893} +02/24/2022 20:39:16 - INFO - codeparrot_training - Step 14740: {'lr': 0.00041801094058816304, 'samples': 7547392, 'steps': 14740, 'loss/train': 2.643197774887085} +02/24/2022 20:39:22 - INFO - codeparrot_training - Step 14741: {'lr': 0.0004179988236383405, 'samples': 7547904, 'steps': 14741, 'loss/train': 1.226442575454712} +02/24/2022 20:39:25 - INFO - codeparrot_training - Step 14742: {'lr': 0.00041798670596886433, 'samples': 7548416, 'steps': 14742, 'loss/train': 2.3600006103515625} +02/24/2022 20:39:31 - INFO - codeparrot_training - Step 14743: {'lr': 0.00041797458757978647, 'samples': 7548928, 'steps': 14743, 'loss/train': 1.5221009254455566} +02/24/2022 20:39:34 - INFO - codeparrot_training - Step 14744: {'lr': 0.0004179624684711588, 'samples': 7549440, 'steps': 14744, 'loss/train': 2.237016439437866} +02/24/2022 20:39:40 - INFO - codeparrot_training - Step 14745: {'lr': 0.0004179503486430333, 'samples': 7549952, 'steps': 14745, 'loss/train': 1.6865696907043457} +02/24/2022 20:39:44 - INFO - codeparrot_training - Step 14746: {'lr': 0.00041793822809546176, 'samples': 7550464, 'steps': 14746, 'loss/train': 1.893465518951416} +02/24/2022 20:39:50 - INFO - codeparrot_training - Step 14747: {'lr': 0.0004179261068284963, 'samples': 7550976, 'steps': 14747, 'loss/train': 1.814063310623169} +02/24/2022 20:39:53 - INFO - codeparrot_training - Step 14748: {'lr': 0.00041791398484218855, 'samples': 7551488, 'steps': 14748, 'loss/train': 1.319667100906372} +02/24/2022 20:39:59 - INFO - codeparrot_training - Step 14749: {'lr': 0.0004179018621365908, 'samples': 7552000, 'steps': 14749, 'loss/train': 0.8811789155006409} +02/24/2022 20:40:02 - INFO - codeparrot_training - Step 14750: {'lr': 0.00041788973871175465, 'samples': 7552512, 'steps': 14750, 'loss/train': 2.1467764377593994} +02/24/2022 20:40:08 - INFO - codeparrot_training - Step 14751: {'lr': 0.00041787761456773214, 'samples': 7553024, 'steps': 14751, 'loss/train': 2.9769864082336426} +02/24/2022 20:40:11 - INFO - codeparrot_training - Step 14752: {'lr': 0.00041786548970457535, 'samples': 7553536, 'steps': 14752, 'loss/train': 2.068859815597534} +02/24/2022 20:40:17 - INFO - codeparrot_training - Step 14753: {'lr': 0.000417853364122336, 'samples': 7554048, 'steps': 14753, 'loss/train': 2.2789199352264404} +02/24/2022 20:40:20 - INFO - codeparrot_training - Step 14754: {'lr': 0.0004178412378210662, 'samples': 7554560, 'steps': 14754, 'loss/train': 3.3096373081207275} +02/24/2022 20:40:26 - INFO - codeparrot_training - Step 14755: {'lr': 0.0004178291108008179, 'samples': 7555072, 'steps': 14755, 'loss/train': 0.914291501045227} +02/24/2022 20:40:30 - INFO - codeparrot_training - Step 14756: {'lr': 0.00041781698306164283, 'samples': 7555584, 'steps': 14756, 'loss/train': 1.2013415098190308} +02/24/2022 20:40:35 - INFO - codeparrot_training - Step 14757: {'lr': 0.0004178048546035932, 'samples': 7556096, 'steps': 14757, 'loss/train': 1.8637973070144653} +02/24/2022 20:40:39 - INFO - codeparrot_training - Step 14758: {'lr': 0.00041779272542672086, 'samples': 7556608, 'steps': 14758, 'loss/train': 1.29334557056427} +02/24/2022 20:40:44 - INFO - codeparrot_training - Step 14759: {'lr': 0.00041778059553107766, 'samples': 7557120, 'steps': 14759, 'loss/train': 2.095696449279785} +02/24/2022 20:40:48 - INFO - codeparrot_training - Step 14760: {'lr': 0.00041776846491671575, 'samples': 7557632, 'steps': 14760, 'loss/train': 1.901415228843689} +02/24/2022 20:40:53 - INFO - codeparrot_training - Step 14761: {'lr': 0.000417756333583687, 'samples': 7558144, 'steps': 14761, 'loss/train': 2.9715511798858643} +02/24/2022 20:40:57 - INFO - codeparrot_training - Step 14762: {'lr': 0.0004177442015320434, 'samples': 7558656, 'steps': 14762, 'loss/train': 1.8797039985656738} +02/24/2022 20:41:02 - INFO - codeparrot_training - Step 14763: {'lr': 0.0004177320687618369, 'samples': 7559168, 'steps': 14763, 'loss/train': 2.5225167274475098} +02/24/2022 20:41:06 - INFO - codeparrot_training - Step 14764: {'lr': 0.0004177199352731194, 'samples': 7559680, 'steps': 14764, 'loss/train': 2.6675233840942383} +02/24/2022 20:41:12 - INFO - codeparrot_training - Step 14765: {'lr': 0.0004177078010659431, 'samples': 7560192, 'steps': 14765, 'loss/train': 7.817892074584961} +02/24/2022 20:41:16 - INFO - codeparrot_training - Step 14766: {'lr': 0.0004176956661403597, 'samples': 7560704, 'steps': 14766, 'loss/train': 2.0508601665496826} +02/24/2022 20:41:19 - INFO - codeparrot_training - Step 14767: {'lr': 0.0004176835304964214, 'samples': 7561216, 'steps': 14767, 'loss/train': 1.1477147340774536} +02/24/2022 20:41:25 - INFO - codeparrot_training - Step 14768: {'lr': 0.00041767139413418, 'samples': 7561728, 'steps': 14768, 'loss/train': 2.1556835174560547} +02/24/2022 20:41:28 - INFO - codeparrot_training - Step 14769: {'lr': 0.00041765925705368766, 'samples': 7562240, 'steps': 14769, 'loss/train': 1.9438164234161377} +02/24/2022 20:41:34 - INFO - codeparrot_training - Step 14770: {'lr': 0.00041764711925499633, 'samples': 7562752, 'steps': 14770, 'loss/train': 2.140820026397705} +02/24/2022 20:41:37 - INFO - codeparrot_training - Step 14771: {'lr': 0.0004176349807381579, 'samples': 7563264, 'steps': 14771, 'loss/train': 2.330643892288208} +02/24/2022 20:41:43 - INFO - codeparrot_training - Step 14772: {'lr': 0.0004176228415032245, 'samples': 7563776, 'steps': 14772, 'loss/train': 1.9728665351867676} +02/24/2022 20:41:46 - INFO - codeparrot_training - Step 14773: {'lr': 0.000417610701550248, 'samples': 7564288, 'steps': 14773, 'loss/train': 2.3594679832458496} +02/24/2022 20:41:52 - INFO - codeparrot_training - Step 14774: {'lr': 0.0004175985608792806, 'samples': 7564800, 'steps': 14774, 'loss/train': 2.7506489753723145} +02/24/2022 20:41:55 - INFO - codeparrot_training - Step 14775: {'lr': 0.00041758641949037414, 'samples': 7565312, 'steps': 14775, 'loss/train': 1.4936597347259521} +02/24/2022 20:42:02 - INFO - codeparrot_training - Step 14776: {'lr': 0.00041757427738358066, 'samples': 7565824, 'steps': 14776, 'loss/train': 2.4827866554260254} +02/24/2022 20:42:05 - INFO - codeparrot_training - Step 14777: {'lr': 0.00041756213455895215, 'samples': 7566336, 'steps': 14777, 'loss/train': 1.9582109451293945} +02/24/2022 20:42:11 - INFO - codeparrot_training - Step 14778: {'lr': 0.00041754999101654066, 'samples': 7566848, 'steps': 14778, 'loss/train': 2.7278366088867188} +02/24/2022 20:42:14 - INFO - codeparrot_training - Step 14779: {'lr': 0.0004175378467563983, 'samples': 7567360, 'steps': 14779, 'loss/train': 1.2823923826217651} +02/24/2022 20:42:20 - INFO - codeparrot_training - Step 14780: {'lr': 0.00041752570177857695, 'samples': 7567872, 'steps': 14780, 'loss/train': 2.1559641361236572} +02/24/2022 20:42:23 - INFO - codeparrot_training - Step 14781: {'lr': 0.0004175135560831287, 'samples': 7568384, 'steps': 14781, 'loss/train': 1.3038510084152222} +02/24/2022 20:42:29 - INFO - codeparrot_training - Step 14782: {'lr': 0.00041750140967010554, 'samples': 7568896, 'steps': 14782, 'loss/train': 1.337658166885376} +02/24/2022 20:42:32 - INFO - codeparrot_training - Step 14783: {'lr': 0.00041748926253955954, 'samples': 7569408, 'steps': 14783, 'loss/train': 1.157994031906128} +02/24/2022 20:42:38 - INFO - codeparrot_training - Step 14784: {'lr': 0.0004174771146915427, 'samples': 7569920, 'steps': 14784, 'loss/train': 1.7247217893600464} +02/24/2022 20:42:41 - INFO - codeparrot_training - Step 14785: {'lr': 0.00041746496612610705, 'samples': 7570432, 'steps': 14785, 'loss/train': 2.2335851192474365} +02/24/2022 20:42:47 - INFO - codeparrot_training - Step 14786: {'lr': 0.00041745281684330476, 'samples': 7570944, 'steps': 14786, 'loss/train': 1.3352926969528198} +02/24/2022 20:42:50 - INFO - codeparrot_training - Step 14787: {'lr': 0.0004174406668431877, 'samples': 7571456, 'steps': 14787, 'loss/train': 2.600802421569824} +02/24/2022 20:42:56 - INFO - codeparrot_training - Step 14788: {'lr': 0.000417428516125808, 'samples': 7571968, 'steps': 14788, 'loss/train': 1.2635252475738525} +02/24/2022 20:43:02 - INFO - codeparrot_training - Step 14789: {'lr': 0.0004174163646912178, 'samples': 7572480, 'steps': 14789, 'loss/train': 1.9817677736282349} +02/24/2022 20:43:06 - INFO - codeparrot_training - Step 14790: {'lr': 0.0004174042125394689, 'samples': 7572992, 'steps': 14790, 'loss/train': 1.7664518356323242} +02/24/2022 20:43:11 - INFO - codeparrot_training - Step 14791: {'lr': 0.00041739205967061366, 'samples': 7573504, 'steps': 14791, 'loss/train': 1.8326669931411743} +02/24/2022 20:43:15 - INFO - codeparrot_training - Step 14792: {'lr': 0.0004173799060847039, 'samples': 7574016, 'steps': 14792, 'loss/train': 1.4710299968719482} +02/24/2022 20:43:18 - INFO - codeparrot_training - Step 14793: {'lr': 0.00041736775178179174, 'samples': 7574528, 'steps': 14793, 'loss/train': 2.086339235305786} +02/24/2022 20:43:24 - INFO - codeparrot_training - Step 14794: {'lr': 0.0004173555967619294, 'samples': 7575040, 'steps': 14794, 'loss/train': 1.2483069896697998} +02/24/2022 20:43:27 - INFO - codeparrot_training - Step 14795: {'lr': 0.00041734344102516873, 'samples': 7575552, 'steps': 14795, 'loss/train': 1.9300806522369385} +02/24/2022 20:43:33 - INFO - codeparrot_training - Step 14796: {'lr': 0.0004173312845715619, 'samples': 7576064, 'steps': 14796, 'loss/train': 2.8084537982940674} +02/24/2022 20:43:37 - INFO - codeparrot_training - Step 14797: {'lr': 0.000417319127401161, 'samples': 7576576, 'steps': 14797, 'loss/train': 2.187028169631958} +02/24/2022 20:43:42 - INFO - codeparrot_training - Step 14798: {'lr': 0.00041730696951401816, 'samples': 7577088, 'steps': 14798, 'loss/train': 1.3990521430969238} +02/24/2022 20:43:46 - INFO - codeparrot_training - Step 14799: {'lr': 0.00041729481091018527, 'samples': 7577600, 'steps': 14799, 'loss/train': 2.188141107559204} +02/24/2022 20:43:51 - INFO - codeparrot_training - Step 14800: {'lr': 0.0004172826515897146, 'samples': 7578112, 'steps': 14800, 'loss/train': 3.5938236713409424} +02/24/2022 20:43:54 - INFO - codeparrot_training - Step 14801: {'lr': 0.0004172704915526581, 'samples': 7578624, 'steps': 14801, 'loss/train': 2.7196619510650635} +02/24/2022 20:44:01 - INFO - codeparrot_training - Step 14802: {'lr': 0.000417258330799068, 'samples': 7579136, 'steps': 14802, 'loss/train': 2.588388442993164} +02/24/2022 20:44:06 - INFO - codeparrot_training - Step 14803: {'lr': 0.00041724616932899627, 'samples': 7579648, 'steps': 14803, 'loss/train': 2.099151611328125} +02/24/2022 20:44:10 - INFO - codeparrot_training - Step 14804: {'lr': 0.0004172340071424951, 'samples': 7580160, 'steps': 14804, 'loss/train': 0.7498244643211365} +02/24/2022 20:44:15 - INFO - codeparrot_training - Step 14805: {'lr': 0.0004172218442396165, 'samples': 7580672, 'steps': 14805, 'loss/train': 1.2891371250152588} +02/24/2022 20:44:19 - INFO - codeparrot_training - Step 14806: {'lr': 0.00041720968062041266, 'samples': 7581184, 'steps': 14806, 'loss/train': 2.330594062805176} +02/24/2022 20:44:24 - INFO - codeparrot_training - Step 14807: {'lr': 0.0004171975162849356, 'samples': 7581696, 'steps': 14807, 'loss/train': 1.589237928390503} +02/24/2022 20:44:28 - INFO - codeparrot_training - Step 14808: {'lr': 0.0004171853512332375, 'samples': 7582208, 'steps': 14808, 'loss/train': 1.9926923513412476} +02/24/2022 20:44:33 - INFO - codeparrot_training - Step 14809: {'lr': 0.00041717318546537045, 'samples': 7582720, 'steps': 14809, 'loss/train': 2.358306407928467} +02/24/2022 20:44:37 - INFO - codeparrot_training - Step 14810: {'lr': 0.0004171610189813866, 'samples': 7583232, 'steps': 14810, 'loss/train': 2.5473005771636963} +02/24/2022 20:44:43 - INFO - codeparrot_training - Step 14811: {'lr': 0.000417148851781338, 'samples': 7583744, 'steps': 14811, 'loss/train': 2.297606945037842} +02/24/2022 20:44:46 - INFO - codeparrot_training - Step 14812: {'lr': 0.0004171366838652767, 'samples': 7584256, 'steps': 14812, 'loss/train': 2.0433497428894043} +02/24/2022 20:44:52 - INFO - codeparrot_training - Step 14813: {'lr': 0.000417124515233255, 'samples': 7584768, 'steps': 14813, 'loss/train': 1.1630451679229736} +02/24/2022 20:44:55 - INFO - codeparrot_training - Step 14814: {'lr': 0.00041711234588532497, 'samples': 7585280, 'steps': 14814, 'loss/train': 2.403754472732544} +02/24/2022 20:45:01 - INFO - codeparrot_training - Step 14815: {'lr': 0.0004171001758215387, 'samples': 7585792, 'steps': 14815, 'loss/train': 1.3927111625671387} +02/24/2022 20:45:04 - INFO - codeparrot_training - Step 14816: {'lr': 0.0004170880050419483, 'samples': 7586304, 'steps': 14816, 'loss/train': 1.7989267110824585} +02/24/2022 20:45:10 - INFO - codeparrot_training - Step 14817: {'lr': 0.00041707583354660597, 'samples': 7586816, 'steps': 14817, 'loss/train': 2.126197099685669} +02/24/2022 20:45:13 - INFO - codeparrot_training - Step 14818: {'lr': 0.0004170636613355638, 'samples': 7587328, 'steps': 14818, 'loss/train': 1.4171420335769653} +02/24/2022 20:45:19 - INFO - codeparrot_training - Step 14819: {'lr': 0.000417051488408874, 'samples': 7587840, 'steps': 14819, 'loss/train': 1.7377910614013672} +02/24/2022 20:45:22 - INFO - codeparrot_training - Step 14820: {'lr': 0.00041703931476658857, 'samples': 7588352, 'steps': 14820, 'loss/train': 1.913621187210083} +02/24/2022 20:45:29 - INFO - codeparrot_training - Step 14821: {'lr': 0.0004170271404087598, 'samples': 7588864, 'steps': 14821, 'loss/train': 1.0714280605316162} +02/24/2022 20:45:33 - INFO - codeparrot_training - Step 14822: {'lr': 0.0004170149653354398, 'samples': 7589376, 'steps': 14822, 'loss/train': 2.133225679397583} +02/24/2022 20:45:38 - INFO - codeparrot_training - Step 14823: {'lr': 0.0004170027895466807, 'samples': 7589888, 'steps': 14823, 'loss/train': 1.0843403339385986} +02/24/2022 20:45:42 - INFO - codeparrot_training - Step 14824: {'lr': 0.00041699061304253476, 'samples': 7590400, 'steps': 14824, 'loss/train': 2.45759916305542} +02/24/2022 20:45:47 - INFO - codeparrot_training - Step 14825: {'lr': 0.00041697843582305406, 'samples': 7590912, 'steps': 14825, 'loss/train': 0.9926760792732239} +02/24/2022 20:45:51 - INFO - codeparrot_training - Step 14826: {'lr': 0.0004169662578882907, 'samples': 7591424, 'steps': 14826, 'loss/train': 2.2052457332611084} +02/24/2022 20:45:56 - INFO - codeparrot_training - Step 14827: {'lr': 0.0004169540792382969, 'samples': 7591936, 'steps': 14827, 'loss/train': 2.6973533630371094} +02/24/2022 20:46:00 - INFO - codeparrot_training - Step 14828: {'lr': 0.0004169418998731249, 'samples': 7592448, 'steps': 14828, 'loss/train': 2.4513397216796875} +02/24/2022 20:46:05 - INFO - codeparrot_training - Step 14829: {'lr': 0.0004169297197928268, 'samples': 7592960, 'steps': 14829, 'loss/train': 0.5002651214599609} +02/24/2022 20:46:09 - INFO - codeparrot_training - Step 14830: {'lr': 0.0004169175389974548, 'samples': 7593472, 'steps': 14830, 'loss/train': 1.257468581199646} +02/24/2022 20:46:14 - INFO - codeparrot_training - Step 14831: {'lr': 0.0004169053574870609, 'samples': 7593984, 'steps': 14831, 'loss/train': 1.9362249374389648} +02/24/2022 20:46:18 - INFO - codeparrot_training - Step 14832: {'lr': 0.0004168931752616977, 'samples': 7594496, 'steps': 14832, 'loss/train': 2.6308228969573975} +02/24/2022 20:46:23 - INFO - codeparrot_training - Step 14833: {'lr': 0.00041688099232141694, 'samples': 7595008, 'steps': 14833, 'loss/train': 2.483644485473633} +02/24/2022 20:46:27 - INFO - codeparrot_training - Step 14834: {'lr': 0.0004168688086662711, 'samples': 7595520, 'steps': 14834, 'loss/train': 2.298835039138794} +02/24/2022 20:46:32 - INFO - codeparrot_training - Step 14835: {'lr': 0.0004168566242963122, 'samples': 7596032, 'steps': 14835, 'loss/train': 2.0751938819885254} +02/24/2022 20:46:35 - INFO - codeparrot_training - Step 14836: {'lr': 0.00041684443921159253, 'samples': 7596544, 'steps': 14836, 'loss/train': 2.780609607696533} +02/24/2022 20:46:42 - INFO - codeparrot_training - Step 14837: {'lr': 0.00041683225341216426, 'samples': 7597056, 'steps': 14837, 'loss/train': 2.2594830989837646} +02/24/2022 20:46:45 - INFO - codeparrot_training - Step 14838: {'lr': 0.0004168200668980796, 'samples': 7597568, 'steps': 14838, 'loss/train': 1.9914158582687378} +02/24/2022 20:46:51 - INFO - codeparrot_training - Step 14839: {'lr': 0.0004168078796693908, 'samples': 7598080, 'steps': 14839, 'loss/train': 1.541532278060913} +02/24/2022 20:46:54 - INFO - codeparrot_training - Step 14840: {'lr': 0.00041679569172614996, 'samples': 7598592, 'steps': 14840, 'loss/train': 2.2549946308135986} +02/24/2022 20:47:00 - INFO - codeparrot_training - Step 14841: {'lr': 0.0004167835030684093, 'samples': 7599104, 'steps': 14841, 'loss/train': 1.0146172046661377} +02/24/2022 20:47:03 - INFO - codeparrot_training - Step 14842: {'lr': 0.0004167713136962211, 'samples': 7599616, 'steps': 14842, 'loss/train': 1.6750144958496094} +02/24/2022 20:47:09 - INFO - codeparrot_training - Step 14843: {'lr': 0.00041675912360963766, 'samples': 7600128, 'steps': 14843, 'loss/train': 2.6504602432250977} +02/24/2022 20:47:12 - INFO - codeparrot_training - Step 14844: {'lr': 0.0004167469328087109, 'samples': 7600640, 'steps': 14844, 'loss/train': 2.073831796646118} +02/24/2022 20:47:18 - INFO - codeparrot_training - Step 14845: {'lr': 0.0004167347412934933, 'samples': 7601152, 'steps': 14845, 'loss/train': 2.2274749279022217} +02/24/2022 20:47:21 - INFO - codeparrot_training - Step 14846: {'lr': 0.00041672254906403703, 'samples': 7601664, 'steps': 14846, 'loss/train': 1.2622736692428589} +02/24/2022 20:47:27 - INFO - codeparrot_training - Step 14847: {'lr': 0.00041671035612039434, 'samples': 7602176, 'steps': 14847, 'loss/train': 3.036304235458374} +02/24/2022 20:47:31 - INFO - codeparrot_training - Step 14848: {'lr': 0.0004166981624626174, 'samples': 7602688, 'steps': 14848, 'loss/train': 2.4200854301452637} +02/24/2022 20:47:36 - INFO - codeparrot_training - Step 14849: {'lr': 0.00041668596809075835, 'samples': 7603200, 'steps': 14849, 'loss/train': 2.3739476203918457} +02/24/2022 20:47:40 - INFO - codeparrot_training - Step 14850: {'lr': 0.0004166737730048697, 'samples': 7603712, 'steps': 14850, 'loss/train': 1.7012600898742676} +02/24/2022 20:47:45 - INFO - codeparrot_training - Step 14851: {'lr': 0.00041666157720500344, 'samples': 7604224, 'steps': 14851, 'loss/train': 2.3902628421783447} +02/24/2022 20:47:49 - INFO - codeparrot_training - Step 14852: {'lr': 0.00041664938069121195, 'samples': 7604736, 'steps': 14852, 'loss/train': 1.6821991205215454} +02/24/2022 20:47:54 - INFO - codeparrot_training - Step 14853: {'lr': 0.0004166371834635474, 'samples': 7605248, 'steps': 14853, 'loss/train': 2.3953053951263428} +02/24/2022 20:47:58 - INFO - codeparrot_training - Step 14854: {'lr': 0.00041662498552206206, 'samples': 7605760, 'steps': 14854, 'loss/train': 1.5478614568710327} +02/24/2022 20:48:03 - INFO - codeparrot_training - Step 14855: {'lr': 0.00041661278686680827, 'samples': 7606272, 'steps': 14855, 'loss/train': 2.187920570373535} +02/24/2022 20:48:07 - INFO - codeparrot_training - Step 14856: {'lr': 0.00041660058749783813, 'samples': 7606784, 'steps': 14856, 'loss/train': 1.7837194204330444} +02/24/2022 20:48:13 - INFO - codeparrot_training - Step 14857: {'lr': 0.000416588387415204, 'samples': 7607296, 'steps': 14857, 'loss/train': 1.7008607387542725} +02/24/2022 20:48:17 - INFO - codeparrot_training - Step 14858: {'lr': 0.0004165761866189581, 'samples': 7607808, 'steps': 14858, 'loss/train': 1.649808406829834} +02/24/2022 20:48:22 - INFO - codeparrot_training - Step 14859: {'lr': 0.00041656398510915273, 'samples': 7608320, 'steps': 14859, 'loss/train': 1.8994311094284058} +02/24/2022 20:48:26 - INFO - codeparrot_training - Step 14860: {'lr': 0.00041655178288584006, 'samples': 7608832, 'steps': 14860, 'loss/train': 1.8425670862197876} +02/24/2022 20:48:31 - INFO - codeparrot_training - Step 14861: {'lr': 0.00041653957994907255, 'samples': 7609344, 'steps': 14861, 'loss/train': 1.5617280006408691} +02/24/2022 20:48:35 - INFO - codeparrot_training - Step 14862: {'lr': 0.0004165273762989023, 'samples': 7609856, 'steps': 14862, 'loss/train': 1.6858184337615967} +02/24/2022 20:48:40 - INFO - codeparrot_training - Step 14863: {'lr': 0.0004165151719353817, 'samples': 7610368, 'steps': 14863, 'loss/train': 1.5079296827316284} +02/24/2022 20:48:44 - INFO - codeparrot_training - Step 14864: {'lr': 0.0004165029668585629, 'samples': 7610880, 'steps': 14864, 'loss/train': 2.8091301918029785} +02/24/2022 20:48:49 - INFO - codeparrot_training - Step 14865: {'lr': 0.00041649076106849836, 'samples': 7611392, 'steps': 14865, 'loss/train': 1.4334156513214111} +02/24/2022 20:48:55 - INFO - codeparrot_training - Step 14866: {'lr': 0.0004164785545652402, 'samples': 7611904, 'steps': 14866, 'loss/train': 2.2660720348358154} +02/24/2022 20:48:58 - INFO - codeparrot_training - Step 14867: {'lr': 0.0004164663473488408, 'samples': 7612416, 'steps': 14867, 'loss/train': 2.3031933307647705} +02/24/2022 20:49:04 - INFO - codeparrot_training - Step 14868: {'lr': 0.0004164541394193524, 'samples': 7612928, 'steps': 14868, 'loss/train': 1.8336914777755737} +02/24/2022 20:49:08 - INFO - codeparrot_training - Step 14869: {'lr': 0.00041644193077682734, 'samples': 7613440, 'steps': 14869, 'loss/train': 0.2681225836277008} +02/24/2022 20:49:13 - INFO - codeparrot_training - Step 14870: {'lr': 0.0004164297214213179, 'samples': 7613952, 'steps': 14870, 'loss/train': 1.74448823928833} +02/24/2022 20:49:17 - INFO - codeparrot_training - Step 14871: {'lr': 0.0004164175113528763, 'samples': 7614464, 'steps': 14871, 'loss/train': 1.7467695474624634} +02/24/2022 20:49:22 - INFO - codeparrot_training - Step 14872: {'lr': 0.000416405300571555, 'samples': 7614976, 'steps': 14872, 'loss/train': 2.1374170780181885} +02/24/2022 20:49:26 - INFO - codeparrot_training - Step 14873: {'lr': 0.00041639308907740624, 'samples': 7615488, 'steps': 14873, 'loss/train': 2.4194884300231934} +02/24/2022 20:49:31 - INFO - codeparrot_training - Step 14874: {'lr': 0.0004163808768704823, 'samples': 7616000, 'steps': 14874, 'loss/train': 2.1897895336151123} +02/24/2022 20:49:35 - INFO - codeparrot_training - Step 14875: {'lr': 0.0004163686639508356, 'samples': 7616512, 'steps': 14875, 'loss/train': 1.8933578729629517} +02/24/2022 20:49:40 - INFO - codeparrot_training - Step 14876: {'lr': 0.00041635645031851826, 'samples': 7617024, 'steps': 14876, 'loss/train': 1.790216326713562} +02/24/2022 20:49:44 - INFO - codeparrot_training - Step 14877: {'lr': 0.0004163442359735827, 'samples': 7617536, 'steps': 14877, 'loss/train': 1.200398564338684} +02/24/2022 20:49:50 - INFO - codeparrot_training - Step 14878: {'lr': 0.00041633202091608136, 'samples': 7618048, 'steps': 14878, 'loss/train': 2.0175182819366455} +02/24/2022 20:49:53 - INFO - codeparrot_training - Step 14879: {'lr': 0.00041631980514606636, 'samples': 7618560, 'steps': 14879, 'loss/train': 0.13233886659145355} +02/24/2022 20:49:58 - INFO - codeparrot_training - Step 14880: {'lr': 0.0004163075886635902, 'samples': 7619072, 'steps': 14880, 'loss/train': 2.0524795055389404} +02/24/2022 20:50:02 - INFO - codeparrot_training - Step 14881: {'lr': 0.0004162953714687051, 'samples': 7619584, 'steps': 14881, 'loss/train': 1.709363341331482} +02/24/2022 20:50:08 - INFO - codeparrot_training - Step 14882: {'lr': 0.0004162831535614635, 'samples': 7620096, 'steps': 14882, 'loss/train': 3.060720205307007} +02/24/2022 20:50:12 - INFO - codeparrot_training - Step 14883: {'lr': 0.0004162709349419176, 'samples': 7620608, 'steps': 14883, 'loss/train': 1.8922135829925537} +02/24/2022 20:50:17 - INFO - codeparrot_training - Step 14884: {'lr': 0.0004162587156101198, 'samples': 7621120, 'steps': 14884, 'loss/train': 2.6358642578125} +02/24/2022 20:50:21 - INFO - codeparrot_training - Step 14885: {'lr': 0.0004162464955661225, 'samples': 7621632, 'steps': 14885, 'loss/train': 1.724726915359497} +02/24/2022 20:50:24 - INFO - codeparrot_training - Step 14886: {'lr': 0.000416234274809978, 'samples': 7622144, 'steps': 14886, 'loss/train': 0.09583525359630585} +02/24/2022 20:50:30 - INFO - codeparrot_training - Step 14887: {'lr': 0.00041622205334173863, 'samples': 7622656, 'steps': 14887, 'loss/train': 2.0287089347839355} +02/24/2022 20:50:33 - INFO - codeparrot_training - Step 14888: {'lr': 0.00041620983116145673, 'samples': 7623168, 'steps': 14888, 'loss/train': 1.413889765739441} +02/24/2022 20:50:39 - INFO - codeparrot_training - Step 14889: {'lr': 0.00041619760826918474, 'samples': 7623680, 'steps': 14889, 'loss/train': 0.8066677451133728} +02/24/2022 20:50:42 - INFO - codeparrot_training - Step 14890: {'lr': 0.00041618538466497496, 'samples': 7624192, 'steps': 14890, 'loss/train': 1.8946795463562012} +02/24/2022 20:50:48 - INFO - codeparrot_training - Step 14891: {'lr': 0.00041617316034887983, 'samples': 7624704, 'steps': 14891, 'loss/train': 0.7714298367500305} +02/24/2022 20:50:51 - INFO - codeparrot_training - Step 14892: {'lr': 0.00041616093532095155, 'samples': 7625216, 'steps': 14892, 'loss/train': 2.578319787979126} +02/24/2022 20:50:58 - INFO - codeparrot_training - Step 14893: {'lr': 0.00041614870958124264, 'samples': 7625728, 'steps': 14893, 'loss/train': 2.4218244552612305} +02/24/2022 20:51:01 - INFO - codeparrot_training - Step 14894: {'lr': 0.00041613648312980537, 'samples': 7626240, 'steps': 14894, 'loss/train': 0.8149571418762207} +02/24/2022 20:51:07 - INFO - codeparrot_training - Step 14895: {'lr': 0.00041612425596669215, 'samples': 7626752, 'steps': 14895, 'loss/train': 2.4576964378356934} +02/24/2022 20:51:12 - INFO - codeparrot_training - Step 14896: {'lr': 0.0004161120280919554, 'samples': 7627264, 'steps': 14896, 'loss/train': 2.0430126190185547} +02/24/2022 20:51:16 - INFO - codeparrot_training - Step 14897: {'lr': 0.00041609979950564747, 'samples': 7627776, 'steps': 14897, 'loss/train': 1.5276203155517578} +02/24/2022 20:51:21 - INFO - codeparrot_training - Step 14898: {'lr': 0.00041608757020782073, 'samples': 7628288, 'steps': 14898, 'loss/train': 2.11814546585083} +02/24/2022 20:51:25 - INFO - codeparrot_training - Step 14899: {'lr': 0.0004160753401985276, 'samples': 7628800, 'steps': 14899, 'loss/train': 1.3218504190444946} +02/24/2022 20:51:30 - INFO - codeparrot_training - Step 14900: {'lr': 0.00041606310947782046, 'samples': 7629312, 'steps': 14900, 'loss/train': 1.7159701585769653} +02/24/2022 20:51:34 - INFO - codeparrot_training - Step 14901: {'lr': 0.00041605087804575167, 'samples': 7629824, 'steps': 14901, 'loss/train': 2.6790263652801514} +02/24/2022 20:51:39 - INFO - codeparrot_training - Step 14902: {'lr': 0.0004160386459023736, 'samples': 7630336, 'steps': 14902, 'loss/train': 2.3310153484344482} +02/24/2022 20:51:43 - INFO - codeparrot_training - Step 14903: {'lr': 0.00041602641304773876, 'samples': 7630848, 'steps': 14903, 'loss/train': 1.4768877029418945} +02/24/2022 20:51:49 - INFO - codeparrot_training - Step 14904: {'lr': 0.0004160141794818995, 'samples': 7631360, 'steps': 14904, 'loss/train': 1.1713708639144897} +02/24/2022 20:51:53 - INFO - codeparrot_training - Step 14905: {'lr': 0.00041600194520490815, 'samples': 7631872, 'steps': 14905, 'loss/train': 2.23829984664917} +02/24/2022 20:51:58 - INFO - codeparrot_training - Step 14906: {'lr': 0.0004159897102168172, 'samples': 7632384, 'steps': 14906, 'loss/train': 3.2544000148773193} +02/24/2022 20:52:02 - INFO - codeparrot_training - Step 14907: {'lr': 0.00041597747451767905, 'samples': 7632896, 'steps': 14907, 'loss/train': 2.157799243927002} +02/24/2022 20:52:07 - INFO - codeparrot_training - Step 14908: {'lr': 0.00041596523810754607, 'samples': 7633408, 'steps': 14908, 'loss/train': 2.5491750240325928} +02/24/2022 20:52:11 - INFO - codeparrot_training - Step 14909: {'lr': 0.0004159530009864707, 'samples': 7633920, 'steps': 14909, 'loss/train': 0.29711586236953735} +02/24/2022 20:52:16 - INFO - codeparrot_training - Step 14910: {'lr': 0.0004159407631545054, 'samples': 7634432, 'steps': 14910, 'loss/train': 2.1845693588256836} +02/24/2022 20:52:20 - INFO - codeparrot_training - Step 14911: {'lr': 0.0004159285246117026, 'samples': 7634944, 'steps': 14911, 'loss/train': 2.1220433712005615} +02/24/2022 20:52:25 - INFO - codeparrot_training - Step 14912: {'lr': 0.00041591628535811464, 'samples': 7635456, 'steps': 14912, 'loss/train': 1.818053126335144} +02/24/2022 20:52:29 - INFO - codeparrot_training - Step 14913: {'lr': 0.000415904045393794, 'samples': 7635968, 'steps': 14913, 'loss/train': 1.3587634563446045} +02/24/2022 20:52:35 - INFO - codeparrot_training - Step 14914: {'lr': 0.0004158918047187931, 'samples': 7636480, 'steps': 14914, 'loss/train': 2.447115182876587} +02/24/2022 20:52:38 - INFO - codeparrot_training - Step 14915: {'lr': 0.0004158795633331645, 'samples': 7636992, 'steps': 14915, 'loss/train': 2.1404457092285156} +02/24/2022 20:52:44 - INFO - codeparrot_training - Step 14916: {'lr': 0.00041586732123696037, 'samples': 7637504, 'steps': 14916, 'loss/train': 1.956944465637207} +02/24/2022 20:52:47 - INFO - codeparrot_training - Step 14917: {'lr': 0.0004158550784302334, 'samples': 7638016, 'steps': 14917, 'loss/train': 2.7461376190185547} +02/24/2022 20:52:53 - INFO - codeparrot_training - Step 14918: {'lr': 0.0004158428349130359, 'samples': 7638528, 'steps': 14918, 'loss/train': 2.5754482746124268} +02/24/2022 20:52:56 - INFO - codeparrot_training - Step 14919: {'lr': 0.00041583059068542034, 'samples': 7639040, 'steps': 14919, 'loss/train': 1.840137004852295} +02/24/2022 20:53:02 - INFO - codeparrot_training - Step 14920: {'lr': 0.0004158183457474392, 'samples': 7639552, 'steps': 14920, 'loss/train': 0.7040673494338989} +02/24/2022 20:53:05 - INFO - codeparrot_training - Step 14921: {'lr': 0.00041580610009914486, 'samples': 7640064, 'steps': 14921, 'loss/train': 1.3049136400222778} +02/24/2022 20:53:11 - INFO - codeparrot_training - Step 14922: {'lr': 0.00041579385374058996, 'samples': 7640576, 'steps': 14922, 'loss/train': 2.4330427646636963} +02/24/2022 20:53:14 - INFO - codeparrot_training - Step 14923: {'lr': 0.00041578160667182676, 'samples': 7641088, 'steps': 14923, 'loss/train': 1.8912090063095093} +02/24/2022 20:53:20 - INFO - codeparrot_training - Step 14924: {'lr': 0.00041576935889290777, 'samples': 7641600, 'steps': 14924, 'loss/train': 1.8363546133041382} +02/24/2022 20:53:24 - INFO - codeparrot_training - Step 14925: {'lr': 0.0004157571104038856, 'samples': 7642112, 'steps': 14925, 'loss/train': 1.9110095500946045} +02/24/2022 20:53:27 - INFO - codeparrot_training - Step 14926: {'lr': 0.00041574486120481255, 'samples': 7642624, 'steps': 14926, 'loss/train': 1.2370458841323853} +02/24/2022 20:53:33 - INFO - codeparrot_training - Step 14927: {'lr': 0.0004157326112957411, 'samples': 7643136, 'steps': 14927, 'loss/train': 2.8494725227355957} +02/24/2022 20:53:36 - INFO - codeparrot_training - Step 14928: {'lr': 0.0004157203606767238, 'samples': 7643648, 'steps': 14928, 'loss/train': 1.1753263473510742} +02/24/2022 20:53:42 - INFO - codeparrot_training - Step 14929: {'lr': 0.0004157081093478131, 'samples': 7644160, 'steps': 14929, 'loss/train': 1.904313564300537} +02/24/2022 20:53:45 - INFO - codeparrot_training - Step 14930: {'lr': 0.00041569585730906147, 'samples': 7644672, 'steps': 14930, 'loss/train': 1.8031725883483887} +02/24/2022 20:53:51 - INFO - codeparrot_training - Step 14931: {'lr': 0.0004156836045605214, 'samples': 7645184, 'steps': 14931, 'loss/train': 2.3395514488220215} +02/24/2022 20:53:55 - INFO - codeparrot_training - Step 14932: {'lr': 0.0004156713511022454, 'samples': 7645696, 'steps': 14932, 'loss/train': 2.0929887294769287} +02/24/2022 20:54:00 - INFO - codeparrot_training - Step 14933: {'lr': 0.00041565909693428593, 'samples': 7646208, 'steps': 14933, 'loss/train': 1.9599897861480713} +02/24/2022 20:54:04 - INFO - codeparrot_training - Step 14934: {'lr': 0.00041564684205669546, 'samples': 7646720, 'steps': 14934, 'loss/train': 1.6113312244415283} +02/24/2022 20:54:11 - INFO - codeparrot_training - Step 14935: {'lr': 0.00041563458646952655, 'samples': 7647232, 'steps': 14935, 'loss/train': 1.6996887922286987} +02/24/2022 20:54:14 - INFO - codeparrot_training - Step 14936: {'lr': 0.0004156223301728316, 'samples': 7647744, 'steps': 14936, 'loss/train': 2.5070126056671143} +02/24/2022 20:54:20 - INFO - codeparrot_training - Step 14937: {'lr': 0.00041561007316666333, 'samples': 7648256, 'steps': 14937, 'loss/train': 2.176846504211426} +02/24/2022 20:54:23 - INFO - codeparrot_training - Step 14938: {'lr': 0.00041559781545107393, 'samples': 7648768, 'steps': 14938, 'loss/train': 2.6045141220092773} +02/24/2022 20:54:29 - INFO - codeparrot_training - Step 14939: {'lr': 0.00041558555702611615, 'samples': 7649280, 'steps': 14939, 'loss/train': 2.515040874481201} +02/24/2022 20:54:34 - INFO - codeparrot_training - Step 14940: {'lr': 0.0004155732978918424, 'samples': 7649792, 'steps': 14940, 'loss/train': 1.1698956489562988} +02/24/2022 20:54:38 - INFO - codeparrot_training - Step 14941: {'lr': 0.00041556103804830523, 'samples': 7650304, 'steps': 14941, 'loss/train': 1.5823616981506348} +02/24/2022 20:54:43 - INFO - codeparrot_training - Step 14942: {'lr': 0.0004155487774955572, 'samples': 7650816, 'steps': 14942, 'loss/train': 2.547513484954834} +02/24/2022 20:54:47 - INFO - codeparrot_training - Step 14943: {'lr': 0.00041553651623365076, 'samples': 7651328, 'steps': 14943, 'loss/train': 2.0599911212921143} +02/24/2022 20:54:52 - INFO - codeparrot_training - Step 14944: {'lr': 0.00041552425426263836, 'samples': 7651840, 'steps': 14944, 'loss/train': 2.4216253757476807} +02/24/2022 20:54:56 - INFO - codeparrot_training - Step 14945: {'lr': 0.00041551199158257264, 'samples': 7652352, 'steps': 14945, 'loss/train': 1.619296669960022} +02/24/2022 20:55:02 - INFO - codeparrot_training - Step 14946: {'lr': 0.00041549972819350615, 'samples': 7652864, 'steps': 14946, 'loss/train': 2.132974863052368} +02/24/2022 20:55:06 - INFO - codeparrot_training - Step 14947: {'lr': 0.00041548746409549134, 'samples': 7653376, 'steps': 14947, 'loss/train': 1.4858053922653198} +02/24/2022 20:55:11 - INFO - codeparrot_training - Step 14948: {'lr': 0.0004154751992885808, 'samples': 7653888, 'steps': 14948, 'loss/train': 1.736487627029419} +02/24/2022 20:55:15 - INFO - codeparrot_training - Step 14949: {'lr': 0.0004154629337728271, 'samples': 7654400, 'steps': 14949, 'loss/train': 2.457109212875366} +02/24/2022 20:55:20 - INFO - codeparrot_training - Step 14950: {'lr': 0.00041545066754828264, 'samples': 7654912, 'steps': 14950, 'loss/train': 1.6297321319580078} +02/24/2022 20:55:24 - INFO - codeparrot_training - Step 14951: {'lr': 0.00041543840061500007, 'samples': 7655424, 'steps': 14951, 'loss/train': 2.04052996635437} +02/24/2022 20:55:29 - INFO - codeparrot_training - Step 14952: {'lr': 0.000415426132973032, 'samples': 7655936, 'steps': 14952, 'loss/train': 2.7318904399871826} +02/24/2022 20:55:33 - INFO - codeparrot_training - Step 14953: {'lr': 0.0004154138646224308, 'samples': 7656448, 'steps': 14953, 'loss/train': 2.008251428604126} +02/24/2022 20:55:38 - INFO - codeparrot_training - Step 14954: {'lr': 0.0004154015955632492, 'samples': 7656960, 'steps': 14954, 'loss/train': 2.7928543090820312} +02/24/2022 20:55:42 - INFO - codeparrot_training - Step 14955: {'lr': 0.0004153893257955397, 'samples': 7657472, 'steps': 14955, 'loss/train': 1.9538930654525757} +02/24/2022 20:55:48 - INFO - codeparrot_training - Step 14956: {'lr': 0.00041537705531935476, 'samples': 7657984, 'steps': 14956, 'loss/train': 2.53762149810791} +02/24/2022 20:55:51 - INFO - codeparrot_training - Step 14957: {'lr': 0.0004153647841347471, 'samples': 7658496, 'steps': 14957, 'loss/train': 2.2134952545166016} +02/24/2022 20:55:57 - INFO - codeparrot_training - Step 14958: {'lr': 0.0004153525122417692, 'samples': 7659008, 'steps': 14958, 'loss/train': 2.2954773902893066} +02/24/2022 20:56:00 - INFO - codeparrot_training - Step 14959: {'lr': 0.00041534023964047363, 'samples': 7659520, 'steps': 14959, 'loss/train': 2.664625883102417} +02/24/2022 20:56:06 - INFO - codeparrot_training - Step 14960: {'lr': 0.00041532796633091297, 'samples': 7660032, 'steps': 14960, 'loss/train': 2.5796895027160645} +02/24/2022 20:56:09 - INFO - codeparrot_training - Step 14961: {'lr': 0.0004153156923131398, 'samples': 7660544, 'steps': 14961, 'loss/train': 2.0717506408691406} +02/24/2022 20:56:15 - INFO - codeparrot_training - Step 14962: {'lr': 0.0004153034175872067, 'samples': 7661056, 'steps': 14962, 'loss/train': 2.521807909011841} +02/24/2022 20:56:18 - INFO - codeparrot_training - Step 14963: {'lr': 0.00041529114215316633, 'samples': 7661568, 'steps': 14963, 'loss/train': 1.5495537519454956} +02/24/2022 20:56:24 - INFO - codeparrot_training - Step 14964: {'lr': 0.0004152788660110711, 'samples': 7662080, 'steps': 14964, 'loss/train': 1.9410985708236694} +02/24/2022 20:56:27 - INFO - codeparrot_training - Step 14965: {'lr': 0.0004152665891609737, 'samples': 7662592, 'steps': 14965, 'loss/train': 0.3258343040943146} +02/24/2022 20:56:33 - INFO - codeparrot_training - Step 14966: {'lr': 0.0004152543116029267, 'samples': 7663104, 'steps': 14966, 'loss/train': 2.791116952896118} +02/24/2022 20:56:37 - INFO - codeparrot_training - Step 14967: {'lr': 0.0004152420333369827, 'samples': 7663616, 'steps': 14967, 'loss/train': 1.664000153541565} +02/24/2022 20:56:42 - INFO - codeparrot_training - Step 14968: {'lr': 0.00041522975436319445, 'samples': 7664128, 'steps': 14968, 'loss/train': 2.229444980621338} +02/24/2022 20:56:46 - INFO - codeparrot_training - Step 14969: {'lr': 0.00041521747468161417, 'samples': 7664640, 'steps': 14969, 'loss/train': 1.7846778631210327} +02/24/2022 20:56:51 - INFO - codeparrot_training - Step 14970: {'lr': 0.00041520519429229485, 'samples': 7665152, 'steps': 14970, 'loss/train': 0.5800321102142334} +02/24/2022 20:56:55 - INFO - codeparrot_training - Step 14971: {'lr': 0.00041519291319528886, 'samples': 7665664, 'steps': 14971, 'loss/train': 1.9767426252365112} +02/24/2022 20:57:00 - INFO - codeparrot_training - Step 14972: {'lr': 0.00041518063139064893, 'samples': 7666176, 'steps': 14972, 'loss/train': 1.750622272491455} +02/24/2022 20:57:04 - INFO - codeparrot_training - Step 14973: {'lr': 0.0004151683488784276, 'samples': 7666688, 'steps': 14973, 'loss/train': 1.5277704000473022} +02/24/2022 20:57:09 - INFO - codeparrot_training - Step 14974: {'lr': 0.00041515606565867746, 'samples': 7667200, 'steps': 14974, 'loss/train': 1.2477033138275146} +02/24/2022 20:57:13 - INFO - codeparrot_training - Step 14975: {'lr': 0.0004151437817314513, 'samples': 7667712, 'steps': 14975, 'loss/train': 2.1250593662261963} +02/24/2022 20:57:19 - INFO - codeparrot_training - Step 14976: {'lr': 0.00041513149709680155, 'samples': 7668224, 'steps': 14976, 'loss/train': 1.9217205047607422} +02/24/2022 20:57:22 - INFO - codeparrot_training - Step 14977: {'lr': 0.00041511921175478085, 'samples': 7668736, 'steps': 14977, 'loss/train': 1.381117582321167} +02/24/2022 20:57:28 - INFO - codeparrot_training - Step 14978: {'lr': 0.0004151069257054419, 'samples': 7669248, 'steps': 14978, 'loss/train': 2.457695960998535} +02/24/2022 20:57:31 - INFO - codeparrot_training - Step 14979: {'lr': 0.0004150946389488374, 'samples': 7669760, 'steps': 14979, 'loss/train': 0.7559409141540527} +02/24/2022 20:57:37 - INFO - codeparrot_training - Step 14980: {'lr': 0.0004150823514850198, 'samples': 7670272, 'steps': 14980, 'loss/train': 2.514883518218994} +02/24/2022 20:57:41 - INFO - codeparrot_training - Step 14981: {'lr': 0.00041507006331404186, 'samples': 7670784, 'steps': 14981, 'loss/train': 2.1716983318328857} +02/24/2022 20:57:46 - INFO - codeparrot_training - Step 14982: {'lr': 0.00041505777443595615, 'samples': 7671296, 'steps': 14982, 'loss/train': 1.7020424604415894} +02/24/2022 20:57:50 - INFO - codeparrot_training - Step 14983: {'lr': 0.0004150454848508154, 'samples': 7671808, 'steps': 14983, 'loss/train': 2.3099896907806396} +02/24/2022 20:57:55 - INFO - codeparrot_training - Step 14984: {'lr': 0.00041503319455867216, 'samples': 7672320, 'steps': 14984, 'loss/train': 1.9879225492477417} +02/24/2022 20:57:59 - INFO - codeparrot_training - Step 14985: {'lr': 0.0004150209035595791, 'samples': 7672832, 'steps': 14985, 'loss/train': 2.8142597675323486} +02/24/2022 20:58:05 - INFO - codeparrot_training - Step 14986: {'lr': 0.000415008611853589, 'samples': 7673344, 'steps': 14986, 'loss/train': 2.348301649093628} +02/24/2022 20:58:08 - INFO - codeparrot_training - Step 14987: {'lr': 0.0004149963194407543, 'samples': 7673856, 'steps': 14987, 'loss/train': 1.9153321981430054} +02/24/2022 20:58:14 - INFO - codeparrot_training - Step 14988: {'lr': 0.00041498402632112776, 'samples': 7674368, 'steps': 14988, 'loss/train': 1.4021347761154175} +02/24/2022 20:58:17 - INFO - codeparrot_training - Step 14989: {'lr': 0.00041497173249476204, 'samples': 7674880, 'steps': 14989, 'loss/train': 1.3956748247146606} +02/24/2022 20:58:23 - INFO - codeparrot_training - Step 14990: {'lr': 0.0004149594379617099, 'samples': 7675392, 'steps': 14990, 'loss/train': 2.416032075881958} +02/24/2022 20:58:26 - INFO - codeparrot_training - Step 14991: {'lr': 0.00041494714272202385, 'samples': 7675904, 'steps': 14991, 'loss/train': 1.4470218420028687} +02/24/2022 20:58:33 - INFO - codeparrot_training - Step 14992: {'lr': 0.00041493484677575655, 'samples': 7676416, 'steps': 14992, 'loss/train': 3.7259156703948975} +02/24/2022 20:58:36 - INFO - codeparrot_training - Step 14993: {'lr': 0.00041492255012296077, 'samples': 7676928, 'steps': 14993, 'loss/train': 1.637926459312439} +02/24/2022 20:58:42 - INFO - codeparrot_training - Step 14994: {'lr': 0.0004149102527636892, 'samples': 7677440, 'steps': 14994, 'loss/train': 2.3843772411346436} +02/24/2022 20:58:45 - INFO - codeparrot_training - Step 14995: {'lr': 0.0004148979546979944, 'samples': 7677952, 'steps': 14995, 'loss/train': 1.8539739847183228} +02/24/2022 20:58:51 - INFO - codeparrot_training - Step 14996: {'lr': 0.00041488565592592917, 'samples': 7678464, 'steps': 14996, 'loss/train': 1.4838346242904663} +02/24/2022 20:58:54 - INFO - codeparrot_training - Step 14997: {'lr': 0.0004148733564475462, 'samples': 7678976, 'steps': 14997, 'loss/train': 2.3169679641723633} +02/24/2022 20:59:00 - INFO - codeparrot_training - Step 14998: {'lr': 0.000414861056262898, 'samples': 7679488, 'steps': 14998, 'loss/train': 1.7019398212432861} +02/24/2022 20:59:03 - INFO - codeparrot_training - Step 14999: {'lr': 0.0004148487553720375, 'samples': 7680000, 'steps': 14999, 'loss/train': 2.059992551803589} +02/24/2022 20:59:03 - INFO - codeparrot_training - Evaluating and saving model checkpoint