diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -1309,3 +1309,1007 @@ Use FP16 precision: False 02/24/2022 02:50:27 - INFO - codeparrot_training - Step 998: {'lr': 0.0002495, 'samples': 511488, 'steps': 998, 'loss/train': 5.783529758453369} 02/24/2022 02:50:30 - INFO - codeparrot_training - Step 999: {'lr': 0.00024975, 'samples': 512000, 'steps': 999, 'loss/train': 6.577051639556885} 02/24/2022 02:50:30 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 02:52:17 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + * [new branch] floral-grass-11 -> floral-grass-11 + +02/24/2022 02:53:28 - INFO - codeparrot_training - Step 1000: {'lr': 0.00025, 'samples': 512512, 'steps': 1000, 'loss/train': 5.066281795501709} +02/24/2022 02:53:31 - INFO - codeparrot_training - Step 1001: {'lr': 0.00025025, 'samples': 513024, 'steps': 1001, 'loss/train': 4.80372953414917} +02/24/2022 02:53:37 - INFO - codeparrot_training - Step 1002: {'lr': 0.0002505, 'samples': 513536, 'steps': 1002, 'loss/train': 5.167027950286865} +02/24/2022 02:53:40 - INFO - codeparrot_training - Step 1003: {'lr': 0.00025075, 'samples': 514048, 'steps': 1003, 'loss/train': 6.6149187088012695} +02/24/2022 02:53:46 - INFO - codeparrot_training - Step 1004: {'lr': 0.00025100000000000003, 'samples': 514560, 'steps': 1004, 'loss/train': 5.51480770111084} +02/24/2022 02:53:49 - INFO - codeparrot_training - Step 1005: {'lr': 0.00025124999999999995, 'samples': 515072, 'steps': 1005, 'loss/train': 5.671285152435303} +02/24/2022 02:53:55 - INFO - codeparrot_training - Step 1006: {'lr': 0.0002515, 'samples': 515584, 'steps': 1006, 'loss/train': 5.495372772216797} +02/24/2022 02:53:58 - INFO - codeparrot_training - Step 1007: {'lr': 0.00025174999999999997, 'samples': 516096, 'steps': 1007, 'loss/train': 2.7495155334472656} +02/24/2022 02:54:03 - INFO - codeparrot_training - Step 1008: {'lr': 0.000252, 'samples': 516608, 'steps': 1008, 'loss/train': 4.522823810577393} +02/24/2022 02:54:07 - INFO - codeparrot_training - Step 1009: {'lr': 0.00025225, 'samples': 517120, 'steps': 1009, 'loss/train': 5.456524848937988} +02/24/2022 02:54:13 - INFO - codeparrot_training - Step 1010: {'lr': 0.0002525, 'samples': 517632, 'steps': 1010, 'loss/train': 5.721465110778809} +02/24/2022 02:54:16 - INFO - codeparrot_training - Step 1011: {'lr': 0.00025275, 'samples': 518144, 'steps': 1011, 'loss/train': 5.572442054748535} +02/24/2022 02:54:22 - INFO - codeparrot_training - Step 1012: {'lr': 0.000253, 'samples': 518656, 'steps': 1012, 'loss/train': 6.193021297454834} +02/24/2022 02:54:26 - INFO - codeparrot_training - Step 1013: {'lr': 0.00025325, 'samples': 519168, 'steps': 1013, 'loss/train': 5.142810344696045} +02/24/2022 02:54:31 - INFO - codeparrot_training - Step 1014: {'lr': 0.0002535, 'samples': 519680, 'steps': 1014, 'loss/train': 5.904702186584473} +02/24/2022 02:54:35 - INFO - codeparrot_training - Step 1015: {'lr': 0.00025374999999999996, 'samples': 520192, 'steps': 1015, 'loss/train': 5.99049186706543} +02/24/2022 02:54:40 - INFO - codeparrot_training - Step 1016: {'lr': 0.000254, 'samples': 520704, 'steps': 1016, 'loss/train': 5.846545696258545} +02/24/2022 02:54:44 - INFO - codeparrot_training - Step 1017: {'lr': 0.00025425, 'samples': 521216, 'steps': 1017, 'loss/train': 4.6801276206970215} +02/24/2022 02:54:49 - INFO - codeparrot_training - Step 1018: {'lr': 0.0002545, 'samples': 521728, 'steps': 1018, 'loss/train': 4.828365325927734} +02/24/2022 02:54:53 - INFO - codeparrot_training - Step 1019: {'lr': 0.00025475, 'samples': 522240, 'steps': 1019, 'loss/train': 5.723589897155762} +02/24/2022 02:54:59 - INFO - codeparrot_training - Step 1020: {'lr': 0.000255, 'samples': 522752, 'steps': 1020, 'loss/train': 5.405442237854004} +02/24/2022 02:55:02 - INFO - codeparrot_training - Step 1021: {'lr': 0.00025525, 'samples': 523264, 'steps': 1021, 'loss/train': 4.955881595611572} +02/24/2022 02:55:08 - INFO - codeparrot_training - Step 1022: {'lr': 0.00025550000000000003, 'samples': 523776, 'steps': 1022, 'loss/train': 5.056830406188965} +02/24/2022 02:55:11 - INFO - codeparrot_training - Step 1023: {'lr': 0.00025575, 'samples': 524288, 'steps': 1023, 'loss/train': 5.384207248687744} +02/24/2022 02:55:16 - INFO - codeparrot_training - Step 1024: {'lr': 0.000256, 'samples': 524800, 'steps': 1024, 'loss/train': 5.197412490844727} +02/24/2022 02:55:20 - INFO - codeparrot_training - Step 1025: {'lr': 0.00025624999999999997, 'samples': 525312, 'steps': 1025, 'loss/train': 5.356321334838867} +02/24/2022 02:55:25 - INFO - codeparrot_training - Step 1026: {'lr': 0.0002565, 'samples': 525824, 'steps': 1026, 'loss/train': 5.2317585945129395} +02/24/2022 02:55:29 - INFO - codeparrot_training - Step 1027: {'lr': 0.00025675, 'samples': 526336, 'steps': 1027, 'loss/train': 5.499627590179443} +02/24/2022 02:55:34 - INFO - codeparrot_training - Step 1028: {'lr': 0.000257, 'samples': 526848, 'steps': 1028, 'loss/train': 5.996160507202148} +02/24/2022 02:55:38 - INFO - codeparrot_training - Step 1029: {'lr': 0.00025725, 'samples': 527360, 'steps': 1029, 'loss/train': 5.855902671813965} +02/24/2022 02:55:44 - INFO - codeparrot_training - Step 1030: {'lr': 0.0002575, 'samples': 527872, 'steps': 1030, 'loss/train': 5.065313816070557} +02/24/2022 02:55:47 - INFO - codeparrot_training - Step 1031: {'lr': 0.00025775, 'samples': 528384, 'steps': 1031, 'loss/train': 6.560394287109375} +02/24/2022 02:55:53 - INFO - codeparrot_training - Step 1032: {'lr': 0.00025800000000000004, 'samples': 528896, 'steps': 1032, 'loss/train': 4.543503761291504} +02/24/2022 02:55:58 - INFO - codeparrot_training - Step 1033: {'lr': 0.00025824999999999996, 'samples': 529408, 'steps': 1033, 'loss/train': 5.719844341278076} +02/24/2022 02:56:02 - INFO - codeparrot_training - Step 1034: {'lr': 0.0002585, 'samples': 529920, 'steps': 1034, 'loss/train': 4.815674781799316} +02/24/2022 02:56:07 - INFO - codeparrot_training - Step 1035: {'lr': 0.00025875, 'samples': 530432, 'steps': 1035, 'loss/train': 4.833280086517334} +02/24/2022 02:56:11 - INFO - codeparrot_training - Step 1036: {'lr': 0.000259, 'samples': 530944, 'steps': 1036, 'loss/train': 5.193758487701416} +02/24/2022 02:56:16 - INFO - codeparrot_training - Step 1037: {'lr': 0.00025925, 'samples': 531456, 'steps': 1037, 'loss/train': 4.932142734527588} +02/24/2022 02:56:20 - INFO - codeparrot_training - Step 1038: {'lr': 0.0002595, 'samples': 531968, 'steps': 1038, 'loss/train': 5.557000160217285} +02/24/2022 02:56:26 - INFO - codeparrot_training - Step 1039: {'lr': 0.00025975, 'samples': 532480, 'steps': 1039, 'loss/train': 5.681889533996582} +02/24/2022 02:56:29 - INFO - codeparrot_training - Step 1040: {'lr': 0.00026000000000000003, 'samples': 532992, 'steps': 1040, 'loss/train': 4.866218566894531} +02/24/2022 02:56:35 - INFO - codeparrot_training - Step 1041: {'lr': 0.00026025, 'samples': 533504, 'steps': 1041, 'loss/train': 5.507498264312744} +02/24/2022 02:56:38 - INFO - codeparrot_training - Step 1042: {'lr': 0.0002605, 'samples': 534016, 'steps': 1042, 'loss/train': 5.162564754486084} +02/24/2022 02:56:44 - INFO - codeparrot_training - Step 1043: {'lr': 0.00026074999999999997, 'samples': 534528, 'steps': 1043, 'loss/train': 5.068989276885986} +02/24/2022 02:56:47 - INFO - codeparrot_training - Step 1044: {'lr': 0.000261, 'samples': 535040, 'steps': 1044, 'loss/train': 6.276032447814941} +02/24/2022 02:56:53 - INFO - codeparrot_training - Step 1045: {'lr': 0.00026125, 'samples': 535552, 'steps': 1045, 'loss/train': 8.02214527130127} +02/24/2022 02:56:56 - INFO - codeparrot_training - Step 1046: {'lr': 0.0002615, 'samples': 536064, 'steps': 1046, 'loss/train': 5.075617790222168} +02/24/2022 02:57:02 - INFO - codeparrot_training - Step 1047: {'lr': 0.00026175, 'samples': 536576, 'steps': 1047, 'loss/train': 5.763293743133545} +02/24/2022 02:57:05 - INFO - codeparrot_training - Step 1048: {'lr': 0.000262, 'samples': 537088, 'steps': 1048, 'loss/train': 1.702135682106018} +02/24/2022 02:57:11 - INFO - codeparrot_training - Step 1049: {'lr': 0.00026225, 'samples': 537600, 'steps': 1049, 'loss/train': 5.185730934143066} +02/24/2022 02:57:14 - INFO - codeparrot_training - Step 1050: {'lr': 0.00026250000000000004, 'samples': 538112, 'steps': 1050, 'loss/train': 3.9915056228637695} +02/24/2022 02:57:20 - INFO - codeparrot_training - Step 1051: {'lr': 0.00026274999999999996, 'samples': 538624, 'steps': 1051, 'loss/train': 4.6293792724609375} +02/24/2022 02:57:24 - INFO - codeparrot_training - Step 1052: {'lr': 0.000263, 'samples': 539136, 'steps': 1052, 'loss/train': 5.293740749359131} +02/24/2022 02:57:29 - INFO - codeparrot_training - Step 1053: {'lr': 0.00026325, 'samples': 539648, 'steps': 1053, 'loss/train': 5.670673370361328} +02/24/2022 02:57:33 - INFO - codeparrot_training - Step 1054: {'lr': 0.0002635, 'samples': 540160, 'steps': 1054, 'loss/train': 5.470986843109131} +02/24/2022 02:57:38 - INFO - codeparrot_training - Step 1055: {'lr': 0.00026375, 'samples': 540672, 'steps': 1055, 'loss/train': 5.483548164367676} +02/24/2022 02:57:42 - INFO - codeparrot_training - Step 1056: {'lr': 0.000264, 'samples': 541184, 'steps': 1056, 'loss/train': 4.262084007263184} +02/24/2022 02:57:50 - INFO - codeparrot_training - Step 1057: {'lr': 0.00026425, 'samples': 541696, 'steps': 1057, 'loss/train': 5.8530168533325195} +02/24/2022 02:57:53 - INFO - codeparrot_training - Step 1058: {'lr': 0.00026450000000000003, 'samples': 542208, 'steps': 1058, 'loss/train': 4.918515205383301} +02/24/2022 02:57:59 - INFO - codeparrot_training - Step 1059: {'lr': 0.00026475, 'samples': 542720, 'steps': 1059, 'loss/train': 4.118727684020996} +02/24/2022 02:58:02 - INFO - codeparrot_training - Step 1060: {'lr': 0.00026500000000000004, 'samples': 543232, 'steps': 1060, 'loss/train': 6.1179962158203125} +02/24/2022 02:58:08 - INFO - codeparrot_training - Step 1061: {'lr': 0.00026524999999999997, 'samples': 543744, 'steps': 1061, 'loss/train': 4.762015342712402} +02/24/2022 02:58:11 - INFO - codeparrot_training - Step 1062: {'lr': 0.0002655, 'samples': 544256, 'steps': 1062, 'loss/train': 4.535111904144287} +02/24/2022 02:58:17 - INFO - codeparrot_training - Step 1063: {'lr': 0.00026575, 'samples': 544768, 'steps': 1063, 'loss/train': 5.647745132446289} +02/24/2022 02:58:20 - INFO - codeparrot_training - Step 1064: {'lr': 0.000266, 'samples': 545280, 'steps': 1064, 'loss/train': 5.756256580352783} +02/24/2022 02:58:26 - INFO - codeparrot_training - Step 1065: {'lr': 0.00026625, 'samples': 545792, 'steps': 1065, 'loss/train': 5.518280982971191} +02/24/2022 02:58:29 - INFO - codeparrot_training - Step 1066: {'lr': 0.0002665, 'samples': 546304, 'steps': 1066, 'loss/train': 5.746287822723389} +02/24/2022 02:58:36 - INFO - codeparrot_training - Step 1067: {'lr': 0.00026675, 'samples': 546816, 'steps': 1067, 'loss/train': 5.017087936401367} +02/24/2022 02:58:40 - INFO - codeparrot_training - Step 1068: {'lr': 0.00026700000000000004, 'samples': 547328, 'steps': 1068, 'loss/train': 5.298943996429443} +02/24/2022 02:58:45 - INFO - codeparrot_training - Step 1069: {'lr': 0.00026725, 'samples': 547840, 'steps': 1069, 'loss/train': 5.6898298263549805} +02/24/2022 02:58:49 - INFO - codeparrot_training - Step 1070: {'lr': 0.0002675, 'samples': 548352, 'steps': 1070, 'loss/train': 3.6321702003479004} +02/24/2022 02:58:54 - INFO - codeparrot_training - Step 1071: {'lr': 0.00026775, 'samples': 548864, 'steps': 1071, 'loss/train': 5.318291187286377} +02/24/2022 02:58:58 - INFO - codeparrot_training - Step 1072: {'lr': 0.000268, 'samples': 549376, 'steps': 1072, 'loss/train': 5.162319183349609} +02/24/2022 02:59:03 - INFO - codeparrot_training - Step 1073: {'lr': 0.00026825, 'samples': 549888, 'steps': 1073, 'loss/train': 5.279951095581055} +02/24/2022 02:59:07 - INFO - codeparrot_training - Step 1074: {'lr': 0.0002685, 'samples': 550400, 'steps': 1074, 'loss/train': 5.386423110961914} +02/24/2022 02:59:12 - INFO - codeparrot_training - Step 1075: {'lr': 0.00026875, 'samples': 550912, 'steps': 1075, 'loss/train': 5.800024509429932} +02/24/2022 02:59:16 - INFO - codeparrot_training - Step 1076: {'lr': 0.00026900000000000003, 'samples': 551424, 'steps': 1076, 'loss/train': 4.981051445007324} +02/24/2022 02:59:23 - INFO - codeparrot_training - Step 1077: {'lr': 0.00026925, 'samples': 551936, 'steps': 1077, 'loss/train': 5.475964069366455} +02/24/2022 02:59:27 - INFO - codeparrot_training - Step 1078: {'lr': 0.00026950000000000005, 'samples': 552448, 'steps': 1078, 'loss/train': 6.140387535095215} +02/24/2022 02:59:32 - INFO - codeparrot_training - Step 1079: {'lr': 0.00026974999999999997, 'samples': 552960, 'steps': 1079, 'loss/train': 5.8200812339782715} +02/24/2022 02:59:35 - INFO - codeparrot_training - Step 1080: {'lr': 0.00027, 'samples': 553472, 'steps': 1080, 'loss/train': 2.874450206756592} +02/24/2022 02:59:41 - INFO - codeparrot_training - Step 1081: {'lr': 0.00027025, 'samples': 553984, 'steps': 1081, 'loss/train': 5.2649149894714355} +02/24/2022 02:59:44 - INFO - codeparrot_training - Step 1082: {'lr': 0.0002705, 'samples': 554496, 'steps': 1082, 'loss/train': 6.761886119842529} +02/24/2022 02:59:50 - INFO - codeparrot_training - Step 1083: {'lr': 0.00027075, 'samples': 555008, 'steps': 1083, 'loss/train': 4.219944000244141} +02/24/2022 02:59:53 - INFO - codeparrot_training - Step 1084: {'lr': 0.00027100000000000003, 'samples': 555520, 'steps': 1084, 'loss/train': 5.17498779296875} +02/24/2022 02:59:59 - INFO - codeparrot_training - Step 1085: {'lr': 0.00027125, 'samples': 556032, 'steps': 1085, 'loss/train': 6.553275108337402} +02/24/2022 03:00:03 - INFO - codeparrot_training - Step 1086: {'lr': 0.00027150000000000004, 'samples': 556544, 'steps': 1086, 'loss/train': 4.6969404220581055} +02/24/2022 03:00:08 - INFO - codeparrot_training - Step 1087: {'lr': 0.00027175, 'samples': 557056, 'steps': 1087, 'loss/train': 4.942883491516113} +02/24/2022 03:00:11 - INFO - codeparrot_training - Step 1088: {'lr': 0.00027200000000000005, 'samples': 557568, 'steps': 1088, 'loss/train': 4.379235744476318} +02/24/2022 03:00:19 - INFO - codeparrot_training - Step 1089: {'lr': 0.00027225, 'samples': 558080, 'steps': 1089, 'loss/train': 5.86265230178833} +02/24/2022 03:00:22 - INFO - codeparrot_training - Step 1090: {'lr': 0.0002725, 'samples': 558592, 'steps': 1090, 'loss/train': 4.575802803039551} +02/24/2022 03:00:28 - INFO - codeparrot_training - Step 1091: {'lr': 0.00027275, 'samples': 559104, 'steps': 1091, 'loss/train': 1.775154948234558} +02/24/2022 03:00:31 - INFO - codeparrot_training - Step 1092: {'lr': 0.000273, 'samples': 559616, 'steps': 1092, 'loss/train': 5.635185718536377} +02/24/2022 03:00:37 - INFO - codeparrot_training - Step 1093: {'lr': 0.00027325, 'samples': 560128, 'steps': 1093, 'loss/train': 4.302326679229736} +02/24/2022 03:00:40 - INFO - codeparrot_training - Step 1094: {'lr': 0.00027350000000000003, 'samples': 560640, 'steps': 1094, 'loss/train': 6.043479919433594} +02/24/2022 03:00:46 - INFO - codeparrot_training - Step 1095: {'lr': 0.00027375, 'samples': 561152, 'steps': 1095, 'loss/train': 6.240167140960693} +02/24/2022 03:00:49 - INFO - codeparrot_training - Step 1096: {'lr': 0.00027400000000000005, 'samples': 561664, 'steps': 1096, 'loss/train': 6.487496852874756} +02/24/2022 03:00:55 - INFO - codeparrot_training - Step 1097: {'lr': 0.00027425, 'samples': 562176, 'steps': 1097, 'loss/train': 4.986917018890381} +02/24/2022 03:00:58 - INFO - codeparrot_training - Step 1098: {'lr': 0.0002745, 'samples': 562688, 'steps': 1098, 'loss/train': 6.253907680511475} +02/24/2022 03:01:04 - INFO - codeparrot_training - Step 1099: {'lr': 0.00027475, 'samples': 563200, 'steps': 1099, 'loss/train': 5.558638095855713} +02/24/2022 03:01:09 - INFO - codeparrot_training - Step 1100: {'lr': 0.000275, 'samples': 563712, 'steps': 1100, 'loss/train': 6.074194431304932} +02/24/2022 03:01:13 - INFO - codeparrot_training - Step 1101: {'lr': 0.00027525, 'samples': 564224, 'steps': 1101, 'loss/train': 5.0031514167785645} +02/24/2022 03:01:20 - INFO - codeparrot_training - Step 1102: {'lr': 0.00027550000000000003, 'samples': 564736, 'steps': 1102, 'loss/train': 5.181690692901611} +02/24/2022 03:01:23 - INFO - codeparrot_training - Step 1103: {'lr': 0.00027575, 'samples': 565248, 'steps': 1103, 'loss/train': 6.462654113769531} +02/24/2022 03:01:29 - INFO - codeparrot_training - Step 1104: {'lr': 0.00027600000000000004, 'samples': 565760, 'steps': 1104, 'loss/train': 4.807124137878418} +02/24/2022 03:01:32 - INFO - codeparrot_training - Step 1105: {'lr': 0.00027625, 'samples': 566272, 'steps': 1105, 'loss/train': 5.1877007484436035} +02/24/2022 03:01:38 - INFO - codeparrot_training - Step 1106: {'lr': 0.00027650000000000005, 'samples': 566784, 'steps': 1106, 'loss/train': 5.409707069396973} +02/24/2022 03:01:41 - INFO - codeparrot_training - Step 1107: {'lr': 0.00027675, 'samples': 567296, 'steps': 1107, 'loss/train': 5.500791549682617} +02/24/2022 03:01:47 - INFO - codeparrot_training - Step 1108: {'lr': 0.000277, 'samples': 567808, 'steps': 1108, 'loss/train': 4.060062408447266} +02/24/2022 03:01:50 - INFO - codeparrot_training - Step 1109: {'lr': 0.00027725, 'samples': 568320, 'steps': 1109, 'loss/train': 4.874750137329102} +02/24/2022 03:01:56 - INFO - codeparrot_training - Step 1110: {'lr': 0.0002775, 'samples': 568832, 'steps': 1110, 'loss/train': 5.023270130157471} +02/24/2022 03:01:59 - INFO - codeparrot_training - Step 1111: {'lr': 0.00027775, 'samples': 569344, 'steps': 1111, 'loss/train': 5.3346076011657715} +02/24/2022 03:02:05 - INFO - codeparrot_training - Step 1112: {'lr': 0.00027800000000000004, 'samples': 569856, 'steps': 1112, 'loss/train': 4.889978885650635} +02/24/2022 03:02:08 - INFO - codeparrot_training - Step 1113: {'lr': 0.00027825, 'samples': 570368, 'steps': 1113, 'loss/train': 1.7322806119918823} +02/24/2022 03:02:15 - INFO - codeparrot_training - Step 1114: {'lr': 0.00027850000000000005, 'samples': 570880, 'steps': 1114, 'loss/train': 5.020740985870361} +02/24/2022 03:02:19 - INFO - codeparrot_training - Step 1115: {'lr': 0.00027875, 'samples': 571392, 'steps': 1115, 'loss/train': 4.7959160804748535} +02/24/2022 03:02:24 - INFO - codeparrot_training - Step 1116: {'lr': 0.000279, 'samples': 571904, 'steps': 1116, 'loss/train': 5.215575218200684} +02/24/2022 03:02:28 - INFO - codeparrot_training - Step 1117: {'lr': 0.00027925, 'samples': 572416, 'steps': 1117, 'loss/train': 3.991440773010254} +02/24/2022 03:02:33 - INFO - codeparrot_training - Step 1118: {'lr': 0.0002795, 'samples': 572928, 'steps': 1118, 'loss/train': 6.473158359527588} +02/24/2022 03:02:37 - INFO - codeparrot_training - Step 1119: {'lr': 0.00027975, 'samples': 573440, 'steps': 1119, 'loss/train': 4.704736709594727} +02/24/2022 03:02:42 - INFO - codeparrot_training - Step 1120: {'lr': 0.00028000000000000003, 'samples': 573952, 'steps': 1120, 'loss/train': 4.446264743804932} +02/24/2022 03:02:46 - INFO - codeparrot_training - Step 1121: {'lr': 0.00028025, 'samples': 574464, 'steps': 1121, 'loss/train': 4.785529613494873} +02/24/2022 03:02:53 - INFO - codeparrot_training - Step 1122: {'lr': 0.00028050000000000004, 'samples': 574976, 'steps': 1122, 'loss/train': 5.402191162109375} +02/24/2022 03:02:56 - INFO - codeparrot_training - Step 1123: {'lr': 0.00028075, 'samples': 575488, 'steps': 1123, 'loss/train': 5.555760860443115} +02/24/2022 03:03:02 - INFO - codeparrot_training - Step 1124: {'lr': 0.00028100000000000005, 'samples': 576000, 'steps': 1124, 'loss/train': 5.6269989013671875} +02/24/2022 03:03:05 - INFO - codeparrot_training - Step 1125: {'lr': 0.00028125000000000003, 'samples': 576512, 'steps': 1125, 'loss/train': 4.530860424041748} +02/24/2022 03:03:11 - INFO - codeparrot_training - Step 1126: {'lr': 0.00028149999999999996, 'samples': 577024, 'steps': 1126, 'loss/train': 5.3932785987854} +02/24/2022 03:03:14 - INFO - codeparrot_training - Step 1127: {'lr': 0.00028175, 'samples': 577536, 'steps': 1127, 'loss/train': 4.677549362182617} +02/24/2022 03:03:20 - INFO - codeparrot_training - Step 1128: {'lr': 0.00028199999999999997, 'samples': 578048, 'steps': 1128, 'loss/train': 5.662293434143066} +02/24/2022 03:03:24 - INFO - codeparrot_training - Step 1129: {'lr': 0.00028225, 'samples': 578560, 'steps': 1129, 'loss/train': 2.453404426574707} +02/24/2022 03:03:29 - INFO - codeparrot_training - Step 1130: {'lr': 0.0002825, 'samples': 579072, 'steps': 1130, 'loss/train': 4.762810707092285} +02/24/2022 03:03:33 - INFO - codeparrot_training - Step 1131: {'lr': 0.00028275, 'samples': 579584, 'steps': 1131, 'loss/train': 5.110528945922852} +02/24/2022 03:03:38 - INFO - codeparrot_training - Step 1132: {'lr': 0.000283, 'samples': 580096, 'steps': 1132, 'loss/train': 5.49754524230957} +02/24/2022 03:03:41 - INFO - codeparrot_training - Step 1133: {'lr': 0.00028325000000000003, 'samples': 580608, 'steps': 1133, 'loss/train': 5.466548442840576} +02/24/2022 03:03:47 - INFO - codeparrot_training - Step 1134: {'lr': 0.0002835, 'samples': 581120, 'steps': 1134, 'loss/train': 4.763579845428467} +02/24/2022 03:03:53 - INFO - codeparrot_training - Step 1135: {'lr': 0.00028375, 'samples': 581632, 'steps': 1135, 'loss/train': 4.981903076171875} +02/24/2022 03:03:56 - INFO - codeparrot_training - Step 1136: {'lr': 0.00028399999999999996, 'samples': 582144, 'steps': 1136, 'loss/train': 6.5357818603515625} +02/24/2022 03:04:03 - INFO - codeparrot_training - Step 1137: {'lr': 0.00028425, 'samples': 582656, 'steps': 1137, 'loss/train': 5.191011905670166} +02/24/2022 03:04:07 - INFO - codeparrot_training - Step 1138: {'lr': 0.0002845, 'samples': 583168, 'steps': 1138, 'loss/train': 4.785286903381348} +02/24/2022 03:04:12 - INFO - codeparrot_training - Step 1139: {'lr': 0.00028475, 'samples': 583680, 'steps': 1139, 'loss/train': 5.120326995849609} +02/24/2022 03:04:16 - INFO - codeparrot_training - Step 1140: {'lr': 0.000285, 'samples': 584192, 'steps': 1140, 'loss/train': 4.626264572143555} +02/24/2022 03:04:21 - INFO - codeparrot_training - Step 1141: {'lr': 0.00028525, 'samples': 584704, 'steps': 1141, 'loss/train': 6.172025203704834} +02/24/2022 03:04:25 - INFO - codeparrot_training - Step 1142: {'lr': 0.0002855, 'samples': 585216, 'steps': 1142, 'loss/train': 2.3676493167877197} +02/24/2022 03:04:30 - INFO - codeparrot_training - Step 1143: {'lr': 0.00028575000000000003, 'samples': 585728, 'steps': 1143, 'loss/train': 5.581705093383789} +02/24/2022 03:04:34 - INFO - codeparrot_training - Step 1144: {'lr': 0.00028599999999999996, 'samples': 586240, 'steps': 1144, 'loss/train': 3.964090347290039} +02/24/2022 03:04:39 - INFO - codeparrot_training - Step 1145: {'lr': 0.00028625, 'samples': 586752, 'steps': 1145, 'loss/train': 6.0772857666015625} +02/24/2022 03:04:43 - INFO - codeparrot_training - Step 1146: {'lr': 0.00028649999999999997, 'samples': 587264, 'steps': 1146, 'loss/train': 4.500453472137451} +02/24/2022 03:04:48 - INFO - codeparrot_training - Step 1147: {'lr': 0.00028675, 'samples': 587776, 'steps': 1147, 'loss/train': 4.486052989959717} +02/24/2022 03:04:52 - INFO - codeparrot_training - Step 1148: {'lr': 0.000287, 'samples': 588288, 'steps': 1148, 'loss/train': 5.942382335662842} +02/24/2022 03:04:59 - INFO - codeparrot_training - Step 1149: {'lr': 0.00028725, 'samples': 588800, 'steps': 1149, 'loss/train': 5.77067232131958} +02/24/2022 03:05:02 - INFO - codeparrot_training - Step 1150: {'lr': 0.0002875, 'samples': 589312, 'steps': 1150, 'loss/train': 5.469539165496826} +02/24/2022 03:05:08 - INFO - codeparrot_training - Step 1151: {'lr': 0.00028775000000000003, 'samples': 589824, 'steps': 1151, 'loss/train': 6.311186790466309} +02/24/2022 03:05:11 - INFO - codeparrot_training - Step 1152: {'lr': 0.000288, 'samples': 590336, 'steps': 1152, 'loss/train': 4.198432922363281} +02/24/2022 03:05:17 - INFO - codeparrot_training - Step 1153: {'lr': 0.00028825, 'samples': 590848, 'steps': 1153, 'loss/train': 3.5707523822784424} +02/24/2022 03:05:21 - INFO - codeparrot_training - Step 1154: {'lr': 0.00028849999999999997, 'samples': 591360, 'steps': 1154, 'loss/train': 4.79783296585083} +02/24/2022 03:05:26 - INFO - codeparrot_training - Step 1155: {'lr': 0.00028875, 'samples': 591872, 'steps': 1155, 'loss/train': 4.189629077911377} +02/24/2022 03:05:30 - INFO - codeparrot_training - Step 1156: {'lr': 0.000289, 'samples': 592384, 'steps': 1156, 'loss/train': 5.576852321624756} +02/24/2022 03:05:35 - INFO - codeparrot_training - Step 1157: {'lr': 0.00028925, 'samples': 592896, 'steps': 1157, 'loss/train': 6.0749921798706055} +02/24/2022 03:05:38 - INFO - codeparrot_training - Step 1158: {'lr': 0.0002895, 'samples': 593408, 'steps': 1158, 'loss/train': 5.3626556396484375} +02/24/2022 03:05:46 - INFO - codeparrot_training - Step 1159: {'lr': 0.00028975, 'samples': 593920, 'steps': 1159, 'loss/train': 4.907273769378662} +02/24/2022 03:05:49 - INFO - codeparrot_training - Step 1160: {'lr': 0.00029, 'samples': 594432, 'steps': 1160, 'loss/train': 6.00240421295166} +02/24/2022 03:05:53 - INFO - codeparrot_training - Step 1161: {'lr': 0.00029025000000000003, 'samples': 594944, 'steps': 1161, 'loss/train': 2.301327705383301} +02/24/2022 03:05:58 - INFO - codeparrot_training - Step 1162: {'lr': 0.00029049999999999996, 'samples': 595456, 'steps': 1162, 'loss/train': 5.6408281326293945} +02/24/2022 03:06:04 - INFO - codeparrot_training - Step 1163: {'lr': 0.00029075, 'samples': 595968, 'steps': 1163, 'loss/train': 4.322697162628174} +02/24/2022 03:06:07 - INFO - codeparrot_training - Step 1164: {'lr': 0.00029099999999999997, 'samples': 596480, 'steps': 1164, 'loss/train': 4.571609020233154} +02/24/2022 03:06:13 - INFO - codeparrot_training - Step 1165: {'lr': 0.00029125, 'samples': 596992, 'steps': 1165, 'loss/train': 5.720776557922363} +02/24/2022 03:06:16 - INFO - codeparrot_training - Step 1166: {'lr': 0.0002915, 'samples': 597504, 'steps': 1166, 'loss/train': 4.859494686126709} +02/24/2022 03:06:22 - INFO - codeparrot_training - Step 1167: {'lr': 0.00029175, 'samples': 598016, 'steps': 1167, 'loss/train': 5.917038917541504} +02/24/2022 03:06:25 - INFO - codeparrot_training - Step 1168: {'lr': 0.000292, 'samples': 598528, 'steps': 1168, 'loss/train': 5.487188816070557} +02/24/2022 03:06:32 - INFO - codeparrot_training - Step 1169: {'lr': 0.00029225000000000003, 'samples': 599040, 'steps': 1169, 'loss/train': 4.916807174682617} +02/24/2022 03:06:36 - INFO - codeparrot_training - Step 1170: {'lr': 0.0002925, 'samples': 599552, 'steps': 1170, 'loss/train': 4.521228313446045} +02/24/2022 03:06:41 - INFO - codeparrot_training - Step 1171: {'lr': 0.00029275000000000004, 'samples': 600064, 'steps': 1171, 'loss/train': 4.090498447418213} +02/24/2022 03:06:45 - INFO - codeparrot_training - Step 1172: {'lr': 0.00029299999999999997, 'samples': 600576, 'steps': 1172, 'loss/train': 4.494028091430664} +02/24/2022 03:06:50 - INFO - codeparrot_training - Step 1173: {'lr': 0.00029325, 'samples': 601088, 'steps': 1173, 'loss/train': 4.425089359283447} +02/24/2022 03:06:54 - INFO - codeparrot_training - Step 1174: {'lr': 0.0002935, 'samples': 601600, 'steps': 1174, 'loss/train': 5.415894508361816} +02/24/2022 03:06:59 - INFO - codeparrot_training - Step 1175: {'lr': 0.00029375, 'samples': 602112, 'steps': 1175, 'loss/train': 5.765039920806885} +02/24/2022 03:07:03 - INFO - codeparrot_training - Step 1176: {'lr': 0.000294, 'samples': 602624, 'steps': 1176, 'loss/train': 4.922140598297119} +02/24/2022 03:07:08 - INFO - codeparrot_training - Step 1177: {'lr': 0.00029425, 'samples': 603136, 'steps': 1177, 'loss/train': 5.992300987243652} +02/24/2022 03:07:12 - INFO - codeparrot_training - Step 1178: {'lr': 0.0002945, 'samples': 603648, 'steps': 1178, 'loss/train': 4.574462413787842} +02/24/2022 03:07:17 - INFO - codeparrot_training - Step 1179: {'lr': 0.00029475000000000004, 'samples': 604160, 'steps': 1179, 'loss/train': 4.694687366485596} +02/24/2022 03:07:21 - INFO - codeparrot_training - Step 1180: {'lr': 0.000295, 'samples': 604672, 'steps': 1180, 'loss/train': 5.516098499298096} +02/24/2022 03:07:26 - INFO - codeparrot_training - Step 1181: {'lr': 0.00029525, 'samples': 605184, 'steps': 1181, 'loss/train': 5.470821380615234} +02/24/2022 03:07:30 - INFO - codeparrot_training - Step 1182: {'lr': 0.00029549999999999997, 'samples': 605696, 'steps': 1182, 'loss/train': 6.128830432891846} +02/24/2022 03:07:35 - INFO - codeparrot_training - Step 1183: {'lr': 0.00029575, 'samples': 606208, 'steps': 1183, 'loss/train': 5.9133195877075195} +02/24/2022 03:07:38 - INFO - codeparrot_training - Step 1184: {'lr': 0.000296, 'samples': 606720, 'steps': 1184, 'loss/train': 4.757331848144531} +02/24/2022 03:07:46 - INFO - codeparrot_training - Step 1185: {'lr': 0.00029625, 'samples': 607232, 'steps': 1185, 'loss/train': 4.075989246368408} +02/24/2022 03:07:49 - INFO - codeparrot_training - Step 1186: {'lr': 0.0002965, 'samples': 607744, 'steps': 1186, 'loss/train': 4.66351842880249} +02/24/2022 03:07:55 - INFO - codeparrot_training - Step 1187: {'lr': 0.00029675000000000003, 'samples': 608256, 'steps': 1187, 'loss/train': 5.199337482452393} +02/24/2022 03:07:58 - INFO - codeparrot_training - Step 1188: {'lr': 0.000297, 'samples': 608768, 'steps': 1188, 'loss/train': 4.158010482788086} +02/24/2022 03:08:04 - INFO - codeparrot_training - Step 1189: {'lr': 0.00029725000000000004, 'samples': 609280, 'steps': 1189, 'loss/train': 5.813632965087891} +02/24/2022 03:08:07 - INFO - codeparrot_training - Step 1190: {'lr': 0.00029749999999999997, 'samples': 609792, 'steps': 1190, 'loss/train': 5.551846504211426} +02/24/2022 03:08:12 - INFO - codeparrot_training - Step 1191: {'lr': 0.00029775, 'samples': 610304, 'steps': 1191, 'loss/train': 5.164458751678467} +02/24/2022 03:08:16 - INFO - codeparrot_training - Step 1192: {'lr': 0.000298, 'samples': 610816, 'steps': 1192, 'loss/train': 4.805848598480225} +02/24/2022 03:08:22 - INFO - codeparrot_training - Step 1193: {'lr': 0.00029825, 'samples': 611328, 'steps': 1193, 'loss/train': 4.825739860534668} +02/24/2022 03:08:25 - INFO - codeparrot_training - Step 1194: {'lr': 0.0002985, 'samples': 611840, 'steps': 1194, 'loss/train': 4.308709144592285} +02/24/2022 03:08:32 - INFO - codeparrot_training - Step 1195: {'lr': 0.00029875, 'samples': 612352, 'steps': 1195, 'loss/train': 5.1788458824157715} +02/24/2022 03:08:36 - INFO - codeparrot_training - Step 1196: {'lr': 0.000299, 'samples': 612864, 'steps': 1196, 'loss/train': 4.943964004516602} +02/24/2022 03:08:41 - INFO - codeparrot_training - Step 1197: {'lr': 0.00029925000000000004, 'samples': 613376, 'steps': 1197, 'loss/train': 5.083621025085449} +02/24/2022 03:08:45 - INFO - codeparrot_training - Step 1198: {'lr': 0.0002995, 'samples': 613888, 'steps': 1198, 'loss/train': 1.606532335281372} +02/24/2022 03:08:50 - INFO - codeparrot_training - Step 1199: {'lr': 0.00029975000000000005, 'samples': 614400, 'steps': 1199, 'loss/train': 4.5193400382995605} +02/24/2022 03:08:54 - INFO - codeparrot_training - Step 1200: {'lr': 0.0003, 'samples': 614912, 'steps': 1200, 'loss/train': 4.09227991104126} +02/24/2022 03:08:59 - INFO - codeparrot_training - Step 1201: {'lr': 0.00030025, 'samples': 615424, 'steps': 1201, 'loss/train': 5.314067840576172} +02/24/2022 03:09:03 - INFO - codeparrot_training - Step 1202: {'lr': 0.0003005, 'samples': 615936, 'steps': 1202, 'loss/train': 6.291508674621582} +02/24/2022 03:09:08 - INFO - codeparrot_training - Step 1203: {'lr': 0.00030075, 'samples': 616448, 'steps': 1203, 'loss/train': 5.892355918884277} +02/24/2022 03:09:12 - INFO - codeparrot_training - Step 1204: {'lr': 0.000301, 'samples': 616960, 'steps': 1204, 'loss/train': 1.4669965505599976} +02/24/2022 03:09:19 - INFO - codeparrot_training - Step 1205: {'lr': 0.00030125000000000003, 'samples': 617472, 'steps': 1205, 'loss/train': 5.404385566711426} +02/24/2022 03:09:24 - INFO - codeparrot_training - Step 1206: {'lr': 0.0003015, 'samples': 617984, 'steps': 1206, 'loss/train': 4.801541805267334} +02/24/2022 03:09:28 - INFO - codeparrot_training - Step 1207: {'lr': 0.00030175000000000004, 'samples': 618496, 'steps': 1207, 'loss/train': 5.1355414390563965} +02/24/2022 03:09:33 - INFO - codeparrot_training - Step 1208: {'lr': 0.000302, 'samples': 619008, 'steps': 1208, 'loss/train': 4.8629279136657715} +02/24/2022 03:09:37 - INFO - codeparrot_training - Step 1209: {'lr': 0.00030225, 'samples': 619520, 'steps': 1209, 'loss/train': 5.296463966369629} +02/24/2022 03:09:42 - INFO - codeparrot_training - Step 1210: {'lr': 0.0003025, 'samples': 620032, 'steps': 1210, 'loss/train': 5.099849700927734} +02/24/2022 03:09:46 - INFO - codeparrot_training - Step 1211: {'lr': 0.00030275, 'samples': 620544, 'steps': 1211, 'loss/train': 4.357608318328857} +02/24/2022 03:09:51 - INFO - codeparrot_training - Step 1212: {'lr': 0.000303, 'samples': 621056, 'steps': 1212, 'loss/train': 4.461977005004883} +02/24/2022 03:09:54 - INFO - codeparrot_training - Step 1213: {'lr': 0.00030325, 'samples': 621568, 'steps': 1213, 'loss/train': 4.262706756591797} +02/24/2022 03:10:02 - INFO - codeparrot_training - Step 1214: {'lr': 0.0003035, 'samples': 622080, 'steps': 1214, 'loss/train': 4.767970561981201} +02/24/2022 03:10:05 - INFO - codeparrot_training - Step 1215: {'lr': 0.00030375000000000004, 'samples': 622592, 'steps': 1215, 'loss/train': 5.04935884475708} +02/24/2022 03:10:11 - INFO - codeparrot_training - Step 1216: {'lr': 0.000304, 'samples': 623104, 'steps': 1216, 'loss/train': 5.2728447914123535} +02/24/2022 03:10:14 - INFO - codeparrot_training - Step 1217: {'lr': 0.00030425000000000005, 'samples': 623616, 'steps': 1217, 'loss/train': 4.5625457763671875} +02/24/2022 03:10:20 - INFO - codeparrot_training - Step 1218: {'lr': 0.0003045, 'samples': 624128, 'steps': 1218, 'loss/train': 6.0289506912231445} +02/24/2022 03:10:23 - INFO - codeparrot_training - Step 1219: {'lr': 0.00030475, 'samples': 624640, 'steps': 1219, 'loss/train': 4.400296211242676} +02/24/2022 03:10:29 - INFO - codeparrot_training - Step 1220: {'lr': 0.000305, 'samples': 625152, 'steps': 1220, 'loss/train': 4.083799362182617} +02/24/2022 03:10:32 - INFO - codeparrot_training - Step 1221: {'lr': 0.00030525, 'samples': 625664, 'steps': 1221, 'loss/train': 4.796741485595703} +02/24/2022 03:10:38 - INFO - codeparrot_training - Step 1222: {'lr': 0.0003055, 'samples': 626176, 'steps': 1222, 'loss/train': 5.3789896965026855} +02/24/2022 03:10:41 - INFO - codeparrot_training - Step 1223: {'lr': 0.00030575000000000003, 'samples': 626688, 'steps': 1223, 'loss/train': 5.484334945678711} +02/24/2022 03:10:47 - INFO - codeparrot_training - Step 1224: {'lr': 0.000306, 'samples': 627200, 'steps': 1224, 'loss/train': 4.20302677154541} +02/24/2022 03:10:50 - INFO - codeparrot_training - Step 1225: {'lr': 0.00030625000000000004, 'samples': 627712, 'steps': 1225, 'loss/train': 7.1518049240112305} +02/24/2022 03:10:54 - INFO - codeparrot_training - Step 1226: {'lr': 0.0003065, 'samples': 628224, 'steps': 1226, 'loss/train': 4.745423793792725} +02/24/2022 03:11:00 - INFO - codeparrot_training - Step 1227: {'lr': 0.00030675, 'samples': 628736, 'steps': 1227, 'loss/train': 5.093043804168701} +02/24/2022 03:11:06 - INFO - codeparrot_training - Step 1228: {'lr': 0.000307, 'samples': 629248, 'steps': 1228, 'loss/train': 5.976129531860352} +02/24/2022 03:11:09 - INFO - codeparrot_training - Step 1229: {'lr': 0.00030725, 'samples': 629760, 'steps': 1229, 'loss/train': 5.111237049102783} +02/24/2022 03:11:15 - INFO - codeparrot_training - Step 1230: {'lr': 0.0003075, 'samples': 630272, 'steps': 1230, 'loss/train': 10.158740997314453} +02/24/2022 03:11:18 - INFO - codeparrot_training - Step 1231: {'lr': 0.00030775, 'samples': 630784, 'steps': 1231, 'loss/train': 4.465982437133789} +02/24/2022 03:11:24 - INFO - codeparrot_training - Step 1232: {'lr': 0.000308, 'samples': 631296, 'steps': 1232, 'loss/train': 5.0771260261535645} +02/24/2022 03:11:27 - INFO - codeparrot_training - Step 1233: {'lr': 0.00030825000000000004, 'samples': 631808, 'steps': 1233, 'loss/train': 4.414144039154053} +02/24/2022 03:11:33 - INFO - codeparrot_training - Step 1234: {'lr': 0.0003085, 'samples': 632320, 'steps': 1234, 'loss/train': 3.978367805480957} +02/24/2022 03:11:36 - INFO - codeparrot_training - Step 1235: {'lr': 0.00030875000000000005, 'samples': 632832, 'steps': 1235, 'loss/train': 4.054947376251221} +02/24/2022 03:11:42 - INFO - codeparrot_training - Step 1236: {'lr': 0.00030900000000000003, 'samples': 633344, 'steps': 1236, 'loss/train': 4.702358722686768} +02/24/2022 03:11:45 - INFO - codeparrot_training - Step 1237: {'lr': 0.00030925, 'samples': 633856, 'steps': 1237, 'loss/train': 5.610233306884766} +02/24/2022 03:11:51 - INFO - codeparrot_training - Step 1238: {'lr': 0.0003095, 'samples': 634368, 'steps': 1238, 'loss/train': 5.236934661865234} +02/24/2022 03:11:54 - INFO - codeparrot_training - Step 1239: {'lr': 0.00030975, 'samples': 634880, 'steps': 1239, 'loss/train': 5.027810573577881} +02/24/2022 03:12:00 - INFO - codeparrot_training - Step 1240: {'lr': 0.00031, 'samples': 635392, 'steps': 1240, 'loss/train': 4.405148506164551} +02/24/2022 03:12:04 - INFO - codeparrot_training - Step 1241: {'lr': 0.00031025000000000003, 'samples': 635904, 'steps': 1241, 'loss/train': 4.1442670822143555} +02/24/2022 03:12:09 - INFO - codeparrot_training - Step 1242: {'lr': 0.0003105, 'samples': 636416, 'steps': 1242, 'loss/train': 5.2206597328186035} +02/24/2022 03:12:12 - INFO - codeparrot_training - Step 1243: {'lr': 0.00031075000000000005, 'samples': 636928, 'steps': 1243, 'loss/train': 4.97514009475708} +02/24/2022 03:12:18 - INFO - codeparrot_training - Step 1244: {'lr': 0.000311, 'samples': 637440, 'steps': 1244, 'loss/train': 3.7692489624023438} +02/24/2022 03:12:21 - INFO - codeparrot_training - Step 1245: {'lr': 0.00031125000000000006, 'samples': 637952, 'steps': 1245, 'loss/train': 4.279240131378174} +02/24/2022 03:12:27 - INFO - codeparrot_training - Step 1246: {'lr': 0.0003115, 'samples': 638464, 'steps': 1246, 'loss/train': 4.816704750061035} +02/24/2022 03:12:31 - INFO - codeparrot_training - Step 1247: {'lr': 0.00031175, 'samples': 638976, 'steps': 1247, 'loss/train': 4.269006729125977} +02/24/2022 03:12:36 - INFO - codeparrot_training - Step 1248: {'lr': 0.000312, 'samples': 639488, 'steps': 1248, 'loss/train': 4.934327602386475} +02/24/2022 03:12:40 - INFO - codeparrot_training - Step 1249: {'lr': 0.00031225000000000003, 'samples': 640000, 'steps': 1249, 'loss/train': 4.7373552322387695} +02/24/2022 03:12:46 - INFO - codeparrot_training - Step 1250: {'lr': 0.0003125, 'samples': 640512, 'steps': 1250, 'loss/train': 5.402671813964844} +02/24/2022 03:12:49 - INFO - codeparrot_training - Step 1251: {'lr': 0.00031275, 'samples': 641024, 'steps': 1251, 'loss/train': 5.435028553009033} +02/24/2022 03:12:55 - INFO - codeparrot_training - Step 1252: {'lr': 0.000313, 'samples': 641536, 'steps': 1252, 'loss/train': 4.6016435623168945} +02/24/2022 03:12:58 - INFO - codeparrot_training - Step 1253: {'lr': 0.00031325, 'samples': 642048, 'steps': 1253, 'loss/train': 3.774256706237793} +02/24/2022 03:13:04 - INFO - codeparrot_training - Step 1254: {'lr': 0.00031350000000000003, 'samples': 642560, 'steps': 1254, 'loss/train': 3.7604446411132812} +02/24/2022 03:13:07 - INFO - codeparrot_training - Step 1255: {'lr': 0.00031374999999999996, 'samples': 643072, 'steps': 1255, 'loss/train': 6.243778705596924} +02/24/2022 03:13:12 - INFO - codeparrot_training - Step 1256: {'lr': 0.000314, 'samples': 643584, 'steps': 1256, 'loss/train': 5.965482711791992} +02/24/2022 03:13:16 - INFO - codeparrot_training - Step 1257: {'lr': 0.00031424999999999997, 'samples': 644096, 'steps': 1257, 'loss/train': 5.064489841461182} +02/24/2022 03:13:21 - INFO - codeparrot_training - Step 1258: {'lr': 0.0003145, 'samples': 644608, 'steps': 1258, 'loss/train': 4.7462158203125} +02/24/2022 03:13:25 - INFO - codeparrot_training - Step 1259: {'lr': 0.00031475, 'samples': 645120, 'steps': 1259, 'loss/train': 4.177390098571777} +02/24/2022 03:13:31 - INFO - codeparrot_training - Step 1260: {'lr': 0.000315, 'samples': 645632, 'steps': 1260, 'loss/train': 3.2847282886505127} +02/24/2022 03:13:34 - INFO - codeparrot_training - Step 1261: {'lr': 0.00031525, 'samples': 646144, 'steps': 1261, 'loss/train': 2.996202230453491} +02/24/2022 03:13:41 - INFO - codeparrot_training - Step 1262: {'lr': 0.0003155, 'samples': 646656, 'steps': 1262, 'loss/train': 5.227932453155518} +02/24/2022 03:13:44 - INFO - codeparrot_training - Step 1263: {'lr': 0.00031575, 'samples': 647168, 'steps': 1263, 'loss/train': 5.454081058502197} +02/24/2022 03:13:50 - INFO - codeparrot_training - Step 1264: {'lr': 0.000316, 'samples': 647680, 'steps': 1264, 'loss/train': 4.7259626388549805} +02/24/2022 03:13:53 - INFO - codeparrot_training - Step 1265: {'lr': 0.00031624999999999996, 'samples': 648192, 'steps': 1265, 'loss/train': 6.212442874908447} +02/24/2022 03:13:59 - INFO - codeparrot_training - Step 1266: {'lr': 0.0003165, 'samples': 648704, 'steps': 1266, 'loss/train': 4.92418098449707} +02/24/2022 03:14:02 - INFO - codeparrot_training - Step 1267: {'lr': 0.00031675, 'samples': 649216, 'steps': 1267, 'loss/train': 5.104456901550293} +02/24/2022 03:14:08 - INFO - codeparrot_training - Step 1268: {'lr': 0.000317, 'samples': 649728, 'steps': 1268, 'loss/train': 5.967926979064941} +02/24/2022 03:14:11 - INFO - codeparrot_training - Step 1269: {'lr': 0.00031725, 'samples': 650240, 'steps': 1269, 'loss/train': 4.988691806793213} +02/24/2022 03:14:17 - INFO - codeparrot_training - Step 1270: {'lr': 0.0003175, 'samples': 650752, 'steps': 1270, 'loss/train': 5.1615118980407715} +02/24/2022 03:14:20 - INFO - codeparrot_training - Step 1271: {'lr': 0.00031775, 'samples': 651264, 'steps': 1271, 'loss/train': 6.670093059539795} +02/24/2022 03:14:27 - INFO - codeparrot_training - Step 1272: {'lr': 0.00031800000000000003, 'samples': 651776, 'steps': 1272, 'loss/train': 6.492274761199951} +02/24/2022 03:14:30 - INFO - codeparrot_training - Step 1273: {'lr': 0.00031825, 'samples': 652288, 'steps': 1273, 'loss/train': 1.6502529382705688} +02/24/2022 03:14:35 - INFO - codeparrot_training - Step 1274: {'lr': 0.0003185, 'samples': 652800, 'steps': 1274, 'loss/train': 5.201275825500488} +02/24/2022 03:14:39 - INFO - codeparrot_training - Step 1275: {'lr': 0.00031874999999999997, 'samples': 653312, 'steps': 1275, 'loss/train': 2.714742422103882} +02/24/2022 03:14:44 - INFO - codeparrot_training - Step 1276: {'lr': 0.000319, 'samples': 653824, 'steps': 1276, 'loss/train': 1.6181998252868652} +02/24/2022 03:14:48 - INFO - codeparrot_training - Step 1277: {'lr': 0.00031925, 'samples': 654336, 'steps': 1277, 'loss/train': 4.431972026824951} +02/24/2022 03:14:53 - INFO - codeparrot_training - Step 1278: {'lr': 0.0003195, 'samples': 654848, 'steps': 1278, 'loss/train': 5.74703311920166} +02/24/2022 03:14:57 - INFO - codeparrot_training - Step 1279: {'lr': 0.00031975, 'samples': 655360, 'steps': 1279, 'loss/train': 3.972041606903076} +02/24/2022 03:15:02 - INFO - codeparrot_training - Step 1280: {'lr': 0.00032, 'samples': 655872, 'steps': 1280, 'loss/train': 4.932854175567627} +02/24/2022 03:15:06 - INFO - codeparrot_training - Step 1281: {'lr': 0.00032025, 'samples': 656384, 'steps': 1281, 'loss/train': 5.711575508117676} +02/24/2022 03:15:11 - INFO - codeparrot_training - Step 1282: {'lr': 0.00032050000000000004, 'samples': 656896, 'steps': 1282, 'loss/train': 4.543326377868652} +02/24/2022 03:15:15 - INFO - codeparrot_training - Step 1283: {'lr': 0.00032074999999999996, 'samples': 657408, 'steps': 1283, 'loss/train': 3.9023425579071045} +02/24/2022 03:15:20 - INFO - codeparrot_training - Step 1284: {'lr': 0.000321, 'samples': 657920, 'steps': 1284, 'loss/train': 4.438559055328369} +02/24/2022 03:15:24 - INFO - codeparrot_training - Step 1285: {'lr': 0.00032125, 'samples': 658432, 'steps': 1285, 'loss/train': 5.090837001800537} +02/24/2022 03:15:29 - INFO - codeparrot_training - Step 1286: {'lr': 0.0003215, 'samples': 658944, 'steps': 1286, 'loss/train': 3.9492552280426025} +02/24/2022 03:15:32 - INFO - codeparrot_training - Step 1287: {'lr': 0.00032175, 'samples': 659456, 'steps': 1287, 'loss/train': 4.6581549644470215} +02/24/2022 03:15:39 - INFO - codeparrot_training - Step 1288: {'lr': 0.000322, 'samples': 659968, 'steps': 1288, 'loss/train': 5.21675443649292} +02/24/2022 03:15:42 - INFO - codeparrot_training - Step 1289: {'lr': 0.00032225, 'samples': 660480, 'steps': 1289, 'loss/train': 5.406274318695068} +02/24/2022 03:15:48 - INFO - codeparrot_training - Step 1290: {'lr': 0.00032250000000000003, 'samples': 660992, 'steps': 1290, 'loss/train': 5.3217597007751465} +02/24/2022 03:15:51 - INFO - codeparrot_training - Step 1291: {'lr': 0.00032275, 'samples': 661504, 'steps': 1291, 'loss/train': 5.154272556304932} +02/24/2022 03:15:57 - INFO - codeparrot_training - Step 1292: {'lr': 0.000323, 'samples': 662016, 'steps': 1292, 'loss/train': 4.131954193115234} +02/24/2022 03:16:00 - INFO - codeparrot_training - Step 1293: {'lr': 0.00032324999999999997, 'samples': 662528, 'steps': 1293, 'loss/train': 4.235006332397461} +02/24/2022 03:16:06 - INFO - codeparrot_training - Step 1294: {'lr': 0.0003235, 'samples': 663040, 'steps': 1294, 'loss/train': 5.352925777435303} +02/24/2022 03:16:09 - INFO - codeparrot_training - Step 1295: {'lr': 0.00032375, 'samples': 663552, 'steps': 1295, 'loss/train': 4.743350505828857} +02/24/2022 03:16:15 - INFO - codeparrot_training - Step 1296: {'lr': 0.000324, 'samples': 664064, 'steps': 1296, 'loss/train': 5.727532863616943} +02/24/2022 03:16:21 - INFO - codeparrot_training - Step 1297: {'lr': 0.00032425, 'samples': 664576, 'steps': 1297, 'loss/train': 5.753353118896484} +02/24/2022 03:16:25 - INFO - codeparrot_training - Step 1298: {'lr': 0.00032450000000000003, 'samples': 665088, 'steps': 1298, 'loss/train': 4.1519455909729} +02/24/2022 03:16:31 - INFO - codeparrot_training - Step 1299: {'lr': 0.00032475, 'samples': 665600, 'steps': 1299, 'loss/train': 3.3136136531829834} +02/24/2022 03:16:34 - INFO - codeparrot_training - Step 1300: {'lr': 0.00032500000000000004, 'samples': 666112, 'steps': 1300, 'loss/train': 5.631933212280273} +02/24/2022 03:16:39 - INFO - codeparrot_training - Step 1301: {'lr': 0.00032524999999999996, 'samples': 666624, 'steps': 1301, 'loss/train': 3.7394909858703613} +02/24/2022 03:16:43 - INFO - codeparrot_training - Step 1302: {'lr': 0.0003255, 'samples': 667136, 'steps': 1302, 'loss/train': 4.254319190979004} +02/24/2022 03:16:49 - INFO - codeparrot_training - Step 1303: {'lr': 0.00032575, 'samples': 667648, 'steps': 1303, 'loss/train': 4.715116024017334} +02/24/2022 03:16:52 - INFO - codeparrot_training - Step 1304: {'lr': 0.000326, 'samples': 668160, 'steps': 1304, 'loss/train': 4.737923622131348} +02/24/2022 03:16:57 - INFO - codeparrot_training - Step 1305: {'lr': 0.00032625, 'samples': 668672, 'steps': 1305, 'loss/train': 5.72030782699585} +02/24/2022 03:17:01 - INFO - codeparrot_training - Step 1306: {'lr': 0.0003265, 'samples': 669184, 'steps': 1306, 'loss/train': 4.006705284118652} +02/24/2022 03:17:08 - INFO - codeparrot_training - Step 1307: {'lr': 0.00032675, 'samples': 669696, 'steps': 1307, 'loss/train': 5.079216003417969} +02/24/2022 03:17:11 - INFO - codeparrot_training - Step 1308: {'lr': 0.00032700000000000003, 'samples': 670208, 'steps': 1308, 'loss/train': 4.970621109008789} +02/24/2022 03:17:17 - INFO - codeparrot_training - Step 1309: {'lr': 0.00032725, 'samples': 670720, 'steps': 1309, 'loss/train': 3.7042927742004395} +02/24/2022 03:17:20 - INFO - codeparrot_training - Step 1310: {'lr': 0.00032750000000000005, 'samples': 671232, 'steps': 1310, 'loss/train': 1.5207202434539795} +02/24/2022 03:17:26 - INFO - codeparrot_training - Step 1311: {'lr': 0.00032774999999999997, 'samples': 671744, 'steps': 1311, 'loss/train': 4.482362270355225} +02/24/2022 03:17:29 - INFO - codeparrot_training - Step 1312: {'lr': 0.000328, 'samples': 672256, 'steps': 1312, 'loss/train': 4.10011100769043} +02/24/2022 03:17:35 - INFO - codeparrot_training - Step 1313: {'lr': 0.00032825, 'samples': 672768, 'steps': 1313, 'loss/train': 5.036800861358643} +02/24/2022 03:17:38 - INFO - codeparrot_training - Step 1314: {'lr': 0.0003285, 'samples': 673280, 'steps': 1314, 'loss/train': 4.329094886779785} +02/24/2022 03:17:43 - INFO - codeparrot_training - Step 1315: {'lr': 0.00032875, 'samples': 673792, 'steps': 1315, 'loss/train': 5.421314716339111} +02/24/2022 03:17:47 - INFO - codeparrot_training - Step 1316: {'lr': 0.00032900000000000003, 'samples': 674304, 'steps': 1316, 'loss/train': 3.2206647396087646} +02/24/2022 03:17:53 - INFO - codeparrot_training - Step 1317: {'lr': 0.00032925, 'samples': 674816, 'steps': 1317, 'loss/train': 4.149138450622559} +02/24/2022 03:17:57 - INFO - codeparrot_training - Step 1318: {'lr': 0.00032950000000000004, 'samples': 675328, 'steps': 1318, 'loss/train': 4.594919681549072} +02/24/2022 03:18:02 - INFO - codeparrot_training - Step 1319: {'lr': 0.00032975, 'samples': 675840, 'steps': 1319, 'loss/train': 2.8470685482025146} +02/24/2022 03:18:06 - INFO - codeparrot_training - Step 1320: {'lr': 0.00033, 'samples': 676352, 'steps': 1320, 'loss/train': 3.9833788871765137} +02/24/2022 03:18:09 - INFO - codeparrot_training - Step 1321: {'lr': 0.00033025, 'samples': 676864, 'steps': 1321, 'loss/train': 4.424703121185303} +02/24/2022 03:18:15 - INFO - codeparrot_training - Step 1322: {'lr': 0.0003305, 'samples': 677376, 'steps': 1322, 'loss/train': 4.049134254455566} +02/24/2022 03:18:18 - INFO - codeparrot_training - Step 1323: {'lr': 0.00033075, 'samples': 677888, 'steps': 1323, 'loss/train': 4.732639312744141} +02/24/2022 03:18:24 - INFO - codeparrot_training - Step 1324: {'lr': 0.000331, 'samples': 678400, 'steps': 1324, 'loss/train': 5.325099468231201} +02/24/2022 03:18:29 - INFO - codeparrot_training - Step 1325: {'lr': 0.00033125, 'samples': 678912, 'steps': 1325, 'loss/train': 4.985098838806152} +02/24/2022 03:18:32 - INFO - codeparrot_training - Step 1326: {'lr': 0.00033150000000000003, 'samples': 679424, 'steps': 1326, 'loss/train': 5.548741340637207} +02/24/2022 03:18:38 - INFO - codeparrot_training - Step 1327: {'lr': 0.00033175, 'samples': 679936, 'steps': 1327, 'loss/train': 4.608846664428711} +02/24/2022 03:18:41 - INFO - codeparrot_training - Step 1328: {'lr': 0.00033200000000000005, 'samples': 680448, 'steps': 1328, 'loss/train': 6.626372814178467} +02/24/2022 03:18:47 - INFO - codeparrot_training - Step 1329: {'lr': 0.00033224999999999997, 'samples': 680960, 'steps': 1329, 'loss/train': 4.52178955078125} +02/24/2022 03:18:50 - INFO - codeparrot_training - Step 1330: {'lr': 0.0003325, 'samples': 681472, 'steps': 1330, 'loss/train': 4.231669902801514} +02/24/2022 03:18:56 - INFO - codeparrot_training - Step 1331: {'lr': 0.00033275, 'samples': 681984, 'steps': 1331, 'loss/train': 5.275118350982666} +02/24/2022 03:18:59 - INFO - codeparrot_training - Step 1332: {'lr': 0.000333, 'samples': 682496, 'steps': 1332, 'loss/train': 1.3557944297790527} +02/24/2022 03:19:06 - INFO - codeparrot_training - Step 1333: {'lr': 0.00033325, 'samples': 683008, 'steps': 1333, 'loss/train': 4.469725608825684} +02/24/2022 03:19:09 - INFO - codeparrot_training - Step 1334: {'lr': 0.00033350000000000003, 'samples': 683520, 'steps': 1334, 'loss/train': 3.57256817817688} +02/24/2022 03:19:15 - INFO - codeparrot_training - Step 1335: {'lr': 0.00033375, 'samples': 684032, 'steps': 1335, 'loss/train': 4.704814910888672} +02/24/2022 03:19:19 - INFO - codeparrot_training - Step 1336: {'lr': 0.00033400000000000004, 'samples': 684544, 'steps': 1336, 'loss/train': 4.015722274780273} +02/24/2022 03:19:24 - INFO - codeparrot_training - Step 1337: {'lr': 0.00033425, 'samples': 685056, 'steps': 1337, 'loss/train': 4.798593521118164} +02/24/2022 03:19:27 - INFO - codeparrot_training - Step 1338: {'lr': 0.00033450000000000005, 'samples': 685568, 'steps': 1338, 'loss/train': 4.522674560546875} +02/24/2022 03:19:33 - INFO - codeparrot_training - Step 1339: {'lr': 0.00033475, 'samples': 686080, 'steps': 1339, 'loss/train': 4.385733127593994} +02/24/2022 03:19:36 - INFO - codeparrot_training - Step 1340: {'lr': 0.000335, 'samples': 686592, 'steps': 1340, 'loss/train': 5.581892967224121} +02/24/2022 03:19:42 - INFO - codeparrot_training - Step 1341: {'lr': 0.00033525, 'samples': 687104, 'steps': 1341, 'loss/train': 5.506782054901123} +02/24/2022 03:19:45 - INFO - codeparrot_training - Step 1342: {'lr': 0.0003355, 'samples': 687616, 'steps': 1342, 'loss/train': 4.064359664916992} +02/24/2022 03:19:52 - INFO - codeparrot_training - Step 1343: {'lr': 0.00033575, 'samples': 688128, 'steps': 1343, 'loss/train': 6.217005729675293} +02/24/2022 03:19:55 - INFO - codeparrot_training - Step 1344: {'lr': 0.00033600000000000004, 'samples': 688640, 'steps': 1344, 'loss/train': 3.9408841133117676} +02/24/2022 03:20:01 - INFO - codeparrot_training - Step 1345: {'lr': 0.00033625, 'samples': 689152, 'steps': 1345, 'loss/train': 4.430418014526367} +02/24/2022 03:20:04 - INFO - codeparrot_training - Step 1346: {'lr': 0.00033650000000000005, 'samples': 689664, 'steps': 1346, 'loss/train': 3.7917208671569824} +02/24/2022 03:20:10 - INFO - codeparrot_training - Step 1347: {'lr': 0.00033675, 'samples': 690176, 'steps': 1347, 'loss/train': 5.144470691680908} +02/24/2022 03:20:13 - INFO - codeparrot_training - Step 1348: {'lr': 0.000337, 'samples': 690688, 'steps': 1348, 'loss/train': 4.643494129180908} +02/24/2022 03:20:19 - INFO - codeparrot_training - Step 1349: {'lr': 0.00033725, 'samples': 691200, 'steps': 1349, 'loss/train': 5.293832302093506} +02/24/2022 03:20:22 - INFO - codeparrot_training - Step 1350: {'lr': 0.0003375, 'samples': 691712, 'steps': 1350, 'loss/train': 4.75413179397583} +02/24/2022 03:20:28 - INFO - codeparrot_training - Step 1351: {'lr': 0.00033775, 'samples': 692224, 'steps': 1351, 'loss/train': 4.326086044311523} +02/24/2022 03:20:31 - INFO - codeparrot_training - Step 1352: {'lr': 0.00033800000000000003, 'samples': 692736, 'steps': 1352, 'loss/train': 4.840494632720947} +02/24/2022 03:20:37 - INFO - codeparrot_training - Step 1353: {'lr': 0.00033825, 'samples': 693248, 'steps': 1353, 'loss/train': 4.518887996673584} +02/24/2022 03:20:41 - INFO - codeparrot_training - Step 1354: {'lr': 0.00033850000000000004, 'samples': 693760, 'steps': 1354, 'loss/train': 6.481963157653809} +02/24/2022 03:20:46 - INFO - codeparrot_training - Step 1355: {'lr': 0.00033875, 'samples': 694272, 'steps': 1355, 'loss/train': 6.150463104248047} +02/24/2022 03:20:50 - INFO - codeparrot_training - Step 1356: {'lr': 0.00033900000000000005, 'samples': 694784, 'steps': 1356, 'loss/train': 3.5370700359344482} +02/24/2022 03:20:55 - INFO - codeparrot_training - Step 1357: {'lr': 0.00033925, 'samples': 695296, 'steps': 1357, 'loss/train': 4.54744815826416} +02/24/2022 03:20:59 - INFO - codeparrot_training - Step 1358: {'lr': 0.0003395, 'samples': 695808, 'steps': 1358, 'loss/train': 3.866886615753174} +02/24/2022 03:21:04 - INFO - codeparrot_training - Step 1359: {'lr': 0.00033975, 'samples': 696320, 'steps': 1359, 'loss/train': 4.2036848068237305} +02/24/2022 03:21:08 - INFO - codeparrot_training - Step 1360: {'lr': 0.00034, 'samples': 696832, 'steps': 1360, 'loss/train': 4.554596424102783} +02/24/2022 03:21:13 - INFO - codeparrot_training - Step 1361: {'lr': 0.00034025, 'samples': 697344, 'steps': 1361, 'loss/train': 4.297909736633301} +02/24/2022 03:21:17 - INFO - codeparrot_training - Step 1362: {'lr': 0.00034050000000000004, 'samples': 697856, 'steps': 1362, 'loss/train': 4.0271687507629395} +02/24/2022 03:21:23 - INFO - codeparrot_training - Step 1363: {'lr': 0.00034075, 'samples': 698368, 'steps': 1363, 'loss/train': 3.2887182235717773} +02/24/2022 03:21:26 - INFO - codeparrot_training - Step 1364: {'lr': 0.00034100000000000005, 'samples': 698880, 'steps': 1364, 'loss/train': 4.91015625} +02/24/2022 03:21:32 - INFO - codeparrot_training - Step 1365: {'lr': 0.00034125000000000003, 'samples': 699392, 'steps': 1365, 'loss/train': 3.5127220153808594} +02/24/2022 03:21:35 - INFO - codeparrot_training - Step 1366: {'lr': 0.0003415, 'samples': 699904, 'steps': 1366, 'loss/train': 4.21429967880249} +02/24/2022 03:21:41 - INFO - codeparrot_training - Step 1367: {'lr': 0.00034175, 'samples': 700416, 'steps': 1367, 'loss/train': 4.592789173126221} +02/24/2022 03:21:44 - INFO - codeparrot_training - Step 1368: {'lr': 0.000342, 'samples': 700928, 'steps': 1368, 'loss/train': 4.821196556091309} +02/24/2022 03:21:50 - INFO - codeparrot_training - Step 1369: {'lr': 0.00034225, 'samples': 701440, 'steps': 1369, 'loss/train': 4.770508289337158} +02/24/2022 03:21:54 - INFO - codeparrot_training - Step 1370: {'lr': 0.00034250000000000003, 'samples': 701952, 'steps': 1370, 'loss/train': 2.9974818229675293} +02/24/2022 03:21:59 - INFO - codeparrot_training - Step 1371: {'lr': 0.00034275, 'samples': 702464, 'steps': 1371, 'loss/train': 4.845798969268799} +02/24/2022 03:22:02 - INFO - codeparrot_training - Step 1372: {'lr': 0.00034300000000000004, 'samples': 702976, 'steps': 1372, 'loss/train': 7.2618513107299805} +02/24/2022 03:22:08 - INFO - codeparrot_training - Step 1373: {'lr': 0.00034325, 'samples': 703488, 'steps': 1373, 'loss/train': 5.582989692687988} +02/24/2022 03:22:12 - INFO - codeparrot_training - Step 1374: {'lr': 0.00034350000000000006, 'samples': 704000, 'steps': 1374, 'loss/train': 4.1638641357421875} +02/24/2022 03:22:17 - INFO - codeparrot_training - Step 1375: {'lr': 0.00034375, 'samples': 704512, 'steps': 1375, 'loss/train': 4.44398832321167} +02/24/2022 03:22:21 - INFO - codeparrot_training - Step 1376: {'lr': 0.00034399999999999996, 'samples': 705024, 'steps': 1376, 'loss/train': 5.0551042556762695} +02/24/2022 03:22:26 - INFO - codeparrot_training - Step 1377: {'lr': 0.00034425, 'samples': 705536, 'steps': 1377, 'loss/train': 4.392722129821777} +02/24/2022 03:22:29 - INFO - codeparrot_training - Step 1378: {'lr': 0.00034449999999999997, 'samples': 706048, 'steps': 1378, 'loss/train': 3.820713758468628} +02/24/2022 03:22:35 - INFO - codeparrot_training - Step 1379: {'lr': 0.00034475, 'samples': 706560, 'steps': 1379, 'loss/train': 4.039594650268555} +02/24/2022 03:22:39 - INFO - codeparrot_training - Step 1380: {'lr': 0.000345, 'samples': 707072, 'steps': 1380, 'loss/train': 5.738271713256836} +02/24/2022 03:22:44 - INFO - codeparrot_training - Step 1381: {'lr': 0.00034525, 'samples': 707584, 'steps': 1381, 'loss/train': 4.054514408111572} +02/24/2022 03:22:48 - INFO - codeparrot_training - Step 1382: {'lr': 0.0003455, 'samples': 708096, 'steps': 1382, 'loss/train': 3.326011896133423} +02/24/2022 03:22:53 - INFO - codeparrot_training - Step 1383: {'lr': 0.00034575000000000003, 'samples': 708608, 'steps': 1383, 'loss/train': 4.194097995758057} +02/24/2022 03:22:57 - INFO - codeparrot_training - Step 1384: {'lr': 0.000346, 'samples': 709120, 'steps': 1384, 'loss/train': 3.6856579780578613} +02/24/2022 03:23:02 - INFO - codeparrot_training - Step 1385: {'lr': 0.00034625, 'samples': 709632, 'steps': 1385, 'loss/train': 4.916198253631592} +02/24/2022 03:23:06 - INFO - codeparrot_training - Step 1386: {'lr': 0.00034649999999999997, 'samples': 710144, 'steps': 1386, 'loss/train': 5.696788311004639} +02/24/2022 03:23:11 - INFO - codeparrot_training - Step 1387: {'lr': 0.00034675, 'samples': 710656, 'steps': 1387, 'loss/train': 3.7698769569396973} +02/24/2022 03:23:15 - INFO - codeparrot_training - Step 1388: {'lr': 0.000347, 'samples': 711168, 'steps': 1388, 'loss/train': 4.747680187225342} +02/24/2022 03:23:20 - INFO - codeparrot_training - Step 1389: {'lr': 0.00034725, 'samples': 711680, 'steps': 1389, 'loss/train': 3.7353427410125732} +02/24/2022 03:23:24 - INFO - codeparrot_training - Step 1390: {'lr': 0.0003475, 'samples': 712192, 'steps': 1390, 'loss/train': 4.51666259765625} +02/24/2022 03:23:30 - INFO - codeparrot_training - Step 1391: {'lr': 0.00034775, 'samples': 712704, 'steps': 1391, 'loss/train': 4.353278636932373} +02/24/2022 03:23:33 - INFO - codeparrot_training - Step 1392: {'lr': 0.000348, 'samples': 713216, 'steps': 1392, 'loss/train': 3.0993151664733887} +02/24/2022 03:23:39 - INFO - codeparrot_training - Step 1393: {'lr': 0.00034825000000000004, 'samples': 713728, 'steps': 1393, 'loss/train': 4.803194522857666} +02/24/2022 03:23:44 - INFO - codeparrot_training - Step 1394: {'lr': 0.00034849999999999996, 'samples': 714240, 'steps': 1394, 'loss/train': 3.762338876724243} +02/24/2022 03:23:48 - INFO - codeparrot_training - Step 1395: {'lr': 0.00034875, 'samples': 714752, 'steps': 1395, 'loss/train': 3.8587708473205566} +02/24/2022 03:23:53 - INFO - codeparrot_training - Step 1396: {'lr': 0.00034899999999999997, 'samples': 715264, 'steps': 1396, 'loss/train': 5.5884881019592285} +02/24/2022 03:23:57 - INFO - codeparrot_training - Step 1397: {'lr': 0.00034925, 'samples': 715776, 'steps': 1397, 'loss/train': 3.1794707775115967} +02/24/2022 03:24:02 - INFO - codeparrot_training - Step 1398: {'lr': 0.0003495, 'samples': 716288, 'steps': 1398, 'loss/train': 4.749282360076904} +02/24/2022 03:24:06 - INFO - codeparrot_training - Step 1399: {'lr': 0.00034975, 'samples': 716800, 'steps': 1399, 'loss/train': 4.140390396118164} +02/24/2022 03:24:12 - INFO - codeparrot_training - Step 1400: {'lr': 0.00035, 'samples': 717312, 'steps': 1400, 'loss/train': 5.647134304046631} +02/24/2022 03:24:15 - INFO - codeparrot_training - Step 1401: {'lr': 0.00035025000000000003, 'samples': 717824, 'steps': 1401, 'loss/train': 4.211458683013916} +02/24/2022 03:24:21 - INFO - codeparrot_training - Step 1402: {'lr': 0.0003505, 'samples': 718336, 'steps': 1402, 'loss/train': 4.266910552978516} +02/24/2022 03:24:24 - INFO - codeparrot_training - Step 1403: {'lr': 0.00035075, 'samples': 718848, 'steps': 1403, 'loss/train': 2.2526164054870605} +02/24/2022 03:24:30 - INFO - codeparrot_training - Step 1404: {'lr': 0.00035099999999999997, 'samples': 719360, 'steps': 1404, 'loss/train': 4.09318208694458} +02/24/2022 03:24:33 - INFO - codeparrot_training - Step 1405: {'lr': 0.00035125, 'samples': 719872, 'steps': 1405, 'loss/train': 5.502644062042236} +02/24/2022 03:24:39 - INFO - codeparrot_training - Step 1406: {'lr': 0.0003515, 'samples': 720384, 'steps': 1406, 'loss/train': 3.886813163757324} +02/24/2022 03:24:42 - INFO - codeparrot_training - Step 1407: {'lr': 0.00035175, 'samples': 720896, 'steps': 1407, 'loss/train': 4.046776294708252} +02/24/2022 03:24:48 - INFO - codeparrot_training - Step 1408: {'lr': 0.000352, 'samples': 721408, 'steps': 1408, 'loss/train': 4.324183940887451} +02/24/2022 03:24:51 - INFO - codeparrot_training - Step 1409: {'lr': 0.00035225, 'samples': 721920, 'steps': 1409, 'loss/train': 4.107088565826416} +02/24/2022 03:24:57 - INFO - codeparrot_training - Step 1410: {'lr': 0.0003525, 'samples': 722432, 'steps': 1410, 'loss/train': 4.725785732269287} +02/24/2022 03:25:01 - INFO - codeparrot_training - Step 1411: {'lr': 0.00035275000000000004, 'samples': 722944, 'steps': 1411, 'loss/train': 4.506245136260986} +02/24/2022 03:25:06 - INFO - codeparrot_training - Step 1412: {'lr': 0.00035299999999999996, 'samples': 723456, 'steps': 1412, 'loss/train': 4.756691932678223} +02/24/2022 03:25:10 - INFO - codeparrot_training - Step 1413: {'lr': 0.00035325, 'samples': 723968, 'steps': 1413, 'loss/train': 4.609408378601074} +02/24/2022 03:25:15 - INFO - codeparrot_training - Step 1414: {'lr': 0.0003535, 'samples': 724480, 'steps': 1414, 'loss/train': 4.31541109085083} +02/24/2022 03:25:19 - INFO - codeparrot_training - Step 1415: {'lr': 0.00035375, 'samples': 724992, 'steps': 1415, 'loss/train': 4.767493724822998} +02/24/2022 03:25:24 - INFO - codeparrot_training - Step 1416: {'lr': 0.000354, 'samples': 725504, 'steps': 1416, 'loss/train': 5.0047760009765625} +02/24/2022 03:25:28 - INFO - codeparrot_training - Step 1417: {'lr': 0.00035425, 'samples': 726016, 'steps': 1417, 'loss/train': 8.275381088256836} +02/24/2022 03:25:33 - INFO - codeparrot_training - Step 1418: {'lr': 0.0003545, 'samples': 726528, 'steps': 1418, 'loss/train': 4.996078968048096} +02/24/2022 03:25:36 - INFO - codeparrot_training - Step 1419: {'lr': 0.00035475000000000003, 'samples': 727040, 'steps': 1419, 'loss/train': 4.269710540771484} +02/24/2022 03:25:42 - INFO - codeparrot_training - Step 1420: {'lr': 0.000355, 'samples': 727552, 'steps': 1420, 'loss/train': 1.2767610549926758} +02/24/2022 03:25:45 - INFO - codeparrot_training - Step 1421: {'lr': 0.00035525000000000004, 'samples': 728064, 'steps': 1421, 'loss/train': 4.295624256134033} +02/24/2022 03:25:51 - INFO - codeparrot_training - Step 1422: {'lr': 0.00035549999999999997, 'samples': 728576, 'steps': 1422, 'loss/train': 5.73574686050415} +02/24/2022 03:25:54 - INFO - codeparrot_training - Step 1423: {'lr': 0.00035575, 'samples': 729088, 'steps': 1423, 'loss/train': 3.591066598892212} +02/24/2022 03:26:00 - INFO - codeparrot_training - Step 1424: {'lr': 0.000356, 'samples': 729600, 'steps': 1424, 'loss/train': 4.418255805969238} +02/24/2022 03:26:03 - INFO - codeparrot_training - Step 1425: {'lr': 0.00035625, 'samples': 730112, 'steps': 1425, 'loss/train': 1.3538577556610107} +02/24/2022 03:26:10 - INFO - codeparrot_training - Step 1426: {'lr': 0.0003565, 'samples': 730624, 'steps': 1426, 'loss/train': 4.249266147613525} +02/24/2022 03:26:13 - INFO - codeparrot_training - Step 1427: {'lr': 0.00035675, 'samples': 731136, 'steps': 1427, 'loss/train': 4.449369430541992} +02/24/2022 03:26:19 - INFO - codeparrot_training - Step 1428: {'lr': 0.000357, 'samples': 731648, 'steps': 1428, 'loss/train': 4.438675403594971} +02/24/2022 03:26:22 - INFO - codeparrot_training - Step 1429: {'lr': 0.00035725000000000004, 'samples': 732160, 'steps': 1429, 'loss/train': 4.605688095092773} +02/24/2022 03:26:28 - INFO - codeparrot_training - Step 1430: {'lr': 0.0003575, 'samples': 732672, 'steps': 1430, 'loss/train': 5.555391311645508} +02/24/2022 03:26:31 - INFO - codeparrot_training - Step 1431: {'lr': 0.00035775, 'samples': 733184, 'steps': 1431, 'loss/train': 2.1674280166625977} +02/24/2022 03:26:37 - INFO - codeparrot_training - Step 1432: {'lr': 0.000358, 'samples': 733696, 'steps': 1432, 'loss/train': 3.172797441482544} +02/24/2022 03:26:40 - INFO - codeparrot_training - Step 1433: {'lr': 0.00035825, 'samples': 734208, 'steps': 1433, 'loss/train': 4.789025783538818} +02/24/2022 03:26:46 - INFO - codeparrot_training - Step 1434: {'lr': 0.0003585, 'samples': 734720, 'steps': 1434, 'loss/train': 2.9427411556243896} +02/24/2022 03:26:49 - INFO - codeparrot_training - Step 1435: {'lr': 0.00035875, 'samples': 735232, 'steps': 1435, 'loss/train': 3.566638469696045} +02/24/2022 03:26:56 - INFO - codeparrot_training - Step 1436: {'lr': 0.000359, 'samples': 735744, 'steps': 1436, 'loss/train': 2.941498041152954} +02/24/2022 03:26:59 - INFO - codeparrot_training - Step 1437: {'lr': 0.00035925000000000003, 'samples': 736256, 'steps': 1437, 'loss/train': 4.232140064239502} +02/24/2022 03:27:05 - INFO - codeparrot_training - Step 1438: {'lr': 0.0003595, 'samples': 736768, 'steps': 1438, 'loss/train': 2.9871699810028076} +02/24/2022 03:27:08 - INFO - codeparrot_training - Step 1439: {'lr': 0.00035975000000000004, 'samples': 737280, 'steps': 1439, 'loss/train': 4.4560866355896} +02/24/2022 03:27:14 - INFO - codeparrot_training - Step 1440: {'lr': 0.00035999999999999997, 'samples': 737792, 'steps': 1440, 'loss/train': 4.410506725311279} +02/24/2022 03:27:17 - INFO - codeparrot_training - Step 1441: {'lr': 0.00036025, 'samples': 738304, 'steps': 1441, 'loss/train': 3.8036000728607178} +02/24/2022 03:27:23 - INFO - codeparrot_training - Step 1442: {'lr': 0.0003605, 'samples': 738816, 'steps': 1442, 'loss/train': 3.6715757846832275} +02/24/2022 03:27:26 - INFO - codeparrot_training - Step 1443: {'lr': 0.00036075, 'samples': 739328, 'steps': 1443, 'loss/train': 4.418936252593994} +02/24/2022 03:27:32 - INFO - codeparrot_training - Step 1444: {'lr': 0.000361, 'samples': 739840, 'steps': 1444, 'loss/train': 4.190974712371826} +02/24/2022 03:27:35 - INFO - codeparrot_training - Step 1445: {'lr': 0.00036125, 'samples': 740352, 'steps': 1445, 'loss/train': 4.757613182067871} +02/24/2022 03:27:41 - INFO - codeparrot_training - Step 1446: {'lr': 0.0003615, 'samples': 740864, 'steps': 1446, 'loss/train': 3.6293020248413086} +02/24/2022 03:27:45 - INFO - codeparrot_training - Step 1447: {'lr': 0.00036175000000000004, 'samples': 741376, 'steps': 1447, 'loss/train': 4.599608421325684} +02/24/2022 03:27:50 - INFO - codeparrot_training - Step 1448: {'lr': 0.000362, 'samples': 741888, 'steps': 1448, 'loss/train': 4.677462100982666} +02/24/2022 03:27:54 - INFO - codeparrot_training - Step 1449: {'lr': 0.00036225000000000005, 'samples': 742400, 'steps': 1449, 'loss/train': 3.832645893096924} +02/24/2022 03:27:59 - INFO - codeparrot_training - Step 1450: {'lr': 0.0003625, 'samples': 742912, 'steps': 1450, 'loss/train': 4.194993019104004} +02/24/2022 03:28:03 - INFO - codeparrot_training - Step 1451: {'lr': 0.00036275, 'samples': 743424, 'steps': 1451, 'loss/train': 4.173336982727051} +02/24/2022 03:28:08 - INFO - codeparrot_training - Step 1452: {'lr': 0.000363, 'samples': 743936, 'steps': 1452, 'loss/train': 5.092646598815918} +02/24/2022 03:28:14 - INFO - codeparrot_training - Step 1453: {'lr': 0.00036325, 'samples': 744448, 'steps': 1453, 'loss/train': 4.8541646003723145} +02/24/2022 03:28:17 - INFO - codeparrot_training - Step 1454: {'lr': 0.0003635, 'samples': 744960, 'steps': 1454, 'loss/train': 2.8798084259033203} +02/24/2022 03:28:24 - INFO - codeparrot_training - Step 1455: {'lr': 0.00036375000000000003, 'samples': 745472, 'steps': 1455, 'loss/train': 4.474486351013184} +02/24/2022 03:28:27 - INFO - codeparrot_training - Step 1456: {'lr': 0.000364, 'samples': 745984, 'steps': 1456, 'loss/train': 3.5735902786254883} +02/24/2022 03:28:33 - INFO - codeparrot_training - Step 1457: {'lr': 0.00036425000000000004, 'samples': 746496, 'steps': 1457, 'loss/train': 3.685084342956543} +02/24/2022 03:28:36 - INFO - codeparrot_training - Step 1458: {'lr': 0.0003645, 'samples': 747008, 'steps': 1458, 'loss/train': 3.5939478874206543} +02/24/2022 03:28:42 - INFO - codeparrot_training - Step 1459: {'lr': 0.00036475, 'samples': 747520, 'steps': 1459, 'loss/train': 4.002579689025879} +02/24/2022 03:28:45 - INFO - codeparrot_training - Step 1460: {'lr': 0.000365, 'samples': 748032, 'steps': 1460, 'loss/train': 4.364797115325928} +02/24/2022 03:28:50 - INFO - codeparrot_training - Step 1461: {'lr': 0.00036525, 'samples': 748544, 'steps': 1461, 'loss/train': 4.881330490112305} +02/24/2022 03:28:54 - INFO - codeparrot_training - Step 1462: {'lr': 0.0003655, 'samples': 749056, 'steps': 1462, 'loss/train': 4.901648998260498} +02/24/2022 03:28:59 - INFO - codeparrot_training - Step 1463: {'lr': 0.00036575, 'samples': 749568, 'steps': 1463, 'loss/train': 4.209507465362549} +02/24/2022 03:29:03 - INFO - codeparrot_training - Step 1464: {'lr': 0.000366, 'samples': 750080, 'steps': 1464, 'loss/train': 4.085138320922852} +02/24/2022 03:29:09 - INFO - codeparrot_training - Step 1465: {'lr': 0.00036625000000000004, 'samples': 750592, 'steps': 1465, 'loss/train': 5.039862155914307} +02/24/2022 03:29:12 - INFO - codeparrot_training - Step 1466: {'lr': 0.0003665, 'samples': 751104, 'steps': 1466, 'loss/train': 4.095442771911621} +02/24/2022 03:29:18 - INFO - codeparrot_training - Step 1467: {'lr': 0.00036675000000000005, 'samples': 751616, 'steps': 1467, 'loss/train': 9.734140396118164} +02/24/2022 03:29:21 - INFO - codeparrot_training - Step 1468: {'lr': 0.000367, 'samples': 752128, 'steps': 1468, 'loss/train': 3.7371835708618164} +02/24/2022 03:29:27 - INFO - codeparrot_training - Step 1469: {'lr': 0.00036725, 'samples': 752640, 'steps': 1469, 'loss/train': 4.368670463562012} +02/24/2022 03:29:30 - INFO - codeparrot_training - Step 1470: {'lr': 0.0003675, 'samples': 753152, 'steps': 1470, 'loss/train': 3.4546971321105957} +02/24/2022 03:29:36 - INFO - codeparrot_training - Step 1471: {'lr': 0.00036775, 'samples': 753664, 'steps': 1471, 'loss/train': 3.882535696029663} +02/24/2022 03:29:39 - INFO - codeparrot_training - Step 1472: {'lr': 0.000368, 'samples': 754176, 'steps': 1472, 'loss/train': 4.518320560455322} +02/24/2022 03:29:45 - INFO - codeparrot_training - Step 1473: {'lr': 0.00036825000000000003, 'samples': 754688, 'steps': 1473, 'loss/train': 4.773551940917969} +02/24/2022 03:29:48 - INFO - codeparrot_training - Step 1474: {'lr': 0.0003685, 'samples': 755200, 'steps': 1474, 'loss/train': 3.6483707427978516} +02/24/2022 03:29:53 - INFO - codeparrot_training - Step 1475: {'lr': 0.00036875000000000005, 'samples': 755712, 'steps': 1475, 'loss/train': 3.6492366790771484} +02/24/2022 03:29:57 - INFO - codeparrot_training - Step 1476: {'lr': 0.000369, 'samples': 756224, 'steps': 1476, 'loss/train': 4.988525390625} +02/24/2022 03:30:02 - INFO - codeparrot_training - Step 1477: {'lr': 0.00036925, 'samples': 756736, 'steps': 1477, 'loss/train': 4.443835735321045} +02/24/2022 03:30:06 - INFO - codeparrot_training - Step 1478: {'lr': 0.0003695, 'samples': 757248, 'steps': 1478, 'loss/train': 4.1710591316223145} +02/24/2022 03:30:11 - INFO - codeparrot_training - Step 1479: {'lr': 0.00036975, 'samples': 757760, 'steps': 1479, 'loss/train': 4.66081428527832} +02/24/2022 03:30:15 - INFO - codeparrot_training - Step 1480: {'lr': 0.00037, 'samples': 758272, 'steps': 1480, 'loss/train': 3.862652063369751} +02/24/2022 03:30:21 - INFO - codeparrot_training - Step 1481: {'lr': 0.00037025000000000003, 'samples': 758784, 'steps': 1481, 'loss/train': 6.220557689666748} +02/24/2022 03:30:26 - INFO - codeparrot_training - Step 1482: {'lr': 0.0003705, 'samples': 759296, 'steps': 1482, 'loss/train': 3.1184709072113037} +02/24/2022 03:30:30 - INFO - codeparrot_training - Step 1483: {'lr': 0.00037075000000000004, 'samples': 759808, 'steps': 1483, 'loss/train': 3.412602663040161} +02/24/2022 03:30:35 - INFO - codeparrot_training - Step 1484: {'lr': 0.000371, 'samples': 760320, 'steps': 1484, 'loss/train': 3.708744764328003} +02/24/2022 03:30:39 - INFO - codeparrot_training - Step 1485: {'lr': 0.00037125000000000005, 'samples': 760832, 'steps': 1485, 'loss/train': 4.213790416717529} +02/24/2022 03:30:44 - INFO - codeparrot_training - Step 1486: {'lr': 0.00037150000000000003, 'samples': 761344, 'steps': 1486, 'loss/train': 4.705002784729004} +02/24/2022 03:30:48 - INFO - codeparrot_training - Step 1487: {'lr': 0.00037175, 'samples': 761856, 'steps': 1487, 'loss/train': 5.278611660003662} +02/24/2022 03:30:53 - INFO - codeparrot_training - Step 1488: {'lr': 0.000372, 'samples': 762368, 'steps': 1488, 'loss/train': 3.840797185897827} +02/24/2022 03:30:57 - INFO - codeparrot_training - Step 1489: {'lr': 0.00037225, 'samples': 762880, 'steps': 1489, 'loss/train': 3.5598690509796143} +02/24/2022 03:31:03 - INFO - codeparrot_training - Step 1490: {'lr': 0.0003725, 'samples': 763392, 'steps': 1490, 'loss/train': 3.661531686782837} +02/24/2022 03:31:06 - INFO - codeparrot_training - Step 1491: {'lr': 0.00037275000000000003, 'samples': 763904, 'steps': 1491, 'loss/train': 4.435661792755127} +02/24/2022 03:31:12 - INFO - codeparrot_training - Step 1492: {'lr': 0.000373, 'samples': 764416, 'steps': 1492, 'loss/train': 4.603262901306152} +02/24/2022 03:31:15 - INFO - codeparrot_training - Step 1493: {'lr': 0.00037325000000000005, 'samples': 764928, 'steps': 1493, 'loss/train': 3.6793453693389893} +02/24/2022 03:31:21 - INFO - codeparrot_training - Step 1494: {'lr': 0.0003735, 'samples': 765440, 'steps': 1494, 'loss/train': 3.0818397998809814} +02/24/2022 03:31:24 - INFO - codeparrot_training - Step 1495: {'lr': 0.00037375000000000006, 'samples': 765952, 'steps': 1495, 'loss/train': 4.466464042663574} +02/24/2022 03:31:30 - INFO - codeparrot_training - Step 1496: {'lr': 0.000374, 'samples': 766464, 'steps': 1496, 'loss/train': 3.632730007171631} +02/24/2022 03:31:33 - INFO - codeparrot_training - Step 1497: {'lr': 0.00037425, 'samples': 766976, 'steps': 1497, 'loss/train': 1.4231302738189697} +02/24/2022 03:31:39 - INFO - codeparrot_training - Step 1498: {'lr': 0.0003745, 'samples': 767488, 'steps': 1498, 'loss/train': 4.891319751739502} +02/24/2022 03:31:42 - INFO - codeparrot_training - Step 1499: {'lr': 0.00037475000000000003, 'samples': 768000, 'steps': 1499, 'loss/train': 3.9893150329589844} +02/24/2022 03:31:48 - INFO - codeparrot_training - Step 1500: {'lr': 0.000375, 'samples': 768512, 'steps': 1500, 'loss/train': 2.9769890308380127} +02/24/2022 03:31:51 - INFO - codeparrot_training - Step 1501: {'lr': 0.00037525, 'samples': 769024, 'steps': 1501, 'loss/train': 3.0658252239227295} +02/24/2022 03:31:57 - INFO - codeparrot_training - Step 1502: {'lr': 0.0003755, 'samples': 769536, 'steps': 1502, 'loss/train': 4.065186500549316} +02/24/2022 03:32:01 - INFO - codeparrot_training - Step 1503: {'lr': 0.00037575, 'samples': 770048, 'steps': 1503, 'loss/train': 3.200500011444092} +02/24/2022 03:32:06 - INFO - codeparrot_training - Step 1504: {'lr': 0.00037600000000000003, 'samples': 770560, 'steps': 1504, 'loss/train': 4.314088821411133} +02/24/2022 03:32:10 - INFO - codeparrot_training - Step 1505: {'lr': 0.00037624999999999996, 'samples': 771072, 'steps': 1505, 'loss/train': 3.7412798404693604} +02/24/2022 03:32:15 - INFO - codeparrot_training - Step 1506: {'lr': 0.0003765, 'samples': 771584, 'steps': 1506, 'loss/train': 2.632077217102051} +02/24/2022 03:32:19 - INFO - codeparrot_training - Step 1507: {'lr': 0.00037674999999999997, 'samples': 772096, 'steps': 1507, 'loss/train': 4.561457633972168} +02/24/2022 03:32:24 - INFO - codeparrot_training - Step 1508: {'lr': 0.000377, 'samples': 772608, 'steps': 1508, 'loss/train': 2.8471598625183105} +02/24/2022 03:32:28 - INFO - codeparrot_training - Step 1509: {'lr': 0.00037725, 'samples': 773120, 'steps': 1509, 'loss/train': 3.7033565044403076} +02/24/2022 03:32:33 - INFO - codeparrot_training - Step 1510: {'lr': 0.0003775, 'samples': 773632, 'steps': 1510, 'loss/train': 4.7861433029174805} +02/24/2022 03:32:36 - INFO - codeparrot_training - Step 1511: {'lr': 0.00037775, 'samples': 774144, 'steps': 1511, 'loss/train': 4.188358306884766} +02/24/2022 03:32:43 - INFO - codeparrot_training - Step 1512: {'lr': 0.000378, 'samples': 774656, 'steps': 1512, 'loss/train': 3.815391778945923} +02/24/2022 03:32:47 - INFO - codeparrot_training - Step 1513: {'lr': 0.00037825, 'samples': 775168, 'steps': 1513, 'loss/train': 4.046994209289551} +02/24/2022 03:32:52 - INFO - codeparrot_training - Step 1514: {'lr': 0.0003785, 'samples': 775680, 'steps': 1514, 'loss/train': 2.8600573539733887} +02/24/2022 03:32:55 - INFO - codeparrot_training - Step 1515: {'lr': 0.00037874999999999996, 'samples': 776192, 'steps': 1515, 'loss/train': 4.728860855102539} +02/24/2022 03:33:01 - INFO - codeparrot_training - Step 1516: {'lr': 0.000379, 'samples': 776704, 'steps': 1516, 'loss/train': 4.52414083480835} +02/24/2022 03:33:04 - INFO - codeparrot_training - Step 1517: {'lr': 0.00037925, 'samples': 777216, 'steps': 1517, 'loss/train': 3.1684107780456543} +02/24/2022 03:33:10 - INFO - codeparrot_training - Step 1518: {'lr': 0.0003795, 'samples': 777728, 'steps': 1518, 'loss/train': 4.330784320831299} +02/24/2022 03:33:13 - INFO - codeparrot_training - Step 1519: {'lr': 0.00037975, 'samples': 778240, 'steps': 1519, 'loss/train': 4.734991550445557} +02/24/2022 03:33:19 - INFO - codeparrot_training - Step 1520: {'lr': 0.00038, 'samples': 778752, 'steps': 1520, 'loss/train': 4.218307971954346} +02/24/2022 03:33:22 - INFO - codeparrot_training - Step 1521: {'lr': 0.00038025, 'samples': 779264, 'steps': 1521, 'loss/train': 4.723255634307861} +02/24/2022 03:33:28 - INFO - codeparrot_training - Step 1522: {'lr': 0.00038050000000000003, 'samples': 779776, 'steps': 1522, 'loss/train': 5.335879802703857} +02/24/2022 03:33:31 - INFO - codeparrot_training - Step 1523: {'lr': 0.00038075, 'samples': 780288, 'steps': 1523, 'loss/train': 4.171871662139893} +02/24/2022 03:33:37 - INFO - codeparrot_training - Step 1524: {'lr': 0.000381, 'samples': 780800, 'steps': 1524, 'loss/train': 4.38768196105957} +02/24/2022 03:33:40 - INFO - codeparrot_training - Step 1525: {'lr': 0.00038124999999999997, 'samples': 781312, 'steps': 1525, 'loss/train': 3.8277640342712402} +02/24/2022 03:33:46 - INFO - codeparrot_training - Step 1526: {'lr': 0.0003815, 'samples': 781824, 'steps': 1526, 'loss/train': 4.827854633331299} +02/24/2022 03:33:50 - INFO - codeparrot_training - Step 1527: {'lr': 0.00038175, 'samples': 782336, 'steps': 1527, 'loss/train': 4.390958309173584} +02/24/2022 03:33:55 - INFO - codeparrot_training - Step 1528: {'lr': 0.000382, 'samples': 782848, 'steps': 1528, 'loss/train': 3.4712443351745605} +02/24/2022 03:33:59 - INFO - codeparrot_training - Step 1529: {'lr': 0.00038225, 'samples': 783360, 'steps': 1529, 'loss/train': 4.485414028167725} +02/24/2022 03:34:04 - INFO - codeparrot_training - Step 1530: {'lr': 0.00038250000000000003, 'samples': 783872, 'steps': 1530, 'loss/train': 4.224032402038574} +02/24/2022 03:34:08 - INFO - codeparrot_training - Step 1531: {'lr': 0.00038275, 'samples': 784384, 'steps': 1531, 'loss/train': 4.25079345703125} +02/24/2022 03:34:13 - INFO - codeparrot_training - Step 1532: {'lr': 0.00038300000000000004, 'samples': 784896, 'steps': 1532, 'loss/train': 3.665842056274414} +02/24/2022 03:34:17 - INFO - codeparrot_training - Step 1533: {'lr': 0.00038324999999999996, 'samples': 785408, 'steps': 1533, 'loss/train': 4.240571975708008} +02/24/2022 03:34:22 - INFO - codeparrot_training - Step 1534: {'lr': 0.0003835, 'samples': 785920, 'steps': 1534, 'loss/train': 3.8314082622528076} +02/24/2022 03:34:26 - INFO - codeparrot_training - Step 1535: {'lr': 0.00038375, 'samples': 786432, 'steps': 1535, 'loss/train': 3.0607237815856934} +02/24/2022 03:34:32 - INFO - codeparrot_training - Step 1536: {'lr': 0.000384, 'samples': 786944, 'steps': 1536, 'loss/train': 1.1733241081237793} +02/24/2022 03:34:35 - INFO - codeparrot_training - Step 1537: {'lr': 0.00038425, 'samples': 787456, 'steps': 1537, 'loss/train': 3.5049164295196533} +02/24/2022 03:34:41 - INFO - codeparrot_training - Step 1538: {'lr': 0.0003845, 'samples': 787968, 'steps': 1538, 'loss/train': 5.128952980041504} +02/24/2022 03:34:44 - INFO - codeparrot_training - Step 1539: {'lr': 0.00038475, 'samples': 788480, 'steps': 1539, 'loss/train': 3.32218599319458} +02/24/2022 03:34:50 - INFO - codeparrot_training - Step 1540: {'lr': 0.00038500000000000003, 'samples': 788992, 'steps': 1540, 'loss/train': 4.7126569747924805} +02/24/2022 03:34:53 - INFO - codeparrot_training - Step 1541: {'lr': 0.00038525, 'samples': 789504, 'steps': 1541, 'loss/train': 5.071201324462891} +02/24/2022 03:34:58 - INFO - codeparrot_training - Step 1542: {'lr': 0.0003855, 'samples': 790016, 'steps': 1542, 'loss/train': 4.748036861419678} +02/24/2022 03:35:02 - INFO - codeparrot_training - Step 1543: {'lr': 0.00038574999999999997, 'samples': 790528, 'steps': 1543, 'loss/train': 3.630767345428467} +02/24/2022 03:35:07 - INFO - codeparrot_training - Step 1544: {'lr': 0.000386, 'samples': 791040, 'steps': 1544, 'loss/train': 3.1564924716949463} +02/24/2022 03:35:11 - INFO - codeparrot_training - Step 1545: {'lr': 0.00038625, 'samples': 791552, 'steps': 1545, 'loss/train': 2.396965980529785} +02/24/2022 03:35:17 - INFO - codeparrot_training - Step 1546: {'lr': 0.0003865, 'samples': 792064, 'steps': 1546, 'loss/train': 3.0344560146331787} +02/24/2022 03:35:20 - INFO - codeparrot_training - Step 1547: {'lr': 0.00038675, 'samples': 792576, 'steps': 1547, 'loss/train': 4.657129764556885} +02/24/2022 03:35:26 - INFO - codeparrot_training - Step 1548: {'lr': 0.00038700000000000003, 'samples': 793088, 'steps': 1548, 'loss/train': 3.840728282928467} +02/24/2022 03:35:29 - INFO - codeparrot_training - Step 1549: {'lr': 0.00038725, 'samples': 793600, 'steps': 1549, 'loss/train': 5.157027721405029} +02/24/2022 03:35:35 - INFO - codeparrot_training - Step 1550: {'lr': 0.00038750000000000004, 'samples': 794112, 'steps': 1550, 'loss/train': 4.252553939819336} +02/24/2022 03:35:38 - INFO - codeparrot_training - Step 1551: {'lr': 0.00038774999999999997, 'samples': 794624, 'steps': 1551, 'loss/train': 3.6083273887634277} +02/24/2022 03:35:44 - INFO - codeparrot_training - Step 1552: {'lr': 0.000388, 'samples': 795136, 'steps': 1552, 'loss/train': 4.28788423538208} +02/24/2022 03:35:47 - INFO - codeparrot_training - Step 1553: {'lr': 0.00038825, 'samples': 795648, 'steps': 1553, 'loss/train': 5.174439430236816} +02/24/2022 03:35:53 - INFO - codeparrot_training - Step 1554: {'lr': 0.0003885, 'samples': 796160, 'steps': 1554, 'loss/train': 3.887331485748291} +02/24/2022 03:35:56 - INFO - codeparrot_training - Step 1555: {'lr': 0.00038875, 'samples': 796672, 'steps': 1555, 'loss/train': 3.7367477416992188} +02/24/2022 03:36:02 - INFO - codeparrot_training - Step 1556: {'lr': 0.000389, 'samples': 797184, 'steps': 1556, 'loss/train': 4.111754417419434} +02/24/2022 03:36:05 - INFO - codeparrot_training - Step 1557: {'lr': 0.00038925, 'samples': 797696, 'steps': 1557, 'loss/train': 3.9184670448303223} +02/24/2022 03:36:11 - INFO - codeparrot_training - Step 1558: {'lr': 0.00038950000000000003, 'samples': 798208, 'steps': 1558, 'loss/train': 3.515531063079834} +02/24/2022 03:36:15 - INFO - codeparrot_training - Step 1559: {'lr': 0.00038975, 'samples': 798720, 'steps': 1559, 'loss/train': 5.446542263031006} +02/24/2022 03:36:20 - INFO - codeparrot_training - Step 1560: {'lr': 0.00039000000000000005, 'samples': 799232, 'steps': 1560, 'loss/train': 3.9595837593078613} +02/24/2022 03:36:24 - INFO - codeparrot_training - Step 1561: {'lr': 0.00039024999999999997, 'samples': 799744, 'steps': 1561, 'loss/train': 3.99322509765625} +02/24/2022 03:36:29 - INFO - codeparrot_training - Step 1562: {'lr': 0.0003905, 'samples': 800256, 'steps': 1562, 'loss/train': 4.53653621673584} +02/24/2022 03:36:33 - INFO - codeparrot_training - Step 1563: {'lr': 0.00039075, 'samples': 800768, 'steps': 1563, 'loss/train': 4.1567606925964355} +02/24/2022 03:36:39 - INFO - codeparrot_training - Step 1564: {'lr': 0.000391, 'samples': 801280, 'steps': 1564, 'loss/train': 3.402520179748535} +02/24/2022 03:36:42 - INFO - codeparrot_training - Step 1565: {'lr': 0.00039125, 'samples': 801792, 'steps': 1565, 'loss/train': 3.962956428527832} +02/24/2022 03:36:48 - INFO - codeparrot_training - Step 1566: {'lr': 0.00039150000000000003, 'samples': 802304, 'steps': 1566, 'loss/train': 3.229032039642334} +02/24/2022 03:36:51 - INFO - codeparrot_training - Step 1567: {'lr': 0.00039175, 'samples': 802816, 'steps': 1567, 'loss/train': 5.345529556274414} +02/24/2022 03:36:57 - INFO - codeparrot_training - Step 1568: {'lr': 0.00039200000000000004, 'samples': 803328, 'steps': 1568, 'loss/train': 3.7535061836242676} +02/24/2022 03:37:00 - INFO - codeparrot_training - Step 1569: {'lr': 0.00039225, 'samples': 803840, 'steps': 1569, 'loss/train': 4.086646556854248} +02/24/2022 03:37:05 - INFO - codeparrot_training - Step 1570: {'lr': 0.0003925, 'samples': 804352, 'steps': 1570, 'loss/train': 3.606579542160034} +02/24/2022 03:37:09 - INFO - codeparrot_training - Step 1571: {'lr': 0.00039275, 'samples': 804864, 'steps': 1571, 'loss/train': 4.3846211433410645} +02/24/2022 03:37:15 - INFO - codeparrot_training - Step 1572: {'lr': 0.000393, 'samples': 805376, 'steps': 1572, 'loss/train': 5.237354755401611} +02/24/2022 03:37:19 - INFO - codeparrot_training - Step 1573: {'lr': 0.00039325, 'samples': 805888, 'steps': 1573, 'loss/train': 4.324764728546143} +02/24/2022 03:37:24 - INFO - codeparrot_training - Step 1574: {'lr': 0.0003935, 'samples': 806400, 'steps': 1574, 'loss/train': 3.2067623138427734} +02/24/2022 03:37:27 - INFO - codeparrot_training - Step 1575: {'lr': 0.00039375, 'samples': 806912, 'steps': 1575, 'loss/train': 4.206762313842773} +02/24/2022 03:37:33 - INFO - codeparrot_training - Step 1576: {'lr': 0.00039400000000000004, 'samples': 807424, 'steps': 1576, 'loss/train': 5.17689323425293} +02/24/2022 03:37:36 - INFO - codeparrot_training - Step 1577: {'lr': 0.00039425, 'samples': 807936, 'steps': 1577, 'loss/train': 3.9292101860046387} +02/24/2022 03:37:42 - INFO - codeparrot_training - Step 1578: {'lr': 0.00039450000000000005, 'samples': 808448, 'steps': 1578, 'loss/train': 3.6446094512939453} +02/24/2022 03:37:45 - INFO - codeparrot_training - Step 1579: {'lr': 0.00039474999999999997, 'samples': 808960, 'steps': 1579, 'loss/train': 5.564534664154053} +02/24/2022 03:37:51 - INFO - codeparrot_training - Step 1580: {'lr': 0.000395, 'samples': 809472, 'steps': 1580, 'loss/train': 4.369887351989746} +02/24/2022 03:37:54 - INFO - codeparrot_training - Step 1581: {'lr': 0.00039525, 'samples': 809984, 'steps': 1581, 'loss/train': 3.6876540184020996} +02/24/2022 03:38:01 - INFO - codeparrot_training - Step 1582: {'lr': 0.0003955, 'samples': 810496, 'steps': 1582, 'loss/train': 4.171565055847168} +02/24/2022 03:38:04 - INFO - codeparrot_training - Step 1583: {'lr': 0.00039575, 'samples': 811008, 'steps': 1583, 'loss/train': 4.222428321838379} +02/24/2022 03:38:10 - INFO - codeparrot_training - Step 1584: {'lr': 0.00039600000000000003, 'samples': 811520, 'steps': 1584, 'loss/train': 3.810997247695923} +02/24/2022 03:38:13 - INFO - codeparrot_training - Step 1585: {'lr': 0.00039625, 'samples': 812032, 'steps': 1585, 'loss/train': 4.887095928192139} +02/24/2022 03:38:19 - INFO - codeparrot_training - Step 1586: {'lr': 0.00039650000000000004, 'samples': 812544, 'steps': 1586, 'loss/train': 3.7610602378845215} +02/24/2022 03:38:22 - INFO - codeparrot_training - Step 1587: {'lr': 0.00039675, 'samples': 813056, 'steps': 1587, 'loss/train': 4.862823963165283} +02/24/2022 03:38:28 - INFO - codeparrot_training - Step 1588: {'lr': 0.00039700000000000005, 'samples': 813568, 'steps': 1588, 'loss/train': 6.103458881378174} +02/24/2022 03:38:31 - INFO - codeparrot_training - Step 1589: {'lr': 0.00039725, 'samples': 814080, 'steps': 1589, 'loss/train': 5.141780376434326} +02/24/2022 03:38:37 - INFO - codeparrot_training - Step 1590: {'lr': 0.0003975, 'samples': 814592, 'steps': 1590, 'loss/train': 4.060984134674072} +02/24/2022 03:38:40 - INFO - codeparrot_training - Step 1591: {'lr': 0.00039775, 'samples': 815104, 'steps': 1591, 'loss/train': 3.5712521076202393} +02/24/2022 03:38:46 - INFO - codeparrot_training - Step 1592: {'lr': 0.000398, 'samples': 815616, 'steps': 1592, 'loss/train': 4.213394641876221} +02/24/2022 03:38:49 - INFO - codeparrot_training - Step 1593: {'lr': 0.00039825, 'samples': 816128, 'steps': 1593, 'loss/train': 4.766323566436768} +02/24/2022 03:38:55 - INFO - codeparrot_training - Step 1594: {'lr': 0.00039850000000000004, 'samples': 816640, 'steps': 1594, 'loss/train': 3.537151336669922} +02/24/2022 03:38:59 - INFO - codeparrot_training - Step 1595: {'lr': 0.00039875, 'samples': 817152, 'steps': 1595, 'loss/train': 2.797670841217041} +02/24/2022 03:39:06 - INFO - codeparrot_training - Step 1596: {'lr': 0.00039900000000000005, 'samples': 817664, 'steps': 1596, 'loss/train': 4.90841007232666} +02/24/2022 03:39:09 - INFO - codeparrot_training - Step 1597: {'lr': 0.00039925000000000003, 'samples': 818176, 'steps': 1597, 'loss/train': 3.814776659011841} +02/24/2022 03:39:14 - INFO - codeparrot_training - Step 1598: {'lr': 0.0003995, 'samples': 818688, 'steps': 1598, 'loss/train': 4.990166664123535} +02/24/2022 03:39:18 - INFO - codeparrot_training - Step 1599: {'lr': 0.00039975, 'samples': 819200, 'steps': 1599, 'loss/train': 3.483522415161133} +02/24/2022 03:39:23 - INFO - codeparrot_training - Step 1600: {'lr': 0.0004, 'samples': 819712, 'steps': 1600, 'loss/train': 4.816373348236084} +02/24/2022 03:39:27 - INFO - codeparrot_training - Step 1601: {'lr': 0.00040025, 'samples': 820224, 'steps': 1601, 'loss/train': 2.801182270050049} +02/24/2022 03:39:32 - INFO - codeparrot_training - Step 1602: {'lr': 0.00040050000000000003, 'samples': 820736, 'steps': 1602, 'loss/train': 3.923861265182495} +02/24/2022 03:39:36 - INFO - codeparrot_training - Step 1603: {'lr': 0.00040075, 'samples': 821248, 'steps': 1603, 'loss/train': 3.9733026027679443} +02/24/2022 03:39:41 - INFO - codeparrot_training - Step 1604: {'lr': 0.00040100000000000004, 'samples': 821760, 'steps': 1604, 'loss/train': 5.037976264953613} +02/24/2022 03:39:45 - INFO - codeparrot_training - Step 1605: {'lr': 0.00040125, 'samples': 822272, 'steps': 1605, 'loss/train': 4.74677038192749} +02/24/2022 03:39:52 - INFO - codeparrot_training - Step 1606: {'lr': 0.00040150000000000006, 'samples': 822784, 'steps': 1606, 'loss/train': 5.230562210083008} +02/24/2022 03:39:55 - INFO - codeparrot_training - Step 1607: {'lr': 0.00040175, 'samples': 823296, 'steps': 1607, 'loss/train': 5.117575168609619} +02/24/2022 03:40:01 - INFO - codeparrot_training - Step 1608: {'lr': 0.000402, 'samples': 823808, 'steps': 1608, 'loss/train': 3.9092824459075928} +02/24/2022 03:40:04 - INFO - codeparrot_training - Step 1609: {'lr': 0.00040225, 'samples': 824320, 'steps': 1609, 'loss/train': 3.3457095623016357} +02/24/2022 03:40:10 - INFO - codeparrot_training - Step 1610: {'lr': 0.0004025, 'samples': 824832, 'steps': 1610, 'loss/train': 3.0912325382232666} +02/24/2022 03:40:13 - INFO - codeparrot_training - Step 1611: {'lr': 0.00040275, 'samples': 825344, 'steps': 1611, 'loss/train': 4.7266974449157715} +02/24/2022 03:40:19 - INFO - codeparrot_training - Step 1612: {'lr': 0.00040300000000000004, 'samples': 825856, 'steps': 1612, 'loss/train': 4.315446853637695} +02/24/2022 03:40:22 - INFO - codeparrot_training - Step 1613: {'lr': 0.00040325, 'samples': 826368, 'steps': 1613, 'loss/train': 1.2668206691741943} +02/24/2022 03:40:28 - INFO - codeparrot_training - Step 1614: {'lr': 0.00040350000000000005, 'samples': 826880, 'steps': 1614, 'loss/train': 4.686374187469482} +02/24/2022 03:40:31 - INFO - codeparrot_training - Step 1615: {'lr': 0.00040375000000000003, 'samples': 827392, 'steps': 1615, 'loss/train': 6.061168670654297} +02/24/2022 03:40:37 - INFO - codeparrot_training - Step 1616: {'lr': 0.000404, 'samples': 827904, 'steps': 1616, 'loss/train': 4.728420257568359} +02/24/2022 03:40:41 - INFO - codeparrot_training - Step 1617: {'lr': 0.00040425, 'samples': 828416, 'steps': 1617, 'loss/train': 4.256476402282715} +02/24/2022 03:40:46 - INFO - codeparrot_training - Step 1618: {'lr': 0.0004045, 'samples': 828928, 'steps': 1618, 'loss/train': 4.144393444061279} +02/24/2022 03:40:49 - INFO - codeparrot_training - Step 1619: {'lr': 0.00040475, 'samples': 829440, 'steps': 1619, 'loss/train': 3.679882764816284} +02/24/2022 03:40:55 - INFO - codeparrot_training - Step 1620: {'lr': 0.00040500000000000003, 'samples': 829952, 'steps': 1620, 'loss/train': 3.6705260276794434} +02/24/2022 03:40:58 - INFO - codeparrot_training - Step 1621: {'lr': 0.00040525, 'samples': 830464, 'steps': 1621, 'loss/train': 3.643399715423584} +02/24/2022 03:41:04 - INFO - codeparrot_training - Step 1622: {'lr': 0.00040550000000000004, 'samples': 830976, 'steps': 1622, 'loss/train': 6.2327680587768555} +02/24/2022 03:41:07 - INFO - codeparrot_training - Step 1623: {'lr': 0.00040575, 'samples': 831488, 'steps': 1623, 'loss/train': 5.0092692375183105} +02/24/2022 03:41:13 - INFO - codeparrot_training - Step 1624: {'lr': 0.00040600000000000006, 'samples': 832000, 'steps': 1624, 'loss/train': 3.2840332984924316} +02/24/2022 03:41:16 - INFO - codeparrot_training - Step 1625: {'lr': 0.00040625000000000004, 'samples': 832512, 'steps': 1625, 'loss/train': 4.8649091720581055} +02/24/2022 03:41:22 - INFO - codeparrot_training - Step 1626: {'lr': 0.00040649999999999996, 'samples': 833024, 'steps': 1626, 'loss/train': 3.7539560794830322} +02/24/2022 03:41:26 - INFO - codeparrot_training - Step 1627: {'lr': 0.00040675, 'samples': 833536, 'steps': 1627, 'loss/train': 3.8577213287353516} +02/24/2022 03:41:31 - INFO - codeparrot_training - Step 1628: {'lr': 0.00040699999999999997, 'samples': 834048, 'steps': 1628, 'loss/train': 3.5600366592407227} +02/24/2022 03:41:35 - INFO - codeparrot_training - Step 1629: {'lr': 0.00040725, 'samples': 834560, 'steps': 1629, 'loss/train': 0.8801425695419312} +02/24/2022 03:41:40 - INFO - codeparrot_training - Step 1630: {'lr': 0.0004075, 'samples': 835072, 'steps': 1630, 'loss/train': 3.8342957496643066} +02/24/2022 03:41:44 - INFO - codeparrot_training - Step 1631: {'lr': 0.00040775, 'samples': 835584, 'steps': 1631, 'loss/train': 4.777003765106201} +02/24/2022 03:41:49 - INFO - codeparrot_training - Step 1632: {'lr': 0.000408, 'samples': 836096, 'steps': 1632, 'loss/train': 5.079536437988281} +02/24/2022 03:41:53 - INFO - codeparrot_training - Step 1633: {'lr': 0.00040825000000000003, 'samples': 836608, 'steps': 1633, 'loss/train': 3.115975856781006} +02/24/2022 03:41:58 - INFO - codeparrot_training - Step 1634: {'lr': 0.0004085, 'samples': 837120, 'steps': 1634, 'loss/train': 3.5675368309020996} +02/24/2022 03:42:02 - INFO - codeparrot_training - Step 1635: {'lr': 0.00040875, 'samples': 837632, 'steps': 1635, 'loss/train': 3.040477752685547} +02/24/2022 03:42:07 - INFO - codeparrot_training - Step 1636: {'lr': 0.00040899999999999997, 'samples': 838144, 'steps': 1636, 'loss/train': 4.125766754150391} +02/24/2022 03:42:11 - INFO - codeparrot_training - Step 1637: {'lr': 0.00040925, 'samples': 838656, 'steps': 1637, 'loss/train': 2.4122812747955322} +02/24/2022 03:42:17 - INFO - codeparrot_training - Step 1638: {'lr': 0.0004095, 'samples': 839168, 'steps': 1638, 'loss/train': 2.6019678115844727} +02/24/2022 03:42:20 - INFO - codeparrot_training - Step 1639: {'lr': 0.00040975, 'samples': 839680, 'steps': 1639, 'loss/train': 4.266791343688965} +02/24/2022 03:42:26 - INFO - codeparrot_training - Step 1640: {'lr': 0.00041, 'samples': 840192, 'steps': 1640, 'loss/train': 3.685781240463257} +02/24/2022 03:42:29 - INFO - codeparrot_training - Step 1641: {'lr': 0.00041025, 'samples': 840704, 'steps': 1641, 'loss/train': 4.00113582611084} +02/24/2022 03:42:35 - INFO - codeparrot_training - Step 1642: {'lr': 0.0004105, 'samples': 841216, 'steps': 1642, 'loss/train': 1.5531821250915527} +02/24/2022 03:42:39 - INFO - codeparrot_training - Step 1643: {'lr': 0.00041075000000000004, 'samples': 841728, 'steps': 1643, 'loss/train': 3.4445090293884277} +02/24/2022 03:42:44 - INFO - codeparrot_training - Step 1644: {'lr': 0.00041099999999999996, 'samples': 842240, 'steps': 1644, 'loss/train': 3.668736696243286} +02/24/2022 03:42:48 - INFO - codeparrot_training - Step 1645: {'lr': 0.00041125, 'samples': 842752, 'steps': 1645, 'loss/train': 1.6072088479995728} +02/24/2022 03:42:53 - INFO - codeparrot_training - Step 1646: {'lr': 0.0004115, 'samples': 843264, 'steps': 1646, 'loss/train': 4.0394110679626465} +02/24/2022 03:42:56 - INFO - codeparrot_training - Step 1647: {'lr': 0.00041175, 'samples': 843776, 'steps': 1647, 'loss/train': 4.2862019538879395} +02/24/2022 03:43:02 - INFO - codeparrot_training - Step 1648: {'lr': 0.000412, 'samples': 844288, 'steps': 1648, 'loss/train': 4.242203235626221} +02/24/2022 03:43:06 - INFO - codeparrot_training - Step 1649: {'lr': 0.00041225, 'samples': 844800, 'steps': 1649, 'loss/train': 3.158301830291748} +02/24/2022 03:43:11 - INFO - codeparrot_training - Step 1650: {'lr': 0.0004125, 'samples': 845312, 'steps': 1650, 'loss/train': 4.465639114379883} +02/24/2022 03:43:15 - INFO - codeparrot_training - Step 1651: {'lr': 0.00041275000000000003, 'samples': 845824, 'steps': 1651, 'loss/train': 3.8523125648498535} +02/24/2022 03:43:20 - INFO - codeparrot_training - Step 1652: {'lr': 0.000413, 'samples': 846336, 'steps': 1652, 'loss/train': 4.363038539886475} +02/24/2022 03:43:24 - INFO - codeparrot_training - Step 1653: {'lr': 0.00041325, 'samples': 846848, 'steps': 1653, 'loss/train': 5.091957092285156} +02/24/2022 03:43:30 - INFO - codeparrot_training - Step 1654: {'lr': 0.00041349999999999997, 'samples': 847360, 'steps': 1654, 'loss/train': 4.1330485343933105} +02/24/2022 03:43:33 - INFO - codeparrot_training - Step 1655: {'lr': 0.00041375, 'samples': 847872, 'steps': 1655, 'loss/train': 4.860011577606201} +02/24/2022 03:43:39 - INFO - codeparrot_training - Step 1656: {'lr': 0.000414, 'samples': 848384, 'steps': 1656, 'loss/train': 3.5825374126434326} +02/24/2022 03:43:42 - INFO - codeparrot_training - Step 1657: {'lr': 0.00041425, 'samples': 848896, 'steps': 1657, 'loss/train': 3.5022332668304443} +02/24/2022 03:43:48 - INFO - codeparrot_training - Step 1658: {'lr': 0.0004145, 'samples': 849408, 'steps': 1658, 'loss/train': 3.397233486175537} +02/24/2022 03:43:51 - INFO - codeparrot_training - Step 1659: {'lr': 0.00041475, 'samples': 849920, 'steps': 1659, 'loss/train': 4.770598888397217} +02/24/2022 03:43:56 - INFO - codeparrot_training - Step 1660: {'lr': 0.000415, 'samples': 850432, 'steps': 1660, 'loss/train': 3.4581315517425537} +02/24/2022 03:44:00 - INFO - codeparrot_training - Step 1661: {'lr': 0.00041525000000000004, 'samples': 850944, 'steps': 1661, 'loss/train': 3.625771999359131} +02/24/2022 03:44:05 - INFO - codeparrot_training - Step 1662: {'lr': 0.00041549999999999996, 'samples': 851456, 'steps': 1662, 'loss/train': 3.6055209636688232} +02/24/2022 03:44:09 - INFO - codeparrot_training - Step 1663: {'lr': 0.00041575, 'samples': 851968, 'steps': 1663, 'loss/train': 3.7835278511047363} +02/24/2022 03:44:15 - INFO - codeparrot_training - Step 1664: {'lr': 0.000416, 'samples': 852480, 'steps': 1664, 'loss/train': 4.466989994049072} +02/24/2022 03:44:19 - INFO - codeparrot_training - Step 1665: {'lr': 0.00041625, 'samples': 852992, 'steps': 1665, 'loss/train': 4.221617698669434} +02/24/2022 03:44:24 - INFO - codeparrot_training - Step 1666: {'lr': 0.0004165, 'samples': 853504, 'steps': 1666, 'loss/train': 4.161709308624268} +02/24/2022 03:44:27 - INFO - codeparrot_training - Step 1667: {'lr': 0.00041675, 'samples': 854016, 'steps': 1667, 'loss/train': 3.2856905460357666} +02/24/2022 03:44:33 - INFO - codeparrot_training - Step 1668: {'lr': 0.000417, 'samples': 854528, 'steps': 1668, 'loss/train': 3.8417611122131348} +02/24/2022 03:44:36 - INFO - codeparrot_training - Step 1669: {'lr': 0.00041725000000000003, 'samples': 855040, 'steps': 1669, 'loss/train': 2.292628288269043} +02/24/2022 03:44:42 - INFO - codeparrot_training - Step 1670: {'lr': 0.0004175, 'samples': 855552, 'steps': 1670, 'loss/train': 3.4556281566619873} +02/24/2022 03:44:45 - INFO - codeparrot_training - Step 1671: {'lr': 0.00041775000000000004, 'samples': 856064, 'steps': 1671, 'loss/train': 4.098479747772217} +02/24/2022 03:44:51 - INFO - codeparrot_training - Step 1672: {'lr': 0.00041799999999999997, 'samples': 856576, 'steps': 1672, 'loss/train': 3.883469820022583} +02/24/2022 03:44:54 - INFO - codeparrot_training - Step 1673: {'lr': 0.00041825, 'samples': 857088, 'steps': 1673, 'loss/train': 3.452237367630005} +02/24/2022 03:45:00 - INFO - codeparrot_training - Step 1674: {'lr': 0.0004185, 'samples': 857600, 'steps': 1674, 'loss/train': 4.276821136474609} +02/24/2022 03:45:04 - INFO - codeparrot_training - Step 1675: {'lr': 0.00041875, 'samples': 858112, 'steps': 1675, 'loss/train': 3.78054141998291} +02/24/2022 03:45:09 - INFO - codeparrot_training - Step 1676: {'lr': 0.000419, 'samples': 858624, 'steps': 1676, 'loss/train': 3.6571803092956543} +02/24/2022 03:45:13 - INFO - codeparrot_training - Step 1677: {'lr': 0.00041925, 'samples': 859136, 'steps': 1677, 'loss/train': 0.9283031821250916} +02/24/2022 03:45:18 - INFO - codeparrot_training - Step 1678: {'lr': 0.0004195, 'samples': 859648, 'steps': 1678, 'loss/train': 4.2189040184021} +02/24/2022 03:45:22 - INFO - codeparrot_training - Step 1679: {'lr': 0.00041975000000000004, 'samples': 860160, 'steps': 1679, 'loss/train': 3.447009563446045} +02/24/2022 03:45:27 - INFO - codeparrot_training - Step 1680: {'lr': 0.00042, 'samples': 860672, 'steps': 1680, 'loss/train': 3.345370054244995} +02/24/2022 03:45:31 - INFO - codeparrot_training - Step 1681: {'lr': 0.00042025, 'samples': 861184, 'steps': 1681, 'loss/train': 4.379073619842529} +02/24/2022 03:45:36 - INFO - codeparrot_training - Step 1682: {'lr': 0.0004205, 'samples': 861696, 'steps': 1682, 'loss/train': 3.2736711502075195} +02/24/2022 03:45:42 - INFO - codeparrot_training - Step 1683: {'lr': 0.00042075, 'samples': 862208, 'steps': 1683, 'loss/train': 3.483628988265991} +02/24/2022 03:45:46 - INFO - codeparrot_training - Step 1684: {'lr': 0.000421, 'samples': 862720, 'steps': 1684, 'loss/train': 3.41766357421875} +02/24/2022 03:45:51 - INFO - codeparrot_training - Step 1685: {'lr': 0.00042125, 'samples': 863232, 'steps': 1685, 'loss/train': 2.9679110050201416} +02/24/2022 03:45:55 - INFO - codeparrot_training - Step 1686: {'lr': 0.0004215, 'samples': 863744, 'steps': 1686, 'loss/train': 5.177552700042725} +02/24/2022 03:46:00 - INFO - codeparrot_training - Step 1687: {'lr': 0.00042175000000000003, 'samples': 864256, 'steps': 1687, 'loss/train': 3.167175531387329} +02/24/2022 03:46:04 - INFO - codeparrot_training - Step 1688: {'lr': 0.000422, 'samples': 864768, 'steps': 1688, 'loss/train': 4.140285491943359} +02/24/2022 03:46:09 - INFO - codeparrot_training - Step 1689: {'lr': 0.00042225000000000005, 'samples': 865280, 'steps': 1689, 'loss/train': 4.430770397186279} +02/24/2022 03:46:12 - INFO - codeparrot_training - Step 1690: {'lr': 0.00042249999999999997, 'samples': 865792, 'steps': 1690, 'loss/train': 4.592250823974609} +02/24/2022 03:46:18 - INFO - codeparrot_training - Step 1691: {'lr': 0.00042275, 'samples': 866304, 'steps': 1691, 'loss/train': 3.8042314052581787} +02/24/2022 03:46:21 - INFO - codeparrot_training - Step 1692: {'lr': 0.000423, 'samples': 866816, 'steps': 1692, 'loss/train': 4.788865089416504} +02/24/2022 03:46:27 - INFO - codeparrot_training - Step 1693: {'lr': 0.00042325, 'samples': 867328, 'steps': 1693, 'loss/train': 4.038976192474365} +02/24/2022 03:46:30 - INFO - codeparrot_training - Step 1694: {'lr': 0.0004235, 'samples': 867840, 'steps': 1694, 'loss/train': 4.357811450958252} +02/24/2022 03:46:36 - INFO - codeparrot_training - Step 1695: {'lr': 0.00042375000000000003, 'samples': 868352, 'steps': 1695, 'loss/train': 3.128354072570801} +02/24/2022 03:46:39 - INFO - codeparrot_training - Step 1696: {'lr': 0.000424, 'samples': 868864, 'steps': 1696, 'loss/train': 4.368496417999268} +02/24/2022 03:46:45 - INFO - codeparrot_training - Step 1697: {'lr': 0.00042425000000000004, 'samples': 869376, 'steps': 1697, 'loss/train': 3.5256783962249756} +02/24/2022 03:46:48 - INFO - codeparrot_training - Step 1698: {'lr': 0.0004245, 'samples': 869888, 'steps': 1698, 'loss/train': 4.285900592803955} +02/24/2022 03:46:54 - INFO - codeparrot_training - Step 1699: {'lr': 0.00042475000000000005, 'samples': 870400, 'steps': 1699, 'loss/train': 3.7480287551879883} +02/24/2022 03:46:58 - INFO - codeparrot_training - Step 1700: {'lr': 0.000425, 'samples': 870912, 'steps': 1700, 'loss/train': 2.899427890777588} +02/24/2022 03:47:03 - INFO - codeparrot_training - Step 1701: {'lr': 0.00042525, 'samples': 871424, 'steps': 1701, 'loss/train': 3.787052869796753} +02/24/2022 03:47:07 - INFO - codeparrot_training - Step 1702: {'lr': 0.0004255, 'samples': 871936, 'steps': 1702, 'loss/train': 2.8308157920837402} +02/24/2022 03:47:12 - INFO - codeparrot_training - Step 1703: {'lr': 0.00042575, 'samples': 872448, 'steps': 1703, 'loss/train': 3.9945175647735596} +02/24/2022 03:47:16 - INFO - codeparrot_training - Step 1704: {'lr': 0.000426, 'samples': 872960, 'steps': 1704, 'loss/train': 4.1553425788879395} +02/24/2022 03:47:21 - INFO - codeparrot_training - Step 1705: {'lr': 0.00042625000000000003, 'samples': 873472, 'steps': 1705, 'loss/train': 3.1137735843658447} +02/24/2022 03:47:25 - INFO - codeparrot_training - Step 1706: {'lr': 0.0004265, 'samples': 873984, 'steps': 1706, 'loss/train': 3.8155298233032227} +02/24/2022 03:47:31 - INFO - codeparrot_training - Step 1707: {'lr': 0.00042675000000000005, 'samples': 874496, 'steps': 1707, 'loss/train': 4.7053632736206055} +02/24/2022 03:47:34 - INFO - codeparrot_training - Step 1708: {'lr': 0.000427, 'samples': 875008, 'steps': 1708, 'loss/train': 4.304908752441406} +02/24/2022 03:47:40 - INFO - codeparrot_training - Step 1709: {'lr': 0.00042725, 'samples': 875520, 'steps': 1709, 'loss/train': 1.082191824913025} +02/24/2022 03:47:43 - INFO - codeparrot_training - Step 1710: {'lr': 0.0004275, 'samples': 876032, 'steps': 1710, 'loss/train': 4.41619873046875} +02/24/2022 03:47:49 - INFO - codeparrot_training - Step 1711: {'lr': 0.00042775, 'samples': 876544, 'steps': 1711, 'loss/train': 2.172891855239868} +02/24/2022 03:47:52 - INFO - codeparrot_training - Step 1712: {'lr': 0.000428, 'samples': 877056, 'steps': 1712, 'loss/train': 4.226787567138672} +02/24/2022 03:47:58 - INFO - codeparrot_training - Step 1713: {'lr': 0.00042825000000000003, 'samples': 877568, 'steps': 1713, 'loss/train': 3.967400074005127} +02/24/2022 03:48:01 - INFO - codeparrot_training - Step 1714: {'lr': 0.0004285, 'samples': 878080, 'steps': 1714, 'loss/train': 4.285831928253174} +02/24/2022 03:48:07 - INFO - codeparrot_training - Step 1715: {'lr': 0.00042875000000000004, 'samples': 878592, 'steps': 1715, 'loss/train': 4.683849334716797} +02/24/2022 03:48:10 - INFO - codeparrot_training - Step 1716: {'lr': 0.000429, 'samples': 879104, 'steps': 1716, 'loss/train': 3.3186259269714355} +02/24/2022 03:48:16 - INFO - codeparrot_training - Step 1717: {'lr': 0.00042925000000000005, 'samples': 879616, 'steps': 1717, 'loss/train': 4.041749954223633} +02/24/2022 03:48:19 - INFO - codeparrot_training - Step 1718: {'lr': 0.0004295, 'samples': 880128, 'steps': 1718, 'loss/train': 3.5565340518951416} +02/24/2022 03:48:26 - INFO - codeparrot_training - Step 1719: {'lr': 0.00042975, 'samples': 880640, 'steps': 1719, 'loss/train': 3.8268656730651855} +02/24/2022 03:48:30 - INFO - codeparrot_training - Step 1720: {'lr': 0.00043, 'samples': 881152, 'steps': 1720, 'loss/train': 1.9114283323287964} +02/24/2022 03:48:35 - INFO - codeparrot_training - Step 1721: {'lr': 0.00043025, 'samples': 881664, 'steps': 1721, 'loss/train': 4.044600009918213} +02/24/2022 03:48:38 - INFO - codeparrot_training - Step 1722: {'lr': 0.0004305, 'samples': 882176, 'steps': 1722, 'loss/train': 3.9941177368164062} +02/24/2022 03:48:44 - INFO - codeparrot_training - Step 1723: {'lr': 0.00043075000000000003, 'samples': 882688, 'steps': 1723, 'loss/train': 3.646448850631714} +02/24/2022 03:48:47 - INFO - codeparrot_training - Step 1724: {'lr': 0.000431, 'samples': 883200, 'steps': 1724, 'loss/train': 3.9820351600646973} +02/24/2022 03:48:53 - INFO - codeparrot_training - Step 1725: {'lr': 0.00043125000000000005, 'samples': 883712, 'steps': 1725, 'loss/train': 3.4107930660247803} +02/24/2022 03:48:56 - INFO - codeparrot_training - Step 1726: {'lr': 0.0004315, 'samples': 884224, 'steps': 1726, 'loss/train': 4.530364513397217} +02/24/2022 03:49:02 - INFO - codeparrot_training - Step 1727: {'lr': 0.00043175, 'samples': 884736, 'steps': 1727, 'loss/train': 3.9251275062561035} +02/24/2022 03:49:05 - INFO - codeparrot_training - Step 1728: {'lr': 0.000432, 'samples': 885248, 'steps': 1728, 'loss/train': 4.252110004425049} +02/24/2022 03:49:12 - INFO - codeparrot_training - Step 1729: {'lr': 0.00043225, 'samples': 885760, 'steps': 1729, 'loss/train': 2.7589948177337646} +02/24/2022 03:49:15 - INFO - codeparrot_training - Step 1730: {'lr': 0.0004325, 'samples': 886272, 'steps': 1730, 'loss/train': 3.8178868293762207} +02/24/2022 03:49:21 - INFO - codeparrot_training - Step 1731: {'lr': 0.00043275000000000003, 'samples': 886784, 'steps': 1731, 'loss/train': 3.0542633533477783} +02/24/2022 03:49:24 - INFO - codeparrot_training - Step 1732: {'lr': 0.000433, 'samples': 887296, 'steps': 1732, 'loss/train': 3.4022269248962402} +02/24/2022 03:49:30 - INFO - codeparrot_training - Step 1733: {'lr': 0.00043325000000000004, 'samples': 887808, 'steps': 1733, 'loss/train': 6.1047844886779785} +02/24/2022 03:49:33 - INFO - codeparrot_training - Step 1734: {'lr': 0.0004335, 'samples': 888320, 'steps': 1734, 'loss/train': 3.8315672874450684} +02/24/2022 03:49:39 - INFO - codeparrot_training - Step 1735: {'lr': 0.00043375000000000005, 'samples': 888832, 'steps': 1735, 'loss/train': 4.061244487762451} +02/24/2022 03:49:42 - INFO - codeparrot_training - Step 1736: {'lr': 0.00043400000000000003, 'samples': 889344, 'steps': 1736, 'loss/train': 3.787344217300415} +02/24/2022 03:49:48 - INFO - codeparrot_training - Step 1737: {'lr': 0.00043425, 'samples': 889856, 'steps': 1737, 'loss/train': 4.335191249847412} +02/24/2022 03:49:51 - INFO - codeparrot_training - Step 1738: {'lr': 0.0004345, 'samples': 890368, 'steps': 1738, 'loss/train': 3.9164671897888184} +02/24/2022 03:49:57 - INFO - codeparrot_training - Step 1739: {'lr': 0.00043475, 'samples': 890880, 'steps': 1739, 'loss/train': 4.006381034851074} +02/24/2022 03:50:00 - INFO - codeparrot_training - Step 1740: {'lr': 0.000435, 'samples': 891392, 'steps': 1740, 'loss/train': 3.6912143230438232} +02/24/2022 03:50:06 - INFO - codeparrot_training - Step 1741: {'lr': 0.00043525000000000004, 'samples': 891904, 'steps': 1741, 'loss/train': 4.184384346008301} +02/24/2022 03:50:09 - INFO - codeparrot_training - Step 1742: {'lr': 0.0004355, 'samples': 892416, 'steps': 1742, 'loss/train': 4.126287460327148} +02/24/2022 03:50:15 - INFO - codeparrot_training - Step 1743: {'lr': 0.00043575000000000005, 'samples': 892928, 'steps': 1743, 'loss/train': 4.320517063140869} +02/24/2022 03:50:18 - INFO - codeparrot_training - Step 1744: {'lr': 0.000436, 'samples': 893440, 'steps': 1744, 'loss/train': 2.6963040828704834} +02/24/2022 03:50:25 - INFO - codeparrot_training - Step 1745: {'lr': 0.00043625000000000006, 'samples': 893952, 'steps': 1745, 'loss/train': 6.48025369644165} +02/24/2022 03:50:28 - INFO - codeparrot_training - Step 1746: {'lr': 0.0004365, 'samples': 894464, 'steps': 1746, 'loss/train': 4.3270039558410645} +02/24/2022 03:50:34 - INFO - codeparrot_training - Step 1747: {'lr': 0.00043675, 'samples': 894976, 'steps': 1747, 'loss/train': 4.204241752624512} +02/24/2022 03:50:37 - INFO - codeparrot_training - Step 1748: {'lr': 0.000437, 'samples': 895488, 'steps': 1748, 'loss/train': 5.211532115936279} +02/24/2022 03:50:43 - INFO - codeparrot_training - Step 1749: {'lr': 0.00043725000000000003, 'samples': 896000, 'steps': 1749, 'loss/train': 4.810113906860352} +02/24/2022 03:50:46 - INFO - codeparrot_training - Step 1750: {'lr': 0.0004375, 'samples': 896512, 'steps': 1750, 'loss/train': 3.8145179748535156} +02/24/2022 03:50:51 - INFO - codeparrot_training - Step 1751: {'lr': 0.00043775, 'samples': 897024, 'steps': 1751, 'loss/train': 3.480945348739624} +02/24/2022 03:50:55 - INFO - codeparrot_training - Step 1752: {'lr': 0.000438, 'samples': 897536, 'steps': 1752, 'loss/train': 4.3082194328308105} +02/24/2022 03:51:00 - INFO - codeparrot_training - Step 1753: {'lr': 0.00043825, 'samples': 898048, 'steps': 1753, 'loss/train': 3.9679627418518066} +02/24/2022 03:51:04 - INFO - codeparrot_training - Step 1754: {'lr': 0.00043850000000000003, 'samples': 898560, 'steps': 1754, 'loss/train': 3.5825259685516357} +02/24/2022 03:51:10 - INFO - codeparrot_training - Step 1755: {'lr': 0.00043874999999999996, 'samples': 899072, 'steps': 1755, 'loss/train': 2.855005979537964} +02/24/2022 03:51:13 - INFO - codeparrot_training - Step 1756: {'lr': 0.000439, 'samples': 899584, 'steps': 1756, 'loss/train': 3.516845703125} +02/24/2022 03:51:19 - INFO - codeparrot_training - Step 1757: {'lr': 0.00043924999999999997, 'samples': 900096, 'steps': 1757, 'loss/train': 4.522437572479248} +02/24/2022 03:51:22 - INFO - codeparrot_training - Step 1758: {'lr': 0.0004395, 'samples': 900608, 'steps': 1758, 'loss/train': 3.619581937789917} +02/24/2022 03:51:28 - INFO - codeparrot_training - Step 1759: {'lr': 0.00043975, 'samples': 901120, 'steps': 1759, 'loss/train': 3.721560001373291} +02/24/2022 03:51:31 - INFO - codeparrot_training - Step 1760: {'lr': 0.00044, 'samples': 901632, 'steps': 1760, 'loss/train': 4.21987771987915} +02/24/2022 03:51:37 - INFO - codeparrot_training - Step 1761: {'lr': 0.00044025, 'samples': 902144, 'steps': 1761, 'loss/train': 3.4904325008392334} +02/24/2022 03:51:40 - INFO - codeparrot_training - Step 1762: {'lr': 0.00044050000000000003, 'samples': 902656, 'steps': 1762, 'loss/train': 3.912849187850952} +02/24/2022 03:51:46 - INFO - codeparrot_training - Step 1763: {'lr': 0.00044075, 'samples': 903168, 'steps': 1763, 'loss/train': 1.1546976566314697} +02/24/2022 03:51:49 - INFO - codeparrot_training - Step 1764: {'lr': 0.000441, 'samples': 903680, 'steps': 1764, 'loss/train': 5.073992729187012} +02/24/2022 03:51:56 - INFO - codeparrot_training - Step 1765: {'lr': 0.00044124999999999996, 'samples': 904192, 'steps': 1765, 'loss/train': 3.295259475708008} +02/24/2022 03:51:59 - INFO - codeparrot_training - Step 1766: {'lr': 0.0004415, 'samples': 904704, 'steps': 1766, 'loss/train': 3.897064447402954} +02/24/2022 03:52:05 - INFO - codeparrot_training - Step 1767: {'lr': 0.00044175, 'samples': 905216, 'steps': 1767, 'loss/train': 3.720574140548706} +02/24/2022 03:52:08 - INFO - codeparrot_training - Step 1768: {'lr': 0.000442, 'samples': 905728, 'steps': 1768, 'loss/train': 2.6307895183563232} +02/24/2022 03:52:14 - INFO - codeparrot_training - Step 1769: {'lr': 0.00044225, 'samples': 906240, 'steps': 1769, 'loss/train': 3.19089412689209} +02/24/2022 03:52:17 - INFO - codeparrot_training - Step 1770: {'lr': 0.0004425, 'samples': 906752, 'steps': 1770, 'loss/train': 2.301189661026001} +02/24/2022 03:52:23 - INFO - codeparrot_training - Step 1771: {'lr': 0.00044275, 'samples': 907264, 'steps': 1771, 'loss/train': 3.2204225063323975} +02/24/2022 03:52:26 - INFO - codeparrot_training - Step 1772: {'lr': 0.00044300000000000003, 'samples': 907776, 'steps': 1772, 'loss/train': 3.2072110176086426} +02/24/2022 03:52:32 - INFO - codeparrot_training - Step 1773: {'lr': 0.00044325, 'samples': 908288, 'steps': 1773, 'loss/train': 4.46207332611084} +02/24/2022 03:52:35 - INFO - codeparrot_training - Step 1774: {'lr': 0.0004435, 'samples': 908800, 'steps': 1774, 'loss/train': 3.0252420902252197} +02/24/2022 03:52:42 - INFO - codeparrot_training - Step 1775: {'lr': 0.00044374999999999997, 'samples': 909312, 'steps': 1775, 'loss/train': 1.1468534469604492} +02/24/2022 03:52:46 - INFO - codeparrot_training - Step 1776: {'lr': 0.000444, 'samples': 909824, 'steps': 1776, 'loss/train': 3.9944698810577393} +02/24/2022 03:52:51 - INFO - codeparrot_training - Step 1777: {'lr': 0.00044425, 'samples': 910336, 'steps': 1777, 'loss/train': 6.24413537979126} +02/24/2022 03:52:55 - INFO - codeparrot_training - Step 1778: {'lr': 0.0004445, 'samples': 910848, 'steps': 1778, 'loss/train': 5.424709796905518} +02/24/2022 03:52:58 - INFO - codeparrot_training - Step 1779: {'lr': 0.00044475, 'samples': 911360, 'steps': 1779, 'loss/train': 4.296822547912598} +02/24/2022 03:53:04 - INFO - codeparrot_training - Step 1780: {'lr': 0.00044500000000000003, 'samples': 911872, 'steps': 1780, 'loss/train': 4.318593978881836} +02/24/2022 03:53:07 - INFO - codeparrot_training - Step 1781: {'lr': 0.00044525, 'samples': 912384, 'steps': 1781, 'loss/train': 4.570363521575928} +02/24/2022 03:53:13 - INFO - codeparrot_training - Step 1782: {'lr': 0.00044550000000000004, 'samples': 912896, 'steps': 1782, 'loss/train': 2.947409152984619} +02/24/2022 03:53:18 - INFO - codeparrot_training - Step 1783: {'lr': 0.00044574999999999997, 'samples': 913408, 'steps': 1783, 'loss/train': 3.573421001434326} +02/24/2022 03:53:22 - INFO - codeparrot_training - Step 1784: {'lr': 0.000446, 'samples': 913920, 'steps': 1784, 'loss/train': 4.229432582855225} +02/24/2022 03:53:27 - INFO - codeparrot_training - Step 1785: {'lr': 0.00044625, 'samples': 914432, 'steps': 1785, 'loss/train': 4.517875671386719} +02/24/2022 03:53:31 - INFO - codeparrot_training - Step 1786: {'lr': 0.0004465, 'samples': 914944, 'steps': 1786, 'loss/train': 4.342311382293701} +02/24/2022 03:53:37 - INFO - codeparrot_training - Step 1787: {'lr': 0.00044675, 'samples': 915456, 'steps': 1787, 'loss/train': 4.162026882171631} +02/24/2022 03:53:40 - INFO - codeparrot_training - Step 1788: {'lr': 0.000447, 'samples': 915968, 'steps': 1788, 'loss/train': 4.058498382568359} +02/24/2022 03:53:46 - INFO - codeparrot_training - Step 1789: {'lr': 0.00044725, 'samples': 916480, 'steps': 1789, 'loss/train': 5.35480260848999} +02/24/2022 03:53:49 - INFO - codeparrot_training - Step 1790: {'lr': 0.00044750000000000004, 'samples': 916992, 'steps': 1790, 'loss/train': 3.704540729522705} +02/24/2022 03:53:55 - INFO - codeparrot_training - Step 1791: {'lr': 0.00044775, 'samples': 917504, 'steps': 1791, 'loss/train': 3.169809341430664} +02/24/2022 03:53:58 - INFO - codeparrot_training - Step 1792: {'lr': 0.000448, 'samples': 918016, 'steps': 1792, 'loss/train': 5.216386795043945} +02/24/2022 03:54:03 - INFO - codeparrot_training - Step 1793: {'lr': 0.00044824999999999997, 'samples': 918528, 'steps': 1793, 'loss/train': 4.576674938201904} +02/24/2022 03:54:07 - INFO - codeparrot_training - Step 1794: {'lr': 0.0004485, 'samples': 919040, 'steps': 1794, 'loss/train': 4.403326511383057} +02/24/2022 03:54:12 - INFO - codeparrot_training - Step 1795: {'lr': 0.00044875, 'samples': 919552, 'steps': 1795, 'loss/train': 3.6770081520080566} +02/24/2022 03:54:16 - INFO - codeparrot_training - Step 1796: {'lr': 0.000449, 'samples': 920064, 'steps': 1796, 'loss/train': 4.592019557952881} +02/24/2022 03:54:21 - INFO - codeparrot_training - Step 1797: {'lr': 0.00044925, 'samples': 920576, 'steps': 1797, 'loss/train': 3.8015635013580322} +02/24/2022 03:54:25 - INFO - codeparrot_training - Step 1798: {'lr': 0.00044950000000000003, 'samples': 921088, 'steps': 1798, 'loss/train': 3.5735716819763184} +02/24/2022 03:54:30 - INFO - codeparrot_training - Step 1799: {'lr': 0.00044975, 'samples': 921600, 'steps': 1799, 'loss/train': 3.4813215732574463} +02/24/2022 03:54:34 - INFO - codeparrot_training - Step 1800: {'lr': 0.00045000000000000004, 'samples': 922112, 'steps': 1800, 'loss/train': 5.032011985778809} +02/24/2022 03:54:41 - INFO - codeparrot_training - Step 1801: {'lr': 0.00045024999999999997, 'samples': 922624, 'steps': 1801, 'loss/train': 4.501695156097412} +02/24/2022 03:54:44 - INFO - codeparrot_training - Step 1802: {'lr': 0.0004505, 'samples': 923136, 'steps': 1802, 'loss/train': 3.375959873199463} +02/24/2022 03:54:50 - INFO - codeparrot_training - Step 1803: {'lr': 0.00045075, 'samples': 923648, 'steps': 1803, 'loss/train': 3.2776548862457275} +02/24/2022 03:54:53 - INFO - codeparrot_training - Step 1804: {'lr': 0.000451, 'samples': 924160, 'steps': 1804, 'loss/train': 4.1599345207214355} +02/24/2022 03:54:59 - INFO - codeparrot_training - Step 1805: {'lr': 0.00045125, 'samples': 924672, 'steps': 1805, 'loss/train': 3.610893487930298} +02/24/2022 03:55:02 - INFO - codeparrot_training - Step 1806: {'lr': 0.0004515, 'samples': 925184, 'steps': 1806, 'loss/train': 4.139208793640137} +02/24/2022 03:55:08 - INFO - codeparrot_training - Step 1807: {'lr': 0.00045175, 'samples': 925696, 'steps': 1807, 'loss/train': 3.118744134902954} +02/24/2022 03:55:11 - INFO - codeparrot_training - Step 1808: {'lr': 0.00045200000000000004, 'samples': 926208, 'steps': 1808, 'loss/train': 3.631859302520752} +02/24/2022 03:55:17 - INFO - codeparrot_training - Step 1809: {'lr': 0.00045225, 'samples': 926720, 'steps': 1809, 'loss/train': 3.8989806175231934} +02/24/2022 03:55:20 - INFO - codeparrot_training - Step 1810: {'lr': 0.00045250000000000005, 'samples': 927232, 'steps': 1810, 'loss/train': 2.0652458667755127} +02/24/2022 03:55:27 - INFO - codeparrot_training - Step 1811: {'lr': 0.00045275, 'samples': 927744, 'steps': 1811, 'loss/train': 3.0327908992767334} +02/24/2022 03:55:31 - INFO - codeparrot_training - Step 1812: {'lr': 0.000453, 'samples': 928256, 'steps': 1812, 'loss/train': 4.495767593383789} +02/24/2022 03:55:36 - INFO - codeparrot_training - Step 1813: {'lr': 0.00045325, 'samples': 928768, 'steps': 1813, 'loss/train': 4.163337707519531} +02/24/2022 03:55:39 - INFO - codeparrot_training - Step 1814: {'lr': 0.0004535, 'samples': 929280, 'steps': 1814, 'loss/train': 3.809475898742676} +02/24/2022 03:55:45 - INFO - codeparrot_training - Step 1815: {'lr': 0.00045375, 'samples': 929792, 'steps': 1815, 'loss/train': 4.117321968078613} +02/24/2022 03:55:48 - INFO - codeparrot_training - Step 1816: {'lr': 0.00045400000000000003, 'samples': 930304, 'steps': 1816, 'loss/train': 3.2555124759674072} +02/24/2022 03:55:54 - INFO - codeparrot_training - Step 1817: {'lr': 0.00045425, 'samples': 930816, 'steps': 1817, 'loss/train': 3.4132704734802246} +02/24/2022 03:55:57 - INFO - codeparrot_training - Step 1818: {'lr': 0.00045450000000000004, 'samples': 931328, 'steps': 1818, 'loss/train': 3.0801494121551514} +02/24/2022 03:56:03 - INFO - codeparrot_training - Step 1819: {'lr': 0.00045475, 'samples': 931840, 'steps': 1819, 'loss/train': 3.2781450748443604} +02/24/2022 03:56:06 - INFO - codeparrot_training - Step 1820: {'lr': 0.000455, 'samples': 932352, 'steps': 1820, 'loss/train': 3.5847525596618652} +02/24/2022 03:56:12 - INFO - codeparrot_training - Step 1821: {'lr': 0.00045525, 'samples': 932864, 'steps': 1821, 'loss/train': 3.1453769207000732} +02/24/2022 03:56:16 - INFO - codeparrot_training - Step 1822: {'lr': 0.0004555, 'samples': 933376, 'steps': 1822, 'loss/train': 3.9529950618743896} +02/24/2022 03:56:21 - INFO - codeparrot_training - Step 1823: {'lr': 0.00045575, 'samples': 933888, 'steps': 1823, 'loss/train': 1.4370235204696655} +02/24/2022 03:56:25 - INFO - codeparrot_training - Step 1824: {'lr': 0.000456, 'samples': 934400, 'steps': 1824, 'loss/train': 5.172427177429199} +02/24/2022 03:56:30 - INFO - codeparrot_training - Step 1825: {'lr': 0.00045625, 'samples': 934912, 'steps': 1825, 'loss/train': 2.903763771057129} +02/24/2022 03:56:36 - INFO - codeparrot_training - Step 1826: {'lr': 0.00045650000000000004, 'samples': 935424, 'steps': 1826, 'loss/train': 4.424928188323975} +02/24/2022 03:56:39 - INFO - codeparrot_training - Step 1827: {'lr': 0.00045675, 'samples': 935936, 'steps': 1827, 'loss/train': 3.0317370891571045} +02/24/2022 03:56:45 - INFO - codeparrot_training - Step 1828: {'lr': 0.00045700000000000005, 'samples': 936448, 'steps': 1828, 'loss/train': 4.324011325836182} +02/24/2022 03:56:48 - INFO - codeparrot_training - Step 1829: {'lr': 0.00045725, 'samples': 936960, 'steps': 1829, 'loss/train': 3.033329725265503} +02/24/2022 03:56:54 - INFO - codeparrot_training - Step 1830: {'lr': 0.0004575, 'samples': 937472, 'steps': 1830, 'loss/train': 4.144485950469971} +02/24/2022 03:56:57 - INFO - codeparrot_training - Step 1831: {'lr': 0.00045775, 'samples': 937984, 'steps': 1831, 'loss/train': 4.162665843963623} +02/24/2022 03:57:03 - INFO - codeparrot_training - Step 1832: {'lr': 0.000458, 'samples': 938496, 'steps': 1832, 'loss/train': 1.6082497835159302} +02/24/2022 03:57:06 - INFO - codeparrot_training - Step 1833: {'lr': 0.00045825, 'samples': 939008, 'steps': 1833, 'loss/train': 3.437598705291748} +02/24/2022 03:57:12 - INFO - codeparrot_training - Step 1834: {'lr': 0.00045850000000000003, 'samples': 939520, 'steps': 1834, 'loss/train': 5.750239372253418} +02/24/2022 03:57:15 - INFO - codeparrot_training - Step 1835: {'lr': 0.00045875, 'samples': 940032, 'steps': 1835, 'loss/train': 4.496338367462158} +02/24/2022 03:57:20 - INFO - codeparrot_training - Step 1836: {'lr': 0.00045900000000000004, 'samples': 940544, 'steps': 1836, 'loss/train': 3.1345911026000977} +02/24/2022 03:57:24 - INFO - codeparrot_training - Step 1837: {'lr': 0.00045925, 'samples': 941056, 'steps': 1837, 'loss/train': 2.838383436203003} +02/24/2022 03:57:30 - INFO - codeparrot_training - Step 1838: {'lr': 0.00045950000000000006, 'samples': 941568, 'steps': 1838, 'loss/train': 4.904336929321289} +02/24/2022 03:57:33 - INFO - codeparrot_training - Step 1839: {'lr': 0.00045975, 'samples': 942080, 'steps': 1839, 'loss/train': 3.4146924018859863} +02/24/2022 03:57:39 - INFO - codeparrot_training - Step 1840: {'lr': 0.00046, 'samples': 942592, 'steps': 1840, 'loss/train': 4.4224748611450195} +02/24/2022 03:57:42 - INFO - codeparrot_training - Step 1841: {'lr': 0.00046025, 'samples': 943104, 'steps': 1841, 'loss/train': 2.8425915241241455} +02/24/2022 03:57:48 - INFO - codeparrot_training - Step 1842: {'lr': 0.0004605, 'samples': 943616, 'steps': 1842, 'loss/train': 4.913407802581787} +02/24/2022 03:57:52 - INFO - codeparrot_training - Step 1843: {'lr': 0.00046075, 'samples': 944128, 'steps': 1843, 'loss/train': 4.115130424499512} +02/24/2022 03:57:57 - INFO - codeparrot_training - Step 1844: {'lr': 0.00046100000000000004, 'samples': 944640, 'steps': 1844, 'loss/train': 3.745527744293213} +02/24/2022 03:58:01 - INFO - codeparrot_training - Step 1845: {'lr': 0.00046125, 'samples': 945152, 'steps': 1845, 'loss/train': 4.078271865844727} +02/24/2022 03:58:06 - INFO - codeparrot_training - Step 1846: {'lr': 0.00046150000000000005, 'samples': 945664, 'steps': 1846, 'loss/train': 4.371314525604248} +02/24/2022 03:58:09 - INFO - codeparrot_training - Step 1847: {'lr': 0.00046175000000000003, 'samples': 946176, 'steps': 1847, 'loss/train': 3.3703036308288574} +02/24/2022 03:58:16 - INFO - codeparrot_training - Step 1848: {'lr': 0.000462, 'samples': 946688, 'steps': 1848, 'loss/train': 5.860727787017822} +02/24/2022 03:58:19 - INFO - codeparrot_training - Step 1849: {'lr': 0.00046225, 'samples': 947200, 'steps': 1849, 'loss/train': 3.782621383666992} +02/24/2022 03:58:24 - INFO - codeparrot_training - Step 1850: {'lr': 0.0004625, 'samples': 947712, 'steps': 1850, 'loss/train': 3.4912502765655518} +02/24/2022 03:58:28 - INFO - codeparrot_training - Step 1851: {'lr': 0.00046275, 'samples': 948224, 'steps': 1851, 'loss/train': 3.6662824153900146} +02/24/2022 03:58:33 - INFO - codeparrot_training - Step 1852: {'lr': 0.00046300000000000003, 'samples': 948736, 'steps': 1852, 'loss/train': 3.2053329944610596} +02/24/2022 03:58:37 - INFO - codeparrot_training - Step 1853: {'lr': 0.00046325, 'samples': 949248, 'steps': 1853, 'loss/train': 3.854938268661499} +02/24/2022 03:58:42 - INFO - codeparrot_training - Step 1854: {'lr': 0.00046350000000000004, 'samples': 949760, 'steps': 1854, 'loss/train': 3.7356789112091064} +02/24/2022 03:58:46 - INFO - codeparrot_training - Step 1855: {'lr': 0.00046375, 'samples': 950272, 'steps': 1855, 'loss/train': 3.7279837131500244} +02/24/2022 03:58:51 - INFO - codeparrot_training - Step 1856: {'lr': 0.00046400000000000006, 'samples': 950784, 'steps': 1856, 'loss/train': 3.8213253021240234} +02/24/2022 03:58:55 - INFO - codeparrot_training - Step 1857: {'lr': 0.00046425, 'samples': 951296, 'steps': 1857, 'loss/train': 3.7587873935699463} +02/24/2022 03:59:01 - INFO - codeparrot_training - Step 1858: {'lr': 0.0004645, 'samples': 951808, 'steps': 1858, 'loss/train': 3.8782124519348145} +02/24/2022 03:59:05 - INFO - codeparrot_training - Step 1859: {'lr': 0.00046475, 'samples': 952320, 'steps': 1859, 'loss/train': 3.160205125808716} +02/24/2022 03:59:10 - INFO - codeparrot_training - Step 1860: {'lr': 0.000465, 'samples': 952832, 'steps': 1860, 'loss/train': 4.472071170806885} +02/24/2022 03:59:13 - INFO - codeparrot_training - Step 1861: {'lr': 0.00046525, 'samples': 953344, 'steps': 1861, 'loss/train': 4.162772178649902} +02/24/2022 03:59:19 - INFO - codeparrot_training - Step 1862: {'lr': 0.00046550000000000004, 'samples': 953856, 'steps': 1862, 'loss/train': 4.5032525062561035} +02/24/2022 03:59:23 - INFO - codeparrot_training - Step 1863: {'lr': 0.00046575, 'samples': 954368, 'steps': 1863, 'loss/train': 3.5368921756744385} +02/24/2022 03:59:28 - INFO - codeparrot_training - Step 1864: {'lr': 0.00046600000000000005, 'samples': 954880, 'steps': 1864, 'loss/train': 3.049611806869507} +02/24/2022 03:59:31 - INFO - codeparrot_training - Step 1865: {'lr': 0.00046625000000000003, 'samples': 955392, 'steps': 1865, 'loss/train': 2.394439697265625} +02/24/2022 03:59:37 - INFO - codeparrot_training - Step 1866: {'lr': 0.0004665, 'samples': 955904, 'steps': 1866, 'loss/train': 4.33760404586792} +02/24/2022 03:59:40 - INFO - codeparrot_training - Step 1867: {'lr': 0.00046675, 'samples': 956416, 'steps': 1867, 'loss/train': 3.278625011444092} +02/24/2022 03:59:47 - INFO - codeparrot_training - Step 1868: {'lr': 0.000467, 'samples': 956928, 'steps': 1868, 'loss/train': 4.229162693023682} +02/24/2022 03:59:51 - INFO - codeparrot_training - Step 1869: {'lr': 0.00046725, 'samples': 957440, 'steps': 1869, 'loss/train': 2.944370746612549} +02/24/2022 03:59:56 - INFO - codeparrot_training - Step 1870: {'lr': 0.00046750000000000003, 'samples': 957952, 'steps': 1870, 'loss/train': 3.1038155555725098} +02/24/2022 03:59:59 - INFO - codeparrot_training - Step 1871: {'lr': 0.00046775, 'samples': 958464, 'steps': 1871, 'loss/train': 3.9347496032714844} +02/24/2022 04:00:05 - INFO - codeparrot_training - Step 1872: {'lr': 0.00046800000000000005, 'samples': 958976, 'steps': 1872, 'loss/train': 6.10337495803833} +02/24/2022 04:00:08 - INFO - codeparrot_training - Step 1873: {'lr': 0.00046825, 'samples': 959488, 'steps': 1873, 'loss/train': 3.674490451812744} +02/24/2022 04:00:14 - INFO - codeparrot_training - Step 1874: {'lr': 0.00046850000000000006, 'samples': 960000, 'steps': 1874, 'loss/train': 2.7155466079711914} +02/24/2022 04:00:17 - INFO - codeparrot_training - Step 1875: {'lr': 0.00046875, 'samples': 960512, 'steps': 1875, 'loss/train': 3.057251453399658} +02/24/2022 04:00:23 - INFO - codeparrot_training - Step 1876: {'lr': 0.00046899999999999996, 'samples': 961024, 'steps': 1876, 'loss/train': 0.8594743609428406} +02/24/2022 04:00:26 - INFO - codeparrot_training - Step 1877: {'lr': 0.00046925, 'samples': 961536, 'steps': 1877, 'loss/train': 1.3336923122406006} +02/24/2022 04:00:32 - INFO - codeparrot_training - Step 1878: {'lr': 0.0004695, 'samples': 962048, 'steps': 1878, 'loss/train': 3.075562000274658} +02/24/2022 04:00:35 - INFO - codeparrot_training - Step 1879: {'lr': 0.00046975, 'samples': 962560, 'steps': 1879, 'loss/train': 4.606093883514404} +02/24/2022 04:00:41 - INFO - codeparrot_training - Step 1880: {'lr': 0.00047, 'samples': 963072, 'steps': 1880, 'loss/train': 3.365853786468506} +02/24/2022 04:00:44 - INFO - codeparrot_training - Step 1881: {'lr': 0.00047025, 'samples': 963584, 'steps': 1881, 'loss/train': 3.5819649696350098} +02/24/2022 04:00:50 - INFO - codeparrot_training - Step 1882: {'lr': 0.0004705, 'samples': 964096, 'steps': 1882, 'loss/train': 2.9109272956848145} +02/24/2022 04:00:53 - INFO - codeparrot_training - Step 1883: {'lr': 0.00047075000000000003, 'samples': 964608, 'steps': 1883, 'loss/train': 3.848289728164673} +02/24/2022 04:00:59 - INFO - codeparrot_training - Step 1884: {'lr': 0.000471, 'samples': 965120, 'steps': 1884, 'loss/train': 3.2787702083587646} +02/24/2022 04:01:03 - INFO - codeparrot_training - Step 1885: {'lr': 0.00047125, 'samples': 965632, 'steps': 1885, 'loss/train': 3.697462320327759} +02/24/2022 04:01:08 - INFO - codeparrot_training - Step 1886: {'lr': 0.00047149999999999997, 'samples': 966144, 'steps': 1886, 'loss/train': 4.535035610198975} +02/24/2022 04:01:12 - INFO - codeparrot_training - Step 1887: {'lr': 0.00047175, 'samples': 966656, 'steps': 1887, 'loss/train': 0.8160093426704407} +02/24/2022 04:01:17 - INFO - codeparrot_training - Step 1888: {'lr': 0.000472, 'samples': 967168, 'steps': 1888, 'loss/train': 4.502017974853516} +02/24/2022 04:01:21 - INFO - codeparrot_training - Step 1889: {'lr': 0.00047225, 'samples': 967680, 'steps': 1889, 'loss/train': 3.7890665531158447} +02/24/2022 04:01:26 - INFO - codeparrot_training - Step 1890: {'lr': 0.0004725, 'samples': 968192, 'steps': 1890, 'loss/train': 4.392154693603516} +02/24/2022 04:01:30 - INFO - codeparrot_training - Step 1891: {'lr': 0.00047275, 'samples': 968704, 'steps': 1891, 'loss/train': 2.513261318206787} +02/24/2022 04:01:35 - INFO - codeparrot_training - Step 1892: {'lr': 0.000473, 'samples': 969216, 'steps': 1892, 'loss/train': 3.9184131622314453} +02/24/2022 04:01:39 - INFO - codeparrot_training - Step 1893: {'lr': 0.00047325000000000004, 'samples': 969728, 'steps': 1893, 'loss/train': 4.705053329467773} +02/24/2022 04:01:45 - INFO - codeparrot_training - Step 1894: {'lr': 0.00047349999999999996, 'samples': 970240, 'steps': 1894, 'loss/train': 4.477334976196289} +02/24/2022 04:01:48 - INFO - codeparrot_training - Step 1895: {'lr': 0.00047375, 'samples': 970752, 'steps': 1895, 'loss/train': 4.102129936218262} +02/24/2022 04:01:54 - INFO - codeparrot_training - Step 1896: {'lr': 0.000474, 'samples': 971264, 'steps': 1896, 'loss/train': 3.6128153800964355} +02/24/2022 04:01:57 - INFO - codeparrot_training - Step 1897: {'lr': 0.00047425, 'samples': 971776, 'steps': 1897, 'loss/train': 1.9788767099380493} +02/24/2022 04:02:03 - INFO - codeparrot_training - Step 1898: {'lr': 0.0004745, 'samples': 972288, 'steps': 1898, 'loss/train': 5.206483364105225} +02/24/2022 04:02:07 - INFO - codeparrot_training - Step 1899: {'lr': 0.00047475, 'samples': 972800, 'steps': 1899, 'loss/train': 0.6392098665237427} +02/24/2022 04:02:12 - INFO - codeparrot_training - Step 1900: {'lr': 0.000475, 'samples': 973312, 'steps': 1900, 'loss/train': 3.522040367126465} +02/24/2022 04:02:17 - INFO - codeparrot_training - Step 1901: {'lr': 0.00047525000000000003, 'samples': 973824, 'steps': 1901, 'loss/train': 3.5939764976501465} +02/24/2022 04:02:21 - INFO - codeparrot_training - Step 1902: {'lr': 0.0004755, 'samples': 974336, 'steps': 1902, 'loss/train': 4.5955352783203125} +02/24/2022 04:02:27 - INFO - codeparrot_training - Step 1903: {'lr': 0.00047575, 'samples': 974848, 'steps': 1903, 'loss/train': 3.5274877548217773} +02/24/2022 04:02:31 - INFO - codeparrot_training - Step 1904: {'lr': 0.00047599999999999997, 'samples': 975360, 'steps': 1904, 'loss/train': 3.993286371231079} +02/24/2022 04:02:36 - INFO - codeparrot_training - Step 1905: {'lr': 0.00047625, 'samples': 975872, 'steps': 1905, 'loss/train': 3.3120734691619873} +02/24/2022 04:02:39 - INFO - codeparrot_training - Step 1906: {'lr': 0.0004765, 'samples': 976384, 'steps': 1906, 'loss/train': 3.9515020847320557} +02/24/2022 04:02:45 - INFO - codeparrot_training - Step 1907: {'lr': 0.00047675, 'samples': 976896, 'steps': 1907, 'loss/train': 4.023288249969482} +02/24/2022 04:02:48 - INFO - codeparrot_training - Step 1908: {'lr': 0.000477, 'samples': 977408, 'steps': 1908, 'loss/train': 3.07366681098938} +02/24/2022 04:02:54 - INFO - codeparrot_training - Step 1909: {'lr': 0.00047725, 'samples': 977920, 'steps': 1909, 'loss/train': 4.2689666748046875} +02/24/2022 04:02:57 - INFO - codeparrot_training - Step 1910: {'lr': 0.0004775, 'samples': 978432, 'steps': 1910, 'loss/train': 4.167940616607666} +02/24/2022 04:03:03 - INFO - codeparrot_training - Step 1911: {'lr': 0.00047775000000000004, 'samples': 978944, 'steps': 1911, 'loss/train': 4.071891784667969} +02/24/2022 04:03:06 - INFO - codeparrot_training - Step 1912: {'lr': 0.00047799999999999996, 'samples': 979456, 'steps': 1912, 'loss/train': 3.5444371700286865} +02/24/2022 04:03:13 - INFO - codeparrot_training - Step 1913: {'lr': 0.00047825, 'samples': 979968, 'steps': 1913, 'loss/train': 3.6563661098480225} +02/24/2022 04:03:17 - INFO - codeparrot_training - Step 1914: {'lr': 0.0004785, 'samples': 980480, 'steps': 1914, 'loss/train': 3.701944351196289} +02/24/2022 04:03:22 - INFO - codeparrot_training - Step 1915: {'lr': 0.00047875, 'samples': 980992, 'steps': 1915, 'loss/train': 2.2404544353485107} +02/24/2022 04:03:25 - INFO - codeparrot_training - Step 1916: {'lr': 0.000479, 'samples': 981504, 'steps': 1916, 'loss/train': 4.324679374694824} +02/24/2022 04:03:31 - INFO - codeparrot_training - Step 1917: {'lr': 0.00047925, 'samples': 982016, 'steps': 1917, 'loss/train': 3.3296546936035156} +02/24/2022 04:03:34 - INFO - codeparrot_training - Step 1918: {'lr': 0.0004795, 'samples': 982528, 'steps': 1918, 'loss/train': 4.014566898345947} +02/24/2022 04:03:40 - INFO - codeparrot_training - Step 1919: {'lr': 0.00047975000000000003, 'samples': 983040, 'steps': 1919, 'loss/train': 2.519977331161499} +02/24/2022 04:03:43 - INFO - codeparrot_training - Step 1920: {'lr': 0.00048, 'samples': 983552, 'steps': 1920, 'loss/train': 3.5396993160247803} +02/24/2022 04:03:49 - INFO - codeparrot_training - Step 1921: {'lr': 0.00048025000000000005, 'samples': 984064, 'steps': 1921, 'loss/train': 4.678669452667236} +02/24/2022 04:03:52 - INFO - codeparrot_training - Step 1922: {'lr': 0.00048049999999999997, 'samples': 984576, 'steps': 1922, 'loss/train': 3.478494882583618} +02/24/2022 04:03:58 - INFO - codeparrot_training - Step 1923: {'lr': 0.00048075, 'samples': 985088, 'steps': 1923, 'loss/train': 2.2176525592803955} +02/24/2022 04:04:01 - INFO - codeparrot_training - Step 1924: {'lr': 0.000481, 'samples': 985600, 'steps': 1924, 'loss/train': 4.056451797485352} +02/24/2022 04:04:07 - INFO - codeparrot_training - Step 1925: {'lr': 0.00048125, 'samples': 986112, 'steps': 1925, 'loss/train': 5.742905616760254} +02/24/2022 04:04:10 - INFO - codeparrot_training - Step 1926: {'lr': 0.0004815, 'samples': 986624, 'steps': 1926, 'loss/train': 4.071230888366699} +02/24/2022 04:04:16 - INFO - codeparrot_training - Step 1927: {'lr': 0.00048175000000000003, 'samples': 987136, 'steps': 1927, 'loss/train': 4.317747592926025} +02/24/2022 04:04:19 - INFO - codeparrot_training - Step 1928: {'lr': 0.000482, 'samples': 987648, 'steps': 1928, 'loss/train': 3.545318365097046} +02/24/2022 04:04:25 - INFO - codeparrot_training - Step 1929: {'lr': 0.00048225000000000004, 'samples': 988160, 'steps': 1929, 'loss/train': 4.278107166290283} +02/24/2022 04:04:29 - INFO - codeparrot_training - Step 1930: {'lr': 0.0004825, 'samples': 988672, 'steps': 1930, 'loss/train': 3.470717191696167} +02/24/2022 04:04:34 - INFO - codeparrot_training - Step 1931: {'lr': 0.00048275, 'samples': 989184, 'steps': 1931, 'loss/train': 4.630705833435059} +02/24/2022 04:04:38 - INFO - codeparrot_training - Step 1932: {'lr': 0.000483, 'samples': 989696, 'steps': 1932, 'loss/train': 4.433801651000977} +02/24/2022 04:04:43 - INFO - codeparrot_training - Step 1933: {'lr': 0.00048325, 'samples': 990208, 'steps': 1933, 'loss/train': 3.111100673675537} +02/24/2022 04:04:47 - INFO - codeparrot_training - Step 1934: {'lr': 0.0004835, 'samples': 990720, 'steps': 1934, 'loss/train': 4.2302565574646} +02/24/2022 04:04:52 - INFO - codeparrot_training - Step 1935: {'lr': 0.00048375, 'samples': 991232, 'steps': 1935, 'loss/train': 4.6753129959106445} +02/24/2022 04:04:56 - INFO - codeparrot_training - Step 1936: {'lr': 0.000484, 'samples': 991744, 'steps': 1936, 'loss/train': 4.574672698974609} +02/24/2022 04:05:01 - INFO - codeparrot_training - Step 1937: {'lr': 0.00048425000000000003, 'samples': 992256, 'steps': 1937, 'loss/train': 0.7327980399131775} +02/24/2022 04:05:07 - INFO - codeparrot_training - Step 1938: {'lr': 0.0004845, 'samples': 992768, 'steps': 1938, 'loss/train': 4.488827705383301} +02/24/2022 04:05:11 - INFO - codeparrot_training - Step 1939: {'lr': 0.00048475000000000005, 'samples': 993280, 'steps': 1939, 'loss/train': 3.215743064880371} +02/24/2022 04:05:16 - INFO - codeparrot_training - Step 1940: {'lr': 0.00048499999999999997, 'samples': 993792, 'steps': 1940, 'loss/train': 2.5476274490356445} +02/24/2022 04:05:19 - INFO - codeparrot_training - Step 1941: {'lr': 0.00048525, 'samples': 994304, 'steps': 1941, 'loss/train': 3.6032984256744385} +02/24/2022 04:05:25 - INFO - codeparrot_training - Step 1942: {'lr': 0.0004855, 'samples': 994816, 'steps': 1942, 'loss/train': 3.7020487785339355} +02/24/2022 04:05:28 - INFO - codeparrot_training - Step 1943: {'lr': 0.00048575, 'samples': 995328, 'steps': 1943, 'loss/train': 4.331693649291992} +02/24/2022 04:05:34 - INFO - codeparrot_training - Step 1944: {'lr': 0.000486, 'samples': 995840, 'steps': 1944, 'loss/train': 1.9530490636825562} +02/24/2022 04:05:37 - INFO - codeparrot_training - Step 1945: {'lr': 0.00048625000000000003, 'samples': 996352, 'steps': 1945, 'loss/train': 2.9386301040649414} +02/24/2022 04:05:43 - INFO - codeparrot_training - Step 1946: {'lr': 0.0004865, 'samples': 996864, 'steps': 1946, 'loss/train': 4.366998672485352} +02/24/2022 04:05:46 - INFO - codeparrot_training - Step 1947: {'lr': 0.00048675000000000004, 'samples': 997376, 'steps': 1947, 'loss/train': 3.3206379413604736} +02/24/2022 04:05:52 - INFO - codeparrot_training - Step 1948: {'lr': 0.000487, 'samples': 997888, 'steps': 1948, 'loss/train': 3.3380088806152344} +02/24/2022 04:05:55 - INFO - codeparrot_training - Step 1949: {'lr': 0.00048725000000000005, 'samples': 998400, 'steps': 1949, 'loss/train': 3.6555140018463135} +02/24/2022 04:06:01 - INFO - codeparrot_training - Step 1950: {'lr': 0.0004875, 'samples': 998912, 'steps': 1950, 'loss/train': 2.818737745285034} +02/24/2022 04:06:05 - INFO - codeparrot_training - Step 1951: {'lr': 0.00048775, 'samples': 999424, 'steps': 1951, 'loss/train': 2.6505634784698486} +02/24/2022 04:06:10 - INFO - codeparrot_training - Step 1952: {'lr': 0.000488, 'samples': 999936, 'steps': 1952, 'loss/train': 3.7129604816436768} +02/24/2022 04:06:14 - INFO - codeparrot_training - Step 1953: {'lr': 0.00048825, 'samples': 1000448, 'steps': 1953, 'loss/train': 4.589820861816406} +02/24/2022 04:06:19 - INFO - codeparrot_training - Step 1954: {'lr': 0.0004885, 'samples': 1000960, 'steps': 1954, 'loss/train': 3.3188226222991943} +02/24/2022 04:06:23 - INFO - codeparrot_training - Step 1955: {'lr': 0.00048875, 'samples': 1001472, 'steps': 1955, 'loss/train': 4.6001410484313965} +02/24/2022 04:06:28 - INFO - codeparrot_training - Step 1956: {'lr': 0.000489, 'samples': 1001984, 'steps': 1956, 'loss/train': 3.3966524600982666} +02/24/2022 04:06:32 - INFO - codeparrot_training - Step 1957: {'lr': 0.00048925, 'samples': 1002496, 'steps': 1957, 'loss/train': 3.7696356773376465} +02/24/2022 04:06:37 - INFO - codeparrot_training - Step 1958: {'lr': 0.0004895, 'samples': 1003008, 'steps': 1958, 'loss/train': 1.4602926969528198} +02/24/2022 04:06:41 - INFO - codeparrot_training - Step 1959: {'lr': 0.0004897500000000001, 'samples': 1003520, 'steps': 1959, 'loss/train': 2.9611456394195557} +02/24/2022 04:06:47 - INFO - codeparrot_training - Step 1960: {'lr': 0.00049, 'samples': 1004032, 'steps': 1960, 'loss/train': 4.06137228012085} +02/24/2022 04:06:50 - INFO - codeparrot_training - Step 1961: {'lr': 0.00049025, 'samples': 1004544, 'steps': 1961, 'loss/train': 3.5533761978149414} +02/24/2022 04:06:56 - INFO - codeparrot_training - Step 1962: {'lr': 0.0004905, 'samples': 1005056, 'steps': 1962, 'loss/train': 4.941059112548828} +02/24/2022 04:06:59 - INFO - codeparrot_training - Step 1963: {'lr': 0.0004907500000000001, 'samples': 1005568, 'steps': 1963, 'loss/train': 3.677499532699585} +02/24/2022 04:07:05 - INFO - codeparrot_training - Step 1964: {'lr': 0.000491, 'samples': 1006080, 'steps': 1964, 'loss/train': 2.110358238220215} +02/24/2022 04:07:08 - INFO - codeparrot_training - Step 1965: {'lr': 0.00049125, 'samples': 1006592, 'steps': 1965, 'loss/train': 3.6638340950012207} +02/24/2022 04:07:14 - INFO - codeparrot_training - Step 1966: {'lr': 0.0004915, 'samples': 1007104, 'steps': 1966, 'loss/train': 2.495577335357666} +02/24/2022 04:07:17 - INFO - codeparrot_training - Step 1967: {'lr': 0.00049175, 'samples': 1007616, 'steps': 1967, 'loss/train': 3.320880889892578} +02/24/2022 04:07:23 - INFO - codeparrot_training - Step 1968: {'lr': 0.000492, 'samples': 1008128, 'steps': 1968, 'loss/train': 3.538256883621216} +02/24/2022 04:07:26 - INFO - codeparrot_training - Step 1969: {'lr': 0.0004922500000000001, 'samples': 1008640, 'steps': 1969, 'loss/train': 3.474503517150879} +02/24/2022 04:07:31 - INFO - codeparrot_training - Step 1970: {'lr': 0.0004925, 'samples': 1009152, 'steps': 1970, 'loss/train': 3.4210190773010254} +02/24/2022 04:07:35 - INFO - codeparrot_training - Step 1971: {'lr': 0.00049275, 'samples': 1009664, 'steps': 1971, 'loss/train': 3.2094409465789795} +02/24/2022 04:07:40 - INFO - codeparrot_training - Step 1972: {'lr': 0.0004930000000000001, 'samples': 1010176, 'steps': 1972, 'loss/train': 3.4008500576019287} +02/24/2022 04:07:44 - INFO - codeparrot_training - Step 1973: {'lr': 0.00049325, 'samples': 1010688, 'steps': 1973, 'loss/train': 3.1762261390686035} +02/24/2022 04:07:49 - INFO - codeparrot_training - Step 1974: {'lr': 0.0004935, 'samples': 1011200, 'steps': 1974, 'loss/train': 3.091787338256836} +02/24/2022 04:07:53 - INFO - codeparrot_training - Step 1975: {'lr': 0.00049375, 'samples': 1011712, 'steps': 1975, 'loss/train': 1.7174049615859985} +02/24/2022 04:07:59 - INFO - codeparrot_training - Step 1976: {'lr': 0.000494, 'samples': 1012224, 'steps': 1976, 'loss/train': 4.197392463684082} +02/24/2022 04:08:03 - INFO - codeparrot_training - Step 1977: {'lr': 0.00049425, 'samples': 1012736, 'steps': 1977, 'loss/train': 2.6754541397094727} +02/24/2022 04:08:08 - INFO - codeparrot_training - Step 1978: {'lr': 0.0004945, 'samples': 1013248, 'steps': 1978, 'loss/train': 3.13645601272583} +02/24/2022 04:08:11 - INFO - codeparrot_training - Step 1979: {'lr': 0.0004947500000000001, 'samples': 1013760, 'steps': 1979, 'loss/train': 4.084655284881592} +02/24/2022 04:08:17 - INFO - codeparrot_training - Step 1980: {'lr': 0.000495, 'samples': 1014272, 'steps': 1980, 'loss/train': 4.445559501647949} +02/24/2022 04:08:20 - INFO - codeparrot_training - Step 1981: {'lr': 0.00049525, 'samples': 1014784, 'steps': 1981, 'loss/train': 2.8948557376861572} +02/24/2022 04:08:26 - INFO - codeparrot_training - Step 1982: {'lr': 0.0004955, 'samples': 1015296, 'steps': 1982, 'loss/train': 3.580780029296875} +02/24/2022 04:08:29 - INFO - codeparrot_training - Step 1983: {'lr': 0.00049575, 'samples': 1015808, 'steps': 1983, 'loss/train': 3.9996466636657715} +02/24/2022 04:08:35 - INFO - codeparrot_training - Step 1984: {'lr': 0.000496, 'samples': 1016320, 'steps': 1984, 'loss/train': 3.889043092727661} +02/24/2022 04:08:38 - INFO - codeparrot_training - Step 1985: {'lr': 0.0004962500000000001, 'samples': 1016832, 'steps': 1985, 'loss/train': 2.5638394355773926} +02/24/2022 04:08:45 - INFO - codeparrot_training - Step 1986: {'lr': 0.0004965, 'samples': 1017344, 'steps': 1986, 'loss/train': 4.096147537231445} +02/24/2022 04:08:48 - INFO - codeparrot_training - Step 1987: {'lr': 0.00049675, 'samples': 1017856, 'steps': 1987, 'loss/train': 3.9539005756378174} +02/24/2022 04:08:54 - INFO - codeparrot_training - Step 1988: {'lr': 0.000497, 'samples': 1018368, 'steps': 1988, 'loss/train': 1.1471844911575317} +02/24/2022 04:08:57 - INFO - codeparrot_training - Step 1989: {'lr': 0.0004972500000000001, 'samples': 1018880, 'steps': 1989, 'loss/train': 2.479738235473633} +02/24/2022 04:09:03 - INFO - codeparrot_training - Step 1990: {'lr': 0.0004975, 'samples': 1019392, 'steps': 1990, 'loss/train': 3.169276237487793} +02/24/2022 04:09:06 - INFO - codeparrot_training - Step 1991: {'lr': 0.00049775, 'samples': 1019904, 'steps': 1991, 'loss/train': 3.661322593688965} +02/24/2022 04:09:12 - INFO - codeparrot_training - Step 1992: {'lr': 0.000498, 'samples': 1020416, 'steps': 1992, 'loss/train': 3.7599399089813232} +02/24/2022 04:09:17 - INFO - codeparrot_training - Step 1993: {'lr': 0.00049825, 'samples': 1020928, 'steps': 1993, 'loss/train': 3.4758248329162598} +02/24/2022 04:09:20 - INFO - codeparrot_training - Step 1994: {'lr': 0.0004985, 'samples': 1021440, 'steps': 1994, 'loss/train': 6.070404052734375} +02/24/2022 04:09:27 - INFO - codeparrot_training - Step 1995: {'lr': 0.0004987500000000001, 'samples': 1021952, 'steps': 1995, 'loss/train': 4.1490583419799805} +02/24/2022 04:09:30 - INFO - codeparrot_training - Step 1996: {'lr': 0.000499, 'samples': 1022464, 'steps': 1996, 'loss/train': 3.0770957469940186} +02/24/2022 04:09:36 - INFO - codeparrot_training - Step 1997: {'lr': 0.00049925, 'samples': 1022976, 'steps': 1997, 'loss/train': 3.3806257247924805} +02/24/2022 04:09:39 - INFO - codeparrot_training - Step 1998: {'lr': 0.0004995, 'samples': 1023488, 'steps': 1998, 'loss/train': 2.887777805328369} +02/24/2022 04:09:45 - INFO - codeparrot_training - Step 1999: {'lr': 0.0004997500000000001, 'samples': 1024000, 'steps': 1999, 'loss/train': 9.467606544494629} +02/24/2022 04:09:45 - INFO - codeparrot_training - Evaluating and saving model checkpoint