diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -8349,3 +8349,1009 @@ Use FP16 precision: False 02/24/2022 11:55:18 - INFO - codeparrot_training - Step 7998: {'lr': 0.00048098240443502195, 'samples': 4095488, 'steps': 7998, 'loss/train': 2.3064351081848145} 02/24/2022 11:55:21 - INFO - codeparrot_training - Step 7999: {'lr': 0.000480976144276136, 'samples': 4096000, 'steps': 7999, 'loss/train': 2.3386523723602295} 02/24/2022 11:55:21 - INFO - codeparrot_training - Evaluating and saving model checkpoint +02/24/2022 11:55:38 - WARNING - huggingface_hub.repository - Several commits (8) will be pushed upstream. +02/24/2022 11:55:38 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +02/24/2022 11:56:12 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/multi-code-clippy + 1deb887..869a374 floral-grass-11 -> floral-grass-11 + +02/24/2022 11:56:19 - INFO - codeparrot_training - Step 8000: {'lr': 0.0004809698831278217, 'samples': 4096512, 'steps': 8000, 'loss/train': 2.433603048324585} +02/24/2022 11:56:22 - INFO - codeparrot_training - Step 8001: {'lr': 0.0004809636209901057, 'samples': 4097024, 'steps': 8001, 'loss/train': 2.5256423950195312} +02/24/2022 11:56:28 - INFO - codeparrot_training - Step 8002: {'lr': 0.00048095735786301495, 'samples': 4097536, 'steps': 8002, 'loss/train': 2.812481641769409} +02/24/2022 11:56:32 - INFO - codeparrot_training - Step 8003: {'lr': 0.00048095109374657617, 'samples': 4098048, 'steps': 8003, 'loss/train': 1.7197555303573608} +02/24/2022 11:56:37 - INFO - codeparrot_training - Step 8004: {'lr': 0.00048094482864081625, 'samples': 4098560, 'steps': 8004, 'loss/train': 2.114607810974121} +02/24/2022 11:56:41 - INFO - codeparrot_training - Step 8005: {'lr': 0.00048093856254576196, 'samples': 4099072, 'steps': 8005, 'loss/train': 2.01350998878479} +02/24/2022 11:56:46 - INFO - codeparrot_training - Step 8006: {'lr': 0.0004809322954614403, 'samples': 4099584, 'steps': 8006, 'loss/train': 1.9032108783721924} +02/24/2022 11:56:50 - INFO - codeparrot_training - Step 8007: {'lr': 0.00048092602738787795, 'samples': 4100096, 'steps': 8007, 'loss/train': 2.137026071548462} +02/24/2022 11:56:55 - INFO - codeparrot_training - Step 8008: {'lr': 0.00048091975832510183, 'samples': 4100608, 'steps': 8008, 'loss/train': 2.5735089778900146} +02/24/2022 11:56:59 - INFO - codeparrot_training - Step 8009: {'lr': 0.00048091348827313885, 'samples': 4101120, 'steps': 8009, 'loss/train': 2.4302027225494385} +02/24/2022 11:57:05 - INFO - codeparrot_training - Step 8010: {'lr': 0.0004809072172320157, 'samples': 4101632, 'steps': 8010, 'loss/train': 1.8490601778030396} +02/24/2022 11:57:08 - INFO - codeparrot_training - Step 8011: {'lr': 0.0004809009452017594, 'samples': 4102144, 'steps': 8011, 'loss/train': 1.9899863004684448} +02/24/2022 11:57:14 - INFO - codeparrot_training - Step 8012: {'lr': 0.00048089467218239687, 'samples': 4102656, 'steps': 8012, 'loss/train': 1.6612939834594727} +02/24/2022 11:57:18 - INFO - codeparrot_training - Step 8013: {'lr': 0.0004808883981739548, 'samples': 4103168, 'steps': 8013, 'loss/train': 1.3895426988601685} +02/24/2022 11:57:23 - INFO - codeparrot_training - Step 8014: {'lr': 0.00048088212317646016, 'samples': 4103680, 'steps': 8014, 'loss/train': 1.6364991664886475} +02/24/2022 11:57:27 - INFO - codeparrot_training - Step 8015: {'lr': 0.00048087584718993975, 'samples': 4104192, 'steps': 8015, 'loss/train': 2.0440566539764404} +02/24/2022 11:57:32 - INFO - codeparrot_training - Step 8016: {'lr': 0.0004808695702144206, 'samples': 4104704, 'steps': 8016, 'loss/train': 2.1823010444641113} +02/24/2022 11:57:36 - INFO - codeparrot_training - Step 8017: {'lr': 0.0004808632922499295, 'samples': 4105216, 'steps': 8017, 'loss/train': 2.9521472454071045} +02/24/2022 11:57:41 - INFO - codeparrot_training - Step 8018: {'lr': 0.00048085701329649336, 'samples': 4105728, 'steps': 8018, 'loss/train': 1.6742266416549683} +02/24/2022 11:57:45 - INFO - codeparrot_training - Step 8019: {'lr': 0.0004808507333541391, 'samples': 4106240, 'steps': 8019, 'loss/train': 2.1094822883605957} +02/24/2022 11:57:50 - INFO - codeparrot_training - Step 8020: {'lr': 0.00048084445242289355, 'samples': 4106752, 'steps': 8020, 'loss/train': 2.3094542026519775} +02/24/2022 11:57:54 - INFO - codeparrot_training - Step 8021: {'lr': 0.0004808381705027837, 'samples': 4107264, 'steps': 8021, 'loss/train': 2.991875648498535} +02/24/2022 11:58:00 - INFO - codeparrot_training - Step 8022: {'lr': 0.00048083188759383646, 'samples': 4107776, 'steps': 8022, 'loss/train': 2.4072718620300293} +02/24/2022 11:58:03 - INFO - codeparrot_training - Step 8023: {'lr': 0.00048082560369607863, 'samples': 4108288, 'steps': 8023, 'loss/train': 1.6046478748321533} +02/24/2022 11:58:09 - INFO - codeparrot_training - Step 8024: {'lr': 0.0004808193188095372, 'samples': 4108800, 'steps': 8024, 'loss/train': 2.563946485519409} +02/24/2022 11:58:12 - INFO - codeparrot_training - Step 8025: {'lr': 0.00048081303293423923, 'samples': 4109312, 'steps': 8025, 'loss/train': 2.3632664680480957} +02/24/2022 11:58:18 - INFO - codeparrot_training - Step 8026: {'lr': 0.0004808067460702115, 'samples': 4109824, 'steps': 8026, 'loss/train': 2.011209487915039} +02/24/2022 11:58:21 - INFO - codeparrot_training - Step 8027: {'lr': 0.00048080045821748086, 'samples': 4110336, 'steps': 8027, 'loss/train': 3.8600800037384033} +02/24/2022 11:58:28 - INFO - codeparrot_training - Step 8028: {'lr': 0.00048079416937607436, 'samples': 4110848, 'steps': 8028, 'loss/train': 2.4174904823303223} +02/24/2022 11:58:32 - INFO - codeparrot_training - Step 8029: {'lr': 0.000480787879546019, 'samples': 4111360, 'steps': 8029, 'loss/train': 3.168501377105713} +02/24/2022 11:58:37 - INFO - codeparrot_training - Step 8030: {'lr': 0.00048078158872734157, 'samples': 4111872, 'steps': 8030, 'loss/train': 2.173494577407837} +02/24/2022 11:58:41 - INFO - codeparrot_training - Step 8031: {'lr': 0.0004807752969200691, 'samples': 4112384, 'steps': 8031, 'loss/train': 1.3554211854934692} +02/24/2022 11:58:46 - INFO - codeparrot_training - Step 8032: {'lr': 0.0004807690041242286, 'samples': 4112896, 'steps': 8032, 'loss/train': 2.6872105598449707} +02/24/2022 11:58:50 - INFO - codeparrot_training - Step 8033: {'lr': 0.00048076271033984687, 'samples': 4113408, 'steps': 8033, 'loss/train': 1.967897891998291} +02/24/2022 11:58:55 - INFO - codeparrot_training - Step 8034: {'lr': 0.00048075641556695107, 'samples': 4113920, 'steps': 8034, 'loss/train': 1.3190655708312988} +02/24/2022 11:58:59 - INFO - codeparrot_training - Step 8035: {'lr': 0.000480750119805568, 'samples': 4114432, 'steps': 8035, 'loss/train': 2.2726380825042725} +02/24/2022 11:59:04 - INFO - codeparrot_training - Step 8036: {'lr': 0.0004807438230557247, 'samples': 4114944, 'steps': 8036, 'loss/train': 1.9625569581985474} +02/24/2022 11:59:08 - INFO - codeparrot_training - Step 8037: {'lr': 0.00048073752531744814, 'samples': 4115456, 'steps': 8037, 'loss/train': 2.1068637371063232} +02/24/2022 11:59:14 - INFO - codeparrot_training - Step 8038: {'lr': 0.0004807312265907653, 'samples': 4115968, 'steps': 8038, 'loss/train': 1.6491332054138184} +02/24/2022 11:59:17 - INFO - codeparrot_training - Step 8039: {'lr': 0.0004807249268757031, 'samples': 4116480, 'steps': 8039, 'loss/train': 2.0935187339782715} +02/24/2022 11:59:23 - INFO - codeparrot_training - Step 8040: {'lr': 0.00048071862617228854, 'samples': 4116992, 'steps': 8040, 'loss/train': 2.45920729637146} +02/24/2022 11:59:26 - INFO - codeparrot_training - Step 8041: {'lr': 0.0004807123244805488, 'samples': 4117504, 'steps': 8041, 'loss/train': 3.753026247024536} +02/24/2022 11:59:32 - INFO - codeparrot_training - Step 8042: {'lr': 0.0004807060218005106, 'samples': 4118016, 'steps': 8042, 'loss/train': 2.635624647140503} +02/24/2022 11:59:36 - INFO - codeparrot_training - Step 8043: {'lr': 0.00048069971813220107, 'samples': 4118528, 'steps': 8043, 'loss/train': 2.515420913696289} +02/24/2022 11:59:41 - INFO - codeparrot_training - Step 8044: {'lr': 0.0004806934134756472, 'samples': 4119040, 'steps': 8044, 'loss/train': 4.101273536682129} +02/24/2022 11:59:44 - INFO - codeparrot_training - Step 8045: {'lr': 0.0004806871078308761, 'samples': 4119552, 'steps': 8045, 'loss/train': 4.552318096160889} +02/24/2022 11:59:50 - INFO - codeparrot_training - Step 8046: {'lr': 0.0004806808011979146, 'samples': 4120064, 'steps': 8046, 'loss/train': 2.0850627422332764} +02/24/2022 11:59:53 - INFO - codeparrot_training - Step 8047: {'lr': 0.00048067449357678984, 'samples': 4120576, 'steps': 8047, 'loss/train': 2.1984524726867676} +02/24/2022 12:00:00 - INFO - codeparrot_training - Step 8048: {'lr': 0.0004806681849675287, 'samples': 4121088, 'steps': 8048, 'loss/train': 2.4453721046447754} +02/24/2022 12:00:05 - INFO - codeparrot_training - Step 8049: {'lr': 0.00048066187537015837, 'samples': 4121600, 'steps': 8049, 'loss/train': 2.386759042739868} +02/24/2022 12:00:09 - INFO - codeparrot_training - Step 8050: {'lr': 0.00048065556478470584, 'samples': 4122112, 'steps': 8050, 'loss/train': 2.3366856575012207} +02/24/2022 12:00:14 - INFO - codeparrot_training - Step 8051: {'lr': 0.0004806492532111981, 'samples': 4122624, 'steps': 8051, 'loss/train': 2.038649320602417} +02/24/2022 12:00:18 - INFO - codeparrot_training - Step 8052: {'lr': 0.00048064294064966215, 'samples': 4123136, 'steps': 8052, 'loss/train': 2.676236629486084} +02/24/2022 12:00:23 - INFO - codeparrot_training - Step 8053: {'lr': 0.00048063662710012513, 'samples': 4123648, 'steps': 8053, 'loss/train': 2.9631597995758057} +02/24/2022 12:00:27 - INFO - codeparrot_training - Step 8054: {'lr': 0.000480630312562614, 'samples': 4124160, 'steps': 8054, 'loss/train': 2.490952730178833} +02/24/2022 12:00:32 - INFO - codeparrot_training - Step 8055: {'lr': 0.0004806239970371558, 'samples': 4124672, 'steps': 8055, 'loss/train': 1.6425117254257202} +02/24/2022 12:00:36 - INFO - codeparrot_training - Step 8056: {'lr': 0.0004806176805237777, 'samples': 4125184, 'steps': 8056, 'loss/train': 6.032262325286865} +02/24/2022 12:00:42 - INFO - codeparrot_training - Step 8057: {'lr': 0.0004806113630225066, 'samples': 4125696, 'steps': 8057, 'loss/train': 2.2534685134887695} +02/24/2022 12:00:45 - INFO - codeparrot_training - Step 8058: {'lr': 0.0004806050445333697, 'samples': 4126208, 'steps': 8058, 'loss/train': 1.3106892108917236} +02/24/2022 12:00:51 - INFO - codeparrot_training - Step 8059: {'lr': 0.00048059872505639415, 'samples': 4126720, 'steps': 8059, 'loss/train': 1.9426368474960327} +02/24/2022 12:00:54 - INFO - codeparrot_training - Step 8060: {'lr': 0.0004805924045916067, 'samples': 4127232, 'steps': 8060, 'loss/train': 1.356183648109436} +02/24/2022 12:01:00 - INFO - codeparrot_training - Step 8061: {'lr': 0.00048058608313903474, 'samples': 4127744, 'steps': 8061, 'loss/train': 2.119553565979004} +02/24/2022 12:01:03 - INFO - codeparrot_training - Step 8062: {'lr': 0.0004805797606987051, 'samples': 4128256, 'steps': 8062, 'loss/train': 2.763160467147827} +02/24/2022 12:01:09 - INFO - codeparrot_training - Step 8063: {'lr': 0.0004805734372706451, 'samples': 4128768, 'steps': 8063, 'loss/train': 1.8133933544158936} +02/24/2022 12:01:12 - INFO - codeparrot_training - Step 8064: {'lr': 0.0004805671128548816, 'samples': 4129280, 'steps': 8064, 'loss/train': 2.0286574363708496} +02/24/2022 12:01:18 - INFO - codeparrot_training - Step 8065: {'lr': 0.00048056078745144183, 'samples': 4129792, 'steps': 8065, 'loss/train': 3.311856985092163} +02/24/2022 12:01:21 - INFO - codeparrot_training - Step 8066: {'lr': 0.0004805544610603529, 'samples': 4130304, 'steps': 8066, 'loss/train': 3.5693204402923584} +02/24/2022 12:01:27 - INFO - codeparrot_training - Step 8067: {'lr': 0.00048054813368164184, 'samples': 4130816, 'steps': 8067, 'loss/train': 1.5602513551712036} +02/24/2022 12:01:30 - INFO - codeparrot_training - Step 8068: {'lr': 0.00048054180531533576, 'samples': 4131328, 'steps': 8068, 'loss/train': 3.062683582305908} +02/24/2022 12:01:36 - INFO - codeparrot_training - Step 8069: {'lr': 0.00048053547596146185, 'samples': 4131840, 'steps': 8069, 'loss/train': 1.6411796808242798} +02/24/2022 12:01:39 - INFO - codeparrot_training - Step 8070: {'lr': 0.0004805291456200471, 'samples': 4132352, 'steps': 8070, 'loss/train': 2.829799175262451} +02/24/2022 12:01:45 - INFO - codeparrot_training - Step 8071: {'lr': 0.0004805228142911188, 'samples': 4132864, 'steps': 8071, 'loss/train': 1.6796129941940308} +02/24/2022 12:01:48 - INFO - codeparrot_training - Step 8072: {'lr': 0.0004805164819747038, 'samples': 4133376, 'steps': 8072, 'loss/train': 2.3893637657165527} +02/24/2022 12:01:54 - INFO - codeparrot_training - Step 8073: {'lr': 0.0004805101486708295, 'samples': 4133888, 'steps': 8073, 'loss/train': 1.6499165296554565} +02/24/2022 12:01:58 - INFO - codeparrot_training - Step 8074: {'lr': 0.0004805038143795229, 'samples': 4134400, 'steps': 8074, 'loss/train': 2.346736431121826} +02/24/2022 12:02:03 - INFO - codeparrot_training - Step 8075: {'lr': 0.00048049747910081114, 'samples': 4134912, 'steps': 8075, 'loss/train': 2.0924017429351807} +02/24/2022 12:02:07 - INFO - codeparrot_training - Step 8076: {'lr': 0.0004804911428347214, 'samples': 4135424, 'steps': 8076, 'loss/train': 2.370426654815674} +02/24/2022 12:02:13 - INFO - codeparrot_training - Step 8077: {'lr': 0.0004804848055812807, 'samples': 4135936, 'steps': 8077, 'loss/train': 2.170945644378662} +02/24/2022 12:02:17 - INFO - codeparrot_training - Step 8078: {'lr': 0.0004804784673405164, 'samples': 4136448, 'steps': 8078, 'loss/train': 2.602884531021118} +02/24/2022 12:02:22 - INFO - codeparrot_training - Step 8079: {'lr': 0.00048047212811245545, 'samples': 4136960, 'steps': 8079, 'loss/train': 1.7192648649215698} +02/24/2022 12:02:26 - INFO - codeparrot_training - Step 8080: {'lr': 0.00048046578789712516, 'samples': 4137472, 'steps': 8080, 'loss/train': 2.4500865936279297} +02/24/2022 12:02:31 - INFO - codeparrot_training - Step 8081: {'lr': 0.0004804594466945525, 'samples': 4137984, 'steps': 8081, 'loss/train': 2.430422782897949} +02/24/2022 12:02:35 - INFO - codeparrot_training - Step 8082: {'lr': 0.00048045310450476486, 'samples': 4138496, 'steps': 8082, 'loss/train': 3.008835792541504} +02/24/2022 12:02:40 - INFO - codeparrot_training - Step 8083: {'lr': 0.0004804467613277893, 'samples': 4139008, 'steps': 8083, 'loss/train': 1.8665213584899902} +02/24/2022 12:02:44 - INFO - codeparrot_training - Step 8084: {'lr': 0.00048044041716365296, 'samples': 4139520, 'steps': 8084, 'loss/train': 2.3280811309814453} +02/24/2022 12:02:49 - INFO - codeparrot_training - Step 8085: {'lr': 0.000480434072012383, 'samples': 4140032, 'steps': 8085, 'loss/train': 1.5482207536697388} +02/24/2022 12:02:53 - INFO - codeparrot_training - Step 8086: {'lr': 0.0004804277258740067, 'samples': 4140544, 'steps': 8086, 'loss/train': 2.0478014945983887} +02/24/2022 12:02:59 - INFO - codeparrot_training - Step 8087: {'lr': 0.0004804213787485512, 'samples': 4141056, 'steps': 8087, 'loss/train': 2.316554069519043} +02/24/2022 12:03:03 - INFO - codeparrot_training - Step 8088: {'lr': 0.00048041503063604366, 'samples': 4141568, 'steps': 8088, 'loss/train': 2.178321123123169} +02/24/2022 12:03:08 - INFO - codeparrot_training - Step 8089: {'lr': 0.00048040868153651124, 'samples': 4142080, 'steps': 8089, 'loss/train': 2.8111791610717773} +02/24/2022 12:03:12 - INFO - codeparrot_training - Step 8090: {'lr': 0.00048040233144998123, 'samples': 4142592, 'steps': 8090, 'loss/train': 2.189178228378296} +02/24/2022 12:03:17 - INFO - codeparrot_training - Step 8091: {'lr': 0.0004803959803764808, 'samples': 4143104, 'steps': 8091, 'loss/train': 2.443894624710083} +02/24/2022 12:03:21 - INFO - codeparrot_training - Step 8092: {'lr': 0.0004803896283160372, 'samples': 4143616, 'steps': 8092, 'loss/train': 2.2112085819244385} +02/24/2022 12:03:26 - INFO - codeparrot_training - Step 8093: {'lr': 0.0004803832752686775, 'samples': 4144128, 'steps': 8093, 'loss/train': 3.663377046585083} +02/24/2022 12:03:30 - INFO - codeparrot_training - Step 8094: {'lr': 0.00048037692123442904, 'samples': 4144640, 'steps': 8094, 'loss/train': 2.401536464691162} +02/24/2022 12:03:35 - INFO - codeparrot_training - Step 8095: {'lr': 0.000480370566213319, 'samples': 4145152, 'steps': 8095, 'loss/train': 1.8455917835235596} +02/24/2022 12:03:39 - INFO - codeparrot_training - Step 8096: {'lr': 0.00048036421020537464, 'samples': 4145664, 'steps': 8096, 'loss/train': 3.3369877338409424} +02/24/2022 12:03:45 - INFO - codeparrot_training - Step 8097: {'lr': 0.0004803578532106231, 'samples': 4146176, 'steps': 8097, 'loss/train': 2.3343923091888428} +02/24/2022 12:03:49 - INFO - codeparrot_training - Step 8098: {'lr': 0.00048035149522909174, 'samples': 4146688, 'steps': 8098, 'loss/train': 3.571747303009033} +02/24/2022 12:03:54 - INFO - codeparrot_training - Step 8099: {'lr': 0.0004803451362608076, 'samples': 4147200, 'steps': 8099, 'loss/train': 2.8301188945770264} +02/24/2022 12:03:58 - INFO - codeparrot_training - Step 8100: {'lr': 0.00048033877630579815, 'samples': 4147712, 'steps': 8100, 'loss/train': 1.7285693883895874} +02/24/2022 12:04:03 - INFO - codeparrot_training - Step 8101: {'lr': 0.00048033241536409043, 'samples': 4148224, 'steps': 8101, 'loss/train': 2.335404872894287} +02/24/2022 12:04:09 - INFO - codeparrot_training - Step 8102: {'lr': 0.0004803260534357119, 'samples': 4148736, 'steps': 8102, 'loss/train': 2.0846755504608154} +02/24/2022 12:04:12 - INFO - codeparrot_training - Step 8103: {'lr': 0.00048031969052068956, 'samples': 4149248, 'steps': 8103, 'loss/train': 2.8624277114868164} +02/24/2022 12:04:18 - INFO - codeparrot_training - Step 8104: {'lr': 0.00048031332661905093, 'samples': 4149760, 'steps': 8104, 'loss/train': 2.3115487098693848} +02/24/2022 12:04:21 - INFO - codeparrot_training - Step 8105: {'lr': 0.000480306961730823, 'samples': 4150272, 'steps': 8105, 'loss/train': 0.7021949887275696} +02/24/2022 12:04:27 - INFO - codeparrot_training - Step 8106: {'lr': 0.00048030059585603326, 'samples': 4150784, 'steps': 8106, 'loss/train': 1.5503710508346558} +02/24/2022 12:04:31 - INFO - codeparrot_training - Step 8107: {'lr': 0.0004802942289947089, 'samples': 4151296, 'steps': 8107, 'loss/train': 3.0905518531799316} +02/24/2022 12:04:36 - INFO - codeparrot_training - Step 8108: {'lr': 0.00048028786114687715, 'samples': 4151808, 'steps': 8108, 'loss/train': 1.925814151763916} +02/24/2022 12:04:40 - INFO - codeparrot_training - Step 8109: {'lr': 0.0004802814923125654, 'samples': 4152320, 'steps': 8109, 'loss/train': 1.0505138635635376} +02/24/2022 12:04:45 - INFO - codeparrot_training - Step 8110: {'lr': 0.00048027512249180083, 'samples': 4152832, 'steps': 8110, 'loss/train': 3.64475417137146} +02/24/2022 12:04:49 - INFO - codeparrot_training - Step 8111: {'lr': 0.0004802687516846107, 'samples': 4153344, 'steps': 8111, 'loss/train': 1.3404834270477295} +02/24/2022 12:04:54 - INFO - codeparrot_training - Step 8112: {'lr': 0.0004802623798910224, 'samples': 4153856, 'steps': 8112, 'loss/train': 1.6767250299453735} +02/24/2022 12:04:58 - INFO - codeparrot_training - Step 8113: {'lr': 0.00048025600711106323, 'samples': 4154368, 'steps': 8113, 'loss/train': 0.15420502424240112} +02/24/2022 12:05:04 - INFO - codeparrot_training - Step 8114: {'lr': 0.00048024963334476035, 'samples': 4154880, 'steps': 8114, 'loss/train': 2.6755011081695557} +02/24/2022 12:05:07 - INFO - codeparrot_training - Step 8115: {'lr': 0.00048024325859214123, 'samples': 4155392, 'steps': 8115, 'loss/train': 1.9760112762451172} +02/24/2022 12:05:13 - INFO - codeparrot_training - Step 8116: {'lr': 0.00048023688285323305, 'samples': 4155904, 'steps': 8116, 'loss/train': 1.9275007247924805} +02/24/2022 12:05:16 - INFO - codeparrot_training - Step 8117: {'lr': 0.0004802305061280632, 'samples': 4156416, 'steps': 8117, 'loss/train': 2.2548446655273438} +02/24/2022 12:05:22 - INFO - codeparrot_training - Step 8118: {'lr': 0.0004802241284166589, 'samples': 4156928, 'steps': 8118, 'loss/train': 2.323350191116333} +02/24/2022 12:05:25 - INFO - codeparrot_training - Step 8119: {'lr': 0.00048021774971904765, 'samples': 4157440, 'steps': 8119, 'loss/train': 2.828622817993164} +02/24/2022 12:05:31 - INFO - codeparrot_training - Step 8120: {'lr': 0.0004802113700352566, 'samples': 4157952, 'steps': 8120, 'loss/train': 0.19782328605651855} +02/24/2022 12:05:34 - INFO - codeparrot_training - Step 8121: {'lr': 0.0004802049893653131, 'samples': 4158464, 'steps': 8121, 'loss/train': 1.5305522680282593} +02/24/2022 12:05:40 - INFO - codeparrot_training - Step 8122: {'lr': 0.0004801986077092446, 'samples': 4158976, 'steps': 8122, 'loss/train': 0.35005131363868713} +02/24/2022 12:05:44 - INFO - codeparrot_training - Step 8123: {'lr': 0.0004801922250670783, 'samples': 4159488, 'steps': 8123, 'loss/train': 2.779991865158081} +02/24/2022 12:05:49 - INFO - codeparrot_training - Step 8124: {'lr': 0.0004801858414388416, 'samples': 4160000, 'steps': 8124, 'loss/train': 1.8902994394302368} +02/24/2022 12:05:53 - INFO - codeparrot_training - Step 8125: {'lr': 0.0004801794568245619, 'samples': 4160512, 'steps': 8125, 'loss/train': 3.0531387329101562} +02/24/2022 12:05:58 - INFO - codeparrot_training - Step 8126: {'lr': 0.00048017307122426653, 'samples': 4161024, 'steps': 8126, 'loss/train': 1.2154006958007812} +02/24/2022 12:06:02 - INFO - codeparrot_training - Step 8127: {'lr': 0.0004801666846379827, 'samples': 4161536, 'steps': 8127, 'loss/train': 2.110605239868164} +02/24/2022 12:06:07 - INFO - codeparrot_training - Step 8128: {'lr': 0.00048016029706573793, 'samples': 4162048, 'steps': 8128, 'loss/train': 2.361940383911133} +02/24/2022 12:06:11 - INFO - codeparrot_training - Step 8129: {'lr': 0.0004801539085075596, 'samples': 4162560, 'steps': 8129, 'loss/train': 2.6462466716766357} +02/24/2022 12:06:16 - INFO - codeparrot_training - Step 8130: {'lr': 0.0004801475189634749, 'samples': 4163072, 'steps': 8130, 'loss/train': 2.303628444671631} +02/24/2022 12:06:20 - INFO - codeparrot_training - Step 8131: {'lr': 0.0004801411284335114, 'samples': 4163584, 'steps': 8131, 'loss/train': 2.897541046142578} +02/24/2022 12:06:26 - INFO - codeparrot_training - Step 8132: {'lr': 0.0004801347369176963, 'samples': 4164096, 'steps': 8132, 'loss/train': 2.826315402984619} +02/24/2022 12:06:30 - INFO - codeparrot_training - Step 8133: {'lr': 0.0004801283444160571, 'samples': 4164608, 'steps': 8133, 'loss/train': 1.4086928367614746} +02/24/2022 12:06:35 - INFO - codeparrot_training - Step 8134: {'lr': 0.0004801219509286212, 'samples': 4165120, 'steps': 8134, 'loss/train': 1.6862421035766602} +02/24/2022 12:06:39 - INFO - codeparrot_training - Step 8135: {'lr': 0.00048011555645541585, 'samples': 4165632, 'steps': 8135, 'loss/train': 0.4918065369129181} +02/24/2022 12:06:44 - INFO - codeparrot_training - Step 8136: {'lr': 0.00048010916099646854, 'samples': 4166144, 'steps': 8136, 'loss/train': 2.506182909011841} +02/24/2022 12:06:47 - INFO - codeparrot_training - Step 8137: {'lr': 0.0004801027645518067, 'samples': 4166656, 'steps': 8137, 'loss/train': 2.2698466777801514} +02/24/2022 12:06:53 - INFO - codeparrot_training - Step 8138: {'lr': 0.00048009636712145764, 'samples': 4167168, 'steps': 8138, 'loss/train': 2.1795644760131836} +02/24/2022 12:06:57 - INFO - codeparrot_training - Step 8139: {'lr': 0.00048008996870544887, 'samples': 4167680, 'steps': 8139, 'loss/train': 1.3044347763061523} +02/24/2022 12:07:02 - INFO - codeparrot_training - Step 8140: {'lr': 0.0004800835693038076, 'samples': 4168192, 'steps': 8140, 'loss/train': 2.018707513809204} +02/24/2022 12:07:06 - INFO - codeparrot_training - Step 8141: {'lr': 0.0004800771689165615, 'samples': 4168704, 'steps': 8141, 'loss/train': 1.9235914945602417} +02/24/2022 12:07:11 - INFO - codeparrot_training - Step 8142: {'lr': 0.00048007076754373785, 'samples': 4169216, 'steps': 8142, 'loss/train': 1.0019667148590088} +02/24/2022 12:07:15 - INFO - codeparrot_training - Step 8143: {'lr': 0.00048006436518536403, 'samples': 4169728, 'steps': 8143, 'loss/train': 2.8660309314727783} +02/24/2022 12:07:22 - INFO - codeparrot_training - Step 8144: {'lr': 0.0004800579618414676, 'samples': 4170240, 'steps': 8144, 'loss/train': 1.065003752708435} +02/24/2022 12:07:25 - INFO - codeparrot_training - Step 8145: {'lr': 0.00048005155751207584, 'samples': 4170752, 'steps': 8145, 'loss/train': 0.17463712394237518} +02/24/2022 12:07:31 - INFO - codeparrot_training - Step 8146: {'lr': 0.0004800451521972163, 'samples': 4171264, 'steps': 8146, 'loss/train': 2.980131149291992} +02/24/2022 12:07:34 - INFO - codeparrot_training - Step 8147: {'lr': 0.0004800387458969164, 'samples': 4171776, 'steps': 8147, 'loss/train': 2.8523271083831787} +02/24/2022 12:07:40 - INFO - codeparrot_training - Step 8148: {'lr': 0.00048003233861120356, 'samples': 4172288, 'steps': 8148, 'loss/train': 2.7215511798858643} +02/24/2022 12:07:43 - INFO - codeparrot_training - Step 8149: {'lr': 0.00048002593034010516, 'samples': 4172800, 'steps': 8149, 'loss/train': 2.1218414306640625} +02/24/2022 12:07:49 - INFO - codeparrot_training - Step 8150: {'lr': 0.00048001952108364876, 'samples': 4173312, 'steps': 8150, 'loss/train': 2.644815683364868} +02/24/2022 12:07:52 - INFO - codeparrot_training - Step 8151: {'lr': 0.00048001311084186173, 'samples': 4173824, 'steps': 8151, 'loss/train': 2.0464587211608887} +02/24/2022 12:07:58 - INFO - codeparrot_training - Step 8152: {'lr': 0.0004800066996147716, 'samples': 4174336, 'steps': 8152, 'loss/train': 2.6863176822662354} +02/24/2022 12:08:05 - INFO - codeparrot_training - Step 8153: {'lr': 0.0004800002874024058, 'samples': 4174848, 'steps': 8153, 'loss/train': 2.6502692699432373} +02/24/2022 12:08:08 - INFO - codeparrot_training - Step 8154: {'lr': 0.0004799938742047918, 'samples': 4175360, 'steps': 8154, 'loss/train': 1.6076663732528687} +02/24/2022 12:08:14 - INFO - codeparrot_training - Step 8155: {'lr': 0.0004799874600219571, 'samples': 4175872, 'steps': 8155, 'loss/train': 2.502009391784668} +02/24/2022 12:08:17 - INFO - codeparrot_training - Step 8156: {'lr': 0.00047998104485392915, 'samples': 4176384, 'steps': 8156, 'loss/train': 2.1580681800842285} +02/24/2022 12:08:23 - INFO - codeparrot_training - Step 8157: {'lr': 0.0004799746287007354, 'samples': 4176896, 'steps': 8157, 'loss/train': 1.8411126136779785} +02/24/2022 12:08:26 - INFO - codeparrot_training - Step 8158: {'lr': 0.00047996821156240333, 'samples': 4177408, 'steps': 8158, 'loss/train': 2.3313233852386475} +02/24/2022 12:08:30 - INFO - codeparrot_training - Step 8159: {'lr': 0.0004799617934389605, 'samples': 4177920, 'steps': 8159, 'loss/train': 2.6959192752838135} +02/24/2022 12:08:35 - INFO - codeparrot_training - Step 8160: {'lr': 0.00047995537433043444, 'samples': 4178432, 'steps': 8160, 'loss/train': 1.8789721727371216} +02/24/2022 12:08:39 - INFO - codeparrot_training - Step 8161: {'lr': 0.00047994895423685246, 'samples': 4178944, 'steps': 8161, 'loss/train': 1.0363401174545288} +02/24/2022 12:08:44 - INFO - codeparrot_training - Step 8162: {'lr': 0.0004799425331582423, 'samples': 4179456, 'steps': 8162, 'loss/train': 1.6722670793533325} +02/24/2022 12:08:48 - INFO - codeparrot_training - Step 8163: {'lr': 0.00047993611109463125, 'samples': 4179968, 'steps': 8163, 'loss/train': 2.320652723312378} +02/24/2022 12:08:54 - INFO - codeparrot_training - Step 8164: {'lr': 0.00047992968804604693, 'samples': 4180480, 'steps': 8164, 'loss/train': 2.011254072189331} +02/24/2022 12:08:57 - INFO - codeparrot_training - Step 8165: {'lr': 0.00047992326401251686, 'samples': 4180992, 'steps': 8165, 'loss/train': 2.0376811027526855} +02/24/2022 12:09:03 - INFO - codeparrot_training - Step 8166: {'lr': 0.0004799168389940685, 'samples': 4181504, 'steps': 8166, 'loss/train': 2.3124780654907227} +02/24/2022 12:09:06 - INFO - codeparrot_training - Step 8167: {'lr': 0.00047991041299072946, 'samples': 4182016, 'steps': 8167, 'loss/train': 2.154853105545044} +02/24/2022 12:09:12 - INFO - codeparrot_training - Step 8168: {'lr': 0.00047990398600252713, 'samples': 4182528, 'steps': 8168, 'loss/train': 2.822540521621704} +02/24/2022 12:09:15 - INFO - codeparrot_training - Step 8169: {'lr': 0.0004798975580294892, 'samples': 4183040, 'steps': 8169, 'loss/train': 2.153313159942627} +02/24/2022 12:09:21 - INFO - codeparrot_training - Step 8170: {'lr': 0.0004798911290716431, 'samples': 4183552, 'steps': 8170, 'loss/train': 1.9812062978744507} +02/24/2022 12:09:24 - INFO - codeparrot_training - Step 8171: {'lr': 0.0004798846991290164, 'samples': 4184064, 'steps': 8171, 'loss/train': 1.9204516410827637} +02/24/2022 12:09:30 - INFO - codeparrot_training - Step 8172: {'lr': 0.0004798782682016367, 'samples': 4184576, 'steps': 8172, 'loss/train': 1.9890244007110596} +02/24/2022 12:09:33 - INFO - codeparrot_training - Step 8173: {'lr': 0.0004798718362895315, 'samples': 4185088, 'steps': 8173, 'loss/train': 2.2296829223632812} +02/24/2022 12:09:39 - INFO - codeparrot_training - Step 8174: {'lr': 0.0004798654033927283, 'samples': 4185600, 'steps': 8174, 'loss/train': 1.7498815059661865} +02/24/2022 12:09:42 - INFO - codeparrot_training - Step 8175: {'lr': 0.00047985896951125464, 'samples': 4186112, 'steps': 8175, 'loss/train': 2.340967893600464} +02/24/2022 12:09:48 - INFO - codeparrot_training - Step 8176: {'lr': 0.00047985253464513823, 'samples': 4186624, 'steps': 8176, 'loss/train': 1.6529486179351807} +02/24/2022 12:09:51 - INFO - codeparrot_training - Step 8177: {'lr': 0.00047984609879440655, 'samples': 4187136, 'steps': 8177, 'loss/train': 2.792949914932251} +02/24/2022 12:09:57 - INFO - codeparrot_training - Step 8178: {'lr': 0.0004798396619590871, 'samples': 4187648, 'steps': 8178, 'loss/train': 2.7088193893432617} +02/24/2022 12:10:00 - INFO - codeparrot_training - Step 8179: {'lr': 0.0004798332241392076, 'samples': 4188160, 'steps': 8179, 'loss/train': 1.6455278396606445} +02/24/2022 12:10:07 - INFO - codeparrot_training - Step 8180: {'lr': 0.0004798267853347955, 'samples': 4188672, 'steps': 8180, 'loss/train': 2.6440117359161377} +02/24/2022 12:10:10 - INFO - codeparrot_training - Step 8181: {'lr': 0.00047982034554587837, 'samples': 4189184, 'steps': 8181, 'loss/train': 2.4521286487579346} +02/24/2022 12:10:16 - INFO - codeparrot_training - Step 8182: {'lr': 0.000479813904772484, 'samples': 4189696, 'steps': 8182, 'loss/train': 3.2595694065093994} +02/24/2022 12:10:19 - INFO - codeparrot_training - Step 8183: {'lr': 0.0004798074630146397, 'samples': 4190208, 'steps': 8183, 'loss/train': 1.3921838998794556} +02/24/2022 12:10:25 - INFO - codeparrot_training - Step 8184: {'lr': 0.0004798010202723733, 'samples': 4190720, 'steps': 8184, 'loss/train': 1.9243848323822021} +02/24/2022 12:10:28 - INFO - codeparrot_training - Step 8185: {'lr': 0.00047979457654571223, 'samples': 4191232, 'steps': 8185, 'loss/train': 2.5567169189453125} +02/24/2022 12:10:34 - INFO - codeparrot_training - Step 8186: {'lr': 0.0004797881318346842, 'samples': 4191744, 'steps': 8186, 'loss/train': 1.6026275157928467} +02/24/2022 12:10:37 - INFO - codeparrot_training - Step 8187: {'lr': 0.00047978168613931684, 'samples': 4192256, 'steps': 8187, 'loss/train': 2.5572195053100586} +02/24/2022 12:10:43 - INFO - codeparrot_training - Step 8188: {'lr': 0.0004797752394596376, 'samples': 4192768, 'steps': 8188, 'loss/train': 2.0416271686553955} +02/24/2022 12:10:49 - INFO - codeparrot_training - Step 8189: {'lr': 0.0004797687917956742, 'samples': 4193280, 'steps': 8189, 'loss/train': 2.3751988410949707} +02/24/2022 12:10:52 - INFO - codeparrot_training - Step 8190: {'lr': 0.0004797623431474543, 'samples': 4193792, 'steps': 8190, 'loss/train': 2.909899950027466} +02/24/2022 12:10:58 - INFO - codeparrot_training - Step 8191: {'lr': 0.0004797558935150055, 'samples': 4194304, 'steps': 8191, 'loss/train': 2.1544458866119385} +02/24/2022 12:11:01 - INFO - codeparrot_training - Step 8192: {'lr': 0.0004797494428983553, 'samples': 4194816, 'steps': 8192, 'loss/train': 2.2693026065826416} +02/24/2022 12:11:07 - INFO - codeparrot_training - Step 8193: {'lr': 0.0004797429912975316, 'samples': 4195328, 'steps': 8193, 'loss/train': 2.261376142501831} +02/24/2022 12:11:10 - INFO - codeparrot_training - Step 8194: {'lr': 0.00047973653871256173, 'samples': 4195840, 'steps': 8194, 'loss/train': 3.045262575149536} +02/24/2022 12:11:16 - INFO - codeparrot_training - Step 8195: {'lr': 0.00047973008514347353, 'samples': 4196352, 'steps': 8195, 'loss/train': 1.2056047916412354} +02/24/2022 12:11:19 - INFO - codeparrot_training - Step 8196: {'lr': 0.00047972363059029465, 'samples': 4196864, 'steps': 8196, 'loss/train': 1.6056427955627441} +02/24/2022 12:11:25 - INFO - codeparrot_training - Step 8197: {'lr': 0.0004797171750530526, 'samples': 4197376, 'steps': 8197, 'loss/train': 1.9963260889053345} +02/24/2022 12:11:28 - INFO - codeparrot_training - Step 8198: {'lr': 0.00047971071853177515, 'samples': 4197888, 'steps': 8198, 'loss/train': 2.772096872329712} +02/24/2022 12:11:35 - INFO - codeparrot_training - Step 8199: {'lr': 0.0004797042610264899, 'samples': 4198400, 'steps': 8199, 'loss/train': 2.4937896728515625} +02/24/2022 12:11:38 - INFO - codeparrot_training - Step 8200: {'lr': 0.0004796978025372246, 'samples': 4198912, 'steps': 8200, 'loss/train': 2.0932905673980713} +02/24/2022 12:11:44 - INFO - codeparrot_training - Step 8201: {'lr': 0.0004796913430640068, 'samples': 4199424, 'steps': 8201, 'loss/train': 2.6117615699768066} +02/24/2022 12:11:47 - INFO - codeparrot_training - Step 8202: {'lr': 0.0004796848826068642, 'samples': 4199936, 'steps': 8202, 'loss/train': 2.5743870735168457} +02/24/2022 12:11:53 - INFO - codeparrot_training - Step 8203: {'lr': 0.00047967842116582453, 'samples': 4200448, 'steps': 8203, 'loss/train': 2.954094409942627} +02/24/2022 12:11:56 - INFO - codeparrot_training - Step 8204: {'lr': 0.00047967195874091547, 'samples': 4200960, 'steps': 8204, 'loss/train': 1.1489640474319458} +02/24/2022 12:12:02 - INFO - codeparrot_training - Step 8205: {'lr': 0.00047966549533216466, 'samples': 4201472, 'steps': 8205, 'loss/train': 2.8725554943084717} +02/24/2022 12:12:05 - INFO - codeparrot_training - Step 8206: {'lr': 0.00047965903093959974, 'samples': 4201984, 'steps': 8206, 'loss/train': 3.312897205352783} +02/24/2022 12:12:11 - INFO - codeparrot_training - Step 8207: {'lr': 0.0004796525655632484, 'samples': 4202496, 'steps': 8207, 'loss/train': 1.7352651357650757} +02/24/2022 12:12:14 - INFO - codeparrot_training - Step 8208: {'lr': 0.0004796460992031385, 'samples': 4203008, 'steps': 8208, 'loss/train': 1.9571641683578491} +02/24/2022 12:12:21 - INFO - codeparrot_training - Step 8209: {'lr': 0.0004796396318592976, 'samples': 4203520, 'steps': 8209, 'loss/train': 2.4581120014190674} +02/24/2022 12:12:24 - INFO - codeparrot_training - Step 8210: {'lr': 0.00047963316353175344, 'samples': 4204032, 'steps': 8210, 'loss/train': 1.0137072801589966} +02/24/2022 12:12:30 - INFO - codeparrot_training - Step 8211: {'lr': 0.00047962669422053374, 'samples': 4204544, 'steps': 8211, 'loss/train': 1.7872384786605835} +02/24/2022 12:12:33 - INFO - codeparrot_training - Step 8212: {'lr': 0.0004796202239256662, 'samples': 4205056, 'steps': 8212, 'loss/train': 2.5947911739349365} +02/24/2022 12:12:39 - INFO - codeparrot_training - Step 8213: {'lr': 0.0004796137526471785, 'samples': 4205568, 'steps': 8213, 'loss/train': 1.6550679206848145} +02/24/2022 12:12:42 - INFO - codeparrot_training - Step 8214: {'lr': 0.0004796072803850984, 'samples': 4206080, 'steps': 8214, 'loss/train': 1.2377721071243286} +02/24/2022 12:12:48 - INFO - codeparrot_training - Step 8215: {'lr': 0.00047960080713945364, 'samples': 4206592, 'steps': 8215, 'loss/train': 4.210224628448486} +02/24/2022 12:12:51 - INFO - codeparrot_training - Step 8216: {'lr': 0.0004795943329102719, 'samples': 4207104, 'steps': 8216, 'loss/train': 2.362583875656128} +02/24/2022 12:12:57 - INFO - codeparrot_training - Step 8217: {'lr': 0.00047958785769758094, 'samples': 4207616, 'steps': 8217, 'loss/train': 1.7497318983078003} +02/24/2022 12:13:00 - INFO - codeparrot_training - Step 8218: {'lr': 0.0004795813815014085, 'samples': 4208128, 'steps': 8218, 'loss/train': 2.7496001720428467} +02/24/2022 12:13:06 - INFO - codeparrot_training - Step 8219: {'lr': 0.0004795749043217824, 'samples': 4208640, 'steps': 8219, 'loss/train': 2.711496591567993} +02/24/2022 12:13:09 - INFO - codeparrot_training - Step 8220: {'lr': 0.0004795684261587302, 'samples': 4209152, 'steps': 8220, 'loss/train': 2.292638063430786} +02/24/2022 12:13:15 - INFO - codeparrot_training - Step 8221: {'lr': 0.00047956194701227983, 'samples': 4209664, 'steps': 8221, 'loss/train': 2.5446643829345703} +02/24/2022 12:13:18 - INFO - codeparrot_training - Step 8222: {'lr': 0.000479555466882459, 'samples': 4210176, 'steps': 8222, 'loss/train': 1.4770768880844116} +02/24/2022 12:13:24 - INFO - codeparrot_training - Step 8223: {'lr': 0.00047954898576929534, 'samples': 4210688, 'steps': 8223, 'loss/train': 1.0413466691970825} +02/24/2022 12:13:27 - INFO - codeparrot_training - Step 8224: {'lr': 0.0004795425036728168, 'samples': 4211200, 'steps': 8224, 'loss/train': 2.090332269668579} +02/24/2022 12:13:33 - INFO - codeparrot_training - Step 8225: {'lr': 0.000479536020593051, 'samples': 4211712, 'steps': 8225, 'loss/train': 1.7069073915481567} +02/24/2022 12:13:37 - INFO - codeparrot_training - Step 8226: {'lr': 0.0004795295365300258, 'samples': 4212224, 'steps': 8226, 'loss/train': 1.806524395942688} +02/24/2022 12:13:42 - INFO - codeparrot_training - Step 8227: {'lr': 0.00047952305148376895, 'samples': 4212736, 'steps': 8227, 'loss/train': 2.024029493331909} +02/24/2022 12:13:46 - INFO - codeparrot_training - Step 8228: {'lr': 0.0004795165654543082, 'samples': 4213248, 'steps': 8228, 'loss/train': 2.5008585453033447} +02/24/2022 12:13:51 - INFO - codeparrot_training - Step 8229: {'lr': 0.0004795100784416714, 'samples': 4213760, 'steps': 8229, 'loss/train': 2.52839732170105} +02/24/2022 12:13:55 - INFO - codeparrot_training - Step 8230: {'lr': 0.0004795035904458863, 'samples': 4214272, 'steps': 8230, 'loss/train': 1.7009122371673584} +02/24/2022 12:14:00 - INFO - codeparrot_training - Step 8231: {'lr': 0.00047949710146698066, 'samples': 4214784, 'steps': 8231, 'loss/train': 2.181901693344116} +02/24/2022 12:14:04 - INFO - codeparrot_training - Step 8232: {'lr': 0.0004794906115049824, 'samples': 4215296, 'steps': 8232, 'loss/train': 1.8688772916793823} +02/24/2022 12:14:09 - INFO - codeparrot_training - Step 8233: {'lr': 0.00047948412055991916, 'samples': 4215808, 'steps': 8233, 'loss/train': 2.3490421772003174} +02/24/2022 12:14:13 - INFO - codeparrot_training - Step 8234: {'lr': 0.0004794776286318188, 'samples': 4216320, 'steps': 8234, 'loss/train': 2.4164299964904785} +02/24/2022 12:14:19 - INFO - codeparrot_training - Step 8235: {'lr': 0.0004794711357207092, 'samples': 4216832, 'steps': 8235, 'loss/train': 1.2209607362747192} +02/24/2022 12:14:23 - INFO - codeparrot_training - Step 8236: {'lr': 0.0004794646418266181, 'samples': 4217344, 'steps': 8236, 'loss/train': 1.6379138231277466} +02/24/2022 12:14:28 - INFO - codeparrot_training - Step 8237: {'lr': 0.0004794581469495733, 'samples': 4217856, 'steps': 8237, 'loss/train': 2.0143113136291504} +02/24/2022 12:14:31 - INFO - codeparrot_training - Step 8238: {'lr': 0.00047945165108960274, 'samples': 4218368, 'steps': 8238, 'loss/train': 2.0794875621795654} +02/24/2022 12:14:37 - INFO - codeparrot_training - Step 8239: {'lr': 0.0004794451542467341, 'samples': 4218880, 'steps': 8239, 'loss/train': 2.4172987937927246} +02/24/2022 12:14:40 - INFO - codeparrot_training - Step 8240: {'lr': 0.00047943865642099525, 'samples': 4219392, 'steps': 8240, 'loss/train': 2.9183919429779053} +02/24/2022 12:14:46 - INFO - codeparrot_training - Step 8241: {'lr': 0.0004794321576124141, 'samples': 4219904, 'steps': 8241, 'loss/train': 2.501436471939087} +02/24/2022 12:14:50 - INFO - codeparrot_training - Step 8242: {'lr': 0.0004794256578210184, 'samples': 4220416, 'steps': 8242, 'loss/train': 2.6892893314361572} +02/24/2022 12:14:55 - INFO - codeparrot_training - Step 8243: {'lr': 0.0004794191570468361, 'samples': 4220928, 'steps': 8243, 'loss/train': 2.8740122318267822} +02/24/2022 12:14:59 - INFO - codeparrot_training - Step 8244: {'lr': 0.00047941265528989496, 'samples': 4221440, 'steps': 8244, 'loss/train': 1.7507708072662354} +02/24/2022 12:15:04 - INFO - codeparrot_training - Step 8245: {'lr': 0.0004794061525502229, 'samples': 4221952, 'steps': 8245, 'loss/train': 2.9183146953582764} +02/24/2022 12:15:08 - INFO - codeparrot_training - Step 8246: {'lr': 0.00047939964882784766, 'samples': 4222464, 'steps': 8246, 'loss/train': 3.189979076385498} +02/24/2022 12:15:14 - INFO - codeparrot_training - Step 8247: {'lr': 0.0004793931441227972, 'samples': 4222976, 'steps': 8247, 'loss/train': 1.870833158493042} +02/24/2022 12:15:17 - INFO - codeparrot_training - Step 8248: {'lr': 0.00047938663843509927, 'samples': 4223488, 'steps': 8248, 'loss/train': 1.9074362516403198} +02/24/2022 12:15:23 - INFO - codeparrot_training - Step 8249: {'lr': 0.00047938013176478193, 'samples': 4224000, 'steps': 8249, 'loss/train': 2.826785087585449} +02/24/2022 12:15:26 - INFO - codeparrot_training - Step 8250: {'lr': 0.0004793736241118728, 'samples': 4224512, 'steps': 8250, 'loss/train': 2.700700521469116} +02/24/2022 12:15:32 - INFO - codeparrot_training - Step 8251: {'lr': 0.0004793671154764, 'samples': 4225024, 'steps': 8251, 'loss/train': 3.159172773361206} +02/24/2022 12:15:35 - INFO - codeparrot_training - Step 8252: {'lr': 0.0004793606058583913, 'samples': 4225536, 'steps': 8252, 'loss/train': 2.0935311317443848} +02/24/2022 12:15:41 - INFO - codeparrot_training - Step 8253: {'lr': 0.0004793540952578746, 'samples': 4226048, 'steps': 8253, 'loss/train': 2.501469612121582} +02/24/2022 12:15:44 - INFO - codeparrot_training - Step 8254: {'lr': 0.0004793475836748777, 'samples': 4226560, 'steps': 8254, 'loss/train': 1.8863047361373901} +02/24/2022 12:15:51 - INFO - codeparrot_training - Step 8255: {'lr': 0.0004793410711094287, 'samples': 4227072, 'steps': 8255, 'loss/train': 2.8314342498779297} +02/24/2022 12:15:54 - INFO - codeparrot_training - Step 8256: {'lr': 0.00047933455756155534, 'samples': 4227584, 'steps': 8256, 'loss/train': 1.3806148767471313} +02/24/2022 12:16:00 - INFO - codeparrot_training - Step 8257: {'lr': 0.00047932804303128557, 'samples': 4228096, 'steps': 8257, 'loss/train': 2.3710362911224365} +02/24/2022 12:16:05 - INFO - codeparrot_training - Step 8258: {'lr': 0.0004793215275186472, 'samples': 4228608, 'steps': 8258, 'loss/train': 2.5845117568969727} +02/24/2022 12:16:09 - INFO - codeparrot_training - Step 8259: {'lr': 0.0004793150110236684, 'samples': 4229120, 'steps': 8259, 'loss/train': 2.2541561126708984} +02/24/2022 12:16:14 - INFO - codeparrot_training - Step 8260: {'lr': 0.00047930849354637674, 'samples': 4229632, 'steps': 8260, 'loss/train': 1.9688395261764526} +02/24/2022 12:16:18 - INFO - codeparrot_training - Step 8261: {'lr': 0.00047930197508680027, 'samples': 4230144, 'steps': 8261, 'loss/train': 2.1843063831329346} +02/24/2022 12:16:23 - INFO - codeparrot_training - Step 8262: {'lr': 0.00047929545564496715, 'samples': 4230656, 'steps': 8262, 'loss/train': 2.6420583724975586} +02/24/2022 12:16:27 - INFO - codeparrot_training - Step 8263: {'lr': 0.0004792889352209049, 'samples': 4231168, 'steps': 8263, 'loss/train': 1.9495543241500854} +02/24/2022 12:16:32 - INFO - codeparrot_training - Step 8264: {'lr': 0.00047928241381464177, 'samples': 4231680, 'steps': 8264, 'loss/train': 1.4433046579360962} +02/24/2022 12:16:36 - INFO - codeparrot_training - Step 8265: {'lr': 0.00047927589142620556, 'samples': 4232192, 'steps': 8265, 'loss/train': 0.4476317763328552} +02/24/2022 12:16:41 - INFO - codeparrot_training - Step 8266: {'lr': 0.0004792693680556243, 'samples': 4232704, 'steps': 8266, 'loss/train': 0.9182693362236023} +02/24/2022 12:16:45 - INFO - codeparrot_training - Step 8267: {'lr': 0.0004792628437029258, 'samples': 4233216, 'steps': 8267, 'loss/train': 2.5585031509399414} +02/24/2022 12:16:50 - INFO - codeparrot_training - Step 8268: {'lr': 0.0004792563183681381, 'samples': 4233728, 'steps': 8268, 'loss/train': 1.8075768947601318} +02/24/2022 12:16:54 - INFO - codeparrot_training - Step 8269: {'lr': 0.0004792497920512891, 'samples': 4234240, 'steps': 8269, 'loss/train': 2.9600419998168945} +02/24/2022 12:17:00 - INFO - codeparrot_training - Step 8270: {'lr': 0.00047924326475240676, 'samples': 4234752, 'steps': 8270, 'loss/train': 2.1123013496398926} +02/24/2022 12:17:03 - INFO - codeparrot_training - Step 8271: {'lr': 0.00047923673647151915, 'samples': 4235264, 'steps': 8271, 'loss/train': 2.3942224979400635} +02/24/2022 12:17:09 - INFO - codeparrot_training - Step 8272: {'lr': 0.00047923020720865413, 'samples': 4235776, 'steps': 8272, 'loss/train': 2.0425853729248047} +02/24/2022 12:17:12 - INFO - codeparrot_training - Step 8273: {'lr': 0.0004792236769638396, 'samples': 4236288, 'steps': 8273, 'loss/train': 2.497483015060425} +02/24/2022 12:17:18 - INFO - codeparrot_training - Step 8274: {'lr': 0.00047921714573710374, 'samples': 4236800, 'steps': 8274, 'loss/train': 2.4915049076080322} +02/24/2022 12:17:21 - INFO - codeparrot_training - Step 8275: {'lr': 0.0004792106135284744, 'samples': 4237312, 'steps': 8275, 'loss/train': 2.1010518074035645} +02/24/2022 12:17:27 - INFO - codeparrot_training - Step 8276: {'lr': 0.00047920408033797954, 'samples': 4237824, 'steps': 8276, 'loss/train': 2.2919366359710693} +02/24/2022 12:17:30 - INFO - codeparrot_training - Step 8277: {'lr': 0.00047919754616564716, 'samples': 4238336, 'steps': 8277, 'loss/train': 2.2083323001861572} +02/24/2022 12:17:36 - INFO - codeparrot_training - Step 8278: {'lr': 0.0004791910110115053, 'samples': 4238848, 'steps': 8278, 'loss/train': 1.7486019134521484} +02/24/2022 12:17:39 - INFO - codeparrot_training - Step 8279: {'lr': 0.0004791844748755819, 'samples': 4239360, 'steps': 8279, 'loss/train': 3.083209753036499} +02/24/2022 12:17:45 - INFO - codeparrot_training - Step 8280: {'lr': 0.00047917793775790503, 'samples': 4239872, 'steps': 8280, 'loss/train': 2.515814781188965} +02/24/2022 12:17:48 - INFO - codeparrot_training - Step 8281: {'lr': 0.00047917139965850266, 'samples': 4240384, 'steps': 8281, 'loss/train': 2.1462748050689697} +02/24/2022 12:17:55 - INFO - codeparrot_training - Step 8282: {'lr': 0.0004791648605774027, 'samples': 4240896, 'steps': 8282, 'loss/train': 2.547936201095581} +02/24/2022 12:17:58 - INFO - codeparrot_training - Step 8283: {'lr': 0.00047915832051463326, 'samples': 4241408, 'steps': 8283, 'loss/train': 1.8159955739974976} +02/24/2022 12:18:04 - INFO - codeparrot_training - Step 8284: {'lr': 0.0004791517794702224, 'samples': 4241920, 'steps': 8284, 'loss/train': 1.4766459465026855} +02/24/2022 12:18:07 - INFO - codeparrot_training - Step 8285: {'lr': 0.00047914523744419803, 'samples': 4242432, 'steps': 8285, 'loss/train': 2.598806858062744} +02/24/2022 12:18:13 - INFO - codeparrot_training - Step 8286: {'lr': 0.00047913869443658825, 'samples': 4242944, 'steps': 8286, 'loss/train': 2.0815885066986084} +02/24/2022 12:18:16 - INFO - codeparrot_training - Step 8287: {'lr': 0.0004791321504474211, 'samples': 4243456, 'steps': 8287, 'loss/train': 1.115213394165039} +02/24/2022 12:18:22 - INFO - codeparrot_training - Step 8288: {'lr': 0.00047912560547672453, 'samples': 4243968, 'steps': 8288, 'loss/train': 1.9663575887680054} +02/24/2022 12:18:25 - INFO - codeparrot_training - Step 8289: {'lr': 0.0004791190595245266, 'samples': 4244480, 'steps': 8289, 'loss/train': 5.791067123413086} +02/24/2022 12:18:31 - INFO - codeparrot_training - Step 8290: {'lr': 0.0004791125125908554, 'samples': 4244992, 'steps': 8290, 'loss/train': 3.670483350753784} +02/24/2022 12:18:34 - INFO - codeparrot_training - Step 8291: {'lr': 0.000479105964675739, 'samples': 4245504, 'steps': 8291, 'loss/train': 1.0139940977096558} +02/24/2022 12:18:40 - INFO - codeparrot_training - Step 8292: {'lr': 0.0004790994157792053, 'samples': 4246016, 'steps': 8292, 'loss/train': 2.2026891708374023} +02/24/2022 12:18:44 - INFO - codeparrot_training - Step 8293: {'lr': 0.0004790928659012825, 'samples': 4246528, 'steps': 8293, 'loss/train': 2.047849416732788} +02/24/2022 12:18:49 - INFO - codeparrot_training - Step 8294: {'lr': 0.00047908631504199855, 'samples': 4247040, 'steps': 8294, 'loss/train': 2.145226001739502} +02/24/2022 12:18:53 - INFO - codeparrot_training - Step 8295: {'lr': 0.00047907976320138163, 'samples': 4247552, 'steps': 8295, 'loss/train': 2.48504376411438} +02/24/2022 12:18:58 - INFO - codeparrot_training - Step 8296: {'lr': 0.00047907321037945973, 'samples': 4248064, 'steps': 8296, 'loss/train': 2.725163221359253} +02/24/2022 12:19:02 - INFO - codeparrot_training - Step 8297: {'lr': 0.0004790666565762609, 'samples': 4248576, 'steps': 8297, 'loss/train': 0.6805282831192017} +02/24/2022 12:19:07 - INFO - codeparrot_training - Step 8298: {'lr': 0.0004790601017918134, 'samples': 4249088, 'steps': 8298, 'loss/train': 4.326928615570068} +02/24/2022 12:19:11 - INFO - codeparrot_training - Step 8299: {'lr': 0.00047905354602614504, 'samples': 4249600, 'steps': 8299, 'loss/train': 1.4267929792404175} +02/24/2022 12:19:16 - INFO - codeparrot_training - Step 8300: {'lr': 0.00047904698927928404, 'samples': 4250112, 'steps': 8300, 'loss/train': 2.0606985092163086} +02/24/2022 12:19:20 - INFO - codeparrot_training - Step 8301: {'lr': 0.0004790404315512584, 'samples': 4250624, 'steps': 8301, 'loss/train': 2.01861572265625} +02/24/2022 12:19:26 - INFO - codeparrot_training - Step 8302: {'lr': 0.0004790338728420963, 'samples': 4251136, 'steps': 8302, 'loss/train': 4.525434970855713} +02/24/2022 12:19:29 - INFO - codeparrot_training - Step 8303: {'lr': 0.0004790273131518259, 'samples': 4251648, 'steps': 8303, 'loss/train': 3.037768602371216} +02/24/2022 12:19:35 - INFO - codeparrot_training - Step 8304: {'lr': 0.00047902075248047515, 'samples': 4252160, 'steps': 8304, 'loss/train': 2.4598560333251953} +02/24/2022 12:19:39 - INFO - codeparrot_training - Step 8305: {'lr': 0.0004790141908280723, 'samples': 4252672, 'steps': 8305, 'loss/train': 1.3979452848434448} +02/24/2022 12:19:44 - INFO - codeparrot_training - Step 8306: {'lr': 0.00047900762819464527, 'samples': 4253184, 'steps': 8306, 'loss/train': 2.760977029800415} +02/24/2022 12:19:48 - INFO - codeparrot_training - Step 8307: {'lr': 0.0004790010645802223, 'samples': 4253696, 'steps': 8307, 'loss/train': 0.6402405500411987} +02/24/2022 12:19:53 - INFO - codeparrot_training - Step 8308: {'lr': 0.0004789944999848316, 'samples': 4254208, 'steps': 8308, 'loss/train': 2.015840530395508} +02/24/2022 12:19:57 - INFO - codeparrot_training - Step 8309: {'lr': 0.00047898793440850104, 'samples': 4254720, 'steps': 8309, 'loss/train': 2.688425064086914} +02/24/2022 12:20:03 - INFO - codeparrot_training - Step 8310: {'lr': 0.0004789813678512589, 'samples': 4255232, 'steps': 8310, 'loss/train': 1.593688726425171} +02/24/2022 12:20:06 - INFO - codeparrot_training - Step 8311: {'lr': 0.0004789748003131333, 'samples': 4255744, 'steps': 8311, 'loss/train': 2.0959279537200928} +02/24/2022 12:20:12 - INFO - codeparrot_training - Step 8312: {'lr': 0.00047896823179415237, 'samples': 4256256, 'steps': 8312, 'loss/train': 2.116199254989624} +02/24/2022 12:20:15 - INFO - codeparrot_training - Step 8313: {'lr': 0.00047896166229434423, 'samples': 4256768, 'steps': 8313, 'loss/train': 1.8071153163909912} +02/24/2022 12:20:21 - INFO - codeparrot_training - Step 8314: {'lr': 0.0004789550918137371, 'samples': 4257280, 'steps': 8314, 'loss/train': 2.653411865234375} +02/24/2022 12:20:24 - INFO - codeparrot_training - Step 8315: {'lr': 0.000478948520352359, 'samples': 4257792, 'steps': 8315, 'loss/train': 2.8427231311798096} +02/24/2022 12:20:30 - INFO - codeparrot_training - Step 8316: {'lr': 0.00047894194791023813, 'samples': 4258304, 'steps': 8316, 'loss/train': 2.527801275253296} +02/24/2022 12:20:33 - INFO - codeparrot_training - Step 8317: {'lr': 0.0004789353744874027, 'samples': 4258816, 'steps': 8317, 'loss/train': 1.9348825216293335} +02/24/2022 12:20:39 - INFO - codeparrot_training - Step 8318: {'lr': 0.0004789288000838808, 'samples': 4259328, 'steps': 8318, 'loss/train': 1.7931863069534302} +02/24/2022 12:20:43 - INFO - codeparrot_training - Step 8319: {'lr': 0.0004789222246997006, 'samples': 4259840, 'steps': 8319, 'loss/train': 1.3822815418243408} +02/24/2022 12:20:48 - INFO - codeparrot_training - Step 8320: {'lr': 0.00047891564833489034, 'samples': 4260352, 'steps': 8320, 'loss/train': 3.32655930519104} +02/24/2022 12:20:52 - INFO - codeparrot_training - Step 8321: {'lr': 0.000478909070989478, 'samples': 4260864, 'steps': 8321, 'loss/train': 2.1469578742980957} +02/24/2022 12:20:57 - INFO - codeparrot_training - Step 8322: {'lr': 0.00047890249266349194, 'samples': 4261376, 'steps': 8322, 'loss/train': 1.6860512495040894} +02/24/2022 12:21:01 - INFO - codeparrot_training - Step 8323: {'lr': 0.0004788959133569604, 'samples': 4261888, 'steps': 8323, 'loss/train': 0.27361732721328735} +02/24/2022 12:21:06 - INFO - codeparrot_training - Step 8324: {'lr': 0.00047888933306991136, 'samples': 4262400, 'steps': 8324, 'loss/train': 2.4335134029388428} +02/24/2022 12:21:10 - INFO - codeparrot_training - Step 8325: {'lr': 0.00047888275180237304, 'samples': 4262912, 'steps': 8325, 'loss/train': 1.720828652381897} +02/24/2022 12:21:15 - INFO - codeparrot_training - Step 8326: {'lr': 0.00047887616955437373, 'samples': 4263424, 'steps': 8326, 'loss/train': 2.173304557800293} +02/24/2022 12:21:19 - INFO - codeparrot_training - Step 8327: {'lr': 0.0004788695863259416, 'samples': 4263936, 'steps': 8327, 'loss/train': 3.2694969177246094} +02/24/2022 12:21:25 - INFO - codeparrot_training - Step 8328: {'lr': 0.0004788630021171049, 'samples': 4264448, 'steps': 8328, 'loss/train': 1.9894022941589355} +02/24/2022 12:21:28 - INFO - codeparrot_training - Step 8329: {'lr': 0.0004788564169278917, 'samples': 4264960, 'steps': 8329, 'loss/train': 1.9709280729293823} +02/24/2022 12:21:34 - INFO - codeparrot_training - Step 8330: {'lr': 0.00047884983075833023, 'samples': 4265472, 'steps': 8330, 'loss/train': 1.9120969772338867} +02/24/2022 12:21:37 - INFO - codeparrot_training - Step 8331: {'lr': 0.00047884324360844885, 'samples': 4265984, 'steps': 8331, 'loss/train': 3.297394275665283} +02/24/2022 12:21:43 - INFO - codeparrot_training - Step 8332: {'lr': 0.0004788366554782756, 'samples': 4266496, 'steps': 8332, 'loss/train': 1.3300307989120483} +02/24/2022 12:21:48 - INFO - codeparrot_training - Step 8333: {'lr': 0.00047883006636783887, 'samples': 4267008, 'steps': 8333, 'loss/train': 0.5932084918022156} +02/24/2022 12:21:52 - INFO - codeparrot_training - Step 8334: {'lr': 0.0004788234762771667, 'samples': 4267520, 'steps': 8334, 'loss/train': 1.8165948390960693} +02/24/2022 12:21:57 - INFO - codeparrot_training - Step 8335: {'lr': 0.0004788168852062875, 'samples': 4268032, 'steps': 8335, 'loss/train': 2.3402628898620605} +02/24/2022 12:22:01 - INFO - codeparrot_training - Step 8336: {'lr': 0.0004788102931552294, 'samples': 4268544, 'steps': 8336, 'loss/train': 2.7658097743988037} +02/24/2022 12:22:07 - INFO - codeparrot_training - Step 8337: {'lr': 0.00047880370012402064, 'samples': 4269056, 'steps': 8337, 'loss/train': 3.3505196571350098} +02/24/2022 12:22:10 - INFO - codeparrot_training - Step 8338: {'lr': 0.0004787971061126895, 'samples': 4269568, 'steps': 8338, 'loss/train': 2.4177515506744385} +02/24/2022 12:22:16 - INFO - codeparrot_training - Step 8339: {'lr': 0.0004787905111212642, 'samples': 4270080, 'steps': 8339, 'loss/train': 0.9822052717208862} +02/24/2022 12:22:19 - INFO - codeparrot_training - Step 8340: {'lr': 0.00047878391514977306, 'samples': 4270592, 'steps': 8340, 'loss/train': 2.2899115085601807} +02/24/2022 12:22:25 - INFO - codeparrot_training - Step 8341: {'lr': 0.0004787773181982442, 'samples': 4271104, 'steps': 8341, 'loss/train': 0.2807779312133789} +02/24/2022 12:22:28 - INFO - codeparrot_training - Step 8342: {'lr': 0.0004787707202667059, 'samples': 4271616, 'steps': 8342, 'loss/train': 2.1107070446014404} +02/24/2022 12:22:34 - INFO - codeparrot_training - Step 8343: {'lr': 0.00047876412135518655, 'samples': 4272128, 'steps': 8343, 'loss/train': 0.5293779969215393} +02/24/2022 12:22:37 - INFO - codeparrot_training - Step 8344: {'lr': 0.0004787575214637144, 'samples': 4272640, 'steps': 8344, 'loss/train': 2.0584027767181396} +02/24/2022 12:22:43 - INFO - codeparrot_training - Step 8345: {'lr': 0.00047875092059231756, 'samples': 4273152, 'steps': 8345, 'loss/train': 2.5660791397094727} +02/24/2022 12:22:46 - INFO - codeparrot_training - Step 8346: {'lr': 0.0004787443187410245, 'samples': 4273664, 'steps': 8346, 'loss/train': 2.752887725830078} +02/24/2022 12:22:52 - INFO - codeparrot_training - Step 8347: {'lr': 0.00047873771590986337, 'samples': 4274176, 'steps': 8347, 'loss/train': 2.2198922634124756} +02/24/2022 12:22:56 - INFO - codeparrot_training - Step 8348: {'lr': 0.00047873111209886245, 'samples': 4274688, 'steps': 8348, 'loss/train': 2.9151782989501953} +02/24/2022 12:23:01 - INFO - codeparrot_training - Step 8349: {'lr': 0.00047872450730805015, 'samples': 4275200, 'steps': 8349, 'loss/train': 0.9172837138175964} +02/24/2022 12:23:05 - INFO - codeparrot_training - Step 8350: {'lr': 0.00047871790153745464, 'samples': 4275712, 'steps': 8350, 'loss/train': 3.037362575531006} +02/24/2022 12:23:11 - INFO - codeparrot_training - Step 8351: {'lr': 0.0004787112947871043, 'samples': 4276224, 'steps': 8351, 'loss/train': 2.521303176879883} +02/24/2022 12:23:14 - INFO - codeparrot_training - Step 8352: {'lr': 0.0004787046870570274, 'samples': 4276736, 'steps': 8352, 'loss/train': 2.4298343658447266} +02/24/2022 12:23:18 - INFO - codeparrot_training - Step 8353: {'lr': 0.00047869807834725225, 'samples': 4277248, 'steps': 8353, 'loss/train': 1.12665593624115} +02/24/2022 12:23:23 - INFO - codeparrot_training - Step 8354: {'lr': 0.0004786914686578071, 'samples': 4277760, 'steps': 8354, 'loss/train': 2.123337745666504} +02/24/2022 12:23:27 - INFO - codeparrot_training - Step 8355: {'lr': 0.00047868485798872044, 'samples': 4278272, 'steps': 8355, 'loss/train': 4.201889514923096} +02/24/2022 12:23:32 - INFO - codeparrot_training - Step 8356: {'lr': 0.00047867824634002034, 'samples': 4278784, 'steps': 8356, 'loss/train': 0.5770955681800842} +02/24/2022 12:23:36 - INFO - codeparrot_training - Step 8357: {'lr': 0.0004786716337117353, 'samples': 4279296, 'steps': 8357, 'loss/train': 2.2288622856140137} +02/24/2022 12:23:41 - INFO - codeparrot_training - Step 8358: {'lr': 0.00047866502010389356, 'samples': 4279808, 'steps': 8358, 'loss/train': 2.534905433654785} +02/24/2022 12:23:45 - INFO - codeparrot_training - Step 8359: {'lr': 0.00047865840551652343, 'samples': 4280320, 'steps': 8359, 'loss/train': 1.0368577241897583} +02/24/2022 12:23:50 - INFO - codeparrot_training - Step 8360: {'lr': 0.0004786517899496534, 'samples': 4280832, 'steps': 8360, 'loss/train': 1.8622485399246216} +02/24/2022 12:23:54 - INFO - codeparrot_training - Step 8361: {'lr': 0.0004786451734033117, 'samples': 4281344, 'steps': 8361, 'loss/train': 1.3333227634429932} +02/24/2022 12:23:59 - INFO - codeparrot_training - Step 8362: {'lr': 0.00047863855587752666, 'samples': 4281856, 'steps': 8362, 'loss/train': 1.9483284950256348} +02/24/2022 12:24:03 - INFO - codeparrot_training - Step 8363: {'lr': 0.0004786319373723266, 'samples': 4282368, 'steps': 8363, 'loss/train': 2.3748581409454346} +02/24/2022 12:24:09 - INFO - codeparrot_training - Step 8364: {'lr': 0.00047862531788774, 'samples': 4282880, 'steps': 8364, 'loss/train': 2.4677064418792725} +02/24/2022 12:24:12 - INFO - codeparrot_training - Step 8365: {'lr': 0.00047861869742379503, 'samples': 4283392, 'steps': 8365, 'loss/train': 0.3311143219470978} +02/24/2022 12:24:18 - INFO - codeparrot_training - Step 8366: {'lr': 0.0004786120759805203, 'samples': 4283904, 'steps': 8366, 'loss/train': 1.3763526678085327} +02/24/2022 12:24:21 - INFO - codeparrot_training - Step 8367: {'lr': 0.0004786054535579439, 'samples': 4284416, 'steps': 8367, 'loss/train': 2.129305601119995} +02/24/2022 12:24:27 - INFO - codeparrot_training - Step 8368: {'lr': 0.0004785988301560944, 'samples': 4284928, 'steps': 8368, 'loss/train': 2.2499935626983643} +02/24/2022 12:24:31 - INFO - codeparrot_training - Step 8369: {'lr': 0.0004785922057750001, 'samples': 4285440, 'steps': 8369, 'loss/train': 1.6785212755203247} +02/24/2022 12:24:36 - INFO - codeparrot_training - Step 8370: {'lr': 0.00047858558041468925, 'samples': 4285952, 'steps': 8370, 'loss/train': 2.668321371078491} +02/24/2022 12:24:39 - INFO - codeparrot_training - Step 8371: {'lr': 0.0004785789540751905, 'samples': 4286464, 'steps': 8371, 'loss/train': 2.5227622985839844} +02/24/2022 12:24:45 - INFO - codeparrot_training - Step 8372: {'lr': 0.00047857232675653207, 'samples': 4286976, 'steps': 8372, 'loss/train': 1.676182508468628} +02/24/2022 12:24:48 - INFO - codeparrot_training - Step 8373: {'lr': 0.0004785656984587423, 'samples': 4287488, 'steps': 8373, 'loss/train': 2.2817189693450928} +02/24/2022 12:24:55 - INFO - codeparrot_training - Step 8374: {'lr': 0.0004785590691818498, 'samples': 4288000, 'steps': 8374, 'loss/train': 1.941512107849121} +02/24/2022 12:24:58 - INFO - codeparrot_training - Step 8375: {'lr': 0.0004785524389258827, 'samples': 4288512, 'steps': 8375, 'loss/train': 1.960147500038147} +02/24/2022 12:25:04 - INFO - codeparrot_training - Step 8376: {'lr': 0.0004785458076908695, 'samples': 4289024, 'steps': 8376, 'loss/train': 2.76289963722229} +02/24/2022 12:25:07 - INFO - codeparrot_training - Step 8377: {'lr': 0.00047853917547683873, 'samples': 4289536, 'steps': 8377, 'loss/train': 3.043684959411621} +02/24/2022 12:25:13 - INFO - codeparrot_training - Step 8378: {'lr': 0.00047853254228381864, 'samples': 4290048, 'steps': 8378, 'loss/train': 2.4906437397003174} +02/24/2022 12:25:16 - INFO - codeparrot_training - Step 8379: {'lr': 0.0004785259081118377, 'samples': 4290560, 'steps': 8379, 'loss/train': 2.2070980072021484} +02/24/2022 12:25:22 - INFO - codeparrot_training - Step 8380: {'lr': 0.0004785192729609244, 'samples': 4291072, 'steps': 8380, 'loss/train': 1.390824794769287} +02/24/2022 12:25:25 - INFO - codeparrot_training - Step 8381: {'lr': 0.00047851263683110706, 'samples': 4291584, 'steps': 8381, 'loss/train': 3.5089597702026367} +02/24/2022 12:25:31 - INFO - codeparrot_training - Step 8382: {'lr': 0.0004785059997224142, 'samples': 4292096, 'steps': 8382, 'loss/train': 1.415643334388733} +02/24/2022 12:25:34 - INFO - codeparrot_training - Step 8383: {'lr': 0.0004784993616348741, 'samples': 4292608, 'steps': 8383, 'loss/train': 2.391145944595337} +02/24/2022 12:25:41 - INFO - codeparrot_training - Step 8384: {'lr': 0.0004784927225685153, 'samples': 4293120, 'steps': 8384, 'loss/train': 1.3958176374435425} +02/24/2022 12:25:44 - INFO - codeparrot_training - Step 8385: {'lr': 0.0004784860825233662, 'samples': 4293632, 'steps': 8385, 'loss/train': 2.319824457168579} +02/24/2022 12:25:50 - INFO - codeparrot_training - Step 8386: {'lr': 0.00047847944149945545, 'samples': 4294144, 'steps': 8386, 'loss/train': 2.8788483142852783} +02/24/2022 12:25:53 - INFO - codeparrot_training - Step 8387: {'lr': 0.00047847279949681117, 'samples': 4294656, 'steps': 8387, 'loss/train': 2.1804423332214355} +02/24/2022 12:25:59 - INFO - codeparrot_training - Step 8388: {'lr': 0.000478466156515462, 'samples': 4295168, 'steps': 8388, 'loss/train': 2.3936657905578613} +02/24/2022 12:26:03 - INFO - codeparrot_training - Step 8389: {'lr': 0.0004784595125554364, 'samples': 4295680, 'steps': 8389, 'loss/train': 2.9301764965057373} +02/24/2022 12:26:08 - INFO - codeparrot_training - Step 8390: {'lr': 0.00047845286761676276, 'samples': 4296192, 'steps': 8390, 'loss/train': 2.4787824153900146} +02/24/2022 12:26:12 - INFO - codeparrot_training - Step 8391: {'lr': 0.00047844622169946954, 'samples': 4296704, 'steps': 8391, 'loss/train': 3.358727216720581} +02/24/2022 12:26:17 - INFO - codeparrot_training - Step 8392: {'lr': 0.0004784395748035853, 'samples': 4297216, 'steps': 8392, 'loss/train': 2.1815829277038574} +02/24/2022 12:26:21 - INFO - codeparrot_training - Step 8393: {'lr': 0.0004784329269291384, 'samples': 4297728, 'steps': 8393, 'loss/train': 0.7497676014900208} +02/24/2022 12:26:26 - INFO - codeparrot_training - Step 8394: {'lr': 0.0004784262780761575, 'samples': 4298240, 'steps': 8394, 'loss/train': 3.2470383644104004} +02/24/2022 12:26:30 - INFO - codeparrot_training - Step 8395: {'lr': 0.00047841962824467086, 'samples': 4298752, 'steps': 8395, 'loss/train': 1.9744614362716675} +02/24/2022 12:26:35 - INFO - codeparrot_training - Step 8396: {'lr': 0.000478412977434707, 'samples': 4299264, 'steps': 8396, 'loss/train': 2.033781051635742} +02/24/2022 12:26:38 - INFO - codeparrot_training - Step 8397: {'lr': 0.0004784063256462946, 'samples': 4299776, 'steps': 8397, 'loss/train': 1.9653384685516357} +02/24/2022 12:26:44 - INFO - codeparrot_training - Step 8398: {'lr': 0.00047839967287946196, 'samples': 4300288, 'steps': 8398, 'loss/train': 0.29436564445495605} +02/24/2022 12:26:47 - INFO - codeparrot_training - Step 8399: {'lr': 0.00047839301913423773, 'samples': 4300800, 'steps': 8399, 'loss/train': 2.555769205093384} +02/24/2022 12:26:54 - INFO - codeparrot_training - Step 8400: {'lr': 0.0004783863644106502, 'samples': 4301312, 'steps': 8400, 'loss/train': 2.3032169342041016} +02/24/2022 12:26:57 - INFO - codeparrot_training - Step 8401: {'lr': 0.0004783797087087281, 'samples': 4301824, 'steps': 8401, 'loss/train': 2.5960450172424316} +02/24/2022 12:27:03 - INFO - codeparrot_training - Step 8402: {'lr': 0.00047837305202849987, 'samples': 4302336, 'steps': 8402, 'loss/train': 1.1665267944335938} +02/24/2022 12:27:06 - INFO - codeparrot_training - Step 8403: {'lr': 0.0004783663943699939, 'samples': 4302848, 'steps': 8403, 'loss/train': 0.24033935368061066} +02/24/2022 12:27:12 - INFO - codeparrot_training - Step 8404: {'lr': 0.00047835973573323885, 'samples': 4303360, 'steps': 8404, 'loss/train': 1.2128452062606812} +02/24/2022 12:27:15 - INFO - codeparrot_training - Step 8405: {'lr': 0.00047835307611826327, 'samples': 4303872, 'steps': 8405, 'loss/train': 1.773014783859253} +02/24/2022 12:27:21 - INFO - codeparrot_training - Step 8406: {'lr': 0.0004783464155250955, 'samples': 4304384, 'steps': 8406, 'loss/train': 2.131950855255127} +02/24/2022 12:27:24 - INFO - codeparrot_training - Step 8407: {'lr': 0.00047833975395376426, 'samples': 4304896, 'steps': 8407, 'loss/train': 2.3662731647491455} +02/24/2022 12:27:30 - INFO - codeparrot_training - Step 8408: {'lr': 0.00047833309140429803, 'samples': 4305408, 'steps': 8408, 'loss/train': 2.124105215072632} +02/24/2022 12:27:33 - INFO - codeparrot_training - Step 8409: {'lr': 0.00047832642787672537, 'samples': 4305920, 'steps': 8409, 'loss/train': 0.120108462870121} +02/24/2022 12:27:40 - INFO - codeparrot_training - Step 8410: {'lr': 0.00047831976337107474, 'samples': 4306432, 'steps': 8410, 'loss/train': 1.3338755369186401} +02/24/2022 12:27:43 - INFO - codeparrot_training - Step 8411: {'lr': 0.00047831309788737476, 'samples': 4306944, 'steps': 8411, 'loss/train': 3.362307071685791} +02/24/2022 12:27:49 - INFO - codeparrot_training - Step 8412: {'lr': 0.000478306431425654, 'samples': 4307456, 'steps': 8412, 'loss/train': 1.5072788000106812} +02/24/2022 12:27:52 - INFO - codeparrot_training - Step 8413: {'lr': 0.0004782997639859409, 'samples': 4307968, 'steps': 8413, 'loss/train': 2.6732122898101807} +02/24/2022 12:27:58 - INFO - codeparrot_training - Step 8414: {'lr': 0.00047829309556826415, 'samples': 4308480, 'steps': 8414, 'loss/train': 2.0217456817626953} +02/24/2022 12:28:01 - INFO - codeparrot_training - Step 8415: {'lr': 0.0004782864261726523, 'samples': 4308992, 'steps': 8415, 'loss/train': 1.818701148033142} +02/24/2022 12:28:07 - INFO - codeparrot_training - Step 8416: {'lr': 0.0004782797557991339, 'samples': 4309504, 'steps': 8416, 'loss/train': 1.957191824913025} +02/24/2022 12:28:12 - INFO - codeparrot_training - Step 8417: {'lr': 0.00047827308444773746, 'samples': 4310016, 'steps': 8417, 'loss/train': 1.8469033241271973} +02/24/2022 12:28:16 - INFO - codeparrot_training - Step 8418: {'lr': 0.00047826641211849165, 'samples': 4310528, 'steps': 8418, 'loss/train': 0.8525946736335754} +02/24/2022 12:28:22 - INFO - codeparrot_training - Step 8419: {'lr': 0.000478259738811425, 'samples': 4311040, 'steps': 8419, 'loss/train': 1.5026800632476807} +02/24/2022 12:28:25 - INFO - codeparrot_training - Step 8420: {'lr': 0.0004782530645265661, 'samples': 4311552, 'steps': 8420, 'loss/train': 2.6665549278259277} +02/24/2022 12:28:31 - INFO - codeparrot_training - Step 8421: {'lr': 0.00047824638926394355, 'samples': 4312064, 'steps': 8421, 'loss/train': 2.0895164012908936} +02/24/2022 12:28:34 - INFO - codeparrot_training - Step 8422: {'lr': 0.0004782397130235859, 'samples': 4312576, 'steps': 8422, 'loss/train': 2.006305694580078} +02/24/2022 12:28:40 - INFO - codeparrot_training - Step 8423: {'lr': 0.0004782330358055219, 'samples': 4313088, 'steps': 8423, 'loss/train': 2.4006519317626953} +02/24/2022 12:28:43 - INFO - codeparrot_training - Step 8424: {'lr': 0.00047822635760977995, 'samples': 4313600, 'steps': 8424, 'loss/train': 2.131471633911133} +02/24/2022 12:28:49 - INFO - codeparrot_training - Step 8425: {'lr': 0.0004782196784363888, 'samples': 4314112, 'steps': 8425, 'loss/train': 1.9225215911865234} +02/24/2022 12:28:52 - INFO - codeparrot_training - Step 8426: {'lr': 0.000478212998285377, 'samples': 4314624, 'steps': 8426, 'loss/train': 1.5440889596939087} +02/24/2022 12:28:58 - INFO - codeparrot_training - Step 8427: {'lr': 0.0004782063171567732, 'samples': 4315136, 'steps': 8427, 'loss/train': 2.0610897541046143} +02/24/2022 12:29:01 - INFO - codeparrot_training - Step 8428: {'lr': 0.000478199635050606, 'samples': 4315648, 'steps': 8428, 'loss/train': 2.043809175491333} +02/24/2022 12:29:07 - INFO - codeparrot_training - Step 8429: {'lr': 0.000478192951966904, 'samples': 4316160, 'steps': 8429, 'loss/train': 0.31734123826026917} +02/24/2022 12:29:11 - INFO - codeparrot_training - Step 8430: {'lr': 0.00047818626790569586, 'samples': 4316672, 'steps': 8430, 'loss/train': 2.2577764987945557} +02/24/2022 12:29:16 - INFO - codeparrot_training - Step 8431: {'lr': 0.00047817958286701026, 'samples': 4317184, 'steps': 8431, 'loss/train': 2.492427349090576} +02/24/2022 12:29:20 - INFO - codeparrot_training - Step 8432: {'lr': 0.00047817289685087575, 'samples': 4317696, 'steps': 8432, 'loss/train': 2.4741947650909424} +02/24/2022 12:29:25 - INFO - codeparrot_training - Step 8433: {'lr': 0.00047816620985732095, 'samples': 4318208, 'steps': 8433, 'loss/train': 1.7228083610534668} +02/24/2022 12:29:29 - INFO - codeparrot_training - Step 8434: {'lr': 0.0004781595218863746, 'samples': 4318720, 'steps': 8434, 'loss/train': 2.0321130752563477} +02/24/2022 12:29:34 - INFO - codeparrot_training - Step 8435: {'lr': 0.00047815283293806533, 'samples': 4319232, 'steps': 8435, 'loss/train': 2.9467849731445312} +02/24/2022 12:29:38 - INFO - codeparrot_training - Step 8436: {'lr': 0.0004781461430124217, 'samples': 4319744, 'steps': 8436, 'loss/train': 1.9760843515396118} +02/24/2022 12:29:43 - INFO - codeparrot_training - Step 8437: {'lr': 0.0004781394521094725, 'samples': 4320256, 'steps': 8437, 'loss/train': 2.0097668170928955} +02/24/2022 12:29:47 - INFO - codeparrot_training - Step 8438: {'lr': 0.00047813276022924634, 'samples': 4320768, 'steps': 8438, 'loss/train': 3.0590879917144775} +02/24/2022 12:29:52 - INFO - codeparrot_training - Step 8439: {'lr': 0.0004781260673717718, 'samples': 4321280, 'steps': 8439, 'loss/train': 1.5833929777145386} +02/24/2022 12:29:56 - INFO - codeparrot_training - Step 8440: {'lr': 0.0004781193735370777, 'samples': 4321792, 'steps': 8440, 'loss/train': 3.2533435821533203} +02/24/2022 12:30:01 - INFO - codeparrot_training - Step 8441: {'lr': 0.0004781126787251926, 'samples': 4322304, 'steps': 8441, 'loss/train': 2.16023588180542} +02/24/2022 12:30:05 - INFO - codeparrot_training - Step 8442: {'lr': 0.0004781059829361453, 'samples': 4322816, 'steps': 8442, 'loss/train': 2.2788326740264893} +02/24/2022 12:30:10 - INFO - codeparrot_training - Step 8443: {'lr': 0.00047809928616996425, 'samples': 4323328, 'steps': 8443, 'loss/train': 3.5258076190948486} +02/24/2022 12:30:14 - INFO - codeparrot_training - Step 8444: {'lr': 0.00047809258842667837, 'samples': 4323840, 'steps': 8444, 'loss/train': 2.1777842044830322} +02/24/2022 12:30:20 - INFO - codeparrot_training - Step 8445: {'lr': 0.00047808588970631627, 'samples': 4324352, 'steps': 8445, 'loss/train': 2.0628180503845215} +02/24/2022 12:30:23 - INFO - codeparrot_training - Step 8446: {'lr': 0.0004780791900089066, 'samples': 4324864, 'steps': 8446, 'loss/train': 1.7411916255950928} +02/24/2022 12:30:29 - INFO - codeparrot_training - Step 8447: {'lr': 0.0004780724893344782, 'samples': 4325376, 'steps': 8447, 'loss/train': 2.3368139266967773} +02/24/2022 12:30:32 - INFO - codeparrot_training - Step 8448: {'lr': 0.00047806578768305963, 'samples': 4325888, 'steps': 8448, 'loss/train': 1.7770050764083862} +02/24/2022 12:30:38 - INFO - codeparrot_training - Step 8449: {'lr': 0.00047805908505467963, 'samples': 4326400, 'steps': 8449, 'loss/train': 2.472324848175049} +02/24/2022 12:30:41 - INFO - codeparrot_training - Step 8450: {'lr': 0.0004780523814493669, 'samples': 4326912, 'steps': 8450, 'loss/train': 2.0360825061798096} +02/24/2022 12:30:47 - INFO - codeparrot_training - Step 8451: {'lr': 0.0004780456768671503, 'samples': 4327424, 'steps': 8451, 'loss/train': 1.8343180418014526} +02/24/2022 12:30:50 - INFO - codeparrot_training - Step 8452: {'lr': 0.0004780389713080583, 'samples': 4327936, 'steps': 8452, 'loss/train': 2.3955178260803223} +02/24/2022 12:30:56 - INFO - codeparrot_training - Step 8453: {'lr': 0.0004780322647721198, 'samples': 4328448, 'steps': 8453, 'loss/train': 1.004384994506836} +02/24/2022 12:30:59 - INFO - codeparrot_training - Step 8454: {'lr': 0.00047802555725936347, 'samples': 4328960, 'steps': 8454, 'loss/train': 2.443615674972534} +02/24/2022 12:31:05 - INFO - codeparrot_training - Step 8455: {'lr': 0.00047801884876981813, 'samples': 4329472, 'steps': 8455, 'loss/train': 1.5048587322235107} +02/24/2022 12:31:09 - INFO - codeparrot_training - Step 8456: {'lr': 0.0004780121393035124, 'samples': 4329984, 'steps': 8456, 'loss/train': 2.0674431324005127} +02/24/2022 12:31:14 - INFO - codeparrot_training - Step 8457: {'lr': 0.00047800542886047506, 'samples': 4330496, 'steps': 8457, 'loss/train': 2.230578660964966} +02/24/2022 12:31:18 - INFO - codeparrot_training - Step 8458: {'lr': 0.00047799871744073485, 'samples': 4331008, 'steps': 8458, 'loss/train': 2.067634105682373} +02/24/2022 12:31:23 - INFO - codeparrot_training - Step 8459: {'lr': 0.00047799200504432054, 'samples': 4331520, 'steps': 8459, 'loss/train': 2.618751287460327} +02/24/2022 12:31:27 - INFO - codeparrot_training - Step 8460: {'lr': 0.0004779852916712609, 'samples': 4332032, 'steps': 8460, 'loss/train': 2.3800833225250244} +02/24/2022 12:31:32 - INFO - codeparrot_training - Step 8461: {'lr': 0.0004779785773215847, 'samples': 4332544, 'steps': 8461, 'loss/train': 2.295905828475952} +02/24/2022 12:31:36 - INFO - codeparrot_training - Step 8462: {'lr': 0.00047797186199532055, 'samples': 4333056, 'steps': 8462, 'loss/train': 4.059459209442139} +02/24/2022 12:31:41 - INFO - codeparrot_training - Step 8463: {'lr': 0.0004779651456924974, 'samples': 4333568, 'steps': 8463, 'loss/train': 2.6968955993652344} +02/24/2022 12:31:45 - INFO - codeparrot_training - Step 8464: {'lr': 0.00047795842841314394, 'samples': 4334080, 'steps': 8464, 'loss/train': 1.1903438568115234} +02/24/2022 12:31:51 - INFO - codeparrot_training - Step 8465: {'lr': 0.000477951710157289, 'samples': 4334592, 'steps': 8465, 'loss/train': 1.9200540781021118} +02/24/2022 12:31:54 - INFO - codeparrot_training - Step 8466: {'lr': 0.00047794499092496123, 'samples': 4335104, 'steps': 8466, 'loss/train': 2.4851508140563965} +02/24/2022 12:32:00 - INFO - codeparrot_training - Step 8467: {'lr': 0.00047793827071618955, 'samples': 4335616, 'steps': 8467, 'loss/train': 1.9378186464309692} +02/24/2022 12:32:03 - INFO - codeparrot_training - Step 8468: {'lr': 0.0004779315495310027, 'samples': 4336128, 'steps': 8468, 'loss/train': 1.8286139965057373} +02/24/2022 12:32:09 - INFO - codeparrot_training - Step 8469: {'lr': 0.00047792482736942955, 'samples': 4336640, 'steps': 8469, 'loss/train': 1.626105546951294} +02/24/2022 12:32:12 - INFO - codeparrot_training - Step 8470: {'lr': 0.00047791810423149873, 'samples': 4337152, 'steps': 8470, 'loss/train': 1.3139162063598633} +02/24/2022 12:32:18 - INFO - codeparrot_training - Step 8471: {'lr': 0.0004779113801172391, 'samples': 4337664, 'steps': 8471, 'loss/train': 2.4346654415130615} +02/24/2022 12:32:23 - INFO - codeparrot_training - Step 8472: {'lr': 0.0004779046550266795, 'samples': 4338176, 'steps': 8472, 'loss/train': 2.1775312423706055} +02/24/2022 12:32:27 - INFO - codeparrot_training - Step 8473: {'lr': 0.00047789792895984874, 'samples': 4338688, 'steps': 8473, 'loss/train': 1.46390962600708} +02/24/2022 12:32:30 - INFO - codeparrot_training - Step 8474: {'lr': 0.0004778912019167756, 'samples': 4339200, 'steps': 8474, 'loss/train': 2.397479295730591} +02/24/2022 12:32:36 - INFO - codeparrot_training - Step 8475: {'lr': 0.00047788447389748894, 'samples': 4339712, 'steps': 8475, 'loss/train': 2.3953919410705566} +02/24/2022 12:32:39 - INFO - codeparrot_training - Step 8476: {'lr': 0.0004778777449020176, 'samples': 4340224, 'steps': 8476, 'loss/train': 2.907946825027466} +02/24/2022 12:32:45 - INFO - codeparrot_training - Step 8477: {'lr': 0.0004778710149303903, 'samples': 4340736, 'steps': 8477, 'loss/train': 2.48589825630188} +02/24/2022 12:32:51 - INFO - codeparrot_training - Step 8478: {'lr': 0.00047786428398263595, 'samples': 4341248, 'steps': 8478, 'loss/train': 1.9270997047424316} +02/24/2022 12:32:54 - INFO - codeparrot_training - Step 8479: {'lr': 0.00047785755205878333, 'samples': 4341760, 'steps': 8479, 'loss/train': 1.5082348585128784} +02/24/2022 12:33:00 - INFO - codeparrot_training - Step 8480: {'lr': 0.0004778508191588613, 'samples': 4342272, 'steps': 8480, 'loss/train': 3.1460177898406982} +02/24/2022 12:33:03 - INFO - codeparrot_training - Step 8481: {'lr': 0.0004778440852828988, 'samples': 4342784, 'steps': 8481, 'loss/train': 1.150168776512146} +02/24/2022 12:33:09 - INFO - codeparrot_training - Step 8482: {'lr': 0.00047783735043092446, 'samples': 4343296, 'steps': 8482, 'loss/train': 1.5935105085372925} +02/24/2022 12:33:12 - INFO - codeparrot_training - Step 8483: {'lr': 0.0004778306146029674, 'samples': 4343808, 'steps': 8483, 'loss/train': 2.04718279838562} +02/24/2022 12:33:18 - INFO - codeparrot_training - Step 8484: {'lr': 0.0004778238777990562, 'samples': 4344320, 'steps': 8484, 'loss/train': 1.802907943725586} +02/24/2022 12:33:21 - INFO - codeparrot_training - Step 8485: {'lr': 0.00047781714001921997, 'samples': 4344832, 'steps': 8485, 'loss/train': 0.8413384556770325} +02/24/2022 12:33:27 - INFO - codeparrot_training - Step 8486: {'lr': 0.00047781040126348734, 'samples': 4345344, 'steps': 8486, 'loss/train': 2.87712025642395} +02/24/2022 12:33:30 - INFO - codeparrot_training - Step 8487: {'lr': 0.0004778036615318874, 'samples': 4345856, 'steps': 8487, 'loss/train': 1.5497286319732666} +02/24/2022 12:33:36 - INFO - codeparrot_training - Step 8488: {'lr': 0.0004777969208244488, 'samples': 4346368, 'steps': 8488, 'loss/train': 0.3731866180896759} +02/24/2022 12:33:39 - INFO - codeparrot_training - Step 8489: {'lr': 0.0004777901791412006, 'samples': 4346880, 'steps': 8489, 'loss/train': 2.040407657623291} +02/24/2022 12:33:45 - INFO - codeparrot_training - Step 8490: {'lr': 0.00047778343648217155, 'samples': 4347392, 'steps': 8490, 'loss/train': 1.8158525228500366} +02/24/2022 12:33:49 - INFO - codeparrot_training - Step 8491: {'lr': 0.00047777669284739064, 'samples': 4347904, 'steps': 8491, 'loss/train': 1.5633140802383423} +02/24/2022 12:33:54 - INFO - codeparrot_training - Step 8492: {'lr': 0.0004777699482368867, 'samples': 4348416, 'steps': 8492, 'loss/train': 2.3152101039886475} +02/24/2022 12:33:58 - INFO - codeparrot_training - Step 8493: {'lr': 0.0004777632026506886, 'samples': 4348928, 'steps': 8493, 'loss/train': 0.2024056315422058} +02/24/2022 12:34:03 - INFO - codeparrot_training - Step 8494: {'lr': 0.0004777564560888252, 'samples': 4349440, 'steps': 8494, 'loss/train': 2.1427817344665527} +02/24/2022 12:34:07 - INFO - codeparrot_training - Step 8495: {'lr': 0.0004777497085513256, 'samples': 4349952, 'steps': 8495, 'loss/train': 2.6349411010742188} +02/24/2022 12:34:12 - INFO - codeparrot_training - Step 8496: {'lr': 0.0004777429600382185, 'samples': 4350464, 'steps': 8496, 'loss/train': 1.7786177396774292} +02/24/2022 12:34:16 - INFO - codeparrot_training - Step 8497: {'lr': 0.00047773621054953287, 'samples': 4350976, 'steps': 8497, 'loss/train': 1.8175199031829834} +02/24/2022 12:34:21 - INFO - codeparrot_training - Step 8498: {'lr': 0.0004777294600852976, 'samples': 4351488, 'steps': 8498, 'loss/train': 0.35944968461990356} +02/24/2022 12:34:25 - INFO - codeparrot_training - Step 8499: {'lr': 0.0004777227086455417, 'samples': 4352000, 'steps': 8499, 'loss/train': 2.4719696044921875} +02/24/2022 12:34:31 - INFO - codeparrot_training - Step 8500: {'lr': 0.000477715956230294, 'samples': 4352512, 'steps': 8500, 'loss/train': 1.956946611404419} +02/24/2022 12:34:35 - INFO - codeparrot_training - Step 8501: {'lr': 0.0004777092028395834, 'samples': 4353024, 'steps': 8501, 'loss/train': 0.12164922058582306} +02/24/2022 12:34:40 - INFO - codeparrot_training - Step 8502: {'lr': 0.00047770244847343893, 'samples': 4353536, 'steps': 8502, 'loss/train': 1.2616498470306396} +02/24/2022 12:34:44 - INFO - codeparrot_training - Step 8503: {'lr': 0.0004776956931318895, 'samples': 4354048, 'steps': 8503, 'loss/train': 0.6837210059165955} +02/24/2022 12:34:49 - INFO - codeparrot_training - Step 8504: {'lr': 0.00047768893681496397, 'samples': 4354560, 'steps': 8504, 'loss/train': 0.9616917967796326} +02/24/2022 12:34:53 - INFO - codeparrot_training - Step 8505: {'lr': 0.0004776821795226913, 'samples': 4355072, 'steps': 8505, 'loss/train': 2.351278066635132} +02/24/2022 12:34:58 - INFO - codeparrot_training - Step 8506: {'lr': 0.0004776754212551006, 'samples': 4355584, 'steps': 8506, 'loss/train': 1.5699753761291504} +02/24/2022 12:35:02 - INFO - codeparrot_training - Step 8507: {'lr': 0.0004776686620122206, 'samples': 4356096, 'steps': 8507, 'loss/train': 5.027220726013184} +02/24/2022 12:35:08 - INFO - codeparrot_training - Step 8508: {'lr': 0.00047766190179408043, 'samples': 4356608, 'steps': 8508, 'loss/train': 2.2917065620422363} +02/24/2022 12:35:11 - INFO - codeparrot_training - Step 8509: {'lr': 0.00047765514060070887, 'samples': 4357120, 'steps': 8509, 'loss/train': 1.9001367092132568} +02/24/2022 12:35:17 - INFO - codeparrot_training - Step 8510: {'lr': 0.00047764837843213497, 'samples': 4357632, 'steps': 8510, 'loss/train': 2.093930721282959} +02/24/2022 12:35:20 - INFO - codeparrot_training - Step 8511: {'lr': 0.0004776416152883878, 'samples': 4358144, 'steps': 8511, 'loss/train': 2.780133008956909} +02/24/2022 12:35:26 - INFO - codeparrot_training - Step 8512: {'lr': 0.0004776348511694961, 'samples': 4358656, 'steps': 8512, 'loss/train': 2.6753807067871094} +02/24/2022 12:35:30 - INFO - codeparrot_training - Step 8513: {'lr': 0.0004776280860754891, 'samples': 4359168, 'steps': 8513, 'loss/train': 2.1889731884002686} +02/24/2022 12:35:35 - INFO - codeparrot_training - Step 8514: {'lr': 0.0004776213200063956, 'samples': 4359680, 'steps': 8514, 'loss/train': 3.200205087661743} +02/24/2022 12:35:39 - INFO - codeparrot_training - Step 8515: {'lr': 0.00047761455296224464, 'samples': 4360192, 'steps': 8515, 'loss/train': 2.1677839756011963} +02/24/2022 12:35:44 - INFO - codeparrot_training - Step 8516: {'lr': 0.0004776077849430652, 'samples': 4360704, 'steps': 8516, 'loss/train': 2.529313564300537} +02/24/2022 12:35:48 - INFO - codeparrot_training - Step 8517: {'lr': 0.00047760101594888633, 'samples': 4361216, 'steps': 8517, 'loss/train': 2.6973793506622314} +02/24/2022 12:35:53 - INFO - codeparrot_training - Step 8518: {'lr': 0.000477594245979737, 'samples': 4361728, 'steps': 8518, 'loss/train': 2.445666790008545} +02/24/2022 12:35:56 - INFO - codeparrot_training - Step 8519: {'lr': 0.0004775874750356461, 'samples': 4362240, 'steps': 8519, 'loss/train': 2.794433832168579} +02/24/2022 12:36:02 - INFO - codeparrot_training - Step 8520: {'lr': 0.00047758070311664283, 'samples': 4362752, 'steps': 8520, 'loss/train': 1.6525999307632446} +02/24/2022 12:36:05 - INFO - codeparrot_training - Step 8521: {'lr': 0.000477573930222756, 'samples': 4363264, 'steps': 8521, 'loss/train': 1.8866370916366577} +02/24/2022 12:36:12 - INFO - codeparrot_training - Step 8522: {'lr': 0.0004775671563540147, 'samples': 4363776, 'steps': 8522, 'loss/train': 2.0719964504241943} +02/24/2022 12:36:15 - INFO - codeparrot_training - Step 8523: {'lr': 0.000477560381510448, 'samples': 4364288, 'steps': 8523, 'loss/train': 2.1382999420166016} +02/24/2022 12:36:21 - INFO - codeparrot_training - Step 8524: {'lr': 0.00047755360569208495, 'samples': 4364800, 'steps': 8524, 'loss/train': 2.7998528480529785} +02/24/2022 12:36:24 - INFO - codeparrot_training - Step 8525: {'lr': 0.00047754682889895444, 'samples': 4365312, 'steps': 8525, 'loss/train': 1.8198583126068115} +02/24/2022 12:36:30 - INFO - codeparrot_training - Step 8526: {'lr': 0.00047754005113108557, 'samples': 4365824, 'steps': 8526, 'loss/train': 2.023512840270996} +02/24/2022 12:36:33 - INFO - codeparrot_training - Step 8527: {'lr': 0.0004775332723885074, 'samples': 4366336, 'steps': 8527, 'loss/train': 1.8731328248977661} +02/24/2022 12:36:39 - INFO - codeparrot_training - Step 8528: {'lr': 0.00047752649267124894, 'samples': 4366848, 'steps': 8528, 'loss/train': 1.8908895254135132} +02/24/2022 12:36:42 - INFO - codeparrot_training - Step 8529: {'lr': 0.0004775197119793392, 'samples': 4367360, 'steps': 8529, 'loss/train': 1.4178643226623535} +02/24/2022 12:36:48 - INFO - codeparrot_training - Step 8530: {'lr': 0.0004775129303128073, 'samples': 4367872, 'steps': 8530, 'loss/train': 2.051976203918457} +02/24/2022 12:36:51 - INFO - codeparrot_training - Step 8531: {'lr': 0.0004775061476716822, 'samples': 4368384, 'steps': 8531, 'loss/train': 2.5321860313415527} +02/24/2022 12:36:56 - INFO - codeparrot_training - Step 8532: {'lr': 0.000477499364055993, 'samples': 4368896, 'steps': 8532, 'loss/train': 0.30298274755477905} +02/24/2022 12:37:02 - INFO - codeparrot_training - Step 8533: {'lr': 0.00047749257946576887, 'samples': 4369408, 'steps': 8533, 'loss/train': 1.7422784566879272} +02/24/2022 12:37:05 - INFO - codeparrot_training - Step 8534: {'lr': 0.0004774857939010387, 'samples': 4369920, 'steps': 8534, 'loss/train': 2.6280179023742676} +02/24/2022 12:37:11 - INFO - codeparrot_training - Step 8535: {'lr': 0.0004774790073618316, 'samples': 4370432, 'steps': 8535, 'loss/train': 1.9418681859970093} +02/24/2022 12:37:14 - INFO - codeparrot_training - Step 8536: {'lr': 0.00047747221984817666, 'samples': 4370944, 'steps': 8536, 'loss/train': 0.8090088963508606} +02/24/2022 12:37:21 - INFO - codeparrot_training - Step 8537: {'lr': 0.000477465431360103, 'samples': 4371456, 'steps': 8537, 'loss/train': 1.2902745008468628} +02/24/2022 12:37:24 - INFO - codeparrot_training - Step 8538: {'lr': 0.00047745864189763964, 'samples': 4371968, 'steps': 8538, 'loss/train': 3.206209897994995} +02/24/2022 12:37:30 - INFO - codeparrot_training - Step 8539: {'lr': 0.0004774518514608157, 'samples': 4372480, 'steps': 8539, 'loss/train': 2.961583375930786} +02/24/2022 12:37:33 - INFO - codeparrot_training - Step 8540: {'lr': 0.00047744506004966024, 'samples': 4372992, 'steps': 8540, 'loss/train': 3.248595714569092} +02/24/2022 12:37:39 - INFO - codeparrot_training - Step 8541: {'lr': 0.0004774382676642024, 'samples': 4373504, 'steps': 8541, 'loss/train': 1.8727253675460815} +02/24/2022 12:37:42 - INFO - codeparrot_training - Step 8542: {'lr': 0.0004774314743044712, 'samples': 4374016, 'steps': 8542, 'loss/train': 2.283466100692749} +02/24/2022 12:37:47 - INFO - codeparrot_training - Step 8543: {'lr': 0.00047742467997049576, 'samples': 4374528, 'steps': 8543, 'loss/train': 2.719625949859619} +02/24/2022 12:37:51 - INFO - codeparrot_training - Step 8544: {'lr': 0.00047741788466230527, 'samples': 4375040, 'steps': 8544, 'loss/train': 2.3168108463287354} +02/24/2022 12:37:56 - INFO - codeparrot_training - Step 8545: {'lr': 0.00047741108837992877, 'samples': 4375552, 'steps': 8545, 'loss/train': 2.062335729598999} +02/24/2022 12:38:00 - INFO - codeparrot_training - Step 8546: {'lr': 0.0004774042911233953, 'samples': 4376064, 'steps': 8546, 'loss/train': 1.0751019716262817} +02/24/2022 12:38:06 - INFO - codeparrot_training - Step 8547: {'lr': 0.0004773974928927342, 'samples': 4376576, 'steps': 8547, 'loss/train': 2.533010482788086} +02/24/2022 12:38:09 - INFO - codeparrot_training - Step 8548: {'lr': 0.00047739069368797426, 'samples': 4377088, 'steps': 8548, 'loss/train': 2.0355021953582764} +02/24/2022 12:38:15 - INFO - codeparrot_training - Step 8549: {'lr': 0.0004773838935091449, 'samples': 4377600, 'steps': 8549, 'loss/train': 2.4210667610168457} +02/24/2022 12:38:18 - INFO - codeparrot_training - Step 8550: {'lr': 0.00047737709235627515, 'samples': 4378112, 'steps': 8550, 'loss/train': 1.7051926851272583} +02/24/2022 12:38:24 - INFO - codeparrot_training - Step 8551: {'lr': 0.00047737029022939414, 'samples': 4378624, 'steps': 8551, 'loss/train': 2.4141745567321777} +02/24/2022 12:38:27 - INFO - codeparrot_training - Step 8552: {'lr': 0.00047736348712853094, 'samples': 4379136, 'steps': 8552, 'loss/train': 1.9121068716049194} +02/24/2022 12:38:33 - INFO - codeparrot_training - Step 8553: {'lr': 0.00047735668305371484, 'samples': 4379648, 'steps': 8553, 'loss/train': 1.5458152294158936} +02/24/2022 12:38:36 - INFO - codeparrot_training - Step 8554: {'lr': 0.0004773498780049749, 'samples': 4380160, 'steps': 8554, 'loss/train': 2.028305768966675} +02/24/2022 12:38:42 - INFO - codeparrot_training - Step 8555: {'lr': 0.00047734307198234015, 'samples': 4380672, 'steps': 8555, 'loss/train': 2.7924082279205322} +02/24/2022 12:38:45 - INFO - codeparrot_training - Step 8556: {'lr': 0.00047733626498584, 'samples': 4381184, 'steps': 8556, 'loss/train': 2.3836426734924316} +02/24/2022 12:38:52 - INFO - codeparrot_training - Step 8557: {'lr': 0.0004773294570155035, 'samples': 4381696, 'steps': 8557, 'loss/train': 2.8018240928649902} +02/24/2022 12:38:55 - INFO - codeparrot_training - Step 8558: {'lr': 0.0004773226480713596, 'samples': 4382208, 'steps': 8558, 'loss/train': 2.0178024768829346} +02/24/2022 12:39:01 - INFO - codeparrot_training - Step 8559: {'lr': 0.00047731583815343784, 'samples': 4382720, 'steps': 8559, 'loss/train': 2.6657023429870605} +02/24/2022 12:39:04 - INFO - codeparrot_training - Step 8560: {'lr': 0.00047730902726176715, 'samples': 4383232, 'steps': 8560, 'loss/train': 2.0087077617645264} +02/24/2022 12:39:10 - INFO - codeparrot_training - Step 8561: {'lr': 0.00047730221539637677, 'samples': 4383744, 'steps': 8561, 'loss/train': 1.6984707117080688} +02/24/2022 12:39:13 - INFO - codeparrot_training - Step 8562: {'lr': 0.00047729540255729585, 'samples': 4384256, 'steps': 8562, 'loss/train': 2.4408490657806396} +02/24/2022 12:39:19 - INFO - codeparrot_training - Step 8563: {'lr': 0.0004772885887445536, 'samples': 4384768, 'steps': 8563, 'loss/train': 2.894321918487549} +02/24/2022 12:39:22 - INFO - codeparrot_training - Step 8564: {'lr': 0.0004772817739581793, 'samples': 4385280, 'steps': 8564, 'loss/train': 2.4830737113952637} +02/24/2022 12:39:28 - INFO - codeparrot_training - Step 8565: {'lr': 0.000477274958198202, 'samples': 4385792, 'steps': 8565, 'loss/train': 1.175816297531128} +02/24/2022 12:39:31 - INFO - codeparrot_training - Step 8566: {'lr': 0.0004772681414646509, 'samples': 4386304, 'steps': 8566, 'loss/train': 1.0876022577285767} +02/24/2022 12:39:37 - INFO - codeparrot_training - Step 8567: {'lr': 0.00047726132375755525, 'samples': 4386816, 'steps': 8567, 'loss/train': 1.7903074026107788} +02/24/2022 12:39:41 - INFO - codeparrot_training - Step 8568: {'lr': 0.00047725450507694433, 'samples': 4387328, 'steps': 8568, 'loss/train': 2.4472103118896484} +02/24/2022 12:39:46 - INFO - codeparrot_training - Step 8569: {'lr': 0.00047724768542284726, 'samples': 4387840, 'steps': 8569, 'loss/train': 2.4018044471740723} +02/24/2022 12:39:50 - INFO - codeparrot_training - Step 8570: {'lr': 0.0004772408647952932, 'samples': 4388352, 'steps': 8570, 'loss/train': 1.9022170305252075} +02/24/2022 12:39:56 - INFO - codeparrot_training - Step 8571: {'lr': 0.0004772340431943114, 'samples': 4388864, 'steps': 8571, 'loss/train': 1.9567608833312988} +02/24/2022 12:39:59 - INFO - codeparrot_training - Step 8572: {'lr': 0.0004772272206199312, 'samples': 4389376, 'steps': 8572, 'loss/train': 1.7057418823242188} +02/24/2022 12:40:05 - INFO - codeparrot_training - Step 8573: {'lr': 0.0004772203970721817, 'samples': 4389888, 'steps': 8573, 'loss/train': 0.4100549519062042} +02/24/2022 12:40:08 - INFO - codeparrot_training - Step 8574: {'lr': 0.0004772135725510922, 'samples': 4390400, 'steps': 8574, 'loss/train': 2.523175001144409} +02/24/2022 12:40:14 - INFO - codeparrot_training - Step 8575: {'lr': 0.0004772067470566919, 'samples': 4390912, 'steps': 8575, 'loss/train': 1.1470764875411987} +02/24/2022 12:40:18 - INFO - codeparrot_training - Step 8576: {'lr': 0.00047719992058901006, 'samples': 4391424, 'steps': 8576, 'loss/train': 2.4629693031311035} +02/24/2022 12:40:23 - INFO - codeparrot_training - Step 8577: {'lr': 0.00047719309314807584, 'samples': 4391936, 'steps': 8577, 'loss/train': 2.9566709995269775} +02/24/2022 12:40:27 - INFO - codeparrot_training - Step 8578: {'lr': 0.0004771862647339186, 'samples': 4392448, 'steps': 8578, 'loss/train': 1.8519346714019775} +02/24/2022 12:40:32 - INFO - codeparrot_training - Step 8579: {'lr': 0.0004771794353465675, 'samples': 4392960, 'steps': 8579, 'loss/train': 1.43887197971344} +02/24/2022 12:40:36 - INFO - codeparrot_training - Step 8580: {'lr': 0.00047717260498605186, 'samples': 4393472, 'steps': 8580, 'loss/train': 1.830541968345642} +02/24/2022 12:40:42 - INFO - codeparrot_training - Step 8581: {'lr': 0.0004771657736524009, 'samples': 4393984, 'steps': 8581, 'loss/train': 1.790372371673584} +02/24/2022 12:40:46 - INFO - codeparrot_training - Step 8582: {'lr': 0.00047715894134564395, 'samples': 4394496, 'steps': 8582, 'loss/train': 3.436487913131714} +02/24/2022 12:40:51 - INFO - codeparrot_training - Step 8583: {'lr': 0.0004771521080658102, 'samples': 4395008, 'steps': 8583, 'loss/train': 2.7922022342681885} +02/24/2022 12:40:54 - INFO - codeparrot_training - Step 8584: {'lr': 0.00047714527381292893, 'samples': 4395520, 'steps': 8584, 'loss/train': 3.391402006149292} +02/24/2022 12:41:00 - INFO - codeparrot_training - Step 8585: {'lr': 0.00047713843858702943, 'samples': 4396032, 'steps': 8585, 'loss/train': 2.1233153343200684} +02/24/2022 12:41:03 - INFO - codeparrot_training - Step 8586: {'lr': 0.000477131602388141, 'samples': 4396544, 'steps': 8586, 'loss/train': 2.618825912475586} +02/24/2022 12:41:09 - INFO - codeparrot_training - Step 8587: {'lr': 0.00047712476521629294, 'samples': 4397056, 'steps': 8587, 'loss/train': 1.6917307376861572} +02/24/2022 12:41:12 - INFO - codeparrot_training - Step 8588: {'lr': 0.0004771179270715145, 'samples': 4397568, 'steps': 8588, 'loss/train': 2.150625705718994} +02/24/2022 12:41:18 - INFO - codeparrot_training - Step 8589: {'lr': 0.000477111087953835, 'samples': 4398080, 'steps': 8589, 'loss/train': 2.749589681625366} +02/24/2022 12:41:21 - INFO - codeparrot_training - Step 8590: {'lr': 0.0004771042478632836, 'samples': 4398592, 'steps': 8590, 'loss/train': 2.3499083518981934} +02/24/2022 12:41:27 - INFO - codeparrot_training - Step 8591: {'lr': 0.0004770974067998898, 'samples': 4399104, 'steps': 8591, 'loss/train': 2.635939598083496} +02/24/2022 12:41:30 - INFO - codeparrot_training - Step 8592: {'lr': 0.0004770905647636828, 'samples': 4399616, 'steps': 8592, 'loss/train': 1.1321337223052979} +02/24/2022 12:41:36 - INFO - codeparrot_training - Step 8593: {'lr': 0.00047708372175469193, 'samples': 4400128, 'steps': 8593, 'loss/train': 2.657602071762085} +02/24/2022 12:41:40 - INFO - codeparrot_training - Step 8594: {'lr': 0.0004770768777729465, 'samples': 4400640, 'steps': 8594, 'loss/train': 2.3549373149871826} +02/24/2022 12:41:45 - INFO - codeparrot_training - Step 8595: {'lr': 0.0004770700328184758, 'samples': 4401152, 'steps': 8595, 'loss/train': 1.016804814338684} +02/24/2022 12:41:49 - INFO - codeparrot_training - Step 8596: {'lr': 0.00047706318689130924, 'samples': 4401664, 'steps': 8596, 'loss/train': 2.461090326309204} +02/24/2022 12:41:54 - INFO - codeparrot_training - Step 8597: {'lr': 0.0004770563399914761, 'samples': 4402176, 'steps': 8597, 'loss/train': 3.0149478912353516} +02/24/2022 12:41:58 - INFO - codeparrot_training - Step 8598: {'lr': 0.00047704949211900565, 'samples': 4402688, 'steps': 8598, 'loss/train': 2.4274518489837646} +02/24/2022 12:42:03 - INFO - codeparrot_training - Step 8599: {'lr': 0.0004770426432739273, 'samples': 4403200, 'steps': 8599, 'loss/train': 2.5701568126678467} +02/24/2022 12:42:07 - INFO - codeparrot_training - Step 8600: {'lr': 0.00047703579345627036, 'samples': 4403712, 'steps': 8600, 'loss/train': 1.3668849468231201} +02/24/2022 12:42:12 - INFO - codeparrot_training - Step 8601: {'lr': 0.00047702894266606413, 'samples': 4404224, 'steps': 8601, 'loss/train': 1.8855148553848267} +02/24/2022 12:42:16 - INFO - codeparrot_training - Step 8602: {'lr': 0.00047702209090333804, 'samples': 4404736, 'steps': 8602, 'loss/train': 3.4527690410614014} +02/24/2022 12:42:22 - INFO - codeparrot_training - Step 8603: {'lr': 0.0004770152381681214, 'samples': 4405248, 'steps': 8603, 'loss/train': 2.4510040283203125} +02/24/2022 12:42:26 - INFO - codeparrot_training - Step 8604: {'lr': 0.0004770083844604435, 'samples': 4405760, 'steps': 8604, 'loss/train': 2.505350351333618} +02/24/2022 12:42:32 - INFO - codeparrot_training - Step 8605: {'lr': 0.00047700152978033387, 'samples': 4406272, 'steps': 8605, 'loss/train': 2.3096370697021484} +02/24/2022 12:42:35 - INFO - codeparrot_training - Step 8606: {'lr': 0.0004769946741278217, 'samples': 4406784, 'steps': 8606, 'loss/train': 1.517177939414978} +02/24/2022 12:42:41 - INFO - codeparrot_training - Step 8607: {'lr': 0.00047698781750293644, 'samples': 4407296, 'steps': 8607, 'loss/train': 2.5887279510498047} +02/24/2022 12:42:44 - INFO - codeparrot_training - Step 8608: {'lr': 0.00047698095990570744, 'samples': 4407808, 'steps': 8608, 'loss/train': 2.6150879859924316} +02/24/2022 12:42:50 - INFO - codeparrot_training - Step 8609: {'lr': 0.00047697410133616414, 'samples': 4408320, 'steps': 8609, 'loss/train': 1.7853350639343262} +02/24/2022 12:42:53 - INFO - codeparrot_training - Step 8610: {'lr': 0.0004769672417943358, 'samples': 4408832, 'steps': 8610, 'loss/train': 1.4286537170410156} +02/24/2022 12:42:59 - INFO - codeparrot_training - Step 8611: {'lr': 0.00047696038128025185, 'samples': 4409344, 'steps': 8611, 'loss/train': 2.2056002616882324} +02/24/2022 12:43:02 - INFO - codeparrot_training - Step 8612: {'lr': 0.00047695351979394173, 'samples': 4409856, 'steps': 8612, 'loss/train': 1.807463526725769} +02/24/2022 12:43:08 - INFO - codeparrot_training - Step 8613: {'lr': 0.00047694665733543485, 'samples': 4410368, 'steps': 8613, 'loss/train': 3.0162248611450195} +02/24/2022 12:43:11 - INFO - codeparrot_training - Step 8614: {'lr': 0.00047693979390476046, 'samples': 4410880, 'steps': 8614, 'loss/train': 1.6820416450500488} +02/24/2022 12:43:17 - INFO - codeparrot_training - Step 8615: {'lr': 0.00047693292950194813, 'samples': 4411392, 'steps': 8615, 'loss/train': 2.2958154678344727} +02/24/2022 12:43:23 - INFO - codeparrot_training - Step 8616: {'lr': 0.0004769260641270271, 'samples': 4411904, 'steps': 8616, 'loss/train': 1.3787344694137573} +02/24/2022 12:43:26 - INFO - codeparrot_training - Step 8617: {'lr': 0.0004769191977800269, 'samples': 4412416, 'steps': 8617, 'loss/train': 2.0811045169830322} +02/24/2022 12:43:32 - INFO - codeparrot_training - Step 8618: {'lr': 0.0004769123304609769, 'samples': 4412928, 'steps': 8618, 'loss/train': 1.517041563987732} +02/24/2022 12:43:35 - INFO - codeparrot_training - Step 8619: {'lr': 0.0004769054621699066, 'samples': 4413440, 'steps': 8619, 'loss/train': 2.7214767932891846} +02/24/2022 12:43:41 - INFO - codeparrot_training - Step 8620: {'lr': 0.0004768985929068453, 'samples': 4413952, 'steps': 8620, 'loss/train': 1.9583196640014648} +02/24/2022 12:43:44 - INFO - codeparrot_training - Step 8621: {'lr': 0.0004768917226718225, 'samples': 4414464, 'steps': 8621, 'loss/train': 2.4468472003936768} +02/24/2022 12:43:50 - INFO - codeparrot_training - Step 8622: {'lr': 0.0004768848514648676, 'samples': 4414976, 'steps': 8622, 'loss/train': 2.4089601039886475} +02/24/2022 12:43:53 - INFO - codeparrot_training - Step 8623: {'lr': 0.0004768779792860101, 'samples': 4415488, 'steps': 8623, 'loss/train': 1.7274982929229736} +02/24/2022 12:43:59 - INFO - codeparrot_training - Step 8624: {'lr': 0.00047687110613527924, 'samples': 4416000, 'steps': 8624, 'loss/train': 1.3341413736343384} +02/24/2022 12:44:03 - INFO - codeparrot_training - Step 8625: {'lr': 0.0004768642320127047, 'samples': 4416512, 'steps': 8625, 'loss/train': 2.6436188220977783} +02/24/2022 12:44:08 - INFO - codeparrot_training - Step 8626: {'lr': 0.0004768573569183158, 'samples': 4417024, 'steps': 8626, 'loss/train': 1.0955262184143066} +02/24/2022 12:44:12 - INFO - codeparrot_training - Step 8627: {'lr': 0.000476850480852142, 'samples': 4417536, 'steps': 8627, 'loss/train': 2.3829903602600098} +02/24/2022 12:44:17 - INFO - codeparrot_training - Step 8628: {'lr': 0.0004768436038142128, 'samples': 4418048, 'steps': 8628, 'loss/train': 1.5524612665176392} +02/24/2022 12:44:21 - INFO - codeparrot_training - Step 8629: {'lr': 0.00047683672580455764, 'samples': 4418560, 'steps': 8629, 'loss/train': 3.0201761722564697} +02/24/2022 12:44:26 - INFO - codeparrot_training - Step 8630: {'lr': 0.00047682984682320597, 'samples': 4419072, 'steps': 8630, 'loss/train': 1.336091160774231} +02/24/2022 12:44:30 - INFO - codeparrot_training - Step 8631: {'lr': 0.0004768229668701872, 'samples': 4419584, 'steps': 8631, 'loss/train': 2.226219892501831} +02/24/2022 12:44:35 - INFO - codeparrot_training - Step 8632: {'lr': 0.00047681608594553093, 'samples': 4420096, 'steps': 8632, 'loss/train': 2.4161622524261475} +02/24/2022 12:44:39 - INFO - codeparrot_training - Step 8633: {'lr': 0.00047680920404926655, 'samples': 4420608, 'steps': 8633, 'loss/train': 2.278937578201294} +02/24/2022 12:44:45 - INFO - codeparrot_training - Step 8634: {'lr': 0.0004768023211814236, 'samples': 4421120, 'steps': 8634, 'loss/train': 0.4753687381744385} +02/24/2022 12:44:48 - INFO - codeparrot_training - Step 8635: {'lr': 0.0004767954373420315, 'samples': 4421632, 'steps': 8635, 'loss/train': 0.9312040209770203} +02/24/2022 12:44:54 - INFO - codeparrot_training - Step 8636: {'lr': 0.0004767885525311197, 'samples': 4422144, 'steps': 8636, 'loss/train': 0.989240288734436} +02/24/2022 12:44:57 - INFO - codeparrot_training - Step 8637: {'lr': 0.00047678166674871783, 'samples': 4422656, 'steps': 8637, 'loss/train': 2.154517889022827} +02/24/2022 12:45:03 - INFO - codeparrot_training - Step 8638: {'lr': 0.0004767747799948553, 'samples': 4423168, 'steps': 8638, 'loss/train': 1.625832438468933} +02/24/2022 12:45:06 - INFO - codeparrot_training - Step 8639: {'lr': 0.0004767678922695616, 'samples': 4423680, 'steps': 8639, 'loss/train': 2.3734169006347656} +02/24/2022 12:45:12 - INFO - codeparrot_training - Step 8640: {'lr': 0.0004767610035728662, 'samples': 4424192, 'steps': 8640, 'loss/train': 2.1616783142089844} +02/24/2022 12:45:16 - INFO - codeparrot_training - Step 8641: {'lr': 0.00047675411390479876, 'samples': 4424704, 'steps': 8641, 'loss/train': 2.3193087577819824} +02/24/2022 12:45:21 - INFO - codeparrot_training - Step 8642: {'lr': 0.0004767472232653887, 'samples': 4425216, 'steps': 8642, 'loss/train': 1.4009196758270264} +02/24/2022 12:45:25 - INFO - codeparrot_training - Step 8643: {'lr': 0.00047674033165466545, 'samples': 4425728, 'steps': 8643, 'loss/train': 1.9185664653778076} +02/24/2022 12:45:31 - INFO - codeparrot_training - Step 8644: {'lr': 0.0004767334390726588, 'samples': 4426240, 'steps': 8644, 'loss/train': 2.3010120391845703} +02/24/2022 12:45:34 - INFO - codeparrot_training - Step 8645: {'lr': 0.00047672654551939785, 'samples': 4426752, 'steps': 8645, 'loss/train': 2.000993251800537} +02/24/2022 12:45:40 - INFO - codeparrot_training - Step 8646: {'lr': 0.00047671965099491256, 'samples': 4427264, 'steps': 8646, 'loss/train': 2.680065631866455} +02/24/2022 12:45:43 - INFO - codeparrot_training - Step 8647: {'lr': 0.0004767127554992322, 'samples': 4427776, 'steps': 8647, 'loss/train': 2.972494602203369} +02/24/2022 12:45:49 - INFO - codeparrot_training - Step 8648: {'lr': 0.0004767058590323864, 'samples': 4428288, 'steps': 8648, 'loss/train': 1.4183294773101807} +02/24/2022 12:45:52 - INFO - codeparrot_training - Step 8649: {'lr': 0.00047669896159440464, 'samples': 4428800, 'steps': 8649, 'loss/train': 2.120751142501831} +02/24/2022 12:45:58 - INFO - codeparrot_training - Step 8650: {'lr': 0.00047669206318531654, 'samples': 4429312, 'steps': 8650, 'loss/train': 2.8385157585144043} +02/24/2022 12:46:01 - INFO - codeparrot_training - Step 8651: {'lr': 0.00047668516380515165, 'samples': 4429824, 'steps': 8651, 'loss/train': 2.1079602241516113} +02/24/2022 12:46:07 - INFO - codeparrot_training - Step 8652: {'lr': 0.0004766782634539395, 'samples': 4430336, 'steps': 8652, 'loss/train': 1.5508980751037598} +02/24/2022 12:46:10 - INFO - codeparrot_training - Step 8653: {'lr': 0.00047667136213170957, 'samples': 4430848, 'steps': 8653, 'loss/train': 3.2541048526763916} +02/24/2022 12:46:16 - INFO - codeparrot_training - Step 8654: {'lr': 0.00047666445983849163, 'samples': 4431360, 'steps': 8654, 'loss/train': 1.6311894655227661} +02/24/2022 12:46:19 - INFO - codeparrot_training - Step 8655: {'lr': 0.000476657556574315, 'samples': 4431872, 'steps': 8655, 'loss/train': 2.0137391090393066} +02/24/2022 12:46:25 - INFO - codeparrot_training - Step 8656: {'lr': 0.00047665065233920946, 'samples': 4432384, 'steps': 8656, 'loss/train': 1.5860389471054077} +02/24/2022 12:46:28 - INFO - codeparrot_training - Step 8657: {'lr': 0.0004766437471332045, 'samples': 4432896, 'steps': 8657, 'loss/train': 0.8020785450935364} +02/24/2022 12:46:34 - INFO - codeparrot_training - Step 8658: {'lr': 0.0004766368409563296, 'samples': 4433408, 'steps': 8658, 'loss/train': 2.4337875843048096} +02/24/2022 12:46:37 - INFO - codeparrot_training - Step 8659: {'lr': 0.0004766299338086145, 'samples': 4433920, 'steps': 8659, 'loss/train': 1.8769762516021729} +02/24/2022 12:46:43 - INFO - codeparrot_training - Step 8660: {'lr': 0.0004766230256900887, 'samples': 4434432, 'steps': 8660, 'loss/train': 2.092787504196167} +02/24/2022 12:46:47 - INFO - codeparrot_training - Step 8661: {'lr': 0.00047661611660078184, 'samples': 4434944, 'steps': 8661, 'loss/train': 2.9152870178222656} +02/24/2022 12:46:52 - INFO - codeparrot_training - Step 8662: {'lr': 0.0004766092065407235, 'samples': 4435456, 'steps': 8662, 'loss/train': 2.8709444999694824} +02/24/2022 12:46:56 - INFO - codeparrot_training - Step 8663: {'lr': 0.0004766022955099433, 'samples': 4435968, 'steps': 8663, 'loss/train': 2.575606346130371} +02/24/2022 12:47:01 - INFO - codeparrot_training - Step 8664: {'lr': 0.00047659538350847076, 'samples': 4436480, 'steps': 8664, 'loss/train': 1.45050847530365} +02/24/2022 12:47:05 - INFO - codeparrot_training - Step 8665: {'lr': 0.00047658847053633555, 'samples': 4436992, 'steps': 8665, 'loss/train': 1.629294753074646} +02/24/2022 12:47:10 - INFO - codeparrot_training - Step 8666: {'lr': 0.00047658155659356725, 'samples': 4437504, 'steps': 8666, 'loss/train': 2.524251937866211} +02/24/2022 12:47:14 - INFO - codeparrot_training - Step 8667: {'lr': 0.0004765746416801956, 'samples': 4438016, 'steps': 8667, 'loss/train': 2.461313247680664} +02/24/2022 12:47:19 - INFO - codeparrot_training - Step 8668: {'lr': 0.0004765677257962501, 'samples': 4438528, 'steps': 8668, 'loss/train': 2.9091362953186035} +02/24/2022 12:47:23 - INFO - codeparrot_training - Step 8669: {'lr': 0.0004765608089417604, 'samples': 4439040, 'steps': 8669, 'loss/train': 2.237517833709717} +02/24/2022 12:47:29 - INFO - codeparrot_training - Step 8670: {'lr': 0.0004765538911167562, 'samples': 4439552, 'steps': 8670, 'loss/train': 2.2874817848205566} +02/24/2022 12:47:32 - INFO - codeparrot_training - Step 8671: {'lr': 0.00047654697232126696, 'samples': 4440064, 'steps': 8671, 'loss/train': 1.8436689376831055} +02/24/2022 12:47:38 - INFO - codeparrot_training - Step 8672: {'lr': 0.00047654005255532247, 'samples': 4440576, 'steps': 8672, 'loss/train': 2.346289873123169} +02/24/2022 12:47:41 - INFO - codeparrot_training - Step 8673: {'lr': 0.0004765331318189523, 'samples': 4441088, 'steps': 8673, 'loss/train': 1.8630294799804688} +02/24/2022 12:47:47 - INFO - codeparrot_training - Step 8674: {'lr': 0.00047652621011218623, 'samples': 4441600, 'steps': 8674, 'loss/train': 3.237675428390503} +02/24/2022 12:47:50 - INFO - codeparrot_training - Step 8675: {'lr': 0.0004765192874350537, 'samples': 4442112, 'steps': 8675, 'loss/train': 1.8413033485412598} +02/24/2022 12:47:56 - INFO - codeparrot_training - Step 8676: {'lr': 0.0004765123637875845, 'samples': 4442624, 'steps': 8676, 'loss/train': 3.178135633468628} +02/24/2022 12:47:59 - INFO - codeparrot_training - Step 8677: {'lr': 0.00047650543916980827, 'samples': 4443136, 'steps': 8677, 'loss/train': 2.0916380882263184} +02/24/2022 12:48:05 - INFO - codeparrot_training - Step 8678: {'lr': 0.00047649851358175466, 'samples': 4443648, 'steps': 8678, 'loss/train': 2.175136089324951} +02/24/2022 12:48:08 - INFO - codeparrot_training - Step 8679: {'lr': 0.0004764915870234533, 'samples': 4444160, 'steps': 8679, 'loss/train': 2.0423433780670166} +02/24/2022 12:48:15 - INFO - codeparrot_training - Step 8680: {'lr': 0.000476484659494934, 'samples': 4444672, 'steps': 8680, 'loss/train': 3.073350429534912} +02/24/2022 12:48:18 - INFO - codeparrot_training - Step 8681: {'lr': 0.0004764777309962263, 'samples': 4445184, 'steps': 8681, 'loss/train': 2.4866981506347656} +02/24/2022 12:48:24 - INFO - codeparrot_training - Step 8682: {'lr': 0.0004764708015273599, 'samples': 4445696, 'steps': 8682, 'loss/train': 2.342972755432129} +02/24/2022 12:48:27 - INFO - codeparrot_training - Step 8683: {'lr': 0.0004764638710883644, 'samples': 4446208, 'steps': 8683, 'loss/train': 1.2774816751480103} +02/24/2022 12:48:33 - INFO - codeparrot_training - Step 8684: {'lr': 0.0004764569396792697, 'samples': 4446720, 'steps': 8684, 'loss/train': 1.7196332216262817} +02/24/2022 12:48:36 - INFO - codeparrot_training - Step 8685: {'lr': 0.00047645000730010535, 'samples': 4447232, 'steps': 8685, 'loss/train': 0.31277045607566833} +02/24/2022 12:48:42 - INFO - codeparrot_training - Step 8686: {'lr': 0.00047644307395090107, 'samples': 4447744, 'steps': 8686, 'loss/train': 1.9592556953430176} +02/24/2022 12:48:45 - INFO - codeparrot_training - Step 8687: {'lr': 0.0004764361396316866, 'samples': 4448256, 'steps': 8687, 'loss/train': 2.346013307571411} +02/24/2022 12:48:51 - INFO - codeparrot_training - Step 8688: {'lr': 0.0004764292043424916, 'samples': 4448768, 'steps': 8688, 'loss/train': 2.1401946544647217} +02/24/2022 12:48:54 - INFO - codeparrot_training - Step 8689: {'lr': 0.0004764222680833458, 'samples': 4449280, 'steps': 8689, 'loss/train': 2.2720906734466553} +02/24/2022 12:49:01 - INFO - codeparrot_training - Step 8690: {'lr': 0.0004764153308542788, 'samples': 4449792, 'steps': 8690, 'loss/train': 1.8749568462371826} +02/24/2022 12:49:04 - INFO - codeparrot_training - Step 8691: {'lr': 0.0004764083926553205, 'samples': 4450304, 'steps': 8691, 'loss/train': 2.8419225215911865} +02/24/2022 12:49:09 - INFO - codeparrot_training - Step 8692: {'lr': 0.00047640145348650057, 'samples': 4450816, 'steps': 8692, 'loss/train': 1.652675747871399} +02/24/2022 12:49:13 - INFO - codeparrot_training - Step 8693: {'lr': 0.0004763945133478486, 'samples': 4451328, 'steps': 8693, 'loss/train': 2.562063455581665} +02/24/2022 12:49:18 - INFO - codeparrot_training - Step 8694: {'lr': 0.0004763875722393945, 'samples': 4451840, 'steps': 8694, 'loss/train': 1.279549241065979} +02/24/2022 12:49:22 - INFO - codeparrot_training - Step 8695: {'lr': 0.000476380630161168, 'samples': 4452352, 'steps': 8695, 'loss/train': 8.204840660095215} +02/24/2022 12:49:27 - INFO - codeparrot_training - Step 8696: {'lr': 0.00047637368711319863, 'samples': 4452864, 'steps': 8696, 'loss/train': 1.8550820350646973} +02/24/2022 12:49:31 - INFO - codeparrot_training - Step 8697: {'lr': 0.00047636674309551626, 'samples': 4453376, 'steps': 8697, 'loss/train': 1.9099466800689697} +02/24/2022 12:49:36 - INFO - codeparrot_training - Step 8698: {'lr': 0.0004763597981081507, 'samples': 4453888, 'steps': 8698, 'loss/train': 1.665390968322754} +02/24/2022 12:49:40 - INFO - codeparrot_training - Step 8699: {'lr': 0.00047635285215113165, 'samples': 4454400, 'steps': 8699, 'loss/train': 2.509888172149658} +02/24/2022 12:49:46 - INFO - codeparrot_training - Step 8700: {'lr': 0.0004763459052244888, 'samples': 4454912, 'steps': 8700, 'loss/train': 0.6205786466598511} +02/24/2022 12:49:49 - INFO - codeparrot_training - Step 8701: {'lr': 0.0004763389573282521, 'samples': 4455424, 'steps': 8701, 'loss/train': 1.8865984678268433} +02/24/2022 12:49:55 - INFO - codeparrot_training - Step 8702: {'lr': 0.00047633200846245106, 'samples': 4455936, 'steps': 8702, 'loss/train': 2.541926383972168} +02/24/2022 12:49:58 - INFO - codeparrot_training - Step 8703: {'lr': 0.0004763250586271156, 'samples': 4456448, 'steps': 8703, 'loss/train': 2.171534776687622} +02/24/2022 12:50:04 - INFO - codeparrot_training - Step 8704: {'lr': 0.00047631810782227535, 'samples': 4456960, 'steps': 8704, 'loss/train': 1.604278802871704} +02/24/2022 12:50:07 - INFO - codeparrot_training - Step 8705: {'lr': 0.00047631115604796035, 'samples': 4457472, 'steps': 8705, 'loss/train': 1.72527277469635} +02/24/2022 12:50:14 - INFO - codeparrot_training - Step 8706: {'lr': 0.0004763042033042001, 'samples': 4457984, 'steps': 8706, 'loss/train': 0.8240301012992859} +02/24/2022 12:50:17 - INFO - codeparrot_training - Step 8707: {'lr': 0.0004762972495910246, 'samples': 4458496, 'steps': 8707, 'loss/train': 1.293099284172058} +02/24/2022 12:50:23 - INFO - codeparrot_training - Step 8708: {'lr': 0.00047629029490846346, 'samples': 4459008, 'steps': 8708, 'loss/train': 2.628904104232788} +02/24/2022 12:50:26 - INFO - codeparrot_training - Step 8709: {'lr': 0.0004762833392565466, 'samples': 4459520, 'steps': 8709, 'loss/train': 1.8404642343521118} +02/24/2022 12:50:32 - INFO - codeparrot_training - Step 8710: {'lr': 0.00047627638263530374, 'samples': 4460032, 'steps': 8710, 'loss/train': 1.2400083541870117} +02/24/2022 12:50:35 - INFO - codeparrot_training - Step 8711: {'lr': 0.00047626942504476477, 'samples': 4460544, 'steps': 8711, 'loss/train': 2.5367848873138428} +02/24/2022 12:50:41 - INFO - codeparrot_training - Step 8712: {'lr': 0.00047626246648495936, 'samples': 4461056, 'steps': 8712, 'loss/train': 2.0482499599456787} +02/24/2022 12:50:44 - INFO - codeparrot_training - Step 8713: {'lr': 0.0004762555069559175, 'samples': 4461568, 'steps': 8713, 'loss/train': 2.862839937210083} +02/24/2022 12:50:49 - INFO - codeparrot_training - Step 8714: {'lr': 0.00047624854645766875, 'samples': 4462080, 'steps': 8714, 'loss/train': 3.66365122795105} +02/24/2022 12:50:53 - INFO - codeparrot_training - Step 8715: {'lr': 0.0004762415849902431, 'samples': 4462592, 'steps': 8715, 'loss/train': 1.9151747226715088} +02/24/2022 12:50:59 - INFO - codeparrot_training - Step 8716: {'lr': 0.0004762346225536703, 'samples': 4463104, 'steps': 8716, 'loss/train': 2.9629907608032227} +02/24/2022 12:51:03 - INFO - codeparrot_training - Step 8717: {'lr': 0.0004762276591479804, 'samples': 4463616, 'steps': 8717, 'loss/train': 2.2446770668029785} +02/24/2022 12:51:08 - INFO - codeparrot_training - Step 8718: {'lr': 0.00047622069477320285, 'samples': 4464128, 'steps': 8718, 'loss/train': 1.707044243812561} +02/24/2022 12:51:12 - INFO - codeparrot_training - Step 8719: {'lr': 0.0004762137294293678, 'samples': 4464640, 'steps': 8719, 'loss/train': 1.3499451875686646} +02/24/2022 12:51:17 - INFO - codeparrot_training - Step 8720: {'lr': 0.0004762067631165049, 'samples': 4465152, 'steps': 8720, 'loss/train': 4.016249179840088} +02/24/2022 12:51:21 - INFO - codeparrot_training - Step 8721: {'lr': 0.0004761997958346441, 'samples': 4465664, 'steps': 8721, 'loss/train': 1.7870075702667236} +02/24/2022 12:51:26 - INFO - codeparrot_training - Step 8722: {'lr': 0.00047619282758381513, 'samples': 4466176, 'steps': 8722, 'loss/train': 2.3704593181610107} +02/24/2022 12:51:30 - INFO - codeparrot_training - Step 8723: {'lr': 0.0004761858583640479, 'samples': 4466688, 'steps': 8723, 'loss/train': 2.6884233951568604} +02/24/2022 12:51:35 - INFO - codeparrot_training - Step 8724: {'lr': 0.00047617888817537234, 'samples': 4467200, 'steps': 8724, 'loss/train': 1.7528960704803467} +02/24/2022 12:51:39 - INFO - codeparrot_training - Step 8725: {'lr': 0.00047617191701781824, 'samples': 4467712, 'steps': 8725, 'loss/train': 3.846125364303589} +02/24/2022 12:51:45 - INFO - codeparrot_training - Step 8726: {'lr': 0.0004761649448914155, 'samples': 4468224, 'steps': 8726, 'loss/train': 1.9477664232254028} +02/24/2022 12:51:48 - INFO - codeparrot_training - Step 8727: {'lr': 0.0004761579717961939, 'samples': 4468736, 'steps': 8727, 'loss/train': 2.341510057449341} +02/24/2022 12:51:54 - INFO - codeparrot_training - Step 8728: {'lr': 0.0004761509977321834, 'samples': 4469248, 'steps': 8728, 'loss/train': 2.20957088470459} +02/24/2022 12:51:57 - INFO - codeparrot_training - Step 8729: {'lr': 0.0004761440226994138, 'samples': 4469760, 'steps': 8729, 'loss/train': 2.3531336784362793} +02/24/2022 12:52:03 - INFO - codeparrot_training - Step 8730: {'lr': 0.000476137046697915, 'samples': 4470272, 'steps': 8730, 'loss/train': 1.7737395763397217} +02/24/2022 12:52:06 - INFO - codeparrot_training - Step 8731: {'lr': 0.0004761300697277169, 'samples': 4470784, 'steps': 8731, 'loss/train': 2.7883336544036865} +02/24/2022 12:52:12 - INFO - codeparrot_training - Step 8732: {'lr': 0.0004761230917888494, 'samples': 4471296, 'steps': 8732, 'loss/train': 2.4218623638153076} +02/24/2022 12:52:15 - INFO - codeparrot_training - Step 8733: {'lr': 0.00047611611288134236, 'samples': 4471808, 'steps': 8733, 'loss/train': 2.199613571166992} +02/24/2022 12:52:21 - INFO - codeparrot_training - Step 8734: {'lr': 0.00047610913300522576, 'samples': 4472320, 'steps': 8734, 'loss/train': 2.6561696529388428} +02/24/2022 12:52:24 - INFO - codeparrot_training - Step 8735: {'lr': 0.00047610215216052946, 'samples': 4472832, 'steps': 8735, 'loss/train': 2.19661808013916} +02/24/2022 12:52:30 - INFO - codeparrot_training - Step 8736: {'lr': 0.0004760951703472832, 'samples': 4473344, 'steps': 8736, 'loss/train': 3.079450845718384} +02/24/2022 12:52:33 - INFO - codeparrot_training - Step 8737: {'lr': 0.0004760881875655171, 'samples': 4473856, 'steps': 8737, 'loss/train': 1.9540948867797852} +02/24/2022 12:52:40 - INFO - codeparrot_training - Step 8738: {'lr': 0.000476081203815261, 'samples': 4474368, 'steps': 8738, 'loss/train': 2.0764429569244385} +02/24/2022 12:52:43 - INFO - codeparrot_training - Step 8739: {'lr': 0.0004760742190965447, 'samples': 4474880, 'steps': 8739, 'loss/train': 1.7902617454528809} +02/24/2022 12:52:49 - INFO - codeparrot_training - Step 8740: {'lr': 0.0004760672334093984, 'samples': 4475392, 'steps': 8740, 'loss/train': 2.467662811279297} +02/24/2022 12:52:52 - INFO - codeparrot_training - Step 8741: {'lr': 0.0004760602467538517, 'samples': 4475904, 'steps': 8741, 'loss/train': 8.840398788452148} +02/24/2022 12:52:58 - INFO - codeparrot_training - Step 8742: {'lr': 0.0004760532591299348, 'samples': 4476416, 'steps': 8742, 'loss/train': 2.396095037460327} +02/24/2022 12:53:01 - INFO - codeparrot_training - Step 8743: {'lr': 0.00047604627053767754, 'samples': 4476928, 'steps': 8743, 'loss/train': 1.8426545858383179} +02/24/2022 12:53:07 - INFO - codeparrot_training - Step 8744: {'lr': 0.0004760392809771098, 'samples': 4477440, 'steps': 8744, 'loss/train': 3.0879733562469482} +02/24/2022 12:53:10 - INFO - codeparrot_training - Step 8745: {'lr': 0.00047603229044826146, 'samples': 4477952, 'steps': 8745, 'loss/train': 2.3374834060668945} +02/24/2022 12:53:16 - INFO - codeparrot_training - Step 8746: {'lr': 0.00047602529895116264, 'samples': 4478464, 'steps': 8746, 'loss/train': 1.066630244255066} +02/24/2022 12:53:19 - INFO - codeparrot_training - Step 8747: {'lr': 0.0004760183064858432, 'samples': 4478976, 'steps': 8747, 'loss/train': 2.6336004734039307} +02/24/2022 12:53:25 - INFO - codeparrot_training - Step 8748: {'lr': 0.0004760113130523331, 'samples': 4479488, 'steps': 8748, 'loss/train': 2.3310763835906982} +02/24/2022 12:53:28 - INFO - codeparrot_training - Step 8749: {'lr': 0.0004760043186506624, 'samples': 4480000, 'steps': 8749, 'loss/train': 1.129913091659546} +02/24/2022 12:53:34 - INFO - codeparrot_training - Step 8750: {'lr': 0.0004759973232808609, 'samples': 4480512, 'steps': 8750, 'loss/train': 1.3763890266418457} +02/24/2022 12:53:37 - INFO - codeparrot_training - Step 8751: {'lr': 0.0004759903269429585, 'samples': 4481024, 'steps': 8751, 'loss/train': 2.6321349143981934} +02/24/2022 12:53:43 - INFO - codeparrot_training - Step 8752: {'lr': 0.00047598332963698543, 'samples': 4481536, 'steps': 8752, 'loss/train': 2.5127756595611572} +02/24/2022 12:53:47 - INFO - codeparrot_training - Step 8753: {'lr': 0.00047597633136297154, 'samples': 4482048, 'steps': 8753, 'loss/train': 2.2823503017425537} +02/24/2022 12:53:52 - INFO - codeparrot_training - Step 8754: {'lr': 0.0004759693321209467, 'samples': 4482560, 'steps': 8754, 'loss/train': 1.953088641166687} +02/24/2022 12:53:56 - INFO - codeparrot_training - Step 8755: {'lr': 0.00047596233191094114, 'samples': 4483072, 'steps': 8755, 'loss/train': 2.757328748703003} +02/24/2022 12:54:01 - INFO - codeparrot_training - Step 8756: {'lr': 0.0004759553307329846, 'samples': 4483584, 'steps': 8756, 'loss/train': 2.455230951309204} +02/24/2022 12:54:05 - INFO - codeparrot_training - Step 8757: {'lr': 0.00047594832858710725, 'samples': 4484096, 'steps': 8757, 'loss/train': 1.6920143365859985} +02/24/2022 12:54:10 - INFO - codeparrot_training - Step 8758: {'lr': 0.0004759413254733389, 'samples': 4484608, 'steps': 8758, 'loss/train': 1.721900224685669} +02/24/2022 12:54:14 - INFO - codeparrot_training - Step 8759: {'lr': 0.0004759343213917097, 'samples': 4485120, 'steps': 8759, 'loss/train': 2.8824124336242676} +02/24/2022 12:54:19 - INFO - codeparrot_training - Step 8760: {'lr': 0.0004759273163422496, 'samples': 4485632, 'steps': 8760, 'loss/train': 1.133579969406128} +02/24/2022 12:54:23 - INFO - codeparrot_training - Step 8761: {'lr': 0.00047592031032498875, 'samples': 4486144, 'steps': 8761, 'loss/train': 2.9116897583007812} +02/24/2022 12:54:30 - INFO - codeparrot_training - Step 8762: {'lr': 0.00047591330333995684, 'samples': 4486656, 'steps': 8762, 'loss/train': 2.5794615745544434} +02/24/2022 12:54:33 - INFO - codeparrot_training - Step 8763: {'lr': 0.0004759062953871842, 'samples': 4487168, 'steps': 8763, 'loss/train': 2.7960944175720215} +02/24/2022 12:54:39 - INFO - codeparrot_training - Step 8764: {'lr': 0.0004758992864667007, 'samples': 4487680, 'steps': 8764, 'loss/train': 2.0763607025146484} +02/24/2022 12:54:42 - INFO - codeparrot_training - Step 8765: {'lr': 0.0004758922765785363, 'samples': 4488192, 'steps': 8765, 'loss/train': 1.8580307960510254} +02/24/2022 12:54:47 - INFO - codeparrot_training - Step 8766: {'lr': 0.00047588526572272117, 'samples': 4488704, 'steps': 8766, 'loss/train': 1.993737816810608} +02/24/2022 12:54:51 - INFO - codeparrot_training - Step 8767: {'lr': 0.0004758782538992853, 'samples': 4489216, 'steps': 8767, 'loss/train': 2.3059587478637695} +02/24/2022 12:54:56 - INFO - codeparrot_training - Step 8768: {'lr': 0.00047587124110825874, 'samples': 4489728, 'steps': 8768, 'loss/train': 2.352890729904175} +02/24/2022 12:55:02 - INFO - codeparrot_training - Step 8769: {'lr': 0.0004758642273496714, 'samples': 4490240, 'steps': 8769, 'loss/train': 1.7700345516204834} +02/24/2022 12:55:05 - INFO - codeparrot_training - Step 8770: {'lr': 0.0004758572126235535, 'samples': 4490752, 'steps': 8770, 'loss/train': 2.174175977706909} +02/24/2022 12:55:11 - INFO - codeparrot_training - Step 8771: {'lr': 0.0004758501969299351, 'samples': 4491264, 'steps': 8771, 'loss/train': 2.276357650756836} +02/24/2022 12:55:14 - INFO - codeparrot_training - Step 8772: {'lr': 0.0004758431802688461, 'samples': 4491776, 'steps': 8772, 'loss/train': 0.6365671753883362} +02/24/2022 12:55:21 - INFO - codeparrot_training - Step 8773: {'lr': 0.00047583616264031657, 'samples': 4492288, 'steps': 8773, 'loss/train': 2.4573609828948975} +02/24/2022 12:55:25 - INFO - codeparrot_training - Step 8774: {'lr': 0.00047582914404437673, 'samples': 4492800, 'steps': 8774, 'loss/train': 2.514500617980957} +02/24/2022 12:55:30 - INFO - codeparrot_training - Step 8775: {'lr': 0.00047582212448105647, 'samples': 4493312, 'steps': 8775, 'loss/train': 1.933380126953125} +02/24/2022 12:55:34 - INFO - codeparrot_training - Step 8776: {'lr': 0.000475815103950386, 'samples': 4493824, 'steps': 8776, 'loss/train': 2.333723783493042} +02/24/2022 12:55:39 - INFO - codeparrot_training - Step 8777: {'lr': 0.00047580808245239526, 'samples': 4494336, 'steps': 8777, 'loss/train': 2.663935899734497} +02/24/2022 12:55:43 - INFO - codeparrot_training - Step 8778: {'lr': 0.0004758010599871145, 'samples': 4494848, 'steps': 8778, 'loss/train': 2.3152177333831787} +02/24/2022 12:55:48 - INFO - codeparrot_training - Step 8779: {'lr': 0.0004757940365545736, 'samples': 4495360, 'steps': 8779, 'loss/train': 1.993561029434204} +02/24/2022 12:55:52 - INFO - codeparrot_training - Step 8780: {'lr': 0.0004757870121548028, 'samples': 4495872, 'steps': 8780, 'loss/train': 2.6793200969696045} +02/24/2022 12:55:57 - INFO - codeparrot_training - Step 8781: {'lr': 0.00047577998678783207, 'samples': 4496384, 'steps': 8781, 'loss/train': 2.725834608078003} +02/24/2022 12:56:01 - INFO - codeparrot_training - Step 8782: {'lr': 0.0004757729604536917, 'samples': 4496896, 'steps': 8782, 'loss/train': 2.812669515609741} +02/24/2022 12:56:07 - INFO - codeparrot_training - Step 8783: {'lr': 0.0004757659331524115, 'samples': 4497408, 'steps': 8783, 'loss/train': 3.419722080230713} +02/24/2022 12:56:10 - INFO - codeparrot_training - Step 8784: {'lr': 0.00047575890488402183, 'samples': 4497920, 'steps': 8784, 'loss/train': 2.7267181873321533} +02/24/2022 12:56:16 - INFO - codeparrot_training - Step 8785: {'lr': 0.00047575187564855264, 'samples': 4498432, 'steps': 8785, 'loss/train': 1.5478872060775757} +02/24/2022 12:56:19 - INFO - codeparrot_training - Step 8786: {'lr': 0.00047574484544603415, 'samples': 4498944, 'steps': 8786, 'loss/train': 2.7265970706939697} +02/24/2022 12:56:25 - INFO - codeparrot_training - Step 8787: {'lr': 0.00047573781427649644, 'samples': 4499456, 'steps': 8787, 'loss/train': 2.3036603927612305} +02/24/2022 12:56:28 - INFO - codeparrot_training - Step 8788: {'lr': 0.00047573078213996954, 'samples': 4499968, 'steps': 8788, 'loss/train': 2.4199509620666504} +02/24/2022 12:56:34 - INFO - codeparrot_training - Step 8789: {'lr': 0.0004757237490364836, 'samples': 4500480, 'steps': 8789, 'loss/train': 2.5069308280944824} +02/24/2022 12:56:37 - INFO - codeparrot_training - Step 8790: {'lr': 0.00047571671496606893, 'samples': 4500992, 'steps': 8790, 'loss/train': 2.3331410884857178} +02/24/2022 12:56:43 - INFO - codeparrot_training - Step 8791: {'lr': 0.0004757096799287555, 'samples': 4501504, 'steps': 8791, 'loss/train': 1.9628428220748901} +02/24/2022 12:56:46 - INFO - codeparrot_training - Step 8792: {'lr': 0.0004757026439245735, 'samples': 4502016, 'steps': 8792, 'loss/train': 2.3894236087799072} +02/24/2022 12:56:52 - INFO - codeparrot_training - Step 8793: {'lr': 0.00047569560695355295, 'samples': 4502528, 'steps': 8793, 'loss/train': 3.1706998348236084} +02/24/2022 12:56:55 - INFO - codeparrot_training - Step 8794: {'lr': 0.0004756885690157241, 'samples': 4503040, 'steps': 8794, 'loss/train': 1.8535314798355103} +02/24/2022 12:57:01 - INFO - codeparrot_training - Step 8795: {'lr': 0.00047568153011111715, 'samples': 4503552, 'steps': 8795, 'loss/train': 1.6466879844665527} +02/24/2022 12:57:04 - INFO - codeparrot_training - Step 8796: {'lr': 0.00047567449023976213, 'samples': 4504064, 'steps': 8796, 'loss/train': 2.8835508823394775} +02/24/2022 12:57:11 - INFO - codeparrot_training - Step 8797: {'lr': 0.00047566744940168924, 'samples': 4504576, 'steps': 8797, 'loss/train': 2.6650779247283936} +02/24/2022 12:57:14 - INFO - codeparrot_training - Step 8798: {'lr': 0.0004756604075969287, 'samples': 4505088, 'steps': 8798, 'loss/train': 1.3685665130615234} +02/24/2022 12:57:20 - INFO - codeparrot_training - Step 8799: {'lr': 0.0004756533648255106, 'samples': 4505600, 'steps': 8799, 'loss/train': 2.1950764656066895} +02/24/2022 12:57:24 - INFO - codeparrot_training - Step 8800: {'lr': 0.0004756463210874652, 'samples': 4506112, 'steps': 8800, 'loss/train': 2.122185230255127} +02/24/2022 12:57:29 - INFO - codeparrot_training - Step 8801: {'lr': 0.0004756392763828226, 'samples': 4506624, 'steps': 8801, 'loss/train': 1.2308300733566284} +02/24/2022 12:57:33 - INFO - codeparrot_training - Step 8802: {'lr': 0.0004756322307116129, 'samples': 4507136, 'steps': 8802, 'loss/train': 2.173133134841919} +02/24/2022 12:57:38 - INFO - codeparrot_training - Step 8803: {'lr': 0.0004756251840738664, 'samples': 4507648, 'steps': 8803, 'loss/train': 2.749377489089966} +02/24/2022 12:57:42 - INFO - codeparrot_training - Step 8804: {'lr': 0.00047561813646961325, 'samples': 4508160, 'steps': 8804, 'loss/train': 1.2036086320877075} +02/24/2022 12:57:47 - INFO - codeparrot_training - Step 8805: {'lr': 0.00047561108789888367, 'samples': 4508672, 'steps': 8805, 'loss/train': 2.545456886291504} +02/24/2022 12:57:51 - INFO - codeparrot_training - Step 8806: {'lr': 0.0004756040383617078, 'samples': 4509184, 'steps': 8806, 'loss/train': 2.6579318046569824} +02/24/2022 12:57:56 - INFO - codeparrot_training - Step 8807: {'lr': 0.00047559698785811595, 'samples': 4509696, 'steps': 8807, 'loss/train': 2.8812873363494873} +02/24/2022 12:58:00 - INFO - codeparrot_training - Step 8808: {'lr': 0.0004755899363881382, 'samples': 4510208, 'steps': 8808, 'loss/train': 2.4146788120269775} +02/24/2022 12:58:06 - INFO - codeparrot_training - Step 8809: {'lr': 0.00047558288395180477, 'samples': 4510720, 'steps': 8809, 'loss/train': 1.9129815101623535} +02/24/2022 12:58:10 - INFO - codeparrot_training - Step 8810: {'lr': 0.0004755758305491459, 'samples': 4511232, 'steps': 8810, 'loss/train': 1.725149154663086} +02/24/2022 12:58:15 - INFO - codeparrot_training - Step 8811: {'lr': 0.0004755687761801918, 'samples': 4511744, 'steps': 8811, 'loss/train': 0.24333910644054413} +02/24/2022 12:58:19 - INFO - codeparrot_training - Step 8812: {'lr': 0.00047556172084497274, 'samples': 4512256, 'steps': 8812, 'loss/train': 2.304560899734497} +02/24/2022 12:58:24 - INFO - codeparrot_training - Step 8813: {'lr': 0.0004755546645435188, 'samples': 4512768, 'steps': 8813, 'loss/train': 2.0783276557922363} +02/24/2022 12:58:28 - INFO - codeparrot_training - Step 8814: {'lr': 0.0004755476072758604, 'samples': 4513280, 'steps': 8814, 'loss/train': 1.448231816291809} +02/24/2022 12:58:33 - INFO - codeparrot_training - Step 8815: {'lr': 0.0004755405490420276, 'samples': 4513792, 'steps': 8815, 'loss/train': 2.2205657958984375} +02/24/2022 12:58:37 - INFO - codeparrot_training - Step 8816: {'lr': 0.0004755334898420507, 'samples': 4514304, 'steps': 8816, 'loss/train': 2.519583225250244} +02/24/2022 12:58:42 - INFO - codeparrot_training - Step 8817: {'lr': 0.00047552642967596, 'samples': 4514816, 'steps': 8817, 'loss/train': 2.266977310180664} +02/24/2022 12:58:46 - INFO - codeparrot_training - Step 8818: {'lr': 0.00047551936854378564, 'samples': 4515328, 'steps': 8818, 'loss/train': 1.1768916845321655} +02/24/2022 12:58:52 - INFO - codeparrot_training - Step 8819: {'lr': 0.00047551230644555793, 'samples': 4515840, 'steps': 8819, 'loss/train': 2.3790745735168457} +02/24/2022 12:58:55 - INFO - codeparrot_training - Step 8820: {'lr': 0.00047550524338130706, 'samples': 4516352, 'steps': 8820, 'loss/train': 2.516787052154541} +02/24/2022 12:59:01 - INFO - codeparrot_training - Step 8821: {'lr': 0.00047549817935106344, 'samples': 4516864, 'steps': 8821, 'loss/train': 2.075530767440796} +02/24/2022 12:59:04 - INFO - codeparrot_training - Step 8822: {'lr': 0.00047549111435485716, 'samples': 4517376, 'steps': 8822, 'loss/train': 1.9331542253494263} +02/24/2022 12:59:10 - INFO - codeparrot_training - Step 8823: {'lr': 0.0004754840483927185, 'samples': 4517888, 'steps': 8823, 'loss/train': 2.2771711349487305} +02/24/2022 12:59:13 - INFO - codeparrot_training - Step 8824: {'lr': 0.0004754769814646779, 'samples': 4518400, 'steps': 8824, 'loss/train': 1.1682443618774414} +02/24/2022 12:59:19 - INFO - codeparrot_training - Step 8825: {'lr': 0.00047546991357076544, 'samples': 4518912, 'steps': 8825, 'loss/train': 2.4097707271575928} +02/24/2022 12:59:22 - INFO - codeparrot_training - Step 8826: {'lr': 0.00047546284471101143, 'samples': 4519424, 'steps': 8826, 'loss/train': 0.9507866501808167} +02/24/2022 12:59:28 - INFO - codeparrot_training - Step 8827: {'lr': 0.00047545577488544623, 'samples': 4519936, 'steps': 8827, 'loss/train': 2.25288987159729} +02/24/2022 12:59:31 - INFO - codeparrot_training - Step 8828: {'lr': 0.0004754487040941001, 'samples': 4520448, 'steps': 8828, 'loss/train': 1.9752769470214844} +02/24/2022 12:59:37 - INFO - codeparrot_training - Step 8829: {'lr': 0.00047544163233700324, 'samples': 4520960, 'steps': 8829, 'loss/train': 2.9149653911590576} +02/24/2022 12:59:41 - INFO - codeparrot_training - Step 8830: {'lr': 0.00047543455961418605, 'samples': 4521472, 'steps': 8830, 'loss/train': 1.2913155555725098} +02/24/2022 12:59:46 - INFO - codeparrot_training - Step 8831: {'lr': 0.0004754274859256788, 'samples': 4521984, 'steps': 8831, 'loss/train': 2.1174747943878174} +02/24/2022 12:59:50 - INFO - codeparrot_training - Step 8832: {'lr': 0.0004754204112715118, 'samples': 4522496, 'steps': 8832, 'loss/train': 2.7904491424560547} +02/24/2022 12:59:56 - INFO - codeparrot_training - Step 8833: {'lr': 0.0004754133356517153, 'samples': 4523008, 'steps': 8833, 'loss/train': 2.5591137409210205} +02/24/2022 12:59:59 - INFO - codeparrot_training - Step 8834: {'lr': 0.0004754062590663196, 'samples': 4523520, 'steps': 8834, 'loss/train': 2.704284429550171} +02/24/2022 13:00:05 - INFO - codeparrot_training - Step 8835: {'lr': 0.00047539918151535515, 'samples': 4524032, 'steps': 8835, 'loss/train': 0.2202148139476776} +02/24/2022 13:00:08 - INFO - codeparrot_training - Step 8836: {'lr': 0.00047539210299885217, 'samples': 4524544, 'steps': 8836, 'loss/train': 1.999778151512146} +02/24/2022 13:00:14 - INFO - codeparrot_training - Step 8837: {'lr': 0.00047538502351684097, 'samples': 4525056, 'steps': 8837, 'loss/train': 2.7448577880859375} +02/24/2022 13:00:19 - INFO - codeparrot_training - Step 8838: {'lr': 0.0004753779430693519, 'samples': 4525568, 'steps': 8838, 'loss/train': 1.8795011043548584} +02/24/2022 13:00:23 - INFO - codeparrot_training - Step 8839: {'lr': 0.0004753708616564153, 'samples': 4526080, 'steps': 8839, 'loss/train': 1.963841199874878} +02/24/2022 13:00:28 - INFO - codeparrot_training - Step 8840: {'lr': 0.00047536377927806143, 'samples': 4526592, 'steps': 8840, 'loss/train': 1.7777982950210571} +02/24/2022 13:00:32 - INFO - codeparrot_training - Step 8841: {'lr': 0.0004753566959343207, 'samples': 4527104, 'steps': 8841, 'loss/train': 1.4867552518844604} +02/24/2022 13:00:37 - INFO - codeparrot_training - Step 8842: {'lr': 0.0004753496116252235, 'samples': 4527616, 'steps': 8842, 'loss/train': 1.454268217086792} +02/24/2022 13:00:41 - INFO - codeparrot_training - Step 8843: {'lr': 0.0004753425263508001, 'samples': 4528128, 'steps': 8843, 'loss/train': 2.5899415016174316} +02/24/2022 13:00:47 - INFO - codeparrot_training - Step 8844: {'lr': 0.0004753354401110809, 'samples': 4528640, 'steps': 8844, 'loss/train': 1.742884635925293} +02/24/2022 13:00:51 - INFO - codeparrot_training - Step 8845: {'lr': 0.00047532835290609623, 'samples': 4529152, 'steps': 8845, 'loss/train': 1.3017199039459229} +02/24/2022 13:00:54 - INFO - codeparrot_training - Step 8846: {'lr': 0.00047532126473587635, 'samples': 4529664, 'steps': 8846, 'loss/train': 2.9739320278167725} +02/24/2022 13:01:00 - INFO - codeparrot_training - Step 8847: {'lr': 0.0004753141756004518, 'samples': 4530176, 'steps': 8847, 'loss/train': 0.5695855021476746} +02/24/2022 13:01:05 - INFO - codeparrot_training - Step 8848: {'lr': 0.00047530708549985287, 'samples': 4530688, 'steps': 8848, 'loss/train': 1.966842532157898} +02/24/2022 13:01:09 - INFO - codeparrot_training - Step 8849: {'lr': 0.00047529999443410986, 'samples': 4531200, 'steps': 8849, 'loss/train': 2.7894649505615234} +02/24/2022 13:01:14 - INFO - codeparrot_training - Step 8850: {'lr': 0.0004752929024032533, 'samples': 4531712, 'steps': 8850, 'loss/train': 2.735463857650757} +02/24/2022 13:01:18 - INFO - codeparrot_training - Step 8851: {'lr': 0.0004752858094073134, 'samples': 4532224, 'steps': 8851, 'loss/train': 3.1281850337982178} +02/24/2022 13:01:23 - INFO - codeparrot_training - Step 8852: {'lr': 0.0004752787154463207, 'samples': 4532736, 'steps': 8852, 'loss/train': 1.266960620880127} +02/24/2022 13:01:27 - INFO - codeparrot_training - Step 8853: {'lr': 0.0004752716205203055, 'samples': 4533248, 'steps': 8853, 'loss/train': 2.5410046577453613} +02/24/2022 13:01:33 - INFO - codeparrot_training - Step 8854: {'lr': 0.0004752645246292982, 'samples': 4533760, 'steps': 8854, 'loss/train': 2.573962450027466} +02/24/2022 13:01:37 - INFO - codeparrot_training - Step 8855: {'lr': 0.0004752574277733292, 'samples': 4534272, 'steps': 8855, 'loss/train': 2.521437406539917} +02/24/2022 13:01:40 - INFO - codeparrot_training - Step 8856: {'lr': 0.0004752503299524289, 'samples': 4534784, 'steps': 8856, 'loss/train': 2.4318201541900635} +02/24/2022 13:01:46 - INFO - codeparrot_training - Step 8857: {'lr': 0.0004752432311666277, 'samples': 4535296, 'steps': 8857, 'loss/train': 2.0192575454711914} +02/24/2022 13:01:49 - INFO - codeparrot_training - Step 8858: {'lr': 0.0004752361314159561, 'samples': 4535808, 'steps': 8858, 'loss/train': 3.0350465774536133} +02/24/2022 13:01:55 - INFO - codeparrot_training - Step 8859: {'lr': 0.0004752290307004444, 'samples': 4536320, 'steps': 8859, 'loss/train': 2.6406280994415283} +02/24/2022 13:01:58 - INFO - codeparrot_training - Step 8860: {'lr': 0.000475221929020123, 'samples': 4536832, 'steps': 8860, 'loss/train': 1.559017300605774} +02/24/2022 13:02:04 - INFO - codeparrot_training - Step 8861: {'lr': 0.00047521482637502246, 'samples': 4537344, 'steps': 8861, 'loss/train': 1.950171709060669} +02/24/2022 13:02:07 - INFO - codeparrot_training - Step 8862: {'lr': 0.00047520772276517297, 'samples': 4537856, 'steps': 8862, 'loss/train': 2.460857391357422} +02/24/2022 13:02:13 - INFO - codeparrot_training - Step 8863: {'lr': 0.0004752006181906052, 'samples': 4538368, 'steps': 8863, 'loss/train': 4.379079341888428} +02/24/2022 13:02:16 - INFO - codeparrot_training - Step 8864: {'lr': 0.00047519351265134954, 'samples': 4538880, 'steps': 8864, 'loss/train': 1.812307357788086} +02/24/2022 13:02:23 - INFO - codeparrot_training - Step 8865: {'lr': 0.0004751864061474364, 'samples': 4539392, 'steps': 8865, 'loss/train': 2.5313527584075928} +02/24/2022 13:02:26 - INFO - codeparrot_training - Step 8866: {'lr': 0.000475179298678896, 'samples': 4539904, 'steps': 8866, 'loss/train': 1.605868935585022} +02/24/2022 13:02:32 - INFO - codeparrot_training - Step 8867: {'lr': 0.0004751721902457592, 'samples': 4540416, 'steps': 8867, 'loss/train': 2.56888484954834} +02/24/2022 13:02:35 - INFO - codeparrot_training - Step 8868: {'lr': 0.0004751650808480561, 'samples': 4540928, 'steps': 8868, 'loss/train': 2.6334800720214844} +02/24/2022 13:02:41 - INFO - codeparrot_training - Step 8869: {'lr': 0.00047515797048581734, 'samples': 4541440, 'steps': 8869, 'loss/train': 2.85691499710083} +02/24/2022 13:02:44 - INFO - codeparrot_training - Step 8870: {'lr': 0.00047515085915907334, 'samples': 4541952, 'steps': 8870, 'loss/train': 1.6506918668746948} +02/24/2022 13:02:50 - INFO - codeparrot_training - Step 8871: {'lr': 0.00047514374686785454, 'samples': 4542464, 'steps': 8871, 'loss/train': 2.445449113845825} +02/24/2022 13:02:53 - INFO - codeparrot_training - Step 8872: {'lr': 0.00047513663361219144, 'samples': 4542976, 'steps': 8872, 'loss/train': 2.0163402557373047} +02/24/2022 13:02:59 - INFO - codeparrot_training - Step 8873: {'lr': 0.00047512951939211447, 'samples': 4543488, 'steps': 8873, 'loss/train': 2.0149426460266113} +02/24/2022 13:03:02 - INFO - codeparrot_training - Step 8874: {'lr': 0.0004751224042076542, 'samples': 4544000, 'steps': 8874, 'loss/train': 2.9443156719207764} +02/24/2022 13:03:08 - INFO - codeparrot_training - Step 8875: {'lr': 0.0004751152880588409, 'samples': 4544512, 'steps': 8875, 'loss/train': 2.747853994369507} +02/24/2022 13:03:12 - INFO - codeparrot_training - Step 8876: {'lr': 0.00047510817094570526, 'samples': 4545024, 'steps': 8876, 'loss/train': 1.9711774587631226} +02/24/2022 13:03:17 - INFO - codeparrot_training - Step 8877: {'lr': 0.0004751010528682777, 'samples': 4545536, 'steps': 8877, 'loss/train': 2.7659058570861816} +02/24/2022 13:03:21 - INFO - codeparrot_training - Step 8878: {'lr': 0.0004750939338265887, 'samples': 4546048, 'steps': 8878, 'loss/train': 2.2706782817840576} +02/24/2022 13:03:26 - INFO - codeparrot_training - Step 8879: {'lr': 0.0004750868138206688, 'samples': 4546560, 'steps': 8879, 'loss/train': 1.5981159210205078} +02/24/2022 13:03:30 - INFO - codeparrot_training - Step 8880: {'lr': 0.0004750796928505484, 'samples': 4547072, 'steps': 8880, 'loss/train': 2.0803921222686768} +02/24/2022 13:03:35 - INFO - codeparrot_training - Step 8881: {'lr': 0.0004750725709162581, 'samples': 4547584, 'steps': 8881, 'loss/train': 1.8010421991348267} +02/24/2022 13:03:39 - INFO - codeparrot_training - Step 8882: {'lr': 0.00047506544801782834, 'samples': 4548096, 'steps': 8882, 'loss/train': 2.5815365314483643} +02/24/2022 13:03:44 - INFO - codeparrot_training - Step 8883: {'lr': 0.00047505832415528973, 'samples': 4548608, 'steps': 8883, 'loss/train': 2.71773362159729} +02/24/2022 13:03:48 - INFO - codeparrot_training - Step 8884: {'lr': 0.0004750511993286727, 'samples': 4549120, 'steps': 8884, 'loss/train': 0.18191629648208618} +02/24/2022 13:03:54 - INFO - codeparrot_training - Step 8885: {'lr': 0.0004750440735380077, 'samples': 4549632, 'steps': 8885, 'loss/train': 2.0170774459838867} +02/24/2022 13:03:57 - INFO - codeparrot_training - Step 8886: {'lr': 0.00047503694678332543, 'samples': 4550144, 'steps': 8886, 'loss/train': 2.072664499282837} +02/24/2022 13:04:03 - INFO - codeparrot_training - Step 8887: {'lr': 0.00047502981906465634, 'samples': 4550656, 'steps': 8887, 'loss/train': 2.4431796073913574} +02/24/2022 13:04:06 - INFO - codeparrot_training - Step 8888: {'lr': 0.000475022690382031, 'samples': 4551168, 'steps': 8888, 'loss/train': 1.0116039514541626} +02/24/2022 13:04:12 - INFO - codeparrot_training - Step 8889: {'lr': 0.0004750155607354799, 'samples': 4551680, 'steps': 8889, 'loss/train': 2.342756986618042} +02/24/2022 13:04:15 - INFO - codeparrot_training - Step 8890: {'lr': 0.0004750084301250335, 'samples': 4552192, 'steps': 8890, 'loss/train': 2.442150831222534} +02/24/2022 13:04:22 - INFO - codeparrot_training - Step 8891: {'lr': 0.0004750012985507225, 'samples': 4552704, 'steps': 8891, 'loss/train': 2.6814463138580322} +02/24/2022 13:04:25 - INFO - codeparrot_training - Step 8892: {'lr': 0.0004749941660125774, 'samples': 4553216, 'steps': 8892, 'loss/train': 1.5645052194595337} +02/24/2022 13:04:31 - INFO - codeparrot_training - Step 8893: {'lr': 0.0004749870325106287, 'samples': 4553728, 'steps': 8893, 'loss/train': 2.6697940826416016} +02/24/2022 13:04:34 - INFO - codeparrot_training - Step 8894: {'lr': 0.00047497989804490693, 'samples': 4554240, 'steps': 8894, 'loss/train': 2.2933146953582764} +02/24/2022 13:04:40 - INFO - codeparrot_training - Step 8895: {'lr': 0.0004749727626154428, 'samples': 4554752, 'steps': 8895, 'loss/train': 2.0006628036499023} +02/24/2022 13:04:43 - INFO - codeparrot_training - Step 8896: {'lr': 0.0004749656262222668, 'samples': 4555264, 'steps': 8896, 'loss/train': 2.4212472438812256} +02/24/2022 13:04:49 - INFO - codeparrot_training - Step 8897: {'lr': 0.0004749584888654095, 'samples': 4555776, 'steps': 8897, 'loss/train': 2.1934757232666016} +02/24/2022 13:04:52 - INFO - codeparrot_training - Step 8898: {'lr': 0.0004749513505449014, 'samples': 4556288, 'steps': 8898, 'loss/train': 2.125549793243408} +02/24/2022 13:04:58 - INFO - codeparrot_training - Step 8899: {'lr': 0.00047494421126077313, 'samples': 4556800, 'steps': 8899, 'loss/train': 2.5988833904266357} +02/24/2022 13:05:01 - INFO - codeparrot_training - Step 8900: {'lr': 0.0004749370710130554, 'samples': 4557312, 'steps': 8900, 'loss/train': 2.2430381774902344} +02/24/2022 13:05:08 - INFO - codeparrot_training - Step 8901: {'lr': 0.0004749299298017786, 'samples': 4557824, 'steps': 8901, 'loss/train': 1.6011979579925537} +02/24/2022 13:05:11 - INFO - codeparrot_training - Step 8902: {'lr': 0.00047492278762697337, 'samples': 4558336, 'steps': 8902, 'loss/train': 2.3110179901123047} +02/24/2022 13:05:17 - INFO - codeparrot_training - Step 8903: {'lr': 0.0004749156444886704, 'samples': 4558848, 'steps': 8903, 'loss/train': 3.0487117767333984} +02/24/2022 13:05:20 - INFO - codeparrot_training - Step 8904: {'lr': 0.0004749085003869003, 'samples': 4559360, 'steps': 8904, 'loss/train': 0.6672154664993286} +02/24/2022 13:05:26 - INFO - codeparrot_training - Step 8905: {'lr': 0.00047490135532169347, 'samples': 4559872, 'steps': 8905, 'loss/train': 2.1986923217773438} +02/24/2022 13:05:29 - INFO - codeparrot_training - Step 8906: {'lr': 0.0004748942092930807, 'samples': 4560384, 'steps': 8906, 'loss/train': 2.151268720626831} +02/24/2022 13:05:35 - INFO - codeparrot_training - Step 8907: {'lr': 0.00047488706230109257, 'samples': 4560896, 'steps': 8907, 'loss/train': 2.0609793663024902} +02/24/2022 13:05:38 - INFO - codeparrot_training - Step 8908: {'lr': 0.00047487991434575963, 'samples': 4561408, 'steps': 8908, 'loss/train': 2.131075382232666} +02/24/2022 13:05:44 - INFO - codeparrot_training - Step 8909: {'lr': 0.0004748727654271126, 'samples': 4561920, 'steps': 8909, 'loss/train': 2.5970921516418457} +02/24/2022 13:05:47 - INFO - codeparrot_training - Step 8910: {'lr': 0.000474865615545182, 'samples': 4562432, 'steps': 8910, 'loss/train': 2.234992027282715} +02/24/2022 13:05:53 - INFO - codeparrot_training - Step 8911: {'lr': 0.0004748584646999985, 'samples': 4562944, 'steps': 8911, 'loss/train': 2.191291332244873} +02/24/2022 13:05:57 - INFO - codeparrot_training - Step 8912: {'lr': 0.0004748513128915928, 'samples': 4563456, 'steps': 8912, 'loss/train': 1.9498353004455566} +02/24/2022 13:06:02 - INFO - codeparrot_training - Step 8913: {'lr': 0.0004748441601199954, 'samples': 4563968, 'steps': 8913, 'loss/train': 2.004565715789795} +02/24/2022 13:06:06 - INFO - codeparrot_training - Step 8914: {'lr': 0.0004748370063852371, 'samples': 4564480, 'steps': 8914, 'loss/train': 3.04158353805542} +02/24/2022 13:06:11 - INFO - codeparrot_training - Step 8915: {'lr': 0.0004748298516873484, 'samples': 4564992, 'steps': 8915, 'loss/train': 2.4030370712280273} +02/24/2022 13:06:15 - INFO - codeparrot_training - Step 8916: {'lr': 0.00047482269602636, 'samples': 4565504, 'steps': 8916, 'loss/train': 2.060742139816284} +02/24/2022 13:06:20 - INFO - codeparrot_training - Step 8917: {'lr': 0.00047481553940230257, 'samples': 4566016, 'steps': 8917, 'loss/train': 2.0784223079681396} +02/24/2022 13:06:24 - INFO - codeparrot_training - Step 8918: {'lr': 0.0004748083818152067, 'samples': 4566528, 'steps': 8918, 'loss/train': 2.1121463775634766} +02/24/2022 13:06:29 - INFO - codeparrot_training - Step 8919: {'lr': 0.00047480122326510325, 'samples': 4567040, 'steps': 8919, 'loss/train': 2.3299055099487305} +02/24/2022 13:06:33 - INFO - codeparrot_training - Step 8920: {'lr': 0.0004747940637520226, 'samples': 4567552, 'steps': 8920, 'loss/train': 1.8456697463989258} +02/24/2022 13:06:39 - INFO - codeparrot_training - Step 8921: {'lr': 0.0004747869032759956, 'samples': 4568064, 'steps': 8921, 'loss/train': 3.1344287395477295} +02/24/2022 13:06:42 - INFO - codeparrot_training - Step 8922: {'lr': 0.00047477974183705293, 'samples': 4568576, 'steps': 8922, 'loss/train': 2.278343439102173} +02/24/2022 13:06:48 - INFO - codeparrot_training - Step 8923: {'lr': 0.0004747725794352252, 'samples': 4569088, 'steps': 8923, 'loss/train': 2.763631582260132} +02/24/2022 13:06:52 - INFO - codeparrot_training - Step 8924: {'lr': 0.00047476541607054313, 'samples': 4569600, 'steps': 8924, 'loss/train': 3.3006646633148193} +02/24/2022 13:06:57 - INFO - codeparrot_training - Step 8925: {'lr': 0.0004747582517430373, 'samples': 4570112, 'steps': 8925, 'loss/train': 1.9184359312057495} +02/24/2022 13:07:01 - INFO - codeparrot_training - Step 8926: {'lr': 0.00047475108645273856, 'samples': 4570624, 'steps': 8926, 'loss/train': 1.8225644826889038} +02/24/2022 13:07:06 - INFO - codeparrot_training - Step 8927: {'lr': 0.00047474392019967754, 'samples': 4571136, 'steps': 8927, 'loss/train': 2.553635358810425} +02/24/2022 13:07:10 - INFO - codeparrot_training - Step 8928: {'lr': 0.0004747367529838849, 'samples': 4571648, 'steps': 8928, 'loss/train': 2.4356164932250977} +02/24/2022 13:07:15 - INFO - codeparrot_training - Step 8929: {'lr': 0.0004747295848053914, 'samples': 4572160, 'steps': 8929, 'loss/train': 1.031925082206726} +02/24/2022 13:07:18 - INFO - codeparrot_training - Step 8930: {'lr': 0.0004747224156642277, 'samples': 4572672, 'steps': 8930, 'loss/train': 2.750985860824585} +02/24/2022 13:07:24 - INFO - codeparrot_training - Step 8931: {'lr': 0.00047471524556042454, 'samples': 4573184, 'steps': 8931, 'loss/train': 1.6292858123779297} +02/24/2022 13:07:27 - INFO - codeparrot_training - Step 8932: {'lr': 0.00047470807449401264, 'samples': 4573696, 'steps': 8932, 'loss/train': 2.339555263519287} +02/24/2022 13:07:34 - INFO - codeparrot_training - Step 8933: {'lr': 0.0004747009024650227, 'samples': 4574208, 'steps': 8933, 'loss/train': 2.4546921253204346} +02/24/2022 13:07:37 - INFO - codeparrot_training - Step 8934: {'lr': 0.00047469372947348546, 'samples': 4574720, 'steps': 8934, 'loss/train': 2.2428476810455322} +02/24/2022 13:07:43 - INFO - codeparrot_training - Step 8935: {'lr': 0.0004746865555194315, 'samples': 4575232, 'steps': 8935, 'loss/train': 1.9881370067596436} +02/24/2022 13:07:46 - INFO - codeparrot_training - Step 8936: {'lr': 0.00047467938060289185, 'samples': 4575744, 'steps': 8936, 'loss/train': 0.6411017179489136} +02/24/2022 13:07:52 - INFO - codeparrot_training - Step 8937: {'lr': 0.00047467220472389694, 'samples': 4576256, 'steps': 8937, 'loss/train': 2.5050201416015625} +02/24/2022 13:07:56 - INFO - codeparrot_training - Step 8938: {'lr': 0.0004746650278824777, 'samples': 4576768, 'steps': 8938, 'loss/train': 2.3526217937469482} +02/24/2022 13:08:01 - INFO - codeparrot_training - Step 8939: {'lr': 0.00047465785007866487, 'samples': 4577280, 'steps': 8939, 'loss/train': 2.139683246612549} +02/24/2022 13:08:05 - INFO - codeparrot_training - Step 8940: {'lr': 0.00047465067131248907, 'samples': 4577792, 'steps': 8940, 'loss/train': 3.1311049461364746} +02/24/2022 13:08:08 - INFO - codeparrot_training - Step 8941: {'lr': 0.0004746434915839812, 'samples': 4578304, 'steps': 8941, 'loss/train': 2.407386302947998} +02/24/2022 13:08:14 - INFO - codeparrot_training - Step 8942: {'lr': 0.00047463631089317195, 'samples': 4578816, 'steps': 8942, 'loss/train': 1.269031047821045} +02/24/2022 13:08:19 - INFO - codeparrot_training - Step 8943: {'lr': 0.000474629129240092, 'samples': 4579328, 'steps': 8943, 'loss/train': 1.9832195043563843} +02/24/2022 13:08:23 - INFO - codeparrot_training - Step 8944: {'lr': 0.0004746219466247722, 'samples': 4579840, 'steps': 8944, 'loss/train': 2.5644195079803467} +02/24/2022 13:08:28 - INFO - codeparrot_training - Step 8945: {'lr': 0.0004746147630472434, 'samples': 4580352, 'steps': 8945, 'loss/train': 4.009216785430908} +02/24/2022 13:08:32 - INFO - codeparrot_training - Step 8946: {'lr': 0.00047460757850753614, 'samples': 4580864, 'steps': 8946, 'loss/train': 2.1093924045562744} +02/24/2022 13:08:38 - INFO - codeparrot_training - Step 8947: {'lr': 0.00047460039300568143, 'samples': 4581376, 'steps': 8947, 'loss/train': 1.8118352890014648} +02/24/2022 13:08:41 - INFO - codeparrot_training - Step 8948: {'lr': 0.0004745932065417099, 'samples': 4581888, 'steps': 8948, 'loss/train': 5.964253902435303} +02/24/2022 13:08:45 - INFO - codeparrot_training - Step 8949: {'lr': 0.00047458601911565246, 'samples': 4582400, 'steps': 8949, 'loss/train': 2.5903995037078857} +02/24/2022 13:08:51 - INFO - codeparrot_training - Step 8950: {'lr': 0.0004745788307275398, 'samples': 4582912, 'steps': 8950, 'loss/train': 2.4588711261749268} +02/24/2022 13:08:54 - INFO - codeparrot_training - Step 8951: {'lr': 0.0004745716413774027, 'samples': 4583424, 'steps': 8951, 'loss/train': 1.8251460790634155} +02/24/2022 13:09:00 - INFO - codeparrot_training - Step 8952: {'lr': 0.000474564451065272, 'samples': 4583936, 'steps': 8952, 'loss/train': 1.827713966369629} +02/24/2022 13:09:04 - INFO - codeparrot_training - Step 8953: {'lr': 0.00047455725979117855, 'samples': 4584448, 'steps': 8953, 'loss/train': 1.045904517173767} +02/24/2022 13:09:09 - INFO - codeparrot_training - Step 8954: {'lr': 0.00047455006755515306, 'samples': 4584960, 'steps': 8954, 'loss/train': 2.504101276397705} +02/24/2022 13:09:13 - INFO - codeparrot_training - Step 8955: {'lr': 0.00047454287435722643, 'samples': 4585472, 'steps': 8955, 'loss/train': 2.354496955871582} +02/24/2022 13:09:18 - INFO - codeparrot_training - Step 8956: {'lr': 0.00047453568019742936, 'samples': 4585984, 'steps': 8956, 'loss/train': 3.8351099491119385} +02/24/2022 13:09:22 - INFO - codeparrot_training - Step 8957: {'lr': 0.0004745284850757928, 'samples': 4586496, 'steps': 8957, 'loss/train': 2.2186691761016846} +02/24/2022 13:09:28 - INFO - codeparrot_training - Step 8958: {'lr': 0.00047452128899234746, 'samples': 4587008, 'steps': 8958, 'loss/train': 2.7836594581604004} +02/24/2022 13:09:31 - INFO - codeparrot_training - Step 8959: {'lr': 0.0004745140919471243, 'samples': 4587520, 'steps': 8959, 'loss/train': 3.1200828552246094} +02/24/2022 13:09:37 - INFO - codeparrot_training - Step 8960: {'lr': 0.0004745068939401539, 'samples': 4588032, 'steps': 8960, 'loss/train': 0.8317255973815918} +02/24/2022 13:09:40 - INFO - codeparrot_training - Step 8961: {'lr': 0.0004744996949714674, 'samples': 4588544, 'steps': 8961, 'loss/train': 3.034991979598999} +02/24/2022 13:09:46 - INFO - codeparrot_training - Step 8962: {'lr': 0.0004744924950410954, 'samples': 4589056, 'steps': 8962, 'loss/train': 1.7224841117858887} +02/24/2022 13:09:49 - INFO - codeparrot_training - Step 8963: {'lr': 0.0004744852941490689, 'samples': 4589568, 'steps': 8963, 'loss/train': 2.6999459266662598} +02/24/2022 13:09:55 - INFO - codeparrot_training - Step 8964: {'lr': 0.0004744780922954186, 'samples': 4590080, 'steps': 8964, 'loss/train': 2.097709894180298} +02/24/2022 13:09:59 - INFO - codeparrot_training - Step 8965: {'lr': 0.00047447088948017555, 'samples': 4590592, 'steps': 8965, 'loss/train': 2.6860897541046143} +02/24/2022 13:10:04 - INFO - codeparrot_training - Step 8966: {'lr': 0.0004744636857033704, 'samples': 4591104, 'steps': 8966, 'loss/train': 3.8114237785339355} +02/24/2022 13:10:08 - INFO - codeparrot_training - Step 8967: {'lr': 0.00047445648096503413, 'samples': 4591616, 'steps': 8967, 'loss/train': 2.8054850101470947} +02/24/2022 13:10:11 - INFO - codeparrot_training - Step 8968: {'lr': 0.00047444927526519757, 'samples': 4592128, 'steps': 8968, 'loss/train': 2.804905414581299} +02/24/2022 13:10:17 - INFO - codeparrot_training - Step 8969: {'lr': 0.00047444206860389155, 'samples': 4592640, 'steps': 8969, 'loss/train': 2.3340671062469482} +02/24/2022 13:10:23 - INFO - codeparrot_training - Step 8970: {'lr': 0.00047443486098114703, 'samples': 4593152, 'steps': 8970, 'loss/train': 1.1994097232818604} +02/24/2022 13:10:26 - INFO - codeparrot_training - Step 8971: {'lr': 0.0004744276523969948, 'samples': 4593664, 'steps': 8971, 'loss/train': 2.310925006866455} +02/24/2022 13:10:32 - INFO - codeparrot_training - Step 8972: {'lr': 0.0004744204428514658, 'samples': 4594176, 'steps': 8972, 'loss/train': 1.7849228382110596} +02/24/2022 13:10:35 - INFO - codeparrot_training - Step 8973: {'lr': 0.0004744132323445908, 'samples': 4594688, 'steps': 8973, 'loss/train': 1.676094651222229} +02/24/2022 13:10:41 - INFO - codeparrot_training - Step 8974: {'lr': 0.00047440602087640084, 'samples': 4595200, 'steps': 8974, 'loss/train': 2.3776402473449707} +02/24/2022 13:10:44 - INFO - codeparrot_training - Step 8975: {'lr': 0.0004743988084469267, 'samples': 4595712, 'steps': 8975, 'loss/train': 2.1946041584014893} +02/24/2022 13:10:50 - INFO - codeparrot_training - Step 8976: {'lr': 0.00047439159505619936, 'samples': 4596224, 'steps': 8976, 'loss/train': 1.8234540224075317} +02/24/2022 13:10:53 - INFO - codeparrot_training - Step 8977: {'lr': 0.0004743843807042497, 'samples': 4596736, 'steps': 8977, 'loss/train': 2.3686439990997314} +02/24/2022 13:10:59 - INFO - codeparrot_training - Step 8978: {'lr': 0.0004743771653911086, 'samples': 4597248, 'steps': 8978, 'loss/train': 2.1785380840301514} +02/24/2022 13:11:02 - INFO - codeparrot_training - Step 8979: {'lr': 0.00047436994911680694, 'samples': 4597760, 'steps': 8979, 'loss/train': 2.5101404190063477} +02/24/2022 13:11:08 - INFO - codeparrot_training - Step 8980: {'lr': 0.0004743627318813757, 'samples': 4598272, 'steps': 8980, 'loss/train': 1.986647129058838} +02/24/2022 13:11:11 - INFO - codeparrot_training - Step 8981: {'lr': 0.00047435551368484567, 'samples': 4598784, 'steps': 8981, 'loss/train': 1.4231780767440796} +02/24/2022 13:11:17 - INFO - codeparrot_training - Step 8982: {'lr': 0.00047434829452724795, 'samples': 4599296, 'steps': 8982, 'loss/train': 3.5546858310699463} +02/24/2022 13:11:20 - INFO - codeparrot_training - Step 8983: {'lr': 0.00047434107440861336, 'samples': 4599808, 'steps': 8983, 'loss/train': 0.21147406101226807} +02/24/2022 13:11:26 - INFO - codeparrot_training - Step 8984: {'lr': 0.0004743338533289728, 'samples': 4600320, 'steps': 8984, 'loss/train': 2.249940872192383} +02/24/2022 13:11:30 - INFO - codeparrot_training - Step 8985: {'lr': 0.00047432663128835727, 'samples': 4600832, 'steps': 8985, 'loss/train': 1.261738896369934} +02/24/2022 13:11:35 - INFO - codeparrot_training - Step 8986: {'lr': 0.0004743194082867977, 'samples': 4601344, 'steps': 8986, 'loss/train': 2.7038543224334717} +02/24/2022 13:11:39 - INFO - codeparrot_training - Step 8987: {'lr': 0.000474312184324325, 'samples': 4601856, 'steps': 8987, 'loss/train': 2.480295419692993} +02/24/2022 13:11:44 - INFO - codeparrot_training - Step 8988: {'lr': 0.0004743049594009701, 'samples': 4602368, 'steps': 8988, 'loss/train': 1.866456151008606} +02/24/2022 13:11:48 - INFO - codeparrot_training - Step 8989: {'lr': 0.0004742977335167641, 'samples': 4602880, 'steps': 8989, 'loss/train': 2.4427878856658936} +02/24/2022 13:11:53 - INFO - codeparrot_training - Step 8990: {'lr': 0.0004742905066717377, 'samples': 4603392, 'steps': 8990, 'loss/train': 2.4546449184417725} +02/24/2022 13:11:57 - INFO - codeparrot_training - Step 8991: {'lr': 0.00047428327886592204, 'samples': 4603904, 'steps': 8991, 'loss/train': 2.667329788208008} +02/24/2022 13:12:02 - INFO - codeparrot_training - Step 8992: {'lr': 0.00047427605009934805, 'samples': 4604416, 'steps': 8992, 'loss/train': 2.1340625286102295} +02/24/2022 13:12:06 - INFO - codeparrot_training - Step 8993: {'lr': 0.00047426882037204663, 'samples': 4604928, 'steps': 8993, 'loss/train': 2.5201327800750732} +02/24/2022 13:12:12 - INFO - codeparrot_training - Step 8994: {'lr': 0.0004742615896840488, 'samples': 4605440, 'steps': 8994, 'loss/train': 2.1459014415740967} +02/24/2022 13:12:16 - INFO - codeparrot_training - Step 8995: {'lr': 0.00047425435803538554, 'samples': 4605952, 'steps': 8995, 'loss/train': 2.3032312393188477} +02/24/2022 13:12:21 - INFO - codeparrot_training - Step 8996: {'lr': 0.0004742471254260878, 'samples': 4606464, 'steps': 8996, 'loss/train': 1.379835844039917} +02/24/2022 13:12:24 - INFO - codeparrot_training - Step 8997: {'lr': 0.00047423989185618666, 'samples': 4606976, 'steps': 8997, 'loss/train': 2.6440694332122803} +02/24/2022 13:12:30 - INFO - codeparrot_training - Step 8998: {'lr': 0.00047423265732571295, 'samples': 4607488, 'steps': 8998, 'loss/train': 2.1104156970977783} +02/24/2022 13:12:34 - INFO - codeparrot_training - Step 8999: {'lr': 0.00047422542183469775, 'samples': 4608000, 'steps': 8999, 'loss/train': 3.3685567378997803} +02/24/2022 13:12:34 - INFO - codeparrot_training - Evaluating and saving model checkpoint