diff --git "a/log/debug_0.log" "b/log/debug_0.log" new file mode 100644--- /dev/null +++ "b/log/debug_0.log" @@ -0,0 +1,2012 @@ +01/27/2022 18:45:28 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/27/2022 18:45:29 - WARNING - huggingface_hub.repository - Revision `colorful-plasma-1` does not exist. Created and checked out branch `colorful-plasma-1`. +01/27/2022 18:45:29 - WARNING - huggingface_hub.repository - +01/27/2022 18:45:44 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/27/2022 18:45:45 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/27/2022 18:46:29 - INFO - codeparrot_training - Step 0: {'lr': 0.0, 'samples': 192, 'steps': 0, 'loss/train': 12.299906730651855} +01/27/2022 18:47:44 - INFO - codeparrot_training - Step 1: {'lr': 2.5e-07, 'samples': 384, 'steps': 1, 'loss/train': 12.254416465759277} +01/27/2022 18:48:57 - INFO - codeparrot_training - Step 2: {'lr': 5e-07, 'samples': 576, 'steps': 2, 'loss/train': 12.296040058135986} +01/27/2022 18:49:02 - INFO - codeparrot_training - Step 3: {'lr': 7.5e-07, 'samples': 768, 'steps': 3, 'loss/train': 12.258198738098145} +01/27/2022 18:49:08 - INFO - codeparrot_training - Step 4: {'lr': 1e-06, 'samples': 960, 'steps': 4, 'loss/train': 12.26336145401001} +01/27/2022 18:49:12 - INFO - codeparrot_training - Step 5: {'lr': 1.25e-06, 'samples': 1152, 'steps': 5, 'loss/train': 12.287298202514648} +01/27/2022 18:49:17 - INFO - codeparrot_training - Step 6: {'lr': 1.5e-06, 'samples': 1344, 'steps': 6, 'loss/train': 12.187986373901367} +01/27/2022 18:49:21 - INFO - codeparrot_training - Step 7: {'lr': 1.75e-06, 'samples': 1536, 'steps': 7, 'loss/train': 12.255367755889893} +01/27/2022 18:49:25 - INFO - codeparrot_training - Step 8: {'lr': 2e-06, 'samples': 1728, 'steps': 8, 'loss/train': 12.240334510803223} +01/27/2022 18:49:30 - INFO - codeparrot_training - Step 9: {'lr': 2.25e-06, 'samples': 1920, 'steps': 9, 'loss/train': 12.035845756530762} +01/27/2022 18:49:34 - INFO - codeparrot_training - Step 10: {'lr': 2.5e-06, 'samples': 2112, 'steps': 10, 'loss/train': 12.013946056365967} +01/27/2022 18:49:38 - INFO - codeparrot_training - Step 11: {'lr': 2.75e-06, 'samples': 2304, 'steps': 11, 'loss/train': 12.014110565185547} +01/27/2022 18:49:43 - INFO - codeparrot_training - Step 12: {'lr': 3e-06, 'samples': 2496, 'steps': 12, 'loss/train': 11.882771015167236} +01/27/2022 18:49:47 - INFO - codeparrot_training - Step 13: {'lr': 3.25e-06, 'samples': 2688, 'steps': 13, 'loss/train': 11.914687871932983} +01/27/2022 18:49:53 - INFO - codeparrot_training - Step 14: {'lr': 3.5e-06, 'samples': 2880, 'steps': 14, 'loss/train': 11.83001446723938} +01/27/2022 18:49:57 - INFO - codeparrot_training - Step 15: {'lr': 3.75e-06, 'samples': 3072, 'steps': 15, 'loss/train': 11.810596704483032} +01/27/2022 18:50:01 - INFO - codeparrot_training - Step 16: {'lr': 4e-06, 'samples': 3264, 'steps': 16, 'loss/train': 11.734410524368286} +01/27/2022 18:50:05 - INFO - codeparrot_training - Step 17: {'lr': 4.250000000000001e-06, 'samples': 3456, 'steps': 17, 'loss/train': 11.65330696105957} +01/27/2022 18:50:10 - INFO - codeparrot_training - Step 18: {'lr': 4.5e-06, 'samples': 3648, 'steps': 18, 'loss/train': 11.715793132781982} +01/27/2022 18:50:15 - INFO - codeparrot_training - Step 19: {'lr': 4.75e-06, 'samples': 3840, 
'steps': 19, 'loss/train': 11.387081623077393} +01/27/2022 18:50:19 - INFO - codeparrot_training - Step 20: {'lr': 5e-06, 'samples': 4032, 'steps': 20, 'loss/train': 11.467334747314453} +01/27/2022 18:50:23 - INFO - codeparrot_training - Step 21: {'lr': 5.2500000000000006e-06, 'samples': 4224, 'steps': 21, 'loss/train': 11.103084325790405} +01/27/2022 18:50:27 - INFO - codeparrot_training - Step 22: {'lr': 5.5e-06, 'samples': 4416, 'steps': 22, 'loss/train': 11.488559246063232} +01/27/2022 18:50:31 - INFO - codeparrot_training - Step 23: {'lr': 5.75e-06, 'samples': 4608, 'steps': 23, 'loss/train': 11.6660635471344} +01/27/2022 18:50:37 - INFO - codeparrot_training - Step 24: {'lr': 6e-06, 'samples': 4800, 'steps': 24, 'loss/train': 10.99052095413208} +01/27/2022 18:50:41 - INFO - codeparrot_training - Step 25: {'lr': 6.25e-06, 'samples': 4992, 'steps': 25, 'loss/train': 11.491893768310547} +01/27/2022 18:50:45 - INFO - codeparrot_training - Step 26: {'lr': 6.5e-06, 'samples': 5184, 'steps': 26, 'loss/train': 11.300984144210815} +01/27/2022 18:50:49 - INFO - codeparrot_training - Step 27: {'lr': 6.75e-06, 'samples': 5376, 'steps': 27, 'loss/train': 11.552963733673096} +01/27/2022 18:50:53 - INFO - codeparrot_training - Step 28: {'lr': 7e-06, 'samples': 5568, 'steps': 28, 'loss/train': 10.592391729354858} +01/27/2022 18:51:00 - INFO - codeparrot_training - Step 29: {'lr': 7.250000000000001e-06, 'samples': 5760, 'steps': 29, 'loss/train': 10.966165781021118} +01/27/2022 18:51:04 - INFO - codeparrot_training - Step 30: {'lr': 7.5e-06, 'samples': 5952, 'steps': 30, 'loss/train': 11.093848943710327} +01/27/2022 18:51:08 - INFO - codeparrot_training - Step 31: {'lr': 7.75e-06, 'samples': 6144, 'steps': 31, 'loss/train': 11.246557474136353} +01/27/2022 18:51:12 - INFO - codeparrot_training - Step 32: {'lr': 8e-06, 'samples': 6336, 'steps': 32, 'loss/train': 10.572290897369385} +01/27/2022 18:51:16 - INFO - codeparrot_training - Step 33: {'lr': 8.25e-06, 'samples': 6528, 'steps': 33, 'loss/train': 10.75559949874878} +01/27/2022 18:51:22 - INFO - codeparrot_training - Step 34: {'lr': 8.500000000000002e-06, 'samples': 6720, 'steps': 34, 'loss/train': 11.311209440231323} +01/27/2022 18:51:26 - INFO - codeparrot_training - Step 35: {'lr': 8.750000000000001e-06, 'samples': 6912, 'steps': 35, 'loss/train': 10.82191014289856} +01/27/2022 18:51:30 - INFO - codeparrot_training - Step 36: {'lr': 9e-06, 'samples': 7104, 'steps': 36, 'loss/train': 11.219964981079102} +01/27/2022 18:51:34 - INFO - codeparrot_training - Step 37: {'lr': 9.25e-06, 'samples': 7296, 'steps': 37, 'loss/train': 11.145844459533691} +01/27/2022 18:51:38 - INFO - codeparrot_training - Step 38: {'lr': 9.5e-06, 'samples': 7488, 'steps': 38, 'loss/train': 10.999979496002197} +01/27/2022 18:51:43 - INFO - codeparrot_training - Step 39: {'lr': 9.75e-06, 'samples': 7680, 'steps': 39, 'loss/train': 9.97054123878479} +01/27/2022 18:51:47 - INFO - codeparrot_training - Step 40: {'lr': 1e-05, 'samples': 7872, 'steps': 40, 'loss/train': 11.009016752243042} +01/27/2022 18:51:52 - INFO - codeparrot_training - Step 41: {'lr': 1.025e-05, 'samples': 8064, 'steps': 41, 'loss/train': 10.472270250320435} +01/27/2022 18:51:56 - INFO - codeparrot_training - Step 42: {'lr': 1.0500000000000001e-05, 'samples': 8256, 'steps': 42, 'loss/train': 10.424858093261719} +01/27/2022 18:52:00 - INFO - codeparrot_training - Step 43: {'lr': 1.0749999999999999e-05, 'samples': 8448, 'steps': 43, 'loss/train': 10.910295009613037} +01/27/2022 18:52:06 - INFO - 
codeparrot_training - Step 44: {'lr': 1.1e-05, 'samples': 8640, 'steps': 44, 'loss/train': 10.834372758865356} +01/27/2022 18:52:10 - INFO - codeparrot_training - Step 45: {'lr': 1.1249999999999999e-05, 'samples': 8832, 'steps': 45, 'loss/train': 11.367445707321167} +01/27/2022 18:52:15 - INFO - codeparrot_training - Step 46: {'lr': 1.15e-05, 'samples': 9024, 'steps': 46, 'loss/train': 10.566834926605225} +01/27/2022 18:52:19 - INFO - codeparrot_training - Step 47: {'lr': 1.1750000000000001e-05, 'samples': 9216, 'steps': 47, 'loss/train': 10.700668573379517} +01/27/2022 18:52:23 - INFO - codeparrot_training - Step 48: {'lr': 1.2e-05, 'samples': 9408, 'steps': 48, 'loss/train': 11.141412734985352} +01/27/2022 18:52:28 - INFO - codeparrot_training - Step 49: {'lr': 1.2250000000000001e-05, 'samples': 9600, 'steps': 49, 'loss/train': 11.026137113571167} +01/27/2022 18:52:32 - INFO - codeparrot_training - Step 50: {'lr': 1.25e-05, 'samples': 9792, 'steps': 50, 'loss/train': 11.223975419998169} +01/27/2022 18:52:36 - INFO - codeparrot_training - Step 51: {'lr': 1.275e-05, 'samples': 9984, 'steps': 51, 'loss/train': 10.629223108291626} +01/27/2022 18:52:40 - INFO - codeparrot_training - Step 52: {'lr': 1.3e-05, 'samples': 10176, 'steps': 52, 'loss/train': 10.660104274749756} +01/27/2022 18:52:45 - INFO - codeparrot_training - Step 53: {'lr': 1.325e-05, 'samples': 10368, 'steps': 53, 'loss/train': 10.400070190429688} +01/27/2022 18:52:50 - INFO - codeparrot_training - Step 54: {'lr': 1.35e-05, 'samples': 10560, 'steps': 54, 'loss/train': 11.049419403076172} +01/27/2022 18:52:54 - INFO - codeparrot_training - Step 55: {'lr': 1.375e-05, 'samples': 10752, 'steps': 55, 'loss/train': 10.336088418960571} +01/27/2022 18:52:58 - INFO - codeparrot_training - Step 56: {'lr': 1.4e-05, 'samples': 10944, 'steps': 56, 'loss/train': 11.02346920967102} +01/27/2022 18:53:02 - INFO - codeparrot_training - Step 57: {'lr': 1.425e-05, 'samples': 11136, 'steps': 57, 'loss/train': 10.886565685272217} +01/27/2022 18:53:06 - INFO - codeparrot_training - Step 58: {'lr': 1.4500000000000002e-05, 'samples': 11328, 'steps': 58, 'loss/train': 10.751394510269165} +01/27/2022 18:53:11 - INFO - codeparrot_training - Step 59: {'lr': 1.475e-05, 'samples': 11520, 'steps': 59, 'loss/train': 10.39724063873291} +01/27/2022 18:53:15 - INFO - codeparrot_training - Step 60: {'lr': 1.5e-05, 'samples': 11712, 'steps': 60, 'loss/train': 10.75915789604187} +01/27/2022 18:53:20 - INFO - codeparrot_training - Step 61: {'lr': 1.525e-05, 'samples': 11904, 'steps': 61, 'loss/train': 10.844947814941406} +01/27/2022 18:53:24 - INFO - codeparrot_training - Step 62: {'lr': 1.55e-05, 'samples': 12096, 'steps': 62, 'loss/train': 9.661353349685669} +01/27/2022 18:53:28 - INFO - codeparrot_training - Step 63: {'lr': 1.575e-05, 'samples': 12288, 'steps': 63, 'loss/train': 10.0857253074646} +01/27/2022 18:53:34 - INFO - codeparrot_training - Step 64: {'lr': 1.6e-05, 'samples': 12480, 'steps': 64, 'loss/train': 10.630635738372803} +01/27/2022 18:53:39 - INFO - codeparrot_training - Step 65: {'lr': 1.6250000000000002e-05, 'samples': 12672, 'steps': 65, 'loss/train': 11.000107526779175} +01/27/2022 18:53:43 - INFO - codeparrot_training - Step 66: {'lr': 1.65e-05, 'samples': 12864, 'steps': 66, 'loss/train': 10.835297584533691} +01/27/2022 18:53:47 - INFO - codeparrot_training - Step 67: {'lr': 1.675e-05, 'samples': 13056, 'steps': 67, 'loss/train': 10.615701913833618} +01/27/2022 18:53:51 - INFO - codeparrot_training - Step 68: {'lr': 1.7000000000000003e-05, 
'samples': 13248, 'steps': 68, 'loss/train': 11.006195783615112} +01/27/2022 18:53:56 - INFO - codeparrot_training - Step 69: {'lr': 1.7250000000000003e-05, 'samples': 13440, 'steps': 69, 'loss/train': 11.055887460708618} +01/27/2022 18:54:00 - INFO - codeparrot_training - Step 70: {'lr': 1.7500000000000002e-05, 'samples': 13632, 'steps': 70, 'loss/train': 11.058547496795654} +01/27/2022 18:54:04 - INFO - codeparrot_training - Step 71: {'lr': 1.7749999999999998e-05, 'samples': 13824, 'steps': 71, 'loss/train': 11.03822636604309} +01/27/2022 18:54:09 - INFO - codeparrot_training - Step 72: {'lr': 1.8e-05, 'samples': 14016, 'steps': 72, 'loss/train': 10.50807809829712} +01/27/2022 18:54:13 - INFO - codeparrot_training - Step 73: {'lr': 1.825e-05, 'samples': 14208, 'steps': 73, 'loss/train': 10.766568660736084} +01/27/2022 18:54:19 - INFO - codeparrot_training - Step 74: {'lr': 1.85e-05, 'samples': 14400, 'steps': 74, 'loss/train': 10.553864479064941} +01/27/2022 18:54:23 - INFO - codeparrot_training - Step 75: {'lr': 1.875e-05, 'samples': 14592, 'steps': 75, 'loss/train': 10.21326470375061} +01/27/2022 18:54:27 - INFO - codeparrot_training - Step 76: {'lr': 1.9e-05, 'samples': 14784, 'steps': 76, 'loss/train': 10.913193941116333} +01/27/2022 18:54:32 - INFO - codeparrot_training - Step 77: {'lr': 1.925e-05, 'samples': 14976, 'steps': 77, 'loss/train': 11.222438335418701} +01/27/2022 18:54:37 - INFO - codeparrot_training - Step 78: {'lr': 1.95e-05, 'samples': 15168, 'steps': 78, 'loss/train': 11.160276889801025} +01/27/2022 18:54:41 - INFO - codeparrot_training - Step 79: {'lr': 1.975e-05, 'samples': 15360, 'steps': 79, 'loss/train': 10.69108772277832} +01/27/2022 18:54:45 - INFO - codeparrot_training - Step 80: {'lr': 2e-05, 'samples': 15552, 'steps': 80, 'loss/train': 11.075340270996094} +01/27/2022 18:54:49 - INFO - codeparrot_training - Step 81: {'lr': 2.025e-05, 'samples': 15744, 'steps': 81, 'loss/train': 10.32427453994751} +01/27/2022 18:54:53 - INFO - codeparrot_training - Step 82: {'lr': 2.05e-05, 'samples': 15936, 'steps': 82, 'loss/train': 11.106040477752686} +01/27/2022 18:54:57 - INFO - codeparrot_training - Step 83: {'lr': 2.0750000000000003e-05, 'samples': 16128, 'steps': 83, 'loss/train': 10.27891731262207} +01/27/2022 18:55:03 - INFO - codeparrot_training - Step 84: {'lr': 2.1000000000000002e-05, 'samples': 16320, 'steps': 84, 'loss/train': 10.17206597328186} +01/27/2022 18:55:07 - INFO - codeparrot_training - Step 85: {'lr': 2.125e-05, 'samples': 16512, 'steps': 85, 'loss/train': 10.83674669265747} +01/27/2022 18:55:11 - INFO - codeparrot_training - Step 86: {'lr': 2.1499999999999997e-05, 'samples': 16704, 'steps': 86, 'loss/train': 10.95323395729065} +01/27/2022 18:55:15 - INFO - codeparrot_training - Step 87: {'lr': 2.175e-05, 'samples': 16896, 'steps': 87, 'loss/train': 10.6928071975708} +01/27/2022 18:55:19 - INFO - codeparrot_training - Step 88: {'lr': 2.2e-05, 'samples': 17088, 'steps': 88, 'loss/train': 10.250412225723267} +01/27/2022 18:55:26 - INFO - codeparrot_training - Step 89: {'lr': 2.225e-05, 'samples': 17280, 'steps': 89, 'loss/train': 11.357476472854614} +01/27/2022 18:55:30 - INFO - codeparrot_training - Step 90: {'lr': 2.2499999999999998e-05, 'samples': 17472, 'steps': 90, 'loss/train': 10.97425103187561} +01/27/2022 18:55:34 - INFO - codeparrot_training - Step 91: {'lr': 2.275e-05, 'samples': 17664, 'steps': 91, 'loss/train': 10.546386480331421} +01/27/2022 18:55:38 - INFO - codeparrot_training - Step 92: {'lr': 2.3e-05, 'samples': 17856, 'steps': 92, 
'loss/train': 10.648743152618408} +01/27/2022 18:55:43 - INFO - codeparrot_training - Step 93: {'lr': 2.325e-05, 'samples': 18048, 'steps': 93, 'loss/train': 10.244426965713501} +01/27/2022 18:55:48 - INFO - codeparrot_training - Step 94: {'lr': 2.3500000000000002e-05, 'samples': 18240, 'steps': 94, 'loss/train': 11.093276023864746} +01/27/2022 18:55:52 - INFO - codeparrot_training - Step 95: {'lr': 2.375e-05, 'samples': 18432, 'steps': 95, 'loss/train': 10.794178247451782} +01/27/2022 18:55:56 - INFO - codeparrot_training - Step 96: {'lr': 2.4e-05, 'samples': 18624, 'steps': 96, 'loss/train': 10.56095266342163} +01/27/2022 18:56:00 - INFO - codeparrot_training - Step 97: {'lr': 2.425e-05, 'samples': 18816, 'steps': 97, 'loss/train': 9.840100049972534} +01/27/2022 18:56:05 - INFO - codeparrot_training - Step 98: {'lr': 2.4500000000000003e-05, 'samples': 19008, 'steps': 98, 'loss/train': 10.543049097061157} +01/27/2022 18:56:09 - INFO - codeparrot_training - Step 99: {'lr': 2.4750000000000002e-05, 'samples': 19200, 'steps': 99, 'loss/train': 10.238038301467896} +01/27/2022 18:56:14 - INFO - codeparrot_training - Step 100: {'lr': 2.5e-05, 'samples': 19392, 'steps': 100, 'loss/train': 10.771350145339966} +01/27/2022 18:56:18 - INFO - codeparrot_training - Step 101: {'lr': 2.525e-05, 'samples': 19584, 'steps': 101, 'loss/train': 10.056172370910645} +01/27/2022 18:56:22 - INFO - codeparrot_training - Step 102: {'lr': 2.55e-05, 'samples': 19776, 'steps': 102, 'loss/train': 10.980010986328125} +01/27/2022 18:56:28 - INFO - codeparrot_training - Step 103: {'lr': 2.575e-05, 'samples': 19968, 'steps': 103, 'loss/train': 10.886353969573975} +01/27/2022 18:56:32 - INFO - codeparrot_training - Step 104: {'lr': 2.6e-05, 'samples': 20160, 'steps': 104, 'loss/train': 10.946210861206055} +01/27/2022 18:56:36 - INFO - codeparrot_training - Step 105: {'lr': 2.625e-05, 'samples': 20352, 'steps': 105, 'loss/train': 10.287086963653564} +01/27/2022 18:56:41 - INFO - codeparrot_training - Step 106: {'lr': 2.65e-05, 'samples': 20544, 'steps': 106, 'loss/train': 10.691130638122559} +01/27/2022 18:56:45 - INFO - codeparrot_training - Step 107: {'lr': 2.675e-05, 'samples': 20736, 'steps': 107, 'loss/train': 10.635468006134033} +01/27/2022 18:56:50 - INFO - codeparrot_training - Step 108: {'lr': 2.7e-05, 'samples': 20928, 'steps': 108, 'loss/train': 10.396392345428467} +01/27/2022 18:56:54 - INFO - codeparrot_training - Step 109: {'lr': 2.725e-05, 'samples': 21120, 'steps': 109, 'loss/train': 11.198972940444946} +01/27/2022 18:56:58 - INFO - codeparrot_training - Step 110: {'lr': 2.75e-05, 'samples': 21312, 'steps': 110, 'loss/train': 10.812159061431885} +01/27/2022 18:57:03 - INFO - codeparrot_training - Step 111: {'lr': 2.775e-05, 'samples': 21504, 'steps': 111, 'loss/train': 10.239061117172241} +01/27/2022 18:57:07 - INFO - codeparrot_training - Step 112: {'lr': 2.8e-05, 'samples': 21696, 'steps': 112, 'loss/train': 10.685145378112793} +01/27/2022 18:57:14 - INFO - codeparrot_training - Step 113: {'lr': 2.8250000000000002e-05, 'samples': 21888, 'steps': 113, 'loss/train': 10.541263103485107} +01/27/2022 18:57:18 - INFO - codeparrot_training - Step 114: {'lr': 2.85e-05, 'samples': 22080, 'steps': 114, 'loss/train': 10.467995882034302} +01/27/2022 18:57:22 - INFO - codeparrot_training - Step 115: {'lr': 2.875e-05, 'samples': 22272, 'steps': 115, 'loss/train': 10.09206748008728} +01/27/2022 18:57:26 - INFO - codeparrot_training - Step 116: {'lr': 2.9000000000000004e-05, 'samples': 22464, 'steps': 116, 'loss/train': 
9.689193964004517} +01/27/2022 18:57:30 - INFO - codeparrot_training - Step 117: {'lr': 2.9250000000000003e-05, 'samples': 22656, 'steps': 117, 'loss/train': 9.968210935592651} +01/27/2022 18:57:34 - INFO - codeparrot_training - Step 118: {'lr': 2.95e-05, 'samples': 22848, 'steps': 118, 'loss/train': 10.019670009613037} +01/27/2022 18:57:39 - INFO - codeparrot_training - Step 119: {'lr': 2.9749999999999998e-05, 'samples': 23040, 'steps': 119, 'loss/train': 9.76928186416626} +01/27/2022 18:57:44 - INFO - codeparrot_training - Step 120: {'lr': 3e-05, 'samples': 23232, 'steps': 120, 'loss/train': 9.688311338424683} +01/27/2022 18:57:48 - INFO - codeparrot_training - Step 121: {'lr': 3.025e-05, 'samples': 23424, 'steps': 121, 'loss/train': 10.486538171768188} +01/27/2022 18:57:52 - INFO - codeparrot_training - Step 122: {'lr': 3.05e-05, 'samples': 23616, 'steps': 122, 'loss/train': 10.556225538253784} +01/27/2022 18:57:56 - INFO - codeparrot_training - Step 123: {'lr': 3.075e-05, 'samples': 23808, 'steps': 123, 'loss/train': 9.987429141998291} +01/27/2022 18:58:01 - INFO - codeparrot_training - Step 124: {'lr': 3.1e-05, 'samples': 24000, 'steps': 124, 'loss/train': 9.408846616744995} +01/27/2022 18:58:05 - INFO - codeparrot_training - Step 125: {'lr': 3.125e-05, 'samples': 24192, 'steps': 125, 'loss/train': 9.847419261932373} +01/27/2022 18:58:09 - INFO - codeparrot_training - Step 126: {'lr': 3.15e-05, 'samples': 24384, 'steps': 126, 'loss/train': 10.11429762840271} +01/27/2022 18:58:14 - INFO - codeparrot_training - Step 127: {'lr': 3.175e-05, 'samples': 24576, 'steps': 127, 'loss/train': 10.277008295059204} +01/27/2022 18:58:18 - INFO - codeparrot_training - Step 128: {'lr': 3.2e-05, 'samples': 24768, 'steps': 128, 'loss/train': 9.601389169692993} +01/27/2022 18:58:23 - INFO - codeparrot_training - Step 129: {'lr': 3.2250000000000005e-05, 'samples': 24960, 'steps': 129, 'loss/train': 10.083940029144287} +01/27/2022 18:58:27 - INFO - codeparrot_training - Step 130: {'lr': 3.2500000000000004e-05, 'samples': 25152, 'steps': 130, 'loss/train': 10.465167760848999} +01/27/2022 18:58:31 - INFO - codeparrot_training - Step 131: {'lr': 3.275e-05, 'samples': 25344, 'steps': 131, 'loss/train': 10.400367021560669} +01/27/2022 18:58:35 - INFO - codeparrot_training - Step 132: {'lr': 3.3e-05, 'samples': 25536, 'steps': 132, 'loss/train': 9.513238191604614} +01/27/2022 18:58:39 - INFO - codeparrot_training - Step 133: {'lr': 3.325e-05, 'samples': 25728, 'steps': 133, 'loss/train': 9.972669124603271} +01/27/2022 18:58:46 - INFO - codeparrot_training - Step 134: {'lr': 3.35e-05, 'samples': 25920, 'steps': 134, 'loss/train': 11.59807276725769} +01/27/2022 18:58:50 - INFO - codeparrot_training - Step 135: {'lr': 3.375e-05, 'samples': 26112, 'steps': 135, 'loss/train': 10.637120962142944} +01/27/2022 18:58:54 - INFO - codeparrot_training - Step 136: {'lr': 3.4000000000000007e-05, 'samples': 26304, 'steps': 136, 'loss/train': 9.751748085021973} +01/27/2022 18:58:59 - INFO - codeparrot_training - Step 137: {'lr': 3.4250000000000006e-05, 'samples': 26496, 'steps': 137, 'loss/train': 8.839701890945435} +01/27/2022 18:59:03 - INFO - codeparrot_training - Step 138: {'lr': 3.4500000000000005e-05, 'samples': 26688, 'steps': 138, 'loss/train': 10.428051710128784} +01/27/2022 18:59:08 - INFO - codeparrot_training - Step 139: {'lr': 3.4750000000000004e-05, 'samples': 26880, 'steps': 139, 'loss/train': 10.133004426956177} +01/27/2022 18:59:12 - INFO - codeparrot_training - Step 140: {'lr': 3.5000000000000004e-05, 
'samples': 27072, 'steps': 140, 'loss/train': 6.793606281280518} +01/27/2022 18:59:16 - INFO - codeparrot_training - Step 141: {'lr': 3.5249999999999996e-05, 'samples': 27264, 'steps': 141, 'loss/train': 8.847809314727783} +01/27/2022 18:59:20 - INFO - codeparrot_training - Step 142: {'lr': 3.5499999999999996e-05, 'samples': 27456, 'steps': 142, 'loss/train': 10.743701219558716} +01/27/2022 18:59:24 - INFO - codeparrot_training - Step 143: {'lr': 3.5749999999999995e-05, 'samples': 27648, 'steps': 143, 'loss/train': 10.090747833251953} +01/27/2022 18:59:30 - INFO - codeparrot_training - Step 144: {'lr': 3.6e-05, 'samples': 27840, 'steps': 144, 'loss/train': 10.086535692214966} +01/27/2022 18:59:35 - INFO - codeparrot_training - Step 145: {'lr': 3.625e-05, 'samples': 28032, 'steps': 145, 'loss/train': 10.105350494384766} +01/27/2022 18:59:39 - INFO - codeparrot_training - Step 146: {'lr': 3.65e-05, 'samples': 28224, 'steps': 146, 'loss/train': 9.796883583068848} +01/27/2022 18:59:43 - INFO - codeparrot_training - Step 147: {'lr': 3.675e-05, 'samples': 28416, 'steps': 147, 'loss/train': 10.850471019744873} +01/27/2022 18:59:47 - INFO - codeparrot_training - Step 148: {'lr': 3.7e-05, 'samples': 28608, 'steps': 148, 'loss/train': 10.105576515197754} +01/27/2022 18:59:52 - INFO - codeparrot_training - Step 149: {'lr': 3.725e-05, 'samples': 28800, 'steps': 149, 'loss/train': 10.211700439453125} +01/27/2022 18:59:56 - INFO - codeparrot_training - Step 150: {'lr': 3.75e-05, 'samples': 28992, 'steps': 150, 'loss/train': 10.29758906364441} +01/27/2022 19:00:00 - INFO - codeparrot_training - Step 151: {'lr': 3.775e-05, 'samples': 29184, 'steps': 151, 'loss/train': 9.936643123626709} +01/27/2022 19:00:04 - INFO - codeparrot_training - Step 152: {'lr': 3.8e-05, 'samples': 29376, 'steps': 152, 'loss/train': 8.640991687774658} +01/27/2022 19:00:09 - INFO - codeparrot_training - Step 153: {'lr': 3.825e-05, 'samples': 29568, 'steps': 153, 'loss/train': 10.395740747451782} +01/27/2022 19:00:14 - INFO - codeparrot_training - Step 154: {'lr': 3.85e-05, 'samples': 29760, 'steps': 154, 'loss/train': 9.710185289382935} +01/27/2022 19:00:18 - INFO - codeparrot_training - Step 155: {'lr': 3.875e-05, 'samples': 29952, 'steps': 155, 'loss/train': 10.221109628677368} +01/27/2022 19:00:22 - INFO - codeparrot_training - Step 156: {'lr': 3.9e-05, 'samples': 30144, 'steps': 156, 'loss/train': 9.791813850402832} +01/27/2022 19:00:26 - INFO - codeparrot_training - Step 157: {'lr': 3.925e-05, 'samples': 30336, 'steps': 157, 'loss/train': 9.372159004211426} +01/27/2022 19:00:30 - INFO - codeparrot_training - Step 158: {'lr': 3.95e-05, 'samples': 30528, 'steps': 158, 'loss/train': 10.537101745605469} +01/27/2022 19:00:37 - INFO - codeparrot_training - Step 159: {'lr': 3.9750000000000004e-05, 'samples': 30720, 'steps': 159, 'loss/train': 8.887600421905518} +01/27/2022 19:00:41 - INFO - codeparrot_training - Step 160: {'lr': 4e-05, 'samples': 30912, 'steps': 160, 'loss/train': 9.904645442962646} +01/27/2022 19:00:45 - INFO - codeparrot_training - Step 161: {'lr': 4.025e-05, 'samples': 31104, 'steps': 161, 'loss/train': 9.94864010810852} +01/27/2022 19:00:49 - INFO - codeparrot_training - Step 162: {'lr': 4.05e-05, 'samples': 31296, 'steps': 162, 'loss/train': 9.892107725143433} +01/27/2022 19:00:53 - INFO - codeparrot_training - Step 163: {'lr': 4.075e-05, 'samples': 31488, 'steps': 163, 'loss/train': 10.019979000091553} +01/27/2022 19:00:58 - INFO - codeparrot_training - Step 164: {'lr': 4.1e-05, 'samples': 31680, 'steps': 164, 
'loss/train': 9.86623764038086} +01/27/2022 19:01:03 - INFO - codeparrot_training - Step 165: {'lr': 4.125e-05, 'samples': 31872, 'steps': 165, 'loss/train': 9.841260194778442} +01/27/2022 19:01:07 - INFO - codeparrot_training - Step 166: {'lr': 4.1500000000000006e-05, 'samples': 32064, 'steps': 166, 'loss/train': 8.373968839645386} +01/27/2022 19:01:11 - INFO - codeparrot_training - Step 167: {'lr': 4.1750000000000005e-05, 'samples': 32256, 'steps': 167, 'loss/train': 9.584777355194092} +01/27/2022 19:01:15 - INFO - codeparrot_training - Step 168: {'lr': 4.2000000000000004e-05, 'samples': 32448, 'steps': 168, 'loss/train': 10.839874505996704} +01/27/2022 19:01:20 - INFO - codeparrot_training - Step 169: {'lr': 4.2250000000000004e-05, 'samples': 32640, 'steps': 169, 'loss/train': 10.583508253097534} +01/27/2022 19:01:24 - INFO - codeparrot_training - Step 170: {'lr': 4.25e-05, 'samples': 32832, 'steps': 170, 'loss/train': 9.085827112197876} +01/27/2022 19:01:28 - INFO - codeparrot_training - Step 171: {'lr': 4.275e-05, 'samples': 33024, 'steps': 171, 'loss/train': 9.918323278427124} +01/27/2022 19:01:32 - INFO - codeparrot_training - Step 172: {'lr': 4.2999999999999995e-05, 'samples': 33216, 'steps': 172, 'loss/train': 10.165422677993774} +01/27/2022 19:01:37 - INFO - codeparrot_training - Step 173: {'lr': 4.325e-05, 'samples': 33408, 'steps': 173, 'loss/train': 9.276403427124023} +01/27/2022 19:01:42 - INFO - codeparrot_training - Step 174: {'lr': 4.35e-05, 'samples': 33600, 'steps': 174, 'loss/train': 9.754451036453247} +01/27/2022 19:01:46 - INFO - codeparrot_training - Step 175: {'lr': 4.375e-05, 'samples': 33792, 'steps': 175, 'loss/train': 10.083123207092285} +01/27/2022 19:01:50 - INFO - codeparrot_training - Step 176: {'lr': 4.4e-05, 'samples': 33984, 'steps': 176, 'loss/train': 8.438665866851807} +01/27/2022 19:01:54 - INFO - codeparrot_training - Step 177: {'lr': 4.425e-05, 'samples': 34176, 'steps': 177, 'loss/train': 8.652661085128784} +01/27/2022 19:01:58 - INFO - codeparrot_training - Step 178: {'lr': 4.45e-05, 'samples': 34368, 'steps': 178, 'loss/train': 9.337483406066895} +01/27/2022 19:02:04 - INFO - codeparrot_training - Step 179: {'lr': 4.475e-05, 'samples': 34560, 'steps': 179, 'loss/train': 9.091297388076782} +01/27/2022 19:02:09 - INFO - codeparrot_training - Step 180: {'lr': 4.4999999999999996e-05, 'samples': 34752, 'steps': 180, 'loss/train': 9.52385401725769} +01/27/2022 19:02:13 - INFO - codeparrot_training - Step 181: {'lr': 4.525e-05, 'samples': 34944, 'steps': 181, 'loss/train': 9.355359077453613} +01/27/2022 19:02:17 - INFO - codeparrot_training - Step 182: {'lr': 4.55e-05, 'samples': 35136, 'steps': 182, 'loss/train': 8.646886110305786} +01/27/2022 19:02:21 - INFO - codeparrot_training - Step 183: {'lr': 4.575e-05, 'samples': 35328, 'steps': 183, 'loss/train': 10.042189836502075} +01/27/2022 19:02:26 - INFO - codeparrot_training - Step 184: {'lr': 4.6e-05, 'samples': 35520, 'steps': 184, 'loss/train': 10.683302164077759} +01/27/2022 19:02:30 - INFO - codeparrot_training - Step 185: {'lr': 4.625e-05, 'samples': 35712, 'steps': 185, 'loss/train': 8.979841232299805} +01/27/2022 19:02:35 - INFO - codeparrot_training - Step 186: {'lr': 4.65e-05, 'samples': 35904, 'steps': 186, 'loss/train': 9.779321908950806} +01/27/2022 19:02:39 - INFO - codeparrot_training - Step 187: {'lr': 4.675e-05, 'samples': 36096, 'steps': 187, 'loss/train': 9.665011167526245} +01/27/2022 19:02:43 - INFO - codeparrot_training - Step 188: {'lr': 4.7000000000000004e-05, 'samples': 36288, 
'steps': 188, 'loss/train': 9.561795473098755} +01/27/2022 19:02:48 - INFO - codeparrot_training - Step 189: {'lr': 4.725e-05, 'samples': 36480, 'steps': 189, 'loss/train': 9.158857583999634} +01/27/2022 19:02:52 - INFO - codeparrot_training - Step 190: {'lr': 4.75e-05, 'samples': 36672, 'steps': 190, 'loss/train': 9.023682117462158} +01/27/2022 19:02:56 - INFO - codeparrot_training - Step 191: {'lr': 4.775e-05, 'samples': 36864, 'steps': 191, 'loss/train': 9.031155824661255} +01/27/2022 19:03:00 - INFO - codeparrot_training - Step 192: {'lr': 4.8e-05, 'samples': 37056, 'steps': 192, 'loss/train': 9.122292280197144} +01/27/2022 19:03:05 - INFO - codeparrot_training - Step 193: {'lr': 4.825e-05, 'samples': 37248, 'steps': 193, 'loss/train': 9.38718581199646} +01/27/2022 19:03:11 - INFO - codeparrot_training - Step 194: {'lr': 4.85e-05, 'samples': 37440, 'steps': 194, 'loss/train': 9.636369466781616} +01/27/2022 19:03:16 - INFO - codeparrot_training - Step 195: {'lr': 4.8750000000000006e-05, 'samples': 37632, 'steps': 195, 'loss/train': 8.828883647918701} +01/27/2022 19:03:20 - INFO - codeparrot_training - Step 196: {'lr': 4.9000000000000005e-05, 'samples': 37824, 'steps': 196, 'loss/train': 6.451952219009399} +01/27/2022 19:03:24 - INFO - codeparrot_training - Step 197: {'lr': 4.9250000000000004e-05, 'samples': 38016, 'steps': 197, 'loss/train': 6.7652199268341064} +01/27/2022 19:03:28 - INFO - codeparrot_training - Step 198: {'lr': 4.9500000000000004e-05, 'samples': 38208, 'steps': 198, 'loss/train': 9.880377531051636} +01/27/2022 19:03:32 - INFO - codeparrot_training - Step 199: {'lr': 4.975e-05, 'samples': 38400, 'steps': 199, 'loss/train': 9.147174596786499} +01/27/2022 19:03:37 - INFO - codeparrot_training - Step 200: {'lr': 5e-05, 'samples': 38592, 'steps': 200, 'loss/train': 8.676039218902588} +01/27/2022 19:03:41 - INFO - codeparrot_training - Step 201: {'lr': 5.025e-05, 'samples': 38784, 'steps': 201, 'loss/train': 9.533545017242432} +01/27/2022 19:03:46 - INFO - codeparrot_training - Step 202: {'lr': 5.05e-05, 'samples': 38976, 'steps': 202, 'loss/train': 9.315980672836304} +01/27/2022 19:03:50 - INFO - codeparrot_training - Step 203: {'lr': 5.075000000000001e-05, 'samples': 39168, 'steps': 203, 'loss/train': 10.182298421859741} +01/27/2022 19:03:54 - INFO - codeparrot_training - Step 204: {'lr': 5.1e-05, 'samples': 39360, 'steps': 204, 'loss/train': 10.115712404251099} +01/27/2022 19:04:00 - INFO - codeparrot_training - Step 205: {'lr': 5.125e-05, 'samples': 39552, 'steps': 205, 'loss/train': 8.276333570480347} +01/27/2022 19:04:04 - INFO - codeparrot_training - Step 206: {'lr': 5.15e-05, 'samples': 39744, 'steps': 206, 'loss/train': 9.16178011894226} +01/27/2022 19:04:08 - INFO - codeparrot_training - Step 207: {'lr': 5.175e-05, 'samples': 39936, 'steps': 207, 'loss/train': 9.451743364334106} +01/27/2022 19:04:12 - INFO - codeparrot_training - Step 208: {'lr': 5.2e-05, 'samples': 40128, 'steps': 208, 'loss/train': 10.303925514221191} +01/27/2022 19:04:17 - INFO - codeparrot_training - Step 209: {'lr': 5.2249999999999996e-05, 'samples': 40320, 'steps': 209, 'loss/train': 8.935240745544434} +01/27/2022 19:04:22 - INFO - codeparrot_training - Step 210: {'lr': 5.25e-05, 'samples': 40512, 'steps': 210, 'loss/train': 9.774219274520874} +01/27/2022 19:04:26 - INFO - codeparrot_training - Step 211: {'lr': 5.275e-05, 'samples': 40704, 'steps': 211, 'loss/train': 8.904966115951538} +01/27/2022 19:04:30 - INFO - codeparrot_training - Step 212: {'lr': 5.3e-05, 'samples': 40896, 'steps': 
212, 'loss/train': 9.138321161270142} +01/27/2022 19:04:34 - INFO - codeparrot_training - Step 213: {'lr': 5.325e-05, 'samples': 41088, 'steps': 213, 'loss/train': 9.766228437423706} +01/27/2022 19:04:38 - INFO - codeparrot_training - Step 214: {'lr': 5.35e-05, 'samples': 41280, 'steps': 214, 'loss/train': 8.124200820922852} +01/27/2022 19:04:43 - INFO - codeparrot_training - Step 215: {'lr': 5.375e-05, 'samples': 41472, 'steps': 215, 'loss/train': 9.688585996627808} +01/27/2022 19:04:47 - INFO - codeparrot_training - Step 216: {'lr': 5.4e-05, 'samples': 41664, 'steps': 216, 'loss/train': 10.137500524520874} +01/27/2022 19:04:52 - INFO - codeparrot_training - Step 217: {'lr': 5.4250000000000004e-05, 'samples': 41856, 'steps': 217, 'loss/train': 9.659674644470215} +01/27/2022 19:04:56 - INFO - codeparrot_training - Step 218: {'lr': 5.45e-05, 'samples': 42048, 'steps': 218, 'loss/train': 8.457206726074219} +01/27/2022 19:05:00 - INFO - codeparrot_training - Step 219: {'lr': 5.475e-05, 'samples': 42240, 'steps': 219, 'loss/train': 9.459918737411499} +01/27/2022 19:05:06 - INFO - codeparrot_training - Step 220: {'lr': 5.5e-05, 'samples': 42432, 'steps': 220, 'loss/train': 8.549018383026123} +01/27/2022 19:05:10 - INFO - codeparrot_training - Step 221: {'lr': 5.525e-05, 'samples': 42624, 'steps': 221, 'loss/train': 10.017014265060425} +01/27/2022 19:05:15 - INFO - codeparrot_training - Step 222: {'lr': 5.55e-05, 'samples': 42816, 'steps': 222, 'loss/train': 9.32455587387085} +01/27/2022 19:05:19 - INFO - codeparrot_training - Step 223: {'lr': 5.575e-05, 'samples': 43008, 'steps': 223, 'loss/train': 9.075031757354736} +01/27/2022 19:05:23 - INFO - codeparrot_training - Step 224: {'lr': 5.6e-05, 'samples': 43200, 'steps': 224, 'loss/train': 8.99543023109436} +01/27/2022 19:05:28 - INFO - codeparrot_training - Step 225: {'lr': 5.6250000000000005e-05, 'samples': 43392, 'steps': 225, 'loss/train': 9.823913812637329} +01/27/2022 19:05:32 - INFO - codeparrot_training - Step 226: {'lr': 5.6500000000000005e-05, 'samples': 43584, 'steps': 226, 'loss/train': 9.353821277618408} +01/27/2022 19:05:37 - INFO - codeparrot_training - Step 227: {'lr': 5.6750000000000004e-05, 'samples': 43776, 'steps': 227, 'loss/train': 9.444689512252808} +01/27/2022 19:05:41 - INFO - codeparrot_training - Step 228: {'lr': 5.7e-05, 'samples': 43968, 'steps': 228, 'loss/train': 8.868521690368652} +01/27/2022 19:05:47 - INFO - codeparrot_training - Step 229: {'lr': 5.725e-05, 'samples': 44160, 'steps': 229, 'loss/train': 9.563207387924194} +01/27/2022 19:05:51 - INFO - codeparrot_training - Step 230: {'lr': 5.75e-05, 'samples': 44352, 'steps': 230, 'loss/train': 8.640009641647339} +01/27/2022 19:05:55 - INFO - codeparrot_training - Step 231: {'lr': 5.775e-05, 'samples': 44544, 'steps': 231, 'loss/train': 9.500942945480347} +01/27/2022 19:05:59 - INFO - codeparrot_training - Step 232: {'lr': 5.800000000000001e-05, 'samples': 44736, 'steps': 232, 'loss/train': 9.161518335342407} +01/27/2022 19:06:03 - INFO - codeparrot_training - Step 233: {'lr': 5.8250000000000006e-05, 'samples': 44928, 'steps': 233, 'loss/train': 11.825181484222412} +01/27/2022 19:06:09 - INFO - codeparrot_training - Step 234: {'lr': 5.8500000000000006e-05, 'samples': 45120, 'steps': 234, 'loss/train': 6.474182367324829} +01/27/2022 19:06:13 - INFO - codeparrot_training - Step 235: {'lr': 5.875e-05, 'samples': 45312, 'steps': 235, 'loss/train': 8.545825481414795} +01/27/2022 19:06:17 - INFO - codeparrot_training - Step 236: {'lr': 5.9e-05, 'samples': 45504, 
'steps': 236, 'loss/train': 9.267122268676758} +01/27/2022 19:06:21 - INFO - codeparrot_training - Step 237: {'lr': 5.925e-05, 'samples': 45696, 'steps': 237, 'loss/train': 9.534893989562988} +01/27/2022 19:06:25 - INFO - codeparrot_training - Step 238: {'lr': 5.9499999999999996e-05, 'samples': 45888, 'steps': 238, 'loss/train': 9.004261493682861} +01/27/2022 19:06:30 - INFO - codeparrot_training - Step 239: {'lr': 5.9749999999999995e-05, 'samples': 46080, 'steps': 239, 'loss/train': 8.730738401412964} +01/27/2022 19:06:34 - INFO - codeparrot_training - Step 240: {'lr': 6e-05, 'samples': 46272, 'steps': 240, 'loss/train': 9.990759372711182} +01/27/2022 19:06:39 - INFO - codeparrot_training - Step 241: {'lr': 6.025e-05, 'samples': 46464, 'steps': 241, 'loss/train': 9.453759670257568} +01/27/2022 19:06:43 - INFO - codeparrot_training - Step 242: {'lr': 6.05e-05, 'samples': 46656, 'steps': 242, 'loss/train': 8.70737886428833} +01/27/2022 19:06:47 - INFO - codeparrot_training - Step 243: {'lr': 6.075e-05, 'samples': 46848, 'steps': 243, 'loss/train': 9.522861242294312} +01/27/2022 19:06:52 - INFO - codeparrot_training - Step 244: {'lr': 6.1e-05, 'samples': 47040, 'steps': 244, 'loss/train': 9.158050775527954} +01/27/2022 19:06:56 - INFO - codeparrot_training - Step 245: {'lr': 6.125e-05, 'samples': 47232, 'steps': 245, 'loss/train': 9.867875576019287} +01/27/2022 19:07:00 - INFO - codeparrot_training - Step 246: {'lr': 6.15e-05, 'samples': 47424, 'steps': 246, 'loss/train': 9.131139993667603} +01/27/2022 19:07:05 - INFO - codeparrot_training - Step 247: {'lr': 6.175e-05, 'samples': 47616, 'steps': 247, 'loss/train': 9.486006259918213} +01/27/2022 19:07:09 - INFO - codeparrot_training - Step 248: {'lr': 6.2e-05, 'samples': 47808, 'steps': 248, 'loss/train': 10.204999208450317} +01/27/2022 19:07:16 - INFO - codeparrot_training - Step 249: {'lr': 6.225e-05, 'samples': 48000, 'steps': 249, 'loss/train': 9.54790735244751} +01/27/2022 19:07:20 - INFO - codeparrot_training - Step 250: {'lr': 6.25e-05, 'samples': 48192, 'steps': 250, 'loss/train': 9.41062617301941} +01/27/2022 19:07:24 - INFO - codeparrot_training - Step 251: {'lr': 6.275000000000001e-05, 'samples': 48384, 'steps': 251, 'loss/train': 8.843762397766113} +01/27/2022 19:07:28 - INFO - codeparrot_training - Step 252: {'lr': 6.3e-05, 'samples': 48576, 'steps': 252, 'loss/train': 8.657515525817871} +01/27/2022 19:07:32 - INFO - codeparrot_training - Step 253: {'lr': 6.325e-05, 'samples': 48768, 'steps': 253, 'loss/train': 9.77532148361206} +01/27/2022 19:07:36 - INFO - codeparrot_training - Step 254: {'lr': 6.35e-05, 'samples': 48960, 'steps': 254, 'loss/train': 8.118179082870483} +01/27/2022 19:07:42 - INFO - codeparrot_training - Step 255: {'lr': 6.375e-05, 'samples': 49152, 'steps': 255, 'loss/train': 8.203963279724121} +01/27/2022 19:07:46 - INFO - codeparrot_training - Step 256: {'lr': 6.4e-05, 'samples': 49344, 'steps': 256, 'loss/train': 7.924534320831299} +01/27/2022 19:07:50 - INFO - codeparrot_training - Step 257: {'lr': 6.425e-05, 'samples': 49536, 'steps': 257, 'loss/train': 9.613861799240112} +01/27/2022 19:07:54 - INFO - codeparrot_training - Step 258: {'lr': 6.450000000000001e-05, 'samples': 49728, 'steps': 258, 'loss/train': 9.142173528671265} +01/27/2022 19:07:58 - INFO - codeparrot_training - Step 259: {'lr': 6.475e-05, 'samples': 49920, 'steps': 259, 'loss/train': 8.71092438697815} +01/27/2022 19:08:03 - INFO - codeparrot_training - Step 260: {'lr': 6.500000000000001e-05, 'samples': 50112, 'steps': 260, 'loss/train': 
9.467320919036865} +01/27/2022 19:08:07 - INFO - codeparrot_training - Step 261: {'lr': 6.525e-05, 'samples': 50304, 'steps': 261, 'loss/train': 9.934967994689941} +01/27/2022 19:08:12 - INFO - codeparrot_training - Step 262: {'lr': 6.55e-05, 'samples': 50496, 'steps': 262, 'loss/train': 9.104984521865845} +01/27/2022 19:08:16 - INFO - codeparrot_training - Step 263: {'lr': 6.575e-05, 'samples': 50688, 'steps': 263, 'loss/train': 8.995105504989624} +01/27/2022 19:08:20 - INFO - codeparrot_training - Step 264: {'lr': 6.6e-05, 'samples': 50880, 'steps': 264, 'loss/train': 9.03219223022461} +01/27/2022 19:08:26 - INFO - codeparrot_training - Step 265: {'lr': 6.625000000000001e-05, 'samples': 51072, 'steps': 265, 'loss/train': 9.551888465881348} +01/27/2022 19:08:30 - INFO - codeparrot_training - Step 266: {'lr': 6.65e-05, 'samples': 51264, 'steps': 266, 'loss/train': 8.787272930145264} +01/27/2022 19:08:35 - INFO - codeparrot_training - Step 267: {'lr': 6.675000000000001e-05, 'samples': 51456, 'steps': 267, 'loss/train': 8.996274948120117} +01/27/2022 19:08:39 - INFO - codeparrot_training - Step 268: {'lr': 6.7e-05, 'samples': 51648, 'steps': 268, 'loss/train': 9.076965093612671} +01/27/2022 19:08:43 - INFO - codeparrot_training - Step 269: {'lr': 6.725000000000001e-05, 'samples': 51840, 'steps': 269, 'loss/train': 10.229608297348022} +01/27/2022 19:08:48 - INFO - codeparrot_training - Step 270: {'lr': 6.75e-05, 'samples': 52032, 'steps': 270, 'loss/train': 8.42824673652649} +01/27/2022 19:08:52 - INFO - codeparrot_training - Step 271: {'lr': 6.775000000000001e-05, 'samples': 52224, 'steps': 271, 'loss/train': 9.485559225082397} +01/27/2022 19:08:56 - INFO - codeparrot_training - Step 272: {'lr': 6.800000000000001e-05, 'samples': 52416, 'steps': 272, 'loss/train': 10.391708135604858} +01/27/2022 19:09:01 - INFO - codeparrot_training - Step 273: {'lr': 6.825e-05, 'samples': 52608, 'steps': 273, 'loss/train': 8.94865608215332} +01/27/2022 19:09:05 - INFO - codeparrot_training - Step 274: {'lr': 6.850000000000001e-05, 'samples': 52800, 'steps': 274, 'loss/train': 11.138488054275513} +01/27/2022 19:09:11 - INFO - codeparrot_training - Step 275: {'lr': 6.875e-05, 'samples': 52992, 'steps': 275, 'loss/train': 9.075019598007202} +01/27/2022 19:09:15 - INFO - codeparrot_training - Step 276: {'lr': 6.900000000000001e-05, 'samples': 53184, 'steps': 276, 'loss/train': 9.00141978263855} +01/27/2022 19:09:19 - INFO - codeparrot_training - Step 277: {'lr': 6.925e-05, 'samples': 53376, 'steps': 277, 'loss/train': 9.004418134689331} +01/27/2022 19:09:23 - INFO - codeparrot_training - Step 278: {'lr': 6.950000000000001e-05, 'samples': 53568, 'steps': 278, 'loss/train': 8.450300931930542} +01/27/2022 19:09:27 - INFO - codeparrot_training - Step 279: {'lr': 6.975e-05, 'samples': 53760, 'steps': 279, 'loss/train': 8.994419574737549} +01/27/2022 19:09:33 - INFO - codeparrot_training - Step 280: {'lr': 7.000000000000001e-05, 'samples': 53952, 'steps': 280, 'loss/train': 9.547023296356201} +01/27/2022 19:09:37 - INFO - codeparrot_training - Step 281: {'lr': 7.025000000000001e-05, 'samples': 54144, 'steps': 281, 'loss/train': 9.412967205047607} +01/27/2022 19:09:41 - INFO - codeparrot_training - Step 282: {'lr': 7.049999999999999e-05, 'samples': 54336, 'steps': 282, 'loss/train': 8.810139656066895} +01/27/2022 19:09:45 - INFO - codeparrot_training - Step 283: {'lr': 7.075e-05, 'samples': 54528, 'steps': 283, 'loss/train': 9.050954818725586} +01/27/2022 19:09:49 - INFO - codeparrot_training - Step 284: {'lr': 
7.099999999999999e-05, 'samples': 54720, 'steps': 284, 'loss/train': 5.882340788841248} +01/27/2022 19:09:54 - INFO - codeparrot_training - Step 285: {'lr': 7.125e-05, 'samples': 54912, 'steps': 285, 'loss/train': 8.897455215454102} +01/27/2022 19:09:59 - INFO - codeparrot_training - Step 286: {'lr': 7.149999999999999e-05, 'samples': 55104, 'steps': 286, 'loss/train': 9.109829664230347} +01/27/2022 19:10:03 - INFO - codeparrot_training - Step 287: {'lr': 7.175e-05, 'samples': 55296, 'steps': 287, 'loss/train': 9.303452253341675} +01/27/2022 19:10:07 - INFO - codeparrot_training - Step 288: {'lr': 7.2e-05, 'samples': 55488, 'steps': 288, 'loss/train': 9.503397703170776} +01/27/2022 19:10:11 - INFO - codeparrot_training - Step 289: {'lr': 7.225e-05, 'samples': 55680, 'steps': 289, 'loss/train': 9.040723085403442} +01/27/2022 19:10:16 - INFO - codeparrot_training - Step 290: {'lr': 7.25e-05, 'samples': 55872, 'steps': 290, 'loss/train': 9.225221872329712} +01/27/2022 19:10:20 - INFO - codeparrot_training - Step 291: {'lr': 7.274999999999999e-05, 'samples': 56064, 'steps': 291, 'loss/train': 10.142202615737915} +01/27/2022 19:10:25 - INFO - codeparrot_training - Step 292: {'lr': 7.3e-05, 'samples': 56256, 'steps': 292, 'loss/train': 9.247089385986328} +01/27/2022 19:10:29 - INFO - codeparrot_training - Step 293: {'lr': 7.324999999999999e-05, 'samples': 56448, 'steps': 293, 'loss/train': 9.730626583099365} +01/27/2022 19:10:33 - INFO - codeparrot_training - Step 294: {'lr': 7.35e-05, 'samples': 56640, 'steps': 294, 'loss/train': 8.77634882926941} +01/27/2022 19:10:39 - INFO - codeparrot_training - Step 295: {'lr': 7.375e-05, 'samples': 56832, 'steps': 295, 'loss/train': 9.378878831863403} +01/27/2022 19:10:43 - INFO - codeparrot_training - Step 296: {'lr': 7.4e-05, 'samples': 57024, 'steps': 296, 'loss/train': 8.283158540725708} +01/27/2022 19:10:47 - INFO - codeparrot_training - Step 297: {'lr': 7.425e-05, 'samples': 57216, 'steps': 297, 'loss/train': 9.073874473571777} +01/27/2022 19:10:52 - INFO - codeparrot_training - Step 298: {'lr': 7.45e-05, 'samples': 57408, 'steps': 298, 'loss/train': 9.411186218261719} +01/27/2022 19:10:56 - INFO - codeparrot_training - Step 299: {'lr': 7.475e-05, 'samples': 57600, 'steps': 299, 'loss/train': 9.331010341644287} +01/27/2022 19:11:01 - INFO - codeparrot_training - Step 300: {'lr': 7.5e-05, 'samples': 57792, 'steps': 300, 'loss/train': 8.803576469421387} +01/27/2022 19:11:05 - INFO - codeparrot_training - Step 301: {'lr': 7.525e-05, 'samples': 57984, 'steps': 301, 'loss/train': 8.91267728805542} +01/27/2022 19:11:09 - INFO - codeparrot_training - Step 302: {'lr': 7.55e-05, 'samples': 58176, 'steps': 302, 'loss/train': 9.571316242218018} +01/27/2022 19:11:13 - INFO - codeparrot_training - Step 303: {'lr': 7.575e-05, 'samples': 58368, 'steps': 303, 'loss/train': 8.972235918045044} +01/27/2022 19:11:17 - INFO - codeparrot_training - Step 304: {'lr': 7.6e-05, 'samples': 58560, 'steps': 304, 'loss/train': 8.86762261390686} +01/27/2022 19:11:22 - INFO - codeparrot_training - Step 305: {'lr': 7.625e-05, 'samples': 58752, 'steps': 305, 'loss/train': 9.450607538223267} +01/27/2022 19:11:27 - INFO - codeparrot_training - Step 306: {'lr': 7.65e-05, 'samples': 58944, 'steps': 306, 'loss/train': 7.825889825820923} +01/27/2022 19:11:31 - INFO - codeparrot_training - Step 307: {'lr': 7.675e-05, 'samples': 59136, 'steps': 307, 'loss/train': 9.48786735534668} +01/27/2022 19:11:35 - INFO - codeparrot_training - Step 308: {'lr': 7.7e-05, 'samples': 59328, 'steps': 308, 
'loss/train': 8.511699199676514} +01/27/2022 19:11:39 - INFO - codeparrot_training - Step 309: {'lr': 7.725000000000001e-05, 'samples': 59520, 'steps': 309, 'loss/train': 8.14856243133545} +01/27/2022 19:11:45 - INFO - codeparrot_training - Step 310: {'lr': 7.75e-05, 'samples': 59712, 'steps': 310, 'loss/train': 9.22744631767273} +01/27/2022 19:11:49 - INFO - codeparrot_training - Step 311: {'lr': 7.775e-05, 'samples': 59904, 'steps': 311, 'loss/train': 8.862165927886963} +01/27/2022 19:11:54 - INFO - codeparrot_training - Step 312: {'lr': 7.8e-05, 'samples': 60096, 'steps': 312, 'loss/train': 7.8139564990997314} +01/27/2022 19:11:58 - INFO - codeparrot_training - Step 313: {'lr': 7.825e-05, 'samples': 60288, 'steps': 313, 'loss/train': 9.201639890670776} +01/27/2022 19:12:02 - INFO - codeparrot_training - Step 314: {'lr': 7.85e-05, 'samples': 60480, 'steps': 314, 'loss/train': 9.275781154632568} +01/27/2022 19:12:07 - INFO - codeparrot_training - Step 315: {'lr': 7.875e-05, 'samples': 60672, 'steps': 315, 'loss/train': 8.995943069458008} +01/27/2022 19:12:11 - INFO - codeparrot_training - Step 316: {'lr': 7.9e-05, 'samples': 60864, 'steps': 316, 'loss/train': 8.69792890548706} +01/27/2022 19:12:15 - INFO - codeparrot_training - Step 317: {'lr': 7.925e-05, 'samples': 61056, 'steps': 317, 'loss/train': 9.844456672668457} +01/27/2022 19:12:20 - INFO - codeparrot_training - Step 318: {'lr': 7.950000000000001e-05, 'samples': 61248, 'steps': 318, 'loss/train': 10.125522136688232} +01/27/2022 19:12:24 - INFO - codeparrot_training - Step 319: {'lr': 7.975e-05, 'samples': 61440, 'steps': 319, 'loss/train': 9.283169746398926} +01/27/2022 19:12:30 - INFO - codeparrot_training - Step 320: {'lr': 8e-05, 'samples': 61632, 'steps': 320, 'loss/train': 9.539125442504883} +01/27/2022 19:12:34 - INFO - codeparrot_training - Step 321: {'lr': 8.025e-05, 'samples': 61824, 'steps': 321, 'loss/train': 8.35297966003418} +01/27/2022 19:12:38 - INFO - codeparrot_training - Step 322: {'lr': 8.05e-05, 'samples': 62016, 'steps': 322, 'loss/train': 9.01502251625061} +01/27/2022 19:12:43 - INFO - codeparrot_training - Step 323: {'lr': 8.075e-05, 'samples': 62208, 'steps': 323, 'loss/train': 8.88915753364563} +01/27/2022 19:12:47 - INFO - codeparrot_training - Step 324: {'lr': 8.1e-05, 'samples': 62400, 'steps': 324, 'loss/train': 9.352758407592773} +01/27/2022 19:12:52 - INFO - codeparrot_training - Step 325: {'lr': 8.125000000000001e-05, 'samples': 62592, 'steps': 325, 'loss/train': 8.851012945175171} +01/27/2022 19:12:56 - INFO - codeparrot_training - Step 326: {'lr': 8.15e-05, 'samples': 62784, 'steps': 326, 'loss/train': 8.044254541397095} +01/27/2022 19:13:00 - INFO - codeparrot_training - Step 327: {'lr': 8.175000000000001e-05, 'samples': 62976, 'steps': 327, 'loss/train': 8.919136047363281} +01/27/2022 19:13:04 - INFO - codeparrot_training - Step 328: {'lr': 8.2e-05, 'samples': 63168, 'steps': 328, 'loss/train': 7.897423267364502} +01/27/2022 19:13:09 - INFO - codeparrot_training - Step 329: {'lr': 8.225000000000001e-05, 'samples': 63360, 'steps': 329, 'loss/train': 8.732008695602417} +01/27/2022 19:13:14 - INFO - codeparrot_training - Step 330: {'lr': 8.25e-05, 'samples': 63552, 'steps': 330, 'loss/train': 8.978665351867676} +01/27/2022 19:13:18 - INFO - codeparrot_training - Step 331: {'lr': 8.275e-05, 'samples': 63744, 'steps': 331, 'loss/train': 8.693783283233643} +01/27/2022 19:13:22 - INFO - codeparrot_training - Step 332: {'lr': 8.300000000000001e-05, 'samples': 63936, 'steps': 332, 'loss/train': 
10.519891262054443} +01/27/2022 19:13:27 - INFO - codeparrot_training - Step 333: {'lr': 8.325e-05, 'samples': 64128, 'steps': 333, 'loss/train': 9.274801254272461} +01/27/2022 19:13:31 - INFO - codeparrot_training - Step 334: {'lr': 8.350000000000001e-05, 'samples': 64320, 'steps': 334, 'loss/train': 9.015591859817505} +01/27/2022 19:13:36 - INFO - codeparrot_training - Step 335: {'lr': 8.375e-05, 'samples': 64512, 'steps': 335, 'loss/train': 8.601804971694946} +01/27/2022 19:13:40 - INFO - codeparrot_training - Step 336: {'lr': 8.400000000000001e-05, 'samples': 64704, 'steps': 336, 'loss/train': 9.10523271560669} +01/27/2022 19:13:44 - INFO - codeparrot_training - Step 337: {'lr': 8.425e-05, 'samples': 64896, 'steps': 337, 'loss/train': 9.47730302810669} +01/27/2022 19:13:49 - INFO - codeparrot_training - Step 338: {'lr': 8.450000000000001e-05, 'samples': 65088, 'steps': 338, 'loss/train': 8.80834150314331} +01/27/2022 19:13:53 - INFO - codeparrot_training - Step 339: {'lr': 8.475000000000001e-05, 'samples': 65280, 'steps': 339, 'loss/train': 8.973668575286865} +01/27/2022 19:14:00 - INFO - codeparrot_training - Step 340: {'lr': 8.5e-05, 'samples': 65472, 'steps': 340, 'loss/train': 8.7022705078125} +01/27/2022 19:14:04 - INFO - codeparrot_training - Step 341: {'lr': 8.525000000000001e-05, 'samples': 65664, 'steps': 341, 'loss/train': 9.048253297805786} +01/27/2022 19:14:08 - INFO - codeparrot_training - Step 342: {'lr': 8.55e-05, 'samples': 65856, 'steps': 342, 'loss/train': 8.496989965438843} +01/27/2022 19:14:12 - INFO - codeparrot_training - Step 343: {'lr': 8.575000000000001e-05, 'samples': 66048, 'steps': 343, 'loss/train': 10.007545709609985} +01/27/2022 19:14:16 - INFO - codeparrot_training - Step 344: {'lr': 8.599999999999999e-05, 'samples': 66240, 'steps': 344, 'loss/train': 9.02388310432434} +01/27/2022 19:14:21 - INFO - codeparrot_training - Step 345: {'lr': 8.625e-05, 'samples': 66432, 'steps': 345, 'loss/train': 8.611332893371582} +01/27/2022 19:14:25 - INFO - codeparrot_training - Step 346: {'lr': 8.65e-05, 'samples': 66624, 'steps': 346, 'loss/train': 8.02782654762268} +01/27/2022 19:14:30 - INFO - codeparrot_training - Step 347: {'lr': 8.675e-05, 'samples': 66816, 'steps': 347, 'loss/train': 8.79668641090393} +01/27/2022 19:14:34 - INFO - codeparrot_training - Step 348: {'lr': 8.7e-05, 'samples': 67008, 'steps': 348, 'loss/train': 8.362978219985962} +01/27/2022 19:14:38 - INFO - codeparrot_training - Step 349: {'lr': 8.724999999999999e-05, 'samples': 67200, 'steps': 349, 'loss/train': 8.863133668899536} +01/27/2022 19:14:43 - INFO - codeparrot_training - Step 350: {'lr': 8.75e-05, 'samples': 67392, 'steps': 350, 'loss/train': 8.88676929473877} +01/27/2022 19:14:47 - INFO - codeparrot_training - Step 351: {'lr': 8.774999999999999e-05, 'samples': 67584, 'steps': 351, 'loss/train': 9.247013568878174} +01/27/2022 19:14:51 - INFO - codeparrot_training - Step 352: {'lr': 8.8e-05, 'samples': 67776, 'steps': 352, 'loss/train': 7.664119720458984} +01/27/2022 19:14:56 - INFO - codeparrot_training - Step 353: {'lr': 8.824999999999999e-05, 'samples': 67968, 'steps': 353, 'loss/train': 9.151994705200195} +01/27/2022 19:15:00 - INFO - codeparrot_training - Step 354: {'lr': 8.85e-05, 'samples': 68160, 'steps': 354, 'loss/train': 9.227952003479004} +01/27/2022 19:15:06 - INFO - codeparrot_training - Step 355: {'lr': 8.875e-05, 'samples': 68352, 'steps': 355, 'loss/train': 9.120519876480103} +01/27/2022 19:15:10 - INFO - codeparrot_training - Step 356: {'lr': 8.9e-05, 'samples': 68544, 
'steps': 356, 'loss/train': 9.297804594039917} +01/27/2022 19:15:14 - INFO - codeparrot_training - Step 357: {'lr': 8.925e-05, 'samples': 68736, 'steps': 357, 'loss/train': 8.624364852905273} +01/27/2022 19:15:18 - INFO - codeparrot_training - Step 358: {'lr': 8.95e-05, 'samples': 68928, 'steps': 358, 'loss/train': 9.515746593475342} +01/27/2022 19:15:23 - INFO - codeparrot_training - Step 359: {'lr': 8.975e-05, 'samples': 69120, 'steps': 359, 'loss/train': 8.978293418884277} +01/27/2022 19:15:27 - INFO - codeparrot_training - Step 360: {'lr': 8.999999999999999e-05, 'samples': 69312, 'steps': 360, 'loss/train': 8.702915668487549} +01/27/2022 19:15:32 - INFO - codeparrot_training - Step 361: {'lr': 9.025e-05, 'samples': 69504, 'steps': 361, 'loss/train': 9.636523246765137} +01/27/2022 19:15:36 - INFO - codeparrot_training - Step 362: {'lr': 9.05e-05, 'samples': 69696, 'steps': 362, 'loss/train': 9.489566087722778} +01/27/2022 19:15:40 - INFO - codeparrot_training - Step 363: {'lr': 9.075e-05, 'samples': 69888, 'steps': 363, 'loss/train': 9.47793173789978} +01/27/2022 19:15:44 - INFO - codeparrot_training - Step 364: {'lr': 9.1e-05, 'samples': 70080, 'steps': 364, 'loss/train': 8.561887979507446} +01/27/2022 19:15:48 - INFO - codeparrot_training - Step 365: {'lr': 9.125e-05, 'samples': 70272, 'steps': 365, 'loss/train': 8.943610668182373} +01/27/2022 19:15:54 - INFO - codeparrot_training - Step 366: {'lr': 9.15e-05, 'samples': 70464, 'steps': 366, 'loss/train': 9.22202754020691} +01/27/2022 19:15:59 - INFO - codeparrot_training - Step 367: {'lr': 9.175e-05, 'samples': 70656, 'steps': 367, 'loss/train': 8.819038152694702} +01/27/2022 19:16:03 - INFO - codeparrot_training - Step 368: {'lr': 9.2e-05, 'samples': 70848, 'steps': 368, 'loss/train': 8.843501329421997} +01/27/2022 19:16:07 - INFO - codeparrot_training - Step 369: {'lr': 9.225e-05, 'samples': 71040, 'steps': 369, 'loss/train': 9.003690004348755} +01/27/2022 19:16:12 - INFO - codeparrot_training - Step 370: {'lr': 9.25e-05, 'samples': 71232, 'steps': 370, 'loss/train': 7.789920330047607} +01/27/2022 19:16:16 - INFO - codeparrot_training - Step 371: {'lr': 9.275e-05, 'samples': 71424, 'steps': 371, 'loss/train': 8.740252017974854} +01/27/2022 19:16:20 - INFO - codeparrot_training - Step 372: {'lr': 9.3e-05, 'samples': 71616, 'steps': 372, 'loss/train': 9.126051664352417} +01/27/2022 19:16:24 - INFO - codeparrot_training - Step 373: {'lr': 9.325e-05, 'samples': 71808, 'steps': 373, 'loss/train': 10.538711786270142} +01/27/2022 19:16:29 - INFO - codeparrot_training - Step 374: {'lr': 9.35e-05, 'samples': 72000, 'steps': 374, 'loss/train': 11.539859533309937} +01/27/2022 19:16:33 - INFO - codeparrot_training - Step 375: {'lr': 9.375e-05, 'samples': 72192, 'steps': 375, 'loss/train': 4.924748182296753} +01/27/2022 19:16:38 - INFO - codeparrot_training - Step 376: {'lr': 9.400000000000001e-05, 'samples': 72384, 'steps': 376, 'loss/train': 10.32872200012207} +01/27/2022 19:16:42 - INFO - codeparrot_training - Step 377: {'lr': 9.425e-05, 'samples': 72576, 'steps': 377, 'loss/train': 7.9768452644348145} +01/27/2022 19:16:46 - INFO - codeparrot_training - Step 378: {'lr': 9.45e-05, 'samples': 72768, 'steps': 378, 'loss/train': 8.341711521148682} +01/27/2022 19:16:50 - INFO - codeparrot_training - Step 379: {'lr': 9.475e-05, 'samples': 72960, 'steps': 379, 'loss/train': 8.565402746200562} +01/27/2022 19:16:56 - INFO - codeparrot_training - Step 380: {'lr': 9.5e-05, 'samples': 73152, 'steps': 380, 'loss/train': 9.063901662826538} +01/27/2022 
19:17:00 - INFO - codeparrot_training - Step 381: {'lr': 9.525e-05, 'samples': 73344, 'steps': 381, 'loss/train': 8.95740008354187} +01/27/2022 19:17:04 - INFO - codeparrot_training - Step 382: {'lr': 9.55e-05, 'samples': 73536, 'steps': 382, 'loss/train': 8.847960233688354} +01/27/2022 19:17:09 - INFO - codeparrot_training - Step 383: {'lr': 9.575000000000001e-05, 'samples': 73728, 'steps': 383, 'loss/train': 8.798057556152344} +01/27/2022 19:17:13 - INFO - codeparrot_training - Step 384: {'lr': 9.6e-05, 'samples': 73920, 'steps': 384, 'loss/train': 8.828840732574463} +01/27/2022 19:17:18 - INFO - codeparrot_training - Step 385: {'lr': 9.625000000000001e-05, 'samples': 74112, 'steps': 385, 'loss/train': 9.475341796875} +01/27/2022 19:17:22 - INFO - codeparrot_training - Step 386: {'lr': 9.65e-05, 'samples': 74304, 'steps': 386, 'loss/train': 8.664866209030151} +01/27/2022 19:17:26 - INFO - codeparrot_training - Step 387: {'lr': 9.675000000000001e-05, 'samples': 74496, 'steps': 387, 'loss/train': 9.094900846481323} +01/27/2022 19:17:30 - INFO - codeparrot_training - Step 388: {'lr': 9.7e-05, 'samples': 74688, 'steps': 388, 'loss/train': 8.839553833007812} +01/27/2022 19:17:34 - INFO - codeparrot_training - Step 389: {'lr': 9.725e-05, 'samples': 74880, 'steps': 389, 'loss/train': 8.480072736740112} +01/27/2022 19:17:39 - INFO - codeparrot_training - Step 390: {'lr': 9.750000000000001e-05, 'samples': 75072, 'steps': 390, 'loss/train': 9.995652437210083} +01/27/2022 19:17:44 - INFO - codeparrot_training - Step 391: {'lr': 9.775e-05, 'samples': 75264, 'steps': 391, 'loss/train': 9.592024326324463} +01/27/2022 19:17:48 - INFO - codeparrot_training - Step 392: {'lr': 9.800000000000001e-05, 'samples': 75456, 'steps': 392, 'loss/train': 8.603671073913574} +01/27/2022 19:17:52 - INFO - codeparrot_training - Step 393: {'lr': 9.825e-05, 'samples': 75648, 'steps': 393, 'loss/train': 9.834109783172607} +01/27/2022 19:17:56 - INFO - codeparrot_training - Step 394: {'lr': 9.850000000000001e-05, 'samples': 75840, 'steps': 394, 'loss/train': 9.401895761489868} +01/27/2022 19:18:02 - INFO - codeparrot_training - Step 395: {'lr': 9.875e-05, 'samples': 76032, 'steps': 395, 'loss/train': 8.424455165863037} +01/27/2022 19:18:06 - INFO - codeparrot_training - Step 396: {'lr': 9.900000000000001e-05, 'samples': 76224, 'steps': 396, 'loss/train': 8.191839694976807} +01/27/2022 19:18:10 - INFO - codeparrot_training - Step 397: {'lr': 9.925000000000001e-05, 'samples': 76416, 'steps': 397, 'loss/train': 9.516587018966675} +01/27/2022 19:18:14 - INFO - codeparrot_training - Step 398: {'lr': 9.95e-05, 'samples': 76608, 'steps': 398, 'loss/train': 8.904014110565186} +01/27/2022 19:18:19 - INFO - codeparrot_training - Step 399: {'lr': 9.975000000000001e-05, 'samples': 76800, 'steps': 399, 'loss/train': 8.627155780792236} +01/27/2022 19:18:24 - INFO - codeparrot_training - Step 400: {'lr': 0.0001, 'samples': 76992, 'steps': 400, 'loss/train': 7.955677270889282} +01/27/2022 19:18:28 - INFO - codeparrot_training - Step 401: {'lr': 0.00010025000000000001, 'samples': 77184, 'steps': 401, 'loss/train': 8.836859464645386} +01/27/2022 19:18:32 - INFO - codeparrot_training - Step 402: {'lr': 0.0001005, 'samples': 77376, 'steps': 402, 'loss/train': 8.888705492019653} +01/27/2022 19:18:36 - INFO - codeparrot_training - Step 403: {'lr': 0.00010075000000000001, 'samples': 77568, 'steps': 403, 'loss/train': 9.59162163734436} +01/27/2022 19:18:40 - INFO - codeparrot_training - Step 404: {'lr': 0.000101, 'samples': 77760, 'steps': 404, 
'loss/train': 7.719418287277222} +01/27/2022 19:18:46 - INFO - codeparrot_training - Step 405: {'lr': 0.00010125000000000001, 'samples': 77952, 'steps': 405, 'loss/train': 8.82796025276184} +01/27/2022 19:18:50 - INFO - codeparrot_training - Step 406: {'lr': 0.00010150000000000001, 'samples': 78144, 'steps': 406, 'loss/train': 5.772486448287964} +01/27/2022 19:18:54 - INFO - codeparrot_training - Step 407: {'lr': 0.00010174999999999999, 'samples': 78336, 'steps': 407, 'loss/train': 7.790276527404785} +01/27/2022 19:18:58 - INFO - codeparrot_training - Step 408: {'lr': 0.000102, 'samples': 78528, 'steps': 408, 'loss/train': 9.21680474281311} +01/27/2022 19:19:02 - INFO - codeparrot_training - Step 409: {'lr': 0.00010224999999999999, 'samples': 78720, 'steps': 409, 'loss/train': 9.145365715026855} +01/27/2022 19:19:07 - INFO - codeparrot_training - Step 410: {'lr': 0.0001025, 'samples': 78912, 'steps': 410, 'loss/train': 8.65236496925354} +01/27/2022 19:19:11 - INFO - codeparrot_training - Step 411: {'lr': 0.00010274999999999999, 'samples': 79104, 'steps': 411, 'loss/train': 9.195783376693726} +01/27/2022 19:19:16 - INFO - codeparrot_training - Step 412: {'lr': 0.000103, 'samples': 79296, 'steps': 412, 'loss/train': 8.685220956802368} +01/27/2022 19:19:20 - INFO - codeparrot_training - Step 413: {'lr': 0.00010325, 'samples': 79488, 'steps': 413, 'loss/train': 8.50923228263855} +01/27/2022 19:19:24 - INFO - codeparrot_training - Step 414: {'lr': 0.0001035, 'samples': 79680, 'steps': 414, 'loss/train': 8.791507244110107} +01/27/2022 19:19:30 - INFO - codeparrot_training - Step 415: {'lr': 0.00010375, 'samples': 79872, 'steps': 415, 'loss/train': 8.846287965774536} +01/27/2022 19:19:34 - INFO - codeparrot_training - Step 416: {'lr': 0.000104, 'samples': 80064, 'steps': 416, 'loss/train': 9.453208208084106} +01/27/2022 19:19:38 - INFO - codeparrot_training - Step 417: {'lr': 0.00010425, 'samples': 80256, 'steps': 417, 'loss/train': 7.977580547332764} +01/27/2022 19:19:43 - INFO - codeparrot_training - Step 418: {'lr': 0.00010449999999999999, 'samples': 80448, 'steps': 418, 'loss/train': 8.50738263130188} +01/27/2022 19:19:47 - INFO - codeparrot_training - Step 419: {'lr': 0.00010475, 'samples': 80640, 'steps': 419, 'loss/train': 7.507714033126831} +01/27/2022 19:19:51 - INFO - codeparrot_training - Step 420: {'lr': 0.000105, 'samples': 80832, 'steps': 420, 'loss/train': 10.262629508972168} +01/27/2022 19:19:56 - INFO - codeparrot_training - Step 421: {'lr': 0.00010525, 'samples': 81024, 'steps': 421, 'loss/train': 8.580647706985474} +01/27/2022 19:20:00 - INFO - codeparrot_training - Step 422: {'lr': 0.0001055, 'samples': 81216, 'steps': 422, 'loss/train': 8.881326913833618} +01/27/2022 19:20:05 - INFO - codeparrot_training - Step 423: {'lr': 0.00010575, 'samples': 81408, 'steps': 423, 'loss/train': 10.195034265518188} +01/27/2022 19:20:09 - INFO - codeparrot_training - Step 424: {'lr': 0.000106, 'samples': 81600, 'steps': 424, 'loss/train': 8.339170217514038} +01/27/2022 19:20:15 - INFO - codeparrot_training - Step 425: {'lr': 0.00010625, 'samples': 81792, 'steps': 425, 'loss/train': 8.661192655563354} +01/27/2022 19:20:19 - INFO - codeparrot_training - Step 426: {'lr': 0.0001065, 'samples': 81984, 'steps': 426, 'loss/train': 8.773484230041504} +01/27/2022 19:20:23 - INFO - codeparrot_training - Step 427: {'lr': 0.00010675, 'samples': 82176, 'steps': 427, 'loss/train': 7.9621946811676025} +01/27/2022 19:20:27 - INFO - codeparrot_training - Step 428: {'lr': 0.000107, 'samples': 82368, 'steps': 
428, 'loss/train': 9.662103652954102} +01/27/2022 19:20:31 - INFO - codeparrot_training - Step 429: {'lr': 0.00010725, 'samples': 82560, 'steps': 429, 'loss/train': 8.578760862350464} +01/27/2022 19:20:36 - INFO - codeparrot_training - Step 430: {'lr': 0.0001075, 'samples': 82752, 'steps': 430, 'loss/train': 9.057950019836426} +01/27/2022 19:20:41 - INFO - codeparrot_training - Step 431: {'lr': 0.00010775, 'samples': 82944, 'steps': 431, 'loss/train': 8.532402992248535} +01/27/2022 19:20:45 - INFO - codeparrot_training - Step 432: {'lr': 0.000108, 'samples': 83136, 'steps': 432, 'loss/train': 8.037179946899414} +01/27/2022 19:20:49 - INFO - codeparrot_training - Step 433: {'lr': 0.00010825, 'samples': 83328, 'steps': 433, 'loss/train': 8.34144401550293} +01/27/2022 19:20:53 - INFO - codeparrot_training - Step 434: {'lr': 0.00010850000000000001, 'samples': 83520, 'steps': 434, 'loss/train': 9.126363515853882} +01/27/2022 19:20:58 - INFO - codeparrot_training - Step 435: {'lr': 0.00010875, 'samples': 83712, 'steps': 435, 'loss/train': 8.610699892044067} +01/27/2022 19:21:02 - INFO - codeparrot_training - Step 436: {'lr': 0.000109, 'samples': 83904, 'steps': 436, 'loss/train': 9.06845998764038} +01/27/2022 19:21:06 - INFO - codeparrot_training - Step 437: {'lr': 0.00010925, 'samples': 84096, 'steps': 437, 'loss/train': 9.466124296188354} +01/27/2022 19:21:11 - INFO - codeparrot_training - Step 438: {'lr': 0.0001095, 'samples': 84288, 'steps': 438, 'loss/train': 9.236776113510132} +01/27/2022 19:21:15 - INFO - codeparrot_training - Step 439: {'lr': 0.00010975, 'samples': 84480, 'steps': 439, 'loss/train': 8.494872093200684} +01/27/2022 19:21:22 - INFO - codeparrot_training - Step 440: {'lr': 0.00011, 'samples': 84672, 'steps': 440, 'loss/train': 8.198070287704468} +01/27/2022 19:21:26 - INFO - codeparrot_training - Step 441: {'lr': 0.00011025, 'samples': 84864, 'steps': 441, 'loss/train': 8.65276837348938} +01/27/2022 19:21:30 - INFO - codeparrot_training - Step 442: {'lr': 0.0001105, 'samples': 85056, 'steps': 442, 'loss/train': 8.7622811794281} +01/27/2022 19:21:34 - INFO - codeparrot_training - Step 443: {'lr': 0.00011075000000000001, 'samples': 85248, 'steps': 443, 'loss/train': 9.05262565612793} +01/27/2022 19:21:38 - INFO - codeparrot_training - Step 444: {'lr': 0.000111, 'samples': 85440, 'steps': 444, 'loss/train': 8.239619493484497} +01/27/2022 19:21:43 - INFO - codeparrot_training - Step 445: {'lr': 0.00011125000000000001, 'samples': 85632, 'steps': 445, 'loss/train': 8.450908184051514} +01/27/2022 19:21:48 - INFO - codeparrot_training - Step 446: {'lr': 0.0001115, 'samples': 85824, 'steps': 446, 'loss/train': 9.082252979278564} +01/27/2022 19:21:52 - INFO - codeparrot_training - Step 447: {'lr': 0.00011175, 'samples': 86016, 'steps': 447, 'loss/train': 8.904988288879395} +01/27/2022 19:21:56 - INFO - codeparrot_training - Step 448: {'lr': 0.000112, 'samples': 86208, 'steps': 448, 'loss/train': 8.551026105880737} +01/27/2022 19:22:00 - INFO - codeparrot_training - Step 449: {'lr': 0.00011225, 'samples': 86400, 'steps': 449, 'loss/train': 8.441035509109497} +01/27/2022 19:22:06 - INFO - codeparrot_training - Step 450: {'lr': 0.00011250000000000001, 'samples': 86592, 'steps': 450, 'loss/train': 8.621562480926514} +01/27/2022 19:22:10 - INFO - codeparrot_training - Step 451: {'lr': 0.00011275, 'samples': 86784, 'steps': 451, 'loss/train': 8.748263597488403} +01/27/2022 19:22:14 - INFO - codeparrot_training - Step 452: {'lr': 0.00011300000000000001, 'samples': 86976, 'steps': 452, 
'loss/train': 8.997128963470459} +01/27/2022 19:22:18 - INFO - codeparrot_training - Step 453: {'lr': 0.00011325, 'samples': 87168, 'steps': 453, 'loss/train': 8.09796667098999} +01/27/2022 19:22:22 - INFO - codeparrot_training - Step 454: {'lr': 0.00011350000000000001, 'samples': 87360, 'steps': 454, 'loss/train': 8.689520359039307} +01/27/2022 19:22:28 - INFO - codeparrot_training - Step 455: {'lr': 0.00011375, 'samples': 87552, 'steps': 455, 'loss/train': 9.379069805145264} +01/27/2022 19:22:32 - INFO - codeparrot_training - Step 456: {'lr': 0.000114, 'samples': 87744, 'steps': 456, 'loss/train': 8.803585052490234} +01/27/2022 19:22:36 - INFO - codeparrot_training - Step 457: {'lr': 0.00011425000000000001, 'samples': 87936, 'steps': 457, 'loss/train': 8.374651908874512} +01/27/2022 19:22:40 - INFO - codeparrot_training - Step 458: {'lr': 0.0001145, 'samples': 88128, 'steps': 458, 'loss/train': 8.781396389007568} +01/27/2022 19:22:44 - INFO - codeparrot_training - Step 459: {'lr': 0.00011475000000000001, 'samples': 88320, 'steps': 459, 'loss/train': 8.592355012893677} +01/27/2022 19:22:49 - INFO - codeparrot_training - Step 460: {'lr': 0.000115, 'samples': 88512, 'steps': 460, 'loss/train': 7.943728923797607} +01/27/2022 19:22:54 - INFO - codeparrot_training - Step 461: {'lr': 0.00011525000000000001, 'samples': 88704, 'steps': 461, 'loss/train': 8.678408145904541} +01/27/2022 19:22:58 - INFO - codeparrot_training - Step 462: {'lr': 0.0001155, 'samples': 88896, 'steps': 462, 'loss/train': 10.198154211044312} +01/27/2022 19:23:02 - INFO - codeparrot_training - Step 463: {'lr': 0.00011575000000000001, 'samples': 89088, 'steps': 463, 'loss/train': 8.712399244308472} +01/27/2022 19:23:06 - INFO - codeparrot_training - Step 464: {'lr': 0.00011600000000000001, 'samples': 89280, 'steps': 464, 'loss/train': 8.593222618103027} +01/27/2022 19:23:11 - INFO - codeparrot_training - Step 465: {'lr': 0.00011625, 'samples': 89472, 'steps': 465, 'loss/train': 8.657633543014526} +01/27/2022 19:23:15 - INFO - codeparrot_training - Step 466: {'lr': 0.00011650000000000001, 'samples': 89664, 'steps': 466, 'loss/train': 8.809774160385132} +01/27/2022 19:23:19 - INFO - codeparrot_training - Step 467: {'lr': 0.00011675, 'samples': 89856, 'steps': 467, 'loss/train': 8.131398439407349} +01/27/2022 19:23:24 - INFO - codeparrot_training - Step 468: {'lr': 0.00011700000000000001, 'samples': 90048, 'steps': 468, 'loss/train': 8.037885189056396} +01/27/2022 19:23:28 - INFO - codeparrot_training - Step 469: {'lr': 0.00011724999999999999, 'samples': 90240, 'steps': 469, 'loss/train': 9.223987340927124} +01/27/2022 19:23:34 - INFO - codeparrot_training - Step 470: {'lr': 0.0001175, 'samples': 90432, 'steps': 470, 'loss/train': 8.752360582351685} +01/27/2022 19:23:38 - INFO - codeparrot_training - Step 471: {'lr': 0.00011775, 'samples': 90624, 'steps': 471, 'loss/train': 8.001303434371948} +01/27/2022 19:23:42 - INFO - codeparrot_training - Step 472: {'lr': 0.000118, 'samples': 90816, 'steps': 472, 'loss/train': 8.158232688903809} +01/27/2022 19:23:46 - INFO - codeparrot_training - Step 473: {'lr': 0.00011825, 'samples': 91008, 'steps': 473, 'loss/train': 7.7227113246917725} +01/27/2022 19:23:50 - INFO - codeparrot_training - Step 474: {'lr': 0.0001185, 'samples': 91200, 'steps': 474, 'loss/train': 8.60695481300354} +01/27/2022 19:23:55 - INFO - codeparrot_training - Step 475: {'lr': 0.00011875, 'samples': 91392, 'steps': 475, 'loss/train': 8.559202909469604} +01/27/2022 19:24:00 - INFO - codeparrot_training - Step 476: 
{'lr': 0.00011899999999999999, 'samples': 91584, 'steps': 476, 'loss/train': 8.1037015914917} +01/27/2022 19:24:04 - INFO - codeparrot_training - Step 477: {'lr': 0.00011925, 'samples': 91776, 'steps': 477, 'loss/train': 8.52787184715271} +01/27/2022 19:24:08 - INFO - codeparrot_training - Step 478: {'lr': 0.00011949999999999999, 'samples': 91968, 'steps': 478, 'loss/train': 9.877085208892822} +01/27/2022 19:24:12 - INFO - codeparrot_training - Step 479: {'lr': 0.00011975, 'samples': 92160, 'steps': 479, 'loss/train': 8.822041511535645} +01/27/2022 19:24:17 - INFO - codeparrot_training - Step 480: {'lr': 0.00012, 'samples': 92352, 'steps': 480, 'loss/train': 8.334150552749634} +01/27/2022 19:24:21 - INFO - codeparrot_training - Step 481: {'lr': 0.00012025, 'samples': 92544, 'steps': 481, 'loss/train': 8.455609560012817} +01/27/2022 19:24:25 - INFO - codeparrot_training - Step 482: {'lr': 0.0001205, 'samples': 92736, 'steps': 482, 'loss/train': 9.207207441329956} +01/27/2022 19:24:30 - INFO - codeparrot_training - Step 483: {'lr': 0.00012075, 'samples': 92928, 'steps': 483, 'loss/train': 8.488374710083008} +01/27/2022 19:24:34 - INFO - codeparrot_training - Step 484: {'lr': 0.000121, 'samples': 93120, 'steps': 484, 'loss/train': 8.650288581848145} +01/27/2022 19:24:40 - INFO - codeparrot_training - Step 485: {'lr': 0.00012124999999999999, 'samples': 93312, 'steps': 485, 'loss/train': 9.08907437324524} +01/27/2022 19:24:44 - INFO - codeparrot_training - Step 486: {'lr': 0.0001215, 'samples': 93504, 'steps': 486, 'loss/train': 8.138011693954468} +01/27/2022 19:24:48 - INFO - codeparrot_training - Step 487: {'lr': 0.00012175, 'samples': 93696, 'steps': 487, 'loss/train': 8.505961418151855} +01/27/2022 19:24:52 - INFO - codeparrot_training - Step 488: {'lr': 0.000122, 'samples': 93888, 'steps': 488, 'loss/train': 8.104951858520508} +01/27/2022 19:24:56 - INFO - codeparrot_training - Step 489: {'lr': 0.00012225, 'samples': 94080, 'steps': 489, 'loss/train': 8.997678995132446} +01/27/2022 19:25:01 - INFO - codeparrot_training - Step 490: {'lr': 0.0001225, 'samples': 94272, 'steps': 490, 'loss/train': 9.484206676483154} +01/27/2022 19:25:05 - INFO - codeparrot_training - Step 491: {'lr': 0.00012275, 'samples': 94464, 'steps': 491, 'loss/train': 8.445098161697388} +01/27/2022 19:25:10 - INFO - codeparrot_training - Step 492: {'lr': 0.000123, 'samples': 94656, 'steps': 492, 'loss/train': 9.071303129196167} +01/27/2022 19:25:14 - INFO - codeparrot_training - Step 493: {'lr': 0.00012325000000000001, 'samples': 94848, 'steps': 493, 'loss/train': 9.275761842727661} +01/27/2022 19:25:18 - INFO - codeparrot_training - Step 494: {'lr': 0.0001235, 'samples': 95040, 'steps': 494, 'loss/train': 7.959945917129517} +01/27/2022 19:25:24 - INFO - codeparrot_training - Step 495: {'lr': 0.00012375, 'samples': 95232, 'steps': 495, 'loss/train': 8.65991735458374} +01/27/2022 19:25:28 - INFO - codeparrot_training - Step 496: {'lr': 0.000124, 'samples': 95424, 'steps': 496, 'loss/train': 7.939209222793579} +01/27/2022 19:25:32 - INFO - codeparrot_training - Step 497: {'lr': 0.00012425, 'samples': 95616, 'steps': 497, 'loss/train': 8.233192920684814} +01/27/2022 19:25:36 - INFO - codeparrot_training - Step 498: {'lr': 0.0001245, 'samples': 95808, 'steps': 498, 'loss/train': 8.595185995101929} +01/27/2022 19:25:41 - INFO - codeparrot_training - Step 499: {'lr': 0.00012475, 'samples': 96000, 'steps': 499, 'loss/train': 8.485043048858643} +01/27/2022 19:25:46 - INFO - codeparrot_training - Step 500: {'lr': 0.000125, 
'samples': 96192, 'steps': 500, 'loss/train': 8.449069261550903} +01/27/2022 19:25:50 - INFO - codeparrot_training - Step 501: {'lr': 0.00012525, 'samples': 96384, 'steps': 501, 'loss/train': 8.081286907196045} +01/27/2022 19:25:54 - INFO - codeparrot_training - Step 502: {'lr': 0.00012550000000000001, 'samples': 96576, 'steps': 502, 'loss/train': 7.730855941772461} +01/27/2022 19:25:58 - INFO - codeparrot_training - Step 503: {'lr': 0.00012575, 'samples': 96768, 'steps': 503, 'loss/train': 8.047250032424927} +01/27/2022 19:26:02 - INFO - codeparrot_training - Step 504: {'lr': 0.000126, 'samples': 96960, 'steps': 504, 'loss/train': 8.251461267471313} +01/27/2022 19:26:07 - INFO - codeparrot_training - Step 505: {'lr': 0.00012625, 'samples': 97152, 'steps': 505, 'loss/train': 8.245980262756348} +01/27/2022 19:26:12 - INFO - codeparrot_training - Step 506: {'lr': 0.0001265, 'samples': 97344, 'steps': 506, 'loss/train': 9.07162356376648} +01/27/2022 19:26:16 - INFO - codeparrot_training - Step 507: {'lr': 0.00012675, 'samples': 97536, 'steps': 507, 'loss/train': 8.477697372436523} +01/27/2022 19:26:20 - INFO - codeparrot_training - Step 508: {'lr': 0.000127, 'samples': 97728, 'steps': 508, 'loss/train': 8.715029954910278} +01/27/2022 19:26:24 - INFO - codeparrot_training - Step 509: {'lr': 0.00012725, 'samples': 97920, 'steps': 509, 'loss/train': 8.291935443878174} +01/27/2022 19:26:30 - INFO - codeparrot_training - Step 510: {'lr': 0.0001275, 'samples': 98112, 'steps': 510, 'loss/train': 8.807954549789429} +01/27/2022 19:26:34 - INFO - codeparrot_training - Step 511: {'lr': 0.00012775000000000002, 'samples': 98304, 'steps': 511, 'loss/train': 7.690762996673584} +01/27/2022 19:26:39 - INFO - codeparrot_training - Step 512: {'lr': 0.000128, 'samples': 98496, 'steps': 512, 'loss/train': 9.079181671142578} +01/27/2022 19:26:43 - INFO - codeparrot_training - Step 513: {'lr': 0.00012825, 'samples': 98688, 'steps': 513, 'loss/train': 8.444893598556519} +01/27/2022 19:26:47 - INFO - codeparrot_training - Step 514: {'lr': 0.0001285, 'samples': 98880, 'steps': 514, 'loss/train': 8.17026972770691} +01/27/2022 19:26:52 - INFO - codeparrot_training - Step 515: {'lr': 0.00012875, 'samples': 99072, 'steps': 515, 'loss/train': 8.681805610656738} +01/27/2022 19:26:56 - INFO - codeparrot_training - Step 516: {'lr': 0.00012900000000000002, 'samples': 99264, 'steps': 516, 'loss/train': 8.015620708465576} +01/27/2022 19:27:00 - INFO - codeparrot_training - Step 517: {'lr': 0.00012925, 'samples': 99456, 'steps': 517, 'loss/train': 8.866118431091309} +01/27/2022 19:27:04 - INFO - codeparrot_training - Step 518: {'lr': 0.0001295, 'samples': 99648, 'steps': 518, 'loss/train': 7.207820892333984} +01/27/2022 19:27:09 - INFO - codeparrot_training - Step 519: {'lr': 0.00012975, 'samples': 99840, 'steps': 519, 'loss/train': 8.88998293876648} +01/27/2022 19:27:14 - INFO - codeparrot_training - Step 520: {'lr': 0.00013000000000000002, 'samples': 100032, 'steps': 520, 'loss/train': 8.746418237686157} +01/27/2022 19:27:19 - INFO - codeparrot_training - Step 521: {'lr': 0.00013025, 'samples': 100224, 'steps': 521, 'loss/train': 8.946112632751465} +01/27/2022 19:27:23 - INFO - codeparrot_training - Step 522: {'lr': 0.0001305, 'samples': 100416, 'steps': 522, 'loss/train': 8.354847192764282} +01/27/2022 19:27:27 - INFO - codeparrot_training - Step 523: {'lr': 0.00013075, 'samples': 100608, 'steps': 523, 'loss/train': 8.326250553131104} +01/27/2022 19:27:31 - INFO - codeparrot_training - Step 524: {'lr': 0.000131, 'samples': 
100800, 'steps': 524, 'loss/train': 7.511366128921509} +01/27/2022 19:27:36 - INFO - codeparrot_training - Step 525: {'lr': 0.00013125000000000002, 'samples': 100992, 'steps': 525, 'loss/train': 7.890795707702637} +01/27/2022 19:27:40 - INFO - codeparrot_training - Step 526: {'lr': 0.0001315, 'samples': 101184, 'steps': 526, 'loss/train': 8.375786304473877} +01/27/2022 19:27:45 - INFO - codeparrot_training - Step 527: {'lr': 0.00013175, 'samples': 101376, 'steps': 527, 'loss/train': 8.021414995193481} +01/27/2022 19:27:49 - INFO - codeparrot_training - Step 528: {'lr': 0.000132, 'samples': 101568, 'steps': 528, 'loss/train': 7.28090500831604} +01/27/2022 19:27:53 - INFO - codeparrot_training - Step 529: {'lr': 0.00013225000000000002, 'samples': 101760, 'steps': 529, 'loss/train': 8.963454008102417} +01/27/2022 19:27:58 - INFO - codeparrot_training - Step 530: {'lr': 0.00013250000000000002, 'samples': 101952, 'steps': 530, 'loss/train': 8.001948595046997} +01/27/2022 19:28:02 - INFO - codeparrot_training - Step 531: {'lr': 0.00013275, 'samples': 102144, 'steps': 531, 'loss/train': 9.026639699935913} +01/27/2022 19:28:06 - INFO - codeparrot_training - Step 532: {'lr': 0.000133, 'samples': 102336, 'steps': 532, 'loss/train': 8.229212522506714} +01/27/2022 19:28:10 - INFO - codeparrot_training - Step 533: {'lr': 0.00013325, 'samples': 102528, 'steps': 533, 'loss/train': 8.387352705001831} +01/27/2022 19:28:15 - INFO - codeparrot_training - Step 534: {'lr': 0.00013350000000000002, 'samples': 102720, 'steps': 534, 'loss/train': 8.451747179031372} +01/27/2022 19:28:20 - INFO - codeparrot_training - Step 535: {'lr': 0.00013375, 'samples': 102912, 'steps': 535, 'loss/train': 7.593340873718262} +01/27/2022 19:28:24 - INFO - codeparrot_training - Step 536: {'lr': 0.000134, 'samples': 103104, 'steps': 536, 'loss/train': 8.485268354415894} +01/27/2022 19:28:28 - INFO - codeparrot_training - Step 537: {'lr': 0.00013425, 'samples': 103296, 'steps': 537, 'loss/train': 8.370349645614624} +01/27/2022 19:28:32 - INFO - codeparrot_training - Step 538: {'lr': 0.00013450000000000002, 'samples': 103488, 'steps': 538, 'loss/train': 8.465364217758179} +01/27/2022 19:28:36 - INFO - codeparrot_training - Step 539: {'lr': 0.00013475000000000002, 'samples': 103680, 'steps': 539, 'loss/train': 8.298682451248169} +01/27/2022 19:28:42 - INFO - codeparrot_training - Step 540: {'lr': 0.000135, 'samples': 103872, 'steps': 540, 'loss/train': 8.758976697921753} +01/27/2022 19:28:47 - INFO - codeparrot_training - Step 541: {'lr': 0.00013525, 'samples': 104064, 'steps': 541, 'loss/train': 8.58174991607666} +01/27/2022 19:28:51 - INFO - codeparrot_training - Step 542: {'lr': 0.00013550000000000001, 'samples': 104256, 'steps': 542, 'loss/train': 8.730417251586914} +01/27/2022 19:28:55 - INFO - codeparrot_training - Step 543: {'lr': 0.00013575000000000002, 'samples': 104448, 'steps': 543, 'loss/train': 8.091321229934692} +01/27/2022 19:28:59 - INFO - codeparrot_training - Step 544: {'lr': 0.00013600000000000003, 'samples': 104640, 'steps': 544, 'loss/train': 8.639739990234375} +01/27/2022 19:29:04 - INFO - codeparrot_training - Step 545: {'lr': 0.00013625, 'samples': 104832, 'steps': 545, 'loss/train': 8.174670696258545} +01/27/2022 19:29:08 - INFO - codeparrot_training - Step 546: {'lr': 0.0001365, 'samples': 105024, 'steps': 546, 'loss/train': 8.93849802017212} +01/27/2022 19:29:12 - INFO - codeparrot_training - Step 547: {'lr': 0.00013675000000000002, 'samples': 105216, 'steps': 547, 'loss/train': 7.690248012542725} +01/27/2022 
19:29:17 - INFO - codeparrot_training - Step 548: {'lr': 0.00013700000000000002, 'samples': 105408, 'steps': 548, 'loss/train': 8.276523113250732} +01/27/2022 19:29:21 - INFO - codeparrot_training - Step 549: {'lr': 0.00013725, 'samples': 105600, 'steps': 549, 'loss/train': 8.273192882537842} +01/27/2022 19:29:26 - INFO - codeparrot_training - Step 550: {'lr': 0.0001375, 'samples': 105792, 'steps': 550, 'loss/train': 8.364015340805054} +01/27/2022 19:29:30 - INFO - codeparrot_training - Step 551: {'lr': 0.00013775000000000001, 'samples': 105984, 'steps': 551, 'loss/train': 9.108482837677002} +01/27/2022 19:29:34 - INFO - codeparrot_training - Step 552: {'lr': 0.00013800000000000002, 'samples': 106176, 'steps': 552, 'loss/train': 8.416153907775879} +01/27/2022 19:29:38 - INFO - codeparrot_training - Step 553: {'lr': 0.00013825000000000003, 'samples': 106368, 'steps': 553, 'loss/train': 8.693472862243652} +01/27/2022 19:29:42 - INFO - codeparrot_training - Step 554: {'lr': 0.0001385, 'samples': 106560, 'steps': 554, 'loss/train': 8.259191751480103} +01/27/2022 19:29:48 - INFO - codeparrot_training - Step 555: {'lr': 0.00013875, 'samples': 106752, 'steps': 555, 'loss/train': 8.928863525390625} +01/27/2022 19:29:52 - INFO - codeparrot_training - Step 556: {'lr': 0.00013900000000000002, 'samples': 106944, 'steps': 556, 'loss/train': 8.77770209312439} +01/27/2022 19:29:57 - INFO - codeparrot_training - Step 557: {'lr': 0.00013925000000000002, 'samples': 107136, 'steps': 557, 'loss/train': 7.560959100723267} +01/27/2022 19:30:01 - INFO - codeparrot_training - Step 558: {'lr': 0.0001395, 'samples': 107328, 'steps': 558, 'loss/train': 8.654487133026123} +01/27/2022 19:30:05 - INFO - codeparrot_training - Step 559: {'lr': 0.00013975, 'samples': 107520, 'steps': 559, 'loss/train': 8.728206396102905} +01/27/2022 19:30:10 - INFO - codeparrot_training - Step 560: {'lr': 0.00014000000000000001, 'samples': 107712, 'steps': 560, 'loss/train': 9.231706380844116} +01/27/2022 19:30:14 - INFO - codeparrot_training - Step 561: {'lr': 0.00014025000000000002, 'samples': 107904, 'steps': 561, 'loss/train': 8.957905769348145} +01/27/2022 19:30:18 - INFO - codeparrot_training - Step 562: {'lr': 0.00014050000000000003, 'samples': 108096, 'steps': 562, 'loss/train': 8.754714488983154} +01/27/2022 19:30:22 - INFO - codeparrot_training - Step 563: {'lr': 0.00014074999999999998, 'samples': 108288, 'steps': 563, 'loss/train': 8.369371891021729} +01/27/2022 19:30:27 - INFO - codeparrot_training - Step 564: {'lr': 0.00014099999999999998, 'samples': 108480, 'steps': 564, 'loss/train': 8.07359004020691} +01/27/2022 19:30:32 - INFO - codeparrot_training - Step 565: {'lr': 0.00014125, 'samples': 108672, 'steps': 565, 'loss/train': 9.256146669387817} +01/27/2022 19:30:36 - INFO - codeparrot_training - Step 566: {'lr': 0.0001415, 'samples': 108864, 'steps': 566, 'loss/train': 4.473942518234253} +01/27/2022 19:30:40 - INFO - codeparrot_training - Step 567: {'lr': 0.00014175, 'samples': 109056, 'steps': 567, 'loss/train': 7.521366119384766} +01/27/2022 19:30:44 - INFO - codeparrot_training - Step 568: {'lr': 0.00014199999999999998, 'samples': 109248, 'steps': 568, 'loss/train': 7.821649789810181} +01/27/2022 19:30:48 - INFO - codeparrot_training - Step 569: {'lr': 0.00014225, 'samples': 109440, 'steps': 569, 'loss/train': 9.155828475952148} +01/27/2022 19:30:54 - INFO - codeparrot_training - Step 570: {'lr': 0.0001425, 'samples': 109632, 'steps': 570, 'loss/train': 8.23076605796814} +01/27/2022 19:30:58 - INFO - codeparrot_training 
- Step 571: {'lr': 0.00014275, 'samples': 109824, 'steps': 571, 'loss/train': 8.648569107055664} +01/27/2022 19:31:03 - INFO - codeparrot_training - Step 572: {'lr': 0.00014299999999999998, 'samples': 110016, 'steps': 572, 'loss/train': 8.473631858825684} +01/27/2022 19:31:07 - INFO - codeparrot_training - Step 573: {'lr': 0.00014324999999999999, 'samples': 110208, 'steps': 573, 'loss/train': 6.631492853164673} +01/27/2022 19:31:12 - INFO - codeparrot_training - Step 574: {'lr': 0.0001435, 'samples': 110400, 'steps': 574, 'loss/train': 8.640091180801392} +01/27/2022 19:31:16 - INFO - codeparrot_training - Step 575: {'lr': 0.00014375, 'samples': 110592, 'steps': 575, 'loss/train': 8.769315004348755} +01/27/2022 19:31:20 - INFO - codeparrot_training - Step 576: {'lr': 0.000144, 'samples': 110784, 'steps': 576, 'loss/train': 8.279324054718018} +01/27/2022 19:31:25 - INFO - codeparrot_training - Step 577: {'lr': 0.00014424999999999998, 'samples': 110976, 'steps': 577, 'loss/train': 9.597347259521484} +01/27/2022 19:31:29 - INFO - codeparrot_training - Step 578: {'lr': 0.0001445, 'samples': 111168, 'steps': 578, 'loss/train': 8.302575588226318} +01/27/2022 19:31:34 - INFO - codeparrot_training - Step 579: {'lr': 0.00014475, 'samples': 111360, 'steps': 579, 'loss/train': 7.874625205993652} +01/27/2022 19:31:39 - INFO - codeparrot_training - Step 580: {'lr': 0.000145, 'samples': 111552, 'steps': 580, 'loss/train': 8.314260721206665} +01/27/2022 19:31:43 - INFO - codeparrot_training - Step 581: {'lr': 0.00014524999999999998, 'samples': 111744, 'steps': 581, 'loss/train': 8.078325748443604} +01/27/2022 19:31:47 - INFO - codeparrot_training - Step 582: {'lr': 0.00014549999999999999, 'samples': 111936, 'steps': 582, 'loss/train': 8.919520139694214} +01/27/2022 19:31:51 - INFO - codeparrot_training - Step 583: {'lr': 0.00014575, 'samples': 112128, 'steps': 583, 'loss/train': 8.303371667861938} +01/27/2022 19:31:55 - INFO - codeparrot_training - Step 584: {'lr': 0.000146, 'samples': 112320, 'steps': 584, 'loss/train': 7.914836883544922} +01/27/2022 19:32:01 - INFO - codeparrot_training - Step 585: {'lr': 0.00014625, 'samples': 112512, 'steps': 585, 'loss/train': 8.660935878753662} +01/27/2022 19:32:05 - INFO - codeparrot_training - Step 586: {'lr': 0.00014649999999999998, 'samples': 112704, 'steps': 586, 'loss/train': 7.782023906707764} +01/27/2022 19:32:09 - INFO - codeparrot_training - Step 587: {'lr': 0.00014675, 'samples': 112896, 'steps': 587, 'loss/train': 7.973663806915283} +01/27/2022 19:32:13 - INFO - codeparrot_training - Step 588: {'lr': 0.000147, 'samples': 113088, 'steps': 588, 'loss/train': 8.153911828994751} +01/27/2022 19:32:18 - INFO - codeparrot_training - Step 589: {'lr': 0.00014725, 'samples': 113280, 'steps': 589, 'loss/train': 8.781780481338501} +01/27/2022 19:32:23 - INFO - codeparrot_training - Step 590: {'lr': 0.0001475, 'samples': 113472, 'steps': 590, 'loss/train': 8.209387063980103} +01/27/2022 19:32:27 - INFO - codeparrot_training - Step 591: {'lr': 0.00014774999999999999, 'samples': 113664, 'steps': 591, 'loss/train': 7.8359527587890625} +01/27/2022 19:32:31 - INFO - codeparrot_training - Step 592: {'lr': 0.000148, 'samples': 113856, 'steps': 592, 'loss/train': 8.229139566421509} +01/27/2022 19:32:35 - INFO - codeparrot_training - Step 593: {'lr': 0.00014825, 'samples': 114048, 'steps': 593, 'loss/train': 8.467285394668579} +01/27/2022 19:32:40 - INFO - codeparrot_training - Step 594: {'lr': 0.0001485, 'samples': 114240, 'steps': 594, 'loss/train': 7.223497152328491} 
+01/27/2022 19:32:44 - INFO - codeparrot_training - Step 595: {'lr': 0.00014874999999999998, 'samples': 114432, 'steps': 595, 'loss/train': 8.965587615966797} +01/27/2022 19:32:49 - INFO - codeparrot_training - Step 596: {'lr': 0.000149, 'samples': 114624, 'steps': 596, 'loss/train': 8.973945379257202} +01/27/2022 19:32:53 - INFO - codeparrot_training - Step 597: {'lr': 0.00014925, 'samples': 114816, 'steps': 597, 'loss/train': 8.514985799789429} +01/27/2022 19:32:57 - INFO - codeparrot_training - Step 598: {'lr': 0.0001495, 'samples': 115008, 'steps': 598, 'loss/train': 9.305945634841919} +01/27/2022 19:33:03 - INFO - codeparrot_training - Step 599: {'lr': 0.00014975, 'samples': 115200, 'steps': 599, 'loss/train': 7.214632987976074} +01/27/2022 19:33:07 - INFO - codeparrot_training - Step 600: {'lr': 0.00015, 'samples': 115392, 'steps': 600, 'loss/train': 8.336686849594116} +01/27/2022 19:33:11 - INFO - codeparrot_training - Step 601: {'lr': 0.00015025, 'samples': 115584, 'steps': 601, 'loss/train': 7.75456166267395} +01/27/2022 19:33:15 - INFO - codeparrot_training - Step 602: {'lr': 0.0001505, 'samples': 115776, 'steps': 602, 'loss/train': 8.072105884552002} +01/27/2022 19:33:20 - INFO - codeparrot_training - Step 603: {'lr': 0.00015075, 'samples': 115968, 'steps': 603, 'loss/train': 8.81142282485962} +01/27/2022 19:33:25 - INFO - codeparrot_training - Step 604: {'lr': 0.000151, 'samples': 116160, 'steps': 604, 'loss/train': 7.754583120346069} +01/27/2022 19:33:29 - INFO - codeparrot_training - Step 605: {'lr': 0.00015125, 'samples': 116352, 'steps': 605, 'loss/train': 7.993920564651489} +01/27/2022 19:33:33 - INFO - codeparrot_training - Step 606: {'lr': 0.0001515, 'samples': 116544, 'steps': 606, 'loss/train': 7.54375147819519} +01/27/2022 19:33:37 - INFO - codeparrot_training - Step 607: {'lr': 0.00015175, 'samples': 116736, 'steps': 607, 'loss/train': 8.539509057998657} +01/27/2022 19:33:41 - INFO - codeparrot_training - Step 608: {'lr': 0.000152, 'samples': 116928, 'steps': 608, 'loss/train': 7.788200855255127} +01/27/2022 19:33:46 - INFO - codeparrot_training - Step 609: {'lr': 0.00015225, 'samples': 117120, 'steps': 609, 'loss/train': 8.278229713439941} +01/27/2022 19:33:51 - INFO - codeparrot_training - Step 610: {'lr': 0.0001525, 'samples': 117312, 'steps': 610, 'loss/train': 8.791933536529541} +01/27/2022 19:33:55 - INFO - codeparrot_training - Step 611: {'lr': 0.00015275, 'samples': 117504, 'steps': 611, 'loss/train': 8.514147520065308} +01/27/2022 19:33:59 - INFO - codeparrot_training - Step 612: {'lr': 0.000153, 'samples': 117696, 'steps': 612, 'loss/train': 8.430699348449707} +01/27/2022 19:34:03 - INFO - codeparrot_training - Step 613: {'lr': 0.00015325, 'samples': 117888, 'steps': 613, 'loss/train': 8.370607137680054} +01/27/2022 19:34:09 - INFO - codeparrot_training - Step 614: {'lr': 0.0001535, 'samples': 118080, 'steps': 614, 'loss/train': 8.25789499282837} +01/27/2022 19:34:13 - INFO - codeparrot_training - Step 615: {'lr': 0.00015375, 'samples': 118272, 'steps': 615, 'loss/train': 9.28782606124878} +01/27/2022 19:34:17 - INFO - codeparrot_training - Step 616: {'lr': 0.000154, 'samples': 118464, 'steps': 616, 'loss/train': 7.710235118865967} +01/27/2022 19:34:21 - INFO - codeparrot_training - Step 617: {'lr': 0.00015425, 'samples': 118656, 'steps': 617, 'loss/train': 9.011675834655762} +01/27/2022 19:34:26 - INFO - codeparrot_training - Step 618: {'lr': 0.00015450000000000001, 'samples': 118848, 'steps': 618, 'loss/train': 8.131665229797363} +01/27/2022 19:34:31 - 
INFO - codeparrot_training - Step 619: {'lr': 0.00015475, 'samples': 119040, 'steps': 619, 'loss/train': 7.657416343688965} +01/27/2022 19:34:35 - INFO - codeparrot_training - Step 620: {'lr': 0.000155, 'samples': 119232, 'steps': 620, 'loss/train': 8.05387258529663} +01/27/2022 19:34:39 - INFO - codeparrot_training - Step 621: {'lr': 0.00015525, 'samples': 119424, 'steps': 621, 'loss/train': 8.192591428756714} +01/27/2022 19:34:43 - INFO - codeparrot_training - Step 622: {'lr': 0.0001555, 'samples': 119616, 'steps': 622, 'loss/train': 8.283280849456787} +01/27/2022 19:34:47 - INFO - codeparrot_training - Step 623: {'lr': 0.00015575, 'samples': 119808, 'steps': 623, 'loss/train': 8.495567321777344} +01/27/2022 19:34:53 - INFO - codeparrot_training - Step 624: {'lr': 0.000156, 'samples': 120000, 'steps': 624, 'loss/train': 8.304754257202148} +01/27/2022 19:34:58 - INFO - codeparrot_training - Step 625: {'lr': 0.00015625, 'samples': 120192, 'steps': 625, 'loss/train': 8.422377347946167} +01/27/2022 19:35:02 - INFO - codeparrot_training - Step 626: {'lr': 0.0001565, 'samples': 120384, 'steps': 626, 'loss/train': 7.985602140426636} +01/27/2022 19:35:06 - INFO - codeparrot_training - Step 627: {'lr': 0.00015675000000000002, 'samples': 120576, 'steps': 627, 'loss/train': 7.165228843688965} +01/27/2022 19:35:10 - INFO - codeparrot_training - Step 628: {'lr': 0.000157, 'samples': 120768, 'steps': 628, 'loss/train': 8.297940015792847} +01/27/2022 19:35:15 - INFO - codeparrot_training - Step 629: {'lr': 0.00015725, 'samples': 120960, 'steps': 629, 'loss/train': 8.375863552093506} +01/27/2022 19:35:19 - INFO - codeparrot_training - Step 630: {'lr': 0.0001575, 'samples': 121152, 'steps': 630, 'loss/train': 9.098508596420288} +01/27/2022 19:35:23 - INFO - codeparrot_training - Step 631: {'lr': 0.00015775, 'samples': 121344, 'steps': 631, 'loss/train': 8.307096004486084} +01/27/2022 19:35:28 - INFO - codeparrot_training - Step 632: {'lr': 0.000158, 'samples': 121536, 'steps': 632, 'loss/train': 7.5440497398376465} +01/27/2022 19:35:32 - INFO - codeparrot_training - Step 633: {'lr': 0.00015825, 'samples': 121728, 'steps': 633, 'loss/train': 8.483729124069214} +01/27/2022 19:35:37 - INFO - codeparrot_training - Step 634: {'lr': 0.0001585, 'samples': 121920, 'steps': 634, 'loss/train': 8.692340612411499} +01/27/2022 19:35:41 - INFO - codeparrot_training - Step 635: {'lr': 0.00015875, 'samples': 122112, 'steps': 635, 'loss/train': 8.061896324157715} +01/27/2022 19:35:45 - INFO - codeparrot_training - Step 636: {'lr': 0.00015900000000000002, 'samples': 122304, 'steps': 636, 'loss/train': 8.079899311065674} +01/27/2022 19:35:49 - INFO - codeparrot_training - Step 637: {'lr': 0.00015925, 'samples': 122496, 'steps': 637, 'loss/train': 7.544781446456909} +01/27/2022 19:35:53 - INFO - codeparrot_training - Step 638: {'lr': 0.0001595, 'samples': 122688, 'steps': 638, 'loss/train': 9.011221647262573} +01/27/2022 19:35:59 - INFO - codeparrot_training - Step 639: {'lr': 0.00015975, 'samples': 122880, 'steps': 639, 'loss/train': 8.249982833862305} +01/27/2022 19:36:03 - INFO - codeparrot_training - Step 640: {'lr': 0.00016, 'samples': 123072, 'steps': 640, 'loss/train': 7.240501642227173} +01/27/2022 19:36:07 - INFO - codeparrot_training - Step 641: {'lr': 0.00016025000000000002, 'samples': 123264, 'steps': 641, 'loss/train': 8.318994998931885} +01/27/2022 19:36:12 - INFO - codeparrot_training - Step 642: {'lr': 0.0001605, 'samples': 123456, 'steps': 642, 'loss/train': 8.296253442764282} +01/27/2022 19:36:16 - INFO - 
codeparrot_training - Step 643: {'lr': 0.00016075, 'samples': 123648, 'steps': 643, 'loss/train': 9.265507936477661} +01/27/2022 19:36:21 - INFO - codeparrot_training - Step 644: {'lr': 0.000161, 'samples': 123840, 'steps': 644, 'loss/train': 8.830106019973755} +01/27/2022 19:36:25 - INFO - codeparrot_training - Step 645: {'lr': 0.00016125000000000002, 'samples': 124032, 'steps': 645, 'loss/train': 9.4315767288208} +01/27/2022 19:36:29 - INFO - codeparrot_training - Step 646: {'lr': 0.0001615, 'samples': 124224, 'steps': 646, 'loss/train': 5.333669543266296} +01/27/2022 19:36:33 - INFO - codeparrot_training - Step 647: {'lr': 0.00016175, 'samples': 124416, 'steps': 647, 'loss/train': 7.553824424743652} +01/27/2022 19:36:38 - INFO - codeparrot_training - Step 648: {'lr': 0.000162, 'samples': 124608, 'steps': 648, 'loss/train': 9.619696140289307} +01/27/2022 19:36:43 - INFO - codeparrot_training - Step 649: {'lr': 0.00016225000000000001, 'samples': 124800, 'steps': 649, 'loss/train': 8.453848600387573} +01/27/2022 19:36:47 - INFO - codeparrot_training - Step 650: {'lr': 0.00016250000000000002, 'samples': 124992, 'steps': 650, 'loss/train': 7.417596101760864} +01/27/2022 19:36:51 - INFO - codeparrot_training - Step 651: {'lr': 0.00016275, 'samples': 125184, 'steps': 651, 'loss/train': 7.024638891220093} +01/27/2022 19:36:55 - INFO - codeparrot_training - Step 652: {'lr': 0.000163, 'samples': 125376, 'steps': 652, 'loss/train': 8.656749486923218} +01/27/2022 19:37:00 - INFO - codeparrot_training - Step 653: {'lr': 0.00016325, 'samples': 125568, 'steps': 653, 'loss/train': 8.449368953704834} +01/27/2022 19:37:06 - INFO - codeparrot_training - Step 654: {'lr': 0.00016350000000000002, 'samples': 125760, 'steps': 654, 'loss/train': 8.034994125366211} +01/27/2022 19:37:10 - INFO - codeparrot_training - Step 655: {'lr': 0.00016375000000000002, 'samples': 125952, 'steps': 655, 'loss/train': 9.132595539093018} +01/27/2022 19:37:14 - INFO - codeparrot_training - Step 656: {'lr': 0.000164, 'samples': 126144, 'steps': 656, 'loss/train': 7.8157031536102295} +01/27/2022 19:37:18 - INFO - codeparrot_training - Step 657: {'lr': 0.00016425, 'samples': 126336, 'steps': 657, 'loss/train': 9.362704038619995} +01/27/2022 19:37:22 - INFO - codeparrot_training - Step 658: {'lr': 0.00016450000000000001, 'samples': 126528, 'steps': 658, 'loss/train': 8.65830659866333} +01/27/2022 19:37:27 - INFO - codeparrot_training - Step 659: {'lr': 0.00016475000000000002, 'samples': 126720, 'steps': 659, 'loss/train': 7.5368242263793945} +01/27/2022 19:37:31 - INFO - codeparrot_training - Step 660: {'lr': 0.000165, 'samples': 126912, 'steps': 660, 'loss/train': 7.6077353954315186} +01/27/2022 19:37:36 - INFO - codeparrot_training - Step 661: {'lr': 0.00016525, 'samples': 127104, 'steps': 661, 'loss/train': 7.764186143875122} +01/27/2022 19:37:40 - INFO - codeparrot_training - Step 662: {'lr': 0.0001655, 'samples': 127296, 'steps': 662, 'loss/train': 8.5371572971344} +01/27/2022 19:37:44 - INFO - codeparrot_training - Step 663: {'lr': 0.00016575000000000002, 'samples': 127488, 'steps': 663, 'loss/train': 8.563830614089966} +01/27/2022 19:37:49 - INFO - codeparrot_training - Step 664: {'lr': 0.00016600000000000002, 'samples': 127680, 'steps': 664, 'loss/train': 8.0364511013031} +01/27/2022 19:37:53 - INFO - codeparrot_training - Step 665: {'lr': 0.00016625, 'samples': 127872, 'steps': 665, 'loss/train': 7.877760887145996} +01/27/2022 19:37:57 - INFO - codeparrot_training - Step 666: {'lr': 0.0001665, 'samples': 128064, 'steps': 
666, 'loss/train': 8.593442916870117} +01/27/2022 19:38:01 - INFO - codeparrot_training - Step 667: {'lr': 0.00016675000000000001, 'samples': 128256, 'steps': 667, 'loss/train': 7.1693115234375} +01/27/2022 19:38:07 - INFO - codeparrot_training - Step 668: {'lr': 0.00016700000000000002, 'samples': 128448, 'steps': 668, 'loss/train': 7.933293342590332} +01/27/2022 19:38:11 - INFO - codeparrot_training - Step 669: {'lr': 0.00016725000000000003, 'samples': 128640, 'steps': 669, 'loss/train': 7.103787660598755} +01/27/2022 19:38:15 - INFO - codeparrot_training - Step 670: {'lr': 0.0001675, 'samples': 128832, 'steps': 670, 'loss/train': 8.280439138412476} +01/27/2022 19:38:20 - INFO - codeparrot_training - Step 671: {'lr': 0.00016775, 'samples': 129024, 'steps': 671, 'loss/train': 8.499544858932495} +01/27/2022 19:38:24 - INFO - codeparrot_training - Step 672: {'lr': 0.00016800000000000002, 'samples': 129216, 'steps': 672, 'loss/train': 8.899912118911743} +01/27/2022 19:38:29 - INFO - codeparrot_training - Step 673: {'lr': 0.00016825000000000002, 'samples': 129408, 'steps': 673, 'loss/train': 8.211901187896729} +01/27/2022 19:38:33 - INFO - codeparrot_training - Step 674: {'lr': 0.0001685, 'samples': 129600, 'steps': 674, 'loss/train': 8.731487274169922} +01/27/2022 19:38:37 - INFO - codeparrot_training - Step 675: {'lr': 0.00016875, 'samples': 129792, 'steps': 675, 'loss/train': 6.801019191741943} +01/27/2022 19:38:42 - INFO - codeparrot_training - Step 676: {'lr': 0.00016900000000000002, 'samples': 129984, 'steps': 676, 'loss/train': 7.723647594451904} +01/27/2022 19:38:46 - INFO - codeparrot_training - Step 677: {'lr': 0.00016925000000000002, 'samples': 130176, 'steps': 677, 'loss/train': 7.8934242725372314} +01/27/2022 19:38:51 - INFO - codeparrot_training - Step 678: {'lr': 0.00016950000000000003, 'samples': 130368, 'steps': 678, 'loss/train': 7.5493855476379395} +01/27/2022 19:38:55 - INFO - codeparrot_training - Step 679: {'lr': 0.00016975, 'samples': 130560, 'steps': 679, 'loss/train': 8.79888367652893} +01/27/2022 19:38:59 - INFO - codeparrot_training - Step 680: {'lr': 0.00017, 'samples': 130752, 'steps': 680, 'loss/train': 7.605967283248901} +01/27/2022 19:39:03 - INFO - codeparrot_training - Step 681: {'lr': 0.00017025000000000002, 'samples': 130944, 'steps': 681, 'loss/train': 5.154707193374634} +01/27/2022 19:39:08 - INFO - codeparrot_training - Step 682: {'lr': 0.00017050000000000002, 'samples': 131136, 'steps': 682, 'loss/train': 8.063627243041992} +01/27/2022 19:39:13 - INFO - codeparrot_training - Step 683: {'lr': 0.00017075, 'samples': 131328, 'steps': 683, 'loss/train': 8.482172012329102} +01/27/2022 19:39:18 - INFO - codeparrot_training - Step 684: {'lr': 0.000171, 'samples': 131520, 'steps': 684, 'loss/train': 8.070060968399048} +01/27/2022 19:39:22 - INFO - codeparrot_training - Step 685: {'lr': 0.00017125000000000002, 'samples': 131712, 'steps': 685, 'loss/train': 4.122437953948975} +01/27/2022 19:39:26 - INFO - codeparrot_training - Step 686: {'lr': 0.00017150000000000002, 'samples': 131904, 'steps': 686, 'loss/train': 8.9731764793396} +01/27/2022 19:39:30 - INFO - codeparrot_training - Step 687: {'lr': 0.00017175000000000003, 'samples': 132096, 'steps': 687, 'loss/train': 8.004594326019287} +01/27/2022 19:39:35 - INFO - codeparrot_training - Step 688: {'lr': 0.00017199999999999998, 'samples': 132288, 'steps': 688, 'loss/train': 7.945577144622803} +01/27/2022 19:39:39 - INFO - codeparrot_training - Step 689: {'lr': 0.00017224999999999999, 'samples': 132480, 'steps': 
689, 'loss/train': 8.377017974853516} +01/27/2022 19:39:44 - INFO - codeparrot_training - Step 690: {'lr': 0.0001725, 'samples': 132672, 'steps': 690, 'loss/train': 8.469246625900269} +01/27/2022 19:39:48 - INFO - codeparrot_training - Step 691: {'lr': 0.00017275, 'samples': 132864, 'steps': 691, 'loss/train': 8.472698450088501} +01/27/2022 19:39:52 - INFO - codeparrot_training - Step 692: {'lr': 0.000173, 'samples': 133056, 'steps': 692, 'loss/train': 8.58025074005127} +01/27/2022 19:39:58 - INFO - codeparrot_training - Step 693: {'lr': 0.00017324999999999998, 'samples': 133248, 'steps': 693, 'loss/train': 8.043803215026855} +01/27/2022 19:40:02 - INFO - codeparrot_training - Step 694: {'lr': 0.0001735, 'samples': 133440, 'steps': 694, 'loss/train': 7.3625078201293945} +01/27/2022 19:40:06 - INFO - codeparrot_training - Step 695: {'lr': 0.00017375, 'samples': 133632, 'steps': 695, 'loss/train': 8.26303482055664} +01/27/2022 19:40:10 - INFO - codeparrot_training - Step 696: {'lr': 0.000174, 'samples': 133824, 'steps': 696, 'loss/train': 7.4576239585876465} +01/27/2022 19:40:14 - INFO - codeparrot_training - Step 697: {'lr': 0.00017424999999999998, 'samples': 134016, 'steps': 697, 'loss/train': 8.191519975662231} +01/27/2022 19:40:19 - INFO - codeparrot_training - Step 698: {'lr': 0.00017449999999999999, 'samples': 134208, 'steps': 698, 'loss/train': 7.1545493602752686} +01/27/2022 19:40:23 - INFO - codeparrot_training - Step 699: {'lr': 0.00017475, 'samples': 134400, 'steps': 699, 'loss/train': 8.097326517105103} +01/27/2022 19:40:28 - INFO - codeparrot_training - Step 700: {'lr': 0.000175, 'samples': 134592, 'steps': 700, 'loss/train': 7.546249151229858} +01/27/2022 19:40:32 - INFO - codeparrot_training - Step 701: {'lr': 0.00017525, 'samples': 134784, 'steps': 701, 'loss/train': 8.151185989379883} +01/27/2022 19:40:36 - INFO - codeparrot_training - Step 702: {'lr': 0.00017549999999999998, 'samples': 134976, 'steps': 702, 'loss/train': 7.952397108078003} +01/27/2022 19:40:41 - INFO - codeparrot_training - Step 703: {'lr': 0.00017575, 'samples': 135168, 'steps': 703, 'loss/train': 8.549700736999512} +01/27/2022 19:40:45 - INFO - codeparrot_training - Step 704: {'lr': 0.000176, 'samples': 135360, 'steps': 704, 'loss/train': 8.339847564697266} +01/27/2022 19:40:49 - INFO - codeparrot_training - Step 705: {'lr': 0.00017625, 'samples': 135552, 'steps': 705, 'loss/train': 7.991495847702026} +01/27/2022 19:40:54 - INFO - codeparrot_training - Step 706: {'lr': 0.00017649999999999998, 'samples': 135744, 'steps': 706, 'loss/train': 7.92998743057251} +01/27/2022 19:40:58 - INFO - codeparrot_training - Step 707: {'lr': 0.00017675, 'samples': 135936, 'steps': 707, 'loss/train': 8.14457631111145} +01/27/2022 19:41:03 - INFO - codeparrot_training - Step 708: {'lr': 0.000177, 'samples': 136128, 'steps': 708, 'loss/train': 7.857103586196899} +01/27/2022 19:41:08 - INFO - codeparrot_training - Step 709: {'lr': 0.00017725, 'samples': 136320, 'steps': 709, 'loss/train': 8.489074230194092} +01/27/2022 19:41:12 - INFO - codeparrot_training - Step 710: {'lr': 0.0001775, 'samples': 136512, 'steps': 710, 'loss/train': 7.297561883926392} +01/27/2022 19:41:16 - INFO - codeparrot_training - Step 711: {'lr': 0.00017774999999999998, 'samples': 136704, 'steps': 711, 'loss/train': 7.275579929351807} +01/27/2022 19:41:20 - INFO - codeparrot_training - Step 712: {'lr': 0.000178, 'samples': 136896, 'steps': 712, 'loss/train': 7.603190660476685} +01/27/2022 19:41:25 - INFO - codeparrot_training - Step 713: {'lr': 
0.00017825, 'samples': 137088, 'steps': 713, 'loss/train': 7.588614463806152} +01/27/2022 19:41:29 - INFO - codeparrot_training - Step 714: {'lr': 0.0001785, 'samples': 137280, 'steps': 714, 'loss/train': 7.02622389793396} +01/27/2022 19:41:33 - INFO - codeparrot_training - Step 715: {'lr': 0.00017875, 'samples': 137472, 'steps': 715, 'loss/train': 7.690698623657227} +01/27/2022 19:41:38 - INFO - codeparrot_training - Step 716: {'lr': 0.000179, 'samples': 137664, 'steps': 716, 'loss/train': 7.861556768417358} +01/27/2022 19:41:42 - INFO - codeparrot_training - Step 717: {'lr': 0.00017925, 'samples': 137856, 'steps': 717, 'loss/train': 7.842064619064331} +01/27/2022 19:41:47 - INFO - codeparrot_training - Step 718: {'lr': 0.0001795, 'samples': 138048, 'steps': 718, 'loss/train': 8.416612386703491} +01/27/2022 19:41:51 - INFO - codeparrot_training - Step 719: {'lr': 0.00017975, 'samples': 138240, 'steps': 719, 'loss/train': 7.464595556259155} +01/27/2022 19:41:55 - INFO - codeparrot_training - Step 720: {'lr': 0.00017999999999999998, 'samples': 138432, 'steps': 720, 'loss/train': 7.971174716949463} +01/27/2022 19:41:59 - INFO - codeparrot_training - Step 721: {'lr': 0.00018025, 'samples': 138624, 'steps': 721, 'loss/train': 7.8818700313568115} +01/27/2022 19:42:04 - INFO - codeparrot_training - Step 722: {'lr': 0.0001805, 'samples': 138816, 'steps': 722, 'loss/train': 7.1598801612854} +01/27/2022 19:42:09 - INFO - codeparrot_training - Step 723: {'lr': 0.00018075, 'samples': 139008, 'steps': 723, 'loss/train': 7.922802686691284} +01/27/2022 19:42:13 - INFO - codeparrot_training - Step 724: {'lr': 0.000181, 'samples': 139200, 'steps': 724, 'loss/train': 7.660142183303833} +01/27/2022 19:42:17 - INFO - codeparrot_training - Step 725: {'lr': 0.00018125, 'samples': 139392, 'steps': 725, 'loss/train': 7.76464319229126} +01/27/2022 19:42:21 - INFO - codeparrot_training - Step 726: {'lr': 0.0001815, 'samples': 139584, 'steps': 726, 'loss/train': 7.864576578140259} +01/27/2022 19:42:25 - INFO - codeparrot_training - Step 727: {'lr': 0.00018175, 'samples': 139776, 'steps': 727, 'loss/train': 7.511577129364014} +01/27/2022 19:42:32 - INFO - codeparrot_training - Step 728: {'lr': 0.000182, 'samples': 139968, 'steps': 728, 'loss/train': 7.740895986557007} +01/27/2022 19:42:36 - INFO - codeparrot_training - Step 729: {'lr': 0.00018225, 'samples': 140160, 'steps': 729, 'loss/train': 8.064425468444824} +01/27/2022 19:42:41 - INFO - codeparrot_training - Step 730: {'lr': 0.0001825, 'samples': 140352, 'steps': 730, 'loss/train': 8.026722192764282} +01/27/2022 19:42:45 - INFO - codeparrot_training - Step 731: {'lr': 0.00018275, 'samples': 140544, 'steps': 731, 'loss/train': 4.874718189239502} +01/27/2022 19:42:49 - INFO - codeparrot_training - Step 732: {'lr': 0.000183, 'samples': 140736, 'steps': 732, 'loss/train': 5.072027206420898} +01/27/2022 19:42:54 - INFO - codeparrot_training - Step 733: {'lr': 0.00018325, 'samples': 140928, 'steps': 733, 'loss/train': 7.2466349601745605} +01/27/2022 19:42:58 - INFO - codeparrot_training - Step 734: {'lr': 0.0001835, 'samples': 141120, 'steps': 734, 'loss/train': 8.898398637771606} +01/27/2022 19:43:02 - INFO - codeparrot_training - Step 735: {'lr': 0.00018375, 'samples': 141312, 'steps': 735, 'loss/train': 8.268721103668213} +01/27/2022 19:43:06 - INFO - codeparrot_training - Step 736: {'lr': 0.000184, 'samples': 141504, 'steps': 736, 'loss/train': 8.143299579620361} +01/27/2022 19:43:11 - INFO - codeparrot_training - Step 737: {'lr': 0.00018425, 'samples': 141696, 
'steps': 737, 'loss/train': 7.380795478820801} +01/27/2022 19:43:16 - INFO - codeparrot_training - Step 738: {'lr': 0.0001845, 'samples': 141888, 'steps': 738, 'loss/train': 7.440507173538208} +01/27/2022 19:43:21 - INFO - codeparrot_training - Step 739: {'lr': 0.00018475, 'samples': 142080, 'steps': 739, 'loss/train': 7.575955152511597} +01/27/2022 19:43:25 - INFO - codeparrot_training - Step 740: {'lr': 0.000185, 'samples': 142272, 'steps': 740, 'loss/train': 8.205970287322998} +01/27/2022 19:43:29 - INFO - codeparrot_training - Step 741: {'lr': 0.00018525, 'samples': 142464, 'steps': 741, 'loss/train': 8.263526916503906} +01/27/2022 19:43:33 - INFO - codeparrot_training - Step 742: {'lr': 0.0001855, 'samples': 142656, 'steps': 742, 'loss/train': 6.766265630722046} +01/27/2022 19:43:38 - INFO - codeparrot_training - Step 743: {'lr': 0.00018575000000000002, 'samples': 142848, 'steps': 743, 'loss/train': 5.876844048500061} +01/27/2022 19:43:43 - INFO - codeparrot_training - Step 744: {'lr': 0.000186, 'samples': 143040, 'steps': 744, 'loss/train': 5.536075830459595} +01/27/2022 19:43:47 - INFO - codeparrot_training - Step 745: {'lr': 0.00018625, 'samples': 143232, 'steps': 745, 'loss/train': 7.311076641082764} +01/27/2022 19:43:51 - INFO - codeparrot_training - Step 746: {'lr': 0.0001865, 'samples': 143424, 'steps': 746, 'loss/train': 8.380584955215454} +01/27/2022 19:43:55 - INFO - codeparrot_training - Step 747: {'lr': 0.00018675, 'samples': 143616, 'steps': 747, 'loss/train': 8.695755243301392} +01/27/2022 19:44:00 - INFO - codeparrot_training - Step 748: {'lr': 0.000187, 'samples': 143808, 'steps': 748, 'loss/train': 6.71664834022522} +01/27/2022 19:44:04 - INFO - codeparrot_training - Step 749: {'lr': 0.00018725, 'samples': 144000, 'steps': 749, 'loss/train': 8.109562397003174} +01/27/2022 19:44:09 - INFO - codeparrot_training - Step 750: {'lr': 0.0001875, 'samples': 144192, 'steps': 750, 'loss/train': 5.441789388656616} +01/27/2022 19:44:13 - INFO - codeparrot_training - Step 751: {'lr': 0.00018775, 'samples': 144384, 'steps': 751, 'loss/train': 7.893102407455444} +01/27/2022 19:44:17 - INFO - codeparrot_training - Step 752: {'lr': 0.00018800000000000002, 'samples': 144576, 'steps': 752, 'loss/train': 8.077803611755371} +01/27/2022 19:44:23 - INFO - codeparrot_training - Step 753: {'lr': 0.00018825, 'samples': 144768, 'steps': 753, 'loss/train': 8.374654054641724} +01/27/2022 19:44:27 - INFO - codeparrot_training - Step 754: {'lr': 0.0001885, 'samples': 144960, 'steps': 754, 'loss/train': 7.729047775268555} +01/27/2022 19:44:31 - INFO - codeparrot_training - Step 755: {'lr': 0.00018875, 'samples': 145152, 'steps': 755, 'loss/train': 7.703528165817261} +01/27/2022 19:44:35 - INFO - codeparrot_training - Step 756: {'lr': 0.000189, 'samples': 145344, 'steps': 756, 'loss/train': 7.928225755691528} +01/27/2022 19:44:39 - INFO - codeparrot_training - Step 757: {'lr': 0.00018925, 'samples': 145536, 'steps': 757, 'loss/train': 7.509890556335449} +01/27/2022 19:44:44 - INFO - codeparrot_training - Step 758: {'lr': 0.0001895, 'samples': 145728, 'steps': 758, 'loss/train': 7.092700481414795} +01/27/2022 19:44:48 - INFO - codeparrot_training - Step 759: {'lr': 0.00018975, 'samples': 145920, 'steps': 759, 'loss/train': 6.214686155319214} +01/27/2022 19:44:53 - INFO - codeparrot_training - Step 760: {'lr': 0.00019, 'samples': 146112, 'steps': 760, 'loss/train': 8.178486585617065} +01/27/2022 19:44:57 - INFO - codeparrot_training - Step 761: {'lr': 0.00019025000000000002, 'samples': 146304, 'steps': 
761, 'loss/train': 6.458388090133667} +01/27/2022 19:45:01 - INFO - codeparrot_training - Step 762: {'lr': 0.0001905, 'samples': 146496, 'steps': 762, 'loss/train': 7.590346097946167} +01/27/2022 19:45:06 - INFO - codeparrot_training - Step 763: {'lr': 0.00019075, 'samples': 146688, 'steps': 763, 'loss/train': 7.043601751327515} +01/27/2022 19:45:10 - INFO - codeparrot_training - Step 764: {'lr': 0.000191, 'samples': 146880, 'steps': 764, 'loss/train': 7.580701589584351} +01/27/2022 19:45:15 - INFO - codeparrot_training - Step 765: {'lr': 0.00019125000000000001, 'samples': 147072, 'steps': 765, 'loss/train': 7.34462571144104} +01/27/2022 19:45:19 - INFO - codeparrot_training - Step 766: {'lr': 0.00019150000000000002, 'samples': 147264, 'steps': 766, 'loss/train': 6.2960364818573} +01/27/2022 19:45:23 - INFO - codeparrot_training - Step 767: {'lr': 0.00019175, 'samples': 147456, 'steps': 767, 'loss/train': 7.760506868362427} +01/27/2022 19:45:28 - INFO - codeparrot_training - Step 768: {'lr': 0.000192, 'samples': 147648, 'steps': 768, 'loss/train': 7.030592679977417} +01/27/2022 19:45:33 - INFO - codeparrot_training - Step 769: {'lr': 0.00019225, 'samples': 147840, 'steps': 769, 'loss/train': 8.43232011795044} +01/27/2022 19:45:37 - INFO - codeparrot_training - Step 770: {'lr': 0.00019250000000000002, 'samples': 148032, 'steps': 770, 'loss/train': 7.337213516235352} +01/27/2022 19:45:41 - INFO - codeparrot_training - Step 771: {'lr': 0.00019275, 'samples': 148224, 'steps': 771, 'loss/train': 6.487716436386108} +01/27/2022 19:45:45 - INFO - codeparrot_training - Step 772: {'lr': 0.000193, 'samples': 148416, 'steps': 772, 'loss/train': 7.263144493103027} +01/27/2022 19:45:51 - INFO - codeparrot_training - Step 773: {'lr': 0.00019325, 'samples': 148608, 'steps': 773, 'loss/train': 4.697204232215881} +01/27/2022 19:45:55 - INFO - codeparrot_training - Step 774: {'lr': 0.00019350000000000001, 'samples': 148800, 'steps': 774, 'loss/train': 7.143262624740601} +01/27/2022 19:46:00 - INFO - codeparrot_training - Step 775: {'lr': 0.00019375000000000002, 'samples': 148992, 'steps': 775, 'loss/train': 7.7516984939575195} +01/27/2022 19:46:04 - INFO - codeparrot_training - Step 776: {'lr': 0.000194, 'samples': 149184, 'steps': 776, 'loss/train': 7.434524059295654} +01/27/2022 19:46:09 - INFO - codeparrot_training - Step 777: {'lr': 0.00019425, 'samples': 149376, 'steps': 777, 'loss/train': 7.52365779876709} +01/27/2022 19:46:13 - INFO - codeparrot_training - Step 778: {'lr': 0.0001945, 'samples': 149568, 'steps': 778, 'loss/train': 7.154605865478516} +01/27/2022 19:46:17 - INFO - codeparrot_training - Step 779: {'lr': 0.00019475000000000002, 'samples': 149760, 'steps': 779, 'loss/train': 7.896199464797974} +01/27/2022 19:46:21 - INFO - codeparrot_training - Step 780: {'lr': 0.00019500000000000002, 'samples': 149952, 'steps': 780, 'loss/train': 7.540104389190674} +01/27/2022 19:46:25 - INFO - codeparrot_training - Step 781: {'lr': 0.00019525, 'samples': 150144, 'steps': 781, 'loss/train': 8.11072325706482} +01/27/2022 19:46:31 - INFO - codeparrot_training - Step 782: {'lr': 0.0001955, 'samples': 150336, 'steps': 782, 'loss/train': 7.418931484222412} +01/27/2022 19:46:35 - INFO - codeparrot_training - Step 783: {'lr': 0.00019575000000000001, 'samples': 150528, 'steps': 783, 'loss/train': 6.519766330718994} +01/27/2022 19:46:40 - INFO - codeparrot_training - Step 784: {'lr': 0.00019600000000000002, 'samples': 150720, 'steps': 784, 'loss/train': 8.003017902374268} +01/27/2022 19:46:44 - INFO - 
codeparrot_training - Step 785: {'lr': 0.00019625, 'samples': 150912, 'steps': 785, 'loss/train': 7.556897878646851} +01/27/2022 19:46:48 - INFO - codeparrot_training - Step 786: {'lr': 0.0001965, 'samples': 151104, 'steps': 786, 'loss/train': 7.598862648010254} +01/27/2022 19:46:53 - INFO - codeparrot_training - Step 787: {'lr': 0.00019675, 'samples': 151296, 'steps': 787, 'loss/train': 8.38918948173523} +01/27/2022 19:46:57 - INFO - codeparrot_training - Step 788: {'lr': 0.00019700000000000002, 'samples': 151488, 'steps': 788, 'loss/train': 7.555492401123047} +01/27/2022 19:47:01 - INFO - codeparrot_training - Step 789: {'lr': 0.00019725000000000002, 'samples': 151680, 'steps': 789, 'loss/train': 7.571065664291382} +01/27/2022 19:47:05 - INFO - codeparrot_training - Step 790: {'lr': 0.0001975, 'samples': 151872, 'steps': 790, 'loss/train': 7.973852634429932} +01/27/2022 19:47:09 - INFO - codeparrot_training - Step 791: {'lr': 0.00019775, 'samples': 152064, 'steps': 791, 'loss/train': 7.589966297149658} +01/27/2022 19:47:15 - INFO - codeparrot_training - Step 792: {'lr': 0.00019800000000000002, 'samples': 152256, 'steps': 792, 'loss/train': 8.019420146942139} +01/27/2022 19:47:19 - INFO - codeparrot_training - Step 793: {'lr': 0.00019825000000000002, 'samples': 152448, 'steps': 793, 'loss/train': 3.786741614341736} +01/27/2022 19:47:23 - INFO - codeparrot_training - Step 794: {'lr': 0.00019850000000000003, 'samples': 152640, 'steps': 794, 'loss/train': 7.3002705574035645} +01/27/2022 19:47:27 - INFO - codeparrot_training - Step 795: {'lr': 0.00019875, 'samples': 152832, 'steps': 795, 'loss/train': 7.697554349899292} +01/27/2022 19:47:31 - INFO - codeparrot_training - Step 796: {'lr': 0.000199, 'samples': 153024, 'steps': 796, 'loss/train': 7.322354793548584} +01/27/2022 19:47:37 - INFO - codeparrot_training - Step 797: {'lr': 0.00019925000000000002, 'samples': 153216, 'steps': 797, 'loss/train': 7.714418649673462} +01/27/2022 19:47:41 - INFO - codeparrot_training - Step 798: {'lr': 0.00019950000000000002, 'samples': 153408, 'steps': 798, 'loss/train': 6.932586908340454} +01/27/2022 19:47:46 - INFO - codeparrot_training - Step 799: {'lr': 0.00019975, 'samples': 153600, 'steps': 799, 'loss/train': 7.1686248779296875} +01/27/2022 19:47:50 - INFO - codeparrot_training - Step 800: {'lr': 0.0002, 'samples': 153792, 'steps': 800, 'loss/train': 7.482051372528076} +01/27/2022 19:47:54 - INFO - codeparrot_training - Step 801: {'lr': 0.00020025000000000002, 'samples': 153984, 'steps': 801, 'loss/train': 9.389641284942627} +01/27/2022 19:47:59 - INFO - codeparrot_training - Step 802: {'lr': 0.00020050000000000002, 'samples': 154176, 'steps': 802, 'loss/train': 7.458050966262817} +01/27/2022 19:48:03 - INFO - codeparrot_training - Step 803: {'lr': 0.00020075000000000003, 'samples': 154368, 'steps': 803, 'loss/train': 7.7519567012786865} +01/27/2022 19:48:08 - INFO - codeparrot_training - Step 804: {'lr': 0.000201, 'samples': 154560, 'steps': 804, 'loss/train': 7.618620157241821} +01/27/2022 19:48:12 - INFO - codeparrot_training - Step 805: {'lr': 0.00020125, 'samples': 154752, 'steps': 805, 'loss/train': 8.397203207015991} +01/27/2022 19:48:16 - INFO - codeparrot_training - Step 806: {'lr': 0.00020150000000000002, 'samples': 154944, 'steps': 806, 'loss/train': 7.557582378387451} +01/27/2022 19:48:21 - INFO - codeparrot_training - Step 807: {'lr': 0.00020175000000000003, 'samples': 155136, 'steps': 807, 'loss/train': 7.578557252883911} +01/27/2022 19:48:25 - INFO - codeparrot_training - Step 808: 
{'lr': 0.000202, 'samples': 155328, 'steps': 808, 'loss/train': 7.775289058685303} +01/27/2022 19:48:29 - INFO - codeparrot_training - Step 809: {'lr': 0.00020225, 'samples': 155520, 'steps': 809, 'loss/train': 5.857353329658508} +01/27/2022 19:48:33 - INFO - codeparrot_training - Step 810: {'lr': 0.00020250000000000002, 'samples': 155712, 'steps': 810, 'loss/train': 7.948709964752197} +01/27/2022 19:48:38 - INFO - codeparrot_training - Step 811: {'lr': 0.00020275000000000002, 'samples': 155904, 'steps': 811, 'loss/train': 8.424277782440186} +01/27/2022 19:48:43 - INFO - codeparrot_training - Step 812: {'lr': 0.00020300000000000003, 'samples': 156096, 'steps': 812, 'loss/train': 7.227815866470337} +01/27/2022 19:48:47 - INFO - codeparrot_training - Step 813: {'lr': 0.00020324999999999998, 'samples': 156288, 'steps': 813, 'loss/train': 7.635960102081299} +01/27/2022 19:48:52 - INFO - codeparrot_training - Step 814: {'lr': 0.00020349999999999999, 'samples': 156480, 'steps': 814, 'loss/train': 7.400285482406616} +01/27/2022 19:48:56 - INFO - codeparrot_training - Step 815: {'lr': 0.00020375, 'samples': 156672, 'steps': 815, 'loss/train': 7.3520286083221436} +01/27/2022 19:49:00 - INFO - codeparrot_training - Step 816: {'lr': 0.000204, 'samples': 156864, 'steps': 816, 'loss/train': 7.535558223724365} +01/27/2022 19:49:05 - INFO - codeparrot_training - Step 817: {'lr': 0.00020425, 'samples': 157056, 'steps': 817, 'loss/train': 7.880458116531372} +01/27/2022 19:49:09 - INFO - codeparrot_training - Step 818: {'lr': 0.00020449999999999998, 'samples': 157248, 'steps': 818, 'loss/train': 6.829286813735962} +01/27/2022 19:49:13 - INFO - codeparrot_training - Step 819: {'lr': 0.00020475, 'samples': 157440, 'steps': 819, 'loss/train': 8.716244459152222} +01/27/2022 19:49:18 - INFO - codeparrot_training - Step 820: {'lr': 0.000205, 'samples': 157632, 'steps': 820, 'loss/train': 7.438692569732666} +01/27/2022 19:49:22 - INFO - codeparrot_training - Step 821: {'lr': 0.00020525, 'samples': 157824, 'steps': 821, 'loss/train': 7.752033233642578} +01/27/2022 19:49:27 - INFO - codeparrot_training - Step 822: {'lr': 0.00020549999999999998, 'samples': 158016, 'steps': 822, 'loss/train': 7.9090189933776855} +01/27/2022 19:49:31 - INFO - codeparrot_training - Step 823: {'lr': 0.00020575, 'samples': 158208, 'steps': 823, 'loss/train': 6.93665885925293} +01/27/2022 19:49:35 - INFO - codeparrot_training - Step 824: {'lr': 0.000206, 'samples': 158400, 'steps': 824, 'loss/train': 7.665543794631958} +01/27/2022 19:49:39 - INFO - codeparrot_training - Step 825: {'lr': 0.00020625, 'samples': 158592, 'steps': 825, 'loss/train': 6.250310897827148} +01/27/2022 19:49:43 - INFO - codeparrot_training - Step 826: {'lr': 0.0002065, 'samples': 158784, 'steps': 826, 'loss/train': 6.196201801300049} +01/27/2022 19:49:49 - INFO - codeparrot_training - Step 827: {'lr': 0.00020674999999999998, 'samples': 158976, 'steps': 827, 'loss/train': 6.98847770690918} +01/27/2022 19:49:53 - INFO - codeparrot_training - Step 828: {'lr': 0.000207, 'samples': 159168, 'steps': 828, 'loss/train': 7.437055349349976} +01/27/2022 19:49:57 - INFO - codeparrot_training - Step 829: {'lr': 0.00020725, 'samples': 159360, 'steps': 829, 'loss/train': 7.113067388534546} +01/27/2022 19:50:02 - INFO - codeparrot_training - Step 830: {'lr': 0.0002075, 'samples': 159552, 'steps': 830, 'loss/train': 7.686000108718872} +01/27/2022 19:50:06 - INFO - codeparrot_training - Step 831: {'lr': 0.00020774999999999998, 'samples': 159744, 'steps': 831, 'loss/train': 
6.280385255813599} +01/27/2022 19:50:11 - INFO - codeparrot_training - Step 832: {'lr': 0.000208, 'samples': 159936, 'steps': 832, 'loss/train': 7.563260793685913} +01/27/2022 19:50:15 - INFO - codeparrot_training - Step 833: {'lr': 0.00020825, 'samples': 160128, 'steps': 833, 'loss/train': 7.371585130691528} +01/27/2022 19:50:19 - INFO - codeparrot_training - Step 834: {'lr': 0.0002085, 'samples': 160320, 'steps': 834, 'loss/train': 7.753119707107544} +01/27/2022 19:50:23 - INFO - codeparrot_training - Step 835: {'lr': 0.00020875, 'samples': 160512, 'steps': 835, 'loss/train': 7.26523232460022} +01/27/2022 19:50:28 - INFO - codeparrot_training - Step 836: {'lr': 0.00020899999999999998, 'samples': 160704, 'steps': 836, 'loss/train': 7.165149450302124} +01/27/2022 19:50:33 - INFO - codeparrot_training - Step 837: {'lr': 0.00020925, 'samples': 160896, 'steps': 837, 'loss/train': 5.2571539878845215} +01/27/2022 19:50:37 - INFO - codeparrot_training - Step 838: {'lr': 0.0002095, 'samples': 161088, 'steps': 838, 'loss/train': 7.704563140869141} +01/27/2022 19:50:41 - INFO - codeparrot_training - Step 839: {'lr': 0.00020975, 'samples': 161280, 'steps': 839, 'loss/train': 6.747865676879883} +01/27/2022 19:50:45 - INFO - codeparrot_training - Step 840: {'lr': 0.00021, 'samples': 161472, 'steps': 840, 'loss/train': 6.728604555130005} +01/27/2022 19:50:49 - INFO - codeparrot_training - Step 841: {'lr': 0.00021025, 'samples': 161664, 'steps': 841, 'loss/train': 6.855344295501709} +01/27/2022 19:50:56 - INFO - codeparrot_training - Step 842: {'lr': 0.0002105, 'samples': 161856, 'steps': 842, 'loss/train': 7.13779878616333} +01/27/2022 19:51:00 - INFO - codeparrot_training - Step 843: {'lr': 0.00021075, 'samples': 162048, 'steps': 843, 'loss/train': 6.714107036590576} +01/27/2022 19:51:04 - INFO - codeparrot_training - Step 844: {'lr': 0.000211, 'samples': 162240, 'steps': 844, 'loss/train': 7.078253746032715} +01/27/2022 19:51:08 - INFO - codeparrot_training - Step 845: {'lr': 0.00021124999999999998, 'samples': 162432, 'steps': 845, 'loss/train': 4.984176993370056} +01/27/2022 19:51:12 - INFO - codeparrot_training - Step 846: {'lr': 0.0002115, 'samples': 162624, 'steps': 846, 'loss/train': 4.37643027305603} +01/27/2022 19:51:17 - INFO - codeparrot_training - Step 847: {'lr': 0.00021175, 'samples': 162816, 'steps': 847, 'loss/train': 7.023126840591431} +01/27/2022 19:51:22 - INFO - codeparrot_training - Step 848: {'lr': 0.000212, 'samples': 163008, 'steps': 848, 'loss/train': 6.818269729614258} +01/27/2022 19:51:26 - INFO - codeparrot_training - Step 849: {'lr': 0.00021225, 'samples': 163200, 'steps': 849, 'loss/train': 7.410804033279419} +01/27/2022 19:51:30 - INFO - codeparrot_training - Step 850: {'lr': 0.0002125, 'samples': 163392, 'steps': 850, 'loss/train': 7.784402847290039} +01/27/2022 19:51:34 - INFO - codeparrot_training - Step 851: {'lr': 0.00021275, 'samples': 163584, 'steps': 851, 'loss/train': 7.730224370956421} +01/27/2022 19:51:40 - INFO - codeparrot_training - Step 852: {'lr': 0.000213, 'samples': 163776, 'steps': 852, 'loss/train': 6.38837742805481} +01/27/2022 19:51:44 - INFO - codeparrot_training - Step 853: {'lr': 0.00021325, 'samples': 163968, 'steps': 853, 'loss/train': 6.656601190567017} +01/27/2022 19:51:48 - INFO - codeparrot_training - Step 854: {'lr': 0.0002135, 'samples': 164160, 'steps': 854, 'loss/train': 8.349605083465576} +01/27/2022 19:51:52 - INFO - codeparrot_training - Step 855: {'lr': 0.00021375, 'samples': 164352, 'steps': 855, 'loss/train': 6.615616321563721} 
+01/27/2022 19:51:56 - INFO - codeparrot_training - Step 856: {'lr': 0.000214, 'samples': 164544, 'steps': 856, 'loss/train': 7.127137899398804} +01/27/2022 19:52:02 - INFO - codeparrot_training - Step 857: {'lr': 0.00021425, 'samples': 164736, 'steps': 857, 'loss/train': 7.894361972808838} +01/27/2022 19:52:06 - INFO - codeparrot_training - Step 858: {'lr': 0.0002145, 'samples': 164928, 'steps': 858, 'loss/train': 7.885131597518921} +01/27/2022 19:52:10 - INFO - codeparrot_training - Step 859: {'lr': 0.00021475, 'samples': 165120, 'steps': 859, 'loss/train': 6.882686376571655} +01/27/2022 19:52:15 - INFO - codeparrot_training - Step 860: {'lr': 0.000215, 'samples': 165312, 'steps': 860, 'loss/train': 7.567090272903442} +01/27/2022 19:52:19 - INFO - codeparrot_training - Step 861: {'lr': 0.00021525, 'samples': 165504, 'steps': 861, 'loss/train': 7.0731682777404785} +01/27/2022 19:52:24 - INFO - codeparrot_training - Step 862: {'lr': 0.0002155, 'samples': 165696, 'steps': 862, 'loss/train': 9.323542356491089} +01/27/2022 19:52:28 - INFO - codeparrot_training - Step 863: {'lr': 0.00021575, 'samples': 165888, 'steps': 863, 'loss/train': 6.99372124671936} +01/27/2022 19:52:32 - INFO - codeparrot_training - Step 864: {'lr': 0.000216, 'samples': 166080, 'steps': 864, 'loss/train': 7.365343809127808} +01/27/2022 19:52:36 - INFO - codeparrot_training - Step 865: {'lr': 0.00021625, 'samples': 166272, 'steps': 865, 'loss/train': 7.873505115509033} +01/27/2022 19:52:40 - INFO - codeparrot_training - Step 866: {'lr': 0.0002165, 'samples': 166464, 'steps': 866, 'loss/train': 7.0479912757873535} +01/27/2022 19:52:46 - INFO - codeparrot_training - Step 867: {'lr': 0.00021675, 'samples': 166656, 'steps': 867, 'loss/train': 7.002092599868774} +01/27/2022 19:52:50 - INFO - codeparrot_training - Step 868: {'lr': 0.00021700000000000002, 'samples': 166848, 'steps': 868, 'loss/train': 8.695030689239502} +01/27/2022 19:52:54 - INFO - codeparrot_training - Step 869: {'lr': 0.00021725, 'samples': 167040, 'steps': 869, 'loss/train': 6.47701621055603} +01/27/2022 19:52:58 - INFO - codeparrot_training - Step 870: {'lr': 0.0002175, 'samples': 167232, 'steps': 870, 'loss/train': 7.1184282302856445} +01/27/2022 19:53:02 - INFO - codeparrot_training - Step 871: {'lr': 0.00021775, 'samples': 167424, 'steps': 871, 'loss/train': 7.635532379150391} +01/27/2022 19:53:07 - INFO - codeparrot_training - Step 872: {'lr': 0.000218, 'samples': 167616, 'steps': 872, 'loss/train': 7.512827396392822} +01/27/2022 19:53:12 - INFO - codeparrot_training - Step 873: {'lr': 0.00021825, 'samples': 167808, 'steps': 873, 'loss/train': 7.669466257095337} +01/27/2022 19:53:16 - INFO - codeparrot_training - Step 874: {'lr': 0.0002185, 'samples': 168000, 'steps': 874, 'loss/train': 7.071858644485474} +01/27/2022 19:53:20 - INFO - codeparrot_training - Step 875: {'lr': 0.00021875, 'samples': 168192, 'steps': 875, 'loss/train': 7.268239259719849} +01/27/2022 19:53:24 - INFO - codeparrot_training - Step 876: {'lr': 0.000219, 'samples': 168384, 'steps': 876, 'loss/train': 8.0877685546875} +01/27/2022 19:53:30 - INFO - codeparrot_training - Step 877: {'lr': 0.00021925000000000002, 'samples': 168576, 'steps': 877, 'loss/train': 7.6133058071136475} +01/27/2022 19:53:34 - INFO - codeparrot_training - Step 878: {'lr': 0.0002195, 'samples': 168768, 'steps': 878, 'loss/train': 7.026730298995972} +01/27/2022 19:53:39 - INFO - codeparrot_training - Step 879: {'lr': 0.00021975, 'samples': 168960, 'steps': 879, 'loss/train': 7.413425445556641} +01/27/2022 
19:53:43 - INFO - codeparrot_training - Step 880: {'lr': 0.00022, 'samples': 169152, 'steps': 880, 'loss/train': 8.85764193534851} +01/27/2022 19:53:47 - INFO - codeparrot_training - Step 881: {'lr': 0.00022025000000000001, 'samples': 169344, 'steps': 881, 'loss/train': 7.755910634994507} +01/27/2022 19:53:51 - INFO - codeparrot_training - Step 882: {'lr': 0.0002205, 'samples': 169536, 'steps': 882, 'loss/train': 7.811797857284546} +01/27/2022 19:53:56 - INFO - codeparrot_training - Step 883: {'lr': 0.00022075, 'samples': 169728, 'steps': 883, 'loss/train': 7.600443363189697} +01/27/2022 19:54:01 - INFO - codeparrot_training - Step 884: {'lr': 0.000221, 'samples': 169920, 'steps': 884, 'loss/train': 6.118984937667847} +01/27/2022 19:54:05 - INFO - codeparrot_training - Step 885: {'lr': 0.00022125, 'samples': 170112, 'steps': 885, 'loss/train': 7.171224117279053} +01/27/2022 19:54:09 - INFO - codeparrot_training - Step 886: {'lr': 0.00022150000000000002, 'samples': 170304, 'steps': 886, 'loss/train': 6.639321327209473} +01/27/2022 19:54:15 - INFO - codeparrot_training - Step 887: {'lr': 0.00022175, 'samples': 170496, 'steps': 887, 'loss/train': 7.177823066711426} +01/27/2022 19:54:19 - INFO - codeparrot_training - Step 888: {'lr': 0.000222, 'samples': 170688, 'steps': 888, 'loss/train': 7.860971689224243} +01/27/2022 19:54:23 - INFO - codeparrot_training - Step 889: {'lr': 0.00022225, 'samples': 170880, 'steps': 889, 'loss/train': 7.319984436035156} +01/27/2022 19:54:27 - INFO - codeparrot_training - Step 890: {'lr': 0.00022250000000000001, 'samples': 171072, 'steps': 890, 'loss/train': 6.704685688018799} +01/27/2022 19:54:31 - INFO - codeparrot_training - Step 891: {'lr': 0.00022275000000000002, 'samples': 171264, 'steps': 891, 'loss/train': 6.902200698852539} +01/27/2022 19:54:36 - INFO - codeparrot_training - Step 892: {'lr': 0.000223, 'samples': 171456, 'steps': 892, 'loss/train': 7.375280141830444} +01/27/2022 19:54:41 - INFO - codeparrot_training - Step 893: {'lr': 0.00022325, 'samples': 171648, 'steps': 893, 'loss/train': 5.083872556686401} +01/27/2022 19:54:45 - INFO - codeparrot_training - Step 894: {'lr': 0.0002235, 'samples': 171840, 'steps': 894, 'loss/train': 6.673138618469238} +01/27/2022 19:54:49 - INFO - codeparrot_training - Step 895: {'lr': 0.00022375000000000002, 'samples': 172032, 'steps': 895, 'loss/train': 7.516380786895752} +01/27/2022 19:54:53 - INFO - codeparrot_training - Step 896: {'lr': 0.000224, 'samples': 172224, 'steps': 896, 'loss/train': 6.994914293289185} +01/27/2022 19:54:58 - INFO - codeparrot_training - Step 897: {'lr': 0.00022425, 'samples': 172416, 'steps': 897, 'loss/train': 7.059888124465942} +01/27/2022 19:55:02 - INFO - codeparrot_training - Step 898: {'lr': 0.0002245, 'samples': 172608, 'steps': 898, 'loss/train': 3.5606542825698853} +01/27/2022 19:55:06 - INFO - codeparrot_training - Step 899: {'lr': 0.00022475000000000001, 'samples': 172800, 'steps': 899, 'loss/train': 8.262231588363647} +01/27/2022 19:55:11 - INFO - codeparrot_training - Step 900: {'lr': 0.00022500000000000002, 'samples': 172992, 'steps': 900, 'loss/train': 7.715365648269653} +01/27/2022 19:55:15 - INFO - codeparrot_training - Step 901: {'lr': 0.00022525, 'samples': 173184, 'steps': 901, 'loss/train': 6.664234399795532} +01/27/2022 19:55:21 - INFO - codeparrot_training - Step 902: {'lr': 0.0002255, 'samples': 173376, 'steps': 902, 'loss/train': 7.200341463088989} +01/27/2022 19:55:25 - INFO - codeparrot_training - Step 903: {'lr': 0.00022575, 'samples': 173568, 'steps': 903, 
'loss/train': 7.4642908573150635} +01/27/2022 19:55:29 - INFO - codeparrot_training - Step 904: {'lr': 0.00022600000000000002, 'samples': 173760, 'steps': 904, 'loss/train': 6.824518203735352} +01/27/2022 19:55:33 - INFO - codeparrot_training - Step 905: {'lr': 0.00022625000000000002, 'samples': 173952, 'steps': 905, 'loss/train': 6.4327311515808105} +01/27/2022 19:55:37 - INFO - codeparrot_training - Step 906: {'lr': 0.0002265, 'samples': 174144, 'steps': 906, 'loss/train': 6.040751695632935} +01/27/2022 19:55:42 - INFO - codeparrot_training - Step 907: {'lr': 0.00022675, 'samples': 174336, 'steps': 907, 'loss/train': 4.96863055229187} +01/27/2022 19:55:47 - INFO - codeparrot_training - Step 908: {'lr': 0.00022700000000000002, 'samples': 174528, 'steps': 908, 'loss/train': 6.524607181549072} +01/27/2022 19:55:51 - INFO - codeparrot_training - Step 909: {'lr': 0.00022725000000000002, 'samples': 174720, 'steps': 909, 'loss/train': 6.859138011932373} +01/27/2022 19:55:55 - INFO - codeparrot_training - Step 910: {'lr': 0.0002275, 'samples': 174912, 'steps': 910, 'loss/train': 7.168780088424683} +01/27/2022 19:55:59 - INFO - codeparrot_training - Step 911: {'lr': 0.00022775, 'samples': 175104, 'steps': 911, 'loss/train': 6.728759050369263} +01/27/2022 19:56:05 - INFO - codeparrot_training - Step 912: {'lr': 0.000228, 'samples': 175296, 'steps': 912, 'loss/train': 7.605461597442627} +01/27/2022 19:56:09 - INFO - codeparrot_training - Step 913: {'lr': 0.00022825000000000002, 'samples': 175488, 'steps': 913, 'loss/train': 6.410759210586548} +01/27/2022 19:56:13 - INFO - codeparrot_training - Step 914: {'lr': 0.00022850000000000002, 'samples': 175680, 'steps': 914, 'loss/train': 7.014211893081665} +01/27/2022 19:56:17 - INFO - codeparrot_training - Step 915: {'lr': 0.00022875, 'samples': 175872, 'steps': 915, 'loss/train': 7.0388023853302} +01/27/2022 19:56:22 - INFO - codeparrot_training - Step 916: {'lr': 0.000229, 'samples': 176064, 'steps': 916, 'loss/train': 4.739097833633423} +01/27/2022 19:56:27 - INFO - codeparrot_training - Step 917: {'lr': 0.00022925000000000002, 'samples': 176256, 'steps': 917, 'loss/train': 7.574775695800781} +01/27/2022 19:56:31 - INFO - codeparrot_training - Step 918: {'lr': 0.00022950000000000002, 'samples': 176448, 'steps': 918, 'loss/train': 7.156859636306763} +01/27/2022 19:56:35 - INFO - codeparrot_training - Step 919: {'lr': 0.00022975000000000003, 'samples': 176640, 'steps': 919, 'loss/train': 7.780276536941528} +01/27/2022 19:56:39 - INFO - codeparrot_training - Step 920: {'lr': 0.00023, 'samples': 176832, 'steps': 920, 'loss/train': 6.601100921630859} +01/27/2022 19:56:43 - INFO - codeparrot_training - Step 921: {'lr': 0.00023025, 'samples': 177024, 'steps': 921, 'loss/train': 6.518356561660767} +01/27/2022 19:56:49 - INFO - codeparrot_training - Step 922: {'lr': 0.00023050000000000002, 'samples': 177216, 'steps': 922, 'loss/train': 7.5875279903411865} +01/27/2022 19:56:53 - INFO - codeparrot_training - Step 923: {'lr': 0.00023075000000000003, 'samples': 177408, 'steps': 923, 'loss/train': 6.655817270278931} +01/27/2022 19:56:57 - INFO - codeparrot_training - Step 924: {'lr': 0.000231, 'samples': 177600, 'steps': 924, 'loss/train': 6.846208333969116} +01/27/2022 19:57:01 - INFO - codeparrot_training - Step 925: {'lr': 0.00023125, 'samples': 177792, 'steps': 925, 'loss/train': 7.254077196121216} +01/27/2022 19:57:05 - INFO - codeparrot_training - Step 926: {'lr': 0.00023150000000000002, 'samples': 177984, 'steps': 926, 'loss/train': 7.18137788772583} 
+01/27/2022 19:57:10 - INFO - codeparrot_training - Step 927: {'lr': 0.00023175000000000002, 'samples': 178176, 'steps': 927, 'loss/train': 9.288630723953247} +01/27/2022 19:57:15 - INFO - codeparrot_training - Step 928: {'lr': 0.00023200000000000003, 'samples': 178368, 'steps': 928, 'loss/train': 7.154744625091553} +01/27/2022 19:57:19 - INFO - codeparrot_training - Step 929: {'lr': 0.00023225, 'samples': 178560, 'steps': 929, 'loss/train': 6.805961608886719} +01/27/2022 19:57:23 - INFO - codeparrot_training - Step 930: {'lr': 0.0002325, 'samples': 178752, 'steps': 930, 'loss/train': 7.899904489517212} +01/27/2022 19:57:27 - INFO - codeparrot_training - Step 931: {'lr': 0.00023275000000000002, 'samples': 178944, 'steps': 931, 'loss/train': 7.287409543991089} +01/27/2022 19:57:33 - INFO - codeparrot_training - Step 932: {'lr': 0.00023300000000000003, 'samples': 179136, 'steps': 932, 'loss/train': 6.028538703918457} +01/27/2022 19:57:37 - INFO - codeparrot_training - Step 933: {'lr': 0.00023325, 'samples': 179328, 'steps': 933, 'loss/train': 6.456415414810181} +01/27/2022 19:57:41 - INFO - codeparrot_training - Step 934: {'lr': 0.0002335, 'samples': 179520, 'steps': 934, 'loss/train': 8.036659955978394} +01/27/2022 19:57:45 - INFO - codeparrot_training - Step 935: {'lr': 0.00023375000000000002, 'samples': 179712, 'steps': 935, 'loss/train': 6.991081953048706} +01/27/2022 19:57:50 - INFO - codeparrot_training - Step 936: {'lr': 0.00023400000000000002, 'samples': 179904, 'steps': 936, 'loss/train': 6.907919883728027} +01/27/2022 19:57:55 - INFO - codeparrot_training - Step 937: {'lr': 0.00023425000000000003, 'samples': 180096, 'steps': 937, 'loss/train': 7.334412574768066} +01/27/2022 19:57:59 - INFO - codeparrot_training - Step 938: {'lr': 0.00023449999999999998, 'samples': 180288, 'steps': 938, 'loss/train': 6.904307842254639} +01/27/2022 19:58:03 - INFO - codeparrot_training - Step 939: {'lr': 0.00023475, 'samples': 180480, 'steps': 939, 'loss/train': 9.220777988433838} +01/27/2022 19:58:07 - INFO - codeparrot_training - Step 940: {'lr': 0.000235, 'samples': 180672, 'steps': 940, 'loss/train': 6.523514270782471} +01/27/2022 19:58:11 - INFO - codeparrot_training - Step 941: {'lr': 0.00023525, 'samples': 180864, 'steps': 941, 'loss/train': 7.42478084564209} +01/27/2022 19:58:16 - INFO - codeparrot_training - Step 942: {'lr': 0.0002355, 'samples': 181056, 'steps': 942, 'loss/train': 7.942927837371826} +01/27/2022 19:58:21 - INFO - codeparrot_training - Step 943: {'lr': 0.00023574999999999998, 'samples': 181248, 'steps': 943, 'loss/train': 6.2294347286224365} +01/27/2022 19:58:25 - INFO - codeparrot_training - Step 944: {'lr': 0.000236, 'samples': 181440, 'steps': 944, 'loss/train': 8.150972843170166} +01/27/2022 19:58:29 - INFO - codeparrot_training - Step 945: {'lr': 0.00023625, 'samples': 181632, 'steps': 945, 'loss/train': 7.299391508102417} +01/27/2022 19:58:33 - INFO - codeparrot_training - Step 946: {'lr': 0.0002365, 'samples': 181824, 'steps': 946, 'loss/train': 7.574323654174805} +01/27/2022 19:58:39 - INFO - codeparrot_training - Step 947: {'lr': 0.00023674999999999998, 'samples': 182016, 'steps': 947, 'loss/train': 6.772777318954468} +01/27/2022 19:58:43 - INFO - codeparrot_training - Step 948: {'lr': 0.000237, 'samples': 182208, 'steps': 948, 'loss/train': 6.088188171386719} +01/27/2022 19:58:47 - INFO - codeparrot_training - Step 949: {'lr': 0.00023725, 'samples': 182400, 'steps': 949, 'loss/train': 7.268367290496826} +01/27/2022 19:58:51 - INFO - codeparrot_training - Step 950: 
{'lr': 0.0002375, 'samples': 182592, 'steps': 950, 'loss/train': 7.064356327056885} +01/27/2022 19:58:56 - INFO - codeparrot_training - Step 951: {'lr': 0.00023775, 'samples': 182784, 'steps': 951, 'loss/train': 7.1734278202056885} +01/27/2022 19:59:01 - INFO - codeparrot_training - Step 952: {'lr': 0.00023799999999999998, 'samples': 182976, 'steps': 952, 'loss/train': 6.710713148117065} +01/27/2022 19:59:05 - INFO - codeparrot_training - Step 953: {'lr': 0.00023825, 'samples': 183168, 'steps': 953, 'loss/train': 8.174689292907715} +01/27/2022 19:59:09 - INFO - codeparrot_training - Step 954: {'lr': 0.0002385, 'samples': 183360, 'steps': 954, 'loss/train': 7.612285137176514} +01/27/2022 19:59:14 - INFO - codeparrot_training - Step 955: {'lr': 0.00023875, 'samples': 183552, 'steps': 955, 'loss/train': 7.401938438415527} +01/27/2022 19:59:18 - INFO - codeparrot_training - Step 956: {'lr': 0.00023899999999999998, 'samples': 183744, 'steps': 956, 'loss/train': 7.803273439407349} +01/27/2022 19:59:24 - INFO - codeparrot_training - Step 957: {'lr': 0.00023925, 'samples': 183936, 'steps': 957, 'loss/train': 6.716939449310303} +01/27/2022 19:59:28 - INFO - codeparrot_training - Step 958: {'lr': 0.0002395, 'samples': 184128, 'steps': 958, 'loss/train': 6.849352598190308} +01/27/2022 19:59:32 - INFO - codeparrot_training - Step 959: {'lr': 0.00023975, 'samples': 184320, 'steps': 959, 'loss/train': 7.120357275009155} +01/27/2022 19:59:36 - INFO - codeparrot_training - Step 960: {'lr': 0.00024, 'samples': 184512, 'steps': 960, 'loss/train': 9.059615850448608} +01/27/2022 19:59:40 - INFO - codeparrot_training - Step 961: {'lr': 0.00024024999999999999, 'samples': 184704, 'steps': 961, 'loss/train': 6.081015586853027} +01/27/2022 19:59:45 - INFO - codeparrot_training - Step 962: {'lr': 0.0002405, 'samples': 184896, 'steps': 962, 'loss/train': 7.1585633754730225} +01/27/2022 19:59:49 - INFO - codeparrot_training - Step 963: {'lr': 0.00024075, 'samples': 185088, 'steps': 963, 'loss/train': 6.374916315078735} +01/27/2022 19:59:54 - INFO - codeparrot_training - Step 964: {'lr': 0.000241, 'samples': 185280, 'steps': 964, 'loss/train': 7.46045994758606} +01/27/2022 19:59:58 - INFO - codeparrot_training - Step 965: {'lr': 0.00024125, 'samples': 185472, 'steps': 965, 'loss/train': 7.519346237182617} +01/27/2022 20:00:02 - INFO - codeparrot_training - Step 966: {'lr': 0.0002415, 'samples': 185664, 'steps': 966, 'loss/train': 6.693201541900635} +01/27/2022 20:00:07 - INFO - codeparrot_training - Step 967: {'lr': 0.00024175, 'samples': 185856, 'steps': 967, 'loss/train': 5.646547436714172} +01/27/2022 20:00:11 - INFO - codeparrot_training - Step 968: {'lr': 0.000242, 'samples': 186048, 'steps': 968, 'loss/train': 7.297076225280762} +01/27/2022 20:00:16 - INFO - codeparrot_training - Step 969: {'lr': 0.00024225, 'samples': 186240, 'steps': 969, 'loss/train': 6.749300479888916} +01/27/2022 20:00:20 - INFO - codeparrot_training - Step 970: {'lr': 0.00024249999999999999, 'samples': 186432, 'steps': 970, 'loss/train': 8.186639785766602} +01/27/2022 20:00:24 - INFO - codeparrot_training - Step 971: {'lr': 0.00024275, 'samples': 186624, 'steps': 971, 'loss/train': 3.0953779220581055} +01/27/2022 20:00:29 - INFO - codeparrot_training - Step 972: {'lr': 0.000243, 'samples': 186816, 'steps': 972, 'loss/train': 3.534411907196045} +01/27/2022 20:00:33 - INFO - codeparrot_training - Step 973: {'lr': 0.00024325, 'samples': 187008, 'steps': 973, 'loss/train': 7.451557874679565} +01/27/2022 20:00:38 - INFO - codeparrot_training - 
Step 974: {'lr': 0.0002435, 'samples': 187200, 'steps': 974, 'loss/train': 7.9868645668029785} +01/27/2022 20:00:42 - INFO - codeparrot_training - Step 975: {'lr': 0.00024375, 'samples': 187392, 'steps': 975, 'loss/train': 6.10376501083374} +01/27/2022 20:00:46 - INFO - codeparrot_training - Step 976: {'lr': 0.000244, 'samples': 187584, 'steps': 976, 'loss/train': 7.4672276973724365} +01/27/2022 20:00:52 - INFO - codeparrot_training - Step 977: {'lr': 0.00024425, 'samples': 187776, 'steps': 977, 'loss/train': 7.113274097442627} +01/27/2022 20:00:56 - INFO - codeparrot_training - Step 978: {'lr': 0.0002445, 'samples': 187968, 'steps': 978, 'loss/train': 7.51456618309021} +01/27/2022 20:01:00 - INFO - codeparrot_training - Step 979: {'lr': 0.00024475, 'samples': 188160, 'steps': 979, 'loss/train': 7.385890960693359} +01/27/2022 20:01:04 - INFO - codeparrot_training - Step 980: {'lr': 0.000245, 'samples': 188352, 'steps': 980, 'loss/train': 8.501586198806763} +01/27/2022 20:01:09 - INFO - codeparrot_training - Step 981: {'lr': 0.00024525, 'samples': 188544, 'steps': 981, 'loss/train': 7.369788408279419} +01/27/2022 20:01:14 - INFO - codeparrot_training - Step 982: {'lr': 0.0002455, 'samples': 188736, 'steps': 982, 'loss/train': 6.954960823059082} +01/27/2022 20:01:18 - INFO - codeparrot_training - Step 983: {'lr': 0.00024575, 'samples': 188928, 'steps': 983, 'loss/train': 6.792914628982544} +01/27/2022 20:01:22 - INFO - codeparrot_training - Step 984: {'lr': 0.000246, 'samples': 189120, 'steps': 984, 'loss/train': 3.656927704811096} +01/27/2022 20:01:27 - INFO - codeparrot_training - Step 985: {'lr': 0.00024625, 'samples': 189312, 'steps': 985, 'loss/train': 6.928876161575317} +01/27/2022 20:01:32 - INFO - codeparrot_training - Step 986: {'lr': 0.00024650000000000003, 'samples': 189504, 'steps': 986, 'loss/train': 7.014236211776733} +01/27/2022 20:01:36 - INFO - codeparrot_training - Step 987: {'lr': 0.00024675, 'samples': 189696, 'steps': 987, 'loss/train': 6.70487380027771} +01/27/2022 20:01:40 - INFO - codeparrot_training - Step 988: {'lr': 0.000247, 'samples': 189888, 'steps': 988, 'loss/train': 7.39477801322937} +01/27/2022 20:01:44 - INFO - codeparrot_training - Step 989: {'lr': 0.00024725, 'samples': 190080, 'steps': 989, 'loss/train': 7.157878875732422} +01/27/2022 20:01:48 - INFO - codeparrot_training - Step 990: {'lr': 0.0002475, 'samples': 190272, 'steps': 990, 'loss/train': 7.20275616645813} +01/27/2022 20:01:54 - INFO - codeparrot_training - Step 991: {'lr': 0.00024775, 'samples': 190464, 'steps': 991, 'loss/train': 6.944641828536987} +01/27/2022 20:01:58 - INFO - codeparrot_training - Step 992: {'lr': 0.000248, 'samples': 190656, 'steps': 992, 'loss/train': 6.4415974617004395} +01/27/2022 20:02:03 - INFO - codeparrot_training - Step 993: {'lr': 0.00024825, 'samples': 190848, 'steps': 993, 'loss/train': 6.851691484451294} +01/27/2022 20:02:07 - INFO - codeparrot_training - Step 994: {'lr': 0.0002485, 'samples': 191040, 'steps': 994, 'loss/train': 6.905003786087036} +01/27/2022 20:02:11 - INFO - codeparrot_training - Step 995: {'lr': 0.00024875, 'samples': 191232, 'steps': 995, 'loss/train': 8.069269180297852} +01/27/2022 20:02:16 - INFO - codeparrot_training - Step 996: {'lr': 0.000249, 'samples': 191424, 'steps': 996, 'loss/train': 6.3540308475494385} +01/27/2022 20:02:20 - INFO - codeparrot_training - Step 997: {'lr': 0.00024925, 'samples': 191616, 'steps': 997, 'loss/train': 6.985671043395996} +01/27/2022 20:02:24 - INFO - codeparrot_training - Step 998: {'lr': 0.0002495, 
'samples': 191808, 'steps': 998, 'loss/train': 5.8150938749313354} +01/27/2022 20:02:29 - INFO - codeparrot_training - Step 999: {'lr': 0.00024975, 'samples': 192000, 'steps': 999, 'loss/train': 6.347761631011963} +01/27/2022 20:02:33 - INFO - codeparrot_training - Step 1000: {'lr': 0.00025, 'samples': 192192, 'steps': 1000, 'loss/train': 2.727059841156006} +01/27/2022 20:02:39 - INFO - codeparrot_training - Step 1001: {'lr': 0.00025025, 'samples': 192384, 'steps': 1001, 'loss/train': 8.42210340499878} +01/27/2022 20:02:43 - INFO - codeparrot_training - Step 1002: {'lr': 0.0002505, 'samples': 192576, 'steps': 1002, 'loss/train': 7.332793951034546} +01/27/2022 20:02:47 - INFO - codeparrot_training - Step 1003: {'lr': 0.00025075, 'samples': 192768, 'steps': 1003, 'loss/train': 7.334043502807617} +01/27/2022 20:02:51 - INFO - codeparrot_training - Step 1004: {'lr': 0.00025100000000000003, 'samples': 192960, 'steps': 1004, 'loss/train': 6.399794340133667} +01/27/2022 20:02:55 - INFO - codeparrot_training - Step 1005: {'lr': 0.00025124999999999995, 'samples': 193152, 'steps': 1005, 'loss/train': 6.915691137313843} +01/27/2022 20:03:00 - INFO - codeparrot_training - Step 1006: {'lr': 0.0002515, 'samples': 193344, 'steps': 1006, 'loss/train': 7.140349388122559} +01/27/2022 20:03:04 - INFO - codeparrot_training - Step 1007: {'lr': 0.00025174999999999997, 'samples': 193536, 'steps': 1007, 'loss/train': 7.182172536849976} +01/27/2022 20:03:09 - INFO - codeparrot_training - Step 1008: {'lr': 0.000252, 'samples': 193728, 'steps': 1008, 'loss/train': 5.296411871910095} +01/27/2022 20:03:13 - INFO - codeparrot_training - Step 1009: {'lr': 0.00025225, 'samples': 193920, 'steps': 1009, 'loss/train': 7.232731819152832} +01/27/2022 20:03:17 - INFO - codeparrot_training - Step 1010: {'lr': 0.0002525, 'samples': 194112, 'steps': 1010, 'loss/train': 6.959974765777588} +01/27/2022 20:03:22 - INFO - codeparrot_training - Step 1011: {'lr': 0.00025275, 'samples': 194304, 'steps': 1011, 'loss/train': 6.996084451675415} +01/27/2022 20:03:26 - INFO - codeparrot_training - Step 1012: {'lr': 0.000253, 'samples': 194496, 'steps': 1012, 'loss/train': 6.97163200378418} +01/27/2022 20:03:30 - INFO - codeparrot_training - Step 1013: {'lr': 0.00025325, 'samples': 194688, 'steps': 1013, 'loss/train': 6.342827081680298} +01/27/2022 20:03:35 - INFO - codeparrot_training - Step 1014: {'lr': 0.0002535, 'samples': 194880, 'steps': 1014, 'loss/train': 7.124094486236572} +01/27/2022 20:03:39 - INFO - codeparrot_training - Step 1015: {'lr': 0.00025374999999999996, 'samples': 195072, 'steps': 1015, 'loss/train': 8.34977102279663} +01/27/2022 20:03:45 - INFO - codeparrot_training - Step 1016: {'lr': 0.000254, 'samples': 195264, 'steps': 1016, 'loss/train': 7.086735248565674} +01/27/2022 20:03:49 - INFO - codeparrot_training - Step 1017: {'lr': 0.00025425, 'samples': 195456, 'steps': 1017, 'loss/train': 7.534659147262573} +01/27/2022 20:03:53 - INFO - codeparrot_training - Step 1018: {'lr': 0.0002545, 'samples': 195648, 'steps': 1018, 'loss/train': 7.8111891746521} +01/27/2022 20:03:57 - INFO - codeparrot_training - Step 1019: {'lr': 0.00025475, 'samples': 195840, 'steps': 1019, 'loss/train': 6.920222282409668} +01/27/2022 20:04:01 - INFO - codeparrot_training - Step 1020: {'lr': 0.000255, 'samples': 196032, 'steps': 1020, 'loss/train': 7.115208864212036} +01/27/2022 20:04:07 - INFO - codeparrot_training - Step 1021: {'lr': 0.00025525, 'samples': 196224, 'steps': 1021, 'loss/train': 3.0207903385162354} +01/27/2022 20:04:11 - INFO - 
codeparrot_training - Step 1022: {'lr': 0.00025550000000000003, 'samples': 196416, 'steps': 1022, 'loss/train': 4.931810975074768} +01/27/2022 20:04:15 - INFO - codeparrot_training - Step 1023: {'lr': 0.00025575, 'samples': 196608, 'steps': 1023, 'loss/train': 6.924686908721924} +01/27/2022 20:04:19 - INFO - codeparrot_training - Step 1024: {'lr': 0.000256, 'samples': 196800, 'steps': 1024, 'loss/train': 6.617191314697266} +01/27/2022 20:04:23 - INFO - codeparrot_training - Step 1025: {'lr': 0.00025624999999999997, 'samples': 196992, 'steps': 1025, 'loss/train': 6.591811895370483} +01/27/2022 20:04:28 - INFO - codeparrot_training - Step 1026: {'lr': 0.0002565, 'samples': 197184, 'steps': 1026, 'loss/train': 8.660568952560425} +01/27/2022 20:04:32 - INFO - codeparrot_training - Step 1027: {'lr': 0.00025675, 'samples': 197376, 'steps': 1027, 'loss/train': 6.5282721519470215} +01/27/2022 20:04:37 - INFO - codeparrot_training - Step 1028: {'lr': 0.000257, 'samples': 197568, 'steps': 1028, 'loss/train': 7.155653715133667} +01/27/2022 20:04:41 - INFO - codeparrot_training - Step 1029: {'lr': 0.00025725, 'samples': 197760, 'steps': 1029, 'loss/train': 6.496752977371216} +01/27/2022 20:04:45 - INFO - codeparrot_training - Step 1030: {'lr': 0.0002575, 'samples': 197952, 'steps': 1030, 'loss/train': 6.339601278305054} +01/27/2022 20:04:50 - INFO - codeparrot_training - Step 1031: {'lr': 0.00025775, 'samples': 198144, 'steps': 1031, 'loss/train': 6.180484056472778} +01/27/2022 20:04:54 - INFO - codeparrot_training - Step 1032: {'lr': 0.00025800000000000004, 'samples': 198336, 'steps': 1032, 'loss/train': 6.904130458831787} +01/27/2022 20:04:59 - INFO - codeparrot_training - Step 1033: {'lr': 0.00025824999999999996, 'samples': 198528, 'steps': 1033, 'loss/train': 6.179152965545654} +01/27/2022 20:05:03 - INFO - codeparrot_training - Step 1034: {'lr': 0.0002585, 'samples': 198720, 'steps': 1034, 'loss/train': 6.8922951221466064} +01/27/2022 20:05:09 - INFO - codeparrot_training - Step 1035: {'lr': 0.00025875, 'samples': 198912, 'steps': 1035, 'loss/train': 6.693593502044678} +01/27/2022 20:05:13 - INFO - codeparrot_training - Step 1036: {'lr': 0.000259, 'samples': 199104, 'steps': 1036, 'loss/train': 6.657398700714111} +01/27/2022 20:05:17 - INFO - codeparrot_training - Step 1037: {'lr': 0.00025925, 'samples': 199296, 'steps': 1037, 'loss/train': 5.598530888557434} +01/27/2022 20:05:21 - INFO - codeparrot_training - Step 1038: {'lr': 0.0002595, 'samples': 199488, 'steps': 1038, 'loss/train': 6.935786247253418} +01/27/2022 20:05:25 - INFO - codeparrot_training - Step 1039: {'lr': 0.00025975, 'samples': 199680, 'steps': 1039, 'loss/train': 5.9086010456085205} +01/27/2022 20:05:31 - INFO - codeparrot_training - Step 1040: {'lr': 0.00026000000000000003, 'samples': 199872, 'steps': 1040, 'loss/train': 2.6797045469284058} +01/27/2022 20:05:35 - INFO - codeparrot_training - Step 1041: {'lr': 0.00026025, 'samples': 200064, 'steps': 1041, 'loss/train': 6.955983638763428} +01/27/2022 20:05:39 - INFO - codeparrot_training - Step 1042: {'lr': 0.0002605, 'samples': 200256, 'steps': 1042, 'loss/train': 7.775304079055786} +01/27/2022 20:05:43 - INFO - codeparrot_training - Step 1043: {'lr': 0.00026074999999999997, 'samples': 200448, 'steps': 1043, 'loss/train': 8.124592781066895} +01/27/2022 20:05:47 - INFO - codeparrot_training - Step 1044: {'lr': 0.000261, 'samples': 200640, 'steps': 1044, 'loss/train': 7.032229900360107} +01/27/2022 20:05:52 - INFO - codeparrot_training - Step 1045: {'lr': 0.00026125, 'samples': 
200832, 'steps': 1045, 'loss/train': 6.700995683670044} +01/27/2022 20:05:57 - INFO - codeparrot_training - Step 1046: {'lr': 0.0002615, 'samples': 201024, 'steps': 1046, 'loss/train': 5.590494990348816} +01/27/2022 20:06:01 - INFO - codeparrot_training - Step 1047: {'lr': 0.00026175, 'samples': 201216, 'steps': 1047, 'loss/train': 7.30798602104187} +01/27/2022 20:06:05 - INFO - codeparrot_training - Step 1048: {'lr': 0.000262, 'samples': 201408, 'steps': 1048, 'loss/train': 5.346248030662537} +01/27/2022 20:06:09 - INFO - codeparrot_training - Step 1049: {'lr': 0.00026225, 'samples': 201600, 'steps': 1049, 'loss/train': 7.299126148223877} +01/27/2022 20:06:15 - INFO - codeparrot_training - Step 1050: {'lr': 0.00026250000000000004, 'samples': 201792, 'steps': 1050, 'loss/train': 7.436676979064941} +01/27/2022 20:06:19 - INFO - codeparrot_training - Step 1051: {'lr': 0.00026274999999999996, 'samples': 201984, 'steps': 1051, 'loss/train': 6.254862070083618} +01/27/2022 20:06:23 - INFO - codeparrot_training - Step 1052: {'lr': 0.000263, 'samples': 202176, 'steps': 1052, 'loss/train': 6.72723126411438} +01/27/2022 20:06:27 - INFO - codeparrot_training - Step 1053: {'lr': 0.00026325, 'samples': 202368, 'steps': 1053, 'loss/train': 6.373230457305908} +01/27/2022 20:06:32 - INFO - codeparrot_training - Step 1054: {'lr': 0.0002635, 'samples': 202560, 'steps': 1054, 'loss/train': 7.470884799957275} +01/27/2022 20:06:37 - INFO - codeparrot_training - Step 1055: {'lr': 0.00026375, 'samples': 202752, 'steps': 1055, 'loss/train': 6.225339889526367} +01/27/2022 20:06:41 - INFO - codeparrot_training - Step 1056: {'lr': 0.000264, 'samples': 202944, 'steps': 1056, 'loss/train': 6.287844657897949} +01/27/2022 20:06:45 - INFO - codeparrot_training - Step 1057: {'lr': 0.00026425, 'samples': 203136, 'steps': 1057, 'loss/train': 5.9497575759887695} +01/27/2022 20:06:49 - INFO - codeparrot_training - Step 1058: {'lr': 0.00026450000000000003, 'samples': 203328, 'steps': 1058, 'loss/train': 6.595285177230835} +01/27/2022 20:06:53 - INFO - codeparrot_training - Step 1059: {'lr': 0.00026475, 'samples': 203520, 'steps': 1059, 'loss/train': 6.861415386199951} +01/27/2022 20:06:59 - INFO - codeparrot_training - Step 1060: {'lr': 0.00026500000000000004, 'samples': 203712, 'steps': 1060, 'loss/train': 6.762890338897705} +01/27/2022 20:07:03 - INFO - codeparrot_training - Step 1061: {'lr': 0.00026524999999999997, 'samples': 203904, 'steps': 1061, 'loss/train': 6.5421459674835205} +01/27/2022 20:07:08 - INFO - codeparrot_training - Step 1062: {'lr': 0.0002655, 'samples': 204096, 'steps': 1062, 'loss/train': 8.077380895614624} +01/27/2022 20:07:12 - INFO - codeparrot_training - Step 1063: {'lr': 0.00026575, 'samples': 204288, 'steps': 1063, 'loss/train': 6.763631343841553} +01/27/2022 20:07:16 - INFO - codeparrot_training - Step 1064: {'lr': 0.000266, 'samples': 204480, 'steps': 1064, 'loss/train': 7.018068552017212} +01/27/2022 20:07:21 - INFO - codeparrot_training - Step 1065: {'lr': 0.00026625, 'samples': 204672, 'steps': 1065, 'loss/train': 8.275073289871216} +01/27/2022 20:07:25 - INFO - codeparrot_training - Step 1066: {'lr': 0.0002665, 'samples': 204864, 'steps': 1066, 'loss/train': 6.306669473648071} +01/27/2022 20:07:29 - INFO - codeparrot_training - Step 1067: {'lr': 0.00026675, 'samples': 205056, 'steps': 1067, 'loss/train': 2.7302345037460327} +01/27/2022 20:07:33 - INFO - codeparrot_training - Step 1068: {'lr': 0.00026700000000000004, 'samples': 205248, 'steps': 1068, 'loss/train': 7.303512096405029} 
+01/27/2022 20:07:37 - INFO - codeparrot_training - Step 1069: {'lr': 0.00026725, 'samples': 205440, 'steps': 1069, 'loss/train': 6.571877717971802} +01/27/2022 20:07:43 - INFO - codeparrot_training - Step 1070: {'lr': 0.0002675, 'samples': 205632, 'steps': 1070, 'loss/train': 6.219856023788452} +01/27/2022 20:07:47 - INFO - codeparrot_training - Step 1071: {'lr': 0.00026775, 'samples': 205824, 'steps': 1071, 'loss/train': 6.711027145385742} +01/27/2022 20:07:51 - INFO - codeparrot_training - Step 1072: {'lr': 0.000268, 'samples': 206016, 'steps': 1072, 'loss/train': 6.044724225997925} +01/27/2022 20:07:55 - INFO - codeparrot_training - Step 1073: {'lr': 0.00026825, 'samples': 206208, 'steps': 1073, 'loss/train': 2.7096844911575317} +01/27/2022 20:07:59 - INFO - codeparrot_training - Step 1074: {'lr': 0.0002685, 'samples': 206400, 'steps': 1074, 'loss/train': 8.10952377319336} +01/27/2022 20:08:05 - INFO - codeparrot_training - Step 1075: {'lr': 0.00026875, 'samples': 206592, 'steps': 1075, 'loss/train': 7.001063346862793} +01/27/2022 20:08:09 - INFO - codeparrot_training - Step 1076: {'lr': 0.00026900000000000003, 'samples': 206784, 'steps': 1076, 'loss/train': 6.5374345779418945} +01/27/2022 20:08:13 - INFO - codeparrot_training - Step 1077: {'lr': 0.00026925, 'samples': 206976, 'steps': 1077, 'loss/train': 5.885529041290283} +01/27/2022 20:08:17 - INFO - codeparrot_training - Step 1078: {'lr': 0.00026950000000000005, 'samples': 207168, 'steps': 1078, 'loss/train': 6.198288917541504} +01/27/2022 20:08:22 - INFO - codeparrot_training - Step 1079: {'lr': 0.00026974999999999997, 'samples': 207360, 'steps': 1079, 'loss/train': 6.992666959762573} +01/27/2022 20:08:27 - INFO - codeparrot_training - Step 1080: {'lr': 0.00027, 'samples': 207552, 'steps': 1080, 'loss/train': 6.471394300460815} +01/27/2022 20:08:31 - INFO - codeparrot_training - Step 1081: {'lr': 0.00027025, 'samples': 207744, 'steps': 1081, 'loss/train': 6.9427735805511475} +01/27/2022 20:08:35 - INFO - codeparrot_training - Step 1082: {'lr': 0.0002705, 'samples': 207936, 'steps': 1082, 'loss/train': 7.291034460067749} +01/27/2022 20:08:40 - INFO - codeparrot_training - Step 1083: {'lr': 0.00027075, 'samples': 208128, 'steps': 1083, 'loss/train': 7.425044059753418} +01/27/2022 20:08:44 - INFO - codeparrot_training - Step 1084: {'lr': 0.00027100000000000003, 'samples': 208320, 'steps': 1084, 'loss/train': 6.592901945114136} +01/27/2022 20:08:49 - INFO - codeparrot_training - Step 1085: {'lr': 0.00027125, 'samples': 208512, 'steps': 1085, 'loss/train': 6.1694605350494385} +01/27/2022 20:08:53 - INFO - codeparrot_training - Step 1086: {'lr': 0.00027150000000000004, 'samples': 208704, 'steps': 1086, 'loss/train': 6.886749029159546} +01/27/2022 20:08:57 - INFO - codeparrot_training - Step 1087: {'lr': 0.00027175, 'samples': 208896, 'steps': 1087, 'loss/train': 6.601865530014038} +01/27/2022 20:09:01 - INFO - codeparrot_training - Step 1088: {'lr': 0.00027200000000000005, 'samples': 209088, 'steps': 1088, 'loss/train': 7.312464952468872} +01/27/2022 20:09:05 - INFO - codeparrot_training - Step 1089: {'lr': 0.00027225, 'samples': 209280, 'steps': 1089, 'loss/train': 7.383501291275024} +01/27/2022 20:09:10 - INFO - codeparrot_training - Step 1090: {'lr': 0.0002725, 'samples': 209472, 'steps': 1090, 'loss/train': 7.0864784717559814} +01/27/2022 20:09:15 - INFO - codeparrot_training - Step 1091: {'lr': 0.00027275, 'samples': 209664, 'steps': 1091, 'loss/train': 5.819867134094238} +01/27/2022 20:09:19 - INFO - codeparrot_training - Step 
1092: {'lr': 0.000273, 'samples': 209856, 'steps': 1092, 'loss/train': 5.9724122285842896} +01/27/2022 20:09:23 - INFO - codeparrot_training - Step 1093: {'lr': 0.00027325, 'samples': 210048, 'steps': 1093, 'loss/train': 6.062058448791504} +01/27/2022 20:09:27 - INFO - codeparrot_training - Step 1094: {'lr': 0.00027350000000000003, 'samples': 210240, 'steps': 1094, 'loss/train': 5.281084299087524} +01/27/2022 20:09:33 - INFO - codeparrot_training - Step 1095: {'lr': 0.00027375, 'samples': 210432, 'steps': 1095, 'loss/train': 6.791861057281494} +01/27/2022 20:09:38 - INFO - codeparrot_training - Step 1096: {'lr': 0.00027400000000000005, 'samples': 210624, 'steps': 1096, 'loss/train': 5.105173945426941} +01/27/2022 20:09:42 - INFO - codeparrot_training - Step 1097: {'lr': 0.00027425, 'samples': 210816, 'steps': 1097, 'loss/train': 6.027201175689697} +01/27/2022 20:09:46 - INFO - codeparrot_training - Step 1098: {'lr': 0.0002745, 'samples': 211008, 'steps': 1098, 'loss/train': 4.949175596237183} +01/27/2022 20:09:50 - INFO - codeparrot_training - Step 1099: {'lr': 0.00027475, 'samples': 211200, 'steps': 1099, 'loss/train': 7.246063470840454} +01/27/2022 20:09:55 - INFO - codeparrot_training - Step 1100: {'lr': 0.000275, 'samples': 211392, 'steps': 1100, 'loss/train': 5.314031481742859} +01/27/2022 20:09:59 - INFO - codeparrot_training - Step 1101: {'lr': 0.00027525, 'samples': 211584, 'steps': 1101, 'loss/train': 7.240302801132202} +01/27/2022 20:10:03 - INFO - codeparrot_training - Step 1102: {'lr': 0.00027550000000000003, 'samples': 211776, 'steps': 1102, 'loss/train': 7.208335876464844} +01/27/2022 20:10:08 - INFO - codeparrot_training - Step 1103: {'lr': 0.00027575, 'samples': 211968, 'steps': 1103, 'loss/train': 6.179112195968628} +01/27/2022 20:10:12 - INFO - codeparrot_training - Step 1104: {'lr': 0.00027600000000000004, 'samples': 212160, 'steps': 1104, 'loss/train': 6.805338621139526} +01/27/2022 20:10:18 - INFO - codeparrot_training - Step 1105: {'lr': 0.00027625, 'samples': 212352, 'steps': 1105, 'loss/train': 5.8247305154800415} +01/27/2022 20:10:22 - INFO - codeparrot_training - Step 1106: {'lr': 0.00027650000000000005, 'samples': 212544, 'steps': 1106, 'loss/train': 7.093969345092773} +01/27/2022 20:10:26 - INFO - codeparrot_training - Step 1107: {'lr': 0.00027675, 'samples': 212736, 'steps': 1107, 'loss/train': 6.224668979644775} +01/27/2022 20:10:30 - INFO - codeparrot_training - Step 1108: {'lr': 0.000277, 'samples': 212928, 'steps': 1108, 'loss/train': 6.302997350692749} +01/27/2022 20:10:34 - INFO - codeparrot_training - Step 1109: {'lr': 0.00027725, 'samples': 213120, 'steps': 1109, 'loss/train': 7.30902886390686} +01/27/2022 20:10:39 - INFO - codeparrot_training - Step 1110: {'lr': 0.0002775, 'samples': 213312, 'steps': 1110, 'loss/train': 6.090117931365967} +01/27/2022 20:10:43 - INFO - codeparrot_training - Step 1111: {'lr': 0.00027775, 'samples': 213504, 'steps': 1111, 'loss/train': 6.622694492340088} +01/27/2022 20:10:48 - INFO - codeparrot_training - Step 1112: {'lr': 0.00027800000000000004, 'samples': 213696, 'steps': 1112, 'loss/train': 6.511759042739868} +01/27/2022 20:10:52 - INFO - codeparrot_training - Step 1113: {'lr': 0.00027825, 'samples': 213888, 'steps': 1113, 'loss/train': 6.381570339202881} +01/27/2022 20:10:56 - INFO - codeparrot_training - Step 1114: {'lr': 0.00027850000000000005, 'samples': 214080, 'steps': 1114, 'loss/train': 5.967792391777039} +01/27/2022 20:11:01 - INFO - codeparrot_training - Step 1115: {'lr': 0.00027875, 'samples': 214272, 
'steps': 1115, 'loss/train': 8.538374662399292} +01/27/2022 20:11:05 - INFO - codeparrot_training - Step 1116: {'lr': 0.000279, 'samples': 214464, 'steps': 1116, 'loss/train': 5.950817227363586} +01/27/2022 20:11:09 - INFO - codeparrot_training - Step 1117: {'lr': 0.00027925, 'samples': 214656, 'steps': 1117, 'loss/train': 7.775706052780151} +01/27/2022 20:11:13 - INFO - codeparrot_training - Step 1118: {'lr': 0.0002795, 'samples': 214848, 'steps': 1118, 'loss/train': 6.328991889953613} +01/27/2022 20:11:18 - INFO - codeparrot_training - Step 1119: {'lr': 0.00027975, 'samples': 215040, 'steps': 1119, 'loss/train': 7.466660499572754} +01/27/2022 20:11:24 - INFO - codeparrot_training - Step 1120: {'lr': 0.00028000000000000003, 'samples': 215232, 'steps': 1120, 'loss/train': 6.498028993606567} +01/27/2022 20:11:28 - INFO - codeparrot_training - Step 1121: {'lr': 0.00028025, 'samples': 215424, 'steps': 1121, 'loss/train': 6.444835424423218} +01/27/2022 20:11:32 - INFO - codeparrot_training - Step 1122: {'lr': 0.00028050000000000004, 'samples': 215616, 'steps': 1122, 'loss/train': 6.599710464477539} +01/27/2022 20:11:36 - INFO - codeparrot_training - Step 1123: {'lr': 0.00028075, 'samples': 215808, 'steps': 1123, 'loss/train': 6.3989417552948} +01/27/2022 20:11:40 - INFO - codeparrot_training - Step 1124: {'lr': 0.00028100000000000005, 'samples': 216000, 'steps': 1124, 'loss/train': 7.14109468460083} +01/27/2022 20:11:45 - INFO - codeparrot_training - Step 1125: {'lr': 0.00028125000000000003, 'samples': 216192, 'steps': 1125, 'loss/train': 6.869213104248047} +01/27/2022 20:11:50 - INFO - codeparrot_training - Step 1126: {'lr': 0.00028149999999999996, 'samples': 216384, 'steps': 1126, 'loss/train': 6.086499452590942} +01/27/2022 20:11:54 - INFO - codeparrot_training - Step 1127: {'lr': 0.00028175, 'samples': 216576, 'steps': 1127, 'loss/train': 6.089702367782593} +01/27/2022 20:11:58 - INFO - codeparrot_training - Step 1128: {'lr': 0.00028199999999999997, 'samples': 216768, 'steps': 1128, 'loss/train': 6.265631675720215} +01/27/2022 20:12:02 - INFO - codeparrot_training - Step 1129: {'lr': 0.00028225, 'samples': 216960, 'steps': 1129, 'loss/train': 6.024996042251587} +01/27/2022 20:12:07 - INFO - codeparrot_training - Step 1130: {'lr': 0.0002825, 'samples': 217152, 'steps': 1130, 'loss/train': 6.219256639480591} +01/27/2022 20:12:11 - INFO - codeparrot_training - Step 1131: {'lr': 0.00028275, 'samples': 217344, 'steps': 1131, 'loss/train': 7.128313779830933} +01/27/2022 20:12:16 - INFO - codeparrot_training - Step 1132: {'lr': 0.000283, 'samples': 217536, 'steps': 1132, 'loss/train': 5.9960854053497314} +01/27/2022 20:12:20 - INFO - codeparrot_training - Step 1133: {'lr': 0.00028325000000000003, 'samples': 217728, 'steps': 1133, 'loss/train': 6.278270244598389} +01/27/2022 20:12:26 - INFO - codeparrot_training - Step 1134: {'lr': 0.0002835, 'samples': 217920, 'steps': 1134, 'loss/train': 5.583935737609863} +01/27/2022 20:12:30 - INFO - codeparrot_training - Step 1135: {'lr': 0.00028375, 'samples': 218112, 'steps': 1135, 'loss/train': 5.667274475097656} +01/27/2022 20:12:34 - INFO - codeparrot_training - Step 1136: {'lr': 0.00028399999999999996, 'samples': 218304, 'steps': 1136, 'loss/train': 5.555999279022217} +01/27/2022 20:12:38 - INFO - codeparrot_training - Step 1137: {'lr': 0.00028425, 'samples': 218496, 'steps': 1137, 'loss/train': 5.911423087120056} +01/27/2022 20:12:42 - INFO - codeparrot_training - Step 1138: {'lr': 0.0002845, 'samples': 218688, 'steps': 1138, 'loss/train': 
6.839578628540039} +01/27/2022 20:12:47 - INFO - codeparrot_training - Step 1139: {'lr': 0.00028475, 'samples': 218880, 'steps': 1139, 'loss/train': 6.366803884506226} +01/27/2022 20:12:52 - INFO - codeparrot_training - Step 1140: {'lr': 0.000285, 'samples': 219072, 'steps': 1140, 'loss/train': 6.293874263763428} +01/27/2022 20:12:56 - INFO - codeparrot_training - Step 1141: {'lr': 0.00028525, 'samples': 219264, 'steps': 1141, 'loss/train': 6.04304051399231} +01/27/2022 20:13:00 - INFO - codeparrot_training - Step 1142: {'lr': 0.0002855, 'samples': 219456, 'steps': 1142, 'loss/train': 7.324260234832764} +01/27/2022 20:13:04 - INFO - codeparrot_training - Step 1143: {'lr': 0.00028575000000000003, 'samples': 219648, 'steps': 1143, 'loss/train': 6.565672874450684} +01/27/2022 20:13:09 - INFO - codeparrot_training - Step 1144: {'lr': 0.00028599999999999996, 'samples': 219840, 'steps': 1144, 'loss/train': 5.621720910072327} +01/27/2022 20:13:13 - INFO - codeparrot_training - Step 1145: {'lr': 0.00028625, 'samples': 220032, 'steps': 1145, 'loss/train': 7.351043701171875} +01/27/2022 20:13:18 - INFO - codeparrot_training - Step 1146: {'lr': 0.00028649999999999997, 'samples': 220224, 'steps': 1146, 'loss/train': 6.070271015167236} +01/27/2022 20:13:22 - INFO - codeparrot_training - Step 1147: {'lr': 0.00028675, 'samples': 220416, 'steps': 1147, 'loss/train': 8.005390405654907} +01/27/2022 20:13:26 - INFO - codeparrot_training - Step 1148: {'lr': 0.000287, 'samples': 220608, 'steps': 1148, 'loss/train': 6.678696155548096} +01/27/2022 20:13:32 - INFO - codeparrot_training - Step 1149: {'lr': 0.00028725, 'samples': 220800, 'steps': 1149, 'loss/train': 6.863654851913452} +01/27/2022 20:13:37 - INFO - codeparrot_training - Step 1150: {'lr': 0.0002875, 'samples': 220992, 'steps': 1150, 'loss/train': 6.061199426651001} +01/27/2022 20:13:41 - INFO - codeparrot_training - Step 1151: {'lr': 0.00028775000000000003, 'samples': 221184, 'steps': 1151, 'loss/train': 5.310739159584045} +01/27/2022 20:13:45 - INFO - codeparrot_training - Step 1152: {'lr': 0.000288, 'samples': 221376, 'steps': 1152, 'loss/train': 6.548009634017944} +01/27/2022 20:13:49 - INFO - codeparrot_training - Step 1153: {'lr': 0.00028825, 'samples': 221568, 'steps': 1153, 'loss/train': 4.907338500022888} +01/27/2022 20:13:53 - INFO - codeparrot_training - Step 1154: {'lr': 0.00028849999999999997, 'samples': 221760, 'steps': 1154, 'loss/train': 5.8318716287612915} +01/27/2022 20:13:58 - INFO - codeparrot_training - Step 1155: {'lr': 0.00028875, 'samples': 221952, 'steps': 1155, 'loss/train': 5.536755681037903} +01/27/2022 20:14:02 - INFO - codeparrot_training - Step 1156: {'lr': 0.000289, 'samples': 222144, 'steps': 1156, 'loss/train': 6.573516368865967} +01/27/2022 20:14:06 - INFO - codeparrot_training - Step 1157: {'lr': 0.00028925, 'samples': 222336, 'steps': 1157, 'loss/train': 9.802571296691895} +01/27/2022 20:14:11 - INFO - codeparrot_training - Step 1158: {'lr': 0.0002895, 'samples': 222528, 'steps': 1158, 'loss/train': 6.779712438583374} +01/27/2022 20:14:15 - INFO - codeparrot_training - Step 1159: {'lr': 0.00028975, 'samples': 222720, 'steps': 1159, 'loss/train': 5.3718788623809814} +01/27/2022 20:14:20 - INFO - codeparrot_training - Step 1160: {'lr': 0.00029, 'samples': 222912, 'steps': 1160, 'loss/train': 6.039066553115845} +01/27/2022 20:14:24 - INFO - codeparrot_training - Step 1161: {'lr': 0.00029025000000000003, 'samples': 223104, 'steps': 1161, 'loss/train': 6.990618467330933} +01/27/2022 20:14:28 - INFO - codeparrot_training 
- Step 1162: {'lr': 0.00029049999999999996, 'samples': 223296, 'steps': 1162, 'loss/train': 6.2342870235443115} +01/27/2022 20:14:32 - INFO - codeparrot_training - Step 1163: {'lr': 0.00029075, 'samples': 223488, 'steps': 1163, 'loss/train': 6.3156702518463135} +01/27/2022 20:14:37 - INFO - codeparrot_training - Step 1164: {'lr': 0.00029099999999999997, 'samples': 223680, 'steps': 1164, 'loss/train': 6.798289775848389} +01/27/2022 20:14:43 - INFO - codeparrot_training - Step 1165: {'lr': 0.00029125, 'samples': 223872, 'steps': 1165, 'loss/train': 6.069545030593872} +01/27/2022 20:14:47 - INFO - codeparrot_training - Step 1166: {'lr': 0.0002915, 'samples': 224064, 'steps': 1166, 'loss/train': 7.147094249725342} +01/27/2022 20:14:51 - INFO - codeparrot_training - Step 1167: {'lr': 0.00029175, 'samples': 224256, 'steps': 1167, 'loss/train': 6.279296636581421} +01/27/2022 20:14:55 - INFO - codeparrot_training - Step 1168: {'lr': 0.000292, 'samples': 224448, 'steps': 1168, 'loss/train': 1.8390909433364868} +01/27/2022 20:14:59 - INFO - codeparrot_training - Step 1169: {'lr': 0.00029225000000000003, 'samples': 224640, 'steps': 1169, 'loss/train': 6.530015230178833} +01/27/2022 20:15:05 - INFO - codeparrot_training - Step 1170: {'lr': 0.0002925, 'samples': 224832, 'steps': 1170, 'loss/train': 6.314259052276611} +01/27/2022 20:15:09 - INFO - codeparrot_training - Step 1171: {'lr': 0.00029275000000000004, 'samples': 225024, 'steps': 1171, 'loss/train': 5.534663915634155} +01/27/2022 20:15:13 - INFO - codeparrot_training - Step 1172: {'lr': 0.00029299999999999997, 'samples': 225216, 'steps': 1172, 'loss/train': 4.3329784870147705} +01/27/2022 20:15:17 - INFO - codeparrot_training - Step 1173: {'lr': 0.00029325, 'samples': 225408, 'steps': 1173, 'loss/train': 6.79440450668335} +01/27/2022 20:15:21 - INFO - codeparrot_training - Step 1174: {'lr': 0.0002935, 'samples': 225600, 'steps': 1174, 'loss/train': 6.60332179069519} +01/27/2022 20:15:26 - INFO - codeparrot_training - Step 1175: {'lr': 0.00029375, 'samples': 225792, 'steps': 1175, 'loss/train': 6.518944501876831} +01/27/2022 20:15:31 - INFO - codeparrot_training - Step 1176: {'lr': 0.000294, 'samples': 225984, 'steps': 1176, 'loss/train': 5.771566987037659} +01/27/2022 20:15:35 - INFO - codeparrot_training - Step 1177: {'lr': 0.00029425, 'samples': 226176, 'steps': 1177, 'loss/train': 5.747413158416748} +01/27/2022 20:15:39 - INFO - codeparrot_training - Step 1178: {'lr': 0.0002945, 'samples': 226368, 'steps': 1178, 'loss/train': 6.580724000930786} +01/27/2022 20:15:43 - INFO - codeparrot_training - Step 1179: {'lr': 0.00029475000000000004, 'samples': 226560, 'steps': 1179, 'loss/train': 4.977144241333008} +01/27/2022 20:15:49 - INFO - codeparrot_training - Step 1180: {'lr': 0.000295, 'samples': 226752, 'steps': 1180, 'loss/train': 6.731553554534912} +01/27/2022 20:15:53 - INFO - codeparrot_training - Step 1181: {'lr': 0.00029525, 'samples': 226944, 'steps': 1181, 'loss/train': 7.82481837272644} +01/27/2022 20:15:58 - INFO - codeparrot_training - Step 1182: {'lr': 0.00029549999999999997, 'samples': 227136, 'steps': 1182, 'loss/train': 6.1408281326293945} +01/27/2022 20:16:02 - INFO - codeparrot_training - Step 1183: {'lr': 0.00029575, 'samples': 227328, 'steps': 1183, 'loss/train': 5.9980491399765015} +01/27/2022 20:16:07 - INFO - codeparrot_training - Step 1184: {'lr': 0.000296, 'samples': 227520, 'steps': 1184, 'loss/train': 6.043874502182007} +01/27/2022 20:16:11 - INFO - codeparrot_training - Step 1185: {'lr': 0.00029625, 'samples': 227712, 
'steps': 1185, 'loss/train': 4.868932127952576} +01/27/2022 20:16:15 - INFO - codeparrot_training - Step 1186: {'lr': 0.0002965, 'samples': 227904, 'steps': 1186, 'loss/train': 6.037048816680908} +01/27/2022 20:16:19 - INFO - codeparrot_training - Step 1187: {'lr': 0.00029675000000000003, 'samples': 228096, 'steps': 1187, 'loss/train': 6.044588327407837} +01/27/2022 20:16:24 - INFO - codeparrot_training - Step 1188: {'lr': 0.000297, 'samples': 228288, 'steps': 1188, 'loss/train': 6.625821590423584} +01/27/2022 20:16:28 - INFO - codeparrot_training - Step 1189: {'lr': 0.00029725000000000004, 'samples': 228480, 'steps': 1189, 'loss/train': 5.946741342544556} +01/27/2022 20:16:34 - INFO - codeparrot_training - Step 1190: {'lr': 0.00029749999999999997, 'samples': 228672, 'steps': 1190, 'loss/train': 6.38441276550293} +01/27/2022 20:16:38 - INFO - codeparrot_training - Step 1191: {'lr': 0.00029775, 'samples': 228864, 'steps': 1191, 'loss/train': 6.306578636169434} +01/27/2022 20:16:42 - INFO - codeparrot_training - Step 1192: {'lr': 0.000298, 'samples': 229056, 'steps': 1192, 'loss/train': 6.502519369125366} +01/27/2022 20:16:46 - INFO - codeparrot_training - Step 1193: {'lr': 0.00029825, 'samples': 229248, 'steps': 1193, 'loss/train': 6.899967670440674} +01/27/2022 20:16:51 - INFO - codeparrot_training - Step 1194: {'lr': 0.0002985, 'samples': 229440, 'steps': 1194, 'loss/train': 6.601760387420654} +01/27/2022 20:16:55 - INFO - codeparrot_training - Step 1195: {'lr': 0.00029875, 'samples': 229632, 'steps': 1195, 'loss/train': 6.1342384815216064} +01/27/2022 20:16:59 - INFO - codeparrot_training - Step 1196: {'lr': 0.000299, 'samples': 229824, 'steps': 1196, 'loss/train': 6.967541456222534} +01/27/2022 20:17:04 - INFO - codeparrot_training - Step 1197: {'lr': 0.00029925000000000004, 'samples': 230016, 'steps': 1197, 'loss/train': 6.511771202087402} +01/27/2022 20:17:08 - INFO - codeparrot_training - Step 1198: {'lr': 0.0002995, 'samples': 230208, 'steps': 1198, 'loss/train': 6.0717387199401855} +01/27/2022 20:17:13 - INFO - codeparrot_training - Step 1199: {'lr': 0.00029975000000000005, 'samples': 230400, 'steps': 1199, 'loss/train': 4.923880577087402} +01/27/2022 20:17:17 - INFO - codeparrot_training - Step 1200: {'lr': 0.0003, 'samples': 230592, 'steps': 1200, 'loss/train': 6.300751447677612} +01/27/2022 20:17:21 - INFO - codeparrot_training - Step 1201: {'lr': 0.00030025, 'samples': 230784, 'steps': 1201, 'loss/train': 5.998017311096191} +01/27/2022 20:17:25 - INFO - codeparrot_training - Step 1202: {'lr': 0.0003005, 'samples': 230976, 'steps': 1202, 'loss/train': 5.30766499042511} +01/27/2022 20:17:30 - INFO - codeparrot_training - Step 1203: {'lr': 0.00030075, 'samples': 231168, 'steps': 1203, 'loss/train': 6.3577001094818115} +01/27/2022 20:17:35 - INFO - codeparrot_training - Step 1204: {'lr': 0.000301, 'samples': 231360, 'steps': 1204, 'loss/train': 6.341496706008911} +01/27/2022 20:17:39 - INFO - codeparrot_training - Step 1205: {'lr': 0.00030125000000000003, 'samples': 231552, 'steps': 1205, 'loss/train': 7.376644134521484} +01/27/2022 20:17:43 - INFO - codeparrot_training - Step 1206: {'lr': 0.0003015, 'samples': 231744, 'steps': 1206, 'loss/train': 6.280597686767578} +01/27/2022 20:17:47 - INFO - codeparrot_training - Step 1207: {'lr': 0.00030175000000000004, 'samples': 231936, 'steps': 1207, 'loss/train': 6.872528314590454} +01/27/2022 20:17:51 - INFO - codeparrot_training - Step 1208: {'lr': 0.000302, 'samples': 232128, 'steps': 1208, 'loss/train': 6.061391115188599} +01/27/2022 
20:17:57 - INFO - codeparrot_training - Step 1209: {'lr': 0.00030225, 'samples': 232320, 'steps': 1209, 'loss/train': 5.210599422454834} +01/27/2022 20:18:01 - INFO - codeparrot_training - Step 1210: {'lr': 0.0003025, 'samples': 232512, 'steps': 1210, 'loss/train': 5.790054202079773} +01/27/2022 20:18:06 - INFO - codeparrot_training - Step 1211: {'lr': 0.00030275, 'samples': 232704, 'steps': 1211, 'loss/train': 5.796718597412109} +01/27/2022 20:18:10 - INFO - codeparrot_training - Step 1212: {'lr': 0.000303, 'samples': 232896, 'steps': 1212, 'loss/train': 4.802600026130676} +01/27/2022 20:18:14 - INFO - codeparrot_training - Step 1213: {'lr': 0.00030325, 'samples': 233088, 'steps': 1213, 'loss/train': 6.613306045532227} +01/27/2022 20:18:19 - INFO - codeparrot_training - Step 1214: {'lr': 0.0003035, 'samples': 233280, 'steps': 1214, 'loss/train': 5.217711925506592} +01/27/2022 20:18:23 - INFO - codeparrot_training - Step 1215: {'lr': 0.00030375000000000004, 'samples': 233472, 'steps': 1215, 'loss/train': 5.135290503501892} +01/27/2022 20:18:28 - INFO - codeparrot_training - Step 1216: {'lr': 0.000304, 'samples': 233664, 'steps': 1216, 'loss/train': 5.579798698425293} +01/27/2022 20:18:32 - INFO - codeparrot_training - Step 1217: {'lr': 0.00030425000000000005, 'samples': 233856, 'steps': 1217, 'loss/train': 4.471961617469788} +01/27/2022 20:18:36 - INFO - codeparrot_training - Step 1218: {'lr': 0.0003045, 'samples': 234048, 'steps': 1218, 'loss/train': 5.69901180267334} +01/27/2022 20:18:41 - INFO - codeparrot_training - Step 1219: {'lr': 0.00030475, 'samples': 234240, 'steps': 1219, 'loss/train': 5.395376443862915} +01/27/2022 20:18:45 - INFO - codeparrot_training - Step 1220: {'lr': 0.000305, 'samples': 234432, 'steps': 1220, 'loss/train': 6.262244939804077} +01/27/2022 20:18:49 - INFO - codeparrot_training - Step 1221: {'lr': 0.00030525, 'samples': 234624, 'steps': 1221, 'loss/train': 5.507705926895142} +01/27/2022 20:18:54 - INFO - codeparrot_training - Step 1222: {'lr': 0.0003055, 'samples': 234816, 'steps': 1222, 'loss/train': 5.467741012573242} +01/27/2022 20:18:58 - INFO - codeparrot_training - Step 1223: {'lr': 0.00030575000000000003, 'samples': 235008, 'steps': 1223, 'loss/train': 7.247064113616943} +01/27/2022 20:19:04 - INFO - codeparrot_training - Step 1224: {'lr': 0.000306, 'samples': 235200, 'steps': 1224, 'loss/train': 5.343425273895264} +01/27/2022 20:19:08 - INFO - codeparrot_training - Step 1225: {'lr': 0.00030625000000000004, 'samples': 235392, 'steps': 1225, 'loss/train': 5.71028745174408} +01/27/2022 20:19:12 - INFO - codeparrot_training - Step 1226: {'lr': 0.0003065, 'samples': 235584, 'steps': 1226, 'loss/train': 5.828846454620361} +01/27/2022 20:19:16 - INFO - codeparrot_training - Step 1227: {'lr': 0.00030675, 'samples': 235776, 'steps': 1227, 'loss/train': 6.56818699836731} +01/27/2022 20:19:20 - INFO - codeparrot_training - Step 1228: {'lr': 0.000307, 'samples': 235968, 'steps': 1228, 'loss/train': 6.041790246963501} +01/27/2022 20:19:25 - INFO - codeparrot_training - Step 1229: {'lr': 0.00030725, 'samples': 236160, 'steps': 1229, 'loss/train': 6.172706365585327} +01/27/2022 20:19:29 - INFO - codeparrot_training - Step 1230: {'lr': 0.0003075, 'samples': 236352, 'steps': 1230, 'loss/train': 6.19190239906311} +01/27/2022 20:19:34 - INFO - codeparrot_training - Step 1231: {'lr': 0.00030775, 'samples': 236544, 'steps': 1231, 'loss/train': 6.197120904922485} +01/27/2022 20:19:38 - INFO - codeparrot_training - Step 1232: {'lr': 0.000308, 'samples': 236736, 'steps': 
1232, 'loss/train': 6.165764093399048} +01/27/2022 20:19:42 - INFO - codeparrot_training - Step 1233: {'lr': 0.00030825000000000004, 'samples': 236928, 'steps': 1233, 'loss/train': 5.548623561859131} +01/27/2022 20:19:49 - INFO - codeparrot_training - Step 1234: {'lr': 0.0003085, 'samples': 237120, 'steps': 1234, 'loss/train': 7.033030986785889} +01/27/2022 20:19:53 - INFO - codeparrot_training - Step 1235: {'lr': 0.00030875000000000005, 'samples': 237312, 'steps': 1235, 'loss/train': 6.1687610149383545} +01/27/2022 20:19:57 - INFO - codeparrot_training - Step 1236: {'lr': 0.00030900000000000003, 'samples': 237504, 'steps': 1236, 'loss/train': 5.911296129226685} +01/27/2022 20:20:01 - INFO - codeparrot_training - Step 1237: {'lr': 0.00030925, 'samples': 237696, 'steps': 1237, 'loss/train': 5.624135255813599} +01/27/2022 20:20:05 - INFO - codeparrot_training - Step 1238: {'lr': 0.0003095, 'samples': 237888, 'steps': 1238, 'loss/train': 5.627055287361145} +01/27/2022 20:20:11 - INFO - codeparrot_training - Step 1239: {'lr': 0.00030975, 'samples': 238080, 'steps': 1239, 'loss/train': 7.038751602172852} +01/27/2022 20:20:15 - INFO - codeparrot_training - Step 1240: {'lr': 0.00031, 'samples': 238272, 'steps': 1240, 'loss/train': 4.588157057762146} +01/27/2022 20:20:19 - INFO - codeparrot_training - Step 1241: {'lr': 0.00031025000000000003, 'samples': 238464, 'steps': 1241, 'loss/train': 5.421356678009033} +01/27/2022 20:20:23 - INFO - codeparrot_training - Step 1242: {'lr': 0.0003105, 'samples': 238656, 'steps': 1242, 'loss/train': 5.470308423042297} +01/27/2022 20:20:27 - INFO - codeparrot_training - Step 1243: {'lr': 0.00031075000000000005, 'samples': 238848, 'steps': 1243, 'loss/train': 5.879084944725037} +01/27/2022 20:20:32 - INFO - codeparrot_training - Step 1244: {'lr': 0.000311, 'samples': 239040, 'steps': 1244, 'loss/train': 6.332687616348267} +01/27/2022 20:20:37 - INFO - codeparrot_training - Step 1245: {'lr': 0.00031125000000000006, 'samples': 239232, 'steps': 1245, 'loss/train': 5.388120174407959} +01/27/2022 20:20:41 - INFO - codeparrot_training - Step 1246: {'lr': 0.0003115, 'samples': 239424, 'steps': 1246, 'loss/train': 5.9562599658966064} +01/27/2022 20:20:45 - INFO - codeparrot_training - Step 1247: {'lr': 0.00031175, 'samples': 239616, 'steps': 1247, 'loss/train': 5.953097820281982} +01/27/2022 20:20:49 - INFO - codeparrot_training - Step 1248: {'lr': 0.000312, 'samples': 239808, 'steps': 1248, 'loss/train': 5.735149383544922} +01/27/2022 20:20:55 - INFO - codeparrot_training - Step 1249: {'lr': 0.00031225000000000003, 'samples': 240000, 'steps': 1249, 'loss/train': 5.868349313735962} +01/27/2022 20:20:59 - INFO - codeparrot_training - Step 1250: {'lr': 0.0003125, 'samples': 240192, 'steps': 1250, 'loss/train': 5.6702492237091064} +01/27/2022 20:21:03 - INFO - codeparrot_training - Step 1251: {'lr': 0.00031275, 'samples': 240384, 'steps': 1251, 'loss/train': 5.804202318191528} +01/27/2022 20:21:07 - INFO - codeparrot_training - Step 1252: {'lr': 0.000313, 'samples': 240576, 'steps': 1252, 'loss/train': 5.520266175270081} +01/27/2022 20:21:12 - INFO - codeparrot_training - Step 1253: {'lr': 0.00031325, 'samples': 240768, 'steps': 1253, 'loss/train': 6.052096366882324} +01/27/2022 20:21:17 - INFO - codeparrot_training - Step 1254: {'lr': 0.00031350000000000003, 'samples': 240960, 'steps': 1254, 'loss/train': 6.466270208358765} +01/27/2022 20:21:21 - INFO - codeparrot_training - Step 1255: {'lr': 0.00031374999999999996, 'samples': 241152, 'steps': 1255, 'loss/train': 
5.624329090118408} +01/27/2022 20:21:25 - INFO - codeparrot_training - Step 1256: {'lr': 0.000314, 'samples': 241344, 'steps': 1256, 'loss/train': 6.50145149230957} +01/27/2022 20:21:29 - INFO - codeparrot_training - Step 1257: {'lr': 0.00031424999999999997, 'samples': 241536, 'steps': 1257, 'loss/train': 5.034597873687744} +01/27/2022 20:21:33 - INFO - codeparrot_training - Step 1258: {'lr': 0.0003145, 'samples': 241728, 'steps': 1258, 'loss/train': 5.814113616943359} +01/27/2022 20:21:38 - INFO - codeparrot_training - Step 1259: {'lr': 0.00031475, 'samples': 241920, 'steps': 1259, 'loss/train': 5.415977954864502} +01/27/2022 20:21:42 - INFO - codeparrot_training - Step 1260: {'lr': 0.000315, 'samples': 242112, 'steps': 1260, 'loss/train': 6.592894077301025} +01/27/2022 20:21:47 - INFO - codeparrot_training - Step 1261: {'lr': 0.00031525, 'samples': 242304, 'steps': 1261, 'loss/train': 5.853173732757568} +01/27/2022 20:21:51 - INFO - codeparrot_training - Step 1262: {'lr': 0.0003155, 'samples': 242496, 'steps': 1262, 'loss/train': 2.980490505695343} +01/27/2022 20:21:55 - INFO - codeparrot_training - Step 1263: {'lr': 0.00031575, 'samples': 242688, 'steps': 1263, 'loss/train': 5.863975882530212} +01/27/2022 20:22:00 - INFO - codeparrot_training - Step 1264: {'lr': 0.000316, 'samples': 242880, 'steps': 1264, 'loss/train': 6.6285810470581055} +01/27/2022 20:22:04 - INFO - codeparrot_training - Step 1265: {'lr': 0.00031624999999999996, 'samples': 243072, 'steps': 1265, 'loss/train': 7.1148762702941895} +01/27/2022 20:22:08 - INFO - codeparrot_training - Step 1266: {'lr': 0.0003165, 'samples': 243264, 'steps': 1266, 'loss/train': 6.865442991256714} +01/27/2022 20:22:12 - INFO - codeparrot_training - Step 1267: {'lr': 0.00031675, 'samples': 243456, 'steps': 1267, 'loss/train': 5.841933846473694} +01/27/2022 20:22:17 - INFO - codeparrot_training - Step 1268: {'lr': 0.000317, 'samples': 243648, 'steps': 1268, 'loss/train': 5.91564416885376} +01/27/2022 20:22:22 - INFO - codeparrot_training - Step 1269: {'lr': 0.00031725, 'samples': 243840, 'steps': 1269, 'loss/train': 5.927930474281311} +01/27/2022 20:22:27 - INFO - codeparrot_training - Step 1270: {'lr': 0.0003175, 'samples': 244032, 'steps': 1270, 'loss/train': 5.584486484527588} +01/27/2022 20:22:31 - INFO - codeparrot_training - Step 1271: {'lr': 0.00031775, 'samples': 244224, 'steps': 1271, 'loss/train': 5.189220070838928} +01/27/2022 20:22:35 - INFO - codeparrot_training - Step 1272: {'lr': 0.00031800000000000003, 'samples': 244416, 'steps': 1272, 'loss/train': 5.846536874771118} +01/27/2022 20:22:39 - INFO - codeparrot_training - Step 1273: {'lr': 0.00031825, 'samples': 244608, 'steps': 1273, 'loss/train': 5.705549240112305} +01/27/2022 20:22:44 - INFO - codeparrot_training - Step 1274: {'lr': 0.0003185, 'samples': 244800, 'steps': 1274, 'loss/train': 5.478667616844177} +01/27/2022 20:22:48 - INFO - codeparrot_training - Step 1275: {'lr': 0.00031874999999999997, 'samples': 244992, 'steps': 1275, 'loss/train': 4.732180595397949} +01/27/2022 20:22:52 - INFO - codeparrot_training - Step 1276: {'lr': 0.000319, 'samples': 245184, 'steps': 1276, 'loss/train': 7.347651958465576} +01/27/2022 20:22:57 - INFO - codeparrot_training - Step 1277: {'lr': 0.00031925, 'samples': 245376, 'steps': 1277, 'loss/train': 6.617639064788818} +01/27/2022 20:23:01 - INFO - codeparrot_training - Step 1278: {'lr': 0.0003195, 'samples': 245568, 'steps': 1278, 'loss/train': 6.381918668746948} +01/27/2022 20:23:07 - INFO - codeparrot_training - Step 1279: {'lr': 
0.00031975, 'samples': 245760, 'steps': 1279, 'loss/train': 3.523518204689026} +01/27/2022 20:23:11 - INFO - codeparrot_training - Step 1280: {'lr': 0.00032, 'samples': 245952, 'steps': 1280, 'loss/train': 6.2709832191467285} +01/27/2022 20:23:15 - INFO - codeparrot_training - Step 1281: {'lr': 0.00032025, 'samples': 246144, 'steps': 1281, 'loss/train': 5.995697021484375} +01/27/2022 20:23:19 - INFO - codeparrot_training - Step 1282: {'lr': 0.00032050000000000004, 'samples': 246336, 'steps': 1282, 'loss/train': 10.030723571777344} +01/27/2022 20:23:23 - INFO - codeparrot_training - Step 1283: {'lr': 0.00032074999999999996, 'samples': 246528, 'steps': 1283, 'loss/train': 6.351902961730957} +01/27/2022 20:23:29 - INFO - codeparrot_training - Step 1284: {'lr': 0.000321, 'samples': 246720, 'steps': 1284, 'loss/train': 6.770330429077148} +01/27/2022 20:23:33 - INFO - codeparrot_training - Step 1285: {'lr': 0.00032125, 'samples': 246912, 'steps': 1285, 'loss/train': 6.45099663734436} +01/27/2022 20:23:37 - INFO - codeparrot_training - Step 1286: {'lr': 0.0003215, 'samples': 247104, 'steps': 1286, 'loss/train': 8.37216567993164} +01/27/2022 20:23:41 - INFO - codeparrot_training - Step 1287: {'lr': 0.00032175, 'samples': 247296, 'steps': 1287, 'loss/train': 5.633511185646057} +01/27/2022 20:23:46 - INFO - codeparrot_training - Step 1288: {'lr': 0.000322, 'samples': 247488, 'steps': 1288, 'loss/train': 6.591914176940918} +01/27/2022 20:23:51 - INFO - codeparrot_training - Step 1289: {'lr': 0.00032225, 'samples': 247680, 'steps': 1289, 'loss/train': 6.548358678817749} +01/27/2022 20:23:55 - INFO - codeparrot_training - Step 1290: {'lr': 0.00032250000000000003, 'samples': 247872, 'steps': 1290, 'loss/train': 6.10328221321106} +01/27/2022 20:23:59 - INFO - codeparrot_training - Step 1291: {'lr': 0.00032275, 'samples': 248064, 'steps': 1291, 'loss/train': 5.338511824607849} +01/27/2022 20:24:03 - INFO - codeparrot_training - Step 1292: {'lr': 0.000323, 'samples': 248256, 'steps': 1292, 'loss/train': 6.172074794769287} +01/27/2022 20:24:07 - INFO - codeparrot_training - Step 1293: {'lr': 0.00032324999999999997, 'samples': 248448, 'steps': 1293, 'loss/train': 6.089951992034912} +01/27/2022 20:24:14 - INFO - codeparrot_training - Step 1294: {'lr': 0.0003235, 'samples': 248640, 'steps': 1294, 'loss/train': 6.235123157501221} +01/27/2022 20:24:18 - INFO - codeparrot_training - Step 1295: {'lr': 0.00032375, 'samples': 248832, 'steps': 1295, 'loss/train': 4.142842411994934} +01/27/2022 20:24:22 - INFO - codeparrot_training - Step 1296: {'lr': 0.000324, 'samples': 249024, 'steps': 1296, 'loss/train': 5.903172612190247} +01/27/2022 20:24:26 - INFO - codeparrot_training - Step 1297: {'lr': 0.00032425, 'samples': 249216, 'steps': 1297, 'loss/train': 6.823473930358887} +01/27/2022 20:24:30 - INFO - codeparrot_training - Step 1298: {'lr': 0.00032450000000000003, 'samples': 249408, 'steps': 1298, 'loss/train': 7.181246995925903} +01/27/2022 20:24:37 - INFO - codeparrot_training - Step 1299: {'lr': 0.00032475, 'samples': 249600, 'steps': 1299, 'loss/train': 9.384151697158813} +01/27/2022 20:24:41 - INFO - codeparrot_training - Step 1300: {'lr': 0.00032500000000000004, 'samples': 249792, 'steps': 1300, 'loss/train': 6.890306711196899} +01/27/2022 20:24:45 - INFO - codeparrot_training - Step 1301: {'lr': 0.00032524999999999996, 'samples': 249984, 'steps': 1301, 'loss/train': 4.543152451515198} +01/27/2022 20:24:49 - INFO - codeparrot_training - Step 1302: {'lr': 0.0003255, 'samples': 250176, 'steps': 1302, 
'loss/train': 4.94231379032135} +01/27/2022 20:24:53 - INFO - codeparrot_training - Step 1303: {'lr': 0.00032575, 'samples': 250368, 'steps': 1303, 'loss/train': 6.2204976081848145} +01/27/2022 20:24:58 - INFO - codeparrot_training - Step 1304: {'lr': 0.000326, 'samples': 250560, 'steps': 1304, 'loss/train': 4.890730619430542} +01/27/2022 20:25:03 - INFO - codeparrot_training - Step 1305: {'lr': 0.00032625, 'samples': 250752, 'steps': 1305, 'loss/train': 7.002712726593018} +01/27/2022 20:25:07 - INFO - codeparrot_training - Step 1306: {'lr': 0.0003265, 'samples': 250944, 'steps': 1306, 'loss/train': 6.253939390182495} +01/27/2022 20:25:11 - INFO - codeparrot_training - Step 1307: {'lr': 0.00032675, 'samples': 251136, 'steps': 1307, 'loss/train': 6.320581912994385} +01/27/2022 20:25:15 - INFO - codeparrot_training - Step 1308: {'lr': 0.00032700000000000003, 'samples': 251328, 'steps': 1308, 'loss/train': 5.838821053504944} +01/27/2022 20:25:19 - INFO - codeparrot_training - Step 1309: {'lr': 0.00032725, 'samples': 251520, 'steps': 1309, 'loss/train': 6.843127012252808} +01/27/2022 20:25:25 - INFO - codeparrot_training - Step 1310: {'lr': 0.00032750000000000005, 'samples': 251712, 'steps': 1310, 'loss/train': 6.715729236602783} +01/27/2022 20:25:29 - INFO - codeparrot_training - Step 1311: {'lr': 0.00032774999999999997, 'samples': 251904, 'steps': 1311, 'loss/train': 5.719773888587952} +01/27/2022 20:25:33 - INFO - codeparrot_training - Step 1312: {'lr': 0.000328, 'samples': 252096, 'steps': 1312, 'loss/train': 6.019824743270874} +01/27/2022 20:25:37 - INFO - codeparrot_training - Step 1313: {'lr': 0.00032825, 'samples': 252288, 'steps': 1313, 'loss/train': 5.300379395484924} +01/27/2022 20:25:42 - INFO - codeparrot_training - Step 1314: {'lr': 0.0003285, 'samples': 252480, 'steps': 1314, 'loss/train': 6.472275495529175} +01/27/2022 20:25:47 - INFO - codeparrot_training - Step 1315: {'lr': 0.00032875, 'samples': 252672, 'steps': 1315, 'loss/train': 4.7848674058914185} +01/27/2022 20:25:51 - INFO - codeparrot_training - Step 1316: {'lr': 0.00032900000000000003, 'samples': 252864, 'steps': 1316, 'loss/train': 5.480200409889221} +01/27/2022 20:25:55 - INFO - codeparrot_training - Step 1317: {'lr': 0.00032925, 'samples': 253056, 'steps': 1317, 'loss/train': 6.194598913192749} +01/27/2022 20:26:00 - INFO - codeparrot_training - Step 1318: {'lr': 0.00032950000000000004, 'samples': 253248, 'steps': 1318, 'loss/train': 5.316754102706909} +01/27/2022 20:26:04 - INFO - codeparrot_training - Step 1319: {'lr': 0.00032975, 'samples': 253440, 'steps': 1319, 'loss/train': 10.881548881530762} +01/27/2022 20:26:09 - INFO - codeparrot_training - Step 1320: {'lr': 0.00033, 'samples': 253632, 'steps': 1320, 'loss/train': 5.247634291648865} +01/27/2022 20:26:13 - INFO - codeparrot_training - Step 1321: {'lr': 0.00033025, 'samples': 253824, 'steps': 1321, 'loss/train': 5.323326945304871} +01/27/2022 20:26:17 - INFO - codeparrot_training - Step 1322: {'lr': 0.0003305, 'samples': 254016, 'steps': 1322, 'loss/train': 5.004830002784729} +01/27/2022 20:26:21 - INFO - codeparrot_training - Step 1323: {'lr': 0.00033075, 'samples': 254208, 'steps': 1323, 'loss/train': 5.998468637466431} +01/27/2022 20:26:25 - INFO - codeparrot_training - Step 1324: {'lr': 0.000331, 'samples': 254400, 'steps': 1324, 'loss/train': 5.258039832115173} +01/27/2022 20:26:31 - INFO - codeparrot_training - Step 1325: {'lr': 0.00033125, 'samples': 254592, 'steps': 1325, 'loss/train': 5.178244113922119} +01/27/2022 20:26:35 - INFO - 
codeparrot_training - Step 1326: {'lr': 0.00033150000000000003, 'samples': 254784, 'steps': 1326, 'loss/train': 6.739606618881226} +01/27/2022 20:26:39 - INFO - codeparrot_training - Step 1327: {'lr': 0.00033175, 'samples': 254976, 'steps': 1327, 'loss/train': 5.981183409690857} +01/27/2022 20:26:44 - INFO - codeparrot_training - Step 1328: {'lr': 0.00033200000000000005, 'samples': 255168, 'steps': 1328, 'loss/train': 5.995321869850159} +01/27/2022 20:26:48 - INFO - codeparrot_training - Step 1329: {'lr': 0.00033224999999999997, 'samples': 255360, 'steps': 1329, 'loss/train': 5.820279121398926} +01/27/2022 20:26:53 - INFO - codeparrot_training - Step 1330: {'lr': 0.0003325, 'samples': 255552, 'steps': 1330, 'loss/train': 5.64120626449585} +01/27/2022 20:26:57 - INFO - codeparrot_training - Step 1331: {'lr': 0.00033275, 'samples': 255744, 'steps': 1331, 'loss/train': 5.612629294395447} +01/27/2022 20:27:01 - INFO - codeparrot_training - Step 1332: {'lr': 0.000333, 'samples': 255936, 'steps': 1332, 'loss/train': 6.206796884536743} +01/27/2022 20:27:05 - INFO - codeparrot_training - Step 1333: {'lr': 0.00033325, 'samples': 256128, 'steps': 1333, 'loss/train': 4.314150452613831} +01/27/2022 20:27:09 - INFO - codeparrot_training - Step 1334: {'lr': 0.00033350000000000003, 'samples': 256320, 'steps': 1334, 'loss/train': 5.413541078567505} +01/27/2022 20:27:15 - INFO - codeparrot_training - Step 1335: {'lr': 0.00033375, 'samples': 256512, 'steps': 1335, 'loss/train': 5.380462288856506} +01/27/2022 20:27:19 - INFO - codeparrot_training - Step 1336: {'lr': 0.00033400000000000004, 'samples': 256704, 'steps': 1336, 'loss/train': 6.767319917678833} +01/27/2022 20:27:23 - INFO - codeparrot_training - Step 1337: {'lr': 0.00033425, 'samples': 256896, 'steps': 1337, 'loss/train': 6.24406099319458} +01/27/2022 20:27:27 - INFO - codeparrot_training - Step 1338: {'lr': 0.00033450000000000005, 'samples': 257088, 'steps': 1338, 'loss/train': 5.743507146835327} +01/27/2022 20:27:31 - INFO - codeparrot_training - Step 1339: {'lr': 0.00033475, 'samples': 257280, 'steps': 1339, 'loss/train': 5.89429771900177} +01/27/2022 20:27:38 - INFO - codeparrot_training - Step 1340: {'lr': 0.000335, 'samples': 257472, 'steps': 1340, 'loss/train': 4.837573170661926} +01/27/2022 20:27:42 - INFO - codeparrot_training - Step 1341: {'lr': 0.00033525, 'samples': 257664, 'steps': 1341, 'loss/train': 4.5707173347473145} +01/27/2022 20:27:46 - INFO - codeparrot_training - Step 1342: {'lr': 0.0003355, 'samples': 257856, 'steps': 1342, 'loss/train': 4.389951109886169} +01/27/2022 20:27:50 - INFO - codeparrot_training - Step 1343: {'lr': 0.00033575, 'samples': 258048, 'steps': 1343, 'loss/train': 5.474466562271118} +01/27/2022 20:27:54 - INFO - codeparrot_training - Step 1344: {'lr': 0.00033600000000000004, 'samples': 258240, 'steps': 1344, 'loss/train': 5.8221423625946045} +01/27/2022 20:27:59 - INFO - codeparrot_training - Step 1345: {'lr': 0.00033625, 'samples': 258432, 'steps': 1345, 'loss/train': 6.327499866485596} +01/27/2022 20:28:03 - INFO - codeparrot_training - Step 1346: {'lr': 0.00033650000000000005, 'samples': 258624, 'steps': 1346, 'loss/train': 5.9627180099487305} +01/27/2022 20:28:08 - INFO - codeparrot_training - Step 1347: {'lr': 0.00033675, 'samples': 258816, 'steps': 1347, 'loss/train': 6.49405574798584} +01/27/2022 20:28:12 - INFO - codeparrot_training - Step 1348: {'lr': 0.000337, 'samples': 259008, 'steps': 1348, 'loss/train': 5.677776575088501} +01/27/2022 20:28:16 - INFO - codeparrot_training - Step 1349: {'lr': 
0.00033725, 'samples': 259200, 'steps': 1349, 'loss/train': 5.403262138366699} +01/27/2022 20:28:21 - INFO - codeparrot_training - Step 1350: {'lr': 0.0003375, 'samples': 259392, 'steps': 1350, 'loss/train': 5.227678656578064} +01/27/2022 20:28:25 - INFO - codeparrot_training - Step 1351: {'lr': 0.00033775, 'samples': 259584, 'steps': 1351, 'loss/train': 5.7738107442855835} +01/27/2022 20:28:29 - INFO - codeparrot_training - Step 1352: {'lr': 0.00033800000000000003, 'samples': 259776, 'steps': 1352, 'loss/train': 5.483787775039673} +01/27/2022 20:28:34 - INFO - codeparrot_training - Step 1353: {'lr': 0.00033825, 'samples': 259968, 'steps': 1353, 'loss/train': 6.146427154541016} +01/27/2022 20:28:38 - INFO - codeparrot_training - Step 1354: {'lr': 0.00033850000000000004, 'samples': 260160, 'steps': 1354, 'loss/train': 5.212450861930847} +01/27/2022 20:28:44 - INFO - codeparrot_training - Step 1355: {'lr': 0.00033875, 'samples': 260352, 'steps': 1355, 'loss/train': 6.173201322555542} +01/27/2022 20:28:48 - INFO - codeparrot_training - Step 1356: {'lr': 0.00033900000000000005, 'samples': 260544, 'steps': 1356, 'loss/train': 5.408001065254211} +01/27/2022 20:28:52 - INFO - codeparrot_training - Step 1357: {'lr': 0.00033925, 'samples': 260736, 'steps': 1357, 'loss/train': 5.412554740905762} +01/27/2022 20:28:56 - INFO - codeparrot_training - Step 1358: {'lr': 0.0003395, 'samples': 260928, 'steps': 1358, 'loss/train': 6.324632406234741} +01/27/2022 20:29:00 - INFO - codeparrot_training - Step 1359: {'lr': 0.00033975, 'samples': 261120, 'steps': 1359, 'loss/train': 5.624316215515137} +01/27/2022 20:29:05 - INFO - codeparrot_training - Step 1360: {'lr': 0.00034, 'samples': 261312, 'steps': 1360, 'loss/train': 6.95796275138855} +01/27/2022 20:29:09 - INFO - codeparrot_training - Step 1361: {'lr': 0.00034025, 'samples': 261504, 'steps': 1361, 'loss/train': 5.593010187149048} +01/27/2022 20:29:14 - INFO - codeparrot_training - Step 1362: {'lr': 0.00034050000000000004, 'samples': 261696, 'steps': 1362, 'loss/train': 5.221229910850525} +01/27/2022 20:29:18 - INFO - codeparrot_training - Step 1363: {'lr': 0.00034075, 'samples': 261888, 'steps': 1363, 'loss/train': 4.74666166305542} +01/27/2022 20:29:22 - INFO - codeparrot_training - Step 1364: {'lr': 0.00034100000000000005, 'samples': 262080, 'steps': 1364, 'loss/train': 5.112065434455872} +01/27/2022 20:29:29 - INFO - codeparrot_training - Step 1365: {'lr': 0.00034125000000000003, 'samples': 262272, 'steps': 1365, 'loss/train': 7.578925609588623} +01/27/2022 20:29:33 - INFO - codeparrot_training - Step 1366: {'lr': 0.0003415, 'samples': 262464, 'steps': 1366, 'loss/train': 4.9115309715271} +01/27/2022 20:29:37 - INFO - codeparrot_training - Step 1367: {'lr': 0.00034175, 'samples': 262656, 'steps': 1367, 'loss/train': 4.823127508163452} +01/27/2022 20:29:41 - INFO - codeparrot_training - Step 1368: {'lr': 0.000342, 'samples': 262848, 'steps': 1368, 'loss/train': 6.049104452133179} +01/27/2022 20:29:46 - INFO - codeparrot_training - Step 1369: {'lr': 0.00034225, 'samples': 263040, 'steps': 1369, 'loss/train': 6.076598882675171} +01/27/2022 20:29:51 - INFO - codeparrot_training - Step 1370: {'lr': 0.00034250000000000003, 'samples': 263232, 'steps': 1370, 'loss/train': 5.928333163261414} +01/27/2022 20:29:55 - INFO - codeparrot_training - Step 1371: {'lr': 0.00034275, 'samples': 263424, 'steps': 1371, 'loss/train': 5.8759496212005615} +01/27/2022 20:29:59 - INFO - codeparrot_training - Step 1372: {'lr': 0.00034300000000000004, 'samples': 263616, 'steps': 
1372, 'loss/train': 5.896329402923584} +01/27/2022 20:30:03 - INFO - codeparrot_training - Step 1373: {'lr': 0.00034325, 'samples': 263808, 'steps': 1373, 'loss/train': 5.90148138999939} +01/27/2022 20:30:07 - INFO - codeparrot_training - Step 1374: {'lr': 0.00034350000000000006, 'samples': 264000, 'steps': 1374, 'loss/train': 4.09662938117981} +01/27/2022 20:30:12 - INFO - codeparrot_training - Step 1375: {'lr': 0.00034375, 'samples': 264192, 'steps': 1375, 'loss/train': 4.673898696899414} +01/27/2022 20:30:16 - INFO - codeparrot_training - Step 1376: {'lr': 0.00034399999999999996, 'samples': 264384, 'steps': 1376, 'loss/train': 5.130805134773254} +01/27/2022 20:30:21 - INFO - codeparrot_training - Step 1377: {'lr': 0.00034425, 'samples': 264576, 'steps': 1377, 'loss/train': 4.903575539588928} +01/27/2022 20:30:25 - INFO - codeparrot_training - Step 1378: {'lr': 0.00034449999999999997, 'samples': 264768, 'steps': 1378, 'loss/train': 5.7353843450546265} +01/27/2022 20:30:29 - INFO - codeparrot_training - Step 1379: {'lr': 0.00034475, 'samples': 264960, 'steps': 1379, 'loss/train': 5.375657558441162} +01/27/2022 20:30:35 - INFO - codeparrot_training - Step 1380: {'lr': 0.000345, 'samples': 265152, 'steps': 1380, 'loss/train': 5.550543308258057} +01/27/2022 20:30:39 - INFO - codeparrot_training - Step 1381: {'lr': 0.00034525, 'samples': 265344, 'steps': 1381, 'loss/train': 6.442796945571899} +01/27/2022 20:30:43 - INFO - codeparrot_training - Step 1382: {'lr': 0.0003455, 'samples': 265536, 'steps': 1382, 'loss/train': 5.595800757408142} +01/27/2022 20:30:47 - INFO - codeparrot_training - Step 1383: {'lr': 0.00034575000000000003, 'samples': 265728, 'steps': 1383, 'loss/train': 5.464699029922485} +01/27/2022 20:30:51 - INFO - codeparrot_training - Step 1384: {'lr': 0.000346, 'samples': 265920, 'steps': 1384, 'loss/train': 5.888701558113098} +01/27/2022 20:30:57 - INFO - codeparrot_training - Step 1385: {'lr': 0.00034625, 'samples': 266112, 'steps': 1385, 'loss/train': 5.0014214515686035} +01/27/2022 20:31:01 - INFO - codeparrot_training - Step 1386: {'lr': 0.00034649999999999997, 'samples': 266304, 'steps': 1386, 'loss/train': 5.541521787643433} +01/27/2022 20:31:05 - INFO - codeparrot_training - Step 1387: {'lr': 0.00034675, 'samples': 266496, 'steps': 1387, 'loss/train': 5.95023787021637} +01/27/2022 20:31:09 - INFO - codeparrot_training - Step 1388: {'lr': 0.000347, 'samples': 266688, 'steps': 1388, 'loss/train': 6.971344470977783} +01/27/2022 20:31:13 - INFO - codeparrot_training - Step 1389: {'lr': 0.00034725, 'samples': 266880, 'steps': 1389, 'loss/train': 5.894403576850891} +01/27/2022 20:31:19 - INFO - codeparrot_training - Step 1390: {'lr': 0.0003475, 'samples': 267072, 'steps': 1390, 'loss/train': 5.6230244636535645} +01/27/2022 20:31:23 - INFO - codeparrot_training - Step 1391: {'lr': 0.00034775, 'samples': 267264, 'steps': 1391, 'loss/train': 6.673340320587158} +01/27/2022 20:31:28 - INFO - codeparrot_training - Step 1392: {'lr': 0.000348, 'samples': 267456, 'steps': 1392, 'loss/train': 6.4198994636535645} +01/27/2022 20:31:32 - INFO - codeparrot_training - Step 1393: {'lr': 0.00034825000000000004, 'samples': 267648, 'steps': 1393, 'loss/train': 5.833487033843994} +01/27/2022 20:31:36 - INFO - codeparrot_training - Step 1394: {'lr': 0.00034849999999999996, 'samples': 267840, 'steps': 1394, 'loss/train': 5.105870246887207} +01/27/2022 20:31:41 - INFO - codeparrot_training - Step 1395: {'lr': 0.00034875, 'samples': 268032, 'steps': 1395, 'loss/train': 4.660842418670654} +01/27/2022 
20:31:45 - INFO - codeparrot_training - Step 1396: {'lr': 0.00034899999999999997, 'samples': 268224, 'steps': 1396, 'loss/train': 4.761531829833984} +01/27/2022 20:31:49 - INFO - codeparrot_training - Step 1397: {'lr': 0.00034925, 'samples': 268416, 'steps': 1397, 'loss/train': 6.4013872146606445} +01/27/2022 20:31:53 - INFO - codeparrot_training - Step 1398: {'lr': 0.0003495, 'samples': 268608, 'steps': 1398, 'loss/train': 3.6321576833724976} +01/27/2022 20:31:58 - INFO - codeparrot_training - Step 1399: {'lr': 0.00034975, 'samples': 268800, 'steps': 1399, 'loss/train': 5.103322505950928} +01/27/2022 20:32:03 - INFO - codeparrot_training - Step 1400: {'lr': 0.00035, 'samples': 268992, 'steps': 1400, 'loss/train': 4.250860691070557} +01/27/2022 20:32:07 - INFO - codeparrot_training - Step 1401: {'lr': 0.00035025000000000003, 'samples': 269184, 'steps': 1401, 'loss/train': 6.224779844284058} +01/27/2022 20:32:11 - INFO - codeparrot_training - Step 1402: {'lr': 0.0003505, 'samples': 269376, 'steps': 1402, 'loss/train': 5.970402359962463} +01/27/2022 20:32:15 - INFO - codeparrot_training - Step 1403: {'lr': 0.00035075, 'samples': 269568, 'steps': 1403, 'loss/train': 5.812585473060608} +01/27/2022 20:32:19 - INFO - codeparrot_training - Step 1404: {'lr': 0.00035099999999999997, 'samples': 269760, 'steps': 1404, 'loss/train': 4.140394806861877} +01/27/2022 20:32:24 - INFO - codeparrot_training - Step 1405: {'lr': 0.00035125, 'samples': 269952, 'steps': 1405, 'loss/train': 4.795518279075623} +01/27/2022 20:32:29 - INFO - codeparrot_training - Step 1406: {'lr': 0.0003515, 'samples': 270144, 'steps': 1406, 'loss/train': 4.613077640533447} +01/27/2022 20:32:33 - INFO - codeparrot_training - Step 1407: {'lr': 0.00035175, 'samples': 270336, 'steps': 1407, 'loss/train': 5.554975390434265} +01/27/2022 20:32:37 - INFO - codeparrot_training - Step 1408: {'lr': 0.000352, 'samples': 270528, 'steps': 1408, 'loss/train': 5.505416393280029} +01/27/2022 20:32:41 - INFO - codeparrot_training - Step 1409: {'lr': 0.00035225, 'samples': 270720, 'steps': 1409, 'loss/train': 4.871671199798584} +01/27/2022 20:32:47 - INFO - codeparrot_training - Step 1410: {'lr': 0.0003525, 'samples': 270912, 'steps': 1410, 'loss/train': 5.588037371635437} +01/27/2022 20:32:51 - INFO - codeparrot_training - Step 1411: {'lr': 0.00035275000000000004, 'samples': 271104, 'steps': 1411, 'loss/train': 3.6950258016586304} +01/27/2022 20:32:56 - INFO - codeparrot_training - Step 1412: {'lr': 0.00035299999999999996, 'samples': 271296, 'steps': 1412, 'loss/train': 10.260623931884766} +01/27/2022 20:33:00 - INFO - codeparrot_training - Step 1413: {'lr': 0.00035325, 'samples': 271488, 'steps': 1413, 'loss/train': 5.850403904914856} +01/27/2022 20:33:04 - INFO - codeparrot_training - Step 1414: {'lr': 0.0003535, 'samples': 271680, 'steps': 1414, 'loss/train': 4.54086971282959} +01/27/2022 20:33:09 - INFO - codeparrot_training - Step 1415: {'lr': 0.00035375, 'samples': 271872, 'steps': 1415, 'loss/train': 4.318433046340942} +01/27/2022 20:33:13 - INFO - codeparrot_training - Step 1416: {'lr': 0.000354, 'samples': 272064, 'steps': 1416, 'loss/train': 4.870678424835205} +01/27/2022 20:33:17 - INFO - codeparrot_training - Step 1417: {'lr': 0.00035425, 'samples': 272256, 'steps': 1417, 'loss/train': 4.648704886436462} +01/27/2022 20:33:21 - INFO - codeparrot_training - Step 1418: {'lr': 0.0003545, 'samples': 272448, 'steps': 1418, 'loss/train': 6.076154708862305} +01/27/2022 20:33:26 - INFO - codeparrot_training - Step 1419: {'lr': 
0.00035475000000000003, 'samples': 272640, 'steps': 1419, 'loss/train': 4.453089237213135} +01/27/2022 20:33:31 - INFO - codeparrot_training - Step 1420: {'lr': 0.000355, 'samples': 272832, 'steps': 1420, 'loss/train': 5.415349245071411} +01/27/2022 20:33:35 - INFO - codeparrot_training - Step 1421: {'lr': 0.00035525000000000004, 'samples': 273024, 'steps': 1421, 'loss/train': 5.648927450180054} +01/27/2022 20:33:39 - INFO - codeparrot_training - Step 1422: {'lr': 0.00035549999999999997, 'samples': 273216, 'steps': 1422, 'loss/train': 4.790592670440674} +01/27/2022 20:33:43 - INFO - codeparrot_training - Step 1423: {'lr': 0.00035575, 'samples': 273408, 'steps': 1423, 'loss/train': 4.946924686431885} +01/27/2022 20:33:47 - INFO - codeparrot_training - Step 1424: {'lr': 0.000356, 'samples': 273600, 'steps': 1424, 'loss/train': 6.102973222732544} +01/27/2022 20:33:53 - INFO - codeparrot_training - Step 1425: {'lr': 0.00035625, 'samples': 273792, 'steps': 1425, 'loss/train': 4.645643591880798} +01/27/2022 20:33:57 - INFO - codeparrot_training - Step 1426: {'lr': 0.0003565, 'samples': 273984, 'steps': 1426, 'loss/train': 5.433918356895447} +01/27/2022 20:34:02 - INFO - codeparrot_training - Step 1427: {'lr': 0.00035675, 'samples': 274176, 'steps': 1427, 'loss/train': 5.46021580696106} +01/27/2022 20:34:06 - INFO - codeparrot_training - Step 1428: {'lr': 0.000357, 'samples': 274368, 'steps': 1428, 'loss/train': 5.155351281166077} +01/27/2022 20:34:10 - INFO - codeparrot_training - Step 1429: {'lr': 0.00035725000000000004, 'samples': 274560, 'steps': 1429, 'loss/train': 5.724423050880432} +01/27/2022 20:34:15 - INFO - codeparrot_training - Step 1430: {'lr': 0.0003575, 'samples': 274752, 'steps': 1430, 'loss/train': 5.392196774482727} +01/27/2022 20:34:19 - INFO - codeparrot_training - Step 1431: {'lr': 0.00035775, 'samples': 274944, 'steps': 1431, 'loss/train': 2.5217714309692383} +01/27/2022 20:34:23 - INFO - codeparrot_training - Step 1432: {'lr': 0.000358, 'samples': 275136, 'steps': 1432, 'loss/train': 9.94968581199646} +01/27/2022 20:34:28 - INFO - codeparrot_training - Step 1433: {'lr': 0.00035825, 'samples': 275328, 'steps': 1433, 'loss/train': 12.064802169799805} +01/27/2022 20:34:32 - INFO - codeparrot_training - Step 1434: {'lr': 0.0003585, 'samples': 275520, 'steps': 1434, 'loss/train': 6.286809682846069} +01/27/2022 20:34:39 - INFO - codeparrot_training - Step 1435: {'lr': 0.00035875, 'samples': 275712, 'steps': 1435, 'loss/train': 6.113040447235107} +01/27/2022 20:34:43 - INFO - codeparrot_training - Step 1436: {'lr': 0.000359, 'samples': 275904, 'steps': 1436, 'loss/train': 5.559696793556213} +01/27/2022 20:34:47 - INFO - codeparrot_training - Step 1437: {'lr': 0.00035925000000000003, 'samples': 276096, 'steps': 1437, 'loss/train': 5.99433159828186} +01/27/2022 20:34:51 - INFO - codeparrot_training - Step 1438: {'lr': 0.0003595, 'samples': 276288, 'steps': 1438, 'loss/train': 5.255759239196777} +01/27/2022 20:34:55 - INFO - codeparrot_training - Step 1439: {'lr': 0.00035975000000000004, 'samples': 276480, 'steps': 1439, 'loss/train': 4.47011411190033} +01/27/2022 20:35:00 - INFO - codeparrot_training - Step 1440: {'lr': 0.00035999999999999997, 'samples': 276672, 'steps': 1440, 'loss/train': 5.940374851226807} +01/27/2022 20:35:05 - INFO - codeparrot_training - Step 1441: {'lr': 0.00036025, 'samples': 276864, 'steps': 1441, 'loss/train': 5.317480802536011} +01/27/2022 20:35:09 - INFO - codeparrot_training - Step 1442: {'lr': 0.0003605, 'samples': 277056, 'steps': 1442, 'loss/train': 
5.7190375328063965} +01/27/2022 20:35:13 - INFO - codeparrot_training - Step 1443: {'lr': 0.00036075, 'samples': 277248, 'steps': 1443, 'loss/train': 4.8697017431259155} +01/27/2022 20:35:17 - INFO - codeparrot_training - Step 1444: {'lr': 0.000361, 'samples': 277440, 'steps': 1444, 'loss/train': 4.927966475486755} +01/27/2022 20:35:22 - INFO - codeparrot_training - Step 1445: {'lr': 0.00036125, 'samples': 277632, 'steps': 1445, 'loss/train': 5.465997934341431} +01/27/2022 20:35:26 - INFO - codeparrot_training - Step 1446: {'lr': 0.0003615, 'samples': 277824, 'steps': 1446, 'loss/train': 5.08867084980011} +01/27/2022 20:35:31 - INFO - codeparrot_training - Step 1447: {'lr': 0.00036175000000000004, 'samples': 278016, 'steps': 1447, 'loss/train': 4.6464879512786865} +01/27/2022 20:35:35 - INFO - codeparrot_training - Step 1448: {'lr': 0.000362, 'samples': 278208, 'steps': 1448, 'loss/train': 5.358344078063965} +01/27/2022 20:35:39 - INFO - codeparrot_training - Step 1449: {'lr': 0.00036225000000000005, 'samples': 278400, 'steps': 1449, 'loss/train': 5.268254399299622} +01/27/2022 20:35:45 - INFO - codeparrot_training - Step 1450: {'lr': 0.0003625, 'samples': 278592, 'steps': 1450, 'loss/train': 9.227090120315552} +01/27/2022 20:35:49 - INFO - codeparrot_training - Step 1451: {'lr': 0.00036275, 'samples': 278784, 'steps': 1451, 'loss/train': 5.206702709197998} +01/27/2022 20:35:53 - INFO - codeparrot_training - Step 1452: {'lr': 0.000363, 'samples': 278976, 'steps': 1452, 'loss/train': 6.03830623626709} +01/27/2022 20:35:57 - INFO - codeparrot_training - Step 1453: {'lr': 0.00036325, 'samples': 279168, 'steps': 1453, 'loss/train': 5.090924978256226} +01/27/2022 20:36:01 - INFO - codeparrot_training - Step 1454: {'lr': 0.0003635, 'samples': 279360, 'steps': 1454, 'loss/train': 5.084671139717102} +01/27/2022 20:36:06 - INFO - codeparrot_training - Step 1455: {'lr': 0.00036375000000000003, 'samples': 279552, 'steps': 1455, 'loss/train': 5.014504551887512} +01/27/2022 20:36:11 - INFO - codeparrot_training - Step 1456: {'lr': 0.000364, 'samples': 279744, 'steps': 1456, 'loss/train': 5.267293453216553} +01/27/2022 20:36:15 - INFO - codeparrot_training - Step 1457: {'lr': 0.00036425000000000004, 'samples': 279936, 'steps': 1457, 'loss/train': 4.5051562786102295} +01/27/2022 20:36:19 - INFO - codeparrot_training - Step 1458: {'lr': 0.0003645, 'samples': 280128, 'steps': 1458, 'loss/train': 5.632647156715393} +01/27/2022 20:36:23 - INFO - codeparrot_training - Step 1459: {'lr': 0.00036475, 'samples': 280320, 'steps': 1459, 'loss/train': 5.735796689987183} +01/27/2022 20:36:28 - INFO - codeparrot_training - Step 1460: {'lr': 0.000365, 'samples': 280512, 'steps': 1460, 'loss/train': 4.906913638114929} +01/27/2022 20:36:32 - INFO - codeparrot_training - Step 1461: {'lr': 0.00036525, 'samples': 280704, 'steps': 1461, 'loss/train': 6.09788703918457} +01/27/2022 20:36:37 - INFO - codeparrot_training - Step 1462: {'lr': 0.0003655, 'samples': 280896, 'steps': 1462, 'loss/train': 4.120104432106018} +01/27/2022 20:36:41 - INFO - codeparrot_training - Step 1463: {'lr': 0.00036575, 'samples': 281088, 'steps': 1463, 'loss/train': 5.0831029415130615} +01/27/2022 20:36:45 - INFO - codeparrot_training - Step 1464: {'lr': 0.000366, 'samples': 281280, 'steps': 1464, 'loss/train': 4.8359938859939575} +01/27/2022 20:36:50 - INFO - codeparrot_training - Step 1465: {'lr': 0.00036625000000000004, 'samples': 281472, 'steps': 1465, 'loss/train': 4.709892511367798} +01/27/2022 20:36:54 - INFO - codeparrot_training - Step 1466: 
{'lr': 0.0003665, 'samples': 281664, 'steps': 1466, 'loss/train': 4.493479371070862} +01/27/2022 20:36:58 - INFO - codeparrot_training - Step 1467: {'lr': 0.00036675000000000005, 'samples': 281856, 'steps': 1467, 'loss/train': 5.326740860939026} +01/27/2022 20:37:03 - INFO - codeparrot_training - Step 1468: {'lr': 0.000367, 'samples': 282048, 'steps': 1468, 'loss/train': 2.735465705394745} +01/27/2022 20:37:07 - INFO - codeparrot_training - Step 1469: {'lr': 0.00036725, 'samples': 282240, 'steps': 1469, 'loss/train': 5.649134159088135} +01/27/2022 20:37:13 - INFO - codeparrot_training - Step 1470: {'lr': 0.0003675, 'samples': 282432, 'steps': 1470, 'loss/train': 5.295011758804321} +01/27/2022 20:37:17 - INFO - codeparrot_training - Step 1471: {'lr': 0.00036775, 'samples': 282624, 'steps': 1471, 'loss/train': 6.056208372116089} +01/27/2022 20:37:21 - INFO - codeparrot_training - Step 1472: {'lr': 0.000368, 'samples': 282816, 'steps': 1472, 'loss/train': 5.210906982421875} +01/27/2022 20:37:25 - INFO - codeparrot_training - Step 1473: {'lr': 0.00036825000000000003, 'samples': 283008, 'steps': 1473, 'loss/train': 5.32011866569519} +01/27/2022 20:37:29 - INFO - codeparrot_training - Step 1474: {'lr': 0.0003685, 'samples': 283200, 'steps': 1474, 'loss/train': 5.910141706466675} +01/27/2022 20:37:34 - INFO - codeparrot_training - Step 1475: {'lr': 0.00036875000000000005, 'samples': 283392, 'steps': 1475, 'loss/train': 5.243043422698975} +01/27/2022 20:37:39 - INFO - codeparrot_training - Step 1476: {'lr': 0.000369, 'samples': 283584, 'steps': 1476, 'loss/train': 3.3713961839675903} +01/27/2022 20:37:43 - INFO - codeparrot_training - Step 1477: {'lr': 0.00036925, 'samples': 283776, 'steps': 1477, 'loss/train': 5.813286781311035} +01/27/2022 20:37:47 - INFO - codeparrot_training - Step 1478: {'lr': 0.0003695, 'samples': 283968, 'steps': 1478, 'loss/train': 3.6485652923583984} +01/27/2022 20:37:51 - INFO - codeparrot_training - Step 1479: {'lr': 0.00036975, 'samples': 284160, 'steps': 1479, 'loss/train': 4.983058333396912} +01/27/2022 20:37:57 - INFO - codeparrot_training - Step 1480: {'lr': 0.00037, 'samples': 284352, 'steps': 1480, 'loss/train': 5.243464350700378} +01/27/2022 20:38:01 - INFO - codeparrot_training - Step 1481: {'lr': 0.00037025000000000003, 'samples': 284544, 'steps': 1481, 'loss/train': 5.352209687232971} +01/27/2022 20:38:05 - INFO - codeparrot_training - Step 1482: {'lr': 0.0003705, 'samples': 284736, 'steps': 1482, 'loss/train': 5.5908472537994385} +01/27/2022 20:38:09 - INFO - codeparrot_training - Step 1483: {'lr': 0.00037075000000000004, 'samples': 284928, 'steps': 1483, 'loss/train': 5.441688895225525} +01/27/2022 20:38:14 - INFO - codeparrot_training - Step 1484: {'lr': 0.000371, 'samples': 285120, 'steps': 1484, 'loss/train': 5.308485746383667} +01/27/2022 20:38:19 - INFO - codeparrot_training - Step 1485: {'lr': 0.00037125000000000005, 'samples': 285312, 'steps': 1485, 'loss/train': 5.070591330528259} +01/27/2022 20:38:23 - INFO - codeparrot_training - Step 1486: {'lr': 0.00037150000000000003, 'samples': 285504, 'steps': 1486, 'loss/train': 5.304766058921814} +01/27/2022 20:38:27 - INFO - codeparrot_training - Step 1487: {'lr': 0.00037175, 'samples': 285696, 'steps': 1487, 'loss/train': 6.033056259155273} +01/27/2022 20:38:31 - INFO - codeparrot_training - Step 1488: {'lr': 0.000372, 'samples': 285888, 'steps': 1488, 'loss/train': 4.6105087995529175} +01/27/2022 20:38:35 - INFO - codeparrot_training - Step 1489: {'lr': 0.00037225, 'samples': 286080, 'steps': 1489, 
'loss/train': 8.464864253997803} +01/27/2022 20:38:40 - INFO - codeparrot_training - Step 1490: {'lr': 0.0003725, 'samples': 286272, 'steps': 1490, 'loss/train': 4.637064456939697} +01/27/2022 20:38:45 - INFO - codeparrot_training - Step 1491: {'lr': 0.00037275000000000003, 'samples': 286464, 'steps': 1491, 'loss/train': 5.377440333366394} +01/27/2022 20:38:49 - INFO - codeparrot_training - Step 1492: {'lr': 0.000373, 'samples': 286656, 'steps': 1492, 'loss/train': 5.113956570625305} +01/27/2022 20:38:53 - INFO - codeparrot_training - Step 1493: {'lr': 0.00037325000000000005, 'samples': 286848, 'steps': 1493, 'loss/train': 5.29119086265564} +01/27/2022 20:38:57 - INFO - codeparrot_training - Step 1494: {'lr': 0.0003735, 'samples': 287040, 'steps': 1494, 'loss/train': 4.833268046379089} +01/27/2022 20:39:03 - INFO - codeparrot_training - Step 1495: {'lr': 0.00037375000000000006, 'samples': 287232, 'steps': 1495, 'loss/train': 5.805394649505615} +01/27/2022 20:39:07 - INFO - codeparrot_training - Step 1496: {'lr': 0.000374, 'samples': 287424, 'steps': 1496, 'loss/train': 5.163054585456848} +01/27/2022 20:39:11 - INFO - codeparrot_training - Step 1497: {'lr': 0.00037425, 'samples': 287616, 'steps': 1497, 'loss/train': 6.053295135498047} +01/27/2022 20:39:15 - INFO - codeparrot_training - Step 1498: {'lr': 0.0003745, 'samples': 287808, 'steps': 1498, 'loss/train': 5.041678547859192} +01/27/2022 20:39:20 - INFO - codeparrot_training - Step 1499: {'lr': 0.00037475000000000003, 'samples': 288000, 'steps': 1499, 'loss/train': 5.274630188941956} +01/27/2022 20:39:25 - INFO - codeparrot_training - Step 1500: {'lr': 0.000375, 'samples': 288192, 'steps': 1500, 'loss/train': 5.205589056015015} +01/27/2022 20:39:29 - INFO - codeparrot_training - Step 1501: {'lr': 0.00037525, 'samples': 288384, 'steps': 1501, 'loss/train': 5.572053551673889} +01/27/2022 20:39:33 - INFO - codeparrot_training - Step 1502: {'lr': 0.0003755, 'samples': 288576, 'steps': 1502, 'loss/train': 5.647122502326965} +01/27/2022 20:39:37 - INFO - codeparrot_training - Step 1503: {'lr': 0.00037575, 'samples': 288768, 'steps': 1503, 'loss/train': 2.705719470977783} +01/27/2022 20:39:41 - INFO - codeparrot_training - Step 1504: {'lr': 0.00037600000000000003, 'samples': 288960, 'steps': 1504, 'loss/train': 4.762568950653076} +01/27/2022 20:39:46 - INFO - codeparrot_training - Step 1505: {'lr': 0.00037624999999999996, 'samples': 289152, 'steps': 1505, 'loss/train': 6.0654966831207275} +01/27/2022 20:39:51 - INFO - codeparrot_training - Step 1506: {'lr': 0.0003765, 'samples': 289344, 'steps': 1506, 'loss/train': 6.051204442977905} +01/27/2022 20:39:55 - INFO - codeparrot_training - Step 1507: {'lr': 0.00037674999999999997, 'samples': 289536, 'steps': 1507, 'loss/train': 5.4785696268081665} +01/27/2022 20:39:59 - INFO - codeparrot_training - Step 1508: {'lr': 0.000377, 'samples': 289728, 'steps': 1508, 'loss/train': 6.292721271514893} +01/27/2022 20:40:03 - INFO - codeparrot_training - Step 1509: {'lr': 0.00037725, 'samples': 289920, 'steps': 1509, 'loss/train': 6.2328407764434814} +01/27/2022 20:40:09 - INFO - codeparrot_training - Step 1510: {'lr': 0.0003775, 'samples': 290112, 'steps': 1510, 'loss/train': 5.49200963973999} +01/27/2022 20:40:13 - INFO - codeparrot_training - Step 1511: {'lr': 0.00037775, 'samples': 290304, 'steps': 1511, 'loss/train': 4.554342269897461} +01/27/2022 20:40:17 - INFO - codeparrot_training - Step 1512: {'lr': 0.000378, 'samples': 290496, 'steps': 1512, 'loss/train': 5.183329939842224} +01/27/2022 20:40:21 - 
INFO - codeparrot_training - Step 1513: {'lr': 0.00037825, 'samples': 290688, 'steps': 1513, 'loss/train': 6.6806960105896} +01/27/2022 20:40:25 - INFO - codeparrot_training - Step 1514: {'lr': 0.0003785, 'samples': 290880, 'steps': 1514, 'loss/train': 5.068376183509827} +01/27/2022 20:40:31 - INFO - codeparrot_training - Step 1515: {'lr': 0.00037874999999999996, 'samples': 291072, 'steps': 1515, 'loss/train': 6.151756525039673} +01/27/2022 20:40:35 - INFO - codeparrot_training - Step 1516: {'lr': 0.000379, 'samples': 291264, 'steps': 1516, 'loss/train': 4.8808043003082275} +01/27/2022 20:40:39 - INFO - codeparrot_training - Step 1517: {'lr': 0.00037925, 'samples': 291456, 'steps': 1517, 'loss/train': 6.073041200637817} +01/27/2022 20:40:43 - INFO - codeparrot_training - Step 1518: {'lr': 0.0003795, 'samples': 291648, 'steps': 1518, 'loss/train': 7.414225101470947} +01/27/2022 20:40:47 - INFO - codeparrot_training - Step 1519: {'lr': 0.00037975, 'samples': 291840, 'steps': 1519, 'loss/train': 4.969941258430481} +01/27/2022 20:40:52 - INFO - codeparrot_training - Step 1520: {'lr': 0.00038, 'samples': 292032, 'steps': 1520, 'loss/train': 6.691567897796631} +01/27/2022 20:40:56 - INFO - codeparrot_training - Step 1521: {'lr': 0.00038025, 'samples': 292224, 'steps': 1521, 'loss/train': 5.593958258628845} +01/27/2022 20:41:01 - INFO - codeparrot_training - Step 1522: {'lr': 0.00038050000000000003, 'samples': 292416, 'steps': 1522, 'loss/train': 5.019861459732056} +01/27/2022 20:41:05 - INFO - codeparrot_training - Step 1523: {'lr': 0.00038075, 'samples': 292608, 'steps': 1523, 'loss/train': 5.610503911972046} +01/27/2022 20:41:09 - INFO - codeparrot_training - Step 1524: {'lr': 0.000381, 'samples': 292800, 'steps': 1524, 'loss/train': 5.230224251747131} +01/27/2022 20:41:14 - INFO - codeparrot_training - Step 1525: {'lr': 0.00038124999999999997, 'samples': 292992, 'steps': 1525, 'loss/train': 5.2162288427352905} +01/27/2022 20:41:18 - INFO - codeparrot_training - Step 1526: {'lr': 0.0003815, 'samples': 293184, 'steps': 1526, 'loss/train': 5.494348168373108} +01/27/2022 20:41:22 - INFO - codeparrot_training - Step 1527: {'lr': 0.00038175, 'samples': 293376, 'steps': 1527, 'loss/train': 5.9144861698150635} +01/27/2022 20:41:26 - INFO - codeparrot_training - Step 1528: {'lr': 0.000382, 'samples': 293568, 'steps': 1528, 'loss/train': 5.5439969301223755} +01/27/2022 20:41:30 - INFO - codeparrot_training - Step 1529: {'lr': 0.00038225, 'samples': 293760, 'steps': 1529, 'loss/train': 3.6012353897094727} +01/27/2022 20:41:37 - INFO - codeparrot_training - Step 1530: {'lr': 0.00038250000000000003, 'samples': 293952, 'steps': 1530, 'loss/train': 4.143694639205933} +01/27/2022 20:41:41 - INFO - codeparrot_training - Step 1531: {'lr': 0.00038275, 'samples': 294144, 'steps': 1531, 'loss/train': 4.783222675323486} +01/27/2022 20:41:46 - INFO - codeparrot_training - Step 1532: {'lr': 0.00038300000000000004, 'samples': 294336, 'steps': 1532, 'loss/train': 5.426028370857239} +01/27/2022 20:41:50 - INFO - codeparrot_training - Step 1533: {'lr': 0.00038324999999999996, 'samples': 294528, 'steps': 1533, 'loss/train': 4.787345051765442} +01/27/2022 20:41:54 - INFO - codeparrot_training - Step 1534: {'lr': 0.0003835, 'samples': 294720, 'steps': 1534, 'loss/train': 4.922302007675171} +01/27/2022 20:41:59 - INFO - codeparrot_training - Step 1535: {'lr': 0.00038375, 'samples': 294912, 'steps': 1535, 'loss/train': 3.7891005277633667} +01/27/2022 20:42:03 - INFO - codeparrot_training - Step 1536: {'lr': 0.000384, 
'samples': 295104, 'steps': 1536, 'loss/train': 6.030781030654907} +01/27/2022 20:42:08 - INFO - codeparrot_training - Step 1537: {'lr': 0.00038425, 'samples': 295296, 'steps': 1537, 'loss/train': 4.749072074890137} +01/27/2022 20:42:12 - INFO - codeparrot_training - Step 1538: {'lr': 0.0003845, 'samples': 295488, 'steps': 1538, 'loss/train': 4.922683954238892} +01/27/2022 20:42:16 - INFO - codeparrot_training - Step 1539: {'lr': 0.00038475, 'samples': 295680, 'steps': 1539, 'loss/train': 3.9118831157684326} +01/27/2022 20:42:22 - INFO - codeparrot_training - Step 1540: {'lr': 0.00038500000000000003, 'samples': 295872, 'steps': 1540, 'loss/train': 7.51905369758606} +01/27/2022 20:42:26 - INFO - codeparrot_training - Step 1541: {'lr': 0.00038525, 'samples': 296064, 'steps': 1541, 'loss/train': 8.751177549362183} +01/27/2022 20:42:30 - INFO - codeparrot_training - Step 1542: {'lr': 0.0003855, 'samples': 296256, 'steps': 1542, 'loss/train': 4.647533655166626} +01/27/2022 20:42:34 - INFO - codeparrot_training - Step 1543: {'lr': 0.00038574999999999997, 'samples': 296448, 'steps': 1543, 'loss/train': 5.61114764213562} +01/27/2022 20:42:38 - INFO - codeparrot_training - Step 1544: {'lr': 0.000386, 'samples': 296640, 'steps': 1544, 'loss/train': 5.708655953407288} +01/27/2022 20:42:43 - INFO - codeparrot_training - Step 1545: {'lr': 0.00038625, 'samples': 296832, 'steps': 1545, 'loss/train': 5.464983701705933} +01/27/2022 20:42:48 - INFO - codeparrot_training - Step 1546: {'lr': 0.0003865, 'samples': 297024, 'steps': 1546, 'loss/train': 5.688818335533142} +01/27/2022 20:42:52 - INFO - codeparrot_training - Step 1547: {'lr': 0.00038675, 'samples': 297216, 'steps': 1547, 'loss/train': 5.926201701164246} +01/27/2022 20:42:56 - INFO - codeparrot_training - Step 1548: {'lr': 0.00038700000000000003, 'samples': 297408, 'steps': 1548, 'loss/train': 4.735667824745178} +01/27/2022 20:43:00 - INFO - codeparrot_training - Step 1549: {'lr': 0.00038725, 'samples': 297600, 'steps': 1549, 'loss/train': 4.420346260070801} +01/27/2022 20:43:05 - INFO - codeparrot_training - Step 1550: {'lr': 0.00038750000000000004, 'samples': 297792, 'steps': 1550, 'loss/train': 4.591160774230957} +01/27/2022 20:43:09 - INFO - codeparrot_training - Step 1551: {'lr': 0.00038774999999999997, 'samples': 297984, 'steps': 1551, 'loss/train': 4.2985639572143555} +01/27/2022 20:43:13 - INFO - codeparrot_training - Step 1552: {'lr': 0.000388, 'samples': 298176, 'steps': 1552, 'loss/train': 4.6727306842803955} +01/27/2022 20:43:18 - INFO - codeparrot_training - Step 1553: {'lr': 0.00038825, 'samples': 298368, 'steps': 1553, 'loss/train': 3.6735552549362183} +01/27/2022 20:43:22 - INFO - codeparrot_training - Step 1554: {'lr': 0.0003885, 'samples': 298560, 'steps': 1554, 'loss/train': 5.152446985244751} +01/27/2022 20:43:28 - INFO - codeparrot_training - Step 1555: {'lr': 0.00038875, 'samples': 298752, 'steps': 1555, 'loss/train': 5.147357225418091} +01/27/2022 20:43:32 - INFO - codeparrot_training - Step 1556: {'lr': 0.000389, 'samples': 298944, 'steps': 1556, 'loss/train': 4.523068070411682} +01/27/2022 20:43:36 - INFO - codeparrot_training - Step 1557: {'lr': 0.00038925, 'samples': 299136, 'steps': 1557, 'loss/train': 4.796229600906372} +01/27/2022 20:43:41 - INFO - codeparrot_training - Step 1558: {'lr': 0.00038950000000000003, 'samples': 299328, 'steps': 1558, 'loss/train': 4.981274843215942} +01/27/2022 20:43:46 - INFO - codeparrot_training - Step 1559: {'lr': 0.00038975, 'samples': 299520, 'steps': 1559, 'loss/train': 
5.344164133071899} +01/27/2022 20:43:50 - INFO - codeparrot_training - Step 1560: {'lr': 0.00039000000000000005, 'samples': 299712, 'steps': 1560, 'loss/train': 3.658048152923584} +01/27/2022 20:43:54 - INFO - codeparrot_training - Step 1561: {'lr': 0.00039024999999999997, 'samples': 299904, 'steps': 1561, 'loss/train': 6.048384189605713} +01/27/2022 20:43:58 - INFO - codeparrot_training - Step 1562: {'lr': 0.0003905, 'samples': 300096, 'steps': 1562, 'loss/train': 4.9259244203567505} +01/27/2022 20:44:02 - INFO - codeparrot_training - Step 1563: {'lr': 0.00039075, 'samples': 300288, 'steps': 1563, 'loss/train': 4.429136037826538} +01/27/2022 20:44:08 - INFO - codeparrot_training - Step 1564: {'lr': 0.000391, 'samples': 300480, 'steps': 1564, 'loss/train': 4.8463979959487915} +01/27/2022 20:44:12 - INFO - codeparrot_training - Step 1565: {'lr': 0.00039125, 'samples': 300672, 'steps': 1565, 'loss/train': 4.949241399765015} +01/27/2022 20:44:16 - INFO - codeparrot_training - Step 1566: {'lr': 0.00039150000000000003, 'samples': 300864, 'steps': 1566, 'loss/train': 4.108010530471802} +01/27/2022 20:44:20 - INFO - codeparrot_training - Step 1567: {'lr': 0.00039175, 'samples': 301056, 'steps': 1567, 'loss/train': 4.8989574909210205} +01/27/2022 20:44:25 - INFO - codeparrot_training - Step 1568: {'lr': 0.00039200000000000004, 'samples': 301248, 'steps': 1568, 'loss/train': 4.339829206466675} +01/27/2022 20:44:30 - INFO - codeparrot_training - Step 1569: {'lr': 0.00039225, 'samples': 301440, 'steps': 1569, 'loss/train': 4.469141006469727} +01/27/2022 20:44:34 - INFO - codeparrot_training - Step 1570: {'lr': 0.0003925, 'samples': 301632, 'steps': 1570, 'loss/train': 3.9126977920532227} +01/27/2022 20:44:38 - INFO - codeparrot_training - Step 1571: {'lr': 0.00039275, 'samples': 301824, 'steps': 1571, 'loss/train': 5.024573922157288} +01/27/2022 20:44:42 - INFO - codeparrot_training - Step 1572: {'lr': 0.000393, 'samples': 302016, 'steps': 1572, 'loss/train': 4.451012134552002} +01/27/2022 20:44:47 - INFO - codeparrot_training - Step 1573: {'lr': 0.00039325, 'samples': 302208, 'steps': 1573, 'loss/train': 4.521331787109375} +01/27/2022 20:44:52 - INFO - codeparrot_training - Step 1574: {'lr': 0.0003935, 'samples': 302400, 'steps': 1574, 'loss/train': 4.5153197050094604} +01/27/2022 20:44:56 - INFO - codeparrot_training - Step 1575: {'lr': 0.00039375, 'samples': 302592, 'steps': 1575, 'loss/train': 4.996673941612244} +01/27/2022 20:45:00 - INFO - codeparrot_training - Step 1576: {'lr': 0.00039400000000000004, 'samples': 302784, 'steps': 1576, 'loss/train': 5.757978558540344} +01/27/2022 20:45:04 - INFO - codeparrot_training - Step 1577: {'lr': 0.00039425, 'samples': 302976, 'steps': 1577, 'loss/train': 5.083407282829285} +01/27/2022 20:45:08 - INFO - codeparrot_training - Step 1578: {'lr': 0.00039450000000000005, 'samples': 303168, 'steps': 1578, 'loss/train': 5.262720823287964} +01/27/2022 20:45:13 - INFO - codeparrot_training - Step 1579: {'lr': 0.00039474999999999997, 'samples': 303360, 'steps': 1579, 'loss/train': 5.140163898468018} +01/27/2022 20:45:18 - INFO - codeparrot_training - Step 1580: {'lr': 0.000395, 'samples': 303552, 'steps': 1580, 'loss/train': 5.56126070022583} +01/27/2022 20:45:22 - INFO - codeparrot_training - Step 1581: {'lr': 0.00039525, 'samples': 303744, 'steps': 1581, 'loss/train': 4.782194137573242} +01/27/2022 20:45:26 - INFO - codeparrot_training - Step 1582: {'lr': 0.0003955, 'samples': 303936, 'steps': 1582, 'loss/train': 4.439503312110901} +01/27/2022 20:45:30 - INFO - 
codeparrot_training - Step 1583: {'lr': 0.00039575, 'samples': 304128, 'steps': 1583, 'loss/train': 3.5207608938217163} +01/27/2022 20:45:36 - INFO - codeparrot_training - Step 1584: {'lr': 0.00039600000000000003, 'samples': 304320, 'steps': 1584, 'loss/train': 5.967398643493652} +01/27/2022 20:45:40 - INFO - codeparrot_training - Step 1585: {'lr': 0.00039625, 'samples': 304512, 'steps': 1585, 'loss/train': 5.810554504394531} +01/27/2022 20:45:45 - INFO - codeparrot_training - Step 1586: {'lr': 0.00039650000000000004, 'samples': 304704, 'steps': 1586, 'loss/train': 2.8902506232261658} +01/27/2022 20:45:49 - INFO - codeparrot_training - Step 1587: {'lr': 0.00039675, 'samples': 304896, 'steps': 1587, 'loss/train': 4.792583584785461} +01/27/2022 20:45:53 - INFO - codeparrot_training - Step 1588: {'lr': 0.00039700000000000005, 'samples': 305088, 'steps': 1588, 'loss/train': 4.8030846118927} +01/27/2022 20:45:58 - INFO - codeparrot_training - Step 1589: {'lr': 0.00039725, 'samples': 305280, 'steps': 1589, 'loss/train': 4.019704699516296} +01/27/2022 20:46:02 - INFO - codeparrot_training - Step 1590: {'lr': 0.0003975, 'samples': 305472, 'steps': 1590, 'loss/train': 4.39790153503418} +01/27/2022 20:46:06 - INFO - codeparrot_training - Step 1591: {'lr': 0.00039775, 'samples': 305664, 'steps': 1591, 'loss/train': 4.640955805778503} +01/27/2022 20:46:10 - INFO - codeparrot_training - Step 1592: {'lr': 0.000398, 'samples': 305856, 'steps': 1592, 'loss/train': 4.73585844039917} +01/27/2022 20:46:15 - INFO - codeparrot_training - Step 1593: {'lr': 0.00039825, 'samples': 306048, 'steps': 1593, 'loss/train': 4.972269058227539} +01/27/2022 20:46:20 - INFO - codeparrot_training - Step 1594: {'lr': 0.00039850000000000004, 'samples': 306240, 'steps': 1594, 'loss/train': 4.492441534996033} +01/27/2022 20:46:24 - INFO - codeparrot_training - Step 1595: {'lr': 0.00039875, 'samples': 306432, 'steps': 1595, 'loss/train': 3.3818349838256836} +01/27/2022 20:46:28 - INFO - codeparrot_training - Step 1596: {'lr': 0.00039900000000000005, 'samples': 306624, 'steps': 1596, 'loss/train': 5.988375306129456} +01/27/2022 20:46:32 - INFO - codeparrot_training - Step 1597: {'lr': 0.00039925000000000003, 'samples': 306816, 'steps': 1597, 'loss/train': 4.95640754699707} +01/27/2022 20:46:36 - INFO - codeparrot_training - Step 1598: {'lr': 0.0003995, 'samples': 307008, 'steps': 1598, 'loss/train': 4.50589907169342} +01/27/2022 20:46:42 - INFO - codeparrot_training - Step 1599: {'lr': 0.00039975, 'samples': 307200, 'steps': 1599, 'loss/train': 1.7140374183654785} +01/27/2022 20:46:46 - INFO - codeparrot_training - Step 1600: {'lr': 0.0004, 'samples': 307392, 'steps': 1600, 'loss/train': 4.591402530670166} +01/27/2022 20:46:51 - INFO - codeparrot_training - Step 1601: {'lr': 0.00040025, 'samples': 307584, 'steps': 1601, 'loss/train': 4.896166205406189} +01/27/2022 20:46:55 - INFO - codeparrot_training - Step 1602: {'lr': 0.00040050000000000003, 'samples': 307776, 'steps': 1602, 'loss/train': 3.837166428565979} +01/27/2022 20:46:59 - INFO - codeparrot_training - Step 1603: {'lr': 0.00040075, 'samples': 307968, 'steps': 1603, 'loss/train': 5.559482574462891} +01/27/2022 20:47:04 - INFO - codeparrot_training - Step 1604: {'lr': 0.00040100000000000004, 'samples': 308160, 'steps': 1604, 'loss/train': 3.984802007675171} +01/27/2022 20:47:08 - INFO - codeparrot_training - Step 1605: {'lr': 0.00040125, 'samples': 308352, 'steps': 1605, 'loss/train': 3.6002179384231567} +01/27/2022 20:47:12 - INFO - codeparrot_training - Step 1606: {'lr': 
0.00040150000000000006, 'samples': 308544, 'steps': 1606, 'loss/train': 5.544490456581116} +01/27/2022 20:47:17 - INFO - codeparrot_training - Step 1607: {'lr': 0.00040175, 'samples': 308736, 'steps': 1607, 'loss/train': 5.94091272354126} +01/27/2022 20:47:21 - INFO - codeparrot_training - Step 1608: {'lr': 0.000402, 'samples': 308928, 'steps': 1608, 'loss/train': 6.148103713989258} +01/27/2022 20:47:27 - INFO - codeparrot_training - Step 1609: {'lr': 0.00040225, 'samples': 309120, 'steps': 1609, 'loss/train': 5.291676163673401} +01/27/2022 20:47:31 - INFO - codeparrot_training - Step 1610: {'lr': 0.0004025, 'samples': 309312, 'steps': 1610, 'loss/train': 4.492552042007446} +01/27/2022 20:47:35 - INFO - codeparrot_training - Step 1611: {'lr': 0.00040275, 'samples': 309504, 'steps': 1611, 'loss/train': 4.539552569389343} +01/27/2022 20:47:39 - INFO - codeparrot_training - Step 1612: {'lr': 0.00040300000000000004, 'samples': 309696, 'steps': 1612, 'loss/train': 5.3907119035720825} +01/27/2022 20:47:43 - INFO - codeparrot_training - Step 1613: {'lr': 0.00040325, 'samples': 309888, 'steps': 1613, 'loss/train': 4.658654093742371} +01/27/2022 20:47:48 - INFO - codeparrot_training - Step 1614: {'lr': 0.00040350000000000005, 'samples': 310080, 'steps': 1614, 'loss/train': 3.8750767707824707} +01/27/2022 20:47:53 - INFO - codeparrot_training - Step 1615: {'lr': 0.00040375000000000003, 'samples': 310272, 'steps': 1615, 'loss/train': 4.661151051521301} +01/27/2022 20:47:57 - INFO - codeparrot_training - Step 1616: {'lr': 0.000404, 'samples': 310464, 'steps': 1616, 'loss/train': 4.621346712112427} +01/27/2022 20:48:01 - INFO - codeparrot_training - Step 1617: {'lr': 0.00040425, 'samples': 310656, 'steps': 1617, 'loss/train': 6.432635307312012} +01/27/2022 20:48:05 - INFO - codeparrot_training - Step 1618: {'lr': 0.0004045, 'samples': 310848, 'steps': 1618, 'loss/train': 5.453046798706055} +01/27/2022 20:48:10 - INFO - codeparrot_training - Step 1619: {'lr': 0.00040475, 'samples': 311040, 'steps': 1619, 'loss/train': 4.535217046737671} +01/27/2022 20:48:14 - INFO - codeparrot_training - Step 1620: {'lr': 0.00040500000000000003, 'samples': 311232, 'steps': 1620, 'loss/train': 5.5862696170806885} +01/27/2022 20:48:19 - INFO - codeparrot_training - Step 1621: {'lr': 0.00040525, 'samples': 311424, 'steps': 1621, 'loss/train': 4.422206282615662} +01/27/2022 20:48:23 - INFO - codeparrot_training - Step 1622: {'lr': 0.00040550000000000004, 'samples': 311616, 'steps': 1622, 'loss/train': 4.436450958251953} +01/27/2022 20:48:27 - INFO - codeparrot_training - Step 1623: {'lr': 0.00040575, 'samples': 311808, 'steps': 1623, 'loss/train': 4.70411217212677} +01/27/2022 20:48:33 - INFO - codeparrot_training - Step 1624: {'lr': 0.00040600000000000006, 'samples': 312000, 'steps': 1624, 'loss/train': 6.878641605377197} +01/27/2022 20:48:37 - INFO - codeparrot_training - Step 1625: {'lr': 0.00040625000000000004, 'samples': 312192, 'steps': 1625, 'loss/train': 5.109841346740723} +01/27/2022 20:48:41 - INFO - codeparrot_training - Step 1626: {'lr': 0.00040649999999999996, 'samples': 312384, 'steps': 1626, 'loss/train': 3.6674916744232178} +01/27/2022 20:48:45 - INFO - codeparrot_training - Step 1627: {'lr': 0.00040675, 'samples': 312576, 'steps': 1627, 'loss/train': 5.621813178062439} +01/27/2022 20:48:49 - INFO - codeparrot_training - Step 1628: {'lr': 0.00040699999999999997, 'samples': 312768, 'steps': 1628, 'loss/train': 4.177264451980591} +01/27/2022 20:48:55 - INFO - codeparrot_training - Step 1629: {'lr': 0.00040725, 
'samples': 312960, 'steps': 1629, 'loss/train': 4.30924665927887} +01/27/2022 20:48:59 - INFO - codeparrot_training - Step 1630: {'lr': 0.0004075, 'samples': 313152, 'steps': 1630, 'loss/train': 4.878913521766663} +01/27/2022 20:49:03 - INFO - codeparrot_training - Step 1631: {'lr': 0.00040775, 'samples': 313344, 'steps': 1631, 'loss/train': 4.684783101081848} +01/27/2022 20:49:07 - INFO - codeparrot_training - Step 1632: {'lr': 0.000408, 'samples': 313536, 'steps': 1632, 'loss/train': 4.662874817848206} +01/27/2022 20:49:11 - INFO - codeparrot_training - Step 1633: {'lr': 0.00040825000000000003, 'samples': 313728, 'steps': 1633, 'loss/train': 4.981182932853699} +01/27/2022 20:49:17 - INFO - codeparrot_training - Step 1634: {'lr': 0.0004085, 'samples': 313920, 'steps': 1634, 'loss/train': 4.151762008666992} +01/27/2022 20:49:21 - INFO - codeparrot_training - Step 1635: {'lr': 0.00040875, 'samples': 314112, 'steps': 1635, 'loss/train': 5.552863240242004} +01/27/2022 20:49:25 - INFO - codeparrot_training - Step 1636: {'lr': 0.00040899999999999997, 'samples': 314304, 'steps': 1636, 'loss/train': 5.81500518321991} +01/27/2022 20:49:29 - INFO - codeparrot_training - Step 1637: {'lr': 0.00040925, 'samples': 314496, 'steps': 1637, 'loss/train': 5.945162773132324} +01/27/2022 20:49:33 - INFO - codeparrot_training - Step 1638: {'lr': 0.0004095, 'samples': 314688, 'steps': 1638, 'loss/train': 5.293833374977112} +01/27/2022 20:49:39 - INFO - codeparrot_training - Step 1639: {'lr': 0.00040975, 'samples': 314880, 'steps': 1639, 'loss/train': 5.059976935386658} +01/27/2022 20:49:43 - INFO - codeparrot_training - Step 1640: {'lr': 0.00041, 'samples': 315072, 'steps': 1640, 'loss/train': 5.369177341461182} +01/27/2022 20:49:47 - INFO - codeparrot_training - Step 1641: {'lr': 0.00041025, 'samples': 315264, 'steps': 1641, 'loss/train': 5.4606417417526245} +01/27/2022 20:49:51 - INFO - codeparrot_training - Step 1642: {'lr': 0.0004105, 'samples': 315456, 'steps': 1642, 'loss/train': 4.924026846885681} +01/27/2022 20:49:58 - INFO - codeparrot_training - Step 1643: {'lr': 0.00041075000000000004, 'samples': 315648, 'steps': 1643, 'loss/train': 4.861361145973206} +01/27/2022 20:50:02 - INFO - codeparrot_training - Step 1644: {'lr': 0.00041099999999999996, 'samples': 315840, 'steps': 1644, 'loss/train': 4.4336124658584595} +01/27/2022 20:50:06 - INFO - codeparrot_training - Step 1645: {'lr': 0.00041125, 'samples': 316032, 'steps': 1645, 'loss/train': 6.497914552688599} +01/27/2022 20:50:10 - INFO - codeparrot_training - Step 1646: {'lr': 0.0004115, 'samples': 316224, 'steps': 1646, 'loss/train': 4.900483846664429} +01/27/2022 20:50:14 - INFO - codeparrot_training - Step 1647: {'lr': 0.00041175, 'samples': 316416, 'steps': 1647, 'loss/train': 5.458470582962036} +01/27/2022 20:50:18 - INFO - codeparrot_training - Step 1648: {'lr': 0.000412, 'samples': 316608, 'steps': 1648, 'loss/train': 8.15811538696289} +01/27/2022 20:50:24 - INFO - codeparrot_training - Step 1649: {'lr': 0.00041225, 'samples': 316800, 'steps': 1649, 'loss/train': 5.343349456787109} +01/27/2022 20:50:28 - INFO - codeparrot_training - Step 1650: {'lr': 0.0004125, 'samples': 316992, 'steps': 1650, 'loss/train': 4.865218877792358} +01/27/2022 20:50:32 - INFO - codeparrot_training - Step 1651: {'lr': 0.00041275000000000003, 'samples': 317184, 'steps': 1651, 'loss/train': 4.42691445350647} +01/27/2022 20:50:36 - INFO - codeparrot_training - Step 1652: {'lr': 0.000413, 'samples': 317376, 'steps': 1652, 'loss/train': 5.561220645904541} +01/27/2022 
20:50:40 - INFO - codeparrot_training - Step 1653: {'lr': 0.00041325, 'samples': 317568, 'steps': 1653, 'loss/train': 3.0752928256988525} +01/27/2022 20:50:46 - INFO - codeparrot_training - Step 1654: {'lr': 0.00041349999999999997, 'samples': 317760, 'steps': 1654, 'loss/train': 4.584151983261108} +01/27/2022 20:50:50 - INFO - codeparrot_training - Step 1655: {'lr': 0.00041375, 'samples': 317952, 'steps': 1655, 'loss/train': 2.5737547874450684} +01/27/2022 20:50:55 - INFO - codeparrot_training - Step 1656: {'lr': 0.000414, 'samples': 318144, 'steps': 1656, 'loss/train': 4.079554796218872} +01/27/2022 20:50:59 - INFO - codeparrot_training - Step 1657: {'lr': 0.00041425, 'samples': 318336, 'steps': 1657, 'loss/train': 3.551210403442383} +01/27/2022 20:51:03 - INFO - codeparrot_training - Step 1658: {'lr': 0.0004145, 'samples': 318528, 'steps': 1658, 'loss/train': 5.817299008369446} +01/27/2022 20:51:09 - INFO - codeparrot_training - Step 1659: {'lr': 0.00041475, 'samples': 318720, 'steps': 1659, 'loss/train': 5.47824239730835} +01/27/2022 20:51:13 - INFO - codeparrot_training - Step 1660: {'lr': 0.000415, 'samples': 318912, 'steps': 1660, 'loss/train': 4.1034064292907715} +01/27/2022 20:51:18 - INFO - codeparrot_training - Step 1661: {'lr': 0.00041525000000000004, 'samples': 319104, 'steps': 1661, 'loss/train': 4.750672459602356} +01/27/2022 20:51:22 - INFO - codeparrot_training - Step 1662: {'lr': 0.00041549999999999996, 'samples': 319296, 'steps': 1662, 'loss/train': 4.089618802070618} +01/27/2022 20:51:26 - INFO - codeparrot_training - Step 1663: {'lr': 0.00041575, 'samples': 319488, 'steps': 1663, 'loss/train': 4.134867310523987} +01/27/2022 20:51:30 - INFO - codeparrot_training - Step 1664: {'lr': 0.000416, 'samples': 319680, 'steps': 1664, 'loss/train': 4.357253193855286} +01/27/2022 20:51:35 - INFO - codeparrot_training - Step 1665: {'lr': 0.00041625, 'samples': 319872, 'steps': 1665, 'loss/train': 5.185676693916321} +01/27/2022 20:51:39 - INFO - codeparrot_training - Step 1666: {'lr': 0.0004165, 'samples': 320064, 'steps': 1666, 'loss/train': 4.968478202819824} +01/27/2022 20:51:44 - INFO - codeparrot_training - Step 1667: {'lr': 0.00041675, 'samples': 320256, 'steps': 1667, 'loss/train': 10.678154468536377} +01/27/2022 20:51:48 - INFO - codeparrot_training - Step 1668: {'lr': 0.000417, 'samples': 320448, 'steps': 1668, 'loss/train': 6.057011604309082} +01/27/2022 20:51:52 - INFO - codeparrot_training - Step 1669: {'lr': 0.00041725000000000003, 'samples': 320640, 'steps': 1669, 'loss/train': 4.69346022605896} +01/27/2022 20:51:58 - INFO - codeparrot_training - Step 1670: {'lr': 0.0004175, 'samples': 320832, 'steps': 1670, 'loss/train': 5.976093292236328} +01/27/2022 20:52:02 - INFO - codeparrot_training - Step 1671: {'lr': 0.00041775000000000004, 'samples': 321024, 'steps': 1671, 'loss/train': 5.555848717689514} +01/27/2022 20:52:07 - INFO - codeparrot_training - Step 1672: {'lr': 0.00041799999999999997, 'samples': 321216, 'steps': 1672, 'loss/train': 5.629608750343323} +01/27/2022 20:52:11 - INFO - codeparrot_training - Step 1673: {'lr': 0.00041825, 'samples': 321408, 'steps': 1673, 'loss/train': 4.571760535240173} +01/27/2022 20:52:15 - INFO - codeparrot_training - Step 1674: {'lr': 0.0004185, 'samples': 321600, 'steps': 1674, 'loss/train': 3.648239493370056} +01/27/2022 20:52:20 - INFO - codeparrot_training - Step 1675: {'lr': 0.00041875, 'samples': 321792, 'steps': 1675, 'loss/train': 6.776950836181641} +01/27/2022 20:52:24 - INFO - codeparrot_training - Step 1676: {'lr': 
0.000419, 'samples': 321984, 'steps': 1676, 'loss/train': 5.541209936141968} +01/27/2022 20:52:29 - INFO - codeparrot_training - Step 1677: {'lr': 0.00041925, 'samples': 322176, 'steps': 1677, 'loss/train': 4.438369274139404} +01/27/2022 20:52:33 - INFO - codeparrot_training - Step 1678: {'lr': 0.0004195, 'samples': 322368, 'steps': 1678, 'loss/train': 5.130235075950623} +01/27/2022 20:52:37 - INFO - codeparrot_training - Step 1679: {'lr': 0.00041975000000000004, 'samples': 322560, 'steps': 1679, 'loss/train': 4.956532716751099} +01/27/2022 20:52:42 - INFO - codeparrot_training - Step 1680: {'lr': 0.00042, 'samples': 322752, 'steps': 1680, 'loss/train': 7.478966474533081} +01/27/2022 20:52:46 - INFO - codeparrot_training - Step 1681: {'lr': 0.00042025, 'samples': 322944, 'steps': 1681, 'loss/train': 5.931625127792358} +01/27/2022 20:52:51 - INFO - codeparrot_training - Step 1682: {'lr': 0.0004205, 'samples': 323136, 'steps': 1682, 'loss/train': 4.653693079948425} +01/27/2022 20:52:55 - INFO - codeparrot_training - Step 1683: {'lr': 0.00042075, 'samples': 323328, 'steps': 1683, 'loss/train': 5.044461250305176} +01/27/2022 20:52:59 - INFO - codeparrot_training - Step 1684: {'lr': 0.000421, 'samples': 323520, 'steps': 1684, 'loss/train': 3.442354917526245} +01/27/2022 20:53:06 - INFO - codeparrot_training - Step 1685: {'lr': 0.00042125, 'samples': 323712, 'steps': 1685, 'loss/train': 4.211433291435242} +01/27/2022 20:53:10 - INFO - codeparrot_training - Step 1686: {'lr': 0.0004215, 'samples': 323904, 'steps': 1686, 'loss/train': 5.015124678611755} +01/27/2022 20:53:14 - INFO - codeparrot_training - Step 1687: {'lr': 0.00042175000000000003, 'samples': 324096, 'steps': 1687, 'loss/train': 5.645220637321472} +01/27/2022 20:53:18 - INFO - codeparrot_training - Step 1688: {'lr': 0.000422, 'samples': 324288, 'steps': 1688, 'loss/train': 5.594154596328735} +01/27/2022 20:53:23 - INFO - codeparrot_training - Step 1689: {'lr': 0.00042225000000000005, 'samples': 324480, 'steps': 1689, 'loss/train': 3.2915682792663574} +01/27/2022 20:53:28 - INFO - codeparrot_training - Step 1690: {'lr': 0.00042249999999999997, 'samples': 324672, 'steps': 1690, 'loss/train': 3.065605401992798} +01/27/2022 20:53:32 - INFO - codeparrot_training - Step 1691: {'lr': 0.00042275, 'samples': 324864, 'steps': 1691, 'loss/train': 4.275627493858337} +01/27/2022 20:53:36 - INFO - codeparrot_training - Step 1692: {'lr': 0.000423, 'samples': 325056, 'steps': 1692, 'loss/train': 5.2252349853515625} +01/27/2022 20:53:40 - INFO - codeparrot_training - Step 1693: {'lr': 0.00042325, 'samples': 325248, 'steps': 1693, 'loss/train': 4.2724692821502686} +01/27/2022 20:53:44 - INFO - codeparrot_training - Step 1694: {'lr': 0.0004235, 'samples': 325440, 'steps': 1694, 'loss/train': 5.012627720832825} +01/27/2022 20:53:49 - INFO - codeparrot_training - Step 1695: {'lr': 0.00042375000000000003, 'samples': 325632, 'steps': 1695, 'loss/train': 4.095845460891724} +01/27/2022 20:53:54 - INFO - codeparrot_training - Step 1696: {'lr': 0.000424, 'samples': 325824, 'steps': 1696, 'loss/train': 4.753535270690918} +01/27/2022 20:53:58 - INFO - codeparrot_training - Step 1697: {'lr': 0.00042425000000000004, 'samples': 326016, 'steps': 1697, 'loss/train': 4.817803144454956} +01/27/2022 20:54:02 - INFO - codeparrot_training - Step 1698: {'lr': 0.0004245, 'samples': 326208, 'steps': 1698, 'loss/train': 4.125654816627502} +01/27/2022 20:54:06 - INFO - codeparrot_training - Step 1699: {'lr': 0.00042475000000000005, 'samples': 326400, 'steps': 1699, 
'loss/train': 4.899822950363159} +01/27/2022 20:54:12 - INFO - codeparrot_training - Step 1700: {'lr': 0.000425, 'samples': 326592, 'steps': 1700, 'loss/train': 4.947185754776001} +01/27/2022 20:54:16 - INFO - codeparrot_training - Step 1701: {'lr': 0.00042525, 'samples': 326784, 'steps': 1701, 'loss/train': 6.078960657119751} +01/27/2022 20:54:20 - INFO - codeparrot_training - Step 1702: {'lr': 0.0004255, 'samples': 326976, 'steps': 1702, 'loss/train': 5.877833604812622} +01/27/2022 20:54:25 - INFO - codeparrot_training - Step 1703: {'lr': 0.00042575, 'samples': 327168, 'steps': 1703, 'loss/train': 4.6492838859558105} +01/27/2022 20:54:29 - INFO - codeparrot_training - Step 1704: {'lr': 0.000426, 'samples': 327360, 'steps': 1704, 'loss/train': 5.065916419029236} +01/27/2022 20:54:34 - INFO - codeparrot_training - Step 1705: {'lr': 0.00042625000000000003, 'samples': 327552, 'steps': 1705, 'loss/train': 5.7648056745529175} +01/27/2022 20:54:38 - INFO - codeparrot_training - Step 1706: {'lr': 0.0004265, 'samples': 327744, 'steps': 1706, 'loss/train': 5.916955947875977} +01/27/2022 20:54:42 - INFO - codeparrot_training - Step 1707: {'lr': 0.00042675000000000005, 'samples': 327936, 'steps': 1707, 'loss/train': 4.648202061653137} +01/27/2022 20:54:46 - INFO - codeparrot_training - Step 1708: {'lr': 0.000427, 'samples': 328128, 'steps': 1708, 'loss/train': 4.2541598081588745} +01/27/2022 20:54:51 - INFO - codeparrot_training - Step 1709: {'lr': 0.00042725, 'samples': 328320, 'steps': 1709, 'loss/train': 4.176839590072632} +01/27/2022 20:54:56 - INFO - codeparrot_training - Step 1710: {'lr': 0.0004275, 'samples': 328512, 'steps': 1710, 'loss/train': 5.986522436141968} +01/27/2022 20:55:00 - INFO - codeparrot_training - Step 1711: {'lr': 0.00042775, 'samples': 328704, 'steps': 1711, 'loss/train': 3.9285460710525513} +01/27/2022 20:55:04 - INFO - codeparrot_training - Step 1712: {'lr': 0.000428, 'samples': 328896, 'steps': 1712, 'loss/train': 5.205624103546143} +01/27/2022 20:55:08 - INFO - codeparrot_training - Step 1713: {'lr': 0.00042825000000000003, 'samples': 329088, 'steps': 1713, 'loss/train': 5.124661445617676} +01/27/2022 20:55:12 - INFO - codeparrot_training - Step 1714: {'lr': 0.0004285, 'samples': 329280, 'steps': 1714, 'loss/train': 5.29037082195282} +01/27/2022 20:55:18 - INFO - codeparrot_training - Step 1715: {'lr': 0.00042875000000000004, 'samples': 329472, 'steps': 1715, 'loss/train': 3.200813055038452} +01/27/2022 20:55:22 - INFO - codeparrot_training - Step 1716: {'lr': 0.000429, 'samples': 329664, 'steps': 1716, 'loss/train': 5.33403217792511} +01/27/2022 20:55:27 - INFO - codeparrot_training - Step 1717: {'lr': 0.00042925000000000005, 'samples': 329856, 'steps': 1717, 'loss/train': 5.40205192565918} +01/27/2022 20:55:31 - INFO - codeparrot_training - Step 1718: {'lr': 0.0004295, 'samples': 330048, 'steps': 1718, 'loss/train': 5.164235830307007} +01/27/2022 20:55:35 - INFO - codeparrot_training - Step 1719: {'lr': 0.00042975, 'samples': 330240, 'steps': 1719, 'loss/train': 5.0945563316345215} +01/27/2022 20:55:40 - INFO - codeparrot_training - Step 1720: {'lr': 0.00043, 'samples': 330432, 'steps': 1720, 'loss/train': 5.805701851844788} +01/27/2022 20:55:44 - INFO - codeparrot_training - Step 1721: {'lr': 0.00043025, 'samples': 330624, 'steps': 1721, 'loss/train': 5.174437522888184} +01/27/2022 20:55:48 - INFO - codeparrot_training - Step 1722: {'lr': 0.0004305, 'samples': 330816, 'steps': 1722, 'loss/train': 5.244656324386597} +01/27/2022 20:55:53 - INFO - codeparrot_training - 
Step 1723: {'lr': 0.00043075000000000003, 'samples': 331008, 'steps': 1723, 'loss/train': 5.414682984352112} +01/27/2022 20:55:57 - INFO - codeparrot_training - Step 1724: {'lr': 0.000431, 'samples': 331200, 'steps': 1724, 'loss/train': 5.539515495300293} +01/27/2022 20:56:02 - INFO - codeparrot_training - Step 1725: {'lr': 0.00043125000000000005, 'samples': 331392, 'steps': 1725, 'loss/train': 4.8776267766952515} +01/27/2022 20:56:06 - INFO - codeparrot_training - Step 1726: {'lr': 0.0004315, 'samples': 331584, 'steps': 1726, 'loss/train': 5.9577155113220215} +01/27/2022 20:56:10 - INFO - codeparrot_training - Step 1727: {'lr': 0.00043175, 'samples': 331776, 'steps': 1727, 'loss/train': 5.266177296638489} +01/27/2022 20:56:14 - INFO - codeparrot_training - Step 1728: {'lr': 0.000432, 'samples': 331968, 'steps': 1728, 'loss/train': 4.860105514526367} +01/27/2022 20:56:18 - INFO - codeparrot_training - Step 1729: {'lr': 0.00043225, 'samples': 332160, 'steps': 1729, 'loss/train': 4.335245490074158} +01/27/2022 20:56:24 - INFO - codeparrot_training - Step 1730: {'lr': 0.0004325, 'samples': 332352, 'steps': 1730, 'loss/train': 6.060381174087524} +01/27/2022 20:56:29 - INFO - codeparrot_training - Step 1731: {'lr': 0.00043275000000000003, 'samples': 332544, 'steps': 1731, 'loss/train': 4.861485242843628} +01/27/2022 20:56:33 - INFO - codeparrot_training - Step 1732: {'lr': 0.000433, 'samples': 332736, 'steps': 1732, 'loss/train': 4.13783061504364} +01/27/2022 20:56:37 - INFO - codeparrot_training - Step 1733: {'lr': 0.00043325000000000004, 'samples': 332928, 'steps': 1733, 'loss/train': 4.61021625995636} +01/27/2022 20:56:41 - INFO - codeparrot_training - Step 1734: {'lr': 0.0004335, 'samples': 333120, 'steps': 1734, 'loss/train': 6.506906747817993} +01/27/2022 20:56:46 - INFO - codeparrot_training - Step 1735: {'lr': 0.00043375000000000005, 'samples': 333312, 'steps': 1735, 'loss/train': 4.977795481681824} +01/27/2022 20:56:50 - INFO - codeparrot_training - Step 1736: {'lr': 0.00043400000000000003, 'samples': 333504, 'steps': 1736, 'loss/train': 4.854881644248962} +01/27/2022 20:56:54 - INFO - codeparrot_training - Step 1737: {'lr': 0.00043425, 'samples': 333696, 'steps': 1737, 'loss/train': 4.509007215499878} +01/27/2022 20:56:59 - INFO - codeparrot_training - Step 1738: {'lr': 0.0004345, 'samples': 333888, 'steps': 1738, 'loss/train': 3.3236074447631836} +01/27/2022 20:57:03 - INFO - codeparrot_training - Step 1739: {'lr': 0.00043475, 'samples': 334080, 'steps': 1739, 'loss/train': 3.633428692817688} +01/27/2022 20:57:09 - INFO - codeparrot_training - Step 1740: {'lr': 0.000435, 'samples': 334272, 'steps': 1740, 'loss/train': 3.8261711597442627} +01/27/2022 20:57:13 - INFO - codeparrot_training - Step 1741: {'lr': 0.00043525000000000004, 'samples': 334464, 'steps': 1741, 'loss/train': 5.210784673690796} +01/27/2022 20:57:17 - INFO - codeparrot_training - Step 1742: {'lr': 0.0004355, 'samples': 334656, 'steps': 1742, 'loss/train': 4.622898817062378} +01/27/2022 20:57:21 - INFO - codeparrot_training - Step 1743: {'lr': 0.00043575000000000005, 'samples': 334848, 'steps': 1743, 'loss/train': 4.687637686729431} +01/27/2022 20:57:25 - INFO - codeparrot_training - Step 1744: {'lr': 0.000436, 'samples': 335040, 'steps': 1744, 'loss/train': 5.478589653968811} +01/27/2022 20:57:31 - INFO - codeparrot_training - Step 1745: {'lr': 0.00043625000000000006, 'samples': 335232, 'steps': 1745, 'loss/train': 5.851840496063232} +01/27/2022 20:57:35 - INFO - codeparrot_training - Step 1746: {'lr': 0.0004365, 
'samples': 335424, 'steps': 1746, 'loss/train': 4.417731285095215} +01/27/2022 20:57:40 - INFO - codeparrot_training - Step 1747: {'lr': 0.00043675, 'samples': 335616, 'steps': 1747, 'loss/train': 5.634359836578369} +01/27/2022 20:57:44 - INFO - codeparrot_training - Step 1748: {'lr': 0.000437, 'samples': 335808, 'steps': 1748, 'loss/train': 4.898866653442383} +01/27/2022 20:57:48 - INFO - codeparrot_training - Step 1749: {'lr': 0.00043725000000000003, 'samples': 336000, 'steps': 1749, 'loss/train': 4.736881613731384} +01/27/2022 20:57:52 - INFO - codeparrot_training - Step 1750: {'lr': 0.0004375, 'samples': 336192, 'steps': 1750, 'loss/train': 4.807565331459045} +01/27/2022 20:57:57 - INFO - codeparrot_training - Step 1751: {'lr': 0.00043775, 'samples': 336384, 'steps': 1751, 'loss/train': 4.518897771835327} +01/27/2022 20:58:01 - INFO - codeparrot_training - Step 1752: {'lr': 0.000438, 'samples': 336576, 'steps': 1752, 'loss/train': 3.820875406265259} +01/27/2022 20:58:06 - INFO - codeparrot_training - Step 1753: {'lr': 0.00043825, 'samples': 336768, 'steps': 1753, 'loss/train': 5.065893173217773} +01/27/2022 20:58:10 - INFO - codeparrot_training - Step 1754: {'lr': 0.00043850000000000003, 'samples': 336960, 'steps': 1754, 'loss/train': 4.350127100944519} +01/27/2022 20:58:14 - INFO - codeparrot_training - Step 1755: {'lr': 0.00043874999999999996, 'samples': 337152, 'steps': 1755, 'loss/train': 4.935661554336548} +01/27/2022 20:58:19 - INFO - codeparrot_training - Step 1756: {'lr': 0.000439, 'samples': 337344, 'steps': 1756, 'loss/train': 5.391317367553711} +01/27/2022 20:58:23 - INFO - codeparrot_training - Step 1757: {'lr': 0.00043924999999999997, 'samples': 337536, 'steps': 1757, 'loss/train': 5.007919192314148} +01/27/2022 20:58:27 - INFO - codeparrot_training - Step 1758: {'lr': 0.0004395, 'samples': 337728, 'steps': 1758, 'loss/train': 3.904427647590637} +01/27/2022 20:58:31 - INFO - codeparrot_training - Step 1759: {'lr': 0.00043975, 'samples': 337920, 'steps': 1759, 'loss/train': 4.726148843765259} +01/27/2022 20:58:36 - INFO - codeparrot_training - Step 1760: {'lr': 0.00044, 'samples': 338112, 'steps': 1760, 'loss/train': 4.59481143951416} +01/27/2022 20:58:42 - INFO - codeparrot_training - Step 1761: {'lr': 0.00044025, 'samples': 338304, 'steps': 1761, 'loss/train': 4.144498586654663} +01/27/2022 20:58:46 - INFO - codeparrot_training - Step 1762: {'lr': 0.00044050000000000003, 'samples': 338496, 'steps': 1762, 'loss/train': 4.81161904335022} +01/27/2022 20:58:50 - INFO - codeparrot_training - Step 1763: {'lr': 0.00044075, 'samples': 338688, 'steps': 1763, 'loss/train': 4.207608461380005} +01/27/2022 20:58:54 - INFO - codeparrot_training - Step 1764: {'lr': 0.000441, 'samples': 338880, 'steps': 1764, 'loss/train': 5.70531964302063} +01/27/2022 20:58:58 - INFO - codeparrot_training - Step 1765: {'lr': 0.00044124999999999996, 'samples': 339072, 'steps': 1765, 'loss/train': 5.701893925666809} +01/27/2022 20:59:03 - INFO - codeparrot_training - Step 1766: {'lr': 0.0004415, 'samples': 339264, 'steps': 1766, 'loss/train': 4.631235122680664} +01/27/2022 20:59:08 - INFO - codeparrot_training - Step 1767: {'lr': 0.00044175, 'samples': 339456, 'steps': 1767, 'loss/train': 4.755450367927551} +01/27/2022 20:59:12 - INFO - codeparrot_training - Step 1768: {'lr': 0.000442, 'samples': 339648, 'steps': 1768, 'loss/train': 5.2073482275009155} +01/27/2022 20:59:16 - INFO - codeparrot_training - Step 1769: {'lr': 0.00044225, 'samples': 339840, 'steps': 1769, 'loss/train': 5.654919147491455} 
+01/27/2022 20:59:20 - INFO - codeparrot_training - Step 1770: {'lr': 0.0004425, 'samples': 340032, 'steps': 1770, 'loss/train': 5.131520748138428} +01/27/2022 20:59:25 - INFO - codeparrot_training - Step 1771: {'lr': 0.00044275, 'samples': 340224, 'steps': 1771, 'loss/train': 6.042468309402466} +01/27/2022 20:59:30 - INFO - codeparrot_training - Step 1772: {'lr': 0.00044300000000000003, 'samples': 340416, 'steps': 1772, 'loss/train': 3.4836251735687256} +01/27/2022 20:59:34 - INFO - codeparrot_training - Step 1773: {'lr': 0.00044325, 'samples': 340608, 'steps': 1773, 'loss/train': 4.968786835670471} +01/27/2022 20:59:38 - INFO - codeparrot_training - Step 1774: {'lr': 0.0004435, 'samples': 340800, 'steps': 1774, 'loss/train': 3.3600107431411743} +01/27/2022 20:59:42 - INFO - codeparrot_training - Step 1775: {'lr': 0.00044374999999999997, 'samples': 340992, 'steps': 1775, 'loss/train': 5.393645524978638} +01/27/2022 20:59:48 - INFO - codeparrot_training - Step 1776: {'lr': 0.000444, 'samples': 341184, 'steps': 1776, 'loss/train': 3.910504460334778} +01/27/2022 20:59:52 - INFO - codeparrot_training - Step 1777: {'lr': 0.00044425, 'samples': 341376, 'steps': 1777, 'loss/train': 5.942509889602661} +01/27/2022 20:59:57 - INFO - codeparrot_training - Step 1778: {'lr': 0.0004445, 'samples': 341568, 'steps': 1778, 'loss/train': 1.6830652356147766} +01/27/2022 21:00:01 - INFO - codeparrot_training - Step 1779: {'lr': 0.00044475, 'samples': 341760, 'steps': 1779, 'loss/train': 5.512582898139954} +01/27/2022 21:00:05 - INFO - codeparrot_training - Step 1780: {'lr': 0.00044500000000000003, 'samples': 341952, 'steps': 1780, 'loss/train': 4.7168920040130615} +01/27/2022 21:00:10 - INFO - codeparrot_training - Step 1781: {'lr': 0.00044525, 'samples': 342144, 'steps': 1781, 'loss/train': 5.519904613494873} +01/27/2022 21:00:14 - INFO - codeparrot_training - Step 1782: {'lr': 0.00044550000000000004, 'samples': 342336, 'steps': 1782, 'loss/train': 4.961467981338501} +01/27/2022 21:00:19 - INFO - codeparrot_training - Step 1783: {'lr': 0.00044574999999999997, 'samples': 342528, 'steps': 1783, 'loss/train': 4.779161095619202} +01/27/2022 21:00:23 - INFO - codeparrot_training - Step 1784: {'lr': 0.000446, 'samples': 342720, 'steps': 1784, 'loss/train': 3.9149551391601562} +01/27/2022 21:00:27 - INFO - codeparrot_training - Step 1785: {'lr': 0.00044625, 'samples': 342912, 'steps': 1785, 'loss/train': 5.313560128211975} +01/27/2022 21:00:33 - INFO - codeparrot_training - Step 1786: {'lr': 0.0004465, 'samples': 343104, 'steps': 1786, 'loss/train': 4.449593782424927} +01/27/2022 21:00:37 - INFO - codeparrot_training - Step 1787: {'lr': 0.00044675, 'samples': 343296, 'steps': 1787, 'loss/train': 4.626879930496216} +01/27/2022 21:00:41 - INFO - codeparrot_training - Step 1788: {'lr': 0.000447, 'samples': 343488, 'steps': 1788, 'loss/train': 4.868821978569031} +01/27/2022 21:00:45 - INFO - codeparrot_training - Step 1789: {'lr': 0.00044725, 'samples': 343680, 'steps': 1789, 'loss/train': 4.6601643562316895} +01/27/2022 21:00:49 - INFO - codeparrot_training - Step 1790: {'lr': 0.00044750000000000004, 'samples': 343872, 'steps': 1790, 'loss/train': 3.3959877490997314} +01/27/2022 21:00:54 - INFO - codeparrot_training - Step 1791: {'lr': 0.00044775, 'samples': 344064, 'steps': 1791, 'loss/train': 4.442063927650452} +01/27/2022 21:00:59 - INFO - codeparrot_training - Step 1792: {'lr': 0.000448, 'samples': 344256, 'steps': 1792, 'loss/train': 4.918506145477295} +01/27/2022 21:01:03 - INFO - codeparrot_training - Step 
1793: {'lr': 0.00044824999999999997, 'samples': 344448, 'steps': 1793, 'loss/train': 5.150592684745789} +01/27/2022 21:01:07 - INFO - codeparrot_training - Step 1794: {'lr': 0.0004485, 'samples': 344640, 'steps': 1794, 'loss/train': 5.543230175971985} +01/27/2022 21:01:11 - INFO - codeparrot_training - Step 1795: {'lr': 0.00044875, 'samples': 344832, 'steps': 1795, 'loss/train': 5.858844995498657} +01/27/2022 21:01:16 - INFO - codeparrot_training - Step 1796: {'lr': 0.000449, 'samples': 345024, 'steps': 1796, 'loss/train': 3.9244412183761597} +01/27/2022 21:01:20 - INFO - codeparrot_training - Step 1797: {'lr': 0.00044925, 'samples': 345216, 'steps': 1797, 'loss/train': 5.390080690383911} +01/27/2022 21:01:25 - INFO - codeparrot_training - Step 1798: {'lr': 0.00044950000000000003, 'samples': 345408, 'steps': 1798, 'loss/train': 5.1660075187683105} +01/27/2022 21:01:29 - INFO - codeparrot_training - Step 1799: {'lr': 0.00044975, 'samples': 345600, 'steps': 1799, 'loss/train': 4.866254925727844} +01/27/2022 21:01:33 - INFO - codeparrot_training - Step 1800: {'lr': 0.00045000000000000004, 'samples': 345792, 'steps': 1800, 'loss/train': 5.002342700958252} +01/27/2022 21:01:39 - INFO - codeparrot_training - Step 1801: {'lr': 0.00045024999999999997, 'samples': 345984, 'steps': 1801, 'loss/train': 4.4176822900772095} +01/27/2022 21:01:43 - INFO - codeparrot_training - Step 1802: {'lr': 0.0004505, 'samples': 346176, 'steps': 1802, 'loss/train': 4.236628890037537} +01/27/2022 21:01:47 - INFO - codeparrot_training - Step 1803: {'lr': 0.00045075, 'samples': 346368, 'steps': 1803, 'loss/train': 2.723323702812195} +01/27/2022 21:01:51 - INFO - codeparrot_training - Step 1804: {'lr': 0.000451, 'samples': 346560, 'steps': 1804, 'loss/train': 4.8871750831604} +01/27/2022 21:01:55 - INFO - codeparrot_training - Step 1805: {'lr': 0.00045125, 'samples': 346752, 'steps': 1805, 'loss/train': 3.266984224319458} +01/27/2022 21:02:01 - INFO - codeparrot_training - Step 1806: {'lr': 0.0004515, 'samples': 346944, 'steps': 1806, 'loss/train': 4.703328251838684} +01/27/2022 21:02:05 - INFO - codeparrot_training - Step 1807: {'lr': 0.00045175, 'samples': 347136, 'steps': 1807, 'loss/train': 7.13761568069458} +01/27/2022 21:02:09 - INFO - codeparrot_training - Step 1808: {'lr': 0.00045200000000000004, 'samples': 347328, 'steps': 1808, 'loss/train': 5.49048113822937} +01/27/2022 21:02:13 - INFO - codeparrot_training - Step 1809: {'lr': 0.00045225, 'samples': 347520, 'steps': 1809, 'loss/train': 6.181141376495361} +01/27/2022 21:02:17 - INFO - codeparrot_training - Step 1810: {'lr': 0.00045250000000000005, 'samples': 347712, 'steps': 1810, 'loss/train': 4.769554853439331} +01/27/2022 21:02:23 - INFO - codeparrot_training - Step 1811: {'lr': 0.00045275, 'samples': 347904, 'steps': 1811, 'loss/train': 5.000910043716431} +01/27/2022 21:02:27 - INFO - codeparrot_training - Step 1812: {'lr': 0.000453, 'samples': 348096, 'steps': 1812, 'loss/train': 4.625191569328308} +01/27/2022 21:02:31 - INFO - codeparrot_training - Step 1813: {'lr': 0.00045325, 'samples': 348288, 'steps': 1813, 'loss/train': 7.105871915817261} +01/27/2022 21:02:35 - INFO - codeparrot_training - Step 1814: {'lr': 0.0004535, 'samples': 348480, 'steps': 1814, 'loss/train': 4.6342878341674805} +01/27/2022 21:02:39 - INFO - codeparrot_training - Step 1815: {'lr': 0.00045375, 'samples': 348672, 'steps': 1815, 'loss/train': 5.0025869607925415} +01/27/2022 21:02:44 - INFO - codeparrot_training - Step 1816: {'lr': 0.00045400000000000003, 'samples': 348864, 'steps': 
1816, 'loss/train': 4.478776216506958} +01/27/2022 21:02:49 - INFO - codeparrot_training - Step 1817: {'lr': 0.00045425, 'samples': 349056, 'steps': 1817, 'loss/train': 3.7656140327453613} +01/27/2022 21:02:53 - INFO - codeparrot_training - Step 1818: {'lr': 0.00045450000000000004, 'samples': 349248, 'steps': 1818, 'loss/train': 4.221714019775391} +01/27/2022 21:02:57 - INFO - codeparrot_training - Step 1819: {'lr': 0.00045475, 'samples': 349440, 'steps': 1819, 'loss/train': 5.6670098304748535} +01/27/2022 21:03:01 - INFO - codeparrot_training - Step 1820: {'lr': 0.000455, 'samples': 349632, 'steps': 1820, 'loss/train': 6.225698947906494} +01/27/2022 21:03:07 - INFO - codeparrot_training - Step 1821: {'lr': 0.00045525, 'samples': 349824, 'steps': 1821, 'loss/train': 5.336203336715698} +01/27/2022 21:03:11 - INFO - codeparrot_training - Step 1822: {'lr': 0.0004555, 'samples': 350016, 'steps': 1822, 'loss/train': 4.444505095481873} +01/27/2022 21:03:15 - INFO - codeparrot_training - Step 1823: {'lr': 0.00045575, 'samples': 350208, 'steps': 1823, 'loss/train': 4.926588535308838} +01/27/2022 21:03:20 - INFO - codeparrot_training - Step 1824: {'lr': 0.000456, 'samples': 350400, 'steps': 1824, 'loss/train': 5.198822021484375} +01/27/2022 21:03:24 - INFO - codeparrot_training - Step 1825: {'lr': 0.00045625, 'samples': 350592, 'steps': 1825, 'loss/train': 5.063373327255249} +01/27/2022 21:03:29 - INFO - codeparrot_training - Step 1826: {'lr': 0.00045650000000000004, 'samples': 350784, 'steps': 1826, 'loss/train': 4.201614260673523} +01/27/2022 21:03:33 - INFO - codeparrot_training - Step 1827: {'lr': 0.00045675, 'samples': 350976, 'steps': 1827, 'loss/train': 4.0784443616867065} +01/27/2022 21:03:37 - INFO - codeparrot_training - Step 1828: {'lr': 0.00045700000000000005, 'samples': 351168, 'steps': 1828, 'loss/train': 3.9706603288650513} +01/27/2022 21:03:41 - INFO - codeparrot_training - Step 1829: {'lr': 0.00045725, 'samples': 351360, 'steps': 1829, 'loss/train': 4.465611577033997} +01/27/2022 21:03:46 - INFO - codeparrot_training - Step 1830: {'lr': 0.0004575, 'samples': 351552, 'steps': 1830, 'loss/train': 5.088142991065979} +01/27/2022 21:03:52 - INFO - codeparrot_training - Step 1831: {'lr': 0.00045775, 'samples': 351744, 'steps': 1831, 'loss/train': 4.200507760047913} +01/27/2022 21:03:56 - INFO - codeparrot_training - Step 1832: {'lr': 0.000458, 'samples': 351936, 'steps': 1832, 'loss/train': 3.968131184577942} +01/27/2022 21:04:00 - INFO - codeparrot_training - Step 1833: {'lr': 0.00045825, 'samples': 352128, 'steps': 1833, 'loss/train': 5.042641282081604} +01/27/2022 21:04:05 - INFO - codeparrot_training - Step 1834: {'lr': 0.00045850000000000003, 'samples': 352320, 'steps': 1834, 'loss/train': 4.397329330444336} +01/27/2022 21:04:09 - INFO - codeparrot_training - Step 1835: {'lr': 0.00045875, 'samples': 352512, 'steps': 1835, 'loss/train': 5.026055932044983} +01/27/2022 21:04:14 - INFO - codeparrot_training - Step 1836: {'lr': 0.00045900000000000004, 'samples': 352704, 'steps': 1836, 'loss/train': 3.6876357793807983} +01/27/2022 21:04:18 - INFO - codeparrot_training - Step 1837: {'lr': 0.00045925, 'samples': 352896, 'steps': 1837, 'loss/train': 6.1690993309021} +01/27/2022 21:04:22 - INFO - codeparrot_training - Step 1838: {'lr': 0.00045950000000000006, 'samples': 353088, 'steps': 1838, 'loss/train': 4.706387042999268} +01/27/2022 21:04:26 - INFO - codeparrot_training - Step 1839: {'lr': 0.00045975, 'samples': 353280, 'steps': 1839, 'loss/train': 6.05207633972168} +01/27/2022 21:04:31 - 
INFO - codeparrot_training - Step 1840: {'lr': 0.00046, 'samples': 353472, 'steps': 1840, 'loss/train': 3.8779070377349854} +01/27/2022 21:04:36 - INFO - codeparrot_training - Step 1841: {'lr': 0.00046025, 'samples': 353664, 'steps': 1841, 'loss/train': 4.576024532318115} +01/27/2022 21:04:40 - INFO - codeparrot_training - Step 1842: {'lr': 0.0004605, 'samples': 353856, 'steps': 1842, 'loss/train': 4.642486810684204} +01/27/2022 21:04:44 - INFO - codeparrot_training - Step 1843: {'lr': 0.00046075, 'samples': 354048, 'steps': 1843, 'loss/train': 4.1859272718429565} +01/27/2022 21:04:48 - INFO - codeparrot_training - Step 1844: {'lr': 0.00046100000000000004, 'samples': 354240, 'steps': 1844, 'loss/train': 4.481332540512085} +01/27/2022 21:04:52 - INFO - codeparrot_training - Step 1845: {'lr': 0.00046125, 'samples': 354432, 'steps': 1845, 'loss/train': 3.7361608743667603} +01/27/2022 21:04:58 - INFO - codeparrot_training - Step 1846: {'lr': 0.00046150000000000005, 'samples': 354624, 'steps': 1846, 'loss/train': 5.090752601623535} +01/27/2022 21:05:02 - INFO - codeparrot_training - Step 1847: {'lr': 0.00046175000000000003, 'samples': 354816, 'steps': 1847, 'loss/train': 5.028010010719299} +01/27/2022 21:05:06 - INFO - codeparrot_training - Step 1848: {'lr': 0.000462, 'samples': 355008, 'steps': 1848, 'loss/train': 4.967589497566223} +01/27/2022 21:05:10 - INFO - codeparrot_training - Step 1849: {'lr': 0.00046225, 'samples': 355200, 'steps': 1849, 'loss/train': 4.8270556926727295} +01/27/2022 21:05:15 - INFO - codeparrot_training - Step 1850: {'lr': 0.0004625, 'samples': 355392, 'steps': 1850, 'loss/train': 5.161792159080505} +01/27/2022 21:05:20 - INFO - codeparrot_training - Step 1851: {'lr': 0.00046275, 'samples': 355584, 'steps': 1851, 'loss/train': 5.122801423072815} +01/27/2022 21:05:24 - INFO - codeparrot_training - Step 1852: {'lr': 0.00046300000000000003, 'samples': 355776, 'steps': 1852, 'loss/train': 2.74101322889328} +01/27/2022 21:05:28 - INFO - codeparrot_training - Step 1853: {'lr': 0.00046325, 'samples': 355968, 'steps': 1853, 'loss/train': 5.113685131072998} +01/27/2022 21:05:32 - INFO - codeparrot_training - Step 1854: {'lr': 0.00046350000000000004, 'samples': 356160, 'steps': 1854, 'loss/train': 4.935330390930176} +01/27/2022 21:05:37 - INFO - codeparrot_training - Step 1855: {'lr': 0.00046375, 'samples': 356352, 'steps': 1855, 'loss/train': 4.784466862678528} +01/27/2022 21:05:42 - INFO - codeparrot_training - Step 1856: {'lr': 0.00046400000000000006, 'samples': 356544, 'steps': 1856, 'loss/train': 4.772746682167053} +01/27/2022 21:05:46 - INFO - codeparrot_training - Step 1857: {'lr': 0.00046425, 'samples': 356736, 'steps': 1857, 'loss/train': 6.764199256896973} +01/27/2022 21:05:50 - INFO - codeparrot_training - Step 1858: {'lr': 0.0004645, 'samples': 356928, 'steps': 1858, 'loss/train': 4.822646856307983} +01/27/2022 21:05:54 - INFO - codeparrot_training - Step 1859: {'lr': 0.00046475, 'samples': 357120, 'steps': 1859, 'loss/train': 3.762139320373535} +01/27/2022 21:05:58 - INFO - codeparrot_training - Step 1860: {'lr': 0.000465, 'samples': 357312, 'steps': 1860, 'loss/train': 5.29742968082428} +01/27/2022 21:06:04 - INFO - codeparrot_training - Step 1861: {'lr': 0.00046525, 'samples': 357504, 'steps': 1861, 'loss/train': 5.090372085571289} +01/27/2022 21:06:08 - INFO - codeparrot_training - Step 1862: {'lr': 0.00046550000000000004, 'samples': 357696, 'steps': 1862, 'loss/train': 4.783032774925232} +01/27/2022 21:06:12 - INFO - codeparrot_training - Step 1863: {'lr': 
0.00046575, 'samples': 357888, 'steps': 1863, 'loss/train': 5.143263816833496} +01/27/2022 21:06:16 - INFO - codeparrot_training - Step 1864: {'lr': 0.00046600000000000005, 'samples': 358080, 'steps': 1864, 'loss/train': 4.6861878633499146} +01/27/2022 21:06:20 - INFO - codeparrot_training - Step 1865: {'lr': 0.00046625000000000003, 'samples': 358272, 'steps': 1865, 'loss/train': 3.656361937522888} +01/27/2022 21:06:26 - INFO - codeparrot_training - Step 1866: {'lr': 0.0004665, 'samples': 358464, 'steps': 1866, 'loss/train': 3.8588401079177856} +01/27/2022 21:06:30 - INFO - codeparrot_training - Step 1867: {'lr': 0.00046675, 'samples': 358656, 'steps': 1867, 'loss/train': 4.612795114517212} +01/27/2022 21:06:35 - INFO - codeparrot_training - Step 1868: {'lr': 0.000467, 'samples': 358848, 'steps': 1868, 'loss/train': 4.297463893890381} +01/27/2022 21:06:39 - INFO - codeparrot_training - Step 1869: {'lr': 0.00046725, 'samples': 359040, 'steps': 1869, 'loss/train': 4.389006972312927} +01/27/2022 21:06:43 - INFO - codeparrot_training - Step 1870: {'lr': 0.00046750000000000003, 'samples': 359232, 'steps': 1870, 'loss/train': 3.264990448951721} +01/27/2022 21:06:48 - INFO - codeparrot_training - Step 1871: {'lr': 0.00046775, 'samples': 359424, 'steps': 1871, 'loss/train': 5.188838839530945} +01/27/2022 21:06:52 - INFO - codeparrot_training - Step 1872: {'lr': 0.00046800000000000005, 'samples': 359616, 'steps': 1872, 'loss/train': 4.107481598854065} +01/27/2022 21:06:57 - INFO - codeparrot_training - Step 1873: {'lr': 0.00046825, 'samples': 359808, 'steps': 1873, 'loss/train': 4.440730690956116} +01/27/2022 21:07:01 - INFO - codeparrot_training - Step 1874: {'lr': 0.00046850000000000006, 'samples': 360000, 'steps': 1874, 'loss/train': 3.99187695980072} +01/27/2022 21:07:05 - INFO - codeparrot_training - Step 1875: {'lr': 0.00046875, 'samples': 360192, 'steps': 1875, 'loss/train': 4.760858416557312} +01/27/2022 21:07:11 - INFO - codeparrot_training - Step 1876: {'lr': 0.00046899999999999996, 'samples': 360384, 'steps': 1876, 'loss/train': 5.039731979370117} +01/27/2022 21:07:15 - INFO - codeparrot_training - Step 1877: {'lr': 0.00046925, 'samples': 360576, 'steps': 1877, 'loss/train': 4.0667442083358765} +01/27/2022 21:07:19 - INFO - codeparrot_training - Step 1878: {'lr': 0.0004695, 'samples': 360768, 'steps': 1878, 'loss/train': 4.332077264785767} +01/27/2022 21:07:23 - INFO - codeparrot_training - Step 1879: {'lr': 0.00046975, 'samples': 360960, 'steps': 1879, 'loss/train': 4.177701473236084} +01/27/2022 21:07:27 - INFO - codeparrot_training - Step 1880: {'lr': 0.00047, 'samples': 361152, 'steps': 1880, 'loss/train': 4.516612529754639} +01/27/2022 21:07:32 - INFO - codeparrot_training - Step 1881: {'lr': 0.00047025, 'samples': 361344, 'steps': 1881, 'loss/train': 3.8615119457244873} +01/27/2022 21:07:37 - INFO - codeparrot_training - Step 1882: {'lr': 0.0004705, 'samples': 361536, 'steps': 1882, 'loss/train': 5.216192007064819} +01/27/2022 21:07:41 - INFO - codeparrot_training - Step 1883: {'lr': 0.00047075000000000003, 'samples': 361728, 'steps': 1883, 'loss/train': 5.14946722984314} +01/27/2022 21:07:45 - INFO - codeparrot_training - Step 1884: {'lr': 0.000471, 'samples': 361920, 'steps': 1884, 'loss/train': 4.049653172492981} +01/27/2022 21:07:49 - INFO - codeparrot_training - Step 1885: {'lr': 0.00047125, 'samples': 362112, 'steps': 1885, 'loss/train': 4.4000043869018555} +01/27/2022 21:07:54 - INFO - codeparrot_training - Step 1886: {'lr': 0.00047149999999999997, 'samples': 362304, 
'steps': 1886, 'loss/train': 4.617537975311279} +01/27/2022 21:07:58 - INFO - codeparrot_training - Step 1887: {'lr': 0.00047175, 'samples': 362496, 'steps': 1887, 'loss/train': 3.882035493850708} +01/27/2022 21:08:03 - INFO - codeparrot_training - Step 1888: {'lr': 0.000472, 'samples': 362688, 'steps': 1888, 'loss/train': 7.318262815475464} +01/27/2022 21:08:07 - INFO - codeparrot_training - Step 1889: {'lr': 0.00047225, 'samples': 362880, 'steps': 1889, 'loss/train': 5.3229228258132935} +01/27/2022 21:08:11 - INFO - codeparrot_training - Step 1890: {'lr': 0.0004725, 'samples': 363072, 'steps': 1890, 'loss/train': 4.344042062759399} +01/27/2022 21:08:17 - INFO - codeparrot_training - Step 1891: {'lr': 0.00047275, 'samples': 363264, 'steps': 1891, 'loss/train': 4.763678312301636} +01/27/2022 21:08:21 - INFO - codeparrot_training - Step 1892: {'lr': 0.000473, 'samples': 363456, 'steps': 1892, 'loss/train': 4.571455121040344} +01/27/2022 21:08:25 - INFO - codeparrot_training - Step 1893: {'lr': 0.00047325000000000004, 'samples': 363648, 'steps': 1893, 'loss/train': 5.195192098617554} +01/27/2022 21:08:29 - INFO - codeparrot_training - Step 1894: {'lr': 0.00047349999999999996, 'samples': 363840, 'steps': 1894, 'loss/train': 5.191724181175232} +01/27/2022 21:08:33 - INFO - codeparrot_training - Step 1895: {'lr': 0.00047375, 'samples': 364032, 'steps': 1895, 'loss/train': 4.533761501312256} +01/27/2022 21:08:38 - INFO - codeparrot_training - Step 1896: {'lr': 0.000474, 'samples': 364224, 'steps': 1896, 'loss/train': 4.539787173271179} +01/27/2022 21:08:43 - INFO - codeparrot_training - Step 1897: {'lr': 0.00047425, 'samples': 364416, 'steps': 1897, 'loss/train': 4.5197049379348755} +01/27/2022 21:08:47 - INFO - codeparrot_training - Step 1898: {'lr': 0.0004745, 'samples': 364608, 'steps': 1898, 'loss/train': 4.860765337944031} +01/27/2022 21:08:51 - INFO - codeparrot_training - Step 1899: {'lr': 0.00047475, 'samples': 364800, 'steps': 1899, 'loss/train': 4.127663969993591} +01/27/2022 21:08:55 - INFO - codeparrot_training - Step 1900: {'lr': 0.000475, 'samples': 364992, 'steps': 1900, 'loss/train': 4.609157681465149} +01/27/2022 21:09:00 - INFO - codeparrot_training - Step 1901: {'lr': 0.00047525000000000003, 'samples': 365184, 'steps': 1901, 'loss/train': 5.054586410522461} +01/27/2022 21:09:05 - INFO - codeparrot_training - Step 1902: {'lr': 0.0004755, 'samples': 365376, 'steps': 1902, 'loss/train': 4.420566558837891} +01/27/2022 21:09:09 - INFO - codeparrot_training - Step 1903: {'lr': 0.00047575, 'samples': 365568, 'steps': 1903, 'loss/train': 4.976064920425415} +01/27/2022 21:09:13 - INFO - codeparrot_training - Step 1904: {'lr': 0.00047599999999999997, 'samples': 365760, 'steps': 1904, 'loss/train': 4.838501930236816} +01/27/2022 21:09:19 - INFO - codeparrot_training - Step 1905: {'lr': 0.00047625, 'samples': 365952, 'steps': 1905, 'loss/train': 4.80051326751709} +01/27/2022 21:09:23 - INFO - codeparrot_training - Step 1906: {'lr': 0.0004765, 'samples': 366144, 'steps': 1906, 'loss/train': 4.284734487533569} +01/27/2022 21:09:27 - INFO - codeparrot_training - Step 1907: {'lr': 0.00047675, 'samples': 366336, 'steps': 1907, 'loss/train': 4.938504338264465} +01/27/2022 21:09:31 - INFO - codeparrot_training - Step 1908: {'lr': 0.000477, 'samples': 366528, 'steps': 1908, 'loss/train': 2.722438395023346} +01/27/2022 21:09:35 - INFO - codeparrot_training - Step 1909: {'lr': 0.00047725, 'samples': 366720, 'steps': 1909, 'loss/train': 3.8878129720687866} +01/27/2022 21:09:41 - INFO - 
codeparrot_training - Step 1910: {'lr': 0.0004775, 'samples': 366912, 'steps': 1910, 'loss/train': 4.722251772880554} +01/27/2022 21:09:45 - INFO - codeparrot_training - Step 1911: {'lr': 0.00047775000000000004, 'samples': 367104, 'steps': 1911, 'loss/train': 4.588999629020691} +01/27/2022 21:09:49 - INFO - codeparrot_training - Step 1912: {'lr': 0.00047799999999999996, 'samples': 367296, 'steps': 1912, 'loss/train': 4.008896827697754} +01/27/2022 21:09:53 - INFO - codeparrot_training - Step 1913: {'lr': 0.00047825, 'samples': 367488, 'steps': 1913, 'loss/train': 4.707217454910278} +01/27/2022 21:09:57 - INFO - codeparrot_training - Step 1914: {'lr': 0.0004785, 'samples': 367680, 'steps': 1914, 'loss/train': 3.5580400228500366} +01/27/2022 21:10:01 - INFO - codeparrot_training - Step 1915: {'lr': 0.00047875, 'samples': 367872, 'steps': 1915, 'loss/train': 4.5836663246154785} +01/27/2022 21:10:08 - INFO - codeparrot_training - Step 1916: {'lr': 0.000479, 'samples': 368064, 'steps': 1916, 'loss/train': 4.874203562736511} +01/27/2022 21:10:12 - INFO - codeparrot_training - Step 1917: {'lr': 0.00047925, 'samples': 368256, 'steps': 1917, 'loss/train': 4.514940619468689} +01/27/2022 21:10:16 - INFO - codeparrot_training - Step 1918: {'lr': 0.0004795, 'samples': 368448, 'steps': 1918, 'loss/train': 3.8254787921905518} +01/27/2022 21:10:20 - INFO - codeparrot_training - Step 1919: {'lr': 0.00047975000000000003, 'samples': 368640, 'steps': 1919, 'loss/train': 3.3733913898468018} +01/27/2022 21:10:24 - INFO - codeparrot_training - Step 1920: {'lr': 0.00048, 'samples': 368832, 'steps': 1920, 'loss/train': 4.963331937789917} +01/27/2022 21:10:28 - INFO - codeparrot_training - Step 1921: {'lr': 0.00048025000000000005, 'samples': 369024, 'steps': 1921, 'loss/train': 4.147143244743347} +01/27/2022 21:10:34 - INFO - codeparrot_training - Step 1922: {'lr': 0.00048049999999999997, 'samples': 369216, 'steps': 1922, 'loss/train': 4.811792492866516} +01/27/2022 21:10:39 - INFO - codeparrot_training - Step 1923: {'lr': 0.00048075, 'samples': 369408, 'steps': 1923, 'loss/train': 4.590798854827881} +01/27/2022 21:10:43 - INFO - codeparrot_training - Step 1924: {'lr': 0.000481, 'samples': 369600, 'steps': 1924, 'loss/train': 4.559292197227478} +01/27/2022 21:10:47 - INFO - codeparrot_training - Step 1925: {'lr': 0.00048125, 'samples': 369792, 'steps': 1925, 'loss/train': 4.20210599899292} +01/27/2022 21:10:52 - INFO - codeparrot_training - Step 1926: {'lr': 0.0004815, 'samples': 369984, 'steps': 1926, 'loss/train': 4.739063858985901} +01/27/2022 21:10:56 - INFO - codeparrot_training - Step 1927: {'lr': 0.00048175000000000003, 'samples': 370176, 'steps': 1927, 'loss/train': 4.388291716575623} +01/27/2022 21:11:00 - INFO - codeparrot_training - Step 1928: {'lr': 0.000482, 'samples': 370368, 'steps': 1928, 'loss/train': 4.732992053031921} +01/27/2022 21:11:05 - INFO - codeparrot_training - Step 1929: {'lr': 0.00048225000000000004, 'samples': 370560, 'steps': 1929, 'loss/train': 4.608087658882141} +01/27/2022 21:11:09 - INFO - codeparrot_training - Step 1930: {'lr': 0.0004825, 'samples': 370752, 'steps': 1930, 'loss/train': 5.404157638549805} +01/27/2022 21:11:14 - INFO - codeparrot_training - Step 1931: {'lr': 0.00048275, 'samples': 370944, 'steps': 1931, 'loss/train': 5.553591012954712} +01/27/2022 21:11:18 - INFO - codeparrot_training - Step 1932: {'lr': 0.000483, 'samples': 371136, 'steps': 1932, 'loss/train': 4.732909083366394} +01/27/2022 21:11:22 - INFO - codeparrot_training - Step 1933: {'lr': 0.00048325, 
'samples': 371328, 'steps': 1933, 'loss/train': 5.583869576454163} +01/27/2022 21:11:26 - INFO - codeparrot_training - Step 1934: {'lr': 0.0004835, 'samples': 371520, 'steps': 1934, 'loss/train': 3.6927484273910522} +01/27/2022 21:11:31 - INFO - codeparrot_training - Step 1935: {'lr': 0.00048375, 'samples': 371712, 'steps': 1935, 'loss/train': 3.2610021829605103} +01/27/2022 21:11:36 - INFO - codeparrot_training - Step 1936: {'lr': 0.000484, 'samples': 371904, 'steps': 1936, 'loss/train': 3.924904704093933} +01/27/2022 21:11:41 - INFO - codeparrot_training - Step 1937: {'lr': 0.00048425000000000003, 'samples': 372096, 'steps': 1937, 'loss/train': 3.885619640350342} +01/27/2022 21:11:45 - INFO - codeparrot_training - Step 1938: {'lr': 0.0004845, 'samples': 372288, 'steps': 1938, 'loss/train': 4.519023656845093} +01/27/2022 21:11:49 - INFO - codeparrot_training - Step 1939: {'lr': 0.00048475000000000005, 'samples': 372480, 'steps': 1939, 'loss/train': 4.748381495475769} +01/27/2022 21:11:53 - INFO - codeparrot_training - Step 1940: {'lr': 0.00048499999999999997, 'samples': 372672, 'steps': 1940, 'loss/train': 4.6287617683410645} +01/27/2022 21:11:58 - INFO - codeparrot_training - Step 1941: {'lr': 0.00048525, 'samples': 372864, 'steps': 1941, 'loss/train': 4.685520887374878} +01/27/2022 21:12:02 - INFO - codeparrot_training - Step 1942: {'lr': 0.0004855, 'samples': 373056, 'steps': 1942, 'loss/train': 5.095425009727478} +01/27/2022 21:12:06 - INFO - codeparrot_training - Step 1943: {'lr': 0.00048575, 'samples': 373248, 'steps': 1943, 'loss/train': 5.088365077972412} +01/27/2022 21:12:11 - INFO - codeparrot_training - Step 1944: {'lr': 0.000486, 'samples': 373440, 'steps': 1944, 'loss/train': 1.493291974067688} +01/27/2022 21:12:15 - INFO - codeparrot_training - Step 1945: {'lr': 0.00048625000000000003, 'samples': 373632, 'steps': 1945, 'loss/train': 4.513129234313965} +01/27/2022 21:12:20 - INFO - codeparrot_training - Step 1946: {'lr': 0.0004865, 'samples': 373824, 'steps': 1946, 'loss/train': 5.02623438835144} +01/27/2022 21:12:24 - INFO - codeparrot_training - Step 1947: {'lr': 0.00048675000000000004, 'samples': 374016, 'steps': 1947, 'loss/train': 4.261286973953247} +01/27/2022 21:12:28 - INFO - codeparrot_training - Step 1948: {'lr': 0.000487, 'samples': 374208, 'steps': 1948, 'loss/train': 4.509857654571533} +01/27/2022 21:12:32 - INFO - codeparrot_training - Step 1949: {'lr': 0.00048725000000000005, 'samples': 374400, 'steps': 1949, 'loss/train': 3.888123035430908} +01/27/2022 21:12:37 - INFO - codeparrot_training - Step 1950: {'lr': 0.0004875, 'samples': 374592, 'steps': 1950, 'loss/train': 5.118421196937561} +01/27/2022 21:12:42 - INFO - codeparrot_training - Step 1951: {'lr': 0.00048775, 'samples': 374784, 'steps': 1951, 'loss/train': 4.300903558731079} +01/27/2022 21:12:47 - INFO - codeparrot_training - Step 1952: {'lr': 0.000488, 'samples': 374976, 'steps': 1952, 'loss/train': 4.937902808189392} +01/27/2022 21:12:51 - INFO - codeparrot_training - Step 1953: {'lr': 0.00048825, 'samples': 375168, 'steps': 1953, 'loss/train': 2.7438263297080994} +01/27/2022 21:12:55 - INFO - codeparrot_training - Step 1954: {'lr': 0.0004885, 'samples': 375360, 'steps': 1954, 'loss/train': 4.456546068191528} +01/27/2022 21:12:59 - INFO - codeparrot_training - Step 1955: {'lr': 0.00048875, 'samples': 375552, 'steps': 1955, 'loss/train': 3.7986055612564087} +01/27/2022 21:13:04 - INFO - codeparrot_training - Step 1956: {'lr': 0.000489, 'samples': 375744, 'steps': 1956, 'loss/train': 4.974090814590454} 
+01/27/2022 21:13:08 - INFO - codeparrot_training - Step 1957: {'lr': 0.00048925, 'samples': 375936, 'steps': 1957, 'loss/train': 3.625651717185974} +01/27/2022 21:13:12 - INFO - codeparrot_training - Step 1958: {'lr': 0.0004895, 'samples': 376128, 'steps': 1958, 'loss/train': 5.105894565582275} +01/27/2022 21:13:17 - INFO - codeparrot_training - Step 1959: {'lr': 0.0004897500000000001, 'samples': 376320, 'steps': 1959, 'loss/train': 4.603535413742065} +01/27/2022 21:13:21 - INFO - codeparrot_training - Step 1960: {'lr': 0.00049, 'samples': 376512, 'steps': 1960, 'loss/train': 4.694121837615967} +01/27/2022 21:13:27 - INFO - codeparrot_training - Step 1961: {'lr': 0.00049025, 'samples': 376704, 'steps': 1961, 'loss/train': 6.37076210975647} +01/27/2022 21:13:31 - INFO - codeparrot_training - Step 1962: {'lr': 0.0004905, 'samples': 376896, 'steps': 1962, 'loss/train': 4.068798780441284} +01/27/2022 21:13:36 - INFO - codeparrot_training - Step 1963: {'lr': 0.0004907500000000001, 'samples': 377088, 'steps': 1963, 'loss/train': 4.044296979904175} +01/27/2022 21:13:40 - INFO - codeparrot_training - Step 1964: {'lr': 0.000491, 'samples': 377280, 'steps': 1964, 'loss/train': 4.433956861495972} +01/27/2022 21:13:44 - INFO - codeparrot_training - Step 1965: {'lr': 0.00049125, 'samples': 377472, 'steps': 1965, 'loss/train': 3.865524172782898} +01/27/2022 21:13:49 - INFO - codeparrot_training - Step 1966: {'lr': 0.0004915, 'samples': 377664, 'steps': 1966, 'loss/train': 4.424926042556763} +01/27/2022 21:13:53 - INFO - codeparrot_training - Step 1967: {'lr': 0.00049175, 'samples': 377856, 'steps': 1967, 'loss/train': 4.4014259576797485} +01/27/2022 21:13:57 - INFO - codeparrot_training - Step 1968: {'lr': 0.000492, 'samples': 378048, 'steps': 1968, 'loss/train': 3.88840913772583} +01/27/2022 21:14:01 - INFO - codeparrot_training - Step 1969: {'lr': 0.0004922500000000001, 'samples': 378240, 'steps': 1969, 'loss/train': 5.5969297885894775} +01/27/2022 21:14:06 - INFO - codeparrot_training - Step 1970: {'lr': 0.0004925, 'samples': 378432, 'steps': 1970, 'loss/train': 4.795366644859314} +01/27/2022 21:14:11 - INFO - codeparrot_training - Step 1971: {'lr': 0.00049275, 'samples': 378624, 'steps': 1971, 'loss/train': 5.265831828117371} +01/27/2022 21:14:16 - INFO - codeparrot_training - Step 1972: {'lr': 0.0004930000000000001, 'samples': 378816, 'steps': 1972, 'loss/train': 3.9992154836654663} +01/27/2022 21:14:20 - INFO - codeparrot_training - Step 1973: {'lr': 0.00049325, 'samples': 379008, 'steps': 1973, 'loss/train': 4.666123151779175} +01/27/2022 21:14:24 - INFO - codeparrot_training - Step 1974: {'lr': 0.0004935, 'samples': 379200, 'steps': 1974, 'loss/train': 6.514744520187378} +01/27/2022 21:14:28 - INFO - codeparrot_training - Step 1975: {'lr': 0.00049375, 'samples': 379392, 'steps': 1975, 'loss/train': 3.383027672767639} +01/27/2022 21:14:33 - INFO - codeparrot_training - Step 1976: {'lr': 0.000494, 'samples': 379584, 'steps': 1976, 'loss/train': 4.652289032936096} +01/27/2022 21:14:37 - INFO - codeparrot_training - Step 1977: {'lr': 0.00049425, 'samples': 379776, 'steps': 1977, 'loss/train': 5.445010185241699} +01/27/2022 21:14:41 - INFO - codeparrot_training - Step 1978: {'lr': 0.0004945, 'samples': 379968, 'steps': 1978, 'loss/train': 3.688882827758789} +01/27/2022 21:14:46 - INFO - codeparrot_training - Step 1979: {'lr': 0.0004947500000000001, 'samples': 380160, 'steps': 1979, 'loss/train': 4.830942749977112} +01/27/2022 21:14:50 - INFO - codeparrot_training - Step 1980: {'lr': 0.000495, 
'samples': 380352, 'steps': 1980, 'loss/train': 3.948821783065796} +01/27/2022 21:14:56 - INFO - codeparrot_training - Step 1981: {'lr': 0.00049525, 'samples': 380544, 'steps': 1981, 'loss/train': 4.744636058807373} +01/27/2022 21:15:00 - INFO - codeparrot_training - Step 1982: {'lr': 0.0004955, 'samples': 380736, 'steps': 1982, 'loss/train': 5.086475729942322} +01/27/2022 21:15:04 - INFO - codeparrot_training - Step 1983: {'lr': 0.00049575, 'samples': 380928, 'steps': 1983, 'loss/train': 5.406883120536804} +01/27/2022 21:15:08 - INFO - codeparrot_training - Step 1984: {'lr': 0.000496, 'samples': 381120, 'steps': 1984, 'loss/train': 4.5938379764556885} +01/27/2022 21:15:12 - INFO - codeparrot_training - Step 1985: {'lr': 0.0004962500000000001, 'samples': 381312, 'steps': 1985, 'loss/train': 3.1530808210372925} +01/27/2022 21:15:18 - INFO - codeparrot_training - Step 1986: {'lr': 0.0004965, 'samples': 381504, 'steps': 1986, 'loss/train': 4.398847460746765} +01/27/2022 21:15:22 - INFO - codeparrot_training - Step 1987: {'lr': 0.00049675, 'samples': 381696, 'steps': 1987, 'loss/train': 4.239651918411255} +01/27/2022 21:15:26 - INFO - codeparrot_training - Step 1988: {'lr': 0.000497, 'samples': 381888, 'steps': 1988, 'loss/train': 5.302281618118286} +01/27/2022 21:15:30 - INFO - codeparrot_training - Step 1989: {'lr': 0.0004972500000000001, 'samples': 382080, 'steps': 1989, 'loss/train': 4.933800101280212} +01/27/2022 21:15:34 - INFO - codeparrot_training - Step 1990: {'lr': 0.0004975, 'samples': 382272, 'steps': 1990, 'loss/train': 3.3851537704467773} +01/27/2022 21:15:40 - INFO - codeparrot_training - Step 1991: {'lr': 0.00049775, 'samples': 382464, 'steps': 1991, 'loss/train': 4.300583839416504} +01/27/2022 21:15:44 - INFO - codeparrot_training - Step 1992: {'lr': 0.000498, 'samples': 382656, 'steps': 1992, 'loss/train': 3.1776952743530273} +01/27/2022 21:15:48 - INFO - codeparrot_training - Step 1993: {'lr': 0.00049825, 'samples': 382848, 'steps': 1993, 'loss/train': 4.60379683971405} +01/27/2022 21:15:52 - INFO - codeparrot_training - Step 1994: {'lr': 0.0004985, 'samples': 383040, 'steps': 1994, 'loss/train': 4.928932785987854} +01/27/2022 21:15:56 - INFO - codeparrot_training - Step 1995: {'lr': 0.0004987500000000001, 'samples': 383232, 'steps': 1995, 'loss/train': 4.511248826980591} +01/27/2022 21:16:02 - INFO - codeparrot_training - Step 1996: {'lr': 0.000499, 'samples': 383424, 'steps': 1996, 'loss/train': 5.340387940406799} +01/27/2022 21:16:06 - INFO - codeparrot_training - Step 1997: {'lr': 0.00049925, 'samples': 383616, 'steps': 1997, 'loss/train': 4.429168224334717} +01/27/2022 21:16:10 - INFO - codeparrot_training - Step 1998: {'lr': 0.0004995, 'samples': 383808, 'steps': 1998, 'loss/train': 4.711615204811096} +01/27/2022 21:16:15 - INFO - codeparrot_training - Step 1999: {'lr': 0.0004997500000000001, 'samples': 384000, 'steps': 1999, 'loss/train': 4.689506649971008} +01/27/2022 21:16:15 - INFO - codeparrot_training - Evaluating and saving model checkpoint