diff --git "a/log/debug_0.log" "b/log/debug_0.log" new file mode 100644--- /dev/null +++ "b/log/debug_0.log" @@ -0,0 +1,3180 @@ +01/22/2022 20:52:35 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 20:52:35 - WARNING - huggingface_hub.repository - Revision `cool-snowflake-1` does not exist. Created and checked out branch `cool-snowflake-1`. +01/22/2022 20:52:35 - WARNING - huggingface_hub.repository - +01/22/2022 20:52:48 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 20:52:49 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 20:53:09 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 16, 'steps': 0, 'loss/train': 12.395364761352539} +01/22/2022 20:54:32 - INFO - codeparrot_training - Step 2: {'lr': 2.5e-07, 'samples': 32, 'steps': 1, 'loss/train': 12.380073547363281} +01/22/2022 20:55:49 - INFO - codeparrot_training - Step 3: {'lr': 5e-07, 'samples': 48, 'steps': 2, 'loss/train': 12.365830421447754} +01/22/2022 20:55:49 - INFO - codeparrot_training - Step 4: {'lr': 7.5e-07, 'samples': 64, 'steps': 3, 'loss/train': 12.331064224243164} +01/22/2022 20:55:49 - INFO - codeparrot_training - Step 5: {'lr': 1e-06, 'samples': 80, 'steps': 4, 'loss/train': 12.286980628967285} +01/22/2022 20:55:50 - INFO - codeparrot_training - Step 6: {'lr': 1.25e-06, 'samples': 96, 'steps': 5, 'loss/train': 12.225024223327637} +01/22/2022 20:55:50 - INFO - codeparrot_training - Step 7: {'lr': 1.5e-06, 'samples': 112, 'steps': 6, 'loss/train': 12.212639808654785} +01/22/2022 20:55:50 - INFO - codeparrot_training - Step 8: {'lr': 1.75e-06, 'samples': 128, 'steps': 7, 'loss/train': 12.118595123291016} +01/22/2022 20:55:51 - INFO - codeparrot_training - Step 9: {'lr': 2e-06, 'samples': 144, 'steps': 8, 'loss/train': 12.167859077453613} +01/22/2022 20:55:51 - INFO - codeparrot_training - Step 10: {'lr': 2.25e-06, 'samples': 160, 'steps': 9, 'loss/train': 12.072549819946289} +01/22/2022 20:55:51 - INFO - codeparrot_training - Step 11: {'lr': 2.5e-06, 'samples': 176, 'steps': 10, 'loss/train': 11.703225135803223} +01/22/2022 20:55:52 - INFO - codeparrot_training - Step 12: {'lr': 2.75e-06, 'samples': 192, 'steps': 11, 'loss/train': 11.834994316101074} +01/22/2022 20:55:52 - INFO - codeparrot_training - Step 13: {'lr': 3e-06, 'samples': 208, 'steps': 12, 'loss/train': 11.85538387298584} +01/22/2022 20:55:52 - INFO - codeparrot_training - Step 14: {'lr': 3.25e-06, 'samples': 224, 'steps': 13, 'loss/train': 11.481832504272461} +01/22/2022 20:55:53 - INFO - codeparrot_training - Step 15: {'lr': 3.5e-06, 'samples': 240, 'steps': 14, 'loss/train': 11.691040992736816} +01/22/2022 20:55:53 - INFO - codeparrot_training - Step 16: {'lr': 3.75e-06, 'samples': 256, 'steps': 15, 'loss/train': 11.338891983032227} +01/22/2022 20:55:53 - INFO - codeparrot_training - Step 17: {'lr': 4e-06, 'samples': 272, 'steps': 16, 'loss/train': 11.123676300048828} +01/22/2022 20:55:54 - INFO - codeparrot_training - Step 18: {'lr': 4.250000000000001e-06, 'samples': 288, 'steps': 17, 'loss/train': 11.147907257080078} +01/22/2022 20:55:54 - INFO - codeparrot_training - Step 19: {'lr': 4.5e-06, 'samples': 304, 'steps': 18, 'loss/train': 10.928781509399414} +01/22/2022 20:55:55 - INFO - codeparrot_training - Step 20: {'lr': 4.75e-06, 'samples': 320, 'steps': 19, 
'loss/train': 11.311997413635254} +01/22/2022 20:55:55 - INFO - codeparrot_training - Step 21: {'lr': 5e-06, 'samples': 336, 'steps': 20, 'loss/train': 12.104765892028809} +01/22/2022 20:55:55 - INFO - codeparrot_training - Step 22: {'lr': 5.2500000000000006e-06, 'samples': 352, 'steps': 21, 'loss/train': 11.283578872680664} +01/22/2022 20:55:56 - INFO - codeparrot_training - Step 23: {'lr': 5.5e-06, 'samples': 368, 'steps': 22, 'loss/train': 10.738919258117676} +01/22/2022 20:55:56 - INFO - codeparrot_training - Step 24: {'lr': 5.75e-06, 'samples': 384, 'steps': 23, 'loss/train': 11.239081382751465} +01/22/2022 20:55:56 - INFO - codeparrot_training - Step 25: {'lr': 6e-06, 'samples': 400, 'steps': 24, 'loss/train': 11.29448413848877} +01/22/2022 20:55:57 - INFO - codeparrot_training - Step 26: {'lr': 6.25e-06, 'samples': 416, 'steps': 25, 'loss/train': 11.0428466796875} +01/22/2022 20:55:57 - INFO - codeparrot_training - Step 27: {'lr': 6.5e-06, 'samples': 432, 'steps': 26, 'loss/train': 10.94733715057373} +01/22/2022 20:55:57 - INFO - codeparrot_training - Step 28: {'lr': 6.75e-06, 'samples': 448, 'steps': 27, 'loss/train': 11.044628143310547} +01/22/2022 20:55:58 - INFO - codeparrot_training - Step 29: {'lr': 7e-06, 'samples': 464, 'steps': 28, 'loss/train': 10.80739688873291} +01/22/2022 20:55:58 - INFO - codeparrot_training - Step 30: {'lr': 7.250000000000001e-06, 'samples': 480, 'steps': 29, 'loss/train': 11.351018905639648} +01/22/2022 20:55:58 - INFO - codeparrot_training - Step 31: {'lr': 7.5e-06, 'samples': 496, 'steps': 30, 'loss/train': 11.361815452575684} +01/22/2022 20:55:59 - INFO - codeparrot_training - Step 32: {'lr': 7.75e-06, 'samples': 512, 'steps': 31, 'loss/train': 11.041726112365723} +01/22/2022 20:55:59 - INFO - codeparrot_training - Step 33: {'lr': 8e-06, 'samples': 528, 'steps': 32, 'loss/train': 10.02576732635498} +01/22/2022 20:55:59 - INFO - codeparrot_training - Step 34: {'lr': 8.25e-06, 'samples': 544, 'steps': 33, 'loss/train': 11.248358726501465} +01/22/2022 20:56:00 - INFO - codeparrot_training - Step 35: {'lr': 8.500000000000002e-06, 'samples': 560, 'steps': 34, 'loss/train': 10.389111518859863} +01/22/2022 20:56:00 - INFO - codeparrot_training - Step 36: {'lr': 8.750000000000001e-06, 'samples': 576, 'steps': 35, 'loss/train': 10.200538635253906} +01/22/2022 20:56:00 - INFO - codeparrot_training - Step 37: {'lr': 9e-06, 'samples': 592, 'steps': 36, 'loss/train': 10.598636627197266} +01/22/2022 20:56:01 - INFO - codeparrot_training - Step 38: {'lr': 9.25e-06, 'samples': 608, 'steps': 37, 'loss/train': 11.401820182800293} +01/22/2022 20:56:01 - INFO - codeparrot_training - Step 39: {'lr': 9.5e-06, 'samples': 624, 'steps': 38, 'loss/train': 11.032316207885742} +01/22/2022 20:56:01 - INFO - codeparrot_training - Step 40: {'lr': 9.75e-06, 'samples': 640, 'steps': 39, 'loss/train': 11.054495811462402} +01/22/2022 20:56:02 - INFO - codeparrot_training - Step 41: {'lr': 1e-05, 'samples': 656, 'steps': 40, 'loss/train': 10.676679611206055} +01/22/2022 20:56:02 - INFO - codeparrot_training - Step 42: {'lr': 1.025e-05, 'samples': 672, 'steps': 41, 'loss/train': 11.124625205993652} +01/22/2022 20:56:03 - INFO - codeparrot_training - Step 43: {'lr': 1.0500000000000001e-05, 'samples': 688, 'steps': 42, 'loss/train': 9.860312461853027} +01/22/2022 20:56:03 - INFO - codeparrot_training - Step 44: {'lr': 1.0749999999999999e-05, 'samples': 704, 'steps': 43, 'loss/train': 10.163208961486816} +01/22/2022 20:56:03 - INFO - codeparrot_training - Step 45: {'lr': 1.1e-05, 
'samples': 720, 'steps': 44, 'loss/train': 9.784882545471191} +01/22/2022 20:56:04 - INFO - codeparrot_training - Step 46: {'lr': 1.1249999999999999e-05, 'samples': 736, 'steps': 45, 'loss/train': 11.147700309753418} +01/22/2022 20:56:04 - INFO - codeparrot_training - Step 47: {'lr': 1.15e-05, 'samples': 752, 'steps': 46, 'loss/train': 10.9955415725708} +01/22/2022 20:56:04 - INFO - codeparrot_training - Step 48: {'lr': 1.1750000000000001e-05, 'samples': 768, 'steps': 47, 'loss/train': 11.426728248596191} +01/22/2022 20:56:05 - INFO - codeparrot_training - Step 49: {'lr': 1.2e-05, 'samples': 784, 'steps': 48, 'loss/train': 11.440892219543457} +01/22/2022 20:56:05 - INFO - codeparrot_training - Step 50: {'lr': 1.2250000000000001e-05, 'samples': 800, 'steps': 49, 'loss/train': 10.05504035949707} +01/22/2022 20:56:05 - INFO - codeparrot_training - Step 51: {'lr': 1.25e-05, 'samples': 816, 'steps': 50, 'loss/train': 10.756003379821777} +01/22/2022 20:56:06 - INFO - codeparrot_training - Step 52: {'lr': 1.275e-05, 'samples': 832, 'steps': 51, 'loss/train': 10.536264419555664} +01/22/2022 20:56:06 - INFO - codeparrot_training - Step 53: {'lr': 1.3e-05, 'samples': 848, 'steps': 52, 'loss/train': 10.602701187133789} +01/22/2022 20:56:06 - INFO - codeparrot_training - Step 54: {'lr': 1.325e-05, 'samples': 864, 'steps': 53, 'loss/train': 10.935178756713867} +01/22/2022 20:56:07 - INFO - codeparrot_training - Step 55: {'lr': 1.35e-05, 'samples': 880, 'steps': 54, 'loss/train': 11.077844619750977} +01/22/2022 20:56:07 - INFO - codeparrot_training - Step 56: {'lr': 1.375e-05, 'samples': 896, 'steps': 55, 'loss/train': 11.258915901184082} +01/22/2022 20:56:07 - INFO - codeparrot_training - Step 57: {'lr': 1.4e-05, 'samples': 912, 'steps': 56, 'loss/train': 11.015084266662598} +01/22/2022 20:56:08 - INFO - codeparrot_training - Step 58: {'lr': 1.425e-05, 'samples': 928, 'steps': 57, 'loss/train': 11.075411796569824} +01/22/2022 20:56:09 - INFO - codeparrot_training - Step 59: {'lr': 1.4500000000000002e-05, 'samples': 944, 'steps': 58, 'loss/train': 10.109169006347656} +01/22/2022 20:56:09 - INFO - codeparrot_training - Step 60: {'lr': 1.475e-05, 'samples': 960, 'steps': 59, 'loss/train': 10.823247909545898} +01/22/2022 20:56:10 - INFO - codeparrot_training - Step 61: {'lr': 1.5e-05, 'samples': 976, 'steps': 60, 'loss/train': 10.406753540039062} +01/22/2022 20:56:10 - INFO - codeparrot_training - Step 62: {'lr': 1.525e-05, 'samples': 992, 'steps': 61, 'loss/train': 10.470144271850586} +01/22/2022 20:56:10 - INFO - codeparrot_training - Step 63: {'lr': 1.55e-05, 'samples': 1008, 'steps': 62, 'loss/train': 11.110251426696777} +01/22/2022 20:56:11 - INFO - codeparrot_training - Step 64: {'lr': 1.575e-05, 'samples': 1024, 'steps': 63, 'loss/train': 11.27746868133545} +01/22/2022 20:56:11 - INFO - codeparrot_training - Step 65: {'lr': 1.6e-05, 'samples': 1040, 'steps': 64, 'loss/train': 10.849693298339844} +01/22/2022 20:56:11 - INFO - codeparrot_training - Step 66: {'lr': 1.6250000000000002e-05, 'samples': 1056, 'steps': 65, 'loss/train': 10.711199760437012} +01/22/2022 20:56:12 - INFO - codeparrot_training - Step 67: {'lr': 1.65e-05, 'samples': 1072, 'steps': 66, 'loss/train': 10.465060234069824} +01/22/2022 20:56:12 - INFO - codeparrot_training - Step 68: {'lr': 1.675e-05, 'samples': 1088, 'steps': 67, 'loss/train': 10.188969612121582} +01/22/2022 20:56:12 - INFO - codeparrot_training - Step 69: {'lr': 1.7000000000000003e-05, 'samples': 1104, 'steps': 68, 'loss/train': 10.782708168029785} +01/22/2022 
20:56:13 - INFO - codeparrot_training - Step 70: {'lr': 1.7250000000000003e-05, 'samples': 1120, 'steps': 69, 'loss/train': 11.109210968017578} +01/22/2022 20:56:13 - INFO - codeparrot_training - Step 71: {'lr': 1.7500000000000002e-05, 'samples': 1136, 'steps': 70, 'loss/train': 10.386109352111816} +01/22/2022 20:56:13 - INFO - codeparrot_training - Step 72: {'lr': 1.7749999999999998e-05, 'samples': 1152, 'steps': 71, 'loss/train': 11.366995811462402} +01/22/2022 20:56:14 - INFO - codeparrot_training - Step 73: {'lr': 1.8e-05, 'samples': 1168, 'steps': 72, 'loss/train': 10.820302963256836} +01/22/2022 20:56:14 - INFO - codeparrot_training - Step 74: {'lr': 1.825e-05, 'samples': 1184, 'steps': 73, 'loss/train': 10.64069652557373} +01/22/2022 20:56:14 - INFO - codeparrot_training - Step 75: {'lr': 1.85e-05, 'samples': 1200, 'steps': 74, 'loss/train': 10.842151641845703} +01/22/2022 20:56:15 - INFO - codeparrot_training - Step 76: {'lr': 1.875e-05, 'samples': 1216, 'steps': 75, 'loss/train': 10.25055980682373} +01/22/2022 20:56:15 - INFO - codeparrot_training - Step 77: {'lr': 1.9e-05, 'samples': 1232, 'steps': 76, 'loss/train': 10.273639678955078} +01/22/2022 20:56:15 - INFO - codeparrot_training - Step 78: {'lr': 1.925e-05, 'samples': 1248, 'steps': 77, 'loss/train': 10.013932228088379} +01/22/2022 20:56:16 - INFO - codeparrot_training - Step 79: {'lr': 1.95e-05, 'samples': 1264, 'steps': 78, 'loss/train': 10.555398941040039} +01/22/2022 20:56:16 - INFO - codeparrot_training - Step 80: {'lr': 1.975e-05, 'samples': 1280, 'steps': 79, 'loss/train': 10.614514350891113} +01/22/2022 20:56:16 - INFO - codeparrot_training - Step 81: {'lr': 2e-05, 'samples': 1296, 'steps': 80, 'loss/train': 9.87515640258789} +01/22/2022 20:56:17 - INFO - codeparrot_training - Step 82: {'lr': 2.025e-05, 'samples': 1312, 'steps': 81, 'loss/train': 10.26626205444336} +01/22/2022 20:56:17 - INFO - codeparrot_training - Step 83: {'lr': 2.05e-05, 'samples': 1328, 'steps': 82, 'loss/train': 10.267132759094238} +01/22/2022 20:56:18 - INFO - codeparrot_training - Step 84: {'lr': 2.0750000000000003e-05, 'samples': 1344, 'steps': 83, 'loss/train': 11.17909049987793} +01/22/2022 20:56:18 - INFO - codeparrot_training - Step 85: {'lr': 2.1000000000000002e-05, 'samples': 1360, 'steps': 84, 'loss/train': 11.411527633666992} +01/22/2022 20:56:18 - INFO - codeparrot_training - Step 86: {'lr': 2.125e-05, 'samples': 1376, 'steps': 85, 'loss/train': 10.811710357666016} +01/22/2022 20:56:19 - INFO - codeparrot_training - Step 87: {'lr': 2.1499999999999997e-05, 'samples': 1392, 'steps': 86, 'loss/train': 10.285664558410645} +01/22/2022 20:56:19 - INFO - codeparrot_training - Step 88: {'lr': 2.175e-05, 'samples': 1408, 'steps': 87, 'loss/train': 10.126285552978516} +01/22/2022 20:56:19 - INFO - codeparrot_training - Step 89: {'lr': 2.2e-05, 'samples': 1424, 'steps': 88, 'loss/train': 10.457144737243652} +01/22/2022 20:56:20 - INFO - codeparrot_training - Step 90: {'lr': 2.225e-05, 'samples': 1440, 'steps': 89, 'loss/train': 10.942045211791992} +01/22/2022 20:56:20 - INFO - codeparrot_training - Step 91: {'lr': 2.2499999999999998e-05, 'samples': 1456, 'steps': 90, 'loss/train': 10.065037727355957} +01/22/2022 20:56:20 - INFO - codeparrot_training - Step 92: {'lr': 2.275e-05, 'samples': 1472, 'steps': 91, 'loss/train': 10.384719848632812} +01/22/2022 20:56:21 - INFO - codeparrot_training - Step 93: {'lr': 2.3e-05, 'samples': 1488, 'steps': 92, 'loss/train': 10.713189125061035} +01/22/2022 20:56:21 - INFO - codeparrot_training - Step 94: 
{'lr': 2.325e-05, 'samples': 1504, 'steps': 93, 'loss/train': 10.583086013793945} +01/22/2022 20:56:21 - INFO - codeparrot_training - Step 95: {'lr': 2.3500000000000002e-05, 'samples': 1520, 'steps': 94, 'loss/train': 10.780173301696777} +01/22/2022 20:56:22 - INFO - codeparrot_training - Step 96: {'lr': 2.375e-05, 'samples': 1536, 'steps': 95, 'loss/train': 11.227330207824707} +01/22/2022 20:56:22 - INFO - codeparrot_training - Step 97: {'lr': 2.4e-05, 'samples': 1552, 'steps': 96, 'loss/train': 10.835565567016602} +01/22/2022 20:56:22 - INFO - codeparrot_training - Step 98: {'lr': 2.425e-05, 'samples': 1568, 'steps': 97, 'loss/train': 10.934825897216797} +01/22/2022 20:56:23 - INFO - codeparrot_training - Step 99: {'lr': 2.4500000000000003e-05, 'samples': 1584, 'steps': 98, 'loss/train': 10.03113079071045} +01/22/2022 20:56:23 - INFO - codeparrot_training - Step 100: {'lr': 2.4750000000000002e-05, 'samples': 1600, 'steps': 99, 'loss/train': 10.193426132202148} +01/22/2022 20:56:23 - INFO - codeparrot_training - Step 101: {'lr': 2.5e-05, 'samples': 1616, 'steps': 100, 'loss/train': 10.379514694213867} +01/22/2022 20:56:24 - INFO - codeparrot_training - Step 102: {'lr': 2.525e-05, 'samples': 1632, 'steps': 101, 'loss/train': 10.262869834899902} +01/22/2022 20:56:24 - INFO - codeparrot_training - Step 103: {'lr': 2.55e-05, 'samples': 1648, 'steps': 102, 'loss/train': 10.728103637695312} +01/22/2022 20:56:24 - INFO - codeparrot_training - Step 104: {'lr': 2.575e-05, 'samples': 1664, 'steps': 103, 'loss/train': 10.676426887512207} +01/22/2022 20:56:25 - INFO - codeparrot_training - Step 105: {'lr': 2.6e-05, 'samples': 1680, 'steps': 104, 'loss/train': 9.917617797851562} +01/22/2022 20:56:25 - INFO - codeparrot_training - Step 106: {'lr': 2.625e-05, 'samples': 1696, 'steps': 105, 'loss/train': 9.786602020263672} +01/22/2022 20:56:25 - INFO - codeparrot_training - Step 107: {'lr': 2.65e-05, 'samples': 1712, 'steps': 106, 'loss/train': 9.777457237243652} +01/22/2022 20:56:26 - INFO - codeparrot_training - Step 108: {'lr': 2.675e-05, 'samples': 1728, 'steps': 107, 'loss/train': 10.306411743164062} +01/22/2022 20:56:26 - INFO - codeparrot_training - Step 109: {'lr': 2.7e-05, 'samples': 1744, 'steps': 108, 'loss/train': 9.97089958190918} +01/22/2022 20:56:26 - INFO - codeparrot_training - Step 110: {'lr': 2.725e-05, 'samples': 1760, 'steps': 109, 'loss/train': 10.40146541595459} +01/22/2022 20:56:27 - INFO - codeparrot_training - Step 111: {'lr': 2.75e-05, 'samples': 1776, 'steps': 110, 'loss/train': 10.55298137664795} +01/22/2022 20:56:27 - INFO - codeparrot_training - Step 112: {'lr': 2.775e-05, 'samples': 1792, 'steps': 111, 'loss/train': 9.506136894226074} +01/22/2022 20:56:27 - INFO - codeparrot_training - Step 113: {'lr': 2.8e-05, 'samples': 1808, 'steps': 112, 'loss/train': 10.882134437561035} +01/22/2022 20:56:28 - INFO - codeparrot_training - Step 114: {'lr': 2.8250000000000002e-05, 'samples': 1824, 'steps': 113, 'loss/train': 10.286812782287598} +01/22/2022 20:56:28 - INFO - codeparrot_training - Step 115: {'lr': 2.85e-05, 'samples': 1840, 'steps': 114, 'loss/train': 9.520947456359863} +01/22/2022 20:56:28 - INFO - codeparrot_training - Step 116: {'lr': 2.875e-05, 'samples': 1856, 'steps': 115, 'loss/train': 10.363905906677246} +01/22/2022 20:56:29 - INFO - codeparrot_training - Step 117: {'lr': 2.9000000000000004e-05, 'samples': 1872, 'steps': 116, 'loss/train': 10.30086898803711} +01/22/2022 20:56:29 - INFO - codeparrot_training - Step 118: {'lr': 2.9250000000000003e-05, 'samples': 
1888, 'steps': 117, 'loss/train': 10.166752815246582} +01/22/2022 20:56:32 - INFO - codeparrot_training - Step 119: {'lr': 2.95e-05, 'samples': 1904, 'steps': 118, 'loss/train': 10.167657852172852} +01/22/2022 20:56:33 - INFO - codeparrot_training - Step 120: {'lr': 2.9749999999999998e-05, 'samples': 1920, 'steps': 119, 'loss/train': 10.2529296875} +01/22/2022 20:56:33 - INFO - codeparrot_training - Step 121: {'lr': 3e-05, 'samples': 1936, 'steps': 120, 'loss/train': 9.889345169067383} +01/22/2022 20:56:33 - INFO - codeparrot_training - Step 122: {'lr': 3.025e-05, 'samples': 1952, 'steps': 121, 'loss/train': 10.42039680480957} +01/22/2022 20:56:34 - INFO - codeparrot_training - Step 123: {'lr': 3.05e-05, 'samples': 1968, 'steps': 122, 'loss/train': 10.33146858215332} +01/22/2022 20:56:34 - INFO - codeparrot_training - Step 124: {'lr': 3.075e-05, 'samples': 1984, 'steps': 123, 'loss/train': 9.850687980651855} +01/22/2022 20:56:34 - INFO - codeparrot_training - Step 125: {'lr': 3.1e-05, 'samples': 2000, 'steps': 124, 'loss/train': 10.192840576171875} +01/22/2022 20:56:35 - INFO - codeparrot_training - Step 126: {'lr': 3.125e-05, 'samples': 2016, 'steps': 125, 'loss/train': 9.512140274047852} +01/22/2022 20:56:35 - INFO - codeparrot_training - Step 127: {'lr': 3.15e-05, 'samples': 2032, 'steps': 126, 'loss/train': 9.781015396118164} +01/22/2022 20:56:35 - INFO - codeparrot_training - Step 128: {'lr': 3.175e-05, 'samples': 2048, 'steps': 127, 'loss/train': 9.639117240905762} +01/22/2022 20:56:36 - INFO - codeparrot_training - Step 129: {'lr': 3.2e-05, 'samples': 2064, 'steps': 128, 'loss/train': 10.002121925354004} +01/22/2022 20:56:36 - INFO - codeparrot_training - Step 130: {'lr': 3.2250000000000005e-05, 'samples': 2080, 'steps': 129, 'loss/train': 10.164360046386719} +01/22/2022 20:56:37 - INFO - codeparrot_training - Step 131: {'lr': 3.2500000000000004e-05, 'samples': 2096, 'steps': 130, 'loss/train': 10.155795097351074} +01/22/2022 20:56:37 - INFO - codeparrot_training - Step 132: {'lr': 3.275e-05, 'samples': 2112, 'steps': 131, 'loss/train': 9.502567291259766} +01/22/2022 20:56:37 - INFO - codeparrot_training - Step 133: {'lr': 3.3e-05, 'samples': 2128, 'steps': 132, 'loss/train': 9.794000625610352} +01/22/2022 20:56:38 - INFO - codeparrot_training - Step 134: {'lr': 3.325e-05, 'samples': 2144, 'steps': 133, 'loss/train': 9.97574234008789} +01/22/2022 20:56:38 - INFO - codeparrot_training - Step 135: {'lr': 3.35e-05, 'samples': 2160, 'steps': 134, 'loss/train': 10.383116722106934} +01/22/2022 20:56:38 - INFO - codeparrot_training - Step 136: {'lr': 3.375e-05, 'samples': 2176, 'steps': 135, 'loss/train': 10.488580703735352} +01/22/2022 20:56:39 - INFO - codeparrot_training - Step 137: {'lr': 3.4000000000000007e-05, 'samples': 2192, 'steps': 136, 'loss/train': 9.469535827636719} +01/22/2022 20:56:39 - INFO - codeparrot_training - Step 138: {'lr': 3.4250000000000006e-05, 'samples': 2208, 'steps': 137, 'loss/train': 9.889079093933105} +01/22/2022 20:56:39 - INFO - codeparrot_training - Step 139: {'lr': 3.4500000000000005e-05, 'samples': 2224, 'steps': 138, 'loss/train': 10.372810363769531} +01/22/2022 20:56:40 - INFO - codeparrot_training - Step 140: {'lr': 3.4750000000000004e-05, 'samples': 2240, 'steps': 139, 'loss/train': 9.959515571594238} +01/22/2022 20:56:40 - INFO - codeparrot_training - Step 141: {'lr': 3.5000000000000004e-05, 'samples': 2256, 'steps': 140, 'loss/train': 10.221864700317383} +01/22/2022 20:56:40 - INFO - codeparrot_training - Step 142: {'lr': 3.5249999999999996e-05, 
'samples': 2272, 'steps': 141, 'loss/train': 9.521830558776855} +01/22/2022 20:56:41 - INFO - codeparrot_training - Step 143: {'lr': 3.5499999999999996e-05, 'samples': 2288, 'steps': 142, 'loss/train': 9.839672088623047} +01/22/2022 20:56:41 - INFO - codeparrot_training - Step 144: {'lr': 3.5749999999999995e-05, 'samples': 2304, 'steps': 143, 'loss/train': 9.646993637084961} +01/22/2022 20:56:41 - INFO - codeparrot_training - Step 145: {'lr': 3.6e-05, 'samples': 2320, 'steps': 144, 'loss/train': 9.607089042663574} +01/22/2022 20:56:42 - INFO - codeparrot_training - Step 146: {'lr': 3.625e-05, 'samples': 2336, 'steps': 145, 'loss/train': 9.616317749023438} +01/22/2022 20:56:42 - INFO - codeparrot_training - Step 147: {'lr': 3.65e-05, 'samples': 2352, 'steps': 146, 'loss/train': 8.164849281311035} +01/22/2022 20:56:42 - INFO - codeparrot_training - Step 148: {'lr': 3.675e-05, 'samples': 2368, 'steps': 147, 'loss/train': 9.732786178588867} +01/22/2022 20:56:43 - INFO - codeparrot_training - Step 149: {'lr': 3.7e-05, 'samples': 2384, 'steps': 148, 'loss/train': 9.960180282592773} +01/22/2022 20:56:43 - INFO - codeparrot_training - Step 150: {'lr': 3.725e-05, 'samples': 2400, 'steps': 149, 'loss/train': 8.496954917907715} +01/22/2022 20:56:43 - INFO - codeparrot_training - Step 151: {'lr': 3.75e-05, 'samples': 2416, 'steps': 150, 'loss/train': 8.021126747131348} +01/22/2022 20:56:44 - INFO - codeparrot_training - Step 152: {'lr': 3.775e-05, 'samples': 2432, 'steps': 151, 'loss/train': 8.655198097229004} +01/22/2022 20:56:44 - INFO - codeparrot_training - Step 153: {'lr': 3.8e-05, 'samples': 2448, 'steps': 152, 'loss/train': 11.455052375793457} +01/22/2022 20:56:44 - INFO - codeparrot_training - Step 154: {'lr': 3.825e-05, 'samples': 2464, 'steps': 153, 'loss/train': 9.26590633392334} +01/22/2022 20:56:45 - INFO - codeparrot_training - Step 155: {'lr': 3.85e-05, 'samples': 2480, 'steps': 154, 'loss/train': 9.630188941955566} +01/22/2022 20:56:45 - INFO - codeparrot_training - Step 156: {'lr': 3.875e-05, 'samples': 2496, 'steps': 155, 'loss/train': 9.9923095703125} +01/22/2022 20:56:45 - INFO - codeparrot_training - Step 157: {'lr': 3.9e-05, 'samples': 2512, 'steps': 156, 'loss/train': 10.606157302856445} +01/22/2022 20:56:46 - INFO - codeparrot_training - Step 158: {'lr': 3.925e-05, 'samples': 2528, 'steps': 157, 'loss/train': 9.155928611755371} +01/22/2022 20:56:46 - INFO - codeparrot_training - Step 159: {'lr': 3.95e-05, 'samples': 2544, 'steps': 158, 'loss/train': 9.482091903686523} +01/22/2022 20:56:47 - INFO - codeparrot_training - Step 160: {'lr': 3.9750000000000004e-05, 'samples': 2560, 'steps': 159, 'loss/train': 10.334733009338379} +01/22/2022 20:56:47 - INFO - codeparrot_training - Step 161: {'lr': 4e-05, 'samples': 2576, 'steps': 160, 'loss/train': 9.646163940429688} +01/22/2022 20:56:47 - INFO - codeparrot_training - Step 162: {'lr': 4.025e-05, 'samples': 2592, 'steps': 161, 'loss/train': 9.751974105834961} +01/22/2022 20:56:48 - INFO - codeparrot_training - Step 163: {'lr': 4.05e-05, 'samples': 2608, 'steps': 162, 'loss/train': 10.007874488830566} +01/22/2022 20:56:48 - INFO - codeparrot_training - Step 164: {'lr': 4.075e-05, 'samples': 2624, 'steps': 163, 'loss/train': 10.005730628967285} +01/22/2022 20:56:48 - INFO - codeparrot_training - Step 165: {'lr': 4.1e-05, 'samples': 2640, 'steps': 164, 'loss/train': 9.387503623962402} +01/22/2022 20:56:49 - INFO - codeparrot_training - Step 166: {'lr': 4.125e-05, 'samples': 2656, 'steps': 165, 'loss/train': 9.417560577392578} +01/22/2022 
20:56:49 - INFO - codeparrot_training - Step 167: {'lr': 4.1500000000000006e-05, 'samples': 2672, 'steps': 166, 'loss/train': 9.2996187210083} +01/22/2022 20:56:49 - INFO - codeparrot_training - Step 168: {'lr': 4.1750000000000005e-05, 'samples': 2688, 'steps': 167, 'loss/train': 10.39356517791748} +01/22/2022 20:56:50 - INFO - codeparrot_training - Step 169: {'lr': 4.2000000000000004e-05, 'samples': 2704, 'steps': 168, 'loss/train': 10.536253929138184} +01/22/2022 20:56:50 - INFO - codeparrot_training - Step 170: {'lr': 4.2250000000000004e-05, 'samples': 2720, 'steps': 169, 'loss/train': 10.551913261413574} +01/22/2022 20:56:50 - INFO - codeparrot_training - Step 171: {'lr': 4.25e-05, 'samples': 2736, 'steps': 170, 'loss/train': 10.02873420715332} +01/22/2022 20:56:51 - INFO - codeparrot_training - Step 172: {'lr': 4.275e-05, 'samples': 2752, 'steps': 171, 'loss/train': 9.67300033569336} +01/22/2022 20:56:51 - INFO - codeparrot_training - Step 173: {'lr': 4.2999999999999995e-05, 'samples': 2768, 'steps': 172, 'loss/train': 9.354616165161133} +01/22/2022 20:56:51 - INFO - codeparrot_training - Step 174: {'lr': 4.325e-05, 'samples': 2784, 'steps': 173, 'loss/train': 9.05424690246582} +01/22/2022 20:56:52 - INFO - codeparrot_training - Step 175: {'lr': 4.35e-05, 'samples': 2800, 'steps': 174, 'loss/train': 9.115776062011719} +01/22/2022 20:56:52 - INFO - codeparrot_training - Step 176: {'lr': 4.375e-05, 'samples': 2816, 'steps': 175, 'loss/train': 8.28026294708252} +01/22/2022 20:56:52 - INFO - codeparrot_training - Step 177: {'lr': 4.4e-05, 'samples': 2832, 'steps': 176, 'loss/train': 10.305718421936035} +01/22/2022 20:56:53 - INFO - codeparrot_training - Step 178: {'lr': 4.425e-05, 'samples': 2848, 'steps': 177, 'loss/train': 10.17711353302002} +01/22/2022 20:56:54 - INFO - codeparrot_training - Step 179: {'lr': 4.45e-05, 'samples': 2864, 'steps': 178, 'loss/train': 9.678985595703125} +01/22/2022 20:56:54 - INFO - codeparrot_training - Step 180: {'lr': 4.475e-05, 'samples': 2880, 'steps': 179, 'loss/train': 8.8773832321167} +01/22/2022 20:56:54 - INFO - codeparrot_training - Step 181: {'lr': 4.4999999999999996e-05, 'samples': 2896, 'steps': 180, 'loss/train': 10.663888931274414} +01/22/2022 20:56:55 - INFO - codeparrot_training - Step 182: {'lr': 4.525e-05, 'samples': 2912, 'steps': 181, 'loss/train': 9.669669151306152} +01/22/2022 20:56:55 - INFO - codeparrot_training - Step 183: {'lr': 4.55e-05, 'samples': 2928, 'steps': 182, 'loss/train': 9.95152759552002} +01/22/2022 20:56:55 - INFO - codeparrot_training - Step 184: {'lr': 4.575e-05, 'samples': 2944, 'steps': 183, 'loss/train': 9.387929916381836} +01/22/2022 20:56:56 - INFO - codeparrot_training - Step 185: {'lr': 4.6e-05, 'samples': 2960, 'steps': 184, 'loss/train': 10.279635429382324} +01/22/2022 20:56:56 - INFO - codeparrot_training - Step 186: {'lr': 4.625e-05, 'samples': 2976, 'steps': 185, 'loss/train': 10.16331672668457} +01/22/2022 20:56:56 - INFO - codeparrot_training - Step 187: {'lr': 4.65e-05, 'samples': 2992, 'steps': 186, 'loss/train': 9.79081916809082} +01/22/2022 20:56:57 - INFO - codeparrot_training - Step 188: {'lr': 4.675e-05, 'samples': 3008, 'steps': 187, 'loss/train': 9.258560180664062} +01/22/2022 20:56:57 - INFO - codeparrot_training - Step 189: {'lr': 4.7000000000000004e-05, 'samples': 3024, 'steps': 188, 'loss/train': 9.407833099365234} +01/22/2022 20:56:57 - INFO - codeparrot_training - Step 190: {'lr': 4.725e-05, 'samples': 3040, 'steps': 189, 'loss/train': 10.318158149719238} +01/22/2022 20:56:58 - INFO - 
codeparrot_training - Step 191: {'lr': 4.75e-05, 'samples': 3056, 'steps': 190, 'loss/train': 10.494075775146484} +01/22/2022 20:56:58 - INFO - codeparrot_training - Step 192: {'lr': 4.775e-05, 'samples': 3072, 'steps': 191, 'loss/train': 10.135696411132812} +01/22/2022 20:56:59 - INFO - codeparrot_training - Step 193: {'lr': 4.8e-05, 'samples': 3088, 'steps': 192, 'loss/train': 9.647210121154785} +01/22/2022 20:56:59 - INFO - codeparrot_training - Step 194: {'lr': 4.825e-05, 'samples': 3104, 'steps': 193, 'loss/train': 9.735612869262695} +01/22/2022 20:56:59 - INFO - codeparrot_training - Step 195: {'lr': 4.85e-05, 'samples': 3120, 'steps': 194, 'loss/train': 9.434239387512207} +01/22/2022 20:57:00 - INFO - codeparrot_training - Step 196: {'lr': 4.8750000000000006e-05, 'samples': 3136, 'steps': 195, 'loss/train': 10.325057029724121} +01/22/2022 20:57:00 - INFO - codeparrot_training - Step 197: {'lr': 4.9000000000000005e-05, 'samples': 3152, 'steps': 196, 'loss/train': 10.190777778625488} +01/22/2022 20:57:00 - INFO - codeparrot_training - Step 198: {'lr': 4.9250000000000004e-05, 'samples': 3168, 'steps': 197, 'loss/train': 9.649053573608398} +01/22/2022 20:57:01 - INFO - codeparrot_training - Step 199: {'lr': 4.9500000000000004e-05, 'samples': 3184, 'steps': 198, 'loss/train': 8.961697578430176} +01/22/2022 20:57:01 - INFO - codeparrot_training - Step 200: {'lr': 4.975e-05, 'samples': 3200, 'steps': 199, 'loss/train': 9.047950744628906} +01/22/2022 20:57:01 - INFO - codeparrot_training - Step 201: {'lr': 5e-05, 'samples': 3216, 'steps': 200, 'loss/train': 9.705939292907715} +01/22/2022 20:57:02 - INFO - codeparrot_training - Step 202: {'lr': 5.025e-05, 'samples': 3232, 'steps': 201, 'loss/train': 9.753437042236328} +01/22/2022 20:57:02 - INFO - codeparrot_training - Step 203: {'lr': 5.05e-05, 'samples': 3248, 'steps': 202, 'loss/train': 10.07818603515625} +01/22/2022 20:57:02 - INFO - codeparrot_training - Step 204: {'lr': 5.075000000000001e-05, 'samples': 3264, 'steps': 203, 'loss/train': 9.823471069335938} +01/22/2022 20:57:03 - INFO - codeparrot_training - Step 205: {'lr': 5.1e-05, 'samples': 3280, 'steps': 204, 'loss/train': 8.930741310119629} +01/22/2022 20:57:03 - INFO - codeparrot_training - Step 206: {'lr': 5.125e-05, 'samples': 3296, 'steps': 205, 'loss/train': 8.931654930114746} +01/22/2022 20:57:03 - INFO - codeparrot_training - Step 207: {'lr': 5.15e-05, 'samples': 3312, 'steps': 206, 'loss/train': 9.466598510742188} +01/22/2022 20:57:04 - INFO - codeparrot_training - Step 208: {'lr': 5.175e-05, 'samples': 3328, 'steps': 207, 'loss/train': 10.018186569213867} +01/22/2022 20:57:04 - INFO - codeparrot_training - Step 209: {'lr': 5.2e-05, 'samples': 3344, 'steps': 208, 'loss/train': 9.609627723693848} +01/22/2022 20:57:04 - INFO - codeparrot_training - Step 210: {'lr': 5.2249999999999996e-05, 'samples': 3360, 'steps': 209, 'loss/train': 9.997960090637207} +01/22/2022 20:57:05 - INFO - codeparrot_training - Step 211: {'lr': 5.25e-05, 'samples': 3376, 'steps': 210, 'loss/train': 9.76566219329834} +01/22/2022 20:57:05 - INFO - codeparrot_training - Step 212: {'lr': 5.275e-05, 'samples': 3392, 'steps': 211, 'loss/train': 8.803820610046387} +01/22/2022 20:57:05 - INFO - codeparrot_training - Step 213: {'lr': 5.3e-05, 'samples': 3408, 'steps': 212, 'loss/train': 9.293028831481934} +01/22/2022 20:57:06 - INFO - codeparrot_training - Step 214: {'lr': 5.325e-05, 'samples': 3424, 'steps': 213, 'loss/train': 9.582266807556152} +01/22/2022 20:57:06 - INFO - codeparrot_training - Step 215: 
{'lr': 5.35e-05, 'samples': 3440, 'steps': 214, 'loss/train': 10.133804321289062} +01/22/2022 20:57:06 - INFO - codeparrot_training - Step 216: {'lr': 5.375e-05, 'samples': 3456, 'steps': 215, 'loss/train': 9.519866943359375} +01/22/2022 20:57:07 - INFO - codeparrot_training - Step 217: {'lr': 5.4e-05, 'samples': 3472, 'steps': 216, 'loss/train': 10.391036033630371} +01/22/2022 20:57:07 - INFO - codeparrot_training - Step 218: {'lr': 5.4250000000000004e-05, 'samples': 3488, 'steps': 217, 'loss/train': 9.021539688110352} +01/22/2022 20:57:07 - INFO - codeparrot_training - Step 219: {'lr': 5.45e-05, 'samples': 3504, 'steps': 218, 'loss/train': 10.21409797668457} +01/22/2022 20:57:08 - INFO - codeparrot_training - Step 220: {'lr': 5.475e-05, 'samples': 3520, 'steps': 219, 'loss/train': 9.79776382446289} +01/22/2022 20:57:08 - INFO - codeparrot_training - Step 221: {'lr': 5.5e-05, 'samples': 3536, 'steps': 220, 'loss/train': 8.551114082336426} +01/22/2022 20:57:08 - INFO - codeparrot_training - Step 222: {'lr': 5.525e-05, 'samples': 3552, 'steps': 221, 'loss/train': 9.325658798217773} +01/22/2022 20:57:09 - INFO - codeparrot_training - Step 223: {'lr': 5.55e-05, 'samples': 3568, 'steps': 222, 'loss/train': 8.953974723815918} +01/22/2022 20:57:09 - INFO - codeparrot_training - Step 224: {'lr': 5.575e-05, 'samples': 3584, 'steps': 223, 'loss/train': 9.841620445251465} +01/22/2022 20:57:09 - INFO - codeparrot_training - Step 225: {'lr': 5.6e-05, 'samples': 3600, 'steps': 224, 'loss/train': 9.721784591674805} +01/22/2022 20:57:10 - INFO - codeparrot_training - Step 226: {'lr': 5.6250000000000005e-05, 'samples': 3616, 'steps': 225, 'loss/train': 9.575292587280273} +01/22/2022 20:57:10 - INFO - codeparrot_training - Step 227: {'lr': 5.6500000000000005e-05, 'samples': 3632, 'steps': 226, 'loss/train': 10.191972732543945} +01/22/2022 20:57:11 - INFO - codeparrot_training - Step 228: {'lr': 5.6750000000000004e-05, 'samples': 3648, 'steps': 227, 'loss/train': 10.300728797912598} +01/22/2022 20:57:11 - INFO - codeparrot_training - Step 229: {'lr': 5.7e-05, 'samples': 3664, 'steps': 228, 'loss/train': 9.851534843444824} +01/22/2022 20:57:11 - INFO - codeparrot_training - Step 230: {'lr': 5.725e-05, 'samples': 3680, 'steps': 229, 'loss/train': 9.558744430541992} +01/22/2022 20:57:12 - INFO - codeparrot_training - Step 231: {'lr': 5.75e-05, 'samples': 3696, 'steps': 230, 'loss/train': 8.474645614624023} +01/22/2022 20:57:12 - INFO - codeparrot_training - Step 232: {'lr': 5.775e-05, 'samples': 3712, 'steps': 231, 'loss/train': 10.289031028747559} +01/22/2022 20:57:12 - INFO - codeparrot_training - Step 233: {'lr': 5.800000000000001e-05, 'samples': 3728, 'steps': 232, 'loss/train': 10.048102378845215} +01/22/2022 20:57:13 - INFO - codeparrot_training - Step 234: {'lr': 5.8250000000000006e-05, 'samples': 3744, 'steps': 233, 'loss/train': 9.36636734008789} +01/22/2022 20:57:13 - INFO - codeparrot_training - Step 235: {'lr': 5.8500000000000006e-05, 'samples': 3760, 'steps': 234, 'loss/train': 9.784287452697754} +01/22/2022 20:57:13 - INFO - codeparrot_training - Step 236: {'lr': 5.875e-05, 'samples': 3776, 'steps': 235, 'loss/train': 9.75759506225586} +01/22/2022 20:57:14 - INFO - codeparrot_training - Step 237: {'lr': 5.9e-05, 'samples': 3792, 'steps': 236, 'loss/train': 9.489968299865723} +01/22/2022 20:57:14 - INFO - codeparrot_training - Step 238: {'lr': 5.925e-05, 'samples': 3808, 'steps': 237, 'loss/train': 9.614306449890137} +01/22/2022 20:57:15 - INFO - codeparrot_training - Step 239: {'lr': 
5.9499999999999996e-05, 'samples': 3824, 'steps': 238, 'loss/train': 8.893197059631348} +01/22/2022 20:57:15 - INFO - codeparrot_training - Step 240: {'lr': 5.9749999999999995e-05, 'samples': 3840, 'steps': 239, 'loss/train': 9.911049842834473} +01/22/2022 20:57:16 - INFO - codeparrot_training - Step 241: {'lr': 6e-05, 'samples': 3856, 'steps': 240, 'loss/train': 10.15143871307373} +01/22/2022 20:57:16 - INFO - codeparrot_training - Step 242: {'lr': 6.025e-05, 'samples': 3872, 'steps': 241, 'loss/train': 9.887776374816895} +01/22/2022 20:57:16 - INFO - codeparrot_training - Step 243: {'lr': 6.05e-05, 'samples': 3888, 'steps': 242, 'loss/train': 8.977574348449707} +01/22/2022 20:57:17 - INFO - codeparrot_training - Step 244: {'lr': 6.075e-05, 'samples': 3904, 'steps': 243, 'loss/train': 9.219064712524414} +01/22/2022 20:57:17 - INFO - codeparrot_training - Step 245: {'lr': 6.1e-05, 'samples': 3920, 'steps': 244, 'loss/train': 9.202638626098633} +01/22/2022 20:57:17 - INFO - codeparrot_training - Step 246: {'lr': 6.125e-05, 'samples': 3936, 'steps': 245, 'loss/train': 10.261231422424316} +01/22/2022 20:57:18 - INFO - codeparrot_training - Step 247: {'lr': 6.15e-05, 'samples': 3952, 'steps': 246, 'loss/train': 9.092325210571289} +01/22/2022 20:57:18 - INFO - codeparrot_training - Step 248: {'lr': 6.175e-05, 'samples': 3968, 'steps': 247, 'loss/train': 9.44670295715332} +01/22/2022 20:57:18 - INFO - codeparrot_training - Step 249: {'lr': 6.2e-05, 'samples': 3984, 'steps': 248, 'loss/train': 9.10513973236084} +01/22/2022 20:57:19 - INFO - codeparrot_training - Step 250: {'lr': 6.225e-05, 'samples': 4000, 'steps': 249, 'loss/train': 10.261302947998047} +01/22/2022 20:57:19 - INFO - codeparrot_training - Step 251: {'lr': 6.25e-05, 'samples': 4016, 'steps': 250, 'loss/train': 9.941973686218262} +01/22/2022 20:57:19 - INFO - codeparrot_training - Step 252: {'lr': 6.275000000000001e-05, 'samples': 4032, 'steps': 251, 'loss/train': 10.306811332702637} +01/22/2022 20:57:20 - INFO - codeparrot_training - Step 253: {'lr': 6.3e-05, 'samples': 4048, 'steps': 252, 'loss/train': 9.855435371398926} +01/22/2022 20:57:20 - INFO - codeparrot_training - Step 254: {'lr': 6.325e-05, 'samples': 4064, 'steps': 253, 'loss/train': 8.919676780700684} +01/22/2022 20:57:21 - INFO - codeparrot_training - Step 255: {'lr': 6.35e-05, 'samples': 4080, 'steps': 254, 'loss/train': 8.799837112426758} +01/22/2022 20:57:21 - INFO - codeparrot_training - Step 256: {'lr': 6.375e-05, 'samples': 4096, 'steps': 255, 'loss/train': 9.672041893005371} +01/22/2022 20:57:21 - INFO - codeparrot_training - Step 257: {'lr': 6.4e-05, 'samples': 4112, 'steps': 256, 'loss/train': 9.959013938903809} +01/22/2022 20:57:22 - INFO - codeparrot_training - Step 258: {'lr': 6.425e-05, 'samples': 4128, 'steps': 257, 'loss/train': 9.66960334777832} +01/22/2022 20:57:22 - INFO - codeparrot_training - Step 259: {'lr': 6.450000000000001e-05, 'samples': 4144, 'steps': 258, 'loss/train': 9.745097160339355} +01/22/2022 20:57:22 - INFO - codeparrot_training - Step 260: {'lr': 6.475e-05, 'samples': 4160, 'steps': 259, 'loss/train': 10.31001091003418} +01/22/2022 20:57:23 - INFO - codeparrot_training - Step 261: {'lr': 6.500000000000001e-05, 'samples': 4176, 'steps': 260, 'loss/train': 9.52465534210205} +01/22/2022 20:57:23 - INFO - codeparrot_training - Step 262: {'lr': 6.525e-05, 'samples': 4192, 'steps': 261, 'loss/train': 10.113151550292969} +01/22/2022 20:57:23 - INFO - codeparrot_training - Step 263: {'lr': 6.55e-05, 'samples': 4208, 'steps': 262, 
'loss/train': 9.267586708068848} +01/22/2022 20:57:24 - INFO - codeparrot_training - Step 264: {'lr': 6.575e-05, 'samples': 4224, 'steps': 263, 'loss/train': 8.866236686706543} +01/22/2022 20:57:24 - INFO - codeparrot_training - Step 265: {'lr': 6.6e-05, 'samples': 4240, 'steps': 264, 'loss/train': 9.065783500671387} +01/22/2022 20:57:24 - INFO - codeparrot_training - Step 266: {'lr': 6.625000000000001e-05, 'samples': 4256, 'steps': 265, 'loss/train': 10.423636436462402} +01/22/2022 20:57:25 - INFO - codeparrot_training - Step 267: {'lr': 6.65e-05, 'samples': 4272, 'steps': 266, 'loss/train': 8.418909072875977} +01/22/2022 20:57:25 - INFO - codeparrot_training - Step 268: {'lr': 6.675000000000001e-05, 'samples': 4288, 'steps': 267, 'loss/train': 9.487255096435547} +01/22/2022 20:57:25 - INFO - codeparrot_training - Step 269: {'lr': 6.7e-05, 'samples': 4304, 'steps': 268, 'loss/train': 9.930765151977539} +01/22/2022 20:57:26 - INFO - codeparrot_training - Step 270: {'lr': 6.725000000000001e-05, 'samples': 4320, 'steps': 269, 'loss/train': 9.816969871520996} +01/22/2022 20:57:26 - INFO - codeparrot_training - Step 271: {'lr': 6.75e-05, 'samples': 4336, 'steps': 270, 'loss/train': 10.063676834106445} +01/22/2022 20:57:26 - INFO - codeparrot_training - Step 272: {'lr': 6.775000000000001e-05, 'samples': 4352, 'steps': 271, 'loss/train': 9.64018726348877} +01/22/2022 20:57:27 - INFO - codeparrot_training - Step 273: {'lr': 6.800000000000001e-05, 'samples': 4368, 'steps': 272, 'loss/train': 9.900096893310547} +01/22/2022 20:57:27 - INFO - codeparrot_training - Step 274: {'lr': 6.825e-05, 'samples': 4384, 'steps': 273, 'loss/train': 9.679298400878906} +01/22/2022 20:57:27 - INFO - codeparrot_training - Step 275: {'lr': 6.850000000000001e-05, 'samples': 4400, 'steps': 274, 'loss/train': 9.22791576385498} +01/22/2022 20:57:28 - INFO - codeparrot_training - Step 276: {'lr': 6.875e-05, 'samples': 4416, 'steps': 275, 'loss/train': 8.300003051757812} +01/22/2022 20:57:28 - INFO - codeparrot_training - Step 277: {'lr': 6.900000000000001e-05, 'samples': 4432, 'steps': 276, 'loss/train': 9.90855598449707} +01/22/2022 20:57:28 - INFO - codeparrot_training - Step 278: {'lr': 6.925e-05, 'samples': 4448, 'steps': 277, 'loss/train': 8.738543510437012} +01/22/2022 20:57:29 - INFO - codeparrot_training - Step 279: {'lr': 6.950000000000001e-05, 'samples': 4464, 'steps': 278, 'loss/train': 10.105484962463379} +01/22/2022 20:57:29 - INFO - codeparrot_training - Step 280: {'lr': 6.975e-05, 'samples': 4480, 'steps': 279, 'loss/train': 9.883695602416992} +01/22/2022 20:57:29 - INFO - codeparrot_training - Step 281: {'lr': 7.000000000000001e-05, 'samples': 4496, 'steps': 280, 'loss/train': 9.552157402038574} +01/22/2022 20:57:30 - INFO - codeparrot_training - Step 282: {'lr': 7.025000000000001e-05, 'samples': 4512, 'steps': 281, 'loss/train': 8.474114418029785} +01/22/2022 20:57:30 - INFO - codeparrot_training - Step 283: {'lr': 7.049999999999999e-05, 'samples': 4528, 'steps': 282, 'loss/train': 9.212514877319336} +01/22/2022 20:57:31 - INFO - codeparrot_training - Step 284: {'lr': 7.075e-05, 'samples': 4544, 'steps': 283, 'loss/train': 8.783228874206543} +01/22/2022 20:57:31 - INFO - codeparrot_training - Step 285: {'lr': 7.099999999999999e-05, 'samples': 4560, 'steps': 284, 'loss/train': 7.990888595581055} +01/22/2022 20:57:31 - INFO - codeparrot_training - Step 286: {'lr': 7.125e-05, 'samples': 4576, 'steps': 285, 'loss/train': 9.308939933776855} +01/22/2022 20:57:32 - INFO - codeparrot_training - Step 287: {'lr': 
7.149999999999999e-05, 'samples': 4592, 'steps': 286, 'loss/train': 10.847709655761719} +01/22/2022 20:57:32 - INFO - codeparrot_training - Step 288: {'lr': 7.175e-05, 'samples': 4608, 'steps': 287, 'loss/train': 8.908475875854492} +01/22/2022 20:57:32 - INFO - codeparrot_training - Step 289: {'lr': 7.2e-05, 'samples': 4624, 'steps': 288, 'loss/train': 8.332261085510254} +01/22/2022 20:57:33 - INFO - codeparrot_training - Step 290: {'lr': 7.225e-05, 'samples': 4640, 'steps': 289, 'loss/train': 8.716437339782715} +01/22/2022 20:57:33 - INFO - codeparrot_training - Step 291: {'lr': 7.25e-05, 'samples': 4656, 'steps': 290, 'loss/train': 9.924864768981934} +01/22/2022 20:57:33 - INFO - codeparrot_training - Step 292: {'lr': 7.274999999999999e-05, 'samples': 4672, 'steps': 291, 'loss/train': 9.320968627929688} +01/22/2022 20:57:34 - INFO - codeparrot_training - Step 293: {'lr': 7.3e-05, 'samples': 4688, 'steps': 292, 'loss/train': 8.58724594116211} +01/22/2022 20:57:34 - INFO - codeparrot_training - Step 294: {'lr': 7.324999999999999e-05, 'samples': 4704, 'steps': 293, 'loss/train': 8.042777061462402} +01/22/2022 20:57:34 - INFO - codeparrot_training - Step 295: {'lr': 7.35e-05, 'samples': 4720, 'steps': 294, 'loss/train': 10.23192024230957} +01/22/2022 20:57:35 - INFO - codeparrot_training - Step 296: {'lr': 7.375e-05, 'samples': 4736, 'steps': 295, 'loss/train': 10.43820858001709} +01/22/2022 20:57:35 - INFO - codeparrot_training - Step 297: {'lr': 7.4e-05, 'samples': 4752, 'steps': 296, 'loss/train': 8.489400863647461} +01/22/2022 20:57:35 - INFO - codeparrot_training - Step 298: {'lr': 7.425e-05, 'samples': 4768, 'steps': 297, 'loss/train': 9.176985740661621} +01/22/2022 20:57:36 - INFO - codeparrot_training - Step 299: {'lr': 7.45e-05, 'samples': 4784, 'steps': 298, 'loss/train': 8.790864944458008} +01/22/2022 20:57:36 - INFO - codeparrot_training - Step 300: {'lr': 7.475e-05, 'samples': 4800, 'steps': 299, 'loss/train': 8.899185180664062} +01/22/2022 20:57:37 - INFO - codeparrot_training - Step 301: {'lr': 7.5e-05, 'samples': 4816, 'steps': 300, 'loss/train': 8.337492942810059} +01/22/2022 20:57:37 - INFO - codeparrot_training - Step 302: {'lr': 7.525e-05, 'samples': 4832, 'steps': 301, 'loss/train': 10.197667121887207} +01/22/2022 20:57:38 - INFO - codeparrot_training - Step 303: {'lr': 7.55e-05, 'samples': 4848, 'steps': 302, 'loss/train': 9.67094612121582} +01/22/2022 20:57:38 - INFO - codeparrot_training - Step 304: {'lr': 7.575e-05, 'samples': 4864, 'steps': 303, 'loss/train': 9.263642311096191} +01/22/2022 20:57:38 - INFO - codeparrot_training - Step 305: {'lr': 7.6e-05, 'samples': 4880, 'steps': 304, 'loss/train': 9.155837059020996} +01/22/2022 20:57:39 - INFO - codeparrot_training - Step 306: {'lr': 7.625e-05, 'samples': 4896, 'steps': 305, 'loss/train': 9.99549388885498} +01/22/2022 20:57:39 - INFO - codeparrot_training - Step 307: {'lr': 7.65e-05, 'samples': 4912, 'steps': 306, 'loss/train': 10.390087127685547} +01/22/2022 20:57:39 - INFO - codeparrot_training - Step 308: {'lr': 7.675e-05, 'samples': 4928, 'steps': 307, 'loss/train': 9.112935066223145} +01/22/2022 20:57:40 - INFO - codeparrot_training - Step 309: {'lr': 7.7e-05, 'samples': 4944, 'steps': 308, 'loss/train': 9.381646156311035} +01/22/2022 20:57:40 - INFO - codeparrot_training - Step 310: {'lr': 7.725000000000001e-05, 'samples': 4960, 'steps': 309, 'loss/train': 9.259207725524902} +01/22/2022 20:57:41 - INFO - codeparrot_training - Step 311: {'lr': 7.75e-05, 'samples': 4976, 'steps': 310, 'loss/train': 
8.89177131652832} +01/22/2022 20:57:41 - INFO - codeparrot_training - Step 312: {'lr': 7.775e-05, 'samples': 4992, 'steps': 311, 'loss/train': 9.346880912780762} +01/22/2022 20:57:41 - INFO - codeparrot_training - Step 313: {'lr': 7.8e-05, 'samples': 5008, 'steps': 312, 'loss/train': 9.960911750793457} +01/22/2022 20:57:42 - INFO - codeparrot_training - Step 314: {'lr': 7.825e-05, 'samples': 5024, 'steps': 313, 'loss/train': 8.985231399536133} +01/22/2022 20:57:42 - INFO - codeparrot_training - Step 315: {'lr': 7.85e-05, 'samples': 5040, 'steps': 314, 'loss/train': 10.009469985961914} +01/22/2022 20:57:42 - INFO - codeparrot_training - Step 316: {'lr': 7.875e-05, 'samples': 5056, 'steps': 315, 'loss/train': 8.87830924987793} +01/22/2022 20:57:43 - INFO - codeparrot_training - Step 317: {'lr': 7.9e-05, 'samples': 5072, 'steps': 316, 'loss/train': 10.45069694519043} +01/22/2022 20:57:43 - INFO - codeparrot_training - Step 318: {'lr': 7.925e-05, 'samples': 5088, 'steps': 317, 'loss/train': 9.474170684814453} +01/22/2022 20:57:43 - INFO - codeparrot_training - Step 319: {'lr': 7.950000000000001e-05, 'samples': 5104, 'steps': 318, 'loss/train': 9.42161750793457} +01/22/2022 20:57:44 - INFO - codeparrot_training - Step 320: {'lr': 7.975e-05, 'samples': 5120, 'steps': 319, 'loss/train': 8.404040336608887} +01/22/2022 20:57:44 - INFO - codeparrot_training - Step 321: {'lr': 8e-05, 'samples': 5136, 'steps': 320, 'loss/train': 9.01351261138916} +01/22/2022 20:57:44 - INFO - codeparrot_training - Step 322: {'lr': 8.025e-05, 'samples': 5152, 'steps': 321, 'loss/train': 9.830482482910156} +01/22/2022 20:57:45 - INFO - codeparrot_training - Step 323: {'lr': 8.05e-05, 'samples': 5168, 'steps': 322, 'loss/train': 9.562469482421875} +01/22/2022 20:57:45 - INFO - codeparrot_training - Step 324: {'lr': 8.075e-05, 'samples': 5184, 'steps': 323, 'loss/train': 9.621129989624023} +01/22/2022 20:57:45 - INFO - codeparrot_training - Step 325: {'lr': 8.1e-05, 'samples': 5200, 'steps': 324, 'loss/train': 8.974664688110352} +01/22/2022 20:57:46 - INFO - codeparrot_training - Step 326: {'lr': 8.125000000000001e-05, 'samples': 5216, 'steps': 325, 'loss/train': 9.132997512817383} +01/22/2022 20:57:46 - INFO - codeparrot_training - Step 327: {'lr': 8.15e-05, 'samples': 5232, 'steps': 326, 'loss/train': 10.035571098327637} +01/22/2022 20:57:46 - INFO - codeparrot_training - Step 328: {'lr': 8.175000000000001e-05, 'samples': 5248, 'steps': 327, 'loss/train': 9.5532808303833} +01/22/2022 20:57:47 - INFO - codeparrot_training - Step 329: {'lr': 8.2e-05, 'samples': 5264, 'steps': 328, 'loss/train': 9.57081127166748} +01/22/2022 20:57:47 - INFO - codeparrot_training - Step 330: {'lr': 8.225000000000001e-05, 'samples': 5280, 'steps': 329, 'loss/train': 9.741230010986328} +01/22/2022 20:57:47 - INFO - codeparrot_training - Step 331: {'lr': 8.25e-05, 'samples': 5296, 'steps': 330, 'loss/train': 10.134722709655762} +01/22/2022 20:57:48 - INFO - codeparrot_training - Step 332: {'lr': 8.275e-05, 'samples': 5312, 'steps': 331, 'loss/train': 9.301830291748047} +01/22/2022 20:57:48 - INFO - codeparrot_training - Step 333: {'lr': 8.300000000000001e-05, 'samples': 5328, 'steps': 332, 'loss/train': 9.371390342712402} +01/22/2022 20:57:48 - INFO - codeparrot_training - Step 334: {'lr': 8.325e-05, 'samples': 5344, 'steps': 333, 'loss/train': 9.354187965393066} +01/22/2022 20:57:49 - INFO - codeparrot_training - Step 335: {'lr': 8.350000000000001e-05, 'samples': 5360, 'steps': 334, 'loss/train': 9.532604217529297} +01/22/2022 20:57:49 - INFO 
- codeparrot_training - Step 336: {'lr': 8.375e-05, 'samples': 5376, 'steps': 335, 'loss/train': 9.529627799987793} +01/22/2022 20:57:49 - INFO - codeparrot_training - Step 337: {'lr': 8.400000000000001e-05, 'samples': 5392, 'steps': 336, 'loss/train': 8.802553176879883} +01/22/2022 20:57:50 - INFO - codeparrot_training - Step 338: {'lr': 8.425e-05, 'samples': 5408, 'steps': 337, 'loss/train': 9.760191917419434} +01/22/2022 20:57:50 - INFO - codeparrot_training - Step 339: {'lr': 8.450000000000001e-05, 'samples': 5424, 'steps': 338, 'loss/train': 9.35767936706543} +01/22/2022 20:57:50 - INFO - codeparrot_training - Step 340: {'lr': 8.475000000000001e-05, 'samples': 5440, 'steps': 339, 'loss/train': 9.80904483795166} +01/22/2022 20:57:51 - INFO - codeparrot_training - Step 341: {'lr': 8.5e-05, 'samples': 5456, 'steps': 340, 'loss/train': 10.550032615661621} +01/22/2022 20:57:51 - INFO - codeparrot_training - Step 342: {'lr': 8.525000000000001e-05, 'samples': 5472, 'steps': 341, 'loss/train': 9.336383819580078} +01/22/2022 20:57:51 - INFO - codeparrot_training - Step 343: {'lr': 8.55e-05, 'samples': 5488, 'steps': 342, 'loss/train': 9.410957336425781} +01/22/2022 20:57:52 - INFO - codeparrot_training - Step 344: {'lr': 8.575000000000001e-05, 'samples': 5504, 'steps': 343, 'loss/train': 9.203252792358398} +01/22/2022 20:57:52 - INFO - codeparrot_training - Step 345: {'lr': 8.599999999999999e-05, 'samples': 5520, 'steps': 344, 'loss/train': 9.01279354095459} +01/22/2022 20:57:53 - INFO - codeparrot_training - Step 346: {'lr': 8.625e-05, 'samples': 5536, 'steps': 345, 'loss/train': 9.204567909240723} +01/22/2022 20:57:53 - INFO - codeparrot_training - Step 347: {'lr': 8.65e-05, 'samples': 5552, 'steps': 346, 'loss/train': 9.649930000305176} +01/22/2022 20:57:53 - INFO - codeparrot_training - Step 348: {'lr': 8.675e-05, 'samples': 5568, 'steps': 347, 'loss/train': 9.149508476257324} +01/22/2022 20:57:54 - INFO - codeparrot_training - Step 349: {'lr': 8.7e-05, 'samples': 5584, 'steps': 348, 'loss/train': 9.070633888244629} +01/22/2022 20:57:54 - INFO - codeparrot_training - Step 350: {'lr': 8.724999999999999e-05, 'samples': 5600, 'steps': 349, 'loss/train': 9.070714950561523} +01/22/2022 20:57:54 - INFO - codeparrot_training - Step 351: {'lr': 8.75e-05, 'samples': 5616, 'steps': 350, 'loss/train': 9.019255638122559} +01/22/2022 20:57:55 - INFO - codeparrot_training - Step 352: {'lr': 8.774999999999999e-05, 'samples': 5632, 'steps': 351, 'loss/train': 10.093969345092773} +01/22/2022 20:57:55 - INFO - codeparrot_training - Step 353: {'lr': 8.8e-05, 'samples': 5648, 'steps': 352, 'loss/train': 8.852364540100098} +01/22/2022 20:57:55 - INFO - codeparrot_training - Step 354: {'lr': 8.824999999999999e-05, 'samples': 5664, 'steps': 353, 'loss/train': 9.352640151977539} +01/22/2022 20:57:56 - INFO - codeparrot_training - Step 355: {'lr': 8.85e-05, 'samples': 5680, 'steps': 354, 'loss/train': 10.099061012268066} +01/22/2022 20:57:56 - INFO - codeparrot_training - Step 356: {'lr': 8.875e-05, 'samples': 5696, 'steps': 355, 'loss/train': 9.700894355773926} +01/22/2022 20:57:56 - INFO - codeparrot_training - Step 357: {'lr': 8.9e-05, 'samples': 5712, 'steps': 356, 'loss/train': 9.041502952575684} +01/22/2022 20:57:57 - INFO - codeparrot_training - Step 358: {'lr': 8.925e-05, 'samples': 5728, 'steps': 357, 'loss/train': 9.082510948181152} +01/22/2022 20:57:57 - INFO - codeparrot_training - Step 359: {'lr': 8.95e-05, 'samples': 5744, 'steps': 358, 'loss/train': 9.337154388427734} +01/22/2022 20:57:58 - INFO - 
codeparrot_training - Step 360: {'lr': 8.975e-05, 'samples': 5760, 'steps': 359, 'loss/train': 9.348626136779785} +01/22/2022 20:57:58 - INFO - codeparrot_training - Step 361: {'lr': 8.999999999999999e-05, 'samples': 5776, 'steps': 360, 'loss/train': 8.773628234863281} +01/22/2022 20:57:59 - INFO - codeparrot_training - Step 362: {'lr': 9.025e-05, 'samples': 5792, 'steps': 361, 'loss/train': 9.252784729003906} +01/22/2022 20:57:59 - INFO - codeparrot_training - Step 363: {'lr': 9.05e-05, 'samples': 5808, 'steps': 362, 'loss/train': 8.934161186218262} +01/22/2022 20:57:59 - INFO - codeparrot_training - Step 364: {'lr': 9.075e-05, 'samples': 5824, 'steps': 363, 'loss/train': 10.425604820251465} +01/22/2022 20:58:00 - INFO - codeparrot_training - Step 365: {'lr': 9.1e-05, 'samples': 5840, 'steps': 364, 'loss/train': 9.052485466003418} +01/22/2022 20:58:00 - INFO - codeparrot_training - Step 366: {'lr': 9.125e-05, 'samples': 5856, 'steps': 365, 'loss/train': 10.054530143737793} +01/22/2022 20:58:00 - INFO - codeparrot_training - Step 367: {'lr': 9.15e-05, 'samples': 5872, 'steps': 366, 'loss/train': 8.97590446472168} +01/22/2022 20:58:01 - INFO - codeparrot_training - Step 368: {'lr': 9.175e-05, 'samples': 5888, 'steps': 367, 'loss/train': 8.857767105102539} +01/22/2022 20:58:01 - INFO - codeparrot_training - Step 369: {'lr': 9.2e-05, 'samples': 5904, 'steps': 368, 'loss/train': 8.046717643737793} +01/22/2022 20:58:01 - INFO - codeparrot_training - Step 370: {'lr': 9.225e-05, 'samples': 5920, 'steps': 369, 'loss/train': 8.708664894104004} +01/22/2022 20:58:02 - INFO - codeparrot_training - Step 371: {'lr': 9.25e-05, 'samples': 5936, 'steps': 370, 'loss/train': 8.91010856628418} +01/22/2022 20:58:02 - INFO - codeparrot_training - Step 372: {'lr': 9.275e-05, 'samples': 5952, 'steps': 371, 'loss/train': 8.654004096984863} +01/22/2022 20:58:02 - INFO - codeparrot_training - Step 373: {'lr': 9.3e-05, 'samples': 5968, 'steps': 372, 'loss/train': 9.424354553222656} +01/22/2022 20:58:03 - INFO - codeparrot_training - Step 374: {'lr': 9.325e-05, 'samples': 5984, 'steps': 373, 'loss/train': 9.60505199432373} +01/22/2022 20:58:03 - INFO - codeparrot_training - Step 375: {'lr': 9.35e-05, 'samples': 6000, 'steps': 374, 'loss/train': 8.834843635559082} +01/22/2022 20:58:04 - INFO - codeparrot_training - Step 376: {'lr': 9.375e-05, 'samples': 6016, 'steps': 375, 'loss/train': 8.542076110839844} +01/22/2022 20:58:04 - INFO - codeparrot_training - Step 377: {'lr': 9.400000000000001e-05, 'samples': 6032, 'steps': 376, 'loss/train': 10.025857925415039} +01/22/2022 20:58:04 - INFO - codeparrot_training - Step 378: {'lr': 9.425e-05, 'samples': 6048, 'steps': 377, 'loss/train': 9.229769706726074} +01/22/2022 20:58:05 - INFO - codeparrot_training - Step 379: {'lr': 9.45e-05, 'samples': 6064, 'steps': 378, 'loss/train': 9.509035110473633} +01/22/2022 20:58:05 - INFO - codeparrot_training - Step 380: {'lr': 9.475e-05, 'samples': 6080, 'steps': 379, 'loss/train': 9.537371635437012} +01/22/2022 20:58:05 - INFO - codeparrot_training - Step 381: {'lr': 9.5e-05, 'samples': 6096, 'steps': 380, 'loss/train': 8.654661178588867} +01/22/2022 20:58:06 - INFO - codeparrot_training - Step 382: {'lr': 9.525e-05, 'samples': 6112, 'steps': 381, 'loss/train': 8.903295516967773} +01/22/2022 20:58:06 - INFO - codeparrot_training - Step 383: {'lr': 9.55e-05, 'samples': 6128, 'steps': 382, 'loss/train': 9.175506591796875} +01/22/2022 20:58:06 - INFO - codeparrot_training - Step 384: {'lr': 9.575000000000001e-05, 'samples': 6144, 'steps': 
383, 'loss/train': 9.444051742553711} +01/22/2022 20:58:07 - INFO - codeparrot_training - Step 385: {'lr': 9.6e-05, 'samples': 6160, 'steps': 384, 'loss/train': 10.026365280151367} +01/22/2022 20:58:07 - INFO - codeparrot_training - Step 386: {'lr': 9.625000000000001e-05, 'samples': 6176, 'steps': 385, 'loss/train': 8.789953231811523} +01/22/2022 20:58:07 - INFO - codeparrot_training - Step 387: {'lr': 9.65e-05, 'samples': 6192, 'steps': 386, 'loss/train': 8.99285888671875} +01/22/2022 20:58:08 - INFO - codeparrot_training - Step 388: {'lr': 9.675000000000001e-05, 'samples': 6208, 'steps': 387, 'loss/train': 8.213546752929688} +01/22/2022 20:58:08 - INFO - codeparrot_training - Step 389: {'lr': 9.7e-05, 'samples': 6224, 'steps': 388, 'loss/train': 8.97140121459961} +01/22/2022 20:58:08 - INFO - codeparrot_training - Step 390: {'lr': 9.725e-05, 'samples': 6240, 'steps': 389, 'loss/train': 10.463143348693848} +01/22/2022 20:58:09 - INFO - codeparrot_training - Step 391: {'lr': 9.750000000000001e-05, 'samples': 6256, 'steps': 390, 'loss/train': 9.661022186279297} +01/22/2022 20:58:09 - INFO - codeparrot_training - Step 392: {'lr': 9.775e-05, 'samples': 6272, 'steps': 391, 'loss/train': 9.486577033996582} +01/22/2022 20:58:09 - INFO - codeparrot_training - Step 393: {'lr': 9.800000000000001e-05, 'samples': 6288, 'steps': 392, 'loss/train': 8.758456230163574} +01/22/2022 20:58:10 - INFO - codeparrot_training - Step 394: {'lr': 9.825e-05, 'samples': 6304, 'steps': 393, 'loss/train': 8.71645736694336} +01/22/2022 20:58:10 - INFO - codeparrot_training - Step 395: {'lr': 9.850000000000001e-05, 'samples': 6320, 'steps': 394, 'loss/train': 9.450484275817871} +01/22/2022 20:58:10 - INFO - codeparrot_training - Step 396: {'lr': 9.875e-05, 'samples': 6336, 'steps': 395, 'loss/train': 9.4133882522583} +01/22/2022 20:58:11 - INFO - codeparrot_training - Step 397: {'lr': 9.900000000000001e-05, 'samples': 6352, 'steps': 396, 'loss/train': 9.840461730957031} +01/22/2022 20:58:11 - INFO - codeparrot_training - Step 398: {'lr': 9.925000000000001e-05, 'samples': 6368, 'steps': 397, 'loss/train': 8.816099166870117} +01/22/2022 20:58:11 - INFO - codeparrot_training - Step 399: {'lr': 9.95e-05, 'samples': 6384, 'steps': 398, 'loss/train': 9.094099998474121} +01/22/2022 20:58:12 - INFO - codeparrot_training - Step 400: {'lr': 9.975000000000001e-05, 'samples': 6400, 'steps': 399, 'loss/train': 8.299336433410645} +01/22/2022 20:58:12 - INFO - codeparrot_training - Step 401: {'lr': 0.0001, 'samples': 6416, 'steps': 400, 'loss/train': 9.07571029663086} +01/22/2022 20:58:12 - INFO - codeparrot_training - Step 402: {'lr': 0.00010025000000000001, 'samples': 6432, 'steps': 401, 'loss/train': 8.302351951599121} +01/22/2022 20:58:13 - INFO - codeparrot_training - Step 403: {'lr': 0.0001005, 'samples': 6448, 'steps': 402, 'loss/train': 8.640420913696289} +01/22/2022 20:58:13 - INFO - codeparrot_training - Step 404: {'lr': 0.00010075000000000001, 'samples': 6464, 'steps': 403, 'loss/train': 9.367877960205078} +01/22/2022 20:58:13 - INFO - codeparrot_training - Step 405: {'lr': 0.000101, 'samples': 6480, 'steps': 404, 'loss/train': 8.609994888305664} +01/22/2022 20:58:14 - INFO - codeparrot_training - Step 406: {'lr': 0.00010125000000000001, 'samples': 6496, 'steps': 405, 'loss/train': 8.796429634094238} +01/22/2022 20:58:14 - INFO - codeparrot_training - Step 407: {'lr': 0.00010150000000000001, 'samples': 6512, 'steps': 406, 'loss/train': 9.400025367736816} +01/22/2022 20:58:14 - INFO - codeparrot_training - Step 408: {'lr': 
0.00010174999999999999, 'samples': 6528, 'steps': 407, 'loss/train': 9.8967924118042} +01/22/2022 20:58:15 - INFO - codeparrot_training - Step 409: {'lr': 0.000102, 'samples': 6544, 'steps': 408, 'loss/train': 9.072064399719238} +01/22/2022 20:58:15 - INFO - codeparrot_training - Step 410: {'lr': 0.00010224999999999999, 'samples': 6560, 'steps': 409, 'loss/train': 9.200801849365234} +01/22/2022 20:58:16 - INFO - codeparrot_training - Step 411: {'lr': 0.0001025, 'samples': 6576, 'steps': 410, 'loss/train': 8.831583976745605} +01/22/2022 20:58:16 - INFO - codeparrot_training - Step 412: {'lr': 0.00010274999999999999, 'samples': 6592, 'steps': 411, 'loss/train': 9.83707332611084} +01/22/2022 20:58:16 - INFO - codeparrot_training - Step 413: {'lr': 0.000103, 'samples': 6608, 'steps': 412, 'loss/train': 9.408841133117676} +01/22/2022 20:58:17 - INFO - codeparrot_training - Step 414: {'lr': 0.00010325, 'samples': 6624, 'steps': 413, 'loss/train': 9.237653732299805} +01/22/2022 20:58:17 - INFO - codeparrot_training - Step 415: {'lr': 0.0001035, 'samples': 6640, 'steps': 414, 'loss/train': 8.949479103088379} +01/22/2022 20:58:17 - INFO - codeparrot_training - Step 416: {'lr': 0.00010375, 'samples': 6656, 'steps': 415, 'loss/train': 8.664557456970215} +01/22/2022 20:58:18 - INFO - codeparrot_training - Step 417: {'lr': 0.000104, 'samples': 6672, 'steps': 416, 'loss/train': 8.161252975463867} +01/22/2022 20:58:18 - INFO - codeparrot_training - Step 418: {'lr': 0.00010425, 'samples': 6688, 'steps': 417, 'loss/train': 8.959433555603027} +01/22/2022 20:58:18 - INFO - codeparrot_training - Step 419: {'lr': 0.00010449999999999999, 'samples': 6704, 'steps': 418, 'loss/train': 9.311105728149414} +01/22/2022 20:58:21 - INFO - codeparrot_training - Step 420: {'lr': 0.00010475, 'samples': 6720, 'steps': 419, 'loss/train': 9.230398178100586} +01/22/2022 20:58:22 - INFO - codeparrot_training - Step 421: {'lr': 0.000105, 'samples': 6736, 'steps': 420, 'loss/train': 8.67992115020752} +01/22/2022 20:58:22 - INFO - codeparrot_training - Step 422: {'lr': 0.00010525, 'samples': 6752, 'steps': 421, 'loss/train': 9.949875831604004} +01/22/2022 20:58:22 - INFO - codeparrot_training - Step 423: {'lr': 0.0001055, 'samples': 6768, 'steps': 422, 'loss/train': 9.079950332641602} +01/22/2022 20:58:23 - INFO - codeparrot_training - Step 424: {'lr': 0.00010575, 'samples': 6784, 'steps': 423, 'loss/train': 9.117156982421875} +01/22/2022 20:58:23 - INFO - codeparrot_training - Step 425: {'lr': 0.000106, 'samples': 6800, 'steps': 424, 'loss/train': 9.294184684753418} +01/22/2022 20:58:23 - INFO - codeparrot_training - Step 426: {'lr': 0.00010625, 'samples': 6816, 'steps': 425, 'loss/train': 8.647862434387207} +01/22/2022 20:58:24 - INFO - codeparrot_training - Step 427: {'lr': 0.0001065, 'samples': 6832, 'steps': 426, 'loss/train': 9.342692375183105} +01/22/2022 20:58:24 - INFO - codeparrot_training - Step 428: {'lr': 0.00010675, 'samples': 6848, 'steps': 427, 'loss/train': 9.917657852172852} +01/22/2022 20:58:24 - INFO - codeparrot_training - Step 429: {'lr': 0.000107, 'samples': 6864, 'steps': 428, 'loss/train': 9.721179962158203} +01/22/2022 20:58:25 - INFO - codeparrot_training - Step 430: {'lr': 0.00010725, 'samples': 6880, 'steps': 429, 'loss/train': 8.368453979492188} +01/22/2022 20:58:25 - INFO - codeparrot_training - Step 431: {'lr': 0.0001075, 'samples': 6896, 'steps': 430, 'loss/train': 8.821770668029785} +01/22/2022 20:58:25 - INFO - codeparrot_training - Step 432: {'lr': 0.00010775, 'samples': 6912, 'steps': 431, 
'loss/train': 9.770350456237793} +01/22/2022 20:58:26 - INFO - codeparrot_training - Step 433: {'lr': 0.000108, 'samples': 6928, 'steps': 432, 'loss/train': 9.180501937866211} +01/22/2022 20:58:26 - INFO - codeparrot_training - Step 434: {'lr': 0.00010825, 'samples': 6944, 'steps': 433, 'loss/train': 8.840584754943848} +01/22/2022 20:58:26 - INFO - codeparrot_training - Step 435: {'lr': 0.00010850000000000001, 'samples': 6960, 'steps': 434, 'loss/train': 10.12399959564209} +01/22/2022 20:58:27 - INFO - codeparrot_training - Step 436: {'lr': 0.00010875, 'samples': 6976, 'steps': 435, 'loss/train': 9.706656455993652} +01/22/2022 20:58:27 - INFO - codeparrot_training - Step 437: {'lr': 0.000109, 'samples': 6992, 'steps': 436, 'loss/train': 9.696211814880371} +01/22/2022 20:58:27 - INFO - codeparrot_training - Step 438: {'lr': 0.00010925, 'samples': 7008, 'steps': 437, 'loss/train': 9.06942081451416} +01/22/2022 20:58:28 - INFO - codeparrot_training - Step 439: {'lr': 0.0001095, 'samples': 7024, 'steps': 438, 'loss/train': 9.097882270812988} +01/22/2022 20:58:28 - INFO - codeparrot_training - Step 440: {'lr': 0.00010975, 'samples': 7040, 'steps': 439, 'loss/train': 8.608757972717285} +01/22/2022 20:58:28 - INFO - codeparrot_training - Step 441: {'lr': 0.00011, 'samples': 7056, 'steps': 440, 'loss/train': 8.26650619506836} +01/22/2022 20:58:29 - INFO - codeparrot_training - Step 442: {'lr': 0.00011025, 'samples': 7072, 'steps': 441, 'loss/train': 9.956624031066895} +01/22/2022 20:58:29 - INFO - codeparrot_training - Step 443: {'lr': 0.0001105, 'samples': 7088, 'steps': 442, 'loss/train': 9.373085021972656} +01/22/2022 20:58:30 - INFO - codeparrot_training - Step 444: {'lr': 0.00011075000000000001, 'samples': 7104, 'steps': 443, 'loss/train': 9.08255386352539} +01/22/2022 20:58:30 - INFO - codeparrot_training - Step 445: {'lr': 0.000111, 'samples': 7120, 'steps': 444, 'loss/train': 9.345389366149902} +01/22/2022 20:58:30 - INFO - codeparrot_training - Step 446: {'lr': 0.00011125000000000001, 'samples': 7136, 'steps': 445, 'loss/train': 9.300911903381348} +01/22/2022 20:58:31 - INFO - codeparrot_training - Step 447: {'lr': 0.0001115, 'samples': 7152, 'steps': 446, 'loss/train': 9.337592124938965} +01/22/2022 20:58:31 - INFO - codeparrot_training - Step 448: {'lr': 0.00011175, 'samples': 7168, 'steps': 447, 'loss/train': 8.277180671691895} +01/22/2022 20:58:31 - INFO - codeparrot_training - Step 449: {'lr': 0.000112, 'samples': 7184, 'steps': 448, 'loss/train': 9.615056991577148} +01/22/2022 20:58:32 - INFO - codeparrot_training - Step 450: {'lr': 0.00011225, 'samples': 7200, 'steps': 449, 'loss/train': 9.38110065460205} +01/22/2022 20:58:32 - INFO - codeparrot_training - Step 451: {'lr': 0.00011250000000000001, 'samples': 7216, 'steps': 450, 'loss/train': 9.150519371032715} +01/22/2022 20:58:32 - INFO - codeparrot_training - Step 452: {'lr': 0.00011275, 'samples': 7232, 'steps': 451, 'loss/train': 9.228389739990234} +01/22/2022 20:58:33 - INFO - codeparrot_training - Step 453: {'lr': 0.00011300000000000001, 'samples': 7248, 'steps': 452, 'loss/train': 9.461592674255371} +01/22/2022 20:58:33 - INFO - codeparrot_training - Step 454: {'lr': 0.00011325, 'samples': 7264, 'steps': 453, 'loss/train': 9.169112205505371} +01/22/2022 20:58:33 - INFO - codeparrot_training - Step 455: {'lr': 0.00011350000000000001, 'samples': 7280, 'steps': 454, 'loss/train': 9.40683364868164} +01/22/2022 20:58:34 - INFO - codeparrot_training - Step 456: {'lr': 0.00011375, 'samples': 7296, 'steps': 455, 'loss/train': 
9.994182586669922} +01/22/2022 20:58:34 - INFO - codeparrot_training - Step 457: {'lr': 0.000114, 'samples': 7312, 'steps': 456, 'loss/train': 8.91905403137207} +01/22/2022 20:58:34 - INFO - codeparrot_training - Step 458: {'lr': 0.00011425000000000001, 'samples': 7328, 'steps': 457, 'loss/train': 9.270313262939453} +01/22/2022 20:58:35 - INFO - codeparrot_training - Step 459: {'lr': 0.0001145, 'samples': 7344, 'steps': 458, 'loss/train': 8.773589134216309} +01/22/2022 20:58:35 - INFO - codeparrot_training - Step 460: {'lr': 0.00011475000000000001, 'samples': 7360, 'steps': 459, 'loss/train': 8.689289093017578} +01/22/2022 20:58:35 - INFO - codeparrot_training - Step 461: {'lr': 0.000115, 'samples': 7376, 'steps': 460, 'loss/train': 9.036508560180664} +01/22/2022 20:58:36 - INFO - codeparrot_training - Step 462: {'lr': 0.00011525000000000001, 'samples': 7392, 'steps': 461, 'loss/train': 9.585177421569824} +01/22/2022 20:58:36 - INFO - codeparrot_training - Step 463: {'lr': 0.0001155, 'samples': 7408, 'steps': 462, 'loss/train': 9.119417190551758} +01/22/2022 20:58:36 - INFO - codeparrot_training - Step 464: {'lr': 0.00011575000000000001, 'samples': 7424, 'steps': 463, 'loss/train': 8.882226943969727} +01/22/2022 20:58:37 - INFO - codeparrot_training - Step 465: {'lr': 0.00011600000000000001, 'samples': 7440, 'steps': 464, 'loss/train': 9.944592475891113} +01/22/2022 20:58:37 - INFO - codeparrot_training - Step 466: {'lr': 0.00011625, 'samples': 7456, 'steps': 465, 'loss/train': 9.109798431396484} +01/22/2022 20:58:37 - INFO - codeparrot_training - Step 467: {'lr': 0.00011650000000000001, 'samples': 7472, 'steps': 466, 'loss/train': 8.745268821716309} +01/22/2022 20:58:38 - INFO - codeparrot_training - Step 468: {'lr': 0.00011675, 'samples': 7488, 'steps': 467, 'loss/train': 9.546388626098633} +01/22/2022 20:58:38 - INFO - codeparrot_training - Step 469: {'lr': 0.00011700000000000001, 'samples': 7504, 'steps': 468, 'loss/train': 8.67956256866455} +01/22/2022 20:58:38 - INFO - codeparrot_training - Step 470: {'lr': 0.00011724999999999999, 'samples': 7520, 'steps': 469, 'loss/train': 8.94892406463623} +01/22/2022 20:58:39 - INFO - codeparrot_training - Step 471: {'lr': 0.0001175, 'samples': 7536, 'steps': 470, 'loss/train': 8.697021484375} +01/22/2022 20:58:39 - INFO - codeparrot_training - Step 472: {'lr': 0.00011775, 'samples': 7552, 'steps': 471, 'loss/train': 9.232431411743164} +01/22/2022 20:58:40 - INFO - codeparrot_training - Step 473: {'lr': 0.000118, 'samples': 7568, 'steps': 472, 'loss/train': 9.029326438903809} +01/22/2022 20:58:40 - INFO - codeparrot_training - Step 474: {'lr': 0.00011825, 'samples': 7584, 'steps': 473, 'loss/train': 9.503674507141113} +01/22/2022 20:58:40 - INFO - codeparrot_training - Step 475: {'lr': 0.0001185, 'samples': 7600, 'steps': 474, 'loss/train': 9.402981758117676} +01/22/2022 20:58:41 - INFO - codeparrot_training - Step 476: {'lr': 0.00011875, 'samples': 7616, 'steps': 475, 'loss/train': 8.987305641174316} +01/22/2022 20:58:41 - INFO - codeparrot_training - Step 477: {'lr': 0.00011899999999999999, 'samples': 7632, 'steps': 476, 'loss/train': 9.713814735412598} +01/22/2022 20:58:41 - INFO - codeparrot_training - Step 478: {'lr': 0.00011925, 'samples': 7648, 'steps': 477, 'loss/train': 9.48485279083252} +01/22/2022 20:58:42 - INFO - codeparrot_training - Step 479: {'lr': 0.00011949999999999999, 'samples': 7664, 'steps': 478, 'loss/train': 6.490025520324707} +01/22/2022 20:58:43 - INFO - codeparrot_training - Step 480: {'lr': 0.00011975, 'samples': 7680, 
'steps': 479, 'loss/train': 7.829193115234375} +01/22/2022 20:58:43 - INFO - codeparrot_training - Step 481: {'lr': 0.00012, 'samples': 7696, 'steps': 480, 'loss/train': 8.754100799560547} +01/22/2022 20:58:43 - INFO - codeparrot_training - Step 482: {'lr': 0.00012025, 'samples': 7712, 'steps': 481, 'loss/train': 10.681097030639648} +01/22/2022 20:58:44 - INFO - codeparrot_training - Step 483: {'lr': 0.0001205, 'samples': 7728, 'steps': 482, 'loss/train': 8.702665328979492} +01/22/2022 20:58:44 - INFO - codeparrot_training - Step 484: {'lr': 0.00012075, 'samples': 7744, 'steps': 483, 'loss/train': 8.235371589660645} +01/22/2022 20:58:44 - INFO - codeparrot_training - Step 485: {'lr': 0.000121, 'samples': 7760, 'steps': 484, 'loss/train': 9.461774826049805} +01/22/2022 20:58:45 - INFO - codeparrot_training - Step 486: {'lr': 0.00012124999999999999, 'samples': 7776, 'steps': 485, 'loss/train': 8.933204650878906} +01/22/2022 20:58:45 - INFO - codeparrot_training - Step 487: {'lr': 0.0001215, 'samples': 7792, 'steps': 486, 'loss/train': 8.647503852844238} +01/22/2022 20:58:45 - INFO - codeparrot_training - Step 488: {'lr': 0.00012175, 'samples': 7808, 'steps': 487, 'loss/train': 9.545587539672852} +01/22/2022 20:58:46 - INFO - codeparrot_training - Step 489: {'lr': 0.000122, 'samples': 7824, 'steps': 488, 'loss/train': 9.142321586608887} +01/22/2022 20:58:46 - INFO - codeparrot_training - Step 490: {'lr': 0.00012225, 'samples': 7840, 'steps': 489, 'loss/train': 9.215739250183105} +01/22/2022 20:58:46 - INFO - codeparrot_training - Step 491: {'lr': 0.0001225, 'samples': 7856, 'steps': 490, 'loss/train': 8.482568740844727} +01/22/2022 20:58:47 - INFO - codeparrot_training - Step 492: {'lr': 0.00012275, 'samples': 7872, 'steps': 491, 'loss/train': 7.991040229797363} +01/22/2022 20:58:47 - INFO - codeparrot_training - Step 493: {'lr': 0.000123, 'samples': 7888, 'steps': 492, 'loss/train': 8.57867431640625} +01/22/2022 20:58:47 - INFO - codeparrot_training - Step 494: {'lr': 0.00012325000000000001, 'samples': 7904, 'steps': 493, 'loss/train': 9.27800464630127} +01/22/2022 20:58:48 - INFO - codeparrot_training - Step 495: {'lr': 0.0001235, 'samples': 7920, 'steps': 494, 'loss/train': 9.966198921203613} +01/22/2022 20:58:48 - INFO - codeparrot_training - Step 496: {'lr': 0.00012375, 'samples': 7936, 'steps': 495, 'loss/train': 9.496504783630371} +01/22/2022 20:58:49 - INFO - codeparrot_training - Step 497: {'lr': 0.000124, 'samples': 7952, 'steps': 496, 'loss/train': 8.614720344543457} +01/22/2022 20:58:49 - INFO - codeparrot_training - Step 498: {'lr': 0.00012425, 'samples': 7968, 'steps': 497, 'loss/train': 9.402966499328613} +01/22/2022 20:58:49 - INFO - codeparrot_training - Step 499: {'lr': 0.0001245, 'samples': 7984, 'steps': 498, 'loss/train': 9.398275375366211} +01/22/2022 20:58:50 - INFO - codeparrot_training - Step 500: {'lr': 0.00012475, 'samples': 8000, 'steps': 499, 'loss/train': 9.138171195983887} +01/22/2022 20:58:50 - INFO - codeparrot_training - Step 501: {'lr': 0.000125, 'samples': 8016, 'steps': 500, 'loss/train': 8.982580184936523} +01/22/2022 20:58:50 - INFO - codeparrot_training - Step 502: {'lr': 0.00012525, 'samples': 8032, 'steps': 501, 'loss/train': 9.195324897766113} +01/22/2022 20:58:51 - INFO - codeparrot_training - Step 503: {'lr': 0.00012550000000000001, 'samples': 8048, 'steps': 502, 'loss/train': 9.78219223022461} +01/22/2022 20:58:51 - INFO - codeparrot_training - Step 504: {'lr': 0.00012575, 'samples': 8064, 'steps': 503, 'loss/train': 8.0342435836792} +01/22/2022 
20:58:51 - INFO - codeparrot_training - Step 505: {'lr': 0.000126, 'samples': 8080, 'steps': 504, 'loss/train': 11.44943618774414} +01/22/2022 20:58:52 - INFO - codeparrot_training - Step 506: {'lr': 0.00012625, 'samples': 8096, 'steps': 505, 'loss/train': 8.372631072998047} +01/22/2022 20:58:52 - INFO - codeparrot_training - Step 507: {'lr': 0.0001265, 'samples': 8112, 'steps': 506, 'loss/train': 9.478333473205566} +01/22/2022 20:58:52 - INFO - codeparrot_training - Step 508: {'lr': 0.00012675, 'samples': 8128, 'steps': 507, 'loss/train': 9.39074993133545} +01/22/2022 20:58:53 - INFO - codeparrot_training - Step 509: {'lr': 0.000127, 'samples': 8144, 'steps': 508, 'loss/train': 8.292672157287598} +01/22/2022 20:58:53 - INFO - codeparrot_training - Step 510: {'lr': 0.00012725, 'samples': 8160, 'steps': 509, 'loss/train': 8.878990173339844} +01/22/2022 20:58:53 - INFO - codeparrot_training - Step 511: {'lr': 0.0001275, 'samples': 8176, 'steps': 510, 'loss/train': 8.554393768310547} +01/22/2022 20:58:54 - INFO - codeparrot_training - Step 512: {'lr': 0.00012775000000000002, 'samples': 8192, 'steps': 511, 'loss/train': 9.083661079406738} +01/22/2022 20:58:54 - INFO - codeparrot_training - Step 513: {'lr': 0.000128, 'samples': 8208, 'steps': 512, 'loss/train': 9.548246383666992} +01/22/2022 20:58:54 - INFO - codeparrot_training - Step 514: {'lr': 0.00012825, 'samples': 8224, 'steps': 513, 'loss/train': 10.084650993347168} +01/22/2022 20:58:55 - INFO - codeparrot_training - Step 515: {'lr': 0.0001285, 'samples': 8240, 'steps': 514, 'loss/train': 9.11759090423584} +01/22/2022 20:58:55 - INFO - codeparrot_training - Step 516: {'lr': 0.00012875, 'samples': 8256, 'steps': 515, 'loss/train': 9.433667182922363} +01/22/2022 20:58:55 - INFO - codeparrot_training - Step 517: {'lr': 0.00012900000000000002, 'samples': 8272, 'steps': 516, 'loss/train': 9.758384704589844} +01/22/2022 20:58:56 - INFO - codeparrot_training - Step 518: {'lr': 0.00012925, 'samples': 8288, 'steps': 517, 'loss/train': 8.889694213867188} +01/22/2022 20:58:56 - INFO - codeparrot_training - Step 519: {'lr': 0.0001295, 'samples': 8304, 'steps': 518, 'loss/train': 8.452557563781738} +01/22/2022 20:58:56 - INFO - codeparrot_training - Step 520: {'lr': 0.00012975, 'samples': 8320, 'steps': 519, 'loss/train': 7.942901134490967} +01/22/2022 20:58:57 - INFO - codeparrot_training - Step 521: {'lr': 0.00013000000000000002, 'samples': 8336, 'steps': 520, 'loss/train': 9.095209121704102} +01/22/2022 20:58:57 - INFO - codeparrot_training - Step 522: {'lr': 0.00013025, 'samples': 8352, 'steps': 521, 'loss/train': 8.852845191955566} +01/22/2022 20:58:58 - INFO - codeparrot_training - Step 523: {'lr': 0.0001305, 'samples': 8368, 'steps': 522, 'loss/train': 9.38457202911377} +01/22/2022 20:58:58 - INFO - codeparrot_training - Step 524: {'lr': 0.00013075, 'samples': 8384, 'steps': 523, 'loss/train': 8.530694007873535} +01/22/2022 20:58:58 - INFO - codeparrot_training - Step 525: {'lr': 0.000131, 'samples': 8400, 'steps': 524, 'loss/train': 8.916065216064453} +01/22/2022 20:58:59 - INFO - codeparrot_training - Step 526: {'lr': 0.00013125000000000002, 'samples': 8416, 'steps': 525, 'loss/train': 10.07585334777832} +01/22/2022 20:58:59 - INFO - codeparrot_training - Step 527: {'lr': 0.0001315, 'samples': 8432, 'steps': 526, 'loss/train': 8.909978866577148} +01/22/2022 20:58:59 - INFO - codeparrot_training - Step 528: {'lr': 0.00013175, 'samples': 8448, 'steps': 527, 'loss/train': 8.548264503479004} +01/22/2022 20:59:00 - INFO - codeparrot_training - 
Step 529: {'lr': 0.000132, 'samples': 8464, 'steps': 528, 'loss/train': 8.883501052856445} +01/22/2022 20:59:00 - INFO - codeparrot_training - Step 530: {'lr': 0.00013225000000000002, 'samples': 8480, 'steps': 529, 'loss/train': 9.170668601989746} +01/22/2022 20:59:00 - INFO - codeparrot_training - Step 531: {'lr': 0.00013250000000000002, 'samples': 8496, 'steps': 530, 'loss/train': 9.794912338256836} +01/22/2022 20:59:01 - INFO - codeparrot_training - Step 532: {'lr': 0.00013275, 'samples': 8512, 'steps': 531, 'loss/train': 10.07598876953125} +01/22/2022 20:59:01 - INFO - codeparrot_training - Step 533: {'lr': 0.000133, 'samples': 8528, 'steps': 532, 'loss/train': 9.025663375854492} +01/22/2022 20:59:01 - INFO - codeparrot_training - Step 534: {'lr': 0.00013325, 'samples': 8544, 'steps': 533, 'loss/train': 8.82016658782959} +01/22/2022 20:59:02 - INFO - codeparrot_training - Step 535: {'lr': 0.00013350000000000002, 'samples': 8560, 'steps': 534, 'loss/train': 9.815396308898926} +01/22/2022 20:59:02 - INFO - codeparrot_training - Step 536: {'lr': 0.00013375, 'samples': 8576, 'steps': 535, 'loss/train': 8.980450630187988} +01/22/2022 20:59:02 - INFO - codeparrot_training - Step 537: {'lr': 0.000134, 'samples': 8592, 'steps': 536, 'loss/train': 8.121761322021484} +01/22/2022 20:59:03 - INFO - codeparrot_training - Step 538: {'lr': 0.00013425, 'samples': 8608, 'steps': 537, 'loss/train': 9.149971961975098} +01/22/2022 20:59:04 - INFO - codeparrot_training - Step 539: {'lr': 0.00013450000000000002, 'samples': 8624, 'steps': 538, 'loss/train': 9.049347877502441} +01/22/2022 20:59:04 - INFO - codeparrot_training - Step 540: {'lr': 0.00013475000000000002, 'samples': 8640, 'steps': 539, 'loss/train': 8.695594787597656} +01/22/2022 20:59:04 - INFO - codeparrot_training - Step 541: {'lr': 0.000135, 'samples': 8656, 'steps': 540, 'loss/train': 8.62574291229248} +01/22/2022 20:59:05 - INFO - codeparrot_training - Step 542: {'lr': 0.00013525, 'samples': 8672, 'steps': 541, 'loss/train': 8.708123207092285} +01/22/2022 20:59:05 - INFO - codeparrot_training - Step 543: {'lr': 0.00013550000000000001, 'samples': 8688, 'steps': 542, 'loss/train': 8.998844146728516} +01/22/2022 20:59:05 - INFO - codeparrot_training - Step 544: {'lr': 0.00013575000000000002, 'samples': 8704, 'steps': 543, 'loss/train': 6.9779438972473145} +01/22/2022 20:59:06 - INFO - codeparrot_training - Step 545: {'lr': 0.00013600000000000003, 'samples': 8720, 'steps': 544, 'loss/train': 9.690673828125} +01/22/2022 20:59:06 - INFO - codeparrot_training - Step 546: {'lr': 0.00013625, 'samples': 8736, 'steps': 545, 'loss/train': 8.808652877807617} +01/22/2022 21:00:46 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 21:00:46 - WARNING - huggingface_hub.repository - Revision `crisp-armadillo-2` does not exist. Created and checked out branch `crisp-armadillo-2`. 
+01/22/2022 21:00:46 - WARNING - huggingface_hub.repository - +01/22/2022 21:00:58 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 21:00:59 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 21:02:27 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 21:02:28 - WARNING - huggingface_hub.repository - Revision `vocal-serenity-3` does not exist. Created and checked out branch `vocal-serenity-3`. +01/22/2022 21:02:28 - WARNING - huggingface_hub.repository - +01/22/2022 21:02:40 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 21:02:41 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 21:03:51 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 21:03:51 - WARNING - huggingface_hub.repository - Revision `clear-breeze-4` does not exist. Created and checked out branch `clear-breeze-4`. +01/22/2022 21:03:51 - WARNING - huggingface_hub.repository - +01/22/2022 21:04:04 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 21:04:05 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 21:04:26 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 48, 'steps': 0, 'loss/train': 12.383767127990723} +01/22/2022 22:30:20 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 22:30:20 - WARNING - huggingface_hub.repository - Revision `devoted-gorge-5` does not exist. Created and checked out branch `devoted-gorge-5`. +01/22/2022 22:30:20 - WARNING - huggingface_hub.repository - +01/22/2022 22:30:41 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 22:30:42 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 22:31:06 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 48, 'steps': 0, 'loss/train': 12.383767127990723} +01/22/2022 22:32:39 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 22:32:39 - WARNING - huggingface_hub.repository - Revision `dauntless-spaceship-6` does not exist. Created and checked out branch `dauntless-spaceship-6`. 
+01/22/2022 22:32:39 - WARNING - huggingface_hub.repository - +01/22/2022 22:32:51 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 22:32:53 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 22:33:13 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 12.39226245880127} +01/22/2022 22:33:27 - INFO - codeparrot_training - Step 2: {'lr': 0.0, 'samples': 64, 'steps': 0, 'loss/train': 12.35910415649414} +01/22/2022 22:33:39 - INFO - codeparrot_training - Step 3: {'lr': 0.0, 'samples': 96, 'steps': 0, 'loss/train': 12.36422061920166} +01/22/2022 22:33:51 - INFO - codeparrot_training - Step 4: {'lr': 0.0, 'samples': 128, 'steps': 0, 'loss/train': 12.400172233581543} +01/22/2022 22:35:09 - INFO - codeparrot_training - Step 5: {'lr': 2.5e-07, 'samples': 160, 'steps': 1, 'loss/train': 12.3590669631958} +01/22/2022 22:35:10 - INFO - codeparrot_training - Step 6: {'lr': 2.5e-07, 'samples': 192, 'steps': 1, 'loss/train': 12.361250877380371} +01/22/2022 22:35:10 - INFO - codeparrot_training - Step 7: {'lr': 2.5e-07, 'samples': 224, 'steps': 1, 'loss/train': 12.339007377624512} +01/22/2022 22:35:11 - INFO - codeparrot_training - Step 8: {'lr': 2.5e-07, 'samples': 256, 'steps': 1, 'loss/train': 12.382378578186035} +01/22/2022 22:37:22 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/22/2022 22:37:23 - WARNING - huggingface_hub.repository - Revision `lilac-plant-7` does not exist. Created and checked out branch `lilac-plant-7`. +01/22/2022 22:37:23 - WARNING - huggingface_hub.repository - +01/22/2022 22:37:35 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/22/2022 22:37:36 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/22/2022 22:37:57 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 12.39226245880127} +01/22/2022 22:39:25 - INFO - codeparrot_training - Step 2: {'lr': 2.5e-07, 'samples': 64, 'steps': 1, 'loss/train': 12.35910415649414} +01/22/2022 22:40:47 - INFO - codeparrot_training - Step 3: {'lr': 5e-07, 'samples': 96, 'steps': 2, 'loss/train': 12.353788375854492} +01/22/2022 22:40:48 - INFO - codeparrot_training - Step 4: {'lr': 7.5e-07, 'samples': 128, 'steps': 3, 'loss/train': 12.361062049865723} +01/22/2022 22:40:48 - INFO - codeparrot_training - Step 5: {'lr': 1e-06, 'samples': 160, 'steps': 4, 'loss/train': 12.307576179504395} +01/22/2022 22:40:49 - INFO - codeparrot_training - Step 6: {'lr': 1.25e-06, 'samples': 192, 'steps': 5, 'loss/train': 12.199652671813965} +01/22/2022 22:40:50 - INFO - codeparrot_training - Step 7: {'lr': 1.5e-06, 'samples': 224, 'steps': 6, 'loss/train': 12.202125549316406} +01/22/2022 22:40:50 - INFO - codeparrot_training - Step 8: {'lr': 1.75e-06, 'samples': 256, 'steps': 7, 'loss/train': 12.178979873657227} +01/22/2022 22:40:51 - INFO - codeparrot_training - Step 9: {'lr': 2e-06, 'samples': 288, 'steps': 8, 'loss/train': 11.944904327392578} +01/22/2022 22:40:51 - INFO - codeparrot_training - Step 10: {'lr': 2.25e-06, 'samples': 320, 'steps': 9, 'loss/train': 11.830462455749512} +01/22/2022 22:40:52 - INFO - codeparrot_training - Step 11: {'lr': 2.5e-06, 'samples': 352, 'steps': 10, 
'loss/train': 12.110416412353516} +01/22/2022 22:40:52 - INFO - codeparrot_training - Step 12: {'lr': 2.75e-06, 'samples': 384, 'steps': 11, 'loss/train': 11.714008331298828} +01/22/2022 22:40:53 - INFO - codeparrot_training - Step 13: {'lr': 3e-06, 'samples': 416, 'steps': 12, 'loss/train': 11.789088249206543} +01/22/2022 22:40:54 - INFO - codeparrot_training - Step 14: {'lr': 3.25e-06, 'samples': 448, 'steps': 13, 'loss/train': 11.538559913635254} +01/22/2022 22:40:54 - INFO - codeparrot_training - Step 15: {'lr': 3.5e-06, 'samples': 480, 'steps': 14, 'loss/train': 11.503335952758789} +01/22/2022 22:40:55 - INFO - codeparrot_training - Step 16: {'lr': 3.75e-06, 'samples': 512, 'steps': 15, 'loss/train': 11.795683860778809} +01/22/2022 22:40:55 - INFO - codeparrot_training - Step 17: {'lr': 4e-06, 'samples': 544, 'steps': 16, 'loss/train': 11.00994873046875} +01/22/2022 22:40:56 - INFO - codeparrot_training - Step 18: {'lr': 4.250000000000001e-06, 'samples': 576, 'steps': 17, 'loss/train': 11.134169578552246} +01/22/2022 22:40:56 - INFO - codeparrot_training - Step 19: {'lr': 4.5e-06, 'samples': 608, 'steps': 18, 'loss/train': 11.032825469970703} +01/22/2022 22:40:57 - INFO - codeparrot_training - Step 20: {'lr': 4.75e-06, 'samples': 640, 'steps': 19, 'loss/train': 11.375362396240234} +01/22/2022 22:40:58 - INFO - codeparrot_training - Step 21: {'lr': 5e-06, 'samples': 672, 'steps': 20, 'loss/train': 11.258044242858887} +01/22/2022 22:40:58 - INFO - codeparrot_training - Step 22: {'lr': 5.2500000000000006e-06, 'samples': 704, 'steps': 21, 'loss/train': 10.208221435546875} +01/22/2022 22:40:59 - INFO - codeparrot_training - Step 23: {'lr': 5.5e-06, 'samples': 736, 'steps': 22, 'loss/train': 10.191875457763672} +01/22/2022 22:40:59 - INFO - codeparrot_training - Step 24: {'lr': 5.75e-06, 'samples': 768, 'steps': 23, 'loss/train': 11.293356895446777} +01/22/2022 22:41:00 - INFO - codeparrot_training - Step 25: {'lr': 6e-06, 'samples': 800, 'steps': 24, 'loss/train': 11.456777572631836} +01/22/2022 22:41:00 - INFO - codeparrot_training - Step 26: {'lr': 6.25e-06, 'samples': 832, 'steps': 25, 'loss/train': 11.048440933227539} +01/22/2022 22:41:01 - INFO - codeparrot_training - Step 27: {'lr': 6.5e-06, 'samples': 864, 'steps': 26, 'loss/train': 10.786277770996094} +01/22/2022 22:41:01 - INFO - codeparrot_training - Step 28: {'lr': 6.75e-06, 'samples': 896, 'steps': 27, 'loss/train': 11.196805000305176} +01/22/2022 22:41:02 - INFO - codeparrot_training - Step 29: {'lr': 7e-06, 'samples': 928, 'steps': 28, 'loss/train': 11.152705192565918} +01/22/2022 22:41:03 - INFO - codeparrot_training - Step 30: {'lr': 7.250000000000001e-06, 'samples': 960, 'steps': 29, 'loss/train': 10.213715553283691} +01/22/2022 22:41:04 - INFO - codeparrot_training - Step 31: {'lr': 7.5e-06, 'samples': 992, 'steps': 30, 'loss/train': 10.594708442687988} +01/22/2022 22:41:04 - INFO - codeparrot_training - Step 32: {'lr': 7.75e-06, 'samples': 1024, 'steps': 31, 'loss/train': 11.169306755065918} +01/22/2022 22:41:05 - INFO - codeparrot_training - Step 33: {'lr': 8e-06, 'samples': 1056, 'steps': 32, 'loss/train': 11.021336555480957} +01/22/2022 22:41:06 - INFO - codeparrot_training - Step 34: {'lr': 8.25e-06, 'samples': 1088, 'steps': 33, 'loss/train': 10.517526626586914} +01/22/2022 22:41:06 - INFO - codeparrot_training - Step 35: {'lr': 8.500000000000002e-06, 'samples': 1120, 'steps': 34, 'loss/train': 10.871438980102539} +01/22/2022 22:41:07 - INFO - codeparrot_training - Step 36: {'lr': 8.750000000000001e-06, 'samples': 
1152, 'steps': 35, 'loss/train': 10.259847640991211} +01/22/2022 22:41:08 - INFO - codeparrot_training - Step 37: {'lr': 9e-06, 'samples': 1184, 'steps': 36, 'loss/train': 10.787789344787598} +01/22/2022 22:41:08 - INFO - codeparrot_training - Step 38: {'lr': 9.25e-06, 'samples': 1216, 'steps': 37, 'loss/train': 10.98786735534668} +01/22/2022 22:41:09 - INFO - codeparrot_training - Step 39: {'lr': 9.5e-06, 'samples': 1248, 'steps': 38, 'loss/train': 10.568509101867676} +01/22/2022 22:41:09 - INFO - codeparrot_training - Step 40: {'lr': 9.75e-06, 'samples': 1280, 'steps': 39, 'loss/train': 10.781625747680664} +01/22/2022 22:41:10 - INFO - codeparrot_training - Step 41: {'lr': 1e-05, 'samples': 1312, 'steps': 40, 'loss/train': 9.999937057495117} +01/22/2022 22:41:10 - INFO - codeparrot_training - Step 42: {'lr': 1.025e-05, 'samples': 1344, 'steps': 41, 'loss/train': 10.516951560974121} +01/22/2022 22:41:11 - INFO - codeparrot_training - Step 43: {'lr': 1.0500000000000001e-05, 'samples': 1376, 'steps': 42, 'loss/train': 11.459147453308105} +01/22/2022 22:41:12 - INFO - codeparrot_training - Step 44: {'lr': 1.0749999999999999e-05, 'samples': 1408, 'steps': 43, 'loss/train': 10.771370887756348} +01/22/2022 22:41:12 - INFO - codeparrot_training - Step 45: {'lr': 1.1e-05, 'samples': 1440, 'steps': 44, 'loss/train': 10.808180809020996} +01/22/2022 22:41:13 - INFO - codeparrot_training - Step 46: {'lr': 1.1249999999999999e-05, 'samples': 1472, 'steps': 45, 'loss/train': 10.45852279663086} +01/22/2022 22:41:13 - INFO - codeparrot_training - Step 47: {'lr': 1.15e-05, 'samples': 1504, 'steps': 46, 'loss/train': 11.133156776428223} +01/22/2022 22:41:14 - INFO - codeparrot_training - Step 48: {'lr': 1.1750000000000001e-05, 'samples': 1536, 'steps': 47, 'loss/train': 10.99738597869873} +01/22/2022 22:41:14 - INFO - codeparrot_training - Step 49: {'lr': 1.2e-05, 'samples': 1568, 'steps': 48, 'loss/train': 11.270173072814941} +01/22/2022 22:41:15 - INFO - codeparrot_training - Step 50: {'lr': 1.2250000000000001e-05, 'samples': 1600, 'steps': 49, 'loss/train': 10.836322784423828} +01/22/2022 22:41:15 - INFO - codeparrot_training - Step 51: {'lr': 1.25e-05, 'samples': 1632, 'steps': 50, 'loss/train': 11.053540229797363} +01/22/2022 22:41:16 - INFO - codeparrot_training - Step 52: {'lr': 1.275e-05, 'samples': 1664, 'steps': 51, 'loss/train': 11.137327194213867} +01/22/2022 22:41:17 - INFO - codeparrot_training - Step 53: {'lr': 1.3e-05, 'samples': 1696, 'steps': 52, 'loss/train': 10.60883617401123} +01/22/2022 22:41:17 - INFO - codeparrot_training - Step 54: {'lr': 1.325e-05, 'samples': 1728, 'steps': 53, 'loss/train': 10.501367568969727} +01/22/2022 22:41:18 - INFO - codeparrot_training - Step 55: {'lr': 1.35e-05, 'samples': 1760, 'steps': 54, 'loss/train': 10.589244842529297} +01/22/2022 22:41:18 - INFO - codeparrot_training - Step 56: {'lr': 1.375e-05, 'samples': 1792, 'steps': 55, 'loss/train': 10.817575454711914} +01/22/2022 22:41:19 - INFO - codeparrot_training - Step 57: {'lr': 1.4e-05, 'samples': 1824, 'steps': 56, 'loss/train': 11.04649829864502} +01/22/2022 22:41:19 - INFO - codeparrot_training - Step 58: {'lr': 1.425e-05, 'samples': 1856, 'steps': 57, 'loss/train': 10.681659698486328} +01/22/2022 22:41:20 - INFO - codeparrot_training - Step 59: {'lr': 1.4500000000000002e-05, 'samples': 1888, 'steps': 58, 'loss/train': 10.836919784545898} +01/22/2022 22:41:22 - INFO - codeparrot_training - Step 60: {'lr': 1.475e-05, 'samples': 1920, 'steps': 59, 'loss/train': 10.63708782196045} +01/22/2022 22:41:23 
- INFO - codeparrot_training - Step 61: {'lr': 1.5e-05, 'samples': 1952, 'steps': 60, 'loss/train': 10.799856185913086} +01/22/2022 22:41:23 - INFO - codeparrot_training - Step 62: {'lr': 1.525e-05, 'samples': 1984, 'steps': 61, 'loss/train': 10.998774528503418} +01/22/2022 22:41:24 - INFO - codeparrot_training - Step 63: {'lr': 1.55e-05, 'samples': 2016, 'steps': 62, 'loss/train': 11.037872314453125} +01/22/2022 22:41:24 - INFO - codeparrot_training - Step 64: {'lr': 1.575e-05, 'samples': 2048, 'steps': 63, 'loss/train': 10.457962036132812} +01/22/2022 22:41:25 - INFO - codeparrot_training - Step 65: {'lr': 1.6e-05, 'samples': 2080, 'steps': 64, 'loss/train': 10.65717887878418} +01/22/2022 22:41:26 - INFO - codeparrot_training - Step 66: {'lr': 1.6250000000000002e-05, 'samples': 2112, 'steps': 65, 'loss/train': 10.757637977600098} +01/22/2022 22:41:26 - INFO - codeparrot_training - Step 67: {'lr': 1.65e-05, 'samples': 2144, 'steps': 66, 'loss/train': 10.233272552490234} +01/22/2022 22:41:27 - INFO - codeparrot_training - Step 68: {'lr': 1.675e-05, 'samples': 2176, 'steps': 67, 'loss/train': 10.95653247833252} +01/22/2022 22:41:27 - INFO - codeparrot_training - Step 69: {'lr': 1.7000000000000003e-05, 'samples': 2208, 'steps': 68, 'loss/train': 10.763069152832031} +01/22/2022 22:41:28 - INFO - codeparrot_training - Step 70: {'lr': 1.7250000000000003e-05, 'samples': 2240, 'steps': 69, 'loss/train': 10.882318496704102} +01/22/2022 22:41:28 - INFO - codeparrot_training - Step 71: {'lr': 1.7500000000000002e-05, 'samples': 2272, 'steps': 70, 'loss/train': 10.776512145996094} +01/22/2022 22:41:29 - INFO - codeparrot_training - Step 72: {'lr': 1.7749999999999998e-05, 'samples': 2304, 'steps': 71, 'loss/train': 10.750555992126465} +01/22/2022 22:41:30 - INFO - codeparrot_training - Step 73: {'lr': 1.8e-05, 'samples': 2336, 'steps': 72, 'loss/train': 10.306445121765137} +01/22/2022 22:41:30 - INFO - codeparrot_training - Step 74: {'lr': 1.825e-05, 'samples': 2368, 'steps': 73, 'loss/train': 9.901557922363281} +01/22/2022 22:41:31 - INFO - codeparrot_training - Step 75: {'lr': 1.85e-05, 'samples': 2400, 'steps': 74, 'loss/train': 10.831633567810059} +01/22/2022 22:41:31 - INFO - codeparrot_training - Step 76: {'lr': 1.875e-05, 'samples': 2432, 'steps': 75, 'loss/train': 9.355510711669922} +01/22/2022 22:41:32 - INFO - codeparrot_training - Step 77: {'lr': 1.9e-05, 'samples': 2464, 'steps': 76, 'loss/train': 11.28085994720459} +01/22/2022 22:41:32 - INFO - codeparrot_training - Step 78: {'lr': 1.925e-05, 'samples': 2496, 'steps': 77, 'loss/train': 10.275238037109375} +01/22/2022 22:41:33 - INFO - codeparrot_training - Step 79: {'lr': 1.95e-05, 'samples': 2528, 'steps': 78, 'loss/train': 10.661409378051758} +01/22/2022 22:41:33 - INFO - codeparrot_training - Step 80: {'lr': 1.975e-05, 'samples': 2560, 'steps': 79, 'loss/train': 10.607142448425293} +01/22/2022 22:41:34 - INFO - codeparrot_training - Step 81: {'lr': 2e-05, 'samples': 2592, 'steps': 80, 'loss/train': 10.20798397064209} +01/22/2022 22:41:35 - INFO - codeparrot_training - Step 82: {'lr': 2.025e-05, 'samples': 2624, 'steps': 81, 'loss/train': 10.202529907226562} +01/22/2022 22:41:35 - INFO - codeparrot_training - Step 83: {'lr': 2.05e-05, 'samples': 2656, 'steps': 82, 'loss/train': 10.082209587097168} +01/22/2022 22:41:36 - INFO - codeparrot_training - Step 84: {'lr': 2.0750000000000003e-05, 'samples': 2688, 'steps': 83, 'loss/train': 10.3154935836792} +01/22/2022 22:41:36 - INFO - codeparrot_training - Step 85: {'lr': 
2.1000000000000002e-05, 'samples': 2720, 'steps': 84, 'loss/train': 11.072750091552734} +01/22/2022 22:41:37 - INFO - codeparrot_training - Step 86: {'lr': 2.125e-05, 'samples': 2752, 'steps': 85, 'loss/train': 10.945484161376953} +01/22/2022 22:41:37 - INFO - codeparrot_training - Step 87: {'lr': 2.1499999999999997e-05, 'samples': 2784, 'steps': 86, 'loss/train': 10.496516227722168} +01/22/2022 22:41:38 - INFO - codeparrot_training - Step 88: {'lr': 2.175e-05, 'samples': 2816, 'steps': 87, 'loss/train': 10.449604034423828} +01/22/2022 22:41:39 - INFO - codeparrot_training - Step 89: {'lr': 2.2e-05, 'samples': 2848, 'steps': 88, 'loss/train': 11.252097129821777} +01/22/2022 22:41:40 - INFO - codeparrot_training - Step 90: {'lr': 2.225e-05, 'samples': 2880, 'steps': 89, 'loss/train': 10.560819625854492} +01/22/2022 22:41:40 - INFO - codeparrot_training - Step 91: {'lr': 2.2499999999999998e-05, 'samples': 2912, 'steps': 90, 'loss/train': 10.938161849975586} +01/22/2022 22:41:41 - INFO - codeparrot_training - Step 92: {'lr': 2.275e-05, 'samples': 2944, 'steps': 91, 'loss/train': 10.531072616577148} +01/22/2022 22:41:41 - INFO - codeparrot_training - Step 93: {'lr': 2.3e-05, 'samples': 2976, 'steps': 92, 'loss/train': 10.76926326751709} +01/22/2022 22:41:42 - INFO - codeparrot_training - Step 94: {'lr': 2.325e-05, 'samples': 3008, 'steps': 93, 'loss/train': 10.426630973815918} +01/22/2022 22:41:43 - INFO - codeparrot_training - Step 95: {'lr': 2.3500000000000002e-05, 'samples': 3040, 'steps': 94, 'loss/train': 10.335378646850586} +01/22/2022 22:41:43 - INFO - codeparrot_training - Step 96: {'lr': 2.375e-05, 'samples': 3072, 'steps': 95, 'loss/train': 10.610383033752441} +01/22/2022 22:41:44 - INFO - codeparrot_training - Step 97: {'lr': 2.4e-05, 'samples': 3104, 'steps': 96, 'loss/train': 10.274197578430176} +01/22/2022 22:41:44 - INFO - codeparrot_training - Step 98: {'lr': 2.425e-05, 'samples': 3136, 'steps': 97, 'loss/train': 10.167412757873535} +01/22/2022 22:41:45 - INFO - codeparrot_training - Step 99: {'lr': 2.4500000000000003e-05, 'samples': 3168, 'steps': 98, 'loss/train': 10.483407974243164} +01/22/2022 22:41:45 - INFO - codeparrot_training - Step 100: {'lr': 2.4750000000000002e-05, 'samples': 3200, 'steps': 99, 'loss/train': 9.65080451965332} +01/22/2022 22:41:46 - INFO - codeparrot_training - Step 101: {'lr': 2.5e-05, 'samples': 3232, 'steps': 100, 'loss/train': 10.045638084411621} +01/22/2022 22:41:47 - INFO - codeparrot_training - Step 102: {'lr': 2.525e-05, 'samples': 3264, 'steps': 101, 'loss/train': 10.382650375366211} +01/22/2022 22:41:47 - INFO - codeparrot_training - Step 103: {'lr': 2.55e-05, 'samples': 3296, 'steps': 102, 'loss/train': 9.568435668945312} +01/22/2022 22:41:48 - INFO - codeparrot_training - Step 104: {'lr': 2.575e-05, 'samples': 3328, 'steps': 103, 'loss/train': 10.018266677856445} +01/22/2022 22:41:48 - INFO - codeparrot_training - Step 105: {'lr': 2.6e-05, 'samples': 3360, 'steps': 104, 'loss/train': 10.176482200622559} +01/22/2022 22:41:49 - INFO - codeparrot_training - Step 106: {'lr': 2.625e-05, 'samples': 3392, 'steps': 105, 'loss/train': 10.556117057800293} +01/22/2022 22:41:49 - INFO - codeparrot_training - Step 107: {'lr': 2.65e-05, 'samples': 3424, 'steps': 106, 'loss/train': 9.766399383544922} +01/22/2022 22:41:50 - INFO - codeparrot_training - Step 108: {'lr': 2.675e-05, 'samples': 3456, 'steps': 107, 'loss/train': 10.505266189575195} +01/22/2022 22:41:50 - INFO - codeparrot_training - Step 109: {'lr': 2.7e-05, 'samples': 3488, 'steps': 108, 
'loss/train': 10.298112869262695} +01/22/2022 22:41:51 - INFO - codeparrot_training - Step 110: {'lr': 2.725e-05, 'samples': 3520, 'steps': 109, 'loss/train': 10.500211715698242} +01/22/2022 22:41:52 - INFO - codeparrot_training - Step 111: {'lr': 2.75e-05, 'samples': 3552, 'steps': 110, 'loss/train': 9.788132667541504} +01/22/2022 22:41:52 - INFO - codeparrot_training - Step 112: {'lr': 2.775e-05, 'samples': 3584, 'steps': 111, 'loss/train': 9.898433685302734} +01/22/2022 22:41:53 - INFO - codeparrot_training - Step 113: {'lr': 2.8e-05, 'samples': 3616, 'steps': 112, 'loss/train': 9.982443809509277} +01/22/2022 22:41:53 - INFO - codeparrot_training - Step 114: {'lr': 2.8250000000000002e-05, 'samples': 3648, 'steps': 113, 'loss/train': 10.91128921508789} +01/22/2022 22:41:54 - INFO - codeparrot_training - Step 115: {'lr': 2.85e-05, 'samples': 3680, 'steps': 114, 'loss/train': 10.188284873962402} +01/22/2022 22:41:54 - INFO - codeparrot_training - Step 116: {'lr': 2.875e-05, 'samples': 3712, 'steps': 115, 'loss/train': 9.392685890197754} +01/22/2022 22:41:55 - INFO - codeparrot_training - Step 117: {'lr': 2.9000000000000004e-05, 'samples': 3744, 'steps': 116, 'loss/train': 10.309635162353516} +01/22/2022 22:41:56 - INFO - codeparrot_training - Step 118: {'lr': 2.9250000000000003e-05, 'samples': 3776, 'steps': 117, 'loss/train': 10.19661808013916} +01/22/2022 22:41:56 - INFO - codeparrot_training - Step 119: {'lr': 2.95e-05, 'samples': 3808, 'steps': 118, 'loss/train': 9.877015113830566} +01/22/2022 22:41:57 - INFO - codeparrot_training - Step 120: {'lr': 2.9749999999999998e-05, 'samples': 3840, 'steps': 119, 'loss/train': 9.299678802490234} +01/22/2022 22:41:58 - INFO - codeparrot_training - Step 121: {'lr': 3e-05, 'samples': 3872, 'steps': 120, 'loss/train': 10.295487403869629} +01/22/2022 22:41:59 - INFO - codeparrot_training - Step 122: {'lr': 3.025e-05, 'samples': 3904, 'steps': 121, 'loss/train': 9.334184646606445} +01/22/2022 22:41:59 - INFO - codeparrot_training - Step 123: {'lr': 3.05e-05, 'samples': 3936, 'steps': 122, 'loss/train': 9.916821479797363} +01/22/2022 22:42:00 - INFO - codeparrot_training - Step 124: {'lr': 3.075e-05, 'samples': 3968, 'steps': 123, 'loss/train': 9.585711479187012} +01/22/2022 22:42:00 - INFO - codeparrot_training - Step 125: {'lr': 3.1e-05, 'samples': 4000, 'steps': 124, 'loss/train': 9.624146461486816} +01/22/2022 22:42:01 - INFO - codeparrot_training - Step 126: {'lr': 3.125e-05, 'samples': 4032, 'steps': 125, 'loss/train': 10.085657119750977} +01/22/2022 22:42:01 - INFO - codeparrot_training - Step 127: {'lr': 3.15e-05, 'samples': 4064, 'steps': 126, 'loss/train': 10.520400047302246} +01/22/2022 22:42:02 - INFO - codeparrot_training - Step 128: {'lr': 3.175e-05, 'samples': 4096, 'steps': 127, 'loss/train': 9.151983261108398} +01/22/2022 22:42:02 - INFO - codeparrot_training - Step 129: {'lr': 3.2e-05, 'samples': 4128, 'steps': 128, 'loss/train': 9.880334854125977} +01/22/2022 22:42:03 - INFO - codeparrot_training - Step 130: {'lr': 3.2250000000000005e-05, 'samples': 4160, 'steps': 129, 'loss/train': 10.26987075805664} +01/22/2022 22:42:04 - INFO - codeparrot_training - Step 131: {'lr': 3.2500000000000004e-05, 'samples': 4192, 'steps': 130, 'loss/train': 9.659605026245117} +01/22/2022 22:42:04 - INFO - codeparrot_training - Step 132: {'lr': 3.275e-05, 'samples': 4224, 'steps': 131, 'loss/train': 9.344758033752441} +01/22/2022 22:42:05 - INFO - codeparrot_training - Step 133: {'lr': 3.3e-05, 'samples': 4256, 'steps': 132, 'loss/train': 
9.500842094421387} +01/22/2022 22:42:05 - INFO - codeparrot_training - Step 134: {'lr': 3.325e-05, 'samples': 4288, 'steps': 133, 'loss/train': 9.173142433166504} +01/22/2022 22:42:06 - INFO - codeparrot_training - Step 135: {'lr': 3.35e-05, 'samples': 4320, 'steps': 134, 'loss/train': 9.618180274963379} +01/22/2022 22:42:06 - INFO - codeparrot_training - Step 136: {'lr': 3.375e-05, 'samples': 4352, 'steps': 135, 'loss/train': 10.489212036132812} +01/22/2022 22:42:07 - INFO - codeparrot_training - Step 137: {'lr': 3.4000000000000007e-05, 'samples': 4384, 'steps': 136, 'loss/train': 9.744336128234863} +01/22/2022 22:42:07 - INFO - codeparrot_training - Step 138: {'lr': 3.4250000000000006e-05, 'samples': 4416, 'steps': 137, 'loss/train': 9.300286293029785} +01/22/2022 22:42:08 - INFO - codeparrot_training - Step 139: {'lr': 3.4500000000000005e-05, 'samples': 4448, 'steps': 138, 'loss/train': 10.24305248260498} +01/22/2022 22:42:09 - INFO - codeparrot_training - Step 140: {'lr': 3.4750000000000004e-05, 'samples': 4480, 'steps': 139, 'loss/train': 10.561542510986328} +01/22/2022 22:42:09 - INFO - codeparrot_training - Step 141: {'lr': 3.5000000000000004e-05, 'samples': 4512, 'steps': 140, 'loss/train': 9.80003833770752} +01/22/2022 22:42:10 - INFO - codeparrot_training - Step 142: {'lr': 3.5249999999999996e-05, 'samples': 4544, 'steps': 141, 'loss/train': 9.553709983825684} +01/22/2022 22:42:10 - INFO - codeparrot_training - Step 143: {'lr': 3.5499999999999996e-05, 'samples': 4576, 'steps': 142, 'loss/train': 8.964605331420898} +01/22/2022 22:42:11 - INFO - codeparrot_training - Step 144: {'lr': 3.5749999999999995e-05, 'samples': 4608, 'steps': 143, 'loss/train': 10.302897453308105} +01/22/2022 22:42:11 - INFO - codeparrot_training - Step 145: {'lr': 3.6e-05, 'samples': 4640, 'steps': 144, 'loss/train': 8.568774223327637} +01/22/2022 22:42:12 - INFO - codeparrot_training - Step 146: {'lr': 3.625e-05, 'samples': 4672, 'steps': 145, 'loss/train': 10.322160720825195} +01/22/2022 22:42:12 - INFO - codeparrot_training - Step 147: {'lr': 3.65e-05, 'samples': 4704, 'steps': 146, 'loss/train': 9.181485176086426} +01/22/2022 22:42:13 - INFO - codeparrot_training - Step 148: {'lr': 3.675e-05, 'samples': 4736, 'steps': 147, 'loss/train': 10.00892162322998} +01/22/2022 22:42:14 - INFO - codeparrot_training - Step 149: {'lr': 3.7e-05, 'samples': 4768, 'steps': 148, 'loss/train': 8.951850891113281} +01/22/2022 22:42:14 - INFO - codeparrot_training - Step 150: {'lr': 3.725e-05, 'samples': 4800, 'steps': 149, 'loss/train': 8.797324180603027} +01/22/2022 22:42:15 - INFO - codeparrot_training - Step 151: {'lr': 3.75e-05, 'samples': 4832, 'steps': 150, 'loss/train': 8.745787620544434} +01/22/2022 22:42:16 - INFO - codeparrot_training - Step 152: {'lr': 3.775e-05, 'samples': 4864, 'steps': 151, 'loss/train': 10.164430618286133} +01/22/2022 22:42:17 - INFO - codeparrot_training - Step 153: {'lr': 3.8e-05, 'samples': 4896, 'steps': 152, 'loss/train': 9.92223072052002} +01/22/2022 22:42:17 - INFO - codeparrot_training - Step 154: {'lr': 3.825e-05, 'samples': 4928, 'steps': 153, 'loss/train': 10.489767074584961} +01/22/2022 22:42:18 - INFO - codeparrot_training - Step 155: {'lr': 3.85e-05, 'samples': 4960, 'steps': 154, 'loss/train': 10.013514518737793} +01/22/2022 22:42:18 - INFO - codeparrot_training - Step 156: {'lr': 3.875e-05, 'samples': 4992, 'steps': 155, 'loss/train': 9.416831016540527} +01/22/2022 22:42:19 - INFO - codeparrot_training - Step 157: {'lr': 3.9e-05, 'samples': 5024, 'steps': 156, 'loss/train': 
10.258069038391113} +01/22/2022 22:42:19 - INFO - codeparrot_training - Step 158: {'lr': 3.925e-05, 'samples': 5056, 'steps': 157, 'loss/train': 10.34802532196045} +01/22/2022 22:42:20 - INFO - codeparrot_training - Step 159: {'lr': 3.95e-05, 'samples': 5088, 'steps': 158, 'loss/train': 10.426945686340332} +01/22/2022 22:42:20 - INFO - codeparrot_training - Step 160: {'lr': 3.9750000000000004e-05, 'samples': 5120, 'steps': 159, 'loss/train': 9.7449369430542} +01/22/2022 22:42:21 - INFO - codeparrot_training - Step 161: {'lr': 4e-05, 'samples': 5152, 'steps': 160, 'loss/train': 9.353628158569336} +01/22/2022 22:42:22 - INFO - codeparrot_training - Step 162: {'lr': 4.025e-05, 'samples': 5184, 'steps': 161, 'loss/train': 10.259864807128906} +01/22/2022 22:42:22 - INFO - codeparrot_training - Step 163: {'lr': 4.05e-05, 'samples': 5216, 'steps': 162, 'loss/train': 9.503830909729004} +01/22/2022 22:42:23 - INFO - codeparrot_training - Step 164: {'lr': 4.075e-05, 'samples': 5248, 'steps': 163, 'loss/train': 10.237804412841797} +01/22/2022 22:42:23 - INFO - codeparrot_training - Step 165: {'lr': 4.1e-05, 'samples': 5280, 'steps': 164, 'loss/train': 9.857827186584473} +01/22/2022 22:42:24 - INFO - codeparrot_training - Step 166: {'lr': 4.125e-05, 'samples': 5312, 'steps': 165, 'loss/train': 10.114072799682617} +01/22/2022 22:42:24 - INFO - codeparrot_training - Step 167: {'lr': 4.1500000000000006e-05, 'samples': 5344, 'steps': 166, 'loss/train': 10.0111665725708} +01/22/2022 22:42:25 - INFO - codeparrot_training - Step 168: {'lr': 4.1750000000000005e-05, 'samples': 5376, 'steps': 167, 'loss/train': 9.714751243591309} +01/22/2022 22:42:26 - INFO - codeparrot_training - Step 169: {'lr': 4.2000000000000004e-05, 'samples': 5408, 'steps': 168, 'loss/train': 9.12442684173584} +01/22/2022 22:42:26 - INFO - codeparrot_training - Step 170: {'lr': 4.2250000000000004e-05, 'samples': 5440, 'steps': 169, 'loss/train': 8.825699806213379} +01/22/2022 22:42:27 - INFO - codeparrot_training - Step 171: {'lr': 4.25e-05, 'samples': 5472, 'steps': 170, 'loss/train': 10.00616455078125} +01/22/2022 22:42:27 - INFO - codeparrot_training - Step 172: {'lr': 4.275e-05, 'samples': 5504, 'steps': 171, 'loss/train': 9.762033462524414} +01/22/2022 22:42:28 - INFO - codeparrot_training - Step 173: {'lr': 4.2999999999999995e-05, 'samples': 5536, 'steps': 172, 'loss/train': 9.882036209106445} +01/22/2022 22:42:28 - INFO - codeparrot_training - Step 174: {'lr': 4.325e-05, 'samples': 5568, 'steps': 173, 'loss/train': 10.034646034240723} +01/22/2022 22:42:29 - INFO - codeparrot_training - Step 175: {'lr': 4.35e-05, 'samples': 5600, 'steps': 174, 'loss/train': 9.614574432373047} +01/22/2022 22:42:29 - INFO - codeparrot_training - Step 176: {'lr': 4.375e-05, 'samples': 5632, 'steps': 175, 'loss/train': 9.608891487121582} +01/22/2022 22:42:30 - INFO - codeparrot_training - Step 177: {'lr': 4.4e-05, 'samples': 5664, 'steps': 176, 'loss/train': 9.173517227172852} +01/22/2022 22:42:31 - INFO - codeparrot_training - Step 178: {'lr': 4.425e-05, 'samples': 5696, 'steps': 177, 'loss/train': 9.670883178710938} +01/22/2022 22:42:31 - INFO - codeparrot_training - Step 179: {'lr': 4.45e-05, 'samples': 5728, 'steps': 178, 'loss/train': 9.34721851348877} +01/22/2022 22:42:32 - INFO - codeparrot_training - Step 180: {'lr': 4.475e-05, 'samples': 5760, 'steps': 179, 'loss/train': 9.602530479431152} +01/22/2022 22:42:33 - INFO - codeparrot_training - Step 181: {'lr': 4.4999999999999996e-05, 'samples': 5792, 'steps': 180, 'loss/train': 9.297558784484863} 
+01/22/2022 22:42:33 - INFO - codeparrot_training - Step 182: {'lr': 4.525e-05, 'samples': 5824, 'steps': 181, 'loss/train': 9.829305648803711} +01/22/2022 22:42:34 - INFO - codeparrot_training - Step 183: {'lr': 4.55e-05, 'samples': 5856, 'steps': 182, 'loss/train': 9.429950714111328} +01/22/2022 22:42:35 - INFO - codeparrot_training - Step 184: {'lr': 4.575e-05, 'samples': 5888, 'steps': 183, 'loss/train': 9.863890647888184} +01/22/2022 22:42:35 - INFO - codeparrot_training - Step 185: {'lr': 4.6e-05, 'samples': 5920, 'steps': 184, 'loss/train': 8.60593318939209} +01/22/2022 22:42:36 - INFO - codeparrot_training - Step 186: {'lr': 4.625e-05, 'samples': 5952, 'steps': 185, 'loss/train': 9.602508544921875} +01/22/2022 22:42:36 - INFO - codeparrot_training - Step 187: {'lr': 4.65e-05, 'samples': 5984, 'steps': 186, 'loss/train': 9.753731727600098} +01/22/2022 22:42:37 - INFO - codeparrot_training - Step 188: {'lr': 4.675e-05, 'samples': 6016, 'steps': 187, 'loss/train': 9.293747901916504} +01/22/2022 22:42:37 - INFO - codeparrot_training - Step 189: {'lr': 4.7000000000000004e-05, 'samples': 6048, 'steps': 188, 'loss/train': 9.92492389678955} +01/22/2022 22:42:38 - INFO - codeparrot_training - Step 190: {'lr': 4.725e-05, 'samples': 6080, 'steps': 189, 'loss/train': 9.119004249572754} +01/22/2022 22:42:39 - INFO - codeparrot_training - Step 191: {'lr': 4.75e-05, 'samples': 6112, 'steps': 190, 'loss/train': 8.894718170166016} +01/22/2022 22:42:39 - INFO - codeparrot_training - Step 192: {'lr': 4.775e-05, 'samples': 6144, 'steps': 191, 'loss/train': 9.295947074890137} +01/22/2022 22:42:40 - INFO - codeparrot_training - Step 193: {'lr': 4.8e-05, 'samples': 6176, 'steps': 192, 'loss/train': 10.388311386108398} +01/22/2022 22:42:40 - INFO - codeparrot_training - Step 194: {'lr': 4.825e-05, 'samples': 6208, 'steps': 193, 'loss/train': 9.33549976348877} +01/22/2022 22:42:41 - INFO - codeparrot_training - Step 195: {'lr': 4.85e-05, 'samples': 6240, 'steps': 194, 'loss/train': 9.435409545898438} +01/22/2022 22:42:41 - INFO - codeparrot_training - Step 196: {'lr': 4.8750000000000006e-05, 'samples': 6272, 'steps': 195, 'loss/train': 9.913029670715332} +01/22/2022 22:42:42 - INFO - codeparrot_training - Step 197: {'lr': 4.9000000000000005e-05, 'samples': 6304, 'steps': 196, 'loss/train': 9.300405502319336} +01/22/2022 22:42:42 - INFO - codeparrot_training - Step 198: {'lr': 4.9250000000000004e-05, 'samples': 6336, 'steps': 197, 'loss/train': 9.280932426452637} +01/22/2022 22:42:43 - INFO - codeparrot_training - Step 199: {'lr': 4.9500000000000004e-05, 'samples': 6368, 'steps': 198, 'loss/train': 9.925702095031738} +01/22/2022 22:42:44 - INFO - codeparrot_training - Step 200: {'lr': 4.975e-05, 'samples': 6400, 'steps': 199, 'loss/train': 9.520857810974121} +01/22/2022 22:42:44 - INFO - codeparrot_training - Step 201: {'lr': 5e-05, 'samples': 6432, 'steps': 200, 'loss/train': 9.087495803833008} +01/22/2022 22:42:45 - INFO - codeparrot_training - Step 202: {'lr': 5.025e-05, 'samples': 6464, 'steps': 201, 'loss/train': 9.246336936950684} +01/22/2022 22:42:45 - INFO - codeparrot_training - Step 203: {'lr': 5.05e-05, 'samples': 6496, 'steps': 202, 'loss/train': 8.59572982788086} +01/22/2022 22:42:46 - INFO - codeparrot_training - Step 204: {'lr': 5.075000000000001e-05, 'samples': 6528, 'steps': 203, 'loss/train': 9.695992469787598} +01/22/2022 22:42:46 - INFO - codeparrot_training - Step 205: {'lr': 5.1e-05, 'samples': 6560, 'steps': 204, 'loss/train': 9.832139015197754} +01/22/2022 22:42:47 - INFO - 
codeparrot_training - Step 206: {'lr': 5.125e-05, 'samples': 6592, 'steps': 205, 'loss/train': 9.18824577331543} +01/22/2022 22:42:48 - INFO - codeparrot_training - Step 207: {'lr': 5.15e-05, 'samples': 6624, 'steps': 206, 'loss/train': 9.604211807250977} +01/22/2022 22:42:48 - INFO - codeparrot_training - Step 208: {'lr': 5.175e-05, 'samples': 6656, 'steps': 207, 'loss/train': 9.246150016784668} +01/22/2022 22:42:49 - INFO - codeparrot_training - Step 209: {'lr': 5.2e-05, 'samples': 6688, 'steps': 208, 'loss/train': 8.919072151184082} +01/22/2022 22:42:51 - INFO - codeparrot_training - Step 210: {'lr': 5.2249999999999996e-05, 'samples': 6720, 'steps': 209, 'loss/train': 9.78226375579834} +01/22/2022 22:42:51 - INFO - codeparrot_training - Step 211: {'lr': 5.25e-05, 'samples': 6752, 'steps': 210, 'loss/train': 9.283915519714355} +01/22/2022 22:42:52 - INFO - codeparrot_training - Step 212: {'lr': 5.275e-05, 'samples': 6784, 'steps': 211, 'loss/train': 9.390427589416504} +01/22/2022 22:42:52 - INFO - codeparrot_training - Step 213: {'lr': 5.3e-05, 'samples': 6816, 'steps': 212, 'loss/train': 9.055682182312012} +01/22/2022 22:42:53 - INFO - codeparrot_training - Step 214: {'lr': 5.325e-05, 'samples': 6848, 'steps': 213, 'loss/train': 9.773158073425293} +01/22/2022 22:42:53 - INFO - codeparrot_training - Step 215: {'lr': 5.35e-05, 'samples': 6880, 'steps': 214, 'loss/train': 9.989599227905273} +01/22/2022 22:42:54 - INFO - codeparrot_training - Step 216: {'lr': 5.375e-05, 'samples': 6912, 'steps': 215, 'loss/train': 9.33253288269043} +01/22/2022 22:42:54 - INFO - codeparrot_training - Step 217: {'lr': 5.4e-05, 'samples': 6944, 'steps': 216, 'loss/train': 9.52785587310791} +01/22/2022 22:42:55 - INFO - codeparrot_training - Step 218: {'lr': 5.4250000000000004e-05, 'samples': 6976, 'steps': 217, 'loss/train': 9.869882583618164} +01/22/2022 22:42:56 - INFO - codeparrot_training - Step 219: {'lr': 5.45e-05, 'samples': 7008, 'steps': 218, 'loss/train': 10.199316024780273} +01/22/2022 22:42:56 - INFO - codeparrot_training - Step 220: {'lr': 5.475e-05, 'samples': 7040, 'steps': 219, 'loss/train': 9.900988578796387} +01/22/2022 22:42:57 - INFO - codeparrot_training - Step 221: {'lr': 5.5e-05, 'samples': 7072, 'steps': 220, 'loss/train': 8.901515007019043} +01/22/2022 22:42:57 - INFO - codeparrot_training - Step 222: {'lr': 5.525e-05, 'samples': 7104, 'steps': 221, 'loss/train': 9.206402778625488} +01/22/2022 22:42:58 - INFO - codeparrot_training - Step 223: {'lr': 5.55e-05, 'samples': 7136, 'steps': 222, 'loss/train': 9.233396530151367} +01/22/2022 22:42:58 - INFO - codeparrot_training - Step 224: {'lr': 5.575e-05, 'samples': 7168, 'steps': 223, 'loss/train': 9.787510871887207} +01/22/2022 22:42:59 - INFO - codeparrot_training - Step 225: {'lr': 5.6e-05, 'samples': 7200, 'steps': 224, 'loss/train': 9.59318733215332} +01/22/2022 22:43:00 - INFO - codeparrot_training - Step 226: {'lr': 5.6250000000000005e-05, 'samples': 7232, 'steps': 225, 'loss/train': 9.422319412231445} +01/22/2022 22:43:00 - INFO - codeparrot_training - Step 227: {'lr': 5.6500000000000005e-05, 'samples': 7264, 'steps': 226, 'loss/train': 9.690566062927246} +01/22/2022 22:43:01 - INFO - codeparrot_training - Step 228: {'lr': 5.6750000000000004e-05, 'samples': 7296, 'steps': 227, 'loss/train': 9.593408584594727} +01/22/2022 22:43:01 - INFO - codeparrot_training - Step 229: {'lr': 5.7e-05, 'samples': 7328, 'steps': 228, 'loss/train': 9.305581092834473} +01/22/2022 22:43:02 - INFO - codeparrot_training - Step 230: {'lr': 5.725e-05, 
'samples': 7360, 'steps': 229, 'loss/train': 8.91329574584961} +01/22/2022 22:43:02 - INFO - codeparrot_training - Step 231: {'lr': 5.75e-05, 'samples': 7392, 'steps': 230, 'loss/train': 9.531028747558594} +01/22/2022 22:43:03 - INFO - codeparrot_training - Step 232: {'lr': 5.775e-05, 'samples': 7424, 'steps': 231, 'loss/train': 9.189087867736816} +01/22/2022 22:43:03 - INFO - codeparrot_training - Step 233: {'lr': 5.800000000000001e-05, 'samples': 7456, 'steps': 232, 'loss/train': 9.575912475585938} +01/22/2022 22:43:04 - INFO - codeparrot_training - Step 234: {'lr': 5.8250000000000006e-05, 'samples': 7488, 'steps': 233, 'loss/train': 9.195819854736328} +01/22/2022 22:43:05 - INFO - codeparrot_training - Step 235: {'lr': 5.8500000000000006e-05, 'samples': 7520, 'steps': 234, 'loss/train': 9.36935806274414} +01/22/2022 22:43:05 - INFO - codeparrot_training - Step 236: {'lr': 5.875e-05, 'samples': 7552, 'steps': 235, 'loss/train': 9.146292686462402} +01/22/2022 22:43:06 - INFO - codeparrot_training - Step 237: {'lr': 5.9e-05, 'samples': 7584, 'steps': 236, 'loss/train': 9.442827224731445} +01/22/2022 22:43:06 - INFO - codeparrot_training - Step 238: {'lr': 5.925e-05, 'samples': 7616, 'steps': 237, 'loss/train': 9.852892875671387} +01/22/2022 22:43:07 - INFO - codeparrot_training - Step 239: {'lr': 5.9499999999999996e-05, 'samples': 7648, 'steps': 238, 'loss/train': 9.923450469970703} +01/22/2022 22:43:08 - INFO - codeparrot_training - Step 240: {'lr': 5.9749999999999995e-05, 'samples': 7680, 'steps': 239, 'loss/train': 7.380150318145752} +01/22/2022 22:43:09 - INFO - codeparrot_training - Step 241: {'lr': 6e-05, 'samples': 7712, 'steps': 240, 'loss/train': 9.116171836853027} +01/22/2022 22:43:09 - INFO - codeparrot_training - Step 242: {'lr': 6.025e-05, 'samples': 7744, 'steps': 241, 'loss/train': 9.363700866699219} +01/22/2022 22:43:10 - INFO - codeparrot_training - Step 243: {'lr': 6.05e-05, 'samples': 7776, 'steps': 242, 'loss/train': 9.386653900146484} +01/22/2022 22:43:10 - INFO - codeparrot_training - Step 244: {'lr': 6.075e-05, 'samples': 7808, 'steps': 243, 'loss/train': 9.122329711914062} +01/22/2022 22:43:11 - INFO - codeparrot_training - Step 245: {'lr': 6.1e-05, 'samples': 7840, 'steps': 244, 'loss/train': 9.598908424377441} +01/22/2022 22:43:12 - INFO - codeparrot_training - Step 246: {'lr': 6.125e-05, 'samples': 7872, 'steps': 245, 'loss/train': 9.45728874206543} +01/22/2022 22:43:12 - INFO - codeparrot_training - Step 247: {'lr': 6.15e-05, 'samples': 7904, 'steps': 246, 'loss/train': 8.821805953979492} +01/22/2022 22:43:13 - INFO - codeparrot_training - Step 248: {'lr': 6.175e-05, 'samples': 7936, 'steps': 247, 'loss/train': 9.80358600616455} +01/22/2022 22:43:13 - INFO - codeparrot_training - Step 249: {'lr': 6.2e-05, 'samples': 7968, 'steps': 248, 'loss/train': 9.6636323928833} +01/22/2022 22:43:14 - INFO - codeparrot_training - Step 250: {'lr': 6.225e-05, 'samples': 8000, 'steps': 249, 'loss/train': 9.594844818115234} +01/22/2022 22:43:14 - INFO - codeparrot_training - Step 251: {'lr': 6.25e-05, 'samples': 8032, 'steps': 250, 'loss/train': 8.952106475830078} +01/22/2022 22:43:15 - INFO - codeparrot_training - Step 252: {'lr': 6.275000000000001e-05, 'samples': 8064, 'steps': 251, 'loss/train': 9.872370719909668} +01/22/2022 22:43:16 - INFO - codeparrot_training - Step 253: {'lr': 6.3e-05, 'samples': 8096, 'steps': 252, 'loss/train': 11.692463874816895} +01/22/2022 22:43:16 - INFO - codeparrot_training - Step 254: {'lr': 6.325e-05, 'samples': 8128, 'steps': 253, 
'loss/train': 9.53775405883789} +01/22/2022 22:43:17 - INFO - codeparrot_training - Step 255: {'lr': 6.35e-05, 'samples': 8160, 'steps': 254, 'loss/train': 8.999451637268066} +01/22/2022 22:43:17 - INFO - codeparrot_training - Step 256: {'lr': 6.375e-05, 'samples': 8192, 'steps': 255, 'loss/train': 9.118752479553223} +01/22/2022 22:43:18 - INFO - codeparrot_training - Step 257: {'lr': 6.4e-05, 'samples': 8224, 'steps': 256, 'loss/train': 9.87115478515625} +01/22/2022 22:43:18 - INFO - codeparrot_training - Step 258: {'lr': 6.425e-05, 'samples': 8256, 'steps': 257, 'loss/train': 9.506589889526367} +01/22/2022 22:43:19 - INFO - codeparrot_training - Step 259: {'lr': 6.450000000000001e-05, 'samples': 8288, 'steps': 258, 'loss/train': 10.109659194946289} +01/22/2022 22:43:19 - INFO - codeparrot_training - Step 260: {'lr': 6.475e-05, 'samples': 8320, 'steps': 259, 'loss/train': 8.766955375671387} +01/22/2022 22:43:20 - INFO - codeparrot_training - Step 261: {'lr': 6.500000000000001e-05, 'samples': 8352, 'steps': 260, 'loss/train': 9.601101875305176} +01/22/2022 22:43:21 - INFO - codeparrot_training - Step 262: {'lr': 6.525e-05, 'samples': 8384, 'steps': 261, 'loss/train': 9.52595329284668} +01/22/2022 22:43:21 - INFO - codeparrot_training - Step 263: {'lr': 6.55e-05, 'samples': 8416, 'steps': 262, 'loss/train': 9.428339958190918} +01/22/2022 22:43:22 - INFO - codeparrot_training - Step 264: {'lr': 6.575e-05, 'samples': 8448, 'steps': 263, 'loss/train': 9.059041976928711} +01/22/2022 22:43:22 - INFO - codeparrot_training - Step 265: {'lr': 6.6e-05, 'samples': 8480, 'steps': 264, 'loss/train': 9.292367935180664} +01/22/2022 22:43:23 - INFO - codeparrot_training - Step 266: {'lr': 6.625000000000001e-05, 'samples': 8512, 'steps': 265, 'loss/train': 9.969255447387695} +01/22/2022 22:43:23 - INFO - codeparrot_training - Step 267: {'lr': 6.65e-05, 'samples': 8544, 'steps': 266, 'loss/train': 8.824904441833496} +01/22/2022 22:43:24 - INFO - codeparrot_training - Step 268: {'lr': 6.675000000000001e-05, 'samples': 8576, 'steps': 267, 'loss/train': 10.210618019104004} +01/22/2022 22:43:24 - INFO - codeparrot_training - Step 269: {'lr': 6.7e-05, 'samples': 8608, 'steps': 268, 'loss/train': 8.811304092407227} +01/22/2022 22:43:26 - INFO - codeparrot_training - Step 270: {'lr': 6.725000000000001e-05, 'samples': 8640, 'steps': 269, 'loss/train': 9.401799201965332} +01/22/2022 22:43:26 - INFO - codeparrot_training - Step 271: {'lr': 6.75e-05, 'samples': 8672, 'steps': 270, 'loss/train': 8.538224220275879} +01/22/2022 22:43:27 - INFO - codeparrot_training - Step 272: {'lr': 6.775000000000001e-05, 'samples': 8704, 'steps': 271, 'loss/train': 9.453034400939941} +01/22/2022 22:43:27 - INFO - codeparrot_training - Step 273: {'lr': 6.800000000000001e-05, 'samples': 8736, 'steps': 272, 'loss/train': 9.66877555847168} +01/22/2022 22:43:28 - INFO - codeparrot_training - Step 274: {'lr': 6.825e-05, 'samples': 8768, 'steps': 273, 'loss/train': 9.4146728515625} +01/22/2022 22:43:29 - INFO - codeparrot_training - Step 275: {'lr': 6.850000000000001e-05, 'samples': 8800, 'steps': 274, 'loss/train': 8.967911720275879} +01/22/2022 22:43:29 - INFO - codeparrot_training - Step 276: {'lr': 6.875e-05, 'samples': 8832, 'steps': 275, 'loss/train': 9.120333671569824} +01/22/2022 22:43:30 - INFO - codeparrot_training - Step 277: {'lr': 6.900000000000001e-05, 'samples': 8864, 'steps': 276, 'loss/train': 9.54574966430664} +01/22/2022 22:43:30 - INFO - codeparrot_training - Step 278: {'lr': 6.925e-05, 'samples': 8896, 'steps': 277, 
'loss/train': 9.828523635864258} +01/22/2022 22:43:31 - INFO - codeparrot_training - Step 279: {'lr': 6.950000000000001e-05, 'samples': 8928, 'steps': 278, 'loss/train': 10.25891399383545} +01/22/2022 22:43:31 - INFO - codeparrot_training - Step 280: {'lr': 6.975e-05, 'samples': 8960, 'steps': 279, 'loss/train': 8.406841278076172} +01/22/2022 22:43:32 - INFO - codeparrot_training - Step 281: {'lr': 7.000000000000001e-05, 'samples': 8992, 'steps': 280, 'loss/train': 9.450603485107422} +01/22/2022 22:43:32 - INFO - codeparrot_training - Step 282: {'lr': 7.025000000000001e-05, 'samples': 9024, 'steps': 281, 'loss/train': 9.260323524475098} +01/22/2022 22:43:33 - INFO - codeparrot_training - Step 283: {'lr': 7.049999999999999e-05, 'samples': 9056, 'steps': 282, 'loss/train': 9.407333374023438} +01/22/2022 22:43:34 - INFO - codeparrot_training - Step 284: {'lr': 7.075e-05, 'samples': 9088, 'steps': 283, 'loss/train': 9.316780090332031} +01/22/2022 22:43:34 - INFO - codeparrot_training - Step 285: {'lr': 7.099999999999999e-05, 'samples': 9120, 'steps': 284, 'loss/train': 9.41631031036377} +01/22/2022 22:43:35 - INFO - codeparrot_training - Step 286: {'lr': 7.125e-05, 'samples': 9152, 'steps': 285, 'loss/train': 9.417278289794922} +01/22/2022 22:43:35 - INFO - codeparrot_training - Step 287: {'lr': 7.149999999999999e-05, 'samples': 9184, 'steps': 286, 'loss/train': 8.250036239624023} +01/22/2022 22:43:36 - INFO - codeparrot_training - Step 288: {'lr': 7.175e-05, 'samples': 9216, 'steps': 287, 'loss/train': 9.453792572021484} +01/22/2022 22:43:36 - INFO - codeparrot_training - Step 289: {'lr': 7.2e-05, 'samples': 9248, 'steps': 288, 'loss/train': 9.032466888427734} +01/22/2022 22:43:37 - INFO - codeparrot_training - Step 290: {'lr': 7.225e-05, 'samples': 9280, 'steps': 289, 'loss/train': 9.233776092529297} +01/22/2022 22:43:37 - INFO - codeparrot_training - Step 291: {'lr': 7.25e-05, 'samples': 9312, 'steps': 290, 'loss/train': 8.991429328918457} +01/22/2022 22:43:38 - INFO - codeparrot_training - Step 292: {'lr': 7.274999999999999e-05, 'samples': 9344, 'steps': 291, 'loss/train': 9.044414520263672} +01/22/2022 22:43:39 - INFO - codeparrot_training - Step 293: {'lr': 7.3e-05, 'samples': 9376, 'steps': 292, 'loss/train': 9.717764854431152} +01/22/2022 22:43:39 - INFO - codeparrot_training - Step 294: {'lr': 7.324999999999999e-05, 'samples': 9408, 'steps': 293, 'loss/train': 10.256107330322266} +01/22/2022 22:43:40 - INFO - codeparrot_training - Step 295: {'lr': 7.35e-05, 'samples': 9440, 'steps': 294, 'loss/train': 9.621987342834473} +01/22/2022 22:43:40 - INFO - codeparrot_training - Step 296: {'lr': 7.375e-05, 'samples': 9472, 'steps': 295, 'loss/train': 9.6366605758667} +01/22/2022 22:43:41 - INFO - codeparrot_training - Step 297: {'lr': 7.4e-05, 'samples': 9504, 'steps': 296, 'loss/train': 11.119963645935059} +01/22/2022 22:43:41 - INFO - codeparrot_training - Step 298: {'lr': 7.425e-05, 'samples': 9536, 'steps': 297, 'loss/train': 9.616317749023438} +01/22/2022 22:43:42 - INFO - codeparrot_training - Step 299: {'lr': 7.45e-05, 'samples': 9568, 'steps': 298, 'loss/train': 9.464062690734863} +01/22/2022 22:43:43 - INFO - codeparrot_training - Step 300: {'lr': 7.475e-05, 'samples': 9600, 'steps': 299, 'loss/train': 9.29200267791748} +01/22/2022 22:43:44 - INFO - codeparrot_training - Step 301: {'lr': 7.5e-05, 'samples': 9632, 'steps': 300, 'loss/train': 9.826775550842285} +01/22/2022 22:43:44 - INFO - codeparrot_training - Step 302: {'lr': 7.525e-05, 'samples': 9664, 'steps': 301, 'loss/train': 
9.162609100341797} +01/22/2022 22:43:45 - INFO - codeparrot_training - Step 303: {'lr': 7.55e-05, 'samples': 9696, 'steps': 302, 'loss/train': 9.687301635742188} +01/22/2022 22:43:46 - INFO - codeparrot_training - Step 304: {'lr': 7.575e-05, 'samples': 9728, 'steps': 303, 'loss/train': 9.178020477294922} +01/22/2022 22:43:46 - INFO - codeparrot_training - Step 305: {'lr': 7.6e-05, 'samples': 9760, 'steps': 304, 'loss/train': 8.831764221191406} +01/22/2022 22:43:47 - INFO - codeparrot_training - Step 306: {'lr': 7.625e-05, 'samples': 9792, 'steps': 305, 'loss/train': 9.71541690826416} +01/22/2022 22:43:47 - INFO - codeparrot_training - Step 307: {'lr': 7.65e-05, 'samples': 9824, 'steps': 306, 'loss/train': 9.042821884155273} +01/22/2022 22:43:48 - INFO - codeparrot_training - Step 308: {'lr': 7.675e-05, 'samples': 9856, 'steps': 307, 'loss/train': 9.769678115844727} +01/22/2022 22:43:48 - INFO - codeparrot_training - Step 309: {'lr': 7.7e-05, 'samples': 9888, 'steps': 308, 'loss/train': 9.330351829528809} +01/22/2022 22:43:49 - INFO - codeparrot_training - Step 310: {'lr': 7.725000000000001e-05, 'samples': 9920, 'steps': 309, 'loss/train': 9.457258224487305} +01/22/2022 22:43:50 - INFO - codeparrot_training - Step 311: {'lr': 7.75e-05, 'samples': 9952, 'steps': 310, 'loss/train': 9.638697624206543} +01/22/2022 22:43:50 - INFO - codeparrot_training - Step 312: {'lr': 7.775e-05, 'samples': 9984, 'steps': 311, 'loss/train': 9.150445938110352} +01/22/2022 22:43:51 - INFO - codeparrot_training - Step 313: {'lr': 7.8e-05, 'samples': 10016, 'steps': 312, 'loss/train': 10.021202087402344} +01/22/2022 22:43:51 - INFO - codeparrot_training - Step 314: {'lr': 7.825e-05, 'samples': 10048, 'steps': 313, 'loss/train': 9.31416130065918} +01/22/2022 22:43:52 - INFO - codeparrot_training - Step 315: {'lr': 7.85e-05, 'samples': 10080, 'steps': 314, 'loss/train': 8.426275253295898} +01/22/2022 22:43:52 - INFO - codeparrot_training - Step 316: {'lr': 7.875e-05, 'samples': 10112, 'steps': 315, 'loss/train': 9.764921188354492} +01/22/2022 22:43:53 - INFO - codeparrot_training - Step 317: {'lr': 7.9e-05, 'samples': 10144, 'steps': 316, 'loss/train': 8.745186805725098} +01/22/2022 22:43:53 - INFO - codeparrot_training - Step 318: {'lr': 7.925e-05, 'samples': 10176, 'steps': 317, 'loss/train': 9.174814224243164} +01/22/2022 22:43:54 - INFO - codeparrot_training - Step 319: {'lr': 7.950000000000001e-05, 'samples': 10208, 'steps': 318, 'loss/train': 9.799240112304688} +01/22/2022 22:43:55 - INFO - codeparrot_training - Step 320: {'lr': 7.975e-05, 'samples': 10240, 'steps': 319, 'loss/train': 8.850435256958008} +01/22/2022 22:43:55 - INFO - codeparrot_training - Step 321: {'lr': 8e-05, 'samples': 10272, 'steps': 320, 'loss/train': 11.250716209411621} +01/22/2022 22:43:56 - INFO - codeparrot_training - Step 322: {'lr': 8.025e-05, 'samples': 10304, 'steps': 321, 'loss/train': 9.963932037353516} +01/22/2022 22:43:56 - INFO - codeparrot_training - Step 323: {'lr': 8.05e-05, 'samples': 10336, 'steps': 322, 'loss/train': 9.804258346557617} +01/22/2022 22:43:57 - INFO - codeparrot_training - Step 324: {'lr': 8.075e-05, 'samples': 10368, 'steps': 323, 'loss/train': 9.313946723937988} +01/22/2022 22:43:57 - INFO - codeparrot_training - Step 325: {'lr': 8.1e-05, 'samples': 10400, 'steps': 324, 'loss/train': 9.174221992492676} +01/22/2022 22:43:58 - INFO - codeparrot_training - Step 326: {'lr': 8.125000000000001e-05, 'samples': 10432, 'steps': 325, 'loss/train': 9.857719421386719} +01/22/2022 22:43:59 - INFO - 
codeparrot_training - Step 327: {'lr': 8.15e-05, 'samples': 10464, 'steps': 326, 'loss/train': 9.095353126525879} +01/22/2022 22:44:00 - INFO - codeparrot_training - Step 328: {'lr': 8.175000000000001e-05, 'samples': 10496, 'steps': 327, 'loss/train': 9.503901481628418} +01/22/2022 22:44:00 - INFO - codeparrot_training - Step 329: {'lr': 8.2e-05, 'samples': 10528, 'steps': 328, 'loss/train': 9.238961219787598} +01/22/2022 22:44:01 - INFO - codeparrot_training - Step 330: {'lr': 8.225000000000001e-05, 'samples': 10560, 'steps': 329, 'loss/train': 8.547113418579102} +01/22/2022 22:44:01 - INFO - codeparrot_training - Step 331: {'lr': 8.25e-05, 'samples': 10592, 'steps': 330, 'loss/train': 9.04595947265625} +01/22/2022 22:44:02 - INFO - codeparrot_training - Step 332: {'lr': 8.275e-05, 'samples': 10624, 'steps': 331, 'loss/train': 9.381396293640137} +01/22/2022 22:44:03 - INFO - codeparrot_training - Step 333: {'lr': 8.300000000000001e-05, 'samples': 10656, 'steps': 332, 'loss/train': 9.308022499084473} +01/22/2022 22:44:03 - INFO - codeparrot_training - Step 334: {'lr': 8.325e-05, 'samples': 10688, 'steps': 333, 'loss/train': 10.018190383911133} +01/22/2022 22:44:04 - INFO - codeparrot_training - Step 335: {'lr': 8.350000000000001e-05, 'samples': 10720, 'steps': 334, 'loss/train': 9.341448783874512} +01/22/2022 22:44:04 - INFO - codeparrot_training - Step 336: {'lr': 8.375e-05, 'samples': 10752, 'steps': 335, 'loss/train': 9.65217399597168} +01/22/2022 22:44:05 - INFO - codeparrot_training - Step 337: {'lr': 8.400000000000001e-05, 'samples': 10784, 'steps': 336, 'loss/train': 9.091493606567383} +01/22/2022 22:44:05 - INFO - codeparrot_training - Step 338: {'lr': 8.425e-05, 'samples': 10816, 'steps': 337, 'loss/train': 9.412522315979004} +01/22/2022 22:44:06 - INFO - codeparrot_training - Step 339: {'lr': 8.450000000000001e-05, 'samples': 10848, 'steps': 338, 'loss/train': 10.189144134521484} +01/22/2022 22:44:06 - INFO - codeparrot_training - Step 340: {'lr': 8.475000000000001e-05, 'samples': 10880, 'steps': 339, 'loss/train': 9.331099510192871} +01/22/2022 22:44:07 - INFO - codeparrot_training - Step 341: {'lr': 8.5e-05, 'samples': 10912, 'steps': 340, 'loss/train': 9.085092544555664} +01/22/2022 22:44:08 - INFO - codeparrot_training - Step 342: {'lr': 8.525000000000001e-05, 'samples': 10944, 'steps': 341, 'loss/train': 8.544513702392578} +01/22/2022 22:44:08 - INFO - codeparrot_training - Step 343: {'lr': 8.55e-05, 'samples': 10976, 'steps': 342, 'loss/train': 8.56588077545166} +01/22/2022 22:44:09 - INFO - codeparrot_training - Step 344: {'lr': 8.575000000000001e-05, 'samples': 11008, 'steps': 343, 'loss/train': 9.40851879119873} +01/22/2022 22:44:09 - INFO - codeparrot_training - Step 345: {'lr': 8.599999999999999e-05, 'samples': 11040, 'steps': 344, 'loss/train': 10.191856384277344} +01/22/2022 22:44:10 - INFO - codeparrot_training - Step 346: {'lr': 8.625e-05, 'samples': 11072, 'steps': 345, 'loss/train': 8.904547691345215} +01/22/2022 22:44:10 - INFO - codeparrot_training - Step 347: {'lr': 8.65e-05, 'samples': 11104, 'steps': 346, 'loss/train': 9.006621360778809} +01/22/2022 22:44:11 - INFO - codeparrot_training - Step 348: {'lr': 8.675e-05, 'samples': 11136, 'steps': 347, 'loss/train': 8.251219749450684} +01/22/2022 22:44:12 - INFO - codeparrot_training - Step 349: {'lr': 8.7e-05, 'samples': 11168, 'steps': 348, 'loss/train': 8.524999618530273} +01/22/2022 22:44:12 - INFO - codeparrot_training - Step 350: {'lr': 8.724999999999999e-05, 'samples': 11200, 'steps': 349, 'loss/train': 
9.299408912658691} +01/22/2022 22:44:13 - INFO - codeparrot_training - Step 351: {'lr': 8.75e-05, 'samples': 11232, 'steps': 350, 'loss/train': 6.79514217376709} +01/22/2022 22:44:13 - INFO - codeparrot_training - Step 352: {'lr': 8.774999999999999e-05, 'samples': 11264, 'steps': 351, 'loss/train': 8.789837837219238} +01/22/2022 22:44:14 - INFO - codeparrot_training - Step 353: {'lr': 8.8e-05, 'samples': 11296, 'steps': 352, 'loss/train': 9.06583023071289} +01/22/2022 22:44:14 - INFO - codeparrot_training - Step 354: {'lr': 8.824999999999999e-05, 'samples': 11328, 'steps': 353, 'loss/train': 7.057380676269531} +01/22/2022 22:44:15 - INFO - codeparrot_training - Step 355: {'lr': 8.85e-05, 'samples': 11360, 'steps': 354, 'loss/train': 9.290522575378418} +01/22/2022 22:44:15 - INFO - codeparrot_training - Step 356: {'lr': 8.875e-05, 'samples': 11392, 'steps': 355, 'loss/train': 9.634346961975098} +01/22/2022 22:44:16 - INFO - codeparrot_training - Step 357: {'lr': 8.9e-05, 'samples': 11424, 'steps': 356, 'loss/train': 8.842891693115234} +01/22/2022 22:44:19 - INFO - codeparrot_training - Step 358: {'lr': 8.925e-05, 'samples': 11456, 'steps': 357, 'loss/train': 9.488931655883789} +01/22/2022 22:44:20 - INFO - codeparrot_training - Step 359: {'lr': 8.95e-05, 'samples': 11488, 'steps': 358, 'loss/train': 9.761804580688477} +01/22/2022 22:44:21 - INFO - codeparrot_training - Step 360: {'lr': 8.975e-05, 'samples': 11520, 'steps': 359, 'loss/train': 9.22692584991455} +01/22/2022 22:44:21 - INFO - codeparrot_training - Step 361: {'lr': 8.999999999999999e-05, 'samples': 11552, 'steps': 360, 'loss/train': 8.214913368225098} +01/22/2022 22:44:22 - INFO - codeparrot_training - Step 362: {'lr': 9.025e-05, 'samples': 11584, 'steps': 361, 'loss/train': 8.524460792541504} +01/22/2022 22:44:22 - INFO - codeparrot_training - Step 363: {'lr': 9.05e-05, 'samples': 11616, 'steps': 362, 'loss/train': 9.048952102661133} +01/22/2022 22:44:23 - INFO - codeparrot_training - Step 364: {'lr': 9.075e-05, 'samples': 11648, 'steps': 363, 'loss/train': 7.8582563400268555} +01/22/2022 22:44:23 - INFO - codeparrot_training - Step 365: {'lr': 9.1e-05, 'samples': 11680, 'steps': 364, 'loss/train': 8.339914321899414} +01/22/2022 22:44:24 - INFO - codeparrot_training - Step 366: {'lr': 9.125e-05, 'samples': 11712, 'steps': 365, 'loss/train': 9.306173324584961} +01/22/2022 22:44:24 - INFO - codeparrot_training - Step 367: {'lr': 9.15e-05, 'samples': 11744, 'steps': 366, 'loss/train': 9.273667335510254} +01/22/2022 22:44:25 - INFO - codeparrot_training - Step 368: {'lr': 9.175e-05, 'samples': 11776, 'steps': 367, 'loss/train': 9.50926399230957} +01/22/2022 22:44:26 - INFO - codeparrot_training - Step 369: {'lr': 9.2e-05, 'samples': 11808, 'steps': 368, 'loss/train': 9.51335620880127} +01/22/2022 22:44:26 - INFO - codeparrot_training - Step 370: {'lr': 9.225e-05, 'samples': 11840, 'steps': 369, 'loss/train': 10.568428993225098} +01/22/2022 22:44:27 - INFO - codeparrot_training - Step 371: {'lr': 9.25e-05, 'samples': 11872, 'steps': 370, 'loss/train': 9.282797813415527} +01/22/2022 22:44:27 - INFO - codeparrot_training - Step 372: {'lr': 9.275e-05, 'samples': 11904, 'steps': 371, 'loss/train': 10.185258865356445} +01/22/2022 22:44:28 - INFO - codeparrot_training - Step 373: {'lr': 9.3e-05, 'samples': 11936, 'steps': 372, 'loss/train': 6.881534099578857} +01/22/2022 22:44:28 - INFO - codeparrot_training - Step 374: {'lr': 9.325e-05, 'samples': 11968, 'steps': 373, 'loss/train': 9.733763694763184} +01/22/2022 22:44:29 - INFO - 
codeparrot_training - Step 375: {'lr': 9.35e-05, 'samples': 12000, 'steps': 374, 'loss/train': 9.779559135437012} +01/22/2022 22:44:29 - INFO - codeparrot_training - Step 376: {'lr': 9.375e-05, 'samples': 12032, 'steps': 375, 'loss/train': 9.361427307128906} +01/22/2022 22:44:30 - INFO - codeparrot_training - Step 377: {'lr': 9.400000000000001e-05, 'samples': 12064, 'steps': 376, 'loss/train': 9.382140159606934} +01/22/2022 22:44:31 - INFO - codeparrot_training - Step 378: {'lr': 9.425e-05, 'samples': 12096, 'steps': 377, 'loss/train': 8.066976547241211} +01/22/2022 22:44:31 - INFO - codeparrot_training - Step 379: {'lr': 9.45e-05, 'samples': 12128, 'steps': 378, 'loss/train': 9.354551315307617} +01/22/2022 22:44:32 - INFO - codeparrot_training - Step 380: {'lr': 9.475e-05, 'samples': 12160, 'steps': 379, 'loss/train': 9.55402660369873} +01/22/2022 22:44:32 - INFO - codeparrot_training - Step 381: {'lr': 9.5e-05, 'samples': 12192, 'steps': 380, 'loss/train': 8.380102157592773} +01/22/2022 22:44:33 - INFO - codeparrot_training - Step 382: {'lr': 9.525e-05, 'samples': 12224, 'steps': 381, 'loss/train': 9.471182823181152} +01/22/2022 22:44:33 - INFO - codeparrot_training - Step 383: {'lr': 9.55e-05, 'samples': 12256, 'steps': 382, 'loss/train': 9.27988338470459} +01/22/2022 22:44:34 - INFO - codeparrot_training - Step 384: {'lr': 9.575000000000001e-05, 'samples': 12288, 'steps': 383, 'loss/train': 9.013681411743164} +01/22/2022 22:44:34 - INFO - codeparrot_training - Step 385: {'lr': 9.6e-05, 'samples': 12320, 'steps': 384, 'loss/train': 9.043734550476074} +01/22/2022 22:44:35 - INFO - codeparrot_training - Step 386: {'lr': 9.625000000000001e-05, 'samples': 12352, 'steps': 385, 'loss/train': 8.61291217803955} +01/22/2022 22:44:37 - INFO - codeparrot_training - Step 387: {'lr': 9.65e-05, 'samples': 12384, 'steps': 386, 'loss/train': 8.42471694946289} +01/22/2022 22:44:37 - INFO - codeparrot_training - Step 388: {'lr': 9.675000000000001e-05, 'samples': 12416, 'steps': 387, 'loss/train': 9.901338577270508} +01/22/2022 22:44:38 - INFO - codeparrot_training - Step 389: {'lr': 9.7e-05, 'samples': 12448, 'steps': 388, 'loss/train': 9.008298873901367} +01/22/2022 22:44:39 - INFO - codeparrot_training - Step 390: {'lr': 9.725e-05, 'samples': 12480, 'steps': 389, 'loss/train': 8.439838409423828} +01/22/2022 22:44:39 - INFO - codeparrot_training - Step 391: {'lr': 9.750000000000001e-05, 'samples': 12512, 'steps': 390, 'loss/train': 9.87720775604248} +01/22/2022 22:44:40 - INFO - codeparrot_training - Step 392: {'lr': 9.775e-05, 'samples': 12544, 'steps': 391, 'loss/train': 8.37280559539795} +01/22/2022 22:44:40 - INFO - codeparrot_training - Step 393: {'lr': 9.800000000000001e-05, 'samples': 12576, 'steps': 392, 'loss/train': 8.514657020568848} +01/22/2022 22:44:41 - INFO - codeparrot_training - Step 394: {'lr': 9.825e-05, 'samples': 12608, 'steps': 393, 'loss/train': 9.109665870666504} +01/22/2022 22:44:41 - INFO - codeparrot_training - Step 395: {'lr': 9.850000000000001e-05, 'samples': 12640, 'steps': 394, 'loss/train': 8.705455780029297} +01/22/2022 22:44:42 - INFO - codeparrot_training - Step 396: {'lr': 9.875e-05, 'samples': 12672, 'steps': 395, 'loss/train': 8.0825834274292} +01/22/2022 22:44:43 - INFO - codeparrot_training - Step 397: {'lr': 9.900000000000001e-05, 'samples': 12704, 'steps': 396, 'loss/train': 7.6226348876953125} +01/22/2022 22:44:43 - INFO - codeparrot_training - Step 398: {'lr': 9.925000000000001e-05, 'samples': 12736, 'steps': 397, 'loss/train': 6.9629693031311035} +01/22/2022 
22:44:44 - INFO - codeparrot_training - Step 399: {'lr': 9.95e-05, 'samples': 12768, 'steps': 398, 'loss/train': 6.666027545928955} +01/22/2022 22:44:44 - INFO - codeparrot_training - Step 400: {'lr': 9.975000000000001e-05, 'samples': 12800, 'steps': 399, 'loss/train': 6.26764440536499} +01/22/2022 22:44:45 - INFO - codeparrot_training - Step 401: {'lr': 0.0001, 'samples': 12832, 'steps': 400, 'loss/train': 6.281090259552002} +01/22/2022 22:44:45 - INFO - codeparrot_training - Step 402: {'lr': 0.00010025000000000001, 'samples': 12864, 'steps': 401, 'loss/train': 10.32182788848877} +01/22/2022 22:44:46 - INFO - codeparrot_training - Step 403: {'lr': 0.0001005, 'samples': 12896, 'steps': 402, 'loss/train': 9.899457931518555} +01/22/2022 22:44:46 - INFO - codeparrot_training - Step 404: {'lr': 0.00010075000000000001, 'samples': 12928, 'steps': 403, 'loss/train': 9.85293960571289} +01/22/2022 22:44:47 - INFO - codeparrot_training - Step 405: {'lr': 0.000101, 'samples': 12960, 'steps': 404, 'loss/train': 9.83810806274414} +01/22/2022 22:44:48 - INFO - codeparrot_training - Step 406: {'lr': 0.00010125000000000001, 'samples': 12992, 'steps': 405, 'loss/train': 9.13258171081543} +01/22/2022 22:44:48 - INFO - codeparrot_training - Step 407: {'lr': 0.00010150000000000001, 'samples': 13024, 'steps': 406, 'loss/train': 9.11043643951416} +01/22/2022 22:44:49 - INFO - codeparrot_training - Step 408: {'lr': 0.00010174999999999999, 'samples': 13056, 'steps': 407, 'loss/train': 9.016789436340332} +01/22/2022 22:44:49 - INFO - codeparrot_training - Step 409: {'lr': 0.000102, 'samples': 13088, 'steps': 408, 'loss/train': 9.978825569152832} +01/22/2022 22:44:50 - INFO - codeparrot_training - Step 410: {'lr': 0.00010224999999999999, 'samples': 13120, 'steps': 409, 'loss/train': 9.750426292419434} +01/22/2022 22:44:50 - INFO - codeparrot_training - Step 411: {'lr': 0.0001025, 'samples': 13152, 'steps': 410, 'loss/train': 9.599894523620605} +01/22/2022 22:44:51 - INFO - codeparrot_training - Step 412: {'lr': 0.00010274999999999999, 'samples': 13184, 'steps': 411, 'loss/train': 9.155754089355469} +01/22/2022 22:44:51 - INFO - codeparrot_training - Step 413: {'lr': 0.000103, 'samples': 13216, 'steps': 412, 'loss/train': 9.92332935333252} +01/22/2022 22:44:52 - INFO - codeparrot_training - Step 414: {'lr': 0.00010325, 'samples': 13248, 'steps': 413, 'loss/train': 9.223645210266113} +01/22/2022 22:44:53 - INFO - codeparrot_training - Step 415: {'lr': 0.0001035, 'samples': 13280, 'steps': 414, 'loss/train': 9.54773235321045} +01/22/2022 22:44:53 - INFO - codeparrot_training - Step 416: {'lr': 0.00010375, 'samples': 13312, 'steps': 415, 'loss/train': 9.808785438537598} +01/22/2022 22:44:54 - INFO - codeparrot_training - Step 417: {'lr': 0.000104, 'samples': 13344, 'steps': 416, 'loss/train': 9.057464599609375} +01/22/2022 22:44:54 - INFO - codeparrot_training - Step 418: {'lr': 0.00010425, 'samples': 13376, 'steps': 417, 'loss/train': 8.995979309082031} +01/22/2022 22:44:55 - INFO - codeparrot_training - Step 419: {'lr': 0.00010449999999999999, 'samples': 13408, 'steps': 418, 'loss/train': 9.098223686218262} +01/22/2022 22:44:56 - INFO - codeparrot_training - Step 420: {'lr': 0.00010475, 'samples': 13440, 'steps': 419, 'loss/train': 9.405739784240723} +01/22/2022 22:44:57 - INFO - codeparrot_training - Step 421: {'lr': 0.000105, 'samples': 13472, 'steps': 420, 'loss/train': 9.342942237854004} +01/22/2022 22:44:57 - INFO - codeparrot_training - Step 422: {'lr': 0.00010525, 'samples': 13504, 'steps': 421, 'loss/train': 
9.03178882598877} +01/22/2022 22:44:58 - INFO - codeparrot_training - Step 423: {'lr': 0.0001055, 'samples': 13536, 'steps': 422, 'loss/train': 9.164657592773438} +01/22/2022 22:44:58 - INFO - codeparrot_training - Step 424: {'lr': 0.00010575, 'samples': 13568, 'steps': 423, 'loss/train': 8.96740436553955} +01/22/2022 22:44:59 - INFO - codeparrot_training - Step 425: {'lr': 0.000106, 'samples': 13600, 'steps': 424, 'loss/train': 8.899016380310059} +01/22/2022 22:44:59 - INFO - codeparrot_training - Step 426: {'lr': 0.00010625, 'samples': 13632, 'steps': 425, 'loss/train': 9.078044891357422} +01/22/2022 22:45:00 - INFO - codeparrot_training - Step 427: {'lr': 0.0001065, 'samples': 13664, 'steps': 426, 'loss/train': 9.109296798706055} +01/22/2022 22:45:01 - INFO - codeparrot_training - Step 428: {'lr': 0.00010675, 'samples': 13696, 'steps': 427, 'loss/train': 8.85823917388916} +01/22/2022 22:45:01 - INFO - codeparrot_training - Step 429: {'lr': 0.000107, 'samples': 13728, 'steps': 428, 'loss/train': 9.310163497924805} +01/22/2022 22:45:02 - INFO - codeparrot_training - Step 430: {'lr': 0.00010725, 'samples': 13760, 'steps': 429, 'loss/train': 9.050468444824219} +01/22/2022 22:45:02 - INFO - codeparrot_training - Step 431: {'lr': 0.0001075, 'samples': 13792, 'steps': 430, 'loss/train': 9.145134925842285} +01/22/2022 22:45:03 - INFO - codeparrot_training - Step 432: {'lr': 0.00010775, 'samples': 13824, 'steps': 431, 'loss/train': 9.730239868164062} +01/22/2022 22:45:03 - INFO - codeparrot_training - Step 433: {'lr': 0.000108, 'samples': 13856, 'steps': 432, 'loss/train': 8.8503999710083} +01/22/2022 22:45:04 - INFO - codeparrot_training - Step 434: {'lr': 0.00010825, 'samples': 13888, 'steps': 433, 'loss/train': 9.025632858276367} +01/22/2022 22:45:04 - INFO - codeparrot_training - Step 435: {'lr': 0.00010850000000000001, 'samples': 13920, 'steps': 434, 'loss/train': 9.001106262207031} +01/22/2022 22:45:05 - INFO - codeparrot_training - Step 436: {'lr': 0.00010875, 'samples': 13952, 'steps': 435, 'loss/train': 8.551477432250977} +01/22/2022 22:45:06 - INFO - codeparrot_training - Step 437: {'lr': 0.000109, 'samples': 13984, 'steps': 436, 'loss/train': 8.45949649810791} +01/22/2022 22:45:06 - INFO - codeparrot_training - Step 438: {'lr': 0.00010925, 'samples': 14016, 'steps': 437, 'loss/train': 6.5827131271362305} +01/22/2022 22:45:07 - INFO - codeparrot_training - Step 439: {'lr': 0.0001095, 'samples': 14048, 'steps': 438, 'loss/train': 8.996200561523438} +01/22/2022 22:45:07 - INFO - codeparrot_training - Step 440: {'lr': 0.00010975, 'samples': 14080, 'steps': 439, 'loss/train': 9.514100074768066} +01/22/2022 22:45:08 - INFO - codeparrot_training - Step 441: {'lr': 0.00011, 'samples': 14112, 'steps': 440, 'loss/train': 8.982752799987793} +01/22/2022 22:45:08 - INFO - codeparrot_training - Step 442: {'lr': 0.00011025, 'samples': 14144, 'steps': 441, 'loss/train': 8.623948097229004} +01/22/2022 22:45:09 - INFO - codeparrot_training - Step 443: {'lr': 0.0001105, 'samples': 14176, 'steps': 442, 'loss/train': 10.030566215515137} +01/22/2022 22:45:09 - INFO - codeparrot_training - Step 444: {'lr': 0.00011075000000000001, 'samples': 14208, 'steps': 443, 'loss/train': 9.69078540802002} +01/22/2022 22:45:10 - INFO - codeparrot_training - Step 445: {'lr': 0.000111, 'samples': 14240, 'steps': 444, 'loss/train': 9.865701675415039} +01/22/2022 22:45:11 - INFO - codeparrot_training - Step 446: {'lr': 0.00011125000000000001, 'samples': 14272, 'steps': 445, 'loss/train': 9.182367324829102} +01/22/2022 
22:45:11 - INFO - codeparrot_training - Step 447: {'lr': 0.0001115, 'samples': 14304, 'steps': 446, 'loss/train': 9.121793746948242} +01/22/2022 22:45:12 - INFO - codeparrot_training - Step 448: {'lr': 0.00011175, 'samples': 14336, 'steps': 447, 'loss/train': 8.80112361907959} +01/22/2022 22:45:13 - INFO - codeparrot_training - Step 449: {'lr': 0.000112, 'samples': 14368, 'steps': 448, 'loss/train': 8.766927719116211} +01/22/2022 22:45:14 - INFO - codeparrot_training - Step 450: {'lr': 0.00011225, 'samples': 14400, 'steps': 449, 'loss/train': 8.817313194274902} +01/22/2022 22:45:14 - INFO - codeparrot_training - Step 451: {'lr': 0.00011250000000000001, 'samples': 14432, 'steps': 450, 'loss/train': 9.914933204650879} +01/22/2022 22:45:15 - INFO - codeparrot_training - Step 452: {'lr': 0.00011275, 'samples': 14464, 'steps': 451, 'loss/train': 9.07975959777832} +01/22/2022 22:45:16 - INFO - codeparrot_training - Step 453: {'lr': 0.00011300000000000001, 'samples': 14496, 'steps': 452, 'loss/train': 9.467864036560059} +01/22/2022 22:45:16 - INFO - codeparrot_training - Step 454: {'lr': 0.00011325, 'samples': 14528, 'steps': 453, 'loss/train': 8.90892219543457} +01/22/2022 22:45:17 - INFO - codeparrot_training - Step 455: {'lr': 0.00011350000000000001, 'samples': 14560, 'steps': 454, 'loss/train': 9.638179779052734} +01/22/2022 22:45:17 - INFO - codeparrot_training - Step 456: {'lr': 0.00011375, 'samples': 14592, 'steps': 455, 'loss/train': 9.625571250915527} +01/22/2022 22:45:18 - INFO - codeparrot_training - Step 457: {'lr': 0.000114, 'samples': 14624, 'steps': 456, 'loss/train': 8.850834846496582} +01/22/2022 22:45:18 - INFO - codeparrot_training - Step 458: {'lr': 0.00011425000000000001, 'samples': 14656, 'steps': 457, 'loss/train': 9.5346097946167} +01/22/2022 22:45:19 - INFO - codeparrot_training - Step 459: {'lr': 0.0001145, 'samples': 14688, 'steps': 458, 'loss/train': 10.135457992553711} +01/22/2022 22:45:19 - INFO - codeparrot_training - Step 460: {'lr': 0.00011475000000000001, 'samples': 14720, 'steps': 459, 'loss/train': 9.375407218933105} +01/22/2022 22:45:20 - INFO - codeparrot_training - Step 461: {'lr': 0.000115, 'samples': 14752, 'steps': 460, 'loss/train': 7.744449615478516} +01/22/2022 22:45:21 - INFO - codeparrot_training - Step 462: {'lr': 0.00011525000000000001, 'samples': 14784, 'steps': 461, 'loss/train': 6.728628158569336} +01/22/2022 22:45:21 - INFO - codeparrot_training - Step 463: {'lr': 0.0001155, 'samples': 14816, 'steps': 462, 'loss/train': 6.500629425048828} +01/22/2022 22:45:22 - INFO - codeparrot_training - Step 464: {'lr': 0.00011575000000000001, 'samples': 14848, 'steps': 463, 'loss/train': 6.486570358276367} +01/22/2022 22:45:22 - INFO - codeparrot_training - Step 465: {'lr': 0.00011600000000000001, 'samples': 14880, 'steps': 464, 'loss/train': 6.275211811065674} +01/22/2022 22:45:23 - INFO - codeparrot_training - Step 466: {'lr': 0.00011625, 'samples': 14912, 'steps': 465, 'loss/train': 5.998690605163574} +01/22/2022 22:45:23 - INFO - codeparrot_training - Step 467: {'lr': 0.00011650000000000001, 'samples': 14944, 'steps': 466, 'loss/train': 5.715348243713379} +01/22/2022 22:45:24 - INFO - codeparrot_training - Step 468: {'lr': 0.00011675, 'samples': 14976, 'steps': 467, 'loss/train': 9.931135177612305} +01/22/2022 22:45:24 - INFO - codeparrot_training - Step 469: {'lr': 0.00011700000000000001, 'samples': 15008, 'steps': 468, 'loss/train': 9.42446517944336} +01/22/2022 22:45:25 - INFO - codeparrot_training - Step 470: {'lr': 0.00011724999999999999, 
'samples': 15040, 'steps': 469, 'loss/train': 9.053323745727539} +01/22/2022 22:45:26 - INFO - codeparrot_training - Step 471: {'lr': 0.0001175, 'samples': 15072, 'steps': 470, 'loss/train': 8.81923770904541} +01/22/2022 22:45:26 - INFO - codeparrot_training - Step 472: {'lr': 0.00011775, 'samples': 15104, 'steps': 471, 'loss/train': 8.76329231262207} +01/22/2022 22:45:27 - INFO - codeparrot_training - Step 473: {'lr': 0.000118, 'samples': 15136, 'steps': 472, 'loss/train': 8.543845176696777} +01/22/2022 22:45:27 - INFO - codeparrot_training - Step 474: {'lr': 0.00011825, 'samples': 15168, 'steps': 473, 'loss/train': 8.385889053344727} +01/22/2022 22:45:28 - INFO - codeparrot_training - Step 475: {'lr': 0.0001185, 'samples': 15200, 'steps': 474, 'loss/train': 8.304553031921387} +01/22/2022 22:45:28 - INFO - codeparrot_training - Step 476: {'lr': 0.00011875, 'samples': 15232, 'steps': 475, 'loss/train': 8.611750602722168} +01/22/2022 22:45:29 - INFO - codeparrot_training - Step 477: {'lr': 0.00011899999999999999, 'samples': 15264, 'steps': 476, 'loss/train': 9.036888122558594} +01/22/2022 22:45:30 - INFO - codeparrot_training - Step 478: {'lr': 0.00011925, 'samples': 15296, 'steps': 477, 'loss/train': 9.64372444152832} +01/22/2022 22:45:30 - INFO - codeparrot_training - Step 479: {'lr': 0.00011949999999999999, 'samples': 15328, 'steps': 478, 'loss/train': 9.356858253479004} +01/22/2022 22:45:31 - INFO - codeparrot_training - Step 480: {'lr': 0.00011975, 'samples': 15360, 'steps': 479, 'loss/train': 7.59367036819458} +01/22/2022 22:45:31 - INFO - codeparrot_training - Step 481: {'lr': 0.00012, 'samples': 15392, 'steps': 480, 'loss/train': 8.854754447937012} +01/22/2022 22:45:32 - INFO - codeparrot_training - Step 482: {'lr': 0.00012025, 'samples': 15424, 'steps': 481, 'loss/train': 9.106698989868164} +01/22/2022 22:45:33 - INFO - codeparrot_training - Step 483: {'lr': 0.0001205, 'samples': 15456, 'steps': 482, 'loss/train': 8.269722938537598} +01/22/2022 22:45:34 - INFO - codeparrot_training - Step 484: {'lr': 0.00012075, 'samples': 15488, 'steps': 483, 'loss/train': 8.980502128601074} +01/22/2022 22:45:34 - INFO - codeparrot_training - Step 485: {'lr': 0.000121, 'samples': 15520, 'steps': 484, 'loss/train': 8.660039901733398} +01/22/2022 22:45:35 - INFO - codeparrot_training - Step 486: {'lr': 0.00012124999999999999, 'samples': 15552, 'steps': 485, 'loss/train': 8.077055931091309} +01/22/2022 22:45:35 - INFO - codeparrot_training - Step 487: {'lr': 0.0001215, 'samples': 15584, 'steps': 486, 'loss/train': 8.922719955444336} +01/22/2022 22:45:36 - INFO - codeparrot_training - Step 488: {'lr': 0.00012175, 'samples': 15616, 'steps': 487, 'loss/train': 8.924331665039062} +01/22/2022 22:45:36 - INFO - codeparrot_training - Step 489: {'lr': 0.000122, 'samples': 15648, 'steps': 488, 'loss/train': 9.062881469726562} +01/22/2022 22:45:37 - INFO - codeparrot_training - Step 490: {'lr': 0.00012225, 'samples': 15680, 'steps': 489, 'loss/train': 7.25378942489624} +01/22/2022 22:45:37 - INFO - codeparrot_training - Step 491: {'lr': 0.0001225, 'samples': 15712, 'steps': 490, 'loss/train': 9.203317642211914} +01/22/2022 22:45:38 - INFO - codeparrot_training - Step 492: {'lr': 0.00012275, 'samples': 15744, 'steps': 491, 'loss/train': 8.729555130004883} +01/22/2022 22:45:39 - INFO - codeparrot_training - Step 493: {'lr': 0.000123, 'samples': 15776, 'steps': 492, 'loss/train': 8.802947998046875} +01/22/2022 22:45:39 - INFO - codeparrot_training - Step 494: {'lr': 0.00012325000000000001, 'samples': 15808, 
'steps': 493, 'loss/train': 9.886625289916992} +01/22/2022 22:45:40 - INFO - codeparrot_training - Step 495: {'lr': 0.0001235, 'samples': 15840, 'steps': 494, 'loss/train': 10.218600273132324} +01/22/2022 22:45:40 - INFO - codeparrot_training - Step 496: {'lr': 0.00012375, 'samples': 15872, 'steps': 495, 'loss/train': 10.283507347106934} +01/22/2022 22:45:41 - INFO - codeparrot_training - Step 497: {'lr': 0.000124, 'samples': 15904, 'steps': 496, 'loss/train': 8.869685173034668} +01/22/2022 22:45:41 - INFO - codeparrot_training - Step 498: {'lr': 0.00012425, 'samples': 15936, 'steps': 497, 'loss/train': 8.874066352844238} +01/22/2022 22:45:42 - INFO - codeparrot_training - Step 499: {'lr': 0.0001245, 'samples': 15968, 'steps': 498, 'loss/train': 8.979452133178711} +01/22/2022 22:45:43 - INFO - codeparrot_training - Step 500: {'lr': 0.00012475, 'samples': 16000, 'steps': 499, 'loss/train': 9.157403945922852} +01/22/2022 22:45:43 - INFO - codeparrot_training - Step 501: {'lr': 0.000125, 'samples': 16032, 'steps': 500, 'loss/train': 8.585611343383789} +01/22/2022 22:45:44 - INFO - codeparrot_training - Step 502: {'lr': 0.00012525, 'samples': 16064, 'steps': 501, 'loss/train': 9.333965301513672} +01/22/2022 22:45:44 - INFO - codeparrot_training - Step 503: {'lr': 0.00012550000000000001, 'samples': 16096, 'steps': 502, 'loss/train': 9.030278205871582} +01/22/2022 22:45:45 - INFO - codeparrot_training - Step 504: {'lr': 0.00012575, 'samples': 16128, 'steps': 503, 'loss/train': 8.657073020935059} +01/22/2022 22:45:45 - INFO - codeparrot_training - Step 505: {'lr': 0.000126, 'samples': 16160, 'steps': 504, 'loss/train': 9.493157386779785} +01/22/2022 22:45:46 - INFO - codeparrot_training - Step 506: {'lr': 0.00012625, 'samples': 16192, 'steps': 505, 'loss/train': 8.344677925109863} +01/22/2022 22:45:46 - INFO - codeparrot_training - Step 507: {'lr': 0.0001265, 'samples': 16224, 'steps': 506, 'loss/train': 8.895200729370117} +01/22/2022 22:45:47 - INFO - codeparrot_training - Step 508: {'lr': 0.00012675, 'samples': 16256, 'steps': 507, 'loss/train': 8.582348823547363} +01/22/2022 22:45:48 - INFO - codeparrot_training - Step 509: {'lr': 0.000127, 'samples': 16288, 'steps': 508, 'loss/train': 8.595846176147461} +01/22/2022 22:45:48 - INFO - codeparrot_training - Step 510: {'lr': 0.00012725, 'samples': 16320, 'steps': 509, 'loss/train': 8.209686279296875} +01/22/2022 22:45:49 - INFO - codeparrot_training - Step 511: {'lr': 0.0001275, 'samples': 16352, 'steps': 510, 'loss/train': 8.473603248596191} +01/22/2022 22:45:52 - INFO - codeparrot_training - Step 512: {'lr': 0.00012775000000000002, 'samples': 16384, 'steps': 511, 'loss/train': 9.2072114944458} +01/22/2022 22:45:52 - INFO - codeparrot_training - Step 513: {'lr': 0.000128, 'samples': 16416, 'steps': 512, 'loss/train': 8.846043586730957} +01/22/2022 22:45:53 - INFO - codeparrot_training - Step 514: {'lr': 0.00012825, 'samples': 16448, 'steps': 513, 'loss/train': 8.850171089172363} +01/22/2022 22:45:53 - INFO - codeparrot_training - Step 515: {'lr': 0.0001285, 'samples': 16480, 'steps': 514, 'loss/train': 7.458377838134766} +01/22/2022 22:45:54 - INFO - codeparrot_training - Step 516: {'lr': 0.00012875, 'samples': 16512, 'steps': 515, 'loss/train': 7.9562764167785645} +01/22/2022 22:45:54 - INFO - codeparrot_training - Step 517: {'lr': 0.00012900000000000002, 'samples': 16544, 'steps': 516, 'loss/train': 8.915765762329102} +01/22/2022 22:45:55 - INFO - codeparrot_training - Step 518: {'lr': 0.00012925, 'samples': 16576, 'steps': 517, 'loss/train': 
9.018022537231445} +01/22/2022 22:45:56 - INFO - codeparrot_training - Step 519: {'lr': 0.0001295, 'samples': 16608, 'steps': 518, 'loss/train': 8.567914962768555} +01/22/2022 22:45:56 - INFO - codeparrot_training - Step 520: {'lr': 0.00012975, 'samples': 16640, 'steps': 519, 'loss/train': 8.849289894104004} +01/22/2022 22:45:57 - INFO - codeparrot_training - Step 521: {'lr': 0.00013000000000000002, 'samples': 16672, 'steps': 520, 'loss/train': 9.94747543334961} +01/22/2022 22:45:57 - INFO - codeparrot_training - Step 522: {'lr': 0.00013025, 'samples': 16704, 'steps': 521, 'loss/train': 8.800361633300781} +01/22/2022 22:45:58 - INFO - codeparrot_training - Step 523: {'lr': 0.0001305, 'samples': 16736, 'steps': 522, 'loss/train': 9.005666732788086} +01/22/2022 22:45:58 - INFO - codeparrot_training - Step 524: {'lr': 0.00013075, 'samples': 16768, 'steps': 523, 'loss/train': 8.843351364135742} +01/22/2022 22:45:59 - INFO - codeparrot_training - Step 525: {'lr': 0.000131, 'samples': 16800, 'steps': 524, 'loss/train': 8.632165908813477} +01/22/2022 22:45:59 - INFO - codeparrot_training - Step 526: {'lr': 0.00013125000000000002, 'samples': 16832, 'steps': 525, 'loss/train': 9.653810501098633} +01/22/2022 22:46:00 - INFO - codeparrot_training - Step 527: {'lr': 0.0001315, 'samples': 16864, 'steps': 526, 'loss/train': 8.50106430053711} +01/22/2022 22:46:01 - INFO - codeparrot_training - Step 528: {'lr': 0.00013175, 'samples': 16896, 'steps': 527, 'loss/train': 8.970911979675293} +01/22/2022 22:46:01 - INFO - codeparrot_training - Step 529: {'lr': 0.000132, 'samples': 16928, 'steps': 528, 'loss/train': 10.680617332458496} +01/22/2022 22:46:02 - INFO - codeparrot_training - Step 530: {'lr': 0.00013225000000000002, 'samples': 16960, 'steps': 529, 'loss/train': 8.8971529006958} +01/22/2022 22:46:02 - INFO - codeparrot_training - Step 531: {'lr': 0.00013250000000000002, 'samples': 16992, 'steps': 530, 'loss/train': 9.048765182495117} +01/22/2022 22:46:03 - INFO - codeparrot_training - Step 532: {'lr': 0.00013275, 'samples': 17024, 'steps': 531, 'loss/train': 7.736001014709473} +01/22/2022 22:46:03 - INFO - codeparrot_training - Step 533: {'lr': 0.000133, 'samples': 17056, 'steps': 532, 'loss/train': 9.1373872756958} +01/22/2022 22:46:04 - INFO - codeparrot_training - Step 534: {'lr': 0.00013325, 'samples': 17088, 'steps': 533, 'loss/train': 8.606627464294434} +01/22/2022 22:46:05 - INFO - codeparrot_training - Step 535: {'lr': 0.00013350000000000002, 'samples': 17120, 'steps': 534, 'loss/train': 8.313236236572266} +01/22/2022 22:46:05 - INFO - codeparrot_training - Step 536: {'lr': 0.00013375, 'samples': 17152, 'steps': 535, 'loss/train': 8.64844799041748} +01/22/2022 22:46:06 - INFO - codeparrot_training - Step 537: {'lr': 0.000134, 'samples': 17184, 'steps': 536, 'loss/train': 9.012542724609375} +01/22/2022 22:46:06 - INFO - codeparrot_training - Step 538: {'lr': 0.00013425, 'samples': 17216, 'steps': 537, 'loss/train': 9.534067153930664} +01/22/2022 22:46:07 - INFO - codeparrot_training - Step 539: {'lr': 0.00013450000000000002, 'samples': 17248, 'steps': 538, 'loss/train': 8.352346420288086} +01/22/2022 22:46:07 - INFO - codeparrot_training - Step 540: {'lr': 0.00013475000000000002, 'samples': 17280, 'steps': 539, 'loss/train': 8.925926208496094} +01/22/2022 22:46:08 - INFO - codeparrot_training - Step 541: {'lr': 0.000135, 'samples': 17312, 'steps': 540, 'loss/train': 8.605809211730957} +01/22/2022 22:46:09 - INFO - codeparrot_training - Step 542: {'lr': 0.00013525, 'samples': 17344, 'steps': 541, 
'loss/train': 8.95393180847168} +01/22/2022 22:46:10 - INFO - codeparrot_training - Step 543: {'lr': 0.00013550000000000001, 'samples': 17376, 'steps': 542, 'loss/train': 9.068395614624023} +01/22/2022 22:46:10 - INFO - codeparrot_training - Step 544: {'lr': 0.00013575000000000002, 'samples': 17408, 'steps': 543, 'loss/train': 7.747339725494385} +01/22/2022 22:46:11 - INFO - codeparrot_training - Step 545: {'lr': 0.00013600000000000003, 'samples': 17440, 'steps': 544, 'loss/train': 8.863734245300293} +01/22/2022 22:46:11 - INFO - codeparrot_training - Step 546: {'lr': 0.00013625, 'samples': 17472, 'steps': 545, 'loss/train': 8.376053810119629} +01/22/2022 22:46:12 - INFO - codeparrot_training - Step 547: {'lr': 0.0001365, 'samples': 17504, 'steps': 546, 'loss/train': 8.383845329284668} +01/22/2022 22:46:12 - INFO - codeparrot_training - Step 548: {'lr': 0.00013675000000000002, 'samples': 17536, 'steps': 547, 'loss/train': 8.682369232177734} +01/22/2022 22:46:13 - INFO - codeparrot_training - Step 549: {'lr': 0.00013700000000000002, 'samples': 17568, 'steps': 548, 'loss/train': 9.090871810913086} +01/22/2022 22:46:14 - INFO - codeparrot_training - Step 550: {'lr': 0.00013725, 'samples': 17600, 'steps': 549, 'loss/train': 8.21212387084961} +01/22/2022 22:46:14 - INFO - codeparrot_training - Step 551: {'lr': 0.0001375, 'samples': 17632, 'steps': 550, 'loss/train': 8.604456901550293} +01/22/2022 22:46:15 - INFO - codeparrot_training - Step 552: {'lr': 0.00013775000000000001, 'samples': 17664, 'steps': 551, 'loss/train': 10.368521690368652} +01/22/2022 22:46:15 - INFO - codeparrot_training - Step 553: {'lr': 0.00013800000000000002, 'samples': 17696, 'steps': 552, 'loss/train': 9.160516738891602} +01/22/2022 22:46:16 - INFO - codeparrot_training - Step 554: {'lr': 0.00013825000000000003, 'samples': 17728, 'steps': 553, 'loss/train': 8.63400936126709} +01/22/2022 22:46:16 - INFO - codeparrot_training - Step 555: {'lr': 0.0001385, 'samples': 17760, 'steps': 554, 'loss/train': 9.302752494812012} +01/22/2022 22:46:17 - INFO - codeparrot_training - Step 556: {'lr': 0.00013875, 'samples': 17792, 'steps': 555, 'loss/train': 8.544466018676758} +01/22/2022 22:46:17 - INFO - codeparrot_training - Step 557: {'lr': 0.00013900000000000002, 'samples': 17824, 'steps': 556, 'loss/train': 7.935085296630859} +01/22/2022 22:46:18 - INFO - codeparrot_training - Step 558: {'lr': 0.00013925000000000002, 'samples': 17856, 'steps': 557, 'loss/train': 8.536758422851562} +01/22/2022 22:46:19 - INFO - codeparrot_training - Step 559: {'lr': 0.0001395, 'samples': 17888, 'steps': 558, 'loss/train': 9.011187553405762} +01/22/2022 22:46:19 - INFO - codeparrot_training - Step 560: {'lr': 0.00013975, 'samples': 17920, 'steps': 559, 'loss/train': 8.32465648651123} +01/22/2022 22:46:20 - INFO - codeparrot_training - Step 561: {'lr': 0.00014000000000000001, 'samples': 17952, 'steps': 560, 'loss/train': 7.893373489379883} +01/22/2022 22:46:20 - INFO - codeparrot_training - Step 562: {'lr': 0.00014025000000000002, 'samples': 17984, 'steps': 561, 'loss/train': 8.895259857177734} +01/22/2022 22:46:21 - INFO - codeparrot_training - Step 563: {'lr': 0.00014050000000000003, 'samples': 18016, 'steps': 562, 'loss/train': 9.190135955810547} +01/22/2022 22:46:21 - INFO - codeparrot_training - Step 564: {'lr': 0.00014074999999999998, 'samples': 18048, 'steps': 563, 'loss/train': 8.743096351623535} +01/22/2022 22:46:22 - INFO - codeparrot_training - Step 565: {'lr': 0.00014099999999999998, 'samples': 18080, 'steps': 564, 'loss/train': 
8.110854148864746} +01/22/2022 22:46:22 - INFO - codeparrot_training - Step 566: {'lr': 0.00014125, 'samples': 18112, 'steps': 565, 'loss/train': 8.208645820617676} +01/22/2022 22:46:23 - INFO - codeparrot_training - Step 567: {'lr': 0.0001415, 'samples': 18144, 'steps': 566, 'loss/train': 7.868147373199463} +01/22/2022 22:46:24 - INFO - codeparrot_training - Step 568: {'lr': 0.00014175, 'samples': 18176, 'steps': 567, 'loss/train': 8.895848274230957} +01/22/2022 22:46:24 - INFO - codeparrot_training - Step 569: {'lr': 0.00014199999999999998, 'samples': 18208, 'steps': 568, 'loss/train': 10.179607391357422} +01/22/2022 22:46:25 - INFO - codeparrot_training - Step 570: {'lr': 0.00014225, 'samples': 18240, 'steps': 569, 'loss/train': 7.680943965911865} +01/22/2022 22:46:26 - INFO - codeparrot_training - Step 571: {'lr': 0.0001425, 'samples': 18272, 'steps': 570, 'loss/train': 8.630720138549805} +01/22/2022 22:46:26 - INFO - codeparrot_training - Step 572: {'lr': 0.00014275, 'samples': 18304, 'steps': 571, 'loss/train': 9.298431396484375} +01/22/2022 22:46:27 - INFO - codeparrot_training - Step 573: {'lr': 0.00014299999999999998, 'samples': 18336, 'steps': 572, 'loss/train': 9.179400444030762} +01/22/2022 22:46:28 - INFO - codeparrot_training - Step 574: {'lr': 0.00014324999999999999, 'samples': 18368, 'steps': 573, 'loss/train': 8.075512886047363} +01/22/2022 22:46:28 - INFO - codeparrot_training - Step 575: {'lr': 0.0001435, 'samples': 18400, 'steps': 574, 'loss/train': 9.000823020935059} +01/22/2022 22:46:29 - INFO - codeparrot_training - Step 576: {'lr': 0.00014375, 'samples': 18432, 'steps': 575, 'loss/train': 8.721220970153809} +01/22/2022 22:46:29 - INFO - codeparrot_training - Step 577: {'lr': 0.000144, 'samples': 18464, 'steps': 576, 'loss/train': 8.355218887329102} +01/22/2022 22:46:30 - INFO - codeparrot_training - Step 578: {'lr': 0.00014424999999999998, 'samples': 18496, 'steps': 577, 'loss/train': 9.233628273010254} +01/22/2022 22:46:30 - INFO - codeparrot_training - Step 579: {'lr': 0.0001445, 'samples': 18528, 'steps': 578, 'loss/train': 8.857300758361816} +01/22/2022 22:46:31 - INFO - codeparrot_training - Step 580: {'lr': 0.00014475, 'samples': 18560, 'steps': 579, 'loss/train': 8.9879789352417} +01/22/2022 22:46:32 - INFO - codeparrot_training - Step 581: {'lr': 0.000145, 'samples': 18592, 'steps': 580, 'loss/train': 9.19580364227295} +01/22/2022 22:46:32 - INFO - codeparrot_training - Step 582: {'lr': 0.00014524999999999998, 'samples': 18624, 'steps': 581, 'loss/train': 9.583305358886719} +01/22/2022 22:46:33 - INFO - codeparrot_training - Step 583: {'lr': 0.00014549999999999999, 'samples': 18656, 'steps': 582, 'loss/train': 8.67888355255127} +01/22/2022 22:46:33 - INFO - codeparrot_training - Step 584: {'lr': 0.00014575, 'samples': 18688, 'steps': 583, 'loss/train': 9.553257942199707} +01/22/2022 22:46:34 - INFO - codeparrot_training - Step 585: {'lr': 0.000146, 'samples': 18720, 'steps': 584, 'loss/train': 8.946166038513184} +01/22/2022 22:46:34 - INFO - codeparrot_training - Step 586: {'lr': 0.00014625, 'samples': 18752, 'steps': 585, 'loss/train': 9.009350776672363} +01/22/2022 22:46:35 - INFO - codeparrot_training - Step 587: {'lr': 0.00014649999999999998, 'samples': 18784, 'steps': 586, 'loss/train': 7.606625556945801} +01/22/2022 22:46:35 - INFO - codeparrot_training - Step 588: {'lr': 0.00014675, 'samples': 18816, 'steps': 587, 'loss/train': 9.259263038635254} +01/22/2022 22:46:36 - INFO - codeparrot_training - Step 589: {'lr': 0.000147, 'samples': 18848, 'steps': 
588, 'loss/train': 8.20702838897705} +01/22/2022 22:46:37 - INFO - codeparrot_training - Step 590: {'lr': 0.00014725, 'samples': 18880, 'steps': 589, 'loss/train': 8.947803497314453} +01/22/2022 22:46:37 - INFO - codeparrot_training - Step 591: {'lr': 0.0001475, 'samples': 18912, 'steps': 590, 'loss/train': 9.17077350616455} +01/22/2022 22:46:38 - INFO - codeparrot_training - Step 592: {'lr': 0.00014774999999999999, 'samples': 18944, 'steps': 591, 'loss/train': 8.733564376831055} +01/22/2022 22:46:38 - INFO - codeparrot_training - Step 593: {'lr': 0.000148, 'samples': 18976, 'steps': 592, 'loss/train': 8.538811683654785} +01/22/2022 22:46:39 - INFO - codeparrot_training - Step 594: {'lr': 0.00014825, 'samples': 19008, 'steps': 593, 'loss/train': 8.598627090454102} +01/22/2022 22:46:39 - INFO - codeparrot_training - Step 595: {'lr': 0.0001485, 'samples': 19040, 'steps': 594, 'loss/train': 9.143169403076172} +01/22/2022 22:46:40 - INFO - codeparrot_training - Step 596: {'lr': 0.00014874999999999998, 'samples': 19072, 'steps': 595, 'loss/train': 8.927964210510254} +01/22/2022 22:46:40 - INFO - codeparrot_training - Step 597: {'lr': 0.000149, 'samples': 19104, 'steps': 596, 'loss/train': 8.64260196685791} +01/22/2022 22:46:41 - INFO - codeparrot_training - Step 598: {'lr': 0.00014925, 'samples': 19136, 'steps': 597, 'loss/train': 9.214986801147461} +01/22/2022 22:46:42 - INFO - codeparrot_training - Step 599: {'lr': 0.0001495, 'samples': 19168, 'steps': 598, 'loss/train': 8.284449577331543} +01/22/2022 22:46:43 - INFO - codeparrot_training - Step 600: {'lr': 0.00014975, 'samples': 19200, 'steps': 599, 'loss/train': 8.507474899291992} +01/22/2022 22:46:43 - INFO - codeparrot_training - Step 601: {'lr': 0.00015, 'samples': 19232, 'steps': 600, 'loss/train': 7.847227096557617} +01/22/2022 22:46:44 - INFO - codeparrot_training - Step 602: {'lr': 0.00015025, 'samples': 19264, 'steps': 601, 'loss/train': 8.651098251342773} +01/22/2022 22:46:45 - INFO - codeparrot_training - Step 603: {'lr': 0.0001505, 'samples': 19296, 'steps': 602, 'loss/train': 8.48475170135498} +01/22/2022 22:46:45 - INFO - codeparrot_training - Step 604: {'lr': 0.00015075, 'samples': 19328, 'steps': 603, 'loss/train': 9.089558601379395} +01/22/2022 22:46:46 - INFO - codeparrot_training - Step 605: {'lr': 0.000151, 'samples': 19360, 'steps': 604, 'loss/train': 8.474851608276367} +01/22/2022 22:46:46 - INFO - codeparrot_training - Step 606: {'lr': 0.00015125, 'samples': 19392, 'steps': 605, 'loss/train': 7.753115653991699} +01/22/2022 22:46:47 - INFO - codeparrot_training - Step 607: {'lr': 0.0001515, 'samples': 19424, 'steps': 606, 'loss/train': 8.794160842895508} +01/22/2022 22:46:47 - INFO - codeparrot_training - Step 608: {'lr': 0.00015175, 'samples': 19456, 'steps': 607, 'loss/train': 8.319067001342773} +01/22/2022 22:46:48 - INFO - codeparrot_training - Step 609: {'lr': 0.000152, 'samples': 19488, 'steps': 608, 'loss/train': 8.9052734375} +01/22/2022 22:46:48 - INFO - codeparrot_training - Step 610: {'lr': 0.00015225, 'samples': 19520, 'steps': 609, 'loss/train': 9.292723655700684} +01/22/2022 22:46:49 - INFO - codeparrot_training - Step 611: {'lr': 0.0001525, 'samples': 19552, 'steps': 610, 'loss/train': 8.452613830566406} +01/22/2022 22:46:50 - INFO - codeparrot_training - Step 612: {'lr': 0.00015275, 'samples': 19584, 'steps': 611, 'loss/train': 9.566993713378906} +01/22/2022 22:46:50 - INFO - codeparrot_training - Step 613: {'lr': 0.000153, 'samples': 19616, 'steps': 612, 'loss/train': 8.784796714782715} +01/22/2022 
22:46:51 - INFO - codeparrot_training - Step 614: {'lr': 0.00015325, 'samples': 19648, 'steps': 613, 'loss/train': 8.738357543945312} +01/22/2022 22:46:51 - INFO - codeparrot_training - Step 615: {'lr': 0.0001535, 'samples': 19680, 'steps': 614, 'loss/train': 9.165543556213379} +01/22/2022 22:46:52 - INFO - codeparrot_training - Step 616: {'lr': 0.00015375, 'samples': 19712, 'steps': 615, 'loss/train': 8.56166934967041} +01/22/2022 22:46:52 - INFO - codeparrot_training - Step 617: {'lr': 0.000154, 'samples': 19744, 'steps': 616, 'loss/train': 8.901839256286621} +01/22/2022 22:46:53 - INFO - codeparrot_training - Step 618: {'lr': 0.00015425, 'samples': 19776, 'steps': 617, 'loss/train': 8.226439476013184} +01/22/2022 22:46:54 - INFO - codeparrot_training - Step 619: {'lr': 0.00015450000000000001, 'samples': 19808, 'steps': 618, 'loss/train': 8.988242149353027} +01/22/2022 22:46:54 - INFO - codeparrot_training - Step 620: {'lr': 0.00015475, 'samples': 19840, 'steps': 619, 'loss/train': 8.727021217346191} +01/22/2022 22:46:55 - INFO - codeparrot_training - Step 621: {'lr': 0.000155, 'samples': 19872, 'steps': 620, 'loss/train': 9.28014087677002} +01/22/2022 22:46:55 - INFO - codeparrot_training - Step 622: {'lr': 0.00015525, 'samples': 19904, 'steps': 621, 'loss/train': 8.216615676879883} +01/22/2022 22:46:56 - INFO - codeparrot_training - Step 623: {'lr': 0.0001555, 'samples': 19936, 'steps': 622, 'loss/train': 7.989234924316406} +01/22/2022 22:46:56 - INFO - codeparrot_training - Step 624: {'lr': 0.00015575, 'samples': 19968, 'steps': 623, 'loss/train': 8.75947093963623} +01/22/2022 22:46:57 - INFO - codeparrot_training - Step 625: {'lr': 0.000156, 'samples': 20000, 'steps': 624, 'loss/train': 9.297712326049805} +01/22/2022 22:46:57 - INFO - codeparrot_training - Step 626: {'lr': 0.00015625, 'samples': 20032, 'steps': 625, 'loss/train': 9.574373245239258} +01/22/2022 22:46:58 - INFO - codeparrot_training - Step 627: {'lr': 0.0001565, 'samples': 20064, 'steps': 626, 'loss/train': 9.598540306091309} +01/22/2022 22:46:59 - INFO - codeparrot_training - Step 628: {'lr': 0.00015675000000000002, 'samples': 20096, 'steps': 627, 'loss/train': 8.21660041809082} +01/22/2022 22:47:00 - INFO - codeparrot_training - Step 629: {'lr': 0.000157, 'samples': 20128, 'steps': 628, 'loss/train': 9.178356170654297} +01/22/2022 22:47:00 - INFO - codeparrot_training - Step 630: {'lr': 0.00015725, 'samples': 20160, 'steps': 629, 'loss/train': 8.880914688110352} +01/22/2022 22:47:01 - INFO - codeparrot_training - Step 631: {'lr': 0.0001575, 'samples': 20192, 'steps': 630, 'loss/train': 8.316930770874023} +01/22/2022 22:47:01 - INFO - codeparrot_training - Step 632: {'lr': 0.00015775, 'samples': 20224, 'steps': 631, 'loss/train': 9.126971244812012} +01/22/2022 22:47:02 - INFO - codeparrot_training - Step 633: {'lr': 0.000158, 'samples': 20256, 'steps': 632, 'loss/train': 8.887096405029297} +01/22/2022 22:47:03 - INFO - codeparrot_training - Step 634: {'lr': 0.00015825, 'samples': 20288, 'steps': 633, 'loss/train': 8.101481437683105} +01/22/2022 22:47:03 - INFO - codeparrot_training - Step 635: {'lr': 0.0001585, 'samples': 20320, 'steps': 634, 'loss/train': 9.019575119018555} +01/22/2022 22:47:04 - INFO - codeparrot_training - Step 636: {'lr': 0.00015875, 'samples': 20352, 'steps': 635, 'loss/train': 8.489371299743652} +01/22/2022 22:47:04 - INFO - codeparrot_training - Step 637: {'lr': 0.00015900000000000002, 'samples': 20384, 'steps': 636, 'loss/train': 8.396371841430664} +01/22/2022 22:47:05 - INFO - 
codeparrot_training - Step 638: {'lr': 0.00015925, 'samples': 20416, 'steps': 637, 'loss/train': 9.01747989654541} +01/22/2022 22:47:05 - INFO - codeparrot_training - Step 639: {'lr': 0.0001595, 'samples': 20448, 'steps': 638, 'loss/train': 8.526130676269531} +01/22/2022 22:47:06 - INFO - codeparrot_training - Step 640: {'lr': 0.00015975, 'samples': 20480, 'steps': 639, 'loss/train': 8.46207046508789} +01/22/2022 22:47:06 - INFO - codeparrot_training - Step 641: {'lr': 0.00016, 'samples': 20512, 'steps': 640, 'loss/train': 8.978306770324707} +01/22/2022 22:47:07 - INFO - codeparrot_training - Step 642: {'lr': 0.00016025000000000002, 'samples': 20544, 'steps': 641, 'loss/train': 8.609273910522461} +01/22/2022 22:47:08 - INFO - codeparrot_training - Step 643: {'lr': 0.0001605, 'samples': 20576, 'steps': 642, 'loss/train': 8.930930137634277} +01/22/2022 22:47:08 - INFO - codeparrot_training - Step 644: {'lr': 0.00016075, 'samples': 20608, 'steps': 643, 'loss/train': 8.576926231384277} +01/22/2022 22:47:09 - INFO - codeparrot_training - Step 645: {'lr': 0.000161, 'samples': 20640, 'steps': 644, 'loss/train': 8.135324478149414} +01/22/2022 22:47:09 - INFO - codeparrot_training - Step 646: {'lr': 0.00016125000000000002, 'samples': 20672, 'steps': 645, 'loss/train': 8.536359786987305} +01/22/2022 22:47:10 - INFO - codeparrot_training - Step 647: {'lr': 0.0001615, 'samples': 20704, 'steps': 646, 'loss/train': 7.634723663330078} +01/22/2022 22:47:10 - INFO - codeparrot_training - Step 648: {'lr': 0.00016175, 'samples': 20736, 'steps': 647, 'loss/train': 7.74436092376709} +01/22/2022 22:47:11 - INFO - codeparrot_training - Step 649: {'lr': 0.000162, 'samples': 20768, 'steps': 648, 'loss/train': 8.55435848236084} +01/22/2022 22:47:11 - INFO - codeparrot_training - Step 650: {'lr': 0.00016225000000000001, 'samples': 20800, 'steps': 649, 'loss/train': 7.529900074005127} +01/22/2022 22:47:12 - INFO - codeparrot_training - Step 651: {'lr': 0.00016250000000000002, 'samples': 20832, 'steps': 650, 'loss/train': 8.24134635925293} +01/22/2022 22:47:13 - INFO - codeparrot_training - Step 652: {'lr': 0.00016275, 'samples': 20864, 'steps': 651, 'loss/train': 7.325622081756592} +01/22/2022 22:47:13 - INFO - codeparrot_training - Step 653: {'lr': 0.000163, 'samples': 20896, 'steps': 652, 'loss/train': 8.55883502960205} +01/22/2022 22:47:14 - INFO - codeparrot_training - Step 654: {'lr': 0.00016325, 'samples': 20928, 'steps': 653, 'loss/train': 7.811609745025635} +01/22/2022 22:47:14 - INFO - codeparrot_training - Step 655: {'lr': 0.00016350000000000002, 'samples': 20960, 'steps': 654, 'loss/train': 8.723841667175293} +01/22/2022 22:47:15 - INFO - codeparrot_training - Step 656: {'lr': 0.00016375000000000002, 'samples': 20992, 'steps': 655, 'loss/train': 8.020588874816895} +01/22/2022 22:47:15 - INFO - codeparrot_training - Step 657: {'lr': 0.000164, 'samples': 21024, 'steps': 656, 'loss/train': 8.746264457702637} +01/22/2022 22:47:16 - INFO - codeparrot_training - Step 658: {'lr': 0.00016425, 'samples': 21056, 'steps': 657, 'loss/train': 8.722439765930176} +01/22/2022 22:47:17 - INFO - codeparrot_training - Step 659: {'lr': 0.00016450000000000001, 'samples': 21088, 'steps': 658, 'loss/train': 8.892648696899414} +01/22/2022 22:47:18 - INFO - codeparrot_training - Step 660: {'lr': 0.00016475000000000002, 'samples': 21120, 'steps': 659, 'loss/train': 9.136396408081055} +01/22/2022 22:47:18 - INFO - codeparrot_training - Step 661: {'lr': 0.000165, 'samples': 21152, 'steps': 660, 'loss/train': 7.521387100219727} 
+01/22/2022 22:47:19 - INFO - codeparrot_training - Step 662: {'lr': 0.00016525, 'samples': 21184, 'steps': 661, 'loss/train': 8.302600860595703} +01/22/2022 22:47:19 - INFO - codeparrot_training - Step 663: {'lr': 0.0001655, 'samples': 21216, 'steps': 662, 'loss/train': 8.900461196899414} +01/22/2022 22:47:20 - INFO - codeparrot_training - Step 664: {'lr': 0.00016575000000000002, 'samples': 21248, 'steps': 663, 'loss/train': 8.034031867980957} +01/22/2022 22:47:21 - INFO - codeparrot_training - Step 665: {'lr': 0.00016600000000000002, 'samples': 21280, 'steps': 664, 'loss/train': 7.954836368560791} +01/22/2022 22:47:21 - INFO - codeparrot_training - Step 666: {'lr': 0.00016625, 'samples': 21312, 'steps': 665, 'loss/train': 8.113659858703613} +01/22/2022 22:47:22 - INFO - codeparrot_training - Step 667: {'lr': 0.0001665, 'samples': 21344, 'steps': 666, 'loss/train': 7.33713436126709} +01/22/2022 22:47:22 - INFO - codeparrot_training - Step 668: {'lr': 0.00016675000000000001, 'samples': 21376, 'steps': 667, 'loss/train': 6.9350481033325195} +01/22/2022 22:47:23 - INFO - codeparrot_training - Step 669: {'lr': 0.00016700000000000002, 'samples': 21408, 'steps': 668, 'loss/train': 8.277999877929688} +01/22/2022 22:47:23 - INFO - codeparrot_training - Step 670: {'lr': 0.00016725000000000003, 'samples': 21440, 'steps': 669, 'loss/train': 8.604161262512207} +01/22/2022 22:47:24 - INFO - codeparrot_training - Step 671: {'lr': 0.0001675, 'samples': 21472, 'steps': 670, 'loss/train': 8.73328685760498} +01/22/2022 22:47:24 - INFO - codeparrot_training - Step 672: {'lr': 0.00016775, 'samples': 21504, 'steps': 671, 'loss/train': 8.066764831542969} +01/22/2022 22:47:25 - INFO - codeparrot_training - Step 673: {'lr': 0.00016800000000000002, 'samples': 21536, 'steps': 672, 'loss/train': 8.643463134765625} +01/22/2022 22:47:26 - INFO - codeparrot_training - Step 674: {'lr': 0.00016825000000000002, 'samples': 21568, 'steps': 673, 'loss/train': 8.642475128173828} +01/22/2022 22:47:26 - INFO - codeparrot_training - Step 675: {'lr': 0.0001685, 'samples': 21600, 'steps': 674, 'loss/train': 9.051352500915527} +01/22/2022 22:47:27 - INFO - codeparrot_training - Step 676: {'lr': 0.00016875, 'samples': 21632, 'steps': 675, 'loss/train': 9.236879348754883} +01/22/2022 22:47:27 - INFO - codeparrot_training - Step 677: {'lr': 0.00016900000000000002, 'samples': 21664, 'steps': 676, 'loss/train': 9.195563316345215} +01/22/2022 22:47:28 - INFO - codeparrot_training - Step 678: {'lr': 0.00016925000000000002, 'samples': 21696, 'steps': 677, 'loss/train': 8.284234046936035} +01/22/2022 22:47:28 - INFO - codeparrot_training - Step 679: {'lr': 0.00016950000000000003, 'samples': 21728, 'steps': 678, 'loss/train': 8.515478134155273} +01/22/2022 22:47:29 - INFO - codeparrot_training - Step 680: {'lr': 0.00016975, 'samples': 21760, 'steps': 679, 'loss/train': 8.081453323364258} +01/22/2022 22:47:29 - INFO - codeparrot_training - Step 681: {'lr': 0.00017, 'samples': 21792, 'steps': 680, 'loss/train': 7.837827205657959} +01/22/2022 22:47:30 - INFO - codeparrot_training - Step 682: {'lr': 0.00017025000000000002, 'samples': 21824, 'steps': 681, 'loss/train': 8.54543399810791} +01/22/2022 22:47:31 - INFO - codeparrot_training - Step 683: {'lr': 0.00017050000000000002, 'samples': 21856, 'steps': 682, 'loss/train': 8.504234313964844} +01/22/2022 22:47:31 - INFO - codeparrot_training - Step 684: {'lr': 0.00017075, 'samples': 21888, 'steps': 683, 'loss/train': 8.811345100402832} +01/22/2022 22:47:32 - INFO - codeparrot_training - Step 685: 
{'lr': 0.000171, 'samples': 21920, 'steps': 684, 'loss/train': 8.69779109954834} +01/22/2022 22:47:32 - INFO - codeparrot_training - Step 686: {'lr': 0.00017125000000000002, 'samples': 21952, 'steps': 685, 'loss/train': 8.908151626586914} +01/22/2022 22:47:33 - INFO - codeparrot_training - Step 687: {'lr': 0.00017150000000000002, 'samples': 21984, 'steps': 686, 'loss/train': 7.853282928466797} +01/22/2022 22:47:34 - INFO - codeparrot_training - Step 688: {'lr': 0.00017175000000000003, 'samples': 22016, 'steps': 687, 'loss/train': 8.45693302154541} +01/22/2022 22:47:35 - INFO - codeparrot_training - Step 689: {'lr': 0.00017199999999999998, 'samples': 22048, 'steps': 688, 'loss/train': 8.471305847167969} +01/22/2022 22:47:35 - INFO - codeparrot_training - Step 690: {'lr': 0.00017224999999999999, 'samples': 22080, 'steps': 689, 'loss/train': 8.895808219909668} +01/22/2022 22:47:36 - INFO - codeparrot_training - Step 691: {'lr': 0.0001725, 'samples': 22112, 'steps': 690, 'loss/train': 8.234037399291992} +01/22/2022 22:47:36 - INFO - codeparrot_training - Step 692: {'lr': 0.00017275, 'samples': 22144, 'steps': 691, 'loss/train': 8.049099922180176} +01/22/2022 22:47:37 - INFO - codeparrot_training - Step 693: {'lr': 0.000173, 'samples': 22176, 'steps': 692, 'loss/train': 8.761587142944336} +01/22/2022 22:47:37 - INFO - codeparrot_training - Step 694: {'lr': 0.00017324999999999998, 'samples': 22208, 'steps': 693, 'loss/train': 8.79301929473877} +01/22/2022 22:47:38 - INFO - codeparrot_training - Step 695: {'lr': 0.0001735, 'samples': 22240, 'steps': 694, 'loss/train': 7.816861152648926} +01/22/2022 22:47:39 - INFO - codeparrot_training - Step 696: {'lr': 0.00017375, 'samples': 22272, 'steps': 695, 'loss/train': 8.739291191101074} +01/22/2022 22:47:39 - INFO - codeparrot_training - Step 697: {'lr': 0.000174, 'samples': 22304, 'steps': 696, 'loss/train': 8.698533058166504} +01/22/2022 22:47:40 - INFO - codeparrot_training - Step 698: {'lr': 0.00017424999999999998, 'samples': 22336, 'steps': 697, 'loss/train': 8.766914367675781} +01/22/2022 22:47:40 - INFO - codeparrot_training - Step 699: {'lr': 0.00017449999999999999, 'samples': 22368, 'steps': 698, 'loss/train': 9.046239852905273} +01/22/2022 22:47:41 - INFO - codeparrot_training - Step 700: {'lr': 0.00017475, 'samples': 22400, 'steps': 699, 'loss/train': 8.984891891479492} +01/22/2022 22:47:41 - INFO - codeparrot_training - Step 701: {'lr': 0.000175, 'samples': 22432, 'steps': 700, 'loss/train': 8.794310569763184} +01/22/2022 22:47:42 - INFO - codeparrot_training - Step 702: {'lr': 0.00017525, 'samples': 22464, 'steps': 701, 'loss/train': 8.64378833770752} +01/22/2022 22:47:42 - INFO - codeparrot_training - Step 703: {'lr': 0.00017549999999999998, 'samples': 22496, 'steps': 702, 'loss/train': 8.487848281860352} +01/22/2022 22:47:43 - INFO - codeparrot_training - Step 704: {'lr': 0.00017575, 'samples': 22528, 'steps': 703, 'loss/train': 8.213703155517578} +01/22/2022 22:47:44 - INFO - codeparrot_training - Step 705: {'lr': 0.000176, 'samples': 22560, 'steps': 704, 'loss/train': 7.726264953613281} +01/22/2022 22:47:44 - INFO - codeparrot_training - Step 706: {'lr': 0.00017625, 'samples': 22592, 'steps': 705, 'loss/train': 8.055606842041016} +01/22/2022 22:47:45 - INFO - codeparrot_training - Step 707: {'lr': 0.00017649999999999998, 'samples': 22624, 'steps': 706, 'loss/train': 8.836901664733887} +01/22/2022 22:47:45 - INFO - codeparrot_training - Step 708: {'lr': 0.00017675, 'samples': 22656, 'steps': 707, 'loss/train': 8.435892105102539} 
+01/22/2022 22:47:46 - INFO - codeparrot_training - Step 709: {'lr': 0.000177, 'samples': 22688, 'steps': 708, 'loss/train': 7.9386305809021} +01/22/2022 22:47:46 - INFO - codeparrot_training - Step 710: {'lr': 0.00017725, 'samples': 22720, 'steps': 709, 'loss/train': 8.353105545043945} +01/22/2022 22:47:47 - INFO - codeparrot_training - Step 711: {'lr': 0.0001775, 'samples': 22752, 'steps': 710, 'loss/train': 8.209373474121094} +01/22/2022 22:47:48 - INFO - codeparrot_training - Step 712: {'lr': 0.00017774999999999998, 'samples': 22784, 'steps': 711, 'loss/train': 8.086353302001953} +01/22/2022 22:47:48 - INFO - codeparrot_training - Step 713: {'lr': 0.000178, 'samples': 22816, 'steps': 712, 'loss/train': 8.580016136169434} +01/22/2022 22:47:49 - INFO - codeparrot_training - Step 714: {'lr': 0.00017825, 'samples': 22848, 'steps': 713, 'loss/train': 8.425539016723633} +01/22/2022 22:47:49 - INFO - codeparrot_training - Step 715: {'lr': 0.0001785, 'samples': 22880, 'steps': 714, 'loss/train': 9.281387329101562} +01/22/2022 22:47:50 - INFO - codeparrot_training - Step 716: {'lr': 0.00017875, 'samples': 22912, 'steps': 715, 'loss/train': 7.845706462860107} +01/22/2022 22:47:51 - INFO - codeparrot_training - Step 717: {'lr': 0.000179, 'samples': 22944, 'steps': 716, 'loss/train': 8.166410446166992} +01/22/2022 22:47:52 - INFO - codeparrot_training - Step 718: {'lr': 0.00017925, 'samples': 22976, 'steps': 717, 'loss/train': 8.163521766662598} +01/22/2022 22:47:52 - INFO - codeparrot_training - Step 719: {'lr': 0.0001795, 'samples': 23008, 'steps': 718, 'loss/train': 8.79107666015625} +01/22/2022 22:47:53 - INFO - codeparrot_training - Step 720: {'lr': 0.00017975, 'samples': 23040, 'steps': 719, 'loss/train': 8.143933296203613} +01/22/2022 22:47:53 - INFO - codeparrot_training - Step 721: {'lr': 0.00017999999999999998, 'samples': 23072, 'steps': 720, 'loss/train': 8.425100326538086} +01/22/2022 22:47:54 - INFO - codeparrot_training - Step 722: {'lr': 0.00018025, 'samples': 23104, 'steps': 721, 'loss/train': 8.590532302856445} +01/22/2022 22:47:54 - INFO - codeparrot_training - Step 723: {'lr': 0.0001805, 'samples': 23136, 'steps': 722, 'loss/train': 9.175063133239746} +01/22/2022 22:47:55 - INFO - codeparrot_training - Step 724: {'lr': 0.00018075, 'samples': 23168, 'steps': 723, 'loss/train': 7.791674613952637} +01/22/2022 22:47:55 - INFO - codeparrot_training - Step 725: {'lr': 0.000181, 'samples': 23200, 'steps': 724, 'loss/train': 8.273862838745117} +01/22/2022 22:47:56 - INFO - codeparrot_training - Step 726: {'lr': 0.00018125, 'samples': 23232, 'steps': 725, 'loss/train': 8.916443824768066} +01/22/2022 22:47:57 - INFO - codeparrot_training - Step 727: {'lr': 0.0001815, 'samples': 23264, 'steps': 726, 'loss/train': 7.991215229034424} +01/22/2022 22:47:57 - INFO - codeparrot_training - Step 728: {'lr': 0.00018175, 'samples': 23296, 'steps': 727, 'loss/train': 8.772823333740234} +01/22/2022 22:47:58 - INFO - codeparrot_training - Step 729: {'lr': 0.000182, 'samples': 23328, 'steps': 728, 'loss/train': 7.785933494567871} +01/22/2022 22:47:58 - INFO - codeparrot_training - Step 730: {'lr': 0.00018225, 'samples': 23360, 'steps': 729, 'loss/train': 8.237862586975098} +01/22/2022 22:47:59 - INFO - codeparrot_training - Step 731: {'lr': 0.0001825, 'samples': 23392, 'steps': 730, 'loss/train': 7.969146728515625} +01/22/2022 22:47:59 - INFO - codeparrot_training - Step 732: {'lr': 0.00018275, 'samples': 23424, 'steps': 731, 'loss/train': 8.233857154846191} +01/22/2022 22:48:00 - INFO - 
codeparrot_training - Step 733: {'lr': 0.000183, 'samples': 23456, 'steps': 732, 'loss/train': 8.427638053894043} +01/22/2022 22:48:00 - INFO - codeparrot_training - Step 734: {'lr': 0.00018325, 'samples': 23488, 'steps': 733, 'loss/train': 7.5296406745910645} +01/22/2022 22:48:01 - INFO - codeparrot_training - Step 735: {'lr': 0.0001835, 'samples': 23520, 'steps': 734, 'loss/train': 8.418582916259766} +01/22/2022 22:48:02 - INFO - codeparrot_training - Step 736: {'lr': 0.00018375, 'samples': 23552, 'steps': 735, 'loss/train': 8.433479309082031} +01/22/2022 22:48:02 - INFO - codeparrot_training - Step 737: {'lr': 0.000184, 'samples': 23584, 'steps': 736, 'loss/train': 8.52504825592041} +01/22/2022 22:48:03 - INFO - codeparrot_training - Step 738: {'lr': 0.00018425, 'samples': 23616, 'steps': 737, 'loss/train': 8.468026161193848} +01/22/2022 22:48:03 - INFO - codeparrot_training - Step 739: {'lr': 0.0001845, 'samples': 23648, 'steps': 738, 'loss/train': 7.804992198944092} +01/22/2022 22:48:04 - INFO - codeparrot_training - Step 740: {'lr': 0.00018475, 'samples': 23680, 'steps': 739, 'loss/train': 8.4063720703125} +01/22/2022 22:48:04 - INFO - codeparrot_training - Step 741: {'lr': 0.000185, 'samples': 23712, 'steps': 740, 'loss/train': 9.1447172164917} +01/22/2022 22:48:05 - INFO - codeparrot_training - Step 742: {'lr': 0.00018525, 'samples': 23744, 'steps': 741, 'loss/train': 8.391322135925293} +01/22/2022 22:48:05 - INFO - codeparrot_training - Step 743: {'lr': 0.0001855, 'samples': 23776, 'steps': 742, 'loss/train': 7.451840877532959} +01/22/2022 22:48:06 - INFO - codeparrot_training - Step 744: {'lr': 0.00018575000000000002, 'samples': 23808, 'steps': 743, 'loss/train': 8.738476753234863} +01/22/2022 22:48:07 - INFO - codeparrot_training - Step 745: {'lr': 0.000186, 'samples': 23840, 'steps': 744, 'loss/train': 8.635151863098145} +01/22/2022 22:48:07 - INFO - codeparrot_training - Step 746: {'lr': 0.00018625, 'samples': 23872, 'steps': 745, 'loss/train': 8.462993621826172} +01/22/2022 22:48:08 - INFO - codeparrot_training - Step 747: {'lr': 0.0001865, 'samples': 23904, 'steps': 746, 'loss/train': 8.06332778930664} +01/22/2022 22:48:09 - INFO - codeparrot_training - Step 748: {'lr': 0.00018675, 'samples': 23936, 'steps': 747, 'loss/train': 8.015939712524414} +01/22/2022 22:48:09 - INFO - codeparrot_training - Step 749: {'lr': 0.000187, 'samples': 23968, 'steps': 748, 'loss/train': 7.541342735290527} +01/22/2022 22:48:10 - INFO - codeparrot_training - Step 750: {'lr': 0.00018725, 'samples': 24000, 'steps': 749, 'loss/train': 9.913533210754395} +01/22/2022 22:48:11 - INFO - codeparrot_training - Step 751: {'lr': 0.0001875, 'samples': 24032, 'steps': 750, 'loss/train': 7.240062236785889} +01/22/2022 22:48:11 - INFO - codeparrot_training - Step 752: {'lr': 0.00018775, 'samples': 24064, 'steps': 751, 'loss/train': 7.960046768188477} +01/22/2022 22:48:12 - INFO - codeparrot_training - Step 753: {'lr': 0.00018800000000000002, 'samples': 24096, 'steps': 752, 'loss/train': 8.63418960571289} +01/22/2022 22:48:12 - INFO - codeparrot_training - Step 754: {'lr': 0.00018825, 'samples': 24128, 'steps': 753, 'loss/train': 8.398453712463379} +01/22/2022 22:48:13 - INFO - codeparrot_training - Step 755: {'lr': 0.0001885, 'samples': 24160, 'steps': 754, 'loss/train': 8.076301574707031} +01/22/2022 22:48:13 - INFO - codeparrot_training - Step 756: {'lr': 0.00018875, 'samples': 24192, 'steps': 755, 'loss/train': 7.729125499725342} +01/22/2022 22:48:14 - INFO - codeparrot_training - Step 757: {'lr': 0.000189, 
'samples': 24224, 'steps': 756, 'loss/train': 8.521883964538574} +01/22/2022 22:48:14 - INFO - codeparrot_training - Step 758: {'lr': 0.00018925, 'samples': 24256, 'steps': 757, 'loss/train': 8.222210884094238} +01/22/2022 22:48:15 - INFO - codeparrot_training - Step 759: {'lr': 0.0001895, 'samples': 24288, 'steps': 758, 'loss/train': 6.709630489349365} +01/22/2022 22:48:16 - INFO - codeparrot_training - Step 760: {'lr': 0.00018975, 'samples': 24320, 'steps': 759, 'loss/train': 8.420241355895996} +01/22/2022 22:48:16 - INFO - codeparrot_training - Step 761: {'lr': 0.00019, 'samples': 24352, 'steps': 760, 'loss/train': 7.877121448516846} +01/22/2022 22:48:17 - INFO - codeparrot_training - Step 762: {'lr': 0.00019025000000000002, 'samples': 24384, 'steps': 761, 'loss/train': 8.085004806518555} +01/22/2022 22:48:17 - INFO - codeparrot_training - Step 763: {'lr': 0.0001905, 'samples': 24416, 'steps': 762, 'loss/train': 7.42026424407959} +01/22/2022 22:48:18 - INFO - codeparrot_training - Step 764: {'lr': 0.00019075, 'samples': 24448, 'steps': 763, 'loss/train': 7.304914951324463} +01/22/2022 22:48:18 - INFO - codeparrot_training - Step 765: {'lr': 0.000191, 'samples': 24480, 'steps': 764, 'loss/train': 8.047215461730957} +01/22/2022 22:48:19 - INFO - codeparrot_training - Step 766: {'lr': 0.00019125000000000001, 'samples': 24512, 'steps': 765, 'loss/train': 8.698212623596191} +01/22/2022 22:48:19 - INFO - codeparrot_training - Step 767: {'lr': 0.00019150000000000002, 'samples': 24544, 'steps': 766, 'loss/train': 8.314916610717773} +01/22/2022 22:48:20 - INFO - codeparrot_training - Step 768: {'lr': 0.00019175, 'samples': 24576, 'steps': 767, 'loss/train': 7.095439910888672} +01/22/2022 22:48:21 - INFO - codeparrot_training - Step 769: {'lr': 0.000192, 'samples': 24608, 'steps': 768, 'loss/train': 8.379477500915527} +01/22/2022 22:48:21 - INFO - codeparrot_training - Step 770: {'lr': 0.00019225, 'samples': 24640, 'steps': 769, 'loss/train': 7.863307476043701} +01/22/2022 22:48:22 - INFO - codeparrot_training - Step 771: {'lr': 0.00019250000000000002, 'samples': 24672, 'steps': 770, 'loss/train': 8.67959213256836} +01/22/2022 22:48:22 - INFO - codeparrot_training - Step 772: {'lr': 0.00019275, 'samples': 24704, 'steps': 771, 'loss/train': 9.661703109741211} +01/22/2022 22:48:23 - INFO - codeparrot_training - Step 773: {'lr': 0.000193, 'samples': 24736, 'steps': 772, 'loss/train': 8.130134582519531} +01/22/2022 22:48:23 - INFO - codeparrot_training - Step 774: {'lr': 0.00019325, 'samples': 24768, 'steps': 773, 'loss/train': 8.722664833068848} +01/22/2022 22:48:24 - INFO - codeparrot_training - Step 775: {'lr': 0.00019350000000000001, 'samples': 24800, 'steps': 774, 'loss/train': 7.9335198402404785} +01/22/2022 22:48:24 - INFO - codeparrot_training - Step 776: {'lr': 0.00019375000000000002, 'samples': 24832, 'steps': 775, 'loss/train': 8.45226764678955} +01/22/2022 22:48:27 - INFO - codeparrot_training - Step 777: {'lr': 0.000194, 'samples': 24864, 'steps': 776, 'loss/train': 8.205399513244629} +01/22/2022 22:48:28 - INFO - codeparrot_training - Step 778: {'lr': 0.00019425, 'samples': 24896, 'steps': 777, 'loss/train': 7.814345836639404} +01/22/2022 22:48:29 - INFO - codeparrot_training - Step 779: {'lr': 0.0001945, 'samples': 24928, 'steps': 778, 'loss/train': 8.168481826782227} +01/22/2022 22:48:29 - INFO - codeparrot_training - Step 780: {'lr': 0.00019475000000000002, 'samples': 24960, 'steps': 779, 'loss/train': 8.238773345947266} +01/22/2022 22:48:30 - INFO - codeparrot_training - Step 781: 
{'lr': 0.00019500000000000002, 'samples': 24992, 'steps': 780, 'loss/train': 8.2846040725708} +01/22/2022 22:48:30 - INFO - codeparrot_training - Step 782: {'lr': 0.00019525, 'samples': 25024, 'steps': 781, 'loss/train': 8.735726356506348} +01/22/2022 22:48:31 - INFO - codeparrot_training - Step 783: {'lr': 0.0001955, 'samples': 25056, 'steps': 782, 'loss/train': 8.603111267089844} +01/22/2022 22:48:31 - INFO - codeparrot_training - Step 784: {'lr': 0.00019575000000000001, 'samples': 25088, 'steps': 783, 'loss/train': 8.510063171386719} +01/22/2022 22:48:32 - INFO - codeparrot_training - Step 785: {'lr': 0.00019600000000000002, 'samples': 25120, 'steps': 784, 'loss/train': 7.865845203399658} +01/22/2022 22:48:32 - INFO - codeparrot_training - Step 786: {'lr': 0.00019625, 'samples': 25152, 'steps': 785, 'loss/train': 7.925961971282959} +01/22/2022 22:48:33 - INFO - codeparrot_training - Step 787: {'lr': 0.0001965, 'samples': 25184, 'steps': 786, 'loss/train': 8.220290184020996} +01/22/2022 22:48:34 - INFO - codeparrot_training - Step 788: {'lr': 0.00019675, 'samples': 25216, 'steps': 787, 'loss/train': 7.931642055511475} +01/22/2022 22:48:34 - INFO - codeparrot_training - Step 789: {'lr': 0.00019700000000000002, 'samples': 25248, 'steps': 788, 'loss/train': 8.149664878845215} +01/22/2022 22:48:35 - INFO - codeparrot_training - Step 790: {'lr': 0.00019725000000000002, 'samples': 25280, 'steps': 789, 'loss/train': 8.43366813659668} +01/22/2022 22:48:35 - INFO - codeparrot_training - Step 791: {'lr': 0.0001975, 'samples': 25312, 'steps': 790, 'loss/train': 8.780027389526367} +01/22/2022 22:48:36 - INFO - codeparrot_training - Step 792: {'lr': 0.00019775, 'samples': 25344, 'steps': 791, 'loss/train': 8.392693519592285} +01/22/2022 22:48:36 - INFO - codeparrot_training - Step 793: {'lr': 0.00019800000000000002, 'samples': 25376, 'steps': 792, 'loss/train': 7.864264965057373} +01/22/2022 22:48:37 - INFO - codeparrot_training - Step 794: {'lr': 0.00019825000000000002, 'samples': 25408, 'steps': 793, 'loss/train': 7.918691635131836} +01/22/2022 22:48:37 - INFO - codeparrot_training - Step 795: {'lr': 0.00019850000000000003, 'samples': 25440, 'steps': 794, 'loss/train': 8.213133811950684} +01/22/2022 22:48:38 - INFO - codeparrot_training - Step 796: {'lr': 0.00019875, 'samples': 25472, 'steps': 795, 'loss/train': 8.449883460998535} +01/22/2022 22:48:39 - INFO - codeparrot_training - Step 797: {'lr': 0.000199, 'samples': 25504, 'steps': 796, 'loss/train': 8.09809398651123} +01/22/2022 22:48:39 - INFO - codeparrot_training - Step 798: {'lr': 0.00019925000000000002, 'samples': 25536, 'steps': 797, 'loss/train': 7.556297302246094} +01/22/2022 22:48:40 - INFO - codeparrot_training - Step 799: {'lr': 0.00019950000000000002, 'samples': 25568, 'steps': 798, 'loss/train': 7.947111129760742} +01/22/2022 22:48:40 - INFO - codeparrot_training - Step 800: {'lr': 0.00019975, 'samples': 25600, 'steps': 799, 'loss/train': 8.315369606018066} +01/22/2022 22:48:41 - INFO - codeparrot_training - Step 801: {'lr': 0.0002, 'samples': 25632, 'steps': 800, 'loss/train': 8.229379653930664} +01/22/2022 22:48:41 - INFO - codeparrot_training - Step 802: {'lr': 0.00020025000000000002, 'samples': 25664, 'steps': 801, 'loss/train': 7.795218467712402} +01/22/2022 22:48:42 - INFO - codeparrot_training - Step 803: {'lr': 0.00020050000000000002, 'samples': 25696, 'steps': 802, 'loss/train': 7.00067138671875} +01/22/2022 22:48:43 - INFO - codeparrot_training - Step 804: {'lr': 0.00020075000000000003, 'samples': 25728, 'steps': 803, 
'loss/train': 8.232527732849121} +01/22/2022 22:48:43 - INFO - codeparrot_training - Step 805: {'lr': 0.000201, 'samples': 25760, 'steps': 804, 'loss/train': 6.494942665100098} +01/22/2022 22:48:44 - INFO - codeparrot_training - Step 806: {'lr': 0.00020125, 'samples': 25792, 'steps': 805, 'loss/train': 8.411247253417969} +01/22/2022 22:48:45 - INFO - codeparrot_training - Step 807: {'lr': 0.00020150000000000002, 'samples': 25824, 'steps': 806, 'loss/train': 9.05556583404541} +01/22/2022 22:48:45 - INFO - codeparrot_training - Step 808: {'lr': 0.00020175000000000003, 'samples': 25856, 'steps': 807, 'loss/train': 8.395228385925293} +01/22/2022 22:48:46 - INFO - codeparrot_training - Step 809: {'lr': 0.000202, 'samples': 25888, 'steps': 808, 'loss/train': 7.660242557525635} +01/22/2022 22:48:47 - INFO - codeparrot_training - Step 810: {'lr': 0.00020225, 'samples': 25920, 'steps': 809, 'loss/train': 7.957713603973389} +01/22/2022 22:48:47 - INFO - codeparrot_training - Step 811: {'lr': 0.00020250000000000002, 'samples': 25952, 'steps': 810, 'loss/train': 8.158524513244629} +01/22/2022 22:48:48 - INFO - codeparrot_training - Step 812: {'lr': 0.00020275000000000002, 'samples': 25984, 'steps': 811, 'loss/train': 8.736252784729004} +01/22/2022 22:48:48 - INFO - codeparrot_training - Step 813: {'lr': 0.00020300000000000003, 'samples': 26016, 'steps': 812, 'loss/train': 8.11170768737793} +01/22/2022 22:48:49 - INFO - codeparrot_training - Step 814: {'lr': 0.00020324999999999998, 'samples': 26048, 'steps': 813, 'loss/train': 7.472760200500488} +01/22/2022 22:48:49 - INFO - codeparrot_training - Step 815: {'lr': 0.00020349999999999999, 'samples': 26080, 'steps': 814, 'loss/train': 8.540314674377441} +01/22/2022 22:48:50 - INFO - codeparrot_training - Step 816: {'lr': 0.00020375, 'samples': 26112, 'steps': 815, 'loss/train': 8.067387580871582} +01/22/2022 22:48:50 - INFO - codeparrot_training - Step 817: {'lr': 0.000204, 'samples': 26144, 'steps': 816, 'loss/train': 8.299606323242188} +01/22/2022 22:48:51 - INFO - codeparrot_training - Step 818: {'lr': 0.00020425, 'samples': 26176, 'steps': 817, 'loss/train': 7.566084861755371} +01/22/2022 22:48:52 - INFO - codeparrot_training - Step 819: {'lr': 0.00020449999999999998, 'samples': 26208, 'steps': 818, 'loss/train': 8.03598690032959} +01/22/2022 22:48:52 - INFO - codeparrot_training - Step 820: {'lr': 0.00020475, 'samples': 26240, 'steps': 819, 'loss/train': 7.425419330596924} +01/22/2022 22:48:53 - INFO - codeparrot_training - Step 821: {'lr': 0.000205, 'samples': 26272, 'steps': 820, 'loss/train': 8.37061882019043} +01/22/2022 22:48:53 - INFO - codeparrot_training - Step 822: {'lr': 0.00020525, 'samples': 26304, 'steps': 821, 'loss/train': 7.775050640106201} +01/22/2022 22:48:54 - INFO - codeparrot_training - Step 823: {'lr': 0.00020549999999999998, 'samples': 26336, 'steps': 822, 'loss/train': 7.552173614501953} +01/22/2022 22:48:54 - INFO - codeparrot_training - Step 824: {'lr': 0.00020575, 'samples': 26368, 'steps': 823, 'loss/train': 8.989880561828613} +01/22/2022 22:48:55 - INFO - codeparrot_training - Step 825: {'lr': 0.000206, 'samples': 26400, 'steps': 824, 'loss/train': 8.644359588623047} +01/22/2022 22:48:55 - INFO - codeparrot_training - Step 826: {'lr': 0.00020625, 'samples': 26432, 'steps': 825, 'loss/train': 9.764077186584473} +01/22/2022 22:48:56 - INFO - codeparrot_training - Step 827: {'lr': 0.0002065, 'samples': 26464, 'steps': 826, 'loss/train': 8.787796974182129} +01/22/2022 22:48:57 - INFO - codeparrot_training - Step 828: {'lr': 
0.00020674999999999998, 'samples': 26496, 'steps': 827, 'loss/train': 8.281824111938477} +01/22/2022 22:48:57 - INFO - codeparrot_training - Step 829: {'lr': 0.000207, 'samples': 26528, 'steps': 828, 'loss/train': 8.366814613342285} +01/22/2022 22:48:58 - INFO - codeparrot_training - Step 830: {'lr': 0.00020725, 'samples': 26560, 'steps': 829, 'loss/train': 7.688086032867432} +01/22/2022 22:48:58 - INFO - codeparrot_training - Step 831: {'lr': 0.0002075, 'samples': 26592, 'steps': 830, 'loss/train': 8.138201713562012} +01/22/2022 22:48:59 - INFO - codeparrot_training - Step 832: {'lr': 0.00020774999999999998, 'samples': 26624, 'steps': 831, 'loss/train': 8.522978782653809} +01/22/2022 22:48:59 - INFO - codeparrot_training - Step 833: {'lr': 0.000208, 'samples': 26656, 'steps': 832, 'loss/train': 8.56046199798584} +01/22/2022 22:49:00 - INFO - codeparrot_training - Step 834: {'lr': 0.00020825, 'samples': 26688, 'steps': 833, 'loss/train': 8.453502655029297} +01/22/2022 22:49:00 - INFO - codeparrot_training - Step 835: {'lr': 0.0002085, 'samples': 26720, 'steps': 834, 'loss/train': 8.162931442260742} +01/22/2022 22:49:02 - INFO - codeparrot_training - Step 836: {'lr': 0.00020875, 'samples': 26752, 'steps': 835, 'loss/train': 7.892072677612305} +01/22/2022 22:49:02 - INFO - codeparrot_training - Step 837: {'lr': 0.00020899999999999998, 'samples': 26784, 'steps': 836, 'loss/train': 8.17908000946045} +01/22/2022 22:49:03 - INFO - codeparrot_training - Step 838: {'lr': 0.00020925, 'samples': 26816, 'steps': 837, 'loss/train': 8.323698043823242} +01/22/2022 22:49:04 - INFO - codeparrot_training - Step 839: {'lr': 0.0002095, 'samples': 26848, 'steps': 838, 'loss/train': 7.590595245361328} +01/22/2022 22:49:04 - INFO - codeparrot_training - Step 840: {'lr': 0.00020975, 'samples': 26880, 'steps': 839, 'loss/train': 7.644768714904785} +01/22/2022 22:49:05 - INFO - codeparrot_training - Step 841: {'lr': 0.00021, 'samples': 26912, 'steps': 840, 'loss/train': 8.189897537231445} +01/22/2022 22:49:05 - INFO - codeparrot_training - Step 842: {'lr': 0.00021025, 'samples': 26944, 'steps': 841, 'loss/train': 8.911812782287598} +01/22/2022 22:49:06 - INFO - codeparrot_training - Step 843: {'lr': 0.0002105, 'samples': 26976, 'steps': 842, 'loss/train': 7.261569023132324} +01/22/2022 22:49:06 - INFO - codeparrot_training - Step 844: {'lr': 0.00021075, 'samples': 27008, 'steps': 843, 'loss/train': 7.714791774749756} +01/22/2022 22:49:07 - INFO - codeparrot_training - Step 845: {'lr': 0.000211, 'samples': 27040, 'steps': 844, 'loss/train': 8.975062370300293} +01/22/2022 22:49:07 - INFO - codeparrot_training - Step 846: {'lr': 0.00021124999999999998, 'samples': 27072, 'steps': 845, 'loss/train': 8.491747856140137} +01/22/2022 22:49:08 - INFO - codeparrot_training - Step 847: {'lr': 0.0002115, 'samples': 27104, 'steps': 846, 'loss/train': 7.677897930145264} +01/22/2022 22:49:09 - INFO - codeparrot_training - Step 848: {'lr': 0.00021175, 'samples': 27136, 'steps': 847, 'loss/train': 7.217350006103516} +01/22/2022 22:49:09 - INFO - codeparrot_training - Step 849: {'lr': 0.000212, 'samples': 27168, 'steps': 848, 'loss/train': 8.381860733032227} +01/22/2022 22:49:10 - INFO - codeparrot_training - Step 850: {'lr': 0.00021225, 'samples': 27200, 'steps': 849, 'loss/train': 7.260543346405029} +01/22/2022 22:49:10 - INFO - codeparrot_training - Step 851: {'lr': 0.0002125, 'samples': 27232, 'steps': 850, 'loss/train': 8.023141860961914} +01/22/2022 22:49:11 - INFO - codeparrot_training - Step 852: {'lr': 0.00021275, 
'samples': 27264, 'steps': 851, 'loss/train': 8.386008262634277} +01/22/2022 22:49:11 - INFO - codeparrot_training - Step 853: {'lr': 0.000213, 'samples': 27296, 'steps': 852, 'loss/train': 8.239236831665039} +01/22/2022 22:49:12 - INFO - codeparrot_training - Step 854: {'lr': 0.00021325, 'samples': 27328, 'steps': 853, 'loss/train': 8.601543426513672} +01/22/2022 22:49:12 - INFO - codeparrot_training - Step 855: {'lr': 0.0002135, 'samples': 27360, 'steps': 854, 'loss/train': 7.124788761138916} +01/22/2022 22:49:13 - INFO - codeparrot_training - Step 856: {'lr': 0.00021375, 'samples': 27392, 'steps': 855, 'loss/train': 8.919700622558594} +01/22/2022 22:49:14 - INFO - codeparrot_training - Step 857: {'lr': 0.000214, 'samples': 27424, 'steps': 856, 'loss/train': 8.208852767944336} +01/22/2022 22:49:14 - INFO - codeparrot_training - Step 858: {'lr': 0.00021425, 'samples': 27456, 'steps': 857, 'loss/train': 7.997008323669434} +01/22/2022 22:49:15 - INFO - codeparrot_training - Step 859: {'lr': 0.0002145, 'samples': 27488, 'steps': 858, 'loss/train': 7.1437177658081055} +01/22/2022 22:49:15 - INFO - codeparrot_training - Step 860: {'lr': 0.00021475, 'samples': 27520, 'steps': 859, 'loss/train': 8.494915008544922} +01/22/2022 22:49:16 - INFO - codeparrot_training - Step 861: {'lr': 0.000215, 'samples': 27552, 'steps': 860, 'loss/train': 8.025507926940918} +01/22/2022 22:49:16 - INFO - codeparrot_training - Step 862: {'lr': 0.00021525, 'samples': 27584, 'steps': 861, 'loss/train': 7.925518989562988} +01/22/2022 22:49:17 - INFO - codeparrot_training - Step 863: {'lr': 0.0002155, 'samples': 27616, 'steps': 862, 'loss/train': 7.822272777557373} +01/22/2022 22:49:18 - INFO - codeparrot_training - Step 864: {'lr': 0.00021575, 'samples': 27648, 'steps': 863, 'loss/train': 7.739991188049316} +01/22/2022 22:49:19 - INFO - codeparrot_training - Step 865: {'lr': 0.000216, 'samples': 27680, 'steps': 864, 'loss/train': 6.2165207862854} +01/22/2022 22:49:19 - INFO - codeparrot_training - Step 866: {'lr': 0.00021625, 'samples': 27712, 'steps': 865, 'loss/train': 8.083052635192871} +01/22/2022 22:49:20 - INFO - codeparrot_training - Step 867: {'lr': 0.0002165, 'samples': 27744, 'steps': 866, 'loss/train': 8.60106372833252} +01/22/2022 22:49:20 - INFO - codeparrot_training - Step 868: {'lr': 0.00021675, 'samples': 27776, 'steps': 867, 'loss/train': 7.691922664642334} +01/22/2022 22:49:21 - INFO - codeparrot_training - Step 869: {'lr': 0.00021700000000000002, 'samples': 27808, 'steps': 868, 'loss/train': 8.392871856689453} +01/22/2022 22:49:22 - INFO - codeparrot_training - Step 870: {'lr': 0.00021725, 'samples': 27840, 'steps': 869, 'loss/train': 8.47274398803711} +01/22/2022 22:49:22 - INFO - codeparrot_training - Step 871: {'lr': 0.0002175, 'samples': 27872, 'steps': 870, 'loss/train': 8.238438606262207} +01/22/2022 22:49:23 - INFO - codeparrot_training - Step 872: {'lr': 0.00021775, 'samples': 27904, 'steps': 871, 'loss/train': 7.85081672668457} +01/22/2022 22:49:23 - INFO - codeparrot_training - Step 873: {'lr': 0.000218, 'samples': 27936, 'steps': 872, 'loss/train': 7.89611291885376} +01/22/2022 22:49:24 - INFO - codeparrot_training - Step 874: {'lr': 0.00021825, 'samples': 27968, 'steps': 873, 'loss/train': 7.709920883178711} +01/22/2022 22:49:24 - INFO - codeparrot_training - Step 875: {'lr': 0.0002185, 'samples': 28000, 'steps': 874, 'loss/train': 7.974410057067871} +01/22/2022 22:49:25 - INFO - codeparrot_training - Step 876: {'lr': 0.00021875, 'samples': 28032, 'steps': 875, 'loss/train': 
8.033716201782227} +01/22/2022 22:49:25 - INFO - codeparrot_training - Step 877: {'lr': 0.000219, 'samples': 28064, 'steps': 876, 'loss/train': 8.19480037689209} +01/22/2022 22:49:26 - INFO - codeparrot_training - Step 878: {'lr': 0.00021925000000000002, 'samples': 28096, 'steps': 877, 'loss/train': 7.971493244171143} +01/22/2022 22:49:27 - INFO - codeparrot_training - Step 879: {'lr': 0.0002195, 'samples': 28128, 'steps': 878, 'loss/train': 8.276020050048828} +01/22/2022 22:49:27 - INFO - codeparrot_training - Step 880: {'lr': 0.00021975, 'samples': 28160, 'steps': 879, 'loss/train': 8.110627174377441} +01/22/2022 22:49:28 - INFO - codeparrot_training - Step 881: {'lr': 0.00022, 'samples': 28192, 'steps': 880, 'loss/train': 8.62890338897705} +01/22/2022 22:49:28 - INFO - codeparrot_training - Step 882: {'lr': 0.00022025000000000001, 'samples': 28224, 'steps': 881, 'loss/train': 8.895747184753418} +01/22/2022 22:49:29 - INFO - codeparrot_training - Step 883: {'lr': 0.0002205, 'samples': 28256, 'steps': 882, 'loss/train': 7.630026817321777} +01/22/2022 22:49:29 - INFO - codeparrot_training - Step 884: {'lr': 0.00022075, 'samples': 28288, 'steps': 883, 'loss/train': 7.294689178466797} +01/22/2022 22:49:30 - INFO - codeparrot_training - Step 885: {'lr': 0.000221, 'samples': 28320, 'steps': 884, 'loss/train': 7.670246601104736} +01/22/2022 22:49:30 - INFO - codeparrot_training - Step 886: {'lr': 0.00022125, 'samples': 28352, 'steps': 885, 'loss/train': 7.731987476348877} +01/22/2022 22:49:31 - INFO - codeparrot_training - Step 887: {'lr': 0.00022150000000000002, 'samples': 28384, 'steps': 886, 'loss/train': 8.076013565063477} +01/22/2022 22:49:32 - INFO - codeparrot_training - Step 888: {'lr': 0.00022175, 'samples': 28416, 'steps': 887, 'loss/train': 8.491042137145996} +01/22/2022 22:49:32 - INFO - codeparrot_training - Step 889: {'lr': 0.000222, 'samples': 28448, 'steps': 888, 'loss/train': 8.100528717041016} +01/22/2022 22:49:33 - INFO - codeparrot_training - Step 890: {'lr': 0.00022225, 'samples': 28480, 'steps': 889, 'loss/train': 7.6193952560424805} +01/22/2022 22:49:33 - INFO - codeparrot_training - Step 891: {'lr': 0.00022250000000000001, 'samples': 28512, 'steps': 890, 'loss/train': 7.911403179168701} +01/22/2022 22:49:34 - INFO - codeparrot_training - Step 892: {'lr': 0.00022275000000000002, 'samples': 28544, 'steps': 891, 'loss/train': 7.824960708618164} +01/22/2022 22:49:35 - INFO - codeparrot_training - Step 893: {'lr': 0.000223, 'samples': 28576, 'steps': 892, 'loss/train': 7.823108673095703} +01/22/2022 22:49:36 - INFO - codeparrot_training - Step 894: {'lr': 0.00022325, 'samples': 28608, 'steps': 893, 'loss/train': 7.803391456604004} +01/22/2022 22:49:36 - INFO - codeparrot_training - Step 895: {'lr': 0.0002235, 'samples': 28640, 'steps': 894, 'loss/train': 7.315047740936279} +01/22/2022 22:49:37 - INFO - codeparrot_training - Step 896: {'lr': 0.00022375000000000002, 'samples': 28672, 'steps': 895, 'loss/train': 7.47711706161499} +01/22/2022 22:49:37 - INFO - codeparrot_training - Step 897: {'lr': 0.000224, 'samples': 28704, 'steps': 896, 'loss/train': 7.544156551361084} +01/22/2022 22:49:38 - INFO - codeparrot_training - Step 898: {'lr': 0.00022425, 'samples': 28736, 'steps': 897, 'loss/train': 7.726268291473389} +01/22/2022 22:49:39 - INFO - codeparrot_training - Step 899: {'lr': 0.0002245, 'samples': 28768, 'steps': 898, 'loss/train': 7.724407196044922} +01/22/2022 22:49:39 - INFO - codeparrot_training - Step 900: {'lr': 0.00022475000000000001, 'samples': 28800, 'steps': 899, 
'loss/train': 7.461881637573242} +01/22/2022 22:49:40 - INFO - codeparrot_training - Step 901: {'lr': 0.00022500000000000002, 'samples': 28832, 'steps': 900, 'loss/train': 7.183468341827393} +01/22/2022 22:49:40 - INFO - codeparrot_training - Step 902: {'lr': 0.00022525, 'samples': 28864, 'steps': 901, 'loss/train': 7.683173179626465} +01/22/2022 22:49:41 - INFO - codeparrot_training - Step 903: {'lr': 0.0002255, 'samples': 28896, 'steps': 902, 'loss/train': 8.214045524597168} +01/22/2022 22:49:41 - INFO - codeparrot_training - Step 904: {'lr': 0.00022575, 'samples': 28928, 'steps': 903, 'loss/train': 9.126930236816406} +01/22/2022 22:49:42 - INFO - codeparrot_training - Step 905: {'lr': 0.00022600000000000002, 'samples': 28960, 'steps': 904, 'loss/train': 8.113203048706055} +01/22/2022 22:49:42 - INFO - codeparrot_training - Step 906: {'lr': 0.00022625000000000002, 'samples': 28992, 'steps': 905, 'loss/train': 7.639524936676025} +01/22/2022 22:49:43 - INFO - codeparrot_training - Step 907: {'lr': 0.0002265, 'samples': 29024, 'steps': 906, 'loss/train': 7.660022258758545} +01/22/2022 22:49:44 - INFO - codeparrot_training - Step 908: {'lr': 0.00022675, 'samples': 29056, 'steps': 907, 'loss/train': 8.524105072021484} +01/22/2022 22:49:44 - INFO - codeparrot_training - Step 909: {'lr': 0.00022700000000000002, 'samples': 29088, 'steps': 908, 'loss/train': 6.352202892303467} +01/22/2022 22:49:45 - INFO - codeparrot_training - Step 910: {'lr': 0.00022725000000000002, 'samples': 29120, 'steps': 909, 'loss/train': 7.995129585266113} +01/22/2022 22:49:45 - INFO - codeparrot_training - Step 911: {'lr': 0.0002275, 'samples': 29152, 'steps': 910, 'loss/train': 7.388238430023193} +01/22/2022 22:49:46 - INFO - codeparrot_training - Step 912: {'lr': 0.00022775, 'samples': 29184, 'steps': 911, 'loss/train': 7.34617805480957} +01/22/2022 22:49:46 - INFO - codeparrot_training - Step 913: {'lr': 0.000228, 'samples': 29216, 'steps': 912, 'loss/train': 7.492978572845459} +01/22/2022 22:49:47 - INFO - codeparrot_training - Step 914: {'lr': 0.00022825000000000002, 'samples': 29248, 'steps': 913, 'loss/train': 7.903454303741455} +01/22/2022 22:49:47 - INFO - codeparrot_training - Step 915: {'lr': 0.00022850000000000002, 'samples': 29280, 'steps': 914, 'loss/train': 7.630887508392334} +01/22/2022 22:49:48 - INFO - codeparrot_training - Step 916: {'lr': 0.00022875, 'samples': 29312, 'steps': 915, 'loss/train': 8.286359786987305} +01/22/2022 22:49:49 - INFO - codeparrot_training - Step 917: {'lr': 0.000229, 'samples': 29344, 'steps': 916, 'loss/train': 6.574650287628174} +01/22/2022 22:49:49 - INFO - codeparrot_training - Step 918: {'lr': 0.00022925000000000002, 'samples': 29376, 'steps': 917, 'loss/train': 8.153668403625488} +01/22/2022 22:49:50 - INFO - codeparrot_training - Step 919: {'lr': 0.00022950000000000002, 'samples': 29408, 'steps': 918, 'loss/train': 7.903294086456299} +01/22/2022 22:49:50 - INFO - codeparrot_training - Step 920: {'lr': 0.00022975000000000003, 'samples': 29440, 'steps': 919, 'loss/train': 7.032538414001465} +01/22/2022 22:49:51 - INFO - codeparrot_training - Step 921: {'lr': 0.00023, 'samples': 29472, 'steps': 920, 'loss/train': 8.170762062072754} +01/22/2022 22:49:51 - INFO - codeparrot_training - Step 922: {'lr': 0.00023025, 'samples': 29504, 'steps': 921, 'loss/train': 8.347040176391602} +01/22/2022 22:49:54 - INFO - codeparrot_training - Step 923: {'lr': 0.00023050000000000002, 'samples': 29536, 'steps': 922, 'loss/train': 7.755369663238525} +01/22/2022 22:49:55 - INFO - 
codeparrot_training - Step 924: {'lr': 0.00023075000000000003, 'samples': 29568, 'steps': 923, 'loss/train': 7.638537406921387} +01/22/2022 22:49:56 - INFO - codeparrot_training - Step 925: {'lr': 0.000231, 'samples': 29600, 'steps': 924, 'loss/train': 7.502619743347168} +01/22/2022 22:49:56 - INFO - codeparrot_training - Step 926: {'lr': 0.00023125, 'samples': 29632, 'steps': 925, 'loss/train': 8.034525871276855} +01/22/2022 22:49:57 - INFO - codeparrot_training - Step 927: {'lr': 0.00023150000000000002, 'samples': 29664, 'steps': 926, 'loss/train': 8.12807846069336} +01/22/2022 22:49:57 - INFO - codeparrot_training - Step 928: {'lr': 0.00023175000000000002, 'samples': 29696, 'steps': 927, 'loss/train': 8.919533729553223} +01/22/2022 22:49:58 - INFO - codeparrot_training - Step 929: {'lr': 0.00023200000000000003, 'samples': 29728, 'steps': 928, 'loss/train': 8.273025512695312} +01/22/2022 22:49:58 - INFO - codeparrot_training - Step 930: {'lr': 0.00023225, 'samples': 29760, 'steps': 929, 'loss/train': 9.564797401428223} +01/22/2022 22:49:59 - INFO - codeparrot_training - Step 931: {'lr': 0.0002325, 'samples': 29792, 'steps': 930, 'loss/train': 8.895371437072754} +01/22/2022 22:49:59 - INFO - codeparrot_training - Step 932: {'lr': 0.00023275000000000002, 'samples': 29824, 'steps': 931, 'loss/train': 9.47883415222168} +01/22/2022 22:50:00 - INFO - codeparrot_training - Step 933: {'lr': 0.00023300000000000003, 'samples': 29856, 'steps': 932, 'loss/train': 8.133171081542969} +01/22/2022 22:50:01 - INFO - codeparrot_training - Step 934: {'lr': 0.00023325, 'samples': 29888, 'steps': 933, 'loss/train': 7.112802505493164} +01/22/2022 22:50:01 - INFO - codeparrot_training - Step 935: {'lr': 0.0002335, 'samples': 29920, 'steps': 934, 'loss/train': 8.344207763671875} +01/22/2022 22:50:02 - INFO - codeparrot_training - Step 936: {'lr': 0.00023375000000000002, 'samples': 29952, 'steps': 935, 'loss/train': 7.903488636016846} +01/22/2022 22:50:02 - INFO - codeparrot_training - Step 937: {'lr': 0.00023400000000000002, 'samples': 29984, 'steps': 936, 'loss/train': 8.440382957458496} +01/22/2022 22:50:03 - INFO - codeparrot_training - Step 938: {'lr': 0.00023425000000000003, 'samples': 30016, 'steps': 937, 'loss/train': 8.745182991027832} +01/22/2022 22:50:03 - INFO - codeparrot_training - Step 939: {'lr': 0.00023449999999999998, 'samples': 30048, 'steps': 938, 'loss/train': 8.367509841918945} +01/22/2022 22:50:04 - INFO - codeparrot_training - Step 940: {'lr': 0.00023475, 'samples': 30080, 'steps': 939, 'loss/train': 8.200569152832031} +01/22/2022 22:50:05 - INFO - codeparrot_training - Step 941: {'lr': 0.000235, 'samples': 30112, 'steps': 940, 'loss/train': 7.6512064933776855} +01/22/2022 22:50:05 - INFO - codeparrot_training - Step 942: {'lr': 0.00023525, 'samples': 30144, 'steps': 941, 'loss/train': 9.749458312988281} +01/22/2022 22:50:06 - INFO - codeparrot_training - Step 943: {'lr': 0.0002355, 'samples': 30176, 'steps': 942, 'loss/train': 8.003072738647461} +01/22/2022 22:50:06 - INFO - codeparrot_training - Step 944: {'lr': 0.00023574999999999998, 'samples': 30208, 'steps': 943, 'loss/train': 7.8578619956970215} +01/22/2022 22:50:07 - INFO - codeparrot_training - Step 945: {'lr': 0.000236, 'samples': 30240, 'steps': 944, 'loss/train': 8.805011749267578} +01/22/2022 22:50:07 - INFO - codeparrot_training - Step 946: {'lr': 0.00023625, 'samples': 30272, 'steps': 945, 'loss/train': 7.618407726287842} +01/22/2022 22:50:08 - INFO - codeparrot_training - Step 947: {'lr': 0.0002365, 'samples': 30304, 
'steps': 946, 'loss/train': 7.288451671600342} +01/22/2022 22:50:08 - INFO - codeparrot_training - Step 948: {'lr': 0.00023674999999999998, 'samples': 30336, 'steps': 947, 'loss/train': 8.528386116027832} +01/22/2022 22:50:09 - INFO - codeparrot_training - Step 949: {'lr': 0.000237, 'samples': 30368, 'steps': 948, 'loss/train': 8.512721061706543} +01/22/2022 22:50:10 - INFO - codeparrot_training - Step 950: {'lr': 0.00023725, 'samples': 30400, 'steps': 949, 'loss/train': 7.960535049438477} +01/22/2022 22:50:11 - INFO - codeparrot_training - Step 951: {'lr': 0.0002375, 'samples': 30432, 'steps': 950, 'loss/train': 8.018369674682617} +01/22/2022 22:50:11 - INFO - codeparrot_training - Step 952: {'lr': 0.00023775, 'samples': 30464, 'steps': 951, 'loss/train': 11.536239624023438} +01/22/2022 22:50:12 - INFO - codeparrot_training - Step 953: {'lr': 0.00023799999999999998, 'samples': 30496, 'steps': 952, 'loss/train': 11.259650230407715} +01/22/2022 22:50:13 - INFO - codeparrot_training - Step 954: {'lr': 0.00023825, 'samples': 30528, 'steps': 953, 'loss/train': 11.202821731567383} +01/22/2022 22:50:13 - INFO - codeparrot_training - Step 955: {'lr': 0.0002385, 'samples': 30560, 'steps': 954, 'loss/train': 11.088037490844727} +01/22/2022 22:50:14 - INFO - codeparrot_training - Step 956: {'lr': 0.00023875, 'samples': 30592, 'steps': 955, 'loss/train': 8.17888355255127} +01/22/2022 22:50:14 - INFO - codeparrot_training - Step 957: {'lr': 0.00023899999999999998, 'samples': 30624, 'steps': 956, 'loss/train': 7.729437828063965} +01/22/2022 22:50:15 - INFO - codeparrot_training - Step 958: {'lr': 0.00023925, 'samples': 30656, 'steps': 957, 'loss/train': 9.024599075317383} +01/22/2022 22:50:15 - INFO - codeparrot_training - Step 959: {'lr': 0.0002395, 'samples': 30688, 'steps': 958, 'loss/train': 7.361656188964844} +01/22/2022 22:50:16 - INFO - codeparrot_training - Step 960: {'lr': 0.00023975, 'samples': 30720, 'steps': 959, 'loss/train': 7.95465087890625} +01/22/2022 22:50:16 - INFO - codeparrot_training - Step 961: {'lr': 0.00024, 'samples': 30752, 'steps': 960, 'loss/train': 7.152798652648926} +01/22/2022 22:50:17 - INFO - codeparrot_training - Step 962: {'lr': 0.00024024999999999999, 'samples': 30784, 'steps': 961, 'loss/train': 7.845981121063232} +01/22/2022 22:50:18 - INFO - codeparrot_training - Step 963: {'lr': 0.0002405, 'samples': 30816, 'steps': 962, 'loss/train': 7.937064170837402} +01/22/2022 22:50:18 - INFO - codeparrot_training - Step 964: {'lr': 0.00024075, 'samples': 30848, 'steps': 963, 'loss/train': 7.226635456085205} +01/22/2022 22:50:19 - INFO - codeparrot_training - Step 965: {'lr': 0.000241, 'samples': 30880, 'steps': 964, 'loss/train': 8.310813903808594} +01/22/2022 22:50:19 - INFO - codeparrot_training - Step 966: {'lr': 0.00024125, 'samples': 30912, 'steps': 965, 'loss/train': 7.651209354400635} +01/22/2022 22:50:20 - INFO - codeparrot_training - Step 967: {'lr': 0.0002415, 'samples': 30944, 'steps': 966, 'loss/train': 7.823807239532471} +01/22/2022 22:50:20 - INFO - codeparrot_training - Step 968: {'lr': 0.00024175, 'samples': 30976, 'steps': 967, 'loss/train': 7.299647331237793} +01/22/2022 22:50:21 - INFO - codeparrot_training - Step 969: {'lr': 0.000242, 'samples': 31008, 'steps': 968, 'loss/train': 7.800596714019775} +01/22/2022 22:50:21 - INFO - codeparrot_training - Step 970: {'lr': 0.00024225, 'samples': 31040, 'steps': 969, 'loss/train': 8.094486236572266} +01/22/2022 22:50:22 - INFO - codeparrot_training - Step 971: {'lr': 0.00024249999999999999, 'samples': 31072, 
'steps': 970, 'loss/train': 7.372497081756592} +01/22/2022 22:50:23 - INFO - codeparrot_training - Step 972: {'lr': 0.00024275, 'samples': 31104, 'steps': 971, 'loss/train': 8.264067649841309} +01/22/2022 22:50:23 - INFO - codeparrot_training - Step 973: {'lr': 0.000243, 'samples': 31136, 'steps': 972, 'loss/train': 7.993603706359863} +01/22/2022 22:50:24 - INFO - codeparrot_training - Step 974: {'lr': 0.00024325, 'samples': 31168, 'steps': 973, 'loss/train': 7.666140556335449} +01/22/2022 22:50:24 - INFO - codeparrot_training - Step 975: {'lr': 0.0002435, 'samples': 31200, 'steps': 974, 'loss/train': 8.444876670837402} +01/22/2022 22:50:25 - INFO - codeparrot_training - Step 976: {'lr': 0.00024375, 'samples': 31232, 'steps': 975, 'loss/train': 8.014307022094727} +01/22/2022 22:50:25 - INFO - codeparrot_training - Step 977: {'lr': 0.000244, 'samples': 31264, 'steps': 976, 'loss/train': 7.504613876342773} +01/22/2022 22:50:26 - INFO - codeparrot_training - Step 978: {'lr': 0.00024425, 'samples': 31296, 'steps': 977, 'loss/train': 8.097036361694336} +01/22/2022 22:50:26 - INFO - codeparrot_training - Step 979: {'lr': 0.0002445, 'samples': 31328, 'steps': 978, 'loss/train': 8.131064414978027} +01/22/2022 22:50:27 - INFO - codeparrot_training - Step 980: {'lr': 0.00024475, 'samples': 31360, 'steps': 979, 'loss/train': 7.495687007904053} +01/22/2022 22:50:28 - INFO - codeparrot_training - Step 981: {'lr': 0.000245, 'samples': 31392, 'steps': 980, 'loss/train': 8.84865951538086} +01/22/2022 22:50:28 - INFO - codeparrot_training - Step 982: {'lr': 0.00024525, 'samples': 31424, 'steps': 981, 'loss/train': 7.86865758895874} +01/22/2022 22:50:29 - INFO - codeparrot_training - Step 983: {'lr': 0.0002455, 'samples': 31456, 'steps': 982, 'loss/train': 6.803676128387451} +01/22/2022 22:50:30 - INFO - codeparrot_training - Step 984: {'lr': 0.00024575, 'samples': 31488, 'steps': 983, 'loss/train': 7.418421268463135} +01/22/2022 22:50:30 - INFO - codeparrot_training - Step 985: {'lr': 0.000246, 'samples': 31520, 'steps': 984, 'loss/train': 8.638200759887695} +01/22/2022 22:50:31 - INFO - codeparrot_training - Step 986: {'lr': 0.00024625, 'samples': 31552, 'steps': 985, 'loss/train': 7.501163005828857} +01/22/2022 22:50:32 - INFO - codeparrot_training - Step 987: {'lr': 0.00024650000000000003, 'samples': 31584, 'steps': 986, 'loss/train': 8.010010719299316} +01/22/2022 22:50:32 - INFO - codeparrot_training - Step 988: {'lr': 0.00024675, 'samples': 31616, 'steps': 987, 'loss/train': 8.505363464355469} +01/22/2022 22:50:33 - INFO - codeparrot_training - Step 989: {'lr': 0.000247, 'samples': 31648, 'steps': 988, 'loss/train': 7.905603885650635} +01/22/2022 22:50:33 - INFO - codeparrot_training - Step 990: {'lr': 0.00024725, 'samples': 31680, 'steps': 989, 'loss/train': 7.112934112548828} +01/22/2022 22:50:34 - INFO - codeparrot_training - Step 991: {'lr': 0.0002475, 'samples': 31712, 'steps': 990, 'loss/train': 7.180699348449707} +01/22/2022 22:50:34 - INFO - codeparrot_training - Step 992: {'lr': 0.00024775, 'samples': 31744, 'steps': 991, 'loss/train': 8.4020414352417} +01/22/2022 22:50:35 - INFO - codeparrot_training - Step 993: {'lr': 0.000248, 'samples': 31776, 'steps': 992, 'loss/train': 8.145386695861816} +01/22/2022 22:50:35 - INFO - codeparrot_training - Step 994: {'lr': 0.00024825, 'samples': 31808, 'steps': 993, 'loss/train': 7.6311936378479} +01/22/2022 22:50:36 - INFO - codeparrot_training - Step 995: {'lr': 0.0002485, 'samples': 31840, 'steps': 994, 'loss/train': 7.768056392669678} +01/22/2022 
22:50:37 - INFO - codeparrot_training - Step 996: {'lr': 0.00024875, 'samples': 31872, 'steps': 995, 'loss/train': 8.292525291442871} +01/22/2022 22:50:37 - INFO - codeparrot_training - Step 997: {'lr': 0.000249, 'samples': 31904, 'steps': 996, 'loss/train': 7.671205043792725} +01/22/2022 22:50:38 - INFO - codeparrot_training - Step 998: {'lr': 0.00024925, 'samples': 31936, 'steps': 997, 'loss/train': 8.121063232421875} +01/22/2022 22:50:38 - INFO - codeparrot_training - Step 999: {'lr': 0.0002495, 'samples': 31968, 'steps': 998, 'loss/train': 7.193724155426025} +01/22/2022 22:50:39 - INFO - codeparrot_training - Step 1000: {'lr': 0.00024975, 'samples': 32000, 'steps': 999, 'loss/train': 7.780331611633301} +01/22/2022 22:50:39 - INFO - codeparrot_training - Step 1001: {'lr': 0.00025, 'samples': 32032, 'steps': 1000, 'loss/train': 8.97726058959961} +01/22/2022 22:50:40 - INFO - codeparrot_training - Step 1002: {'lr': 0.00025025, 'samples': 32064, 'steps': 1001, 'loss/train': 7.895163536071777} +01/22/2022 22:50:40 - INFO - codeparrot_training - Step 1003: {'lr': 0.0002505, 'samples': 32096, 'steps': 1002, 'loss/train': 8.079300880432129} +01/22/2022 22:50:41 - INFO - codeparrot_training - Step 1004: {'lr': 0.00025075, 'samples': 32128, 'steps': 1003, 'loss/train': 8.743096351623535} +01/22/2022 22:50:42 - INFO - codeparrot_training - Step 1005: {'lr': 0.00025100000000000003, 'samples': 32160, 'steps': 1004, 'loss/train': 7.9241132736206055} +01/22/2022 22:50:42 - INFO - codeparrot_training - Step 1006: {'lr': 0.00025124999999999995, 'samples': 32192, 'steps': 1005, 'loss/train': 7.865138530731201} +01/22/2022 22:50:43 - INFO - codeparrot_training - Step 1007: {'lr': 0.0002515, 'samples': 32224, 'steps': 1006, 'loss/train': 7.557893753051758} +01/22/2022 22:50:43 - INFO - codeparrot_training - Step 1008: {'lr': 0.00025174999999999997, 'samples': 32256, 'steps': 1007, 'loss/train': 7.808202743530273} +01/22/2022 22:50:44 - INFO - codeparrot_training - Step 1009: {'lr': 0.000252, 'samples': 32288, 'steps': 1008, 'loss/train': 7.1792473793029785} +01/22/2022 22:50:44 - INFO - codeparrot_training - Step 1010: {'lr': 0.00025225, 'samples': 32320, 'steps': 1009, 'loss/train': 7.491529941558838} +01/22/2022 22:50:45 - INFO - codeparrot_training - Step 1011: {'lr': 0.0002525, 'samples': 32352, 'steps': 1010, 'loss/train': 7.6429009437561035} +01/22/2022 22:50:45 - INFO - codeparrot_training - Step 1012: {'lr': 0.00025275, 'samples': 32384, 'steps': 1011, 'loss/train': 9.266167640686035} +01/22/2022 22:50:47 - INFO - codeparrot_training - Step 1013: {'lr': 0.000253, 'samples': 32416, 'steps': 1012, 'loss/train': 8.243592262268066} +01/22/2022 22:50:47 - INFO - codeparrot_training - Step 1014: {'lr': 0.00025325, 'samples': 32448, 'steps': 1013, 'loss/train': 7.360544204711914} +01/22/2022 22:50:48 - INFO - codeparrot_training - Step 1015: {'lr': 0.0002535, 'samples': 32480, 'steps': 1014, 'loss/train': 7.70560884475708} +01/22/2022 22:50:48 - INFO - codeparrot_training - Step 1016: {'lr': 0.00025374999999999996, 'samples': 32512, 'steps': 1015, 'loss/train': 4.045639991760254} +01/22/2022 22:50:49 - INFO - codeparrot_training - Step 1017: {'lr': 0.000254, 'samples': 32544, 'steps': 1016, 'loss/train': 7.7802958488464355} +01/22/2022 22:50:49 - INFO - codeparrot_training - Step 1018: {'lr': 0.00025425, 'samples': 32576, 'steps': 1017, 'loss/train': 7.9524993896484375} +01/22/2022 22:50:50 - INFO - codeparrot_training - Step 1019: {'lr': 0.0002545, 'samples': 32608, 'steps': 1018, 'loss/train': 
5.488943576812744} +01/22/2022 22:50:51 - INFO - codeparrot_training - Step 1020: {'lr': 0.00025475, 'samples': 32640, 'steps': 1019, 'loss/train': 7.27760124206543} +01/22/2022 22:50:51 - INFO - codeparrot_training - Step 1021: {'lr': 0.000255, 'samples': 32672, 'steps': 1020, 'loss/train': 7.499791622161865} +01/22/2022 22:50:52 - INFO - codeparrot_training - Step 1022: {'lr': 0.00025525, 'samples': 32704, 'steps': 1021, 'loss/train': 7.266519069671631} +01/22/2022 22:50:52 - INFO - codeparrot_training - Step 1023: {'lr': 0.00025550000000000003, 'samples': 32736, 'steps': 1022, 'loss/train': 7.254077911376953} +01/22/2022 22:50:53 - INFO - codeparrot_training - Step 1024: {'lr': 0.00025575, 'samples': 32768, 'steps': 1023, 'loss/train': 8.423787117004395} +01/22/2022 22:50:53 - INFO - codeparrot_training - Step 1025: {'lr': 0.000256, 'samples': 32800, 'steps': 1024, 'loss/train': 8.105910301208496} +01/22/2022 22:50:54 - INFO - codeparrot_training - Step 1026: {'lr': 0.00025624999999999997, 'samples': 32832, 'steps': 1025, 'loss/train': 7.7124762535095215} +01/22/2022 22:50:54 - INFO - codeparrot_training - Step 1027: {'lr': 0.0002565, 'samples': 32864, 'steps': 1026, 'loss/train': 7.251527786254883} +01/22/2022 22:50:55 - INFO - codeparrot_training - Step 1028: {'lr': 0.00025675, 'samples': 32896, 'steps': 1027, 'loss/train': 7.364525318145752} +01/22/2022 22:50:56 - INFO - codeparrot_training - Step 1029: {'lr': 0.000257, 'samples': 32928, 'steps': 1028, 'loss/train': 7.018332481384277} +01/22/2022 22:50:56 - INFO - codeparrot_training - Step 1030: {'lr': 0.00025725, 'samples': 32960, 'steps': 1029, 'loss/train': 7.697657108306885} +01/22/2022 22:50:57 - INFO - codeparrot_training - Step 1031: {'lr': 0.0002575, 'samples': 32992, 'steps': 1030, 'loss/train': 7.144382476806641} +01/22/2022 22:50:57 - INFO - codeparrot_training - Step 1032: {'lr': 0.00025775, 'samples': 33024, 'steps': 1031, 'loss/train': 9.305621147155762} +01/22/2022 22:50:58 - INFO - codeparrot_training - Step 1033: {'lr': 0.00025800000000000004, 'samples': 33056, 'steps': 1032, 'loss/train': 7.627344131469727} +01/22/2022 22:50:58 - INFO - codeparrot_training - Step 1034: {'lr': 0.00025824999999999996, 'samples': 33088, 'steps': 1033, 'loss/train': 7.451657295227051} +01/22/2022 22:50:59 - INFO - codeparrot_training - Step 1035: {'lr': 0.0002585, 'samples': 33120, 'steps': 1034, 'loss/train': 8.347365379333496} +01/22/2022 22:50:59 - INFO - codeparrot_training - Step 1036: {'lr': 0.00025875, 'samples': 33152, 'steps': 1035, 'loss/train': 7.210925102233887} +01/22/2022 22:51:00 - INFO - codeparrot_training - Step 1037: {'lr': 0.000259, 'samples': 33184, 'steps': 1036, 'loss/train': 4.644311428070068} +01/22/2022 22:51:01 - INFO - codeparrot_training - Step 1038: {'lr': 0.00025925, 'samples': 33216, 'steps': 1037, 'loss/train': 7.4698286056518555} +01/22/2022 22:51:01 - INFO - codeparrot_training - Step 1039: {'lr': 0.0002595, 'samples': 33248, 'steps': 1038, 'loss/train': 8.464361190795898} +01/22/2022 22:51:02 - INFO - codeparrot_training - Step 1040: {'lr': 0.00025975, 'samples': 33280, 'steps': 1039, 'loss/train': 8.231058120727539} +01/22/2022 22:51:02 - INFO - codeparrot_training - Step 1041: {'lr': 0.00026000000000000003, 'samples': 33312, 'steps': 1040, 'loss/train': 6.459791660308838} +01/22/2022 22:51:03 - INFO - codeparrot_training - Step 1042: {'lr': 0.00026025, 'samples': 33344, 'steps': 1041, 'loss/train': 7.697694778442383} +01/22/2022 22:51:04 - INFO - codeparrot_training - Step 1043: {'lr': 0.0002605, 
'samples': 33376, 'steps': 1042, 'loss/train': 6.737176895141602} +01/22/2022 22:51:05 - INFO - codeparrot_training - Step 1044: {'lr': 0.00026074999999999997, 'samples': 33408, 'steps': 1043, 'loss/train': 7.791807651519775} +01/22/2022 22:51:05 - INFO - codeparrot_training - Step 1045: {'lr': 0.000261, 'samples': 33440, 'steps': 1044, 'loss/train': 7.328395366668701} +01/22/2022 22:51:06 - INFO - codeparrot_training - Step 1046: {'lr': 0.00026125, 'samples': 33472, 'steps': 1045, 'loss/train': 8.124420166015625} +01/22/2022 22:51:06 - INFO - codeparrot_training - Step 1047: {'lr': 0.0002615, 'samples': 33504, 'steps': 1046, 'loss/train': 9.7803373336792} +01/22/2022 22:51:07 - INFO - codeparrot_training - Step 1048: {'lr': 0.00026175, 'samples': 33536, 'steps': 1047, 'loss/train': 9.089180946350098} +01/22/2022 22:51:07 - INFO - codeparrot_training - Step 1049: {'lr': 0.000262, 'samples': 33568, 'steps': 1048, 'loss/train': 9.168219566345215} +01/22/2022 22:51:08 - INFO - codeparrot_training - Step 1050: {'lr': 0.00026225, 'samples': 33600, 'steps': 1049, 'loss/train': 8.402801513671875} +01/22/2022 22:51:09 - INFO - codeparrot_training - Step 1051: {'lr': 0.00026250000000000004, 'samples': 33632, 'steps': 1050, 'loss/train': 7.489709854125977} +01/22/2022 22:51:09 - INFO - codeparrot_training - Step 1052: {'lr': 0.00026274999999999996, 'samples': 33664, 'steps': 1051, 'loss/train': 8.244412422180176} +01/22/2022 22:51:10 - INFO - codeparrot_training - Step 1053: {'lr': 0.000263, 'samples': 33696, 'steps': 1052, 'loss/train': 8.24311637878418} +01/22/2022 22:51:10 - INFO - codeparrot_training - Step 1054: {'lr': 0.00026325, 'samples': 33728, 'steps': 1053, 'loss/train': 8.109145164489746} +01/22/2022 22:51:11 - INFO - codeparrot_training - Step 1055: {'lr': 0.0002635, 'samples': 33760, 'steps': 1054, 'loss/train': 7.6063232421875} +01/22/2022 22:51:11 - INFO - codeparrot_training - Step 1056: {'lr': 0.00026375, 'samples': 33792, 'steps': 1055, 'loss/train': 7.252101421356201} +01/22/2022 22:51:12 - INFO - codeparrot_training - Step 1057: {'lr': 0.000264, 'samples': 33824, 'steps': 1056, 'loss/train': 7.209290981292725} +01/22/2022 22:51:12 - INFO - codeparrot_training - Step 1058: {'lr': 0.00026425, 'samples': 33856, 'steps': 1057, 'loss/train': 6.6590895652771} +01/22/2022 22:51:13 - INFO - codeparrot_training - Step 1059: {'lr': 0.00026450000000000003, 'samples': 33888, 'steps': 1058, 'loss/train': 6.9906816482543945} +01/22/2022 22:51:14 - INFO - codeparrot_training - Step 1060: {'lr': 0.00026475, 'samples': 33920, 'steps': 1059, 'loss/train': 8.269049644470215} +01/22/2022 22:51:14 - INFO - codeparrot_training - Step 1061: {'lr': 0.00026500000000000004, 'samples': 33952, 'steps': 1060, 'loss/train': 7.50347375869751} +01/22/2022 22:51:15 - INFO - codeparrot_training - Step 1062: {'lr': 0.00026524999999999997, 'samples': 33984, 'steps': 1061, 'loss/train': 7.44711971282959} +01/22/2022 22:51:15 - INFO - codeparrot_training - Step 1063: {'lr': 0.0002655, 'samples': 34016, 'steps': 1062, 'loss/train': 8.273433685302734} +01/22/2022 22:51:16 - INFO - codeparrot_training - Step 1064: {'lr': 0.00026575, 'samples': 34048, 'steps': 1063, 'loss/train': 7.848727226257324} +01/22/2022 22:51:16 - INFO - codeparrot_training - Step 1065: {'lr': 0.000266, 'samples': 34080, 'steps': 1064, 'loss/train': 7.386734962463379} +01/22/2022 22:51:17 - INFO - codeparrot_training - Step 1066: {'lr': 0.00026625, 'samples': 34112, 'steps': 1065, 'loss/train': 6.925583839416504} +01/22/2022 22:51:17 - INFO - 
codeparrot_training - Step 1067: {'lr': 0.0002665, 'samples': 34144, 'steps': 1066, 'loss/train': 7.3410539627075195} +01/22/2022 22:51:18 - INFO - codeparrot_training - Step 1068: {'lr': 0.00026675, 'samples': 34176, 'steps': 1067, 'loss/train': 7.097792148590088} +01/22/2022 22:51:19 - INFO - codeparrot_training - Step 1069: {'lr': 0.00026700000000000004, 'samples': 34208, 'steps': 1068, 'loss/train': 6.80520486831665} +01/22/2022 22:51:19 - INFO - codeparrot_training - Step 1070: {'lr': 0.00026725, 'samples': 34240, 'steps': 1069, 'loss/train': 4.098143100738525} +01/22/2022 22:51:20 - INFO - codeparrot_training - Step 1071: {'lr': 0.0002675, 'samples': 34272, 'steps': 1070, 'loss/train': 7.678524971008301} +01/22/2022 22:51:20 - INFO - codeparrot_training - Step 1072: {'lr': 0.00026775, 'samples': 34304, 'steps': 1071, 'loss/train': 7.936852931976318} +01/22/2022 22:51:23 - INFO - codeparrot_training - Step 1073: {'lr': 0.000268, 'samples': 34336, 'steps': 1072, 'loss/train': 8.179617881774902} +01/22/2022 22:51:24 - INFO - codeparrot_training - Step 1074: {'lr': 0.00026825, 'samples': 34368, 'steps': 1073, 'loss/train': 7.736458778381348} +01/22/2022 22:51:24 - INFO - codeparrot_training - Step 1075: {'lr': 0.0002685, 'samples': 34400, 'steps': 1074, 'loss/train': 7.9290852546691895} +01/22/2022 22:51:25 - INFO - codeparrot_training - Step 1076: {'lr': 0.00026875, 'samples': 34432, 'steps': 1075, 'loss/train': 7.953436851501465} +01/22/2022 22:51:26 - INFO - codeparrot_training - Step 1077: {'lr': 0.00026900000000000003, 'samples': 34464, 'steps': 1076, 'loss/train': 7.2176103591918945} +01/22/2022 22:51:26 - INFO - codeparrot_training - Step 1078: {'lr': 0.00026925, 'samples': 34496, 'steps': 1077, 'loss/train': 7.39737606048584} +01/22/2022 22:51:27 - INFO - codeparrot_training - Step 1079: {'lr': 0.00026950000000000005, 'samples': 34528, 'steps': 1078, 'loss/train': 8.346632957458496} +01/22/2022 22:51:27 - INFO - codeparrot_training - Step 1080: {'lr': 0.00026974999999999997, 'samples': 34560, 'steps': 1079, 'loss/train': 9.5011568069458} +01/22/2022 22:51:28 - INFO - codeparrot_training - Step 1081: {'lr': 0.00027, 'samples': 34592, 'steps': 1080, 'loss/train': 7.575537204742432} +01/22/2022 22:51:28 - INFO - codeparrot_training - Step 1082: {'lr': 0.00027025, 'samples': 34624, 'steps': 1081, 'loss/train': 7.860004425048828} +01/22/2022 22:51:29 - INFO - codeparrot_training - Step 1083: {'lr': 0.0002705, 'samples': 34656, 'steps': 1082, 'loss/train': 7.113722324371338} +01/22/2022 22:51:29 - INFO - codeparrot_training - Step 1084: {'lr': 0.00027075, 'samples': 34688, 'steps': 1083, 'loss/train': 7.9745283126831055} +01/22/2022 22:51:30 - INFO - codeparrot_training - Step 1085: {'lr': 0.00027100000000000003, 'samples': 34720, 'steps': 1084, 'loss/train': 7.710585594177246} +01/22/2022 22:51:31 - INFO - codeparrot_training - Step 1086: {'lr': 0.00027125, 'samples': 34752, 'steps': 1085, 'loss/train': 4.611213684082031} +01/22/2022 22:51:31 - INFO - codeparrot_training - Step 1087: {'lr': 0.00027150000000000004, 'samples': 34784, 'steps': 1086, 'loss/train': 7.584122180938721} +01/22/2022 22:51:32 - INFO - codeparrot_training - Step 1088: {'lr': 0.00027175, 'samples': 34816, 'steps': 1087, 'loss/train': 8.007559776306152} +01/22/2022 22:51:32 - INFO - codeparrot_training - Step 1089: {'lr': 0.00027200000000000005, 'samples': 34848, 'steps': 1088, 'loss/train': 7.27069091796875} +01/22/2022 22:51:33 - INFO - codeparrot_training - Step 1090: {'lr': 0.00027225, 'samples': 34880, 
'steps': 1089, 'loss/train': 7.448082447052002} +01/22/2022 22:51:33 - INFO - codeparrot_training - Step 1091: {'lr': 0.0002725, 'samples': 34912, 'steps': 1090, 'loss/train': 7.8715057373046875} +01/22/2022 22:51:34 - INFO - codeparrot_training - Step 1092: {'lr': 0.00027275, 'samples': 34944, 'steps': 1091, 'loss/train': 7.383069038391113} +01/22/2022 22:51:34 - INFO - codeparrot_training - Step 1093: {'lr': 0.000273, 'samples': 34976, 'steps': 1092, 'loss/train': 8.45059585571289} +01/22/2022 22:51:35 - INFO - codeparrot_training - Step 1094: {'lr': 0.00027325, 'samples': 35008, 'steps': 1093, 'loss/train': 7.480466842651367} +01/22/2022 22:51:36 - INFO - codeparrot_training - Step 1095: {'lr': 0.00027350000000000003, 'samples': 35040, 'steps': 1094, 'loss/train': 8.31132698059082} +01/22/2022 22:51:36 - INFO - codeparrot_training - Step 1096: {'lr': 0.00027375, 'samples': 35072, 'steps': 1095, 'loss/train': 8.74916934967041} +01/22/2022 22:51:37 - INFO - codeparrot_training - Step 1097: {'lr': 0.00027400000000000005, 'samples': 35104, 'steps': 1096, 'loss/train': 7.476004123687744} +01/22/2022 22:51:37 - INFO - codeparrot_training - Step 1098: {'lr': 0.00027425, 'samples': 35136, 'steps': 1097, 'loss/train': 7.349726676940918} +01/22/2022 22:51:38 - INFO - codeparrot_training - Step 1099: {'lr': 0.0002745, 'samples': 35168, 'steps': 1098, 'loss/train': 7.758894443511963} +01/22/2022 22:51:38 - INFO - codeparrot_training - Step 1100: {'lr': 0.00027475, 'samples': 35200, 'steps': 1099, 'loss/train': 7.074851036071777} +01/22/2022 22:51:39 - INFO - codeparrot_training - Step 1101: {'lr': 0.000275, 'samples': 35232, 'steps': 1100, 'loss/train': 9.215550422668457} +01/22/2022 22:51:40 - INFO - codeparrot_training - Step 1102: {'lr': 0.00027525, 'samples': 35264, 'steps': 1101, 'loss/train': 7.680541515350342} +01/22/2022 22:51:41 - INFO - codeparrot_training - Step 1103: {'lr': 0.00027550000000000003, 'samples': 35296, 'steps': 1102, 'loss/train': 6.771366596221924} +01/22/2022 22:51:41 - INFO - codeparrot_training - Step 1104: {'lr': 0.00027575, 'samples': 35328, 'steps': 1103, 'loss/train': 6.930596828460693} +01/22/2022 22:51:42 - INFO - codeparrot_training - Step 1105: {'lr': 0.00027600000000000004, 'samples': 35360, 'steps': 1104, 'loss/train': 8.796313285827637} +01/22/2022 22:51:42 - INFO - codeparrot_training - Step 1106: {'lr': 0.00027625, 'samples': 35392, 'steps': 1105, 'loss/train': 7.597792148590088} +01/22/2022 22:51:43 - INFO - codeparrot_training - Step 1107: {'lr': 0.00027650000000000005, 'samples': 35424, 'steps': 1106, 'loss/train': 8.485983848571777} +01/22/2022 22:51:43 - INFO - codeparrot_training - Step 1108: {'lr': 0.00027675, 'samples': 35456, 'steps': 1107, 'loss/train': 6.186303615570068} +01/22/2022 22:51:44 - INFO - codeparrot_training - Step 1109: {'lr': 0.000277, 'samples': 35488, 'steps': 1108, 'loss/train': 7.895164966583252} +01/22/2022 22:51:45 - INFO - codeparrot_training - Step 1110: {'lr': 0.00027725, 'samples': 35520, 'steps': 1109, 'loss/train': 8.605523109436035} +01/22/2022 22:51:45 - INFO - codeparrot_training - Step 1111: {'lr': 0.0002775, 'samples': 35552, 'steps': 1110, 'loss/train': 7.190681457519531} +01/22/2022 22:51:46 - INFO - codeparrot_training - Step 1112: {'lr': 0.00027775, 'samples': 35584, 'steps': 1111, 'loss/train': 7.327890872955322} +01/22/2022 22:51:46 - INFO - codeparrot_training - Step 1113: {'lr': 0.00027800000000000004, 'samples': 35616, 'steps': 1112, 'loss/train': 6.82438325881958} +01/22/2022 22:51:47 - INFO - 
codeparrot_training - Step 1114: {'lr': 0.00027825, 'samples': 35648, 'steps': 1113, 'loss/train': 6.994159698486328} +01/22/2022 22:51:47 - INFO - codeparrot_training - Step 1115: {'lr': 0.00027850000000000005, 'samples': 35680, 'steps': 1114, 'loss/train': 7.810370445251465} +01/22/2022 22:51:48 - INFO - codeparrot_training - Step 1116: {'lr': 0.00027875, 'samples': 35712, 'steps': 1115, 'loss/train': 7.162477493286133} +01/22/2022 22:51:48 - INFO - codeparrot_training - Step 1117: {'lr': 0.000279, 'samples': 35744, 'steps': 1116, 'loss/train': 8.697196006774902} +01/22/2022 22:51:49 - INFO - codeparrot_training - Step 1118: {'lr': 0.00027925, 'samples': 35776, 'steps': 1117, 'loss/train': 7.586367607116699} +01/22/2022 22:51:50 - INFO - codeparrot_training - Step 1119: {'lr': 0.0002795, 'samples': 35808, 'steps': 1118, 'loss/train': 9.30983829498291} +01/22/2022 22:51:50 - INFO - codeparrot_training - Step 1120: {'lr': 0.00027975, 'samples': 35840, 'steps': 1119, 'loss/train': 6.127519130706787} +01/22/2022 22:51:51 - INFO - codeparrot_training - Step 1121: {'lr': 0.00028000000000000003, 'samples': 35872, 'steps': 1120, 'loss/train': 6.822423458099365} +01/22/2022 22:51:51 - INFO - codeparrot_training - Step 1122: {'lr': 0.00028025, 'samples': 35904, 'steps': 1121, 'loss/train': 7.074620723724365} +01/22/2022 22:51:52 - INFO - codeparrot_training - Step 1123: {'lr': 0.00028050000000000004, 'samples': 35936, 'steps': 1122, 'loss/train': 7.57810115814209} +01/22/2022 22:51:52 - INFO - codeparrot_training - Step 1124: {'lr': 0.00028075, 'samples': 35968, 'steps': 1123, 'loss/train': 6.6084442138671875} +01/22/2022 22:51:53 - INFO - codeparrot_training - Step 1125: {'lr': 0.00028100000000000005, 'samples': 36000, 'steps': 1124, 'loss/train': 7.216201305389404} +01/22/2022 22:51:53 - INFO - codeparrot_training - Step 1126: {'lr': 0.00028125000000000003, 'samples': 36032, 'steps': 1125, 'loss/train': 7.616080284118652} +01/22/2022 22:51:54 - INFO - codeparrot_training - Step 1127: {'lr': 0.00028149999999999996, 'samples': 36064, 'steps': 1126, 'loss/train': 8.069820404052734} +01/22/2022 22:51:55 - INFO - codeparrot_training - Step 1128: {'lr': 0.00028175, 'samples': 36096, 'steps': 1127, 'loss/train': 7.787780284881592} +01/22/2022 22:51:55 - INFO - codeparrot_training - Step 1129: {'lr': 0.00028199999999999997, 'samples': 36128, 'steps': 1128, 'loss/train': 6.839706897735596} +01/22/2022 22:51:56 - INFO - codeparrot_training - Step 1130: {'lr': 0.00028225, 'samples': 36160, 'steps': 1129, 'loss/train': 7.3004150390625} +01/22/2022 22:51:56 - INFO - codeparrot_training - Step 1131: {'lr': 0.0002825, 'samples': 36192, 'steps': 1130, 'loss/train': 7.89224100112915} +01/22/2022 22:51:57 - INFO - codeparrot_training - Step 1132: {'lr': 0.00028275, 'samples': 36224, 'steps': 1131, 'loss/train': 6.920696258544922} +01/22/2022 22:51:58 - INFO - codeparrot_training - Step 1133: {'lr': 0.000283, 'samples': 36256, 'steps': 1132, 'loss/train': 8.154302597045898} +01/22/2022 22:51:59 - INFO - codeparrot_training - Step 1134: {'lr': 0.00028325000000000003, 'samples': 36288, 'steps': 1133, 'loss/train': 7.581409454345703} +01/22/2022 22:51:59 - INFO - codeparrot_training - Step 1135: {'lr': 0.0002835, 'samples': 36320, 'steps': 1134, 'loss/train': 7.85944938659668} +01/22/2022 22:52:00 - INFO - codeparrot_training - Step 1136: {'lr': 0.00028375, 'samples': 36352, 'steps': 1135, 'loss/train': 7.445674896240234} +01/22/2022 22:52:00 - INFO - codeparrot_training - Step 1137: {'lr': 0.00028399999999999996, 
'samples': 36384, 'steps': 1136, 'loss/train': 7.078855514526367} +01/22/2022 22:52:01 - INFO - codeparrot_training - Step 1138: {'lr': 0.00028425, 'samples': 36416, 'steps': 1137, 'loss/train': 7.452606678009033} +01/22/2022 22:52:01 - INFO - codeparrot_training - Step 1139: {'lr': 0.0002845, 'samples': 36448, 'steps': 1138, 'loss/train': 7.9658613204956055} +01/22/2022 22:52:02 - INFO - codeparrot_training - Step 1140: {'lr': 0.00028475, 'samples': 36480, 'steps': 1139, 'loss/train': 8.4839448928833} +01/22/2022 22:52:02 - INFO - codeparrot_training - Step 1141: {'lr': 0.000285, 'samples': 36512, 'steps': 1140, 'loss/train': 6.9724249839782715} +01/22/2022 22:52:03 - INFO - codeparrot_training - Step 1142: {'lr': 0.00028525, 'samples': 36544, 'steps': 1141, 'loss/train': 7.585428237915039} +01/22/2022 22:52:03 - INFO - codeparrot_training - Step 1143: {'lr': 0.0002855, 'samples': 36576, 'steps': 1142, 'loss/train': 7.535899639129639} +01/22/2022 22:52:04 - INFO - codeparrot_training - Step 1144: {'lr': 0.00028575000000000003, 'samples': 36608, 'steps': 1143, 'loss/train': 7.0666913986206055} +01/22/2022 22:52:05 - INFO - codeparrot_training - Step 1145: {'lr': 0.00028599999999999996, 'samples': 36640, 'steps': 1144, 'loss/train': 7.564232349395752} +01/22/2022 22:52:05 - INFO - codeparrot_training - Step 1146: {'lr': 0.00028625, 'samples': 36672, 'steps': 1145, 'loss/train': 7.574392318725586} +01/22/2022 22:52:06 - INFO - codeparrot_training - Step 1147: {'lr': 0.00028649999999999997, 'samples': 36704, 'steps': 1146, 'loss/train': 7.907914638519287} +01/22/2022 22:52:06 - INFO - codeparrot_training - Step 1148: {'lr': 0.00028675, 'samples': 36736, 'steps': 1147, 'loss/train': 7.1539154052734375} +01/22/2022 22:52:07 - INFO - codeparrot_training - Step 1149: {'lr': 0.000287, 'samples': 36768, 'steps': 1148, 'loss/train': 7.605190753936768} +01/22/2022 22:52:07 - INFO - codeparrot_training - Step 1150: {'lr': 0.00028725, 'samples': 36800, 'steps': 1149, 'loss/train': 7.311681270599365} +01/22/2022 22:52:08 - INFO - codeparrot_training - Step 1151: {'lr': 0.0002875, 'samples': 36832, 'steps': 1150, 'loss/train': 7.159059524536133} +01/22/2022 22:52:09 - INFO - codeparrot_training - Step 1152: {'lr': 0.00028775000000000003, 'samples': 36864, 'steps': 1151, 'loss/train': 7.567832946777344} +01/22/2022 22:52:09 - INFO - codeparrot_training - Step 1153: {'lr': 0.000288, 'samples': 36896, 'steps': 1152, 'loss/train': 7.666322708129883} +01/22/2022 22:52:10 - INFO - codeparrot_training - Step 1154: {'lr': 0.00028825, 'samples': 36928, 'steps': 1153, 'loss/train': 7.2905378341674805} +01/22/2022 22:52:10 - INFO - codeparrot_training - Step 1155: {'lr': 0.00028849999999999997, 'samples': 36960, 'steps': 1154, 'loss/train': 7.6733222007751465} +01/22/2022 22:52:11 - INFO - codeparrot_training - Step 1156: {'lr': 0.00028875, 'samples': 36992, 'steps': 1155, 'loss/train': 7.919204235076904} +01/22/2022 22:52:11 - INFO - codeparrot_training - Step 1157: {'lr': 0.000289, 'samples': 37024, 'steps': 1156, 'loss/train': 3.5407252311706543} +01/22/2022 22:52:12 - INFO - codeparrot_training - Step 1158: {'lr': 0.00028925, 'samples': 37056, 'steps': 1157, 'loss/train': 8.449868202209473} +01/22/2022 22:52:12 - INFO - codeparrot_training - Step 1159: {'lr': 0.0002895, 'samples': 37088, 'steps': 1158, 'loss/train': 7.6915082931518555} +01/22/2022 22:52:13 - INFO - codeparrot_training - Step 1160: {'lr': 0.00028975, 'samples': 37120, 'steps': 1159, 'loss/train': 7.946893215179443} +01/22/2022 22:52:14 - INFO - 
codeparrot_training - Step 1161: {'lr': 0.00029, 'samples': 37152, 'steps': 1160, 'loss/train': 3.706364631652832} +01/22/2022 22:52:15 - INFO - codeparrot_training - Step 1162: {'lr': 0.00029025000000000003, 'samples': 37184, 'steps': 1161, 'loss/train': 7.878750324249268} +01/22/2022 22:52:15 - INFO - codeparrot_training - Step 1163: {'lr': 0.00029049999999999996, 'samples': 37216, 'steps': 1162, 'loss/train': 6.9276652336120605} +01/22/2022 22:52:16 - INFO - codeparrot_training - Step 1164: {'lr': 0.00029075, 'samples': 37248, 'steps': 1163, 'loss/train': 7.7303876876831055} +01/22/2022 22:52:16 - INFO - codeparrot_training - Step 1165: {'lr': 0.00029099999999999997, 'samples': 37280, 'steps': 1164, 'loss/train': 7.376260280609131} +01/22/2022 22:52:17 - INFO - codeparrot_training - Step 1166: {'lr': 0.00029125, 'samples': 37312, 'steps': 1165, 'loss/train': 7.546977519989014} +01/22/2022 22:52:18 - INFO - codeparrot_training - Step 1167: {'lr': 0.0002915, 'samples': 37344, 'steps': 1166, 'loss/train': 7.145103931427002} +01/22/2022 22:52:18 - INFO - codeparrot_training - Step 1168: {'lr': 0.00029175, 'samples': 37376, 'steps': 1167, 'loss/train': 7.544468402862549} +01/22/2022 22:52:19 - INFO - codeparrot_training - Step 1169: {'lr': 0.000292, 'samples': 37408, 'steps': 1168, 'loss/train': 7.109976291656494} +01/22/2022 22:52:19 - INFO - codeparrot_training - Step 1170: {'lr': 0.00029225000000000003, 'samples': 37440, 'steps': 1169, 'loss/train': 6.959239482879639} +01/22/2022 22:52:20 - INFO - codeparrot_training - Step 1171: {'lr': 0.0002925, 'samples': 37472, 'steps': 1170, 'loss/train': 7.204222679138184} +01/22/2022 22:52:20 - INFO - codeparrot_training - Step 1172: {'lr': 0.00029275000000000004, 'samples': 37504, 'steps': 1171, 'loss/train': 6.998696327209473} +01/22/2022 22:52:21 - INFO - codeparrot_training - Step 1173: {'lr': 0.00029299999999999997, 'samples': 37536, 'steps': 1172, 'loss/train': 8.01220703125} +01/22/2022 22:52:21 - INFO - codeparrot_training - Step 1174: {'lr': 0.00029325, 'samples': 37568, 'steps': 1173, 'loss/train': 7.927822589874268} +01/22/2022 22:52:22 - INFO - codeparrot_training - Step 1175: {'lr': 0.0002935, 'samples': 37600, 'steps': 1174, 'loss/train': 7.247946739196777} +01/22/2022 22:52:23 - INFO - codeparrot_training - Step 1176: {'lr': 0.00029375, 'samples': 37632, 'steps': 1175, 'loss/train': 6.473228931427002} +01/22/2022 22:52:23 - INFO - codeparrot_training - Step 1177: {'lr': 0.000294, 'samples': 37664, 'steps': 1176, 'loss/train': 7.591438293457031} +01/22/2022 22:52:24 - INFO - codeparrot_training - Step 1178: {'lr': 0.00029425, 'samples': 37696, 'steps': 1177, 'loss/train': 6.7412567138671875} +01/22/2022 22:52:24 - INFO - codeparrot_training - Step 1179: {'lr': 0.0002945, 'samples': 37728, 'steps': 1178, 'loss/train': 7.261268615722656} +01/22/2022 22:52:25 - INFO - codeparrot_training - Step 1180: {'lr': 0.00029475000000000004, 'samples': 37760, 'steps': 1179, 'loss/train': 7.617155075073242} +01/22/2022 22:52:25 - INFO - codeparrot_training - Step 1181: {'lr': 0.000295, 'samples': 37792, 'steps': 1180, 'loss/train': 7.7122955322265625} +01/22/2022 22:52:26 - INFO - codeparrot_training - Step 1182: {'lr': 0.00029525, 'samples': 37824, 'steps': 1181, 'loss/train': 7.645023345947266} +01/22/2022 22:52:26 - INFO - codeparrot_training - Step 1183: {'lr': 0.00029549999999999997, 'samples': 37856, 'steps': 1182, 'loss/train': 7.285182476043701} +01/22/2022 22:52:27 - INFO - codeparrot_training - Step 1184: {'lr': 0.00029575, 'samples': 
37888, 'steps': 1183, 'loss/train': 7.766895294189453} +01/22/2022 22:52:28 - INFO - codeparrot_training - Step 1185: {'lr': 0.000296, 'samples': 37920, 'steps': 1184, 'loss/train': 6.956263065338135} +01/22/2022 22:52:28 - INFO - codeparrot_training - Step 1186: {'lr': 0.00029625, 'samples': 37952, 'steps': 1185, 'loss/train': 7.540711879730225} +01/22/2022 22:52:29 - INFO - codeparrot_training - Step 1187: {'lr': 0.0002965, 'samples': 37984, 'steps': 1186, 'loss/train': 8.095101356506348} +01/22/2022 22:52:29 - INFO - codeparrot_training - Step 1188: {'lr': 0.00029675000000000003, 'samples': 38016, 'steps': 1187, 'loss/train': 6.934068202972412} +01/22/2022 22:52:30 - INFO - codeparrot_training - Step 1189: {'lr': 0.000297, 'samples': 38048, 'steps': 1188, 'loss/train': 7.718388557434082} +01/22/2022 22:52:31 - INFO - codeparrot_training - Step 1190: {'lr': 0.00029725000000000004, 'samples': 38080, 'steps': 1189, 'loss/train': 6.851922512054443} +01/22/2022 22:52:32 - INFO - codeparrot_training - Step 1191: {'lr': 0.00029749999999999997, 'samples': 38112, 'steps': 1190, 'loss/train': 6.501777648925781} +01/22/2022 22:52:32 - INFO - codeparrot_training - Step 1192: {'lr': 0.00029775, 'samples': 38144, 'steps': 1191, 'loss/train': 7.393248081207275} +01/22/2022 22:52:33 - INFO - codeparrot_training - Step 1193: {'lr': 0.000298, 'samples': 38176, 'steps': 1192, 'loss/train': 6.49429178237915} +01/22/2022 22:52:33 - INFO - codeparrot_training - Step 1194: {'lr': 0.00029825, 'samples': 38208, 'steps': 1193, 'loss/train': 7.149497985839844} +01/22/2022 22:52:34 - INFO - codeparrot_training - Step 1195: {'lr': 0.0002985, 'samples': 38240, 'steps': 1194, 'loss/train': 7.180826663970947} +01/22/2022 22:52:34 - INFO - codeparrot_training - Step 1196: {'lr': 0.00029875, 'samples': 38272, 'steps': 1195, 'loss/train': 7.403633117675781} +01/22/2022 22:52:35 - INFO - codeparrot_training - Step 1197: {'lr': 0.000299, 'samples': 38304, 'steps': 1196, 'loss/train': 7.568321228027344} +01/22/2022 22:52:35 - INFO - codeparrot_training - Step 1198: {'lr': 0.00029925000000000004, 'samples': 38336, 'steps': 1197, 'loss/train': 7.8351874351501465} +01/22/2022 22:52:36 - INFO - codeparrot_training - Step 1199: {'lr': 0.0002995, 'samples': 38368, 'steps': 1198, 'loss/train': 7.483076095581055} +01/22/2022 22:52:37 - INFO - codeparrot_training - Step 1200: {'lr': 0.00029975000000000005, 'samples': 38400, 'steps': 1199, 'loss/train': 7.905860424041748} +01/22/2022 22:52:37 - INFO - codeparrot_training - Step 1201: {'lr': 0.0003, 'samples': 38432, 'steps': 1200, 'loss/train': 7.449904918670654} +01/22/2022 22:52:38 - INFO - codeparrot_training - Step 1202: {'lr': 0.00030025, 'samples': 38464, 'steps': 1201, 'loss/train': 7.0888447761535645} +01/22/2022 22:52:38 - INFO - codeparrot_training - Step 1203: {'lr': 0.0003005, 'samples': 38496, 'steps': 1202, 'loss/train': 6.624892711639404} +01/22/2022 22:52:39 - INFO - codeparrot_training - Step 1204: {'lr': 0.00030075, 'samples': 38528, 'steps': 1203, 'loss/train': 9.049830436706543} +01/22/2022 22:52:39 - INFO - codeparrot_training - Step 1205: {'lr': 0.000301, 'samples': 38560, 'steps': 1204, 'loss/train': 7.227042198181152} +01/22/2022 22:52:40 - INFO - codeparrot_training - Step 1206: {'lr': 0.00030125000000000003, 'samples': 38592, 'steps': 1205, 'loss/train': 10.125506401062012} +01/22/2022 22:52:40 - INFO - codeparrot_training - Step 1207: {'lr': 0.0003015, 'samples': 38624, 'steps': 1206, 'loss/train': 7.4194512367248535} +01/22/2022 22:52:41 - INFO - 
codeparrot_training - Step 1208: {'lr': 0.00030175000000000004, 'samples': 38656, 'steps': 1207, 'loss/train': 7.758455753326416} +01/22/2022 22:52:42 - INFO - codeparrot_training - Step 1209: {'lr': 0.000302, 'samples': 38688, 'steps': 1208, 'loss/train': 6.87568998336792} +01/22/2022 22:52:42 - INFO - codeparrot_training - Step 1210: {'lr': 0.00030225, 'samples': 38720, 'steps': 1209, 'loss/train': 7.480325698852539} +01/22/2022 22:52:43 - INFO - codeparrot_training - Step 1211: {'lr': 0.0003025, 'samples': 38752, 'steps': 1210, 'loss/train': 5.9130754470825195} +01/22/2022 22:52:43 - INFO - codeparrot_training - Step 1212: {'lr': 0.00030275, 'samples': 38784, 'steps': 1211, 'loss/train': 7.567111968994141} +01/22/2022 22:52:44 - INFO - codeparrot_training - Step 1213: {'lr': 0.000303, 'samples': 38816, 'steps': 1212, 'loss/train': 7.1754302978515625} +01/22/2022 22:52:44 - INFO - codeparrot_training - Step 1214: {'lr': 0.00030325, 'samples': 38848, 'steps': 1213, 'loss/train': 7.086296558380127} +01/22/2022 22:52:45 - INFO - codeparrot_training - Step 1215: {'lr': 0.0003035, 'samples': 38880, 'steps': 1214, 'loss/train': 7.08080530166626} +01/22/2022 22:52:46 - INFO - codeparrot_training - Step 1216: {'lr': 0.00030375000000000004, 'samples': 38912, 'steps': 1215, 'loss/train': 7.071740627288818} +01/22/2022 22:52:46 - INFO - codeparrot_training - Step 1217: {'lr': 0.000304, 'samples': 38944, 'steps': 1216, 'loss/train': 7.118080139160156} +01/22/2022 22:52:47 - INFO - codeparrot_training - Step 1218: {'lr': 0.00030425000000000005, 'samples': 38976, 'steps': 1217, 'loss/train': 7.80046272277832} +01/22/2022 22:52:47 - INFO - codeparrot_training - Step 1219: {'lr': 0.0003045, 'samples': 39008, 'steps': 1218, 'loss/train': 6.875263690948486} +01/22/2022 22:52:50 - INFO - codeparrot_training - Step 1220: {'lr': 0.00030475, 'samples': 39040, 'steps': 1219, 'loss/train': 7.640588283538818} +01/22/2022 22:52:51 - INFO - codeparrot_training - Step 1221: {'lr': 0.000305, 'samples': 39072, 'steps': 1220, 'loss/train': 7.613340377807617} +01/22/2022 22:52:51 - INFO - codeparrot_training - Step 1222: {'lr': 0.00030525, 'samples': 39104, 'steps': 1221, 'loss/train': 7.1868438720703125} +01/22/2022 22:52:52 - INFO - codeparrot_training - Step 1223: {'lr': 0.0003055, 'samples': 39136, 'steps': 1222, 'loss/train': 7.374711990356445} +01/22/2022 22:52:52 - INFO - codeparrot_training - Step 1224: {'lr': 0.00030575000000000003, 'samples': 39168, 'steps': 1223, 'loss/train': 7.392219543457031} +01/22/2022 22:52:53 - INFO - codeparrot_training - Step 1225: {'lr': 0.000306, 'samples': 39200, 'steps': 1224, 'loss/train': 8.495787620544434} +01/22/2022 22:52:54 - INFO - codeparrot_training - Step 1226: {'lr': 0.00030625000000000004, 'samples': 39232, 'steps': 1225, 'loss/train': 7.040177345275879} +01/22/2022 22:52:54 - INFO - codeparrot_training - Step 1227: {'lr': 0.0003065, 'samples': 39264, 'steps': 1226, 'loss/train': 7.328976154327393} +01/22/2022 22:52:55 - INFO - codeparrot_training - Step 1228: {'lr': 0.00030675, 'samples': 39296, 'steps': 1227, 'loss/train': 7.417562007904053} +01/22/2022 22:52:55 - INFO - codeparrot_training - Step 1229: {'lr': 0.000307, 'samples': 39328, 'steps': 1228, 'loss/train': 8.52994155883789} +01/22/2022 22:52:56 - INFO - codeparrot_training - Step 1230: {'lr': 0.00030725, 'samples': 39360, 'steps': 1229, 'loss/train': 7.0443878173828125} +01/22/2022 22:52:56 - INFO - codeparrot_training - Step 1231: {'lr': 0.0003075, 'samples': 39392, 'steps': 1230, 'loss/train': 
8.514076232910156} +01/22/2022 22:52:57 - INFO - codeparrot_training - Step 1232: {'lr': 0.00030775, 'samples': 39424, 'steps': 1231, 'loss/train': 7.951232433319092} +01/22/2022 22:52:57 - INFO - codeparrot_training - Step 1233: {'lr': 0.000308, 'samples': 39456, 'steps': 1232, 'loss/train': 7.164873123168945} +01/22/2022 22:52:58 - INFO - codeparrot_training - Step 1234: {'lr': 0.00030825000000000004, 'samples': 39488, 'steps': 1233, 'loss/train': 7.235807418823242} +01/22/2022 22:52:59 - INFO - codeparrot_training - Step 1235: {'lr': 0.0003085, 'samples': 39520, 'steps': 1234, 'loss/train': 7.106600284576416} +01/22/2022 22:52:59 - INFO - codeparrot_training - Step 1236: {'lr': 0.00030875000000000005, 'samples': 39552, 'steps': 1235, 'loss/train': 7.537955284118652} +01/22/2022 22:53:00 - INFO - codeparrot_training - Step 1237: {'lr': 0.00030900000000000003, 'samples': 39584, 'steps': 1236, 'loss/train': 6.935229778289795} +01/22/2022 22:53:00 - INFO - codeparrot_training - Step 1238: {'lr': 0.00030925, 'samples': 39616, 'steps': 1237, 'loss/train': 7.561107635498047} +01/22/2022 22:53:01 - INFO - codeparrot_training - Step 1239: {'lr': 0.0003095, 'samples': 39648, 'steps': 1238, 'loss/train': 8.087024688720703} +01/22/2022 22:53:01 - INFO - codeparrot_training - Step 1240: {'lr': 0.00030975, 'samples': 39680, 'steps': 1239, 'loss/train': 7.661020278930664} +01/22/2022 22:53:02 - INFO - codeparrot_training - Step 1241: {'lr': 0.00031, 'samples': 39712, 'steps': 1240, 'loss/train': 7.287478923797607} +01/22/2022 22:53:02 - INFO - codeparrot_training - Step 1242: {'lr': 0.00031025000000000003, 'samples': 39744, 'steps': 1241, 'loss/train': 6.075560569763184} +01/22/2022 22:53:03 - INFO - codeparrot_training - Step 1243: {'lr': 0.0003105, 'samples': 39776, 'steps': 1242, 'loss/train': 6.942577362060547} +01/22/2022 22:53:04 - INFO - codeparrot_training - Step 1244: {'lr': 0.00031075000000000005, 'samples': 39808, 'steps': 1243, 'loss/train': 8.147042274475098} +01/22/2022 22:53:04 - INFO - codeparrot_training - Step 1245: {'lr': 0.000311, 'samples': 39840, 'steps': 1244, 'loss/train': 6.866776943206787} +01/22/2022 22:53:05 - INFO - codeparrot_training - Step 1246: {'lr': 0.00031125000000000006, 'samples': 39872, 'steps': 1245, 'loss/train': 7.502773284912109} +01/22/2022 22:53:05 - INFO - codeparrot_training - Step 1247: {'lr': 0.0003115, 'samples': 39904, 'steps': 1246, 'loss/train': 8.227264404296875} +01/22/2022 22:53:06 - INFO - codeparrot_training - Step 1248: {'lr': 0.00031175, 'samples': 39936, 'steps': 1247, 'loss/train': 7.346026420593262} +01/22/2022 22:53:07 - INFO - codeparrot_training - Step 1249: {'lr': 0.000312, 'samples': 39968, 'steps': 1248, 'loss/train': 6.961223125457764} +01/22/2022 22:53:08 - INFO - codeparrot_training - Step 1250: {'lr': 0.00031225000000000003, 'samples': 40000, 'steps': 1249, 'loss/train': 7.502315044403076} +01/22/2022 22:53:08 - INFO - codeparrot_training - Step 1251: {'lr': 0.0003125, 'samples': 40032, 'steps': 1250, 'loss/train': 6.640307426452637} +01/22/2022 22:53:09 - INFO - codeparrot_training - Step 1252: {'lr': 0.00031275, 'samples': 40064, 'steps': 1251, 'loss/train': 7.125513076782227} +01/22/2022 22:53:09 - INFO - codeparrot_training - Step 1253: {'lr': 0.000313, 'samples': 40096, 'steps': 1252, 'loss/train': 7.373682498931885} +01/22/2022 22:53:10 - INFO - codeparrot_training - Step 1254: {'lr': 0.00031325, 'samples': 40128, 'steps': 1253, 'loss/train': 7.444737434387207} +01/22/2022 22:53:10 - INFO - codeparrot_training - Step 1255: 
{'lr': 0.00031350000000000003, 'samples': 40160, 'steps': 1254, 'loss/train': 6.591500759124756} +01/22/2022 22:53:11 - INFO - codeparrot_training - Step 1256: {'lr': 0.00031374999999999996, 'samples': 40192, 'steps': 1255, 'loss/train': 7.262077808380127} +01/22/2022 22:53:12 - INFO - codeparrot_training - Step 1257: {'lr': 0.000314, 'samples': 40224, 'steps': 1256, 'loss/train': 7.586088180541992} +01/22/2022 22:53:12 - INFO - codeparrot_training - Step 1258: {'lr': 0.00031424999999999997, 'samples': 40256, 'steps': 1257, 'loss/train': 5.969338417053223} +01/22/2022 22:53:13 - INFO - codeparrot_training - Step 1259: {'lr': 0.0003145, 'samples': 40288, 'steps': 1258, 'loss/train': 7.200606346130371} +01/22/2022 22:53:13 - INFO - codeparrot_training - Step 1260: {'lr': 0.00031475, 'samples': 40320, 'steps': 1259, 'loss/train': 5.129059791564941} +01/22/2022 22:53:14 - INFO - codeparrot_training - Step 1261: {'lr': 0.000315, 'samples': 40352, 'steps': 1260, 'loss/train': 5.484157562255859} +01/22/2022 22:53:14 - INFO - codeparrot_training - Step 1262: {'lr': 0.00031525, 'samples': 40384, 'steps': 1261, 'loss/train': 6.980556011199951} +01/22/2022 22:53:15 - INFO - codeparrot_training - Step 1263: {'lr': 0.0003155, 'samples': 40416, 'steps': 1262, 'loss/train': 7.132169723510742} +01/22/2022 22:53:15 - INFO - codeparrot_training - Step 1264: {'lr': 0.00031575, 'samples': 40448, 'steps': 1263, 'loss/train': 6.9766950607299805} +01/22/2022 22:53:16 - INFO - codeparrot_training - Step 1265: {'lr': 0.000316, 'samples': 40480, 'steps': 1264, 'loss/train': 5.991718292236328} +01/22/2022 22:53:17 - INFO - codeparrot_training - Step 1266: {'lr': 0.00031624999999999996, 'samples': 40512, 'steps': 1265, 'loss/train': 8.031294822692871} +01/22/2022 22:53:17 - INFO - codeparrot_training - Step 1267: {'lr': 0.0003165, 'samples': 40544, 'steps': 1266, 'loss/train': 7.173184394836426} +01/22/2022 22:53:18 - INFO - codeparrot_training - Step 1268: {'lr': 0.00031675, 'samples': 40576, 'steps': 1267, 'loss/train': 6.652865886688232} +01/22/2022 22:53:18 - INFO - codeparrot_training - Step 1269: {'lr': 0.000317, 'samples': 40608, 'steps': 1268, 'loss/train': 7.92770528793335} +01/22/2022 22:53:19 - INFO - codeparrot_training - Step 1270: {'lr': 0.00031725, 'samples': 40640, 'steps': 1269, 'loss/train': 8.188958168029785} +01/22/2022 22:53:19 - INFO - codeparrot_training - Step 1271: {'lr': 0.0003175, 'samples': 40672, 'steps': 1270, 'loss/train': 7.679257869720459} +01/22/2022 22:53:20 - INFO - codeparrot_training - Step 1272: {'lr': 0.00031775, 'samples': 40704, 'steps': 1271, 'loss/train': 7.2324066162109375} +01/22/2022 22:53:20 - INFO - codeparrot_training - Step 1273: {'lr': 0.00031800000000000003, 'samples': 40736, 'steps': 1272, 'loss/train': 7.161613941192627} +01/22/2022 22:53:21 - INFO - codeparrot_training - Step 1274: {'lr': 0.00031825, 'samples': 40768, 'steps': 1273, 'loss/train': 7.622838020324707} +01/22/2022 22:53:22 - INFO - codeparrot_training - Step 1275: {'lr': 0.0003185, 'samples': 40800, 'steps': 1274, 'loss/train': 7.565229892730713} +01/22/2022 22:53:22 - INFO - codeparrot_training - Step 1276: {'lr': 0.00031874999999999997, 'samples': 40832, 'steps': 1275, 'loss/train': 7.38745641708374} +01/22/2022 22:53:23 - INFO - codeparrot_training - Step 1277: {'lr': 0.000319, 'samples': 40864, 'steps': 1276, 'loss/train': 7.385593414306641} +01/22/2022 22:53:23 - INFO - codeparrot_training - Step 1278: {'lr': 0.00031925, 'samples': 40896, 'steps': 1277, 'loss/train': 7.602026462554932} 
+01/22/2022 22:53:24 - INFO - codeparrot_training - Step 1279: {'lr': 0.0003195, 'samples': 40928, 'steps': 1278, 'loss/train': 8.025714874267578} +01/22/2022 22:53:25 - INFO - codeparrot_training - Step 1280: {'lr': 0.00031975, 'samples': 40960, 'steps': 1279, 'loss/train': 7.700012683868408} +01/22/2022 22:53:26 - INFO - codeparrot_training - Step 1281: {'lr': 0.00032, 'samples': 40992, 'steps': 1280, 'loss/train': 7.4131693840026855} +01/22/2022 22:53:26 - INFO - codeparrot_training - Step 1282: {'lr': 0.00032025, 'samples': 41024, 'steps': 1281, 'loss/train': 7.439236164093018} +01/22/2022 22:53:27 - INFO - codeparrot_training - Step 1283: {'lr': 0.00032050000000000004, 'samples': 41056, 'steps': 1282, 'loss/train': 6.6363348960876465} +01/22/2022 22:53:27 - INFO - codeparrot_training - Step 1284: {'lr': 0.00032074999999999996, 'samples': 41088, 'steps': 1283, 'loss/train': 6.556957244873047} +01/22/2022 22:53:28 - INFO - codeparrot_training - Step 1285: {'lr': 0.000321, 'samples': 41120, 'steps': 1284, 'loss/train': 6.1645426750183105} +01/22/2022 22:53:28 - INFO - codeparrot_training - Step 1286: {'lr': 0.00032125, 'samples': 41152, 'steps': 1285, 'loss/train': 7.4963603019714355} +01/22/2022 22:53:29 - INFO - codeparrot_training - Step 1287: {'lr': 0.0003215, 'samples': 41184, 'steps': 1286, 'loss/train': 7.711255073547363} +01/22/2022 22:53:29 - INFO - codeparrot_training - Step 1288: {'lr': 0.00032175, 'samples': 41216, 'steps': 1287, 'loss/train': 8.243742942810059} +01/22/2022 22:53:30 - INFO - codeparrot_training - Step 1289: {'lr': 0.000322, 'samples': 41248, 'steps': 1288, 'loss/train': 6.754426956176758} +01/22/2022 22:53:31 - INFO - codeparrot_training - Step 1290: {'lr': 0.00032225, 'samples': 41280, 'steps': 1289, 'loss/train': 7.349318027496338} +01/22/2022 22:53:31 - INFO - codeparrot_training - Step 1291: {'lr': 0.00032250000000000003, 'samples': 41312, 'steps': 1290, 'loss/train': 8.073697090148926} +01/22/2022 22:53:32 - INFO - codeparrot_training - Step 1292: {'lr': 0.00032275, 'samples': 41344, 'steps': 1291, 'loss/train': 6.9832329750061035} +01/22/2022 22:53:32 - INFO - codeparrot_training - Step 1293: {'lr': 0.000323, 'samples': 41376, 'steps': 1292, 'loss/train': 6.9710259437561035} +01/22/2022 22:53:33 - INFO - codeparrot_training - Step 1294: {'lr': 0.00032324999999999997, 'samples': 41408, 'steps': 1293, 'loss/train': 8.073531150817871} +01/22/2022 22:53:33 - INFO - codeparrot_training - Step 1295: {'lr': 0.0003235, 'samples': 41440, 'steps': 1294, 'loss/train': 7.278412342071533} +01/22/2022 22:53:34 - INFO - codeparrot_training - Step 1296: {'lr': 0.00032375, 'samples': 41472, 'steps': 1295, 'loss/train': 6.8541340827941895} +01/22/2022 22:53:35 - INFO - codeparrot_training - Step 1297: {'lr': 0.000324, 'samples': 41504, 'steps': 1296, 'loss/train': 7.501044750213623} +01/22/2022 22:53:35 - INFO - codeparrot_training - Step 1298: {'lr': 0.00032425, 'samples': 41536, 'steps': 1297, 'loss/train': 7.640462398529053} +01/22/2022 22:53:36 - INFO - codeparrot_training - Step 1299: {'lr': 0.00032450000000000003, 'samples': 41568, 'steps': 1298, 'loss/train': 7.344558238983154} +01/22/2022 22:53:36 - INFO - codeparrot_training - Step 1300: {'lr': 0.00032475, 'samples': 41600, 'steps': 1299, 'loss/train': 6.688586711883545} +01/22/2022 22:53:37 - INFO - codeparrot_training - Step 1301: {'lr': 0.00032500000000000004, 'samples': 41632, 'steps': 1300, 'loss/train': 6.612552165985107} +01/22/2022 22:53:37 - INFO - codeparrot_training - Step 1302: {'lr': 
0.00032524999999999996, 'samples': 41664, 'steps': 1301, 'loss/train': 7.504476070404053} +01/22/2022 22:53:38 - INFO - codeparrot_training - Step 1303: {'lr': 0.0003255, 'samples': 41696, 'steps': 1302, 'loss/train': 6.327552795410156} +01/22/2022 22:53:38 - INFO - codeparrot_training - Step 1304: {'lr': 0.00032575, 'samples': 41728, 'steps': 1303, 'loss/train': 7.701913356781006} +01/22/2022 22:53:39 - INFO - codeparrot_training - Step 1305: {'lr': 0.000326, 'samples': 41760, 'steps': 1304, 'loss/train': 7.345712661743164} +01/22/2022 22:53:40 - INFO - codeparrot_training - Step 1306: {'lr': 0.00032625, 'samples': 41792, 'steps': 1305, 'loss/train': 7.586214065551758} +01/22/2022 22:53:40 - INFO - codeparrot_training - Step 1307: {'lr': 0.0003265, 'samples': 41824, 'steps': 1306, 'loss/train': 7.259960174560547} +01/22/2022 22:53:41 - INFO - codeparrot_training - Step 1308: {'lr': 0.00032675, 'samples': 41856, 'steps': 1307, 'loss/train': 6.555629730224609} +01/22/2022 22:53:42 - INFO - codeparrot_training - Step 1309: {'lr': 0.00032700000000000003, 'samples': 41888, 'steps': 1308, 'loss/train': 7.106746196746826} +01/22/2022 22:53:43 - INFO - codeparrot_training - Step 1310: {'lr': 0.00032725, 'samples': 41920, 'steps': 1309, 'loss/train': 7.268167495727539} +01/22/2022 22:53:43 - INFO - codeparrot_training - Step 1311: {'lr': 0.00032750000000000005, 'samples': 41952, 'steps': 1310, 'loss/train': 6.334803104400635} +01/22/2022 22:53:44 - INFO - codeparrot_training - Step 1312: {'lr': 0.00032774999999999997, 'samples': 41984, 'steps': 1311, 'loss/train': 7.222009658813477} +01/22/2022 22:53:44 - INFO - codeparrot_training - Step 1313: {'lr': 0.000328, 'samples': 42016, 'steps': 1312, 'loss/train': 8.127902030944824} +01/22/2022 22:53:45 - INFO - codeparrot_training - Step 1314: {'lr': 0.00032825, 'samples': 42048, 'steps': 1313, 'loss/train': 7.44563627243042} +01/22/2022 22:53:45 - INFO - codeparrot_training - Step 1315: {'lr': 0.0003285, 'samples': 42080, 'steps': 1314, 'loss/train': 7.738230228424072} +01/22/2022 22:53:46 - INFO - codeparrot_training - Step 1316: {'lr': 0.00032875, 'samples': 42112, 'steps': 1315, 'loss/train': 7.9166646003723145} +01/22/2022 22:53:46 - INFO - codeparrot_training - Step 1317: {'lr': 0.00032900000000000003, 'samples': 42144, 'steps': 1316, 'loss/train': 6.431938648223877} +01/22/2022 22:53:47 - INFO - codeparrot_training - Step 1318: {'lr': 0.00032925, 'samples': 42176, 'steps': 1317, 'loss/train': 6.545115947723389} +01/22/2022 22:53:48 - INFO - codeparrot_training - Step 1319: {'lr': 0.00032950000000000004, 'samples': 42208, 'steps': 1318, 'loss/train': 6.858592510223389} +01/22/2022 22:53:48 - INFO - codeparrot_training - Step 1320: {'lr': 0.00032975, 'samples': 42240, 'steps': 1319, 'loss/train': 7.677618503570557} +01/22/2022 22:53:49 - INFO - codeparrot_training - Step 1321: {'lr': 0.00033, 'samples': 42272, 'steps': 1320, 'loss/train': 7.568064212799072} +01/22/2022 22:53:49 - INFO - codeparrot_training - Step 1322: {'lr': 0.00033025, 'samples': 42304, 'steps': 1321, 'loss/train': 6.536092758178711} +01/22/2022 22:53:50 - INFO - codeparrot_training - Step 1323: {'lr': 0.0003305, 'samples': 42336, 'steps': 1322, 'loss/train': 7.26668643951416} +01/22/2022 22:53:50 - INFO - codeparrot_training - Step 1324: {'lr': 0.00033075, 'samples': 42368, 'steps': 1323, 'loss/train': 7.610743522644043} +01/22/2022 22:53:51 - INFO - codeparrot_training - Step 1325: {'lr': 0.000331, 'samples': 42400, 'steps': 1324, 'loss/train': 7.42066764831543} +01/22/2022 
22:53:51 - INFO - codeparrot_training - Step 1326: {'lr': 0.00033125, 'samples': 42432, 'steps': 1325, 'loss/train': 6.890969753265381} +01/22/2022 22:53:52 - INFO - codeparrot_training - Step 1327: {'lr': 0.00033150000000000003, 'samples': 42464, 'steps': 1326, 'loss/train': 6.120325088500977} +01/22/2022 22:53:53 - INFO - codeparrot_training - Step 1328: {'lr': 0.00033175, 'samples': 42496, 'steps': 1327, 'loss/train': 8.082094192504883} +01/22/2022 22:53:53 - INFO - codeparrot_training - Step 1329: {'lr': 0.00033200000000000005, 'samples': 42528, 'steps': 1328, 'loss/train': 7.705525875091553} +01/22/2022 22:53:54 - INFO - codeparrot_training - Step 1330: {'lr': 0.00033224999999999997, 'samples': 42560, 'steps': 1329, 'loss/train': 7.645618438720703} +01/22/2022 22:53:54 - INFO - codeparrot_training - Step 1331: {'lr': 0.0003325, 'samples': 42592, 'steps': 1330, 'loss/train': 6.915611743927002} +01/22/2022 22:53:55 - INFO - codeparrot_training - Step 1332: {'lr': 0.00033275, 'samples': 42624, 'steps': 1331, 'loss/train': 7.556669235229492} +01/22/2022 22:53:55 - INFO - codeparrot_training - Step 1333: {'lr': 0.000333, 'samples': 42656, 'steps': 1332, 'loss/train': 7.547966957092285} +01/22/2022 22:53:56 - INFO - codeparrot_training - Step 1334: {'lr': 0.00033325, 'samples': 42688, 'steps': 1333, 'loss/train': 6.841764450073242} +01/22/2022 22:53:56 - INFO - codeparrot_training - Step 1335: {'lr': 0.00033350000000000003, 'samples': 42720, 'steps': 1334, 'loss/train': 6.809600830078125} +01/22/2022 22:53:57 - INFO - codeparrot_training - Step 1336: {'lr': 0.00033375, 'samples': 42752, 'steps': 1335, 'loss/train': 6.721015930175781} +01/22/2022 22:53:58 - INFO - codeparrot_training - Step 1337: {'lr': 0.00033400000000000004, 'samples': 42784, 'steps': 1336, 'loss/train': 7.5598931312561035} +01/22/2022 22:53:59 - INFO - codeparrot_training - Step 1338: {'lr': 0.00033425, 'samples': 42816, 'steps': 1337, 'loss/train': 7.736660003662109} +01/22/2022 22:54:00 - INFO - codeparrot_training - Step 1339: {'lr': 0.00033450000000000005, 'samples': 42848, 'steps': 1338, 'loss/train': 7.211137294769287} +01/22/2022 22:54:00 - INFO - codeparrot_training - Step 1340: {'lr': 0.00033475, 'samples': 42880, 'steps': 1339, 'loss/train': 7.678459644317627} +01/22/2022 22:54:01 - INFO - codeparrot_training - Step 1341: {'lr': 0.000335, 'samples': 42912, 'steps': 1340, 'loss/train': 7.229006767272949} +01/22/2022 22:54:01 - INFO - codeparrot_training - Step 1342: {'lr': 0.00033525, 'samples': 42944, 'steps': 1341, 'loss/train': 7.166625499725342} +01/22/2022 22:54:02 - INFO - codeparrot_training - Step 1343: {'lr': 0.0003355, 'samples': 42976, 'steps': 1342, 'loss/train': 6.483127117156982} +01/22/2022 22:54:03 - INFO - codeparrot_training - Step 1344: {'lr': 0.00033575, 'samples': 43008, 'steps': 1343, 'loss/train': 6.685086250305176} +01/22/2022 22:54:03 - INFO - codeparrot_training - Step 1345: {'lr': 0.00033600000000000004, 'samples': 43040, 'steps': 1344, 'loss/train': 4.804778099060059} +01/22/2022 22:54:04 - INFO - codeparrot_training - Step 1346: {'lr': 0.00033625, 'samples': 43072, 'steps': 1345, 'loss/train': 4.786461353302002} +01/22/2022 22:54:04 - INFO - codeparrot_training - Step 1347: {'lr': 0.00033650000000000005, 'samples': 43104, 'steps': 1346, 'loss/train': 5.865910530090332} +01/22/2022 22:54:05 - INFO - codeparrot_training - Step 1348: {'lr': 0.00033675, 'samples': 43136, 'steps': 1347, 'loss/train': 5.622980117797852} +01/22/2022 22:54:05 - INFO - codeparrot_training - Step 1349: {'lr': 
0.000337, 'samples': 43168, 'steps': 1348, 'loss/train': 6.78125} +01/22/2022 22:54:06 - INFO - codeparrot_training - Step 1350: {'lr': 0.00033725, 'samples': 43200, 'steps': 1349, 'loss/train': 9.705102920532227} +01/22/2022 22:54:06 - INFO - codeparrot_training - Step 1351: {'lr': 0.0003375, 'samples': 43232, 'steps': 1350, 'loss/train': 9.122598648071289} +01/22/2022 22:54:07 - INFO - codeparrot_training - Step 1352: {'lr': 0.00033775, 'samples': 43264, 'steps': 1351, 'loss/train': 7.296748161315918} +01/22/2022 22:54:08 - INFO - codeparrot_training - Step 1353: {'lr': 0.00033800000000000003, 'samples': 43296, 'steps': 1352, 'loss/train': 6.338590145111084} +01/22/2022 22:54:08 - INFO - codeparrot_training - Step 1354: {'lr': 0.00033825, 'samples': 43328, 'steps': 1353, 'loss/train': 7.332754611968994} +01/22/2022 22:54:09 - INFO - codeparrot_training - Step 1355: {'lr': 0.00033850000000000004, 'samples': 43360, 'steps': 1354, 'loss/train': 8.541180610656738} +01/22/2022 22:54:09 - INFO - codeparrot_training - Step 1356: {'lr': 0.00033875, 'samples': 43392, 'steps': 1355, 'loss/train': 7.249197959899902} +01/22/2022 22:54:10 - INFO - codeparrot_training - Step 1357: {'lr': 0.00033900000000000005, 'samples': 43424, 'steps': 1356, 'loss/train': 6.799402236938477} +01/22/2022 22:54:10 - INFO - codeparrot_training - Step 1358: {'lr': 0.00033925, 'samples': 43456, 'steps': 1357, 'loss/train': 6.056573867797852} +01/22/2022 22:54:11 - INFO - codeparrot_training - Step 1359: {'lr': 0.0003395, 'samples': 43488, 'steps': 1358, 'loss/train': 7.223528861999512} +01/22/2022 22:54:12 - INFO - codeparrot_training - Step 1360: {'lr': 0.00033975, 'samples': 43520, 'steps': 1359, 'loss/train': 7.462124824523926} +01/22/2022 22:54:12 - INFO - codeparrot_training - Step 1361: {'lr': 0.00034, 'samples': 43552, 'steps': 1360, 'loss/train': 7.937614440917969} +01/22/2022 22:54:13 - INFO - codeparrot_training - Step 1362: {'lr': 0.00034025, 'samples': 43584, 'steps': 1361, 'loss/train': 7.842447280883789} +01/22/2022 22:54:13 - INFO - codeparrot_training - Step 1363: {'lr': 0.00034050000000000004, 'samples': 43616, 'steps': 1362, 'loss/train': 6.346263408660889} +01/22/2022 22:54:14 - INFO - codeparrot_training - Step 1364: {'lr': 0.00034075, 'samples': 43648, 'steps': 1363, 'loss/train': 8.39656925201416} +01/22/2022 22:54:14 - INFO - codeparrot_training - Step 1365: {'lr': 0.00034100000000000005, 'samples': 43680, 'steps': 1364, 'loss/train': 6.872430324554443} +01/22/2022 22:54:15 - INFO - codeparrot_training - Step 1366: {'lr': 0.00034125000000000003, 'samples': 43712, 'steps': 1365, 'loss/train': 7.642165184020996} +01/22/2022 22:54:15 - INFO - codeparrot_training - Step 1367: {'lr': 0.0003415, 'samples': 43744, 'steps': 1366, 'loss/train': 7.600316047668457} +01/22/2022 22:54:16 - INFO - codeparrot_training - Step 1368: {'lr': 0.00034175, 'samples': 43776, 'steps': 1367, 'loss/train': 6.722955226898193} +01/22/2022 22:54:17 - INFO - codeparrot_training - Step 1369: {'lr': 0.000342, 'samples': 43808, 'steps': 1368, 'loss/train': 8.034934043884277} +01/22/2022 22:54:20 - INFO - codeparrot_training - Step 1370: {'lr': 0.00034225, 'samples': 43840, 'steps': 1369, 'loss/train': 6.461850643157959} +01/22/2022 22:54:20 - INFO - codeparrot_training - Step 1371: {'lr': 0.00034250000000000003, 'samples': 43872, 'steps': 1370, 'loss/train': 6.799879550933838} +01/22/2022 22:54:21 - INFO - codeparrot_training - Step 1372: {'lr': 0.00034275, 'samples': 43904, 'steps': 1371, 'loss/train': 6.111268043518066} 
+01/22/2022 22:54:21 - INFO - codeparrot_training - Step 1373: {'lr': 0.00034300000000000004, 'samples': 43936, 'steps': 1372, 'loss/train': 9.400870323181152} +01/22/2022 22:54:22 - INFO - codeparrot_training - Step 1374: {'lr': 0.00034325, 'samples': 43968, 'steps': 1373, 'loss/train': 6.710910320281982} +01/22/2022 22:54:22 - INFO - codeparrot_training - Step 1375: {'lr': 0.00034350000000000006, 'samples': 44000, 'steps': 1374, 'loss/train': 7.904114723205566} +01/22/2022 22:54:23 - INFO - codeparrot_training - Step 1376: {'lr': 0.00034375, 'samples': 44032, 'steps': 1375, 'loss/train': 7.486062049865723} +01/22/2022 22:54:24 - INFO - codeparrot_training - Step 1377: {'lr': 0.00034399999999999996, 'samples': 44064, 'steps': 1376, 'loss/train': 7.464454174041748} +01/22/2022 22:54:24 - INFO - codeparrot_training - Step 1378: {'lr': 0.00034425, 'samples': 44096, 'steps': 1377, 'loss/train': 7.2441534996032715} +01/22/2022 22:54:25 - INFO - codeparrot_training - Step 1379: {'lr': 0.00034449999999999997, 'samples': 44128, 'steps': 1378, 'loss/train': 7.265435695648193} +01/22/2022 22:54:25 - INFO - codeparrot_training - Step 1380: {'lr': 0.00034475, 'samples': 44160, 'steps': 1379, 'loss/train': 6.988049507141113} +01/22/2022 22:54:26 - INFO - codeparrot_training - Step 1381: {'lr': 0.000345, 'samples': 44192, 'steps': 1380, 'loss/train': 7.972893714904785} +01/22/2022 22:54:26 - INFO - codeparrot_training - Step 1382: {'lr': 0.00034525, 'samples': 44224, 'steps': 1381, 'loss/train': 7.916919708251953} +01/22/2022 22:54:27 - INFO - codeparrot_training - Step 1383: {'lr': 0.0003455, 'samples': 44256, 'steps': 1382, 'loss/train': 8.008688926696777} +01/22/2022 22:54:27 - INFO - codeparrot_training - Step 1384: {'lr': 0.00034575000000000003, 'samples': 44288, 'steps': 1383, 'loss/train': 7.272824764251709} +01/22/2022 22:54:28 - INFO - codeparrot_training - Step 1385: {'lr': 0.000346, 'samples': 44320, 'steps': 1384, 'loss/train': 6.727482318878174} +01/22/2022 22:54:29 - INFO - codeparrot_training - Step 1386: {'lr': 0.00034625, 'samples': 44352, 'steps': 1385, 'loss/train': 6.561367034912109} +01/22/2022 22:54:29 - INFO - codeparrot_training - Step 1387: {'lr': 0.00034649999999999997, 'samples': 44384, 'steps': 1386, 'loss/train': 6.88720178604126} +01/22/2022 22:54:30 - INFO - codeparrot_training - Step 1388: {'lr': 0.00034675, 'samples': 44416, 'steps': 1387, 'loss/train': 6.690746784210205} +01/22/2022 22:54:30 - INFO - codeparrot_training - Step 1389: {'lr': 0.000347, 'samples': 44448, 'steps': 1388, 'loss/train': 7.015870571136475} +01/22/2022 22:54:31 - INFO - codeparrot_training - Step 1390: {'lr': 0.00034725, 'samples': 44480, 'steps': 1389, 'loss/train': 6.300332069396973} +01/22/2022 22:54:31 - INFO - codeparrot_training - Step 1391: {'lr': 0.0003475, 'samples': 44512, 'steps': 1390, 'loss/train': 8.066484451293945} +01/22/2022 22:54:32 - INFO - codeparrot_training - Step 1392: {'lr': 0.00034775, 'samples': 44544, 'steps': 1391, 'loss/train': 7.0581159591674805} +01/22/2022 22:54:32 - INFO - codeparrot_training - Step 1393: {'lr': 0.000348, 'samples': 44576, 'steps': 1392, 'loss/train': 7.120490550994873} +01/22/2022 22:54:33 - INFO - codeparrot_training - Step 1394: {'lr': 0.00034825000000000004, 'samples': 44608, 'steps': 1393, 'loss/train': 6.8539347648620605} +01/22/2022 22:54:34 - INFO - codeparrot_training - Step 1395: {'lr': 0.00034849999999999996, 'samples': 44640, 'steps': 1394, 'loss/train': 6.705235958099365} +01/22/2022 22:54:34 - INFO - codeparrot_training - Step 1396: 
{'lr': 0.00034875, 'samples': 44672, 'steps': 1395, 'loss/train': 7.126040458679199} +01/22/2022 22:54:35 - INFO - codeparrot_training - Step 1397: {'lr': 0.00034899999999999997, 'samples': 44704, 'steps': 1396, 'loss/train': 7.538510322570801} +01/22/2022 22:54:35 - INFO - codeparrot_training - Step 1398: {'lr': 0.00034925, 'samples': 44736, 'steps': 1397, 'loss/train': 7.688287258148193} +01/22/2022 22:54:36 - INFO - codeparrot_training - Step 1399: {'lr': 0.0003495, 'samples': 44768, 'steps': 1398, 'loss/train': 6.918392181396484} +01/22/2022 22:54:38 - INFO - codeparrot_training - Step 1400: {'lr': 0.00034975, 'samples': 44800, 'steps': 1399, 'loss/train': 7.289069652557373} +01/22/2022 22:54:38 - INFO - codeparrot_training - Step 1401: {'lr': 0.00035, 'samples': 44832, 'steps': 1400, 'loss/train': 5.919920444488525} +01/22/2022 22:54:39 - INFO - codeparrot_training - Step 1402: {'lr': 0.00035025000000000003, 'samples': 44864, 'steps': 1401, 'loss/train': 6.975988388061523} +01/22/2022 22:54:39 - INFO - codeparrot_training - Step 1403: {'lr': 0.0003505, 'samples': 44896, 'steps': 1402, 'loss/train': 6.316102027893066} +01/22/2022 22:54:40 - INFO - codeparrot_training - Step 1404: {'lr': 0.00035075, 'samples': 44928, 'steps': 1403, 'loss/train': 7.563632011413574} +01/22/2022 22:54:41 - INFO - codeparrot_training - Step 1405: {'lr': 0.00035099999999999997, 'samples': 44960, 'steps': 1404, 'loss/train': 6.695364475250244} +01/22/2022 22:54:41 - INFO - codeparrot_training - Step 1406: {'lr': 0.00035125, 'samples': 44992, 'steps': 1405, 'loss/train': 6.625555992126465} +01/22/2022 22:54:42 - INFO - codeparrot_training - Step 1407: {'lr': 0.0003515, 'samples': 45024, 'steps': 1406, 'loss/train': 7.363807678222656} +01/22/2022 22:54:42 - INFO - codeparrot_training - Step 1408: {'lr': 0.00035175, 'samples': 45056, 'steps': 1407, 'loss/train': 7.472508430480957} +01/22/2022 22:54:43 - INFO - codeparrot_training - Step 1409: {'lr': 0.000352, 'samples': 45088, 'steps': 1408, 'loss/train': 7.577577590942383} +01/22/2022 22:54:43 - INFO - codeparrot_training - Step 1410: {'lr': 0.00035225, 'samples': 45120, 'steps': 1409, 'loss/train': 7.99955940246582} +01/22/2022 22:54:44 - INFO - codeparrot_training - Step 1411: {'lr': 0.0003525, 'samples': 45152, 'steps': 1410, 'loss/train': 7.512993335723877} +01/22/2022 22:54:45 - INFO - codeparrot_training - Step 1412: {'lr': 0.00035275000000000004, 'samples': 45184, 'steps': 1411, 'loss/train': 6.081205368041992} +01/22/2022 22:54:45 - INFO - codeparrot_training - Step 1413: {'lr': 0.00035299999999999996, 'samples': 45216, 'steps': 1412, 'loss/train': 7.281032085418701} +01/22/2022 22:54:46 - INFO - codeparrot_training - Step 1414: {'lr': 0.00035325, 'samples': 45248, 'steps': 1413, 'loss/train': 7.487579822540283} +01/22/2022 22:54:46 - INFO - codeparrot_training - Step 1415: {'lr': 0.0003535, 'samples': 45280, 'steps': 1414, 'loss/train': 6.651734352111816} +01/22/2022 22:54:47 - INFO - codeparrot_training - Step 1416: {'lr': 0.00035375, 'samples': 45312, 'steps': 1415, 'loss/train': 6.61850643157959} +01/22/2022 22:54:47 - INFO - codeparrot_training - Step 1417: {'lr': 0.000354, 'samples': 45344, 'steps': 1416, 'loss/train': 6.526944160461426} +01/22/2022 22:54:48 - INFO - codeparrot_training - Step 1418: {'lr': 0.00035425, 'samples': 45376, 'steps': 1417, 'loss/train': 6.215895652770996} +01/22/2022 22:54:48 - INFO - codeparrot_training - Step 1419: {'lr': 0.0003545, 'samples': 45408, 'steps': 1418, 'loss/train': 6.275140285491943} +01/22/2022 22:54:49 
- INFO - codeparrot_training - Step 1420: {'lr': 0.00035475000000000003, 'samples': 45440, 'steps': 1419, 'loss/train': 6.181918144226074} +01/22/2022 22:54:50 - INFO - codeparrot_training - Step 1421: {'lr': 0.000355, 'samples': 45472, 'steps': 1420, 'loss/train': 6.145594596862793} +01/22/2022 22:54:50 - INFO - codeparrot_training - Step 1422: {'lr': 0.00035525000000000004, 'samples': 45504, 'steps': 1421, 'loss/train': 6.20737886428833} +01/22/2022 22:54:51 - INFO - codeparrot_training - Step 1423: {'lr': 0.00035549999999999997, 'samples': 45536, 'steps': 1422, 'loss/train': 6.205904006958008} +01/22/2022 22:54:51 - INFO - codeparrot_training - Step 1424: {'lr': 0.00035575, 'samples': 45568, 'steps': 1423, 'loss/train': 6.258909225463867} +01/22/2022 22:54:52 - INFO - codeparrot_training - Step 1425: {'lr': 0.000356, 'samples': 45600, 'steps': 1424, 'loss/train': 6.104935169219971} +01/22/2022 22:54:52 - INFO - codeparrot_training - Step 1426: {'lr': 0.00035625, 'samples': 45632, 'steps': 1425, 'loss/train': 6.161203384399414} +01/22/2022 22:54:53 - INFO - codeparrot_training - Step 1427: {'lr': 0.0003565, 'samples': 45664, 'steps': 1426, 'loss/train': 5.972276210784912} +01/22/2022 22:54:53 - INFO - codeparrot_training - Step 1428: {'lr': 0.00035675, 'samples': 45696, 'steps': 1427, 'loss/train': 8.120165824890137} +01/22/2022 22:54:54 - INFO - codeparrot_training - Step 1429: {'lr': 0.000357, 'samples': 45728, 'steps': 1428, 'loss/train': 8.247547149658203} +01/22/2022 22:54:54 - INFO - codeparrot_training - Step 1430: {'lr': 0.00035725000000000004, 'samples': 45760, 'steps': 1429, 'loss/train': 7.932175636291504} +01/22/2022 22:54:55 - INFO - codeparrot_training - Step 1431: {'lr': 0.0003575, 'samples': 45792, 'steps': 1430, 'loss/train': 7.355058193206787} +01/22/2022 22:54:56 - INFO - codeparrot_training - Step 1432: {'lr': 0.00035775, 'samples': 45824, 'steps': 1431, 'loss/train': 7.122995376586914} +01/22/2022 22:54:56 - INFO - codeparrot_training - Step 1433: {'lr': 0.000358, 'samples': 45856, 'steps': 1432, 'loss/train': 7.2107696533203125} +01/22/2022 22:54:57 - INFO - codeparrot_training - Step 1434: {'lr': 0.00035825, 'samples': 45888, 'steps': 1433, 'loss/train': 8.775432586669922} +01/22/2022 22:54:57 - INFO - codeparrot_training - Step 1435: {'lr': 0.0003585, 'samples': 45920, 'steps': 1434, 'loss/train': 6.572107315063477} +01/22/2022 22:54:58 - INFO - codeparrot_training - Step 1436: {'lr': 0.00035875, 'samples': 45952, 'steps': 1435, 'loss/train': 7.127361297607422} +01/22/2022 22:54:58 - INFO - codeparrot_training - Step 1437: {'lr': 0.000359, 'samples': 45984, 'steps': 1436, 'loss/train': 8.432510375976562} +01/22/2022 22:54:59 - INFO - codeparrot_training - Step 1438: {'lr': 0.00035925000000000003, 'samples': 46016, 'steps': 1437, 'loss/train': 6.755761623382568} +01/22/2022 22:54:59 - INFO - codeparrot_training - Step 1439: {'lr': 0.0003595, 'samples': 46048, 'steps': 1438, 'loss/train': 7.619016647338867} +01/22/2022 22:55:01 - INFO - codeparrot_training - Step 1440: {'lr': 0.00035975000000000004, 'samples': 46080, 'steps': 1439, 'loss/train': 7.255987644195557} +01/22/2022 22:55:01 - INFO - codeparrot_training - Step 1441: {'lr': 0.00035999999999999997, 'samples': 46112, 'steps': 1440, 'loss/train': 7.82636833190918} +01/22/2022 22:55:02 - INFO - codeparrot_training - Step 1442: {'lr': 0.00036025, 'samples': 46144, 'steps': 1441, 'loss/train': 7.195211887359619} +01/22/2022 22:55:02 - INFO - codeparrot_training - Step 1443: {'lr': 0.0003605, 'samples': 46176, 
'steps': 1442, 'loss/train': 7.43692684173584} +01/22/2022 22:55:03 - INFO - codeparrot_training - Step 1444: {'lr': 0.00036075, 'samples': 46208, 'steps': 1443, 'loss/train': 6.954167366027832} +01/22/2022 22:55:03 - INFO - codeparrot_training - Step 1445: {'lr': 0.000361, 'samples': 46240, 'steps': 1444, 'loss/train': 6.370446681976318} +01/22/2022 22:55:04 - INFO - codeparrot_training - Step 1446: {'lr': 0.00036125, 'samples': 46272, 'steps': 1445, 'loss/train': 7.701722145080566} +01/22/2022 22:55:05 - INFO - codeparrot_training - Step 1447: {'lr': 0.0003615, 'samples': 46304, 'steps': 1446, 'loss/train': 6.91168212890625} +01/22/2022 22:55:05 - INFO - codeparrot_training - Step 1448: {'lr': 0.00036175000000000004, 'samples': 46336, 'steps': 1447, 'loss/train': 8.130128860473633} +01/22/2022 22:55:06 - INFO - codeparrot_training - Step 1449: {'lr': 0.000362, 'samples': 46368, 'steps': 1448, 'loss/train': 6.351043224334717} +01/22/2022 22:55:06 - INFO - codeparrot_training - Step 1450: {'lr': 0.00036225000000000005, 'samples': 46400, 'steps': 1449, 'loss/train': 6.898026943206787} +01/22/2022 22:55:07 - INFO - codeparrot_training - Step 1451: {'lr': 0.0003625, 'samples': 46432, 'steps': 1450, 'loss/train': 8.771586418151855} +01/22/2022 22:55:07 - INFO - codeparrot_training - Step 1452: {'lr': 0.00036275, 'samples': 46464, 'steps': 1451, 'loss/train': 6.99213981628418} +01/22/2022 22:55:08 - INFO - codeparrot_training - Step 1453: {'lr': 0.000363, 'samples': 46496, 'steps': 1452, 'loss/train': 7.824850082397461} +01/22/2022 22:55:08 - INFO - codeparrot_training - Step 1454: {'lr': 0.00036325, 'samples': 46528, 'steps': 1453, 'loss/train': 7.05818510055542} +01/22/2022 22:55:09 - INFO - codeparrot_training - Step 1455: {'lr': 0.0003635, 'samples': 46560, 'steps': 1454, 'loss/train': 7.868736267089844} +01/22/2022 22:55:10 - INFO - codeparrot_training - Step 1456: {'lr': 0.00036375000000000003, 'samples': 46592, 'steps': 1455, 'loss/train': 7.7728166580200195} +01/22/2022 22:55:10 - INFO - codeparrot_training - Step 1457: {'lr': 0.000364, 'samples': 46624, 'steps': 1456, 'loss/train': 6.774587154388428} +01/22/2022 22:55:11 - INFO - codeparrot_training - Step 1458: {'lr': 0.00036425000000000004, 'samples': 46656, 'steps': 1457, 'loss/train': 6.833324909210205} +01/22/2022 22:55:11 - INFO - codeparrot_training - Step 1459: {'lr': 0.0003645, 'samples': 46688, 'steps': 1458, 'loss/train': 9.568245887756348} +01/22/2022 22:55:12 - INFO - codeparrot_training - Step 1460: {'lr': 0.00036475, 'samples': 46720, 'steps': 1459, 'loss/train': 7.745977878570557} +01/22/2022 22:55:12 - INFO - codeparrot_training - Step 1461: {'lr': 0.000365, 'samples': 46752, 'steps': 1460, 'loss/train': 7.829929351806641} +01/22/2022 22:55:13 - INFO - codeparrot_training - Step 1462: {'lr': 0.00036525, 'samples': 46784, 'steps': 1461, 'loss/train': 10.096686363220215} +01/22/2022 22:55:13 - INFO - codeparrot_training - Step 1463: {'lr': 0.0003655, 'samples': 46816, 'steps': 1462, 'loss/train': 7.392120361328125} +01/22/2022 22:55:14 - INFO - codeparrot_training - Step 1464: {'lr': 0.00036575, 'samples': 46848, 'steps': 1463, 'loss/train': 6.300974369049072} +01/22/2022 22:55:15 - INFO - codeparrot_training - Step 1465: {'lr': 0.000366, 'samples': 46880, 'steps': 1464, 'loss/train': 7.171439170837402} +01/22/2022 22:55:15 - INFO - codeparrot_training - Step 1466: {'lr': 0.00036625000000000004, 'samples': 46912, 'steps': 1465, 'loss/train': 8.635977745056152} +01/22/2022 22:55:16 - INFO - codeparrot_training - Step 
1467: {'lr': 0.0003665, 'samples': 46944, 'steps': 1466, 'loss/train': 7.296307563781738} +01/22/2022 22:55:16 - INFO - codeparrot_training - Step 1468: {'lr': 0.00036675000000000005, 'samples': 46976, 'steps': 1467, 'loss/train': 8.066701889038086} +01/22/2022 22:55:18 - INFO - codeparrot_training - Step 1469: {'lr': 0.000367, 'samples': 47008, 'steps': 1468, 'loss/train': 7.321585178375244} +01/22/2022 22:55:18 - INFO - codeparrot_training - Step 1470: {'lr': 0.00036725, 'samples': 47040, 'steps': 1469, 'loss/train': 8.757060050964355} +01/22/2022 22:55:19 - INFO - codeparrot_training - Step 1471: {'lr': 0.0003675, 'samples': 47072, 'steps': 1470, 'loss/train': 7.700822353363037} +01/22/2022 22:55:19 - INFO - codeparrot_training - Step 1472: {'lr': 0.00036775, 'samples': 47104, 'steps': 1471, 'loss/train': 8.021994590759277} +01/22/2022 22:55:20 - INFO - codeparrot_training - Step 1473: {'lr': 0.000368, 'samples': 47136, 'steps': 1472, 'loss/train': 6.688321590423584} +01/22/2022 22:55:20 - INFO - codeparrot_training - Step 1474: {'lr': 0.00036825000000000003, 'samples': 47168, 'steps': 1473, 'loss/train': 5.419166088104248} +01/22/2022 22:55:21 - INFO - codeparrot_training - Step 1475: {'lr': 0.0003685, 'samples': 47200, 'steps': 1474, 'loss/train': 7.092249393463135} +01/22/2022 22:55:22 - INFO - codeparrot_training - Step 1476: {'lr': 0.00036875000000000005, 'samples': 47232, 'steps': 1475, 'loss/train': 6.065741539001465} +01/22/2022 22:55:22 - INFO - codeparrot_training - Step 1477: {'lr': 0.000369, 'samples': 47264, 'steps': 1476, 'loss/train': 7.116474628448486} +01/22/2022 22:55:23 - INFO - codeparrot_training - Step 1478: {'lr': 0.00036925, 'samples': 47296, 'steps': 1477, 'loss/train': 7.1212568283081055} +01/22/2022 22:55:23 - INFO - codeparrot_training - Step 1479: {'lr': 0.0003695, 'samples': 47328, 'steps': 1478, 'loss/train': 6.605282306671143} +01/22/2022 22:55:24 - INFO - codeparrot_training - Step 1480: {'lr': 0.00036975, 'samples': 47360, 'steps': 1479, 'loss/train': 8.508355140686035} +01/22/2022 22:55:24 - INFO - codeparrot_training - Step 1481: {'lr': 0.00037, 'samples': 47392, 'steps': 1480, 'loss/train': 7.123233318328857} +01/22/2022 22:55:25 - INFO - codeparrot_training - Step 1482: {'lr': 0.00037025000000000003, 'samples': 47424, 'steps': 1481, 'loss/train': 7.21391487121582} +01/22/2022 22:55:25 - INFO - codeparrot_training - Step 1483: {'lr': 0.0003705, 'samples': 47456, 'steps': 1482, 'loss/train': 7.651086807250977} +01/22/2022 22:55:26 - INFO - codeparrot_training - Step 1484: {'lr': 0.00037075000000000004, 'samples': 47488, 'steps': 1483, 'loss/train': 6.706630706787109} +01/22/2022 22:55:27 - INFO - codeparrot_training - Step 1485: {'lr': 0.000371, 'samples': 47520, 'steps': 1484, 'loss/train': 6.7554731369018555} +01/22/2022 22:55:27 - INFO - codeparrot_training - Step 1486: {'lr': 0.00037125000000000005, 'samples': 47552, 'steps': 1485, 'loss/train': 6.054472923278809} +01/22/2022 22:55:28 - INFO - codeparrot_training - Step 1487: {'lr': 0.00037150000000000003, 'samples': 47584, 'steps': 1486, 'loss/train': 6.530693531036377} +01/22/2022 22:55:28 - INFO - codeparrot_training - Step 1488: {'lr': 0.00037175, 'samples': 47616, 'steps': 1487, 'loss/train': 7.312196731567383} +01/22/2022 22:55:29 - INFO - codeparrot_training - Step 1489: {'lr': 0.000372, 'samples': 47648, 'steps': 1488, 'loss/train': 6.130829334259033} +01/22/2022 22:55:29 - INFO - codeparrot_training - Step 1490: {'lr': 0.00037225, 'samples': 47680, 'steps': 1489, 'loss/train': 
7.210897922515869} +01/22/2022 22:55:30 - INFO - codeparrot_training - Step 1491: {'lr': 0.0003725, 'samples': 47712, 'steps': 1490, 'loss/train': 6.795236110687256} +01/22/2022 22:55:30 - INFO - codeparrot_training - Step 1492: {'lr': 0.00037275000000000003, 'samples': 47744, 'steps': 1491, 'loss/train': 6.6370344161987305} +01/22/2022 22:55:31 - INFO - codeparrot_training - Step 1493: {'lr': 0.000373, 'samples': 47776, 'steps': 1492, 'loss/train': 7.055462837219238} +01/22/2022 22:55:32 - INFO - codeparrot_training - Step 1494: {'lr': 0.00037325000000000005, 'samples': 47808, 'steps': 1493, 'loss/train': 5.830691814422607} +01/22/2022 22:55:32 - INFO - codeparrot_training - Step 1495: {'lr': 0.0003735, 'samples': 47840, 'steps': 1494, 'loss/train': 4.9607038497924805} +01/22/2022 22:55:33 - INFO - codeparrot_training - Step 1496: {'lr': 0.00037375000000000006, 'samples': 47872, 'steps': 1495, 'loss/train': 4.746813774108887} +01/22/2022 22:55:33 - INFO - codeparrot_training - Step 1497: {'lr': 0.000374, 'samples': 47904, 'steps': 1496, 'loss/train': 4.962794780731201} +01/22/2022 22:55:34 - INFO - codeparrot_training - Step 1498: {'lr': 0.00037425, 'samples': 47936, 'steps': 1497, 'loss/train': 4.618837833404541} +01/22/2022 22:55:34 - INFO - codeparrot_training - Step 1499: {'lr': 0.0003745, 'samples': 47968, 'steps': 1498, 'loss/train': 7.642550468444824} +01/22/2022 22:55:35 - INFO - codeparrot_training - Step 1500: {'lr': 0.00037475000000000003, 'samples': 48000, 'steps': 1499, 'loss/train': 6.583582401275635} +01/22/2022 22:55:36 - INFO - codeparrot_training - Step 1501: {'lr': 0.000375, 'samples': 48032, 'steps': 1500, 'loss/train': 7.682594299316406} +01/22/2022 22:55:37 - INFO - codeparrot_training - Step 1502: {'lr': 0.00037525, 'samples': 48064, 'steps': 1501, 'loss/train': 8.087519645690918} +01/22/2022 22:55:37 - INFO - codeparrot_training - Step 1503: {'lr': 0.0003755, 'samples': 48096, 'steps': 1502, 'loss/train': 6.534908294677734} +01/22/2022 22:55:38 - INFO - codeparrot_training - Step 1504: {'lr': 0.00037575, 'samples': 48128, 'steps': 1503, 'loss/train': 6.448390483856201} +01/22/2022 22:55:38 - INFO - codeparrot_training - Step 1505: {'lr': 0.00037600000000000003, 'samples': 48160, 'steps': 1504, 'loss/train': 7.150945663452148} +01/22/2022 22:55:39 - INFO - codeparrot_training - Step 1506: {'lr': 0.00037624999999999996, 'samples': 48192, 'steps': 1505, 'loss/train': 6.68596887588501} +01/22/2022 22:55:39 - INFO - codeparrot_training - Step 1507: {'lr': 0.0003765, 'samples': 48224, 'steps': 1506, 'loss/train': 5.801848888397217} +01/22/2022 22:55:40 - INFO - codeparrot_training - Step 1508: {'lr': 0.00037674999999999997, 'samples': 48256, 'steps': 1507, 'loss/train': 7.915428638458252} +01/22/2022 22:55:40 - INFO - codeparrot_training - Step 1509: {'lr': 0.000377, 'samples': 48288, 'steps': 1508, 'loss/train': 6.742934703826904} +01/22/2022 22:55:41 - INFO - codeparrot_training - Step 1510: {'lr': 0.00037725, 'samples': 48320, 'steps': 1509, 'loss/train': 6.44883918762207} +01/22/2022 22:55:42 - INFO - codeparrot_training - Step 1511: {'lr': 0.0003775, 'samples': 48352, 'steps': 1510, 'loss/train': 6.357187747955322} +01/22/2022 22:55:42 - INFO - codeparrot_training - Step 1512: {'lr': 0.00037775, 'samples': 48384, 'steps': 1511, 'loss/train': 6.553242206573486} +01/22/2022 22:55:43 - INFO - codeparrot_training - Step 1513: {'lr': 0.000378, 'samples': 48416, 'steps': 1512, 'loss/train': 6.965249538421631} +01/22/2022 22:55:43 - INFO - codeparrot_training - Step 1514: 
{'lr': 0.00037825, 'samples': 48448, 'steps': 1513, 'loss/train': 6.839247703552246} +01/22/2022 22:55:44 - INFO - codeparrot_training - Step 1515: {'lr': 0.0003785, 'samples': 48480, 'steps': 1514, 'loss/train': 7.432754993438721} +01/22/2022 22:55:44 - INFO - codeparrot_training - Step 1516: {'lr': 0.00037874999999999996, 'samples': 48512, 'steps': 1515, 'loss/train': 6.564929008483887} +01/22/2022 22:55:45 - INFO - codeparrot_training - Step 1517: {'lr': 0.000379, 'samples': 48544, 'steps': 1516, 'loss/train': 7.783709526062012} +01/22/2022 22:55:45 - INFO - codeparrot_training - Step 1518: {'lr': 0.00037925, 'samples': 48576, 'steps': 1517, 'loss/train': 6.949582576751709} +01/22/2022 22:55:46 - INFO - codeparrot_training - Step 1519: {'lr': 0.0003795, 'samples': 48608, 'steps': 1518, 'loss/train': 7.174350738525391} +01/22/2022 22:55:47 - INFO - codeparrot_training - Step 1520: {'lr': 0.00037975, 'samples': 48640, 'steps': 1519, 'loss/train': 6.766066551208496} +01/22/2022 22:55:47 - INFO - codeparrot_training - Step 1521: {'lr': 0.00038, 'samples': 48672, 'steps': 1520, 'loss/train': 7.232879638671875} +01/22/2022 22:55:48 - INFO - codeparrot_training - Step 1522: {'lr': 0.00038025, 'samples': 48704, 'steps': 1521, 'loss/train': 8.640250205993652} +01/22/2022 22:55:48 - INFO - codeparrot_training - Step 1523: {'lr': 0.00038050000000000003, 'samples': 48736, 'steps': 1522, 'loss/train': 8.177327156066895} +01/22/2022 22:55:49 - INFO - codeparrot_training - Step 1524: {'lr': 0.00038075, 'samples': 48768, 'steps': 1523, 'loss/train': 7.16298770904541} +01/22/2022 22:55:49 - INFO - codeparrot_training - Step 1525: {'lr': 0.000381, 'samples': 48800, 'steps': 1524, 'loss/train': 7.01773738861084} +01/22/2022 22:55:50 - INFO - codeparrot_training - Step 1526: {'lr': 0.00038124999999999997, 'samples': 48832, 'steps': 1525, 'loss/train': 5.437597274780273} +01/22/2022 22:55:50 - INFO - codeparrot_training - Step 1527: {'lr': 0.0003815, 'samples': 48864, 'steps': 1526, 'loss/train': 7.444111347198486} +01/22/2022 22:55:51 - INFO - codeparrot_training - Step 1528: {'lr': 0.00038175, 'samples': 48896, 'steps': 1527, 'loss/train': 7.083988666534424} +01/22/2022 22:55:52 - INFO - codeparrot_training - Step 1529: {'lr': 0.000382, 'samples': 48928, 'steps': 1528, 'loss/train': 6.85234260559082} +01/22/2022 22:55:53 - INFO - codeparrot_training - Step 1530: {'lr': 0.00038225, 'samples': 48960, 'steps': 1529, 'loss/train': 6.68505859375} +01/22/2022 22:55:53 - INFO - codeparrot_training - Step 1531: {'lr': 0.00038250000000000003, 'samples': 48992, 'steps': 1530, 'loss/train': 7.05059814453125} +01/22/2022 22:55:54 - INFO - codeparrot_training - Step 1532: {'lr': 0.00038275, 'samples': 49024, 'steps': 1531, 'loss/train': 6.819420337677002} +01/22/2022 22:55:55 - INFO - codeparrot_training - Step 1533: {'lr': 0.00038300000000000004, 'samples': 49056, 'steps': 1532, 'loss/train': 7.485810279846191} +01/22/2022 22:55:55 - INFO - codeparrot_training - Step 1534: {'lr': 0.00038324999999999996, 'samples': 49088, 'steps': 1533, 'loss/train': 7.213313579559326} +01/22/2022 22:55:56 - INFO - codeparrot_training - Step 1535: {'lr': 0.0003835, 'samples': 49120, 'steps': 1534, 'loss/train': 6.549358367919922} +01/22/2022 22:55:56 - INFO - codeparrot_training - Step 1536: {'lr': 0.00038375, 'samples': 49152, 'steps': 1535, 'loss/train': 7.312503337860107} +01/22/2022 22:55:57 - INFO - codeparrot_training - Step 1537: {'lr': 0.000384, 'samples': 49184, 'steps': 1536, 'loss/train': 6.898200988769531} +01/22/2022 
22:55:57 - INFO - codeparrot_training - Step 1538: {'lr': 0.00038425, 'samples': 49216, 'steps': 1537, 'loss/train': 6.924280166625977} +01/22/2022 22:55:58 - INFO - codeparrot_training - Step 1539: {'lr': 0.0003845, 'samples': 49248, 'steps': 1538, 'loss/train': 7.258934020996094} +01/22/2022 22:55:58 - INFO - codeparrot_training - Step 1540: {'lr': 0.00038475, 'samples': 49280, 'steps': 1539, 'loss/train': 7.138413906097412} +01/22/2022 22:55:59 - INFO - codeparrot_training - Step 1541: {'lr': 0.00038500000000000003, 'samples': 49312, 'steps': 1540, 'loss/train': 7.986020088195801} +01/22/2022 22:56:00 - INFO - codeparrot_training - Step 1542: {'lr': 0.00038525, 'samples': 49344, 'steps': 1541, 'loss/train': 6.520683288574219} +01/22/2022 22:56:00 - INFO - codeparrot_training - Step 1543: {'lr': 0.0003855, 'samples': 49376, 'steps': 1542, 'loss/train': 6.944231033325195} +01/22/2022 22:56:01 - INFO - codeparrot_training - Step 1544: {'lr': 0.00038574999999999997, 'samples': 49408, 'steps': 1543, 'loss/train': 6.504802703857422} +01/22/2022 22:56:01 - INFO - codeparrot_training - Step 1545: {'lr': 0.000386, 'samples': 49440, 'steps': 1544, 'loss/train': 7.64503812789917} +01/22/2022 22:56:02 - INFO - codeparrot_training - Step 1546: {'lr': 0.00038625, 'samples': 49472, 'steps': 1545, 'loss/train': 7.904760837554932} +01/22/2022 22:56:02 - INFO - codeparrot_training - Step 1547: {'lr': 0.0003865, 'samples': 49504, 'steps': 1546, 'loss/train': 6.1765899658203125} +01/22/2022 22:56:03 - INFO - codeparrot_training - Step 1548: {'lr': 0.00038675, 'samples': 49536, 'steps': 1547, 'loss/train': 6.945890426635742} +01/22/2022 22:56:03 - INFO - codeparrot_training - Step 1549: {'lr': 0.00038700000000000003, 'samples': 49568, 'steps': 1548, 'loss/train': 6.3751606941223145} +01/22/2022 22:56:04 - INFO - codeparrot_training - Step 1550: {'lr': 0.00038725, 'samples': 49600, 'steps': 1549, 'loss/train': 6.692594051361084} +01/22/2022 22:56:05 - INFO - codeparrot_training - Step 1551: {'lr': 0.00038750000000000004, 'samples': 49632, 'steps': 1550, 'loss/train': 6.177323341369629} +01/22/2022 22:56:05 - INFO - codeparrot_training - Step 1552: {'lr': 0.00038774999999999997, 'samples': 49664, 'steps': 1551, 'loss/train': 6.453592300415039} +01/22/2022 22:56:06 - INFO - codeparrot_training - Step 1553: {'lr': 0.000388, 'samples': 49696, 'steps': 1552, 'loss/train': 6.278348922729492} +01/22/2022 22:56:06 - INFO - codeparrot_training - Step 1554: {'lr': 0.00038825, 'samples': 49728, 'steps': 1553, 'loss/train': 7.059391498565674} +01/22/2022 22:56:07 - INFO - codeparrot_training - Step 1555: {'lr': 0.0003885, 'samples': 49760, 'steps': 1554, 'loss/train': 7.748558044433594} +01/22/2022 22:56:07 - INFO - codeparrot_training - Step 1556: {'lr': 0.00038875, 'samples': 49792, 'steps': 1555, 'loss/train': 6.465755462646484} +01/22/2022 22:56:08 - INFO - codeparrot_training - Step 1557: {'lr': 0.000389, 'samples': 49824, 'steps': 1556, 'loss/train': 6.852005958557129} +01/22/2022 22:56:08 - INFO - codeparrot_training - Step 1558: {'lr': 0.00038925, 'samples': 49856, 'steps': 1557, 'loss/train': 6.539154529571533} +01/22/2022 22:56:09 - INFO - codeparrot_training - Step 1559: {'lr': 0.00038950000000000003, 'samples': 49888, 'steps': 1558, 'loss/train': 6.2046799659729} +01/22/2022 22:56:10 - INFO - codeparrot_training - Step 1560: {'lr': 0.00038975, 'samples': 49920, 'steps': 1559, 'loss/train': 6.265097618103027} +01/22/2022 22:56:11 - INFO - codeparrot_training - Step 1561: {'lr': 0.00039000000000000005, 
'samples': 49952, 'steps': 1560, 'loss/train': 6.698146343231201} +01/22/2022 22:56:11 - INFO - codeparrot_training - Step 1562: {'lr': 0.00039024999999999997, 'samples': 49984, 'steps': 1561, 'loss/train': 6.081639289855957} +01/22/2022 22:56:12 - INFO - codeparrot_training - Step 1563: {'lr': 0.0003905, 'samples': 50016, 'steps': 1562, 'loss/train': 7.189520359039307} +01/22/2022 22:56:12 - INFO - codeparrot_training - Step 1564: {'lr': 0.00039075, 'samples': 50048, 'steps': 1563, 'loss/train': 7.160435676574707} +01/22/2022 22:56:13 - INFO - codeparrot_training - Step 1565: {'lr': 0.000391, 'samples': 50080, 'steps': 1564, 'loss/train': 6.7341508865356445} +01/22/2022 22:56:13 - INFO - codeparrot_training - Step 1566: {'lr': 0.00039125, 'samples': 50112, 'steps': 1565, 'loss/train': 6.812106132507324} +01/22/2022 22:56:14 - INFO - codeparrot_training - Step 1567: {'lr': 0.00039150000000000003, 'samples': 50144, 'steps': 1566, 'loss/train': 3.0386204719543457} +01/22/2022 22:56:15 - INFO - codeparrot_training - Step 1568: {'lr': 0.00039175, 'samples': 50176, 'steps': 1567, 'loss/train': 6.281991958618164} +01/22/2022 22:56:15 - INFO - codeparrot_training - Step 1569: {'lr': 0.00039200000000000004, 'samples': 50208, 'steps': 1568, 'loss/train': 7.109677314758301} +01/22/2022 22:56:16 - INFO - codeparrot_training - Step 1570: {'lr': 0.00039225, 'samples': 50240, 'steps': 1569, 'loss/train': 6.929615020751953} +01/22/2022 22:56:16 - INFO - codeparrot_training - Step 1571: {'lr': 0.0003925, 'samples': 50272, 'steps': 1570, 'loss/train': 6.756868839263916} +01/22/2022 22:56:17 - INFO - codeparrot_training - Step 1572: {'lr': 0.00039275, 'samples': 50304, 'steps': 1571, 'loss/train': 7.527432918548584} +01/22/2022 22:56:17 - INFO - codeparrot_training - Step 1573: {'lr': 0.000393, 'samples': 50336, 'steps': 1572, 'loss/train': 7.519040107727051} +01/22/2022 22:56:18 - INFO - codeparrot_training - Step 1574: {'lr': 0.00039325, 'samples': 50368, 'steps': 1573, 'loss/train': 7.019894599914551} +01/22/2022 22:56:18 - INFO - codeparrot_training - Step 1575: {'lr': 0.0003935, 'samples': 50400, 'steps': 1574, 'loss/train': 7.3103346824646} +01/22/2022 22:56:19 - INFO - codeparrot_training - Step 1576: {'lr': 0.00039375, 'samples': 50432, 'steps': 1575, 'loss/train': 7.6645379066467285} +01/22/2022 22:56:20 - INFO - codeparrot_training - Step 1577: {'lr': 0.00039400000000000004, 'samples': 50464, 'steps': 1576, 'loss/train': 6.965704917907715} +01/22/2022 22:56:20 - INFO - codeparrot_training - Step 1578: {'lr': 0.00039425, 'samples': 50496, 'steps': 1577, 'loss/train': 7.593804359436035} +01/22/2022 22:56:21 - INFO - codeparrot_training - Step 1579: {'lr': 0.00039450000000000005, 'samples': 50528, 'steps': 1578, 'loss/train': 7.721973419189453} +01/22/2022 22:56:21 - INFO - codeparrot_training - Step 1580: {'lr': 0.00039474999999999997, 'samples': 50560, 'steps': 1579, 'loss/train': 6.753321170806885} +01/22/2022 22:56:22 - INFO - codeparrot_training - Step 1581: {'lr': 0.000395, 'samples': 50592, 'steps': 1580, 'loss/train': 5.02427339553833} +01/22/2022 22:56:22 - INFO - codeparrot_training - Step 1582: {'lr': 0.00039525, 'samples': 50624, 'steps': 1581, 'loss/train': 6.3336896896362305} +01/22/2022 22:56:23 - INFO - codeparrot_training - Step 1583: {'lr': 0.0003955, 'samples': 50656, 'steps': 1582, 'loss/train': 8.096038818359375} +01/22/2022 22:56:23 - INFO - codeparrot_training - Step 1584: {'lr': 0.00039575, 'samples': 50688, 'steps': 1583, 'loss/train': 6.699711799621582} +01/22/2022 22:56:24 
- INFO - codeparrot_training - Step 1585: {'lr': 0.00039600000000000003, 'samples': 50720, 'steps': 1584, 'loss/train': 5.857112407684326} +01/22/2022 22:56:25 - INFO - codeparrot_training - Step 1586: {'lr': 0.00039625, 'samples': 50752, 'steps': 1585, 'loss/train': 7.2545952796936035} +01/22/2022 22:56:25 - INFO - codeparrot_training - Step 1587: {'lr': 0.00039650000000000004, 'samples': 50784, 'steps': 1586, 'loss/train': 7.077922821044922} +01/22/2022 22:56:26 - INFO - codeparrot_training - Step 1588: {'lr': 0.00039675, 'samples': 50816, 'steps': 1587, 'loss/train': 8.039056777954102} +01/22/2022 22:56:27 - INFO - codeparrot_training - Step 1589: {'lr': 0.00039700000000000005, 'samples': 50848, 'steps': 1588, 'loss/train': 6.580598831176758} +01/22/2022 22:56:27 - INFO - codeparrot_training - Step 1590: {'lr': 0.00039725, 'samples': 50880, 'steps': 1589, 'loss/train': 6.4283833503723145} +01/22/2022 22:56:28 - INFO - codeparrot_training - Step 1591: {'lr': 0.0003975, 'samples': 50912, 'steps': 1590, 'loss/train': 5.850194931030273} +01/22/2022 22:56:29 - INFO - codeparrot_training - Step 1592: {'lr': 0.00039775, 'samples': 50944, 'steps': 1591, 'loss/train': 6.874515533447266} +01/22/2022 22:56:29 - INFO - codeparrot_training - Step 1593: {'lr': 0.000398, 'samples': 50976, 'steps': 1592, 'loss/train': 7.806508541107178} +01/22/2022 22:56:30 - INFO - codeparrot_training - Step 1594: {'lr': 0.00039825, 'samples': 51008, 'steps': 1593, 'loss/train': 7.1353559494018555} +01/22/2022 22:56:30 - INFO - codeparrot_training - Step 1595: {'lr': 0.00039850000000000004, 'samples': 51040, 'steps': 1594, 'loss/train': 6.241382598876953} +01/22/2022 22:56:31 - INFO - codeparrot_training - Step 1596: {'lr': 0.00039875, 'samples': 51072, 'steps': 1595, 'loss/train': 6.3349080085754395} +01/22/2022 22:56:31 - INFO - codeparrot_training - Step 1597: {'lr': 0.00039900000000000005, 'samples': 51104, 'steps': 1596, 'loss/train': 7.099588394165039} +01/22/2022 22:56:32 - INFO - codeparrot_training - Step 1598: {'lr': 0.00039925000000000003, 'samples': 51136, 'steps': 1597, 'loss/train': 7.358517646789551} +01/22/2022 22:56:32 - INFO - codeparrot_training - Step 1599: {'lr': 0.0003995, 'samples': 51168, 'steps': 1598, 'loss/train': 6.244859218597412} +01/22/2022 22:56:33 - INFO - codeparrot_training - Step 1600: {'lr': 0.00039975, 'samples': 51200, 'steps': 1599, 'loss/train': 6.839120864868164} +01/22/2022 22:56:34 - INFO - codeparrot_training - Step 1601: {'lr': 0.0004, 'samples': 51232, 'steps': 1600, 'loss/train': 7.16201639175415} +01/22/2022 22:56:34 - INFO - codeparrot_training - Step 1602: {'lr': 0.00040025, 'samples': 51264, 'steps': 1601, 'loss/train': 6.885538578033447} +01/22/2022 22:56:35 - INFO - codeparrot_training - Step 1603: {'lr': 0.00040050000000000003, 'samples': 51296, 'steps': 1602, 'loss/train': 6.788628578186035} +01/22/2022 22:56:35 - INFO - codeparrot_training - Step 1604: {'lr': 0.00040075, 'samples': 51328, 'steps': 1603, 'loss/train': 6.9974141120910645} +01/22/2022 22:56:36 - INFO - codeparrot_training - Step 1605: {'lr': 0.00040100000000000004, 'samples': 51360, 'steps': 1604, 'loss/train': 6.7278313636779785} +01/22/2022 22:56:36 - INFO - codeparrot_training - Step 1606: {'lr': 0.00040125, 'samples': 51392, 'steps': 1605, 'loss/train': 8.286443710327148} +01/22/2022 22:56:37 - INFO - codeparrot_training - Step 1607: {'lr': 0.00040150000000000006, 'samples': 51424, 'steps': 1606, 'loss/train': 6.9117431640625} +01/22/2022 22:56:37 - INFO - codeparrot_training - Step 1608: 
{'lr': 0.00040175, 'samples': 51456, 'steps': 1607, 'loss/train': 6.9075727462768555} +01/22/2022 22:56:38 - INFO - codeparrot_training - Step 1609: {'lr': 0.000402, 'samples': 51488, 'steps': 1608, 'loss/train': 6.5377984046936035} +01/22/2022 22:56:39 - INFO - codeparrot_training - Step 1610: {'lr': 0.00040225, 'samples': 51520, 'steps': 1609, 'loss/train': 7.150696277618408} +01/22/2022 22:56:39 - INFO - codeparrot_training - Step 1611: {'lr': 0.0004025, 'samples': 51552, 'steps': 1610, 'loss/train': 7.030532360076904} +01/22/2022 22:56:40 - INFO - codeparrot_training - Step 1612: {'lr': 0.00040275, 'samples': 51584, 'steps': 1611, 'loss/train': 8.663025856018066} +01/22/2022 22:56:40 - INFO - codeparrot_training - Step 1613: {'lr': 0.00040300000000000004, 'samples': 51616, 'steps': 1612, 'loss/train': 6.312267303466797} +01/22/2022 22:56:41 - INFO - codeparrot_training - Step 1614: {'lr': 0.00040325, 'samples': 51648, 'steps': 1613, 'loss/train': 7.320019245147705} +01/22/2022 22:56:41 - INFO - codeparrot_training - Step 1615: {'lr': 0.00040350000000000005, 'samples': 51680, 'steps': 1614, 'loss/train': 8.00234603881836} +01/22/2022 22:56:42 - INFO - codeparrot_training - Step 1616: {'lr': 0.00040375000000000003, 'samples': 51712, 'steps': 1615, 'loss/train': 6.842278003692627} +01/22/2022 22:56:42 - INFO - codeparrot_training - Step 1617: {'lr': 0.000404, 'samples': 51744, 'steps': 1616, 'loss/train': 7.519162654876709} +01/22/2022 22:56:46 - INFO - codeparrot_training - Step 1618: {'lr': 0.00040425, 'samples': 51776, 'steps': 1617, 'loss/train': 5.795008659362793} +01/22/2022 22:56:46 - INFO - codeparrot_training - Step 1619: {'lr': 0.0004045, 'samples': 51808, 'steps': 1618, 'loss/train': 10.329031944274902} +01/22/2022 22:56:47 - INFO - codeparrot_training - Step 1620: {'lr': 0.00040475, 'samples': 51840, 'steps': 1619, 'loss/train': 10.24384593963623} +01/22/2022 22:56:47 - INFO - codeparrot_training - Step 1621: {'lr': 0.00040500000000000003, 'samples': 51872, 'steps': 1620, 'loss/train': 9.274225234985352} +01/22/2022 22:56:48 - INFO - codeparrot_training - Step 1622: {'lr': 0.00040525, 'samples': 51904, 'steps': 1621, 'loss/train': 8.438464164733887} +01/22/2022 22:56:48 - INFO - codeparrot_training - Step 1623: {'lr': 0.00040550000000000004, 'samples': 51936, 'steps': 1622, 'loss/train': 8.686137199401855} +01/22/2022 22:56:49 - INFO - codeparrot_training - Step 1624: {'lr': 0.00040575, 'samples': 51968, 'steps': 1623, 'loss/train': 6.822724342346191} +01/22/2022 22:56:50 - INFO - codeparrot_training - Step 1625: {'lr': 0.00040600000000000006, 'samples': 52000, 'steps': 1624, 'loss/train': 6.107885837554932} +01/22/2022 22:56:50 - INFO - codeparrot_training - Step 1626: {'lr': 0.00040625000000000004, 'samples': 52032, 'steps': 1625, 'loss/train': 7.619203567504883} +01/22/2022 22:56:51 - INFO - codeparrot_training - Step 1627: {'lr': 0.00040649999999999996, 'samples': 52064, 'steps': 1626, 'loss/train': 6.217233657836914} +01/22/2022 22:56:51 - INFO - codeparrot_training - Step 1628: {'lr': 0.00040675, 'samples': 52096, 'steps': 1627, 'loss/train': 7.931346893310547} +01/22/2022 22:56:52 - INFO - codeparrot_training - Step 1629: {'lr': 0.00040699999999999997, 'samples': 52128, 'steps': 1628, 'loss/train': 5.658714294433594} +01/22/2022 22:56:52 - INFO - codeparrot_training - Step 1630: {'lr': 0.00040725, 'samples': 52160, 'steps': 1629, 'loss/train': 6.802981376647949} +01/22/2022 22:56:53 - INFO - codeparrot_training - Step 1631: {'lr': 0.0004075, 'samples': 52192, 'steps': 
1630, 'loss/train': 6.647806167602539} +01/22/2022 22:56:53 - INFO - codeparrot_training - Step 1632: {'lr': 0.00040775, 'samples': 52224, 'steps': 1631, 'loss/train': 7.275111198425293} +01/22/2022 22:56:54 - INFO - codeparrot_training - Step 1633: {'lr': 0.000408, 'samples': 52256, 'steps': 1632, 'loss/train': 6.62696647644043} +01/22/2022 22:56:55 - INFO - codeparrot_training - Step 1634: {'lr': 0.00040825000000000003, 'samples': 52288, 'steps': 1633, 'loss/train': 7.391646385192871} +01/22/2022 22:56:55 - INFO - codeparrot_training - Step 1635: {'lr': 0.0004085, 'samples': 52320, 'steps': 1634, 'loss/train': 6.442867279052734} +01/22/2022 22:56:56 - INFO - codeparrot_training - Step 1636: {'lr': 0.00040875, 'samples': 52352, 'steps': 1635, 'loss/train': 6.087299823760986} +01/22/2022 22:56:56 - INFO - codeparrot_training - Step 1637: {'lr': 0.00040899999999999997, 'samples': 52384, 'steps': 1636, 'loss/train': 5.7260026931762695} +01/22/2022 22:56:57 - INFO - codeparrot_training - Step 1638: {'lr': 0.00040925, 'samples': 52416, 'steps': 1637, 'loss/train': 7.398775577545166} +01/22/2022 22:56:57 - INFO - codeparrot_training - Step 1639: {'lr': 0.0004095, 'samples': 52448, 'steps': 1638, 'loss/train': 7.554065227508545} +01/22/2022 22:56:58 - INFO - codeparrot_training - Step 1640: {'lr': 0.00040975, 'samples': 52480, 'steps': 1639, 'loss/train': 6.0532732009887695} +01/22/2022 22:56:58 - INFO - codeparrot_training - Step 1641: {'lr': 0.00041, 'samples': 52512, 'steps': 1640, 'loss/train': 6.975116729736328} +01/22/2022 22:56:59 - INFO - codeparrot_training - Step 1642: {'lr': 0.00041025, 'samples': 52544, 'steps': 1641, 'loss/train': 7.755023956298828} +01/22/2022 22:57:00 - INFO - codeparrot_training - Step 1643: {'lr': 0.0004105, 'samples': 52576, 'steps': 1642, 'loss/train': 7.212932586669922} +01/22/2022 22:57:00 - INFO - codeparrot_training - Step 1644: {'lr': 0.00041075000000000004, 'samples': 52608, 'steps': 1643, 'loss/train': 7.000539779663086} +01/22/2022 22:57:01 - INFO - codeparrot_training - Step 1645: {'lr': 0.00041099999999999996, 'samples': 52640, 'steps': 1644, 'loss/train': 7.296216011047363} +01/22/2022 22:57:01 - INFO - codeparrot_training - Step 1646: {'lr': 0.00041125, 'samples': 52672, 'steps': 1645, 'loss/train': 6.9472880363464355} +01/22/2022 22:57:03 - INFO - codeparrot_training - Step 1647: {'lr': 0.0004115, 'samples': 52704, 'steps': 1646, 'loss/train': 6.254222869873047} +01/22/2022 22:57:03 - INFO - codeparrot_training - Step 1648: {'lr': 0.00041175, 'samples': 52736, 'steps': 1647, 'loss/train': 7.030433654785156} +01/22/2022 22:57:04 - INFO - codeparrot_training - Step 1649: {'lr': 0.000412, 'samples': 52768, 'steps': 1648, 'loss/train': 5.980892658233643} +01/22/2022 22:57:05 - INFO - codeparrot_training - Step 1650: {'lr': 0.00041225, 'samples': 52800, 'steps': 1649, 'loss/train': 7.045206546783447} +01/22/2022 22:57:05 - INFO - codeparrot_training - Step 1651: {'lr': 0.0004125, 'samples': 52832, 'steps': 1650, 'loss/train': 7.211202621459961} +01/22/2022 22:57:06 - INFO - codeparrot_training - Step 1652: {'lr': 0.00041275000000000003, 'samples': 52864, 'steps': 1651, 'loss/train': 8.584561347961426} +01/22/2022 22:57:06 - INFO - codeparrot_training - Step 1653: {'lr': 0.000413, 'samples': 52896, 'steps': 1652, 'loss/train': 8.613454818725586} +01/22/2022 22:57:07 - INFO - codeparrot_training - Step 1654: {'lr': 0.00041325, 'samples': 52928, 'steps': 1653, 'loss/train': 6.667103290557861} +01/22/2022 22:57:07 - INFO - codeparrot_training - Step 1655: 
{'lr': 0.00041349999999999997, 'samples': 52960, 'steps': 1654, 'loss/train': 7.1807475090026855} +01/22/2022 22:57:08 - INFO - codeparrot_training - Step 1656: {'lr': 0.00041375, 'samples': 52992, 'steps': 1655, 'loss/train': 5.200162887573242} +01/22/2022 22:57:08 - INFO - codeparrot_training - Step 1657: {'lr': 0.000414, 'samples': 53024, 'steps': 1656, 'loss/train': 6.362869739532471} +01/22/2022 22:57:09 - INFO - codeparrot_training - Step 1658: {'lr': 0.00041425, 'samples': 53056, 'steps': 1657, 'loss/train': 6.7323479652404785} +01/22/2022 22:57:10 - INFO - codeparrot_training - Step 1659: {'lr': 0.0004145, 'samples': 53088, 'steps': 1658, 'loss/train': 6.857406139373779} +01/22/2022 22:57:10 - INFO - codeparrot_training - Step 1660: {'lr': 0.00041475, 'samples': 53120, 'steps': 1659, 'loss/train': 6.46556282043457} +01/22/2022 22:57:11 - INFO - codeparrot_training - Step 1661: {'lr': 0.000415, 'samples': 53152, 'steps': 1660, 'loss/train': 6.603553771972656} +01/22/2022 22:57:11 - INFO - codeparrot_training - Step 1662: {'lr': 0.00041525000000000004, 'samples': 53184, 'steps': 1661, 'loss/train': 7.005068302154541} +01/22/2022 22:57:12 - INFO - codeparrot_training - Step 1663: {'lr': 0.00041549999999999996, 'samples': 53216, 'steps': 1662, 'loss/train': 6.23445987701416} +01/22/2022 22:57:12 - INFO - codeparrot_training - Step 1664: {'lr': 0.00041575, 'samples': 53248, 'steps': 1663, 'loss/train': 6.620657920837402} +01/22/2022 22:57:13 - INFO - codeparrot_training - Step 1665: {'lr': 0.000416, 'samples': 53280, 'steps': 1664, 'loss/train': 7.156607151031494} +01/22/2022 22:57:13 - INFO - codeparrot_training - Step 1666: {'lr': 0.00041625, 'samples': 53312, 'steps': 1665, 'loss/train': 6.853488922119141} +01/22/2022 22:57:14 - INFO - codeparrot_training - Step 1667: {'lr': 0.0004165, 'samples': 53344, 'steps': 1666, 'loss/train': 8.095769882202148} +01/22/2022 22:57:15 - INFO - codeparrot_training - Step 1668: {'lr': 0.00041675, 'samples': 53376, 'steps': 1667, 'loss/train': 7.9816789627075195} +01/22/2022 22:57:15 - INFO - codeparrot_training - Step 1669: {'lr': 0.000417, 'samples': 53408, 'steps': 1668, 'loss/train': 7.54531192779541} +01/22/2022 22:57:16 - INFO - codeparrot_training - Step 1670: {'lr': 0.00041725000000000003, 'samples': 53440, 'steps': 1669, 'loss/train': 6.565365314483643} +01/22/2022 22:57:16 - INFO - codeparrot_training - Step 1671: {'lr': 0.0004175, 'samples': 53472, 'steps': 1670, 'loss/train': 6.914548873901367} +01/22/2022 22:57:17 - INFO - codeparrot_training - Step 1672: {'lr': 0.00041775000000000004, 'samples': 53504, 'steps': 1671, 'loss/train': 6.823920726776123} +01/22/2022 22:57:17 - INFO - codeparrot_training - Step 1673: {'lr': 0.00041799999999999997, 'samples': 53536, 'steps': 1672, 'loss/train': 6.5753374099731445} +01/22/2022 22:57:18 - INFO - codeparrot_training - Step 1674: {'lr': 0.00041825, 'samples': 53568, 'steps': 1673, 'loss/train': 6.148717403411865} +01/22/2022 22:57:19 - INFO - codeparrot_training - Step 1675: {'lr': 0.0004185, 'samples': 53600, 'steps': 1674, 'loss/train': 5.719173908233643} +01/22/2022 22:57:19 - INFO - codeparrot_training - Step 1676: {'lr': 0.00041875, 'samples': 53632, 'steps': 1675, 'loss/train': 6.591923713684082} +01/22/2022 22:57:20 - INFO - codeparrot_training - Step 1677: {'lr': 0.000419, 'samples': 53664, 'steps': 1676, 'loss/train': 7.137322902679443} +01/22/2022 22:57:20 - INFO - codeparrot_training - Step 1678: {'lr': 0.00041925, 'samples': 53696, 'steps': 1677, 'loss/train': 7.895626068115234} 
+01/22/2022 22:57:21 - INFO - codeparrot_training - Step 1679: {'lr': 0.0004195, 'samples': 53728, 'steps': 1678, 'loss/train': 7.343735694885254} +01/22/2022 22:57:21 - INFO - codeparrot_training - Step 1680: {'lr': 0.00041975000000000004, 'samples': 53760, 'steps': 1679, 'loss/train': 6.963775634765625} +01/22/2022 22:57:23 - INFO - codeparrot_training - Step 1681: {'lr': 0.00042, 'samples': 53792, 'steps': 1680, 'loss/train': 7.416505336761475} +01/22/2022 22:57:23 - INFO - codeparrot_training - Step 1682: {'lr': 0.00042025, 'samples': 53824, 'steps': 1681, 'loss/train': 6.9873151779174805} +01/22/2022 22:57:24 - INFO - codeparrot_training - Step 1683: {'lr': 0.0004205, 'samples': 53856, 'steps': 1682, 'loss/train': 7.8208231925964355} +01/22/2022 22:57:24 - INFO - codeparrot_training - Step 1684: {'lr': 0.00042075, 'samples': 53888, 'steps': 1683, 'loss/train': 7.243191242218018} +01/22/2022 22:57:25 - INFO - codeparrot_training - Step 1685: {'lr': 0.000421, 'samples': 53920, 'steps': 1684, 'loss/train': 6.796907901763916} +01/22/2022 22:57:25 - INFO - codeparrot_training - Step 1686: {'lr': 0.00042125, 'samples': 53952, 'steps': 1685, 'loss/train': 7.463184833526611} +01/22/2022 22:57:26 - INFO - codeparrot_training - Step 1687: {'lr': 0.0004215, 'samples': 53984, 'steps': 1686, 'loss/train': 8.01009750366211} +01/22/2022 22:57:26 - INFO - codeparrot_training - Step 1688: {'lr': 0.00042175000000000003, 'samples': 54016, 'steps': 1687, 'loss/train': 6.452075958251953} +01/22/2022 22:57:27 - INFO - codeparrot_training - Step 1689: {'lr': 0.000422, 'samples': 54048, 'steps': 1688, 'loss/train': 6.920141220092773} +01/22/2022 22:57:28 - INFO - codeparrot_training - Step 1690: {'lr': 0.00042225000000000005, 'samples': 54080, 'steps': 1689, 'loss/train': 4.42692756652832} +01/22/2022 22:57:28 - INFO - codeparrot_training - Step 1691: {'lr': 0.00042249999999999997, 'samples': 54112, 'steps': 1690, 'loss/train': 3.9586315155029297} +01/22/2022 22:57:29 - INFO - codeparrot_training - Step 1692: {'lr': 0.00042275, 'samples': 54144, 'steps': 1691, 'loss/train': 3.673039197921753} +01/22/2022 22:57:29 - INFO - codeparrot_training - Step 1693: {'lr': 0.000423, 'samples': 54176, 'steps': 1692, 'loss/train': 3.462704658508301} +01/22/2022 22:57:30 - INFO - codeparrot_training - Step 1694: {'lr': 0.00042325, 'samples': 54208, 'steps': 1693, 'loss/train': 7.359477519989014} +01/22/2022 22:57:30 - INFO - codeparrot_training - Step 1695: {'lr': 0.0004235, 'samples': 54240, 'steps': 1694, 'loss/train': 7.313514232635498} +01/22/2022 22:57:31 - INFO - codeparrot_training - Step 1696: {'lr': 0.00042375000000000003, 'samples': 54272, 'steps': 1695, 'loss/train': 7.7329936027526855} +01/22/2022 22:57:31 - INFO - codeparrot_training - Step 1697: {'lr': 0.000424, 'samples': 54304, 'steps': 1696, 'loss/train': 7.397958278656006} +01/22/2022 22:57:32 - INFO - codeparrot_training - Step 1698: {'lr': 0.00042425000000000004, 'samples': 54336, 'steps': 1697, 'loss/train': 7.160500526428223} +01/22/2022 22:57:33 - INFO - codeparrot_training - Step 1699: {'lr': 0.0004245, 'samples': 54368, 'steps': 1698, 'loss/train': 7.86340856552124} +01/22/2022 22:57:33 - INFO - codeparrot_training - Step 1700: {'lr': 0.00042475000000000005, 'samples': 54400, 'steps': 1699, 'loss/train': 4.803197860717773} +01/22/2022 22:57:34 - INFO - codeparrot_training - Step 1701: {'lr': 0.000425, 'samples': 54432, 'steps': 1700, 'loss/train': 7.541376113891602} +01/22/2022 22:57:34 - INFO - codeparrot_training - Step 1702: {'lr': 0.00042525, 
'samples': 54464, 'steps': 1701, 'loss/train': 6.8037238121032715} +01/22/2022 22:57:35 - INFO - codeparrot_training - Step 1703: {'lr': 0.0004255, 'samples': 54496, 'steps': 1702, 'loss/train': 6.364565372467041} +01/22/2022 22:57:35 - INFO - codeparrot_training - Step 1704: {'lr': 0.00042575, 'samples': 54528, 'steps': 1703, 'loss/train': 5.970940113067627} +01/22/2022 22:57:36 - INFO - codeparrot_training - Step 1705: {'lr': 0.000426, 'samples': 54560, 'steps': 1704, 'loss/train': 7.29542350769043} +01/22/2022 22:57:36 - INFO - codeparrot_training - Step 1706: {'lr': 0.00042625000000000003, 'samples': 54592, 'steps': 1705, 'loss/train': 8.444196701049805} +01/22/2022 22:57:37 - INFO - codeparrot_training - Step 1707: {'lr': 0.0004265, 'samples': 54624, 'steps': 1706, 'loss/train': 6.820549964904785} +01/22/2022 22:57:38 - INFO - codeparrot_training - Step 1708: {'lr': 0.00042675000000000005, 'samples': 54656, 'steps': 1707, 'loss/train': 7.076043605804443} +01/22/2022 22:57:38 - INFO - codeparrot_training - Step 1709: {'lr': 0.000427, 'samples': 54688, 'steps': 1708, 'loss/train': 7.2021965980529785} +01/22/2022 22:57:39 - INFO - codeparrot_training - Step 1710: {'lr': 0.00042725, 'samples': 54720, 'steps': 1709, 'loss/train': 6.013803958892822} +01/22/2022 22:57:39 - INFO - codeparrot_training - Step 1711: {'lr': 0.0004275, 'samples': 54752, 'steps': 1710, 'loss/train': 6.942599773406982} +01/22/2022 22:57:40 - INFO - codeparrot_training - Step 1712: {'lr': 0.00042775, 'samples': 54784, 'steps': 1711, 'loss/train': 7.695791721343994} +01/22/2022 22:57:40 - INFO - codeparrot_training - Step 1713: {'lr': 0.000428, 'samples': 54816, 'steps': 1712, 'loss/train': 6.137791633605957} +01/22/2022 22:57:42 - INFO - codeparrot_training - Step 1714: {'lr': 0.00042825000000000003, 'samples': 54848, 'steps': 1713, 'loss/train': 6.834042072296143} +01/22/2022 22:57:42 - INFO - codeparrot_training - Step 1715: {'lr': 0.0004285, 'samples': 54880, 'steps': 1714, 'loss/train': 8.213141441345215} +01/22/2022 22:57:43 - INFO - codeparrot_training - Step 1716: {'lr': 0.00042875000000000004, 'samples': 54912, 'steps': 1715, 'loss/train': 7.186507225036621} +01/22/2022 22:57:43 - INFO - codeparrot_training - Step 1717: {'lr': 0.000429, 'samples': 54944, 'steps': 1716, 'loss/train': 7.252810478210449} +01/22/2022 22:57:44 - INFO - codeparrot_training - Step 1718: {'lr': 0.00042925000000000005, 'samples': 54976, 'steps': 1717, 'loss/train': 6.911928653717041} +01/22/2022 22:57:44 - INFO - codeparrot_training - Step 1719: {'lr': 0.0004295, 'samples': 55008, 'steps': 1718, 'loss/train': 6.8972954750061035} +01/22/2022 22:57:45 - INFO - codeparrot_training - Step 1720: {'lr': 0.00042975, 'samples': 55040, 'steps': 1719, 'loss/train': 6.962718963623047} +01/22/2022 22:57:46 - INFO - codeparrot_training - Step 1721: {'lr': 0.00043, 'samples': 55072, 'steps': 1720, 'loss/train': 7.383131504058838} +01/22/2022 22:57:46 - INFO - codeparrot_training - Step 1722: {'lr': 0.00043025, 'samples': 55104, 'steps': 1721, 'loss/train': 6.457530975341797} +01/22/2022 22:57:47 - INFO - codeparrot_training - Step 1723: {'lr': 0.0004305, 'samples': 55136, 'steps': 1722, 'loss/train': 7.783806324005127} +01/22/2022 22:57:47 - INFO - codeparrot_training - Step 1724: {'lr': 0.00043075000000000003, 'samples': 55168, 'steps': 1723, 'loss/train': 6.872702121734619} +01/22/2022 22:57:48 - INFO - codeparrot_training - Step 1725: {'lr': 0.000431, 'samples': 55200, 'steps': 1724, 'loss/train': 6.80531120300293} +01/22/2022 22:57:48 - INFO - 
codeparrot_training - Step 1726: {'lr': 0.00043125000000000005, 'samples': 55232, 'steps': 1725, 'loss/train': 6.128391265869141} +01/22/2022 22:57:49 - INFO - codeparrot_training - Step 1727: {'lr': 0.0004315, 'samples': 55264, 'steps': 1726, 'loss/train': 6.725710391998291} +01/22/2022 22:57:49 - INFO - codeparrot_training - Step 1728: {'lr': 0.00043175, 'samples': 55296, 'steps': 1727, 'loss/train': 5.21824836730957} +01/22/2022 22:57:50 - INFO - codeparrot_training - Step 1729: {'lr': 0.000432, 'samples': 55328, 'steps': 1728, 'loss/train': 5.289026260375977} +01/22/2022 22:57:51 - INFO - codeparrot_training - Step 1730: {'lr': 0.00043225, 'samples': 55360, 'steps': 1729, 'loss/train': 6.622781753540039} +01/22/2022 22:57:51 - INFO - codeparrot_training - Step 1731: {'lr': 0.0004325, 'samples': 55392, 'steps': 1730, 'loss/train': 6.6854777336120605} +01/22/2022 22:57:52 - INFO - codeparrot_training - Step 1732: {'lr': 0.00043275000000000003, 'samples': 55424, 'steps': 1731, 'loss/train': 7.447129726409912} +01/22/2022 22:57:52 - INFO - codeparrot_training - Step 1733: {'lr': 0.000433, 'samples': 55456, 'steps': 1732, 'loss/train': 6.886803150177002} +01/22/2022 22:57:53 - INFO - codeparrot_training - Step 1734: {'lr': 0.00043325000000000004, 'samples': 55488, 'steps': 1733, 'loss/train': 8.245725631713867} +01/22/2022 22:57:53 - INFO - codeparrot_training - Step 1735: {'lr': 0.0004335, 'samples': 55520, 'steps': 1734, 'loss/train': 7.21261739730835} +01/22/2022 22:57:54 - INFO - codeparrot_training - Step 1736: {'lr': 0.00043375000000000005, 'samples': 55552, 'steps': 1735, 'loss/train': 6.907580375671387} +01/22/2022 22:57:54 - INFO - codeparrot_training - Step 1737: {'lr': 0.00043400000000000003, 'samples': 55584, 'steps': 1736, 'loss/train': 7.946069240570068} +01/22/2022 22:57:55 - INFO - codeparrot_training - Step 1738: {'lr': 0.00043425, 'samples': 55616, 'steps': 1737, 'loss/train': 6.451979637145996} +01/22/2022 22:57:56 - INFO - codeparrot_training - Step 1739: {'lr': 0.0004345, 'samples': 55648, 'steps': 1738, 'loss/train': 6.391631126403809} +01/22/2022 22:57:56 - INFO - codeparrot_training - Step 1740: {'lr': 0.00043475, 'samples': 55680, 'steps': 1739, 'loss/train': 6.065285682678223} +01/22/2022 22:57:57 - INFO - codeparrot_training - Step 1741: {'lr': 0.000435, 'samples': 55712, 'steps': 1740, 'loss/train': 9.295160293579102} +01/22/2022 22:57:57 - INFO - codeparrot_training - Step 1742: {'lr': 0.00043525000000000004, 'samples': 55744, 'steps': 1741, 'loss/train': 6.423277378082275} +01/22/2022 22:57:58 - INFO - codeparrot_training - Step 1743: {'lr': 0.0004355, 'samples': 55776, 'steps': 1742, 'loss/train': 6.525204658508301} +01/22/2022 22:57:59 - INFO - codeparrot_training - Step 1744: {'lr': 0.00043575000000000005, 'samples': 55808, 'steps': 1743, 'loss/train': 7.356773376464844} +01/22/2022 22:58:00 - INFO - codeparrot_training - Step 1745: {'lr': 0.000436, 'samples': 55840, 'steps': 1744, 'loss/train': 8.069647789001465} +01/22/2022 22:58:00 - INFO - codeparrot_training - Step 1746: {'lr': 0.00043625000000000006, 'samples': 55872, 'steps': 1745, 'loss/train': 6.447486877441406} +01/22/2022 22:58:01 - INFO - codeparrot_training - Step 1747: {'lr': 0.0004365, 'samples': 55904, 'steps': 1746, 'loss/train': 6.306025505065918} +01/22/2022 22:58:01 - INFO - codeparrot_training - Step 1748: {'lr': 0.00043675, 'samples': 55936, 'steps': 1747, 'loss/train': 9.050104141235352} +01/22/2022 22:58:02 - INFO - codeparrot_training - Step 1749: {'lr': 0.000437, 'samples': 55968, 
'steps': 1748, 'loss/train': 6.585012912750244} +01/22/2022 22:58:02 - INFO - codeparrot_training - Step 1750: {'lr': 0.00043725000000000003, 'samples': 56000, 'steps': 1749, 'loss/train': 7.208786487579346} +01/22/2022 22:58:03 - INFO - codeparrot_training - Step 1751: {'lr': 0.0004375, 'samples': 56032, 'steps': 1750, 'loss/train': 6.448922157287598} +01/22/2022 22:58:03 - INFO - codeparrot_training - Step 1752: {'lr': 0.00043775, 'samples': 56064, 'steps': 1751, 'loss/train': 6.150957107543945} +01/22/2022 22:58:04 - INFO - codeparrot_training - Step 1753: {'lr': 0.000438, 'samples': 56096, 'steps': 1752, 'loss/train': 6.722433567047119} +01/22/2022 22:58:05 - INFO - codeparrot_training - Step 1754: {'lr': 0.00043825, 'samples': 56128, 'steps': 1753, 'loss/train': 6.831794738769531} +01/22/2022 22:58:05 - INFO - codeparrot_training - Step 1755: {'lr': 0.00043850000000000003, 'samples': 56160, 'steps': 1754, 'loss/train': 6.498454570770264} +01/22/2022 22:58:06 - INFO - codeparrot_training - Step 1756: {'lr': 0.00043874999999999996, 'samples': 56192, 'steps': 1755, 'loss/train': 7.037643909454346} +01/22/2022 22:58:06 - INFO - codeparrot_training - Step 1757: {'lr': 0.000439, 'samples': 56224, 'steps': 1756, 'loss/train': 6.525977611541748} +01/22/2022 22:58:07 - INFO - codeparrot_training - Step 1758: {'lr': 0.00043924999999999997, 'samples': 56256, 'steps': 1757, 'loss/train': 6.6537885665893555} +01/22/2022 22:58:07 - INFO - codeparrot_training - Step 1759: {'lr': 0.0004395, 'samples': 56288, 'steps': 1758, 'loss/train': 6.968910217285156} +01/22/2022 22:58:08 - INFO - codeparrot_training - Step 1760: {'lr': 0.00043975, 'samples': 56320, 'steps': 1759, 'loss/train': 5.794710636138916} +01/22/2022 22:58:09 - INFO - codeparrot_training - Step 1761: {'lr': 0.00044, 'samples': 56352, 'steps': 1760, 'loss/train': 5.817799091339111} +01/22/2022 22:58:09 - INFO - codeparrot_training - Step 1762: {'lr': 0.00044025, 'samples': 56384, 'steps': 1761, 'loss/train': 6.480599403381348} +01/22/2022 22:58:10 - INFO - codeparrot_training - Step 1763: {'lr': 0.00044050000000000003, 'samples': 56416, 'steps': 1762, 'loss/train': 7.3922343254089355} +01/22/2022 22:58:10 - INFO - codeparrot_training - Step 1764: {'lr': 0.00044075, 'samples': 56448, 'steps': 1763, 'loss/train': 6.063863754272461} +01/22/2022 22:58:11 - INFO - codeparrot_training - Step 1765: {'lr': 0.000441, 'samples': 56480, 'steps': 1764, 'loss/train': 8.302885055541992} +01/22/2022 22:58:11 - INFO - codeparrot_training - Step 1766: {'lr': 0.00044124999999999996, 'samples': 56512, 'steps': 1765, 'loss/train': 3.655529022216797} +01/22/2022 22:58:12 - INFO - codeparrot_training - Step 1767: {'lr': 0.0004415, 'samples': 56544, 'steps': 1766, 'loss/train': 6.55881404876709} +01/22/2022 22:58:12 - INFO - codeparrot_training - Step 1768: {'lr': 0.00044175, 'samples': 56576, 'steps': 1767, 'loss/train': 6.9892778396606445} +01/22/2022 22:58:13 - INFO - codeparrot_training - Step 1769: {'lr': 0.000442, 'samples': 56608, 'steps': 1768, 'loss/train': 5.594648361206055} +01/22/2022 22:58:14 - INFO - codeparrot_training - Step 1770: {'lr': 0.00044225, 'samples': 56640, 'steps': 1769, 'loss/train': 6.0051655769348145} +01/22/2022 22:58:14 - INFO - codeparrot_training - Step 1771: {'lr': 0.0004425, 'samples': 56672, 'steps': 1770, 'loss/train': 6.800883769989014} +01/22/2022 22:58:15 - INFO - codeparrot_training - Step 1772: {'lr': 0.00044275, 'samples': 56704, 'steps': 1771, 'loss/train': 7.360313892364502} +01/22/2022 22:58:18 - INFO - 
codeparrot_training - Step 1773: {'lr': 0.00044300000000000003, 'samples': 56736, 'steps': 1772, 'loss/train': 6.248723983764648} +01/22/2022 22:58:18 - INFO - codeparrot_training - Step 1774: {'lr': 0.00044325, 'samples': 56768, 'steps': 1773, 'loss/train': 3.1253347396850586} +01/22/2022 22:58:19 - INFO - codeparrot_training - Step 1775: {'lr': 0.0004435, 'samples': 56800, 'steps': 1774, 'loss/train': 5.908626079559326} +01/22/2022 22:58:19 - INFO - codeparrot_training - Step 1776: {'lr': 0.00044374999999999997, 'samples': 56832, 'steps': 1775, 'loss/train': 7.139800548553467} +01/22/2022 22:58:20 - INFO - codeparrot_training - Step 1777: {'lr': 0.000444, 'samples': 56864, 'steps': 1776, 'loss/train': 6.590355396270752} +01/22/2022 22:58:20 - INFO - codeparrot_training - Step 1778: {'lr': 0.00044425, 'samples': 56896, 'steps': 1777, 'loss/train': 6.109304904937744} +01/22/2022 22:58:21 - INFO - codeparrot_training - Step 1779: {'lr': 0.0004445, 'samples': 56928, 'steps': 1778, 'loss/train': 7.3173112869262695} +01/22/2022 22:58:21 - INFO - codeparrot_training - Step 1780: {'lr': 0.00044475, 'samples': 56960, 'steps': 1779, 'loss/train': 6.533891201019287} +01/22/2022 22:58:22 - INFO - codeparrot_training - Step 1781: {'lr': 0.00044500000000000003, 'samples': 56992, 'steps': 1780, 'loss/train': 6.662802219390869} +01/22/2022 22:58:23 - INFO - codeparrot_training - Step 1782: {'lr': 0.00044525, 'samples': 57024, 'steps': 1781, 'loss/train': 6.562715530395508} +01/22/2022 22:58:23 - INFO - codeparrot_training - Step 1783: {'lr': 0.00044550000000000004, 'samples': 57056, 'steps': 1782, 'loss/train': 6.6454925537109375} +01/22/2022 22:58:24 - INFO - codeparrot_training - Step 1784: {'lr': 0.00044574999999999997, 'samples': 57088, 'steps': 1783, 'loss/train': 6.195021152496338} +01/22/2022 22:58:24 - INFO - codeparrot_training - Step 1785: {'lr': 0.000446, 'samples': 57120, 'steps': 1784, 'loss/train': 7.475767612457275} +01/22/2022 22:58:25 - INFO - codeparrot_training - Step 1786: {'lr': 0.00044625, 'samples': 57152, 'steps': 1785, 'loss/train': 6.689405918121338} +01/22/2022 22:58:25 - INFO - codeparrot_training - Step 1787: {'lr': 0.0004465, 'samples': 57184, 'steps': 1786, 'loss/train': 6.137472152709961} +01/22/2022 22:58:26 - INFO - codeparrot_training - Step 1788: {'lr': 0.00044675, 'samples': 57216, 'steps': 1787, 'loss/train': 6.881978988647461} +01/22/2022 22:58:27 - INFO - codeparrot_training - Step 1789: {'lr': 0.000447, 'samples': 57248, 'steps': 1788, 'loss/train': 6.424258708953857} +01/22/2022 22:58:27 - INFO - codeparrot_training - Step 1790: {'lr': 0.00044725, 'samples': 57280, 'steps': 1789, 'loss/train': 6.755660533905029} +01/22/2022 22:58:28 - INFO - codeparrot_training - Step 1791: {'lr': 0.00044750000000000004, 'samples': 57312, 'steps': 1790, 'loss/train': 6.384985446929932} +01/22/2022 22:58:28 - INFO - codeparrot_training - Step 1792: {'lr': 0.00044775, 'samples': 57344, 'steps': 1791, 'loss/train': 6.285956382751465} +01/22/2022 22:58:29 - INFO - codeparrot_training - Step 1793: {'lr': 0.000448, 'samples': 57376, 'steps': 1792, 'loss/train': 6.893615245819092} +01/22/2022 22:58:29 - INFO - codeparrot_training - Step 1794: {'lr': 0.00044824999999999997, 'samples': 57408, 'steps': 1793, 'loss/train': 7.358097076416016} +01/22/2022 22:58:30 - INFO - codeparrot_training - Step 1795: {'lr': 0.0004485, 'samples': 57440, 'steps': 1794, 'loss/train': 7.233516216278076} +01/22/2022 22:58:31 - INFO - codeparrot_training - Step 1796: {'lr': 0.00044875, 'samples': 57472, 
'steps': 1795, 'loss/train': 6.122325420379639} +01/22/2022 22:58:31 - INFO - codeparrot_training - Step 1797: {'lr': 0.000449, 'samples': 57504, 'steps': 1796, 'loss/train': 7.123651027679443} +01/22/2022 22:58:32 - INFO - codeparrot_training - Step 1798: {'lr': 0.00044925, 'samples': 57536, 'steps': 1797, 'loss/train': 6.6003217697143555} +01/22/2022 22:58:32 - INFO - codeparrot_training - Step 1799: {'lr': 0.00044950000000000003, 'samples': 57568, 'steps': 1798, 'loss/train': 7.028745651245117} +01/22/2022 22:58:33 - INFO - codeparrot_training - Step 1800: {'lr': 0.00044975, 'samples': 57600, 'steps': 1799, 'loss/train': 6.328068733215332} +01/22/2022 22:58:33 - INFO - codeparrot_training - Step 1801: {'lr': 0.00045000000000000004, 'samples': 57632, 'steps': 1800, 'loss/train': 6.612029552459717} +01/22/2022 22:58:34 - INFO - codeparrot_training - Step 1802: {'lr': 0.00045024999999999997, 'samples': 57664, 'steps': 1801, 'loss/train': 6.94903564453125} +01/22/2022 22:58:35 - INFO - codeparrot_training - Step 1803: {'lr': 0.0004505, 'samples': 57696, 'steps': 1802, 'loss/train': 6.29318904876709} +01/22/2022 22:58:36 - INFO - codeparrot_training - Step 1804: {'lr': 0.00045075, 'samples': 57728, 'steps': 1803, 'loss/train': 6.756988048553467} +01/22/2022 22:58:36 - INFO - codeparrot_training - Step 1805: {'lr': 0.000451, 'samples': 57760, 'steps': 1804, 'loss/train': 6.657665729522705} +01/22/2022 22:58:37 - INFO - codeparrot_training - Step 1806: {'lr': 0.00045125, 'samples': 57792, 'steps': 1805, 'loss/train': 6.681492805480957} +01/22/2022 22:58:37 - INFO - codeparrot_training - Step 1807: {'lr': 0.0004515, 'samples': 57824, 'steps': 1806, 'loss/train': 5.891361713409424} +01/22/2022 22:58:38 - INFO - codeparrot_training - Step 1808: {'lr': 0.00045175, 'samples': 57856, 'steps': 1807, 'loss/train': 6.759525299072266} +01/22/2022 22:58:38 - INFO - codeparrot_training - Step 1809: {'lr': 0.00045200000000000004, 'samples': 57888, 'steps': 1808, 'loss/train': 6.559369087219238} +01/22/2022 22:58:39 - INFO - codeparrot_training - Step 1810: {'lr': 0.00045225, 'samples': 57920, 'steps': 1809, 'loss/train': 7.630070686340332} +01/22/2022 22:58:40 - INFO - codeparrot_training - Step 1811: {'lr': 0.00045250000000000005, 'samples': 57952, 'steps': 1810, 'loss/train': 7.09797477722168} +01/22/2022 22:58:40 - INFO - codeparrot_training - Step 1812: {'lr': 0.00045275, 'samples': 57984, 'steps': 1811, 'loss/train': 6.677160739898682} +01/22/2022 22:58:41 - INFO - codeparrot_training - Step 1813: {'lr': 0.000453, 'samples': 58016, 'steps': 1812, 'loss/train': 6.996142864227295} +01/22/2022 22:58:41 - INFO - codeparrot_training - Step 1814: {'lr': 0.00045325, 'samples': 58048, 'steps': 1813, 'loss/train': 5.751055717468262} +01/22/2022 22:58:42 - INFO - codeparrot_training - Step 1815: {'lr': 0.0004535, 'samples': 58080, 'steps': 1814, 'loss/train': 6.852121829986572} +01/22/2022 22:58:42 - INFO - codeparrot_training - Step 1816: {'lr': 0.00045375, 'samples': 58112, 'steps': 1815, 'loss/train': 8.821587562561035} +01/22/2022 22:58:43 - INFO - codeparrot_training - Step 1817: {'lr': 0.00045400000000000003, 'samples': 58144, 'steps': 1816, 'loss/train': 6.5530548095703125} +01/22/2022 22:58:43 - INFO - codeparrot_training - Step 1818: {'lr': 0.00045425, 'samples': 58176, 'steps': 1817, 'loss/train': 7.946497917175293} +01/22/2022 22:58:44 - INFO - codeparrot_training - Step 1819: {'lr': 0.00045450000000000004, 'samples': 58208, 'steps': 1818, 'loss/train': 6.449390888214111} +01/22/2022 22:58:45 - INFO 
- codeparrot_training - Step 1820: {'lr': 0.00045475, 'samples': 58240, 'steps': 1819, 'loss/train': 8.28693962097168} +01/22/2022 22:58:45 - INFO - codeparrot_training - Step 1821: {'lr': 0.000455, 'samples': 58272, 'steps': 1820, 'loss/train': 7.705758571624756} +01/22/2022 22:58:46 - INFO - codeparrot_training - Step 1822: {'lr': 0.00045525, 'samples': 58304, 'steps': 1821, 'loss/train': 6.291810035705566} +01/22/2022 22:58:46 - INFO - codeparrot_training - Step 1823: {'lr': 0.0004555, 'samples': 58336, 'steps': 1822, 'loss/train': 6.617900848388672} +01/22/2022 22:58:47 - INFO - codeparrot_training - Step 1824: {'lr': 0.00045575, 'samples': 58368, 'steps': 1823, 'loss/train': 6.973372936248779} +01/22/2022 22:58:47 - INFO - codeparrot_training - Step 1825: {'lr': 0.000456, 'samples': 58400, 'steps': 1824, 'loss/train': 7.438027381896973} +01/22/2022 22:58:48 - INFO - codeparrot_training - Step 1826: {'lr': 0.00045625, 'samples': 58432, 'steps': 1825, 'loss/train': 6.704293251037598} +01/22/2022 22:58:48 - INFO - codeparrot_training - Step 1827: {'lr': 0.00045650000000000004, 'samples': 58464, 'steps': 1826, 'loss/train': 7.330345630645752} +01/22/2022 22:58:49 - INFO - codeparrot_training - Step 1828: {'lr': 0.00045675, 'samples': 58496, 'steps': 1827, 'loss/train': 6.240826606750488} +01/22/2022 22:58:50 - INFO - codeparrot_training - Step 1829: {'lr': 0.00045700000000000005, 'samples': 58528, 'steps': 1828, 'loss/train': 8.026159286499023} +01/22/2022 22:58:50 - INFO - codeparrot_training - Step 1830: {'lr': 0.00045725, 'samples': 58560, 'steps': 1829, 'loss/train': 5.732626438140869} +01/22/2022 22:58:51 - INFO - codeparrot_training - Step 1831: {'lr': 0.0004575, 'samples': 58592, 'steps': 1830, 'loss/train': 6.1970601081848145} +01/22/2022 22:58:51 - INFO - codeparrot_training - Step 1832: {'lr': 0.00045775, 'samples': 58624, 'steps': 1831, 'loss/train': 6.189219951629639} +01/22/2022 22:58:52 - INFO - codeparrot_training - Step 1833: {'lr': 0.000458, 'samples': 58656, 'steps': 1832, 'loss/train': 7.393641471862793} +01/22/2022 22:58:53 - INFO - codeparrot_training - Step 1834: {'lr': 0.00045825, 'samples': 58688, 'steps': 1833, 'loss/train': 6.3010687828063965} +01/22/2022 22:58:53 - INFO - codeparrot_training - Step 1835: {'lr': 0.00045850000000000003, 'samples': 58720, 'steps': 1834, 'loss/train': 7.539854526519775} +01/22/2022 22:58:54 - INFO - codeparrot_training - Step 1836: {'lr': 0.00045875, 'samples': 58752, 'steps': 1835, 'loss/train': 5.203782558441162} +01/22/2022 22:58:55 - INFO - codeparrot_training - Step 1837: {'lr': 0.00045900000000000004, 'samples': 58784, 'steps': 1836, 'loss/train': 6.581721305847168} +01/22/2022 22:58:55 - INFO - codeparrot_training - Step 1838: {'lr': 0.00045925, 'samples': 58816, 'steps': 1837, 'loss/train': 6.834224224090576} +01/22/2022 22:58:56 - INFO - codeparrot_training - Step 1839: {'lr': 0.00045950000000000006, 'samples': 58848, 'steps': 1838, 'loss/train': 6.523037433624268} +01/22/2022 22:58:56 - INFO - codeparrot_training - Step 1840: {'lr': 0.00045975, 'samples': 58880, 'steps': 1839, 'loss/train': 6.971290588378906} +01/22/2022 22:58:57 - INFO - codeparrot_training - Step 1841: {'lr': 0.00046, 'samples': 58912, 'steps': 1840, 'loss/train': 7.0572991371154785} +01/22/2022 22:58:57 - INFO - codeparrot_training - Step 1842: {'lr': 0.00046025, 'samples': 58944, 'steps': 1841, 'loss/train': 6.669233322143555} +01/22/2022 22:58:58 - INFO - codeparrot_training - Step 1843: {'lr': 0.0004605, 'samples': 58976, 'steps': 1842, 'loss/train': 
5.754608154296875} +01/22/2022 22:58:58 - INFO - codeparrot_training - Step 1844: {'lr': 0.00046075, 'samples': 59008, 'steps': 1843, 'loss/train': 3.3327229022979736} +01/22/2022 22:58:59 - INFO - codeparrot_training - Step 1845: {'lr': 0.00046100000000000004, 'samples': 59040, 'steps': 1844, 'loss/train': 7.887156963348389} +01/22/2022 22:59:00 - INFO - codeparrot_training - Step 1846: {'lr': 0.00046125, 'samples': 59072, 'steps': 1845, 'loss/train': 6.42283821105957} +01/22/2022 22:59:00 - INFO - codeparrot_training - Step 1847: {'lr': 0.00046150000000000005, 'samples': 59104, 'steps': 1846, 'loss/train': 7.230447769165039} +01/22/2022 22:59:01 - INFO - codeparrot_training - Step 1848: {'lr': 0.00046175000000000003, 'samples': 59136, 'steps': 1847, 'loss/train': 7.349234104156494} +01/22/2022 22:59:01 - INFO - codeparrot_training - Step 1849: {'lr': 0.000462, 'samples': 59168, 'steps': 1848, 'loss/train': 6.543290138244629} +01/22/2022 22:59:02 - INFO - codeparrot_training - Step 1850: {'lr': 0.00046225, 'samples': 59200, 'steps': 1849, 'loss/train': 5.490878582000732} +01/22/2022 22:59:02 - INFO - codeparrot_training - Step 1851: {'lr': 0.0004625, 'samples': 59232, 'steps': 1850, 'loss/train': 6.639327526092529} +01/22/2022 22:59:03 - INFO - codeparrot_training - Step 1852: {'lr': 0.00046275, 'samples': 59264, 'steps': 1851, 'loss/train': 6.857795238494873} +01/22/2022 22:59:03 - INFO - codeparrot_training - Step 1853: {'lr': 0.00046300000000000003, 'samples': 59296, 'steps': 1852, 'loss/train': 6.959165096282959} +01/22/2022 22:59:04 - INFO - codeparrot_training - Step 1854: {'lr': 0.00046325, 'samples': 59328, 'steps': 1853, 'loss/train': 6.721272945404053} +01/22/2022 22:59:05 - INFO - codeparrot_training - Step 1855: {'lr': 0.00046350000000000004, 'samples': 59360, 'steps': 1854, 'loss/train': 6.7905592918396} +01/22/2022 22:59:05 - INFO - codeparrot_training - Step 1856: {'lr': 0.00046375, 'samples': 59392, 'steps': 1855, 'loss/train': 7.240139007568359} +01/22/2022 22:59:06 - INFO - codeparrot_training - Step 1857: {'lr': 0.00046400000000000006, 'samples': 59424, 'steps': 1856, 'loss/train': 7.050957679748535} +01/22/2022 22:59:06 - INFO - codeparrot_training - Step 1858: {'lr': 0.00046425, 'samples': 59456, 'steps': 1857, 'loss/train': 9.000147819519043} +01/22/2022 22:59:07 - INFO - codeparrot_training - Step 1859: {'lr': 0.0004645, 'samples': 59488, 'steps': 1858, 'loss/train': 6.3980231285095215} +01/22/2022 22:59:07 - INFO - codeparrot_training - Step 1860: {'lr': 0.00046475, 'samples': 59520, 'steps': 1859, 'loss/train': 6.383477687835693} +01/22/2022 22:59:08 - INFO - codeparrot_training - Step 1861: {'lr': 0.000465, 'samples': 59552, 'steps': 1860, 'loss/train': 6.386602878570557} +01/22/2022 22:59:09 - INFO - codeparrot_training - Step 1862: {'lr': 0.00046525, 'samples': 59584, 'steps': 1861, 'loss/train': 7.4174017906188965} +01/22/2022 22:59:10 - INFO - codeparrot_training - Step 1863: {'lr': 0.00046550000000000004, 'samples': 59616, 'steps': 1862, 'loss/train': 6.270468235015869} +01/22/2022 22:59:10 - INFO - codeparrot_training - Step 1864: {'lr': 0.00046575, 'samples': 59648, 'steps': 1863, 'loss/train': 7.042333602905273} +01/22/2022 22:59:11 - INFO - codeparrot_training - Step 1865: {'lr': 0.00046600000000000005, 'samples': 59680, 'steps': 1864, 'loss/train': 7.308569431304932} +01/22/2022 22:59:11 - INFO - codeparrot_training - Step 1866: {'lr': 0.00046625000000000003, 'samples': 59712, 'steps': 1865, 'loss/train': 4.514084815979004} +01/22/2022 22:59:12 - INFO - 
codeparrot_training - Step 1867: {'lr': 0.0004665, 'samples': 59744, 'steps': 1866, 'loss/train': 6.913158416748047} +01/22/2022 22:59:13 - INFO - codeparrot_training - Step 1868: {'lr': 0.00046675, 'samples': 59776, 'steps': 1867, 'loss/train': 7.353203296661377} +01/22/2022 22:59:13 - INFO - codeparrot_training - Step 1869: {'lr': 0.000467, 'samples': 59808, 'steps': 1868, 'loss/train': 7.251315593719482} +01/22/2022 22:59:14 - INFO - codeparrot_training - Step 1870: {'lr': 0.00046725, 'samples': 59840, 'steps': 1869, 'loss/train': 6.0660319328308105} +01/22/2022 22:59:14 - INFO - codeparrot_training - Step 1871: {'lr': 0.00046750000000000003, 'samples': 59872, 'steps': 1870, 'loss/train': 6.720779895782471} +01/22/2022 22:59:15 - INFO - codeparrot_training - Step 1872: {'lr': 0.00046775, 'samples': 59904, 'steps': 1871, 'loss/train': 7.204751968383789} +01/22/2022 22:59:15 - INFO - codeparrot_training - Step 1873: {'lr': 0.00046800000000000005, 'samples': 59936, 'steps': 1872, 'loss/train': 6.710639953613281} +01/22/2022 22:59:16 - INFO - codeparrot_training - Step 1874: {'lr': 0.00046825, 'samples': 59968, 'steps': 1873, 'loss/train': 7.051923751831055} +01/22/2022 22:59:16 - INFO - codeparrot_training - Step 1875: {'lr': 0.00046850000000000006, 'samples': 60000, 'steps': 1874, 'loss/train': 6.9112091064453125} +01/22/2022 22:59:17 - INFO - codeparrot_training - Step 1876: {'lr': 0.00046875, 'samples': 60032, 'steps': 1875, 'loss/train': 6.876968860626221} +01/22/2022 22:59:18 - INFO - codeparrot_training - Step 1877: {'lr': 0.00046899999999999996, 'samples': 60064, 'steps': 1876, 'loss/train': 6.312643527984619} +01/22/2022 22:59:18 - INFO - codeparrot_training - Step 1878: {'lr': 0.00046925, 'samples': 60096, 'steps': 1877, 'loss/train': 5.675683975219727} +01/22/2022 22:59:19 - INFO - codeparrot_training - Step 1879: {'lr': 0.0004695, 'samples': 60128, 'steps': 1878, 'loss/train': 6.261249542236328} +01/22/2022 22:59:19 - INFO - codeparrot_training - Step 1880: {'lr': 0.00046975, 'samples': 60160, 'steps': 1879, 'loss/train': 6.56421422958374} +01/22/2022 22:59:20 - INFO - codeparrot_training - Step 1881: {'lr': 0.00047, 'samples': 60192, 'steps': 1880, 'loss/train': 8.342716217041016} +01/22/2022 22:59:20 - INFO - codeparrot_training - Step 1882: {'lr': 0.00047025, 'samples': 60224, 'steps': 1881, 'loss/train': 6.7691450119018555} +01/22/2022 22:59:21 - INFO - codeparrot_training - Step 1883: {'lr': 0.0004705, 'samples': 60256, 'steps': 1882, 'loss/train': 7.906585693359375} +01/22/2022 22:59:21 - INFO - codeparrot_training - Step 1884: {'lr': 0.00047075000000000003, 'samples': 60288, 'steps': 1883, 'loss/train': 5.802099704742432} +01/22/2022 22:59:22 - INFO - codeparrot_training - Step 1885: {'lr': 0.000471, 'samples': 60320, 'steps': 1884, 'loss/train': 6.268570899963379} +01/22/2022 22:59:23 - INFO - codeparrot_training - Step 1886: {'lr': 0.00047125, 'samples': 60352, 'steps': 1885, 'loss/train': 8.16417121887207} +01/22/2022 22:59:23 - INFO - codeparrot_training - Step 1887: {'lr': 0.00047149999999999997, 'samples': 60384, 'steps': 1886, 'loss/train': 6.91790246963501} +01/22/2022 22:59:24 - INFO - codeparrot_training - Step 1888: {'lr': 0.00047175, 'samples': 60416, 'steps': 1887, 'loss/train': 6.485135555267334} +01/22/2022 22:59:24 - INFO - codeparrot_training - Step 1889: {'lr': 0.000472, 'samples': 60448, 'steps': 1888, 'loss/train': 6.343036651611328} +01/22/2022 22:59:25 - INFO - codeparrot_training - Step 1890: {'lr': 0.00047225, 'samples': 60480, 'steps': 1889, 
'loss/train': 7.011646747589111} +01/22/2022 22:59:25 - INFO - codeparrot_training - Step 1891: {'lr': 0.0004725, 'samples': 60512, 'steps': 1890, 'loss/train': 7.798588752746582} +01/22/2022 22:59:27 - INFO - codeparrot_training - Step 1892: {'lr': 0.00047275, 'samples': 60544, 'steps': 1891, 'loss/train': 6.776928901672363} +01/22/2022 22:59:27 - INFO - codeparrot_training - Step 1893: {'lr': 0.000473, 'samples': 60576, 'steps': 1892, 'loss/train': 6.897092819213867} +01/22/2022 22:59:28 - INFO - codeparrot_training - Step 1894: {'lr': 0.00047325000000000004, 'samples': 60608, 'steps': 1893, 'loss/train': 7.140537738800049} +01/22/2022 22:59:28 - INFO - codeparrot_training - Step 1895: {'lr': 0.00047349999999999996, 'samples': 60640, 'steps': 1894, 'loss/train': 6.847639560699463} +01/22/2022 22:59:29 - INFO - codeparrot_training - Step 1896: {'lr': 0.00047375, 'samples': 60672, 'steps': 1895, 'loss/train': 6.900247097015381} +01/22/2022 22:59:29 - INFO - codeparrot_training - Step 1897: {'lr': 0.000474, 'samples': 60704, 'steps': 1896, 'loss/train': 6.5025200843811035} +01/22/2022 22:59:30 - INFO - codeparrot_training - Step 1898: {'lr': 0.00047425, 'samples': 60736, 'steps': 1897, 'loss/train': 6.0703606605529785} +01/22/2022 22:59:30 - INFO - codeparrot_training - Step 1899: {'lr': 0.0004745, 'samples': 60768, 'steps': 1898, 'loss/train': 6.574043273925781} +01/22/2022 22:59:31 - INFO - codeparrot_training - Step 1900: {'lr': 0.00047475, 'samples': 60800, 'steps': 1899, 'loss/train': 6.772813320159912} +01/22/2022 22:59:32 - INFO - codeparrot_training - Step 1901: {'lr': 0.000475, 'samples': 60832, 'steps': 1900, 'loss/train': 6.166356086730957} +01/22/2022 22:59:32 - INFO - codeparrot_training - Step 1902: {'lr': 0.00047525000000000003, 'samples': 60864, 'steps': 1901, 'loss/train': 6.8163886070251465} +01/22/2022 22:59:33 - INFO - codeparrot_training - Step 1903: {'lr': 0.0004755, 'samples': 60896, 'steps': 1902, 'loss/train': 7.306416988372803} +01/22/2022 22:59:33 - INFO - codeparrot_training - Step 1904: {'lr': 0.00047575, 'samples': 60928, 'steps': 1903, 'loss/train': 6.701196193695068} +01/22/2022 22:59:34 - INFO - codeparrot_training - Step 1905: {'lr': 0.00047599999999999997, 'samples': 60960, 'steps': 1904, 'loss/train': 6.694813251495361} +01/22/2022 22:59:34 - INFO - codeparrot_training - Step 1906: {'lr': 0.00047625, 'samples': 60992, 'steps': 1905, 'loss/train': 7.387792110443115} +01/22/2022 22:59:35 - INFO - codeparrot_training - Step 1907: {'lr': 0.0004765, 'samples': 61024, 'steps': 1906, 'loss/train': 6.403735637664795} +01/22/2022 22:59:35 - INFO - codeparrot_training - Step 1908: {'lr': 0.00047675, 'samples': 61056, 'steps': 1907, 'loss/train': 7.4814066886901855} +01/22/2022 22:59:36 - INFO - codeparrot_training - Step 1909: {'lr': 0.000477, 'samples': 61088, 'steps': 1908, 'loss/train': 6.259079933166504} +01/22/2022 22:59:37 - INFO - codeparrot_training - Step 1910: {'lr': 0.00047725, 'samples': 61120, 'steps': 1909, 'loss/train': 6.121779441833496} +01/22/2022 22:59:37 - INFO - codeparrot_training - Step 1911: {'lr': 0.0004775, 'samples': 61152, 'steps': 1910, 'loss/train': 6.752954006195068} +01/22/2022 22:59:38 - INFO - codeparrot_training - Step 1912: {'lr': 0.00047775000000000004, 'samples': 61184, 'steps': 1911, 'loss/train': 6.421021461486816} +01/22/2022 22:59:38 - INFO - codeparrot_training - Step 1913: {'lr': 0.00047799999999999996, 'samples': 61216, 'steps': 1912, 'loss/train': 5.847579002380371} +01/22/2022 22:59:39 - INFO - codeparrot_training - 
Step 1914: {'lr': 0.00047825, 'samples': 61248, 'steps': 1913, 'loss/train': 5.635835647583008} +01/22/2022 22:59:39 - INFO - codeparrot_training - Step 1915: {'lr': 0.0004785, 'samples': 61280, 'steps': 1914, 'loss/train': 6.520924091339111} +01/22/2022 22:59:40 - INFO - codeparrot_training - Step 1916: {'lr': 0.00047875, 'samples': 61312, 'steps': 1915, 'loss/train': 6.543104648590088} +01/22/2022 22:59:40 - INFO - codeparrot_training - Step 1917: {'lr': 0.000479, 'samples': 61344, 'steps': 1916, 'loss/train': 6.493753910064697} +01/22/2022 22:59:41 - INFO - codeparrot_training - Step 1918: {'lr': 0.00047925, 'samples': 61376, 'steps': 1917, 'loss/train': 5.7163238525390625} +01/22/2022 22:59:42 - INFO - codeparrot_training - Step 1919: {'lr': 0.0004795, 'samples': 61408, 'steps': 1918, 'loss/train': 5.034031391143799} +01/22/2022 22:59:42 - INFO - codeparrot_training - Step 1920: {'lr': 0.00047975000000000003, 'samples': 61440, 'steps': 1919, 'loss/train': 6.217477321624756} +01/22/2022 22:59:43 - INFO - codeparrot_training - Step 1921: {'lr': 0.00048, 'samples': 61472, 'steps': 1920, 'loss/train': 10.87963581085205} +01/22/2022 22:59:46 - INFO - codeparrot_training - Step 1922: {'lr': 0.00048025000000000005, 'samples': 61504, 'steps': 1921, 'loss/train': 10.419942855834961} +01/22/2022 22:59:46 - INFO - codeparrot_training - Step 1923: {'lr': 0.00048049999999999997, 'samples': 61536, 'steps': 1922, 'loss/train': 7.359313011169434} +01/22/2022 22:59:47 - INFO - codeparrot_training - Step 1924: {'lr': 0.00048075, 'samples': 61568, 'steps': 1923, 'loss/train': 7.359200954437256} +01/22/2022 22:59:47 - INFO - codeparrot_training - Step 1925: {'lr': 0.000481, 'samples': 61600, 'steps': 1924, 'loss/train': 7.9838080406188965} +01/22/2022 22:59:48 - INFO - codeparrot_training - Step 1926: {'lr': 0.00048125, 'samples': 61632, 'steps': 1925, 'loss/train': 8.00261402130127} +01/22/2022 22:59:49 - INFO - codeparrot_training - Step 1927: {'lr': 0.0004815, 'samples': 61664, 'steps': 1926, 'loss/train': 6.388486862182617} +01/22/2022 22:59:49 - INFO - codeparrot_training - Step 1928: {'lr': 0.00048175000000000003, 'samples': 61696, 'steps': 1927, 'loss/train': 6.693338394165039} +01/22/2022 22:59:50 - INFO - codeparrot_training - Step 1929: {'lr': 0.000482, 'samples': 61728, 'steps': 1928, 'loss/train': 6.791950702667236} +01/22/2022 22:59:50 - INFO - codeparrot_training - Step 1930: {'lr': 0.00048225000000000004, 'samples': 61760, 'steps': 1929, 'loss/train': 6.31251335144043} +01/22/2022 22:59:51 - INFO - codeparrot_training - Step 1931: {'lr': 0.0004825, 'samples': 61792, 'steps': 1930, 'loss/train': 6.162063121795654} +01/22/2022 22:59:51 - INFO - codeparrot_training - Step 1932: {'lr': 0.00048275, 'samples': 61824, 'steps': 1931, 'loss/train': 6.049973964691162} +01/22/2022 22:59:52 - INFO - codeparrot_training - Step 1933: {'lr': 0.000483, 'samples': 61856, 'steps': 1932, 'loss/train': 6.857202053070068} +01/22/2022 22:59:52 - INFO - codeparrot_training - Step 1934: {'lr': 0.00048325, 'samples': 61888, 'steps': 1933, 'loss/train': 6.779285430908203} +01/22/2022 22:59:53 - INFO - codeparrot_training - Step 1935: {'lr': 0.0004835, 'samples': 61920, 'steps': 1934, 'loss/train': 5.838208198547363} +01/22/2022 22:59:54 - INFO - codeparrot_training - Step 1936: {'lr': 0.00048375, 'samples': 61952, 'steps': 1935, 'loss/train': 6.064708232879639} +01/22/2022 22:59:54 - INFO - codeparrot_training - Step 1937: {'lr': 0.000484, 'samples': 61984, 'steps': 1936, 'loss/train': 6.060610294342041} +01/22/2022 
22:59:55 - INFO - codeparrot_training - Step 1938: {'lr': 0.00048425000000000003, 'samples': 62016, 'steps': 1937, 'loss/train': 5.49398946762085} +01/22/2022 22:59:55 - INFO - codeparrot_training - Step 1939: {'lr': 0.0004845, 'samples': 62048, 'steps': 1938, 'loss/train': 6.718085289001465} +01/22/2022 22:59:56 - INFO - codeparrot_training - Step 1940: {'lr': 0.00048475000000000005, 'samples': 62080, 'steps': 1939, 'loss/train': 6.572368621826172} +01/22/2022 22:59:56 - INFO - codeparrot_training - Step 1941: {'lr': 0.00048499999999999997, 'samples': 62112, 'steps': 1940, 'loss/train': 5.930635929107666} +01/22/2022 22:59:57 - INFO - codeparrot_training - Step 1942: {'lr': 0.00048525, 'samples': 62144, 'steps': 1941, 'loss/train': 6.092946529388428} +01/22/2022 22:59:57 - INFO - codeparrot_training - Step 1943: {'lr': 0.0004855, 'samples': 62176, 'steps': 1942, 'loss/train': 5.966977119445801} +01/22/2022 22:59:58 - INFO - codeparrot_training - Step 1944: {'lr': 0.00048575, 'samples': 62208, 'steps': 1943, 'loss/train': 6.310474872589111} +01/22/2022 22:59:59 - INFO - codeparrot_training - Step 1945: {'lr': 0.000486, 'samples': 62240, 'steps': 1944, 'loss/train': 7.085684299468994} +01/22/2022 22:59:59 - INFO - codeparrot_training - Step 1946: {'lr': 0.00048625000000000003, 'samples': 62272, 'steps': 1945, 'loss/train': 7.017955303192139} +01/22/2022 23:00:00 - INFO - codeparrot_training - Step 1947: {'lr': 0.0004865, 'samples': 62304, 'steps': 1946, 'loss/train': 6.919875621795654} +01/22/2022 23:00:00 - INFO - codeparrot_training - Step 1948: {'lr': 0.00048675000000000004, 'samples': 62336, 'steps': 1947, 'loss/train': 6.865899085998535} +01/22/2022 23:00:01 - INFO - codeparrot_training - Step 1949: {'lr': 0.000487, 'samples': 62368, 'steps': 1948, 'loss/train': 6.247832298278809} +01/22/2022 23:00:01 - INFO - codeparrot_training - Step 1950: {'lr': 0.00048725000000000005, 'samples': 62400, 'steps': 1949, 'loss/train': 7.236422538757324} +01/22/2022 23:00:02 - INFO - codeparrot_training - Step 1951: {'lr': 0.0004875, 'samples': 62432, 'steps': 1950, 'loss/train': 7.322019577026367} +01/22/2022 23:00:03 - INFO - codeparrot_training - Step 1952: {'lr': 0.00048775, 'samples': 62464, 'steps': 1951, 'loss/train': 5.981645584106445} +01/22/2022 23:00:04 - INFO - codeparrot_training - Step 1953: {'lr': 0.000488, 'samples': 62496, 'steps': 1952, 'loss/train': 7.087392807006836} +01/22/2022 23:00:04 - INFO - codeparrot_training - Step 1954: {'lr': 0.00048825, 'samples': 62528, 'steps': 1953, 'loss/train': 6.022073745727539} +01/22/2022 23:00:05 - INFO - codeparrot_training - Step 1955: {'lr': 0.0004885, 'samples': 62560, 'steps': 1954, 'loss/train': 6.923575401306152} +01/22/2022 23:00:05 - INFO - codeparrot_training - Step 1956: {'lr': 0.00048875, 'samples': 62592, 'steps': 1955, 'loss/train': 6.196526527404785} +01/22/2022 23:00:06 - INFO - codeparrot_training - Step 1957: {'lr': 0.000489, 'samples': 62624, 'steps': 1956, 'loss/train': 6.777785778045654} +01/22/2022 23:00:06 - INFO - codeparrot_training - Step 1958: {'lr': 0.00048925, 'samples': 62656, 'steps': 1957, 'loss/train': 6.685795783996582} +01/22/2022 23:00:07 - INFO - codeparrot_training - Step 1959: {'lr': 0.0004895, 'samples': 62688, 'steps': 1958, 'loss/train': 5.776857376098633} +01/22/2022 23:00:07 - INFO - codeparrot_training - Step 1960: {'lr': 0.0004897500000000001, 'samples': 62720, 'steps': 1959, 'loss/train': 6.407835960388184} +01/22/2022 23:00:08 - INFO - codeparrot_training - Step 1961: {'lr': 0.00049, 'samples': 
62752, 'steps': 1960, 'loss/train': 7.663313865661621} +01/22/2022 23:00:09 - INFO - codeparrot_training - Step 1962: {'lr': 0.00049025, 'samples': 62784, 'steps': 1961, 'loss/train': 6.340246677398682} +01/22/2022 23:00:09 - INFO - codeparrot_training - Step 1963: {'lr': 0.0004905, 'samples': 62816, 'steps': 1962, 'loss/train': 6.788925647735596} +01/22/2022 23:00:10 - INFO - codeparrot_training - Step 1964: {'lr': 0.0004907500000000001, 'samples': 62848, 'steps': 1963, 'loss/train': 6.345199108123779} +01/22/2022 23:00:10 - INFO - codeparrot_training - Step 1965: {'lr': 0.000491, 'samples': 62880, 'steps': 1964, 'loss/train': 7.375931739807129} +01/22/2022 23:00:11 - INFO - codeparrot_training - Step 1966: {'lr': 0.00049125, 'samples': 62912, 'steps': 1965, 'loss/train': 5.712161540985107} +01/22/2022 23:00:11 - INFO - codeparrot_training - Step 1967: {'lr': 0.0004915, 'samples': 62944, 'steps': 1966, 'loss/train': 7.24740743637085} +01/22/2022 23:00:12 - INFO - codeparrot_training - Step 1968: {'lr': 0.00049175, 'samples': 62976, 'steps': 1967, 'loss/train': 6.637411594390869} +01/22/2022 23:00:13 - INFO - codeparrot_training - Step 1969: {'lr': 0.000492, 'samples': 63008, 'steps': 1968, 'loss/train': 6.683908462524414} +01/22/2022 23:00:13 - INFO - codeparrot_training - Step 1970: {'lr': 0.0004922500000000001, 'samples': 63040, 'steps': 1969, 'loss/train': 6.222272872924805} +01/22/2022 23:00:14 - INFO - codeparrot_training - Step 1971: {'lr': 0.0004925, 'samples': 63072, 'steps': 1970, 'loss/train': 6.923182487487793} +01/22/2022 23:00:14 - INFO - codeparrot_training - Step 1972: {'lr': 0.00049275, 'samples': 63104, 'steps': 1971, 'loss/train': 4.334473133087158} +01/22/2022 23:00:15 - INFO - codeparrot_training - Step 1973: {'lr': 0.0004930000000000001, 'samples': 63136, 'steps': 1972, 'loss/train': 6.184952259063721} +01/22/2022 23:00:15 - INFO - codeparrot_training - Step 1974: {'lr': 0.00049325, 'samples': 63168, 'steps': 1973, 'loss/train': 5.940185546875} +01/22/2022 23:00:16 - INFO - codeparrot_training - Step 1975: {'lr': 0.0004935, 'samples': 63200, 'steps': 1974, 'loss/train': 6.051249027252197} +01/22/2022 23:00:16 - INFO - codeparrot_training - Step 1976: {'lr': 0.00049375, 'samples': 63232, 'steps': 1975, 'loss/train': 6.876236915588379} +01/22/2022 23:00:17 - INFO - codeparrot_training - Step 1977: {'lr': 0.000494, 'samples': 63264, 'steps': 1976, 'loss/train': 6.426875591278076} +01/22/2022 23:00:17 - INFO - codeparrot_training - Step 1978: {'lr': 0.00049425, 'samples': 63296, 'steps': 1977, 'loss/train': 6.9576592445373535} +01/22/2022 23:00:18 - INFO - codeparrot_training - Step 1979: {'lr': 0.0004945, 'samples': 63328, 'steps': 1978, 'loss/train': 7.186690807342529} +01/22/2022 23:00:19 - INFO - codeparrot_training - Step 1980: {'lr': 0.0004947500000000001, 'samples': 63360, 'steps': 1979, 'loss/train': 6.1242475509643555} +01/22/2022 23:00:20 - INFO - codeparrot_training - Step 1981: {'lr': 0.000495, 'samples': 63392, 'steps': 1980, 'loss/train': 7.082981586456299} +01/22/2022 23:00:20 - INFO - codeparrot_training - Step 1982: {'lr': 0.00049525, 'samples': 63424, 'steps': 1981, 'loss/train': 6.525054454803467} +01/22/2022 23:00:21 - INFO - codeparrot_training - Step 1983: {'lr': 0.0004955, 'samples': 63456, 'steps': 1982, 'loss/train': 5.425645351409912} +01/22/2022 23:00:21 - INFO - codeparrot_training - Step 1984: {'lr': 0.00049575, 'samples': 63488, 'steps': 1983, 'loss/train': 5.865527629852295} +01/22/2022 23:00:22 - INFO - codeparrot_training - Step 1985: 
{'lr': 0.000496, 'samples': 63520, 'steps': 1984, 'loss/train': 5.638011932373047} +01/22/2022 23:00:23 - INFO - codeparrot_training - Step 1986: {'lr': 0.0004962500000000001, 'samples': 63552, 'steps': 1985, 'loss/train': 6.352575302124023} +01/22/2022 23:00:23 - INFO - codeparrot_training - Step 1987: {'lr': 0.0004965, 'samples': 63584, 'steps': 1986, 'loss/train': 7.044677257537842} +01/22/2022 23:00:24 - INFO - codeparrot_training - Step 1988: {'lr': 0.00049675, 'samples': 63616, 'steps': 1987, 'loss/train': 6.930549144744873} +01/22/2022 23:00:24 - INFO - codeparrot_training - Step 1989: {'lr': 0.000497, 'samples': 63648, 'steps': 1988, 'loss/train': 6.839816093444824} +01/22/2022 23:00:25 - INFO - codeparrot_training - Step 1990: {'lr': 0.0004972500000000001, 'samples': 63680, 'steps': 1989, 'loss/train': 6.088400363922119} +01/22/2022 23:00:25 - INFO - codeparrot_training - Step 1991: {'lr': 0.0004975, 'samples': 63712, 'steps': 1990, 'loss/train': 6.047018051147461} +01/22/2022 23:00:26 - INFO - codeparrot_training - Step 1992: {'lr': 0.00049775, 'samples': 63744, 'steps': 1991, 'loss/train': 5.891190052032471} +01/22/2022 23:00:26 - INFO - codeparrot_training - Step 1993: {'lr': 0.000498, 'samples': 63776, 'steps': 1992, 'loss/train': 5.89976167678833} +01/22/2022 23:00:27 - INFO - codeparrot_training - Step 1994: {'lr': 0.00049825, 'samples': 63808, 'steps': 1993, 'loss/train': 6.550804138183594} +01/22/2022 23:00:28 - INFO - codeparrot_training - Step 1995: {'lr': 0.0004985, 'samples': 63840, 'steps': 1994, 'loss/train': 5.638559341430664} +01/22/2022 23:00:28 - INFO - codeparrot_training - Step 1996: {'lr': 0.0004987500000000001, 'samples': 63872, 'steps': 1995, 'loss/train': 7.0229105949401855} +01/22/2022 23:00:29 - INFO - codeparrot_training - Step 1997: {'lr': 0.000499, 'samples': 63904, 'steps': 1996, 'loss/train': 5.999317646026611} +01/22/2022 23:00:29 - INFO - codeparrot_training - Step 1998: {'lr': 0.00049925, 'samples': 63936, 'steps': 1997, 'loss/train': 7.227867603302002} +01/22/2022 23:00:30 - INFO - codeparrot_training - Step 1999: {'lr': 0.0004995, 'samples': 63968, 'steps': 1998, 'loss/train': 6.042109489440918} +01/22/2022 23:00:30 - INFO - codeparrot_training - Step 2000: {'lr': 0.0004997500000000001, 'samples': 64000, 'steps': 1999, 'loss/train': 7.229968547821045} +01/22/2022 23:00:31 - INFO - codeparrot_training - Step 2001: {'lr': 0.0005, 'samples': 64032, 'steps': 2000, 'loss/train': 6.50927209854126} +01/22/2022 23:00:31 - INFO - codeparrot_training - Step 2002: {'lr': 0.0004999999994645397, 'samples': 64064, 'steps': 2001, 'loss/train': 5.324577331542969} +01/22/2022 23:00:32 - INFO - codeparrot_training - Step 2003: {'lr': 0.0004999999978581587, 'samples': 64096, 'steps': 2002, 'loss/train': 5.682460308074951} +01/22/2022 23:00:33 - INFO - codeparrot_training - Step 2004: {'lr': 0.0004999999951808573, 'samples': 64128, 'steps': 2003, 'loss/train': 7.198102951049805} +01/22/2022 23:00:33 - INFO - codeparrot_training - Step 2005: {'lr': 0.0004999999914326351, 'samples': 64160, 'steps': 2004, 'loss/train': 6.8766961097717285} +01/22/2022 23:00:34 - INFO - codeparrot_training - Step 2006: {'lr': 0.0004999999866134924, 'samples': 64192, 'steps': 2005, 'loss/train': 5.827096462249756} +01/22/2022 23:00:34 - INFO - codeparrot_training - Step 2007: {'lr': 0.0004999999807234292, 'samples': 64224, 'steps': 2006, 'loss/train': 6.32208251953125} +01/22/2022 23:00:35 - INFO - codeparrot_training - Step 2008: {'lr': 0.0004999999737624453, 'samples': 64256, 'steps': 
2007, 'loss/train': 6.167959213256836} +01/22/2022 23:00:35 - INFO - codeparrot_training - Step 2009: {'lr': 0.0004999999657305411, 'samples': 64288, 'steps': 2008, 'loss/train': 6.659964561462402} +01/22/2022 23:00:37 - INFO - codeparrot_training - Step 2010: {'lr': 0.0004999999566277163, 'samples': 64320, 'steps': 2009, 'loss/train': 8.065342903137207} +01/22/2022 23:00:37 - INFO - codeparrot_training - Step 2011: {'lr': 0.0004999999464539711, 'samples': 64352, 'steps': 2010, 'loss/train': 6.571898460388184} +01/22/2022 23:00:38 - INFO - codeparrot_training - Step 2012: {'lr': 0.0004999999352093055, 'samples': 64384, 'steps': 2011, 'loss/train': 6.388328552246094} +01/22/2022 23:00:38 - INFO - codeparrot_training - Step 2013: {'lr': 0.0004999999228937196, 'samples': 64416, 'steps': 2012, 'loss/train': 7.5409393310546875} +01/22/2022 23:00:39 - INFO - codeparrot_training - Step 2014: {'lr': 0.0004999999095072135, 'samples': 64448, 'steps': 2013, 'loss/train': 6.563540458679199} +01/22/2022 23:00:40 - INFO - codeparrot_training - Step 2015: {'lr': 0.0004999998950497869, 'samples': 64480, 'steps': 2014, 'loss/train': 6.578091144561768} +01/22/2022 23:00:40 - INFO - codeparrot_training - Step 2016: {'lr': 0.0004999998795214404, 'samples': 64512, 'steps': 2015, 'loss/train': 6.83243989944458} +01/22/2022 23:00:41 - INFO - codeparrot_training - Step 2017: {'lr': 0.0004999998629221736, 'samples': 64544, 'steps': 2016, 'loss/train': 8.212639808654785} +01/22/2022 23:00:41 - INFO - codeparrot_training - Step 2018: {'lr': 0.0004999998452519869, 'samples': 64576, 'steps': 2017, 'loss/train': 6.833791732788086} +01/22/2022 23:00:42 - INFO - codeparrot_training - Step 2019: {'lr': 0.0004999998265108802, 'samples': 64608, 'steps': 2018, 'loss/train': 6.705368995666504} +01/22/2022 23:00:42 - INFO - codeparrot_training - Step 2020: {'lr': 0.0004999998066988537, 'samples': 64640, 'steps': 2019, 'loss/train': 6.16022253036499} +01/22/2022 23:00:43 - INFO - codeparrot_training - Step 2021: {'lr': 0.0004999997858159073, 'samples': 64672, 'steps': 2020, 'loss/train': 6.5103759765625} +01/22/2022 23:00:43 - INFO - codeparrot_training - Step 2022: {'lr': 0.0004999997638620412, 'samples': 64704, 'steps': 2021, 'loss/train': 6.908587455749512} +01/22/2022 23:00:44 - INFO - codeparrot_training - Step 2023: {'lr': 0.0004999997408372557, 'samples': 64736, 'steps': 2022, 'loss/train': 6.7269816398620605} +01/22/2022 23:00:45 - INFO - codeparrot_training - Step 2024: {'lr': 0.0004999997167415504, 'samples': 64768, 'steps': 2023, 'loss/train': 7.015747547149658} +01/22/2022 23:00:45 - INFO - codeparrot_training - Step 2025: {'lr': 0.0004999996915749259, 'samples': 64800, 'steps': 2024, 'loss/train': 6.68533992767334} +01/22/2022 23:00:46 - INFO - codeparrot_training - Step 2026: {'lr': 0.0004999996653373821, 'samples': 64832, 'steps': 2025, 'loss/train': 6.266223430633545} +01/22/2022 23:00:46 - INFO - codeparrot_training - Step 2027: {'lr': 0.000499999638028919, 'samples': 64864, 'steps': 2026, 'loss/train': 6.34724235534668} +01/22/2022 23:00:47 - INFO - codeparrot_training - Step 2028: {'lr': 0.0004999996096495369, 'samples': 64896, 'steps': 2027, 'loss/train': 5.857456207275391} +01/22/2022 23:00:47 - INFO - codeparrot_training - Step 2029: {'lr': 0.0004999995801992359, 'samples': 64928, 'steps': 2028, 'loss/train': 5.071035385131836} +01/22/2022 23:00:48 - INFO - codeparrot_training - Step 2030: {'lr': 0.000499999549678016, 'samples': 64960, 'steps': 2029, 'loss/train': 4.8642120361328125} +01/22/2022 23:00:48 - 
INFO - codeparrot_training - Step 2031: {'lr': 0.0004999995180858774, 'samples': 64992, 'steps': 2030, 'loss/train': 7.863161087036133} +01/22/2022 23:00:49 - INFO - codeparrot_training - Step 2032: {'lr': 0.0004999994854228203, 'samples': 65024, 'steps': 2031, 'loss/train': 6.999675273895264} +01/22/2022 23:00:50 - INFO - codeparrot_training - Step 2033: {'lr': 0.0004999994516888449, 'samples': 65056, 'steps': 2032, 'loss/train': 6.225742816925049} +01/22/2022 23:00:50 - INFO - codeparrot_training - Step 2034: {'lr': 0.000499999416883951, 'samples': 65088, 'steps': 2033, 'loss/train': 6.97227668762207} +01/22/2022 23:00:51 - INFO - codeparrot_training - Step 2035: {'lr': 0.0004999993810081391, 'samples': 65120, 'steps': 2034, 'loss/train': 6.062245845794678} +01/22/2022 23:00:51 - INFO - codeparrot_training - Step 2036: {'lr': 0.0004999993440614092, 'samples': 65152, 'steps': 2035, 'loss/train': 6.867842674255371} +01/22/2022 23:00:52 - INFO - codeparrot_training - Step 2037: {'lr': 0.0004999993060437616, 'samples': 65184, 'steps': 2036, 'loss/train': 6.929886341094971} +01/22/2022 23:00:52 - INFO - codeparrot_training - Step 2038: {'lr': 0.0004999992669551962, 'samples': 65216, 'steps': 2037, 'loss/train': 5.878575325012207} +01/22/2022 23:00:53 - INFO - codeparrot_training - Step 2039: {'lr': 0.0004999992267957135, 'samples': 65248, 'steps': 2038, 'loss/train': 5.9505133628845215} +01/22/2022 23:00:54 - INFO - codeparrot_training - Step 2040: {'lr': 0.0004999991855653134, 'samples': 65280, 'steps': 2039, 'loss/train': 7.352499485015869} +01/22/2022 23:00:55 - INFO - codeparrot_training - Step 2041: {'lr': 0.0004999991432639963, 'samples': 65312, 'steps': 2040, 'loss/train': 6.398353099822998} +01/22/2022 23:00:55 - INFO - codeparrot_training - Step 2042: {'lr': 0.0004999990998917621, 'samples': 65344, 'steps': 2041, 'loss/train': 6.910398483276367} +01/22/2022 23:00:56 - INFO - codeparrot_training - Step 2043: {'lr': 0.0004999990554486111, 'samples': 65376, 'steps': 2042, 'loss/train': 5.774745464324951} +01/22/2022 23:00:56 - INFO - codeparrot_training - Step 2044: {'lr': 0.0004999990099345436, 'samples': 65408, 'steps': 2043, 'loss/train': 5.53390645980835} +01/22/2022 23:00:57 - INFO - codeparrot_training - Step 2045: {'lr': 0.0004999989633495597, 'samples': 65440, 'steps': 2044, 'loss/train': 6.386229991912842} +01/22/2022 23:00:57 - INFO - codeparrot_training - Step 2046: {'lr': 0.0004999989156936597, 'samples': 65472, 'steps': 2045, 'loss/train': 7.073824882507324} +01/22/2022 23:00:58 - INFO - codeparrot_training - Step 2047: {'lr': 0.0004999988669668437, 'samples': 65504, 'steps': 2046, 'loss/train': 7.353301048278809} +01/22/2022 23:00:58 - INFO - codeparrot_training - Step 2048: {'lr': 0.0004999988171691119, 'samples': 65536, 'steps': 2047, 'loss/train': 6.343545436859131} +01/22/2022 23:00:59 - INFO - codeparrot_training - Step 2049: {'lr': 0.0004999987663004646, 'samples': 65568, 'steps': 2048, 'loss/train': 6.014913558959961} +01/22/2022 23:01:00 - INFO - codeparrot_training - Step 2050: {'lr': 0.0004999987143609019, 'samples': 65600, 'steps': 2049, 'loss/train': 6.458000659942627} +01/22/2022 23:01:00 - INFO - codeparrot_training - Step 2051: {'lr': 0.0004999986613504242, 'samples': 65632, 'steps': 2050, 'loss/train': 6.146261215209961} +01/22/2022 23:01:01 - INFO - codeparrot_training - Step 2052: {'lr': 0.0004999986072690315, 'samples': 65664, 'steps': 2051, 'loss/train': 5.339205265045166} +01/22/2022 23:01:01 - INFO - codeparrot_training - Step 2053: {'lr': 
0.0004999985521167242, 'samples': 65696, 'steps': 2052, 'loss/train': 6.073482036590576} +01/22/2022 23:01:02 - INFO - codeparrot_training - Step 2054: {'lr': 0.0004999984958935025, 'samples': 65728, 'steps': 2053, 'loss/train': 7.186446666717529} +01/22/2022 23:01:02 - INFO - codeparrot_training - Step 2055: {'lr': 0.0004999984385993665, 'samples': 65760, 'steps': 2054, 'loss/train': 6.95858097076416} +01/22/2022 23:01:03 - INFO - codeparrot_training - Step 2056: {'lr': 0.0004999983802343168, 'samples': 65792, 'steps': 2055, 'loss/train': 5.671885013580322} +01/22/2022 23:01:03 - INFO - codeparrot_training - Step 2057: {'lr': 0.0004999983207983532, 'samples': 65824, 'steps': 2056, 'loss/train': 6.406462669372559} +01/22/2022 23:01:04 - INFO - codeparrot_training - Step 2058: {'lr': 0.0004999982602914763, 'samples': 65856, 'steps': 2057, 'loss/train': 7.813049793243408} +01/22/2022 23:01:05 - INFO - codeparrot_training - Step 2059: {'lr': 0.0004999981987136862, 'samples': 65888, 'steps': 2058, 'loss/train': 6.001928806304932} +01/22/2022 23:01:05 - INFO - codeparrot_training - Step 2060: {'lr': 0.0004999981360649833, 'samples': 65920, 'steps': 2059, 'loss/train': 5.420037269592285} +01/22/2022 23:01:06 - INFO - codeparrot_training - Step 2061: {'lr': 0.0004999980723453676, 'samples': 65952, 'steps': 2060, 'loss/train': 6.231645584106445} +01/22/2022 23:01:06 - INFO - codeparrot_training - Step 2062: {'lr': 0.0004999980075548397, 'samples': 65984, 'steps': 2061, 'loss/train': 6.902248382568359} +01/22/2022 23:01:07 - INFO - codeparrot_training - Step 2063: {'lr': 0.0004999979416933997, 'samples': 66016, 'steps': 2062, 'loss/train': 6.601744651794434} +01/22/2022 23:01:07 - INFO - codeparrot_training - Step 2064: {'lr': 0.0004999978747610478, 'samples': 66048, 'steps': 2063, 'loss/train': 7.215444087982178} +01/22/2022 23:01:08 - INFO - codeparrot_training - Step 2065: {'lr': 0.0004999978067577843, 'samples': 66080, 'steps': 2064, 'loss/train': 5.8792924880981445} +01/22/2022 23:01:08 - INFO - codeparrot_training - Step 2066: {'lr': 0.0004999977376836098, 'samples': 66112, 'steps': 2065, 'loss/train': 7.074293613433838} +01/22/2022 23:01:09 - INFO - codeparrot_training - Step 2067: {'lr': 0.0004999976675385243, 'samples': 66144, 'steps': 2066, 'loss/train': 6.733389377593994} +01/22/2022 23:01:10 - INFO - codeparrot_training - Step 2068: {'lr': 0.0004999975963225282, 'samples': 66176, 'steps': 2067, 'loss/train': 5.5214457511901855} +01/22/2022 23:01:10 - INFO - codeparrot_training - Step 2069: {'lr': 0.0004999975240356217, 'samples': 66208, 'steps': 2068, 'loss/train': 7.142658233642578} +01/22/2022 23:01:11 - INFO - codeparrot_training - Step 2070: {'lr': 0.0004999974506778053, 'samples': 66240, 'steps': 2069, 'loss/train': 6.1900954246521} +01/22/2022 23:01:12 - INFO - codeparrot_training - Step 2071: {'lr': 0.0004999973762490792, 'samples': 66272, 'steps': 2070, 'loss/train': 4.877043724060059} +01/22/2022 23:01:12 - INFO - codeparrot_training - Step 2072: {'lr': 0.0004999973007494436, 'samples': 66304, 'steps': 2071, 'loss/train': 6.292701244354248} +01/22/2022 23:01:13 - INFO - codeparrot_training - Step 2073: {'lr': 0.000499997224178899, 'samples': 66336, 'steps': 2072, 'loss/train': 6.933654308319092} +01/22/2022 23:01:14 - INFO - codeparrot_training - Step 2074: {'lr': 0.0004999971465374457, 'samples': 66368, 'steps': 2073, 'loss/train': 5.968182563781738} +01/22/2022 23:01:14 - INFO - codeparrot_training - Step 2075: {'lr': 0.000499997067825084, 'samples': 66400, 'steps': 2074, 
'loss/train': 6.067746162414551} +01/22/2022 23:01:15 - INFO - codeparrot_training - Step 2076: {'lr': 0.0004999969880418142, 'samples': 66432, 'steps': 2075, 'loss/train': 7.470337390899658} +01/22/2022 23:01:15 - INFO - codeparrot_training - Step 2077: {'lr': 0.0004999969071876367, 'samples': 66464, 'steps': 2076, 'loss/train': 6.32814359664917} +01/22/2022 23:01:16 - INFO - codeparrot_training - Step 2078: {'lr': 0.0004999968252625519, 'samples': 66496, 'steps': 2077, 'loss/train': 7.278321743011475} +01/22/2022 23:01:16 - INFO - codeparrot_training - Step 2079: {'lr': 0.00049999674226656, 'samples': 66528, 'steps': 2078, 'loss/train': 6.99389123916626} +01/22/2022 23:01:17 - INFO - codeparrot_training - Step 2080: {'lr': 0.0004999966581996616, 'samples': 66560, 'steps': 2079, 'loss/train': 7.71636962890625} +01/22/2022 23:01:17 - INFO - codeparrot_training - Step 2081: {'lr': 0.0004999965730618567, 'samples': 66592, 'steps': 2080, 'loss/train': 6.238718509674072} +01/22/2022 23:01:18 - INFO - codeparrot_training - Step 2082: {'lr': 0.000499996486853146, 'samples': 66624, 'steps': 2081, 'loss/train': 7.713623046875} +01/22/2022 23:01:19 - INFO - codeparrot_training - Step 2083: {'lr': 0.0004999963995735296, 'samples': 66656, 'steps': 2082, 'loss/train': 5.624390125274658} +01/22/2022 23:01:19 - INFO - codeparrot_training - Step 2084: {'lr': 0.0004999963112230081, 'samples': 66688, 'steps': 2083, 'loss/train': 7.111047744750977} +01/22/2022 23:01:20 - INFO - codeparrot_training - Step 2085: {'lr': 0.0004999962218015818, 'samples': 66720, 'steps': 2084, 'loss/train': 5.729655742645264} +01/22/2022 23:01:20 - INFO - codeparrot_training - Step 2086: {'lr': 0.0004999961313092511, 'samples': 66752, 'steps': 2085, 'loss/train': 5.842134952545166} +01/22/2022 23:01:21 - INFO - codeparrot_training - Step 2087: {'lr': 0.0004999960397460162, 'samples': 66784, 'steps': 2086, 'loss/train': 5.290778160095215} +01/22/2022 23:01:21 - INFO - codeparrot_training - Step 2088: {'lr': 0.0004999959471118778, 'samples': 66816, 'steps': 2087, 'loss/train': 5.994714260101318} +01/22/2022 23:01:22 - INFO - codeparrot_training - Step 2089: {'lr': 0.000499995853406836, 'samples': 66848, 'steps': 2088, 'loss/train': 7.755853652954102} +01/22/2022 23:01:22 - INFO - codeparrot_training - Step 2090: {'lr': 0.0004999957586308914, 'samples': 66880, 'steps': 2089, 'loss/train': 6.896233558654785} +01/22/2022 23:01:23 - INFO - codeparrot_training - Step 2091: {'lr': 0.0004999956627840445, 'samples': 66912, 'steps': 2090, 'loss/train': 6.8619489669799805} +01/22/2022 23:01:24 - INFO - codeparrot_training - Step 2092: {'lr': 0.0004999955658662954, 'samples': 66944, 'steps': 2091, 'loss/train': 6.6442179679870605} +01/22/2022 23:01:24 - INFO - codeparrot_training - Step 2093: {'lr': 0.0004999954678776448, 'samples': 66976, 'steps': 2092, 'loss/train': 5.650410175323486} +01/22/2022 23:01:25 - INFO - codeparrot_training - Step 2094: {'lr': 0.0004999953688180929, 'samples': 67008, 'steps': 2093, 'loss/train': 5.904873371124268} +01/22/2022 23:01:25 - INFO - codeparrot_training - Step 2095: {'lr': 0.0004999952686876402, 'samples': 67040, 'steps': 2094, 'loss/train': 5.628599643707275} +01/22/2022 23:01:26 - INFO - codeparrot_training - Step 2096: {'lr': 0.0004999951674862872, 'samples': 67072, 'steps': 2095, 'loss/train': 6.281675338745117} +01/22/2022 23:01:26 - INFO - codeparrot_training - Step 2097: {'lr': 0.0004999950652140343, 'samples': 67104, 'steps': 2096, 'loss/train': 6.442784786224365} +01/22/2022 23:01:27 - INFO - 
codeparrot_training - Step 2098: {'lr': 0.0004999949618708819, 'samples': 67136, 'steps': 2097, 'loss/train': 6.1389570236206055} +01/22/2022 23:01:27 - INFO - codeparrot_training - Step 2099: {'lr': 0.0004999948574568305, 'samples': 67168, 'steps': 2098, 'loss/train': 6.688233375549316} +01/22/2022 23:01:29 - INFO - codeparrot_training - Step 2100: {'lr': 0.0004999947519718805, 'samples': 67200, 'steps': 2099, 'loss/train': 6.636012554168701} +01/22/2022 23:01:29 - INFO - codeparrot_training - Step 2101: {'lr': 0.0004999946454160324, 'samples': 67232, 'steps': 2100, 'loss/train': 5.671481609344482} +01/22/2022 23:01:30 - INFO - codeparrot_training - Step 2102: {'lr': 0.0004999945377892865, 'samples': 67264, 'steps': 2101, 'loss/train': 6.074208736419678} +01/22/2022 23:01:30 - INFO - codeparrot_training - Step 2103: {'lr': 0.0004999944290916434, 'samples': 67296, 'steps': 2102, 'loss/train': 5.773697376251221} +01/22/2022 23:01:31 - INFO - codeparrot_training - Step 2104: {'lr': 0.0004999943193231037, 'samples': 67328, 'steps': 2103, 'loss/train': 7.111809730529785} +01/22/2022 23:01:31 - INFO - codeparrot_training - Step 2105: {'lr': 0.0004999942084836675, 'samples': 67360, 'steps': 2104, 'loss/train': 5.8470916748046875} +01/22/2022 23:01:32 - INFO - codeparrot_training - Step 2106: {'lr': 0.0004999940965733356, 'samples': 67392, 'steps': 2105, 'loss/train': 6.8390703201293945} +01/22/2022 23:01:33 - INFO - codeparrot_training - Step 2107: {'lr': 0.0004999939835921085, 'samples': 67424, 'steps': 2106, 'loss/train': 5.594030380249023} +01/22/2022 23:01:33 - INFO - codeparrot_training - Step 2108: {'lr': 0.0004999938695399864, 'samples': 67456, 'steps': 2107, 'loss/train': 6.98422908782959} +01/22/2022 23:01:34 - INFO - codeparrot_training - Step 2109: {'lr': 0.00049999375441697, 'samples': 67488, 'steps': 2108, 'loss/train': 6.6895833015441895} +01/22/2022 23:01:34 - INFO - codeparrot_training - Step 2110: {'lr': 0.0004999936382230597, 'samples': 67520, 'steps': 2109, 'loss/train': 5.553679943084717} +01/22/2022 23:01:35 - INFO - codeparrot_training - Step 2111: {'lr': 0.000499993520958256, 'samples': 67552, 'steps': 2110, 'loss/train': 5.48417329788208} +01/22/2022 23:01:35 - INFO - codeparrot_training - Step 2112: {'lr': 0.0004999934026225595, 'samples': 67584, 'steps': 2111, 'loss/train': 6.9976277351379395} +01/22/2022 23:01:36 - INFO - codeparrot_training - Step 2113: {'lr': 0.0004999932832159708, 'samples': 67616, 'steps': 2112, 'loss/train': 6.493070125579834} +01/22/2022 23:01:36 - INFO - codeparrot_training - Step 2114: {'lr': 0.00049999316273849, 'samples': 67648, 'steps': 2113, 'loss/train': 5.916332721710205} +01/22/2022 23:01:37 - INFO - codeparrot_training - Step 2115: {'lr': 0.0004999930411901181, 'samples': 67680, 'steps': 2114, 'loss/train': 6.631826400756836} +01/22/2022 23:01:38 - INFO - codeparrot_training - Step 2116: {'lr': 0.0004999929185708551, 'samples': 67712, 'steps': 2115, 'loss/train': 6.13346004486084} +01/22/2022 23:01:38 - INFO - codeparrot_training - Step 2117: {'lr': 0.000499992794880702, 'samples': 67744, 'steps': 2116, 'loss/train': 6.285863399505615} +01/22/2022 23:01:39 - INFO - codeparrot_training - Step 2118: {'lr': 0.0004999926701196592, 'samples': 67776, 'steps': 2117, 'loss/train': 6.637896537780762} +01/22/2022 23:01:39 - INFO - codeparrot_training - Step 2119: {'lr': 0.0004999925442877271, 'samples': 67808, 'steps': 2118, 'loss/train': 5.250631332397461} +01/22/2022 23:01:40 - INFO - codeparrot_training - Step 2120: {'lr': 
0.0004999924173849063, 'samples': 67840, 'steps': 2119, 'loss/train': 6.255832195281982} +01/22/2022 23:01:40 - INFO - codeparrot_training - Step 2121: {'lr': 0.0004999922894111975, 'samples': 67872, 'steps': 2120, 'loss/train': 6.7650532722473145} +01/22/2022 23:01:41 - INFO - codeparrot_training - Step 2122: {'lr': 0.000499992160366601, 'samples': 67904, 'steps': 2121, 'loss/train': 5.342574119567871} +01/22/2022 23:01:42 - INFO - codeparrot_training - Step 2123: {'lr': 0.0004999920302511175, 'samples': 67936, 'steps': 2122, 'loss/train': 6.08259916305542} +01/22/2022 23:01:42 - INFO - codeparrot_training - Step 2124: {'lr': 0.0004999918990647474, 'samples': 67968, 'steps': 2123, 'loss/train': 5.719043254852295} +01/22/2022 23:01:43 - INFO - codeparrot_training - Step 2125: {'lr': 0.0004999917668074915, 'samples': 68000, 'steps': 2124, 'loss/train': 6.541088104248047} +01/22/2022 23:01:43 - INFO - codeparrot_training - Step 2126: {'lr': 0.0004999916334793503, 'samples': 68032, 'steps': 2125, 'loss/train': 5.875202178955078} +01/22/2022 23:01:44 - INFO - codeparrot_training - Step 2127: {'lr': 0.0004999914990803242, 'samples': 68064, 'steps': 2126, 'loss/train': 7.782944679260254} +01/22/2022 23:01:44 - INFO - codeparrot_training - Step 2128: {'lr': 0.000499991363610414, 'samples': 68096, 'steps': 2127, 'loss/train': 5.839483261108398} +01/22/2022 23:01:45 - INFO - codeparrot_training - Step 2129: {'lr': 0.0004999912270696202, 'samples': 68128, 'steps': 2128, 'loss/train': 6.043307781219482} +01/22/2022 23:01:46 - INFO - codeparrot_training - Step 2130: {'lr': 0.0004999910894579432, 'samples': 68160, 'steps': 2129, 'loss/train': 6.019781589508057} +01/22/2022 23:01:47 - INFO - codeparrot_training - Step 2131: {'lr': 0.000499990950775384, 'samples': 68192, 'steps': 2130, 'loss/train': 6.036535739898682} +01/22/2022 23:01:47 - INFO - codeparrot_training - Step 2132: {'lr': 0.0004999908110219428, 'samples': 68224, 'steps': 2131, 'loss/train': 6.598226547241211} +01/22/2022 23:01:48 - INFO - codeparrot_training - Step 2133: {'lr': 0.0004999906701976203, 'samples': 68256, 'steps': 2132, 'loss/train': 6.706815719604492} +01/22/2022 23:01:48 - INFO - codeparrot_training - Step 2134: {'lr': 0.0004999905283024172, 'samples': 68288, 'steps': 2133, 'loss/train': 5.4146409034729} +01/22/2022 23:01:49 - INFO - codeparrot_training - Step 2135: {'lr': 0.0004999903853363341, 'samples': 68320, 'steps': 2134, 'loss/train': 6.19075870513916} +01/22/2022 23:01:49 - INFO - codeparrot_training - Step 2136: {'lr': 0.0004999902412993715, 'samples': 68352, 'steps': 2135, 'loss/train': 6.371946811676025} +01/22/2022 23:01:50 - INFO - codeparrot_training - Step 2137: {'lr': 0.0004999900961915302, 'samples': 68384, 'steps': 2136, 'loss/train': 6.7433366775512695} +01/22/2022 23:01:50 - INFO - codeparrot_training - Step 2138: {'lr': 0.0004999899500128107, 'samples': 68416, 'steps': 2137, 'loss/train': 5.403799057006836} +01/22/2022 23:01:51 - INFO - codeparrot_training - Step 2139: {'lr': 0.0004999898027632135, 'samples': 68448, 'steps': 2138, 'loss/train': 5.504350662231445} +01/22/2022 23:01:52 - INFO - codeparrot_training - Step 2140: {'lr': 0.0004999896544427394, 'samples': 68480, 'steps': 2139, 'loss/train': 6.77522611618042} +01/22/2022 23:01:52 - INFO - codeparrot_training - Step 2141: {'lr': 0.0004999895050513891, 'samples': 68512, 'steps': 2140, 'loss/train': 6.867016792297363} +01/22/2022 23:01:53 - INFO - codeparrot_training - Step 2142: {'lr': 0.0004999893545891631, 'samples': 68544, 'steps': 2141, 
'loss/train': 6.118316650390625} +01/22/2022 23:01:53 - INFO - codeparrot_training - Step 2143: {'lr': 0.000499989203056062, 'samples': 68576, 'steps': 2142, 'loss/train': 6.546067714691162} +01/22/2022 23:01:54 - INFO - codeparrot_training - Step 2144: {'lr': 0.0004999890504520866, 'samples': 68608, 'steps': 2143, 'loss/train': 6.3838090896606445} +01/22/2022 23:01:54 - INFO - codeparrot_training - Step 2145: {'lr': 0.0004999888967772375, 'samples': 68640, 'steps': 2144, 'loss/train': 6.609239101409912} +01/22/2022 23:01:55 - INFO - codeparrot_training - Step 2146: {'lr': 0.0004999887420315153, 'samples': 68672, 'steps': 2145, 'loss/train': 5.553549766540527} +01/22/2022 23:01:55 - INFO - codeparrot_training - Step 2147: {'lr': 0.0004999885862149207, 'samples': 68704, 'steps': 2146, 'loss/train': 6.070709228515625} +01/22/2022 23:01:56 - INFO - codeparrot_training - Step 2148: {'lr': 0.0004999884293274545, 'samples': 68736, 'steps': 2147, 'loss/train': 6.0179314613342285} +01/22/2022 23:01:57 - INFO - codeparrot_training - Step 2149: {'lr': 0.0004999882713691171, 'samples': 68768, 'steps': 2148, 'loss/train': 5.441877841949463} +01/22/2022 23:01:57 - INFO - codeparrot_training - Step 2150: {'lr': 0.0004999881123399093, 'samples': 68800, 'steps': 2149, 'loss/train': 5.93379020690918} +01/22/2022 23:01:58 - INFO - codeparrot_training - Step 2151: {'lr': 0.000499987952239832, 'samples': 68832, 'steps': 2150, 'loss/train': 5.937089443206787} +01/22/2022 23:01:58 - INFO - codeparrot_training - Step 2152: {'lr': 0.0004999877910688856, 'samples': 68864, 'steps': 2151, 'loss/train': 5.819939613342285} +01/22/2022 23:01:59 - INFO - codeparrot_training - Step 2153: {'lr': 0.0004999876288270708, 'samples': 68896, 'steps': 2152, 'loss/train': 6.362030982971191} +01/22/2022 23:01:59 - INFO - codeparrot_training - Step 2154: {'lr': 0.0004999874655143886, 'samples': 68928, 'steps': 2153, 'loss/train': 5.54760217666626} +01/22/2022 23:02:00 - INFO - codeparrot_training - Step 2155: {'lr': 0.0004999873011308393, 'samples': 68960, 'steps': 2154, 'loss/train': 6.361309051513672} +01/22/2022 23:02:01 - INFO - codeparrot_training - Step 2156: {'lr': 0.0004999871356764238, 'samples': 68992, 'steps': 2155, 'loss/train': 6.448252201080322} +01/22/2022 23:02:01 - INFO - codeparrot_training - Step 2157: {'lr': 0.0004999869691511428, 'samples': 69024, 'steps': 2156, 'loss/train': 5.565062999725342} +01/22/2022 23:02:02 - INFO - codeparrot_training - Step 2158: {'lr': 0.000499986801554997, 'samples': 69056, 'steps': 2157, 'loss/train': 6.239667892456055} +01/22/2022 23:02:05 - INFO - codeparrot_training - Step 2159: {'lr': 0.0004999866328879871, 'samples': 69088, 'steps': 2158, 'loss/train': 6.7333831787109375} +01/22/2022 23:02:06 - INFO - codeparrot_training - Step 2160: {'lr': 0.0004999864631501139, 'samples': 69120, 'steps': 2159, 'loss/train': 6.069599628448486} +01/22/2022 23:02:06 - INFO - codeparrot_training - Step 2161: {'lr': 0.000499986292341378, 'samples': 69152, 'steps': 2160, 'loss/train': 6.680003643035889} +01/22/2022 23:02:07 - INFO - codeparrot_training - Step 2162: {'lr': 0.0004999861204617803, 'samples': 69184, 'steps': 2161, 'loss/train': 6.518747329711914} +01/22/2022 23:02:07 - INFO - codeparrot_training - Step 2163: {'lr': 0.0004999859475113213, 'samples': 69216, 'steps': 2162, 'loss/train': 7.40693998336792} +01/22/2022 23:02:08 - INFO - codeparrot_training - Step 2164: {'lr': 0.0004999857734900021, 'samples': 69248, 'steps': 2163, 'loss/train': 5.727807998657227} +01/22/2022 23:02:08 - INFO 
- codeparrot_training - Step 2165: {'lr': 0.000499985598397823, 'samples': 69280, 'steps': 2164, 'loss/train': 6.190133571624756} +01/22/2022 23:02:09 - INFO - codeparrot_training - Step 2166: {'lr': 0.0004999854222347851, 'samples': 69312, 'steps': 2165, 'loss/train': 5.722295761108398} +01/22/2022 23:02:09 - INFO - codeparrot_training - Step 2167: {'lr': 0.000499985245000889, 'samples': 69344, 'steps': 2166, 'loss/train': 6.110927581787109} +01/22/2022 23:02:10 - INFO - codeparrot_training - Step 2168: {'lr': 0.0004999850666961355, 'samples': 69376, 'steps': 2167, 'loss/train': 5.859567165374756} +01/22/2022 23:02:11 - INFO - codeparrot_training - Step 2169: {'lr': 0.0004999848873205254, 'samples': 69408, 'steps': 2168, 'loss/train': 7.260993957519531} +01/22/2022 23:02:11 - INFO - codeparrot_training - Step 2170: {'lr': 0.0004999847068740593, 'samples': 69440, 'steps': 2169, 'loss/train': 5.754944324493408} +01/22/2022 23:02:12 - INFO - codeparrot_training - Step 2171: {'lr': 0.0004999845253567382, 'samples': 69472, 'steps': 2170, 'loss/train': 5.592918872833252} +01/22/2022 23:02:12 - INFO - codeparrot_training - Step 2172: {'lr': 0.0004999843427685627, 'samples': 69504, 'steps': 2171, 'loss/train': 6.119709491729736} +01/22/2022 23:02:13 - INFO - codeparrot_training - Step 2173: {'lr': 0.0004999841591095337, 'samples': 69536, 'steps': 2172, 'loss/train': 6.437160491943359} +01/22/2022 23:02:13 - INFO - codeparrot_training - Step 2174: {'lr': 0.0004999839743796519, 'samples': 69568, 'steps': 2173, 'loss/train': 5.220887184143066} +01/22/2022 23:02:14 - INFO - codeparrot_training - Step 2175: {'lr': 0.0004999837885789182, 'samples': 69600, 'steps': 2174, 'loss/train': 8.513277053833008} +01/22/2022 23:02:14 - INFO - codeparrot_training - Step 2176: {'lr': 0.0004999836017073332, 'samples': 69632, 'steps': 2175, 'loss/train': 7.259474754333496} +01/22/2022 23:02:15 - INFO - codeparrot_training - Step 2177: {'lr': 0.000499983413764898, 'samples': 69664, 'steps': 2176, 'loss/train': 6.834036827087402} +01/22/2022 23:02:16 - INFO - codeparrot_training - Step 2178: {'lr': 0.0004999832247516132, 'samples': 69696, 'steps': 2177, 'loss/train': 6.045631408691406} +01/22/2022 23:02:16 - INFO - codeparrot_training - Step 2179: {'lr': 0.0004999830346674796, 'samples': 69728, 'steps': 2178, 'loss/train': 5.314755916595459} +01/22/2022 23:02:17 - INFO - codeparrot_training - Step 2180: {'lr': 0.000499982843512498, 'samples': 69760, 'steps': 2179, 'loss/train': 5.397880554199219} +01/22/2022 23:02:17 - INFO - codeparrot_training - Step 2181: {'lr': 0.0004999826512866693, 'samples': 69792, 'steps': 2180, 'loss/train': 6.140744686126709} +01/22/2022 23:02:18 - INFO - codeparrot_training - Step 2182: {'lr': 0.0004999824579899944, 'samples': 69824, 'steps': 2181, 'loss/train': 5.636348724365234} +01/22/2022 23:02:18 - INFO - codeparrot_training - Step 2183: {'lr': 0.000499982263622474, 'samples': 69856, 'steps': 2182, 'loss/train': 6.28033447265625} +01/22/2022 23:02:19 - INFO - codeparrot_training - Step 2184: {'lr': 0.0004999820681841088, 'samples': 69888, 'steps': 2183, 'loss/train': 6.152684688568115} +01/22/2022 23:02:19 - INFO - codeparrot_training - Step 2185: {'lr': 0.0004999818716748999, 'samples': 69920, 'steps': 2184, 'loss/train': 5.9169745445251465} +01/22/2022 23:02:20 - INFO - codeparrot_training - Step 2186: {'lr': 0.0004999816740948481, 'samples': 69952, 'steps': 2185, 'loss/train': 6.037203788757324} +01/22/2022 23:02:20 - INFO - codeparrot_training - Step 2187: {'lr': 
0.0004999814754439542, 'samples': 69984, 'steps': 2186, 'loss/train': 6.530601978302002} +01/22/2022 23:02:21 - INFO - codeparrot_training - Step 2188: {'lr': 0.000499981275722219, 'samples': 70016, 'steps': 2187, 'loss/train': 6.625133991241455} +01/22/2022 23:02:22 - INFO - codeparrot_training - Step 2189: {'lr': 0.0004999810749296434, 'samples': 70048, 'steps': 2188, 'loss/train': 5.739337921142578} +01/22/2022 23:02:23 - INFO - codeparrot_training - Step 2190: {'lr': 0.0004999808730662282, 'samples': 70080, 'steps': 2189, 'loss/train': 5.776611804962158} +01/22/2022 23:02:23 - INFO - codeparrot_training - Step 2191: {'lr': 0.0004999806701319743, 'samples': 70112, 'steps': 2190, 'loss/train': 5.663517951965332} +01/22/2022 23:02:24 - INFO - codeparrot_training - Step 2192: {'lr': 0.0004999804661268827, 'samples': 70144, 'steps': 2191, 'loss/train': 6.017219066619873} +01/22/2022 23:02:24 - INFO - codeparrot_training - Step 2193: {'lr': 0.0004999802610509541, 'samples': 70176, 'steps': 2192, 'loss/train': 6.198954105377197} +01/22/2022 23:02:25 - INFO - codeparrot_training - Step 2194: {'lr': 0.0004999800549041894, 'samples': 70208, 'steps': 2193, 'loss/train': 5.530963897705078} +01/22/2022 23:02:26 - INFO - codeparrot_training - Step 2195: {'lr': 0.0004999798476865895, 'samples': 70240, 'steps': 2194, 'loss/train': 5.713320255279541} +01/22/2022 23:02:26 - INFO - codeparrot_training - Step 2196: {'lr': 0.0004999796393981554, 'samples': 70272, 'steps': 2195, 'loss/train': 5.062238693237305} +01/22/2022 23:02:27 - INFO - codeparrot_training - Step 2197: {'lr': 0.0004999794300388879, 'samples': 70304, 'steps': 2196, 'loss/train': 4.996308326721191} +01/22/2022 23:02:27 - INFO - codeparrot_training - Step 2198: {'lr': 0.0004999792196087879, 'samples': 70336, 'steps': 2197, 'loss/train': 6.8712897300720215} +01/22/2022 23:02:28 - INFO - codeparrot_training - Step 2199: {'lr': 0.0004999790081078562, 'samples': 70368, 'steps': 2198, 'loss/train': 6.344396591186523} +01/22/2022 23:02:29 - INFO - codeparrot_training - Step 2200: {'lr': 0.0004999787955360939, 'samples': 70400, 'steps': 2199, 'loss/train': 6.255932807922363} +01/22/2022 23:02:29 - INFO - codeparrot_training - Step 2201: {'lr': 0.0004999785818935018, 'samples': 70432, 'steps': 2200, 'loss/train': 6.597516059875488} +01/22/2022 23:02:30 - INFO - codeparrot_training - Step 2202: {'lr': 0.0004999783671800808, 'samples': 70464, 'steps': 2201, 'loss/train': 5.948124885559082} +01/22/2022 23:02:30 - INFO - codeparrot_training - Step 2203: {'lr': 0.0004999781513958318, 'samples': 70496, 'steps': 2202, 'loss/train': 5.85418176651001} +01/22/2022 23:02:31 - INFO - codeparrot_training - Step 2204: {'lr': 0.000499977934540756, 'samples': 70528, 'steps': 2203, 'loss/train': 6.110688209533691} +01/22/2022 23:02:31 - INFO - codeparrot_training - Step 2205: {'lr': 0.0004999777166148539, 'samples': 70560, 'steps': 2204, 'loss/train': 6.611911773681641} +01/22/2022 23:02:32 - INFO - codeparrot_training - Step 2206: {'lr': 0.0004999774976181267, 'samples': 70592, 'steps': 2205, 'loss/train': 5.126443386077881} +01/22/2022 23:02:32 - INFO - codeparrot_training - Step 2207: {'lr': 0.0004999772775505753, 'samples': 70624, 'steps': 2206, 'loss/train': 5.8202900886535645} +01/22/2022 23:02:33 - INFO - codeparrot_training - Step 2208: {'lr': 0.0004999770564122005, 'samples': 70656, 'steps': 2207, 'loss/train': 7.178384304046631} +01/22/2022 23:02:34 - INFO - codeparrot_training - Step 2209: {'lr': 0.0004999768342030035, 'samples': 70688, 'steps': 2208, 
'loss/train': 3.712338447570801} +01/22/2022 23:02:34 - INFO - codeparrot_training - Step 2210: {'lr': 0.0004999766109229851, 'samples': 70720, 'steps': 2209, 'loss/train': 4.209397792816162} +01/22/2022 23:02:35 - INFO - codeparrot_training - Step 2211: {'lr': 0.0004999763865721463, 'samples': 70752, 'steps': 2210, 'loss/train': 5.531566143035889} +01/22/2022 23:02:35 - INFO - codeparrot_training - Step 2212: {'lr': 0.000499976161150488, 'samples': 70784, 'steps': 2211, 'loss/train': 6.391098499298096} +01/22/2022 23:02:36 - INFO - codeparrot_training - Step 2213: {'lr': 0.0004999759346580111, 'samples': 70816, 'steps': 2212, 'loss/train': 6.206331253051758} +01/22/2022 23:02:36 - INFO - codeparrot_training - Step 2214: {'lr': 0.0004999757070947168, 'samples': 70848, 'steps': 2213, 'loss/train': 5.777990341186523} +01/22/2022 23:02:37 - INFO - codeparrot_training - Step 2215: {'lr': 0.0004999754784606058, 'samples': 70880, 'steps': 2214, 'loss/train': 6.774616718292236} +01/22/2022 23:02:37 - INFO - codeparrot_training - Step 2216: {'lr': 0.0004999752487556794, 'samples': 70912, 'steps': 2215, 'loss/train': 5.979187965393066} +01/22/2022 23:02:38 - INFO - codeparrot_training - Step 2217: {'lr': 0.0004999750179799383, 'samples': 70944, 'steps': 2216, 'loss/train': 5.6175432205200195} +01/22/2022 23:02:39 - INFO - codeparrot_training - Step 2218: {'lr': 0.0004999747861333838, 'samples': 70976, 'steps': 2217, 'loss/train': 5.746852874755859} +01/22/2022 23:02:40 - INFO - codeparrot_training - Step 2219: {'lr': 0.0004999745532160164, 'samples': 71008, 'steps': 2218, 'loss/train': 6.409604549407959} +01/22/2022 23:02:40 - INFO - codeparrot_training - Step 2220: {'lr': 0.0004999743192278377, 'samples': 71040, 'steps': 2219, 'loss/train': 5.912817001342773} +01/22/2022 23:02:41 - INFO - codeparrot_training - Step 2221: {'lr': 0.0004999740841688481, 'samples': 71072, 'steps': 2220, 'loss/train': 6.746061325073242} +01/22/2022 23:02:42 - INFO - codeparrot_training - Step 2222: {'lr': 0.000499973848039049, 'samples': 71104, 'steps': 2221, 'loss/train': 6.688259601593018} +01/22/2022 23:02:42 - INFO - codeparrot_training - Step 2223: {'lr': 0.0004999736108384414, 'samples': 71136, 'steps': 2222, 'loss/train': 5.781885147094727} +01/22/2022 23:02:43 - INFO - codeparrot_training - Step 2224: {'lr': 0.0004999733725670261, 'samples': 71168, 'steps': 2223, 'loss/train': 6.0281572341918945} +01/22/2022 23:02:43 - INFO - codeparrot_training - Step 2225: {'lr': 0.0004999731332248044, 'samples': 71200, 'steps': 2224, 'loss/train': 5.804926872253418} +01/22/2022 23:02:44 - INFO - codeparrot_training - Step 2226: {'lr': 0.0004999728928117771, 'samples': 71232, 'steps': 2225, 'loss/train': 6.535507678985596} +01/22/2022 23:02:44 - INFO - codeparrot_training - Step 2227: {'lr': 0.0004999726513279452, 'samples': 71264, 'steps': 2226, 'loss/train': 5.810279369354248} +01/22/2022 23:02:45 - INFO - codeparrot_training - Step 2228: {'lr': 0.0004999724087733099, 'samples': 71296, 'steps': 2227, 'loss/train': 5.633481025695801} +01/22/2022 23:02:45 - INFO - codeparrot_training - Step 2229: {'lr': 0.0004999721651478723, 'samples': 71328, 'steps': 2228, 'loss/train': 6.37047815322876} +01/22/2022 23:02:46 - INFO - codeparrot_training - Step 2230: {'lr': 0.0004999719204516332, 'samples': 71360, 'steps': 2229, 'loss/train': 6.256646156311035} +01/22/2022 23:02:47 - INFO - codeparrot_training - Step 2231: {'lr': 0.0004999716746845937, 'samples': 71392, 'steps': 2230, 'loss/train': 6.301931858062744} +01/22/2022 23:02:47 - 
INFO - codeparrot_training - Step 2232: {'lr': 0.0004999714278467551, 'samples': 71424, 'steps': 2231, 'loss/train': 6.059449672698975} +01/22/2022 23:02:48 - INFO - codeparrot_training - Step 2233: {'lr': 0.0004999711799381181, 'samples': 71456, 'steps': 2232, 'loss/train': 5.840454578399658} +01/22/2022 23:02:48 - INFO - codeparrot_training - Step 2234: {'lr': 0.000499970930958684, 'samples': 71488, 'steps': 2233, 'loss/train': 5.499769687652588} +01/22/2022 23:02:49 - INFO - codeparrot_training - Step 2235: {'lr': 0.0004999706809084538, 'samples': 71520, 'steps': 2234, 'loss/train': 6.733277797698975} +01/22/2022 23:02:49 - INFO - codeparrot_training - Step 2236: {'lr': 0.0004999704297874287, 'samples': 71552, 'steps': 2235, 'loss/train': 7.082030296325684} +01/22/2022 23:02:50 - INFO - codeparrot_training - Step 2237: {'lr': 0.0004999701775956095, 'samples': 71584, 'steps': 2236, 'loss/train': 6.668962001800537} +01/22/2022 23:02:50 - INFO - codeparrot_training - Step 2238: {'lr': 0.0004999699243329975, 'samples': 71616, 'steps': 2237, 'loss/train': 6.446104049682617} +01/22/2022 23:02:51 - INFO - codeparrot_training - Step 2239: {'lr': 0.0004999696699995937, 'samples': 71648, 'steps': 2238, 'loss/train': 6.356940746307373} +01/22/2022 23:02:52 - INFO - codeparrot_training - Step 2240: {'lr': 0.0004999694145953992, 'samples': 71680, 'steps': 2239, 'loss/train': 6.308216094970703} +01/22/2022 23:02:52 - INFO - codeparrot_training - Step 2241: {'lr': 0.0004999691581204152, 'samples': 71712, 'steps': 2240, 'loss/train': 6.751335144042969} +01/22/2022 23:02:53 - INFO - codeparrot_training - Step 2242: {'lr': 0.0004999689005746426, 'samples': 71744, 'steps': 2241, 'loss/train': 5.511754512786865} +01/22/2022 23:02:53 - INFO - codeparrot_training - Step 2243: {'lr': 0.0004999686419580827, 'samples': 71776, 'steps': 2242, 'loss/train': 5.014559745788574} +01/22/2022 23:02:54 - INFO - codeparrot_training - Step 2244: {'lr': 0.0004999683822707364, 'samples': 71808, 'steps': 2243, 'loss/train': 6.027141094207764} +01/22/2022 23:02:54 - INFO - codeparrot_training - Step 2245: {'lr': 0.0004999681215126049, 'samples': 71840, 'steps': 2244, 'loss/train': 5.430290222167969} +01/22/2022 23:02:55 - INFO - codeparrot_training - Step 2246: {'lr': 0.0004999678596836894, 'samples': 71872, 'steps': 2245, 'loss/train': 5.867331027984619} +01/22/2022 23:02:55 - INFO - codeparrot_training - Step 2247: {'lr': 0.000499967596783991, 'samples': 71904, 'steps': 2246, 'loss/train': 5.95485782623291} +01/22/2022 23:02:57 - INFO - codeparrot_training - Step 2248: {'lr': 0.0004999673328135107, 'samples': 71936, 'steps': 2247, 'loss/train': 6.0834784507751465} +01/22/2022 23:02:57 - INFO - codeparrot_training - Step 2249: {'lr': 0.0004999670677722498, 'samples': 71968, 'steps': 2248, 'loss/train': 5.605029582977295} +01/22/2022 23:02:58 - INFO - codeparrot_training - Step 2250: {'lr': 0.0004999668016602094, 'samples': 72000, 'steps': 2249, 'loss/train': 6.38045072555542} +01/22/2022 23:02:58 - INFO - codeparrot_training - Step 2251: {'lr': 0.0004999665344773905, 'samples': 72032, 'steps': 2250, 'loss/train': 5.473904132843018} +01/22/2022 23:02:59 - INFO - codeparrot_training - Step 2252: {'lr': 0.0004999662662237943, 'samples': 72064, 'steps': 2251, 'loss/train': 5.7066826820373535} +01/22/2022 23:03:00 - INFO - codeparrot_training - Step 2253: {'lr': 0.0004999659968994221, 'samples': 72096, 'steps': 2252, 'loss/train': 6.718323230743408} +01/22/2022 23:03:00 - INFO - codeparrot_training - Step 2254: {'lr': 
0.0004999657265042748, 'samples': 72128, 'steps': 2253, 'loss/train': 6.442641258239746} +01/22/2022 23:03:01 - INFO - codeparrot_training - Step 2255: {'lr': 0.0004999654550383539, 'samples': 72160, 'steps': 2254, 'loss/train': 6.1039533615112305} +01/22/2022 23:03:01 - INFO - codeparrot_training - Step 2256: {'lr': 0.0004999651825016603, 'samples': 72192, 'steps': 2255, 'loss/train': 5.1654558181762695} +01/22/2022 23:03:02 - INFO - codeparrot_training - Step 2257: {'lr': 0.0004999649088941951, 'samples': 72224, 'steps': 2256, 'loss/train': 5.7873454093933105} +01/22/2022 23:03:02 - INFO - codeparrot_training - Step 2258: {'lr': 0.0004999646342159597, 'samples': 72256, 'steps': 2257, 'loss/train': 6.559278964996338} +01/22/2022 23:03:03 - INFO - codeparrot_training - Step 2259: {'lr': 0.0004999643584669552, 'samples': 72288, 'steps': 2258, 'loss/train': 6.017865180969238} +01/22/2022 23:03:03 - INFO - codeparrot_training - Step 2260: {'lr': 0.0004999640816471827, 'samples': 72320, 'steps': 2259, 'loss/train': 6.141698360443115} +01/22/2022 23:03:04 - INFO - codeparrot_training - Step 2261: {'lr': 0.0004999638037566436, 'samples': 72352, 'steps': 2260, 'loss/train': 6.394923210144043} +01/22/2022 23:03:05 - INFO - codeparrot_training - Step 2262: {'lr': 0.0004999635247953387, 'samples': 72384, 'steps': 2261, 'loss/train': 5.760805606842041} +01/22/2022 23:03:05 - INFO - codeparrot_training - Step 2263: {'lr': 0.0004999632447632696, 'samples': 72416, 'steps': 2262, 'loss/train': 6.747614860534668} +01/22/2022 23:03:06 - INFO - codeparrot_training - Step 2264: {'lr': 0.0004999629636604372, 'samples': 72448, 'steps': 2263, 'loss/train': 5.472726345062256} +01/22/2022 23:03:06 - INFO - codeparrot_training - Step 2265: {'lr': 0.0004999626814868429, 'samples': 72480, 'steps': 2264, 'loss/train': 6.080602169036865} +01/22/2022 23:03:07 - INFO - codeparrot_training - Step 2266: {'lr': 0.0004999623982424879, 'samples': 72512, 'steps': 2265, 'loss/train': 5.31564474105835} +01/22/2022 23:03:07 - INFO - codeparrot_training - Step 2267: {'lr': 0.0004999621139273733, 'samples': 72544, 'steps': 2266, 'loss/train': 4.617450714111328} +01/22/2022 23:03:08 - INFO - codeparrot_training - Step 2268: {'lr': 0.0004999618285415004, 'samples': 72576, 'steps': 2267, 'loss/train': 5.9042558670043945} +01/22/2022 23:03:08 - INFO - codeparrot_training - Step 2269: {'lr': 0.0004999615420848704, 'samples': 72608, 'steps': 2268, 'loss/train': 6.805499076843262} +01/22/2022 23:03:09 - INFO - codeparrot_training - Step 2270: {'lr': 0.0004999612545574845, 'samples': 72640, 'steps': 2269, 'loss/train': 6.463893890380859} +01/22/2022 23:03:10 - INFO - codeparrot_training - Step 2271: {'lr': 0.000499960965959344, 'samples': 72672, 'steps': 2270, 'loss/train': 5.307524681091309} +01/22/2022 23:03:10 - INFO - codeparrot_training - Step 2272: {'lr': 0.0004999606762904501, 'samples': 72704, 'steps': 2271, 'loss/train': 5.494930744171143} +01/22/2022 23:03:11 - INFO - codeparrot_training - Step 2273: {'lr': 0.000499960385550804, 'samples': 72736, 'steps': 2272, 'loss/train': 6.342989921569824} +01/22/2022 23:03:11 - INFO - codeparrot_training - Step 2274: {'lr': 0.000499960093740407, 'samples': 72768, 'steps': 2273, 'loss/train': 4.93414306640625} +01/22/2022 23:03:12 - INFO - codeparrot_training - Step 2275: {'lr': 0.0004999598008592603, 'samples': 72800, 'steps': 2274, 'loss/train': 5.9724578857421875} +01/22/2022 23:03:12 - INFO - codeparrot_training - Step 2276: {'lr': 0.0004999595069073653, 'samples': 72832, 'steps': 2275, 
'loss/train': 6.657103538513184} +01/22/2022 23:03:14 - INFO - codeparrot_training - Step 2277: {'lr': 0.0004999592118847229, 'samples': 72864, 'steps': 2276, 'loss/train': 5.977766513824463} +01/22/2022 23:03:14 - INFO - codeparrot_training - Step 2278: {'lr': 0.0004999589157913348, 'samples': 72896, 'steps': 2277, 'loss/train': 5.901230335235596} +01/22/2022 23:03:15 - INFO - codeparrot_training - Step 2279: {'lr': 0.0004999586186272021, 'samples': 72928, 'steps': 2278, 'loss/train': 6.5948004722595215} +01/22/2022 23:03:15 - INFO - codeparrot_training - Step 2280: {'lr': 0.000499958320392326, 'samples': 72960, 'steps': 2279, 'loss/train': 6.457819938659668} +01/22/2022 23:03:16 - INFO - codeparrot_training - Step 2281: {'lr': 0.0004999580210867077, 'samples': 72992, 'steps': 2280, 'loss/train': 6.47916841506958} +01/22/2022 23:03:16 - INFO - codeparrot_training - Step 2282: {'lr': 0.0004999577207103487, 'samples': 73024, 'steps': 2281, 'loss/train': 5.826916217803955} +01/22/2022 23:03:17 - INFO - codeparrot_training - Step 2283: {'lr': 0.0004999574192632502, 'samples': 73056, 'steps': 2282, 'loss/train': 6.284051418304443} +01/22/2022 23:03:17 - INFO - codeparrot_training - Step 2284: {'lr': 0.0004999571167454135, 'samples': 73088, 'steps': 2283, 'loss/train': 6.177947998046875} +01/22/2022 23:03:18 - INFO - codeparrot_training - Step 2285: {'lr': 0.0004999568131568399, 'samples': 73120, 'steps': 2284, 'loss/train': 5.548189640045166} +01/22/2022 23:03:19 - INFO - codeparrot_training - Step 2286: {'lr': 0.0004999565084975306, 'samples': 73152, 'steps': 2285, 'loss/train': 7.538168907165527} +01/22/2022 23:03:19 - INFO - codeparrot_training - Step 2287: {'lr': 0.0004999562027674871, 'samples': 73184, 'steps': 2286, 'loss/train': 6.671011447906494} +01/22/2022 23:03:20 - INFO - codeparrot_training - Step 2288: {'lr': 0.0004999558959667105, 'samples': 73216, 'steps': 2287, 'loss/train': 4.90957498550415} +01/22/2022 23:03:20 - INFO - codeparrot_training - Step 2289: {'lr': 0.0004999555880952023, 'samples': 73248, 'steps': 2288, 'loss/train': 5.54325008392334} +01/22/2022 23:03:21 - INFO - codeparrot_training - Step 2290: {'lr': 0.0004999552791529637, 'samples': 73280, 'steps': 2289, 'loss/train': 5.383858680725098} +01/22/2022 23:03:21 - INFO - codeparrot_training - Step 2291: {'lr': 0.000499954969139996, 'samples': 73312, 'steps': 2290, 'loss/train': 6.149674415588379} +01/22/2022 23:03:22 - INFO - codeparrot_training - Step 2292: {'lr': 0.0004999546580563006, 'samples': 73344, 'steps': 2291, 'loss/train': 5.427423000335693} +01/22/2022 23:03:22 - INFO - codeparrot_training - Step 2293: {'lr': 0.0004999543459018788, 'samples': 73376, 'steps': 2292, 'loss/train': 4.548494338989258} +01/22/2022 23:03:23 - INFO - codeparrot_training - Step 2294: {'lr': 0.000499954032676732, 'samples': 73408, 'steps': 2293, 'loss/train': 5.654486179351807} +01/22/2022 23:03:24 - INFO - codeparrot_training - Step 2295: {'lr': 0.0004999537183808614, 'samples': 73440, 'steps': 2294, 'loss/train': 6.007665157318115} +01/22/2022 23:03:24 - INFO - codeparrot_training - Step 2296: {'lr': 0.0004999534030142686, 'samples': 73472, 'steps': 2295, 'loss/train': 6.077384948730469} +01/22/2022 23:03:25 - INFO - codeparrot_training - Step 2297: {'lr': 0.0004999530865769547, 'samples': 73504, 'steps': 2296, 'loss/train': 5.015054702758789} +01/22/2022 23:03:25 - INFO - codeparrot_training - Step 2298: {'lr': 0.0004999527690689212, 'samples': 73536, 'steps': 2297, 'loss/train': 4.250180721282959} +01/22/2022 23:03:26 - INFO 
- codeparrot_training - Step 2299: {'lr': 0.0004999524504901694, 'samples': 73568, 'steps': 2298, 'loss/train': 5.991507530212402} +01/22/2022 23:03:26 - INFO - codeparrot_training - Step 2300: {'lr': 0.0004999521308407006, 'samples': 73600, 'steps': 2299, 'loss/train': 5.732363224029541} +01/22/2022 23:03:27 - INFO - codeparrot_training - Step 2301: {'lr': 0.0004999518101205162, 'samples': 73632, 'steps': 2300, 'loss/train': 6.1274566650390625} +01/22/2022 23:03:27 - INFO - codeparrot_training - Step 2302: {'lr': 0.0004999514883296176, 'samples': 73664, 'steps': 2301, 'loss/train': 6.342630863189697} +01/22/2022 23:03:28 - INFO - codeparrot_training - Step 2303: {'lr': 0.0004999511654680064, 'samples': 73696, 'steps': 2302, 'loss/train': 6.222777843475342} +01/22/2022 23:03:29 - INFO - codeparrot_training - Step 2304: {'lr': 0.0004999508415356836, 'samples': 73728, 'steps': 2303, 'loss/train': 4.719554901123047} +01/22/2022 23:03:29 - INFO - codeparrot_training - Step 2305: {'lr': 0.0004999505165326509, 'samples': 73760, 'steps': 2304, 'loss/train': 6.334107875823975} +01/22/2022 23:03:32 - INFO - codeparrot_training - Step 2306: {'lr': 0.0004999501904589095, 'samples': 73792, 'steps': 2305, 'loss/train': 5.682618618011475} +01/22/2022 23:03:33 - INFO - codeparrot_training - Step 2307: {'lr': 0.0004999498633144608, 'samples': 73824, 'steps': 2306, 'loss/train': 6.9068098068237305} +01/22/2022 23:03:33 - INFO - codeparrot_training - Step 2308: {'lr': 0.0004999495350993062, 'samples': 73856, 'steps': 2307, 'loss/train': 6.081098556518555} +01/22/2022 23:03:34 - INFO - codeparrot_training - Step 2309: {'lr': 0.0004999492058134473, 'samples': 73888, 'steps': 2308, 'loss/train': 4.82407808303833} +01/22/2022 23:03:35 - INFO - codeparrot_training - Step 2310: {'lr': 0.0004999488754568853, 'samples': 73920, 'steps': 2309, 'loss/train': 5.932939052581787} +01/22/2022 23:03:35 - INFO - codeparrot_training - Step 2311: {'lr': 0.0004999485440296216, 'samples': 73952, 'steps': 2310, 'loss/train': 4.795346736907959} +01/22/2022 23:03:36 - INFO - codeparrot_training - Step 2312: {'lr': 0.0004999482115316579, 'samples': 73984, 'steps': 2311, 'loss/train': 4.639101982116699} +01/22/2022 23:03:36 - INFO - codeparrot_training - Step 2313: {'lr': 0.0004999478779629953, 'samples': 74016, 'steps': 2312, 'loss/train': 4.566995143890381} +01/22/2022 23:03:37 - INFO - codeparrot_training - Step 2314: {'lr': 0.0004999475433236354, 'samples': 74048, 'steps': 2313, 'loss/train': 4.113242149353027} +01/22/2022 23:03:37 - INFO - codeparrot_training - Step 2315: {'lr': 0.0004999472076135796, 'samples': 74080, 'steps': 2314, 'loss/train': 8.155808448791504} +01/22/2022 23:03:38 - INFO - codeparrot_training - Step 2316: {'lr': 0.0004999468708328293, 'samples': 74112, 'steps': 2315, 'loss/train': 5.7931623458862305} +01/22/2022 23:03:38 - INFO - codeparrot_training - Step 2317: {'lr': 0.0004999465329813859, 'samples': 74144, 'steps': 2316, 'loss/train': 6.1185407638549805} +01/22/2022 23:03:39 - INFO - codeparrot_training - Step 2318: {'lr': 0.000499946194059251, 'samples': 74176, 'steps': 2317, 'loss/train': 6.6525797843933105} +01/22/2022 23:03:40 - INFO - codeparrot_training - Step 2319: {'lr': 0.000499945854066426, 'samples': 74208, 'steps': 2318, 'loss/train': 6.121242523193359} +01/22/2022 23:03:40 - INFO - codeparrot_training - Step 2320: {'lr': 0.0004999455130029123, 'samples': 74240, 'steps': 2319, 'loss/train': 5.852152347564697} +01/22/2022 23:03:41 - INFO - codeparrot_training - Step 2321: {'lr': 
0.0004999451708687113, 'samples': 74272, 'steps': 2320, 'loss/train': 6.615255355834961} +01/22/2022 23:03:41 - INFO - codeparrot_training - Step 2322: {'lr': 0.0004999448276638247, 'samples': 74304, 'steps': 2321, 'loss/train': 6.832551956176758} +01/22/2022 23:03:42 - INFO - codeparrot_training - Step 2323: {'lr': 0.0004999444833882538, 'samples': 74336, 'steps': 2322, 'loss/train': 6.39352560043335} +01/22/2022 23:03:42 - INFO - codeparrot_training - Step 2324: {'lr': 0.000499944138042, 'samples': 74368, 'steps': 2323, 'loss/train': 4.699119567871094} +01/22/2022 23:03:43 - INFO - codeparrot_training - Step 2325: {'lr': 0.000499943791625065, 'samples': 74400, 'steps': 2324, 'loss/train': 6.0907487869262695} +01/22/2022 23:03:44 - INFO - codeparrot_training - Step 2326: {'lr': 0.0004999434441374501, 'samples': 74432, 'steps': 2325, 'loss/train': 6.101710796356201} +01/22/2022 23:03:44 - INFO - codeparrot_training - Step 2327: {'lr': 0.0004999430955791569, 'samples': 74464, 'steps': 2326, 'loss/train': 7.171142101287842} +01/22/2022 23:03:45 - INFO - codeparrot_training - Step 2328: {'lr': 0.0004999427459501868, 'samples': 74496, 'steps': 2327, 'loss/train': 6.470152854919434} +01/22/2022 23:03:45 - INFO - codeparrot_training - Step 2329: {'lr': 0.0004999423952505414, 'samples': 74528, 'steps': 2328, 'loss/train': 5.24013614654541} +01/22/2022 23:03:46 - INFO - codeparrot_training - Step 2330: {'lr': 0.000499942043480222, 'samples': 74560, 'steps': 2329, 'loss/train': 6.27314567565918} +01/22/2022 23:03:46 - INFO - codeparrot_training - Step 2331: {'lr': 0.0004999416906392303, 'samples': 74592, 'steps': 2330, 'loss/train': 5.664548397064209} +01/22/2022 23:03:47 - INFO - codeparrot_training - Step 2332: {'lr': 0.0004999413367275678, 'samples': 74624, 'steps': 2331, 'loss/train': 6.130919933319092} +01/22/2022 23:03:47 - INFO - codeparrot_training - Step 2333: {'lr': 0.000499940981745236, 'samples': 74656, 'steps': 2332, 'loss/train': 6.144593715667725} +01/22/2022 23:03:48 - INFO - codeparrot_training - Step 2334: {'lr': 0.0004999406256922365, 'samples': 74688, 'steps': 2333, 'loss/train': 6.4442009925842285} +01/22/2022 23:03:49 - INFO - codeparrot_training - Step 2335: {'lr': 0.0004999402685685705, 'samples': 74720, 'steps': 2334, 'loss/train': 5.359102725982666} +01/22/2022 23:03:49 - INFO - codeparrot_training - Step 2336: {'lr': 0.0004999399103742399, 'samples': 74752, 'steps': 2335, 'loss/train': 6.131755352020264} +01/22/2022 23:03:50 - INFO - codeparrot_training - Step 2337: {'lr': 0.000499939551109246, 'samples': 74784, 'steps': 2336, 'loss/train': 6.191539287567139} +01/22/2022 23:03:50 - INFO - codeparrot_training - Step 2338: {'lr': 0.0004999391907735905, 'samples': 74816, 'steps': 2337, 'loss/train': 4.787024021148682} +01/22/2022 23:03:52 - INFO - codeparrot_training - Step 2339: {'lr': 0.0004999388293672748, 'samples': 74848, 'steps': 2338, 'loss/train': 5.841518402099609} +01/22/2022 23:03:53 - INFO - codeparrot_training - Step 2340: {'lr': 0.0004999384668903006, 'samples': 74880, 'steps': 2339, 'loss/train': 6.237504482269287} +01/22/2022 23:03:53 - INFO - codeparrot_training - Step 2341: {'lr': 0.0004999381033426693, 'samples': 74912, 'steps': 2340, 'loss/train': 5.715233325958252} +01/22/2022 23:03:54 - INFO - codeparrot_training - Step 2342: {'lr': 0.0004999377387243827, 'samples': 74944, 'steps': 2341, 'loss/train': 5.481383323669434} +01/22/2022 23:03:55 - INFO - codeparrot_training - Step 2343: {'lr': 0.0004999373730354419, 'samples': 74976, 'steps': 2342, 
'loss/train': 5.866429328918457} +01/22/2022 23:03:55 - INFO - codeparrot_training - Step 2344: {'lr': 0.0004999370062758491, 'samples': 75008, 'steps': 2343, 'loss/train': 6.527946949005127} +01/22/2022 23:03:56 - INFO - codeparrot_training - Step 2345: {'lr': 0.0004999366384456052, 'samples': 75040, 'steps': 2344, 'loss/train': 6.131675720214844} +01/22/2022 23:03:56 - INFO - codeparrot_training - Step 2346: {'lr': 0.0004999362695447123, 'samples': 75072, 'steps': 2345, 'loss/train': 4.746463298797607} +01/22/2022 23:03:57 - INFO - codeparrot_training - Step 2347: {'lr': 0.0004999358995731718, 'samples': 75104, 'steps': 2346, 'loss/train': 6.793693542480469} +01/22/2022 23:03:57 - INFO - codeparrot_training - Step 2348: {'lr': 0.0004999355285309851, 'samples': 75136, 'steps': 2347, 'loss/train': 5.706360340118408} +01/22/2022 23:03:58 - INFO - codeparrot_training - Step 2349: {'lr': 0.0004999351564181541, 'samples': 75168, 'steps': 2348, 'loss/train': 5.666876316070557} +01/22/2022 23:03:58 - INFO - codeparrot_training - Step 2350: {'lr': 0.0004999347832346802, 'samples': 75200, 'steps': 2349, 'loss/train': 6.172057151794434} +01/22/2022 23:03:59 - INFO - codeparrot_training - Step 2351: {'lr': 0.0004999344089805651, 'samples': 75232, 'steps': 2350, 'loss/train': 6.292308807373047} +01/22/2022 23:04:00 - INFO - codeparrot_training - Step 2352: {'lr': 0.0004999340336558104, 'samples': 75264, 'steps': 2351, 'loss/train': 5.169074058532715} +01/22/2022 23:04:00 - INFO - codeparrot_training - Step 2353: {'lr': 0.0004999336572604175, 'samples': 75296, 'steps': 2352, 'loss/train': 6.283961296081543} +01/22/2022 23:04:01 - INFO - codeparrot_training - Step 2354: {'lr': 0.0004999332797943883, 'samples': 75328, 'steps': 2353, 'loss/train': 5.687952041625977} +01/22/2022 23:04:01 - INFO - codeparrot_training - Step 2355: {'lr': 0.0004999329012577243, 'samples': 75360, 'steps': 2354, 'loss/train': 7.556490898132324} +01/22/2022 23:04:02 - INFO - codeparrot_training - Step 2356: {'lr': 0.000499932521650427, 'samples': 75392, 'steps': 2355, 'loss/train': 6.182750225067139} +01/22/2022 23:04:02 - INFO - codeparrot_training - Step 2357: {'lr': 0.0004999321409724982, 'samples': 75424, 'steps': 2356, 'loss/train': 6.378958702087402} +01/22/2022 23:04:03 - INFO - codeparrot_training - Step 2358: {'lr': 0.0004999317592239395, 'samples': 75456, 'steps': 2357, 'loss/train': 6.239578723907471} +01/22/2022 23:04:03 - INFO - codeparrot_training - Step 2359: {'lr': 0.0004999313764047525, 'samples': 75488, 'steps': 2358, 'loss/train': 4.896305561065674} +01/22/2022 23:04:04 - INFO - codeparrot_training - Step 2360: {'lr': 0.0004999309925149388, 'samples': 75520, 'steps': 2359, 'loss/train': 4.86107873916626} +01/22/2022 23:04:05 - INFO - codeparrot_training - Step 2361: {'lr': 0.0004999306075545002, 'samples': 75552, 'steps': 2360, 'loss/train': 4.439815521240234} +01/22/2022 23:04:05 - INFO - codeparrot_training - Step 2362: {'lr': 0.0004999302215234381, 'samples': 75584, 'steps': 2361, 'loss/train': 4.561789512634277} +01/22/2022 23:04:06 - INFO - codeparrot_training - Step 2363: {'lr': 0.0004999298344217543, 'samples': 75616, 'steps': 2362, 'loss/train': 4.166752338409424} +01/22/2022 23:04:06 - INFO - codeparrot_training - Step 2364: {'lr': 0.0004999294462494506, 'samples': 75648, 'steps': 2363, 'loss/train': 4.032501697540283} +01/22/2022 23:04:07 - INFO - codeparrot_training - Step 2365: {'lr': 0.0004999290570065284, 'samples': 75680, 'steps': 2364, 'loss/train': 4.041882038116455} +01/22/2022 23:04:07 - 
INFO - codeparrot_training - Step 2366: {'lr': 0.0004999286666929895, 'samples': 75712, 'steps': 2365, 'loss/train': 4.052896976470947} +01/22/2022 23:04:08 - INFO - codeparrot_training - Step 2367: {'lr': 0.0004999282753088356, 'samples': 75744, 'steps': 2366, 'loss/train': 4.004087448120117} +01/22/2022 23:04:08 - INFO - codeparrot_training - Step 2368: {'lr': 0.0004999278828540682, 'samples': 75776, 'steps': 2367, 'loss/train': 3.9099459648132324} +01/22/2022 23:04:09 - INFO - codeparrot_training - Step 2369: {'lr': 0.0004999274893286893, 'samples': 75808, 'steps': 2368, 'loss/train': 3.874690055847168} +01/22/2022 23:04:10 - INFO - codeparrot_training - Step 2370: {'lr': 0.0004999270947327003, 'samples': 75840, 'steps': 2369, 'loss/train': 3.898728609085083} +01/22/2022 23:04:10 - INFO - codeparrot_training - Step 2371: {'lr': 0.0004999266990661029, 'samples': 75872, 'steps': 2370, 'loss/train': 3.8904476165771484} +01/22/2022 23:04:11 - INFO - codeparrot_training - Step 2372: {'lr': 0.0004999263023288989, 'samples': 75904, 'steps': 2371, 'loss/train': 3.910867691040039} +01/22/2022 23:04:11 - INFO - codeparrot_training - Step 2373: {'lr': 0.0004999259045210901, 'samples': 75936, 'steps': 2372, 'loss/train': 3.967233180999756} +01/22/2022 23:04:12 - INFO - codeparrot_training - Step 2374: {'lr': 0.000499925505642678, 'samples': 75968, 'steps': 2373, 'loss/train': 3.8130006790161133} +01/22/2022 23:04:12 - INFO - codeparrot_training - Step 2375: {'lr': 0.0004999251056936645, 'samples': 76000, 'steps': 2374, 'loss/train': 3.8855743408203125} +01/22/2022 23:04:13 - INFO - codeparrot_training - Step 2376: {'lr': 0.000499924704674051, 'samples': 76032, 'steps': 2375, 'loss/train': 3.8432374000549316} +01/22/2022 23:04:13 - INFO - codeparrot_training - Step 2377: {'lr': 0.0004999243025838396, 'samples': 76064, 'steps': 2376, 'loss/train': 3.8273446559906006} +01/22/2022 23:04:14 - INFO - codeparrot_training - Step 2378: {'lr': 0.0004999238994230318, 'samples': 76096, 'steps': 2377, 'loss/train': 10.813787460327148} +01/22/2022 23:04:15 - INFO - codeparrot_training - Step 2379: {'lr': 0.0004999234951916293, 'samples': 76128, 'steps': 2378, 'loss/train': 8.179521560668945} +01/22/2022 23:04:15 - INFO - codeparrot_training - Step 2380: {'lr': 0.0004999230898896341, 'samples': 76160, 'steps': 2379, 'loss/train': 7.398268222808838} +01/22/2022 23:04:16 - INFO - codeparrot_training - Step 2381: {'lr': 0.0004999226835170476, 'samples': 76192, 'steps': 2380, 'loss/train': 6.871077060699463} +01/22/2022 23:04:16 - INFO - codeparrot_training - Step 2382: {'lr': 0.0004999222760738717, 'samples': 76224, 'steps': 2381, 'loss/train': 6.635439872741699} +01/22/2022 23:04:17 - INFO - codeparrot_training - Step 2383: {'lr': 0.0004999218675601081, 'samples': 76256, 'steps': 2382, 'loss/train': 6.7965474128723145} +01/22/2022 23:04:18 - INFO - codeparrot_training - Step 2384: {'lr': 0.0004999214579757586, 'samples': 76288, 'steps': 2383, 'loss/train': 6.357760906219482} +01/22/2022 23:04:19 - INFO - codeparrot_training - Step 2385: {'lr': 0.000499921047320825, 'samples': 76320, 'steps': 2384, 'loss/train': 6.075595378875732} +01/22/2022 23:04:19 - INFO - codeparrot_training - Step 2386: {'lr': 0.000499920635595309, 'samples': 76352, 'steps': 2385, 'loss/train': 6.359742164611816} +01/22/2022 23:04:20 - INFO - codeparrot_training - Step 2387: {'lr': 0.0004999202227992122, 'samples': 76384, 'steps': 2386, 'loss/train': 5.486861705780029} +01/22/2022 23:04:20 - INFO - codeparrot_training - Step 2388: {'lr': 
0.0004999198089325367, 'samples': 76416, 'steps': 2387, 'loss/train': 6.365532398223877} +01/22/2022 23:04:21 - INFO - codeparrot_training - Step 2389: {'lr': 0.0004999193939952839, 'samples': 76448, 'steps': 2388, 'loss/train': 6.146604537963867} +01/22/2022 23:04:21 - INFO - codeparrot_training - Step 2390: {'lr': 0.000499918977987456, 'samples': 76480, 'steps': 2389, 'loss/train': 6.854086875915527} +01/22/2022 23:04:22 - INFO - codeparrot_training - Step 2391: {'lr': 0.0004999185609090544, 'samples': 76512, 'steps': 2390, 'loss/train': 6.388641357421875} +01/22/2022 23:04:22 - INFO - codeparrot_training - Step 2392: {'lr': 0.0004999181427600811, 'samples': 76544, 'steps': 2391, 'loss/train': 6.753787040710449} +01/22/2022 23:04:23 - INFO - codeparrot_training - Step 2393: {'lr': 0.0004999177235405378, 'samples': 76576, 'steps': 2392, 'loss/train': 6.769316673278809} +01/22/2022 23:04:24 - INFO - codeparrot_training - Step 2394: {'lr': 0.0004999173032504264, 'samples': 76608, 'steps': 2393, 'loss/train': 6.412133693695068} +01/22/2022 23:04:24 - INFO - codeparrot_training - Step 2395: {'lr': 0.0004999168818897486, 'samples': 76640, 'steps': 2394, 'loss/train': 6.666980743408203} +01/22/2022 23:04:25 - INFO - codeparrot_training - Step 2396: {'lr': 0.0004999164594585062, 'samples': 76672, 'steps': 2395, 'loss/train': 4.831477165222168} +01/22/2022 23:04:25 - INFO - codeparrot_training - Step 2397: {'lr': 0.0004999160359567011, 'samples': 76704, 'steps': 2396, 'loss/train': 6.568365573883057} +01/22/2022 23:04:26 - INFO - codeparrot_training - Step 2398: {'lr': 0.000499915611384335, 'samples': 76736, 'steps': 2397, 'loss/train': 6.036619663238525} +01/22/2022 23:04:26 - INFO - codeparrot_training - Step 2399: {'lr': 0.0004999151857414099, 'samples': 76768, 'steps': 2398, 'loss/train': 5.817127227783203} +01/22/2022 23:04:27 - INFO - codeparrot_training - Step 2400: {'lr': 0.0004999147590279273, 'samples': 76800, 'steps': 2399, 'loss/train': 5.787895202636719} +01/22/2022 23:04:27 - INFO - codeparrot_training - Step 2401: {'lr': 0.0004999143312438893, 'samples': 76832, 'steps': 2400, 'loss/train': 5.831036567687988} +01/22/2022 23:04:28 - INFO - codeparrot_training - Step 2402: {'lr': 0.0004999139023892978, 'samples': 76864, 'steps': 2401, 'loss/train': 5.366390705108643} +01/22/2022 23:04:29 - INFO - codeparrot_training - Step 2403: {'lr': 0.0004999134724641543, 'samples': 76896, 'steps': 2402, 'loss/train': 6.160436630249023} +01/22/2022 23:04:29 - INFO - codeparrot_training - Step 2404: {'lr': 0.000499913041468461, 'samples': 76928, 'steps': 2403, 'loss/train': 4.986739635467529} +01/22/2022 23:04:30 - INFO - codeparrot_training - Step 2405: {'lr': 0.0004999126094022195, 'samples': 76960, 'steps': 2404, 'loss/train': 6.196484565734863} +01/22/2022 23:04:30 - INFO - codeparrot_training - Step 2406: {'lr': 0.0004999121762654318, 'samples': 76992, 'steps': 2405, 'loss/train': 7.403865337371826} +01/22/2022 23:04:31 - INFO - codeparrot_training - Step 2407: {'lr': 0.0004999117420580996, 'samples': 77024, 'steps': 2406, 'loss/train': 5.693836688995361} +01/22/2022 23:04:31 - INFO - codeparrot_training - Step 2408: {'lr': 0.0004999113067802249, 'samples': 77056, 'steps': 2407, 'loss/train': 5.930436134338379} +01/22/2022 23:04:32 - INFO - codeparrot_training - Step 2409: {'lr': 0.0004999108704318095, 'samples': 77088, 'steps': 2408, 'loss/train': 6.334992408752441} +01/22/2022 23:04:32 - INFO - codeparrot_training - Step 2410: {'lr': 0.0004999104330128553, 'samples': 77120, 'steps': 2409, 
'loss/train': 5.927346706390381} +01/22/2022 23:04:33 - INFO - codeparrot_training - Step 2411: {'lr': 0.0004999099945233641, 'samples': 77152, 'steps': 2410, 'loss/train': 5.374996662139893} +01/22/2022 23:04:34 - INFO - codeparrot_training - Step 2412: {'lr': 0.000499909554963338, 'samples': 77184, 'steps': 2411, 'loss/train': 6.411588191986084} +01/22/2022 23:04:35 - INFO - codeparrot_training - Step 2413: {'lr': 0.0004999091143327786, 'samples': 77216, 'steps': 2412, 'loss/train': 6.587262153625488} +01/22/2022 23:04:35 - INFO - codeparrot_training - Step 2414: {'lr': 0.000499908672631688, 'samples': 77248, 'steps': 2413, 'loss/train': 6.175594329833984} +01/22/2022 23:04:36 - INFO - codeparrot_training - Step 2415: {'lr': 0.0004999082298600679, 'samples': 77280, 'steps': 2414, 'loss/train': 5.549478054046631} +01/22/2022 23:04:36 - INFO - codeparrot_training - Step 2416: {'lr': 0.0004999077860179204, 'samples': 77312, 'steps': 2415, 'loss/train': 7.056313514709473} +01/22/2022 23:04:37 - INFO - codeparrot_training - Step 2417: {'lr': 0.0004999073411052472, 'samples': 77344, 'steps': 2416, 'loss/train': 4.66088342666626} +01/22/2022 23:04:38 - INFO - codeparrot_training - Step 2418: {'lr': 0.0004999068951220503, 'samples': 77376, 'steps': 2417, 'loss/train': 5.543019771575928} +01/22/2022 23:04:38 - INFO - codeparrot_training - Step 2419: {'lr': 0.0004999064480683317, 'samples': 77408, 'steps': 2418, 'loss/train': 4.615750789642334} +01/22/2022 23:04:39 - INFO - codeparrot_training - Step 2420: {'lr': 0.0004999059999440932, 'samples': 77440, 'steps': 2419, 'loss/train': 5.7760844230651855} +01/22/2022 23:04:39 - INFO - codeparrot_training - Step 2421: {'lr': 0.0004999055507493368, 'samples': 77472, 'steps': 2420, 'loss/train': 6.644707679748535} +01/22/2022 23:04:40 - INFO - codeparrot_training - Step 2422: {'lr': 0.0004999051004840642, 'samples': 77504, 'steps': 2421, 'loss/train': 5.597184181213379} +01/22/2022 23:04:40 - INFO - codeparrot_training - Step 2423: {'lr': 0.0004999046491482777, 'samples': 77536, 'steps': 2422, 'loss/train': 6.595596790313721} +01/22/2022 23:04:41 - INFO - codeparrot_training - Step 2424: {'lr': 0.000499904196741979, 'samples': 77568, 'steps': 2423, 'loss/train': 5.756009101867676} +01/22/2022 23:04:41 - INFO - codeparrot_training - Step 2425: {'lr': 0.00049990374326517, 'samples': 77600, 'steps': 2424, 'loss/train': 6.318925380706787} +01/22/2022 23:04:42 - INFO - codeparrot_training - Step 2426: {'lr': 0.0004999032887178527, 'samples': 77632, 'steps': 2425, 'loss/train': 5.218374729156494} +01/22/2022 23:04:43 - INFO - codeparrot_training - Step 2427: {'lr': 0.000499902833100029, 'samples': 77664, 'steps': 2426, 'loss/train': 5.223934173583984} +01/22/2022 23:04:43 - INFO - codeparrot_training - Step 2428: {'lr': 0.0004999023764117011, 'samples': 77696, 'steps': 2427, 'loss/train': 6.420833587646484} +01/22/2022 23:04:44 - INFO - codeparrot_training - Step 2429: {'lr': 0.0004999019186528708, 'samples': 77728, 'steps': 2428, 'loss/train': 6.067582607269287} +01/22/2022 23:04:44 - INFO - codeparrot_training - Step 2430: {'lr': 0.0004999014598235399, 'samples': 77760, 'steps': 2429, 'loss/train': 5.869591236114502} +01/22/2022 23:04:45 - INFO - codeparrot_training - Step 2431: {'lr': 0.0004999009999237105, 'samples': 77792, 'steps': 2430, 'loss/train': 6.309983730316162} +01/22/2022 23:04:45 - INFO - codeparrot_training - Step 2432: {'lr': 0.0004999005389533846, 'samples': 77824, 'steps': 2431, 'loss/train': 5.771934986114502} +01/22/2022 23:04:46 - INFO - 
codeparrot_training - Step 2433: {'lr': 0.0004999000769125642, 'samples': 77856, 'steps': 2432, 'loss/train': 5.918872356414795} +01/22/2022 23:04:46 - INFO - codeparrot_training - Step 2434: {'lr': 0.0004998996138012512, 'samples': 77888, 'steps': 2433, 'loss/train': 6.4619364738464355} +01/22/2022 23:04:47 - INFO - codeparrot_training - Step 2435: {'lr': 0.0004998991496194475, 'samples': 77920, 'steps': 2434, 'loss/train': 5.366245746612549} +01/22/2022 23:04:48 - INFO - codeparrot_training - Step 2436: {'lr': 0.0004998986843671552, 'samples': 77952, 'steps': 2435, 'loss/train': 5.168727874755859} +01/22/2022 23:04:48 - INFO - codeparrot_training - Step 2437: {'lr': 0.0004998982180443764, 'samples': 77984, 'steps': 2436, 'loss/train': 5.724026679992676} +01/22/2022 23:04:49 - INFO - codeparrot_training - Step 2438: {'lr': 0.000499897750651113, 'samples': 78016, 'steps': 2437, 'loss/train': 5.183835506439209} +01/22/2022 23:04:49 - INFO - codeparrot_training - Step 2439: {'lr': 0.0004998972821873668, 'samples': 78048, 'steps': 2438, 'loss/train': 5.787855625152588} +01/22/2022 23:04:50 - INFO - codeparrot_training - Step 2440: {'lr': 0.0004998968126531402, 'samples': 78080, 'steps': 2439, 'loss/train': 5.868952751159668} +01/22/2022 23:04:50 - INFO - codeparrot_training - Step 2441: {'lr': 0.0004998963420484349, 'samples': 78112, 'steps': 2440, 'loss/train': 5.921375274658203} +01/22/2022 23:04:52 - INFO - codeparrot_training - Step 2442: {'lr': 0.0004998958703732532, 'samples': 78144, 'steps': 2441, 'loss/train': 5.353790283203125} +01/22/2022 23:04:52 - INFO - codeparrot_training - Step 2443: {'lr': 0.0004998953976275966, 'samples': 78176, 'steps': 2442, 'loss/train': 6.157747745513916} +01/22/2022 23:04:53 - INFO - codeparrot_training - Step 2444: {'lr': 0.0004998949238114677, 'samples': 78208, 'steps': 2443, 'loss/train': 5.52628231048584} +01/22/2022 23:04:53 - INFO - codeparrot_training - Step 2445: {'lr': 0.0004998944489248683, 'samples': 78240, 'steps': 2444, 'loss/train': 5.228135585784912} +01/22/2022 23:04:54 - INFO - codeparrot_training - Step 2446: {'lr': 0.0004998939729678004, 'samples': 78272, 'steps': 2445, 'loss/train': 5.569291114807129} +01/22/2022 23:04:54 - INFO - codeparrot_training - Step 2447: {'lr': 0.000499893495940266, 'samples': 78304, 'steps': 2446, 'loss/train': 5.194700717926025} +01/22/2022 23:04:55 - INFO - codeparrot_training - Step 2448: {'lr': 0.0004998930178422673, 'samples': 78336, 'steps': 2447, 'loss/train': 5.806185245513916} +01/22/2022 23:04:55 - INFO - codeparrot_training - Step 2449: {'lr': 0.0004998925386738062, 'samples': 78368, 'steps': 2448, 'loss/train': 4.618330001831055} +01/22/2022 23:04:56 - INFO - codeparrot_training - Step 2450: {'lr': 0.0004998920584348849, 'samples': 78400, 'steps': 2449, 'loss/train': 6.797521591186523} +01/22/2022 23:04:57 - INFO - codeparrot_training - Step 2451: {'lr': 0.0004998915771255053, 'samples': 78432, 'steps': 2450, 'loss/train': 8.154417037963867} +01/22/2022 23:04:57 - INFO - codeparrot_training - Step 2452: {'lr': 0.0004998910947456696, 'samples': 78464, 'steps': 2451, 'loss/train': 5.967724800109863} +01/22/2022 23:04:58 - INFO - codeparrot_training - Step 2453: {'lr': 0.0004998906112953797, 'samples': 78496, 'steps': 2452, 'loss/train': 5.947542190551758} +01/22/2022 23:04:58 - INFO - codeparrot_training - Step 2454: {'lr': 0.0004998901267746379, 'samples': 78528, 'steps': 2453, 'loss/train': 6.188281059265137} +01/22/2022 23:04:59 - INFO - codeparrot_training - Step 2455: {'lr': 
0.0004998896411834461, 'samples': 78560, 'steps': 2454, 'loss/train': 5.251712322235107} +01/22/2022 23:04:59 - INFO - codeparrot_training - Step 2456: {'lr': 0.0004998891545218063, 'samples': 78592, 'steps': 2455, 'loss/train': 4.769472122192383} +01/22/2022 23:05:00 - INFO - codeparrot_training - Step 2457: {'lr': 0.0004998886667897209, 'samples': 78624, 'steps': 2456, 'loss/train': 6.6871747970581055} +01/22/2022 23:05:00 - INFO - codeparrot_training - Step 2458: {'lr': 0.0004998881779871917, 'samples': 78656, 'steps': 2457, 'loss/train': 5.748456001281738} +01/22/2022 23:05:01 - INFO - codeparrot_training - Step 2459: {'lr': 0.0004998876881142208, 'samples': 78688, 'steps': 2458, 'loss/train': 5.940188884735107} +01/22/2022 23:05:02 - INFO - codeparrot_training - Step 2460: {'lr': 0.0004998871971708106, 'samples': 78720, 'steps': 2459, 'loss/train': 6.94915246963501} +01/22/2022 23:05:02 - INFO - codeparrot_training - Step 2461: {'lr': 0.0004998867051569627, 'samples': 78752, 'steps': 2460, 'loss/train': 6.877841472625732} +01/22/2022 23:05:03 - INFO - codeparrot_training - Step 2462: {'lr': 0.0004998862120726798, 'samples': 78784, 'steps': 2461, 'loss/train': 6.263663291931152} +01/22/2022 23:05:03 - INFO - codeparrot_training - Step 2463: {'lr': 0.0004998857179179636, 'samples': 78816, 'steps': 2462, 'loss/train': 6.903162956237793} +01/22/2022 23:05:04 - INFO - codeparrot_training - Step 2464: {'lr': 0.0004998852226928164, 'samples': 78848, 'steps': 2463, 'loss/train': 5.363440990447998} +01/22/2022 23:05:04 - INFO - codeparrot_training - Step 2465: {'lr': 0.0004998847263972401, 'samples': 78880, 'steps': 2464, 'loss/train': 6.206592559814453} +01/22/2022 23:05:05 - INFO - codeparrot_training - Step 2466: {'lr': 0.0004998842290312371, 'samples': 78912, 'steps': 2465, 'loss/train': 6.514979362487793} +01/22/2022 23:05:06 - INFO - codeparrot_training - Step 2467: {'lr': 0.0004998837305948094, 'samples': 78944, 'steps': 2466, 'loss/train': 5.725783824920654} +01/22/2022 23:05:06 - INFO - codeparrot_training - Step 2468: {'lr': 0.0004998832310879591, 'samples': 78976, 'steps': 2467, 'loss/train': 5.839977264404297} +01/22/2022 23:05:07 - INFO - codeparrot_training - Step 2469: {'lr': 0.0004998827305106884, 'samples': 79008, 'steps': 2468, 'loss/train': 5.617611885070801} +01/22/2022 23:05:07 - INFO - codeparrot_training - Step 2470: {'lr': 0.0004998822288629995, 'samples': 79040, 'steps': 2469, 'loss/train': 5.654068946838379} +01/22/2022 23:05:09 - INFO - codeparrot_training - Step 2471: {'lr': 0.0004998817261448943, 'samples': 79072, 'steps': 2470, 'loss/train': 5.007466793060303} +01/22/2022 23:05:09 - INFO - codeparrot_training - Step 2472: {'lr': 0.0004998812223563754, 'samples': 79104, 'steps': 2471, 'loss/train': 4.984733581542969} +01/22/2022 23:05:10 - INFO - codeparrot_training - Step 2473: {'lr': 0.0004998807174974445, 'samples': 79136, 'steps': 2472, 'loss/train': 5.113739967346191} +01/22/2022 23:05:10 - INFO - codeparrot_training - Step 2474: {'lr': 0.0004998802115681039, 'samples': 79168, 'steps': 2473, 'loss/train': 6.409999370574951} +01/22/2022 23:05:11 - INFO - codeparrot_training - Step 2475: {'lr': 0.000499879704568356, 'samples': 79200, 'steps': 2474, 'loss/train': 5.617440223693848} +01/22/2022 23:05:11 - INFO - codeparrot_training - Step 2476: {'lr': 0.0004998791964982026, 'samples': 79232, 'steps': 2475, 'loss/train': 5.958219051361084} +01/22/2022 23:05:12 - INFO - codeparrot_training - Step 2477: {'lr': 0.0004998786873576462, 'samples': 79264, 'steps': 2476, 
'loss/train': 7.510532855987549} +01/22/2022 23:05:12 - INFO - codeparrot_training - Step 2478: {'lr': 0.0004998781771466889, 'samples': 79296, 'steps': 2477, 'loss/train': 6.015326499938965} +01/22/2022 23:05:13 - INFO - codeparrot_training - Step 2479: {'lr': 0.0004998776658653327, 'samples': 79328, 'steps': 2478, 'loss/train': 7.42247200012207} +01/22/2022 23:05:14 - INFO - codeparrot_training - Step 2480: {'lr': 0.00049987715351358, 'samples': 79360, 'steps': 2479, 'loss/train': 5.924372673034668} +01/22/2022 23:05:14 - INFO - codeparrot_training - Step 2481: {'lr': 0.0004998766400914329, 'samples': 79392, 'steps': 2480, 'loss/train': 5.267820358276367} +01/22/2022 23:05:15 - INFO - codeparrot_training - Step 2482: {'lr': 0.0004998761255988936, 'samples': 79424, 'steps': 2481, 'loss/train': 5.315567493438721} +01/22/2022 23:05:15 - INFO - codeparrot_training - Step 2483: {'lr': 0.0004998756100359643, 'samples': 79456, 'steps': 2482, 'loss/train': 6.010915279388428} +01/22/2022 23:05:16 - INFO - codeparrot_training - Step 2484: {'lr': 0.0004998750934026474, 'samples': 79488, 'steps': 2483, 'loss/train': 5.735971927642822} +01/22/2022 23:05:16 - INFO - codeparrot_training - Step 2485: {'lr': 0.0004998745756989448, 'samples': 79520, 'steps': 2484, 'loss/train': 6.522494316101074} +01/22/2022 23:05:17 - INFO - codeparrot_training - Step 2486: {'lr': 0.0004998740569248588, 'samples': 79552, 'steps': 2485, 'loss/train': 6.231141090393066} +01/22/2022 23:05:17 - INFO - codeparrot_training - Step 2487: {'lr': 0.0004998735370803917, 'samples': 79584, 'steps': 2486, 'loss/train': 5.819472789764404} +01/22/2022 23:05:18 - INFO - codeparrot_training - Step 2488: {'lr': 0.0004998730161655459, 'samples': 79616, 'steps': 2487, 'loss/train': 6.216517448425293} +01/22/2022 23:05:19 - INFO - codeparrot_training - Step 2489: {'lr': 0.0004998724941803232, 'samples': 79648, 'steps': 2488, 'loss/train': 5.503469944000244} +01/22/2022 23:05:19 - INFO - codeparrot_training - Step 2490: {'lr': 0.0004998719711247262, 'samples': 79680, 'steps': 2489, 'loss/train': 5.214082717895508} +01/22/2022 23:05:20 - INFO - codeparrot_training - Step 2491: {'lr': 0.0004998714469987571, 'samples': 79712, 'steps': 2490, 'loss/train': 4.592529296875} +01/22/2022 23:05:20 - INFO - codeparrot_training - Step 2492: {'lr': 0.000499870921802418, 'samples': 79744, 'steps': 2491, 'loss/train': 3.5322413444519043} +01/22/2022 23:05:21 - INFO - codeparrot_training - Step 2493: {'lr': 0.0004998703955357111, 'samples': 79776, 'steps': 2492, 'loss/train': 3.5254485607147217} +01/22/2022 23:05:21 - INFO - codeparrot_training - Step 2494: {'lr': 0.0004998698681986389, 'samples': 79808, 'steps': 2493, 'loss/train': 5.3347673416137695} +01/22/2022 23:05:22 - INFO - codeparrot_training - Step 2495: {'lr': 0.0004998693397912034, 'samples': 79840, 'steps': 2494, 'loss/train': 6.029024124145508} +01/22/2022 23:05:22 - INFO - codeparrot_training - Step 2496: {'lr': 0.0004998688103134072, 'samples': 79872, 'steps': 2495, 'loss/train': 6.24415922164917} +01/22/2022 23:05:23 - INFO - codeparrot_training - Step 2497: {'lr': 0.0004998682797652522, 'samples': 79904, 'steps': 2496, 'loss/train': 5.193914890289307} +01/22/2022 23:05:24 - INFO - codeparrot_training - Step 2498: {'lr': 0.0004998677481467408, 'samples': 79936, 'steps': 2497, 'loss/train': 5.826408386230469} +01/22/2022 23:05:24 - INFO - codeparrot_training - Step 2499: {'lr': 0.0004998672154578754, 'samples': 79968, 'steps': 2498, 'loss/train': 6.094350337982178} +01/22/2022 23:05:27 - INFO 
- codeparrot_training - Step 2500: {'lr': 0.0004998666816986582, 'samples': 80000, 'steps': 2499, 'loss/train': 5.555222988128662} +01/22/2022 23:05:27 - INFO - codeparrot_training - Evaluating and saving model checkpoint +01/23/2022 01:33:31 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/23/2022 01:33:31 - WARNING - huggingface_hub.repository - Revision `silver-tree-8` does not exist. Created and checked out branch `silver-tree-8`. +01/23/2022 01:33:31 - WARNING - huggingface_hub.repository - +01/23/2022 01:33:44 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/23/2022 01:33:45 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/23/2022 01:34:08 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 12.39226245880127} +01/23/2022 01:35:32 - INFO - codeparrot_training - Step 2: {'lr': 2.5e-07, 'samples': 64, 'steps': 1, 'loss/train': 12.35910415649414} +01/23/2022 01:36:56 - INFO - codeparrot_training - Step 3: {'lr': 5e-07, 'samples': 96, 'steps': 2, 'loss/train': 12.353788375854492} +01/23/2022 01:36:57 - INFO - codeparrot_training - Step 4: {'lr': 7.5e-07, 'samples': 128, 'steps': 3, 'loss/train': 12.361062049865723} +01/23/2022 01:36:58 - INFO - codeparrot_training - Step 5: {'lr': 1e-06, 'samples': 160, 'steps': 4, 'loss/train': 12.307576179504395} +01/23/2022 01:36:58 - INFO - codeparrot_training - Step 6: {'lr': 1.25e-06, 'samples': 192, 'steps': 5, 'loss/train': 12.199652671813965} +01/23/2022 01:36:59 - INFO - codeparrot_training - Step 7: {'lr': 1.5e-06, 'samples': 224, 'steps': 6, 'loss/train': 12.202125549316406} +01/23/2022 01:36:59 - INFO - codeparrot_training - Step 8: {'lr': 1.75e-06, 'samples': 256, 'steps': 7, 'loss/train': 12.178979873657227} +01/23/2022 01:37:00 - INFO - codeparrot_training - Step 9: {'lr': 2e-06, 'samples': 288, 'steps': 8, 'loss/train': 11.944904327392578} +01/23/2022 01:37:01 - INFO - codeparrot_training - Step 10: {'lr': 2.25e-06, 'samples': 320, 'steps': 9, 'loss/train': 11.830462455749512} +01/23/2022 01:37:01 - INFO - codeparrot_training - Evaluating and saving model checkpoint +01/23/2022 01:50:40 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/23/2022 01:50:40 - WARNING - huggingface_hub.repository - Revision `ancient-oath-9` does not exist. Created and checked out branch `ancient-oath-9`. 
+01/23/2022 01:50:40 - WARNING - huggingface_hub.repository - +01/23/2022 01:50:53 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/23/2022 01:50:54 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/23/2022 01:51:14 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 12.39226245880127} +01/23/2022 01:52:37 - INFO - codeparrot_training - Step 2: {'lr': 2.5e-07, 'samples': 64, 'steps': 1, 'loss/train': 12.35910415649414} +01/23/2022 01:53:58 - INFO - codeparrot_training - Step 3: {'lr': 5e-07, 'samples': 96, 'steps': 2, 'loss/train': 12.353788375854492} +01/23/2022 01:53:58 - INFO - codeparrot_training - Step 4: {'lr': 7.5e-07, 'samples': 128, 'steps': 3, 'loss/train': 12.361062049865723} +01/23/2022 01:53:59 - INFO - codeparrot_training - Step 5: {'lr': 1e-06, 'samples': 160, 'steps': 4, 'loss/train': 12.307576179504395} +01/23/2022 01:53:59 - INFO - codeparrot_training - Step 6: {'lr': 1.25e-06, 'samples': 192, 'steps': 5, 'loss/train': 12.199652671813965} +01/23/2022 01:54:00 - INFO - codeparrot_training - Step 7: {'lr': 1.5e-06, 'samples': 224, 'steps': 6, 'loss/train': 12.202125549316406} +01/23/2022 01:54:01 - INFO - codeparrot_training - Step 8: {'lr': 1.75e-06, 'samples': 256, 'steps': 7, 'loss/train': 12.178979873657227} +01/23/2022 01:54:01 - INFO - codeparrot_training - Step 9: {'lr': 2e-06, 'samples': 288, 'steps': 8, 'loss/train': 11.944904327392578} +01/23/2022 01:54:02 - INFO - codeparrot_training - Step 10: {'lr': 2.25e-06, 'samples': 320, 'steps': 9, 'loss/train': 11.830462455749512} +01/23/2022 01:54:02 - INFO - codeparrot_training - Evaluating and saving model checkpoint +01/23/2022 01:54:16 - INFO - codeparrot_training - Evaluation loss: 11.831884384155273 perplexity: 137569.484375 +01/23/2022 01:54:16 - INFO - codeparrot_training - Step 10: {'loss/eval': 11.831884384155273, 'perplexity': 137569.484375}
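Annotation (not part of the original log): the last two entries tie the evaluation loss to the reported perplexity. Perplexity is simply exp(mean eval loss), and exp(11.831884) ≈ 137569.5, matching the logged value. Below is a minimal sketch of such an evaluation step; the function name, the use of accelerator.gather, and a dataloader that yields plain input_ids tensors are assumptions for illustration, not details read from the actual training script.

import math
import torch

def evaluate(model, eval_dataloader, accelerator):
    # Sketch: average the causal-LM loss over the eval set, report exp(loss) as perplexity.
    model.eval()
    losses = []
    for batch in eval_dataloader:                    # assumed shape: (batch, seq_len) token ids
        with torch.no_grad():
            outputs = model(batch, labels=batch)     # loss = mean token-level cross-entropy
        # replicate the scalar loss per sample and gather it across the 8 TPU processes
        losses.append(accelerator.gather(outputs.loss.repeat(batch.shape[0])))
    loss = torch.mean(torch.cat(losses))
    try:
        perplexity = math.exp(loss)                  # exp(11.8319) ~ 137569.5, as logged above
    except OverflowError:
        perplexity = float("inf")
    return loss.item(), perplexity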
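Annotation (not part of the original log): the lr values in this excerpt are consistent with a linear warmup of 2.5e-7 per step up to 5e-4 at step 2000, followed by a cosine decay; plugging 2000 warmup steps and 50000 total steps into the standard cosine-with-warmup schedule reproduces the logged values to the printed precision (e.g. 0.000499981275... at 'steps': 2187). Both numbers are inferred from the logged values rather than read from the training configuration, so the sketch below is illustrative only.

import torch
from transformers import get_cosine_schedule_with_warmup

# Dummy parameter and optimizer, only needed to drive the scheduler.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.AdamW([param], lr=5e-4)

lr_scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=2_000,     # 5e-4 / 2000 = 2.5e-7 per step, as in the early log entries
    num_training_steps=50_000,  # assumed horizon, not shown in this excerpt
)

for _ in range(2_187):          # advance to 'steps': 2187, logged above as "Step 2188"
    optimizer.step()
    lr_scheduler.step()

print(lr_scheduler.get_last_lr()[0])  # ~0.000499981276, matching the Step 2188 entry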
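Annotation (not part of the original log): each (re)start of the job also creates and checks out a per-run branch in the output repository, named after the run (silver-tree-8 and ancient-oath-9 above), which is what produces the huggingface_hub.repository warnings. A rough sketch of that behaviour, assuming the since-deprecated huggingface_hub.Repository API and a hypothetical local directory and repo id:

from huggingface_hub import Repository

# Directory and repo id are placeholders; the real values are not shown in this log.
repo = Repository(local_dir="./codeparrot-checkpoints", clone_from="user/codeparrot-small")
repo.git_checkout("ancient-oath-9", create_branch_ok=True)
# When the branch does not exist yet, this logs a warning of the form:
# "Revision `ancient-oath-9` does not exist. Created and checked out branch `ancient-oath-9`."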