diff --git "a/log/debug_0.log" "b/log/debug_0.log" new file mode 100644--- /dev/null +++ "b/log/debug_0.log" @@ -0,0 +1,2012 @@ +01/26/2022 19:48:33 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +01/26/2022 19:48:33 - WARNING - huggingface_hub.repository - Revision `royal-monkey-12` does not exist. Created and checked out branch `royal-monkey-12`. +01/26/2022 19:48:33 - WARNING - huggingface_hub.repository - +01/26/2022 19:48:41 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-train-a1efdd1059bd841d +01/26/2022 19:48:42 - WARNING - datasets.builder - Using custom data configuration lvwerra___codeparrot-clean-valid-a800eb55c299abc0 +01/26/2022 19:49:16 - INFO - codeparrot_training - Step 0: {'lr': 0.0, 'samples': 192, 'steps': 0, 'loss/train': 1.3174253404140472} +01/26/2022 19:50:22 - INFO - codeparrot_training - Step 1: {'lr': 2.5e-07, 'samples': 384, 'steps': 1, 'loss/train': 1.098493069410324} +01/26/2022 19:51:32 - INFO - codeparrot_training - Step 2: {'lr': 5e-07, 'samples': 576, 'steps': 2, 'loss/train': 1.4475705027580261} +01/26/2022 19:51:35 - INFO - codeparrot_training - Step 3: {'lr': 7.5e-07, 'samples': 768, 'steps': 3, 'loss/train': 0.7283393740653992} +01/26/2022 19:51:38 - INFO - codeparrot_training - Step 4: {'lr': 1e-06, 'samples': 960, 'steps': 4, 'loss/train': 0.8016244769096375} +01/26/2022 19:51:41 - INFO - codeparrot_training - Step 5: {'lr': 1.25e-06, 'samples': 1152, 'steps': 5, 'loss/train': 1.1862211525440216} +01/26/2022 19:51:44 - INFO - codeparrot_training - Step 6: {'lr': 1.5e-06, 'samples': 1344, 'steps': 6, 'loss/train': 0.9115740358829498} +01/26/2022 19:51:47 - INFO - codeparrot_training - Step 7: {'lr': 1.75e-06, 'samples': 1536, 'steps': 7, 'loss/train': 0.7556531131267548} +01/26/2022 19:51:53 - INFO - codeparrot_training - Step 8: {'lr': 2e-06, 'samples': 1728, 'steps': 8, 'loss/train': 1.2928651571273804} +01/26/2022 19:51:56 - INFO - codeparrot_training - Step 9: {'lr': 2.25e-06, 'samples': 1920, 'steps': 9, 'loss/train': 1.7122818231582642} +01/26/2022 19:51:59 - INFO - codeparrot_training - Step 10: {'lr': 2.5e-06, 'samples': 2112, 'steps': 10, 'loss/train': 0.66591677069664} +01/26/2022 19:52:02 - INFO - codeparrot_training - Step 11: {'lr': 2.75e-06, 'samples': 2304, 'steps': 11, 'loss/train': 0.45707841217517853} +01/26/2022 19:52:06 - INFO - codeparrot_training - Step 12: {'lr': 3e-06, 'samples': 2496, 'steps': 12, 'loss/train': 1.0142653584480286} +01/26/2022 19:52:09 - INFO - codeparrot_training - Step 13: {'lr': 3.25e-06, 'samples': 2688, 'steps': 13, 'loss/train': 1.2159733772277832} +01/26/2022 19:52:12 - INFO - codeparrot_training - Step 14: {'lr': 3.5e-06, 'samples': 2880, 'steps': 14, 'loss/train': 0.8099969029426575} +01/26/2022 19:52:15 - INFO - codeparrot_training - Step 15: {'lr': 3.75e-06, 'samples': 3072, 'steps': 15, 'loss/train': 0.9018677473068237} +01/26/2022 19:52:18 - INFO - codeparrot_training - Step 16: {'lr': 4e-06, 'samples': 3264, 'steps': 16, 'loss/train': 1.4756002128124237} +01/26/2022 19:52:23 - INFO - codeparrot_training - Step 17: {'lr': 4.250000000000001e-06, 'samples': 3456, 'steps': 17, 'loss/train': 0.8930210173130035} +01/26/2022 19:52:26 - INFO - codeparrot_training - Step 18: {'lr': 4.5e-06, 'samples': 3648, 'steps': 18, 'loss/train': 1.3594367802143097} +01/26/2022 19:52:29 - INFO - codeparrot_training - Step 19: {'lr': 4.75e-06, 'samples': 3840, 'steps': 19, 'loss/train': 0.9841994047164917} +01/26/2022 19:52:32 - INFO - codeparrot_training - Step 20: {'lr': 5e-06, 'samples': 4032, 'steps': 20, 'loss/train': 0.5345936715602875} +01/26/2022 19:52:35 - INFO - codeparrot_training - Step 21: {'lr': 5.2500000000000006e-06, 'samples': 4224, 'steps': 21, 'loss/train': 0.637407973408699} +01/26/2022 19:52:38 - INFO - codeparrot_training - Step 22: {'lr': 5.5e-06, 'samples': 4416, 'steps': 22, 'loss/train': 1.2250346839427948} +01/26/2022 19:52:41 - INFO - codeparrot_training - Step 23: {'lr': 5.75e-06, 'samples': 4608, 'steps': 23, 'loss/train': 1.0428715646266937} +01/26/2022 19:52:45 - INFO - codeparrot_training - Step 24: {'lr': 6e-06, 'samples': 4800, 'steps': 24, 'loss/train': 0.9611641466617584} +01/26/2022 19:52:50 - INFO - codeparrot_training - Step 25: {'lr': 6.25e-06, 'samples': 4992, 'steps': 25, 'loss/train': 1.2642099559307098} +01/26/2022 19:52:53 - INFO - codeparrot_training - Step 26: {'lr': 6.5e-06, 'samples': 5184, 'steps': 26, 'loss/train': 0.38639961183071136} +01/26/2022 19:52:56 - INFO - codeparrot_training - Step 27: {'lr': 6.75e-06, 'samples': 5376, 'steps': 27, 'loss/train': 1.3766223192214966} +01/26/2022 19:52:59 - INFO - codeparrot_training - Step 28: {'lr': 7e-06, 'samples': 5568, 'steps': 28, 'loss/train': 1.0789436995983124} +01/26/2022 19:53:02 - INFO - codeparrot_training - Step 29: {'lr': 7.250000000000001e-06, 'samples': 5760, 'steps': 29, 'loss/train': 1.0117962956428528} +01/26/2022 19:53:05 - INFO - codeparrot_training - Step 30: {'lr': 7.5e-06, 'samples': 5952, 'steps': 30, 'loss/train': 0.4729790836572647} +01/26/2022 19:53:08 - INFO - codeparrot_training - Step 31: {'lr': 7.75e-06, 'samples': 6144, 'steps': 31, 'loss/train': 1.2678570449352264} +01/26/2022 19:53:12 - INFO - codeparrot_training - Step 32: {'lr': 8e-06, 'samples': 6336, 'steps': 32, 'loss/train': 1.0175764560699463} +01/26/2022 19:53:15 - INFO - codeparrot_training - Step 33: {'lr': 8.25e-06, 'samples': 6528, 'steps': 33, 'loss/train': 0.6667446792125702} +01/26/2022 19:53:19 - INFO - codeparrot_training - Step 34: {'lr': 8.500000000000002e-06, 'samples': 6720, 'steps': 34, 'loss/train': 1.0679191052913666} +01/26/2022 19:53:22 - INFO - codeparrot_training - Step 35: {'lr': 8.750000000000001e-06, 'samples': 6912, 'steps': 35, 'loss/train': 0.7404331415891647} +01/26/2022 19:53:25 - INFO - codeparrot_training - Step 36: {'lr': 9e-06, 'samples': 7104, 'steps': 36, 'loss/train': 1.0515027344226837} +01/26/2022 19:53:29 - INFO - codeparrot_training - Step 37: {'lr': 9.25e-06, 'samples': 7296, 'steps': 37, 'loss/train': 1.3058996200561523} +01/26/2022 19:53:32 - INFO - codeparrot_training - Step 38: {'lr': 9.5e-06, 'samples': 7488, 'steps': 38, 'loss/train': 1.1324551105499268} +01/26/2022 19:53:35 - INFO - codeparrot_training - Step 39: {'lr': 9.75e-06, 'samples': 7680, 'steps': 39, 'loss/train': 0.8902977705001831} +01/26/2022 19:53:38 - INFO - codeparrot_training - Step 40: {'lr': 1e-05, 'samples': 7872, 'steps': 40, 'loss/train': 0.5506274253129959} +01/26/2022 19:53:41 - INFO - codeparrot_training - Step 41: {'lr': 1.025e-05, 'samples': 8064, 'steps': 41, 'loss/train': 0.953863263130188} +01/26/2022 19:53:44 - INFO - codeparrot_training - Step 42: {'lr': 1.0500000000000001e-05, 'samples': 8256, 'steps': 42, 'loss/train': 1.2091366946697235} +01/26/2022 19:53:49 - INFO - codeparrot_training - Step 43: {'lr': 1.0749999999999999e-05, 'samples': 8448, 'steps': 43, 'loss/train': 0.6340087652206421} +01/26/2022 19:53:52 - INFO - codeparrot_training - Step 44: {'lr': 1.1e-05, 'samples': 8640, 'steps': 44, 'loss/train': 1.1415702402591705} +01/26/2022 19:53:55 - INFO - codeparrot_training - Step 45: {'lr': 1.1249999999999999e-05, 'samples': 8832, 'steps': 45, 'loss/train': 1.1699762642383575} +01/26/2022 19:53:58 - INFO - codeparrot_training - Step 46: {'lr': 1.15e-05, 'samples': 9024, 'steps': 46, 'loss/train': 0.519690066576004} +01/26/2022 19:54:01 - INFO - codeparrot_training - Step 47: {'lr': 1.1750000000000001e-05, 'samples': 9216, 'steps': 47, 'loss/train': 0.9116686284542084} +01/26/2022 19:54:04 - INFO - codeparrot_training - Step 48: {'lr': 1.2e-05, 'samples': 9408, 'steps': 48, 'loss/train': 0.8386119604110718} +01/26/2022 19:54:08 - INFO - codeparrot_training - Step 49: {'lr': 1.2250000000000001e-05, 'samples': 9600, 'steps': 49, 'loss/train': 1.3592749536037445} +01/26/2022 19:54:11 - INFO - codeparrot_training - Step 50: {'lr': 1.25e-05, 'samples': 9792, 'steps': 50, 'loss/train': 1.262882262468338} +01/26/2022 19:54:14 - INFO - codeparrot_training - Step 51: {'lr': 1.275e-05, 'samples': 9984, 'steps': 51, 'loss/train': 0.8111315667629242} +01/26/2022 19:54:19 - INFO - codeparrot_training - Step 52: {'lr': 1.3e-05, 'samples': 10176, 'steps': 52, 'loss/train': 0.9616953134536743} +01/26/2022 19:54:22 - INFO - codeparrot_training - Step 53: {'lr': 1.325e-05, 'samples': 10368, 'steps': 53, 'loss/train': 0.6483338177204132} +01/26/2022 19:54:26 - INFO - codeparrot_training - Step 54: {'lr': 1.35e-05, 'samples': 10560, 'steps': 54, 'loss/train': 1.130149930715561} +01/26/2022 19:54:29 - INFO - codeparrot_training - Step 55: {'lr': 1.375e-05, 'samples': 10752, 'steps': 55, 'loss/train': 0.6978979557752609} +01/26/2022 19:54:32 - INFO - codeparrot_training - Step 56: {'lr': 1.4e-05, 'samples': 10944, 'steps': 56, 'loss/train': 0.8231143355369568} +01/26/2022 19:54:35 - INFO - codeparrot_training - Step 57: {'lr': 1.425e-05, 'samples': 11136, 'steps': 57, 'loss/train': 1.9858249425888062} +01/26/2022 19:54:38 - INFO - codeparrot_training - Step 58: {'lr': 1.4500000000000002e-05, 'samples': 11328, 'steps': 58, 'loss/train': 1.0599297881126404} +01/26/2022 19:54:41 - INFO - codeparrot_training - Step 59: {'lr': 1.475e-05, 'samples': 11520, 'steps': 59, 'loss/train': 0.8293282985687256} +01/26/2022 19:54:44 - INFO - codeparrot_training - Step 60: {'lr': 1.5e-05, 'samples': 11712, 'steps': 60, 'loss/train': 0.4658845067024231} +01/26/2022 19:54:49 - INFO - codeparrot_training - Step 61: {'lr': 1.525e-05, 'samples': 11904, 'steps': 61, 'loss/train': 0.7947032153606415} +01/26/2022 19:54:52 - INFO - codeparrot_training - Step 62: {'lr': 1.55e-05, 'samples': 12096, 'steps': 62, 'loss/train': 0.481005996465683} +01/26/2022 19:54:56 - INFO - codeparrot_training - Step 63: {'lr': 1.575e-05, 'samples': 12288, 'steps': 63, 'loss/train': 0.9705469608306885} +01/26/2022 19:54:59 - INFO - codeparrot_training - Step 64: {'lr': 1.6e-05, 'samples': 12480, 'steps': 64, 'loss/train': 1.0617777407169342} +01/26/2022 19:55:02 - INFO - codeparrot_training - Step 65: {'lr': 1.6250000000000002e-05, 'samples': 12672, 'steps': 65, 'loss/train': 1.2117932438850403} +01/26/2022 19:55:05 - INFO - codeparrot_training - Step 66: {'lr': 1.65e-05, 'samples': 12864, 'steps': 66, 'loss/train': 1.173624336719513} +01/26/2022 19:55:08 - INFO - codeparrot_training - Step 67: {'lr': 1.675e-05, 'samples': 13056, 'steps': 67, 'loss/train': 1.6627777218818665} +01/26/2022 19:55:11 - INFO - codeparrot_training - Step 68: {'lr': 1.7000000000000003e-05, 'samples': 13248, 'steps': 68, 'loss/train': 1.043434739112854} +01/26/2022 19:55:14 - INFO - codeparrot_training - Step 69: {'lr': 1.7250000000000003e-05, 'samples': 13440, 'steps': 69, 'loss/train': 1.2198073267936707} +01/26/2022 19:55:19 - INFO - codeparrot_training - Step 70: {'lr': 1.7500000000000002e-05, 'samples': 13632, 'steps': 70, 'loss/train': 0.9751145839691162} +01/26/2022 19:55:22 - INFO - codeparrot_training - Step 71: {'lr': 1.7749999999999998e-05, 'samples': 13824, 'steps': 71, 'loss/train': 0.7835906445980072} +01/26/2022 19:55:25 - INFO - codeparrot_training - Step 72: {'lr': 1.8e-05, 'samples': 14016, 'steps': 72, 'loss/train': 0.7807894349098206} +01/26/2022 19:55:28 - INFO - codeparrot_training - Step 73: {'lr': 1.825e-05, 'samples': 14208, 'steps': 73, 'loss/train': 1.3491949439048767} +01/26/2022 19:55:31 - INFO - codeparrot_training - Step 74: {'lr': 1.85e-05, 'samples': 14400, 'steps': 74, 'loss/train': 1.1515992879867554} +01/26/2022 19:55:34 - INFO - codeparrot_training - Step 75: {'lr': 1.875e-05, 'samples': 14592, 'steps': 75, 'loss/train': 1.0031714737415314} +01/26/2022 19:55:38 - INFO - codeparrot_training - Step 76: {'lr': 1.9e-05, 'samples': 14784, 'steps': 76, 'loss/train': 1.1666240394115448} +01/26/2022 19:55:41 - INFO - codeparrot_training - Step 77: {'lr': 1.925e-05, 'samples': 14976, 'steps': 77, 'loss/train': 0.4633499085903168} +01/26/2022 19:55:44 - INFO - codeparrot_training - Step 78: {'lr': 1.95e-05, 'samples': 15168, 'steps': 78, 'loss/train': 1.052702933549881} +01/26/2022 19:55:49 - INFO - codeparrot_training - Step 79: {'lr': 1.975e-05, 'samples': 15360, 'steps': 79, 'loss/train': 0.7855435609817505} +01/26/2022 19:55:52 - INFO - codeparrot_training - Step 80: {'lr': 2e-05, 'samples': 15552, 'steps': 80, 'loss/train': 1.2250367403030396} +01/26/2022 19:55:56 - INFO - codeparrot_training - Step 81: {'lr': 2.025e-05, 'samples': 15744, 'steps': 81, 'loss/train': 0.906893402338028} +01/26/2022 19:55:59 - INFO - codeparrot_training - Step 82: {'lr': 2.05e-05, 'samples': 15936, 'steps': 82, 'loss/train': 0.9534100592136383} +01/26/2022 19:56:02 - INFO - codeparrot_training - Step 83: {'lr': 2.0750000000000003e-05, 'samples': 16128, 'steps': 83, 'loss/train': 1.1026679277420044} +01/26/2022 19:56:05 - INFO - codeparrot_training - Step 84: {'lr': 2.1000000000000002e-05, 'samples': 16320, 'steps': 84, 'loss/train': 0.35788920521736145} +01/26/2022 19:56:08 - INFO - codeparrot_training - Step 85: {'lr': 2.125e-05, 'samples': 16512, 'steps': 85, 'loss/train': 1.1877082586288452} +01/26/2022 19:56:11 - INFO - codeparrot_training - Step 86: {'lr': 2.1499999999999997e-05, 'samples': 16704, 'steps': 86, 'loss/train': 0.8088233470916748} +01/26/2022 19:56:16 - INFO - codeparrot_training - Step 87: {'lr': 2.175e-05, 'samples': 16896, 'steps': 87, 'loss/train': 1.3929831683635712} +01/26/2022 19:56:19 - INFO - codeparrot_training - Step 88: {'lr': 2.2e-05, 'samples': 17088, 'steps': 88, 'loss/train': 0.47710342705249786} +01/26/2022 19:56:22 - INFO - codeparrot_training - Step 89: {'lr': 2.225e-05, 'samples': 17280, 'steps': 89, 'loss/train': 0.763294905424118} +01/26/2022 19:56:25 - INFO - codeparrot_training - Step 90: {'lr': 2.2499999999999998e-05, 'samples': 17472, 'steps': 90, 'loss/train': 0.9009081423282623} +01/26/2022 19:56:28 - INFO - codeparrot_training - Step 91: {'lr': 2.275e-05, 'samples': 17664, 'steps': 91, 'loss/train': 1.0131529569625854} +01/26/2022 19:56:31 - INFO - codeparrot_training - Step 92: {'lr': 2.3e-05, 'samples': 17856, 'steps': 92, 'loss/train': 1.1171832382678986} +01/26/2022 19:56:34 - INFO - codeparrot_training - Step 93: {'lr': 2.325e-05, 'samples': 18048, 'steps': 93, 'loss/train': 1.1443756520748138} +01/26/2022 19:56:37 - INFO - codeparrot_training - Step 94: {'lr': 2.3500000000000002e-05, 'samples': 18240, 'steps': 94, 'loss/train': 1.0366325676441193} +01/26/2022 19:56:41 - INFO - codeparrot_training - Step 95: {'lr': 2.375e-05, 'samples': 18432, 'steps': 95, 'loss/train': 1.444101244211197} +01/26/2022 19:56:45 - INFO - codeparrot_training - Step 96: {'lr': 2.4e-05, 'samples': 18624, 'steps': 96, 'loss/train': 0.979115903377533} +01/26/2022 19:56:48 - INFO - codeparrot_training - Step 97: {'lr': 2.425e-05, 'samples': 18816, 'steps': 97, 'loss/train': 1.0008891820907593} +01/26/2022 19:56:51 - INFO - codeparrot_training - Step 98: {'lr': 2.4500000000000003e-05, 'samples': 19008, 'steps': 98, 'loss/train': 1.0458069741725922} +01/26/2022 19:56:54 - INFO - codeparrot_training - Step 99: {'lr': 2.4750000000000002e-05, 'samples': 19200, 'steps': 99, 'loss/train': 0.7854014039039612} +01/26/2022 19:56:58 - INFO - codeparrot_training - Step 100: {'lr': 2.5e-05, 'samples': 19392, 'steps': 100, 'loss/train': 0.9934608042240143} +01/26/2022 19:57:01 - INFO - codeparrot_training - Step 101: {'lr': 2.525e-05, 'samples': 19584, 'steps': 101, 'loss/train': 0.9963217377662659} +01/26/2022 19:57:04 - INFO - codeparrot_training - Step 102: {'lr': 2.55e-05, 'samples': 19776, 'steps': 102, 'loss/train': 0.7054498493671417} +01/26/2022 19:57:07 - INFO - codeparrot_training - Step 103: {'lr': 2.575e-05, 'samples': 19968, 'steps': 103, 'loss/train': 1.1544867753982544} +01/26/2022 19:57:10 - INFO - codeparrot_training - Step 104: {'lr': 2.6e-05, 'samples': 20160, 'steps': 104, 'loss/train': 0.8630503714084625} +01/26/2022 19:57:14 - INFO - codeparrot_training - Step 105: {'lr': 2.625e-05, 'samples': 20352, 'steps': 105, 'loss/train': 1.3325937688350677} +01/26/2022 19:57:18 - INFO - codeparrot_training - Step 106: {'lr': 2.65e-05, 'samples': 20544, 'steps': 106, 'loss/train': 0.945432722568512} +01/26/2022 19:57:21 - INFO - codeparrot_training - Step 107: {'lr': 2.675e-05, 'samples': 20736, 'steps': 107, 'loss/train': 0.8983843624591827} +01/26/2022 19:57:24 - INFO - codeparrot_training - Step 108: {'lr': 2.7e-05, 'samples': 20928, 'steps': 108, 'loss/train': 0.8008756935596466} +01/26/2022 19:57:27 - INFO - codeparrot_training - Step 109: {'lr': 2.725e-05, 'samples': 21120, 'steps': 109, 'loss/train': 1.5613712668418884} +01/26/2022 19:57:30 - INFO - codeparrot_training - Step 110: {'lr': 2.75e-05, 'samples': 21312, 'steps': 110, 'loss/train': 0.5566383004188538} +01/26/2022 19:57:33 - INFO - codeparrot_training - Step 111: {'lr': 2.775e-05, 'samples': 21504, 'steps': 111, 'loss/train': 0.6587662249803543} +01/26/2022 19:57:36 - INFO - codeparrot_training - Step 112: {'lr': 2.8e-05, 'samples': 21696, 'steps': 112, 'loss/train': 0.9899620413780212} +01/26/2022 19:57:39 - INFO - codeparrot_training - Step 113: {'lr': 2.8250000000000002e-05, 'samples': 21888, 'steps': 113, 'loss/train': 1.196788340806961} +01/26/2022 19:57:45 - INFO - codeparrot_training - Step 114: {'lr': 2.85e-05, 'samples': 22080, 'steps': 114, 'loss/train': 1.3232413530349731} +01/26/2022 19:57:48 - INFO - codeparrot_training - Step 115: {'lr': 2.875e-05, 'samples': 22272, 'steps': 115, 'loss/train': 1.111331284046173} +01/26/2022 19:57:51 - INFO - codeparrot_training - Step 116: {'lr': 2.9000000000000004e-05, 'samples': 22464, 'steps': 116, 'loss/train': 1.4940314590930939} +01/26/2022 19:57:54 - INFO - codeparrot_training - Step 117: {'lr': 2.9250000000000003e-05, 'samples': 22656, 'steps': 117, 'loss/train': 0.9729132950305939} +01/26/2022 19:57:57 - INFO - codeparrot_training - Step 118: {'lr': 2.95e-05, 'samples': 22848, 'steps': 118, 'loss/train': 1.5360177755355835} +01/26/2022 19:58:00 - INFO - codeparrot_training - Step 119: {'lr': 2.9749999999999998e-05, 'samples': 23040, 'steps': 119, 'loss/train': 1.4864407181739807} +01/26/2022 19:58:04 - INFO - codeparrot_training - Step 120: {'lr': 3e-05, 'samples': 23232, 'steps': 120, 'loss/train': 1.0524914860725403} +01/26/2022 19:58:07 - INFO - codeparrot_training - Step 121: {'lr': 3.025e-05, 'samples': 23424, 'steps': 121, 'loss/train': 0.8903714418411255} +01/26/2022 19:58:11 - INFO - codeparrot_training - Step 122: {'lr': 3.05e-05, 'samples': 23616, 'steps': 122, 'loss/train': 0.9781338572502136} +01/26/2022 19:58:15 - INFO - codeparrot_training - Step 123: {'lr': 3.075e-05, 'samples': 23808, 'steps': 123, 'loss/train': 0.5676954388618469} +01/26/2022 19:58:18 - INFO - codeparrot_training - Step 124: {'lr': 3.1e-05, 'samples': 24000, 'steps': 124, 'loss/train': 0.6250019073486328} +01/26/2022 19:58:21 - INFO - codeparrot_training - Step 125: {'lr': 3.125e-05, 'samples': 24192, 'steps': 125, 'loss/train': 1.1279764473438263} +01/26/2022 19:58:24 - INFO - codeparrot_training - Step 126: {'lr': 3.15e-05, 'samples': 24384, 'steps': 126, 'loss/train': 0.6120450049638748} +01/26/2022 19:58:27 - INFO - codeparrot_training - Step 127: {'lr': 3.175e-05, 'samples': 24576, 'steps': 127, 'loss/train': 0.6227641850709915} +01/26/2022 19:58:30 - INFO - codeparrot_training - Step 128: {'lr': 3.2e-05, 'samples': 24768, 'steps': 128, 'loss/train': 1.412695974111557} +01/26/2022 19:58:33 - INFO - codeparrot_training - Step 129: {'lr': 3.2250000000000005e-05, 'samples': 24960, 'steps': 129, 'loss/train': 1.011093556880951} +01/26/2022 19:58:36 - INFO - codeparrot_training - Step 130: {'lr': 3.2500000000000004e-05, 'samples': 25152, 'steps': 130, 'loss/train': 0.9026037454605103} +01/26/2022 19:58:42 - INFO - codeparrot_training - Step 131: {'lr': 3.275e-05, 'samples': 25344, 'steps': 131, 'loss/train': 1.2298235893249512} +01/26/2022 19:58:45 - INFO - codeparrot_training - Step 132: {'lr': 3.3e-05, 'samples': 25536, 'steps': 132, 'loss/train': 1.3381428122520447} +01/26/2022 19:58:48 - INFO - codeparrot_training - Step 133: {'lr': 3.325e-05, 'samples': 25728, 'steps': 133, 'loss/train': 1.043137639760971} +01/26/2022 19:58:51 - INFO - codeparrot_training - Step 134: {'lr': 3.35e-05, 'samples': 25920, 'steps': 134, 'loss/train': 1.189176231622696} +01/26/2022 19:58:54 - INFO - codeparrot_training - Step 135: {'lr': 3.375e-05, 'samples': 26112, 'steps': 135, 'loss/train': 1.3090053498744965} +01/26/2022 19:58:57 - INFO - codeparrot_training - Step 136: {'lr': 3.4000000000000007e-05, 'samples': 26304, 'steps': 136, 'loss/train': 1.3251706659793854} +01/26/2022 19:59:00 - INFO - codeparrot_training - Step 137: {'lr': 3.4250000000000006e-05, 'samples': 26496, 'steps': 137, 'loss/train': 0.9288864433765411} +01/26/2022 19:59:04 - INFO - codeparrot_training - Step 138: {'lr': 3.4500000000000005e-05, 'samples': 26688, 'steps': 138, 'loss/train': 1.0418122708797455} +01/26/2022 19:59:07 - INFO - codeparrot_training - Step 139: {'lr': 3.4750000000000004e-05, 'samples': 26880, 'steps': 139, 'loss/train': 0.9203312695026398} +01/26/2022 19:59:11 - INFO - codeparrot_training - Step 140: {'lr': 3.5000000000000004e-05, 'samples': 27072, 'steps': 140, 'loss/train': 0.9538873136043549} +01/26/2022 19:59:14 - INFO - codeparrot_training - Step 141: {'lr': 3.5249999999999996e-05, 'samples': 27264, 'steps': 141, 'loss/train': 0.9224672913551331} +01/26/2022 19:59:18 - INFO - codeparrot_training - Step 142: {'lr': 3.5499999999999996e-05, 'samples': 27456, 'steps': 142, 'loss/train': 0.6625053584575653} +01/26/2022 19:59:21 - INFO - codeparrot_training - Step 143: {'lr': 3.5749999999999995e-05, 'samples': 27648, 'steps': 143, 'loss/train': 0.9124108850955963} +01/26/2022 19:59:24 - INFO - codeparrot_training - Step 144: {'lr': 3.6e-05, 'samples': 27840, 'steps': 144, 'loss/train': 0.914698988199234} +01/26/2022 19:59:27 - INFO - codeparrot_training - Step 145: {'lr': 3.625e-05, 'samples': 28032, 'steps': 145, 'loss/train': 1.015060544013977} +01/26/2022 19:59:30 - INFO - codeparrot_training - Step 146: {'lr': 3.65e-05, 'samples': 28224, 'steps': 146, 'loss/train': 0.6130726039409637} +01/26/2022 19:59:33 - INFO - codeparrot_training - Step 147: {'lr': 3.675e-05, 'samples': 28416, 'steps': 147, 'loss/train': 1.1192330718040466} +01/26/2022 19:59:36 - INFO - codeparrot_training - Step 148: {'lr': 3.7e-05, 'samples': 28608, 'steps': 148, 'loss/train': 0.7602757215499878} +01/26/2022 19:59:41 - INFO - codeparrot_training - Step 149: {'lr': 3.725e-05, 'samples': 28800, 'steps': 149, 'loss/train': 0.9535870850086212} +01/26/2022 19:59:44 - INFO - codeparrot_training - Step 150: {'lr': 3.75e-05, 'samples': 28992, 'steps': 150, 'loss/train': 0.4617677181959152} +01/26/2022 19:59:47 - INFO - codeparrot_training - Step 151: {'lr': 3.775e-05, 'samples': 29184, 'steps': 151, 'loss/train': 1.0293497443199158} +01/26/2022 19:59:50 - INFO - codeparrot_training - Step 152: {'lr': 3.8e-05, 'samples': 29376, 'steps': 152, 'loss/train': 0.8444517552852631} +01/26/2022 19:59:53 - INFO - codeparrot_training - Step 153: {'lr': 3.825e-05, 'samples': 29568, 'steps': 153, 'loss/train': 0.5775855481624603} +01/26/2022 19:59:56 - INFO - codeparrot_training - Step 154: {'lr': 3.85e-05, 'samples': 29760, 'steps': 154, 'loss/train': 1.0447262227535248} +01/26/2022 20:00:00 - INFO - codeparrot_training - Step 155: {'lr': 3.875e-05, 'samples': 29952, 'steps': 155, 'loss/train': 1.02508082985878} +01/26/2022 20:00:03 - INFO - codeparrot_training - Step 156: {'lr': 3.9e-05, 'samples': 30144, 'steps': 156, 'loss/train': 0.9991528987884521} +01/26/2022 20:00:06 - INFO - codeparrot_training - Step 157: {'lr': 3.925e-05, 'samples': 30336, 'steps': 157, 'loss/train': 0.9459106028079987} +01/26/2022 20:00:11 - INFO - codeparrot_training - Step 158: {'lr': 3.95e-05, 'samples': 30528, 'steps': 158, 'loss/train': 0.8784597516059875} +01/26/2022 20:00:14 - INFO - codeparrot_training - Step 159: {'lr': 3.9750000000000004e-05, 'samples': 30720, 'steps': 159, 'loss/train': 0.8632999062538147} +01/26/2022 20:00:18 - INFO - codeparrot_training - Step 160: {'lr': 4e-05, 'samples': 30912, 'steps': 160, 'loss/train': 1.1137281954288483} +01/26/2022 20:00:21 - INFO - codeparrot_training - Step 161: {'lr': 4.025e-05, 'samples': 31104, 'steps': 161, 'loss/train': 0.729734480381012} +01/26/2022 20:00:24 - INFO - codeparrot_training - Step 162: {'lr': 4.05e-05, 'samples': 31296, 'steps': 162, 'loss/train': 0.4884277582168579} +01/26/2022 20:00:27 - INFO - codeparrot_training - Step 163: {'lr': 4.075e-05, 'samples': 31488, 'steps': 163, 'loss/train': 0.8900181949138641} +01/26/2022 20:00:30 - INFO - codeparrot_training - Step 164: {'lr': 4.1e-05, 'samples': 31680, 'steps': 164, 'loss/train': 0.6685371547937393} +01/26/2022 20:00:33 - INFO - codeparrot_training - Step 165: {'lr': 4.125e-05, 'samples': 31872, 'steps': 165, 'loss/train': 0.7850227653980255} +01/26/2022 20:00:36 - INFO - codeparrot_training - Step 166: {'lr': 4.1500000000000006e-05, 'samples': 32064, 'steps': 166, 'loss/train': 1.035389095544815} +01/26/2022 20:00:41 - INFO - codeparrot_training - Step 167: {'lr': 4.1750000000000005e-05, 'samples': 32256, 'steps': 167, 'loss/train': 1.4050793051719666} +01/26/2022 20:00:44 - INFO - codeparrot_training - Step 168: {'lr': 4.2000000000000004e-05, 'samples': 32448, 'steps': 168, 'loss/train': 0.31938090920448303} +01/26/2022 20:00:47 - INFO - codeparrot_training - Step 169: {'lr': 4.2250000000000004e-05, 'samples': 32640, 'steps': 169, 'loss/train': 0.9819863140583038} +01/26/2022 20:00:50 - INFO - codeparrot_training - Step 170: {'lr': 4.25e-05, 'samples': 32832, 'steps': 170, 'loss/train': 1.3573500216007233} +01/26/2022 20:00:53 - INFO - codeparrot_training - Step 171: {'lr': 4.275e-05, 'samples': 33024, 'steps': 171, 'loss/train': 0.6732581108808517} +01/26/2022 20:00:56 - INFO - codeparrot_training - Step 172: {'lr': 4.2999999999999995e-05, 'samples': 33216, 'steps': 172, 'loss/train': 1.555029273033142} +01/26/2022 20:01:00 - INFO - codeparrot_training - Step 173: {'lr': 4.325e-05, 'samples': 33408, 'steps': 173, 'loss/train': 1.3485970795154572} +01/26/2022 20:01:03 - INFO - codeparrot_training - Step 174: {'lr': 4.35e-05, 'samples': 33600, 'steps': 174, 'loss/train': 0.8738776445388794} +01/26/2022 20:01:06 - INFO - codeparrot_training - Step 175: {'lr': 4.375e-05, 'samples': 33792, 'steps': 175, 'loss/train': 0.763803094625473} +01/26/2022 20:01:10 - INFO - codeparrot_training - Step 176: {'lr': 4.4e-05, 'samples': 33984, 'steps': 176, 'loss/train': 0.7981584370136261} +01/26/2022 20:01:13 - INFO - codeparrot_training - Step 177: {'lr': 4.425e-05, 'samples': 34176, 'steps': 177, 'loss/train': 1.0527384281158447} +01/26/2022 20:01:16 - INFO - codeparrot_training - Step 178: {'lr': 4.45e-05, 'samples': 34368, 'steps': 178, 'loss/train': 1.026532530784607} +01/26/2022 20:01:19 - INFO - codeparrot_training - Step 179: {'lr': 4.475e-05, 'samples': 34560, 'steps': 179, 'loss/train': 1.1515803337097168} +01/26/2022 20:01:23 - INFO - codeparrot_training - Step 180: {'lr': 4.4999999999999996e-05, 'samples': 34752, 'steps': 180, 'loss/train': 0.6853055655956268} +01/26/2022 20:01:26 - INFO - codeparrot_training - Step 181: {'lr': 4.525e-05, 'samples': 34944, 'steps': 181, 'loss/train': 0.4972548186779022} +01/26/2022 20:01:29 - INFO - codeparrot_training - Step 182: {'lr': 4.55e-05, 'samples': 35136, 'steps': 182, 'loss/train': 0.8265936970710754} +01/26/2022 20:01:32 - INFO - codeparrot_training - Step 183: {'lr': 4.575e-05, 'samples': 35328, 'steps': 183, 'loss/train': 0.8314197063446045} +01/26/2022 20:01:37 - INFO - codeparrot_training - Step 184: {'lr': 4.6e-05, 'samples': 35520, 'steps': 184, 'loss/train': 1.1978421807289124} +01/26/2022 20:01:40 - INFO - codeparrot_training - Step 185: {'lr': 4.625e-05, 'samples': 35712, 'steps': 185, 'loss/train': 1.3686970174312592} +01/26/2022 20:01:43 - INFO - codeparrot_training - Step 186: {'lr': 4.65e-05, 'samples': 35904, 'steps': 186, 'loss/train': 0.7919806838035583} +01/26/2022 20:01:46 - INFO - codeparrot_training - Step 187: {'lr': 4.675e-05, 'samples': 36096, 'steps': 187, 'loss/train': 0.9425971806049347} +01/26/2022 20:01:50 - INFO - codeparrot_training - Step 188: {'lr': 4.7000000000000004e-05, 'samples': 36288, 'steps': 188, 'loss/train': 1.0793277025222778} +01/26/2022 20:01:53 - INFO - codeparrot_training - Step 189: {'lr': 4.725e-05, 'samples': 36480, 'steps': 189, 'loss/train': 0.9649386405944824} +01/26/2022 20:01:56 - INFO - codeparrot_training - Step 190: {'lr': 4.75e-05, 'samples': 36672, 'steps': 190, 'loss/train': 1.1041245460510254} +01/26/2022 20:01:59 - INFO - codeparrot_training - Step 191: {'lr': 4.775e-05, 'samples': 36864, 'steps': 191, 'loss/train': 0.9970739185810089} +01/26/2022 20:02:02 - INFO - codeparrot_training - Step 192: {'lr': 4.8e-05, 'samples': 37056, 'steps': 192, 'loss/train': 1.1216506361961365} +01/26/2022 20:02:07 - INFO - codeparrot_training - Step 193: {'lr': 4.825e-05, 'samples': 37248, 'steps': 193, 'loss/train': 0.9117452502250671} +01/26/2022 20:02:10 - INFO - codeparrot_training - Step 194: {'lr': 4.85e-05, 'samples': 37440, 'steps': 194, 'loss/train': 0.947819709777832} +01/26/2022 20:02:13 - INFO - codeparrot_training - Step 195: {'lr': 4.8750000000000006e-05, 'samples': 37632, 'steps': 195, 'loss/train': 0.9705623388290405} +01/26/2022 20:02:16 - INFO - codeparrot_training - Step 196: {'lr': 4.9000000000000005e-05, 'samples': 37824, 'steps': 196, 'loss/train': 0.750628262758255} +01/26/2022 20:02:19 - INFO - codeparrot_training - Step 197: {'lr': 4.9250000000000004e-05, 'samples': 38016, 'steps': 197, 'loss/train': 1.0953399538993835} +01/26/2022 20:02:22 - INFO - codeparrot_training - Step 198: {'lr': 4.9500000000000004e-05, 'samples': 38208, 'steps': 198, 'loss/train': 0.8188500702381134} +01/26/2022 20:02:25 - INFO - codeparrot_training - Step 199: {'lr': 4.975e-05, 'samples': 38400, 'steps': 199, 'loss/train': 1.3112203180789948} +01/26/2022 20:02:29 - INFO - codeparrot_training - Step 200: {'lr': 5e-05, 'samples': 38592, 'steps': 200, 'loss/train': 1.0671295523643494} +01/26/2022 20:02:32 - INFO - codeparrot_training - Step 201: {'lr': 5.025e-05, 'samples': 38784, 'steps': 201, 'loss/train': 1.0156736969947815} +01/26/2022 20:02:37 - INFO - codeparrot_training - Step 202: {'lr': 5.05e-05, 'samples': 38976, 'steps': 202, 'loss/train': 1.6629311442375183} +01/26/2022 20:02:41 - INFO - codeparrot_training - Step 203: {'lr': 5.075000000000001e-05, 'samples': 39168, 'steps': 203, 'loss/train': 1.3752846121788025} +01/26/2022 20:02:44 - INFO - codeparrot_training - Step 204: {'lr': 5.1e-05, 'samples': 39360, 'steps': 204, 'loss/train': 1.1547268331050873} +01/26/2022 20:02:47 - INFO - codeparrot_training - Step 205: {'lr': 5.125e-05, 'samples': 39552, 'steps': 205, 'loss/train': 0.9555048644542694} +01/26/2022 20:02:50 - INFO - codeparrot_training - Step 206: {'lr': 5.15e-05, 'samples': 39744, 'steps': 206, 'loss/train': 1.1384654939174652} +01/26/2022 20:02:53 - INFO - codeparrot_training - Step 207: {'lr': 5.175e-05, 'samples': 39936, 'steps': 207, 'loss/train': 1.1253110468387604} +01/26/2022 20:02:56 - INFO - codeparrot_training - Step 208: {'lr': 5.2e-05, 'samples': 40128, 'steps': 208, 'loss/train': 0.8209310173988342} +01/26/2022 20:02:59 - INFO - codeparrot_training - Step 209: {'lr': 5.2249999999999996e-05, 'samples': 40320, 'steps': 209, 'loss/train': 2.520827293395996} +01/26/2022 20:03:03 - INFO - codeparrot_training - Step 210: {'lr': 5.25e-05, 'samples': 40512, 'steps': 210, 'loss/train': 1.0695734024047852} +01/26/2022 20:03:07 - INFO - codeparrot_training - Step 211: {'lr': 5.275e-05, 'samples': 40704, 'steps': 211, 'loss/train': 0.47925974428653717} +01/26/2022 20:03:10 - INFO - codeparrot_training - Step 212: {'lr': 5.3e-05, 'samples': 40896, 'steps': 212, 'loss/train': 1.0081109404563904} +01/26/2022 20:03:13 - INFO - codeparrot_training - Step 213: {'lr': 5.325e-05, 'samples': 41088, 'steps': 213, 'loss/train': 0.9470605552196503} +01/26/2022 20:03:16 - INFO - codeparrot_training - Step 214: {'lr': 5.35e-05, 'samples': 41280, 'steps': 214, 'loss/train': 0.7130788117647171} +01/26/2022 20:03:19 - INFO - codeparrot_training - Step 215: {'lr': 5.375e-05, 'samples': 41472, 'steps': 215, 'loss/train': 1.2014243602752686} +01/26/2022 20:03:23 - INFO - codeparrot_training - Step 216: {'lr': 5.4e-05, 'samples': 41664, 'steps': 216, 'loss/train': 1.1142117083072662} +01/26/2022 20:03:26 - INFO - codeparrot_training - Step 217: {'lr': 5.4250000000000004e-05, 'samples': 41856, 'steps': 217, 'loss/train': 0.5922805666923523} +01/26/2022 20:03:29 - INFO - codeparrot_training - Step 218: {'lr': 5.45e-05, 'samples': 42048, 'steps': 218, 'loss/train': 0.5249330252408981} +01/26/2022 20:03:32 - INFO - codeparrot_training - Step 219: {'lr': 5.475e-05, 'samples': 42240, 'steps': 219, 'loss/train': 1.1915513277053833} +01/26/2022 20:03:37 - INFO - codeparrot_training - Step 220: {'lr': 5.5e-05, 'samples': 42432, 'steps': 220, 'loss/train': 0.35965168476104736} +01/26/2022 20:03:40 - INFO - codeparrot_training - Step 221: {'lr': 5.525e-05, 'samples': 42624, 'steps': 221, 'loss/train': 0.8307751715183258} +01/26/2022 20:03:43 - INFO - codeparrot_training - Step 222: {'lr': 5.55e-05, 'samples': 42816, 'steps': 222, 'loss/train': 0.5622230172157288} +01/26/2022 20:03:46 - INFO - codeparrot_training - Step 223: {'lr': 5.575e-05, 'samples': 43008, 'steps': 223, 'loss/train': 2.1728580594062805} +01/26/2022 20:03:49 - INFO - codeparrot_training - Step 224: {'lr': 5.6e-05, 'samples': 43200, 'steps': 224, 'loss/train': 1.3944549858570099} +01/26/2022 20:03:52 - INFO - codeparrot_training - Step 225: {'lr': 5.6250000000000005e-05, 'samples': 43392, 'steps': 225, 'loss/train': 1.1415264308452606} +01/26/2022 20:03:55 - INFO - codeparrot_training - Step 226: {'lr': 5.6500000000000005e-05, 'samples': 43584, 'steps': 226, 'loss/train': 1.3640662729740143} +01/26/2022 20:03:59 - INFO - codeparrot_training - Step 227: {'lr': 5.6750000000000004e-05, 'samples': 43776, 'steps': 227, 'loss/train': 1.2744399905204773} +01/26/2022 20:04:02 - INFO - codeparrot_training - Step 228: {'lr': 5.7e-05, 'samples': 43968, 'steps': 228, 'loss/train': 0.6094226539134979} +01/26/2022 20:04:06 - INFO - codeparrot_training - Step 229: {'lr': 5.725e-05, 'samples': 44160, 'steps': 229, 'loss/train': 0.8389751315116882} +01/26/2022 20:04:09 - INFO - codeparrot_training - Step 230: {'lr': 5.75e-05, 'samples': 44352, 'steps': 230, 'loss/train': 1.1073690354824066} +01/26/2022 20:04:12 - INFO - codeparrot_training - Step 231: {'lr': 5.775e-05, 'samples': 44544, 'steps': 231, 'loss/train': 0.6085668057203293} +01/26/2022 20:04:16 - INFO - codeparrot_training - Step 232: {'lr': 5.800000000000001e-05, 'samples': 44736, 'steps': 232, 'loss/train': 0.8861702978610992} +01/26/2022 20:04:19 - INFO - codeparrot_training - Step 233: {'lr': 5.8250000000000006e-05, 'samples': 44928, 'steps': 233, 'loss/train': 0.8692090809345245} +01/26/2022 20:04:22 - INFO - codeparrot_training - Step 234: {'lr': 5.8500000000000006e-05, 'samples': 45120, 'steps': 234, 'loss/train': 1.3772276937961578} +01/26/2022 20:04:25 - INFO - codeparrot_training - Step 235: {'lr': 5.875e-05, 'samples': 45312, 'steps': 235, 'loss/train': 0.7176054865121841} +01/26/2022 20:04:28 - INFO - codeparrot_training - Step 236: {'lr': 5.9e-05, 'samples': 45504, 'steps': 236, 'loss/train': 1.1665324866771698} +01/26/2022 20:04:34 - INFO - codeparrot_training - Step 237: {'lr': 5.925e-05, 'samples': 45696, 'steps': 237, 'loss/train': 1.10550257563591} +01/26/2022 20:04:37 - INFO - codeparrot_training - Step 238: {'lr': 5.9499999999999996e-05, 'samples': 45888, 'steps': 238, 'loss/train': 1.043763667345047} +01/26/2022 20:04:40 - INFO - codeparrot_training - Step 239: {'lr': 5.9749999999999995e-05, 'samples': 46080, 'steps': 239, 'loss/train': 0.9871982038021088} +01/26/2022 20:04:43 - INFO - codeparrot_training - Step 240: {'lr': 6e-05, 'samples': 46272, 'steps': 240, 'loss/train': 0.5332265049219131} +01/26/2022 20:04:46 - INFO - codeparrot_training - Step 241: {'lr': 6.025e-05, 'samples': 46464, 'steps': 241, 'loss/train': 1.2732048332691193} +01/26/2022 20:04:49 - INFO - codeparrot_training - Step 242: {'lr': 6.05e-05, 'samples': 46656, 'steps': 242, 'loss/train': 1.1088880598545074} +01/26/2022 20:04:53 - INFO - codeparrot_training - Step 243: {'lr': 6.075e-05, 'samples': 46848, 'steps': 243, 'loss/train': 1.1678805649280548} +01/26/2022 20:04:56 - INFO - codeparrot_training - Step 244: {'lr': 6.1e-05, 'samples': 47040, 'steps': 244, 'loss/train': 1.129733294248581} +01/26/2022 20:04:59 - INFO - codeparrot_training - Step 245: {'lr': 6.125e-05, 'samples': 47232, 'steps': 245, 'loss/train': 0.5138150900602341} +01/26/2022 20:05:03 - INFO - codeparrot_training - Step 246: {'lr': 6.15e-05, 'samples': 47424, 'steps': 246, 'loss/train': 1.1513821184635162} +01/26/2022 20:05:06 - INFO - codeparrot_training - Step 247: {'lr': 6.175e-05, 'samples': 47616, 'steps': 247, 'loss/train': 0.5100005865097046} +01/26/2022 20:05:10 - INFO - codeparrot_training - Step 248: {'lr': 6.2e-05, 'samples': 47808, 'steps': 248, 'loss/train': 0.7218698859214783} +01/26/2022 20:05:13 - INFO - codeparrot_training - Step 249: {'lr': 6.225e-05, 'samples': 48000, 'steps': 249, 'loss/train': 1.0485215485095978} +01/26/2022 20:05:16 - INFO - codeparrot_training - Step 250: {'lr': 6.25e-05, 'samples': 48192, 'steps': 250, 'loss/train': 0.43163707852363586} +01/26/2022 20:05:19 - INFO - codeparrot_training - Step 251: {'lr': 6.275000000000001e-05, 'samples': 48384, 'steps': 251, 'loss/train': 0.8172822296619415} +01/26/2022 20:05:22 - INFO - codeparrot_training - Step 252: {'lr': 6.3e-05, 'samples': 48576, 'steps': 252, 'loss/train': 0.6068518459796906} +01/26/2022 20:05:25 - INFO - codeparrot_training - Step 253: {'lr': 6.325e-05, 'samples': 48768, 'steps': 253, 'loss/train': 0.884998083114624} +01/26/2022 20:05:28 - INFO - codeparrot_training - Step 254: {'lr': 6.35e-05, 'samples': 48960, 'steps': 254, 'loss/train': 0.758701354265213} +01/26/2022 20:05:33 - INFO - codeparrot_training - Step 255: {'lr': 6.375e-05, 'samples': 49152, 'steps': 255, 'loss/train': 1.4007873237133026} +01/26/2022 20:05:37 - INFO - codeparrot_training - Step 256: {'lr': 6.4e-05, 'samples': 49344, 'steps': 256, 'loss/train': 0.7115031480789185} +01/26/2022 20:05:40 - INFO - codeparrot_training - Step 257: {'lr': 6.425e-05, 'samples': 49536, 'steps': 257, 'loss/train': 1.1868571043014526} +01/26/2022 20:05:43 - INFO - codeparrot_training - Step 258: {'lr': 6.450000000000001e-05, 'samples': 49728, 'steps': 258, 'loss/train': 0.49912266433238983} +01/26/2022 20:05:46 - INFO - codeparrot_training - Step 259: {'lr': 6.475e-05, 'samples': 49920, 'steps': 259, 'loss/train': 0.8214937448501587} +01/26/2022 20:05:49 - INFO - codeparrot_training - Step 260: {'lr': 6.500000000000001e-05, 'samples': 50112, 'steps': 260, 'loss/train': 1.1232694387435913} +01/26/2022 20:05:52 - INFO - codeparrot_training - Step 261: {'lr': 6.525e-05, 'samples': 50304, 'steps': 261, 'loss/train': 1.041410654783249} +01/26/2022 20:05:55 - INFO - codeparrot_training - Step 262: {'lr': 6.55e-05, 'samples': 50496, 'steps': 262, 'loss/train': 1.0826551616191864} +01/26/2022 20:05:58 - INFO - codeparrot_training - Step 263: {'lr': 6.575e-05, 'samples': 50688, 'steps': 263, 'loss/train': 1.3080483376979828} +01/26/2022 20:06:03 - INFO - codeparrot_training - Step 264: {'lr': 6.6e-05, 'samples': 50880, 'steps': 264, 'loss/train': 0.9459260702133179} +01/26/2022 20:06:06 - INFO - codeparrot_training - Step 265: {'lr': 6.625000000000001e-05, 'samples': 51072, 'steps': 265, 'loss/train': 1.602814257144928} +01/26/2022 20:06:09 - INFO - codeparrot_training - Step 266: {'lr': 6.65e-05, 'samples': 51264, 'steps': 266, 'loss/train': 0.9994533956050873} +01/26/2022 20:06:12 - INFO - codeparrot_training - Step 267: {'lr': 6.675000000000001e-05, 'samples': 51456, 'steps': 267, 'loss/train': 0.7647925615310669} +01/26/2022 20:06:15 - INFO - codeparrot_training - Step 268: {'lr': 6.7e-05, 'samples': 51648, 'steps': 268, 'loss/train': 1.1421605944633484} +01/26/2022 20:06:18 - INFO - codeparrot_training - Step 269: {'lr': 6.725000000000001e-05, 'samples': 51840, 'steps': 269, 'loss/train': 0.9605331122875214} +01/26/2022 20:06:22 - INFO - codeparrot_training - Step 270: {'lr': 6.75e-05, 'samples': 52032, 'steps': 270, 'loss/train': 0.8399298191070557} +01/26/2022 20:06:25 - INFO - codeparrot_training - Step 271: {'lr': 6.775000000000001e-05, 'samples': 52224, 'steps': 271, 'loss/train': 0.9594549536705017} +01/26/2022 20:06:28 - INFO - codeparrot_training - Step 272: {'lr': 6.800000000000001e-05, 'samples': 52416, 'steps': 272, 'loss/train': 1.1892599165439606} +01/26/2022 20:06:32 - INFO - codeparrot_training - Step 273: {'lr': 6.825e-05, 'samples': 52608, 'steps': 273, 'loss/train': 1.0380887389183044} +01/26/2022 20:06:35 - INFO - codeparrot_training - Step 274: {'lr': 6.850000000000001e-05, 'samples': 52800, 'steps': 274, 'loss/train': 0.5679173916578293} +01/26/2022 20:06:39 - INFO - codeparrot_training - Step 275: {'lr': 6.875e-05, 'samples': 52992, 'steps': 275, 'loss/train': 0.508046954870224} +01/26/2022 20:06:42 - INFO - codeparrot_training - Step 276: {'lr': 6.900000000000001e-05, 'samples': 53184, 'steps': 276, 'loss/train': 1.1423460245132446} +01/26/2022 20:06:45 - INFO - codeparrot_training - Step 277: {'lr': 6.925e-05, 'samples': 53376, 'steps': 277, 'loss/train': 0.7753852307796478} +01/26/2022 20:06:48 - INFO - codeparrot_training - Step 278: {'lr': 6.950000000000001e-05, 'samples': 53568, 'steps': 278, 'loss/train': 1.3239911198616028} +01/26/2022 20:06:51 - INFO - codeparrot_training - Step 279: {'lr': 6.975e-05, 'samples': 53760, 'steps': 279, 'loss/train': 1.1902539432048798} +01/26/2022 20:06:54 - INFO - codeparrot_training - Step 280: {'lr': 7.000000000000001e-05, 'samples': 53952, 'steps': 280, 'loss/train': 0.96241495013237} +01/26/2022 20:07:00 - INFO - codeparrot_training - Step 281: {'lr': 7.025000000000001e-05, 'samples': 54144, 'steps': 281, 'loss/train': 1.3379597067832947} +01/26/2022 20:07:03 - INFO - codeparrot_training - Step 282: {'lr': 7.049999999999999e-05, 'samples': 54336, 'steps': 282, 'loss/train': 1.1913694739341736} +01/26/2022 20:07:06 - INFO - codeparrot_training - Step 283: {'lr': 7.075e-05, 'samples': 54528, 'steps': 283, 'loss/train': 1.0873757600784302} +01/26/2022 20:07:09 - INFO - codeparrot_training - Step 284: {'lr': 7.099999999999999e-05, 'samples': 54720, 'steps': 284, 'loss/train': 1.291018009185791} +01/26/2022 20:07:12 - INFO - codeparrot_training - Step 285: {'lr': 7.125e-05, 'samples': 54912, 'steps': 285, 'loss/train': 0.7471304833889008} +01/26/2022 20:07:15 - INFO - codeparrot_training - Step 286: {'lr': 7.149999999999999e-05, 'samples': 55104, 'steps': 286, 'loss/train': 1.0613690614700317} +01/26/2022 20:07:18 - INFO - codeparrot_training - Step 287: {'lr': 7.175e-05, 'samples': 55296, 'steps': 287, 'loss/train': 0.9576367735862732} +01/26/2022 20:07:22 - INFO - codeparrot_training - Step 288: {'lr': 7.2e-05, 'samples': 55488, 'steps': 288, 'loss/train': 1.2826087474822998} +01/26/2022 20:07:25 - INFO - codeparrot_training - Step 289: {'lr': 7.225e-05, 'samples': 55680, 'steps': 289, 'loss/train': 0.972164511680603} +01/26/2022 20:07:29 - INFO - codeparrot_training - Step 290: {'lr': 7.25e-05, 'samples': 55872, 'steps': 290, 'loss/train': 0.40963388979434967} +01/26/2022 20:07:32 - INFO - codeparrot_training - Step 291: {'lr': 7.274999999999999e-05, 'samples': 56064, 'steps': 291, 'loss/train': 1.1230065822601318} +01/26/2022 20:07:35 - INFO - codeparrot_training - Step 292: {'lr': 7.3e-05, 'samples': 56256, 'steps': 292, 'loss/train': 0.8228602409362793} +01/26/2022 20:07:39 - INFO - codeparrot_training - Step 293: {'lr': 7.324999999999999e-05, 'samples': 56448, 'steps': 293, 'loss/train': 1.324948489665985} +01/26/2022 20:07:42 - INFO - codeparrot_training - Step 294: {'lr': 7.35e-05, 'samples': 56640, 'steps': 294, 'loss/train': 1.183780699968338} +01/26/2022 20:07:45 - INFO - codeparrot_training - Step 295: {'lr': 7.375e-05, 'samples': 56832, 'steps': 295, 'loss/train': 0.7068225145339966} +01/26/2022 20:07:48 - INFO - codeparrot_training - Step 296: {'lr': 7.4e-05, 'samples': 57024, 'steps': 296, 'loss/train': 1.191939800977707} +01/26/2022 20:07:51 - INFO - codeparrot_training - Step 297: {'lr': 7.425e-05, 'samples': 57216, 'steps': 297, 'loss/train': 1.0787363648414612} +01/26/2022 20:07:54 - INFO - codeparrot_training - Step 298: {'lr': 7.45e-05, 'samples': 57408, 'steps': 298, 'loss/train': 1.1144662499427795} +01/26/2022 20:07:59 - INFO - codeparrot_training - Step 299: {'lr': 7.475e-05, 'samples': 57600, 'steps': 299, 'loss/train': 0.8711251616477966} +01/26/2022 20:08:02 - INFO - codeparrot_training - Step 300: {'lr': 7.5e-05, 'samples': 57792, 'steps': 300, 'loss/train': 0.9427872598171234} +01/26/2022 20:08:05 - INFO - codeparrot_training - Step 301: {'lr': 7.525e-05, 'samples': 57984, 'steps': 301, 'loss/train': 0.9164088070392609} +01/26/2022 20:08:08 - INFO - codeparrot_training - Step 302: {'lr': 7.55e-05, 'samples': 58176, 'steps': 302, 'loss/train': 0.8746973276138306} +01/26/2022 20:08:11 - INFO - codeparrot_training - Step 303: {'lr': 7.575e-05, 'samples': 58368, 'steps': 303, 'loss/train': 0.9978926181793213} +01/26/2022 20:08:14 - INFO - codeparrot_training - Step 304: {'lr': 7.6e-05, 'samples': 58560, 'steps': 304, 'loss/train': 0.8042625188827515} +01/26/2022 20:08:17 - INFO - codeparrot_training - Step 305: {'lr': 7.625e-05, 'samples': 58752, 'steps': 305, 'loss/train': 0.7100889533758163} +01/26/2022 20:08:21 - INFO - codeparrot_training - Step 306: {'lr': 7.65e-05, 'samples': 58944, 'steps': 306, 'loss/train': 1.33299520611763} +01/26/2022 20:08:24 - INFO - codeparrot_training - Step 307: {'lr': 7.675e-05, 'samples': 59136, 'steps': 307, 'loss/train': 0.8356545567512512} +01/26/2022 20:08:28 - INFO - codeparrot_training - Step 308: {'lr': 7.7e-05, 'samples': 59328, 'steps': 308, 'loss/train': 0.751328855752945} +01/26/2022 20:08:31 - INFO - codeparrot_training - Step 309: {'lr': 7.725000000000001e-05, 'samples': 59520, 'steps': 309, 'loss/train': 1.005783587694168} +01/26/2022 20:08:34 - INFO - codeparrot_training - Step 310: {'lr': 7.75e-05, 'samples': 59712, 'steps': 310, 'loss/train': 1.2585538029670715} +01/26/2022 20:08:37 - INFO - codeparrot_training - Step 311: {'lr': 7.775e-05, 'samples': 59904, 'steps': 311, 'loss/train': 0.8243108689785004} +01/26/2022 20:08:41 - INFO - codeparrot_training - Step 312: {'lr': 7.8e-05, 'samples': 60096, 'steps': 312, 'loss/train': 0.9810410141944885} +01/26/2022 20:08:44 - INFO - codeparrot_training - Step 313: {'lr': 7.825e-05, 'samples': 60288, 'steps': 313, 'loss/train': 0.8090919256210327} +01/26/2022 20:08:47 - INFO - codeparrot_training - Step 314: {'lr': 7.85e-05, 'samples': 60480, 'steps': 314, 'loss/train': 1.1202751100063324} +01/26/2022 20:08:50 - INFO - codeparrot_training - Step 315: {'lr': 7.875e-05, 'samples': 60672, 'steps': 315, 'loss/train': 1.09663724899292} +01/26/2022 20:08:53 - INFO - codeparrot_training - Step 316: {'lr': 7.9e-05, 'samples': 60864, 'steps': 316, 'loss/train': 1.0598064959049225} +01/26/2022 20:08:59 - INFO - codeparrot_training - Step 317: {'lr': 7.925e-05, 'samples': 61056, 'steps': 317, 'loss/train': 1.0347656607627869} +01/26/2022 20:09:02 - INFO - codeparrot_training - Step 318: {'lr': 7.950000000000001e-05, 'samples': 61248, 'steps': 318, 'loss/train': 0.399854376912117} +01/26/2022 20:09:05 - INFO - codeparrot_training - Step 319: {'lr': 7.975e-05, 'samples': 61440, 'steps': 319, 'loss/train': 0.9313833117485046} +01/26/2022 20:09:08 - INFO - codeparrot_training - Step 320: {'lr': 8e-05, 'samples': 61632, 'steps': 320, 'loss/train': 0.39090071618556976} +01/26/2022 20:09:11 - INFO - codeparrot_training - Step 321: {'lr': 8.025e-05, 'samples': 61824, 'steps': 321, 'loss/train': 0.40278929471969604} +01/26/2022 20:09:14 - INFO - codeparrot_training - Step 322: {'lr': 8.05e-05, 'samples': 62016, 'steps': 322, 'loss/train': 0.8294789493083954} +01/26/2022 20:09:17 - INFO - codeparrot_training - Step 323: {'lr': 8.075e-05, 'samples': 62208, 'steps': 323, 'loss/train': 0.8699845969676971} +01/26/2022 20:09:21 - INFO - codeparrot_training - Step 324: {'lr': 8.1e-05, 'samples': 62400, 'steps': 324, 'loss/train': 1.1144334375858307} +01/26/2022 20:09:25 - INFO - codeparrot_training - Step 325: {'lr': 8.125000000000001e-05, 'samples': 62592, 'steps': 325, 'loss/train': 1.0010827481746674} +01/26/2022 20:09:28 - INFO - codeparrot_training - Step 326: {'lr': 8.15e-05, 'samples': 62784, 'steps': 326, 'loss/train': 0.8288448750972748} +01/26/2022 20:09:31 - INFO - codeparrot_training - Step 327: {'lr': 8.175000000000001e-05, 'samples': 62976, 'steps': 327, 'loss/train': 1.173732876777649} +01/26/2022 20:09:34 - INFO - codeparrot_training - Step 328: {'lr': 8.2e-05, 'samples': 63168, 'steps': 328, 'loss/train': 0.6803929656744003} +01/26/2022 20:09:38 - INFO - codeparrot_training - Step 329: {'lr': 8.225000000000001e-05, 'samples': 63360, 'steps': 329, 'loss/train': 1.233411580324173} +01/26/2022 20:09:41 - INFO - codeparrot_training - Step 330: {'lr': 8.25e-05, 'samples': 63552, 'steps': 330, 'loss/train': 0.6752614974975586} +01/26/2022 20:09:44 - INFO - codeparrot_training - Step 331: {'lr': 8.275e-05, 'samples': 63744, 'steps': 331, 'loss/train': 1.3333468437194824} +01/26/2022 20:09:47 - INFO - codeparrot_training - Step 332: {'lr': 8.300000000000001e-05, 'samples': 63936, 'steps': 332, 'loss/train': 0.6622332036495209} +01/26/2022 20:09:50 - INFO - codeparrot_training - Step 333: {'lr': 8.325e-05, 'samples': 64128, 'steps': 333, 'loss/train': 1.038160353899002} +01/26/2022 20:09:55 - INFO - codeparrot_training - Step 334: {'lr': 8.350000000000001e-05, 'samples': 64320, 'steps': 334, 'loss/train': 2.0789029598236084} +01/26/2022 20:09:58 - INFO - codeparrot_training - Step 335: {'lr': 8.375e-05, 'samples': 64512, 'steps': 335, 'loss/train': 1.0992600917816162} +01/26/2022 20:10:01 - INFO - codeparrot_training - Step 336: {'lr': 8.400000000000001e-05, 'samples': 64704, 'steps': 336, 'loss/train': 0.7307141125202179} +01/26/2022 20:10:04 - INFO - codeparrot_training - Step 337: {'lr': 8.425e-05, 'samples': 64896, 'steps': 337, 'loss/train': 0.755567729473114} +01/26/2022 20:10:07 - INFO - codeparrot_training - Step 338: {'lr': 8.450000000000001e-05, 'samples': 65088, 'steps': 338, 'loss/train': 1.1346741020679474} +01/26/2022 20:10:10 - INFO - codeparrot_training - Step 339: {'lr': 8.475000000000001e-05, 'samples': 65280, 'steps': 339, 'loss/train': 1.054144710302353} +01/26/2022 20:10:13 - INFO - codeparrot_training - Step 340: {'lr': 8.5e-05, 'samples': 65472, 'steps': 340, 'loss/train': 0.7578913271427155} +01/26/2022 20:10:17 - INFO - codeparrot_training - Step 341: {'lr': 8.525000000000001e-05, 'samples': 65664, 'steps': 341, 'loss/train': 0.9189346432685852} +01/26/2022 20:10:20 - INFO - codeparrot_training - Step 342: {'lr': 8.55e-05, 'samples': 65856, 'steps': 342, 'loss/train': 0.7414054423570633} +01/26/2022 20:10:27 - INFO - codeparrot_training - Step 343: {'lr': 8.575000000000001e-05, 'samples': 66048, 'steps': 343, 'loss/train': 0.747487485408783} +01/26/2022 20:10:30 - INFO - codeparrot_training - Step 344: {'lr': 8.599999999999999e-05, 'samples': 66240, 'steps': 344, 'loss/train': 0.6916420161724091} +01/26/2022 20:10:33 - INFO - codeparrot_training - Step 345: {'lr': 8.625e-05, 'samples': 66432, 'steps': 345, 'loss/train': 1.0344393253326416} +01/26/2022 20:10:37 - INFO - codeparrot_training - Step 346: {'lr': 8.65e-05, 'samples': 66624, 'steps': 346, 'loss/train': 2.085118353366852} +01/26/2022 20:10:40 - INFO - codeparrot_training - Step 347: {'lr': 8.675e-05, 'samples': 66816, 'steps': 347, 'loss/train': 1.888598084449768} +01/26/2022 20:10:43 - INFO - codeparrot_training - Step 348: {'lr': 8.7e-05, 'samples': 67008, 'steps': 348, 'loss/train': 0.7010593861341476} +01/26/2022 20:10:46 - INFO - codeparrot_training - Step 349: {'lr': 8.724999999999999e-05, 'samples': 67200, 'steps': 349, 'loss/train': 0.7900976836681366} +01/26/2022 20:10:49 - INFO - codeparrot_training - Step 350: {'lr': 8.75e-05, 'samples': 67392, 'steps': 350, 'loss/train': 1.0326433181762695} +01/26/2022 20:10:52 - INFO - codeparrot_training - Step 351: {'lr': 8.774999999999999e-05, 'samples': 67584, 'steps': 351, 'loss/train': 0.8805137872695923} +01/26/2022 20:10:57 - INFO - codeparrot_training - Step 352: {'lr': 8.8e-05, 'samples': 67776, 'steps': 352, 'loss/train': 0.9114089012145996} +01/26/2022 20:11:00 - INFO - codeparrot_training - Step 353: {'lr': 8.824999999999999e-05, 'samples': 67968, 'steps': 353, 'loss/train': 1.4836265444755554} +01/26/2022 20:11:03 - INFO - codeparrot_training - Step 354: {'lr': 8.85e-05, 'samples': 68160, 'steps': 354, 'loss/train': 0.7094464302062988} +01/26/2022 20:11:06 - INFO - codeparrot_training - Step 355: {'lr': 8.875e-05, 'samples': 68352, 'steps': 355, 'loss/train': 1.0316323935985565} +01/26/2022 20:11:09 - INFO - codeparrot_training - Step 356: {'lr': 8.9e-05, 'samples': 68544, 'steps': 356, 'loss/train': 0.8668911159038544} +01/26/2022 20:11:12 - INFO - codeparrot_training - Step 357: {'lr': 8.925e-05, 'samples': 68736, 'steps': 357, 'loss/train': 1.010850191116333} +01/26/2022 20:11:16 - INFO - codeparrot_training - Step 358: {'lr': 8.95e-05, 'samples': 68928, 'steps': 358, 'loss/train': 0.8607926666736603} +01/26/2022 20:11:19 - INFO - codeparrot_training - Step 359: {'lr': 8.975e-05, 'samples': 69120, 'steps': 359, 'loss/train': 0.9607502818107605} +01/26/2022 20:11:22 - INFO - codeparrot_training - Step 360: {'lr': 8.999999999999999e-05, 'samples': 69312, 'steps': 360, 'loss/train': 1.2990716993808746} +01/26/2022 20:11:27 - INFO - codeparrot_training - Step 361: {'lr': 9.025e-05, 'samples': 69504, 'steps': 361, 'loss/train': 0.7665486931800842} +01/26/2022 20:11:30 - INFO - codeparrot_training - Step 362: {'lr': 9.05e-05, 'samples': 69696, 'steps': 362, 'loss/train': 0.6945787221193314} +01/26/2022 20:11:33 - INFO - codeparrot_training - Step 363: {'lr': 9.075e-05, 'samples': 69888, 'steps': 363, 'loss/train': 1.2044677734375} +01/26/2022 20:11:36 - INFO - codeparrot_training - Step 364: {'lr': 9.1e-05, 'samples': 70080, 'steps': 364, 'loss/train': 1.199683964252472} +01/26/2022 20:11:39 - INFO - codeparrot_training - Step 365: {'lr': 9.125e-05, 'samples': 70272, 'steps': 365, 'loss/train': 0.3417014926671982} +01/26/2022 20:11:43 - INFO - codeparrot_training - Step 366: {'lr': 9.15e-05, 'samples': 70464, 'steps': 366, 'loss/train': 0.9123673439025879} +01/26/2022 20:11:46 - INFO - codeparrot_training - Step 367: {'lr': 9.175e-05, 'samples': 70656, 'steps': 367, 'loss/train': 1.1374095976352692} +01/26/2022 20:11:49 - INFO - codeparrot_training - Step 368: {'lr': 9.2e-05, 'samples': 70848, 'steps': 368, 'loss/train': 0.871574878692627} +01/26/2022 20:11:52 - INFO - codeparrot_training - Step 369: {'lr': 9.225e-05, 'samples': 71040, 'steps': 369, 'loss/train': 0.8438461124897003} +01/26/2022 20:11:56 - INFO - codeparrot_training - Step 370: {'lr': 9.25e-05, 'samples': 71232, 'steps': 370, 'loss/train': 1.0463526248931885} +01/26/2022 20:11:59 - INFO - codeparrot_training - Step 371: {'lr': 9.275e-05, 'samples': 71424, 'steps': 371, 'loss/train': 0.8958008587360382} +01/26/2022 20:12:03 - INFO - codeparrot_training - Step 372: {'lr': 9.3e-05, 'samples': 71616, 'steps': 372, 'loss/train': 1.0595494508743286} +01/26/2022 20:12:06 - INFO - codeparrot_training - Step 373: {'lr': 9.325e-05, 'samples': 71808, 'steps': 373, 'loss/train': 0.9859458804130554} +01/26/2022 20:12:09 - INFO - codeparrot_training - Step 374: {'lr': 9.35e-05, 'samples': 72000, 'steps': 374, 'loss/train': 0.7631303071975708} +01/26/2022 20:12:12 - INFO - codeparrot_training - Step 375: {'lr': 9.375e-05, 'samples': 72192, 'steps': 375, 'loss/train': 1.4473093450069427} +01/26/2022 20:12:15 - INFO - codeparrot_training - Step 376: {'lr': 9.400000000000001e-05, 'samples': 72384, 'steps': 376, 'loss/train': 0.7394805550575256} +01/26/2022 20:12:18 - INFO - codeparrot_training - Step 377: {'lr': 9.425e-05, 'samples': 72576, 'steps': 377, 'loss/train': 0.8891026675701141} +01/26/2022 20:12:21 - INFO - codeparrot_training - Step 378: {'lr': 9.45e-05, 'samples': 72768, 'steps': 378, 'loss/train': 0.9185190796852112} +01/26/2022 20:12:26 - INFO - codeparrot_training - Step 379: {'lr': 9.475e-05, 'samples': 72960, 'steps': 379, 'loss/train': 0.6343764960765839} +01/26/2022 20:12:29 - INFO - codeparrot_training - Step 380: {'lr': 9.5e-05, 'samples': 73152, 'steps': 380, 'loss/train': 0.8839260935783386} +01/26/2022 20:12:32 - INFO - codeparrot_training - Step 381: {'lr': 9.525e-05, 'samples': 73344, 'steps': 381, 'loss/train': 0.8907753825187683} +01/26/2022 20:12:35 - INFO - codeparrot_training - Step 382: {'lr': 9.55e-05, 'samples': 73536, 'steps': 382, 'loss/train': 1.2589702606201172} +01/26/2022 20:12:38 - INFO - codeparrot_training - Step 383: {'lr': 9.575000000000001e-05, 'samples': 73728, 'steps': 383, 'loss/train': 1.2859916388988495} +01/26/2022 20:12:41 - INFO - codeparrot_training - Step 384: {'lr': 9.6e-05, 'samples': 73920, 'steps': 384, 'loss/train': 0.6472349613904953} +01/26/2022 20:12:45 - INFO - codeparrot_training - Step 385: {'lr': 9.625000000000001e-05, 'samples': 74112, 'steps': 385, 'loss/train': 1.01359024643898} +01/26/2022 20:12:48 - INFO - codeparrot_training - Step 386: {'lr': 9.65e-05, 'samples': 74304, 'steps': 386, 'loss/train': 0.8292834162712097} +01/26/2022 20:12:51 - INFO - codeparrot_training - Step 387: {'lr': 9.675000000000001e-05, 'samples': 74496, 'steps': 387, 'loss/train': 1.038096696138382} +01/26/2022 20:12:56 - INFO - codeparrot_training - Step 388: {'lr': 9.7e-05, 'samples': 74688, 'steps': 388, 'loss/train': 1.0593030452728271} +01/26/2022 20:12:59 - INFO - codeparrot_training - Step 389: {'lr': 9.725e-05, 'samples': 74880, 'steps': 389, 'loss/train': 0.3904605209827423} +01/26/2022 20:13:02 - INFO - codeparrot_training - Step 390: {'lr': 9.750000000000001e-05, 'samples': 75072, 'steps': 390, 'loss/train': 1.214891105890274} +01/26/2022 20:13:06 - INFO - codeparrot_training - Step 391: {'lr': 9.775e-05, 'samples': 75264, 'steps': 391, 'loss/train': 1.1453385651111603} +01/26/2022 20:13:09 - INFO - codeparrot_training - Step 392: {'lr': 9.800000000000001e-05, 'samples': 75456, 'steps': 392, 'loss/train': 0.9596201777458191} +01/26/2022 20:13:12 - INFO - codeparrot_training - Step 393: {'lr': 9.825e-05, 'samples': 75648, 'steps': 393, 'loss/train': 1.13962984085083} +01/26/2022 20:13:15 - INFO - codeparrot_training - Step 394: {'lr': 9.850000000000001e-05, 'samples': 75840, 'steps': 394, 'loss/train': 0.9503010213375092} +01/26/2022 20:13:18 - INFO - codeparrot_training - Step 395: {'lr': 9.875e-05, 'samples': 76032, 'steps': 395, 'loss/train': 0.7186999171972275} +01/26/2022 20:13:21 - INFO - codeparrot_training - Step 396: {'lr': 9.900000000000001e-05, 'samples': 76224, 'steps': 396, 'loss/train': 0.5636555403470993} +01/26/2022 20:13:26 - INFO - codeparrot_training - Step 397: {'lr': 9.925000000000001e-05, 'samples': 76416, 'steps': 397, 'loss/train': 1.0697872638702393} +01/26/2022 20:13:29 - INFO - codeparrot_training - Step 398: {'lr': 9.95e-05, 'samples': 76608, 'steps': 398, 'loss/train': 0.7212703675031662} +01/26/2022 20:13:32 - INFO - codeparrot_training - Step 399: {'lr': 9.975000000000001e-05, 'samples': 76800, 'steps': 399, 'loss/train': 0.9841853678226471} +01/26/2022 20:13:35 - INFO - codeparrot_training - Step 400: {'lr': 0.0001, 'samples': 76992, 'steps': 400, 'loss/train': 1.08234241604805} +01/26/2022 20:13:38 - INFO - codeparrot_training - Step 401: {'lr': 0.00010025000000000001, 'samples': 77184, 'steps': 401, 'loss/train': 1.5624375343322754} +01/26/2022 20:13:41 - INFO - codeparrot_training - Step 402: {'lr': 0.0001005, 'samples': 77376, 'steps': 402, 'loss/train': 1.475949615240097} +01/26/2022 20:13:44 - INFO - codeparrot_training - Step 403: {'lr': 0.00010075000000000001, 'samples': 77568, 'steps': 403, 'loss/train': 1.1508514881134033} +01/26/2022 20:13:48 - INFO - codeparrot_training - Step 404: {'lr': 0.000101, 'samples': 77760, 'steps': 404, 'loss/train': 0.8824456036090851} +01/26/2022 20:13:53 - INFO - codeparrot_training - Step 405: {'lr': 0.00010125000000000001, 'samples': 77952, 'steps': 405, 'loss/train': 1.1006841659545898} +01/26/2022 20:13:56 - INFO - codeparrot_training - Step 406: {'lr': 0.00010150000000000001, 'samples': 78144, 'steps': 406, 'loss/train': 0.8000113070011139} +01/26/2022 20:13:59 - INFO - codeparrot_training - Step 407: {'lr': 0.00010174999999999999, 'samples': 78336, 'steps': 407, 'loss/train': 1.2521542310714722} +01/26/2022 20:14:02 - INFO - codeparrot_training - Step 408: {'lr': 0.000102, 'samples': 78528, 'steps': 408, 'loss/train': 0.6710378676652908} +01/26/2022 20:14:05 - INFO - codeparrot_training - Step 409: {'lr': 0.00010224999999999999, 'samples': 78720, 'steps': 409, 'loss/train': 0.7425864636898041} +01/26/2022 20:14:08 - INFO - codeparrot_training - Step 410: {'lr': 0.0001025, 'samples': 78912, 'steps': 410, 'loss/train': 0.8330551385879517} +01/26/2022 20:14:11 - INFO - codeparrot_training - Step 411: {'lr': 0.00010274999999999999, 'samples': 79104, 'steps': 411, 'loss/train': 1.99857759475708} +01/26/2022 20:14:15 - INFO - codeparrot_training - Step 412: {'lr': 0.000103, 'samples': 79296, 'steps': 412, 'loss/train': 0.495266318321228} +01/26/2022 20:14:18 - INFO - codeparrot_training - Step 413: {'lr': 0.00010325, 'samples': 79488, 'steps': 413, 'loss/train': 0.6426213383674622} +01/26/2022 20:14:22 - INFO - codeparrot_training - Step 414: {'lr': 0.0001035, 'samples': 79680, 'steps': 414, 'loss/train': 0.8009860217571259} +01/26/2022 20:14:25 - INFO - codeparrot_training - Step 415: {'lr': 0.00010375, 'samples': 79872, 'steps': 415, 'loss/train': 0.8425185978412628} +01/26/2022 20:14:28 - INFO - codeparrot_training - Step 416: {'lr': 0.000104, 'samples': 80064, 'steps': 416, 'loss/train': 1.3310015201568604} +01/26/2022 20:14:31 - INFO - codeparrot_training - Step 417: {'lr': 0.00010425, 'samples': 80256, 'steps': 417, 'loss/train': 1.3193172812461853} +01/26/2022 20:14:35 - INFO - codeparrot_training - Step 418: {'lr': 0.00010449999999999999, 'samples': 80448, 'steps': 418, 'loss/train': 0.6111249625682831} +01/26/2022 20:14:38 - INFO - codeparrot_training - Step 419: {'lr': 0.00010475, 'samples': 80640, 'steps': 419, 'loss/train': 1.204498440027237} +01/26/2022 20:14:41 - INFO - codeparrot_training - Step 420: {'lr': 0.000105, 'samples': 80832, 'steps': 420, 'loss/train': 1.018784075975418} +01/26/2022 20:14:44 - INFO - codeparrot_training - Step 421: {'lr': 0.00010525, 'samples': 81024, 'steps': 421, 'loss/train': 1.0076421797275543} +01/26/2022 20:14:47 - INFO - codeparrot_training - Step 422: {'lr': 0.0001055, 'samples': 81216, 'steps': 422, 'loss/train': 0.953014612197876} +01/26/2022 20:14:52 - INFO - codeparrot_training - Step 423: {'lr': 0.00010575, 'samples': 81408, 'steps': 423, 'loss/train': 1.0320123732089996} +01/26/2022 20:14:55 - INFO - codeparrot_training - Step 424: {'lr': 0.000106, 'samples': 81600, 'steps': 424, 'loss/train': 1.5705456733703613} +01/26/2022 20:14:58 - INFO - codeparrot_training - Step 425: {'lr': 0.00010625, 'samples': 81792, 'steps': 425, 'loss/train': 0.3477340489625931} +01/26/2022 20:15:01 - INFO - codeparrot_training - Step 426: {'lr': 0.0001065, 'samples': 81984, 'steps': 426, 'loss/train': 1.2109199166297913} +01/26/2022 20:15:04 - INFO - codeparrot_training - Step 427: {'lr': 0.00010675, 'samples': 82176, 'steps': 427, 'loss/train': 0.9677959978580475} +01/26/2022 20:15:07 - INFO - codeparrot_training - Step 428: {'lr': 0.000107, 'samples': 82368, 'steps': 428, 'loss/train': 0.8218532502651215} +01/26/2022 20:15:10 - INFO - codeparrot_training - Step 429: {'lr': 0.00010725, 'samples': 82560, 'steps': 429, 'loss/train': 0.8969594836235046} +01/26/2022 20:15:14 - INFO - codeparrot_training - Step 430: {'lr': 0.0001075, 'samples': 82752, 'steps': 430, 'loss/train': 1.0345425009727478} +01/26/2022 20:15:17 - INFO - codeparrot_training - Step 431: {'lr': 0.00010775, 'samples': 82944, 'steps': 431, 'loss/train': 0.6871862411499023} +01/26/2022 20:15:21 - INFO - codeparrot_training - Step 432: {'lr': 0.000108, 'samples': 83136, 'steps': 432, 'loss/train': 1.0284851789474487} +01/26/2022 20:15:24 - INFO - codeparrot_training - Step 433: {'lr': 0.00010825, 'samples': 83328, 'steps': 433, 'loss/train': 0.4605419933795929} +01/26/2022 20:15:27 - INFO - codeparrot_training - Step 434: {'lr': 0.00010850000000000001, 'samples': 83520, 'steps': 434, 'loss/train': 0.8205590844154358} +01/26/2022 20:15:31 - INFO - codeparrot_training - Step 435: {'lr': 0.00010875, 'samples': 83712, 'steps': 435, 'loss/train': 0.9477976262569427} +01/26/2022 20:15:34 - INFO - codeparrot_training - Step 436: {'lr': 0.000109, 'samples': 83904, 'steps': 436, 'loss/train': 0.6931536644697189} +01/26/2022 20:15:37 - INFO - codeparrot_training - Step 437: {'lr': 0.00010925, 'samples': 84096, 'steps': 437, 'loss/train': 1.0598368048667908} +01/26/2022 20:15:40 - INFO - codeparrot_training - Step 438: {'lr': 0.0001095, 'samples': 84288, 'steps': 438, 'loss/train': 0.6807989180088043} +01/26/2022 20:15:43 - INFO - codeparrot_training - Step 439: {'lr': 0.00010975, 'samples': 84480, 'steps': 439, 'loss/train': 0.5363283008337021} +01/26/2022 20:15:49 - INFO - codeparrot_training - Step 440: {'lr': 0.00011, 'samples': 84672, 'steps': 440, 'loss/train': 1.1454049050807953} +01/26/2022 20:15:52 - INFO - codeparrot_training - Step 441: {'lr': 0.00011025, 'samples': 84864, 'steps': 441, 'loss/train': 0.7169860750436783} +01/26/2022 20:15:55 - INFO - codeparrot_training - Step 442: {'lr': 0.0001105, 'samples': 85056, 'steps': 442, 'loss/train': 0.7018677145242691} +01/26/2022 20:15:58 - INFO - codeparrot_training - Step 443: {'lr': 0.00011075000000000001, 'samples': 85248, 'steps': 443, 'loss/train': 1.0023103952407837} +01/26/2022 20:16:02 - INFO - codeparrot_training - Step 444: {'lr': 0.000111, 'samples': 85440, 'steps': 444, 'loss/train': 0.7472628504037857} +01/26/2022 20:16:05 - INFO - codeparrot_training - Step 445: {'lr': 0.00011125000000000001, 'samples': 85632, 'steps': 445, 'loss/train': 1.162532240152359} +01/26/2022 20:16:08 - INFO - codeparrot_training - Step 446: {'lr': 0.0001115, 'samples': 85824, 'steps': 446, 'loss/train': 1.272094488143921} +01/26/2022 20:16:11 - INFO - codeparrot_training - Step 447: {'lr': 0.00011175, 'samples': 86016, 'steps': 447, 'loss/train': 0.9917010962963104} +01/26/2022 20:16:14 - INFO - codeparrot_training - Step 448: {'lr': 0.000112, 'samples': 86208, 'steps': 448, 'loss/train': 0.7656463086605072} +01/26/2022 20:16:17 - INFO - codeparrot_training - Step 449: {'lr': 0.00011225, 'samples': 86400, 'steps': 449, 'loss/train': 0.4950437843799591} +01/26/2022 20:16:22 - INFO - codeparrot_training - Step 450: {'lr': 0.00011250000000000001, 'samples': 86592, 'steps': 450, 'loss/train': 0.9192585647106171} +01/26/2022 20:16:25 - INFO - codeparrot_training - Step 451: {'lr': 0.00011275, 'samples': 86784, 'steps': 451, 'loss/train': 0.6744060963392258} +01/26/2022 20:16:28 - INFO - codeparrot_training - Step 452: {'lr': 0.00011300000000000001, 'samples': 86976, 'steps': 452, 'loss/train': 0.7491473704576492} +01/26/2022 20:16:31 - INFO - codeparrot_training - Step 453: {'lr': 0.00011325, 'samples': 87168, 'steps': 453, 'loss/train': 0.7701495587825775} +01/26/2022 20:16:34 - INFO - codeparrot_training - Step 454: {'lr': 0.00011350000000000001, 'samples': 87360, 'steps': 454, 'loss/train': 0.6129368394613266} +01/26/2022 20:16:37 - INFO - codeparrot_training - Step 455: {'lr': 0.00011375, 'samples': 87552, 'steps': 455, 'loss/train': 1.2186667621135712} +01/26/2022 20:16:40 - INFO - codeparrot_training - Step 456: {'lr': 0.000114, 'samples': 87744, 'steps': 456, 'loss/train': 0.970245748758316} +01/26/2022 20:16:44 - INFO - codeparrot_training - Step 457: {'lr': 0.00011425000000000001, 'samples': 87936, 'steps': 457, 'loss/train': 1.1947226822376251} +01/26/2022 20:16:48 - INFO - codeparrot_training - Step 458: {'lr': 0.0001145, 'samples': 88128, 'steps': 458, 'loss/train': 1.3567461669445038} +01/26/2022 20:16:51 - INFO - codeparrot_training - Step 459: {'lr': 0.00011475000000000001, 'samples': 88320, 'steps': 459, 'loss/train': 0.6487510353326797} +01/26/2022 20:16:54 - INFO - codeparrot_training - Step 460: {'lr': 0.000115, 'samples': 88512, 'steps': 460, 'loss/train': 0.8757333755493164} +01/26/2022 20:16:58 - INFO - codeparrot_training - Step 461: {'lr': 0.00011525000000000001, 'samples': 88704, 'steps': 461, 'loss/train': 1.011967420578003} +01/26/2022 20:17:01 - INFO - codeparrot_training - Step 462: {'lr': 0.0001155, 'samples': 88896, 'steps': 462, 'loss/train': 1.5802865624427795} +01/26/2022 20:17:04 - INFO - codeparrot_training - Step 463: {'lr': 0.00011575000000000001, 'samples': 89088, 'steps': 463, 'loss/train': 0.8010456562042236} +01/26/2022 20:17:07 - INFO - codeparrot_training - Step 464: {'lr': 0.00011600000000000001, 'samples': 89280, 'steps': 464, 'loss/train': 0.8695641160011292} +01/26/2022 20:17:10 - INFO - codeparrot_training - Step 465: {'lr': 0.00011625, 'samples': 89472, 'steps': 465, 'loss/train': 0.27032042294740677} +01/26/2022 20:17:13 - INFO - codeparrot_training - Step 466: {'lr': 0.00011650000000000001, 'samples': 89664, 'steps': 466, 'loss/train': 0.7990081608295441} +01/26/2022 20:17:19 - INFO - codeparrot_training - Step 467: {'lr': 0.00011675, 'samples': 89856, 'steps': 467, 'loss/train': 0.9661056697368622} +01/26/2022 20:17:22 - INFO - codeparrot_training - Step 468: {'lr': 0.00011700000000000001, 'samples': 90048, 'steps': 468, 'loss/train': 1.120609313249588} +01/26/2022 20:17:25 - INFO - codeparrot_training - Step 469: {'lr': 0.00011724999999999999, 'samples': 90240, 'steps': 469, 'loss/train': 0.47997334599494934} +01/26/2022 20:17:28 - INFO - codeparrot_training - Step 470: {'lr': 0.0001175, 'samples': 90432, 'steps': 470, 'loss/train': 0.836497038602829} +01/26/2022 20:17:31 - INFO - codeparrot_training - Step 471: {'lr': 0.00011775, 'samples': 90624, 'steps': 471, 'loss/train': 0.8853902220726013} +01/26/2022 20:17:34 - INFO - codeparrot_training - Step 472: {'lr': 0.000118, 'samples': 90816, 'steps': 472, 'loss/train': 0.6629137247800827} +01/26/2022 20:17:37 - INFO - codeparrot_training - Step 473: {'lr': 0.00011825, 'samples': 91008, 'steps': 473, 'loss/train': 1.1359244585037231} +01/26/2022 20:17:41 - INFO - codeparrot_training - Step 474: {'lr': 0.0001185, 'samples': 91200, 'steps': 474, 'loss/train': 0.8318636119365692} +01/26/2022 20:17:44 - INFO - codeparrot_training - Step 475: {'lr': 0.00011875, 'samples': 91392, 'steps': 475, 'loss/train': 0.49809399247169495} +01/26/2022 20:17:48 - INFO - codeparrot_training - Step 476: {'lr': 0.00011899999999999999, 'samples': 91584, 'steps': 476, 'loss/train': 1.2043863236904144} +01/26/2022 20:17:51 - INFO - codeparrot_training - Step 477: {'lr': 0.00011925, 'samples': 91776, 'steps': 477, 'loss/train': 0.8016407489776611} +01/26/2022 20:17:55 - INFO - codeparrot_training - Step 478: {'lr': 0.00011949999999999999, 'samples': 91968, 'steps': 478, 'loss/train': 0.9700524508953094} +01/26/2022 20:17:58 - INFO - codeparrot_training - Step 479: {'lr': 0.00011975, 'samples': 92160, 'steps': 479, 'loss/train': 0.6533531248569489} +01/26/2022 20:18:01 - INFO - codeparrot_training - Step 480: {'lr': 0.00012, 'samples': 92352, 'steps': 480, 'loss/train': 1.0995571911334991} +01/26/2022 20:18:04 - INFO - codeparrot_training - Step 481: {'lr': 0.00012025, 'samples': 92544, 'steps': 481, 'loss/train': 0.46675287187099457} +01/26/2022 20:18:07 - INFO - codeparrot_training - Step 482: {'lr': 0.0001205, 'samples': 92736, 'steps': 482, 'loss/train': 1.4225375354290009} +01/26/2022 20:18:10 - INFO - codeparrot_training - Step 483: {'lr': 0.00012075, 'samples': 92928, 'steps': 483, 'loss/train': 0.6436797380447388} +01/26/2022 20:18:13 - INFO - codeparrot_training - Step 484: {'lr': 0.000121, 'samples': 93120, 'steps': 484, 'loss/train': 0.9993196427822113} +01/26/2022 20:18:20 - INFO - codeparrot_training - Step 485: {'lr': 0.00012124999999999999, 'samples': 93312, 'steps': 485, 'loss/train': 0.7051179707050323} +01/26/2022 20:18:23 - INFO - codeparrot_training - Step 486: {'lr': 0.0001215, 'samples': 93504, 'steps': 486, 'loss/train': 0.8310778141021729} +01/26/2022 20:18:26 - INFO - codeparrot_training - Step 487: {'lr': 0.00012175, 'samples': 93696, 'steps': 487, 'loss/train': 0.7130374610424042} +01/26/2022 20:18:29 - INFO - codeparrot_training - Step 488: {'lr': 0.000122, 'samples': 93888, 'steps': 488, 'loss/train': 0.6525427848100662} +01/26/2022 20:18:32 - INFO - codeparrot_training - Step 489: {'lr': 0.00012225, 'samples': 94080, 'steps': 489, 'loss/train': 1.263034701347351} +01/26/2022 20:18:35 - INFO - codeparrot_training - Step 490: {'lr': 0.0001225, 'samples': 94272, 'steps': 490, 'loss/train': 1.2272007465362549} +01/26/2022 20:18:38 - INFO - codeparrot_training - Step 491: {'lr': 0.00012275, 'samples': 94464, 'steps': 491, 'loss/train': 0.3253694325685501} +01/26/2022 20:18:42 - INFO - codeparrot_training - Step 492: {'lr': 0.000123, 'samples': 94656, 'steps': 492, 'loss/train': 1.7392045855522156} +01/26/2022 20:18:45 - INFO - codeparrot_training - Step 493: {'lr': 0.00012325000000000001, 'samples': 94848, 'steps': 493, 'loss/train': 1.0487505197525024} +01/26/2022 20:18:49 - INFO - codeparrot_training - Step 494: {'lr': 0.0001235, 'samples': 95040, 'steps': 494, 'loss/train': 0.8816839456558228} +01/26/2022 20:18:53 - INFO - codeparrot_training - Step 495: {'lr': 0.00012375, 'samples': 95232, 'steps': 495, 'loss/train': 0.7144645750522614} +01/26/2022 20:18:56 - INFO - codeparrot_training - Step 496: {'lr': 0.000124, 'samples': 95424, 'steps': 496, 'loss/train': 0.7718562483787537} +01/26/2022 20:18:59 - INFO - codeparrot_training - Step 497: {'lr': 0.00012425, 'samples': 95616, 'steps': 497, 'loss/train': 1.0395937263965607} +01/26/2022 20:19:02 - INFO - codeparrot_training - Step 498: {'lr': 0.0001245, 'samples': 95808, 'steps': 498, 'loss/train': 0.9165495336055756} +01/26/2022 20:19:05 - INFO - codeparrot_training - Step 499: {'lr': 0.00012475, 'samples': 96000, 'steps': 499, 'loss/train': 0.5416628569364548} +01/26/2022 20:19:08 - INFO - codeparrot_training - Step 500: {'lr': 0.000125, 'samples': 96192, 'steps': 500, 'loss/train': 0.672015443444252} +01/26/2022 20:19:12 - INFO - codeparrot_training - Step 501: {'lr': 0.00012525, 'samples': 96384, 'steps': 501, 'loss/train': 1.9715461134910583} +01/26/2022 20:19:15 - INFO - codeparrot_training - Step 502: {'lr': 0.00012550000000000001, 'samples': 96576, 'steps': 502, 'loss/train': 0.8303739130496979} +01/26/2022 20:19:19 - INFO - codeparrot_training - Step 503: {'lr': 0.00012575, 'samples': 96768, 'steps': 503, 'loss/train': 1.1383517682552338} +01/26/2022 20:19:22 - INFO - codeparrot_training - Step 504: {'lr': 0.000126, 'samples': 96960, 'steps': 504, 'loss/train': 0.512078046798706} +01/26/2022 20:19:25 - INFO - codeparrot_training - Step 505: {'lr': 0.00012625, 'samples': 97152, 'steps': 505, 'loss/train': 0.5505821406841278} +01/26/2022 20:19:29 - INFO - codeparrot_training - Step 506: {'lr': 0.0001265, 'samples': 97344, 'steps': 506, 'loss/train': 0.6685009449720383} +01/26/2022 20:19:32 - INFO - codeparrot_training - Step 507: {'lr': 0.00012675, 'samples': 97536, 'steps': 507, 'loss/train': 0.5938424617052078} +01/26/2022 20:19:35 - INFO - codeparrot_training - Step 508: {'lr': 0.000127, 'samples': 97728, 'steps': 508, 'loss/train': 0.8808158934116364} +01/26/2022 20:19:38 - INFO - codeparrot_training - Step 509: {'lr': 0.00012725, 'samples': 97920, 'steps': 509, 'loss/train': 0.7746322453022003} +01/26/2022 20:19:41 - INFO - codeparrot_training - Step 510: {'lr': 0.0001275, 'samples': 98112, 'steps': 510, 'loss/train': 1.0879081785678864} +01/26/2022 20:19:44 - INFO - codeparrot_training - Step 511: {'lr': 0.00012775000000000002, 'samples': 98304, 'steps': 511, 'loss/train': 0.7731634676456451} +01/26/2022 20:19:49 - INFO - codeparrot_training - Step 512: {'lr': 0.000128, 'samples': 98496, 'steps': 512, 'loss/train': 0.6284303516149521} +01/26/2022 20:19:52 - INFO - codeparrot_training - Step 513: {'lr': 0.00012825, 'samples': 98688, 'steps': 513, 'loss/train': 0.27421581745147705} +01/26/2022 20:19:55 - INFO - codeparrot_training - Step 514: {'lr': 0.0001285, 'samples': 98880, 'steps': 514, 'loss/train': 0.9253816902637482} +01/26/2022 20:19:58 - INFO - codeparrot_training - Step 515: {'lr': 0.00012875, 'samples': 99072, 'steps': 515, 'loss/train': 1.2660154402256012} +01/26/2022 20:20:02 - INFO - codeparrot_training - Step 516: {'lr': 0.00012900000000000002, 'samples': 99264, 'steps': 516, 'loss/train': 0.8392927944660187} +01/26/2022 20:20:05 - INFO - codeparrot_training - Step 517: {'lr': 0.00012925, 'samples': 99456, 'steps': 517, 'loss/train': 1.019319087266922} +01/26/2022 20:20:08 - INFO - codeparrot_training - Step 518: {'lr': 0.0001295, 'samples': 99648, 'steps': 518, 'loss/train': 1.2289848625659943} +01/26/2022 20:20:11 - INFO - codeparrot_training - Step 519: {'lr': 0.00012975, 'samples': 99840, 'steps': 519, 'loss/train': 0.9302180707454681} +01/26/2022 20:20:14 - INFO - codeparrot_training - Step 520: {'lr': 0.00013000000000000002, 'samples': 100032, 'steps': 520, 'loss/train': 0.8552613258361816} +01/26/2022 20:20:20 - INFO - codeparrot_training - Step 521: {'lr': 0.00013025, 'samples': 100224, 'steps': 521, 'loss/train': 1.01572984457016} +01/26/2022 20:20:23 - INFO - codeparrot_training - Step 522: {'lr': 0.0001305, 'samples': 100416, 'steps': 522, 'loss/train': 0.7750627398490906} +01/26/2022 20:20:26 - INFO - codeparrot_training - Step 523: {'lr': 0.00013075, 'samples': 100608, 'steps': 523, 'loss/train': 1.4055988490581512} +01/26/2022 20:20:29 - INFO - codeparrot_training - Step 524: {'lr': 0.000131, 'samples': 100800, 'steps': 524, 'loss/train': 0.6941443383693695} +01/26/2022 20:20:32 - INFO - codeparrot_training - Step 525: {'lr': 0.00013125000000000002, 'samples': 100992, 'steps': 525, 'loss/train': 2.018996000289917} +01/26/2022 20:20:35 - INFO - codeparrot_training - Step 526: {'lr': 0.0001315, 'samples': 101184, 'steps': 526, 'loss/train': 1.4397898614406586} +01/26/2022 20:20:39 - INFO - codeparrot_training - Step 527: {'lr': 0.00013175, 'samples': 101376, 'steps': 527, 'loss/train': 0.9549647569656372} +01/26/2022 20:20:42 - INFO - codeparrot_training - Step 528: {'lr': 0.000132, 'samples': 101568, 'steps': 528, 'loss/train': 1.12039715051651} +01/26/2022 20:20:45 - INFO - codeparrot_training - Step 529: {'lr': 0.00013225000000000002, 'samples': 101760, 'steps': 529, 'loss/train': 0.8304433822631836} +01/26/2022 20:20:49 - INFO - codeparrot_training - Step 530: {'lr': 0.00013250000000000002, 'samples': 101952, 'steps': 530, 'loss/train': 1.0192952156066895} +01/26/2022 20:20:52 - INFO - codeparrot_training - Step 531: {'lr': 0.00013275, 'samples': 102144, 'steps': 531, 'loss/train': 1.0101014971733093} +01/26/2022 20:20:55 - INFO - codeparrot_training - Step 532: {'lr': 0.000133, 'samples': 102336, 'steps': 532, 'loss/train': 1.182517558336258} +01/26/2022 20:20:59 - INFO - codeparrot_training - Step 533: {'lr': 0.00013325, 'samples': 102528, 'steps': 533, 'loss/train': 0.7047746032476425} +01/26/2022 20:21:02 - INFO - codeparrot_training - Step 534: {'lr': 0.00013350000000000002, 'samples': 102720, 'steps': 534, 'loss/train': 0.9654974341392517} +01/26/2022 20:21:05 - INFO - codeparrot_training - Step 535: {'lr': 0.00013375, 'samples': 102912, 'steps': 535, 'loss/train': 1.4733890891075134} +01/26/2022 20:21:08 - INFO - codeparrot_training - Step 536: {'lr': 0.000134, 'samples': 103104, 'steps': 536, 'loss/train': 1.2371703386306763} +01/26/2022 20:21:11 - INFO - codeparrot_training - Step 537: {'lr': 0.00013425, 'samples': 103296, 'steps': 537, 'loss/train': 1.280784398317337} +01/26/2022 20:21:14 - INFO - codeparrot_training - Step 538: {'lr': 0.00013450000000000002, 'samples': 103488, 'steps': 538, 'loss/train': 1.0170392096042633} +01/26/2022 20:21:19 - INFO - codeparrot_training - Step 539: {'lr': 0.00013475000000000002, 'samples': 103680, 'steps': 539, 'loss/train': 1.1649912893772125} +01/26/2022 20:21:22 - INFO - codeparrot_training - Step 540: {'lr': 0.000135, 'samples': 103872, 'steps': 540, 'loss/train': 0.8566865622997284} +01/26/2022 20:21:25 - INFO - codeparrot_training - Step 541: {'lr': 0.00013525, 'samples': 104064, 'steps': 541, 'loss/train': 0.5552657693624496} +01/26/2022 20:21:28 - INFO - codeparrot_training - Step 542: {'lr': 0.00013550000000000001, 'samples': 104256, 'steps': 542, 'loss/train': 0.796043336391449} +01/26/2022 20:21:31 - INFO - codeparrot_training - Step 543: {'lr': 0.00013575000000000002, 'samples': 104448, 'steps': 543, 'loss/train': 0.7574986517429352} +01/26/2022 20:21:34 - INFO - codeparrot_training - Step 544: {'lr': 0.00013600000000000003, 'samples': 104640, 'steps': 544, 'loss/train': 0.6186468601226807} +01/26/2022 20:21:38 - INFO - codeparrot_training - Step 545: {'lr': 0.00013625, 'samples': 104832, 'steps': 545, 'loss/train': 0.9620187878608704} +01/26/2022 20:21:41 - INFO - codeparrot_training - Step 546: {'lr': 0.0001365, 'samples': 105024, 'steps': 546, 'loss/train': 0.9344134032726288} +01/26/2022 20:21:46 - INFO - codeparrot_training - Step 547: {'lr': 0.00013675000000000002, 'samples': 105216, 'steps': 547, 'loss/train': 0.6428420394659042} +01/26/2022 20:21:49 - INFO - codeparrot_training - Step 548: {'lr': 0.00013700000000000002, 'samples': 105408, 'steps': 548, 'loss/train': 1.1159744560718536} +01/26/2022 20:21:52 - INFO - codeparrot_training - Step 549: {'lr': 0.00013725, 'samples': 105600, 'steps': 549, 'loss/train': 0.5158883482217789} +01/26/2022 20:21:56 - INFO - codeparrot_training - Step 550: {'lr': 0.0001375, 'samples': 105792, 'steps': 550, 'loss/train': 1.0909550786018372} +01/26/2022 20:21:59 - INFO - codeparrot_training - Step 551: {'lr': 0.00013775000000000001, 'samples': 105984, 'steps': 551, 'loss/train': 1.6916638612747192} +01/26/2022 20:22:02 - INFO - codeparrot_training - Step 552: {'lr': 0.00013800000000000002, 'samples': 106176, 'steps': 552, 'loss/train': 0.9012663066387177} +01/26/2022 20:22:05 - INFO - codeparrot_training - Step 553: {'lr': 0.00013825000000000003, 'samples': 106368, 'steps': 553, 'loss/train': 0.7789967358112335} +01/26/2022 20:22:08 - INFO - codeparrot_training - Step 554: {'lr': 0.0001385, 'samples': 106560, 'steps': 554, 'loss/train': 1.0671734511852264} +01/26/2022 20:22:11 - INFO - codeparrot_training - Step 555: {'lr': 0.00013875, 'samples': 106752, 'steps': 555, 'loss/train': 1.0245478749275208} +01/26/2022 20:22:16 - INFO - codeparrot_training - Step 556: {'lr': 0.00013900000000000002, 'samples': 106944, 'steps': 556, 'loss/train': 0.5020046085119247} +01/26/2022 20:22:19 - INFO - codeparrot_training - Step 557: {'lr': 0.00013925000000000002, 'samples': 107136, 'steps': 557, 'loss/train': 1.0817587673664093} +01/26/2022 20:22:22 - INFO - codeparrot_training - Step 558: {'lr': 0.0001395, 'samples': 107328, 'steps': 558, 'loss/train': 0.9696366190910339} +01/26/2022 20:22:25 - INFO - codeparrot_training - Step 559: {'lr': 0.00013975, 'samples': 107520, 'steps': 559, 'loss/train': 1.6461342573165894} +01/26/2022 20:22:28 - INFO - codeparrot_training - Step 560: {'lr': 0.00014000000000000001, 'samples': 107712, 'steps': 560, 'loss/train': 0.7242932170629501} +01/26/2022 20:22:31 - INFO - codeparrot_training - Step 561: {'lr': 0.00014025000000000002, 'samples': 107904, 'steps': 561, 'loss/train': 0.6898515522480011} +01/26/2022 20:22:34 - INFO - codeparrot_training - Step 562: {'lr': 0.00014050000000000003, 'samples': 108096, 'steps': 562, 'loss/train': 1.1182992160320282} +01/26/2022 20:22:38 - INFO - codeparrot_training - Step 563: {'lr': 0.00014074999999999998, 'samples': 108288, 'steps': 563, 'loss/train': 1.2305793464183807} +01/26/2022 20:22:41 - INFO - codeparrot_training - Step 564: {'lr': 0.00014099999999999998, 'samples': 108480, 'steps': 564, 'loss/train': 0.6293094009160995} +01/26/2022 20:22:46 - INFO - codeparrot_training - Step 565: {'lr': 0.00014125, 'samples': 108672, 'steps': 565, 'loss/train': 0.7494006603956223} +01/26/2022 20:22:49 - INFO - codeparrot_training - Step 566: {'lr': 0.0001415, 'samples': 108864, 'steps': 566, 'loss/train': 0.6767587065696716} +01/26/2022 20:22:52 - INFO - codeparrot_training - Step 567: {'lr': 0.00014175, 'samples': 109056, 'steps': 567, 'loss/train': 0.7286884635686874} +01/26/2022 20:22:55 - INFO - codeparrot_training - Step 568: {'lr': 0.00014199999999999998, 'samples': 109248, 'steps': 568, 'loss/train': 1.3158212900161743} +01/26/2022 20:22:58 - INFO - codeparrot_training - Step 569: {'lr': 0.00014225, 'samples': 109440, 'steps': 569, 'loss/train': 1.304736077785492} +01/26/2022 20:23:02 - INFO - codeparrot_training - Step 570: {'lr': 0.0001425, 'samples': 109632, 'steps': 570, 'loss/train': 0.7607653141021729} +01/26/2022 20:23:05 - INFO - codeparrot_training - Step 571: {'lr': 0.00014275, 'samples': 109824, 'steps': 571, 'loss/train': 1.084415227174759} +01/26/2022 20:23:08 - INFO - codeparrot_training - Step 572: {'lr': 0.00014299999999999998, 'samples': 110016, 'steps': 572, 'loss/train': 0.9314425885677338} +01/26/2022 20:23:11 - INFO - codeparrot_training - Step 573: {'lr': 0.00014324999999999999, 'samples': 110208, 'steps': 573, 'loss/train': 0.8982188701629639} +01/26/2022 20:23:15 - INFO - codeparrot_training - Step 574: {'lr': 0.0001435, 'samples': 110400, 'steps': 574, 'loss/train': 1.0459118485450745} +01/26/2022 20:23:18 - INFO - codeparrot_training - Step 575: {'lr': 0.00014375, 'samples': 110592, 'steps': 575, 'loss/train': 1.24342542886734} +01/26/2022 20:23:22 - INFO - codeparrot_training - Step 576: {'lr': 0.000144, 'samples': 110784, 'steps': 576, 'loss/train': 1.0160776376724243} +01/26/2022 20:23:25 - INFO - codeparrot_training - Step 577: {'lr': 0.00014424999999999998, 'samples': 110976, 'steps': 577, 'loss/train': 1.1467996537685394} +01/26/2022 20:23:28 - INFO - codeparrot_training - Step 578: {'lr': 0.0001445, 'samples': 111168, 'steps': 578, 'loss/train': 0.8268345594406128} +01/26/2022 20:23:31 - INFO - codeparrot_training - Step 579: {'lr': 0.00014475, 'samples': 111360, 'steps': 579, 'loss/train': 0.873445987701416} +01/26/2022 20:23:34 - INFO - codeparrot_training - Step 580: {'lr': 0.000145, 'samples': 111552, 'steps': 580, 'loss/train': 0.45913079380989075} +01/26/2022 20:23:37 - INFO - codeparrot_training - Step 581: {'lr': 0.00014524999999999998, 'samples': 111744, 'steps': 581, 'loss/train': 0.8863986432552338} +01/26/2022 20:23:42 - INFO - codeparrot_training - Step 582: {'lr': 0.00014549999999999999, 'samples': 111936, 'steps': 582, 'loss/train': 0.8918125033378601} +01/26/2022 20:23:45 - INFO - codeparrot_training - Step 583: {'lr': 0.00014575, 'samples': 112128, 'steps': 583, 'loss/train': 1.0199389457702637} +01/26/2022 20:23:48 - INFO - codeparrot_training - Step 584: {'lr': 0.000146, 'samples': 112320, 'steps': 584, 'loss/train': 1.2752460837364197} +01/26/2022 20:23:51 - INFO - codeparrot_training - Step 585: {'lr': 0.00014625, 'samples': 112512, 'steps': 585, 'loss/train': 0.8354544639587402} +01/26/2022 20:23:54 - INFO - codeparrot_training - Step 586: {'lr': 0.00014649999999999998, 'samples': 112704, 'steps': 586, 'loss/train': 0.6896410435438156} +01/26/2022 20:23:58 - INFO - codeparrot_training - Step 587: {'lr': 0.00014675, 'samples': 112896, 'steps': 587, 'loss/train': 0.755020022392273} +01/26/2022 20:24:01 - INFO - codeparrot_training - Step 588: {'lr': 0.000147, 'samples': 113088, 'steps': 588, 'loss/train': 0.8740944564342499} +01/26/2022 20:24:04 - INFO - codeparrot_training - Step 589: {'lr': 0.00014725, 'samples': 113280, 'steps': 589, 'loss/train': 1.613927185535431} +01/26/2022 20:24:07 - INFO - codeparrot_training - Step 590: {'lr': 0.0001475, 'samples': 113472, 'steps': 590, 'loss/train': 0.8339685201644897} +01/26/2022 20:24:11 - INFO - codeparrot_training - Step 591: {'lr': 0.00014774999999999999, 'samples': 113664, 'steps': 591, 'loss/train': 0.899694174528122} +01/26/2022 20:24:15 - INFO - codeparrot_training - Step 592: {'lr': 0.000148, 'samples': 113856, 'steps': 592, 'loss/train': 1.1107328832149506} +01/26/2022 20:24:18 - INFO - codeparrot_training - Step 593: {'lr': 0.00014825, 'samples': 114048, 'steps': 593, 'loss/train': 0.875143826007843} +01/26/2022 20:24:21 - INFO - codeparrot_training - Step 594: {'lr': 0.0001485, 'samples': 114240, 'steps': 594, 'loss/train': 0.669572576880455} +01/26/2022 20:24:24 - INFO - codeparrot_training - Step 595: {'lr': 0.00014874999999999998, 'samples': 114432, 'steps': 595, 'loss/train': 1.0809493660926819} +01/26/2022 20:24:27 - INFO - codeparrot_training - Step 596: {'lr': 0.000149, 'samples': 114624, 'steps': 596, 'loss/train': 0.47745783627033234} +01/26/2022 20:24:30 - INFO - codeparrot_training - Step 597: {'lr': 0.00014925, 'samples': 114816, 'steps': 597, 'loss/train': 1.8296976685523987} +01/26/2022 20:24:34 - INFO - codeparrot_training - Step 598: {'lr': 0.0001495, 'samples': 115008, 'steps': 598, 'loss/train': 0.5959167033433914} +01/26/2022 20:24:37 - INFO - codeparrot_training - Step 599: {'lr': 0.00014975, 'samples': 115200, 'steps': 599, 'loss/train': 0.926987886428833} +01/26/2022 20:24:40 - INFO - codeparrot_training - Step 600: {'lr': 0.00015, 'samples': 115392, 'steps': 600, 'loss/train': 0.6421291083097458} +01/26/2022 20:24:46 - INFO - codeparrot_training - Step 601: {'lr': 0.00015025, 'samples': 115584, 'steps': 601, 'loss/train': 1.1162900626659393} +01/26/2022 20:24:49 - INFO - codeparrot_training - Step 602: {'lr': 0.0001505, 'samples': 115776, 'steps': 602, 'loss/train': 1.2625629901885986} +01/26/2022 20:24:52 - INFO - codeparrot_training - Step 603: {'lr': 0.00015075, 'samples': 115968, 'steps': 603, 'loss/train': 0.8391094207763672} +01/26/2022 20:24:55 - INFO - codeparrot_training - Step 604: {'lr': 0.000151, 'samples': 116160, 'steps': 604, 'loss/train': 1.0422407984733582} +01/26/2022 20:24:58 - INFO - codeparrot_training - Step 605: {'lr': 0.00015125, 'samples': 116352, 'steps': 605, 'loss/train': 1.1536369621753693} +01/26/2022 20:25:01 - INFO - codeparrot_training - Step 606: {'lr': 0.0001515, 'samples': 116544, 'steps': 606, 'loss/train': 0.8908830285072327} +01/26/2022 20:25:04 - INFO - codeparrot_training - Step 607: {'lr': 0.00015175, 'samples': 116736, 'steps': 607, 'loss/train': 0.7630718350410461} +01/26/2022 20:25:08 - INFO - codeparrot_training - Step 608: {'lr': 0.000152, 'samples': 116928, 'steps': 608, 'loss/train': 1.165149986743927} +01/26/2022 20:25:12 - INFO - codeparrot_training - Step 609: {'lr': 0.00015225, 'samples': 117120, 'steps': 609, 'loss/train': 0.5184662640094757} +01/26/2022 20:25:15 - INFO - codeparrot_training - Step 610: {'lr': 0.0001525, 'samples': 117312, 'steps': 610, 'loss/train': 0.813170313835144} +01/26/2022 20:25:18 - INFO - codeparrot_training - Step 611: {'lr': 0.00015275, 'samples': 117504, 'steps': 611, 'loss/train': 0.8342358469963074} +01/26/2022 20:25:21 - INFO - codeparrot_training - Step 612: {'lr': 0.000153, 'samples': 117696, 'steps': 612, 'loss/train': 1.4845359027385712} +01/26/2022 20:25:25 - INFO - codeparrot_training - Step 613: {'lr': 0.00015325, 'samples': 117888, 'steps': 613, 'loss/train': 0.7532512843608856} +01/26/2022 20:25:28 - INFO - codeparrot_training - Step 614: {'lr': 0.0001535, 'samples': 118080, 'steps': 614, 'loss/train': 0.9865740537643433} +01/26/2022 20:25:31 - INFO - codeparrot_training - Step 615: {'lr': 0.00015375, 'samples': 118272, 'steps': 615, 'loss/train': 0.8163191378116608} +01/26/2022 20:25:34 - INFO - codeparrot_training - Step 616: {'lr': 0.000154, 'samples': 118464, 'steps': 616, 'loss/train': 1.3022699654102325} +01/26/2022 20:25:37 - INFO - codeparrot_training - Step 617: {'lr': 0.00015425, 'samples': 118656, 'steps': 617, 'loss/train': 1.1061480939388275} +01/26/2022 20:25:41 - INFO - codeparrot_training - Step 618: {'lr': 0.00015450000000000001, 'samples': 118848, 'steps': 618, 'loss/train': 1.2850266695022583} +01/26/2022 20:25:45 - INFO - codeparrot_training - Step 619: {'lr': 0.00015475, 'samples': 119040, 'steps': 619, 'loss/train': 1.0637675821781158} +01/26/2022 20:25:48 - INFO - codeparrot_training - Step 620: {'lr': 0.000155, 'samples': 119232, 'steps': 620, 'loss/train': 1.1182284951210022} +01/26/2022 20:25:51 - INFO - codeparrot_training - Step 621: {'lr': 0.00015525, 'samples': 119424, 'steps': 621, 'loss/train': 0.6352360993623734} +01/26/2022 20:25:54 - INFO - codeparrot_training - Step 622: {'lr': 0.0001555, 'samples': 119616, 'steps': 622, 'loss/train': 1.0795150995254517} +01/26/2022 20:25:57 - INFO - codeparrot_training - Step 623: {'lr': 0.00015575, 'samples': 119808, 'steps': 623, 'loss/train': 0.8984178900718689} +01/26/2022 20:26:00 - INFO - codeparrot_training - Step 624: {'lr': 0.000156, 'samples': 120000, 'steps': 624, 'loss/train': 1.230320155620575} +01/26/2022 20:26:03 - INFO - codeparrot_training - Step 625: {'lr': 0.00015625, 'samples': 120192, 'steps': 625, 'loss/train': 0.9912900924682617} +01/26/2022 20:26:07 - INFO - codeparrot_training - Step 626: {'lr': 0.0001565, 'samples': 120384, 'steps': 626, 'loss/train': 0.7683042883872986} +01/26/2022 20:26:13 - INFO - codeparrot_training - Step 627: {'lr': 0.00015675000000000002, 'samples': 120576, 'steps': 627, 'loss/train': 1.0303488671779633} +01/26/2022 20:26:16 - INFO - codeparrot_training - Step 628: {'lr': 0.000157, 'samples': 120768, 'steps': 628, 'loss/train': 0.9553826451301575} +01/26/2022 20:26:19 - INFO - codeparrot_training - Step 629: {'lr': 0.00015725, 'samples': 120960, 'steps': 629, 'loss/train': 1.071675717830658} +01/26/2022 20:26:23 - INFO - codeparrot_training - Step 630: {'lr': 0.0001575, 'samples': 121152, 'steps': 630, 'loss/train': 3.7067281007766724} +01/26/2022 20:26:26 - INFO - codeparrot_training - Step 631: {'lr': 0.00015775, 'samples': 121344, 'steps': 631, 'loss/train': 0.9579127728939056} +01/26/2022 20:26:29 - INFO - codeparrot_training - Step 632: {'lr': 0.000158, 'samples': 121536, 'steps': 632, 'loss/train': 0.8656308352947235} +01/26/2022 20:26:32 - INFO - codeparrot_training - Step 633: {'lr': 0.00015825, 'samples': 121728, 'steps': 633, 'loss/train': 1.243520826101303} +01/26/2022 20:26:35 - INFO - codeparrot_training - Step 634: {'lr': 0.0001585, 'samples': 121920, 'steps': 634, 'loss/train': 0.5189113765954971} +01/26/2022 20:26:38 - INFO - codeparrot_training - Step 635: {'lr': 0.00015875, 'samples': 122112, 'steps': 635, 'loss/train': 1.0090940594673157} +01/26/2022 20:26:43 - INFO - codeparrot_training - Step 636: {'lr': 0.00015900000000000002, 'samples': 122304, 'steps': 636, 'loss/train': 1.1085612773895264} +01/26/2022 20:26:46 - INFO - codeparrot_training - Step 637: {'lr': 0.00015925, 'samples': 122496, 'steps': 637, 'loss/train': 1.3199002146720886} +01/26/2022 20:26:49 - INFO - codeparrot_training - Step 638: {'lr': 0.0001595, 'samples': 122688, 'steps': 638, 'loss/train': 1.4171862602233887} +01/26/2022 20:26:52 - INFO - codeparrot_training - Step 639: {'lr': 0.00015975, 'samples': 122880, 'steps': 639, 'loss/train': 0.4769306480884552} +01/26/2022 20:26:55 - INFO - codeparrot_training - Step 640: {'lr': 0.00016, 'samples': 123072, 'steps': 640, 'loss/train': 0.6695694476366043} +01/26/2022 20:26:58 - INFO - codeparrot_training - Step 641: {'lr': 0.00016025000000000002, 'samples': 123264, 'steps': 641, 'loss/train': 0.8001262843608856} +01/26/2022 20:27:02 - INFO - codeparrot_training - Step 642: {'lr': 0.0001605, 'samples': 123456, 'steps': 642, 'loss/train': 1.0185562670230865} +01/26/2022 20:27:05 - INFO - codeparrot_training - Step 643: {'lr': 0.00016075, 'samples': 123648, 'steps': 643, 'loss/train': 1.3380499184131622} +01/26/2022 20:27:08 - INFO - codeparrot_training - Step 644: {'lr': 0.000161, 'samples': 123840, 'steps': 644, 'loss/train': 0.9916056096553802} +01/26/2022 20:27:14 - INFO - codeparrot_training - Step 645: {'lr': 0.00016125000000000002, 'samples': 124032, 'steps': 645, 'loss/train': 0.9142903089523315} +01/26/2022 20:27:17 - INFO - codeparrot_training - Step 646: {'lr': 0.0001615, 'samples': 124224, 'steps': 646, 'loss/train': 0.6464662849903107} +01/26/2022 20:27:20 - INFO - codeparrot_training - Step 647: {'lr': 0.00016175, 'samples': 124416, 'steps': 647, 'loss/train': 0.6285644620656967} +01/26/2022 20:27:23 - INFO - codeparrot_training - Step 648: {'lr': 0.000162, 'samples': 124608, 'steps': 648, 'loss/train': 0.8848017454147339} +01/26/2022 20:27:27 - INFO - codeparrot_training - Step 649: {'lr': 0.00016225000000000001, 'samples': 124800, 'steps': 649, 'loss/train': 0.6977881193161011} +01/26/2022 20:27:30 - INFO - codeparrot_training - Step 650: {'lr': 0.00016250000000000002, 'samples': 124992, 'steps': 650, 'loss/train': 0.8536549508571625} +01/26/2022 20:27:33 - INFO - codeparrot_training - Step 651: {'lr': 0.00016275, 'samples': 125184, 'steps': 651, 'loss/train': 0.6653050482273102} +01/26/2022 20:27:36 - INFO - codeparrot_training - Step 652: {'lr': 0.000163, 'samples': 125376, 'steps': 652, 'loss/train': 0.7839305698871613} +01/26/2022 20:27:39 - INFO - codeparrot_training - Step 653: {'lr': 0.00016325, 'samples': 125568, 'steps': 653, 'loss/train': 1.036845713853836} +01/26/2022 20:27:43 - INFO - codeparrot_training - Step 654: {'lr': 0.00016350000000000002, 'samples': 125760, 'steps': 654, 'loss/train': 1.2619302570819855} +01/26/2022 20:27:47 - INFO - codeparrot_training - Step 655: {'lr': 0.00016375000000000002, 'samples': 125952, 'steps': 655, 'loss/train': 1.097649335861206} +01/26/2022 20:27:50 - INFO - codeparrot_training - Step 656: {'lr': 0.000164, 'samples': 126144, 'steps': 656, 'loss/train': 1.4299048483371735} +01/26/2022 20:27:53 - INFO - codeparrot_training - Step 657: {'lr': 0.00016425, 'samples': 126336, 'steps': 657, 'loss/train': 0.698960691690445} +01/26/2022 20:27:56 - INFO - codeparrot_training - Step 658: {'lr': 0.00016450000000000001, 'samples': 126528, 'steps': 658, 'loss/train': 1.0095846354961395} +01/26/2022 20:27:59 - INFO - codeparrot_training - Step 659: {'lr': 0.00016475000000000002, 'samples': 126720, 'steps': 659, 'loss/train': 1.0719581544399261} +01/26/2022 20:28:02 - INFO - codeparrot_training - Step 660: {'lr': 0.000165, 'samples': 126912, 'steps': 660, 'loss/train': 1.2665534913539886} +01/26/2022 20:28:05 - INFO - codeparrot_training - Step 661: {'lr': 0.00016525, 'samples': 127104, 'steps': 661, 'loss/train': 1.1495090425014496} +01/26/2022 20:28:10 - INFO - codeparrot_training - Step 662: {'lr': 0.0001655, 'samples': 127296, 'steps': 662, 'loss/train': 0.6644181311130524} +01/26/2022 20:28:13 - INFO - codeparrot_training - Step 663: {'lr': 0.00016575000000000002, 'samples': 127488, 'steps': 663, 'loss/train': 1.0472686886787415} +01/26/2022 20:28:16 - INFO - codeparrot_training - Step 664: {'lr': 0.00016600000000000002, 'samples': 127680, 'steps': 664, 'loss/train': 1.1239711940288544} +01/26/2022 20:28:19 - INFO - codeparrot_training - Step 665: {'lr': 0.00016625, 'samples': 127872, 'steps': 665, 'loss/train': 1.3414275348186493} +01/26/2022 20:28:23 - INFO - codeparrot_training - Step 666: {'lr': 0.0001665, 'samples': 128064, 'steps': 666, 'loss/train': 0.7016846090555191} +01/26/2022 20:28:26 - INFO - codeparrot_training - Step 667: {'lr': 0.00016675000000000001, 'samples': 128256, 'steps': 667, 'loss/train': 0.8392623960971832} +01/26/2022 20:28:29 - INFO - codeparrot_training - Step 668: {'lr': 0.00016700000000000002, 'samples': 128448, 'steps': 668, 'loss/train': 0.6881774067878723} +01/26/2022 20:28:32 - INFO - codeparrot_training - Step 669: {'lr': 0.00016725000000000003, 'samples': 128640, 'steps': 669, 'loss/train': 0.8654219806194305} +01/26/2022 20:28:35 - INFO - codeparrot_training - Step 670: {'lr': 0.0001675, 'samples': 128832, 'steps': 670, 'loss/train': 1.4148901104927063} +01/26/2022 20:28:41 - INFO - codeparrot_training - Step 671: {'lr': 0.00016775, 'samples': 129024, 'steps': 671, 'loss/train': 0.7871894538402557} +01/26/2022 20:28:44 - INFO - codeparrot_training - Step 672: {'lr': 0.00016800000000000002, 'samples': 129216, 'steps': 672, 'loss/train': 0.7191271483898163} +01/26/2022 20:28:48 - INFO - codeparrot_training - Step 673: {'lr': 0.00016825000000000002, 'samples': 129408, 'steps': 673, 'loss/train': 1.2848331034183502} +01/26/2022 20:28:51 - INFO - codeparrot_training - Step 674: {'lr': 0.0001685, 'samples': 129600, 'steps': 674, 'loss/train': 0.8642187416553497} +01/26/2022 20:28:54 - INFO - codeparrot_training - Step 675: {'lr': 0.00016875, 'samples': 129792, 'steps': 675, 'loss/train': 1.0577407479286194} +01/26/2022 20:28:57 - INFO - codeparrot_training - Step 676: {'lr': 0.00016900000000000002, 'samples': 129984, 'steps': 676, 'loss/train': 0.8613667488098145} +01/26/2022 20:29:00 - INFO - codeparrot_training - Step 677: {'lr': 0.00016925000000000002, 'samples': 130176, 'steps': 677, 'loss/train': 0.7029958516359329} +01/26/2022 20:29:03 - INFO - codeparrot_training - Step 678: {'lr': 0.00016950000000000003, 'samples': 130368, 'steps': 678, 'loss/train': 1.0868782997131348} +01/26/2022 20:29:06 - INFO - codeparrot_training - Step 679: {'lr': 0.00016975, 'samples': 130560, 'steps': 679, 'loss/train': 0.6318007707595825} +01/26/2022 20:29:11 - INFO - codeparrot_training - Step 680: {'lr': 0.00017, 'samples': 130752, 'steps': 680, 'loss/train': 0.7896529734134674} +01/26/2022 20:29:14 - INFO - codeparrot_training - Step 681: {'lr': 0.00017025000000000002, 'samples': 130944, 'steps': 681, 'loss/train': 0.7487976104021072} +01/26/2022 20:29:17 - INFO - codeparrot_training - Step 682: {'lr': 0.00017050000000000002, 'samples': 131136, 'steps': 682, 'loss/train': 0.5079184323549271} +01/26/2022 20:29:20 - INFO - codeparrot_training - Step 683: {'lr': 0.00017075, 'samples': 131328, 'steps': 683, 'loss/train': 0.7469399571418762} +01/26/2022 20:29:23 - INFO - codeparrot_training - Step 684: {'lr': 0.000171, 'samples': 131520, 'steps': 684, 'loss/train': 0.8566912114620209} +01/26/2022 20:29:27 - INFO - codeparrot_training - Step 685: {'lr': 0.00017125000000000002, 'samples': 131712, 'steps': 685, 'loss/train': 1.0669482350349426} +01/26/2022 20:29:30 - INFO - codeparrot_training - Step 686: {'lr': 0.00017150000000000002, 'samples': 131904, 'steps': 686, 'loss/train': 0.6893389374017715} +01/26/2022 20:29:33 - INFO - codeparrot_training - Step 687: {'lr': 0.00017175000000000003, 'samples': 132096, 'steps': 687, 'loss/train': 0.42208729684352875} +01/26/2022 20:29:36 - INFO - codeparrot_training - Step 688: {'lr': 0.00017199999999999998, 'samples': 132288, 'steps': 688, 'loss/train': 1.2730027735233307} +01/26/2022 20:29:40 - INFO - codeparrot_training - Step 689: {'lr': 0.00017224999999999999, 'samples': 132480, 'steps': 689, 'loss/train': 1.454077273607254} +01/26/2022 20:29:43 - INFO - codeparrot_training - Step 690: {'lr': 0.0001725, 'samples': 132672, 'steps': 690, 'loss/train': 1.051508903503418} +01/26/2022 20:29:47 - INFO - codeparrot_training - Step 691: {'lr': 0.00017275, 'samples': 132864, 'steps': 691, 'loss/train': 0.35310687124729156} +01/26/2022 20:29:50 - INFO - codeparrot_training - Step 692: {'lr': 0.000173, 'samples': 133056, 'steps': 692, 'loss/train': 0.9737249314785004} +01/26/2022 20:29:53 - INFO - codeparrot_training - Step 693: {'lr': 0.00017324999999999998, 'samples': 133248, 'steps': 693, 'loss/train': 0.8442901074886322} +01/26/2022 20:29:56 - INFO - codeparrot_training - Step 694: {'lr': 0.0001735, 'samples': 133440, 'steps': 694, 'loss/train': 0.9881038963794708} +01/26/2022 20:29:59 - INFO - codeparrot_training - Step 695: {'lr': 0.00017375, 'samples': 133632, 'steps': 695, 'loss/train': 0.9713330268859863} +01/26/2022 20:30:02 - INFO - codeparrot_training - Step 696: {'lr': 0.000174, 'samples': 133824, 'steps': 696, 'loss/train': 1.1849716901779175} +01/26/2022 20:30:05 - INFO - codeparrot_training - Step 697: {'lr': 0.00017424999999999998, 'samples': 134016, 'steps': 697, 'loss/train': 0.9789444208145142} +01/26/2022 20:30:12 - INFO - codeparrot_training - Step 698: {'lr': 0.00017449999999999999, 'samples': 134208, 'steps': 698, 'loss/train': 0.8334293961524963} +01/26/2022 20:30:15 - INFO - codeparrot_training - Step 699: {'lr': 0.00017475, 'samples': 134400, 'steps': 699, 'loss/train': 0.8760148286819458} +01/26/2022 20:30:18 - INFO - codeparrot_training - Step 700: {'lr': 0.000175, 'samples': 134592, 'steps': 700, 'loss/train': 1.3783383071422577} +01/26/2022 20:30:21 - INFO - codeparrot_training - Step 701: {'lr': 0.00017525, 'samples': 134784, 'steps': 701, 'loss/train': 1.022957682609558} +01/26/2022 20:30:24 - INFO - codeparrot_training - Step 702: {'lr': 0.00017549999999999998, 'samples': 134976, 'steps': 702, 'loss/train': 1.0660897493362427} +01/26/2022 20:30:28 - INFO - codeparrot_training - Step 703: {'lr': 0.00017575, 'samples': 135168, 'steps': 703, 'loss/train': 1.036992073059082} +01/26/2022 20:30:31 - INFO - codeparrot_training - Step 704: {'lr': 0.000176, 'samples': 135360, 'steps': 704, 'loss/train': 1.1998098492622375} +01/26/2022 20:30:34 - INFO - codeparrot_training - Step 705: {'lr': 0.00017625, 'samples': 135552, 'steps': 705, 'loss/train': 0.8409687280654907} +01/26/2022 20:30:38 - INFO - codeparrot_training - Step 706: {'lr': 0.00017649999999999998, 'samples': 135744, 'steps': 706, 'loss/train': 0.6072161346673965} +01/26/2022 20:30:41 - INFO - codeparrot_training - Step 707: {'lr': 0.00017675, 'samples': 135936, 'steps': 707, 'loss/train': 1.4789616465568542} +01/26/2022 20:30:45 - INFO - codeparrot_training - Step 708: {'lr': 0.000177, 'samples': 136128, 'steps': 708, 'loss/train': 0.8612742125988007} +01/26/2022 20:30:48 - INFO - codeparrot_training - Step 709: {'lr': 0.00017725, 'samples': 136320, 'steps': 709, 'loss/train': 0.9710145592689514} +01/26/2022 20:30:51 - INFO - codeparrot_training - Step 710: {'lr': 0.0001775, 'samples': 136512, 'steps': 710, 'loss/train': 1.1347587704658508} +01/26/2022 20:30:54 - INFO - codeparrot_training - Step 711: {'lr': 0.00017774999999999998, 'samples': 136704, 'steps': 711, 'loss/train': 0.818534642457962} +01/26/2022 20:30:57 - INFO - codeparrot_training - Step 712: {'lr': 0.000178, 'samples': 136896, 'steps': 712, 'loss/train': 0.9411788284778595} +01/26/2022 20:31:00 - INFO - codeparrot_training - Step 713: {'lr': 0.00017825, 'samples': 137088, 'steps': 713, 'loss/train': 1.6106010675430298} +01/26/2022 20:31:03 - INFO - codeparrot_training - Step 714: {'lr': 0.0001785, 'samples': 137280, 'steps': 714, 'loss/train': 1.0063637495040894} +01/26/2022 20:31:08 - INFO - codeparrot_training - Step 715: {'lr': 0.00017875, 'samples': 137472, 'steps': 715, 'loss/train': 1.0250695645809174} +01/26/2022 20:31:11 - INFO - codeparrot_training - Step 716: {'lr': 0.000179, 'samples': 137664, 'steps': 716, 'loss/train': 1.235056310892105} +01/26/2022 20:31:14 - INFO - codeparrot_training - Step 717: {'lr': 0.00017925, 'samples': 137856, 'steps': 717, 'loss/train': 1.1703775227069855} +01/26/2022 20:31:17 - INFO - codeparrot_training - Step 718: {'lr': 0.0001795, 'samples': 138048, 'steps': 718, 'loss/train': 0.6291922777891159} +01/26/2022 20:31:20 - INFO - codeparrot_training - Step 719: {'lr': 0.00017975, 'samples': 138240, 'steps': 719, 'loss/train': 0.6772595196962357} +01/26/2022 20:31:23 - INFO - codeparrot_training - Step 720: {'lr': 0.00017999999999999998, 'samples': 138432, 'steps': 720, 'loss/train': 0.562017023563385} +01/26/2022 20:31:27 - INFO - codeparrot_training - Step 721: {'lr': 0.00018025, 'samples': 138624, 'steps': 721, 'loss/train': 0.9805403351783752} +01/26/2022 20:31:30 - INFO - codeparrot_training - Step 722: {'lr': 0.0001805, 'samples': 138816, 'steps': 722, 'loss/train': 0.3919403851032257} +01/26/2022 20:31:33 - INFO - codeparrot_training - Step 723: {'lr': 0.00018075, 'samples': 139008, 'steps': 723, 'loss/train': 0.8560447990894318} +01/26/2022 20:31:37 - INFO - codeparrot_training - Step 724: {'lr': 0.000181, 'samples': 139200, 'steps': 724, 'loss/train': 1.100720465183258} +01/26/2022 20:31:40 - INFO - codeparrot_training - Step 725: {'lr': 0.00018125, 'samples': 139392, 'steps': 725, 'loss/train': 0.7327403426170349} +01/26/2022 20:31:43 - INFO - codeparrot_training - Step 726: {'lr': 0.0001815, 'samples': 139584, 'steps': 726, 'loss/train': 1.1028411090373993} +01/26/2022 20:31:47 - INFO - codeparrot_training - Step 727: {'lr': 0.00018175, 'samples': 139776, 'steps': 727, 'loss/train': 1.0841010510921478} +01/26/2022 20:31:50 - INFO - codeparrot_training - Step 728: {'lr': 0.000182, 'samples': 139968, 'steps': 728, 'loss/train': 0.6613376587629318} +01/26/2022 20:31:53 - INFO - codeparrot_training - Step 729: {'lr': 0.00018225, 'samples': 140160, 'steps': 729, 'loss/train': 0.6293601393699646} +01/26/2022 20:31:56 - INFO - codeparrot_training - Step 730: {'lr': 0.0001825, 'samples': 140352, 'steps': 730, 'loss/train': 0.9111529290676117} +01/26/2022 20:31:59 - INFO - codeparrot_training - Step 731: {'lr': 0.00018275, 'samples': 140544, 'steps': 731, 'loss/train': 1.2561307847499847} +01/26/2022 20:32:02 - INFO - codeparrot_training - Step 732: {'lr': 0.000183, 'samples': 140736, 'steps': 732, 'loss/train': 0.8419937789440155} +01/26/2022 20:32:09 - INFO - codeparrot_training - Step 733: {'lr': 0.00018325, 'samples': 140928, 'steps': 733, 'loss/train': 0.9878305792808533} +01/26/2022 20:32:12 - INFO - codeparrot_training - Step 734: {'lr': 0.0001835, 'samples': 141120, 'steps': 734, 'loss/train': 1.305568814277649} +01/26/2022 20:32:16 - INFO - codeparrot_training - Step 735: {'lr': 0.00018375, 'samples': 141312, 'steps': 735, 'loss/train': 0.1390710510313511} +01/26/2022 20:32:19 - INFO - codeparrot_training - Step 736: {'lr': 0.000184, 'samples': 141504, 'steps': 736, 'loss/train': 0.7866142094135284} +01/26/2022 20:32:22 - INFO - codeparrot_training - Step 737: {'lr': 0.00018425, 'samples': 141696, 'steps': 737, 'loss/train': 1.1417504847049713} +01/26/2022 20:32:25 - INFO - codeparrot_training - Step 738: {'lr': 0.0001845, 'samples': 141888, 'steps': 738, 'loss/train': 0.8403282165527344} +01/26/2022 20:32:28 - INFO - codeparrot_training - Step 739: {'lr': 0.00018475, 'samples': 142080, 'steps': 739, 'loss/train': 1.592317521572113} +01/26/2022 20:32:31 - INFO - codeparrot_training - Step 740: {'lr': 0.000185, 'samples': 142272, 'steps': 740, 'loss/train': 0.8372698724269867} +01/26/2022 20:32:34 - INFO - codeparrot_training - Step 741: {'lr': 0.00018525, 'samples': 142464, 'steps': 741, 'loss/train': 1.2187131643295288} +01/26/2022 20:32:38 - INFO - codeparrot_training - Step 742: {'lr': 0.0001855, 'samples': 142656, 'steps': 742, 'loss/train': 1.3483904600143433} +01/26/2022 20:32:42 - INFO - codeparrot_training - Step 743: {'lr': 0.00018575000000000002, 'samples': 142848, 'steps': 743, 'loss/train': 0.8372267782688141} +01/26/2022 20:32:45 - INFO - codeparrot_training - Step 744: {'lr': 0.000186, 'samples': 143040, 'steps': 744, 'loss/train': 0.9834917485713959} +01/26/2022 20:32:49 - INFO - codeparrot_training - Step 745: {'lr': 0.00018625, 'samples': 143232, 'steps': 745, 'loss/train': 0.7956083714962006} +01/26/2022 20:32:52 - INFO - codeparrot_training - Step 746: {'lr': 0.0001865, 'samples': 143424, 'steps': 746, 'loss/train': 0.7947530150413513} +01/26/2022 20:32:55 - INFO - codeparrot_training - Step 747: {'lr': 0.00018675, 'samples': 143616, 'steps': 747, 'loss/train': 1.4260998666286469} +01/26/2022 20:32:58 - INFO - codeparrot_training - Step 748: {'lr': 0.000187, 'samples': 143808, 'steps': 748, 'loss/train': 0.9100359678268433} +01/26/2022 20:33:01 - INFO - codeparrot_training - Step 749: {'lr': 0.00018725, 'samples': 144000, 'steps': 749, 'loss/train': 0.8670728802680969} +01/26/2022 20:33:04 - INFO - codeparrot_training - Step 750: {'lr': 0.0001875, 'samples': 144192, 'steps': 750, 'loss/train': 0.5285973697900772} +01/26/2022 20:33:07 - INFO - codeparrot_training - Step 751: {'lr': 0.00018775, 'samples': 144384, 'steps': 751, 'loss/train': 1.3827945291996002} +01/26/2022 20:33:14 - INFO - codeparrot_training - Step 752: {'lr': 0.00018800000000000002, 'samples': 144576, 'steps': 752, 'loss/train': 0.8902200758457184} +01/26/2022 20:33:17 - INFO - codeparrot_training - Step 753: {'lr': 0.00018825, 'samples': 144768, 'steps': 753, 'loss/train': 0.7913635075092316} +01/26/2022 20:33:20 - INFO - codeparrot_training - Step 754: {'lr': 0.0001885, 'samples': 144960, 'steps': 754, 'loss/train': 1.4179195761680603} +01/26/2022 20:33:23 - INFO - codeparrot_training - Step 755: {'lr': 0.00018875, 'samples': 145152, 'steps': 755, 'loss/train': 0.9423087537288666} +01/26/2022 20:33:26 - INFO - codeparrot_training - Step 756: {'lr': 0.000189, 'samples': 145344, 'steps': 756, 'loss/train': 0.9094258546829224} +01/26/2022 20:33:29 - INFO - codeparrot_training - Step 757: {'lr': 0.00018925, 'samples': 145536, 'steps': 757, 'loss/train': 1.1640411615371704} +01/26/2022 20:33:33 - INFO - codeparrot_training - Step 758: {'lr': 0.0001895, 'samples': 145728, 'steps': 758, 'loss/train': 1.0199125707149506} +01/26/2022 20:33:36 - INFO - codeparrot_training - Step 759: {'lr': 0.00018975, 'samples': 145920, 'steps': 759, 'loss/train': 1.3976908922195435} +01/26/2022 20:33:40 - INFO - codeparrot_training - Step 760: {'lr': 0.00019, 'samples': 146112, 'steps': 760, 'loss/train': 0.8301973342895508} +01/26/2022 20:33:43 - INFO - codeparrot_training - Step 761: {'lr': 0.00019025000000000002, 'samples': 146304, 'steps': 761, 'loss/train': 0.8738430440425873} +01/26/2022 20:33:46 - INFO - codeparrot_training - Step 762: {'lr': 0.0001905, 'samples': 146496, 'steps': 762, 'loss/train': 0.9114019274711609} +01/26/2022 20:33:50 - INFO - codeparrot_training - Step 763: {'lr': 0.00019075, 'samples': 146688, 'steps': 763, 'loss/train': 1.0480453670024872} +01/26/2022 20:33:53 - INFO - codeparrot_training - Step 764: {'lr': 0.000191, 'samples': 146880, 'steps': 764, 'loss/train': 1.0675413608551025} +01/26/2022 20:33:56 - INFO - codeparrot_training - Step 765: {'lr': 0.00019125000000000001, 'samples': 147072, 'steps': 765, 'loss/train': 0.9275387227535248} +01/26/2022 20:33:59 - INFO - codeparrot_training - Step 766: {'lr': 0.00019150000000000002, 'samples': 147264, 'steps': 766, 'loss/train': 1.5479466319084167} +01/26/2022 20:34:02 - INFO - codeparrot_training - Step 767: {'lr': 0.00019175, 'samples': 147456, 'steps': 767, 'loss/train': 1.1427192091941833} +01/26/2022 20:34:05 - INFO - codeparrot_training - Step 768: {'lr': 0.000192, 'samples': 147648, 'steps': 768, 'loss/train': 1.0345340967178345} +01/26/2022 20:34:10 - INFO - codeparrot_training - Step 769: {'lr': 0.00019225, 'samples': 147840, 'steps': 769, 'loss/train': 0.7592909932136536} +01/26/2022 20:34:13 - INFO - codeparrot_training - Step 770: {'lr': 0.00019250000000000002, 'samples': 148032, 'steps': 770, 'loss/train': 1.3460177779197693} +01/26/2022 20:34:16 - INFO - codeparrot_training - Step 771: {'lr': 0.00019275, 'samples': 148224, 'steps': 771, 'loss/train': 1.425268292427063} +01/26/2022 20:34:19 - INFO - codeparrot_training - Step 772: {'lr': 0.000193, 'samples': 148416, 'steps': 772, 'loss/train': 1.2665028870105743} +01/26/2022 20:34:22 - INFO - codeparrot_training - Step 773: {'lr': 0.00019325, 'samples': 148608, 'steps': 773, 'loss/train': 1.3485895693302155} +01/26/2022 20:34:25 - INFO - codeparrot_training - Step 774: {'lr': 0.00019350000000000001, 'samples': 148800, 'steps': 774, 'loss/train': 0.8594979643821716} +01/26/2022 20:34:28 - INFO - codeparrot_training - Step 775: {'lr': 0.00019375000000000002, 'samples': 148992, 'steps': 775, 'loss/train': 0.8714957535266876} +01/26/2022 20:34:32 - INFO - codeparrot_training - Step 776: {'lr': 0.000194, 'samples': 149184, 'steps': 776, 'loss/train': 0.5841094404459} +01/26/2022 20:34:35 - INFO - codeparrot_training - Step 777: {'lr': 0.00019425, 'samples': 149376, 'steps': 777, 'loss/train': 1.1731106042861938} +01/26/2022 20:34:41 - INFO - codeparrot_training - Step 778: {'lr': 0.0001945, 'samples': 149568, 'steps': 778, 'loss/train': 0.7482174932956696} +01/26/2022 20:34:44 - INFO - codeparrot_training - Step 779: {'lr': 0.00019475000000000002, 'samples': 149760, 'steps': 779, 'loss/train': 1.1149235665798187} +01/26/2022 20:34:47 - INFO - codeparrot_training - Step 780: {'lr': 0.00019500000000000002, 'samples': 149952, 'steps': 780, 'loss/train': 0.935143768787384} +01/26/2022 20:34:50 - INFO - codeparrot_training - Step 781: {'lr': 0.00019525, 'samples': 150144, 'steps': 781, 'loss/train': 0.8500780463218689} +01/26/2022 20:34:53 - INFO - codeparrot_training - Step 782: {'lr': 0.0001955, 'samples': 150336, 'steps': 782, 'loss/train': 0.5289049744606018} +01/26/2022 20:34:56 - INFO - codeparrot_training - Step 783: {'lr': 0.00019575000000000001, 'samples': 150528, 'steps': 783, 'loss/train': 1.4131156504154205} +01/26/2022 20:35:00 - INFO - codeparrot_training - Step 784: {'lr': 0.00019600000000000002, 'samples': 150720, 'steps': 784, 'loss/train': 0.30913594365119934} +01/26/2022 20:35:03 - INFO - codeparrot_training - Step 785: {'lr': 0.00019625, 'samples': 150912, 'steps': 785, 'loss/train': 1.2493534684181213} +01/26/2022 20:35:06 - INFO - codeparrot_training - Step 786: {'lr': 0.0001965, 'samples': 151104, 'steps': 786, 'loss/train': 0.9694152474403381} +01/26/2022 20:35:10 - INFO - codeparrot_training - Step 787: {'lr': 0.00019675, 'samples': 151296, 'steps': 787, 'loss/train': 0.3550092503428459} +01/26/2022 20:35:13 - INFO - codeparrot_training - Step 788: {'lr': 0.00019700000000000002, 'samples': 151488, 'steps': 788, 'loss/train': 0.690741777420044} +01/26/2022 20:35:16 - INFO - codeparrot_training - Step 789: {'lr': 0.00019725000000000002, 'samples': 151680, 'steps': 789, 'loss/train': 1.1352488100528717} +01/26/2022 20:35:20 - INFO - codeparrot_training - Step 790: {'lr': 0.0001975, 'samples': 151872, 'steps': 790, 'loss/train': 1.0375614166259766} +01/26/2022 20:35:23 - INFO - codeparrot_training - Step 791: {'lr': 0.00019775, 'samples': 152064, 'steps': 791, 'loss/train': 0.9023004770278931} +01/26/2022 20:35:26 - INFO - codeparrot_training - Step 792: {'lr': 0.00019800000000000002, 'samples': 152256, 'steps': 792, 'loss/train': 0.6497882902622223} +01/26/2022 20:35:29 - INFO - codeparrot_training - Step 793: {'lr': 0.00019825000000000002, 'samples': 152448, 'steps': 793, 'loss/train': 0.6133932173252106} +01/26/2022 20:35:32 - INFO - codeparrot_training - Step 794: {'lr': 0.00019850000000000003, 'samples': 152640, 'steps': 794, 'loss/train': 0.37911419570446014} +01/26/2022 20:35:35 - INFO - codeparrot_training - Step 795: {'lr': 0.00019875, 'samples': 152832, 'steps': 795, 'loss/train': 0.9305910766124725} +01/26/2022 20:35:42 - INFO - codeparrot_training - Step 796: {'lr': 0.000199, 'samples': 153024, 'steps': 796, 'loss/train': 1.0752443969249725} +01/26/2022 20:35:45 - INFO - codeparrot_training - Step 797: {'lr': 0.00019925000000000002, 'samples': 153216, 'steps': 797, 'loss/train': 0.7176533639431} +01/26/2022 20:35:48 - INFO - codeparrot_training - Step 798: {'lr': 0.00019950000000000002, 'samples': 153408, 'steps': 798, 'loss/train': 1.2106296122074127} +01/26/2022 20:35:51 - INFO - codeparrot_training - Step 799: {'lr': 0.00019975, 'samples': 153600, 'steps': 799, 'loss/train': 0.7819571793079376} +01/26/2022 20:35:54 - INFO - codeparrot_training - Step 800: {'lr': 0.0002, 'samples': 153792, 'steps': 800, 'loss/train': 1.0153715908527374} +01/26/2022 20:35:57 - INFO - codeparrot_training - Step 801: {'lr': 0.00020025000000000002, 'samples': 153984, 'steps': 801, 'loss/train': 1.2898483872413635} +01/26/2022 20:36:00 - INFO - codeparrot_training - Step 802: {'lr': 0.00020050000000000002, 'samples': 154176, 'steps': 802, 'loss/train': 1.1867233514785767} +01/26/2022 20:36:04 - INFO - codeparrot_training - Step 803: {'lr': 0.00020075000000000003, 'samples': 154368, 'steps': 803, 'loss/train': 0.6986654698848724} +01/26/2022 20:36:07 - INFO - codeparrot_training - Step 804: {'lr': 0.000201, 'samples': 154560, 'steps': 804, 'loss/train': 0.9793243110179901} +01/26/2022 20:36:11 - INFO - codeparrot_training - Step 805: {'lr': 0.00020125, 'samples': 154752, 'steps': 805, 'loss/train': 0.5471542775630951} +01/26/2022 20:36:15 - INFO - codeparrot_training - Step 806: {'lr': 0.00020150000000000002, 'samples': 154944, 'steps': 806, 'loss/train': 1.5339066982269287} +01/26/2022 20:36:18 - INFO - codeparrot_training - Step 807: {'lr': 0.00020175000000000003, 'samples': 155136, 'steps': 807, 'loss/train': 0.3004101812839508} +01/26/2022 20:36:21 - INFO - codeparrot_training - Step 808: {'lr': 0.000202, 'samples': 155328, 'steps': 808, 'loss/train': 0.9867880046367645} +01/26/2022 20:36:24 - INFO - codeparrot_training - Step 809: {'lr': 0.00020225, 'samples': 155520, 'steps': 809, 'loss/train': 1.2848010957241058} +01/26/2022 20:36:27 - INFO - codeparrot_training - Step 810: {'lr': 0.00020250000000000002, 'samples': 155712, 'steps': 810, 'loss/train': 1.0448269844055176} +01/26/2022 20:36:30 - INFO - codeparrot_training - Step 811: {'lr': 0.00020275000000000002, 'samples': 155904, 'steps': 811, 'loss/train': 1.1478981971740723} +01/26/2022 20:36:33 - INFO - codeparrot_training - Step 812: {'lr': 0.00020300000000000003, 'samples': 156096, 'steps': 812, 'loss/train': 0.8939153552055359} +01/26/2022 20:36:37 - INFO - codeparrot_training - Step 813: {'lr': 0.00020324999999999998, 'samples': 156288, 'steps': 813, 'loss/train': 0.6805943101644516} +01/26/2022 20:36:41 - INFO - codeparrot_training - Step 814: {'lr': 0.00020349999999999999, 'samples': 156480, 'steps': 814, 'loss/train': 0.5918071120977402} +01/26/2022 20:36:44 - INFO - codeparrot_training - Step 815: {'lr': 0.00020375, 'samples': 156672, 'steps': 815, 'loss/train': 0.7792027294635773} +01/26/2022 20:36:47 - INFO - codeparrot_training - Step 816: {'lr': 0.000204, 'samples': 156864, 'steps': 816, 'loss/train': 1.0537404119968414} +01/26/2022 20:36:51 - INFO - codeparrot_training - Step 817: {'lr': 0.00020425, 'samples': 157056, 'steps': 817, 'loss/train': 0.874046266078949} +01/26/2022 20:36:54 - INFO - codeparrot_training - Step 818: {'lr': 0.00020449999999999998, 'samples': 157248, 'steps': 818, 'loss/train': 0.8286419212818146} +01/26/2022 20:36:57 - INFO - codeparrot_training - Step 819: {'lr': 0.00020475, 'samples': 157440, 'steps': 819, 'loss/train': 1.1937158703804016} +01/26/2022 20:37:00 - INFO - codeparrot_training - Step 820: {'lr': 0.000205, 'samples': 157632, 'steps': 820, 'loss/train': 0.24175997078418732} +01/26/2022 20:37:03 - INFO - codeparrot_training - Step 821: {'lr': 0.00020525, 'samples': 157824, 'steps': 821, 'loss/train': 1.4559266567230225} +01/26/2022 20:37:07 - INFO - codeparrot_training - Step 822: {'lr': 0.00020549999999999998, 'samples': 158016, 'steps': 822, 'loss/train': 1.3116374015808105} +01/26/2022 20:37:11 - INFO - codeparrot_training - Step 823: {'lr': 0.00020575, 'samples': 158208, 'steps': 823, 'loss/train': 1.1885481476783752} +01/26/2022 20:37:14 - INFO - codeparrot_training - Step 824: {'lr': 0.000206, 'samples': 158400, 'steps': 824, 'loss/train': 0.7910436987876892} +01/26/2022 20:37:17 - INFO - codeparrot_training - Step 825: {'lr': 0.00020625, 'samples': 158592, 'steps': 825, 'loss/train': 1.0141403675079346} +01/26/2022 20:37:20 - INFO - codeparrot_training - Step 826: {'lr': 0.0002065, 'samples': 158784, 'steps': 826, 'loss/train': 1.0890448093414307} +01/26/2022 20:37:23 - INFO - codeparrot_training - Step 827: {'lr': 0.00020674999999999998, 'samples': 158976, 'steps': 827, 'loss/train': 1.073274314403534} +01/26/2022 20:37:26 - INFO - codeparrot_training - Step 828: {'lr': 0.000207, 'samples': 159168, 'steps': 828, 'loss/train': 1.4990364611148834} +01/26/2022 20:37:29 - INFO - codeparrot_training - Step 829: {'lr': 0.00020725, 'samples': 159360, 'steps': 829, 'loss/train': 1.4358133971691132} +01/26/2022 20:37:33 - INFO - codeparrot_training - Step 830: {'lr': 0.0002075, 'samples': 159552, 'steps': 830, 'loss/train': 0.9667293727397919} +01/26/2022 20:37:39 - INFO - codeparrot_training - Step 831: {'lr': 0.00020774999999999998, 'samples': 159744, 'steps': 831, 'loss/train': 1.0747352242469788} +01/26/2022 20:37:42 - INFO - codeparrot_training - Step 832: {'lr': 0.000208, 'samples': 159936, 'steps': 832, 'loss/train': 1.0136394202709198} +01/26/2022 20:37:45 - INFO - codeparrot_training - Step 833: {'lr': 0.00020825, 'samples': 160128, 'steps': 833, 'loss/train': 1.1674889624118805} +01/26/2022 20:37:48 - INFO - codeparrot_training - Step 834: {'lr': 0.0002085, 'samples': 160320, 'steps': 834, 'loss/train': 1.2853999435901642} +01/26/2022 20:37:51 - INFO - codeparrot_training - Step 835: {'lr': 0.00020875, 'samples': 160512, 'steps': 835, 'loss/train': 1.1943403780460358} +01/26/2022 20:37:54 - INFO - codeparrot_training - Step 836: {'lr': 0.00020899999999999998, 'samples': 160704, 'steps': 836, 'loss/train': 1.020514190196991} +01/26/2022 20:37:58 - INFO - codeparrot_training - Step 837: {'lr': 0.00020925, 'samples': 160896, 'steps': 837, 'loss/train': 0.9671913385391235} +01/26/2022 20:38:01 - INFO - codeparrot_training - Step 838: {'lr': 0.0002095, 'samples': 161088, 'steps': 838, 'loss/train': 1.0849076807498932} +01/26/2022 20:38:04 - INFO - codeparrot_training - Step 839: {'lr': 0.00020975, 'samples': 161280, 'steps': 839, 'loss/train': 1.339478462934494} +01/26/2022 20:38:08 - INFO - codeparrot_training - Step 840: {'lr': 0.00021, 'samples': 161472, 'steps': 840, 'loss/train': 0.7180408984422684} +01/26/2022 20:38:11 - INFO - codeparrot_training - Step 841: {'lr': 0.00021025, 'samples': 161664, 'steps': 841, 'loss/train': 0.9816141128540039} +01/26/2022 20:38:15 - INFO - codeparrot_training - Step 842: {'lr': 0.0002105, 'samples': 161856, 'steps': 842, 'loss/train': 0.8072361946105957} +01/26/2022 20:38:18 - INFO - codeparrot_training - Step 843: {'lr': 0.00021075, 'samples': 162048, 'steps': 843, 'loss/train': 1.1432275772094727} +01/26/2022 20:38:21 - INFO - codeparrot_training - Step 844: {'lr': 0.000211, 'samples': 162240, 'steps': 844, 'loss/train': 1.3443118929862976} +01/26/2022 20:38:24 - INFO - codeparrot_training - Step 845: {'lr': 0.00021124999999999998, 'samples': 162432, 'steps': 845, 'loss/train': 0.9823727309703827} +01/26/2022 20:38:27 - INFO - codeparrot_training - Step 846: {'lr': 0.0002115, 'samples': 162624, 'steps': 846, 'loss/train': 1.2291758358478546} +01/26/2022 20:38:30 - INFO - codeparrot_training - Step 847: {'lr': 0.00021175, 'samples': 162816, 'steps': 847, 'loss/train': 0.8374214172363281} +01/26/2022 20:38:34 - INFO - codeparrot_training - Step 848: {'lr': 0.000212, 'samples': 163008, 'steps': 848, 'loss/train': 0.9853262901306152} +01/26/2022 20:38:38 - INFO - codeparrot_training - Step 849: {'lr': 0.00021225, 'samples': 163200, 'steps': 849, 'loss/train': 0.9183481335639954} +01/26/2022 20:38:41 - INFO - codeparrot_training - Step 850: {'lr': 0.0002125, 'samples': 163392, 'steps': 850, 'loss/train': 1.2009184956550598} +01/26/2022 20:38:44 - INFO - codeparrot_training - Step 851: {'lr': 0.00021275, 'samples': 163584, 'steps': 851, 'loss/train': 0.850340723991394} +01/26/2022 20:38:47 - INFO - codeparrot_training - Step 852: {'lr': 0.000213, 'samples': 163776, 'steps': 852, 'loss/train': 0.9754958152770996} +01/26/2022 20:38:51 - INFO - codeparrot_training - Step 853: {'lr': 0.00021325, 'samples': 163968, 'steps': 853, 'loss/train': 0.9106079041957855} +01/26/2022 20:38:54 - INFO - codeparrot_training - Step 854: {'lr': 0.0002135, 'samples': 164160, 'steps': 854, 'loss/train': 1.1068985760211945} +01/26/2022 20:38:57 - INFO - codeparrot_training - Step 855: {'lr': 0.00021375, 'samples': 164352, 'steps': 855, 'loss/train': 0.9738516211509705} +01/26/2022 20:39:00 - INFO - codeparrot_training - Step 856: {'lr': 0.000214, 'samples': 164544, 'steps': 856, 'loss/train': 1.3173284232616425} +01/26/2022 20:39:03 - INFO - codeparrot_training - Step 857: {'lr': 0.00021425, 'samples': 164736, 'steps': 857, 'loss/train': 0.8615550398826599} +01/26/2022 20:39:09 - INFO - codeparrot_training - Step 858: {'lr': 0.0002145, 'samples': 164928, 'steps': 858, 'loss/train': 1.5239757299423218} +01/26/2022 20:39:12 - INFO - codeparrot_training - Step 859: {'lr': 0.00021475, 'samples': 165120, 'steps': 859, 'loss/train': 0.7371685802936554} +01/26/2022 20:39:16 - INFO - codeparrot_training - Step 860: {'lr': 0.000215, 'samples': 165312, 'steps': 860, 'loss/train': 0.7635878920555115} +01/26/2022 20:39:19 - INFO - codeparrot_training - Step 861: {'lr': 0.00021525, 'samples': 165504, 'steps': 861, 'loss/train': 0.7282658368349075} +01/26/2022 20:39:22 - INFO - codeparrot_training - Step 862: {'lr': 0.0002155, 'samples': 165696, 'steps': 862, 'loss/train': 1.1572135090827942} +01/26/2022 20:39:25 - INFO - codeparrot_training - Step 863: {'lr': 0.00021575, 'samples': 165888, 'steps': 863, 'loss/train': 1.0142411291599274} +01/26/2022 20:39:28 - INFO - codeparrot_training - Step 864: {'lr': 0.000216, 'samples': 166080, 'steps': 864, 'loss/train': 0.6775917559862137} +01/26/2022 20:39:31 - INFO - codeparrot_training - Step 865: {'lr': 0.00021625, 'samples': 166272, 'steps': 865, 'loss/train': 1.0829683542251587} +01/26/2022 20:39:34 - INFO - codeparrot_training - Step 866: {'lr': 0.0002165, 'samples': 166464, 'steps': 866, 'loss/train': 0.8288234174251556} +01/26/2022 20:39:39 - INFO - codeparrot_training - Step 867: {'lr': 0.00021675, 'samples': 166656, 'steps': 867, 'loss/train': 1.6893358826637268} +01/26/2022 20:39:42 - INFO - codeparrot_training - Step 868: {'lr': 0.00021700000000000002, 'samples': 166848, 'steps': 868, 'loss/train': 1.5192065834999084} +01/26/2022 20:39:45 - INFO - codeparrot_training - Step 869: {'lr': 0.00021725, 'samples': 167040, 'steps': 869, 'loss/train': 0.832884281873703} +01/26/2022 20:39:48 - INFO - codeparrot_training - Step 870: {'lr': 0.0002175, 'samples': 167232, 'steps': 870, 'loss/train': 0.710791289806366} +01/26/2022 20:39:51 - INFO - codeparrot_training - Step 871: {'lr': 0.00021775, 'samples': 167424, 'steps': 871, 'loss/train': 1.0109756290912628} +01/26/2022 20:39:54 - INFO - codeparrot_training - Step 872: {'lr': 0.000218, 'samples': 167616, 'steps': 872, 'loss/train': 0.9577648043632507} +01/26/2022 20:39:58 - INFO - codeparrot_training - Step 873: {'lr': 0.00021825, 'samples': 167808, 'steps': 873, 'loss/train': 0.6962565332651138} +01/26/2022 20:40:01 - INFO - codeparrot_training - Step 874: {'lr': 0.0002185, 'samples': 168000, 'steps': 874, 'loss/train': 1.0033046007156372} +01/26/2022 20:40:04 - INFO - codeparrot_training - Step 875: {'lr': 0.00021875, 'samples': 168192, 'steps': 875, 'loss/train': 0.4613918960094452} +01/26/2022 20:40:10 - INFO - codeparrot_training - Step 876: {'lr': 0.000219, 'samples': 168384, 'steps': 876, 'loss/train': 0.6006049811840057} +01/26/2022 20:40:13 - INFO - codeparrot_training - Step 877: {'lr': 0.00021925000000000002, 'samples': 168576, 'steps': 877, 'loss/train': 1.1248971819877625} +01/26/2022 20:40:16 - INFO - codeparrot_training - Step 878: {'lr': 0.0002195, 'samples': 168768, 'steps': 878, 'loss/train': 0.8051652610301971} +01/26/2022 20:40:20 - INFO - codeparrot_training - Step 879: {'lr': 0.00021975, 'samples': 168960, 'steps': 879, 'loss/train': 0.6274931877851486} +01/26/2022 20:40:23 - INFO - codeparrot_training - Step 880: {'lr': 0.00022, 'samples': 169152, 'steps': 880, 'loss/train': 1.4137959480285645} +01/26/2022 20:40:26 - INFO - codeparrot_training - Step 881: {'lr': 0.00022025000000000001, 'samples': 169344, 'steps': 881, 'loss/train': 1.2263758778572083} +01/26/2022 20:40:29 - INFO - codeparrot_training - Step 882: {'lr': 0.0002205, 'samples': 169536, 'steps': 882, 'loss/train': 0.9560001790523529} +01/26/2022 20:40:32 - INFO - codeparrot_training - Step 883: {'lr': 0.00022075, 'samples': 169728, 'steps': 883, 'loss/train': 1.1473515629768372} +01/26/2022 20:40:36 - INFO - codeparrot_training - Step 884: {'lr': 0.000221, 'samples': 169920, 'steps': 884, 'loss/train': 0.3859972804784775} +01/26/2022 20:40:40 - INFO - codeparrot_training - Step 885: {'lr': 0.00022125, 'samples': 170112, 'steps': 885, 'loss/train': 1.1466841399669647} +01/26/2022 20:40:43 - INFO - codeparrot_training - Step 886: {'lr': 0.00022150000000000002, 'samples': 170304, 'steps': 886, 'loss/train': 0.6745972484350204} +01/26/2022 20:40:46 - INFO - codeparrot_training - Step 887: {'lr': 0.00022175, 'samples': 170496, 'steps': 887, 'loss/train': 0.8258094191551208} +01/26/2022 20:40:49 - INFO - codeparrot_training - Step 888: {'lr': 0.000222, 'samples': 170688, 'steps': 888, 'loss/train': 0.4547792226076126} +01/26/2022 20:40:52 - INFO - codeparrot_training - Step 889: {'lr': 0.00022225, 'samples': 170880, 'steps': 889, 'loss/train': 1.1471035480499268} +01/26/2022 20:40:55 - INFO - codeparrot_training - Step 890: {'lr': 0.00022250000000000001, 'samples': 171072, 'steps': 890, 'loss/train': 1.7396194338798523} +01/26/2022 20:40:58 - INFO - codeparrot_training - Step 891: {'lr': 0.00022275000000000002, 'samples': 171264, 'steps': 891, 'loss/train': 0.4545985758304596} +01/26/2022 20:41:02 - INFO - codeparrot_training - Step 892: {'lr': 0.000223, 'samples': 171456, 'steps': 892, 'loss/train': 0.8459577262401581} +01/26/2022 20:41:06 - INFO - codeparrot_training - Step 893: {'lr': 0.00022325, 'samples': 171648, 'steps': 893, 'loss/train': 0.9746755957603455} +01/26/2022 20:41:09 - INFO - codeparrot_training - Step 894: {'lr': 0.0002235, 'samples': 171840, 'steps': 894, 'loss/train': 0.8435381054878235} +01/26/2022 20:41:12 - INFO - codeparrot_training - Step 895: {'lr': 0.00022375000000000002, 'samples': 172032, 'steps': 895, 'loss/train': 1.2720170617103577} +01/26/2022 20:41:15 - INFO - codeparrot_training - Step 896: {'lr': 0.000224, 'samples': 172224, 'steps': 896, 'loss/train': 0.9266642332077026} +01/26/2022 20:41:19 - INFO - codeparrot_training - Step 897: {'lr': 0.00022425, 'samples': 172416, 'steps': 897, 'loss/train': 1.1812182068824768} +01/26/2022 20:41:22 - INFO - codeparrot_training - Step 898: {'lr': 0.0002245, 'samples': 172608, 'steps': 898, 'loss/train': 0.930000901222229} +01/26/2022 20:41:25 - INFO - codeparrot_training - Step 899: {'lr': 0.00022475000000000001, 'samples': 172800, 'steps': 899, 'loss/train': 0.6613834798336029} +01/26/2022 20:41:28 - INFO - codeparrot_training - Step 900: {'lr': 0.00022500000000000002, 'samples': 172992, 'steps': 900, 'loss/train': 1.0311627388000488} +01/26/2022 20:41:31 - INFO - codeparrot_training - Step 901: {'lr': 0.00022525, 'samples': 173184, 'steps': 901, 'loss/train': 0.3564440757036209} +01/26/2022 20:41:38 - INFO - codeparrot_training - Step 902: {'lr': 0.0002255, 'samples': 173376, 'steps': 902, 'loss/train': 1.0167976319789886} +01/26/2022 20:41:41 - INFO - codeparrot_training - Step 903: {'lr': 0.00022575, 'samples': 173568, 'steps': 903, 'loss/train': 1.3098012506961823} +01/26/2022 20:41:44 - INFO - codeparrot_training - Step 904: {'lr': 0.00022600000000000002, 'samples': 173760, 'steps': 904, 'loss/train': 0.791702538728714} +01/26/2022 20:41:47 - INFO - codeparrot_training - Step 905: {'lr': 0.00022625000000000002, 'samples': 173952, 'steps': 905, 'loss/train': 0.9221445322036743} +01/26/2022 20:41:50 - INFO - codeparrot_training - Step 906: {'lr': 0.0002265, 'samples': 174144, 'steps': 906, 'loss/train': 0.8842598497867584} +01/26/2022 20:41:53 - INFO - codeparrot_training - Step 907: {'lr': 0.00022675, 'samples': 174336, 'steps': 907, 'loss/train': 1.2631148099899292} +01/26/2022 20:41:56 - INFO - codeparrot_training - Step 908: {'lr': 0.00022700000000000002, 'samples': 174528, 'steps': 908, 'loss/train': 1.1954034268856049} +01/26/2022 20:42:00 - INFO - codeparrot_training - Step 909: {'lr': 0.00022725000000000002, 'samples': 174720, 'steps': 909, 'loss/train': 1.0482499301433563} +01/26/2022 20:42:03 - INFO - codeparrot_training - Step 910: {'lr': 0.0002275, 'samples': 174912, 'steps': 910, 'loss/train': 0.7730309665203094} +01/26/2022 20:42:07 - INFO - codeparrot_training - Step 911: {'lr': 0.00022775, 'samples': 175104, 'steps': 911, 'loss/train': 0.5209553986787796} +01/26/2022 20:42:10 - INFO - codeparrot_training - Step 912: {'lr': 0.000228, 'samples': 175296, 'steps': 912, 'loss/train': 1.046157717704773} +01/26/2022 20:42:13 - INFO - codeparrot_training - Step 913: {'lr': 0.00022825000000000002, 'samples': 175488, 'steps': 913, 'loss/train': 0.655177965760231} +01/26/2022 20:42:16 - INFO - codeparrot_training - Step 914: {'lr': 0.00022850000000000002, 'samples': 175680, 'steps': 914, 'loss/train': 0.6155382245779037} +01/26/2022 20:42:20 - INFO - codeparrot_training - Step 915: {'lr': 0.00022875, 'samples': 175872, 'steps': 915, 'loss/train': 1.552494764328003} +01/26/2022 20:42:23 - INFO - codeparrot_training - Step 916: {'lr': 0.000229, 'samples': 176064, 'steps': 916, 'loss/train': 0.7812283337116241} +01/26/2022 20:42:26 - INFO - codeparrot_training - Step 917: {'lr': 0.00022925000000000002, 'samples': 176256, 'steps': 917, 'loss/train': 1.674992859363556} +01/26/2022 20:42:29 - INFO - codeparrot_training - Step 918: {'lr': 0.00022950000000000002, 'samples': 176448, 'steps': 918, 'loss/train': 0.7351734638214111} +01/26/2022 20:42:32 - INFO - codeparrot_training - Step 919: {'lr': 0.00022975000000000003, 'samples': 176640, 'steps': 919, 'loss/train': 0.8693040311336517} +01/26/2022 20:42:38 - INFO - codeparrot_training - Step 920: {'lr': 0.00023, 'samples': 176832, 'steps': 920, 'loss/train': 0.5426577776670456} +01/26/2022 20:42:41 - INFO - codeparrot_training - Step 921: {'lr': 0.00023025, 'samples': 177024, 'steps': 921, 'loss/train': 1.1854038834571838} +01/26/2022 20:42:45 - INFO - codeparrot_training - Step 922: {'lr': 0.00023050000000000002, 'samples': 177216, 'steps': 922, 'loss/train': 1.2404606938362122} +01/26/2022 20:42:48 - INFO - codeparrot_training - Step 923: {'lr': 0.00023075000000000003, 'samples': 177408, 'steps': 923, 'loss/train': 0.4229538291692734} +01/26/2022 20:42:51 - INFO - codeparrot_training - Step 924: {'lr': 0.000231, 'samples': 177600, 'steps': 924, 'loss/train': 1.0949006080627441} +01/26/2022 20:42:54 - INFO - codeparrot_training - Step 925: {'lr': 0.00023125, 'samples': 177792, 'steps': 925, 'loss/train': 0.6338553428649902} +01/26/2022 20:42:57 - INFO - codeparrot_training - Step 926: {'lr': 0.00023150000000000002, 'samples': 177984, 'steps': 926, 'loss/train': 0.7405180782079697} +01/26/2022 20:43:00 - INFO - codeparrot_training - Step 927: {'lr': 0.00023175000000000002, 'samples': 178176, 'steps': 927, 'loss/train': 1.1272001266479492} +01/26/2022 20:43:03 - INFO - codeparrot_training - Step 928: {'lr': 0.00023200000000000003, 'samples': 178368, 'steps': 928, 'loss/train': 0.3659774512052536} +01/26/2022 20:43:08 - INFO - codeparrot_training - Step 929: {'lr': 0.00023225, 'samples': 178560, 'steps': 929, 'loss/train': 0.8474507331848145} +01/26/2022 20:43:11 - INFO - codeparrot_training - Step 930: {'lr': 0.0002325, 'samples': 178752, 'steps': 930, 'loss/train': 0.7008123099803925} +01/26/2022 20:43:14 - INFO - codeparrot_training - Step 931: {'lr': 0.00023275000000000002, 'samples': 178944, 'steps': 931, 'loss/train': 0.6545947194099426} +01/26/2022 20:43:17 - INFO - codeparrot_training - Step 932: {'lr': 0.00023300000000000003, 'samples': 179136, 'steps': 932, 'loss/train': 1.0568382740020752} +01/26/2022 20:43:20 - INFO - codeparrot_training - Step 933: {'lr': 0.00023325, 'samples': 179328, 'steps': 933, 'loss/train': 0.4824586361646652} +01/26/2022 20:43:23 - INFO - codeparrot_training - Step 934: {'lr': 0.0002335, 'samples': 179520, 'steps': 934, 'loss/train': 0.3725056126713753} +01/26/2022 20:43:27 - INFO - codeparrot_training - Step 935: {'lr': 0.00023375000000000002, 'samples': 179712, 'steps': 935, 'loss/train': 0.4487442076206207} +01/26/2022 20:43:30 - INFO - codeparrot_training - Step 936: {'lr': 0.00023400000000000002, 'samples': 179904, 'steps': 936, 'loss/train': 0.9568904042243958} +01/26/2022 20:43:33 - INFO - codeparrot_training - Step 937: {'lr': 0.00023425000000000003, 'samples': 180096, 'steps': 937, 'loss/train': 1.1710163354873657} +01/26/2022 20:43:37 - INFO - codeparrot_training - Step 938: {'lr': 0.00023449999999999998, 'samples': 180288, 'steps': 938, 'loss/train': 1.0348673164844513} +01/26/2022 20:43:40 - INFO - codeparrot_training - Step 939: {'lr': 0.00023475, 'samples': 180480, 'steps': 939, 'loss/train': 1.2007386088371277} +01/26/2022 20:43:44 - INFO - codeparrot_training - Step 940: {'lr': 0.000235, 'samples': 180672, 'steps': 940, 'loss/train': 1.03763347864151} +01/26/2022 20:43:47 - INFO - codeparrot_training - Step 941: {'lr': 0.00023525, 'samples': 180864, 'steps': 941, 'loss/train': 1.4499427378177643} +01/26/2022 20:43:50 - INFO - codeparrot_training - Step 942: {'lr': 0.0002355, 'samples': 181056, 'steps': 942, 'loss/train': 1.1046107411384583} +01/26/2022 20:43:53 - INFO - codeparrot_training - Step 943: {'lr': 0.00023574999999999998, 'samples': 181248, 'steps': 943, 'loss/train': 0.8374215960502625} +01/26/2022 20:43:56 - INFO - codeparrot_training - Step 944: {'lr': 0.000236, 'samples': 181440, 'steps': 944, 'loss/train': 0.8754523694515228} +01/26/2022 20:43:59 - INFO - codeparrot_training - Step 945: {'lr': 0.00023625, 'samples': 181632, 'steps': 945, 'loss/train': 1.2402465641498566} +01/26/2022 20:44:04 - INFO - codeparrot_training - Step 946: {'lr': 0.0002365, 'samples': 181824, 'steps': 946, 'loss/train': 0.6099693328142166} +01/26/2022 20:44:07 - INFO - codeparrot_training - Step 947: {'lr': 0.00023674999999999998, 'samples': 182016, 'steps': 947, 'loss/train': 0.970650851726532} +01/26/2022 20:44:10 - INFO - codeparrot_training - Step 948: {'lr': 0.000237, 'samples': 182208, 'steps': 948, 'loss/train': 0.5941029936075211} +01/26/2022 20:44:13 - INFO - codeparrot_training - Step 949: {'lr': 0.00023725, 'samples': 182400, 'steps': 949, 'loss/train': 1.2262366712093353} +01/26/2022 20:44:16 - INFO - codeparrot_training - Step 950: {'lr': 0.0002375, 'samples': 182592, 'steps': 950, 'loss/train': 0.9913835227489471} +01/26/2022 20:44:19 - INFO - codeparrot_training - Step 951: {'lr': 0.00023775, 'samples': 182784, 'steps': 951, 'loss/train': 0.7844657599925995} +01/26/2022 20:44:23 - INFO - codeparrot_training - Step 952: {'lr': 0.00023799999999999998, 'samples': 182976, 'steps': 952, 'loss/train': 1.0873497426509857} +01/26/2022 20:44:26 - INFO - codeparrot_training - Step 953: {'lr': 0.00023825, 'samples': 183168, 'steps': 953, 'loss/train': 0.8692873120307922} +01/26/2022 20:44:29 - INFO - codeparrot_training - Step 954: {'lr': 0.0002385, 'samples': 183360, 'steps': 954, 'loss/train': 0.6407058835029602} +01/26/2022 20:44:35 - INFO - codeparrot_training - Step 955: {'lr': 0.00023875, 'samples': 183552, 'steps': 955, 'loss/train': 1.2325417399406433} +01/26/2022 20:44:38 - INFO - codeparrot_training - Step 956: {'lr': 0.00023899999999999998, 'samples': 183744, 'steps': 956, 'loss/train': 0.9815036952495575} +01/26/2022 20:44:41 - INFO - codeparrot_training - Step 957: {'lr': 0.00023925, 'samples': 183936, 'steps': 957, 'loss/train': 1.0643342435359955} +01/26/2022 20:44:45 - INFO - codeparrot_training - Step 958: {'lr': 0.0002395, 'samples': 184128, 'steps': 958, 'loss/train': 0.570918470621109} +01/26/2022 20:44:48 - INFO - codeparrot_training - Step 959: {'lr': 0.00023975, 'samples': 184320, 'steps': 959, 'loss/train': 1.0236015021800995} +01/26/2022 20:44:51 - INFO - codeparrot_training - Step 960: {'lr': 0.00024, 'samples': 184512, 'steps': 960, 'loss/train': 1.0369458496570587} +01/26/2022 20:44:54 - INFO - codeparrot_training - Step 961: {'lr': 0.00024024999999999999, 'samples': 184704, 'steps': 961, 'loss/train': 0.9108691513538361} +01/26/2022 20:44:57 - INFO - codeparrot_training - Step 962: {'lr': 0.0002405, 'samples': 184896, 'steps': 962, 'loss/train': 0.8744374215602875} +01/26/2022 20:45:00 - INFO - codeparrot_training - Step 963: {'lr': 0.00024075, 'samples': 185088, 'steps': 963, 'loss/train': 0.7000968307256699} +01/26/2022 20:45:05 - INFO - codeparrot_training - Step 964: {'lr': 0.000241, 'samples': 185280, 'steps': 964, 'loss/train': 1.119715690612793} +01/26/2022 20:45:08 - INFO - codeparrot_training - Step 965: {'lr': 0.00024125, 'samples': 185472, 'steps': 965, 'loss/train': 1.2492819428443909} +01/26/2022 20:45:11 - INFO - codeparrot_training - Step 966: {'lr': 0.0002415, 'samples': 185664, 'steps': 966, 'loss/train': 1.006849229335785} +01/26/2022 20:45:14 - INFO - codeparrot_training - Step 967: {'lr': 0.00024175, 'samples': 185856, 'steps': 967, 'loss/train': 0.8514295220375061} +01/26/2022 20:45:17 - INFO - codeparrot_training - Step 968: {'lr': 0.000242, 'samples': 186048, 'steps': 968, 'loss/train': 0.9313877820968628} +01/26/2022 20:45:20 - INFO - codeparrot_training - Step 969: {'lr': 0.00024225, 'samples': 186240, 'steps': 969, 'loss/train': 0.772581160068512} +01/26/2022 20:45:23 - INFO - codeparrot_training - Step 970: {'lr': 0.00024249999999999999, 'samples': 186432, 'steps': 970, 'loss/train': 0.7749391794204712} +01/26/2022 20:45:26 - INFO - codeparrot_training - Step 971: {'lr': 0.00024275, 'samples': 186624, 'steps': 971, 'loss/train': 1.497967153787613} +01/26/2022 20:45:30 - INFO - codeparrot_training - Step 972: {'lr': 0.000243, 'samples': 186816, 'steps': 972, 'loss/train': 0.6079481989145279} +01/26/2022 20:45:34 - INFO - codeparrot_training - Step 973: {'lr': 0.00024325, 'samples': 187008, 'steps': 973, 'loss/train': 1.072899878025055} +01/26/2022 20:45:37 - INFO - codeparrot_training - Step 974: {'lr': 0.0002435, 'samples': 187200, 'steps': 974, 'loss/train': 0.8720228970050812} +01/26/2022 20:45:40 - INFO - codeparrot_training - Step 975: {'lr': 0.00024375, 'samples': 187392, 'steps': 975, 'loss/train': 0.4871988594532013} +01/26/2022 20:45:44 - INFO - codeparrot_training - Step 976: {'lr': 0.000244, 'samples': 187584, 'steps': 976, 'loss/train': 0.7950179278850555} +01/26/2022 20:45:47 - INFO - codeparrot_training - Step 977: {'lr': 0.00024425, 'samples': 187776, 'steps': 977, 'loss/train': 1.214974969625473} +01/26/2022 20:45:50 - INFO - codeparrot_training - Step 978: {'lr': 0.0002445, 'samples': 187968, 'steps': 978, 'loss/train': 0.663908064365387} +01/26/2022 20:45:53 - INFO - codeparrot_training - Step 979: {'lr': 0.00024475, 'samples': 188160, 'steps': 979, 'loss/train': 0.6878142356872559} +01/26/2022 20:45:56 - INFO - codeparrot_training - Step 980: {'lr': 0.000245, 'samples': 188352, 'steps': 980, 'loss/train': 0.900773674249649} +01/26/2022 20:46:02 - INFO - codeparrot_training - Step 981: {'lr': 0.00024525, 'samples': 188544, 'steps': 981, 'loss/train': 0.9617729187011719} +01/26/2022 20:46:05 - INFO - codeparrot_training - Step 982: {'lr': 0.0002455, 'samples': 188736, 'steps': 982, 'loss/train': 0.7470361590385437} +01/26/2022 20:46:09 - INFO - codeparrot_training - Step 983: {'lr': 0.00024575, 'samples': 188928, 'steps': 983, 'loss/train': 1.06081885099411} +01/26/2022 20:46:12 - INFO - codeparrot_training - Step 984: {'lr': 0.000246, 'samples': 189120, 'steps': 984, 'loss/train': 0.6491813957691193} +01/26/2022 20:46:15 - INFO - codeparrot_training - Step 985: {'lr': 0.00024625, 'samples': 189312, 'steps': 985, 'loss/train': 0.8732985556125641} +01/26/2022 20:46:18 - INFO - codeparrot_training - Step 986: {'lr': 0.00024650000000000003, 'samples': 189504, 'steps': 986, 'loss/train': 0.8752740919589996} +01/26/2022 20:46:21 - INFO - codeparrot_training - Step 987: {'lr': 0.00024675, 'samples': 189696, 'steps': 987, 'loss/train': 1.5399380922317505} +01/26/2022 20:46:24 - INFO - codeparrot_training - Step 988: {'lr': 0.000247, 'samples': 189888, 'steps': 988, 'loss/train': 0.9013247787952423} +01/26/2022 20:46:27 - INFO - codeparrot_training - Step 989: {'lr': 0.00024725, 'samples': 190080, 'steps': 989, 'loss/train': 1.1562097370624542} +01/26/2022 20:46:32 - INFO - codeparrot_training - Step 990: {'lr': 0.0002475, 'samples': 190272, 'steps': 990, 'loss/train': 0.6458780318498611} +01/26/2022 20:46:35 - INFO - codeparrot_training - Step 991: {'lr': 0.00024775, 'samples': 190464, 'steps': 991, 'loss/train': 1.0805221796035767} +01/26/2022 20:46:38 - INFO - codeparrot_training - Step 992: {'lr': 0.000248, 'samples': 190656, 'steps': 992, 'loss/train': 0.8056624531745911} +01/26/2022 20:46:41 - INFO - codeparrot_training - Step 993: {'lr': 0.00024825, 'samples': 190848, 'steps': 993, 'loss/train': 0.6836653500795364} +01/26/2022 20:46:44 - INFO - codeparrot_training - Step 994: {'lr': 0.0002485, 'samples': 191040, 'steps': 994, 'loss/train': 0.6399315297603607} +01/26/2022 20:46:48 - INFO - codeparrot_training - Step 995: {'lr': 0.00024875, 'samples': 191232, 'steps': 995, 'loss/train': 0.6765156090259552} +01/26/2022 20:46:51 - INFO - codeparrot_training - Step 996: {'lr': 0.000249, 'samples': 191424, 'steps': 996, 'loss/train': 1.0690458118915558} +01/26/2022 20:46:54 - INFO - codeparrot_training - Step 997: {'lr': 0.00024925, 'samples': 191616, 'steps': 997, 'loss/train': 0.9835208058357239} +01/26/2022 20:46:57 - INFO - codeparrot_training - Step 998: {'lr': 0.0002495, 'samples': 191808, 'steps': 998, 'loss/train': 1.2982279658317566} +01/26/2022 20:47:01 - INFO - codeparrot_training - Step 999: {'lr': 0.00024975, 'samples': 192000, 'steps': 999, 'loss/train': 1.1632397174835205} +01/26/2022 20:47:04 - INFO - codeparrot_training - Step 1000: {'lr': 0.00025, 'samples': 192192, 'steps': 1000, 'loss/train': 1.7013718485832214} +01/26/2022 20:47:08 - INFO - codeparrot_training - Step 1001: {'lr': 0.00025025, 'samples': 192384, 'steps': 1001, 'loss/train': 0.9946185350418091} +01/26/2022 20:47:11 - INFO - codeparrot_training - Step 1002: {'lr': 0.0002505, 'samples': 192576, 'steps': 1002, 'loss/train': 1.195780634880066} +01/26/2022 20:47:14 - INFO - codeparrot_training - Step 1003: {'lr': 0.00025075, 'samples': 192768, 'steps': 1003, 'loss/train': 0.4232182949781418} +01/26/2022 20:47:17 - INFO - codeparrot_training - Step 1004: {'lr': 0.00025100000000000003, 'samples': 192960, 'steps': 1004, 'loss/train': 0.876092255115509} +01/26/2022 20:47:20 - INFO - codeparrot_training - Step 1005: {'lr': 0.00025124999999999995, 'samples': 193152, 'steps': 1005, 'loss/train': 0.7324695289134979} +01/26/2022 20:47:23 - INFO - codeparrot_training - Step 1006: {'lr': 0.0002515, 'samples': 193344, 'steps': 1006, 'loss/train': 0.9005085825920105} +01/26/2022 20:47:26 - INFO - codeparrot_training - Step 1007: {'lr': 0.00025174999999999997, 'samples': 193536, 'steps': 1007, 'loss/train': 0.7350870072841644} +01/26/2022 20:47:32 - INFO - codeparrot_training - Step 1008: {'lr': 0.000252, 'samples': 193728, 'steps': 1008, 'loss/train': 0.7800723016262054} +01/26/2022 20:47:36 - INFO - codeparrot_training - Step 1009: {'lr': 0.00025225, 'samples': 193920, 'steps': 1009, 'loss/train': 0.8280471861362457} +01/26/2022 20:47:39 - INFO - codeparrot_training - Step 1010: {'lr': 0.0002525, 'samples': 194112, 'steps': 1010, 'loss/train': 1.1435025036334991} +01/26/2022 20:47:42 - INFO - codeparrot_training - Step 1011: {'lr': 0.00025275, 'samples': 194304, 'steps': 1011, 'loss/train': 0.8306262195110321} +01/26/2022 20:47:45 - INFO - codeparrot_training - Step 1012: {'lr': 0.000253, 'samples': 194496, 'steps': 1012, 'loss/train': 0.9594416320323944} +01/26/2022 20:47:48 - INFO - codeparrot_training - Step 1013: {'lr': 0.00025325, 'samples': 194688, 'steps': 1013, 'loss/train': 0.9443064630031586} +01/26/2022 20:47:52 - INFO - codeparrot_training - Step 1014: {'lr': 0.0002535, 'samples': 194880, 'steps': 1014, 'loss/train': 0.9492665827274323} +01/26/2022 20:47:55 - INFO - codeparrot_training - Step 1015: {'lr': 0.00025374999999999996, 'samples': 195072, 'steps': 1015, 'loss/train': 1.1060684323310852} +01/26/2022 20:47:58 - INFO - codeparrot_training - Step 1016: {'lr': 0.000254, 'samples': 195264, 'steps': 1016, 'loss/train': 1.4302572906017303} +01/26/2022 20:48:02 - INFO - codeparrot_training - Step 1017: {'lr': 0.00025425, 'samples': 195456, 'steps': 1017, 'loss/train': 1.0134314596652985} +01/26/2022 20:48:05 - INFO - codeparrot_training - Step 1018: {'lr': 0.0002545, 'samples': 195648, 'steps': 1018, 'loss/train': 1.147715002298355} +01/26/2022 20:48:09 - INFO - codeparrot_training - Step 1019: {'lr': 0.00025475, 'samples': 195840, 'steps': 1019, 'loss/train': 0.834953248500824} +01/26/2022 20:48:12 - INFO - codeparrot_training - Step 1020: {'lr': 0.000255, 'samples': 196032, 'steps': 1020, 'loss/train': 0.7808264493942261} +01/26/2022 20:48:15 - INFO - codeparrot_training - Step 1021: {'lr': 0.00025525, 'samples': 196224, 'steps': 1021, 'loss/train': 0.19913220405578613} +01/26/2022 20:48:18 - INFO - codeparrot_training - Step 1022: {'lr': 0.00025550000000000003, 'samples': 196416, 'steps': 1022, 'loss/train': 1.3839197158813477} +01/26/2022 20:48:21 - INFO - codeparrot_training - Step 1023: {'lr': 0.00025575, 'samples': 196608, 'steps': 1023, 'loss/train': 0.697216808795929} +01/26/2022 20:48:24 - INFO - codeparrot_training - Step 1024: {'lr': 0.000256, 'samples': 196800, 'steps': 1024, 'loss/train': 0.7201445996761322} +01/26/2022 20:48:30 - INFO - codeparrot_training - Step 1025: {'lr': 0.00025624999999999997, 'samples': 196992, 'steps': 1025, 'loss/train': 0.802868127822876} +01/26/2022 20:48:33 - INFO - codeparrot_training - Step 1026: {'lr': 0.0002565, 'samples': 197184, 'steps': 1026, 'loss/train': 0.8758995831012726} +01/26/2022 20:48:37 - INFO - codeparrot_training - Step 1027: {'lr': 0.00025675, 'samples': 197376, 'steps': 1027, 'loss/train': 0.8112261593341827} +01/26/2022 20:48:40 - INFO - codeparrot_training - Step 1028: {'lr': 0.000257, 'samples': 197568, 'steps': 1028, 'loss/train': 1.1565943658351898} +01/26/2022 20:48:43 - INFO - codeparrot_training - Step 1029: {'lr': 0.00025725, 'samples': 197760, 'steps': 1029, 'loss/train': 1.0054325759410858} +01/26/2022 20:48:46 - INFO - codeparrot_training - Step 1030: {'lr': 0.0002575, 'samples': 197952, 'steps': 1030, 'loss/train': 1.1029196977615356} +01/26/2022 20:48:49 - INFO - codeparrot_training - Step 1031: {'lr': 0.00025775, 'samples': 198144, 'steps': 1031, 'loss/train': 0.7207678556442261} +01/26/2022 20:48:52 - INFO - codeparrot_training - Step 1032: {'lr': 0.00025800000000000004, 'samples': 198336, 'steps': 1032, 'loss/train': 0.9148613512516022} +01/26/2022 20:48:55 - INFO - codeparrot_training - Step 1033: {'lr': 0.00025824999999999996, 'samples': 198528, 'steps': 1033, 'loss/train': 0.9676394462585449} +01/26/2022 20:49:00 - INFO - codeparrot_training - Step 1034: {'lr': 0.0002585, 'samples': 198720, 'steps': 1034, 'loss/train': 0.6324540227651596} +01/26/2022 20:49:03 - INFO - codeparrot_training - Step 1035: {'lr': 0.00025875, 'samples': 198912, 'steps': 1035, 'loss/train': 0.6709214597940445} +01/26/2022 20:49:06 - INFO - codeparrot_training - Step 1036: {'lr': 0.000259, 'samples': 199104, 'steps': 1036, 'loss/train': 0.915786623954773} +01/26/2022 20:49:09 - INFO - codeparrot_training - Step 1037: {'lr': 0.00025925, 'samples': 199296, 'steps': 1037, 'loss/train': 0.8128197491168976} +01/26/2022 20:49:12 - INFO - codeparrot_training - Step 1038: {'lr': 0.0002595, 'samples': 199488, 'steps': 1038, 'loss/train': 0.7403267025947571} +01/26/2022 20:49:16 - INFO - codeparrot_training - Step 1039: {'lr': 0.00025975, 'samples': 199680, 'steps': 1039, 'loss/train': 0.88175368309021} +01/26/2022 20:49:19 - INFO - codeparrot_training - Step 1040: {'lr': 0.00026000000000000003, 'samples': 199872, 'steps': 1040, 'loss/train': 0.9358507096767426} +01/26/2022 20:49:22 - INFO - codeparrot_training - Step 1041: {'lr': 0.00026025, 'samples': 200064, 'steps': 1041, 'loss/train': 0.9998910427093506} +01/26/2022 20:49:25 - INFO - codeparrot_training - Step 1042: {'lr': 0.0002605, 'samples': 200256, 'steps': 1042, 'loss/train': 0.5532927811145782} +01/26/2022 20:49:30 - INFO - codeparrot_training - Step 1043: {'lr': 0.00026074999999999997, 'samples': 200448, 'steps': 1043, 'loss/train': 1.051554411649704} +01/26/2022 20:49:33 - INFO - codeparrot_training - Step 1044: {'lr': 0.000261, 'samples': 200640, 'steps': 1044, 'loss/train': 0.8074525594711304} +01/26/2022 20:49:36 - INFO - codeparrot_training - Step 1045: {'lr': 0.00026125, 'samples': 200832, 'steps': 1045, 'loss/train': 0.8834658265113831} +01/26/2022 20:49:39 - INFO - codeparrot_training - Step 1046: {'lr': 0.0002615, 'samples': 201024, 'steps': 1046, 'loss/train': 0.7426712214946747} +01/26/2022 20:49:42 - INFO - codeparrot_training - Step 1047: {'lr': 0.00026175, 'samples': 201216, 'steps': 1047, 'loss/train': 1.4034993946552277} +01/26/2022 20:49:45 - INFO - codeparrot_training - Step 1048: {'lr': 0.000262, 'samples': 201408, 'steps': 1048, 'loss/train': 0.7991145551204681} +01/26/2022 20:49:48 - INFO - codeparrot_training - Step 1049: {'lr': 0.00026225, 'samples': 201600, 'steps': 1049, 'loss/train': 1.1421080231666565} +01/26/2022 20:49:52 - INFO - codeparrot_training - Step 1050: {'lr': 0.00026250000000000004, 'samples': 201792, 'steps': 1050, 'loss/train': 1.2328433096408844} +01/26/2022 20:49:56 - INFO - codeparrot_training - Step 1051: {'lr': 0.00026274999999999996, 'samples': 201984, 'steps': 1051, 'loss/train': 1.2644303441047668} +01/26/2022 20:49:59 - INFO - codeparrot_training - Step 1052: {'lr': 0.000263, 'samples': 202176, 'steps': 1052, 'loss/train': 1.3665103018283844} +01/26/2022 20:50:02 - INFO - codeparrot_training - Step 1053: {'lr': 0.00026325, 'samples': 202368, 'steps': 1053, 'loss/train': 1.5380902290344238} +01/26/2022 20:50:06 - INFO - codeparrot_training - Step 1054: {'lr': 0.0002635, 'samples': 202560, 'steps': 1054, 'loss/train': 0.954221248626709} +01/26/2022 20:50:09 - INFO - codeparrot_training - Step 1055: {'lr': 0.00026375, 'samples': 202752, 'steps': 1055, 'loss/train': 1.0929900705814362} +01/26/2022 20:50:12 - INFO - codeparrot_training - Step 1056: {'lr': 0.000264, 'samples': 202944, 'steps': 1056, 'loss/train': 0.8090872764587402} +01/26/2022 20:50:15 - INFO - codeparrot_training - Step 1057: {'lr': 0.00026425, 'samples': 203136, 'steps': 1057, 'loss/train': 0.904187947511673} +01/26/2022 20:50:18 - INFO - codeparrot_training - Step 1058: {'lr': 0.00026450000000000003, 'samples': 203328, 'steps': 1058, 'loss/train': 1.3563102185726166} +01/26/2022 20:50:21 - INFO - codeparrot_training - Step 1059: {'lr': 0.00026475, 'samples': 203520, 'steps': 1059, 'loss/train': 0.8256677091121674} +01/26/2022 20:50:28 - INFO - codeparrot_training - Step 1060: {'lr': 0.00026500000000000004, 'samples': 203712, 'steps': 1060, 'loss/train': 1.652735710144043} +01/26/2022 20:50:31 - INFO - codeparrot_training - Step 1061: {'lr': 0.00026524999999999997, 'samples': 203904, 'steps': 1061, 'loss/train': 1.290626049041748} +01/26/2022 20:50:34 - INFO - codeparrot_training - Step 1062: {'lr': 0.0002655, 'samples': 204096, 'steps': 1062, 'loss/train': 0.7257387936115265} +01/26/2022 20:50:37 - INFO - codeparrot_training - Step 1063: {'lr': 0.00026575, 'samples': 204288, 'steps': 1063, 'loss/train': 1.090696781873703} +01/26/2022 20:50:40 - INFO - codeparrot_training - Step 1064: {'lr': 0.000266, 'samples': 204480, 'steps': 1064, 'loss/train': 1.3471647799015045} +01/26/2022 20:50:43 - INFO - codeparrot_training - Step 1065: {'lr': 0.00026625, 'samples': 204672, 'steps': 1065, 'loss/train': 1.224743664264679} +01/26/2022 20:50:46 - INFO - codeparrot_training - Step 1066: {'lr': 0.0002665, 'samples': 204864, 'steps': 1066, 'loss/train': 0.9322084486484528} +01/26/2022 20:50:50 - INFO - codeparrot_training - Step 1067: {'lr': 0.00026675, 'samples': 205056, 'steps': 1067, 'loss/train': 1.0117403268814087} +01/26/2022 20:50:53 - INFO - codeparrot_training - Step 1068: {'lr': 0.00026700000000000004, 'samples': 205248, 'steps': 1068, 'loss/train': 1.013206422328949} +01/26/2022 20:50:57 - INFO - codeparrot_training - Step 1069: {'lr': 0.00026725, 'samples': 205440, 'steps': 1069, 'loss/train': 1.4741791784763336} +01/26/2022 20:51:00 - INFO - codeparrot_training - Step 1070: {'lr': 0.0002675, 'samples': 205632, 'steps': 1070, 'loss/train': 1.0951833128929138} +01/26/2022 20:51:03 - INFO - codeparrot_training - Step 1071: {'lr': 0.00026775, 'samples': 205824, 'steps': 1071, 'loss/train': 0.48377394676208496} +01/26/2022 20:51:07 - INFO - codeparrot_training - Step 1072: {'lr': 0.000268, 'samples': 206016, 'steps': 1072, 'loss/train': 0.8418543040752411} +01/26/2022 20:51:10 - INFO - codeparrot_training - Step 1073: {'lr': 0.00026825, 'samples': 206208, 'steps': 1073, 'loss/train': 0.6488857716321945} +01/26/2022 20:51:13 - INFO - codeparrot_training - Step 1074: {'lr': 0.0002685, 'samples': 206400, 'steps': 1074, 'loss/train': 0.3778975009918213} +01/26/2022 20:51:16 - INFO - codeparrot_training - Step 1075: {'lr': 0.00026875, 'samples': 206592, 'steps': 1075, 'loss/train': 1.105844110250473} +01/26/2022 20:51:19 - INFO - codeparrot_training - Step 1076: {'lr': 0.00026900000000000003, 'samples': 206784, 'steps': 1076, 'loss/train': 0.9468898773193359} +01/26/2022 20:51:22 - INFO - codeparrot_training - Step 1077: {'lr': 0.00026925, 'samples': 206976, 'steps': 1077, 'loss/train': 0.98614302277565} +01/26/2022 20:51:27 - INFO - codeparrot_training - Step 1078: {'lr': 0.00026950000000000005, 'samples': 207168, 'steps': 1078, 'loss/train': 0.49622131884098053} +01/26/2022 20:51:30 - INFO - codeparrot_training - Step 1079: {'lr': 0.00026974999999999997, 'samples': 207360, 'steps': 1079, 'loss/train': 0.7102539986371994} +01/26/2022 20:51:33 - INFO - codeparrot_training - Step 1080: {'lr': 0.00027, 'samples': 207552, 'steps': 1080, 'loss/train': 1.3239697515964508} +01/26/2022 20:51:36 - INFO - codeparrot_training - Step 1081: {'lr': 0.00027025, 'samples': 207744, 'steps': 1081, 'loss/train': 0.71196149289608} +01/26/2022 20:51:39 - INFO - codeparrot_training - Step 1082: {'lr': 0.0002705, 'samples': 207936, 'steps': 1082, 'loss/train': 1.1662839353084564} +01/26/2022 20:51:42 - INFO - codeparrot_training - Step 1083: {'lr': 0.00027075, 'samples': 208128, 'steps': 1083, 'loss/train': 0.24068096280097961} +01/26/2022 20:51:45 - INFO - codeparrot_training - Step 1084: {'lr': 0.00027100000000000003, 'samples': 208320, 'steps': 1084, 'loss/train': 0.9668920934200287} +01/26/2022 20:51:48 - INFO - codeparrot_training - Step 1085: {'lr': 0.00027125, 'samples': 208512, 'steps': 1085, 'loss/train': 0.518968865275383} +01/26/2022 20:51:52 - INFO - codeparrot_training - Step 1086: {'lr': 0.00027150000000000004, 'samples': 208704, 'steps': 1086, 'loss/train': 0.5857425928115845} +01/26/2022 20:51:58 - INFO - codeparrot_training - Step 1087: {'lr': 0.00027175, 'samples': 208896, 'steps': 1087, 'loss/train': 1.2705464959144592} +01/26/2022 20:52:01 - INFO - codeparrot_training - Step 1088: {'lr': 0.00027200000000000005, 'samples': 209088, 'steps': 1088, 'loss/train': 1.142454832792282} +01/26/2022 20:52:04 - INFO - codeparrot_training - Step 1089: {'lr': 0.00027225, 'samples': 209280, 'steps': 1089, 'loss/train': 1.1075121760368347} +01/26/2022 20:52:07 - INFO - codeparrot_training - Step 1090: {'lr': 0.0002725, 'samples': 209472, 'steps': 1090, 'loss/train': 0.8761851489543915} +01/26/2022 20:52:10 - INFO - codeparrot_training - Step 1091: {'lr': 0.00027275, 'samples': 209664, 'steps': 1091, 'loss/train': 1.0561780035495758} +01/26/2022 20:52:13 - INFO - codeparrot_training - Step 1092: {'lr': 0.000273, 'samples': 209856, 'steps': 1092, 'loss/train': 1.1019476652145386} +01/26/2022 20:52:17 - INFO - codeparrot_training - Step 1093: {'lr': 0.00027325, 'samples': 210048, 'steps': 1093, 'loss/train': 0.9809303283691406} +01/26/2022 20:52:20 - INFO - codeparrot_training - Step 1094: {'lr': 0.00027350000000000003, 'samples': 210240, 'steps': 1094, 'loss/train': 0.5167847871780396} +01/26/2022 20:52:23 - INFO - codeparrot_training - Step 1095: {'lr': 0.00027375, 'samples': 210432, 'steps': 1095, 'loss/train': 0.6833093762397766} +01/26/2022 20:52:28 - INFO - codeparrot_training - Step 1096: {'lr': 0.00027400000000000005, 'samples': 210624, 'steps': 1096, 'loss/train': 0.26570820808410645} +01/26/2022 20:52:31 - INFO - codeparrot_training - Step 1097: {'lr': 0.00027425, 'samples': 210816, 'steps': 1097, 'loss/train': 0.3386870250105858} +01/26/2022 20:52:34 - INFO - codeparrot_training - Step 1098: {'lr': 0.0002745, 'samples': 211008, 'steps': 1098, 'loss/train': 0.688764363527298} +01/26/2022 20:52:37 - INFO - codeparrot_training - Step 1099: {'lr': 0.00027475, 'samples': 211200, 'steps': 1099, 'loss/train': 1.2169432640075684} +01/26/2022 20:52:40 - INFO - codeparrot_training - Step 1100: {'lr': 0.000275, 'samples': 211392, 'steps': 1100, 'loss/train': 0.256294347345829} +01/26/2022 20:52:43 - INFO - codeparrot_training - Step 1101: {'lr': 0.00027525, 'samples': 211584, 'steps': 1101, 'loss/train': 1.6608785390853882} +01/26/2022 20:52:47 - INFO - codeparrot_training - Step 1102: {'lr': 0.00027550000000000003, 'samples': 211776, 'steps': 1102, 'loss/train': 0.7106503844261169} +01/26/2022 20:52:50 - INFO - codeparrot_training - Step 1103: {'lr': 0.00027575, 'samples': 211968, 'steps': 1103, 'loss/train': 0.49972186982631683} +01/26/2022 20:52:56 - INFO - codeparrot_training - Step 1104: {'lr': 0.00027600000000000004, 'samples': 212160, 'steps': 1104, 'loss/train': 0.6209851205348969} +01/26/2022 20:52:59 - INFO - codeparrot_training - Step 1105: {'lr': 0.00027625, 'samples': 212352, 'steps': 1105, 'loss/train': 1.2214830815792084} +01/26/2022 20:53:02 - INFO - codeparrot_training - Step 1106: {'lr': 0.00027650000000000005, 'samples': 212544, 'steps': 1106, 'loss/train': 1.0984822511672974} +01/26/2022 20:53:05 - INFO - codeparrot_training - Step 1107: {'lr': 0.00027675, 'samples': 212736, 'steps': 1107, 'loss/train': 1.1388744413852692} +01/26/2022 20:53:09 - INFO - codeparrot_training - Step 1108: {'lr': 0.000277, 'samples': 212928, 'steps': 1108, 'loss/train': 0.9910741746425629} +01/26/2022 20:53:12 - INFO - codeparrot_training - Step 1109: {'lr': 0.00027725, 'samples': 213120, 'steps': 1109, 'loss/train': 0.3422721326351166} +01/26/2022 20:53:15 - INFO - codeparrot_training - Step 1110: {'lr': 0.0002775, 'samples': 213312, 'steps': 1110, 'loss/train': 0.6875036358833313} +01/26/2022 20:53:18 - INFO - codeparrot_training - Step 1111: {'lr': 0.00027775, 'samples': 213504, 'steps': 1111, 'loss/train': 0.35392677783966064} +01/26/2022 20:53:21 - INFO - codeparrot_training - Step 1112: {'lr': 0.00027800000000000004, 'samples': 213696, 'steps': 1112, 'loss/train': 1.1671156883239746} +01/26/2022 20:53:25 - INFO - codeparrot_training - Step 1113: {'lr': 0.00027825, 'samples': 213888, 'steps': 1113, 'loss/train': 0.35262975096702576} +01/26/2022 20:53:29 - INFO - codeparrot_training - Step 1114: {'lr': 0.00027850000000000005, 'samples': 214080, 'steps': 1114, 'loss/train': 1.2366746664047241} +01/26/2022 20:53:32 - INFO - codeparrot_training - Step 1115: {'lr': 0.00027875, 'samples': 214272, 'steps': 1115, 'loss/train': 0.650038942694664} +01/26/2022 20:53:35 - INFO - codeparrot_training - Step 1116: {'lr': 0.000279, 'samples': 214464, 'steps': 1116, 'loss/train': 0.6133878976106644} +01/26/2022 20:53:38 - INFO - codeparrot_training - Step 1117: {'lr': 0.00027925, 'samples': 214656, 'steps': 1117, 'loss/train': 1.2080609500408173} +01/26/2022 20:53:41 - INFO - codeparrot_training - Step 1118: {'lr': 0.0002795, 'samples': 214848, 'steps': 1118, 'loss/train': 1.3723876476287842} +01/26/2022 20:53:44 - INFO - codeparrot_training - Step 1119: {'lr': 0.00027975, 'samples': 215040, 'steps': 1119, 'loss/train': 0.9902322292327881} +01/26/2022 20:53:47 - INFO - codeparrot_training - Step 1120: {'lr': 0.00028000000000000003, 'samples': 215232, 'steps': 1120, 'loss/train': 0.8555106818675995} +01/26/2022 20:53:51 - INFO - codeparrot_training - Step 1121: {'lr': 0.00028025, 'samples': 215424, 'steps': 1121, 'loss/train': 0.6759006679058075} +01/26/2022 20:53:55 - INFO - codeparrot_training - Step 1122: {'lr': 0.00028050000000000004, 'samples': 215616, 'steps': 1122, 'loss/train': 0.8378792703151703} +01/26/2022 20:53:58 - INFO - codeparrot_training - Step 1123: {'lr': 0.00028075, 'samples': 215808, 'steps': 1123, 'loss/train': 1.138431340456009} +01/26/2022 20:54:01 - INFO - codeparrot_training - Step 1124: {'lr': 0.00028100000000000005, 'samples': 216000, 'steps': 1124, 'loss/train': 1.1794633269309998} +01/26/2022 20:54:04 - INFO - codeparrot_training - Step 1125: {'lr': 0.00028125000000000003, 'samples': 216192, 'steps': 1125, 'loss/train': 0.6962398141622543} +01/26/2022 20:54:07 - INFO - codeparrot_training - Step 1126: {'lr': 0.00028149999999999996, 'samples': 216384, 'steps': 1126, 'loss/train': 1.3031834363937378} +01/26/2022 20:54:11 - INFO - codeparrot_training - Step 1127: {'lr': 0.00028175, 'samples': 216576, 'steps': 1127, 'loss/train': 0.5448819547891617} +01/26/2022 20:54:14 - INFO - codeparrot_training - Step 1128: {'lr': 0.00028199999999999997, 'samples': 216768, 'steps': 1128, 'loss/train': 0.7928208410739899} +01/26/2022 20:54:17 - INFO - codeparrot_training - Step 1129: {'lr': 0.00028225, 'samples': 216960, 'steps': 1129, 'loss/train': 0.8456499874591827} +01/26/2022 20:54:20 - INFO - codeparrot_training - Step 1130: {'lr': 0.0002825, 'samples': 217152, 'steps': 1130, 'loss/train': 0.6639377474784851} +01/26/2022 20:54:26 - INFO - codeparrot_training - Step 1131: {'lr': 0.00028275, 'samples': 217344, 'steps': 1131, 'loss/train': 1.1020688116550446} +01/26/2022 20:54:29 - INFO - codeparrot_training - Step 1132: {'lr': 0.000283, 'samples': 217536, 'steps': 1132, 'loss/train': 4.993828296661377} +01/26/2022 20:54:32 - INFO - codeparrot_training - Step 1133: {'lr': 0.00028325000000000003, 'samples': 217728, 'steps': 1133, 'loss/train': 0.9387196898460388} +01/26/2022 20:54:35 - INFO - codeparrot_training - Step 1134: {'lr': 0.0002835, 'samples': 217920, 'steps': 1134, 'loss/train': 0.9615424275398254} +01/26/2022 20:54:39 - INFO - codeparrot_training - Step 1135: {'lr': 0.00028375, 'samples': 218112, 'steps': 1135, 'loss/train': 1.0674448907375336} +01/26/2022 20:54:42 - INFO - codeparrot_training - Step 1136: {'lr': 0.00028399999999999996, 'samples': 218304, 'steps': 1136, 'loss/train': 1.068463146686554} +01/26/2022 20:54:45 - INFO - codeparrot_training - Step 1137: {'lr': 0.00028425, 'samples': 218496, 'steps': 1137, 'loss/train': 0.6541077196598053} +01/26/2022 20:54:48 - INFO - codeparrot_training - Step 1138: {'lr': 0.0002845, 'samples': 218688, 'steps': 1138, 'loss/train': 0.9804241955280304} +01/26/2022 20:54:51 - INFO - codeparrot_training - Step 1139: {'lr': 0.00028475, 'samples': 218880, 'steps': 1139, 'loss/train': 0.3464767411351204} +01/26/2022 20:54:56 - INFO - codeparrot_training - Step 1140: {'lr': 0.000285, 'samples': 219072, 'steps': 1140, 'loss/train': 1.2621967792510986} +01/26/2022 20:54:59 - INFO - codeparrot_training - Step 1141: {'lr': 0.00028525, 'samples': 219264, 'steps': 1141, 'loss/train': 0.7673389613628387} +01/26/2022 20:55:02 - INFO - codeparrot_training - Step 1142: {'lr': 0.0002855, 'samples': 219456, 'steps': 1142, 'loss/train': 1.1267178654670715} +01/26/2022 20:55:05 - INFO - codeparrot_training - Step 1143: {'lr': 0.00028575000000000003, 'samples': 219648, 'steps': 1143, 'loss/train': 0.8958519995212555} +01/26/2022 20:55:08 - INFO - codeparrot_training - Step 1144: {'lr': 0.00028599999999999996, 'samples': 219840, 'steps': 1144, 'loss/train': 2.1498432755470276} +01/26/2022 20:55:11 - INFO - codeparrot_training - Step 1145: {'lr': 0.00028625, 'samples': 220032, 'steps': 1145, 'loss/train': 1.0694282948970795} +01/26/2022 20:55:14 - INFO - codeparrot_training - Step 1146: {'lr': 0.00028649999999999997, 'samples': 220224, 'steps': 1146, 'loss/train': 1.1303611099720001} +01/26/2022 20:55:17 - INFO - codeparrot_training - Step 1147: {'lr': 0.00028675, 'samples': 220416, 'steps': 1147, 'loss/train': 0.9350847601890564} +01/26/2022 20:55:21 - INFO - codeparrot_training - Step 1148: {'lr': 0.000287, 'samples': 220608, 'steps': 1148, 'loss/train': 1.5029119849205017} +01/26/2022 20:55:25 - INFO - codeparrot_training - Step 1149: {'lr': 0.00028725, 'samples': 220800, 'steps': 1149, 'loss/train': 0.7740932106971741} +01/26/2022 20:55:28 - INFO - codeparrot_training - Step 1150: {'lr': 0.0002875, 'samples': 220992, 'steps': 1150, 'loss/train': 0.8809188008308411} +01/26/2022 20:55:31 - INFO - codeparrot_training - Step 1151: {'lr': 0.00028775000000000003, 'samples': 221184, 'steps': 1151, 'loss/train': 1.3801161646842957} +01/26/2022 20:55:35 - INFO - codeparrot_training - Step 1152: {'lr': 0.000288, 'samples': 221376, 'steps': 1152, 'loss/train': 0.6021814048290253} +01/26/2022 20:55:38 - INFO - codeparrot_training - Step 1153: {'lr': 0.00028825, 'samples': 221568, 'steps': 1153, 'loss/train': 0.8896468877792358} +01/26/2022 20:55:41 - INFO - codeparrot_training - Step 1154: {'lr': 0.00028849999999999997, 'samples': 221760, 'steps': 1154, 'loss/train': 1.0600275099277496} +01/26/2022 20:55:44 - INFO - codeparrot_training - Step 1155: {'lr': 0.00028875, 'samples': 221952, 'steps': 1155, 'loss/train': 1.1250623166561127} +01/26/2022 20:55:47 - INFO - codeparrot_training - Step 1156: {'lr': 0.000289, 'samples': 222144, 'steps': 1156, 'loss/train': 0.5853099524974823} +01/26/2022 20:55:54 - INFO - codeparrot_training - Step 1157: {'lr': 0.00028925, 'samples': 222336, 'steps': 1157, 'loss/train': 0.709986001253128} +01/26/2022 20:55:57 - INFO - codeparrot_training - Step 1158: {'lr': 0.0002895, 'samples': 222528, 'steps': 1158, 'loss/train': 0.5911504179239273} +01/26/2022 20:56:00 - INFO - codeparrot_training - Step 1159: {'lr': 0.00028975, 'samples': 222720, 'steps': 1159, 'loss/train': 0.4766962230205536} +01/26/2022 20:56:03 - INFO - codeparrot_training - Step 1160: {'lr': 0.00029, 'samples': 222912, 'steps': 1160, 'loss/train': 1.1779105961322784} +01/26/2022 20:56:07 - INFO - codeparrot_training - Step 1161: {'lr': 0.00029025000000000003, 'samples': 223104, 'steps': 1161, 'loss/train': 1.2804690599441528} +01/26/2022 20:56:10 - INFO - codeparrot_training - Step 1162: {'lr': 0.00029049999999999996, 'samples': 223296, 'steps': 1162, 'loss/train': 1.1594082713127136} +01/26/2022 20:56:13 - INFO - codeparrot_training - Step 1163: {'lr': 0.00029075, 'samples': 223488, 'steps': 1163, 'loss/train': 1.1964775621891022} +01/26/2022 20:56:16 - INFO - codeparrot_training - Step 1164: {'lr': 0.00029099999999999997, 'samples': 223680, 'steps': 1164, 'loss/train': 0.9824680387973785} +01/26/2022 20:56:19 - INFO - codeparrot_training - Step 1165: {'lr': 0.00029125, 'samples': 223872, 'steps': 1165, 'loss/train': 0.8150842487812042} +01/26/2022 20:56:23 - INFO - codeparrot_training - Step 1166: {'lr': 0.0002915, 'samples': 224064, 'steps': 1166, 'loss/train': 1.152783751487732} +01/26/2022 20:56:27 - INFO - codeparrot_training - Step 1167: {'lr': 0.00029175, 'samples': 224256, 'steps': 1167, 'loss/train': 0.7861387431621552} +01/26/2022 20:56:30 - INFO - codeparrot_training - Step 1168: {'lr': 0.000292, 'samples': 224448, 'steps': 1168, 'loss/train': 0.24597644805908203} +01/26/2022 20:56:33 - INFO - codeparrot_training - Step 1169: {'lr': 0.00029225000000000003, 'samples': 224640, 'steps': 1169, 'loss/train': 0.9678133428096771} +01/26/2022 20:56:36 - INFO - codeparrot_training - Step 1170: {'lr': 0.0002925, 'samples': 224832, 'steps': 1170, 'loss/train': 1.0234933197498322} +01/26/2022 20:56:39 - INFO - codeparrot_training - Step 1171: {'lr': 0.00029275000000000004, 'samples': 225024, 'steps': 1171, 'loss/train': 0.9173187017440796} +01/26/2022 20:56:42 - INFO - codeparrot_training - Step 1172: {'lr': 0.00029299999999999997, 'samples': 225216, 'steps': 1172, 'loss/train': 0.8178969025611877} +01/26/2022 20:56:45 - INFO - codeparrot_training - Step 1173: {'lr': 0.00029325, 'samples': 225408, 'steps': 1173, 'loss/train': 0.6272235363721848} +01/26/2022 20:56:48 - INFO - codeparrot_training - Step 1174: {'lr': 0.0002935, 'samples': 225600, 'steps': 1174, 'loss/train': 0.7621327042579651} +01/26/2022 20:56:53 - INFO - codeparrot_training - Step 1175: {'lr': 0.00029375, 'samples': 225792, 'steps': 1175, 'loss/train': 1.562220811843872} +01/26/2022 20:56:56 - INFO - codeparrot_training - Step 1176: {'lr': 0.000294, 'samples': 225984, 'steps': 1176, 'loss/train': 1.083278328180313} +01/26/2022 20:56:59 - INFO - codeparrot_training - Step 1177: {'lr': 0.00029425, 'samples': 226176, 'steps': 1177, 'loss/train': 0.8461199104785919} +01/26/2022 20:57:02 - INFO - codeparrot_training - Step 1178: {'lr': 0.0002945, 'samples': 226368, 'steps': 1178, 'loss/train': 1.3932880461215973} +01/26/2022 20:57:05 - INFO - codeparrot_training - Step 1179: {'lr': 0.00029475000000000004, 'samples': 226560, 'steps': 1179, 'loss/train': 1.1243003904819489} +01/26/2022 20:57:09 - INFO - codeparrot_training - Step 1180: {'lr': 0.000295, 'samples': 226752, 'steps': 1180, 'loss/train': 1.2851622104644775} +01/26/2022 20:57:12 - INFO - codeparrot_training - Step 1181: {'lr': 0.00029525, 'samples': 226944, 'steps': 1181, 'loss/train': 0.5264954566955566} +01/26/2022 20:57:15 - INFO - codeparrot_training - Step 1182: {'lr': 0.00029549999999999997, 'samples': 227136, 'steps': 1182, 'loss/train': 1.279321700334549} +01/26/2022 20:57:18 - INFO - codeparrot_training - Step 1183: {'lr': 0.00029575, 'samples': 227328, 'steps': 1183, 'loss/train': 1.7727146744728088} +01/26/2022 20:57:24 - INFO - codeparrot_training - Step 1184: {'lr': 0.000296, 'samples': 227520, 'steps': 1184, 'loss/train': 0.11715855449438095} +01/26/2022 20:57:28 - INFO - codeparrot_training - Step 1185: {'lr': 0.00029625, 'samples': 227712, 'steps': 1185, 'loss/train': 0.7162847220897675} +01/26/2022 20:57:31 - INFO - codeparrot_training - Step 1186: {'lr': 0.0002965, 'samples': 227904, 'steps': 1186, 'loss/train': 0.5857726335525513} +01/26/2022 20:57:34 - INFO - codeparrot_training - Step 1187: {'lr': 0.00029675000000000003, 'samples': 228096, 'steps': 1187, 'loss/train': 0.9531798362731934} +01/26/2022 20:57:37 - INFO - codeparrot_training - Step 1188: {'lr': 0.000297, 'samples': 228288, 'steps': 1188, 'loss/train': 0.8573541641235352} +01/26/2022 20:57:40 - INFO - codeparrot_training - Step 1189: {'lr': 0.00029725000000000004, 'samples': 228480, 'steps': 1189, 'loss/train': 1.2324146032333374} +01/26/2022 20:57:43 - INFO - codeparrot_training - Step 1190: {'lr': 0.00029749999999999997, 'samples': 228672, 'steps': 1190, 'loss/train': 0.836683988571167} +01/26/2022 20:57:46 - INFO - codeparrot_training - Step 1191: {'lr': 0.00029775, 'samples': 228864, 'steps': 1191, 'loss/train': 0.9754275977611542} +01/26/2022 20:57:51 - INFO - codeparrot_training - Step 1192: {'lr': 0.000298, 'samples': 229056, 'steps': 1192, 'loss/train': 1.3395634889602661} +01/26/2022 20:57:54 - INFO - codeparrot_training - Step 1193: {'lr': 0.00029825, 'samples': 229248, 'steps': 1193, 'loss/train': 1.1002348065376282} +01/26/2022 20:57:57 - INFO - codeparrot_training - Step 1194: {'lr': 0.0002985, 'samples': 229440, 'steps': 1194, 'loss/train': 0.7102044224739075} +01/26/2022 20:58:00 - INFO - codeparrot_training - Step 1195: {'lr': 0.00029875, 'samples': 229632, 'steps': 1195, 'loss/train': 0.33209938555955887} +01/26/2022 20:58:04 - INFO - codeparrot_training - Step 1196: {'lr': 0.000299, 'samples': 229824, 'steps': 1196, 'loss/train': 0.4997599124908447} +01/26/2022 20:58:07 - INFO - codeparrot_training - Step 1197: {'lr': 0.00029925000000000004, 'samples': 230016, 'steps': 1197, 'loss/train': 0.5510084331035614} +01/26/2022 20:58:10 - INFO - codeparrot_training - Step 1198: {'lr': 0.0002995, 'samples': 230208, 'steps': 1198, 'loss/train': 0.9658601582050323} +01/26/2022 20:58:13 - INFO - codeparrot_training - Step 1199: {'lr': 0.00029975000000000005, 'samples': 230400, 'steps': 1199, 'loss/train': 1.0016333162784576} +01/26/2022 20:58:16 - INFO - codeparrot_training - Step 1200: {'lr': 0.0003, 'samples': 230592, 'steps': 1200, 'loss/train': 0.25452665984630585} +01/26/2022 20:58:21 - INFO - codeparrot_training - Step 1201: {'lr': 0.00030025, 'samples': 230784, 'steps': 1201, 'loss/train': 1.086582988500595} +01/26/2022 20:58:24 - INFO - codeparrot_training - Step 1202: {'lr': 0.0003005, 'samples': 230976, 'steps': 1202, 'loss/train': 0.2490437775850296} +01/26/2022 20:58:27 - INFO - codeparrot_training - Step 1203: {'lr': 0.00030075, 'samples': 231168, 'steps': 1203, 'loss/train': 0.43811631202697754} +01/26/2022 20:58:30 - INFO - codeparrot_training - Step 1204: {'lr': 0.000301, 'samples': 231360, 'steps': 1204, 'loss/train': 0.5827521085739136} +01/26/2022 20:58:33 - INFO - codeparrot_training - Step 1205: {'lr': 0.00030125000000000003, 'samples': 231552, 'steps': 1205, 'loss/train': 0.5712075680494308} +01/26/2022 20:58:36 - INFO - codeparrot_training - Step 1206: {'lr': 0.0003015, 'samples': 231744, 'steps': 1206, 'loss/train': 1.046557903289795} +01/26/2022 20:58:39 - INFO - codeparrot_training - Step 1207: {'lr': 0.00030175000000000004, 'samples': 231936, 'steps': 1207, 'loss/train': 1.0815697610378265} +01/26/2022 20:58:42 - INFO - codeparrot_training - Step 1208: {'lr': 0.000302, 'samples': 232128, 'steps': 1208, 'loss/train': 0.1934211105108261} +01/26/2022 20:58:46 - INFO - codeparrot_training - Step 1209: {'lr': 0.00030225, 'samples': 232320, 'steps': 1209, 'loss/train': 1.0718797445297241} +01/26/2022 20:58:52 - INFO - codeparrot_training - Step 1210: {'lr': 0.0003025, 'samples': 232512, 'steps': 1210, 'loss/train': 1.093067318201065} +01/26/2022 20:58:55 - INFO - codeparrot_training - Step 1211: {'lr': 0.00030275, 'samples': 232704, 'steps': 1211, 'loss/train': 0.2502623051404953} +01/26/2022 20:58:58 - INFO - codeparrot_training - Step 1212: {'lr': 0.000303, 'samples': 232896, 'steps': 1212, 'loss/train': 1.9116675853729248} +01/26/2022 20:59:02 - INFO - codeparrot_training - Step 1213: {'lr': 0.00030325, 'samples': 233088, 'steps': 1213, 'loss/train': 0.36320026963949203} +01/26/2022 20:59:05 - INFO - codeparrot_training - Step 1214: {'lr': 0.0003035, 'samples': 233280, 'steps': 1214, 'loss/train': 0.9241034388542175} +01/26/2022 20:59:08 - INFO - codeparrot_training - Step 1215: {'lr': 0.00030375000000000004, 'samples': 233472, 'steps': 1215, 'loss/train': 0.7539133429527283} +01/26/2022 20:59:11 - INFO - codeparrot_training - Step 1216: {'lr': 0.000304, 'samples': 233664, 'steps': 1216, 'loss/train': 0.8841446936130524} +01/26/2022 20:59:14 - INFO - codeparrot_training - Step 1217: {'lr': 0.00030425000000000005, 'samples': 233856, 'steps': 1217, 'loss/train': 0.5467899888753891} +01/26/2022 20:59:17 - INFO - codeparrot_training - Step 1218: {'lr': 0.0003045, 'samples': 234048, 'steps': 1218, 'loss/train': 0.931069403886795} +01/26/2022 20:59:22 - INFO - codeparrot_training - Step 1219: {'lr': 0.00030475, 'samples': 234240, 'steps': 1219, 'loss/train': 0.5878031551837921} +01/26/2022 20:59:25 - INFO - codeparrot_training - Step 1220: {'lr': 0.000305, 'samples': 234432, 'steps': 1220, 'loss/train': 0.9409241080284119} +01/26/2022 20:59:28 - INFO - codeparrot_training - Step 1221: {'lr': 0.00030525, 'samples': 234624, 'steps': 1221, 'loss/train': 1.2209935784339905} +01/26/2022 20:59:31 - INFO - codeparrot_training - Step 1222: {'lr': 0.0003055, 'samples': 234816, 'steps': 1222, 'loss/train': 1.092833697795868} +01/26/2022 20:59:34 - INFO - codeparrot_training - Step 1223: {'lr': 0.00030575000000000003, 'samples': 235008, 'steps': 1223, 'loss/train': 0.9506379961967468} +01/26/2022 20:59:37 - INFO - codeparrot_training - Step 1224: {'lr': 0.000306, 'samples': 235200, 'steps': 1224, 'loss/train': 0.8738553822040558} +01/26/2022 20:59:41 - INFO - codeparrot_training - Step 1225: {'lr': 0.00030625000000000004, 'samples': 235392, 'steps': 1225, 'loss/train': 0.6989216208457947} +01/26/2022 20:59:44 - INFO - codeparrot_training - Step 1226: {'lr': 0.0003065, 'samples': 235584, 'steps': 1226, 'loss/train': 1.1033534109592438} +01/26/2022 20:59:50 - INFO - codeparrot_training - Step 1227: {'lr': 0.00030675, 'samples': 235776, 'steps': 1227, 'loss/train': 1.10317263007164} +01/26/2022 20:59:53 - INFO - codeparrot_training - Step 1228: {'lr': 0.000307, 'samples': 235968, 'steps': 1228, 'loss/train': 1.2997640669345856} +01/26/2022 20:59:56 - INFO - codeparrot_training - Step 1229: {'lr': 0.00030725, 'samples': 236160, 'steps': 1229, 'loss/train': 0.9714340567588806} +01/26/2022 20:59:59 - INFO - codeparrot_training - Step 1230: {'lr': 0.0003075, 'samples': 236352, 'steps': 1230, 'loss/train': 0.7407642602920532} +01/26/2022 21:00:02 - INFO - codeparrot_training - Step 1231: {'lr': 0.00030775, 'samples': 236544, 'steps': 1231, 'loss/train': 0.698677584528923} +01/26/2022 21:00:05 - INFO - codeparrot_training - Step 1232: {'lr': 0.000308, 'samples': 236736, 'steps': 1232, 'loss/train': 0.6075624525547028} +01/26/2022 21:00:09 - INFO - codeparrot_training - Step 1233: {'lr': 0.00030825000000000004, 'samples': 236928, 'steps': 1233, 'loss/train': 0.7729901969432831} +01/26/2022 21:00:12 - INFO - codeparrot_training - Step 1234: {'lr': 0.0003085, 'samples': 237120, 'steps': 1234, 'loss/train': 1.0640463531017303} +01/26/2022 21:00:15 - INFO - codeparrot_training - Step 1235: {'lr': 0.00030875000000000005, 'samples': 237312, 'steps': 1235, 'loss/train': 0.8610347807407379} +01/26/2022 21:00:19 - INFO - codeparrot_training - Step 1236: {'lr': 0.00030900000000000003, 'samples': 237504, 'steps': 1236, 'loss/train': 1.2136048078536987} +01/26/2022 21:00:22 - INFO - codeparrot_training - Step 1237: {'lr': 0.00030925, 'samples': 237696, 'steps': 1237, 'loss/train': 0.8963712751865387} +01/26/2022 21:00:25 - INFO - codeparrot_training - Step 1238: {'lr': 0.0003095, 'samples': 237888, 'steps': 1238, 'loss/train': 0.3150174468755722} +01/26/2022 21:00:28 - INFO - codeparrot_training - Step 1239: {'lr': 0.00030975, 'samples': 238080, 'steps': 1239, 'loss/train': 0.7881652414798737} +01/26/2022 21:00:32 - INFO - codeparrot_training - Step 1240: {'lr': 0.00031, 'samples': 238272, 'steps': 1240, 'loss/train': 0.9741031229496002} +01/26/2022 21:00:35 - INFO - codeparrot_training - Step 1241: {'lr': 0.00031025000000000003, 'samples': 238464, 'steps': 1241, 'loss/train': 1.236857682466507} +01/26/2022 21:00:38 - INFO - codeparrot_training - Step 1242: {'lr': 0.0003105, 'samples': 238656, 'steps': 1242, 'loss/train': 0.39366231858730316} +01/26/2022 21:00:41 - INFO - codeparrot_training - Step 1243: {'lr': 0.00031075000000000005, 'samples': 238848, 'steps': 1243, 'loss/train': 0.8875214159488678} +01/26/2022 21:00:44 - INFO - codeparrot_training - Step 1244: {'lr': 0.000311, 'samples': 239040, 'steps': 1244, 'loss/train': 1.340793639421463} +01/26/2022 21:00:49 - INFO - codeparrot_training - Step 1245: {'lr': 0.00031125000000000006, 'samples': 239232, 'steps': 1245, 'loss/train': 0.6737084984779358} +01/26/2022 21:00:52 - INFO - codeparrot_training - Step 1246: {'lr': 0.0003115, 'samples': 239424, 'steps': 1246, 'loss/train': 1.1468097567558289} +01/26/2022 21:00:55 - INFO - codeparrot_training - Step 1247: {'lr': 0.00031175, 'samples': 239616, 'steps': 1247, 'loss/train': 0.9451137185096741} +01/26/2022 21:00:58 - INFO - codeparrot_training - Step 1248: {'lr': 0.000312, 'samples': 239808, 'steps': 1248, 'loss/train': 0.9751624166965485} +01/26/2022 21:01:01 - INFO - codeparrot_training - Step 1249: {'lr': 0.00031225000000000003, 'samples': 240000, 'steps': 1249, 'loss/train': 1.5141663551330566} +01/26/2022 21:01:04 - INFO - codeparrot_training - Step 1250: {'lr': 0.0003125, 'samples': 240192, 'steps': 1250, 'loss/train': 1.6736984252929688} +01/26/2022 21:01:07 - INFO - codeparrot_training - Step 1251: {'lr': 0.00031275, 'samples': 240384, 'steps': 1251, 'loss/train': 1.3493045568466187} +01/26/2022 21:01:11 - INFO - codeparrot_training - Step 1252: {'lr': 0.000313, 'samples': 240576, 'steps': 1252, 'loss/train': 1.2300075888633728} +01/26/2022 21:01:14 - INFO - codeparrot_training - Step 1253: {'lr': 0.00031325, 'samples': 240768, 'steps': 1253, 'loss/train': 0.7751044929027557} +01/26/2022 21:01:19 - INFO - codeparrot_training - Step 1254: {'lr': 0.00031350000000000003, 'samples': 240960, 'steps': 1254, 'loss/train': 0.534063309431076} +01/26/2022 21:01:22 - INFO - codeparrot_training - Step 1255: {'lr': 0.00031374999999999996, 'samples': 241152, 'steps': 1255, 'loss/train': 1.0949264466762543} +01/26/2022 21:01:26 - INFO - codeparrot_training - Step 1256: {'lr': 0.000314, 'samples': 241344, 'steps': 1256, 'loss/train': 0.8517526388168335} +01/26/2022 21:01:29 - INFO - codeparrot_training - Step 1257: {'lr': 0.00031424999999999997, 'samples': 241536, 'steps': 1257, 'loss/train': 0.951142430305481} +01/26/2022 21:01:32 - INFO - codeparrot_training - Step 1258: {'lr': 0.0003145, 'samples': 241728, 'steps': 1258, 'loss/train': 0.8981476128101349} +01/26/2022 21:01:35 - INFO - codeparrot_training - Step 1259: {'lr': 0.00031475, 'samples': 241920, 'steps': 1259, 'loss/train': 1.1605018973350525} +01/26/2022 21:01:38 - INFO - codeparrot_training - Step 1260: {'lr': 0.000315, 'samples': 242112, 'steps': 1260, 'loss/train': 1.2106269299983978} +01/26/2022 21:01:41 - INFO - codeparrot_training - Step 1261: {'lr': 0.00031525, 'samples': 242304, 'steps': 1261, 'loss/train': 0.47663457691669464} +01/26/2022 21:01:46 - INFO - codeparrot_training - Step 1262: {'lr': 0.0003155, 'samples': 242496, 'steps': 1262, 'loss/train': 0.8414538502693176} +01/26/2022 21:01:49 - INFO - codeparrot_training - Step 1263: {'lr': 0.00031575, 'samples': 242688, 'steps': 1263, 'loss/train': 0.8157608807086945} +01/26/2022 21:01:52 - INFO - codeparrot_training - Step 1264: {'lr': 0.000316, 'samples': 242880, 'steps': 1264, 'loss/train': 1.1345813870429993} +01/26/2022 21:01:55 - INFO - codeparrot_training - Step 1265: {'lr': 0.00031624999999999996, 'samples': 243072, 'steps': 1265, 'loss/train': 0.3450457602739334} +01/26/2022 21:01:58 - INFO - codeparrot_training - Step 1266: {'lr': 0.0003165, 'samples': 243264, 'steps': 1266, 'loss/train': 0.8551756739616394} +01/26/2022 21:02:01 - INFO - codeparrot_training - Step 1267: {'lr': 0.00031675, 'samples': 243456, 'steps': 1267, 'loss/train': 0.9590050578117371} +01/26/2022 21:02:04 - INFO - codeparrot_training - Step 1268: {'lr': 0.000317, 'samples': 243648, 'steps': 1268, 'loss/train': 0.4402073174715042} +01/26/2022 21:02:08 - INFO - codeparrot_training - Step 1269: {'lr': 0.00031725, 'samples': 243840, 'steps': 1269, 'loss/train': 0.9316028952598572} +01/26/2022 21:02:11 - INFO - codeparrot_training - Step 1270: {'lr': 0.0003175, 'samples': 244032, 'steps': 1270, 'loss/train': 1.153227835893631} +01/26/2022 21:02:15 - INFO - codeparrot_training - Step 1271: {'lr': 0.00031775, 'samples': 244224, 'steps': 1271, 'loss/train': 1.801672875881195} +01/26/2022 21:02:18 - INFO - codeparrot_training - Step 1272: {'lr': 0.00031800000000000003, 'samples': 244416, 'steps': 1272, 'loss/train': 0.8158755004405975} +01/26/2022 21:02:21 - INFO - codeparrot_training - Step 1273: {'lr': 0.00031825, 'samples': 244608, 'steps': 1273, 'loss/train': 1.1565444767475128} +01/26/2022 21:02:25 - INFO - codeparrot_training - Step 1274: {'lr': 0.0003185, 'samples': 244800, 'steps': 1274, 'loss/train': 0.6587251871824265} +01/26/2022 21:02:28 - INFO - codeparrot_training - Step 1275: {'lr': 0.00031874999999999997, 'samples': 244992, 'steps': 1275, 'loss/train': 1.1284300088882446} +01/26/2022 21:02:31 - INFO - codeparrot_training - Step 1276: {'lr': 0.000319, 'samples': 245184, 'steps': 1276, 'loss/train': 1.1241610944271088} +01/26/2022 21:02:34 - INFO - codeparrot_training - Step 1277: {'lr': 0.00031925, 'samples': 245376, 'steps': 1277, 'loss/train': 0.5060954242944717} +01/26/2022 21:02:37 - INFO - codeparrot_training - Step 1278: {'lr': 0.0003195, 'samples': 245568, 'steps': 1278, 'loss/train': 0.9319193065166473} +01/26/2022 21:02:40 - INFO - codeparrot_training - Step 1279: {'lr': 0.00031975, 'samples': 245760, 'steps': 1279, 'loss/train': 1.2106119096279144} +01/26/2022 21:02:44 - INFO - codeparrot_training - Step 1280: {'lr': 0.00032, 'samples': 245952, 'steps': 1280, 'loss/train': 1.040795624256134} +01/26/2022 21:02:48 - INFO - codeparrot_training - Step 1281: {'lr': 0.00032025, 'samples': 246144, 'steps': 1281, 'loss/train': 0.630868211388588} +01/26/2022 21:02:51 - INFO - codeparrot_training - Step 1282: {'lr': 0.00032050000000000004, 'samples': 246336, 'steps': 1282, 'loss/train': 1.2275404930114746} +01/26/2022 21:02:54 - INFO - codeparrot_training - Step 1283: {'lr': 0.00032074999999999996, 'samples': 246528, 'steps': 1283, 'loss/train': 0.8045752644538879} +01/26/2022 21:02:57 - INFO - codeparrot_training - Step 1284: {'lr': 0.000321, 'samples': 246720, 'steps': 1284, 'loss/train': 1.0104618966579437} +01/26/2022 21:03:00 - INFO - codeparrot_training - Step 1285: {'lr': 0.00032125, 'samples': 246912, 'steps': 1285, 'loss/train': 0.7690997421741486} +01/26/2022 21:03:03 - INFO - codeparrot_training - Step 1286: {'lr': 0.0003215, 'samples': 247104, 'steps': 1286, 'loss/train': 1.0202703773975372} +01/26/2022 21:03:06 - INFO - codeparrot_training - Step 1287: {'lr': 0.00032175, 'samples': 247296, 'steps': 1287, 'loss/train': 1.2204847633838654} +01/26/2022 21:03:10 - INFO - codeparrot_training - Step 1288: {'lr': 0.000322, 'samples': 247488, 'steps': 1288, 'loss/train': 1.1590179204940796} +01/26/2022 21:03:16 - INFO - codeparrot_training - Step 1289: {'lr': 0.00032225, 'samples': 247680, 'steps': 1289, 'loss/train': 0.9954877495765686} +01/26/2022 21:03:19 - INFO - codeparrot_training - Step 1290: {'lr': 0.00032250000000000003, 'samples': 247872, 'steps': 1290, 'loss/train': 1.1254505217075348} +01/26/2022 21:03:23 - INFO - codeparrot_training - Step 1291: {'lr': 0.00032275, 'samples': 248064, 'steps': 1291, 'loss/train': 0.990976095199585} +01/26/2022 21:03:26 - INFO - codeparrot_training - Step 1292: {'lr': 0.000323, 'samples': 248256, 'steps': 1292, 'loss/train': 0.8347549438476562} +01/26/2022 21:03:29 - INFO - codeparrot_training - Step 1293: {'lr': 0.00032324999999999997, 'samples': 248448, 'steps': 1293, 'loss/train': 0.5836189985275269} +01/26/2022 21:03:32 - INFO - codeparrot_training - Step 1294: {'lr': 0.0003235, 'samples': 248640, 'steps': 1294, 'loss/train': 0.7854525446891785} +01/26/2022 21:03:35 - INFO - codeparrot_training - Step 1295: {'lr': 0.00032375, 'samples': 248832, 'steps': 1295, 'loss/train': 1.9712843298912048} +01/26/2022 21:03:38 - INFO - codeparrot_training - Step 1296: {'lr': 0.000324, 'samples': 249024, 'steps': 1296, 'loss/train': 1.9580798149108887} +01/26/2022 21:03:41 - INFO - codeparrot_training - Step 1297: {'lr': 0.00032425, 'samples': 249216, 'steps': 1297, 'loss/train': 1.2691041827201843} +01/26/2022 21:03:46 - INFO - codeparrot_training - Step 1298: {'lr': 0.00032450000000000003, 'samples': 249408, 'steps': 1298, 'loss/train': 1.062127411365509} +01/26/2022 21:03:49 - INFO - codeparrot_training - Step 1299: {'lr': 0.00032475, 'samples': 249600, 'steps': 1299, 'loss/train': 0.4577314406633377} +01/26/2022 21:03:52 - INFO - codeparrot_training - Step 1300: {'lr': 0.00032500000000000004, 'samples': 249792, 'steps': 1300, 'loss/train': 0.49498289823532104} +01/26/2022 21:03:55 - INFO - codeparrot_training - Step 1301: {'lr': 0.00032524999999999996, 'samples': 249984, 'steps': 1301, 'loss/train': 1.0439395308494568} +01/26/2022 21:03:58 - INFO - codeparrot_training - Step 1302: {'lr': 0.0003255, 'samples': 250176, 'steps': 1302, 'loss/train': 0.9454618692398071} +01/26/2022 21:04:02 - INFO - codeparrot_training - Step 1303: {'lr': 0.00032575, 'samples': 250368, 'steps': 1303, 'loss/train': 0.8306179940700531} +01/26/2022 21:04:05 - INFO - codeparrot_training - Step 1304: {'lr': 0.000326, 'samples': 250560, 'steps': 1304, 'loss/train': 1.0661275684833527} +01/26/2022 21:04:08 - INFO - codeparrot_training - Step 1305: {'lr': 0.00032625, 'samples': 250752, 'steps': 1305, 'loss/train': 0.6971832364797592} +01/26/2022 21:04:11 - INFO - codeparrot_training - Step 1306: {'lr': 0.0003265, 'samples': 250944, 'steps': 1306, 'loss/train': 1.0274827480316162} +01/26/2022 21:04:17 - INFO - codeparrot_training - Step 1307: {'lr': 0.00032675, 'samples': 251136, 'steps': 1307, 'loss/train': 1.6470826864242554} +01/26/2022 21:04:20 - INFO - codeparrot_training - Step 1308: {'lr': 0.00032700000000000003, 'samples': 251328, 'steps': 1308, 'loss/train': 1.3024613857269287} +01/26/2022 21:04:23 - INFO - codeparrot_training - Step 1309: {'lr': 0.00032725, 'samples': 251520, 'steps': 1309, 'loss/train': 1.2962641417980194} +01/26/2022 21:04:27 - INFO - codeparrot_training - Step 1310: {'lr': 0.00032750000000000005, 'samples': 251712, 'steps': 1310, 'loss/train': 1.204667866230011} +01/26/2022 21:04:30 - INFO - codeparrot_training - Step 1311: {'lr': 0.00032774999999999997, 'samples': 251904, 'steps': 1311, 'loss/train': 0.6610483825206757} +01/26/2022 21:04:33 - INFO - codeparrot_training - Step 1312: {'lr': 0.000328, 'samples': 252096, 'steps': 1312, 'loss/train': 0.8699553608894348} +01/26/2022 21:04:36 - INFO - codeparrot_training - Step 1313: {'lr': 0.00032825, 'samples': 252288, 'steps': 1313, 'loss/train': 0.8802606761455536} +01/26/2022 21:04:39 - INFO - codeparrot_training - Step 1314: {'lr': 0.0003285, 'samples': 252480, 'steps': 1314, 'loss/train': 1.1301237344741821} +01/26/2022 21:04:42 - INFO - codeparrot_training - Step 1315: {'lr': 0.00032875, 'samples': 252672, 'steps': 1315, 'loss/train': 0.8648131191730499} +01/26/2022 21:04:47 - INFO - codeparrot_training - Step 1316: {'lr': 0.00032900000000000003, 'samples': 252864, 'steps': 1316, 'loss/train': 0.8390500545501709} +01/26/2022 21:04:50 - INFO - codeparrot_training - Step 1317: {'lr': 0.00032925, 'samples': 253056, 'steps': 1317, 'loss/train': 0.5135869234800339} +01/26/2022 21:04:53 - INFO - codeparrot_training - Step 1318: {'lr': 0.00032950000000000004, 'samples': 253248, 'steps': 1318, 'loss/train': 1.0603323876857758} +01/26/2022 21:04:56 - INFO - codeparrot_training - Step 1319: {'lr': 0.00032975, 'samples': 253440, 'steps': 1319, 'loss/train': 0.9542219638824463} +01/26/2022 21:04:59 - INFO - codeparrot_training - Step 1320: {'lr': 0.00033, 'samples': 253632, 'steps': 1320, 'loss/train': 1.0566553473472595} +01/26/2022 21:05:03 - INFO - codeparrot_training - Step 1321: {'lr': 0.00033025, 'samples': 253824, 'steps': 1321, 'loss/train': 1.3202703595161438} +01/26/2022 21:05:06 - INFO - codeparrot_training - Step 1322: {'lr': 0.0003305, 'samples': 254016, 'steps': 1322, 'loss/train': 1.001068890094757} +01/26/2022 21:05:09 - INFO - codeparrot_training - Step 1323: {'lr': 0.00033075, 'samples': 254208, 'steps': 1323, 'loss/train': 0.8870709836483002} +01/26/2022 21:05:13 - INFO - codeparrot_training - Step 1324: {'lr': 0.000331, 'samples': 254400, 'steps': 1324, 'loss/train': 1.2355210483074188} +01/26/2022 21:05:16 - INFO - codeparrot_training - Step 1325: {'lr': 0.00033125, 'samples': 254592, 'steps': 1325, 'loss/train': 1.148157387971878} +01/26/2022 21:05:19 - INFO - codeparrot_training - Step 1326: {'lr': 0.00033150000000000003, 'samples': 254784, 'steps': 1326, 'loss/train': 0.9794060289859772} +01/26/2022 21:05:23 - INFO - codeparrot_training - Step 1327: {'lr': 0.00033175, 'samples': 254976, 'steps': 1327, 'loss/train': 0.44070857763290405} +01/26/2022 21:05:26 - INFO - codeparrot_training - Step 1328: {'lr': 0.00033200000000000005, 'samples': 255168, 'steps': 1328, 'loss/train': 0.8632693290710449} +01/26/2022 21:05:29 - INFO - codeparrot_training - Step 1329: {'lr': 0.00033224999999999997, 'samples': 255360, 'steps': 1329, 'loss/train': 0.9719774723052979} +01/26/2022 21:05:32 - INFO - codeparrot_training - Step 1330: {'lr': 0.0003325, 'samples': 255552, 'steps': 1330, 'loss/train': 3.8416589498519897} +01/26/2022 21:05:35 - INFO - codeparrot_training - Step 1331: {'lr': 0.00033275, 'samples': 255744, 'steps': 1331, 'loss/train': 1.1004060208797455} +01/26/2022 21:05:38 - INFO - codeparrot_training - Step 1332: {'lr': 0.000333, 'samples': 255936, 'steps': 1332, 'loss/train': 0.7440213114023209} +01/26/2022 21:05:44 - INFO - codeparrot_training - Step 1333: {'lr': 0.00033325, 'samples': 256128, 'steps': 1333, 'loss/train': 0.8231377601623535} +01/26/2022 21:05:47 - INFO - codeparrot_training - Step 1334: {'lr': 0.00033350000000000003, 'samples': 256320, 'steps': 1334, 'loss/train': 0.7276638597249985} +01/26/2022 21:05:51 - INFO - codeparrot_training - Step 1335: {'lr': 0.00033375, 'samples': 256512, 'steps': 1335, 'loss/train': 1.099365770816803} +01/26/2022 21:05:54 - INFO - codeparrot_training - Step 1336: {'lr': 0.00033400000000000004, 'samples': 256704, 'steps': 1336, 'loss/train': 1.4680210053920746} +01/26/2022 21:05:57 - INFO - codeparrot_training - Step 1337: {'lr': 0.00033425, 'samples': 256896, 'steps': 1337, 'loss/train': 1.1128609478473663} +01/26/2022 21:06:00 - INFO - codeparrot_training - Step 1338: {'lr': 0.00033450000000000005, 'samples': 257088, 'steps': 1338, 'loss/train': 0.822838693857193} +01/26/2022 21:06:03 - INFO - codeparrot_training - Step 1339: {'lr': 0.00033475, 'samples': 257280, 'steps': 1339, 'loss/train': 0.8494419157505035} +01/26/2022 21:06:06 - INFO - codeparrot_training - Step 1340: {'lr': 0.000335, 'samples': 257472, 'steps': 1340, 'loss/train': 0.7268175780773163} +01/26/2022 21:06:09 - INFO - codeparrot_training - Step 1341: {'lr': 0.00033525, 'samples': 257664, 'steps': 1341, 'loss/train': 0.868606299161911} +01/26/2022 21:06:14 - INFO - codeparrot_training - Step 1342: {'lr': 0.0003355, 'samples': 257856, 'steps': 1342, 'loss/train': 0.818723738193512} +01/26/2022 21:06:17 - INFO - codeparrot_training - Step 1343: {'lr': 0.00033575, 'samples': 258048, 'steps': 1343, 'loss/train': 1.5978684425354004} +01/26/2022 21:06:20 - INFO - codeparrot_training - Step 1344: {'lr': 0.00033600000000000004, 'samples': 258240, 'steps': 1344, 'loss/train': 0.5804193019866943} +01/26/2022 21:06:23 - INFO - codeparrot_training - Step 1345: {'lr': 0.00033625, 'samples': 258432, 'steps': 1345, 'loss/train': 1.0418038666248322} +01/26/2022 21:06:26 - INFO - codeparrot_training - Step 1346: {'lr': 0.00033650000000000005, 'samples': 258624, 'steps': 1346, 'loss/train': 0.8577019572257996} +01/26/2022 21:06:30 - INFO - codeparrot_training - Step 1347: {'lr': 0.00033675, 'samples': 258816, 'steps': 1347, 'loss/train': 0.6332640498876572} +01/26/2022 21:06:33 - INFO - codeparrot_training - Step 1348: {'lr': 0.000337, 'samples': 259008, 'steps': 1348, 'loss/train': 1.1220255196094513} +01/26/2022 21:06:36 - INFO - codeparrot_training - Step 1349: {'lr': 0.00033725, 'samples': 259200, 'steps': 1349, 'loss/train': 0.7499145269393921} +01/26/2022 21:06:39 - INFO - codeparrot_training - Step 1350: {'lr': 0.0003375, 'samples': 259392, 'steps': 1350, 'loss/train': 1.040358155965805} +01/26/2022 21:06:44 - INFO - codeparrot_training - Step 1351: {'lr': 0.00033775, 'samples': 259584, 'steps': 1351, 'loss/train': 1.2843874096870422} +01/26/2022 21:06:47 - INFO - codeparrot_training - Step 1352: {'lr': 0.00033800000000000003, 'samples': 259776, 'steps': 1352, 'loss/train': 1.238112062215805} +01/26/2022 21:06:50 - INFO - codeparrot_training - Step 1353: {'lr': 0.00033825, 'samples': 259968, 'steps': 1353, 'loss/train': 0.3218713849782944} +01/26/2022 21:06:53 - INFO - codeparrot_training - Step 1354: {'lr': 0.00033850000000000004, 'samples': 260160, 'steps': 1354, 'loss/train': 0.7913881838321686} +01/26/2022 21:06:56 - INFO - codeparrot_training - Step 1355: {'lr': 0.00033875, 'samples': 260352, 'steps': 1355, 'loss/train': 0.6941387057304382} +01/26/2022 21:06:59 - INFO - codeparrot_training - Step 1356: {'lr': 0.00033900000000000005, 'samples': 260544, 'steps': 1356, 'loss/train': 0.8672609925270081} +01/26/2022 21:07:03 - INFO - codeparrot_training - Step 1357: {'lr': 0.00033925, 'samples': 260736, 'steps': 1357, 'loss/train': 0.4973887950181961} +01/26/2022 21:07:06 - INFO - codeparrot_training - Step 1358: {'lr': 0.0003395, 'samples': 260928, 'steps': 1358, 'loss/train': 0.8578560054302216} +01/26/2022 21:07:10 - INFO - codeparrot_training - Step 1359: {'lr': 0.00033975, 'samples': 261120, 'steps': 1359, 'loss/train': 0.753614991903305} +01/26/2022 21:07:14 - INFO - codeparrot_training - Step 1360: {'lr': 0.00034, 'samples': 261312, 'steps': 1360, 'loss/train': 0.7949243187904358} +01/26/2022 21:07:17 - INFO - codeparrot_training - Step 1361: {'lr': 0.00034025, 'samples': 261504, 'steps': 1361, 'loss/train': 1.0931392908096313} +01/26/2022 21:07:20 - INFO - codeparrot_training - Step 1362: {'lr': 0.00034050000000000004, 'samples': 261696, 'steps': 1362, 'loss/train': 0.7639859318733215} +01/26/2022 21:07:23 - INFO - codeparrot_training - Step 1363: {'lr': 0.00034075, 'samples': 261888, 'steps': 1363, 'loss/train': 1.110782414674759} +01/26/2022 21:07:26 - INFO - codeparrot_training - Step 1364: {'lr': 0.00034100000000000005, 'samples': 262080, 'steps': 1364, 'loss/train': 0.7246271520853043} +01/26/2022 21:07:29 - INFO - codeparrot_training - Step 1365: {'lr': 0.00034125000000000003, 'samples': 262272, 'steps': 1365, 'loss/train': 0.735277533531189} +01/26/2022 21:07:32 - INFO - codeparrot_training - Step 1366: {'lr': 0.0003415, 'samples': 262464, 'steps': 1366, 'loss/train': 1.0827614665031433} +01/26/2022 21:07:36 - INFO - codeparrot_training - Step 1367: {'lr': 0.00034175, 'samples': 262656, 'steps': 1367, 'loss/train': 0.8820734024047852} +01/26/2022 21:07:41 - INFO - codeparrot_training - Step 1368: {'lr': 0.000342, 'samples': 262848, 'steps': 1368, 'loss/train': 0.8954282104969025} +01/26/2022 21:07:44 - INFO - codeparrot_training - Step 1369: {'lr': 0.00034225, 'samples': 263040, 'steps': 1369, 'loss/train': 0.8317834138870239} +01/26/2022 21:07:47 - INFO - codeparrot_training - Step 1370: {'lr': 0.00034250000000000003, 'samples': 263232, 'steps': 1370, 'loss/train': 0.8572661876678467} +01/26/2022 21:07:51 - INFO - codeparrot_training - Step 1371: {'lr': 0.00034275, 'samples': 263424, 'steps': 1371, 'loss/train': 0.7083683162927628} +01/26/2022 21:07:54 - INFO - codeparrot_training - Step 1372: {'lr': 0.00034300000000000004, 'samples': 263616, 'steps': 1372, 'loss/train': 0.9145535230636597} +01/26/2022 21:07:57 - INFO - codeparrot_training - Step 1373: {'lr': 0.00034325, 'samples': 263808, 'steps': 1373, 'loss/train': 1.3301053941249847} +01/26/2022 21:08:00 - INFO - codeparrot_training - Step 1374: {'lr': 0.00034350000000000006, 'samples': 264000, 'steps': 1374, 'loss/train': 1.1468211114406586} +01/26/2022 21:08:03 - INFO - codeparrot_training - Step 1375: {'lr': 0.00034375, 'samples': 264192, 'steps': 1375, 'loss/train': 0.7009869664907455} +01/26/2022 21:08:06 - INFO - codeparrot_training - Step 1376: {'lr': 0.00034399999999999996, 'samples': 264384, 'steps': 1376, 'loss/train': 0.9463578164577484} +01/26/2022 21:08:11 - INFO - codeparrot_training - Step 1377: {'lr': 0.00034425, 'samples': 264576, 'steps': 1377, 'loss/train': 1.1591105461120605} +01/26/2022 21:08:14 - INFO - codeparrot_training - Step 1378: {'lr': 0.00034449999999999997, 'samples': 264768, 'steps': 1378, 'loss/train': 0.96595099568367} +01/26/2022 21:08:17 - INFO - codeparrot_training - Step 1379: {'lr': 0.00034475, 'samples': 264960, 'steps': 1379, 'loss/train': 0.6593697667121887} +01/26/2022 21:08:20 - INFO - codeparrot_training - Step 1380: {'lr': 0.000345, 'samples': 265152, 'steps': 1380, 'loss/train': 0.8035639822483063} +01/26/2022 21:08:23 - INFO - codeparrot_training - Step 1381: {'lr': 0.00034525, 'samples': 265344, 'steps': 1381, 'loss/train': 0.8536484241485596} +01/26/2022 21:08:26 - INFO - codeparrot_training - Step 1382: {'lr': 0.0003455, 'samples': 265536, 'steps': 1382, 'loss/train': 1.4567998945713043} +01/26/2022 21:08:29 - INFO - codeparrot_training - Step 1383: {'lr': 0.00034575000000000003, 'samples': 265728, 'steps': 1383, 'loss/train': 1.1728605329990387} +01/26/2022 21:08:33 - INFO - codeparrot_training - Step 1384: {'lr': 0.000346, 'samples': 265920, 'steps': 1384, 'loss/train': 0.5751859098672867} +01/26/2022 21:08:36 - INFO - codeparrot_training - Step 1385: {'lr': 0.00034625, 'samples': 266112, 'steps': 1385, 'loss/train': 0.722655862569809} +01/26/2022 21:08:42 - INFO - codeparrot_training - Step 1386: {'lr': 0.00034649999999999997, 'samples': 266304, 'steps': 1386, 'loss/train': 0.9264733493328094} +01/26/2022 21:08:45 - INFO - codeparrot_training - Step 1387: {'lr': 0.00034675, 'samples': 266496, 'steps': 1387, 'loss/train': 0.6819387674331665} +01/26/2022 21:08:48 - INFO - codeparrot_training - Step 1388: {'lr': 0.000347, 'samples': 266688, 'steps': 1388, 'loss/train': 1.0372397303581238} +01/26/2022 21:08:51 - INFO - codeparrot_training - Step 1389: {'lr': 0.00034725, 'samples': 266880, 'steps': 1389, 'loss/train': 0.6640693992376328} +01/26/2022 21:08:55 - INFO - codeparrot_training - Step 1390: {'lr': 0.0003475, 'samples': 267072, 'steps': 1390, 'loss/train': 0.23234233260154724} +01/26/2022 21:08:58 - INFO - codeparrot_training - Step 1391: {'lr': 0.00034775, 'samples': 267264, 'steps': 1391, 'loss/train': 0.6895884722471237} +01/26/2022 21:09:01 - INFO - codeparrot_training - Step 1392: {'lr': 0.000348, 'samples': 267456, 'steps': 1392, 'loss/train': 0.13055146113038063} +01/26/2022 21:09:04 - INFO - codeparrot_training - Step 1393: {'lr': 0.00034825000000000004, 'samples': 267648, 'steps': 1393, 'loss/train': 1.0278124809265137} +01/26/2022 21:09:08 - INFO - codeparrot_training - Step 1394: {'lr': 0.00034849999999999996, 'samples': 267840, 'steps': 1394, 'loss/train': 1.3332147896289825} +01/26/2022 21:09:11 - INFO - codeparrot_training - Step 1395: {'lr': 0.00034875, 'samples': 268032, 'steps': 1395, 'loss/train': 1.1082002520561218} +01/26/2022 21:09:15 - INFO - codeparrot_training - Step 1396: {'lr': 0.00034899999999999997, 'samples': 268224, 'steps': 1396, 'loss/train': 1.2473546862602234} +01/26/2022 21:09:18 - INFO - codeparrot_training - Step 1397: {'lr': 0.00034925, 'samples': 268416, 'steps': 1397, 'loss/train': 1.0238586366176605} +01/26/2022 21:09:21 - INFO - codeparrot_training - Step 1398: {'lr': 0.0003495, 'samples': 268608, 'steps': 1398, 'loss/train': 0.8205553293228149} +01/26/2022 21:09:24 - INFO - codeparrot_training - Step 1399: {'lr': 0.00034975, 'samples': 268800, 'steps': 1399, 'loss/train': 1.0121375620365143} +01/26/2022 21:09:27 - INFO - codeparrot_training - Step 1400: {'lr': 0.00035, 'samples': 268992, 'steps': 1400, 'loss/train': 0.8104203343391418} +01/26/2022 21:09:30 - INFO - codeparrot_training - Step 1401: {'lr': 0.00035025000000000003, 'samples': 269184, 'steps': 1401, 'loss/train': 0.12986082583665848} +01/26/2022 21:09:33 - INFO - codeparrot_training - Step 1402: {'lr': 0.0003505, 'samples': 269376, 'steps': 1402, 'loss/train': 1.314984530210495} +01/26/2022 21:09:38 - INFO - codeparrot_training - Step 1403: {'lr': 0.00035075, 'samples': 269568, 'steps': 1403, 'loss/train': 0.2538214176893234} +01/26/2022 21:09:41 - INFO - codeparrot_training - Step 1404: {'lr': 0.00035099999999999997, 'samples': 269760, 'steps': 1404, 'loss/train': 1.0766460299491882} +01/26/2022 21:09:44 - INFO - codeparrot_training - Step 1405: {'lr': 0.00035125, 'samples': 269952, 'steps': 1405, 'loss/train': 0.2770286500453949} +01/26/2022 21:09:47 - INFO - codeparrot_training - Step 1406: {'lr': 0.0003515, 'samples': 270144, 'steps': 1406, 'loss/train': 1.054178237915039} +01/26/2022 21:09:50 - INFO - codeparrot_training - Step 1407: {'lr': 0.00035175, 'samples': 270336, 'steps': 1407, 'loss/train': 0.8579706251621246} +01/26/2022 21:09:53 - INFO - codeparrot_training - Step 1408: {'lr': 0.000352, 'samples': 270528, 'steps': 1408, 'loss/train': 1.0789376199245453} +01/26/2022 21:09:57 - INFO - codeparrot_training - Step 1409: {'lr': 0.00035225, 'samples': 270720, 'steps': 1409, 'loss/train': 1.3925848603248596} +01/26/2022 21:10:00 - INFO - codeparrot_training - Step 1410: {'lr': 0.0003525, 'samples': 270912, 'steps': 1410, 'loss/train': 0.5225864052772522} +01/26/2022 21:10:03 - INFO - codeparrot_training - Step 1411: {'lr': 0.00035275000000000004, 'samples': 271104, 'steps': 1411, 'loss/train': 1.2726842164993286} +01/26/2022 21:10:09 - INFO - codeparrot_training - Step 1412: {'lr': 0.00035299999999999996, 'samples': 271296, 'steps': 1412, 'loss/train': 0.7153510898351669} +01/26/2022 21:10:12 - INFO - codeparrot_training - Step 1413: {'lr': 0.00035325, 'samples': 271488, 'steps': 1413, 'loss/train': 0.40713250637054443} +01/26/2022 21:10:15 - INFO - codeparrot_training - Step 1414: {'lr': 0.0003535, 'samples': 271680, 'steps': 1414, 'loss/train': 0.7666701972484589} +01/26/2022 21:10:18 - INFO - codeparrot_training - Step 1415: {'lr': 0.00035375, 'samples': 271872, 'steps': 1415, 'loss/train': 1.1024984121322632} +01/26/2022 21:10:22 - INFO - codeparrot_training - Step 1416: {'lr': 0.000354, 'samples': 272064, 'steps': 1416, 'loss/train': 0.9069237112998962} +01/26/2022 21:10:25 - INFO - codeparrot_training - Step 1417: {'lr': 0.00035425, 'samples': 272256, 'steps': 1417, 'loss/train': 0.9518739581108093} +01/26/2022 21:10:28 - INFO - codeparrot_training - Step 1418: {'lr': 0.0003545, 'samples': 272448, 'steps': 1418, 'loss/train': 1.203192561864853} +01/26/2022 21:10:31 - INFO - codeparrot_training - Step 1419: {'lr': 0.00035475000000000003, 'samples': 272640, 'steps': 1419, 'loss/train': 1.001071572303772} +01/26/2022 21:10:34 - INFO - codeparrot_training - Step 1420: {'lr': 0.000355, 'samples': 272832, 'steps': 1420, 'loss/train': 1.5776689052581787} +01/26/2022 21:10:39 - INFO - codeparrot_training - Step 1421: {'lr': 0.00035525000000000004, 'samples': 273024, 'steps': 1421, 'loss/train': 0.9579824209213257} +01/26/2022 21:10:42 - INFO - codeparrot_training - Step 1422: {'lr': 0.00035549999999999997, 'samples': 273216, 'steps': 1422, 'loss/train': 0.22726918011903763} +01/26/2022 21:10:45 - INFO - codeparrot_training - Step 1423: {'lr': 0.00035575, 'samples': 273408, 'steps': 1423, 'loss/train': 1.2481546998023987} +01/26/2022 21:10:48 - INFO - codeparrot_training - Step 1424: {'lr': 0.000356, 'samples': 273600, 'steps': 1424, 'loss/train': 0.850852757692337} +01/26/2022 21:10:51 - INFO - codeparrot_training - Step 1425: {'lr': 0.00035625, 'samples': 273792, 'steps': 1425, 'loss/train': 0.9370703101158142} +01/26/2022 21:10:55 - INFO - codeparrot_training - Step 1426: {'lr': 0.0003565, 'samples': 273984, 'steps': 1426, 'loss/train': 1.0206585824489594} +01/26/2022 21:10:58 - INFO - codeparrot_training - Step 1427: {'lr': 0.00035675, 'samples': 274176, 'steps': 1427, 'loss/train': 1.035037636756897} +01/26/2022 21:11:01 - INFO - codeparrot_training - Step 1428: {'lr': 0.000357, 'samples': 274368, 'steps': 1428, 'loss/train': 0.8349533379077911} +01/26/2022 21:11:05 - INFO - codeparrot_training - Step 1429: {'lr': 0.00035725000000000004, 'samples': 274560, 'steps': 1429, 'loss/train': 0.8950520753860474} +01/26/2022 21:11:08 - INFO - codeparrot_training - Step 1430: {'lr': 0.0003575, 'samples': 274752, 'steps': 1430, 'loss/train': 1.1159393191337585} +01/26/2022 21:11:11 - INFO - codeparrot_training - Step 1431: {'lr': 0.00035775, 'samples': 274944, 'steps': 1431, 'loss/train': 1.2659131586551666} +01/26/2022 21:11:15 - INFO - codeparrot_training - Step 1432: {'lr': 0.000358, 'samples': 275136, 'steps': 1432, 'loss/train': 1.0870972573757172} +01/26/2022 21:11:18 - INFO - codeparrot_training - Step 1433: {'lr': 0.00035825, 'samples': 275328, 'steps': 1433, 'loss/train': 0.8313236832618713} +01/26/2022 21:11:21 - INFO - codeparrot_training - Step 1434: {'lr': 0.0003585, 'samples': 275520, 'steps': 1434, 'loss/train': 1.0548158884048462} +01/26/2022 21:11:24 - INFO - codeparrot_training - Step 1435: {'lr': 0.00035875, 'samples': 275712, 'steps': 1435, 'loss/train': 0.6349529027938843} +01/26/2022 21:11:27 - INFO - codeparrot_training - Step 1436: {'lr': 0.000359, 'samples': 275904, 'steps': 1436, 'loss/train': 0.7269305884838104} +01/26/2022 21:11:30 - INFO - codeparrot_training - Step 1437: {'lr': 0.00035925000000000003, 'samples': 276096, 'steps': 1437, 'loss/train': 0.8836067318916321} +01/26/2022 21:11:36 - INFO - codeparrot_training - Step 1438: {'lr': 0.0003595, 'samples': 276288, 'steps': 1438, 'loss/train': 1.2289762794971466} +01/26/2022 21:11:40 - INFO - codeparrot_training - Step 1439: {'lr': 0.00035975000000000004, 'samples': 276480, 'steps': 1439, 'loss/train': 0.9617836475372314} +01/26/2022 21:11:43 - INFO - codeparrot_training - Step 1440: {'lr': 0.00035999999999999997, 'samples': 276672, 'steps': 1440, 'loss/train': 1.362577110528946} +01/26/2022 21:11:46 - INFO - codeparrot_training - Step 1441: {'lr': 0.00036025, 'samples': 276864, 'steps': 1441, 'loss/train': 0.7411259114742279} +01/26/2022 21:11:49 - INFO - codeparrot_training - Step 1442: {'lr': 0.0003605, 'samples': 277056, 'steps': 1442, 'loss/train': 1.1015606224536896} +01/26/2022 21:11:52 - INFO - codeparrot_training - Step 1443: {'lr': 0.00036075, 'samples': 277248, 'steps': 1443, 'loss/train': 1.2923219203948975} +01/26/2022 21:11:55 - INFO - codeparrot_training - Step 1444: {'lr': 0.000361, 'samples': 277440, 'steps': 1444, 'loss/train': 1.3723007440567017} +01/26/2022 21:11:58 - INFO - codeparrot_training - Step 1445: {'lr': 0.00036125, 'samples': 277632, 'steps': 1445, 'loss/train': 1.5804736018180847} +01/26/2022 21:12:01 - INFO - codeparrot_training - Step 1446: {'lr': 0.0003615, 'samples': 277824, 'steps': 1446, 'loss/train': 1.3025188744068146} +01/26/2022 21:12:06 - INFO - codeparrot_training - Step 1447: {'lr': 0.00036175000000000004, 'samples': 278016, 'steps': 1447, 'loss/train': 0.9075505435466766} +01/26/2022 21:12:09 - INFO - codeparrot_training - Step 1448: {'lr': 0.000362, 'samples': 278208, 'steps': 1448, 'loss/train': 0.576828807592392} +01/26/2022 21:12:13 - INFO - codeparrot_training - Step 1449: {'lr': 0.00036225000000000005, 'samples': 278400, 'steps': 1449, 'loss/train': 0.9907841384410858} +01/26/2022 21:12:16 - INFO - codeparrot_training - Step 1450: {'lr': 0.0003625, 'samples': 278592, 'steps': 1450, 'loss/train': 1.2009357511997223} +01/26/2022 21:12:19 - INFO - codeparrot_training - Step 1451: {'lr': 0.00036275, 'samples': 278784, 'steps': 1451, 'loss/train': 1.6536110043525696} +01/26/2022 21:12:22 - INFO - codeparrot_training - Step 1452: {'lr': 0.000363, 'samples': 278976, 'steps': 1452, 'loss/train': 1.003242552280426} +01/26/2022 21:12:25 - INFO - codeparrot_training - Step 1453: {'lr': 0.00036325, 'samples': 279168, 'steps': 1453, 'loss/train': 0.8086393475532532} +01/26/2022 21:12:28 - INFO - codeparrot_training - Step 1454: {'lr': 0.0003635, 'samples': 279360, 'steps': 1454, 'loss/train': 0.7789215445518494} +01/26/2022 21:12:33 - INFO - codeparrot_training - Step 1455: {'lr': 0.00036375000000000003, 'samples': 279552, 'steps': 1455, 'loss/train': 0.8268111348152161} +01/26/2022 21:12:36 - INFO - codeparrot_training - Step 1456: {'lr': 0.000364, 'samples': 279744, 'steps': 1456, 'loss/train': 1.1320781707763672} +01/26/2022 21:12:39 - INFO - codeparrot_training - Step 1457: {'lr': 0.00036425000000000004, 'samples': 279936, 'steps': 1457, 'loss/train': 0.30621258169412613} +01/26/2022 21:12:42 - INFO - codeparrot_training - Step 1458: {'lr': 0.0003645, 'samples': 280128, 'steps': 1458, 'loss/train': 0.2289910465478897} +01/26/2022 21:12:45 - INFO - codeparrot_training - Step 1459: {'lr': 0.00036475, 'samples': 280320, 'steps': 1459, 'loss/train': 0.6802295297384262} +01/26/2022 21:12:48 - INFO - codeparrot_training - Step 1460: {'lr': 0.000365, 'samples': 280512, 'steps': 1460, 'loss/train': 0.41221417486667633} +01/26/2022 21:12:51 - INFO - codeparrot_training - Step 1461: {'lr': 0.00036525, 'samples': 280704, 'steps': 1461, 'loss/train': 0.7212480157613754} +01/26/2022 21:12:55 - INFO - codeparrot_training - Step 1462: {'lr': 0.0003655, 'samples': 280896, 'steps': 1462, 'loss/train': 1.1107229590415955} +01/26/2022 21:12:58 - INFO - codeparrot_training - Step 1463: {'lr': 0.00036575, 'samples': 281088, 'steps': 1463, 'loss/train': 1.149871587753296} +01/26/2022 21:13:04 - INFO - codeparrot_training - Step 1464: {'lr': 0.000366, 'samples': 281280, 'steps': 1464, 'loss/train': 0.6629093885421753} +01/26/2022 21:13:07 - INFO - codeparrot_training - Step 1465: {'lr': 0.00036625000000000004, 'samples': 281472, 'steps': 1465, 'loss/train': 0.6574883311986923} +01/26/2022 21:13:10 - INFO - codeparrot_training - Step 1466: {'lr': 0.0003665, 'samples': 281664, 'steps': 1466, 'loss/train': 0.3459419757127762} +01/26/2022 21:13:13 - INFO - codeparrot_training - Step 1467: {'lr': 0.00036675000000000005, 'samples': 281856, 'steps': 1467, 'loss/train': 1.0028822422027588} +01/26/2022 21:13:16 - INFO - codeparrot_training - Step 1468: {'lr': 0.000367, 'samples': 282048, 'steps': 1468, 'loss/train': 0.8441137075424194} +01/26/2022 21:13:19 - INFO - codeparrot_training - Step 1469: {'lr': 0.00036725, 'samples': 282240, 'steps': 1469, 'loss/train': 0.7084958553314209} +01/26/2022 21:13:23 - INFO - codeparrot_training - Step 1470: {'lr': 0.0003675, 'samples': 282432, 'steps': 1470, 'loss/train': 0.8485589325428009} +01/26/2022 21:13:26 - INFO - codeparrot_training - Step 1471: {'lr': 0.00036775, 'samples': 282624, 'steps': 1471, 'loss/train': 0.964582085609436} +01/26/2022 21:13:29 - INFO - codeparrot_training - Step 1472: {'lr': 0.000368, 'samples': 282816, 'steps': 1472, 'loss/train': 1.0355412662029266} +01/26/2022 21:13:33 - INFO - codeparrot_training - Step 1473: {'lr': 0.00036825000000000003, 'samples': 283008, 'steps': 1473, 'loss/train': 1.048643320798874} +01/26/2022 21:13:37 - INFO - codeparrot_training - Step 1474: {'lr': 0.0003685, 'samples': 283200, 'steps': 1474, 'loss/train': 1.1607147753238678} +01/26/2022 21:13:40 - INFO - codeparrot_training - Step 1475: {'lr': 0.00036875000000000005, 'samples': 283392, 'steps': 1475, 'loss/train': 0.9776473045349121} +01/26/2022 21:13:43 - INFO - codeparrot_training - Step 1476: {'lr': 0.000369, 'samples': 283584, 'steps': 1476, 'loss/train': 0.8770821690559387} +01/26/2022 21:13:46 - INFO - codeparrot_training - Step 1477: {'lr': 0.00036925, 'samples': 283776, 'steps': 1477, 'loss/train': 0.19134023040533066} +01/26/2022 21:13:49 - INFO - codeparrot_training - Step 1478: {'lr': 0.0003695, 'samples': 283968, 'steps': 1478, 'loss/train': 1.9430841207504272} +01/26/2022 21:13:52 - INFO - codeparrot_training - Step 1479: {'lr': 0.00036975, 'samples': 284160, 'steps': 1479, 'loss/train': 1.4303966760635376} +01/26/2022 21:13:55 - INFO - codeparrot_training - Step 1480: {'lr': 0.00037, 'samples': 284352, 'steps': 1480, 'loss/train': 0.836928516626358} +01/26/2022 21:13:59 - INFO - codeparrot_training - Step 1481: {'lr': 0.00037025000000000003, 'samples': 284544, 'steps': 1481, 'loss/train': 0.5673790723085403} +01/26/2022 21:14:03 - INFO - codeparrot_training - Step 1482: {'lr': 0.0003705, 'samples': 284736, 'steps': 1482, 'loss/train': 1.098906934261322} +01/26/2022 21:14:06 - INFO - codeparrot_training - Step 1483: {'lr': 0.00037075000000000004, 'samples': 284928, 'steps': 1483, 'loss/train': 0.23770031332969666} +01/26/2022 21:14:10 - INFO - codeparrot_training - Step 1484: {'lr': 0.000371, 'samples': 285120, 'steps': 1484, 'loss/train': 1.1671381294727325} +01/26/2022 21:14:13 - INFO - codeparrot_training - Step 1485: {'lr': 0.00037125000000000005, 'samples': 285312, 'steps': 1485, 'loss/train': 0.1803583838045597} +01/26/2022 21:14:16 - INFO - codeparrot_training - Step 1486: {'lr': 0.00037150000000000003, 'samples': 285504, 'steps': 1486, 'loss/train': 0.8468064665794373} +01/26/2022 21:14:19 - INFO - codeparrot_training - Step 1487: {'lr': 0.00037175, 'samples': 285696, 'steps': 1487, 'loss/train': 0.9230948388576508} +01/26/2022 21:14:22 - INFO - codeparrot_training - Step 1488: {'lr': 0.000372, 'samples': 285888, 'steps': 1488, 'loss/train': 0.6031788736581802} +01/26/2022 21:14:25 - INFO - codeparrot_training - Step 1489: {'lr': 0.00037225, 'samples': 286080, 'steps': 1489, 'loss/train': 0.8745079636573792} +01/26/2022 21:14:28 - INFO - codeparrot_training - Step 1490: {'lr': 0.0003725, 'samples': 286272, 'steps': 1490, 'loss/train': 0.7502643764019012} +01/26/2022 21:14:35 - INFO - codeparrot_training - Step 1491: {'lr': 0.00037275000000000003, 'samples': 286464, 'steps': 1491, 'loss/train': 0.8902399241924286} +01/26/2022 21:14:38 - INFO - codeparrot_training - Step 1492: {'lr': 0.000373, 'samples': 286656, 'steps': 1492, 'loss/train': 0.8958548605442047} +01/26/2022 21:14:41 - INFO - codeparrot_training - Step 1493: {'lr': 0.00037325000000000005, 'samples': 286848, 'steps': 1493, 'loss/train': 1.2120806872844696} +01/26/2022 21:14:45 - INFO - codeparrot_training - Step 1494: {'lr': 0.0003735, 'samples': 287040, 'steps': 1494, 'loss/train': 1.110617458820343} +01/26/2022 21:14:48 - INFO - codeparrot_training - Step 1495: {'lr': 0.00037375000000000006, 'samples': 287232, 'steps': 1495, 'loss/train': 4.196964383125305} +01/26/2022 21:14:51 - INFO - codeparrot_training - Step 1496: {'lr': 0.000374, 'samples': 287424, 'steps': 1496, 'loss/train': 1.0553580522537231} +01/26/2022 21:14:54 - INFO - codeparrot_training - Step 1497: {'lr': 0.00037425, 'samples': 287616, 'steps': 1497, 'loss/train': 2.0101332664489746} +01/26/2022 21:14:57 - INFO - codeparrot_training - Step 1498: {'lr': 0.0003745, 'samples': 287808, 'steps': 1498, 'loss/train': 0.8249520063400269} +01/26/2022 21:15:00 - INFO - codeparrot_training - Step 1499: {'lr': 0.00037475000000000003, 'samples': 288000, 'steps': 1499, 'loss/train': 0.48685017228126526} +01/26/2022 21:15:05 - INFO - codeparrot_training - Step 1500: {'lr': 0.000375, 'samples': 288192, 'steps': 1500, 'loss/train': 1.4895641505718231} +01/26/2022 21:15:08 - INFO - codeparrot_training - Step 1501: {'lr': 0.00037525, 'samples': 288384, 'steps': 1501, 'loss/train': 0.2093707099556923} +01/26/2022 21:15:11 - INFO - codeparrot_training - Step 1502: {'lr': 0.0003755, 'samples': 288576, 'steps': 1502, 'loss/train': 0.5461329817771912} +01/26/2022 21:15:14 - INFO - codeparrot_training - Step 1503: {'lr': 0.00037575, 'samples': 288768, 'steps': 1503, 'loss/train': 1.0211555063724518} +01/26/2022 21:15:17 - INFO - codeparrot_training - Step 1504: {'lr': 0.00037600000000000003, 'samples': 288960, 'steps': 1504, 'loss/train': 1.2740503549575806} +01/26/2022 21:15:20 - INFO - codeparrot_training - Step 1505: {'lr': 0.00037624999999999996, 'samples': 289152, 'steps': 1505, 'loss/train': 0.6302264928817749} +01/26/2022 21:15:23 - INFO - codeparrot_training - Step 1506: {'lr': 0.0003765, 'samples': 289344, 'steps': 1506, 'loss/train': 1.5738256573677063} +01/26/2022 21:15:27 - INFO - codeparrot_training - Step 1507: {'lr': 0.00037674999999999997, 'samples': 289536, 'steps': 1507, 'loss/train': 0.9112080037593842} +01/26/2022 21:15:30 - INFO - codeparrot_training - Step 1508: {'lr': 0.000377, 'samples': 289728, 'steps': 1508, 'loss/train': 0.9607047736644745} +01/26/2022 21:15:34 - INFO - codeparrot_training - Step 1509: {'lr': 0.00037725, 'samples': 289920, 'steps': 1509, 'loss/train': 0.4343150854110718} +01/26/2022 21:15:37 - INFO - codeparrot_training - Step 1510: {'lr': 0.0003775, 'samples': 290112, 'steps': 1510, 'loss/train': 1.0953315496444702} +01/26/2022 21:15:40 - INFO - codeparrot_training - Step 1511: {'lr': 0.00037775, 'samples': 290304, 'steps': 1511, 'loss/train': 0.8842317759990692} +01/26/2022 21:15:44 - INFO - codeparrot_training - Step 1512: {'lr': 0.000378, 'samples': 290496, 'steps': 1512, 'loss/train': 0.7734381258487701} +01/26/2022 21:15:47 - INFO - codeparrot_training - Step 1513: {'lr': 0.00037825, 'samples': 290688, 'steps': 1513, 'loss/train': 0.5693178176879883} +01/26/2022 21:15:50 - INFO - codeparrot_training - Step 1514: {'lr': 0.0003785, 'samples': 290880, 'steps': 1514, 'loss/train': 0.7337125092744827} +01/26/2022 21:15:53 - INFO - codeparrot_training - Step 1515: {'lr': 0.00037874999999999996, 'samples': 291072, 'steps': 1515, 'loss/train': 0.7267966568470001} +01/26/2022 21:15:56 - INFO - codeparrot_training - Step 1516: {'lr': 0.000379, 'samples': 291264, 'steps': 1516, 'loss/train': 1.007617861032486} +01/26/2022 21:15:59 - INFO - codeparrot_training - Step 1517: {'lr': 0.00037925, 'samples': 291456, 'steps': 1517, 'loss/train': 1.068414956331253} +01/26/2022 21:16:05 - INFO - codeparrot_training - Step 1518: {'lr': 0.0003795, 'samples': 291648, 'steps': 1518, 'loss/train': 0.7719941139221191} +01/26/2022 21:16:08 - INFO - codeparrot_training - Step 1519: {'lr': 0.00037975, 'samples': 291840, 'steps': 1519, 'loss/train': 1.0266875624656677} +01/26/2022 21:16:11 - INFO - codeparrot_training - Step 1520: {'lr': 0.00038, 'samples': 292032, 'steps': 1520, 'loss/train': 1.0779113173484802} +01/26/2022 21:16:14 - INFO - codeparrot_training - Step 1521: {'lr': 0.00038025, 'samples': 292224, 'steps': 1521, 'loss/train': 1.1040978133678436} +01/26/2022 21:16:17 - INFO - codeparrot_training - Step 1522: {'lr': 0.00038050000000000003, 'samples': 292416, 'steps': 1522, 'loss/train': 0.7414789348840714} +01/26/2022 21:16:21 - INFO - codeparrot_training - Step 1523: {'lr': 0.00038075, 'samples': 292608, 'steps': 1523, 'loss/train': 1.0996066331863403} +01/26/2022 21:16:24 - INFO - codeparrot_training - Step 1524: {'lr': 0.000381, 'samples': 292800, 'steps': 1524, 'loss/train': 0.7356545180082321} +01/26/2022 21:16:27 - INFO - codeparrot_training - Step 1525: {'lr': 0.00038124999999999997, 'samples': 292992, 'steps': 1525, 'loss/train': 0.6968617290258408} +01/26/2022 21:16:31 - INFO - codeparrot_training - Step 1526: {'lr': 0.0003815, 'samples': 293184, 'steps': 1526, 'loss/train': 0.9847961962223053} +01/26/2022 21:16:34 - INFO - codeparrot_training - Step 1527: {'lr': 0.00038175, 'samples': 293376, 'steps': 1527, 'loss/train': 1.3369852602481842} +01/26/2022 21:16:38 - INFO - codeparrot_training - Step 1528: {'lr': 0.000382, 'samples': 293568, 'steps': 1528, 'loss/train': 1.1806872189044952} +01/26/2022 21:16:41 - INFO - codeparrot_training - Step 1529: {'lr': 0.00038225, 'samples': 293760, 'steps': 1529, 'loss/train': 1.4961203634738922} +01/26/2022 21:16:44 - INFO - codeparrot_training - Step 1530: {'lr': 0.00038250000000000003, 'samples': 293952, 'steps': 1530, 'loss/train': 1.1804668307304382} +01/26/2022 21:16:47 - INFO - codeparrot_training - Step 1531: {'lr': 0.00038275, 'samples': 294144, 'steps': 1531, 'loss/train': 0.5334660708904266} +01/26/2022 21:16:50 - INFO - codeparrot_training - Step 1532: {'lr': 0.00038300000000000004, 'samples': 294336, 'steps': 1532, 'loss/train': 1.3035263121128082} +01/26/2022 21:16:53 - INFO - codeparrot_training - Step 1533: {'lr': 0.00038324999999999996, 'samples': 294528, 'steps': 1533, 'loss/train': 0.8394831418991089} +01/26/2022 21:16:56 - INFO - codeparrot_training - Step 1534: {'lr': 0.0003835, 'samples': 294720, 'steps': 1534, 'loss/train': 0.5574247241020203} +01/26/2022 21:17:01 - INFO - codeparrot_training - Step 1535: {'lr': 0.00038375, 'samples': 294912, 'steps': 1535, 'loss/train': 1.1966947317123413} +01/26/2022 21:17:04 - INFO - codeparrot_training - Step 1536: {'lr': 0.000384, 'samples': 295104, 'steps': 1536, 'loss/train': 0.9524939060211182} +01/26/2022 21:17:07 - INFO - codeparrot_training - Step 1537: {'lr': 0.00038425, 'samples': 295296, 'steps': 1537, 'loss/train': 0.9000881016254425} +01/26/2022 21:17:10 - INFO - codeparrot_training - Step 1538: {'lr': 0.0003845, 'samples': 295488, 'steps': 1538, 'loss/train': 1.6516569256782532} +01/26/2022 21:17:14 - INFO - codeparrot_training - Step 1539: {'lr': 0.00038475, 'samples': 295680, 'steps': 1539, 'loss/train': 1.3666580021381378} +01/26/2022 21:17:17 - INFO - codeparrot_training - Step 1540: {'lr': 0.00038500000000000003, 'samples': 295872, 'steps': 1540, 'loss/train': 1.0497311353683472} +01/26/2022 21:17:20 - INFO - codeparrot_training - Step 1541: {'lr': 0.00038525, 'samples': 296064, 'steps': 1541, 'loss/train': 0.8218839168548584} +01/26/2022 21:17:23 - INFO - codeparrot_training - Step 1542: {'lr': 0.0003855, 'samples': 296256, 'steps': 1542, 'loss/train': 0.9426329433917999} +01/26/2022 21:17:26 - INFO - codeparrot_training - Step 1543: {'lr': 0.00038574999999999997, 'samples': 296448, 'steps': 1543, 'loss/train': 1.2239013612270355} +01/26/2022 21:17:31 - INFO - codeparrot_training - Step 1544: {'lr': 0.000386, 'samples': 296640, 'steps': 1544, 'loss/train': 0.5781396925449371} +01/26/2022 21:17:34 - INFO - codeparrot_training - Step 1545: {'lr': 0.00038625, 'samples': 296832, 'steps': 1545, 'loss/train': 0.8666767179965973} +01/26/2022 21:17:37 - INFO - codeparrot_training - Step 1546: {'lr': 0.0003865, 'samples': 297024, 'steps': 1546, 'loss/train': 0.18094372004270554} +01/26/2022 21:17:40 - INFO - codeparrot_training - Step 1547: {'lr': 0.00038675, 'samples': 297216, 'steps': 1547, 'loss/train': 0.9551449120044708} +01/26/2022 21:17:43 - INFO - codeparrot_training - Step 1548: {'lr': 0.00038700000000000003, 'samples': 297408, 'steps': 1548, 'loss/train': 0.5519776493310928} +01/26/2022 21:17:46 - INFO - codeparrot_training - Step 1549: {'lr': 0.00038725, 'samples': 297600, 'steps': 1549, 'loss/train': 1.1795437037944794} +01/26/2022 21:17:50 - INFO - codeparrot_training - Step 1550: {'lr': 0.00038750000000000004, 'samples': 297792, 'steps': 1550, 'loss/train': 1.1903632879257202} +01/26/2022 21:17:53 - INFO - codeparrot_training - Step 1551: {'lr': 0.00038774999999999997, 'samples': 297984, 'steps': 1551, 'loss/train': 1.211746484041214} +01/26/2022 21:17:56 - INFO - codeparrot_training - Step 1552: {'lr': 0.000388, 'samples': 298176, 'steps': 1552, 'loss/train': 1.132394939661026} +01/26/2022 21:18:02 - INFO - codeparrot_training - Step 1553: {'lr': 0.00038825, 'samples': 298368, 'steps': 1553, 'loss/train': 1.5255194306373596} +01/26/2022 21:18:05 - INFO - codeparrot_training - Step 1554: {'lr': 0.0003885, 'samples': 298560, 'steps': 1554, 'loss/train': 0.9148447215557098} +01/26/2022 21:18:09 - INFO - codeparrot_training - Step 1555: {'lr': 0.00038875, 'samples': 298752, 'steps': 1555, 'loss/train': 0.8803000152111053} +01/26/2022 21:18:12 - INFO - codeparrot_training - Step 1556: {'lr': 0.000389, 'samples': 298944, 'steps': 1556, 'loss/train': 1.1481870710849762} +01/26/2022 21:18:15 - INFO - codeparrot_training - Step 1557: {'lr': 0.00038925, 'samples': 299136, 'steps': 1557, 'loss/train': 0.44970184564590454} +01/26/2022 21:18:18 - INFO - codeparrot_training - Step 1558: {'lr': 0.00038950000000000003, 'samples': 299328, 'steps': 1558, 'loss/train': 1.0816842019557953} +01/26/2022 21:18:21 - INFO - codeparrot_training - Step 1559: {'lr': 0.00038975, 'samples': 299520, 'steps': 1559, 'loss/train': 0.6674492508172989} +01/26/2022 21:18:24 - INFO - codeparrot_training - Step 1560: {'lr': 0.00039000000000000005, 'samples': 299712, 'steps': 1560, 'loss/train': 0.9751677811145782} +01/26/2022 21:18:27 - INFO - codeparrot_training - Step 1561: {'lr': 0.00039024999999999997, 'samples': 299904, 'steps': 1561, 'loss/train': 0.43891793489456177} +01/26/2022 21:18:32 - INFO - codeparrot_training - Step 1562: {'lr': 0.0003905, 'samples': 300096, 'steps': 1562, 'loss/train': 1.0710235834121704} +01/26/2022 21:18:35 - INFO - codeparrot_training - Step 1563: {'lr': 0.00039075, 'samples': 300288, 'steps': 1563, 'loss/train': 0.9703768193721771} +01/26/2022 21:18:38 - INFO - codeparrot_training - Step 1564: {'lr': 0.000391, 'samples': 300480, 'steps': 1564, 'loss/train': 1.481842964887619} +01/26/2022 21:18:41 - INFO - codeparrot_training - Step 1565: {'lr': 0.00039125, 'samples': 300672, 'steps': 1565, 'loss/train': 0.8194465041160583} +01/26/2022 21:18:44 - INFO - codeparrot_training - Step 1566: {'lr': 0.00039150000000000003, 'samples': 300864, 'steps': 1566, 'loss/train': 1.0495053827762604} +01/26/2022 21:18:47 - INFO - codeparrot_training - Step 1567: {'lr': 0.00039175, 'samples': 301056, 'steps': 1567, 'loss/train': 0.9151850938796997} +01/26/2022 21:18:51 - INFO - codeparrot_training - Step 1568: {'lr': 0.00039200000000000004, 'samples': 301248, 'steps': 1568, 'loss/train': 0.6693004667758942} +01/26/2022 21:18:54 - INFO - codeparrot_training - Step 1569: {'lr': 0.00039225, 'samples': 301440, 'steps': 1569, 'loss/train': 1.0176717638969421} +01/26/2022 21:18:57 - INFO - codeparrot_training - Step 1570: {'lr': 0.0003925, 'samples': 301632, 'steps': 1570, 'loss/train': 0.900753915309906} +01/26/2022 21:19:03 - INFO - codeparrot_training - Step 1571: {'lr': 0.00039275, 'samples': 301824, 'steps': 1571, 'loss/train': 0.5285711288452148} +01/26/2022 21:19:06 - INFO - codeparrot_training - Step 1572: {'lr': 0.000393, 'samples': 302016, 'steps': 1572, 'loss/train': 1.1502817869186401} +01/26/2022 21:19:09 - INFO - codeparrot_training - Step 1573: {'lr': 0.00039325, 'samples': 302208, 'steps': 1573, 'loss/train': 0.6094715595245361} +01/26/2022 21:19:12 - INFO - codeparrot_training - Step 1574: {'lr': 0.0003935, 'samples': 302400, 'steps': 1574, 'loss/train': 0.6199387907981873} +01/26/2022 21:19:15 - INFO - codeparrot_training - Step 1575: {'lr': 0.00039375, 'samples': 302592, 'steps': 1575, 'loss/train': 1.0760919749736786} +01/26/2022 21:19:19 - INFO - codeparrot_training - Step 1576: {'lr': 0.00039400000000000004, 'samples': 302784, 'steps': 1576, 'loss/train': 1.1562922596931458} +01/26/2022 21:19:22 - INFO - codeparrot_training - Step 1577: {'lr': 0.00039425, 'samples': 302976, 'steps': 1577, 'loss/train': 1.089037925004959} +01/26/2022 21:19:25 - INFO - codeparrot_training - Step 1578: {'lr': 0.00039450000000000005, 'samples': 303168, 'steps': 1578, 'loss/train': 1.0494210720062256} +01/26/2022 21:19:29 - INFO - codeparrot_training - Step 1579: {'lr': 0.00039474999999999997, 'samples': 303360, 'steps': 1579, 'loss/train': 0.9432246387004852} +01/26/2022 21:19:33 - INFO - codeparrot_training - Step 1580: {'lr': 0.000395, 'samples': 303552, 'steps': 1580, 'loss/train': 0.7735198438167572} +01/26/2022 21:19:36 - INFO - codeparrot_training - Step 1581: {'lr': 0.00039525, 'samples': 303744, 'steps': 1581, 'loss/train': 1.1471571028232574} +01/26/2022 21:19:39 - INFO - codeparrot_training - Step 1582: {'lr': 0.0003955, 'samples': 303936, 'steps': 1582, 'loss/train': 1.1255502998828888} +01/26/2022 21:19:42 - INFO - codeparrot_training - Step 1583: {'lr': 0.00039575, 'samples': 304128, 'steps': 1583, 'loss/train': 1.1929344534873962} +01/26/2022 21:19:45 - INFO - codeparrot_training - Step 1584: {'lr': 0.00039600000000000003, 'samples': 304320, 'steps': 1584, 'loss/train': 0.7604456841945648} +01/26/2022 21:19:48 - INFO - codeparrot_training - Step 1585: {'lr': 0.00039625, 'samples': 304512, 'steps': 1585, 'loss/train': 0.886058896780014} +01/26/2022 21:19:51 - INFO - codeparrot_training - Step 1586: {'lr': 0.00039650000000000004, 'samples': 304704, 'steps': 1586, 'loss/train': 1.392673820257187} +01/26/2022 21:19:55 - INFO - codeparrot_training - Step 1587: {'lr': 0.00039675, 'samples': 304896, 'steps': 1587, 'loss/train': 1.0805667042732239} +01/26/2022 21:19:59 - INFO - codeparrot_training - Step 1588: {'lr': 0.00039700000000000005, 'samples': 305088, 'steps': 1588, 'loss/train': 0.7766793072223663} +01/26/2022 21:20:02 - INFO - codeparrot_training - Step 1589: {'lr': 0.00039725, 'samples': 305280, 'steps': 1589, 'loss/train': 0.9202056527137756} +01/26/2022 21:20:05 - INFO - codeparrot_training - Step 1590: {'lr': 0.0003975, 'samples': 305472, 'steps': 1590, 'loss/train': 0.42704272270202637} +01/26/2022 21:20:08 - INFO - codeparrot_training - Step 1591: {'lr': 0.00039775, 'samples': 305664, 'steps': 1591, 'loss/train': 0.937246710062027} +01/26/2022 21:20:12 - INFO - codeparrot_training - Step 1592: {'lr': 0.000398, 'samples': 305856, 'steps': 1592, 'loss/train': 1.04826021194458} +01/26/2022 21:20:15 - INFO - codeparrot_training - Step 1593: {'lr': 0.00039825, 'samples': 306048, 'steps': 1593, 'loss/train': 1.472885549068451} +01/26/2022 21:20:18 - INFO - codeparrot_training - Step 1594: {'lr': 0.00039850000000000004, 'samples': 306240, 'steps': 1594, 'loss/train': 0.7873425185680389} +01/26/2022 21:20:21 - INFO - codeparrot_training - Step 1595: {'lr': 0.00039875, 'samples': 306432, 'steps': 1595, 'loss/train': 0.7252905517816544} +01/26/2022 21:20:24 - INFO - codeparrot_training - Step 1596: {'lr': 0.00039900000000000005, 'samples': 306624, 'steps': 1596, 'loss/train': 1.0548094511032104} +01/26/2022 21:20:30 - INFO - codeparrot_training - Step 1597: {'lr': 0.00039925000000000003, 'samples': 306816, 'steps': 1597, 'loss/train': 1.1972896456718445} +01/26/2022 21:20:33 - INFO - codeparrot_training - Step 1598: {'lr': 0.0003995, 'samples': 307008, 'steps': 1598, 'loss/train': 0.8496110737323761} +01/26/2022 21:20:36 - INFO - codeparrot_training - Step 1599: {'lr': 0.00039975, 'samples': 307200, 'steps': 1599, 'loss/train': 0.8173715472221375} +01/26/2022 21:20:40 - INFO - codeparrot_training - Step 1600: {'lr': 0.0004, 'samples': 307392, 'steps': 1600, 'loss/train': 1.2981213927268982} +01/26/2022 21:20:43 - INFO - codeparrot_training - Step 1601: {'lr': 0.00040025, 'samples': 307584, 'steps': 1601, 'loss/train': 0.8052924871444702} +01/26/2022 21:20:46 - INFO - codeparrot_training - Step 1602: {'lr': 0.00040050000000000003, 'samples': 307776, 'steps': 1602, 'loss/train': 0.903513103723526} +01/26/2022 21:20:49 - INFO - codeparrot_training - Step 1603: {'lr': 0.00040075, 'samples': 307968, 'steps': 1603, 'loss/train': 0.8908604979515076} +01/26/2022 21:20:52 - INFO - codeparrot_training - Step 1604: {'lr': 0.00040100000000000004, 'samples': 308160, 'steps': 1604, 'loss/train': 1.2467265129089355} +01/26/2022 21:20:57 - INFO - codeparrot_training - Step 1605: {'lr': 0.00040125, 'samples': 308352, 'steps': 1605, 'loss/train': 1.1703225374221802} +01/26/2022 21:21:00 - INFO - codeparrot_training - Step 1606: {'lr': 0.00040150000000000006, 'samples': 308544, 'steps': 1606, 'loss/train': 0.9032231569290161} +01/26/2022 21:21:03 - INFO - codeparrot_training - Step 1607: {'lr': 0.00040175, 'samples': 308736, 'steps': 1607, 'loss/train': 0.570895716547966} +01/26/2022 21:21:06 - INFO - codeparrot_training - Step 1608: {'lr': 0.000402, 'samples': 308928, 'steps': 1608, 'loss/train': 1.2200436294078827} +01/26/2022 21:21:09 - INFO - codeparrot_training - Step 1609: {'lr': 0.00040225, 'samples': 309120, 'steps': 1609, 'loss/train': 0.8977490365505219} +01/26/2022 21:21:12 - INFO - codeparrot_training - Step 1610: {'lr': 0.0004025, 'samples': 309312, 'steps': 1610, 'loss/train': 0.9952645003795624} +01/26/2022 21:21:15 - INFO - codeparrot_training - Step 1611: {'lr': 0.00040275, 'samples': 309504, 'steps': 1611, 'loss/train': 0.902794361114502} +01/26/2022 21:21:19 - INFO - codeparrot_training - Step 1612: {'lr': 0.00040300000000000004, 'samples': 309696, 'steps': 1612, 'loss/train': 0.9980608820915222} +01/26/2022 21:21:22 - INFO - codeparrot_training - Step 1613: {'lr': 0.00040325, 'samples': 309888, 'steps': 1613, 'loss/train': 1.1632069051265717} +01/26/2022 21:21:28 - INFO - codeparrot_training - Step 1614: {'lr': 0.00040350000000000005, 'samples': 310080, 'steps': 1614, 'loss/train': 0.8721232116222382} +01/26/2022 21:21:31 - INFO - codeparrot_training - Step 1615: {'lr': 0.00040375000000000003, 'samples': 310272, 'steps': 1615, 'loss/train': 0.6417164504528046} +01/26/2022 21:21:35 - INFO - codeparrot_training - Step 1616: {'lr': 0.000404, 'samples': 310464, 'steps': 1616, 'loss/train': 1.0448070466518402} +01/26/2022 21:21:38 - INFO - codeparrot_training - Step 1617: {'lr': 0.00040425, 'samples': 310656, 'steps': 1617, 'loss/train': 0.763612300157547} +01/26/2022 21:21:41 - INFO - codeparrot_training - Step 1618: {'lr': 0.0004045, 'samples': 310848, 'steps': 1618, 'loss/train': 0.9192868173122406} +01/26/2022 21:21:44 - INFO - codeparrot_training - Step 1619: {'lr': 0.00040475, 'samples': 311040, 'steps': 1619, 'loss/train': 0.8964953720569611} +01/26/2022 21:21:47 - INFO - codeparrot_training - Step 1620: {'lr': 0.00040500000000000003, 'samples': 311232, 'steps': 1620, 'loss/train': 0.4305136352777481} +01/26/2022 21:21:50 - INFO - codeparrot_training - Step 1621: {'lr': 0.00040525, 'samples': 311424, 'steps': 1621, 'loss/train': 1.2225679457187653} +01/26/2022 21:21:53 - INFO - codeparrot_training - Step 1622: {'lr': 0.00040550000000000004, 'samples': 311616, 'steps': 1622, 'loss/train': 0.8143474459648132} +01/26/2022 21:21:58 - INFO - codeparrot_training - Step 1623: {'lr': 0.00040575, 'samples': 311808, 'steps': 1623, 'loss/train': 0.9397753179073334} +01/26/2022 21:22:01 - INFO - codeparrot_training - Step 1624: {'lr': 0.00040600000000000006, 'samples': 312000, 'steps': 1624, 'loss/train': 0.9602738320827484} +01/26/2022 21:22:04 - INFO - codeparrot_training - Step 1625: {'lr': 0.00040625000000000004, 'samples': 312192, 'steps': 1625, 'loss/train': 1.1994936168193817} +01/26/2022 21:22:07 - INFO - codeparrot_training - Step 1626: {'lr': 0.00040649999999999996, 'samples': 312384, 'steps': 1626, 'loss/train': 0.852255642414093} +01/26/2022 21:22:10 - INFO - codeparrot_training - Step 1627: {'lr': 0.00040675, 'samples': 312576, 'steps': 1627, 'loss/train': 0.7201205044984818} +01/26/2022 21:22:13 - INFO - codeparrot_training - Step 1628: {'lr': 0.00040699999999999997, 'samples': 312768, 'steps': 1628, 'loss/train': 0.7200788408517838} +01/26/2022 21:22:17 - INFO - codeparrot_training - Step 1629: {'lr': 0.00040725, 'samples': 312960, 'steps': 1629, 'loss/train': 1.2039445638656616} +01/26/2022 21:22:20 - INFO - codeparrot_training - Step 1630: {'lr': 0.0004075, 'samples': 313152, 'steps': 1630, 'loss/train': 0.8602632880210876} +01/26/2022 21:22:23 - INFO - codeparrot_training - Step 1631: {'lr': 0.00040775, 'samples': 313344, 'steps': 1631, 'loss/train': 0.3987828344106674} +01/26/2022 21:22:27 - INFO - codeparrot_training - Step 1632: {'lr': 0.000408, 'samples': 313536, 'steps': 1632, 'loss/train': 1.8231298327445984} +01/26/2022 21:22:30 - INFO - codeparrot_training - Step 1633: {'lr': 0.00040825000000000003, 'samples': 313728, 'steps': 1633, 'loss/train': 0.7313338816165924} +01/26/2022 21:22:33 - INFO - codeparrot_training - Step 1634: {'lr': 0.0004085, 'samples': 313920, 'steps': 1634, 'loss/train': 0.9621573686599731} +01/26/2022 21:22:37 - INFO - codeparrot_training - Step 1635: {'lr': 0.00040875, 'samples': 314112, 'steps': 1635, 'loss/train': 1.0393626987934113} +01/26/2022 21:22:40 - INFO - codeparrot_training - Step 1636: {'lr': 0.00040899999999999997, 'samples': 314304, 'steps': 1636, 'loss/train': 0.6312345117330551} +01/26/2022 21:22:43 - INFO - codeparrot_training - Step 1637: {'lr': 0.00040925, 'samples': 314496, 'steps': 1637, 'loss/train': 1.0294510424137115} +01/26/2022 21:22:46 - INFO - codeparrot_training - Step 1638: {'lr': 0.0004095, 'samples': 314688, 'steps': 1638, 'loss/train': 0.5636235326528549} +01/26/2022 21:22:49 - INFO - codeparrot_training - Step 1639: {'lr': 0.00040975, 'samples': 314880, 'steps': 1639, 'loss/train': 1.4927867352962494} +01/26/2022 21:22:52 - INFO - codeparrot_training - Step 1640: {'lr': 0.00041, 'samples': 315072, 'steps': 1640, 'loss/train': 1.0345790684223175} +01/26/2022 21:22:57 - INFO - codeparrot_training - Step 1641: {'lr': 0.00041025, 'samples': 315264, 'steps': 1641, 'loss/train': 0.38186345994472504} +01/26/2022 21:23:00 - INFO - codeparrot_training - Step 1642: {'lr': 0.0004105, 'samples': 315456, 'steps': 1642, 'loss/train': 0.9589768052101135} +01/26/2022 21:23:03 - INFO - codeparrot_training - Step 1643: {'lr': 0.00041075000000000004, 'samples': 315648, 'steps': 1643, 'loss/train': 1.0343111157417297} +01/26/2022 21:23:06 - INFO - codeparrot_training - Step 1644: {'lr': 0.00041099999999999996, 'samples': 315840, 'steps': 1644, 'loss/train': 0.7044095993041992} +01/26/2022 21:23:09 - INFO - codeparrot_training - Step 1645: {'lr': 0.00041125, 'samples': 316032, 'steps': 1645, 'loss/train': 0.8021068274974823} +01/26/2022 21:23:13 - INFO - codeparrot_training - Step 1646: {'lr': 0.0004115, 'samples': 316224, 'steps': 1646, 'loss/train': 0.7520883679389954} +01/26/2022 21:23:16 - INFO - codeparrot_training - Step 1647: {'lr': 0.00041175, 'samples': 316416, 'steps': 1647, 'loss/train': 0.47065356373786926} +01/26/2022 21:23:19 - INFO - codeparrot_training - Step 1648: {'lr': 0.000412, 'samples': 316608, 'steps': 1648, 'loss/train': 1.2325392365455627} +01/26/2022 21:23:24 - INFO - codeparrot_training - Step 1649: {'lr': 0.00041225, 'samples': 316800, 'steps': 1649, 'loss/train': 1.624911367893219} +01/26/2022 21:23:28 - INFO - codeparrot_training - Step 1650: {'lr': 0.0004125, 'samples': 316992, 'steps': 1650, 'loss/train': 0.9685000777244568} +01/26/2022 21:23:31 - INFO - codeparrot_training - Step 1651: {'lr': 0.00041275000000000003, 'samples': 317184, 'steps': 1651, 'loss/train': 0.7986305952072144} +01/26/2022 21:23:34 - INFO - codeparrot_training - Step 1652: {'lr': 0.000413, 'samples': 317376, 'steps': 1652, 'loss/train': 0.8410978317260742} +01/26/2022 21:23:37 - INFO - codeparrot_training - Step 1653: {'lr': 0.00041325, 'samples': 317568, 'steps': 1653, 'loss/train': 0.6380593925714493} +01/26/2022 21:23:40 - INFO - codeparrot_training - Step 1654: {'lr': 0.00041349999999999997, 'samples': 317760, 'steps': 1654, 'loss/train': 0.7542805373668671} +01/26/2022 21:23:43 - INFO - codeparrot_training - Step 1655: {'lr': 0.00041375, 'samples': 317952, 'steps': 1655, 'loss/train': 0.8882549107074738} +01/26/2022 21:23:46 - INFO - codeparrot_training - Step 1656: {'lr': 0.000414, 'samples': 318144, 'steps': 1656, 'loss/train': 0.4313184767961502} +01/26/2022 21:23:50 - INFO - codeparrot_training - Step 1657: {'lr': 0.00041425, 'samples': 318336, 'steps': 1657, 'loss/train': 1.1223379075527191} +01/26/2022 21:23:54 - INFO - codeparrot_training - Step 1658: {'lr': 0.0004145, 'samples': 318528, 'steps': 1658, 'loss/train': 1.3333961963653564} +01/26/2022 21:23:57 - INFO - codeparrot_training - Step 1659: {'lr': 0.00041475, 'samples': 318720, 'steps': 1659, 'loss/train': 0.7927728295326233} +01/26/2022 21:24:00 - INFO - codeparrot_training - Step 1660: {'lr': 0.000415, 'samples': 318912, 'steps': 1660, 'loss/train': 1.2785732746124268} +01/26/2022 21:24:04 - INFO - codeparrot_training - Step 1661: {'lr': 0.00041525000000000004, 'samples': 319104, 'steps': 1661, 'loss/train': 0.15419504418969154} +01/26/2022 21:24:07 - INFO - codeparrot_training - Step 1662: {'lr': 0.00041549999999999996, 'samples': 319296, 'steps': 1662, 'loss/train': 1.1597148478031158} +01/26/2022 21:24:10 - INFO - codeparrot_training - Step 1663: {'lr': 0.00041575, 'samples': 319488, 'steps': 1663, 'loss/train': 1.2206933498382568} +01/26/2022 21:24:13 - INFO - codeparrot_training - Step 1664: {'lr': 0.000416, 'samples': 319680, 'steps': 1664, 'loss/train': 0.9429126977920532} +01/26/2022 21:24:16 - INFO - codeparrot_training - Step 1665: {'lr': 0.00041625, 'samples': 319872, 'steps': 1665, 'loss/train': 2.2514559030532837} +01/26/2022 21:24:19 - INFO - codeparrot_training - Step 1666: {'lr': 0.0004165, 'samples': 320064, 'steps': 1666, 'loss/train': 0.6221943497657776} +01/26/2022 21:24:24 - INFO - codeparrot_training - Step 1667: {'lr': 0.00041675, 'samples': 320256, 'steps': 1667, 'loss/train': 1.1753460466861725} +01/26/2022 21:24:27 - INFO - codeparrot_training - Step 1668: {'lr': 0.000417, 'samples': 320448, 'steps': 1668, 'loss/train': 1.175249844789505} +01/26/2022 21:24:30 - INFO - codeparrot_training - Step 1669: {'lr': 0.00041725000000000003, 'samples': 320640, 'steps': 1669, 'loss/train': 1.073170781135559} +01/26/2022 21:24:33 - INFO - codeparrot_training - Step 1670: {'lr': 0.0004175, 'samples': 320832, 'steps': 1670, 'loss/train': 0.5739630460739136} +01/26/2022 21:24:36 - INFO - codeparrot_training - Step 1671: {'lr': 0.00041775000000000004, 'samples': 321024, 'steps': 1671, 'loss/train': 0.6225871592760086} +01/26/2022 21:24:39 - INFO - codeparrot_training - Step 1672: {'lr': 0.00041799999999999997, 'samples': 321216, 'steps': 1672, 'loss/train': 0.7493513077497482} +01/26/2022 21:24:43 - INFO - codeparrot_training - Step 1673: {'lr': 0.00041825, 'samples': 321408, 'steps': 1673, 'loss/train': 1.1422923803329468} +01/26/2022 21:24:46 - INFO - codeparrot_training - Step 1674: {'lr': 0.0004185, 'samples': 321600, 'steps': 1674, 'loss/train': 0.8022510409355164} +01/26/2022 21:24:49 - INFO - codeparrot_training - Step 1675: {'lr': 0.00041875, 'samples': 321792, 'steps': 1675, 'loss/train': 0.9691024124622345} +01/26/2022 21:24:55 - INFO - codeparrot_training - Step 1676: {'lr': 0.000419, 'samples': 321984, 'steps': 1676, 'loss/train': 0.40006019175052643} +01/26/2022 21:24:58 - INFO - codeparrot_training - Step 1677: {'lr': 0.00041925, 'samples': 322176, 'steps': 1677, 'loss/train': 1.0653008222579956} +01/26/2022 21:25:02 - INFO - codeparrot_training - Step 1678: {'lr': 0.0004195, 'samples': 322368, 'steps': 1678, 'loss/train': 0.6026585251092911} +01/26/2022 21:25:05 - INFO - codeparrot_training - Step 1679: {'lr': 0.00041975000000000004, 'samples': 322560, 'steps': 1679, 'loss/train': 1.0619997382164001} +01/26/2022 21:25:08 - INFO - codeparrot_training - Step 1680: {'lr': 0.00042, 'samples': 322752, 'steps': 1680, 'loss/train': 1.0160093307495117} +01/26/2022 21:25:11 - INFO - codeparrot_training - Step 1681: {'lr': 0.00042025, 'samples': 322944, 'steps': 1681, 'loss/train': 0.18223318085074425} +01/26/2022 21:25:14 - INFO - codeparrot_training - Step 1682: {'lr': 0.0004205, 'samples': 323136, 'steps': 1682, 'loss/train': 1.0753837823867798} +01/26/2022 21:25:17 - INFO - codeparrot_training - Step 1683: {'lr': 0.00042075, 'samples': 323328, 'steps': 1683, 'loss/train': 0.9113363027572632} +01/26/2022 21:25:20 - INFO - codeparrot_training - Step 1684: {'lr': 0.000421, 'samples': 323520, 'steps': 1684, 'loss/train': 0.8418412506580353} +01/26/2022 21:25:25 - INFO - codeparrot_training - Step 1685: {'lr': 0.00042125, 'samples': 323712, 'steps': 1685, 'loss/train': 0.8789848387241364} +01/26/2022 21:25:28 - INFO - codeparrot_training - Step 1686: {'lr': 0.0004215, 'samples': 323904, 'steps': 1686, 'loss/train': 0.8070947527885437} +01/26/2022 21:25:32 - INFO - codeparrot_training - Step 1687: {'lr': 0.00042175000000000003, 'samples': 324096, 'steps': 1687, 'loss/train': 1.122605949640274} +01/26/2022 21:25:35 - INFO - codeparrot_training - Step 1688: {'lr': 0.000422, 'samples': 324288, 'steps': 1688, 'loss/train': 1.000831425189972} +01/26/2022 21:25:38 - INFO - codeparrot_training - Step 1689: {'lr': 0.00042225000000000005, 'samples': 324480, 'steps': 1689, 'loss/train': 1.348246157169342} +01/26/2022 21:25:41 - INFO - codeparrot_training - Step 1690: {'lr': 0.00042249999999999997, 'samples': 324672, 'steps': 1690, 'loss/train': 0.4896686375141144} +01/26/2022 21:25:44 - INFO - codeparrot_training - Step 1691: {'lr': 0.00042275, 'samples': 324864, 'steps': 1691, 'loss/train': 1.15392404794693} +01/26/2022 21:25:47 - INFO - codeparrot_training - Step 1692: {'lr': 0.000423, 'samples': 325056, 'steps': 1692, 'loss/train': 1.1223683953285217} +01/26/2022 21:25:54 - INFO - codeparrot_training - Step 1693: {'lr': 0.00042325, 'samples': 325248, 'steps': 1693, 'loss/train': 1.1055345833301544} +01/26/2022 21:25:57 - INFO - codeparrot_training - Step 1694: {'lr': 0.0004235, 'samples': 325440, 'steps': 1694, 'loss/train': 0.9645794034004211} +01/26/2022 21:26:00 - INFO - codeparrot_training - Step 1695: {'lr': 0.00042375000000000003, 'samples': 325632, 'steps': 1695, 'loss/train': 1.160351425409317} +01/26/2022 21:26:03 - INFO - codeparrot_training - Step 1696: {'lr': 0.000424, 'samples': 325824, 'steps': 1696, 'loss/train': 1.1919098496437073} +01/26/2022 21:26:06 - INFO - codeparrot_training - Step 1697: {'lr': 0.00042425000000000004, 'samples': 326016, 'steps': 1697, 'loss/train': 0.35773663222789764} +01/26/2022 21:26:09 - INFO - codeparrot_training - Step 1698: {'lr': 0.0004245, 'samples': 326208, 'steps': 1698, 'loss/train': 0.9857082366943359} +01/26/2022 21:26:12 - INFO - codeparrot_training - Step 1699: {'lr': 0.00042475000000000005, 'samples': 326400, 'steps': 1699, 'loss/train': 0.6878446340560913} +01/26/2022 21:26:16 - INFO - codeparrot_training - Step 1700: {'lr': 0.000425, 'samples': 326592, 'steps': 1700, 'loss/train': 1.1664015054702759} +01/26/2022 21:26:19 - INFO - codeparrot_training - Step 1701: {'lr': 0.00042525, 'samples': 326784, 'steps': 1701, 'loss/train': 1.1706021130084991} +01/26/2022 21:26:23 - INFO - codeparrot_training - Step 1702: {'lr': 0.0004255, 'samples': 326976, 'steps': 1702, 'loss/train': 0.7951970100402832} +01/26/2022 21:26:26 - INFO - codeparrot_training - Step 1703: {'lr': 0.00042575, 'samples': 327168, 'steps': 1703, 'loss/train': 1.207459956407547} +01/26/2022 21:26:29 - INFO - codeparrot_training - Step 1704: {'lr': 0.000426, 'samples': 327360, 'steps': 1704, 'loss/train': 0.6888840794563293} +01/26/2022 21:26:32 - INFO - codeparrot_training - Step 1705: {'lr': 0.00042625000000000003, 'samples': 327552, 'steps': 1705, 'loss/train': 0.8381628692150116} +01/26/2022 21:26:36 - INFO - codeparrot_training - Step 1706: {'lr': 0.0004265, 'samples': 327744, 'steps': 1706, 'loss/train': 1.0316231846809387} +01/26/2022 21:26:39 - INFO - codeparrot_training - Step 1707: {'lr': 0.00042675000000000005, 'samples': 327936, 'steps': 1707, 'loss/train': 1.0781696140766144} +01/26/2022 21:26:42 - INFO - codeparrot_training - Step 1708: {'lr': 0.000427, 'samples': 328128, 'steps': 1708, 'loss/train': 1.189242660999298} +01/26/2022 21:26:45 - INFO - codeparrot_training - Step 1709: {'lr': 0.00042725, 'samples': 328320, 'steps': 1709, 'loss/train': 1.295003056526184} +01/26/2022 21:26:48 - INFO - codeparrot_training - Step 1710: {'lr': 0.0004275, 'samples': 328512, 'steps': 1710, 'loss/train': 0.6395526677370071} +01/26/2022 21:26:53 - INFO - codeparrot_training - Step 1711: {'lr': 0.00042775, 'samples': 328704, 'steps': 1711, 'loss/train': 1.3519046008586884} +01/26/2022 21:26:56 - INFO - codeparrot_training - Step 1712: {'lr': 0.000428, 'samples': 328896, 'steps': 1712, 'loss/train': 0.4921133816242218} +01/26/2022 21:26:59 - INFO - codeparrot_training - Step 1713: {'lr': 0.00042825000000000003, 'samples': 329088, 'steps': 1713, 'loss/train': 0.9713128209114075} +01/26/2022 21:27:02 - INFO - codeparrot_training - Step 1714: {'lr': 0.0004285, 'samples': 329280, 'steps': 1714, 'loss/train': 0.8321100175380707} +01/26/2022 21:27:05 - INFO - codeparrot_training - Step 1715: {'lr': 0.00042875000000000004, 'samples': 329472, 'steps': 1715, 'loss/train': 1.1703671514987946} +01/26/2022 21:27:08 - INFO - codeparrot_training - Step 1716: {'lr': 0.000429, 'samples': 329664, 'steps': 1716, 'loss/train': 0.8002711236476898} +01/26/2022 21:27:11 - INFO - codeparrot_training - Step 1717: {'lr': 0.00042925000000000005, 'samples': 329856, 'steps': 1717, 'loss/train': 1.1443277299404144} +01/26/2022 21:27:15 - INFO - codeparrot_training - Step 1718: {'lr': 0.0004295, 'samples': 330048, 'steps': 1718, 'loss/train': 1.6503835916519165} +01/26/2022 21:27:19 - INFO - codeparrot_training - Step 1719: {'lr': 0.00042975, 'samples': 330240, 'steps': 1719, 'loss/train': 0.4028375744819641} +01/26/2022 21:27:22 - INFO - codeparrot_training - Step 1720: {'lr': 0.00043, 'samples': 330432, 'steps': 1720, 'loss/train': 0.9084438979625702} +01/26/2022 21:27:26 - INFO - codeparrot_training - Step 1721: {'lr': 0.00043025, 'samples': 330624, 'steps': 1721, 'loss/train': 0.2540413811802864} +01/26/2022 21:27:29 - INFO - codeparrot_training - Step 1722: {'lr': 0.0004305, 'samples': 330816, 'steps': 1722, 'loss/train': 1.1666429042816162} +01/26/2022 21:27:32 - INFO - codeparrot_training - Step 1723: {'lr': 0.00043075000000000003, 'samples': 331008, 'steps': 1723, 'loss/train': 0.8956621885299683} +01/26/2022 21:27:35 - INFO - codeparrot_training - Step 1724: {'lr': 0.000431, 'samples': 331200, 'steps': 1724, 'loss/train': 0.5922983586788177} +01/26/2022 21:27:38 - INFO - codeparrot_training - Step 1725: {'lr': 0.00043125000000000005, 'samples': 331392, 'steps': 1725, 'loss/train': 1.00207981467247} +01/26/2022 21:27:41 - INFO - codeparrot_training - Step 1726: {'lr': 0.0004315, 'samples': 331584, 'steps': 1726, 'loss/train': 0.9447910487651825} +01/26/2022 21:27:44 - INFO - codeparrot_training - Step 1727: {'lr': 0.00043175, 'samples': 331776, 'steps': 1727, 'loss/train': 1.1207318007946014} +01/26/2022 21:27:51 - INFO - codeparrot_training - Step 1728: {'lr': 0.000432, 'samples': 331968, 'steps': 1728, 'loss/train': 0.9733808040618896} +01/26/2022 21:27:54 - INFO - codeparrot_training - Step 1729: {'lr': 0.00043225, 'samples': 332160, 'steps': 1729, 'loss/train': 1.3409484028816223} +01/26/2022 21:27:57 - INFO - codeparrot_training - Step 1730: {'lr': 0.0004325, 'samples': 332352, 'steps': 1730, 'loss/train': 0.9002691507339478} +01/26/2022 21:28:00 - INFO - codeparrot_training - Step 1731: {'lr': 0.00043275000000000003, 'samples': 332544, 'steps': 1731, 'loss/train': 0.8659699559211731} +01/26/2022 21:28:03 - INFO - codeparrot_training - Step 1732: {'lr': 0.000433, 'samples': 332736, 'steps': 1732, 'loss/train': 1.156950294971466} +01/26/2022 21:28:06 - INFO - codeparrot_training - Step 1733: {'lr': 0.00043325000000000004, 'samples': 332928, 'steps': 1733, 'loss/train': 0.7196600139141083} +01/26/2022 21:28:09 - INFO - codeparrot_training - Step 1734: {'lr': 0.0004335, 'samples': 333120, 'steps': 1734, 'loss/train': 0.6546306610107422} +01/26/2022 21:28:13 - INFO - codeparrot_training - Step 1735: {'lr': 0.00043375000000000005, 'samples': 333312, 'steps': 1735, 'loss/train': 0.9295001327991486} +01/26/2022 21:28:16 - INFO - codeparrot_training - Step 1736: {'lr': 0.00043400000000000003, 'samples': 333504, 'steps': 1736, 'loss/train': 2.139705240726471} +01/26/2022 21:28:20 - INFO - codeparrot_training - Step 1737: {'lr': 0.00043425, 'samples': 333696, 'steps': 1737, 'loss/train': 0.6314667463302612} +01/26/2022 21:28:23 - INFO - codeparrot_training - Step 1738: {'lr': 0.0004345, 'samples': 333888, 'steps': 1738, 'loss/train': 0.887237548828125} +01/26/2022 21:28:27 - INFO - codeparrot_training - Step 1739: {'lr': 0.00043475, 'samples': 334080, 'steps': 1739, 'loss/train': 0.9448980689048767} +01/26/2022 21:28:30 - INFO - codeparrot_training - Step 1740: {'lr': 0.000435, 'samples': 334272, 'steps': 1740, 'loss/train': 0.6644312739372253} +01/26/2022 21:28:33 - INFO - codeparrot_training - Step 1741: {'lr': 0.00043525000000000004, 'samples': 334464, 'steps': 1741, 'loss/train': 0.9693324565887451} +01/26/2022 21:28:36 - INFO - codeparrot_training - Step 1742: {'lr': 0.0004355, 'samples': 334656, 'steps': 1742, 'loss/train': 0.9859629571437836} +01/26/2022 21:28:39 - INFO - codeparrot_training - Step 1743: {'lr': 0.00043575000000000005, 'samples': 334848, 'steps': 1743, 'loss/train': 0.8252474963665009} +01/26/2022 21:28:42 - INFO - codeparrot_training - Step 1744: {'lr': 0.000436, 'samples': 335040, 'steps': 1744, 'loss/train': 1.0943603217601776} +01/26/2022 21:28:47 - INFO - codeparrot_training - Step 1745: {'lr': 0.00043625000000000006, 'samples': 335232, 'steps': 1745, 'loss/train': 0.8645595610141754} +01/26/2022 21:28:50 - INFO - codeparrot_training - Step 1746: {'lr': 0.0004365, 'samples': 335424, 'steps': 1746, 'loss/train': 0.7886210381984711} +01/26/2022 21:28:53 - INFO - codeparrot_training - Step 1747: {'lr': 0.00043675, 'samples': 335616, 'steps': 1747, 'loss/train': 1.2167228758335114} +01/26/2022 21:28:56 - INFO - codeparrot_training - Step 1748: {'lr': 0.000437, 'samples': 335808, 'steps': 1748, 'loss/train': 0.8175221979618073} +01/26/2022 21:28:59 - INFO - codeparrot_training - Step 1749: {'lr': 0.00043725000000000003, 'samples': 336000, 'steps': 1749, 'loss/train': 0.7292230725288391} +01/26/2022 21:29:02 - INFO - codeparrot_training - Step 1750: {'lr': 0.0004375, 'samples': 336192, 'steps': 1750, 'loss/train': 1.10347580909729} +01/26/2022 21:29:06 - INFO - codeparrot_training - Step 1751: {'lr': 0.00043775, 'samples': 336384, 'steps': 1751, 'loss/train': 0.7977529764175415} +01/26/2022 21:29:09 - INFO - codeparrot_training - Step 1752: {'lr': 0.000438, 'samples': 336576, 'steps': 1752, 'loss/train': 0.7142423093318939} +01/26/2022 21:29:12 - INFO - codeparrot_training - Step 1753: {'lr': 0.00043825, 'samples': 336768, 'steps': 1753, 'loss/train': 1.27318874001503} +01/26/2022 21:29:18 - INFO - codeparrot_training - Step 1754: {'lr': 0.00043850000000000003, 'samples': 336960, 'steps': 1754, 'loss/train': 0.9715580642223358} +01/26/2022 21:29:21 - INFO - codeparrot_training - Step 1755: {'lr': 0.00043874999999999996, 'samples': 337152, 'steps': 1755, 'loss/train': 0.7470668256282806} +01/26/2022 21:29:24 - INFO - codeparrot_training - Step 1756: {'lr': 0.000439, 'samples': 337344, 'steps': 1756, 'loss/train': 1.004561573266983} +01/26/2022 21:29:27 - INFO - codeparrot_training - Step 1757: {'lr': 0.00043924999999999997, 'samples': 337536, 'steps': 1757, 'loss/train': 1.2235007286071777} +01/26/2022 21:29:30 - INFO - codeparrot_training - Step 1758: {'lr': 0.0004395, 'samples': 337728, 'steps': 1758, 'loss/train': 2.091886818408966} +01/26/2022 21:29:34 - INFO - codeparrot_training - Step 1759: {'lr': 0.00043975, 'samples': 337920, 'steps': 1759, 'loss/train': 0.7003630846738815} +01/26/2022 21:29:37 - INFO - codeparrot_training - Step 1760: {'lr': 0.00044, 'samples': 338112, 'steps': 1760, 'loss/train': 0.9811866581439972} +01/26/2022 21:29:40 - INFO - codeparrot_training - Step 1761: {'lr': 0.00044025, 'samples': 338304, 'steps': 1761, 'loss/train': 1.5664928555488586} +01/26/2022 21:29:43 - INFO - codeparrot_training - Step 1762: {'lr': 0.00044050000000000003, 'samples': 338496, 'steps': 1762, 'loss/train': 0.9035204350948334} +01/26/2022 21:29:47 - INFO - codeparrot_training - Step 1763: {'lr': 0.00044075, 'samples': 338688, 'steps': 1763, 'loss/train': 1.044754832983017} +01/26/2022 21:29:51 - INFO - codeparrot_training - Step 1764: {'lr': 0.000441, 'samples': 338880, 'steps': 1764, 'loss/train': 0.44676947593688965} +01/26/2022 21:29:54 - INFO - codeparrot_training - Step 1765: {'lr': 0.00044124999999999996, 'samples': 339072, 'steps': 1765, 'loss/train': 1.2584916651248932} +01/26/2022 21:29:57 - INFO - codeparrot_training - Step 1766: {'lr': 0.0004415, 'samples': 339264, 'steps': 1766, 'loss/train': 0.6115643084049225} +01/26/2022 21:30:00 - INFO - codeparrot_training - Step 1767: {'lr': 0.00044175, 'samples': 339456, 'steps': 1767, 'loss/train': 0.7946164906024933} +01/26/2022 21:30:03 - INFO - codeparrot_training - Step 1768: {'lr': 0.000442, 'samples': 339648, 'steps': 1768, 'loss/train': 0.886534720659256} +01/26/2022 21:30:06 - INFO - codeparrot_training - Step 1769: {'lr': 0.00044225, 'samples': 339840, 'steps': 1769, 'loss/train': 1.2899263501167297} +01/26/2022 21:30:09 - INFO - codeparrot_training - Step 1770: {'lr': 0.0004425, 'samples': 340032, 'steps': 1770, 'loss/train': 0.9034107327461243} +01/26/2022 21:30:13 - INFO - codeparrot_training - Step 1771: {'lr': 0.00044275, 'samples': 340224, 'steps': 1771, 'loss/train': 0.6806822866201401} +01/26/2022 21:30:19 - INFO - codeparrot_training - Step 1772: {'lr': 0.00044300000000000003, 'samples': 340416, 'steps': 1772, 'loss/train': 1.2623277604579926} +01/26/2022 21:30:22 - INFO - codeparrot_training - Step 1773: {'lr': 0.00044325, 'samples': 340608, 'steps': 1773, 'loss/train': 0.43822503089904785} +01/26/2022 21:30:25 - INFO - codeparrot_training - Step 1774: {'lr': 0.0004435, 'samples': 340800, 'steps': 1774, 'loss/train': 0.7557626366615295} +01/26/2022 21:30:28 - INFO - codeparrot_training - Step 1775: {'lr': 0.00044374999999999997, 'samples': 340992, 'steps': 1775, 'loss/train': 1.0331142246723175} +01/26/2022 21:30:31 - INFO - codeparrot_training - Step 1776: {'lr': 0.000444, 'samples': 341184, 'steps': 1776, 'loss/train': 0.7802868783473969} +01/26/2022 21:30:34 - INFO - codeparrot_training - Step 1777: {'lr': 0.00044425, 'samples': 341376, 'steps': 1777, 'loss/train': 0.9548504054546356} +01/26/2022 21:30:37 - INFO - codeparrot_training - Step 1778: {'lr': 0.0004445, 'samples': 341568, 'steps': 1778, 'loss/train': 0.6218579113483429} +01/26/2022 21:30:41 - INFO - codeparrot_training - Step 1779: {'lr': 0.00044475, 'samples': 341760, 'steps': 1779, 'loss/train': 1.225717842578888} +01/26/2022 21:30:45 - INFO - codeparrot_training - Step 1780: {'lr': 0.00044500000000000003, 'samples': 341952, 'steps': 1780, 'loss/train': 0.8935568332672119} +01/26/2022 21:30:48 - INFO - codeparrot_training - Step 1781: {'lr': 0.00044525, 'samples': 342144, 'steps': 1781, 'loss/train': 1.4280221164226532} +01/26/2022 21:30:51 - INFO - codeparrot_training - Step 1782: {'lr': 0.00044550000000000004, 'samples': 342336, 'steps': 1782, 'loss/train': 0.7796886563301086} +01/26/2022 21:30:54 - INFO - codeparrot_training - Step 1783: {'lr': 0.00044574999999999997, 'samples': 342528, 'steps': 1783, 'loss/train': 0.900734156370163} +01/26/2022 21:30:57 - INFO - codeparrot_training - Step 1784: {'lr': 0.000446, 'samples': 342720, 'steps': 1784, 'loss/train': 1.3631338477134705} +01/26/2022 21:31:00 - INFO - codeparrot_training - Step 1785: {'lr': 0.00044625, 'samples': 342912, 'steps': 1785, 'loss/train': 0.9239045977592468} +01/26/2022 21:31:04 - INFO - codeparrot_training - Step 1786: {'lr': 0.0004465, 'samples': 343104, 'steps': 1786, 'loss/train': 0.8701066374778748} +01/26/2022 21:31:07 - INFO - codeparrot_training - Step 1787: {'lr': 0.00044675, 'samples': 343296, 'steps': 1787, 'loss/train': 0.844506025314331} +01/26/2022 21:31:10 - INFO - codeparrot_training - Step 1788: {'lr': 0.000447, 'samples': 343488, 'steps': 1788, 'loss/train': 0.8615960776805878} +01/26/2022 21:31:15 - INFO - codeparrot_training - Step 1789: {'lr': 0.00044725, 'samples': 343680, 'steps': 1789, 'loss/train': 1.0209888517856598} +01/26/2022 21:31:18 - INFO - codeparrot_training - Step 1790: {'lr': 0.00044750000000000004, 'samples': 343872, 'steps': 1790, 'loss/train': 0.7795795798301697} +01/26/2022 21:31:21 - INFO - codeparrot_training - Step 1791: {'lr': 0.00044775, 'samples': 344064, 'steps': 1791, 'loss/train': 1.035276710987091} +01/26/2022 21:31:24 - INFO - codeparrot_training - Step 1792: {'lr': 0.000448, 'samples': 344256, 'steps': 1792, 'loss/train': 1.2197923958301544} +01/26/2022 21:31:27 - INFO - codeparrot_training - Step 1793: {'lr': 0.00044824999999999997, 'samples': 344448, 'steps': 1793, 'loss/train': 0.5810040235519409} +01/26/2022 21:31:30 - INFO - codeparrot_training - Step 1794: {'lr': 0.0004485, 'samples': 344640, 'steps': 1794, 'loss/train': 0.9683815240859985} +01/26/2022 21:31:33 - INFO - codeparrot_training - Step 1795: {'lr': 0.00044875, 'samples': 344832, 'steps': 1795, 'loss/train': 0.3584947809576988} +01/26/2022 21:31:37 - INFO - codeparrot_training - Step 1796: {'lr': 0.000449, 'samples': 345024, 'steps': 1796, 'loss/train': 0.965572714805603} +01/26/2022 21:31:40 - INFO - codeparrot_training - Step 1797: {'lr': 0.00044925, 'samples': 345216, 'steps': 1797, 'loss/train': 1.208339273929596} +01/26/2022 21:31:46 - INFO - codeparrot_training - Step 1798: {'lr': 0.00044950000000000003, 'samples': 345408, 'steps': 1798, 'loss/train': 0.9125329256057739} +01/26/2022 21:31:49 - INFO - codeparrot_training - Step 1799: {'lr': 0.00044975, 'samples': 345600, 'steps': 1799, 'loss/train': 0.1478106938302517} +01/26/2022 21:31:52 - INFO - codeparrot_training - Step 1800: {'lr': 0.00045000000000000004, 'samples': 345792, 'steps': 1800, 'loss/train': 1.5486858487129211} +01/26/2022 21:31:55 - INFO - codeparrot_training - Step 1801: {'lr': 0.00045024999999999997, 'samples': 345984, 'steps': 1801, 'loss/train': 0.8634694218635559} +01/26/2022 21:31:58 - INFO - codeparrot_training - Step 1802: {'lr': 0.0004505, 'samples': 346176, 'steps': 1802, 'loss/train': 0.6972474753856659} +01/26/2022 21:32:02 - INFO - codeparrot_training - Step 1803: {'lr': 0.00045075, 'samples': 346368, 'steps': 1803, 'loss/train': 0.710689902305603} +01/26/2022 21:32:05 - INFO - codeparrot_training - Step 1804: {'lr': 0.000451, 'samples': 346560, 'steps': 1804, 'loss/train': 0.7198485285043716} +01/26/2022 21:32:08 - INFO - codeparrot_training - Step 1805: {'lr': 0.00045125, 'samples': 346752, 'steps': 1805, 'loss/train': 0.8817155957221985} +01/26/2022 21:32:11 - INFO - codeparrot_training - Step 1806: {'lr': 0.0004515, 'samples': 346944, 'steps': 1806, 'loss/train': 0.6702062487602234} +01/26/2022 21:32:15 - INFO - codeparrot_training - Step 1807: {'lr': 0.00045175, 'samples': 347136, 'steps': 1807, 'loss/train': 1.1744391918182373} +01/26/2022 21:32:18 - INFO - codeparrot_training - Step 1808: {'lr': 0.00045200000000000004, 'samples': 347328, 'steps': 1808, 'loss/train': 0.7731001675128937} +01/26/2022 21:32:22 - INFO - codeparrot_training - Step 1809: {'lr': 0.00045225, 'samples': 347520, 'steps': 1809, 'loss/train': 1.4250400364398956} +01/26/2022 21:32:25 - INFO - codeparrot_training - Step 1810: {'lr': 0.00045250000000000005, 'samples': 347712, 'steps': 1810, 'loss/train': 0.5657884776592255} +01/26/2022 21:32:28 - INFO - codeparrot_training - Step 1811: {'lr': 0.00045275, 'samples': 347904, 'steps': 1811, 'loss/train': 1.0133686065673828} +01/26/2022 21:32:31 - INFO - codeparrot_training - Step 1812: {'lr': 0.000453, 'samples': 348096, 'steps': 1812, 'loss/train': 0.6502123922109604} +01/26/2022 21:32:34 - INFO - codeparrot_training - Step 1813: {'lr': 0.00045325, 'samples': 348288, 'steps': 1813, 'loss/train': 0.9410197734832764} +01/26/2022 21:32:37 - INFO - codeparrot_training - Step 1814: {'lr': 0.0004535, 'samples': 348480, 'steps': 1814, 'loss/train': 1.1278867721557617} +01/26/2022 21:32:42 - INFO - codeparrot_training - Step 1815: {'lr': 0.00045375, 'samples': 348672, 'steps': 1815, 'loss/train': 1.2114013731479645} +01/26/2022 21:32:45 - INFO - codeparrot_training - Step 1816: {'lr': 0.00045400000000000003, 'samples': 348864, 'steps': 1816, 'loss/train': 0.8608831465244293} +01/26/2022 21:32:48 - INFO - codeparrot_training - Step 1817: {'lr': 0.00045425, 'samples': 349056, 'steps': 1817, 'loss/train': 1.0036253929138184} +01/26/2022 21:32:51 - INFO - codeparrot_training - Step 1818: {'lr': 0.00045450000000000004, 'samples': 349248, 'steps': 1818, 'loss/train': 0.41098009049892426} +01/26/2022 21:32:54 - INFO - codeparrot_training - Step 1819: {'lr': 0.00045475, 'samples': 349440, 'steps': 1819, 'loss/train': 0.9145841002464294} +01/26/2022 21:32:57 - INFO - codeparrot_training - Step 1820: {'lr': 0.000455, 'samples': 349632, 'steps': 1820, 'loss/train': 1.5986244678497314} +01/26/2022 21:33:01 - INFO - codeparrot_training - Step 1821: {'lr': 0.00045525, 'samples': 349824, 'steps': 1821, 'loss/train': 0.7239570021629333} +01/26/2022 21:33:04 - INFO - codeparrot_training - Step 1822: {'lr': 0.0004555, 'samples': 350016, 'steps': 1822, 'loss/train': 1.2572510540485382} +01/26/2022 21:33:07 - INFO - codeparrot_training - Step 1823: {'lr': 0.00045575, 'samples': 350208, 'steps': 1823, 'loss/train': 0.2298322319984436} +01/26/2022 21:33:11 - INFO - codeparrot_training - Step 1824: {'lr': 0.000456, 'samples': 350400, 'steps': 1824, 'loss/train': 1.1167780458927155} +01/26/2022 21:33:14 - INFO - codeparrot_training - Step 1825: {'lr': 0.00045625, 'samples': 350592, 'steps': 1825, 'loss/train': 0.4124080538749695} +01/26/2022 21:33:18 - INFO - codeparrot_training - Step 1826: {'lr': 0.00045650000000000004, 'samples': 350784, 'steps': 1826, 'loss/train': 0.7638796269893646} +01/26/2022 21:33:21 - INFO - codeparrot_training - Step 1827: {'lr': 0.00045675, 'samples': 350976, 'steps': 1827, 'loss/train': 0.573961928486824} +01/26/2022 21:33:24 - INFO - codeparrot_training - Step 1828: {'lr': 0.00045700000000000005, 'samples': 351168, 'steps': 1828, 'loss/train': 1.0830810964107513} +01/26/2022 21:33:27 - INFO - codeparrot_training - Step 1829: {'lr': 0.00045725, 'samples': 351360, 'steps': 1829, 'loss/train': 0.6615297049283981} +01/26/2022 21:33:30 - INFO - codeparrot_training - Step 1830: {'lr': 0.0004575, 'samples': 351552, 'steps': 1830, 'loss/train': 0.9944534003734589} +01/26/2022 21:33:33 - INFO - codeparrot_training - Step 1831: {'lr': 0.00045775, 'samples': 351744, 'steps': 1831, 'loss/train': 0.7959812879562378} +01/26/2022 21:33:36 - INFO - codeparrot_training - Step 1832: {'lr': 0.000458, 'samples': 351936, 'steps': 1832, 'loss/train': 0.8877821266651154} +01/26/2022 21:33:43 - INFO - codeparrot_training - Step 1833: {'lr': 0.00045825, 'samples': 352128, 'steps': 1833, 'loss/train': 1.261086791753769} +01/26/2022 21:33:46 - INFO - codeparrot_training - Step 1834: {'lr': 0.00045850000000000003, 'samples': 352320, 'steps': 1834, 'loss/train': 0.5066499710083008} +01/26/2022 21:33:49 - INFO - codeparrot_training - Step 1835: {'lr': 0.00045875, 'samples': 352512, 'steps': 1835, 'loss/train': 1.0754070281982422} +01/26/2022 21:33:52 - INFO - codeparrot_training - Step 1836: {'lr': 0.00045900000000000004, 'samples': 352704, 'steps': 1836, 'loss/train': 0.831580638885498} +01/26/2022 21:33:55 - INFO - codeparrot_training - Step 1837: {'lr': 0.00045925, 'samples': 352896, 'steps': 1837, 'loss/train': 0.7616505324840546} +01/26/2022 21:33:58 - INFO - codeparrot_training - Step 1838: {'lr': 0.00045950000000000006, 'samples': 353088, 'steps': 1838, 'loss/train': 0.37710709869861603} +01/26/2022 21:34:02 - INFO - codeparrot_training - Step 1839: {'lr': 0.00045975, 'samples': 353280, 'steps': 1839, 'loss/train': 0.7463306486606598} +01/26/2022 21:34:05 - INFO - codeparrot_training - Step 1840: {'lr': 0.00046, 'samples': 353472, 'steps': 1840, 'loss/train': 0.8024594485759735} +01/26/2022 21:34:08 - INFO - codeparrot_training - Step 1841: {'lr': 0.00046025, 'samples': 353664, 'steps': 1841, 'loss/train': 1.3716038167476654} +01/26/2022 21:34:13 - INFO - codeparrot_training - Step 1842: {'lr': 0.0004605, 'samples': 353856, 'steps': 1842, 'loss/train': 1.1901555061340332} +01/26/2022 21:34:16 - INFO - codeparrot_training - Step 1843: {'lr': 0.00046075, 'samples': 354048, 'steps': 1843, 'loss/train': 0.5672524273395538} +01/26/2022 21:34:19 - INFO - codeparrot_training - Step 1844: {'lr': 0.00046100000000000004, 'samples': 354240, 'steps': 1844, 'loss/train': 0.32982489466667175} +01/26/2022 21:34:22 - INFO - codeparrot_training - Step 1845: {'lr': 0.00046125, 'samples': 354432, 'steps': 1845, 'loss/train': 0.42343519628047943} +01/26/2022 21:34:25 - INFO - codeparrot_training - Step 1846: {'lr': 0.00046150000000000005, 'samples': 354624, 'steps': 1846, 'loss/train': 0.8662121593952179} +01/26/2022 21:34:28 - INFO - codeparrot_training - Step 1847: {'lr': 0.00046175000000000003, 'samples': 354816, 'steps': 1847, 'loss/train': 0.6935672610998154} +01/26/2022 21:34:31 - INFO - codeparrot_training - Step 1848: {'lr': 0.000462, 'samples': 355008, 'steps': 1848, 'loss/train': 0.9938570559024811} +01/26/2022 21:34:34 - INFO - codeparrot_training - Step 1849: {'lr': 0.00046225, 'samples': 355200, 'steps': 1849, 'loss/train': 1.0552408397197723} +01/26/2022 21:34:39 - INFO - codeparrot_training - Step 1850: {'lr': 0.0004625, 'samples': 355392, 'steps': 1850, 'loss/train': 0.8935852646827698} +01/26/2022 21:34:42 - INFO - codeparrot_training - Step 1851: {'lr': 0.00046275, 'samples': 355584, 'steps': 1851, 'loss/train': 1.531786859035492} +01/26/2022 21:34:45 - INFO - codeparrot_training - Step 1852: {'lr': 0.00046300000000000003, 'samples': 355776, 'steps': 1852, 'loss/train': 0.5282375514507294} +01/26/2022 21:34:48 - INFO - codeparrot_training - Step 1853: {'lr': 0.00046325, 'samples': 355968, 'steps': 1853, 'loss/train': 0.9514651894569397} +01/26/2022 21:34:52 - INFO - codeparrot_training - Step 1854: {'lr': 0.00046350000000000004, 'samples': 356160, 'steps': 1854, 'loss/train': 0.5179400593042374} +01/26/2022 21:34:55 - INFO - codeparrot_training - Step 1855: {'lr': 0.00046375, 'samples': 356352, 'steps': 1855, 'loss/train': 0.8588109612464905} +01/26/2022 21:34:58 - INFO - codeparrot_training - Step 1856: {'lr': 0.00046400000000000006, 'samples': 356544, 'steps': 1856, 'loss/train': 0.6994162648916245} +01/26/2022 21:35:01 - INFO - codeparrot_training - Step 1857: {'lr': 0.00046425, 'samples': 356736, 'steps': 1857, 'loss/train': 0.6678935587406158} +01/26/2022 21:35:04 - INFO - codeparrot_training - Step 1858: {'lr': 0.0004645, 'samples': 356928, 'steps': 1858, 'loss/train': 0.7436229139566422} +01/26/2022 21:35:11 - INFO - codeparrot_training - Step 1859: {'lr': 0.00046475, 'samples': 357120, 'steps': 1859, 'loss/train': 0.9596456587314606} +01/26/2022 21:35:14 - INFO - codeparrot_training - Step 1860: {'lr': 0.000465, 'samples': 357312, 'steps': 1860, 'loss/train': 1.4724598824977875} +01/26/2022 21:35:17 - INFO - codeparrot_training - Step 1861: {'lr': 0.00046525, 'samples': 357504, 'steps': 1861, 'loss/train': 0.9142586588859558} +01/26/2022 21:35:20 - INFO - codeparrot_training - Step 1862: {'lr': 0.00046550000000000004, 'samples': 357696, 'steps': 1862, 'loss/train': 0.3554471433162689} +01/26/2022 21:35:23 - INFO - codeparrot_training - Step 1863: {'lr': 0.00046575, 'samples': 357888, 'steps': 1863, 'loss/train': 0.8216174840927124} +01/26/2022 21:35:26 - INFO - codeparrot_training - Step 1864: {'lr': 0.00046600000000000005, 'samples': 358080, 'steps': 1864, 'loss/train': 1.3381338715553284} +01/26/2022 21:35:29 - INFO - codeparrot_training - Step 1865: {'lr': 0.00046625000000000003, 'samples': 358272, 'steps': 1865, 'loss/train': 0.659206286072731} +01/26/2022 21:35:33 - INFO - codeparrot_training - Step 1866: {'lr': 0.0004665, 'samples': 358464, 'steps': 1866, 'loss/train': 0.6387936025857925} +01/26/2022 21:35:36 - INFO - codeparrot_training - Step 1867: {'lr': 0.00046675, 'samples': 358656, 'steps': 1867, 'loss/train': 0.9759268462657928} +01/26/2022 21:35:40 - INFO - codeparrot_training - Step 1868: {'lr': 0.000467, 'samples': 358848, 'steps': 1868, 'loss/train': 0.8318382203578949} +01/26/2022 21:35:43 - INFO - codeparrot_training - Step 1869: {'lr': 0.00046725, 'samples': 359040, 'steps': 1869, 'loss/train': 0.8856178522109985} +01/26/2022 21:35:46 - INFO - codeparrot_training - Step 1870: {'lr': 0.00046750000000000003, 'samples': 359232, 'steps': 1870, 'loss/train': 0.7402231693267822} +01/26/2022 21:35:50 - INFO - codeparrot_training - Step 1871: {'lr': 0.00046775, 'samples': 359424, 'steps': 1871, 'loss/train': 0.4504626989364624} +01/26/2022 21:35:53 - INFO - codeparrot_training - Step 1872: {'lr': 0.00046800000000000005, 'samples': 359616, 'steps': 1872, 'loss/train': 0.6901952773332596} +01/26/2022 21:35:56 - INFO - codeparrot_training - Step 1873: {'lr': 0.00046825, 'samples': 359808, 'steps': 1873, 'loss/train': 1.3196231424808502} +01/26/2022 21:35:59 - INFO - codeparrot_training - Step 1874: {'lr': 0.00046850000000000006, 'samples': 360000, 'steps': 1874, 'loss/train': 0.8008153438568115} +01/26/2022 21:36:02 - INFO - codeparrot_training - Step 1875: {'lr': 0.00046875, 'samples': 360192, 'steps': 1875, 'loss/train': 0.5548744797706604} +01/26/2022 21:36:05 - INFO - codeparrot_training - Step 1876: {'lr': 0.00046899999999999996, 'samples': 360384, 'steps': 1876, 'loss/train': 0.6174647659063339} +01/26/2022 21:36:12 - INFO - codeparrot_training - Step 1877: {'lr': 0.00046925, 'samples': 360576, 'steps': 1877, 'loss/train': 1.070265144109726} +01/26/2022 21:36:15 - INFO - codeparrot_training - Step 1878: {'lr': 0.0004695, 'samples': 360768, 'steps': 1878, 'loss/train': 1.1434812247753143} +01/26/2022 21:36:18 - INFO - codeparrot_training - Step 1879: {'lr': 0.00046975, 'samples': 360960, 'steps': 1879, 'loss/train': 0.7403541505336761} +01/26/2022 21:36:21 - INFO - codeparrot_training - Step 1880: {'lr': 0.00047, 'samples': 361152, 'steps': 1880, 'loss/train': 2.072912871837616} +01/26/2022 21:36:24 - INFO - codeparrot_training - Step 1881: {'lr': 0.00047025, 'samples': 361344, 'steps': 1881, 'loss/train': 0.5732850283384323} +01/26/2022 21:36:27 - INFO - codeparrot_training - Step 1882: {'lr': 0.0004705, 'samples': 361536, 'steps': 1882, 'loss/train': 0.8318330347537994} +01/26/2022 21:36:30 - INFO - codeparrot_training - Step 1883: {'lr': 0.00047075000000000003, 'samples': 361728, 'steps': 1883, 'loss/train': 0.30028827488422394} +01/26/2022 21:36:34 - INFO - codeparrot_training - Step 1884: {'lr': 0.000471, 'samples': 361920, 'steps': 1884, 'loss/train': 0.9006307125091553} +01/26/2022 21:36:37 - INFO - codeparrot_training - Step 1885: {'lr': 0.00047125, 'samples': 362112, 'steps': 1885, 'loss/train': 1.1136357486248016} +01/26/2022 21:36:41 - INFO - codeparrot_training - Step 1886: {'lr': 0.00047149999999999997, 'samples': 362304, 'steps': 1886, 'loss/train': 1.07257479429245} +01/26/2022 21:36:44 - INFO - codeparrot_training - Step 1887: {'lr': 0.00047175, 'samples': 362496, 'steps': 1887, 'loss/train': 1.3226381242275238} +01/26/2022 21:36:47 - INFO - codeparrot_training - Step 1888: {'lr': 0.000472, 'samples': 362688, 'steps': 1888, 'loss/train': 0.7470325380563736} +01/26/2022 21:36:50 - INFO - codeparrot_training - Step 1889: {'lr': 0.00047225, 'samples': 362880, 'steps': 1889, 'loss/train': 1.038808822631836} +01/26/2022 21:36:54 - INFO - codeparrot_training - Step 1890: {'lr': 0.0004725, 'samples': 363072, 'steps': 1890, 'loss/train': 1.0341619849205017} +01/26/2022 21:36:57 - INFO - codeparrot_training - Step 1891: {'lr': 0.00047275, 'samples': 363264, 'steps': 1891, 'loss/train': 0.7290969640016556} +01/26/2022 21:37:00 - INFO - codeparrot_training - Step 1892: {'lr': 0.000473, 'samples': 363456, 'steps': 1892, 'loss/train': 0.8344210982322693} +01/26/2022 21:37:03 - INFO - codeparrot_training - Step 1893: {'lr': 0.00047325000000000004, 'samples': 363648, 'steps': 1893, 'loss/train': 0.7739488184452057} +01/26/2022 21:37:07 - INFO - codeparrot_training - Step 1894: {'lr': 0.00047349999999999996, 'samples': 363840, 'steps': 1894, 'loss/train': 1.6855548620224} +01/26/2022 21:37:11 - INFO - codeparrot_training - Step 1895: {'lr': 0.00047375, 'samples': 364032, 'steps': 1895, 'loss/train': 1.100717157125473} +01/26/2022 21:37:14 - INFO - codeparrot_training - Step 1896: {'lr': 0.000474, 'samples': 364224, 'steps': 1896, 'loss/train': 1.0391847789287567} +01/26/2022 21:37:17 - INFO - codeparrot_training - Step 1897: {'lr': 0.00047425, 'samples': 364416, 'steps': 1897, 'loss/train': 0.3638544827699661} +01/26/2022 21:37:20 - INFO - codeparrot_training - Step 1898: {'lr': 0.0004745, 'samples': 364608, 'steps': 1898, 'loss/train': 1.035841315984726} +01/26/2022 21:37:23 - INFO - codeparrot_training - Step 1899: {'lr': 0.00047475, 'samples': 364800, 'steps': 1899, 'loss/train': 1.0633786618709564} +01/26/2022 21:37:26 - INFO - codeparrot_training - Step 1900: {'lr': 0.000475, 'samples': 364992, 'steps': 1900, 'loss/train': 1.2380416989326477} +01/26/2022 21:37:29 - INFO - codeparrot_training - Step 1901: {'lr': 0.00047525000000000003, 'samples': 365184, 'steps': 1901, 'loss/train': 0.9911230802536011} +01/26/2022 21:37:33 - INFO - codeparrot_training - Step 1902: {'lr': 0.0004755, 'samples': 365376, 'steps': 1902, 'loss/train': 1.433401644229889} +01/26/2022 21:37:39 - INFO - codeparrot_training - Step 1903: {'lr': 0.00047575, 'samples': 365568, 'steps': 1903, 'loss/train': 0.6902095824480057} +01/26/2022 21:37:42 - INFO - codeparrot_training - Step 1904: {'lr': 0.00047599999999999997, 'samples': 365760, 'steps': 1904, 'loss/train': 0.07514088414609432} +01/26/2022 21:37:45 - INFO - codeparrot_training - Step 1905: {'lr': 0.00047625, 'samples': 365952, 'steps': 1905, 'loss/train': 0.9614972770214081} +01/26/2022 21:37:48 - INFO - codeparrot_training - Step 1906: {'lr': 0.0004765, 'samples': 366144, 'steps': 1906, 'loss/train': 1.0431278944015503} +01/26/2022 21:37:51 - INFO - codeparrot_training - Step 1907: {'lr': 0.00047675, 'samples': 366336, 'steps': 1907, 'loss/train': 0.8581434488296509} +01/26/2022 21:37:55 - INFO - codeparrot_training - Step 1908: {'lr': 0.000477, 'samples': 366528, 'steps': 1908, 'loss/train': 1.7163219451904297} +01/26/2022 21:37:58 - INFO - codeparrot_training - Step 1909: {'lr': 0.00047725, 'samples': 366720, 'steps': 1909, 'loss/train': 0.8825593292713165} +01/26/2022 21:38:01 - INFO - codeparrot_training - Step 1910: {'lr': 0.0004775, 'samples': 366912, 'steps': 1910, 'loss/train': 1.0761583149433136} +01/26/2022 21:38:04 - INFO - codeparrot_training - Step 1911: {'lr': 0.00047775000000000004, 'samples': 367104, 'steps': 1911, 'loss/train': 1.2633014917373657} +01/26/2022 21:38:09 - INFO - codeparrot_training - Step 1912: {'lr': 0.00047799999999999996, 'samples': 367296, 'steps': 1912, 'loss/train': 1.1808938384056091} +01/26/2022 21:38:12 - INFO - codeparrot_training - Step 1913: {'lr': 0.00047825, 'samples': 367488, 'steps': 1913, 'loss/train': 0.677722156047821} +01/26/2022 21:38:15 - INFO - codeparrot_training - Step 1914: {'lr': 0.0004785, 'samples': 367680, 'steps': 1914, 'loss/train': 0.6951000988483429} +01/26/2022 21:38:18 - INFO - codeparrot_training - Step 1915: {'lr': 0.00047875, 'samples': 367872, 'steps': 1915, 'loss/train': 0.5601763129234314} +01/26/2022 21:38:21 - INFO - codeparrot_training - Step 1916: {'lr': 0.000479, 'samples': 368064, 'steps': 1916, 'loss/train': 1.130952537059784} +01/26/2022 21:38:24 - INFO - codeparrot_training - Step 1917: {'lr': 0.00047925, 'samples': 368256, 'steps': 1917, 'loss/train': 1.269955426454544} +01/26/2022 21:38:28 - INFO - codeparrot_training - Step 1918: {'lr': 0.0004795, 'samples': 368448, 'steps': 1918, 'loss/train': 0.8425226211547852} +01/26/2022 21:38:31 - INFO - codeparrot_training - Step 1919: {'lr': 0.00047975000000000003, 'samples': 368640, 'steps': 1919, 'loss/train': 0.6803317219018936} +01/26/2022 21:38:35 - INFO - codeparrot_training - Step 1920: {'lr': 0.00048, 'samples': 368832, 'steps': 1920, 'loss/train': 0.9387582242488861} +01/26/2022 21:38:38 - INFO - codeparrot_training - Step 1921: {'lr': 0.00048025000000000005, 'samples': 369024, 'steps': 1921, 'loss/train': 0.9797181487083435} +01/26/2022 21:38:41 - INFO - codeparrot_training - Step 1922: {'lr': 0.00048049999999999997, 'samples': 369216, 'steps': 1922, 'loss/train': 1.0194525718688965} +01/26/2022 21:38:44 - INFO - codeparrot_training - Step 1923: {'lr': 0.00048075, 'samples': 369408, 'steps': 1923, 'loss/train': 0.30047231912612915} +01/26/2022 21:38:48 - INFO - codeparrot_training - Step 1924: {'lr': 0.000481, 'samples': 369600, 'steps': 1924, 'loss/train': 0.983442485332489} +01/26/2022 21:38:51 - INFO - codeparrot_training - Step 1925: {'lr': 0.00048125, 'samples': 369792, 'steps': 1925, 'loss/train': 1.318665325641632} +01/26/2022 21:38:54 - INFO - codeparrot_training - Step 1926: {'lr': 0.0004815, 'samples': 369984, 'steps': 1926, 'loss/train': 1.295868694782257} +01/26/2022 21:38:57 - INFO - codeparrot_training - Step 1927: {'lr': 0.00048175000000000003, 'samples': 370176, 'steps': 1927, 'loss/train': 0.8086439073085785} +01/26/2022 21:39:00 - INFO - codeparrot_training - Step 1928: {'lr': 0.000482, 'samples': 370368, 'steps': 1928, 'loss/train': 1.0307956337928772} +01/26/2022 21:39:04 - INFO - codeparrot_training - Step 1929: {'lr': 0.00048225000000000004, 'samples': 370560, 'steps': 1929, 'loss/train': 1.4041475057601929} +01/26/2022 21:39:08 - INFO - codeparrot_training - Step 1930: {'lr': 0.0004825, 'samples': 370752, 'steps': 1930, 'loss/train': 0.9882602691650391} +01/26/2022 21:39:11 - INFO - codeparrot_training - Step 1931: {'lr': 0.00048275, 'samples': 370944, 'steps': 1931, 'loss/train': 1.1968180239200592} +01/26/2022 21:39:14 - INFO - codeparrot_training - Step 1932: {'lr': 0.000483, 'samples': 371136, 'steps': 1932, 'loss/train': 0.6910819709300995} +01/26/2022 21:39:17 - INFO - codeparrot_training - Step 1933: {'lr': 0.00048325, 'samples': 371328, 'steps': 1933, 'loss/train': 0.7734626233577728} +01/26/2022 21:39:20 - INFO - codeparrot_training - Step 1934: {'lr': 0.0004835, 'samples': 371520, 'steps': 1934, 'loss/train': 1.1351169347763062} +01/26/2022 21:39:23 - INFO - codeparrot_training - Step 1935: {'lr': 0.00048375, 'samples': 371712, 'steps': 1935, 'loss/train': 1.1465918719768524} +01/26/2022 21:39:26 - INFO - codeparrot_training - Step 1936: {'lr': 0.000484, 'samples': 371904, 'steps': 1936, 'loss/train': 0.4684644788503647} +01/26/2022 21:39:30 - INFO - codeparrot_training - Step 1937: {'lr': 0.00048425000000000003, 'samples': 372096, 'steps': 1937, 'loss/train': 1.0882051885128021} +01/26/2022 21:39:36 - INFO - codeparrot_training - Step 1938: {'lr': 0.0004845, 'samples': 372288, 'steps': 1938, 'loss/train': 0.8394992351531982} +01/26/2022 21:39:39 - INFO - codeparrot_training - Step 1939: {'lr': 0.00048475000000000005, 'samples': 372480, 'steps': 1939, 'loss/train': 0.993206262588501} +01/26/2022 21:39:42 - INFO - codeparrot_training - Step 1940: {'lr': 0.00048499999999999997, 'samples': 372672, 'steps': 1940, 'loss/train': 0.18441668897867203} +01/26/2022 21:39:45 - INFO - codeparrot_training - Step 1941: {'lr': 0.00048525, 'samples': 372864, 'steps': 1941, 'loss/train': 0.7797511518001556} +01/26/2022 21:39:48 - INFO - codeparrot_training - Step 1942: {'lr': 0.0004855, 'samples': 373056, 'steps': 1942, 'loss/train': 0.6799484342336655} +01/26/2022 21:39:51 - INFO - codeparrot_training - Step 1943: {'lr': 0.00048575, 'samples': 373248, 'steps': 1943, 'loss/train': 0.7915197908878326} +01/26/2022 21:39:55 - INFO - codeparrot_training - Step 1944: {'lr': 0.000486, 'samples': 373440, 'steps': 1944, 'loss/train': 1.0098568797111511} +01/26/2022 21:39:58 - INFO - codeparrot_training - Step 1945: {'lr': 0.00048625000000000003, 'samples': 373632, 'steps': 1945, 'loss/train': 0.7834502756595612} +01/26/2022 21:40:01 - INFO - codeparrot_training - Step 1946: {'lr': 0.0004865, 'samples': 373824, 'steps': 1946, 'loss/train': 1.05072820186615} +01/26/2022 21:40:05 - INFO - codeparrot_training - Step 1947: {'lr': 0.00048675000000000004, 'samples': 374016, 'steps': 1947, 'loss/train': 0.49344009160995483} +01/26/2022 21:40:08 - INFO - codeparrot_training - Step 1948: {'lr': 0.000487, 'samples': 374208, 'steps': 1948, 'loss/train': 1.1734929084777832} +01/26/2022 21:40:11 - INFO - codeparrot_training - Step 1949: {'lr': 0.00048725000000000005, 'samples': 374400, 'steps': 1949, 'loss/train': 1.2145574390888214} +01/26/2022 21:40:15 - INFO - codeparrot_training - Step 1950: {'lr': 0.0004875, 'samples': 374592, 'steps': 1950, 'loss/train': 0.8924444317817688} +01/26/2022 21:40:18 - INFO - codeparrot_training - Step 1951: {'lr': 0.00048775, 'samples': 374784, 'steps': 1951, 'loss/train': 0.7068826854228973} +01/26/2022 21:40:21 - INFO - codeparrot_training - Step 1952: {'lr': 0.000488, 'samples': 374976, 'steps': 1952, 'loss/train': 0.9460795819759369} +01/26/2022 21:40:24 - INFO - codeparrot_training - Step 1953: {'lr': 0.00048825, 'samples': 375168, 'steps': 1953, 'loss/train': 0.7600442469120026} +01/26/2022 21:40:27 - INFO - codeparrot_training - Step 1954: {'lr': 0.0004885, 'samples': 375360, 'steps': 1954, 'loss/train': 0.8999834954738617} +01/26/2022 21:40:33 - INFO - codeparrot_training - Step 1955: {'lr': 0.00048875, 'samples': 375552, 'steps': 1955, 'loss/train': 0.8945177793502808} +01/26/2022 21:40:36 - INFO - codeparrot_training - Step 1956: {'lr': 0.000489, 'samples': 375744, 'steps': 1956, 'loss/train': 0.6711243689060211} +01/26/2022 21:40:40 - INFO - codeparrot_training - Step 1957: {'lr': 0.00048925, 'samples': 375936, 'steps': 1957, 'loss/train': 0.9730430245399475} +01/26/2022 21:40:43 - INFO - codeparrot_training - Step 1958: {'lr': 0.0004895, 'samples': 376128, 'steps': 1958, 'loss/train': 0.6753373593091965} +01/26/2022 21:40:46 - INFO - codeparrot_training - Step 1959: {'lr': 0.0004897500000000001, 'samples': 376320, 'steps': 1959, 'loss/train': 0.7206808626651764} +01/26/2022 21:40:49 - INFO - codeparrot_training - Step 1960: {'lr': 0.00049, 'samples': 376512, 'steps': 1960, 'loss/train': 1.1024954617023468} +01/26/2022 21:40:52 - INFO - codeparrot_training - Step 1961: {'lr': 0.00049025, 'samples': 376704, 'steps': 1961, 'loss/train': 1.1701643764972687} +01/26/2022 21:40:55 - INFO - codeparrot_training - Step 1962: {'lr': 0.0004905, 'samples': 376896, 'steps': 1962, 'loss/train': 0.9999094605445862} +01/26/2022 21:40:58 - INFO - codeparrot_training - Step 1963: {'lr': 0.0004907500000000001, 'samples': 377088, 'steps': 1963, 'loss/train': 0.8964014947414398} +01/26/2022 21:41:03 - INFO - codeparrot_training - Step 1964: {'lr': 0.000491, 'samples': 377280, 'steps': 1964, 'loss/train': 0.8065761923789978} +01/26/2022 21:41:06 - INFO - codeparrot_training - Step 1965: {'lr': 0.00049125, 'samples': 377472, 'steps': 1965, 'loss/train': 0.9917560815811157} +01/26/2022 21:41:09 - INFO - codeparrot_training - Step 1966: {'lr': 0.0004915, 'samples': 377664, 'steps': 1966, 'loss/train': 1.1736750304698944} +01/26/2022 21:41:12 - INFO - codeparrot_training - Step 1967: {'lr': 0.00049175, 'samples': 377856, 'steps': 1967, 'loss/train': 0.8484614789485931} +01/26/2022 21:41:15 - INFO - codeparrot_training - Step 1968: {'lr': 0.000492, 'samples': 378048, 'steps': 1968, 'loss/train': 0.9249576330184937} +01/26/2022 21:41:18 - INFO - codeparrot_training - Step 1969: {'lr': 0.0004922500000000001, 'samples': 378240, 'steps': 1969, 'loss/train': 1.0223771631717682} +01/26/2022 21:41:22 - INFO - codeparrot_training - Step 1970: {'lr': 0.0004925, 'samples': 378432, 'steps': 1970, 'loss/train': 0.5959418714046478} +01/26/2022 21:41:25 - INFO - codeparrot_training - Step 1971: {'lr': 0.00049275, 'samples': 378624, 'steps': 1971, 'loss/train': 0.4839349687099457} +01/26/2022 21:41:28 - INFO - codeparrot_training - Step 1972: {'lr': 0.0004930000000000001, 'samples': 378816, 'steps': 1972, 'loss/train': 0.73919378221035} +01/26/2022 21:41:32 - INFO - codeparrot_training - Step 1973: {'lr': 0.00049325, 'samples': 379008, 'steps': 1973, 'loss/train': 0.8754311800003052} +01/26/2022 21:41:35 - INFO - codeparrot_training - Step 1974: {'lr': 0.0004935, 'samples': 379200, 'steps': 1974, 'loss/train': 0.2693951725959778} +01/26/2022 21:41:38 - INFO - codeparrot_training - Step 1975: {'lr': 0.00049375, 'samples': 379392, 'steps': 1975, 'loss/train': 0.8377140462398529} +01/26/2022 21:41:42 - INFO - codeparrot_training - Step 1976: {'lr': 0.000494, 'samples': 379584, 'steps': 1976, 'loss/train': 0.7758494317531586} +01/26/2022 21:41:45 - INFO - codeparrot_training - Step 1977: {'lr': 0.00049425, 'samples': 379776, 'steps': 1977, 'loss/train': 1.216726541519165} +01/26/2022 21:41:48 - INFO - codeparrot_training - Step 1978: {'lr': 0.0004945, 'samples': 379968, 'steps': 1978, 'loss/train': 0.9289956986904144} +01/26/2022 21:41:51 - INFO - codeparrot_training - Step 1979: {'lr': 0.0004947500000000001, 'samples': 380160, 'steps': 1979, 'loss/train': 0.8398751020431519} +01/26/2022 21:41:54 - INFO - codeparrot_training - Step 1980: {'lr': 0.000495, 'samples': 380352, 'steps': 1980, 'loss/train': 0.9946226477622986} +01/26/2022 21:42:01 - INFO - codeparrot_training - Step 1981: {'lr': 0.00049525, 'samples': 380544, 'steps': 1981, 'loss/train': 0.6382877826690674} +01/26/2022 21:42:04 - INFO - codeparrot_training - Step 1982: {'lr': 0.0004955, 'samples': 380736, 'steps': 1982, 'loss/train': 0.6531389057636261} +01/26/2022 21:42:07 - INFO - codeparrot_training - Step 1983: {'lr': 0.00049575, 'samples': 380928, 'steps': 1983, 'loss/train': 1.2436447441577911} +01/26/2022 21:42:10 - INFO - codeparrot_training - Step 1984: {'lr': 0.000496, 'samples': 381120, 'steps': 1984, 'loss/train': 0.8173876404762268} +01/26/2022 21:42:13 - INFO - codeparrot_training - Step 1985: {'lr': 0.0004962500000000001, 'samples': 381312, 'steps': 1985, 'loss/train': 0.2629638612270355} +01/26/2022 21:42:16 - INFO - codeparrot_training - Step 1986: {'lr': 0.0004965, 'samples': 381504, 'steps': 1986, 'loss/train': 0.4969499856233597} +01/26/2022 21:42:19 - INFO - codeparrot_training - Step 1987: {'lr': 0.00049675, 'samples': 381696, 'steps': 1987, 'loss/train': 1.013057827949524} +01/26/2022 21:42:23 - INFO - codeparrot_training - Step 1988: {'lr': 0.000497, 'samples': 381888, 'steps': 1988, 'loss/train': 0.8126345872879028} +01/26/2022 21:42:26 - INFO - codeparrot_training - Step 1989: {'lr': 0.0004972500000000001, 'samples': 382080, 'steps': 1989, 'loss/train': 1.1256734132766724} +01/26/2022 21:42:30 - INFO - codeparrot_training - Step 1990: {'lr': 0.0004975, 'samples': 382272, 'steps': 1990, 'loss/train': 0.9168229401111603} +01/26/2022 21:42:33 - INFO - codeparrot_training - Step 1991: {'lr': 0.00049775, 'samples': 382464, 'steps': 1991, 'loss/train': 0.941860556602478} +01/26/2022 21:42:36 - INFO - codeparrot_training - Step 1992: {'lr': 0.000498, 'samples': 382656, 'steps': 1992, 'loss/train': 1.4965969026088715} +01/26/2022 21:42:40 - INFO - codeparrot_training - Step 1993: {'lr': 0.00049825, 'samples': 382848, 'steps': 1993, 'loss/train': 1.0781153440475464} +01/26/2022 21:42:43 - INFO - codeparrot_training - Step 1994: {'lr': 0.0004985, 'samples': 383040, 'steps': 1994, 'loss/train': 0.9627453088760376} +01/26/2022 21:42:46 - INFO - codeparrot_training - Step 1995: {'lr': 0.0004987500000000001, 'samples': 383232, 'steps': 1995, 'loss/train': 0.9109219908714294} +01/26/2022 21:42:49 - INFO - codeparrot_training - Step 1996: {'lr': 0.000499, 'samples': 383424, 'steps': 1996, 'loss/train': 0.13320894911885262} +01/26/2022 21:42:52 - INFO - codeparrot_training - Step 1997: {'lr': 0.00049925, 'samples': 383616, 'steps': 1997, 'loss/train': 1.204388827085495} +01/26/2022 21:42:55 - INFO - codeparrot_training - Step 1998: {'lr': 0.0004995, 'samples': 383808, 'steps': 1998, 'loss/train': 1.3314221799373627} +01/26/2022 21:43:00 - INFO - codeparrot_training - Step 1999: {'lr': 0.0004997500000000001, 'samples': 384000, 'steps': 1999, 'loss/train': 1.2572254836559296} +01/26/2022 21:43:00 - INFO - codeparrot_training - Evaluating and saving model checkpoint