diff --git "a/log/debug_0.log" "b/log/debug_0.log" new file mode 100644--- /dev/null +++ "b/log/debug_0.log" @@ -0,0 +1,8075 @@ +03/03/2022 12:50:35 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +03/03/2022 12:50:35 - WARNING - huggingface_hub.repository - Revision `fast-glitter-2` does not exist. Created and checked out branch `fast-glitter-2`. +03/03/2022 12:50:35 - WARNING - huggingface_hub.repository - +03/03/2022 12:50:47 - INFO - datasets.data_files - Some files matched the pattern '/home/nathan/codeparrot-clean-train/**' at /home/nathan/codeparrot-clean-train but don't have valid data file extensions: [PosixPath('/home/nathan/codeparrot-clean-train/.git/description'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/17/5e/175e7375d6f65993071aa653bdd4e8b117cc02d1d2353cd7bcdbaaf7fe8b3c9c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/55/b6/55b6989a41ae296337356153e6081c61484d0b6734b6905683823e7317d01c42'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/54/60/5460223b92bb118814a7777a939f4005b7426a7e4a068c193c10d1b86eeb862b'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/refs/remotes/origin/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-commit'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-receive.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2e/aa/2eaa21b832ed1496fb7f0b259666dbfc36ed483d81494d1e8705f9d601509c12'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5a/5f/5a5fbc19e0e76787f668ada7235203c10b0cbcdea0ecf8f873f8ec281cfe3494'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ae/45/ae45741df674456bc63bad91374d2ba5ef988d33d6e2a322ef0a5ac8af040371'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/32/be/32beb30e381ff02fb71854b5534306f395ef00f51f02b62da1f027c8c7fab26f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-rebase.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/50/38/503872def2ac44733fbefc2602ab16224caca0896aa1eba045025ef2d60efcdc'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/56/80/56803c607a19ccb576c90bdb10a02cfa7b3affc67dd150fa41b00cc22213b174'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/prepare-commit-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/info/exclude'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/cc/58/cc58b22515c4fd7d891287ee717c2054290b20c17b1c34693fd8964ab730687b'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/60/41/604177fe5560efd99d93091fadab6293afe7cd7d12f81638c301de1c937c1583'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/df/08/df0840d1657530c8fa9f82864be5999c515f54341d926c430a82528a6bb83740'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-merge'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/f1/a7/f1a7a250e1f6164a7fb602131ff54b69deb305258792f2358075403769d58fe5'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/d0/02/d0024828eece6d4d1c25cb4e539328be97fa28ce66a3b8d2374a117711cfd520'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5f/d1/5fd1bb56db810b65d1fd3866dc43d9c7b690c8f52b9ca8119b2a5f4c49d13eec'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/05/39/053944e1daead0b6de8e46ea2e0bc68b9247604c63a55d444ac3b9adb12e2cd2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/74/31/7431977a8e3a6eb0348b821009495f85d9373c1f730f4a74b0db43326568f77d'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/90/a5/90a573501de640c3e0e6f1b3508306febc96faf6061bb33c67894c168a1879c6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/commit-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/a4/6b/a46b5c08d39691524b46fadf78eab5efefa29978edfee799ec3587d928dc1302'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/fa/e6/fae6b44a24c1c35f15053a19a6b2b2af5cc9fb8bdaf0da409068a2a1f333f28e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b6/ce/b6ce495492aedfc91b66efdfd214b2dfe44867c719d51590e1868e42f4e9b6dd'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/7c/0e/7c0ef87edb0e556939282c859c7c893a91b5b0f931394ca4cca4f4ec98a61951'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/0b/f3/0bf3cd1320065c163f47a112458dc107650e3e862094b703b76073bd0b68663d'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/3e/f2/3ef240d0b394384803ae1bbe3b30974e11eb9b1b6ad4f49afc2ed0f7c9eae0d6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/73/73/737327c2b47693e00050aa3410c5eb402c66211a79740ab57f1c763a1e557563'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/37/26/3726a0239b5cb7d0ef3ea36886c533d0becc7404217763015559edb546d53c94'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-merge-commit.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/67/f1/67f1ff0d590fbf4aa9afa161c290fe9be17538d4b723278bb21fd6408b0e6a3e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ee/c1/eec1a9546aac0444a706c09f6aab67cd64403940657417e30212b7ff1e16665c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/e6/48/e6484a578778beccab26c8549608ec13970e6bcdb9541cdccad20f4d984e8181'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-applypatch.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-checkout'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/applypatch-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/f1/62/f162b06b5dca01aa85ef9a675d396c0fbab1d009b5bee1c5b7ea6b415c6f12a4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/17/96/1796f12729d0407cc57500c9c87959e0e7becd729f37374702868ed8765015f4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/15/ac/15ac016e4cd702bb184457cbf5674d71b632fc34c29611ba4de549b85c67acfb'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/86/0e/860eda34e90456533e9dd41a5c0fdb74c54dc8d9cf43d6c60b887b2c858be831'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-push.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/dc/ac/dcacb03d8f43f7879c5eab4422644d7b3797b47dbb0c9c84d88cbc85822d8306'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/9b/1b/9b1b8e52b9262f03f1719d3950dc8dfa2b9719dc2e273603023f6f329c1b2068'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5d/42/5d42ba9f195510757a3699005a7c43ddede4b598caf8a5f2f8c84d1125fa6324'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/update.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-commit.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/cd/33/cd339656799518495d23aedf1503459be6d3086e22672e80edab8403d12ded1c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/refs/remotes/origin/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/refs/heads/main'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2f/62/2f628d890bceee216f87edb3c45d2e384ee2501ce41a4c4169efaa3363bef1d2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ac/e3/ace3ac440b380d604ab198cf8e838a2a375e7b0a6b5699ec74a8c79648f4bab8'), PosixPath('/home/nathan/codeparrot-clean-train/.git/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/packed-refs'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ef/e1/efe1759837b74b5b5ed3df1a09d4c880f9ad20413d958f79d35bf1cb6a2a09d4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b4/83/b4836655e350f0796acd2b1a206e657c2808d9f136afae095e0b94a790c704e1'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-update.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/fsmonitor-watchman.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/9f/7e/9f7e18a3980d4b3d5ed9469ab7a2d67b608e8aa6fff38d876f86719c8f2a7a82'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/0f/7a/0f7a67cd83c1c069995f0f2510ebf818dcc71d9658f189de1231d2b7aac8883c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/index'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/55/c9/55c9c0b2f26de96e0311ee43e8eaa78ad1af387d0c59a26f22c5ebd507dda321'), PosixPath('/home/nathan/codeparrot-clean-train/.git/objects/pack/pack-df5296c3b6443e668dc098dc5c59f854d9d79f0f.idx'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/14/08/14089cad26037080ee900bede2fd42d5cac70738b2e77402b36681e1d2a521f6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/config'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ac/36/ac36d12d37c1dc8ee8d3b8f0eae93966ae73482ef725615bb1a715802ddd4dd4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/e7/a9/e7a9ccbfe6bd92476f83eba205c47ed23732ace4c1bd7458d76d666ebbba3b1c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-push'), PosixPath('/home/nathan/codeparrot-clean-train/.git/objects/pack/pack-df5296c3b6443e668dc098dc5c59f854d9d79f0f.pack'), PosixPath('/home/nathan/codeparrot-clean-train/.git/refs/heads/main'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/12/8d/128d56e09d9d741b2778d733e595838a50a5e82fdc9adbb0aa8645457716b97e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2a/7e/2a7e50bbdb90d6c4cec534c3f1dc7ec0e6a0dada15c07cfd94615940c632ce02'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/4e/39/4e392fcaae564652d234d07b4f71eeed90efe51b1b714831e39d77f3e537d3df'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/fb/84/fb84ca8000808f62718994e4b44e79d88a05b345e9638d9f6cf6c8a5472da01f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/d4/9f/d49f1929644619c39cff677367ff2e18223a8046ec8f61e224954a10aa2ccf8f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b6/8a/b68a74f9784402dcb311f4db72a873035e47b98b185a1813ab2c1645cb7255a2')] +03/03/2022 12:50:47 - WARNING - datasets.builder - Using custom data configuration codeparrot-clean-train-86fef7ac9fb06b05 +03/03/2022 12:50:49 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-valid-a800eb55c299abc0 +03/03/2022 12:51:29 - INFO - codeparrot_training - Step 0: {'lr': 0.0, 'samples': 512, 'steps': 0, 'loss/train': 10.075563430786133} +03/03/2022 12:52:43 - INFO - codeparrot_training - Step 1: {'lr': 2.5e-07, 'samples': 1024, 'steps': 1, 'loss/train': 10.124848365783691} +03/03/2022 12:52:43 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/03/2022 12:54:06 - INFO - codeparrot_training - Step 2: {'lr': 5e-07, 'samples': 1536, 'steps': 2, 'loss/train': 10.012473106384277} +03/03/2022 12:54:09 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/03/2022 12:54:11 - INFO - codeparrot_training - Step 3: {'lr': 7.5e-07, 'samples': 2048, 'steps': 3, 'loss/train': 10.121092796325684} +03/03/2022 12:54:14 - INFO - codeparrot_training - Step 4: {'lr': 1e-06, 'samples': 2560, 'steps': 4, 'loss/train': 10.095026016235352} +03/03/2022 12:54:17 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/03/2022 12:54:20 - INFO - codeparrot_training - Step 5: {'lr': 1.25e-06, 'samples': 3072, 'steps': 5, 'loss/train': 9.843606948852539} +03/03/2022 12:54:23 - INFO - codeparrot_training - Step 6: {'lr': 1.5e-06, 'samples': 3584, 'steps': 6, 'loss/train': 10.02976131439209} +03/03/2022 12:54:26 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/03/2022 12:54:28 - INFO - codeparrot_training - Step 7: {'lr': 1.75e-06, 'samples': 4096, 'steps': 7, 'loss/train': 10.09312629699707} +03/03/2022 12:54:32 - INFO - codeparrot_training - Step 8: {'lr': 2e-06, 'samples': 4608, 'steps': 8, 'loss/train': 9.968323707580566} +03/03/2022 12:54:34 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/03/2022 12:54:37 - INFO - codeparrot_training - Step 9: {'lr': 2.25e-06, 'samples': 5120, 'steps': 9, 'loss/train': 9.982961654663086} +03/03/2022 12:54:40 - INFO - codeparrot_training - Step 10: {'lr': 2.5e-06, 'samples': 5632, 'steps': 10, 'loss/train': 9.881282806396484} +03/03/2022 12:54:43 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/03/2022 12:54:45 - INFO - codeparrot_training - Step 11: {'lr': 2.75e-06, 'samples': 6144, 'steps': 11, 'loss/train': 9.89371395111084} +03/03/2022 12:54:49 - INFO - codeparrot_training - Step 12: {'lr': 3e-06, 'samples': 6656, 'steps': 12, 'loss/train': 9.743671417236328} +03/03/2022 12:54:51 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/03/2022 12:54:54 - INFO - codeparrot_training - Step 13: {'lr': 3.25e-06, 'samples': 7168, 'steps': 13, 'loss/train': 9.71867847442627} +03/03/2022 12:54:57 - INFO - codeparrot_training - Step 14: {'lr': 3.5e-06, 'samples': 7680, 'steps': 14, 'loss/train': 9.706364631652832} +03/03/2022 12:55:01 - INFO - codeparrot_training - Step 15: {'lr': 3.75e-06, 'samples': 8192, 'steps': 15, 'loss/train': 9.625722885131836} +03/03/2022 12:55:01 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/03/2022 12:55:06 - INFO - codeparrot_training - Step 16: {'lr': 4e-06, 'samples': 8704, 'steps': 16, 'loss/train': 9.538063049316406} +03/03/2022 12:55:09 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/03/2022 12:55:11 - INFO - codeparrot_training - Step 17: {'lr': 4.250000000000001e-06, 'samples': 9216, 'steps': 17, 'loss/train': 9.547978401184082} +03/03/2022 12:55:14 - INFO - codeparrot_training - Step 18: {'lr': 4.5e-06, 'samples': 9728, 'steps': 18, 'loss/train': 9.613702774047852} +03/03/2022 12:55:17 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/03/2022 12:55:20 - INFO - codeparrot_training - Step 19: {'lr': 4.75e-06, 'samples': 10240, 'steps': 19, 'loss/train': 9.567453384399414} +03/03/2022 12:55:23 - INFO - codeparrot_training - Step 20: {'lr': 5e-06, 'samples': 10752, 'steps': 20, 'loss/train': 9.41970443725586} +03/03/2022 12:55:25 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/03/2022 12:55:28 - INFO - codeparrot_training - Step 21: {'lr': 5.2500000000000006e-06, 'samples': 11264, 'steps': 21, 'loss/train': 9.453089714050293} +03/03/2022 12:55:31 - INFO - codeparrot_training - Step 22: {'lr': 5.5e-06, 'samples': 11776, 'steps': 22, 'loss/train': 9.410444259643555} +03/03/2022 12:55:34 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/03/2022 12:55:37 - INFO - codeparrot_training - Step 23: {'lr': 5.75e-06, 'samples': 12288, 'steps': 23, 'loss/train': 8.90427303314209} +03/03/2022 12:55:40 - INFO - codeparrot_training - Step 24: {'lr': 6e-06, 'samples': 12800, 'steps': 24, 'loss/train': 9.217617988586426} +03/03/2022 12:55:42 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/03/2022 12:55:45 - INFO - codeparrot_training - Step 25: {'lr': 6.25e-06, 'samples': 13312, 'steps': 25, 'loss/train': 9.386109352111816} +03/03/2022 12:55:48 - INFO - codeparrot_training - Step 26: {'lr': 6.5e-06, 'samples': 13824, 'steps': 26, 'loss/train': 8.986451148986816} +03/03/2022 12:55:50 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/03/2022 12:55:54 - INFO - codeparrot_training - Step 27: {'lr': 6.75e-06, 'samples': 14336, 'steps': 27, 'loss/train': 9.02078914642334} +03/03/2022 12:55:57 - INFO - codeparrot_training - Step 28: {'lr': 7e-06, 'samples': 14848, 'steps': 28, 'loss/train': 9.070732116699219} +03/03/2022 12:55:58 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/03/2022 12:56:02 - INFO - codeparrot_training - Step 29: {'lr': 7.250000000000001e-06, 'samples': 15360, 'steps': 29, 'loss/train': 8.967691421508789} +03/03/2022 12:56:05 - INFO - codeparrot_training - Step 30: {'lr': 7.5e-06, 'samples': 15872, 'steps': 30, 'loss/train': 8.433162689208984} +03/03/2022 12:56:07 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/03/2022 12:56:11 - INFO - codeparrot_training - Step 31: {'lr': 7.75e-06, 'samples': 16384, 'steps': 31, 'loss/train': 9.051046371459961} +03/03/2022 12:56:14 - INFO - codeparrot_training - Step 32: {'lr': 8e-06, 'samples': 16896, 'steps': 32, 'loss/train': 8.816210746765137} +03/03/2022 12:56:15 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/03/2022 12:56:19 - INFO - codeparrot_training - Step 33: {'lr': 8.25e-06, 'samples': 17408, 'steps': 33, 'loss/train': 9.837918281555176} +03/03/2022 12:56:22 - INFO - codeparrot_training - Step 34: {'lr': 8.500000000000002e-06, 'samples': 17920, 'steps': 34, 'loss/train': 9.328680992126465} +03/03/2022 12:56:24 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/03/2022 12:56:28 - INFO - codeparrot_training - Step 35: {'lr': 8.750000000000001e-06, 'samples': 18432, 'steps': 35, 'loss/train': 8.939859390258789} +03/03/2022 12:56:31 - INFO - codeparrot_training - Step 36: {'lr': 9e-06, 'samples': 18944, 'steps': 36, 'loss/train': 9.023159980773926} +03/03/2022 12:56:32 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/03/2022 12:56:36 - INFO - codeparrot_training - Step 37: {'lr': 9.25e-06, 'samples': 19456, 'steps': 37, 'loss/train': 9.151119232177734} +03/03/2022 12:56:39 - INFO - codeparrot_training - Step 38: {'lr': 9.5e-06, 'samples': 19968, 'steps': 38, 'loss/train': 8.467151641845703} +03/03/2022 12:56:40 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/03/2022 12:56:45 - INFO - codeparrot_training - Step 39: {'lr': 9.75e-06, 'samples': 20480, 'steps': 39, 'loss/train': 8.554976463317871} +03/03/2022 12:56:48 - INFO - codeparrot_training - Step 40: {'lr': 1e-05, 'samples': 20992, 'steps': 40, 'loss/train': 9.488030433654785} +03/03/2022 12:56:48 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/03/2022 12:56:53 - INFO - codeparrot_training - Step 41: {'lr': 1.025e-05, 'samples': 21504, 'steps': 41, 'loss/train': 9.343393325805664} +03/03/2022 12:56:56 - INFO - codeparrot_training - Step 42: {'lr': 1.0500000000000001e-05, 'samples': 22016, 'steps': 42, 'loss/train': 8.726397514343262} +03/03/2022 12:56:57 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/03/2022 12:57:01 - INFO - codeparrot_training - Step 43: {'lr': 1.0749999999999999e-05, 'samples': 22528, 'steps': 43, 'loss/train': 8.832099914550781} +03/03/2022 12:57:05 - INFO - codeparrot_training - Step 44: {'lr': 1.1e-05, 'samples': 23040, 'steps': 44, 'loss/train': 8.617071151733398} +03/03/2022 12:57:05 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/03/2022 12:57:10 - INFO - codeparrot_training - Step 45: {'lr': 1.1249999999999999e-05, 'samples': 23552, 'steps': 45, 'loss/train': 8.621835708618164} +03/03/2022 12:57:13 - INFO - codeparrot_training - Step 46: {'lr': 1.15e-05, 'samples': 24064, 'steps': 46, 'loss/train': 8.607819557189941} +03/03/2022 12:57:13 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/03/2022 12:57:19 - INFO - codeparrot_training - Step 47: {'lr': 1.1750000000000001e-05, 'samples': 24576, 'steps': 47, 'loss/train': 8.41963005065918} +03/03/2022 12:57:21 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/03/2022 12:57:24 - INFO - codeparrot_training - Step 48: {'lr': 1.2e-05, 'samples': 25088, 'steps': 48, 'loss/train': 8.563586235046387} +03/03/2022 12:57:27 - INFO - codeparrot_training - Step 49: {'lr': 1.2250000000000001e-05, 'samples': 25600, 'steps': 49, 'loss/train': 9.081216812133789} +03/03/2022 12:57:30 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/03/2022 12:57:32 - INFO - codeparrot_training - Step 50: {'lr': 1.25e-05, 'samples': 26112, 'steps': 50, 'loss/train': 8.793917655944824} +03/03/2022 12:57:36 - INFO - codeparrot_training - Step 51: {'lr': 1.275e-05, 'samples': 26624, 'steps': 51, 'loss/train': 7.926756858825684} +03/03/2022 12:57:38 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/03/2022 12:57:41 - INFO - codeparrot_training - Step 52: {'lr': 1.3e-05, 'samples': 27136, 'steps': 52, 'loss/train': 8.878251075744629} +03/03/2022 12:57:44 - INFO - codeparrot_training - Step 53: {'lr': 1.325e-05, 'samples': 27648, 'steps': 53, 'loss/train': 8.69863224029541} +03/03/2022 12:57:47 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/03/2022 12:57:50 - INFO - codeparrot_training - Step 54: {'lr': 1.35e-05, 'samples': 28160, 'steps': 54, 'loss/train': 8.32911205291748} +03/03/2022 12:57:53 - INFO - codeparrot_training - Step 55: {'lr': 1.375e-05, 'samples': 28672, 'steps': 55, 'loss/train': 8.402761459350586} +03/03/2022 12:57:55 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/03/2022 12:57:58 - INFO - codeparrot_training - Step 56: {'lr': 1.4e-05, 'samples': 29184, 'steps': 56, 'loss/train': 8.484271049499512} +03/03/2022 12:58:01 - INFO - codeparrot_training - Step 57: {'lr': 1.425e-05, 'samples': 29696, 'steps': 57, 'loss/train': 8.7686767578125} +03/03/2022 12:58:04 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 12:58:07 - INFO - codeparrot_training - Step 58: {'lr': 1.4500000000000002e-05, 'samples': 30208, 'steps': 58, 'loss/train': 8.384221076965332} +03/03/2022 12:58:10 - INFO - codeparrot_training - Step 59: {'lr': 1.475e-05, 'samples': 30720, 'steps': 59, 'loss/train': 8.519675254821777} +03/03/2022 12:58:12 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/03/2022 12:58:15 - INFO - codeparrot_training - Step 60: {'lr': 1.5e-05, 'samples': 31232, 'steps': 60, 'loss/train': 8.939576148986816} +03/03/2022 12:58:19 - INFO - codeparrot_training - Step 61: {'lr': 1.525e-05, 'samples': 31744, 'steps': 61, 'loss/train': 8.751921653747559} +03/03/2022 12:58:21 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/03/2022 12:58:24 - INFO - codeparrot_training - Step 62: {'lr': 1.55e-05, 'samples': 32256, 'steps': 62, 'loss/train': 8.698548316955566} +03/03/2022 12:58:27 - INFO - codeparrot_training - Step 63: {'lr': 1.575e-05, 'samples': 32768, 'steps': 63, 'loss/train': 8.578275680541992} +03/03/2022 12:58:29 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/03/2022 12:58:32 - INFO - codeparrot_training - Step 64: {'lr': 1.6e-05, 'samples': 33280, 'steps': 64, 'loss/train': 8.510712623596191} +03/03/2022 12:58:36 - INFO - codeparrot_training - Step 65: {'lr': 1.6250000000000002e-05, 'samples': 33792, 'steps': 65, 'loss/train': 8.645843505859375} +03/03/2022 12:58:37 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/03/2022 12:58:41 - INFO - codeparrot_training - Step 66: {'lr': 1.65e-05, 'samples': 34304, 'steps': 66, 'loss/train': 8.519330024719238} +03/03/2022 12:58:44 - INFO - codeparrot_training - Step 67: {'lr': 1.675e-05, 'samples': 34816, 'steps': 67, 'loss/train': 8.555344581604004} +03/03/2022 12:58:46 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/03/2022 12:58:49 - INFO - codeparrot_training - Step 68: {'lr': 1.7000000000000003e-05, 'samples': 35328, 'steps': 68, 'loss/train': 8.772445678710938} +03/03/2022 12:58:53 - INFO - codeparrot_training - Step 69: {'lr': 1.7250000000000003e-05, 'samples': 35840, 'steps': 69, 'loss/train': 8.694655418395996} +03/03/2022 12:58:54 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 12:58:58 - INFO - codeparrot_training - Step 70: {'lr': 1.7500000000000002e-05, 'samples': 36352, 'steps': 70, 'loss/train': 8.430862426757812} +03/03/2022 12:59:01 - INFO - codeparrot_training - Step 71: {'lr': 1.7749999999999998e-05, 'samples': 36864, 'steps': 71, 'loss/train': 9.024465560913086} +03/03/2022 12:59:02 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/03/2022 12:59:06 - INFO - codeparrot_training - Step 72: {'lr': 1.8e-05, 'samples': 37376, 'steps': 72, 'loss/train': 8.457487106323242} +03/03/2022 12:59:10 - INFO - codeparrot_training - Step 73: {'lr': 1.825e-05, 'samples': 37888, 'steps': 73, 'loss/train': 8.562689781188965} +03/03/2022 12:59:11 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/03/2022 12:59:15 - INFO - codeparrot_training - Step 74: {'lr': 1.85e-05, 'samples': 38400, 'steps': 74, 'loss/train': 8.607612609863281} +03/03/2022 12:59:18 - INFO - codeparrot_training - Step 75: {'lr': 1.875e-05, 'samples': 38912, 'steps': 75, 'loss/train': 8.44783878326416} +03/03/2022 12:59:19 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/03/2022 12:59:23 - INFO - codeparrot_training - Step 76: {'lr': 1.9e-05, 'samples': 39424, 'steps': 76, 'loss/train': 9.018163681030273} +03/03/2022 12:59:27 - INFO - codeparrot_training - Step 77: {'lr': 1.925e-05, 'samples': 39936, 'steps': 77, 'loss/train': 8.568017959594727} +03/03/2022 12:59:28 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/03/2022 12:59:32 - INFO - codeparrot_training - Step 78: {'lr': 1.95e-05, 'samples': 40448, 'steps': 78, 'loss/train': 8.524364471435547} +03/03/2022 12:59:35 - INFO - codeparrot_training - Step 79: {'lr': 1.975e-05, 'samples': 40960, 'steps': 79, 'loss/train': 8.529118537902832} +03/03/2022 12:59:36 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/03/2022 12:59:40 - INFO - codeparrot_training - Step 80: {'lr': 2e-05, 'samples': 41472, 'steps': 80, 'loss/train': 8.654486656188965} +03/03/2022 12:59:44 - INFO - codeparrot_training - Step 81: {'lr': 2.025e-05, 'samples': 41984, 'steps': 81, 'loss/train': 8.813864707946777} +03/03/2022 12:59:45 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/03/2022 12:59:49 - INFO - codeparrot_training - Step 82: {'lr': 2.05e-05, 'samples': 42496, 'steps': 82, 'loss/train': 8.502435684204102} +03/03/2022 12:59:52 - INFO - codeparrot_training - Step 83: {'lr': 2.0750000000000003e-05, 'samples': 43008, 'steps': 83, 'loss/train': 9.020332336425781} +03/03/2022 12:59:53 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 12:59:58 - INFO - codeparrot_training - Step 84: {'lr': 2.1000000000000002e-05, 'samples': 43520, 'steps': 84, 'loss/train': 8.501923561096191} +03/03/2022 13:00:01 - INFO - codeparrot_training - Step 85: {'lr': 2.125e-05, 'samples': 44032, 'steps': 85, 'loss/train': 8.312204360961914} +03/03/2022 13:00:02 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/03/2022 13:00:06 - INFO - codeparrot_training - Step 86: {'lr': 2.1499999999999997e-05, 'samples': 44544, 'steps': 86, 'loss/train': 8.143906593322754} +03/03/2022 13:00:10 - INFO - codeparrot_training - Step 87: {'lr': 2.175e-05, 'samples': 45056, 'steps': 87, 'loss/train': 8.643278121948242} +03/03/2022 13:00:10 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/03/2022 13:00:15 - INFO - codeparrot_training - Step 88: {'lr': 2.2e-05, 'samples': 45568, 'steps': 88, 'loss/train': 8.471076011657715} +03/03/2022 13:00:18 - INFO - codeparrot_training - Step 89: {'lr': 2.225e-05, 'samples': 46080, 'steps': 89, 'loss/train': 8.585366249084473} +03/03/2022 13:00:19 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/03/2022 13:00:24 - INFO - codeparrot_training - Step 90: {'lr': 2.2499999999999998e-05, 'samples': 46592, 'steps': 90, 'loss/train': 8.756730079650879} +03/03/2022 13:00:27 - INFO - codeparrot_training - Step 91: {'lr': 2.275e-05, 'samples': 47104, 'steps': 91, 'loss/train': 8.959531784057617} +03/03/2022 13:00:27 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/03/2022 13:00:32 - INFO - codeparrot_training - Step 92: {'lr': 2.3e-05, 'samples': 47616, 'steps': 92, 'loss/train': 8.488385200500488} +03/03/2022 13:00:35 - INFO - codeparrot_training - Step 93: {'lr': 2.325e-05, 'samples': 48128, 'steps': 93, 'loss/train': 8.548238754272461} +03/03/2022 13:00:36 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/03/2022 13:00:40 - INFO - codeparrot_training - Step 94: {'lr': 2.3500000000000002e-05, 'samples': 48640, 'steps': 94, 'loss/train': 9.079781532287598} +03/03/2022 13:00:44 - INFO - codeparrot_training - Step 95: {'lr': 2.375e-05, 'samples': 49152, 'steps': 95, 'loss/train': 8.213057518005371} +03/03/2022 13:00:44 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/03/2022 13:00:49 - INFO - codeparrot_training - Step 96: {'lr': 2.4e-05, 'samples': 49664, 'steps': 96, 'loss/train': 9.193713188171387} +03/03/2022 13:00:52 - INFO - codeparrot_training - Step 97: {'lr': 2.425e-05, 'samples': 50176, 'steps': 97, 'loss/train': 8.648150444030762} +03/03/2022 13:00:52 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/03/2022 13:00:57 - INFO - codeparrot_training - Step 98: {'lr': 2.4500000000000003e-05, 'samples': 50688, 'steps': 98, 'loss/train': 8.951532363891602} +03/03/2022 13:01:01 - INFO - codeparrot_training - Step 99: {'lr': 2.4750000000000002e-05, 'samples': 51200, 'steps': 99, 'loss/train': 8.602143287658691} +03/03/2022 13:01:01 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/03/2022 13:01:06 - INFO - codeparrot_training - Step 100: {'lr': 2.5e-05, 'samples': 51712, 'steps': 100, 'loss/train': 8.64793586730957} +03/03/2022 13:01:09 - INFO - codeparrot_training - Step 101: {'lr': 2.525e-05, 'samples': 52224, 'steps': 101, 'loss/train': 8.7129545211792} +03/03/2022 13:01:09 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/03/2022 13:01:14 - INFO - codeparrot_training - Step 102: {'lr': 2.55e-05, 'samples': 52736, 'steps': 102, 'loss/train': 8.678922653198242} +03/03/2022 13:01:17 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/03/2022 13:01:20 - INFO - codeparrot_training - Step 103: {'lr': 2.575e-05, 'samples': 53248, 'steps': 103, 'loss/train': 8.083718299865723} +03/03/2022 13:01:23 - INFO - codeparrot_training - Step 104: {'lr': 2.6e-05, 'samples': 53760, 'steps': 104, 'loss/train': 8.35842227935791} +03/03/2022 13:01:26 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/03/2022 13:01:28 - INFO - codeparrot_training - Step 105: {'lr': 2.625e-05, 'samples': 54272, 'steps': 105, 'loss/train': 8.159714698791504} +03/03/2022 13:01:31 - INFO - codeparrot_training - Step 106: {'lr': 2.65e-05, 'samples': 54784, 'steps': 106, 'loss/train': 8.1396484375} +03/03/2022 13:01:34 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/03/2022 13:01:37 - INFO - codeparrot_training - Step 107: {'lr': 2.675e-05, 'samples': 55296, 'steps': 107, 'loss/train': 8.233939170837402} +03/03/2022 13:01:40 - INFO - codeparrot_training - Step 108: {'lr': 2.7e-05, 'samples': 55808, 'steps': 108, 'loss/train': 8.465450286865234} +03/03/2022 13:01:42 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/03/2022 13:01:45 - INFO - codeparrot_training - Step 109: {'lr': 2.725e-05, 'samples': 56320, 'steps': 109, 'loss/train': 8.154802322387695} +03/03/2022 13:01:48 - INFO - codeparrot_training - Step 110: {'lr': 2.75e-05, 'samples': 56832, 'steps': 110, 'loss/train': 8.17076301574707} +03/03/2022 13:01:50 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/03/2022 13:01:54 - INFO - codeparrot_training - Step 111: {'lr': 2.775e-05, 'samples': 57344, 'steps': 111, 'loss/train': 8.381338119506836} +03/03/2022 13:01:57 - INFO - codeparrot_training - Step 112: {'lr': 2.8e-05, 'samples': 57856, 'steps': 112, 'loss/train': 8.114055633544922} +03/03/2022 13:01:59 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/03/2022 13:02:02 - INFO - codeparrot_training - Step 113: {'lr': 2.8250000000000002e-05, 'samples': 58368, 'steps': 113, 'loss/train': 7.933559894561768} +03/03/2022 13:02:06 - INFO - codeparrot_training - Step 114: {'lr': 2.85e-05, 'samples': 58880, 'steps': 114, 'loss/train': 9.193424224853516} +03/03/2022 13:02:08 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/03/2022 13:02:11 - INFO - codeparrot_training - Step 115: {'lr': 2.875e-05, 'samples': 59392, 'steps': 115, 'loss/train': 7.855266094207764} +03/03/2022 13:02:14 - INFO - codeparrot_training - Step 116: {'lr': 2.9000000000000004e-05, 'samples': 59904, 'steps': 116, 'loss/train': 8.517616271972656} +03/03/2022 13:02:16 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/03/2022 13:02:19 - INFO - codeparrot_training - Step 117: {'lr': 2.9250000000000003e-05, 'samples': 60416, 'steps': 117, 'loss/train': 8.036568641662598} +03/03/2022 13:02:22 - INFO - codeparrot_training - Step 118: {'lr': 2.95e-05, 'samples': 60928, 'steps': 118, 'loss/train': 7.926286220550537} +03/03/2022 13:02:24 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/03/2022 13:02:28 - INFO - codeparrot_training - Step 119: {'lr': 2.9749999999999998e-05, 'samples': 61440, 'steps': 119, 'loss/train': 8.040275573730469} +03/03/2022 13:02:31 - INFO - codeparrot_training - Step 120: {'lr': 3e-05, 'samples': 61952, 'steps': 120, 'loss/train': 6.9734649658203125} +03/03/2022 13:02:33 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/03/2022 13:02:37 - INFO - codeparrot_training - Step 121: {'lr': 3.025e-05, 'samples': 62464, 'steps': 121, 'loss/train': 8.224665641784668} +03/03/2022 13:02:40 - INFO - codeparrot_training - Step 122: {'lr': 3.05e-05, 'samples': 62976, 'steps': 122, 'loss/train': 8.240490913391113} +03/03/2022 13:02:41 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/03/2022 13:02:45 - INFO - codeparrot_training - Step 123: {'lr': 3.075e-05, 'samples': 63488, 'steps': 123, 'loss/train': 8.030046463012695} +03/03/2022 13:02:48 - INFO - codeparrot_training - Step 124: {'lr': 3.1e-05, 'samples': 64000, 'steps': 124, 'loss/train': 7.976356029510498} +03/03/2022 13:02:49 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/03/2022 13:02:54 - INFO - codeparrot_training - Step 125: {'lr': 3.125e-05, 'samples': 64512, 'steps': 125, 'loss/train': 8.348257064819336} +03/03/2022 13:02:57 - INFO - codeparrot_training - Step 126: {'lr': 3.15e-05, 'samples': 65024, 'steps': 126, 'loss/train': 7.844707489013672} +03/03/2022 13:02:58 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/03/2022 13:03:02 - INFO - codeparrot_training - Step 127: {'lr': 3.175e-05, 'samples': 65536, 'steps': 127, 'loss/train': 8.167869567871094} +03/03/2022 13:03:05 - INFO - codeparrot_training - Step 128: {'lr': 3.2e-05, 'samples': 66048, 'steps': 128, 'loss/train': 7.930793762207031} +03/03/2022 13:03:06 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/03/2022 13:03:11 - INFO - codeparrot_training - Step 129: {'lr': 3.2250000000000005e-05, 'samples': 66560, 'steps': 129, 'loss/train': 7.673683166503906} +03/03/2022 13:03:14 - INFO - codeparrot_training - Step 130: {'lr': 3.2500000000000004e-05, 'samples': 67072, 'steps': 130, 'loss/train': 8.17394733428955} +03/03/2022 13:03:14 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/03/2022 13:03:19 - INFO - codeparrot_training - Step 131: {'lr': 3.275e-05, 'samples': 67584, 'steps': 131, 'loss/train': 7.390617847442627} +03/03/2022 13:03:22 - INFO - codeparrot_training - Step 132: {'lr': 3.3e-05, 'samples': 68096, 'steps': 132, 'loss/train': 7.104628562927246} +03/03/2022 13:03:22 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/03/2022 13:03:28 - INFO - codeparrot_training - Step 133: {'lr': 3.325e-05, 'samples': 68608, 'steps': 133, 'loss/train': 8.078102111816406} +03/03/2022 13:03:31 - INFO - codeparrot_training - Step 134: {'lr': 3.35e-05, 'samples': 69120, 'steps': 134, 'loss/train': 7.884213447570801} +03/03/2022 13:03:31 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/03/2022 13:03:36 - INFO - codeparrot_training - Step 135: {'lr': 3.375e-05, 'samples': 69632, 'steps': 135, 'loss/train': 8.450540542602539} +03/03/2022 13:03:39 - INFO - codeparrot_training - Step 136: {'lr': 3.4000000000000007e-05, 'samples': 70144, 'steps': 136, 'loss/train': 8.404011726379395} +03/03/2022 13:03:39 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 13:03:45 - INFO - codeparrot_training - Step 137: {'lr': 3.4250000000000006e-05, 'samples': 70656, 'steps': 137, 'loss/train': 7.6241350173950195} +03/03/2022 13:03:48 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 13:03:50 - INFO - codeparrot_training - Step 138: {'lr': 3.4500000000000005e-05, 'samples': 71168, 'steps': 138, 'loss/train': 7.530354022979736} +03/03/2022 13:03:53 - INFO - codeparrot_training - Step 139: {'lr': 3.4750000000000004e-05, 'samples': 71680, 'steps': 139, 'loss/train': 8.152228355407715} +03/03/2022 13:03:56 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/03/2022 13:03:58 - INFO - codeparrot_training - Step 140: {'lr': 3.5000000000000004e-05, 'samples': 72192, 'steps': 140, 'loss/train': 8.318809509277344} +03/03/2022 13:04:01 - INFO - codeparrot_training - Step 141: {'lr': 3.5249999999999996e-05, 'samples': 72704, 'steps': 141, 'loss/train': 7.475725173950195} +03/03/2022 13:04:04 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 13:04:07 - INFO - codeparrot_training - Step 142: {'lr': 3.5499999999999996e-05, 'samples': 73216, 'steps': 142, 'loss/train': 7.176308631896973} +03/03/2022 13:04:10 - INFO - codeparrot_training - Step 143: {'lr': 3.5749999999999995e-05, 'samples': 73728, 'steps': 143, 'loss/train': 8.0440034866333} +03/03/2022 13:04:12 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 13:04:15 - INFO - codeparrot_training - Step 144: {'lr': 3.6e-05, 'samples': 74240, 'steps': 144, 'loss/train': 8.216455459594727} +03/03/2022 13:04:19 - INFO - codeparrot_training - Step 145: {'lr': 3.625e-05, 'samples': 74752, 'steps': 145, 'loss/train': 8.240697860717773} +03/03/2022 13:04:21 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/03/2022 13:04:24 - INFO - codeparrot_training - Step 146: {'lr': 3.65e-05, 'samples': 75264, 'steps': 146, 'loss/train': 7.32492208480835} +03/03/2022 13:04:27 - INFO - codeparrot_training - Step 147: {'lr': 3.675e-05, 'samples': 75776, 'steps': 147, 'loss/train': 7.730316162109375} +03/03/2022 13:04:29 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/03/2022 13:04:32 - INFO - codeparrot_training - Step 148: {'lr': 3.7e-05, 'samples': 76288, 'steps': 148, 'loss/train': 7.661489009857178} +03/03/2022 13:04:36 - INFO - codeparrot_training - Step 149: {'lr': 3.725e-05, 'samples': 76800, 'steps': 149, 'loss/train': 7.78841495513916} +03/03/2022 13:04:38 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/03/2022 13:04:41 - INFO - codeparrot_training - Step 150: {'lr': 3.75e-05, 'samples': 77312, 'steps': 150, 'loss/train': 7.619410037994385} +03/03/2022 13:04:44 - INFO - codeparrot_training - Step 151: {'lr': 3.775e-05, 'samples': 77824, 'steps': 151, 'loss/train': 8.464534759521484} +03/03/2022 13:04:46 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/03/2022 13:04:49 - INFO - codeparrot_training - Step 152: {'lr': 3.8e-05, 'samples': 78336, 'steps': 152, 'loss/train': 7.549778461456299} +03/03/2022 13:04:53 - INFO - codeparrot_training - Step 153: {'lr': 3.825e-05, 'samples': 78848, 'steps': 153, 'loss/train': 7.371613502502441} +03/03/2022 13:04:54 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/03/2022 13:04:58 - INFO - codeparrot_training - Step 154: {'lr': 3.85e-05, 'samples': 79360, 'steps': 154, 'loss/train': 7.702090740203857} +03/03/2022 13:05:01 - INFO - codeparrot_training - Step 155: {'lr': 3.875e-05, 'samples': 79872, 'steps': 155, 'loss/train': 7.848684787750244} +03/03/2022 13:05:03 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/03/2022 13:05:07 - INFO - codeparrot_training - Step 156: {'lr': 3.9e-05, 'samples': 80384, 'steps': 156, 'loss/train': 7.010176181793213} +03/03/2022 13:05:10 - INFO - codeparrot_training - Step 157: {'lr': 3.925e-05, 'samples': 80896, 'steps': 157, 'loss/train': 4.933742523193359} +03/03/2022 13:05:13 - INFO - codeparrot_training - Step 158: {'lr': 3.95e-05, 'samples': 81408, 'steps': 158, 'loss/train': 8.188973426818848} +03/03/2022 13:05:13 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/03/2022 13:05:19 - INFO - codeparrot_training - Step 159: {'lr': 3.9750000000000004e-05, 'samples': 81920, 'steps': 159, 'loss/train': 8.148823738098145} +03/03/2022 13:05:21 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/03/2022 13:05:24 - INFO - codeparrot_training - Step 160: {'lr': 4e-05, 'samples': 82432, 'steps': 160, 'loss/train': 7.696084976196289} +03/03/2022 13:05:27 - INFO - codeparrot_training - Step 161: {'lr': 4.025e-05, 'samples': 82944, 'steps': 161, 'loss/train': 8.04732608795166} +03/03/2022 13:05:29 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 13:05:32 - INFO - codeparrot_training - Step 162: {'lr': 4.05e-05, 'samples': 83456, 'steps': 162, 'loss/train': 7.864019870758057} +03/03/2022 13:05:36 - INFO - codeparrot_training - Step 163: {'lr': 4.075e-05, 'samples': 83968, 'steps': 163, 'loss/train': 7.568187713623047} +03/03/2022 13:05:38 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/03/2022 13:05:41 - INFO - codeparrot_training - Step 164: {'lr': 4.1e-05, 'samples': 84480, 'steps': 164, 'loss/train': 7.4156365394592285} +03/03/2022 13:05:44 - INFO - codeparrot_training - Step 165: {'lr': 4.125e-05, 'samples': 84992, 'steps': 165, 'loss/train': 8.103707313537598} +03/03/2022 13:05:46 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/03/2022 13:05:50 - INFO - codeparrot_training - Step 166: {'lr': 4.1500000000000006e-05, 'samples': 85504, 'steps': 166, 'loss/train': 7.68804407119751} +03/03/2022 13:05:53 - INFO - codeparrot_training - Step 167: {'lr': 4.1750000000000005e-05, 'samples': 86016, 'steps': 167, 'loss/train': 7.974943161010742} +03/03/2022 13:05:55 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/03/2022 13:05:58 - INFO - codeparrot_training - Step 168: {'lr': 4.2000000000000004e-05, 'samples': 86528, 'steps': 168, 'loss/train': 5.833436012268066} +03/03/2022 13:06:01 - INFO - codeparrot_training - Step 169: {'lr': 4.2250000000000004e-05, 'samples': 87040, 'steps': 169, 'loss/train': 7.7950568199157715} +03/03/2022 13:06:03 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/03/2022 13:06:06 - INFO - codeparrot_training - Step 170: {'lr': 4.25e-05, 'samples': 87552, 'steps': 170, 'loss/train': 7.657891273498535} +03/03/2022 13:06:10 - INFO - codeparrot_training - Step 171: {'lr': 4.275e-05, 'samples': 88064, 'steps': 171, 'loss/train': 7.877186298370361} +03/03/2022 13:06:11 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/03/2022 13:06:15 - INFO - codeparrot_training - Step 172: {'lr': 4.2999999999999995e-05, 'samples': 88576, 'steps': 172, 'loss/train': 7.893576622009277} +03/03/2022 13:06:18 - INFO - codeparrot_training - Step 173: {'lr': 4.325e-05, 'samples': 89088, 'steps': 173, 'loss/train': 7.6018571853637695} +03/03/2022 13:06:20 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/03/2022 13:06:24 - INFO - codeparrot_training - Step 174: {'lr': 4.35e-05, 'samples': 89600, 'steps': 174, 'loss/train': 7.9183549880981445} +03/03/2022 13:06:27 - INFO - codeparrot_training - Step 175: {'lr': 4.375e-05, 'samples': 90112, 'steps': 175, 'loss/train': 7.391948699951172} +03/03/2022 13:06:28 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/03/2022 13:06:32 - INFO - codeparrot_training - Step 176: {'lr': 4.4e-05, 'samples': 90624, 'steps': 176, 'loss/train': 7.670172691345215} +03/03/2022 13:06:35 - INFO - codeparrot_training - Step 177: {'lr': 4.425e-05, 'samples': 91136, 'steps': 177, 'loss/train': 7.4225029945373535} +03/03/2022 13:06:37 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/03/2022 13:06:41 - INFO - codeparrot_training - Step 178: {'lr': 4.45e-05, 'samples': 91648, 'steps': 178, 'loss/train': 7.456734657287598} +03/03/2022 13:06:44 - INFO - codeparrot_training - Step 179: {'lr': 4.475e-05, 'samples': 92160, 'steps': 179, 'loss/train': 8.120655059814453} +03/03/2022 13:06:45 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/03/2022 13:06:49 - INFO - codeparrot_training - Step 180: {'lr': 4.4999999999999996e-05, 'samples': 92672, 'steps': 180, 'loss/train': 7.598281383514404} +03/03/2022 13:06:52 - INFO - codeparrot_training - Step 181: {'lr': 4.525e-05, 'samples': 93184, 'steps': 181, 'loss/train': 8.022689819335938} +03/03/2022 13:06:54 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/03/2022 13:06:58 - INFO - codeparrot_training - Step 182: {'lr': 4.55e-05, 'samples': 93696, 'steps': 182, 'loss/train': 7.102559566497803} +03/03/2022 13:07:01 - INFO - codeparrot_training - Step 183: {'lr': 4.575e-05, 'samples': 94208, 'steps': 183, 'loss/train': 5.537654876708984} +03/03/2022 13:07:03 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/03/2022 13:07:07 - INFO - codeparrot_training - Step 184: {'lr': 4.6e-05, 'samples': 94720, 'steps': 184, 'loss/train': 7.378385066986084} +03/03/2022 13:07:10 - INFO - codeparrot_training - Step 185: {'lr': 4.625e-05, 'samples': 95232, 'steps': 185, 'loss/train': 7.413166046142578} +03/03/2022 13:07:11 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/03/2022 13:07:15 - INFO - codeparrot_training - Step 186: {'lr': 4.65e-05, 'samples': 95744, 'steps': 186, 'loss/train': 7.445736885070801} +03/03/2022 13:07:18 - INFO - codeparrot_training - Step 187: {'lr': 4.675e-05, 'samples': 96256, 'steps': 187, 'loss/train': 8.496750831604004} +03/03/2022 13:07:19 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/03/2022 13:07:24 - INFO - codeparrot_training - Step 188: {'lr': 4.7000000000000004e-05, 'samples': 96768, 'steps': 188, 'loss/train': 7.747251033782959} +03/03/2022 13:07:27 - INFO - codeparrot_training - Step 189: {'lr': 4.725e-05, 'samples': 97280, 'steps': 189, 'loss/train': 7.649470806121826} +03/03/2022 13:07:28 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/03/2022 13:07:32 - INFO - codeparrot_training - Step 190: {'lr': 4.75e-05, 'samples': 97792, 'steps': 190, 'loss/train': 7.644663333892822} +03/03/2022 13:07:35 - INFO - codeparrot_training - Step 191: {'lr': 4.775e-05, 'samples': 98304, 'steps': 191, 'loss/train': 8.062586784362793} +03/03/2022 13:07:36 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/03/2022 13:07:40 - INFO - codeparrot_training - Step 192: {'lr': 4.8e-05, 'samples': 98816, 'steps': 192, 'loss/train': 7.344707489013672} +03/03/2022 13:07:44 - INFO - codeparrot_training - Step 193: {'lr': 4.825e-05, 'samples': 99328, 'steps': 193, 'loss/train': 8.712090492248535} +03/03/2022 13:07:44 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 13:07:49 - INFO - codeparrot_training - Step 194: {'lr': 4.85e-05, 'samples': 99840, 'steps': 194, 'loss/train': 8.00727653503418} +03/03/2022 13:07:52 - INFO - codeparrot_training - Step 195: {'lr': 4.8750000000000006e-05, 'samples': 100352, 'steps': 195, 'loss/train': 6.896945953369141} +03/03/2022 13:07:52 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/03/2022 13:07:57 - INFO - codeparrot_training - Step 196: {'lr': 4.9000000000000005e-05, 'samples': 100864, 'steps': 196, 'loss/train': 7.245913028717041} +03/03/2022 13:08:01 - INFO - codeparrot_training - Step 197: {'lr': 4.9250000000000004e-05, 'samples': 101376, 'steps': 197, 'loss/train': 7.287005424499512} +03/03/2022 13:08:01 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/03/2022 13:08:06 - INFO - codeparrot_training - Step 198: {'lr': 4.9500000000000004e-05, 'samples': 101888, 'steps': 198, 'loss/train': 7.7306318283081055} +03/03/2022 13:08:09 - INFO - codeparrot_training - Step 199: {'lr': 4.975e-05, 'samples': 102400, 'steps': 199, 'loss/train': 7.501760005950928} +03/03/2022 13:08:09 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/03/2022 13:08:14 - INFO - codeparrot_training - Step 200: {'lr': 5e-05, 'samples': 102912, 'steps': 200, 'loss/train': 7.2059173583984375} +03/03/2022 13:08:17 - INFO - codeparrot_training - Step 201: {'lr': 5.025e-05, 'samples': 103424, 'steps': 201, 'loss/train': 7.545304775238037} +03/03/2022 13:08:18 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/03/2022 13:08:23 - INFO - codeparrot_training - Step 202: {'lr': 5.05e-05, 'samples': 103936, 'steps': 202, 'loss/train': 6.92850923538208} +03/03/2022 13:08:26 - INFO - codeparrot_training - Step 203: {'lr': 5.075000000000001e-05, 'samples': 104448, 'steps': 203, 'loss/train': 7.187972068786621} +03/03/2022 13:08:26 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/03/2022 13:08:31 - INFO - codeparrot_training - Step 204: {'lr': 5.1e-05, 'samples': 104960, 'steps': 204, 'loss/train': 7.582147121429443} +03/03/2022 13:08:35 - INFO - codeparrot_training - Step 205: {'lr': 5.125e-05, 'samples': 105472, 'steps': 205, 'loss/train': 9.222358703613281} +03/03/2022 13:08:35 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/03/2022 13:08:40 - INFO - codeparrot_training - Step 206: {'lr': 5.15e-05, 'samples': 105984, 'steps': 206, 'loss/train': 7.789576530456543} +03/03/2022 13:08:43 - INFO - codeparrot_training - Step 207: {'lr': 5.175e-05, 'samples': 106496, 'steps': 207, 'loss/train': 7.880443572998047} +03/03/2022 13:08:44 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 13:08:48 - INFO - codeparrot_training - Step 208: {'lr': 5.2e-05, 'samples': 107008, 'steps': 208, 'loss/train': 7.160046100616455} +03/03/2022 13:08:52 - INFO - codeparrot_training - Step 209: {'lr': 5.2249999999999996e-05, 'samples': 107520, 'steps': 209, 'loss/train': 8.04822063446045} +03/03/2022 13:08:52 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 13:08:57 - INFO - codeparrot_training - Step 210: {'lr': 5.25e-05, 'samples': 108032, 'steps': 210, 'loss/train': 7.374578475952148} +03/03/2022 13:09:00 - INFO - codeparrot_training - Step 211: {'lr': 5.275e-05, 'samples': 108544, 'steps': 211, 'loss/train': 7.502261638641357} +03/03/2022 13:09:00 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/03/2022 13:09:05 - INFO - codeparrot_training - Step 212: {'lr': 5.3e-05, 'samples': 109056, 'steps': 212, 'loss/train': 7.538562297821045} +03/03/2022 13:09:09 - INFO - codeparrot_training - Step 213: {'lr': 5.325e-05, 'samples': 109568, 'steps': 213, 'loss/train': 8.062139511108398} +03/03/2022 13:09:09 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/03/2022 13:09:14 - INFO - codeparrot_training - Step 214: {'lr': 5.35e-05, 'samples': 110080, 'steps': 214, 'loss/train': 7.754930019378662} +03/03/2022 13:09:17 - INFO - codeparrot_training - Step 215: {'lr': 5.375e-05, 'samples': 110592, 'steps': 215, 'loss/train': 7.983520030975342} +03/03/2022 13:09:17 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/03/2022 13:09:22 - INFO - codeparrot_training - Step 216: {'lr': 5.4e-05, 'samples': 111104, 'steps': 216, 'loss/train': 7.068185329437256} +03/03/2022 13:09:25 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/03/2022 13:09:28 - INFO - codeparrot_training - Step 217: {'lr': 5.4250000000000004e-05, 'samples': 111616, 'steps': 217, 'loss/train': 7.8822760581970215} +03/03/2022 13:09:31 - INFO - codeparrot_training - Step 218: {'lr': 5.45e-05, 'samples': 112128, 'steps': 218, 'loss/train': 7.853682518005371} +03/03/2022 13:09:34 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/03/2022 13:09:36 - INFO - codeparrot_training - Step 219: {'lr': 5.475e-05, 'samples': 112640, 'steps': 219, 'loss/train': 7.099795818328857} +03/03/2022 13:09:39 - INFO - codeparrot_training - Step 220: {'lr': 5.5e-05, 'samples': 113152, 'steps': 220, 'loss/train': 7.336885929107666} +03/03/2022 13:09:42 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/03/2022 13:09:45 - INFO - codeparrot_training - Step 221: {'lr': 5.525e-05, 'samples': 113664, 'steps': 221, 'loss/train': 7.086932182312012} +03/03/2022 13:09:48 - INFO - codeparrot_training - Step 222: {'lr': 5.55e-05, 'samples': 114176, 'steps': 222, 'loss/train': 7.5683746337890625} +03/03/2022 13:09:51 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/03/2022 13:09:53 - INFO - codeparrot_training - Step 223: {'lr': 5.575e-05, 'samples': 114688, 'steps': 223, 'loss/train': 6.692105770111084} +03/03/2022 13:09:56 - INFO - codeparrot_training - Step 224: {'lr': 5.6e-05, 'samples': 115200, 'steps': 224, 'loss/train': 7.558428764343262} +03/03/2022 13:09:59 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 13:10:02 - INFO - codeparrot_training - Step 225: {'lr': 5.6250000000000005e-05, 'samples': 115712, 'steps': 225, 'loss/train': 7.024529457092285} +03/03/2022 13:10:05 - INFO - codeparrot_training - Step 226: {'lr': 5.6500000000000005e-05, 'samples': 116224, 'steps': 226, 'loss/train': 7.132439613342285} +03/03/2022 13:10:07 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/03/2022 13:10:10 - INFO - codeparrot_training - Step 227: {'lr': 5.6750000000000004e-05, 'samples': 116736, 'steps': 227, 'loss/train': 7.505816459655762} +03/03/2022 13:10:13 - INFO - codeparrot_training - Step 228: {'lr': 5.7e-05, 'samples': 117248, 'steps': 228, 'loss/train': 7.923880100250244} +03/03/2022 13:10:16 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 13:10:19 - INFO - codeparrot_training - Step 229: {'lr': 5.725e-05, 'samples': 117760, 'steps': 229, 'loss/train': 8.218132019042969} +03/03/2022 13:10:22 - INFO - codeparrot_training - Step 230: {'lr': 5.75e-05, 'samples': 118272, 'steps': 230, 'loss/train': 6.376003742218018} +03/03/2022 13:10:24 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/03/2022 13:10:28 - INFO - codeparrot_training - Step 231: {'lr': 5.775e-05, 'samples': 118784, 'steps': 231, 'loss/train': 7.3082990646362305} +03/03/2022 13:10:31 - INFO - codeparrot_training - Step 232: {'lr': 5.800000000000001e-05, 'samples': 119296, 'steps': 232, 'loss/train': 7.335635662078857} +03/03/2022 13:10:34 - INFO - codeparrot_training - Step 233: {'lr': 5.8250000000000006e-05, 'samples': 119808, 'steps': 233, 'loss/train': 6.83720064163208} +03/03/2022 13:10:34 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 13:10:40 - INFO - codeparrot_training - Step 234: {'lr': 5.8500000000000006e-05, 'samples': 120320, 'steps': 234, 'loss/train': 6.864739894866943} +03/03/2022 13:10:43 - INFO - codeparrot_training - Step 235: {'lr': 5.875e-05, 'samples': 120832, 'steps': 235, 'loss/train': 8.335467338562012} +03/03/2022 13:10:43 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 13:10:48 - INFO - codeparrot_training - Step 236: {'lr': 5.9e-05, 'samples': 121344, 'steps': 236, 'loss/train': 6.926678657531738} +03/03/2022 13:10:51 - INFO - codeparrot_training - Step 237: {'lr': 5.925e-05, 'samples': 121856, 'steps': 237, 'loss/train': 6.76780891418457} +03/03/2022 13:10:51 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/03/2022 13:10:56 - INFO - codeparrot_training - Step 238: {'lr': 5.9499999999999996e-05, 'samples': 122368, 'steps': 238, 'loss/train': 7.604316711425781} +03/03/2022 13:11:00 - INFO - codeparrot_training - Step 239: {'lr': 5.9749999999999995e-05, 'samples': 122880, 'steps': 239, 'loss/train': 7.477373123168945} +03/03/2022 13:11:00 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/03/2022 13:11:05 - INFO - codeparrot_training - Step 240: {'lr': 6e-05, 'samples': 123392, 'steps': 240, 'loss/train': 6.324827671051025} +03/03/2022 13:11:08 - INFO - codeparrot_training - Step 241: {'lr': 6.025e-05, 'samples': 123904, 'steps': 241, 'loss/train': 7.703766822814941} +03/03/2022 13:11:08 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/03/2022 13:11:13 - INFO - codeparrot_training - Step 242: {'lr': 6.05e-05, 'samples': 124416, 'steps': 242, 'loss/train': 7.568194389343262} +03/03/2022 13:11:16 - INFO - codeparrot_training - Step 243: {'lr': 6.075e-05, 'samples': 124928, 'steps': 243, 'loss/train': 7.032049655914307} +03/03/2022 13:11:17 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/03/2022 13:11:22 - INFO - codeparrot_training - Step 244: {'lr': 6.1e-05, 'samples': 125440, 'steps': 244, 'loss/train': 7.975718021392822} +03/03/2022 13:11:25 - INFO - codeparrot_training - Step 245: {'lr': 6.125e-05, 'samples': 125952, 'steps': 245, 'loss/train': 9.234091758728027} +03/03/2022 13:11:26 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/03/2022 13:11:30 - INFO - codeparrot_training - Step 246: {'lr': 6.15e-05, 'samples': 126464, 'steps': 246, 'loss/train': 7.30528450012207} +03/03/2022 13:11:33 - INFO - codeparrot_training - Step 247: {'lr': 6.175e-05, 'samples': 126976, 'steps': 247, 'loss/train': 7.250826835632324} +03/03/2022 13:11:34 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/03/2022 13:11:39 - INFO - codeparrot_training - Step 248: {'lr': 6.2e-05, 'samples': 127488, 'steps': 248, 'loss/train': 7.167181968688965} +03/03/2022 13:11:42 - INFO - codeparrot_training - Step 249: {'lr': 6.225e-05, 'samples': 128000, 'steps': 249, 'loss/train': 7.492107391357422} +03/03/2022 13:11:42 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/03/2022 13:11:47 - INFO - codeparrot_training - Step 250: {'lr': 6.25e-05, 'samples': 128512, 'steps': 250, 'loss/train': 7.4001617431640625} +03/03/2022 13:11:50 - INFO - codeparrot_training - Step 251: {'lr': 6.275000000000001e-05, 'samples': 129024, 'steps': 251, 'loss/train': 7.7985920906066895} +03/03/2022 13:11:51 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/03/2022 13:11:56 - INFO - codeparrot_training - Step 252: {'lr': 6.3e-05, 'samples': 129536, 'steps': 252, 'loss/train': 7.012681007385254} +03/03/2022 13:11:59 - INFO - codeparrot_training - Step 253: {'lr': 6.325e-05, 'samples': 130048, 'steps': 253, 'loss/train': 7.037346839904785} +03/03/2022 13:11:59 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/03/2022 13:12:04 - INFO - codeparrot_training - Step 254: {'lr': 6.35e-05, 'samples': 130560, 'steps': 254, 'loss/train': 6.874361991882324} +03/03/2022 13:12:07 - INFO - codeparrot_training - Step 255: {'lr': 6.375e-05, 'samples': 131072, 'steps': 255, 'loss/train': 8.25483512878418} +03/03/2022 13:12:08 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/03/2022 13:12:12 - INFO - codeparrot_training - Step 256: {'lr': 6.4e-05, 'samples': 131584, 'steps': 256, 'loss/train': 7.541172981262207} +03/03/2022 13:12:16 - INFO - codeparrot_training - Step 257: {'lr': 6.425e-05, 'samples': 132096, 'steps': 257, 'loss/train': 7.457941055297852} +03/03/2022 13:12:16 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/03/2022 13:12:21 - INFO - codeparrot_training - Step 258: {'lr': 6.450000000000001e-05, 'samples': 132608, 'steps': 258, 'loss/train': 7.014778137207031} +03/03/2022 13:12:24 - INFO - codeparrot_training - Step 259: {'lr': 6.475e-05, 'samples': 133120, 'steps': 259, 'loss/train': 6.806115627288818} +03/03/2022 13:12:24 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/03/2022 13:12:29 - INFO - codeparrot_training - Step 260: {'lr': 6.500000000000001e-05, 'samples': 133632, 'steps': 260, 'loss/train': 7.267054557800293} +03/03/2022 13:12:32 - INFO - codeparrot_training - Step 261: {'lr': 6.525e-05, 'samples': 134144, 'steps': 261, 'loss/train': 6.713515758514404} +03/03/2022 13:12:33 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/03/2022 13:12:38 - INFO - codeparrot_training - Step 262: {'lr': 6.55e-05, 'samples': 134656, 'steps': 262, 'loss/train': 7.091745853424072} +03/03/2022 13:12:41 - INFO - codeparrot_training - Step 263: {'lr': 6.575e-05, 'samples': 135168, 'steps': 263, 'loss/train': 7.467846393585205} +03/03/2022 13:12:41 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/03/2022 13:12:46 - INFO - codeparrot_training - Step 264: {'lr': 6.6e-05, 'samples': 135680, 'steps': 264, 'loss/train': 6.91502046585083} +03/03/2022 13:12:49 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/03/2022 13:12:51 - INFO - codeparrot_training - Step 265: {'lr': 6.625000000000001e-05, 'samples': 136192, 'steps': 265, 'loss/train': 6.923452854156494} +03/03/2022 13:12:55 - INFO - codeparrot_training - Step 266: {'lr': 6.65e-05, 'samples': 136704, 'steps': 266, 'loss/train': 7.499436378479004} +03/03/2022 13:12:57 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/03/2022 13:13:00 - INFO - codeparrot_training - Step 267: {'lr': 6.675000000000001e-05, 'samples': 137216, 'steps': 267, 'loss/train': 7.343801975250244} +03/03/2022 13:13:03 - INFO - codeparrot_training - Step 268: {'lr': 6.7e-05, 'samples': 137728, 'steps': 268, 'loss/train': 7.012816905975342} +03/03/2022 13:13:06 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 13:13:08 - INFO - codeparrot_training - Step 269: {'lr': 6.725000000000001e-05, 'samples': 138240, 'steps': 269, 'loss/train': 7.881154537200928} +03/03/2022 13:13:12 - INFO - codeparrot_training - Step 270: {'lr': 6.75e-05, 'samples': 138752, 'steps': 270, 'loss/train': 7.508479118347168} +03/03/2022 13:13:14 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/03/2022 13:13:17 - INFO - codeparrot_training - Step 271: {'lr': 6.775000000000001e-05, 'samples': 139264, 'steps': 271, 'loss/train': 6.856858730316162} +03/03/2022 13:13:20 - INFO - codeparrot_training - Step 272: {'lr': 6.800000000000001e-05, 'samples': 139776, 'steps': 272, 'loss/train': 7.2710041999816895} +03/03/2022 13:13:23 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/03/2022 13:13:26 - INFO - codeparrot_training - Step 273: {'lr': 6.825e-05, 'samples': 140288, 'steps': 273, 'loss/train': 6.965377330780029} +03/03/2022 13:13:29 - INFO - codeparrot_training - Step 274: {'lr': 6.850000000000001e-05, 'samples': 140800, 'steps': 274, 'loss/train': 6.166491985321045} +03/03/2022 13:13:32 - INFO - codeparrot_training - Step 275: {'lr': 6.875e-05, 'samples': 141312, 'steps': 275, 'loss/train': 7.818729877471924} +03/03/2022 13:13:34 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/03/2022 13:13:38 - INFO - codeparrot_training - Step 276: {'lr': 6.900000000000001e-05, 'samples': 141824, 'steps': 276, 'loss/train': 6.974045753479004} +03/03/2022 13:13:41 - INFO - codeparrot_training - Step 277: {'lr': 6.925e-05, 'samples': 142336, 'steps': 277, 'loss/train': 6.823301792144775} +03/03/2022 13:13:43 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 13:13:46 - INFO - codeparrot_training - Step 278: {'lr': 6.950000000000001e-05, 'samples': 142848, 'steps': 278, 'loss/train': 7.374868392944336} +03/03/2022 13:13:50 - INFO - codeparrot_training - Step 279: {'lr': 6.975e-05, 'samples': 143360, 'steps': 279, 'loss/train': 7.211111068725586} +03/03/2022 13:13:51 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/03/2022 13:13:55 - INFO - codeparrot_training - Step 280: {'lr': 7.000000000000001e-05, 'samples': 143872, 'steps': 280, 'loss/train': 7.095859527587891} +03/03/2022 13:13:58 - INFO - codeparrot_training - Step 281: {'lr': 7.025000000000001e-05, 'samples': 144384, 'steps': 281, 'loss/train': 6.3933634757995605} +03/03/2022 13:14:00 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 13:14:04 - INFO - codeparrot_training - Step 282: {'lr': 7.049999999999999e-05, 'samples': 144896, 'steps': 282, 'loss/train': 8.225425720214844} +03/03/2022 13:14:07 - INFO - codeparrot_training - Step 283: {'lr': 7.075e-05, 'samples': 145408, 'steps': 283, 'loss/train': 6.789872646331787} +03/03/2022 13:14:08 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/03/2022 13:14:12 - INFO - codeparrot_training - Step 284: {'lr': 7.099999999999999e-05, 'samples': 145920, 'steps': 284, 'loss/train': 7.208834171295166} +03/03/2022 13:14:15 - INFO - codeparrot_training - Step 285: {'lr': 7.125e-05, 'samples': 146432, 'steps': 285, 'loss/train': 5.778796672821045} +03/03/2022 13:14:17 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 13:14:21 - INFO - codeparrot_training - Step 286: {'lr': 7.149999999999999e-05, 'samples': 146944, 'steps': 286, 'loss/train': 6.7907490730285645} +03/03/2022 13:14:24 - INFO - codeparrot_training - Step 287: {'lr': 7.175e-05, 'samples': 147456, 'steps': 287, 'loss/train': 6.667514801025391} +03/03/2022 13:14:25 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/03/2022 13:14:29 - INFO - codeparrot_training - Step 288: {'lr': 7.2e-05, 'samples': 147968, 'steps': 288, 'loss/train': 7.220056533813477} +03/03/2022 13:14:32 - INFO - codeparrot_training - Step 289: {'lr': 7.225e-05, 'samples': 148480, 'steps': 289, 'loss/train': 6.969030380249023} +03/03/2022 13:14:33 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/03/2022 13:14:37 - INFO - codeparrot_training - Step 290: {'lr': 7.25e-05, 'samples': 148992, 'steps': 290, 'loss/train': 7.167079925537109} +03/03/2022 13:14:41 - INFO - codeparrot_training - Step 291: {'lr': 7.274999999999999e-05, 'samples': 149504, 'steps': 291, 'loss/train': 7.262547016143799} +03/03/2022 13:14:42 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/03/2022 13:14:46 - INFO - codeparrot_training - Step 292: {'lr': 7.3e-05, 'samples': 150016, 'steps': 292, 'loss/train': 7.280813694000244} +03/03/2022 13:14:49 - INFO - codeparrot_training - Step 293: {'lr': 7.324999999999999e-05, 'samples': 150528, 'steps': 293, 'loss/train': 6.34620475769043} +03/03/2022 13:14:50 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/03/2022 13:14:54 - INFO - codeparrot_training - Step 294: {'lr': 7.35e-05, 'samples': 151040, 'steps': 294, 'loss/train': 7.634270191192627} +03/03/2022 13:14:58 - INFO - codeparrot_training - Step 295: {'lr': 7.375e-05, 'samples': 151552, 'steps': 295, 'loss/train': 4.9720330238342285} +03/03/2022 13:15:00 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/03/2022 13:15:03 - INFO - codeparrot_training - Step 296: {'lr': 7.4e-05, 'samples': 152064, 'steps': 296, 'loss/train': 7.290252208709717} +03/03/2022 13:15:06 - INFO - codeparrot_training - Step 297: {'lr': 7.425e-05, 'samples': 152576, 'steps': 297, 'loss/train': 7.179178237915039} +03/03/2022 13:15:08 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/03/2022 13:15:11 - INFO - codeparrot_training - Step 298: {'lr': 7.45e-05, 'samples': 153088, 'steps': 298, 'loss/train': 8.177205085754395} +03/03/2022 13:15:14 - INFO - codeparrot_training - Step 299: {'lr': 7.475e-05, 'samples': 153600, 'steps': 299, 'loss/train': 7.102594375610352} +03/03/2022 13:15:16 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/03/2022 13:15:20 - INFO - codeparrot_training - Step 300: {'lr': 7.5e-05, 'samples': 154112, 'steps': 300, 'loss/train': 7.6582746505737305} +03/03/2022 13:15:23 - INFO - codeparrot_training - Step 301: {'lr': 7.525e-05, 'samples': 154624, 'steps': 301, 'loss/train': 3.4812848567962646} +03/03/2022 13:15:24 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/03/2022 13:15:28 - INFO - codeparrot_training - Step 302: {'lr': 7.55e-05, 'samples': 155136, 'steps': 302, 'loss/train': 6.857351779937744} +03/03/2022 13:15:31 - INFO - codeparrot_training - Step 303: {'lr': 7.575e-05, 'samples': 155648, 'steps': 303, 'loss/train': 7.291689395904541} +03/03/2022 13:15:32 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/03/2022 13:15:37 - INFO - codeparrot_training - Step 304: {'lr': 7.6e-05, 'samples': 156160, 'steps': 304, 'loss/train': 7.73492431640625} +03/03/2022 13:15:40 - INFO - codeparrot_training - Step 305: {'lr': 7.625e-05, 'samples': 156672, 'steps': 305, 'loss/train': 7.737000942230225} +03/03/2022 13:15:41 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/03/2022 13:15:45 - INFO - codeparrot_training - Step 306: {'lr': 7.65e-05, 'samples': 157184, 'steps': 306, 'loss/train': 6.85874605178833} +03/03/2022 13:15:48 - INFO - codeparrot_training - Step 307: {'lr': 7.675e-05, 'samples': 157696, 'steps': 307, 'loss/train': 6.972065448760986} +03/03/2022 13:15:50 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/03/2022 13:15:54 - INFO - codeparrot_training - Step 308: {'lr': 7.7e-05, 'samples': 158208, 'steps': 308, 'loss/train': 7.307086944580078} +03/03/2022 13:15:57 - INFO - codeparrot_training - Step 309: {'lr': 7.725000000000001e-05, 'samples': 158720, 'steps': 309, 'loss/train': 7.4384050369262695} +03/03/2022 13:15:58 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 13:16:02 - INFO - codeparrot_training - Step 310: {'lr': 7.75e-05, 'samples': 159232, 'steps': 310, 'loss/train': 7.601604461669922} +03/03/2022 13:16:05 - INFO - codeparrot_training - Step 311: {'lr': 7.775e-05, 'samples': 159744, 'steps': 311, 'loss/train': 7.247878551483154} +03/03/2022 13:16:06 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/03/2022 13:16:11 - INFO - codeparrot_training - Step 312: {'lr': 7.8e-05, 'samples': 160256, 'steps': 312, 'loss/train': 6.91134786605835} +03/03/2022 13:16:14 - INFO - codeparrot_training - Step 313: {'lr': 7.825e-05, 'samples': 160768, 'steps': 313, 'loss/train': 7.499279499053955} +03/03/2022 13:16:14 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/03/2022 13:16:19 - INFO - codeparrot_training - Step 314: {'lr': 7.85e-05, 'samples': 161280, 'steps': 314, 'loss/train': 7.371830463409424} +03/03/2022 13:16:22 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 13:16:25 - INFO - codeparrot_training - Step 315: {'lr': 7.875e-05, 'samples': 161792, 'steps': 315, 'loss/train': 7.107333660125732} +03/03/2022 13:16:28 - INFO - codeparrot_training - Step 316: {'lr': 7.9e-05, 'samples': 162304, 'steps': 316, 'loss/train': 8.673299789428711} +03/03/2022 13:16:31 - INFO - codeparrot_training - Step 317: {'lr': 7.925e-05, 'samples': 162816, 'steps': 317, 'loss/train': 7.807727813720703} +03/03/2022 13:16:32 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/03/2022 13:16:36 - INFO - codeparrot_training - Step 318: {'lr': 7.950000000000001e-05, 'samples': 163328, 'steps': 318, 'loss/train': 7.118139266967773} +03/03/2022 13:16:40 - INFO - codeparrot_training - Step 319: {'lr': 7.975e-05, 'samples': 163840, 'steps': 319, 'loss/train': 7.224991321563721} +03/03/2022 13:16:41 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/03/2022 13:16:45 - INFO - codeparrot_training - Step 320: {'lr': 8e-05, 'samples': 164352, 'steps': 320, 'loss/train': 7.151127338409424} +03/03/2022 13:16:48 - INFO - codeparrot_training - Step 321: {'lr': 8.025e-05, 'samples': 164864, 'steps': 321, 'loss/train': 6.802467346191406} +03/03/2022 13:16:49 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/03/2022 13:16:53 - INFO - codeparrot_training - Step 322: {'lr': 8.05e-05, 'samples': 165376, 'steps': 322, 'loss/train': 7.11800479888916} +03/03/2022 13:16:56 - INFO - codeparrot_training - Step 323: {'lr': 8.075e-05, 'samples': 165888, 'steps': 323, 'loss/train': 6.9256367683410645} +03/03/2022 13:16:57 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/03/2022 13:17:02 - INFO - codeparrot_training - Step 324: {'lr': 8.1e-05, 'samples': 166400, 'steps': 324, 'loss/train': 5.947310924530029} +03/03/2022 13:17:05 - INFO - codeparrot_training - Step 325: {'lr': 8.125000000000001e-05, 'samples': 166912, 'steps': 325, 'loss/train': 7.745260238647461} +03/03/2022 13:17:07 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/03/2022 13:17:10 - INFO - codeparrot_training - Step 326: {'lr': 8.15e-05, 'samples': 167424, 'steps': 326, 'loss/train': 6.477266311645508} +03/03/2022 13:17:13 - INFO - codeparrot_training - Step 327: {'lr': 8.175000000000001e-05, 'samples': 167936, 'steps': 327, 'loss/train': 7.103150844573975} +03/03/2022 13:17:15 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/03/2022 13:17:19 - INFO - codeparrot_training - Step 328: {'lr': 8.2e-05, 'samples': 168448, 'steps': 328, 'loss/train': 7.210000991821289} +03/03/2022 13:17:22 - INFO - codeparrot_training - Step 329: {'lr': 8.225000000000001e-05, 'samples': 168960, 'steps': 329, 'loss/train': 5.856969833374023} +03/03/2022 13:17:23 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/03/2022 13:17:27 - INFO - codeparrot_training - Step 330: {'lr': 8.25e-05, 'samples': 169472, 'steps': 330, 'loss/train': 7.112217426300049} +03/03/2022 13:17:30 - INFO - codeparrot_training - Step 331: {'lr': 8.275e-05, 'samples': 169984, 'steps': 331, 'loss/train': 7.2972517013549805} +03/03/2022 13:17:32 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/03/2022 13:17:36 - INFO - codeparrot_training - Step 332: {'lr': 8.300000000000001e-05, 'samples': 170496, 'steps': 332, 'loss/train': 6.872681140899658} +03/03/2022 13:17:39 - INFO - codeparrot_training - Step 333: {'lr': 8.325e-05, 'samples': 171008, 'steps': 333, 'loss/train': 6.879507064819336} +03/03/2022 13:17:40 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/03/2022 13:17:44 - INFO - codeparrot_training - Step 334: {'lr': 8.350000000000001e-05, 'samples': 171520, 'steps': 334, 'loss/train': 7.25621223449707} +03/03/2022 13:17:47 - INFO - codeparrot_training - Step 335: {'lr': 8.375e-05, 'samples': 172032, 'steps': 335, 'loss/train': 6.667304515838623} +03/03/2022 13:17:48 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/03/2022 13:17:52 - INFO - codeparrot_training - Step 336: {'lr': 8.400000000000001e-05, 'samples': 172544, 'steps': 336, 'loss/train': 7.273214817047119} +03/03/2022 13:17:56 - INFO - codeparrot_training - Step 337: {'lr': 8.425e-05, 'samples': 173056, 'steps': 337, 'loss/train': 7.189443588256836} +03/03/2022 13:17:56 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/03/2022 13:18:01 - INFO - codeparrot_training - Step 338: {'lr': 8.450000000000001e-05, 'samples': 173568, 'steps': 338, 'loss/train': 6.605391025543213} +03/03/2022 13:18:04 - INFO - codeparrot_training - Step 339: {'lr': 8.475000000000001e-05, 'samples': 174080, 'steps': 339, 'loss/train': 6.908782005310059} +03/03/2022 13:18:05 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/03/2022 13:18:09 - INFO - codeparrot_training - Step 340: {'lr': 8.5e-05, 'samples': 174592, 'steps': 340, 'loss/train': 6.747285842895508} +03/03/2022 13:18:12 - INFO - codeparrot_training - Step 341: {'lr': 8.525000000000001e-05, 'samples': 175104, 'steps': 341, 'loss/train': 6.820441246032715} +03/03/2022 13:18:13 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/03/2022 13:18:18 - INFO - codeparrot_training - Step 342: {'lr': 8.55e-05, 'samples': 175616, 'steps': 342, 'loss/train': 7.335867404937744} +03/03/2022 13:18:21 - INFO - codeparrot_training - Step 343: {'lr': 8.575000000000001e-05, 'samples': 176128, 'steps': 343, 'loss/train': 6.804535388946533} +03/03/2022 13:18:22 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/03/2022 13:18:26 - INFO - codeparrot_training - Step 344: {'lr': 8.599999999999999e-05, 'samples': 176640, 'steps': 344, 'loss/train': 7.3406500816345215} +03/03/2022 13:18:29 - INFO - codeparrot_training - Step 345: {'lr': 8.625e-05, 'samples': 177152, 'steps': 345, 'loss/train': 6.626857757568359} +03/03/2022 13:18:30 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/03/2022 13:18:35 - INFO - codeparrot_training - Step 346: {'lr': 8.65e-05, 'samples': 177664, 'steps': 346, 'loss/train': 6.793419361114502} +03/03/2022 13:18:38 - INFO - codeparrot_training - Step 347: {'lr': 8.675e-05, 'samples': 178176, 'steps': 347, 'loss/train': 6.759413242340088} +03/03/2022 13:18:38 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/03/2022 13:18:43 - INFO - codeparrot_training - Step 348: {'lr': 8.7e-05, 'samples': 178688, 'steps': 348, 'loss/train': 6.160068035125732} +03/03/2022 13:18:46 - INFO - codeparrot_training - Step 349: {'lr': 8.724999999999999e-05, 'samples': 179200, 'steps': 349, 'loss/train': 7.11178731918335} +03/03/2022 13:18:46 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 13:18:51 - INFO - codeparrot_training - Step 350: {'lr': 8.75e-05, 'samples': 179712, 'steps': 350, 'loss/train': 6.960474014282227} +03/03/2022 13:18:54 - INFO - codeparrot_training - Step 351: {'lr': 8.774999999999999e-05, 'samples': 180224, 'steps': 351, 'loss/train': 7.643229961395264} +03/03/2022 13:18:54 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 13:19:00 - INFO - codeparrot_training - Step 352: {'lr': 8.8e-05, 'samples': 180736, 'steps': 352, 'loss/train': 6.8332319259643555} +03/03/2022 13:19:03 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/03/2022 13:19:05 - INFO - codeparrot_training - Step 353: {'lr': 8.824999999999999e-05, 'samples': 181248, 'steps': 353, 'loss/train': 6.829087734222412} +03/03/2022 13:19:08 - INFO - codeparrot_training - Step 354: {'lr': 8.85e-05, 'samples': 181760, 'steps': 354, 'loss/train': 6.977699279785156} +03/03/2022 13:19:11 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/03/2022 13:19:13 - INFO - codeparrot_training - Step 355: {'lr': 8.875e-05, 'samples': 182272, 'steps': 355, 'loss/train': 7.036928653717041} +03/03/2022 13:19:17 - INFO - codeparrot_training - Step 356: {'lr': 8.9e-05, 'samples': 182784, 'steps': 356, 'loss/train': 6.960824489593506} +03/03/2022 13:19:19 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/03/2022 13:19:22 - INFO - codeparrot_training - Step 357: {'lr': 8.925e-05, 'samples': 183296, 'steps': 357, 'loss/train': 6.745418071746826} +03/03/2022 13:19:25 - INFO - codeparrot_training - Step 358: {'lr': 8.95e-05, 'samples': 183808, 'steps': 358, 'loss/train': 6.919394493103027} +03/03/2022 13:19:28 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/03/2022 13:19:30 - INFO - codeparrot_training - Step 359: {'lr': 8.975e-05, 'samples': 184320, 'steps': 359, 'loss/train': 6.71176815032959} +03/03/2022 13:19:33 - INFO - codeparrot_training - Step 360: {'lr': 8.999999999999999e-05, 'samples': 184832, 'steps': 360, 'loss/train': 6.548521518707275} +03/03/2022 13:19:36 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/03/2022 13:19:39 - INFO - codeparrot_training - Step 361: {'lr': 9.025e-05, 'samples': 185344, 'steps': 361, 'loss/train': 7.599088668823242} +03/03/2022 13:19:42 - INFO - codeparrot_training - Step 362: {'lr': 9.05e-05, 'samples': 185856, 'steps': 362, 'loss/train': 6.143686294555664} +03/03/2022 13:19:44 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/03/2022 13:19:47 - INFO - codeparrot_training - Step 363: {'lr': 9.075e-05, 'samples': 186368, 'steps': 363, 'loss/train': 7.022943019866943} +03/03/2022 13:19:50 - INFO - codeparrot_training - Step 364: {'lr': 9.1e-05, 'samples': 186880, 'steps': 364, 'loss/train': 6.801090240478516} +03/03/2022 13:19:52 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 13:19:55 - INFO - codeparrot_training - Step 365: {'lr': 9.125e-05, 'samples': 187392, 'steps': 365, 'loss/train': 6.444828510284424} +03/03/2022 13:19:59 - INFO - codeparrot_training - Step 366: {'lr': 9.15e-05, 'samples': 187904, 'steps': 366, 'loss/train': 5.071739673614502} +03/03/2022 13:20:01 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/03/2022 13:20:04 - INFO - codeparrot_training - Step 367: {'lr': 9.175e-05, 'samples': 188416, 'steps': 367, 'loss/train': 7.660885334014893} +03/03/2022 13:20:07 - INFO - codeparrot_training - Step 368: {'lr': 9.2e-05, 'samples': 188928, 'steps': 368, 'loss/train': 6.027246952056885} +03/03/2022 13:20:10 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/03/2022 13:21:37 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +03/03/2022 13:21:38 - WARNING - huggingface_hub.repository - Revision `glowing-puddle-3` does not exist. Created and checked out branch `glowing-puddle-3`. +03/03/2022 13:21:38 - WARNING - huggingface_hub.repository - +03/03/2022 13:21:49 - INFO - datasets.data_files - Some files matched the pattern '/home/nathan/codeparrot-clean-train/**' at /home/nathan/codeparrot-clean-train but don't have valid data file extensions: [PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/17/96/1796f12729d0407cc57500c9c87959e0e7becd729f37374702868ed8765015f4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2e/aa/2eaa21b832ed1496fb7f0b259666dbfc36ed483d81494d1e8705f9d601509c12'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/7c/0e/7c0ef87edb0e556939282c859c7c893a91b5b0f931394ca4cca4f4ec98a61951'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b6/ce/b6ce495492aedfc91b66efdfd214b2dfe44867c719d51590e1868e42f4e9b6dd'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5d/42/5d42ba9f195510757a3699005a7c43ddede4b598caf8a5f2f8c84d1125fa6324'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/cd/33/cd339656799518495d23aedf1503459be6d3086e22672e80edab8403d12ded1c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ac/36/ac36d12d37c1dc8ee8d3b8f0eae93966ae73482ef725615bb1a715802ddd4dd4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/packed-refs'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/67/f1/67f1ff0d590fbf4aa9afa161c290fe9be17538d4b723278bb21fd6408b0e6a3e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/90/a5/90a573501de640c3e0e6f1b3508306febc96faf6061bb33c67894c168a1879c6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2f/62/2f628d890bceee216f87edb3c45d2e384ee2501ce41a4c4169efaa3363bef1d2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/prepare-commit-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/index'), PosixPath('/home/nathan/codeparrot-clean-train/.git/objects/pack/pack-df5296c3b6443e668dc098dc5c59f854d9d79f0f.pack'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/74/31/7431977a8e3a6eb0348b821009495f85d9373c1f730f4a74b0db43326568f77d'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/17/5e/175e7375d6f65993071aa653bdd4e8b117cc02d1d2353cd7bcdbaaf7fe8b3c9c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/d0/02/d0024828eece6d4d1c25cb4e539328be97fa28ce66a3b8d2374a117711cfd520'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/15/ac/15ac016e4cd702bb184457cbf5674d71b632fc34c29611ba4de549b85c67acfb'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/32/be/32beb30e381ff02fb71854b5534306f395ef00f51f02b62da1f027c8c7fab26f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/9f/7e/9f7e18a3980d4b3d5ed9469ab7a2d67b608e8aa6fff38d876f86719c8f2a7a82'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ee/c1/eec1a9546aac0444a706c09f6aab67cd64403940657417e30212b7ff1e16665c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ac/e3/ace3ac440b380d604ab198cf8e838a2a375e7b0a6b5699ec74a8c79648f4bab8'), PosixPath('/home/nathan/codeparrot-clean-train/.git/description'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-merge-commit.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/60/41/604177fe5560efd99d93091fadab6293afe7cd7d12f81638c301de1c937c1583'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/refs/remotes/origin/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/update.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/cc/58/cc58b22515c4fd7d891287ee717c2054290b20c17b1c34693fd8964ab730687b'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/commit-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/56/80/56803c607a19ccb576c90bdb10a02cfa7b3affc67dd150fa41b00cc22213b174'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/14/08/14089cad26037080ee900bede2fd42d5cac70738b2e77402b36681e1d2a521f6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/refs/heads/main'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/f1/a7/f1a7a250e1f6164a7fb602131ff54b69deb305258792f2358075403769d58fe5'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/9b/1b/9b1b8e52b9262f03f1719d3950dc8dfa2b9719dc2e273603023f6f329c1b2068'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ef/e1/efe1759837b74b5b5ed3df1a09d4c880f9ad20413d958f79d35bf1cb6a2a09d4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/54/60/5460223b92bb118814a7777a939f4005b7426a7e4a068c193c10d1b86eeb862b'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-push'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/0b/f3/0bf3cd1320065c163f47a112458dc107650e3e862094b703b76073bd0b68663d'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/12/8d/128d56e09d9d741b2778d733e595838a50a5e82fdc9adbb0aa8645457716b97e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/info/exclude'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-checkout'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b4/83/b4836655e350f0796acd2b1a206e657c2808d9f136afae095e0b94a790c704e1'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/a4/6b/a46b5c08d39691524b46fadf78eab5efefa29978edfee799ec3587d928dc1302'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/73/73/737327c2b47693e00050aa3410c5eb402c66211a79740ab57f1c763a1e557563'), PosixPath('/home/nathan/codeparrot-clean-train/.git/config'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/d4/9f/d49f1929644619c39cff677367ff2e18223a8046ec8f61e224954a10aa2ccf8f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-push.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ae/45/ae45741df674456bc63bad91374d2ba5ef988d33d6e2a322ef0a5ac8af040371'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/e6/48/e6484a578778beccab26c8549608ec13970e6bcdb9541cdccad20f4d984e8181'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/4e/39/4e392fcaae564652d234d07b4f71eeed90efe51b1b714831e39d77f3e537d3df'), PosixPath('/home/nathan/codeparrot-clean-train/.git/objects/pack/pack-df5296c3b6443e668dc098dc5c59f854d9d79f0f.idx'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-commit'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/fsmonitor-watchman.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5f/d1/5fd1bb56db810b65d1fd3866dc43d9c7b690c8f52b9ca8119b2a5f4c49d13eec'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-applypatch.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2a/7e/2a7e50bbdb90d6c4cec534c3f1dc7ec0e6a0dada15c07cfd94615940c632ce02'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-rebase.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/55/c9/55c9c0b2f26de96e0311ee43e8eaa78ad1af387d0c59a26f22c5ebd507dda321'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b6/8a/b68a74f9784402dcb311f4db72a873035e47b98b185a1813ab2c1645cb7255a2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/e7/a9/e7a9ccbfe6bd92476f83eba205c47ed23732ace4c1bd7458d76d666ebbba3b1c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-commit.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-update.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/dc/ac/dcacb03d8f43f7879c5eab4422644d7b3797b47dbb0c9c84d88cbc85822d8306'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5a/5f/5a5fbc19e0e76787f668ada7235203c10b0cbcdea0ecf8f873f8ec281cfe3494'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/fa/e6/fae6b44a24c1c35f15053a19a6b2b2af5cc9fb8bdaf0da409068a2a1f333f28e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/df/08/df0840d1657530c8fa9f82864be5999c515f54341d926c430a82528a6bb83740'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/50/38/503872def2ac44733fbefc2602ab16224caca0896aa1eba045025ef2d60efcdc'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-receive.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/refs/remotes/origin/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-merge'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/f1/62/f162b06b5dca01aa85ef9a675d396c0fbab1d009b5bee1c5b7ea6b415c6f12a4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/0f/7a/0f7a67cd83c1c069995f0f2510ebf818dcc71d9658f189de1231d2b7aac8883c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/86/0e/860eda34e90456533e9dd41a5c0fdb74c54dc8d9cf43d6c60b887b2c858be831'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/refs/heads/main'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/05/39/053944e1daead0b6de8e46ea2e0bc68b9247604c63a55d444ac3b9adb12e2cd2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/55/b6/55b6989a41ae296337356153e6081c61484d0b6734b6905683823e7317d01c42'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/fb/84/fb84ca8000808f62718994e4b44e79d88a05b345e9638d9f6cf6c8a5472da01f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/37/26/3726a0239b5cb7d0ef3ea36886c533d0becc7404217763015559edb546d53c94'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/applypatch-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/3e/f2/3ef240d0b394384803ae1bbe3b30974e11eb9b1b6ad4f49afc2ed0f7c9eae0d6')] +03/03/2022 13:21:49 - WARNING - datasets.builder - Using custom data configuration codeparrot-clean-train-86fef7ac9fb06b05 +03/03/2022 13:21:50 - INFO - datasets.data_files - Some files matched the pattern '/home/nathan/codeparrot-clean-train/**' at /home/nathan/codeparrot-clean-train but don't have valid data file extensions: [PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/17/96/1796f12729d0407cc57500c9c87959e0e7becd729f37374702868ed8765015f4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2e/aa/2eaa21b832ed1496fb7f0b259666dbfc36ed483d81494d1e8705f9d601509c12'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/7c/0e/7c0ef87edb0e556939282c859c7c893a91b5b0f931394ca4cca4f4ec98a61951'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b6/ce/b6ce495492aedfc91b66efdfd214b2dfe44867c719d51590e1868e42f4e9b6dd'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5d/42/5d42ba9f195510757a3699005a7c43ddede4b598caf8a5f2f8c84d1125fa6324'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/cd/33/cd339656799518495d23aedf1503459be6d3086e22672e80edab8403d12ded1c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ac/36/ac36d12d37c1dc8ee8d3b8f0eae93966ae73482ef725615bb1a715802ddd4dd4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/packed-refs'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/67/f1/67f1ff0d590fbf4aa9afa161c290fe9be17538d4b723278bb21fd6408b0e6a3e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/90/a5/90a573501de640c3e0e6f1b3508306febc96faf6061bb33c67894c168a1879c6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2f/62/2f628d890bceee216f87edb3c45d2e384ee2501ce41a4c4169efaa3363bef1d2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/prepare-commit-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/index'), PosixPath('/home/nathan/codeparrot-clean-train/.git/objects/pack/pack-df5296c3b6443e668dc098dc5c59f854d9d79f0f.pack'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/74/31/7431977a8e3a6eb0348b821009495f85d9373c1f730f4a74b0db43326568f77d'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/17/5e/175e7375d6f65993071aa653bdd4e8b117cc02d1d2353cd7bcdbaaf7fe8b3c9c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/d0/02/d0024828eece6d4d1c25cb4e539328be97fa28ce66a3b8d2374a117711cfd520'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/15/ac/15ac016e4cd702bb184457cbf5674d71b632fc34c29611ba4de549b85c67acfb'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/32/be/32beb30e381ff02fb71854b5534306f395ef00f51f02b62da1f027c8c7fab26f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/9f/7e/9f7e18a3980d4b3d5ed9469ab7a2d67b608e8aa6fff38d876f86719c8f2a7a82'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ee/c1/eec1a9546aac0444a706c09f6aab67cd64403940657417e30212b7ff1e16665c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ac/e3/ace3ac440b380d604ab198cf8e838a2a375e7b0a6b5699ec74a8c79648f4bab8'), PosixPath('/home/nathan/codeparrot-clean-train/.git/description'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-merge-commit.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/60/41/604177fe5560efd99d93091fadab6293afe7cd7d12f81638c301de1c937c1583'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/refs/remotes/origin/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/update.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/cc/58/cc58b22515c4fd7d891287ee717c2054290b20c17b1c34693fd8964ab730687b'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/commit-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/56/80/56803c607a19ccb576c90bdb10a02cfa7b3affc67dd150fa41b00cc22213b174'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/14/08/14089cad26037080ee900bede2fd42d5cac70738b2e77402b36681e1d2a521f6'), PosixPath('/home/nathan/codeparrot-clean-train/.git/refs/heads/main'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/f1/a7/f1a7a250e1f6164a7fb602131ff54b69deb305258792f2358075403769d58fe5'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/9b/1b/9b1b8e52b9262f03f1719d3950dc8dfa2b9719dc2e273603023f6f329c1b2068'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ef/e1/efe1759837b74b5b5ed3df1a09d4c880f9ad20413d958f79d35bf1cb6a2a09d4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/54/60/5460223b92bb118814a7777a939f4005b7426a7e4a068c193c10d1b86eeb862b'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-push'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/0b/f3/0bf3cd1320065c163f47a112458dc107650e3e862094b703b76073bd0b68663d'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/12/8d/128d56e09d9d741b2778d733e595838a50a5e82fdc9adbb0aa8645457716b97e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/info/exclude'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-checkout'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b4/83/b4836655e350f0796acd2b1a206e657c2808d9f136afae095e0b94a790c704e1'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/a4/6b/a46b5c08d39691524b46fadf78eab5efefa29978edfee799ec3587d928dc1302'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/73/73/737327c2b47693e00050aa3410c5eb402c66211a79740ab57f1c763a1e557563'), PosixPath('/home/nathan/codeparrot-clean-train/.git/config'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/d4/9f/d49f1929644619c39cff677367ff2e18223a8046ec8f61e224954a10aa2ccf8f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-push.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/ae/45/ae45741df674456bc63bad91374d2ba5ef988d33d6e2a322ef0a5ac8af040371'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/e6/48/e6484a578778beccab26c8549608ec13970e6bcdb9541cdccad20f4d984e8181'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/4e/39/4e392fcaae564652d234d07b4f71eeed90efe51b1b714831e39d77f3e537d3df'), PosixPath('/home/nathan/codeparrot-clean-train/.git/objects/pack/pack-df5296c3b6443e668dc098dc5c59f854d9d79f0f.idx'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-commit'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/fsmonitor-watchman.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5f/d1/5fd1bb56db810b65d1fd3866dc43d9c7b690c8f52b9ca8119b2a5f4c49d13eec'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-applypatch.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/2a/7e/2a7e50bbdb90d6c4cec534c3f1dc7ec0e6a0dada15c07cfd94615940c632ce02'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-rebase.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/55/c9/55c9c0b2f26de96e0311ee43e8eaa78ad1af387d0c59a26f22c5ebd507dda321'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/b6/8a/b68a74f9784402dcb311f4db72a873035e47b98b185a1813ab2c1645cb7255a2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/e7/a9/e7a9ccbfe6bd92476f83eba205c47ed23732ace4c1bd7458d76d666ebbba3b1c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-commit.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-update.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/dc/ac/dcacb03d8f43f7879c5eab4422644d7b3797b47dbb0c9c84d88cbc85822d8306'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/5a/5f/5a5fbc19e0e76787f668ada7235203c10b0cbcdea0ecf8f873f8ec281cfe3494'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/fa/e6/fae6b44a24c1c35f15053a19a6b2b2af5cc9fb8bdaf0da409068a2a1f333f28e'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/df/08/df0840d1657530c8fa9f82864be5999c515f54341d926c430a82528a6bb83740'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/50/38/503872def2ac44733fbefc2602ab16224caca0896aa1eba045025ef2d60efcdc'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/pre-receive.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/refs/remotes/origin/HEAD'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/post-merge'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/f1/62/f162b06b5dca01aa85ef9a675d396c0fbab1d009b5bee1c5b7ea6b415c6f12a4'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/0f/7a/0f7a67cd83c1c069995f0f2510ebf818dcc71d9658f189de1231d2b7aac8883c'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/86/0e/860eda34e90456533e9dd41a5c0fdb74c54dc8d9cf43d6c60b887b2c858be831'), PosixPath('/home/nathan/codeparrot-clean-train/.git/logs/refs/heads/main'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/05/39/053944e1daead0b6de8e46ea2e0bc68b9247604c63a55d444ac3b9adb12e2cd2'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/55/b6/55b6989a41ae296337356153e6081c61484d0b6734b6905683823e7317d01c42'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/fb/84/fb84ca8000808f62718994e4b44e79d88a05b345e9638d9f6cf6c8a5472da01f'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/37/26/3726a0239b5cb7d0ef3ea36886c533d0becc7404217763015559edb546d53c94'), PosixPath('/home/nathan/codeparrot-clean-train/.git/hooks/applypatch-msg.sample'), PosixPath('/home/nathan/codeparrot-clean-train/.git/lfs/objects/3e/f2/3ef240d0b394384803ae1bbe3b30974e11eb9b1b6ad4f49afc2ed0f7c9eae0d6')] +03/03/2022 13:22:11 - WARNING - datasets.builder - Using custom data configuration codeparrot-clean-train-86fef7ac9fb06b05 +03/03/2022 13:22:50 - INFO - codeparrot_training - Step 0: {'lr': 0.0, 'samples': 512, 'steps': 0, 'loss/train': 10.075563430786133} +03/03/2022 13:24:05 - INFO - codeparrot_training - Step 1: {'lr': 2.5e-07, 'samples': 1024, 'steps': 1, 'loss/train': 10.124848365783691} +03/03/2022 13:24:05 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/03/2022 13:25:24 - INFO - codeparrot_training - Step 2: {'lr': 5e-07, 'samples': 1536, 'steps': 2, 'loss/train': 10.012473106384277} +03/03/2022 13:25:27 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/03/2022 13:25:29 - INFO - codeparrot_training - Step 3: {'lr': 7.5e-07, 'samples': 2048, 'steps': 3, 'loss/train': 10.121092796325684} +03/03/2022 13:25:33 - INFO - codeparrot_training - Step 4: {'lr': 1e-06, 'samples': 2560, 'steps': 4, 'loss/train': 10.095026016235352} +03/03/2022 13:25:35 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/03/2022 13:25:38 - INFO - codeparrot_training - Step 5: {'lr': 1.25e-06, 'samples': 3072, 'steps': 5, 'loss/train': 9.843606948852539} +03/03/2022 13:25:41 - INFO - codeparrot_training - Step 6: {'lr': 1.5e-06, 'samples': 3584, 'steps': 6, 'loss/train': 10.02976131439209} +03/03/2022 13:25:44 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/03/2022 13:25:47 - INFO - codeparrot_training - Step 7: {'lr': 1.75e-06, 'samples': 4096, 'steps': 7, 'loss/train': 10.09312629699707} +03/03/2022 13:25:50 - INFO - codeparrot_training - Step 8: {'lr': 2e-06, 'samples': 4608, 'steps': 8, 'loss/train': 9.968323707580566} +03/03/2022 13:25:53 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/03/2022 13:25:55 - INFO - codeparrot_training - Step 9: {'lr': 2.25e-06, 'samples': 5120, 'steps': 9, 'loss/train': 9.982961654663086} +03/03/2022 13:25:58 - INFO - codeparrot_training - Step 10: {'lr': 2.5e-06, 'samples': 5632, 'steps': 10, 'loss/train': 9.881282806396484} +03/03/2022 13:26:01 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/03/2022 13:26:03 - INFO - codeparrot_training - Step 11: {'lr': 2.75e-06, 'samples': 6144, 'steps': 11, 'loss/train': 9.89371395111084} +03/03/2022 13:26:07 - INFO - codeparrot_training - Step 12: {'lr': 3e-06, 'samples': 6656, 'steps': 12, 'loss/train': 9.743671417236328} +03/03/2022 13:26:09 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/03/2022 13:26:12 - INFO - codeparrot_training - Step 13: {'lr': 3.25e-06, 'samples': 7168, 'steps': 13, 'loss/train': 9.71867847442627} +03/03/2022 13:26:15 - INFO - codeparrot_training - Step 14: {'lr': 3.5e-06, 'samples': 7680, 'steps': 14, 'loss/train': 9.706364631652832} +03/03/2022 13:26:19 - INFO - codeparrot_training - Step 15: {'lr': 3.75e-06, 'samples': 8192, 'steps': 15, 'loss/train': 9.625722885131836} +03/03/2022 13:26:19 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/03/2022 13:26:24 - INFO - codeparrot_training - Step 16: {'lr': 4e-06, 'samples': 8704, 'steps': 16, 'loss/train': 9.538063049316406} +03/03/2022 13:26:27 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/03/2022 13:26:29 - INFO - codeparrot_training - Step 17: {'lr': 4.250000000000001e-06, 'samples': 9216, 'steps': 17, 'loss/train': 9.547978401184082} +03/03/2022 13:26:32 - INFO - codeparrot_training - Step 18: {'lr': 4.5e-06, 'samples': 9728, 'steps': 18, 'loss/train': 9.613702774047852} +03/03/2022 13:26:35 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/03/2022 13:26:38 - INFO - codeparrot_training - Step 19: {'lr': 4.75e-06, 'samples': 10240, 'steps': 19, 'loss/train': 9.567453384399414} +03/03/2022 13:26:41 - INFO - codeparrot_training - Step 20: {'lr': 5e-06, 'samples': 10752, 'steps': 20, 'loss/train': 9.41970443725586} +03/03/2022 13:26:43 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/03/2022 13:26:46 - INFO - codeparrot_training - Step 21: {'lr': 5.2500000000000006e-06, 'samples': 11264, 'steps': 21, 'loss/train': 9.453089714050293} +03/03/2022 13:26:49 - INFO - codeparrot_training - Step 22: {'lr': 5.5e-06, 'samples': 11776, 'steps': 22, 'loss/train': 9.410444259643555} +03/03/2022 13:26:51 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/03/2022 13:26:55 - INFO - codeparrot_training - Step 23: {'lr': 5.75e-06, 'samples': 12288, 'steps': 23, 'loss/train': 8.90427303314209} +03/03/2022 13:26:58 - INFO - codeparrot_training - Step 24: {'lr': 6e-06, 'samples': 12800, 'steps': 24, 'loss/train': 9.217617988586426} +03/03/2022 13:27:00 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/03/2022 13:27:03 - INFO - codeparrot_training - Step 25: {'lr': 6.25e-06, 'samples': 13312, 'steps': 25, 'loss/train': 9.386109352111816} +03/03/2022 13:27:06 - INFO - codeparrot_training - Step 26: {'lr': 6.5e-06, 'samples': 13824, 'steps': 26, 'loss/train': 8.986451148986816} +03/03/2022 13:27:08 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/03/2022 13:27:12 - INFO - codeparrot_training - Step 27: {'lr': 6.75e-06, 'samples': 14336, 'steps': 27, 'loss/train': 9.02078914642334} +03/03/2022 13:27:15 - INFO - codeparrot_training - Step 28: {'lr': 7e-06, 'samples': 14848, 'steps': 28, 'loss/train': 9.070732116699219} +03/03/2022 13:27:16 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/03/2022 13:27:20 - INFO - codeparrot_training - Step 29: {'lr': 7.250000000000001e-06, 'samples': 15360, 'steps': 29, 'loss/train': 8.967691421508789} +03/03/2022 13:27:23 - INFO - codeparrot_training - Step 30: {'lr': 7.5e-06, 'samples': 15872, 'steps': 30, 'loss/train': 8.433162689208984} +03/03/2022 13:27:25 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/03/2022 13:27:29 - INFO - codeparrot_training - Step 31: {'lr': 7.75e-06, 'samples': 16384, 'steps': 31, 'loss/train': 9.051046371459961} +03/03/2022 13:27:32 - INFO - codeparrot_training - Step 32: {'lr': 8e-06, 'samples': 16896, 'steps': 32, 'loss/train': 8.816210746765137} +03/03/2022 13:27:33 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/03/2022 13:27:37 - INFO - codeparrot_training - Step 33: {'lr': 8.25e-06, 'samples': 17408, 'steps': 33, 'loss/train': 9.837918281555176} +03/03/2022 13:27:40 - INFO - codeparrot_training - Step 34: {'lr': 8.500000000000002e-06, 'samples': 17920, 'steps': 34, 'loss/train': 9.328680992126465} +03/03/2022 13:27:41 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/03/2022 13:27:46 - INFO - codeparrot_training - Step 35: {'lr': 8.750000000000001e-06, 'samples': 18432, 'steps': 35, 'loss/train': 8.939859390258789} +03/03/2022 13:27:49 - INFO - codeparrot_training - Step 36: {'lr': 9e-06, 'samples': 18944, 'steps': 36, 'loss/train': 9.023159980773926} +03/03/2022 13:27:50 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/03/2022 13:27:54 - INFO - codeparrot_training - Step 37: {'lr': 9.25e-06, 'samples': 19456, 'steps': 37, 'loss/train': 9.151119232177734} +03/03/2022 13:27:57 - INFO - codeparrot_training - Step 38: {'lr': 9.5e-06, 'samples': 19968, 'steps': 38, 'loss/train': 8.467151641845703} +03/03/2022 13:27:58 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/03/2022 13:28:03 - INFO - codeparrot_training - Step 39: {'lr': 9.75e-06, 'samples': 20480, 'steps': 39, 'loss/train': 8.554976463317871} +03/03/2022 13:28:06 - INFO - codeparrot_training - Step 40: {'lr': 1e-05, 'samples': 20992, 'steps': 40, 'loss/train': 9.488030433654785} +03/03/2022 13:28:06 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/03/2022 13:28:11 - INFO - codeparrot_training - Step 41: {'lr': 1.025e-05, 'samples': 21504, 'steps': 41, 'loss/train': 9.343393325805664} +03/03/2022 13:28:14 - INFO - codeparrot_training - Step 42: {'lr': 1.0500000000000001e-05, 'samples': 22016, 'steps': 42, 'loss/train': 8.726397514343262} +03/03/2022 13:28:15 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/03/2022 13:28:20 - INFO - codeparrot_training - Step 43: {'lr': 1.0749999999999999e-05, 'samples': 22528, 'steps': 43, 'loss/train': 8.832099914550781} +03/03/2022 13:28:23 - INFO - codeparrot_training - Step 44: {'lr': 1.1e-05, 'samples': 23040, 'steps': 44, 'loss/train': 8.617071151733398} +03/03/2022 13:28:23 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/03/2022 13:28:28 - INFO - codeparrot_training - Step 45: {'lr': 1.1249999999999999e-05, 'samples': 23552, 'steps': 45, 'loss/train': 8.621835708618164} +03/03/2022 13:28:31 - INFO - codeparrot_training - Step 46: {'lr': 1.15e-05, 'samples': 24064, 'steps': 46, 'loss/train': 8.607819557189941} +03/03/2022 13:28:31 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/03/2022 13:28:37 - INFO - codeparrot_training - Step 47: {'lr': 1.1750000000000001e-05, 'samples': 24576, 'steps': 47, 'loss/train': 8.41963005065918} +03/03/2022 13:28:39 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/03/2022 13:28:42 - INFO - codeparrot_training - Step 48: {'lr': 1.2e-05, 'samples': 25088, 'steps': 48, 'loss/train': 8.563586235046387} +03/03/2022 13:28:45 - INFO - codeparrot_training - Step 49: {'lr': 1.2250000000000001e-05, 'samples': 25600, 'steps': 49, 'loss/train': 9.081216812133789} +03/03/2022 13:28:48 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/03/2022 13:28:50 - INFO - codeparrot_training - Step 50: {'lr': 1.25e-05, 'samples': 26112, 'steps': 50, 'loss/train': 8.793917655944824} +03/03/2022 13:28:54 - INFO - codeparrot_training - Step 51: {'lr': 1.275e-05, 'samples': 26624, 'steps': 51, 'loss/train': 7.926756858825684} +03/03/2022 13:28:56 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/03/2022 13:28:59 - INFO - codeparrot_training - Step 52: {'lr': 1.3e-05, 'samples': 27136, 'steps': 52, 'loss/train': 8.878251075744629} +03/03/2022 13:29:02 - INFO - codeparrot_training - Step 53: {'lr': 1.325e-05, 'samples': 27648, 'steps': 53, 'loss/train': 8.69863224029541} +03/03/2022 13:29:04 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/03/2022 13:29:08 - INFO - codeparrot_training - Step 54: {'lr': 1.35e-05, 'samples': 28160, 'steps': 54, 'loss/train': 8.32911205291748} +03/03/2022 13:29:11 - INFO - codeparrot_training - Step 55: {'lr': 1.375e-05, 'samples': 28672, 'steps': 55, 'loss/train': 8.402761459350586} +03/03/2022 13:29:13 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/03/2022 13:29:16 - INFO - codeparrot_training - Step 56: {'lr': 1.4e-05, 'samples': 29184, 'steps': 56, 'loss/train': 8.484271049499512} +03/03/2022 13:29:19 - INFO - codeparrot_training - Step 57: {'lr': 1.425e-05, 'samples': 29696, 'steps': 57, 'loss/train': 8.7686767578125} +03/03/2022 13:29:22 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 13:29:25 - INFO - codeparrot_training - Step 58: {'lr': 1.4500000000000002e-05, 'samples': 30208, 'steps': 58, 'loss/train': 8.384221076965332} +03/03/2022 13:29:28 - INFO - codeparrot_training - Step 59: {'lr': 1.475e-05, 'samples': 30720, 'steps': 59, 'loss/train': 8.519675254821777} +03/03/2022 13:29:30 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/03/2022 13:29:33 - INFO - codeparrot_training - Step 60: {'lr': 1.5e-05, 'samples': 31232, 'steps': 60, 'loss/train': 8.939576148986816} +03/03/2022 13:29:36 - INFO - codeparrot_training - Step 61: {'lr': 1.525e-05, 'samples': 31744, 'steps': 61, 'loss/train': 8.751921653747559} +03/03/2022 13:29:39 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/03/2022 13:29:42 - INFO - codeparrot_training - Step 62: {'lr': 1.55e-05, 'samples': 32256, 'steps': 62, 'loss/train': 8.698548316955566} +03/03/2022 13:29:45 - INFO - codeparrot_training - Step 63: {'lr': 1.575e-05, 'samples': 32768, 'steps': 63, 'loss/train': 8.578275680541992} +03/03/2022 13:29:47 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/03/2022 13:29:50 - INFO - codeparrot_training - Step 64: {'lr': 1.6e-05, 'samples': 33280, 'steps': 64, 'loss/train': 8.510712623596191} +03/03/2022 13:29:53 - INFO - codeparrot_training - Step 65: {'lr': 1.6250000000000002e-05, 'samples': 33792, 'steps': 65, 'loss/train': 8.645843505859375} +03/03/2022 13:29:55 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/03/2022 13:29:59 - INFO - codeparrot_training - Step 66: {'lr': 1.65e-05, 'samples': 34304, 'steps': 66, 'loss/train': 8.519330024719238} +03/03/2022 13:30:02 - INFO - codeparrot_training - Step 67: {'lr': 1.675e-05, 'samples': 34816, 'steps': 67, 'loss/train': 8.555344581604004} +03/03/2022 13:30:04 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/03/2022 13:30:07 - INFO - codeparrot_training - Step 68: {'lr': 1.7000000000000003e-05, 'samples': 35328, 'steps': 68, 'loss/train': 8.772445678710938} +03/03/2022 13:30:10 - INFO - codeparrot_training - Step 69: {'lr': 1.7250000000000003e-05, 'samples': 35840, 'steps': 69, 'loss/train': 8.694655418395996} +03/03/2022 13:30:12 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 13:30:16 - INFO - codeparrot_training - Step 70: {'lr': 1.7500000000000002e-05, 'samples': 36352, 'steps': 70, 'loss/train': 8.430862426757812} +03/03/2022 13:30:19 - INFO - codeparrot_training - Step 71: {'lr': 1.7749999999999998e-05, 'samples': 36864, 'steps': 71, 'loss/train': 9.024465560913086} +03/03/2022 13:30:20 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/03/2022 13:30:24 - INFO - codeparrot_training - Step 72: {'lr': 1.8e-05, 'samples': 37376, 'steps': 72, 'loss/train': 8.457487106323242} +03/03/2022 13:30:27 - INFO - codeparrot_training - Step 73: {'lr': 1.825e-05, 'samples': 37888, 'steps': 73, 'loss/train': 8.562689781188965} +03/03/2022 13:30:28 - INFO - codeparrot_training - Skipping example with length 1019 (seq_length=1024) +03/03/2022 13:30:33 - INFO - codeparrot_training - Step 74: {'lr': 1.85e-05, 'samples': 38400, 'steps': 74, 'loss/train': 8.607612609863281} +03/03/2022 13:30:36 - INFO - codeparrot_training - Step 75: {'lr': 1.875e-05, 'samples': 38912, 'steps': 75, 'loss/train': 8.44783878326416} +03/03/2022 13:30:37 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/03/2022 13:30:41 - INFO - codeparrot_training - Step 76: {'lr': 1.9e-05, 'samples': 39424, 'steps': 76, 'loss/train': 9.018163681030273} +03/03/2022 13:30:44 - INFO - codeparrot_training - Step 77: {'lr': 1.925e-05, 'samples': 39936, 'steps': 77, 'loss/train': 8.568017959594727} +03/03/2022 13:30:45 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/03/2022 13:30:50 - INFO - codeparrot_training - Step 78: {'lr': 1.95e-05, 'samples': 40448, 'steps': 78, 'loss/train': 8.524364471435547} +03/03/2022 13:30:53 - INFO - codeparrot_training - Step 79: {'lr': 1.975e-05, 'samples': 40960, 'steps': 79, 'loss/train': 8.529118537902832} +03/03/2022 13:30:54 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/03/2022 13:30:58 - INFO - codeparrot_training - Step 80: {'lr': 2e-05, 'samples': 41472, 'steps': 80, 'loss/train': 8.654486656188965} +03/03/2022 13:31:01 - INFO - codeparrot_training - Step 81: {'lr': 2.025e-05, 'samples': 41984, 'steps': 81, 'loss/train': 8.813864707946777} +03/03/2022 13:31:02 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/03/2022 13:31:07 - INFO - codeparrot_training - Step 82: {'lr': 2.05e-05, 'samples': 42496, 'steps': 82, 'loss/train': 8.502435684204102} +03/03/2022 13:31:10 - INFO - codeparrot_training - Step 83: {'lr': 2.0750000000000003e-05, 'samples': 43008, 'steps': 83, 'loss/train': 9.020332336425781} +03/03/2022 13:31:10 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 13:31:15 - INFO - codeparrot_training - Step 84: {'lr': 2.1000000000000002e-05, 'samples': 43520, 'steps': 84, 'loss/train': 8.501923561096191} +03/03/2022 13:31:18 - INFO - codeparrot_training - Step 85: {'lr': 2.125e-05, 'samples': 44032, 'steps': 85, 'loss/train': 8.312204360961914} +03/03/2022 13:31:19 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/03/2022 13:31:24 - INFO - codeparrot_training - Step 86: {'lr': 2.1499999999999997e-05, 'samples': 44544, 'steps': 86, 'loss/train': 8.143906593322754} +03/03/2022 13:31:27 - INFO - codeparrot_training - Step 87: {'lr': 2.175e-05, 'samples': 45056, 'steps': 87, 'loss/train': 8.643278121948242} +03/03/2022 13:31:28 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/03/2022 13:31:32 - INFO - codeparrot_training - Step 88: {'lr': 2.2e-05, 'samples': 45568, 'steps': 88, 'loss/train': 8.471076011657715} +03/03/2022 13:31:36 - INFO - codeparrot_training - Step 89: {'lr': 2.225e-05, 'samples': 46080, 'steps': 89, 'loss/train': 8.585366249084473} +03/03/2022 13:31:36 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/03/2022 13:31:41 - INFO - codeparrot_training - Step 90: {'lr': 2.2499999999999998e-05, 'samples': 46592, 'steps': 90, 'loss/train': 8.756730079650879} +03/03/2022 13:31:44 - INFO - codeparrot_training - Step 91: {'lr': 2.275e-05, 'samples': 47104, 'steps': 91, 'loss/train': 8.959531784057617} +03/03/2022 13:31:45 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/03/2022 13:31:49 - INFO - codeparrot_training - Step 92: {'lr': 2.3e-05, 'samples': 47616, 'steps': 92, 'loss/train': 8.488385200500488} +03/03/2022 13:31:52 - INFO - codeparrot_training - Step 93: {'lr': 2.325e-05, 'samples': 48128, 'steps': 93, 'loss/train': 8.548238754272461} +03/03/2022 13:31:53 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/03/2022 13:31:58 - INFO - codeparrot_training - Step 94: {'lr': 2.3500000000000002e-05, 'samples': 48640, 'steps': 94, 'loss/train': 9.079781532287598} +03/03/2022 13:32:01 - INFO - codeparrot_training - Step 95: {'lr': 2.375e-05, 'samples': 49152, 'steps': 95, 'loss/train': 8.213057518005371} +03/03/2022 13:32:01 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/03/2022 13:32:06 - INFO - codeparrot_training - Step 96: {'lr': 2.4e-05, 'samples': 49664, 'steps': 96, 'loss/train': 9.193713188171387} +03/03/2022 13:32:09 - INFO - codeparrot_training - Step 97: {'lr': 2.425e-05, 'samples': 50176, 'steps': 97, 'loss/train': 8.648150444030762} +03/03/2022 13:32:10 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/03/2022 13:32:15 - INFO - codeparrot_training - Step 98: {'lr': 2.4500000000000003e-05, 'samples': 50688, 'steps': 98, 'loss/train': 8.951532363891602} +03/03/2022 13:32:18 - INFO - codeparrot_training - Step 99: {'lr': 2.4750000000000002e-05, 'samples': 51200, 'steps': 99, 'loss/train': 8.602143287658691} +03/03/2022 13:32:18 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/03/2022 13:32:23 - INFO - codeparrot_training - Step 100: {'lr': 2.5e-05, 'samples': 51712, 'steps': 100, 'loss/train': 8.64793586730957} +03/03/2022 13:32:26 - INFO - codeparrot_training - Step 101: {'lr': 2.525e-05, 'samples': 52224, 'steps': 101, 'loss/train': 8.7129545211792} +03/03/2022 13:32:26 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/03/2022 13:32:32 - INFO - codeparrot_training - Step 102: {'lr': 2.55e-05, 'samples': 52736, 'steps': 102, 'loss/train': 8.678922653198242} +03/03/2022 13:32:35 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/03/2022 13:32:37 - INFO - codeparrot_training - Step 103: {'lr': 2.575e-05, 'samples': 53248, 'steps': 103, 'loss/train': 8.083718299865723} +03/03/2022 13:32:40 - INFO - codeparrot_training - Step 104: {'lr': 2.6e-05, 'samples': 53760, 'steps': 104, 'loss/train': 8.35842227935791} +03/03/2022 13:32:43 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/03/2022 13:32:45 - INFO - codeparrot_training - Step 105: {'lr': 2.625e-05, 'samples': 54272, 'steps': 105, 'loss/train': 8.159714698791504} +03/03/2022 13:32:49 - INFO - codeparrot_training - Step 106: {'lr': 2.65e-05, 'samples': 54784, 'steps': 106, 'loss/train': 8.1396484375} +03/03/2022 13:32:51 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/03/2022 13:32:54 - INFO - codeparrot_training - Step 107: {'lr': 2.675e-05, 'samples': 55296, 'steps': 107, 'loss/train': 8.233939170837402} +03/03/2022 13:32:57 - INFO - codeparrot_training - Step 108: {'lr': 2.7e-05, 'samples': 55808, 'steps': 108, 'loss/train': 8.465450286865234} +03/03/2022 13:32:59 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/03/2022 13:33:02 - INFO - codeparrot_training - Step 109: {'lr': 2.725e-05, 'samples': 56320, 'steps': 109, 'loss/train': 8.154802322387695} +03/03/2022 13:33:06 - INFO - codeparrot_training - Step 110: {'lr': 2.75e-05, 'samples': 56832, 'steps': 110, 'loss/train': 8.17076301574707} +03/03/2022 13:33:08 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/03/2022 13:33:11 - INFO - codeparrot_training - Step 111: {'lr': 2.775e-05, 'samples': 57344, 'steps': 111, 'loss/train': 8.381338119506836} +03/03/2022 13:33:14 - INFO - codeparrot_training - Step 112: {'lr': 2.8e-05, 'samples': 57856, 'steps': 112, 'loss/train': 8.114055633544922} +03/03/2022 13:33:16 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/03/2022 13:33:20 - INFO - codeparrot_training - Step 113: {'lr': 2.8250000000000002e-05, 'samples': 58368, 'steps': 113, 'loss/train': 7.933559894561768} +03/03/2022 13:33:23 - INFO - codeparrot_training - Step 114: {'lr': 2.85e-05, 'samples': 58880, 'steps': 114, 'loss/train': 9.193424224853516} +03/03/2022 13:33:25 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/03/2022 13:33:28 - INFO - codeparrot_training - Step 115: {'lr': 2.875e-05, 'samples': 59392, 'steps': 115, 'loss/train': 7.855266094207764} +03/03/2022 13:33:31 - INFO - codeparrot_training - Step 116: {'lr': 2.9000000000000004e-05, 'samples': 59904, 'steps': 116, 'loss/train': 8.517616271972656} +03/03/2022 13:33:33 - INFO - codeparrot_training - Skipping example with length 822 (seq_length=1024) +03/03/2022 13:33:36 - INFO - codeparrot_training - Step 117: {'lr': 2.9250000000000003e-05, 'samples': 60416, 'steps': 117, 'loss/train': 8.036568641662598} +03/03/2022 13:33:40 - INFO - codeparrot_training - Step 118: {'lr': 2.95e-05, 'samples': 60928, 'steps': 118, 'loss/train': 7.926286220550537} +03/03/2022 13:33:41 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/03/2022 13:33:45 - INFO - codeparrot_training - Step 119: {'lr': 2.9749999999999998e-05, 'samples': 61440, 'steps': 119, 'loss/train': 8.040275573730469} +03/03/2022 13:33:48 - INFO - codeparrot_training - Step 120: {'lr': 3e-05, 'samples': 61952, 'steps': 120, 'loss/train': 6.9734649658203125} +03/03/2022 13:33:50 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/03/2022 13:33:53 - INFO - codeparrot_training - Step 121: {'lr': 3.025e-05, 'samples': 62464, 'steps': 121, 'loss/train': 8.224665641784668} +03/03/2022 13:33:57 - INFO - codeparrot_training - Step 122: {'lr': 3.05e-05, 'samples': 62976, 'steps': 122, 'loss/train': 8.240490913391113} +03/03/2022 13:33:58 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/03/2022 13:34:02 - INFO - codeparrot_training - Step 123: {'lr': 3.075e-05, 'samples': 63488, 'steps': 123, 'loss/train': 8.030046463012695} +03/03/2022 13:34:05 - INFO - codeparrot_training - Step 124: {'lr': 3.1e-05, 'samples': 64000, 'steps': 124, 'loss/train': 7.976356029510498} +03/03/2022 13:34:06 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/03/2022 13:34:10 - INFO - codeparrot_training - Step 125: {'lr': 3.125e-05, 'samples': 64512, 'steps': 125, 'loss/train': 8.348257064819336} +03/03/2022 13:34:13 - INFO - codeparrot_training - Step 126: {'lr': 3.15e-05, 'samples': 65024, 'steps': 126, 'loss/train': 7.844707489013672} +03/03/2022 13:34:14 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/03/2022 13:34:19 - INFO - codeparrot_training - Step 127: {'lr': 3.175e-05, 'samples': 65536, 'steps': 127, 'loss/train': 8.167869567871094} +03/03/2022 13:34:22 - INFO - codeparrot_training - Step 128: {'lr': 3.2e-05, 'samples': 66048, 'steps': 128, 'loss/train': 7.930793762207031} +03/03/2022 13:34:22 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/03/2022 13:34:27 - INFO - codeparrot_training - Step 129: {'lr': 3.2250000000000005e-05, 'samples': 66560, 'steps': 129, 'loss/train': 7.673683166503906} +03/03/2022 13:34:30 - INFO - codeparrot_training - Step 130: {'lr': 3.2500000000000004e-05, 'samples': 67072, 'steps': 130, 'loss/train': 8.17394733428955} +03/03/2022 13:34:31 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/03/2022 13:34:36 - INFO - codeparrot_training - Step 131: {'lr': 3.275e-05, 'samples': 67584, 'steps': 131, 'loss/train': 7.390617847442627} +03/03/2022 13:34:39 - INFO - codeparrot_training - Step 132: {'lr': 3.3e-05, 'samples': 68096, 'steps': 132, 'loss/train': 7.104628562927246} +03/03/2022 13:34:39 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/03/2022 13:34:44 - INFO - codeparrot_training - Step 133: {'lr': 3.325e-05, 'samples': 68608, 'steps': 133, 'loss/train': 8.078102111816406} +03/03/2022 13:34:47 - INFO - codeparrot_training - Step 134: {'lr': 3.35e-05, 'samples': 69120, 'steps': 134, 'loss/train': 7.884213447570801} +03/03/2022 13:34:47 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/03/2022 13:34:53 - INFO - codeparrot_training - Step 135: {'lr': 3.375e-05, 'samples': 69632, 'steps': 135, 'loss/train': 8.450540542602539} +03/03/2022 13:34:56 - INFO - codeparrot_training - Step 136: {'lr': 3.4000000000000007e-05, 'samples': 70144, 'steps': 136, 'loss/train': 8.404011726379395} +03/03/2022 13:34:56 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 13:35:01 - INFO - codeparrot_training - Step 137: {'lr': 3.4250000000000006e-05, 'samples': 70656, 'steps': 137, 'loss/train': 7.6241350173950195} +03/03/2022 13:35:04 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 13:35:06 - INFO - codeparrot_training - Step 138: {'lr': 3.4500000000000005e-05, 'samples': 71168, 'steps': 138, 'loss/train': 7.530354022979736} +03/03/2022 13:35:09 - INFO - codeparrot_training - Step 139: {'lr': 3.4750000000000004e-05, 'samples': 71680, 'steps': 139, 'loss/train': 8.152228355407715} +03/03/2022 13:35:12 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/03/2022 13:35:15 - INFO - codeparrot_training - Step 140: {'lr': 3.5000000000000004e-05, 'samples': 72192, 'steps': 140, 'loss/train': 8.318809509277344} +03/03/2022 13:35:18 - INFO - codeparrot_training - Step 141: {'lr': 3.5249999999999996e-05, 'samples': 72704, 'steps': 141, 'loss/train': 7.475725173950195} +03/03/2022 13:35:20 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 13:35:23 - INFO - codeparrot_training - Step 142: {'lr': 3.5499999999999996e-05, 'samples': 73216, 'steps': 142, 'loss/train': 7.176308631896973} +03/03/2022 13:35:26 - INFO - codeparrot_training - Step 143: {'lr': 3.5749999999999995e-05, 'samples': 73728, 'steps': 143, 'loss/train': 8.0440034866333} +03/03/2022 13:35:29 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 13:35:32 - INFO - codeparrot_training - Step 144: {'lr': 3.6e-05, 'samples': 74240, 'steps': 144, 'loss/train': 8.216455459594727} +03/03/2022 13:35:35 - INFO - codeparrot_training - Step 145: {'lr': 3.625e-05, 'samples': 74752, 'steps': 145, 'loss/train': 8.240697860717773} +03/03/2022 13:35:37 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/03/2022 13:35:40 - INFO - codeparrot_training - Step 146: {'lr': 3.65e-05, 'samples': 75264, 'steps': 146, 'loss/train': 7.32492208480835} +03/03/2022 13:35:43 - INFO - codeparrot_training - Step 147: {'lr': 3.675e-05, 'samples': 75776, 'steps': 147, 'loss/train': 7.730316162109375} +03/03/2022 13:35:45 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/03/2022 13:35:48 - INFO - codeparrot_training - Step 148: {'lr': 3.7e-05, 'samples': 76288, 'steps': 148, 'loss/train': 7.661489009857178} +03/03/2022 13:35:52 - INFO - codeparrot_training - Step 149: {'lr': 3.725e-05, 'samples': 76800, 'steps': 149, 'loss/train': 7.78841495513916} +03/03/2022 13:35:54 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/03/2022 13:35:57 - INFO - codeparrot_training - Step 150: {'lr': 3.75e-05, 'samples': 77312, 'steps': 150, 'loss/train': 7.619410037994385} +03/03/2022 13:36:00 - INFO - codeparrot_training - Step 151: {'lr': 3.775e-05, 'samples': 77824, 'steps': 151, 'loss/train': 8.464534759521484} +03/03/2022 13:36:02 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/03/2022 13:36:05 - INFO - codeparrot_training - Step 152: {'lr': 3.8e-05, 'samples': 78336, 'steps': 152, 'loss/train': 7.549778461456299} +03/03/2022 13:36:09 - INFO - codeparrot_training - Step 153: {'lr': 3.825e-05, 'samples': 78848, 'steps': 153, 'loss/train': 7.371613502502441} +03/03/2022 13:36:10 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/03/2022 13:36:14 - INFO - codeparrot_training - Step 154: {'lr': 3.85e-05, 'samples': 79360, 'steps': 154, 'loss/train': 7.702090740203857} +03/03/2022 13:36:17 - INFO - codeparrot_training - Step 155: {'lr': 3.875e-05, 'samples': 79872, 'steps': 155, 'loss/train': 7.848684787750244} +03/03/2022 13:36:19 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/03/2022 13:36:23 - INFO - codeparrot_training - Step 156: {'lr': 3.9e-05, 'samples': 80384, 'steps': 156, 'loss/train': 7.010176181793213} +03/03/2022 13:36:26 - INFO - codeparrot_training - Step 157: {'lr': 3.925e-05, 'samples': 80896, 'steps': 157, 'loss/train': 4.933742523193359} +03/03/2022 13:36:29 - INFO - codeparrot_training - Step 158: {'lr': 3.95e-05, 'samples': 81408, 'steps': 158, 'loss/train': 8.188973426818848} +03/03/2022 13:36:29 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/03/2022 13:36:35 - INFO - codeparrot_training - Step 159: {'lr': 3.9750000000000004e-05, 'samples': 81920, 'steps': 159, 'loss/train': 8.148823738098145} +03/03/2022 13:36:38 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/03/2022 13:36:40 - INFO - codeparrot_training - Step 160: {'lr': 4e-05, 'samples': 82432, 'steps': 160, 'loss/train': 7.696084976196289} +03/03/2022 13:36:43 - INFO - codeparrot_training - Step 161: {'lr': 4.025e-05, 'samples': 82944, 'steps': 161, 'loss/train': 8.04732608795166} +03/03/2022 13:36:46 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 13:36:48 - INFO - codeparrot_training - Step 162: {'lr': 4.05e-05, 'samples': 83456, 'steps': 162, 'loss/train': 7.864019870758057} +03/03/2022 13:36:52 - INFO - codeparrot_training - Step 163: {'lr': 4.075e-05, 'samples': 83968, 'steps': 163, 'loss/train': 7.568187713623047} +03/03/2022 13:36:54 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/03/2022 13:36:57 - INFO - codeparrot_training - Step 164: {'lr': 4.1e-05, 'samples': 84480, 'steps': 164, 'loss/train': 7.4156365394592285} +03/03/2022 13:37:00 - INFO - codeparrot_training - Step 165: {'lr': 4.125e-05, 'samples': 84992, 'steps': 165, 'loss/train': 8.103707313537598} +03/03/2022 13:37:02 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/03/2022 13:37:05 - INFO - codeparrot_training - Step 166: {'lr': 4.1500000000000006e-05, 'samples': 85504, 'steps': 166, 'loss/train': 7.68804407119751} +03/03/2022 13:37:08 - INFO - codeparrot_training - Step 167: {'lr': 4.1750000000000005e-05, 'samples': 86016, 'steps': 167, 'loss/train': 7.974943161010742} +03/03/2022 13:37:10 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/03/2022 13:37:14 - INFO - codeparrot_training - Step 168: {'lr': 4.2000000000000004e-05, 'samples': 86528, 'steps': 168, 'loss/train': 5.833436012268066} +03/03/2022 13:37:17 - INFO - codeparrot_training - Step 169: {'lr': 4.2250000000000004e-05, 'samples': 87040, 'steps': 169, 'loss/train': 7.7950568199157715} +03/03/2022 13:37:19 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/03/2022 13:37:22 - INFO - codeparrot_training - Step 170: {'lr': 4.25e-05, 'samples': 87552, 'steps': 170, 'loss/train': 7.657891273498535} +03/03/2022 13:37:25 - INFO - codeparrot_training - Step 171: {'lr': 4.275e-05, 'samples': 88064, 'steps': 171, 'loss/train': 7.877186298370361} +03/03/2022 13:37:27 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/03/2022 13:37:31 - INFO - codeparrot_training - Step 172: {'lr': 4.2999999999999995e-05, 'samples': 88576, 'steps': 172, 'loss/train': 7.893576622009277} +03/03/2022 13:37:34 - INFO - codeparrot_training - Step 173: {'lr': 4.325e-05, 'samples': 89088, 'steps': 173, 'loss/train': 7.6018571853637695} +03/03/2022 13:37:35 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/03/2022 13:37:39 - INFO - codeparrot_training - Step 174: {'lr': 4.35e-05, 'samples': 89600, 'steps': 174, 'loss/train': 7.9183549880981445} +03/03/2022 13:37:42 - INFO - codeparrot_training - Step 175: {'lr': 4.375e-05, 'samples': 90112, 'steps': 175, 'loss/train': 7.391948699951172} +03/03/2022 13:37:43 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/03/2022 13:37:47 - INFO - codeparrot_training - Step 176: {'lr': 4.4e-05, 'samples': 90624, 'steps': 176, 'loss/train': 7.670172691345215} +03/03/2022 13:37:51 - INFO - codeparrot_training - Step 177: {'lr': 4.425e-05, 'samples': 91136, 'steps': 177, 'loss/train': 7.4225029945373535} +03/03/2022 13:37:52 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/03/2022 13:37:56 - INFO - codeparrot_training - Step 178: {'lr': 4.45e-05, 'samples': 91648, 'steps': 178, 'loss/train': 7.456734657287598} +03/03/2022 13:37:59 - INFO - codeparrot_training - Step 179: {'lr': 4.475e-05, 'samples': 92160, 'steps': 179, 'loss/train': 8.120655059814453} +03/03/2022 13:38:00 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/03/2022 13:38:04 - INFO - codeparrot_training - Step 180: {'lr': 4.4999999999999996e-05, 'samples': 92672, 'steps': 180, 'loss/train': 7.598281383514404} +03/03/2022 13:38:07 - INFO - codeparrot_training - Step 181: {'lr': 4.525e-05, 'samples': 93184, 'steps': 181, 'loss/train': 8.022689819335938} +03/03/2022 13:38:08 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/03/2022 13:38:13 - INFO - codeparrot_training - Step 182: {'lr': 4.55e-05, 'samples': 93696, 'steps': 182, 'loss/train': 7.102559566497803} +03/03/2022 13:38:16 - INFO - codeparrot_training - Step 183: {'lr': 4.575e-05, 'samples': 94208, 'steps': 183, 'loss/train': 5.537654876708984} +03/03/2022 13:38:18 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/03/2022 13:38:21 - INFO - codeparrot_training - Step 184: {'lr': 4.6e-05, 'samples': 94720, 'steps': 184, 'loss/train': 7.378385066986084} +03/03/2022 13:38:24 - INFO - codeparrot_training - Step 185: {'lr': 4.625e-05, 'samples': 95232, 'steps': 185, 'loss/train': 7.413166046142578} +03/03/2022 13:38:25 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/03/2022 13:38:29 - INFO - codeparrot_training - Step 186: {'lr': 4.65e-05, 'samples': 95744, 'steps': 186, 'loss/train': 7.445736885070801} +03/03/2022 13:38:32 - INFO - codeparrot_training - Step 187: {'lr': 4.675e-05, 'samples': 96256, 'steps': 187, 'loss/train': 8.496750831604004} +03/03/2022 13:38:34 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/03/2022 13:38:38 - INFO - codeparrot_training - Step 188: {'lr': 4.7000000000000004e-05, 'samples': 96768, 'steps': 188, 'loss/train': 7.747251033782959} +03/03/2022 13:38:41 - INFO - codeparrot_training - Step 189: {'lr': 4.725e-05, 'samples': 97280, 'steps': 189, 'loss/train': 7.649470806121826} +03/03/2022 13:38:42 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/03/2022 13:38:46 - INFO - codeparrot_training - Step 190: {'lr': 4.75e-05, 'samples': 97792, 'steps': 190, 'loss/train': 7.644663333892822} +03/03/2022 13:38:49 - INFO - codeparrot_training - Step 191: {'lr': 4.775e-05, 'samples': 98304, 'steps': 191, 'loss/train': 8.062586784362793} +03/03/2022 13:38:50 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/03/2022 13:38:54 - INFO - codeparrot_training - Step 192: {'lr': 4.8e-05, 'samples': 98816, 'steps': 192, 'loss/train': 7.344707489013672} +03/03/2022 13:38:58 - INFO - codeparrot_training - Step 193: {'lr': 4.825e-05, 'samples': 99328, 'steps': 193, 'loss/train': 8.712090492248535} +03/03/2022 13:38:58 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 13:39:03 - INFO - codeparrot_training - Step 194: {'lr': 4.85e-05, 'samples': 99840, 'steps': 194, 'loss/train': 8.00727653503418} +03/03/2022 13:39:06 - INFO - codeparrot_training - Step 195: {'lr': 4.8750000000000006e-05, 'samples': 100352, 'steps': 195, 'loss/train': 6.896945953369141} +03/03/2022 13:39:06 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/03/2022 13:39:11 - INFO - codeparrot_training - Step 196: {'lr': 4.9000000000000005e-05, 'samples': 100864, 'steps': 196, 'loss/train': 7.245913028717041} +03/03/2022 13:39:15 - INFO - codeparrot_training - Step 197: {'lr': 4.9250000000000004e-05, 'samples': 101376, 'steps': 197, 'loss/train': 7.287005424499512} +03/03/2022 13:39:15 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/03/2022 13:39:20 - INFO - codeparrot_training - Step 198: {'lr': 4.9500000000000004e-05, 'samples': 101888, 'steps': 198, 'loss/train': 7.7306318283081055} +03/03/2022 13:39:23 - INFO - codeparrot_training - Step 199: {'lr': 4.975e-05, 'samples': 102400, 'steps': 199, 'loss/train': 7.501760005950928} +03/03/2022 13:39:23 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/03/2022 13:39:28 - INFO - codeparrot_training - Step 200: {'lr': 5e-05, 'samples': 102912, 'steps': 200, 'loss/train': 7.2059173583984375} +03/03/2022 13:39:31 - INFO - codeparrot_training - Step 201: {'lr': 5.025e-05, 'samples': 103424, 'steps': 201, 'loss/train': 7.545304775238037} +03/03/2022 13:39:31 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/03/2022 13:39:37 - INFO - codeparrot_training - Step 202: {'lr': 5.05e-05, 'samples': 103936, 'steps': 202, 'loss/train': 6.92850923538208} +03/03/2022 13:39:40 - INFO - codeparrot_training - Step 203: {'lr': 5.075000000000001e-05, 'samples': 104448, 'steps': 203, 'loss/train': 7.187972068786621} +03/03/2022 13:39:40 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/03/2022 13:39:45 - INFO - codeparrot_training - Step 204: {'lr': 5.1e-05, 'samples': 104960, 'steps': 204, 'loss/train': 7.582147121429443} +03/03/2022 13:39:48 - INFO - codeparrot_training - Step 205: {'lr': 5.125e-05, 'samples': 105472, 'steps': 205, 'loss/train': 9.222358703613281} +03/03/2022 13:39:49 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/03/2022 13:39:53 - INFO - codeparrot_training - Step 206: {'lr': 5.15e-05, 'samples': 105984, 'steps': 206, 'loss/train': 7.789576530456543} +03/03/2022 13:39:57 - INFO - codeparrot_training - Step 207: {'lr': 5.175e-05, 'samples': 106496, 'steps': 207, 'loss/train': 7.880443572998047} +03/03/2022 13:39:57 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 13:40:02 - INFO - codeparrot_training - Step 208: {'lr': 5.2e-05, 'samples': 107008, 'steps': 208, 'loss/train': 7.160046100616455} +03/03/2022 13:40:05 - INFO - codeparrot_training - Step 209: {'lr': 5.2249999999999996e-05, 'samples': 107520, 'steps': 209, 'loss/train': 8.04822063446045} +03/03/2022 13:40:06 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 13:40:10 - INFO - codeparrot_training - Step 210: {'lr': 5.25e-05, 'samples': 108032, 'steps': 210, 'loss/train': 7.374578475952148} +03/03/2022 13:40:13 - INFO - codeparrot_training - Step 211: {'lr': 5.275e-05, 'samples': 108544, 'steps': 211, 'loss/train': 7.502261638641357} +03/03/2022 13:40:14 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/03/2022 13:40:19 - INFO - codeparrot_training - Step 212: {'lr': 5.3e-05, 'samples': 109056, 'steps': 212, 'loss/train': 7.538562297821045} +03/03/2022 13:40:22 - INFO - codeparrot_training - Step 213: {'lr': 5.325e-05, 'samples': 109568, 'steps': 213, 'loss/train': 8.062139511108398} +03/03/2022 13:40:22 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/03/2022 13:40:27 - INFO - codeparrot_training - Step 214: {'lr': 5.35e-05, 'samples': 110080, 'steps': 214, 'loss/train': 7.754930019378662} +03/03/2022 13:40:30 - INFO - codeparrot_training - Step 215: {'lr': 5.375e-05, 'samples': 110592, 'steps': 215, 'loss/train': 7.983520030975342} +03/03/2022 13:40:30 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/03/2022 13:40:35 - INFO - codeparrot_training - Step 216: {'lr': 5.4e-05, 'samples': 111104, 'steps': 216, 'loss/train': 7.068185329437256} +03/03/2022 13:40:38 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/03/2022 13:40:41 - INFO - codeparrot_training - Step 217: {'lr': 5.4250000000000004e-05, 'samples': 111616, 'steps': 217, 'loss/train': 7.8822760581970215} +03/03/2022 13:40:44 - INFO - codeparrot_training - Step 218: {'lr': 5.45e-05, 'samples': 112128, 'steps': 218, 'loss/train': 7.853682518005371} +03/03/2022 13:40:46 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/03/2022 13:40:49 - INFO - codeparrot_training - Step 219: {'lr': 5.475e-05, 'samples': 112640, 'steps': 219, 'loss/train': 7.099795818328857} +03/03/2022 13:40:52 - INFO - codeparrot_training - Step 220: {'lr': 5.5e-05, 'samples': 113152, 'steps': 220, 'loss/train': 7.336885929107666} +03/03/2022 13:40:55 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/03/2022 13:40:58 - INFO - codeparrot_training - Step 221: {'lr': 5.525e-05, 'samples': 113664, 'steps': 221, 'loss/train': 7.086932182312012} +03/03/2022 13:41:01 - INFO - codeparrot_training - Step 222: {'lr': 5.55e-05, 'samples': 114176, 'steps': 222, 'loss/train': 7.5683746337890625} +03/03/2022 13:41:04 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/03/2022 13:41:06 - INFO - codeparrot_training - Step 223: {'lr': 5.575e-05, 'samples': 114688, 'steps': 223, 'loss/train': 6.692105770111084} +03/03/2022 13:41:09 - INFO - codeparrot_training - Step 224: {'lr': 5.6e-05, 'samples': 115200, 'steps': 224, 'loss/train': 7.558428764343262} +03/03/2022 13:41:12 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 13:41:14 - INFO - codeparrot_training - Step 225: {'lr': 5.6250000000000005e-05, 'samples': 115712, 'steps': 225, 'loss/train': 7.024529457092285} +03/03/2022 13:41:18 - INFO - codeparrot_training - Step 226: {'lr': 5.6500000000000005e-05, 'samples': 116224, 'steps': 226, 'loss/train': 7.132439613342285} +03/03/2022 13:41:20 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/03/2022 13:41:23 - INFO - codeparrot_training - Step 227: {'lr': 5.6750000000000004e-05, 'samples': 116736, 'steps': 227, 'loss/train': 7.505816459655762} +03/03/2022 13:41:26 - INFO - codeparrot_training - Step 228: {'lr': 5.7e-05, 'samples': 117248, 'steps': 228, 'loss/train': 7.923880100250244} +03/03/2022 13:41:28 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 13:41:31 - INFO - codeparrot_training - Step 229: {'lr': 5.725e-05, 'samples': 117760, 'steps': 229, 'loss/train': 8.218132019042969} +03/03/2022 13:41:34 - INFO - codeparrot_training - Step 230: {'lr': 5.75e-05, 'samples': 118272, 'steps': 230, 'loss/train': 6.376003742218018} +03/03/2022 13:41:37 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/03/2022 13:41:40 - INFO - codeparrot_training - Step 231: {'lr': 5.775e-05, 'samples': 118784, 'steps': 231, 'loss/train': 7.3082990646362305} +03/03/2022 13:41:43 - INFO - codeparrot_training - Step 232: {'lr': 5.800000000000001e-05, 'samples': 119296, 'steps': 232, 'loss/train': 7.335635662078857} +03/03/2022 13:41:46 - INFO - codeparrot_training - Step 233: {'lr': 5.8250000000000006e-05, 'samples': 119808, 'steps': 233, 'loss/train': 6.83720064163208} +03/03/2022 13:41:46 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 13:41:52 - INFO - codeparrot_training - Step 234: {'lr': 5.8500000000000006e-05, 'samples': 120320, 'steps': 234, 'loss/train': 6.864739894866943} +03/03/2022 13:41:55 - INFO - codeparrot_training - Step 235: {'lr': 5.875e-05, 'samples': 120832, 'steps': 235, 'loss/train': 8.335467338562012} +03/03/2022 13:41:55 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 13:42:00 - INFO - codeparrot_training - Step 236: {'lr': 5.9e-05, 'samples': 121344, 'steps': 236, 'loss/train': 6.926678657531738} +03/03/2022 13:42:03 - INFO - codeparrot_training - Step 237: {'lr': 5.925e-05, 'samples': 121856, 'steps': 237, 'loss/train': 6.76780891418457} +03/03/2022 13:42:03 - INFO - codeparrot_training - Skipping example with length 230 (seq_length=1024) +03/03/2022 13:42:08 - INFO - codeparrot_training - Step 238: {'lr': 5.9499999999999996e-05, 'samples': 122368, 'steps': 238, 'loss/train': 7.604316711425781} +03/03/2022 13:42:12 - INFO - codeparrot_training - Step 239: {'lr': 5.9749999999999995e-05, 'samples': 122880, 'steps': 239, 'loss/train': 7.477373123168945} +03/03/2022 13:42:12 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/03/2022 13:42:17 - INFO - codeparrot_training - Step 240: {'lr': 6e-05, 'samples': 123392, 'steps': 240, 'loss/train': 6.324827671051025} +03/03/2022 13:42:20 - INFO - codeparrot_training - Step 241: {'lr': 6.025e-05, 'samples': 123904, 'steps': 241, 'loss/train': 7.703766822814941} +03/03/2022 13:42:20 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/03/2022 13:42:25 - INFO - codeparrot_training - Step 242: {'lr': 6.05e-05, 'samples': 124416, 'steps': 242, 'loss/train': 7.568194389343262} +03/03/2022 13:42:28 - INFO - codeparrot_training - Step 243: {'lr': 6.075e-05, 'samples': 124928, 'steps': 243, 'loss/train': 7.032049655914307} +03/03/2022 13:42:28 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/03/2022 13:42:34 - INFO - codeparrot_training - Step 244: {'lr': 6.1e-05, 'samples': 125440, 'steps': 244, 'loss/train': 7.975718021392822} +03/03/2022 13:42:37 - INFO - codeparrot_training - Step 245: {'lr': 6.125e-05, 'samples': 125952, 'steps': 245, 'loss/train': 9.234091758728027} +03/03/2022 13:42:37 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/03/2022 13:42:42 - INFO - codeparrot_training - Step 246: {'lr': 6.15e-05, 'samples': 126464, 'steps': 246, 'loss/train': 7.30528450012207} +03/03/2022 13:42:45 - INFO - codeparrot_training - Step 247: {'lr': 6.175e-05, 'samples': 126976, 'steps': 247, 'loss/train': 7.250826835632324} +03/03/2022 13:42:46 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/03/2022 13:42:50 - INFO - codeparrot_training - Step 248: {'lr': 6.2e-05, 'samples': 127488, 'steps': 248, 'loss/train': 7.167181968688965} +03/03/2022 13:42:54 - INFO - codeparrot_training - Step 249: {'lr': 6.225e-05, 'samples': 128000, 'steps': 249, 'loss/train': 7.492107391357422} +03/03/2022 13:42:54 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/03/2022 13:42:59 - INFO - codeparrot_training - Step 250: {'lr': 6.25e-05, 'samples': 128512, 'steps': 250, 'loss/train': 7.4001617431640625} +03/03/2022 13:43:02 - INFO - codeparrot_training - Step 251: {'lr': 6.275000000000001e-05, 'samples': 129024, 'steps': 251, 'loss/train': 7.7985920906066895} +03/03/2022 13:43:03 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/03/2022 13:43:07 - INFO - codeparrot_training - Step 252: {'lr': 6.3e-05, 'samples': 129536, 'steps': 252, 'loss/train': 7.012681007385254} +03/03/2022 13:43:10 - INFO - codeparrot_training - Step 253: {'lr': 6.325e-05, 'samples': 130048, 'steps': 253, 'loss/train': 7.037346839904785} +03/03/2022 13:43:11 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/03/2022 13:43:16 - INFO - codeparrot_training - Step 254: {'lr': 6.35e-05, 'samples': 130560, 'steps': 254, 'loss/train': 6.874361991882324} +03/03/2022 13:43:19 - INFO - codeparrot_training - Step 255: {'lr': 6.375e-05, 'samples': 131072, 'steps': 255, 'loss/train': 8.25483512878418} +03/03/2022 13:43:19 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/03/2022 13:43:24 - INFO - codeparrot_training - Step 256: {'lr': 6.4e-05, 'samples': 131584, 'steps': 256, 'loss/train': 7.541172981262207} +03/03/2022 13:43:27 - INFO - codeparrot_training - Step 257: {'lr': 6.425e-05, 'samples': 132096, 'steps': 257, 'loss/train': 7.457941055297852} +03/03/2022 13:43:28 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/03/2022 13:43:33 - INFO - codeparrot_training - Step 258: {'lr': 6.450000000000001e-05, 'samples': 132608, 'steps': 258, 'loss/train': 7.014778137207031} +03/03/2022 13:43:36 - INFO - codeparrot_training - Step 259: {'lr': 6.475e-05, 'samples': 133120, 'steps': 259, 'loss/train': 6.806115627288818} +03/03/2022 13:43:36 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/03/2022 13:43:41 - INFO - codeparrot_training - Step 260: {'lr': 6.500000000000001e-05, 'samples': 133632, 'steps': 260, 'loss/train': 7.267054557800293} +03/03/2022 13:43:44 - INFO - codeparrot_training - Step 261: {'lr': 6.525e-05, 'samples': 134144, 'steps': 261, 'loss/train': 6.713515758514404} +03/03/2022 13:43:44 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/03/2022 13:43:49 - INFO - codeparrot_training - Step 262: {'lr': 6.55e-05, 'samples': 134656, 'steps': 262, 'loss/train': 7.091745853424072} +03/03/2022 13:43:52 - INFO - codeparrot_training - Step 263: {'lr': 6.575e-05, 'samples': 135168, 'steps': 263, 'loss/train': 7.467846393585205} +03/03/2022 13:43:52 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/03/2022 13:43:58 - INFO - codeparrot_training - Step 264: {'lr': 6.6e-05, 'samples': 135680, 'steps': 264, 'loss/train': 6.91502046585083} +03/03/2022 13:44:01 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/03/2022 13:44:03 - INFO - codeparrot_training - Step 265: {'lr': 6.625000000000001e-05, 'samples': 136192, 'steps': 265, 'loss/train': 6.923452854156494} +03/03/2022 13:44:06 - INFO - codeparrot_training - Step 266: {'lr': 6.65e-05, 'samples': 136704, 'steps': 266, 'loss/train': 7.499436378479004} +03/03/2022 13:44:09 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/03/2022 13:44:11 - INFO - codeparrot_training - Step 267: {'lr': 6.675000000000001e-05, 'samples': 137216, 'steps': 267, 'loss/train': 7.343801975250244} +03/03/2022 13:44:15 - INFO - codeparrot_training - Step 268: {'lr': 6.7e-05, 'samples': 137728, 'steps': 268, 'loss/train': 7.012816905975342} +03/03/2022 13:44:17 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 13:44:20 - INFO - codeparrot_training - Step 269: {'lr': 6.725000000000001e-05, 'samples': 138240, 'steps': 269, 'loss/train': 7.881154537200928} +03/03/2022 13:44:23 - INFO - codeparrot_training - Step 270: {'lr': 6.75e-05, 'samples': 138752, 'steps': 270, 'loss/train': 7.508479118347168} +03/03/2022 13:44:26 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/03/2022 13:44:28 - INFO - codeparrot_training - Step 271: {'lr': 6.775000000000001e-05, 'samples': 139264, 'steps': 271, 'loss/train': 6.856858730316162} +03/03/2022 13:44:31 - INFO - codeparrot_training - Step 272: {'lr': 6.800000000000001e-05, 'samples': 139776, 'steps': 272, 'loss/train': 7.2710041999816895} +03/03/2022 13:44:34 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/03/2022 13:44:37 - INFO - codeparrot_training - Step 273: {'lr': 6.825e-05, 'samples': 140288, 'steps': 273, 'loss/train': 6.965377330780029} +03/03/2022 13:44:41 - INFO - codeparrot_training - Step 274: {'lr': 6.850000000000001e-05, 'samples': 140800, 'steps': 274, 'loss/train': 6.166491985321045} +03/03/2022 13:44:44 - INFO - codeparrot_training - Step 275: {'lr': 6.875e-05, 'samples': 141312, 'steps': 275, 'loss/train': 7.818729877471924} +03/03/2022 13:44:45 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/03/2022 13:44:49 - INFO - codeparrot_training - Step 276: {'lr': 6.900000000000001e-05, 'samples': 141824, 'steps': 276, 'loss/train': 6.974045753479004} +03/03/2022 13:44:52 - INFO - codeparrot_training - Step 277: {'lr': 6.925e-05, 'samples': 142336, 'steps': 277, 'loss/train': 6.823301792144775} +03/03/2022 13:44:54 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 13:44:57 - INFO - codeparrot_training - Step 278: {'lr': 6.950000000000001e-05, 'samples': 142848, 'steps': 278, 'loss/train': 7.374868392944336} +03/03/2022 13:45:01 - INFO - codeparrot_training - Step 279: {'lr': 6.975e-05, 'samples': 143360, 'steps': 279, 'loss/train': 7.211111068725586} +03/03/2022 13:45:02 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/03/2022 13:45:06 - INFO - codeparrot_training - Step 280: {'lr': 7.000000000000001e-05, 'samples': 143872, 'steps': 280, 'loss/train': 7.095859527587891} +03/03/2022 13:45:09 - INFO - codeparrot_training - Step 281: {'lr': 7.025000000000001e-05, 'samples': 144384, 'steps': 281, 'loss/train': 6.3933634757995605} +03/03/2022 13:45:10 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 13:45:14 - INFO - codeparrot_training - Step 282: {'lr': 7.049999999999999e-05, 'samples': 144896, 'steps': 282, 'loss/train': 8.225425720214844} +03/03/2022 13:45:17 - INFO - codeparrot_training - Step 283: {'lr': 7.075e-05, 'samples': 145408, 'steps': 283, 'loss/train': 6.789872646331787} +03/03/2022 13:45:19 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/03/2022 13:45:23 - INFO - codeparrot_training - Step 284: {'lr': 7.099999999999999e-05, 'samples': 145920, 'steps': 284, 'loss/train': 7.208834171295166} +03/03/2022 13:45:26 - INFO - codeparrot_training - Step 285: {'lr': 7.125e-05, 'samples': 146432, 'steps': 285, 'loss/train': 5.778796672821045} +03/03/2022 13:45:27 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 13:45:31 - INFO - codeparrot_training - Step 286: {'lr': 7.149999999999999e-05, 'samples': 146944, 'steps': 286, 'loss/train': 6.7907490730285645} +03/03/2022 13:45:34 - INFO - codeparrot_training - Step 287: {'lr': 7.175e-05, 'samples': 147456, 'steps': 287, 'loss/train': 6.667514801025391} +03/03/2022 13:45:35 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/03/2022 13:45:39 - INFO - codeparrot_training - Step 288: {'lr': 7.2e-05, 'samples': 147968, 'steps': 288, 'loss/train': 7.220056533813477} +03/03/2022 13:45:43 - INFO - codeparrot_training - Step 289: {'lr': 7.225e-05, 'samples': 148480, 'steps': 289, 'loss/train': 6.969030380249023} +03/03/2022 13:45:44 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/03/2022 13:45:48 - INFO - codeparrot_training - Step 290: {'lr': 7.25e-05, 'samples': 148992, 'steps': 290, 'loss/train': 7.167079925537109} +03/03/2022 13:45:51 - INFO - codeparrot_training - Step 291: {'lr': 7.274999999999999e-05, 'samples': 149504, 'steps': 291, 'loss/train': 7.262547016143799} +03/03/2022 13:45:52 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/03/2022 13:45:56 - INFO - codeparrot_training - Step 292: {'lr': 7.3e-05, 'samples': 150016, 'steps': 292, 'loss/train': 7.280813694000244} +03/03/2022 13:45:59 - INFO - codeparrot_training - Step 293: {'lr': 7.324999999999999e-05, 'samples': 150528, 'steps': 293, 'loss/train': 6.34620475769043} +03/03/2022 13:46:00 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/03/2022 13:46:05 - INFO - codeparrot_training - Step 294: {'lr': 7.35e-05, 'samples': 151040, 'steps': 294, 'loss/train': 7.634270191192627} +03/03/2022 13:46:08 - INFO - codeparrot_training - Step 295: {'lr': 7.375e-05, 'samples': 151552, 'steps': 295, 'loss/train': 4.9720330238342285} +03/03/2022 13:46:10 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/03/2022 13:46:13 - INFO - codeparrot_training - Step 296: {'lr': 7.4e-05, 'samples': 152064, 'steps': 296, 'loss/train': 7.290252208709717} +03/03/2022 13:46:16 - INFO - codeparrot_training - Step 297: {'lr': 7.425e-05, 'samples': 152576, 'steps': 297, 'loss/train': 7.179178237915039} +03/03/2022 13:46:18 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/03/2022 13:46:22 - INFO - codeparrot_training - Step 298: {'lr': 7.45e-05, 'samples': 153088, 'steps': 298, 'loss/train': 8.177205085754395} +03/03/2022 13:46:25 - INFO - codeparrot_training - Step 299: {'lr': 7.475e-05, 'samples': 153600, 'steps': 299, 'loss/train': 7.102594375610352} +03/03/2022 13:46:26 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/03/2022 13:46:30 - INFO - codeparrot_training - Step 300: {'lr': 7.5e-05, 'samples': 154112, 'steps': 300, 'loss/train': 7.6582746505737305} +03/03/2022 13:46:33 - INFO - codeparrot_training - Step 301: {'lr': 7.525e-05, 'samples': 154624, 'steps': 301, 'loss/train': 3.4812848567962646} +03/03/2022 13:46:34 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/03/2022 13:46:38 - INFO - codeparrot_training - Step 302: {'lr': 7.55e-05, 'samples': 155136, 'steps': 302, 'loss/train': 6.857351779937744} +03/03/2022 13:46:42 - INFO - codeparrot_training - Step 303: {'lr': 7.575e-05, 'samples': 155648, 'steps': 303, 'loss/train': 7.291689395904541} +03/03/2022 13:46:43 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/03/2022 13:46:47 - INFO - codeparrot_training - Step 304: {'lr': 7.6e-05, 'samples': 156160, 'steps': 304, 'loss/train': 7.73492431640625} +03/03/2022 13:46:50 - INFO - codeparrot_training - Step 305: {'lr': 7.625e-05, 'samples': 156672, 'steps': 305, 'loss/train': 7.737000942230225} +03/03/2022 13:46:51 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/03/2022 13:46:55 - INFO - codeparrot_training - Step 306: {'lr': 7.65e-05, 'samples': 157184, 'steps': 306, 'loss/train': 6.85874605178833} +03/03/2022 13:46:58 - INFO - codeparrot_training - Step 307: {'lr': 7.675e-05, 'samples': 157696, 'steps': 307, 'loss/train': 6.972065448760986} +03/03/2022 13:47:00 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/03/2022 13:47:04 - INFO - codeparrot_training - Step 308: {'lr': 7.7e-05, 'samples': 158208, 'steps': 308, 'loss/train': 7.307086944580078} +03/03/2022 13:47:07 - INFO - codeparrot_training - Step 309: {'lr': 7.725000000000001e-05, 'samples': 158720, 'steps': 309, 'loss/train': 7.4384050369262695} +03/03/2022 13:47:08 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 13:47:12 - INFO - codeparrot_training - Step 310: {'lr': 7.75e-05, 'samples': 159232, 'steps': 310, 'loss/train': 7.601604461669922} +03/03/2022 13:47:15 - INFO - codeparrot_training - Step 311: {'lr': 7.775e-05, 'samples': 159744, 'steps': 311, 'loss/train': 7.247878551483154} +03/03/2022 13:47:16 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/03/2022 13:47:20 - INFO - codeparrot_training - Step 312: {'lr': 7.8e-05, 'samples': 160256, 'steps': 312, 'loss/train': 6.91134786605835} +03/03/2022 13:47:24 - INFO - codeparrot_training - Step 313: {'lr': 7.825e-05, 'samples': 160768, 'steps': 313, 'loss/train': 7.499279499053955} +03/03/2022 13:47:24 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/03/2022 13:47:29 - INFO - codeparrot_training - Step 314: {'lr': 7.85e-05, 'samples': 161280, 'steps': 314, 'loss/train': 7.371830463409424} +03/03/2022 13:47:32 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 13:47:34 - INFO - codeparrot_training - Step 315: {'lr': 7.875e-05, 'samples': 161792, 'steps': 315, 'loss/train': 7.107333660125732} +03/03/2022 13:47:38 - INFO - codeparrot_training - Step 316: {'lr': 7.9e-05, 'samples': 162304, 'steps': 316, 'loss/train': 8.673299789428711} +03/03/2022 13:47:41 - INFO - codeparrot_training - Step 317: {'lr': 7.925e-05, 'samples': 162816, 'steps': 317, 'loss/train': 7.807727813720703} +03/03/2022 13:47:42 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/03/2022 13:47:46 - INFO - codeparrot_training - Step 318: {'lr': 7.950000000000001e-05, 'samples': 163328, 'steps': 318, 'loss/train': 7.118139266967773} +03/03/2022 13:47:49 - INFO - codeparrot_training - Step 319: {'lr': 7.975e-05, 'samples': 163840, 'steps': 319, 'loss/train': 7.224991321563721} +03/03/2022 13:47:51 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/03/2022 13:47:55 - INFO - codeparrot_training - Step 320: {'lr': 8e-05, 'samples': 164352, 'steps': 320, 'loss/train': 7.151127338409424} +03/03/2022 13:47:58 - INFO - codeparrot_training - Step 321: {'lr': 8.025e-05, 'samples': 164864, 'steps': 321, 'loss/train': 6.802467346191406} +03/03/2022 13:47:59 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/03/2022 13:48:03 - INFO - codeparrot_training - Step 322: {'lr': 8.05e-05, 'samples': 165376, 'steps': 322, 'loss/train': 7.11800479888916} +03/03/2022 13:48:06 - INFO - codeparrot_training - Step 323: {'lr': 8.075e-05, 'samples': 165888, 'steps': 323, 'loss/train': 6.9256367683410645} +03/03/2022 13:48:07 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/03/2022 13:48:12 - INFO - codeparrot_training - Step 324: {'lr': 8.1e-05, 'samples': 166400, 'steps': 324, 'loss/train': 5.947310924530029} +03/03/2022 13:48:15 - INFO - codeparrot_training - Step 325: {'lr': 8.125000000000001e-05, 'samples': 166912, 'steps': 325, 'loss/train': 7.745260238647461} +03/03/2022 13:48:17 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/03/2022 13:48:20 - INFO - codeparrot_training - Step 326: {'lr': 8.15e-05, 'samples': 167424, 'steps': 326, 'loss/train': 6.477266311645508} +03/03/2022 13:48:23 - INFO - codeparrot_training - Step 327: {'lr': 8.175000000000001e-05, 'samples': 167936, 'steps': 327, 'loss/train': 7.103150844573975} +03/03/2022 13:48:25 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/03/2022 13:48:28 - INFO - codeparrot_training - Step 328: {'lr': 8.2e-05, 'samples': 168448, 'steps': 328, 'loss/train': 7.210000991821289} +03/03/2022 13:48:31 - INFO - codeparrot_training - Step 329: {'lr': 8.225000000000001e-05, 'samples': 168960, 'steps': 329, 'loss/train': 5.856969833374023} +03/03/2022 13:48:33 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/03/2022 13:48:37 - INFO - codeparrot_training - Step 330: {'lr': 8.25e-05, 'samples': 169472, 'steps': 330, 'loss/train': 7.112217426300049} +03/03/2022 13:48:40 - INFO - codeparrot_training - Step 331: {'lr': 8.275e-05, 'samples': 169984, 'steps': 331, 'loss/train': 7.2972517013549805} +03/03/2022 13:48:41 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/03/2022 13:48:45 - INFO - codeparrot_training - Step 332: {'lr': 8.300000000000001e-05, 'samples': 170496, 'steps': 332, 'loss/train': 6.872681140899658} +03/03/2022 13:48:48 - INFO - codeparrot_training - Step 333: {'lr': 8.325e-05, 'samples': 171008, 'steps': 333, 'loss/train': 6.879507064819336} +03/03/2022 13:48:49 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/03/2022 13:48:53 - INFO - codeparrot_training - Step 334: {'lr': 8.350000000000001e-05, 'samples': 171520, 'steps': 334, 'loss/train': 7.25621223449707} +03/03/2022 13:48:56 - INFO - codeparrot_training - Step 335: {'lr': 8.375e-05, 'samples': 172032, 'steps': 335, 'loss/train': 6.667304515838623} +03/03/2022 13:48:58 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/03/2022 13:49:02 - INFO - codeparrot_training - Step 336: {'lr': 8.400000000000001e-05, 'samples': 172544, 'steps': 336, 'loss/train': 7.273214817047119} +03/03/2022 13:49:05 - INFO - codeparrot_training - Step 337: {'lr': 8.425e-05, 'samples': 173056, 'steps': 337, 'loss/train': 7.189443588256836} +03/03/2022 13:49:06 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/03/2022 13:49:10 - INFO - codeparrot_training - Step 338: {'lr': 8.450000000000001e-05, 'samples': 173568, 'steps': 338, 'loss/train': 6.605391025543213} +03/03/2022 13:49:13 - INFO - codeparrot_training - Step 339: {'lr': 8.475000000000001e-05, 'samples': 174080, 'steps': 339, 'loss/train': 6.908782005310059} +03/03/2022 13:49:14 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/03/2022 13:49:18 - INFO - codeparrot_training - Step 340: {'lr': 8.5e-05, 'samples': 174592, 'steps': 340, 'loss/train': 6.747285842895508} +03/03/2022 13:49:22 - INFO - codeparrot_training - Step 341: {'lr': 8.525000000000001e-05, 'samples': 175104, 'steps': 341, 'loss/train': 6.820441246032715} +03/03/2022 13:49:22 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/03/2022 13:49:27 - INFO - codeparrot_training - Step 342: {'lr': 8.55e-05, 'samples': 175616, 'steps': 342, 'loss/train': 7.335867404937744} +03/03/2022 13:49:30 - INFO - codeparrot_training - Step 343: {'lr': 8.575000000000001e-05, 'samples': 176128, 'steps': 343, 'loss/train': 6.804535388946533} +03/03/2022 13:49:31 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/03/2022 13:49:35 - INFO - codeparrot_training - Step 344: {'lr': 8.599999999999999e-05, 'samples': 176640, 'steps': 344, 'loss/train': 7.3406500816345215} +03/03/2022 13:49:38 - INFO - codeparrot_training - Step 345: {'lr': 8.625e-05, 'samples': 177152, 'steps': 345, 'loss/train': 6.626857757568359} +03/03/2022 13:49:39 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/03/2022 13:49:44 - INFO - codeparrot_training - Step 346: {'lr': 8.65e-05, 'samples': 177664, 'steps': 346, 'loss/train': 6.793419361114502} +03/03/2022 13:49:47 - INFO - codeparrot_training - Step 347: {'lr': 8.675e-05, 'samples': 178176, 'steps': 347, 'loss/train': 6.759413242340088} +03/03/2022 13:49:47 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/03/2022 13:49:52 - INFO - codeparrot_training - Step 348: {'lr': 8.7e-05, 'samples': 178688, 'steps': 348, 'loss/train': 6.160068035125732} +03/03/2022 13:49:55 - INFO - codeparrot_training - Step 349: {'lr': 8.724999999999999e-05, 'samples': 179200, 'steps': 349, 'loss/train': 7.11178731918335} +03/03/2022 13:49:55 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 13:50:00 - INFO - codeparrot_training - Step 350: {'lr': 8.75e-05, 'samples': 179712, 'steps': 350, 'loss/train': 6.960474014282227} +03/03/2022 13:50:03 - INFO - codeparrot_training - Step 351: {'lr': 8.774999999999999e-05, 'samples': 180224, 'steps': 351, 'loss/train': 7.643229961395264} +03/03/2022 13:50:04 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 13:50:09 - INFO - codeparrot_training - Step 352: {'lr': 8.8e-05, 'samples': 180736, 'steps': 352, 'loss/train': 6.8332319259643555} +03/03/2022 13:50:12 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/03/2022 13:50:14 - INFO - codeparrot_training - Step 353: {'lr': 8.824999999999999e-05, 'samples': 181248, 'steps': 353, 'loss/train': 6.829087734222412} +03/03/2022 13:50:17 - INFO - codeparrot_training - Step 354: {'lr': 8.85e-05, 'samples': 181760, 'steps': 354, 'loss/train': 6.977699279785156} +03/03/2022 13:50:20 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/03/2022 13:50:23 - INFO - codeparrot_training - Step 355: {'lr': 8.875e-05, 'samples': 182272, 'steps': 355, 'loss/train': 7.036928653717041} +03/03/2022 13:50:26 - INFO - codeparrot_training - Step 356: {'lr': 8.9e-05, 'samples': 182784, 'steps': 356, 'loss/train': 6.960824489593506} +03/03/2022 13:50:29 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/03/2022 13:50:31 - INFO - codeparrot_training - Step 357: {'lr': 8.925e-05, 'samples': 183296, 'steps': 357, 'loss/train': 6.745418071746826} +03/03/2022 13:50:34 - INFO - codeparrot_training - Step 358: {'lr': 8.95e-05, 'samples': 183808, 'steps': 358, 'loss/train': 6.919394493103027} +03/03/2022 13:50:37 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/03/2022 13:50:40 - INFO - codeparrot_training - Step 359: {'lr': 8.975e-05, 'samples': 184320, 'steps': 359, 'loss/train': 6.71176815032959} +03/03/2022 13:50:43 - INFO - codeparrot_training - Step 360: {'lr': 8.999999999999999e-05, 'samples': 184832, 'steps': 360, 'loss/train': 6.548521518707275} +03/03/2022 13:50:46 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/03/2022 13:50:48 - INFO - codeparrot_training - Step 361: {'lr': 9.025e-05, 'samples': 185344, 'steps': 361, 'loss/train': 7.599088668823242} +03/03/2022 13:50:51 - INFO - codeparrot_training - Step 362: {'lr': 9.05e-05, 'samples': 185856, 'steps': 362, 'loss/train': 6.143686294555664} +03/03/2022 13:50:54 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/03/2022 13:50:56 - INFO - codeparrot_training - Step 363: {'lr': 9.075e-05, 'samples': 186368, 'steps': 363, 'loss/train': 7.022943019866943} +03/03/2022 13:51:00 - INFO - codeparrot_training - Step 364: {'lr': 9.1e-05, 'samples': 186880, 'steps': 364, 'loss/train': 6.801090240478516} +03/03/2022 13:51:02 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 13:51:05 - INFO - codeparrot_training - Step 365: {'lr': 9.125e-05, 'samples': 187392, 'steps': 365, 'loss/train': 6.444828510284424} +03/03/2022 13:51:08 - INFO - codeparrot_training - Step 366: {'lr': 9.15e-05, 'samples': 187904, 'steps': 366, 'loss/train': 5.071739673614502} +03/03/2022 13:51:10 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/03/2022 13:51:13 - INFO - codeparrot_training - Step 367: {'lr': 9.175e-05, 'samples': 188416, 'steps': 367, 'loss/train': 7.660885334014893} +03/03/2022 13:51:17 - INFO - codeparrot_training - Step 368: {'lr': 9.2e-05, 'samples': 188928, 'steps': 368, 'loss/train': 6.027246952056885} +03/03/2022 13:51:19 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/03/2022 13:51:22 - INFO - codeparrot_training - Step 369: {'lr': 9.225e-05, 'samples': 189440, 'steps': 369, 'loss/train': 7.053032398223877} +03/03/2022 13:51:25 - INFO - codeparrot_training - Step 370: {'lr': 9.25e-05, 'samples': 189952, 'steps': 370, 'loss/train': 7.259976863861084} +03/03/2022 13:51:27 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 13:51:30 - INFO - codeparrot_training - Step 371: {'lr': 9.275e-05, 'samples': 190464, 'steps': 371, 'loss/train': 3.8937602043151855} +03/03/2022 13:51:33 - INFO - codeparrot_training - Step 372: {'lr': 9.3e-05, 'samples': 190976, 'steps': 372, 'loss/train': 6.874353408813477} +03/03/2022 13:51:35 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/03/2022 13:51:38 - INFO - codeparrot_training - Step 373: {'lr': 9.325e-05, 'samples': 191488, 'steps': 373, 'loss/train': 6.929388523101807} +03/03/2022 13:51:42 - INFO - codeparrot_training - Step 374: {'lr': 9.35e-05, 'samples': 192000, 'steps': 374, 'loss/train': 6.950829029083252} +03/03/2022 13:51:44 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/03/2022 13:51:47 - INFO - codeparrot_training - Step 375: {'lr': 9.375e-05, 'samples': 192512, 'steps': 375, 'loss/train': 6.641457557678223} +03/03/2022 13:51:50 - INFO - codeparrot_training - Step 376: {'lr': 9.400000000000001e-05, 'samples': 193024, 'steps': 376, 'loss/train': 4.135335445404053} +03/03/2022 13:51:52 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/03/2022 13:51:55 - INFO - codeparrot_training - Step 377: {'lr': 9.425e-05, 'samples': 193536, 'steps': 377, 'loss/train': 6.767242908477783} +03/03/2022 13:51:58 - INFO - codeparrot_training - Step 378: {'lr': 9.45e-05, 'samples': 194048, 'steps': 378, 'loss/train': 7.012984275817871} +03/03/2022 13:52:00 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/03/2022 13:52:04 - INFO - codeparrot_training - Step 379: {'lr': 9.475e-05, 'samples': 194560, 'steps': 379, 'loss/train': 6.804805278778076} +03/03/2022 13:52:07 - INFO - codeparrot_training - Step 380: {'lr': 9.5e-05, 'samples': 195072, 'steps': 380, 'loss/train': 7.384415626525879} +03/03/2022 13:52:08 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 13:52:12 - INFO - codeparrot_training - Step 381: {'lr': 9.525e-05, 'samples': 195584, 'steps': 381, 'loss/train': 8.17817211151123} +03/03/2022 13:52:15 - INFO - codeparrot_training - Step 382: {'lr': 9.55e-05, 'samples': 196096, 'steps': 382, 'loss/train': 7.321182727813721} +03/03/2022 13:52:16 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/03/2022 13:52:20 - INFO - codeparrot_training - Step 383: {'lr': 9.575000000000001e-05, 'samples': 196608, 'steps': 383, 'loss/train': 7.84943962097168} +03/03/2022 13:52:24 - INFO - codeparrot_training - Step 384: {'lr': 9.6e-05, 'samples': 197120, 'steps': 384, 'loss/train': 3.7669484615325928} +03/03/2022 13:52:25 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/03/2022 13:52:29 - INFO - codeparrot_training - Step 385: {'lr': 9.625000000000001e-05, 'samples': 197632, 'steps': 385, 'loss/train': 7.072676181793213} +03/03/2022 13:52:32 - INFO - codeparrot_training - Step 386: {'lr': 9.65e-05, 'samples': 198144, 'steps': 386, 'loss/train': 6.100987434387207} +03/03/2022 13:52:33 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/03/2022 13:52:37 - INFO - codeparrot_training - Step 387: {'lr': 9.675000000000001e-05, 'samples': 198656, 'steps': 387, 'loss/train': 7.215890407562256} +03/03/2022 13:52:40 - INFO - codeparrot_training - Step 388: {'lr': 9.7e-05, 'samples': 199168, 'steps': 388, 'loss/train': 7.250863552093506} +03/03/2022 13:52:42 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/03/2022 13:52:46 - INFO - codeparrot_training - Step 389: {'lr': 9.725e-05, 'samples': 199680, 'steps': 389, 'loss/train': 6.301525592803955} +03/03/2022 13:52:49 - INFO - codeparrot_training - Step 390: {'lr': 9.750000000000001e-05, 'samples': 200192, 'steps': 390, 'loss/train': 5.155089855194092} +03/03/2022 13:52:50 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/03/2022 13:52:54 - INFO - codeparrot_training - Step 391: {'lr': 9.775e-05, 'samples': 200704, 'steps': 391, 'loss/train': 6.642797470092773} +03/03/2022 13:52:57 - INFO - codeparrot_training - Step 392: {'lr': 9.800000000000001e-05, 'samples': 201216, 'steps': 392, 'loss/train': 7.064353942871094} +03/03/2022 13:52:58 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/03/2022 13:53:03 - INFO - codeparrot_training - Step 393: {'lr': 9.825e-05, 'samples': 201728, 'steps': 393, 'loss/train': 6.936179161071777} +03/03/2022 13:53:06 - INFO - codeparrot_training - Step 394: {'lr': 9.850000000000001e-05, 'samples': 202240, 'steps': 394, 'loss/train': 7.061115264892578} +03/03/2022 13:53:11 - INFO - codeparrot_training - Step 395: {'lr': 9.875e-05, 'samples': 202752, 'steps': 395, 'loss/train': 5.386733531951904} +03/03/2022 13:53:14 - INFO - codeparrot_training - Step 396: {'lr': 9.900000000000001e-05, 'samples': 203264, 'steps': 396, 'loss/train': 3.390434503555298} +03/03/2022 13:53:16 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/03/2022 13:53:20 - INFO - codeparrot_training - Step 397: {'lr': 9.925000000000001e-05, 'samples': 203776, 'steps': 397, 'loss/train': 6.450808048248291} +03/03/2022 13:53:23 - INFO - codeparrot_training - Step 398: {'lr': 9.95e-05, 'samples': 204288, 'steps': 398, 'loss/train': 7.519935607910156} +03/03/2022 13:53:23 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/03/2022 13:53:28 - INFO - codeparrot_training - Step 399: {'lr': 9.975000000000001e-05, 'samples': 204800, 'steps': 399, 'loss/train': 6.9697794914245605} +03/03/2022 13:53:31 - INFO - codeparrot_training - Step 400: {'lr': 0.0001, 'samples': 205312, 'steps': 400, 'loss/train': 6.924061298370361} +03/03/2022 13:53:32 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/03/2022 13:53:36 - INFO - codeparrot_training - Step 401: {'lr': 0.00010025000000000001, 'samples': 205824, 'steps': 401, 'loss/train': 6.575358867645264} +03/03/2022 13:53:39 - INFO - codeparrot_training - Step 402: {'lr': 0.0001005, 'samples': 206336, 'steps': 402, 'loss/train': 6.773837566375732} +03/03/2022 13:53:40 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/03/2022 13:53:45 - INFO - codeparrot_training - Step 403: {'lr': 0.00010075000000000001, 'samples': 206848, 'steps': 403, 'loss/train': 7.0787577629089355} +03/03/2022 13:53:48 - INFO - codeparrot_training - Step 404: {'lr': 0.000101, 'samples': 207360, 'steps': 404, 'loss/train': 6.351315975189209} +03/03/2022 13:53:48 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/03/2022 13:53:53 - INFO - codeparrot_training - Step 405: {'lr': 0.00010125000000000001, 'samples': 207872, 'steps': 405, 'loss/train': 6.846843719482422} +03/03/2022 13:53:56 - INFO - codeparrot_training - Step 406: {'lr': 0.00010150000000000001, 'samples': 208384, 'steps': 406, 'loss/train': 6.861880302429199} +03/03/2022 13:53:56 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 13:54:01 - INFO - codeparrot_training - Step 407: {'lr': 0.00010174999999999999, 'samples': 208896, 'steps': 407, 'loss/train': 6.381140232086182} +03/03/2022 13:54:05 - INFO - codeparrot_training - Step 408: {'lr': 0.000102, 'samples': 209408, 'steps': 408, 'loss/train': 4.809561729431152} +03/03/2022 13:54:05 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/03/2022 13:54:10 - INFO - codeparrot_training - Step 409: {'lr': 0.00010224999999999999, 'samples': 209920, 'steps': 409, 'loss/train': 6.883728981018066} +03/03/2022 13:54:13 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/03/2022 13:54:15 - INFO - codeparrot_training - Step 410: {'lr': 0.0001025, 'samples': 210432, 'steps': 410, 'loss/train': 6.608766078948975} +03/03/2022 13:54:18 - INFO - codeparrot_training - Step 411: {'lr': 0.00010274999999999999, 'samples': 210944, 'steps': 411, 'loss/train': 7.535120487213135} +03/03/2022 13:54:22 - INFO - codeparrot_training - Step 412: {'lr': 0.000103, 'samples': 211456, 'steps': 412, 'loss/train': 6.883657455444336} +03/03/2022 13:54:22 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/03/2022 13:54:27 - INFO - codeparrot_training - Step 413: {'lr': 0.00010325, 'samples': 211968, 'steps': 413, 'loss/train': 7.207785129547119} +03/03/2022 13:54:30 - INFO - codeparrot_training - Step 414: {'lr': 0.0001035, 'samples': 212480, 'steps': 414, 'loss/train': 6.686124324798584} +03/03/2022 13:54:30 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/03/2022 13:54:35 - INFO - codeparrot_training - Step 415: {'lr': 0.00010375, 'samples': 212992, 'steps': 415, 'loss/train': 6.87024450302124} +03/03/2022 13:54:38 - INFO - codeparrot_training - Step 416: {'lr': 0.000104, 'samples': 213504, 'steps': 416, 'loss/train': 6.131210803985596} +03/03/2022 13:54:39 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/03/2022 13:54:44 - INFO - codeparrot_training - Step 417: {'lr': 0.00010425, 'samples': 214016, 'steps': 417, 'loss/train': 6.6527204513549805} +03/03/2022 13:54:47 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/03/2022 13:54:49 - INFO - codeparrot_training - Step 418: {'lr': 0.00010449999999999999, 'samples': 214528, 'steps': 418, 'loss/train': 6.84839391708374} +03/03/2022 13:54:52 - INFO - codeparrot_training - Step 419: {'lr': 0.00010475, 'samples': 215040, 'steps': 419, 'loss/train': 5.261308193206787} +03/03/2022 13:54:55 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/03/2022 13:54:58 - INFO - codeparrot_training - Step 420: {'lr': 0.000105, 'samples': 215552, 'steps': 420, 'loss/train': 7.349946022033691} +03/03/2022 13:55:01 - INFO - codeparrot_training - Step 421: {'lr': 0.00010525, 'samples': 216064, 'steps': 421, 'loss/train': 7.383358478546143} +03/03/2022 13:55:03 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/03/2022 13:55:06 - INFO - codeparrot_training - Step 422: {'lr': 0.0001055, 'samples': 216576, 'steps': 422, 'loss/train': 7.277693271636963} +03/03/2022 13:55:09 - INFO - codeparrot_training - Step 423: {'lr': 0.00010575, 'samples': 217088, 'steps': 423, 'loss/train': 7.099862098693848} +03/03/2022 13:55:11 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/03/2022 13:55:15 - INFO - codeparrot_training - Step 424: {'lr': 0.000106, 'samples': 217600, 'steps': 424, 'loss/train': 7.674220561981201} +03/03/2022 13:55:18 - INFO - codeparrot_training - Step 425: {'lr': 0.00010625, 'samples': 218112, 'steps': 425, 'loss/train': 6.961071014404297} +03/03/2022 13:55:21 - INFO - codeparrot_training - Step 426: {'lr': 0.0001065, 'samples': 218624, 'steps': 426, 'loss/train': 7.311920642852783} +03/03/2022 13:55:21 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/03/2022 13:55:26 - INFO - codeparrot_training - Step 427: {'lr': 0.00010675, 'samples': 219136, 'steps': 427, 'loss/train': 7.3586320877075195} +03/03/2022 13:55:30 - INFO - codeparrot_training - Step 428: {'lr': 0.000107, 'samples': 219648, 'steps': 428, 'loss/train': 5.779116630554199} +03/03/2022 13:55:30 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/03/2022 13:55:35 - INFO - codeparrot_training - Step 429: {'lr': 0.00010725, 'samples': 220160, 'steps': 429, 'loss/train': 7.076303005218506} +03/03/2022 13:55:38 - INFO - codeparrot_training - Step 430: {'lr': 0.0001075, 'samples': 220672, 'steps': 430, 'loss/train': 7.492109298706055} +03/03/2022 13:55:38 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/03/2022 13:55:43 - INFO - codeparrot_training - Step 431: {'lr': 0.00010775, 'samples': 221184, 'steps': 431, 'loss/train': 7.107940673828125} +03/03/2022 13:55:46 - INFO - codeparrot_training - Step 432: {'lr': 0.000108, 'samples': 221696, 'steps': 432, 'loss/train': 6.590511322021484} +03/03/2022 13:55:46 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 13:55:52 - INFO - codeparrot_training - Step 433: {'lr': 0.00010825, 'samples': 222208, 'steps': 433, 'loss/train': 7.226790428161621} +03/03/2022 13:55:54 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/03/2022 13:55:57 - INFO - codeparrot_training - Step 434: {'lr': 0.00010850000000000001, 'samples': 222720, 'steps': 434, 'loss/train': 5.3204731941223145} +03/03/2022 13:56:00 - INFO - codeparrot_training - Step 435: {'lr': 0.00010875, 'samples': 223232, 'steps': 435, 'loss/train': 6.923895835876465} +03/03/2022 13:56:03 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/03/2022 13:56:05 - INFO - codeparrot_training - Step 436: {'lr': 0.000109, 'samples': 223744, 'steps': 436, 'loss/train': 6.937617778778076} +03/03/2022 13:56:08 - INFO - codeparrot_training - Step 437: {'lr': 0.00010925, 'samples': 224256, 'steps': 437, 'loss/train': 7.740363121032715} +03/03/2022 13:56:11 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 13:56:14 - INFO - codeparrot_training - Step 438: {'lr': 0.0001095, 'samples': 224768, 'steps': 438, 'loss/train': 6.6068596839904785} +03/03/2022 13:56:17 - INFO - codeparrot_training - Step 439: {'lr': 0.00010975, 'samples': 225280, 'steps': 439, 'loss/train': 6.751766204833984} +03/03/2022 13:56:20 - INFO - codeparrot_training - Step 440: {'lr': 0.00011, 'samples': 225792, 'steps': 440, 'loss/train': 3.797227621078491} +03/03/2022 13:56:20 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/03/2022 13:56:25 - INFO - codeparrot_training - Step 441: {'lr': 0.00011025, 'samples': 226304, 'steps': 441, 'loss/train': 6.906583309173584} +03/03/2022 13:56:29 - INFO - codeparrot_training - Step 442: {'lr': 0.0001105, 'samples': 226816, 'steps': 442, 'loss/train': 6.599184036254883} +03/03/2022 13:56:29 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/03/2022 13:56:34 - INFO - codeparrot_training - Step 443: {'lr': 0.00011075000000000001, 'samples': 227328, 'steps': 443, 'loss/train': 6.03370475769043} +03/03/2022 13:56:37 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/03/2022 13:56:39 - INFO - codeparrot_training - Step 444: {'lr': 0.000111, 'samples': 227840, 'steps': 444, 'loss/train': 6.421036243438721} +03/03/2022 13:56:42 - INFO - codeparrot_training - Step 445: {'lr': 0.00011125000000000001, 'samples': 228352, 'steps': 445, 'loss/train': 6.453313827514648} +03/03/2022 13:56:45 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/03/2022 13:56:47 - INFO - codeparrot_training - Step 446: {'lr': 0.0001115, 'samples': 228864, 'steps': 446, 'loss/train': 6.5886664390563965} +03/03/2022 13:56:51 - INFO - codeparrot_training - Step 447: {'lr': 0.00011175, 'samples': 229376, 'steps': 447, 'loss/train': 7.1440253257751465} +03/03/2022 13:56:53 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/03/2022 13:56:56 - INFO - codeparrot_training - Step 448: {'lr': 0.000112, 'samples': 229888, 'steps': 448, 'loss/train': 6.501101016998291} +03/03/2022 13:56:59 - INFO - codeparrot_training - Step 449: {'lr': 0.00011225, 'samples': 230400, 'steps': 449, 'loss/train': 6.9688544273376465} +03/03/2022 13:57:02 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/03/2022 13:57:04 - INFO - codeparrot_training - Step 450: {'lr': 0.00011250000000000001, 'samples': 230912, 'steps': 450, 'loss/train': 7.000016689300537} +03/03/2022 13:57:07 - INFO - codeparrot_training - Step 451: {'lr': 0.00011275, 'samples': 231424, 'steps': 451, 'loss/train': 6.349514961242676} +03/03/2022 13:57:10 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/03/2022 13:57:13 - INFO - codeparrot_training - Step 452: {'lr': 0.00011300000000000001, 'samples': 231936, 'steps': 452, 'loss/train': 6.484461784362793} +03/03/2022 13:57:16 - INFO - codeparrot_training - Step 453: {'lr': 0.00011325, 'samples': 232448, 'steps': 453, 'loss/train': 5.946240425109863} +03/03/2022 13:57:18 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/03/2022 13:57:21 - INFO - codeparrot_training - Step 454: {'lr': 0.00011350000000000001, 'samples': 232960, 'steps': 454, 'loss/train': 6.402734756469727} +03/03/2022 13:57:24 - INFO - codeparrot_training - Step 455: {'lr': 0.00011375, 'samples': 233472, 'steps': 455, 'loss/train': 6.981008052825928} +03/03/2022 13:57:27 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/03/2022 13:57:30 - INFO - codeparrot_training - Step 456: {'lr': 0.000114, 'samples': 233984, 'steps': 456, 'loss/train': 7.203886032104492} +03/03/2022 13:57:33 - INFO - codeparrot_training - Step 457: {'lr': 0.00011425000000000001, 'samples': 234496, 'steps': 457, 'loss/train': 6.602855205535889} +03/03/2022 13:57:35 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/03/2022 13:57:38 - INFO - codeparrot_training - Step 458: {'lr': 0.0001145, 'samples': 235008, 'steps': 458, 'loss/train': 6.232977390289307} +03/03/2022 13:57:41 - INFO - codeparrot_training - Step 459: {'lr': 0.00011475000000000001, 'samples': 235520, 'steps': 459, 'loss/train': 6.966612815856934} +03/03/2022 13:57:43 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/03/2022 13:57:47 - INFO - codeparrot_training - Step 460: {'lr': 0.000115, 'samples': 236032, 'steps': 460, 'loss/train': 6.446167945861816} +03/03/2022 13:57:50 - INFO - codeparrot_training - Step 461: {'lr': 0.00011525000000000001, 'samples': 236544, 'steps': 461, 'loss/train': 6.609718322753906} +03/03/2022 13:57:52 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/03/2022 13:57:55 - INFO - codeparrot_training - Step 462: {'lr': 0.0001155, 'samples': 237056, 'steps': 462, 'loss/train': 6.4335174560546875} +03/03/2022 13:57:58 - INFO - codeparrot_training - Step 463: {'lr': 0.00011575000000000001, 'samples': 237568, 'steps': 463, 'loss/train': 7.3489885330200195} +03/03/2022 13:58:00 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/03/2022 13:58:04 - INFO - codeparrot_training - Step 464: {'lr': 0.00011600000000000001, 'samples': 238080, 'steps': 464, 'loss/train': 6.235637664794922} +03/03/2022 13:58:07 - INFO - codeparrot_training - Step 465: {'lr': 0.00011625, 'samples': 238592, 'steps': 465, 'loss/train': 7.946732044219971} +03/03/2022 13:58:08 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/03/2022 13:58:12 - INFO - codeparrot_training - Step 466: {'lr': 0.00011650000000000001, 'samples': 239104, 'steps': 466, 'loss/train': 6.125898361206055} +03/03/2022 13:58:15 - INFO - codeparrot_training - Step 467: {'lr': 0.00011675, 'samples': 239616, 'steps': 467, 'loss/train': 6.891539573669434} +03/03/2022 13:58:16 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/03/2022 13:58:20 - INFO - codeparrot_training - Step 468: {'lr': 0.00011700000000000001, 'samples': 240128, 'steps': 468, 'loss/train': 5.3264384269714355} +03/03/2022 13:58:24 - INFO - codeparrot_training - Step 469: {'lr': 0.00011724999999999999, 'samples': 240640, 'steps': 469, 'loss/train': 7.1578264236450195} +03/03/2022 13:58:25 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/03/2022 13:58:29 - INFO - codeparrot_training - Step 470: {'lr': 0.0001175, 'samples': 241152, 'steps': 470, 'loss/train': 6.831362724304199} +03/03/2022 13:58:32 - INFO - codeparrot_training - Step 471: {'lr': 0.00011775, 'samples': 241664, 'steps': 471, 'loss/train': 5.362008571624756} +03/03/2022 13:58:34 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/03/2022 13:58:37 - INFO - codeparrot_training - Step 472: {'lr': 0.000118, 'samples': 242176, 'steps': 472, 'loss/train': 6.215170383453369} +03/03/2022 13:58:40 - INFO - codeparrot_training - Step 473: {'lr': 0.00011825, 'samples': 242688, 'steps': 473, 'loss/train': 6.0758161544799805} +03/03/2022 13:58:42 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/03/2022 13:58:46 - INFO - codeparrot_training - Step 474: {'lr': 0.0001185, 'samples': 243200, 'steps': 474, 'loss/train': 6.737450122833252} +03/03/2022 13:58:49 - INFO - codeparrot_training - Step 475: {'lr': 0.00011875, 'samples': 243712, 'steps': 475, 'loss/train': 7.72297477722168} +03/03/2022 13:58:50 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/03/2022 13:58:54 - INFO - codeparrot_training - Step 476: {'lr': 0.00011899999999999999, 'samples': 244224, 'steps': 476, 'loss/train': 6.979861259460449} +03/03/2022 13:58:57 - INFO - codeparrot_training - Step 477: {'lr': 0.00011925, 'samples': 244736, 'steps': 477, 'loss/train': 6.8633880615234375} +03/03/2022 13:58:58 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/03/2022 13:59:02 - INFO - codeparrot_training - Step 478: {'lr': 0.00011949999999999999, 'samples': 245248, 'steps': 478, 'loss/train': 6.24032735824585} +03/03/2022 13:59:06 - INFO - codeparrot_training - Step 479: {'lr': 0.00011975, 'samples': 245760, 'steps': 479, 'loss/train': 7.0863542556762695} +03/03/2022 13:59:07 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/03/2022 13:59:11 - INFO - codeparrot_training - Step 480: {'lr': 0.00012, 'samples': 246272, 'steps': 480, 'loss/train': 6.356906414031982} +03/03/2022 13:59:14 - INFO - codeparrot_training - Step 481: {'lr': 0.00012025, 'samples': 246784, 'steps': 481, 'loss/train': 6.802030563354492} +03/03/2022 13:59:16 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/03/2022 13:59:19 - INFO - codeparrot_training - Step 482: {'lr': 0.0001205, 'samples': 247296, 'steps': 482, 'loss/train': 6.5732340812683105} +03/03/2022 13:59:22 - INFO - codeparrot_training - Step 483: {'lr': 0.00012075, 'samples': 247808, 'steps': 483, 'loss/train': 6.947469234466553} +03/03/2022 13:59:24 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/03/2022 13:59:28 - INFO - codeparrot_training - Step 484: {'lr': 0.000121, 'samples': 248320, 'steps': 484, 'loss/train': 6.745939254760742} +03/03/2022 13:59:31 - INFO - codeparrot_training - Step 485: {'lr': 0.00012124999999999999, 'samples': 248832, 'steps': 485, 'loss/train': 6.2873687744140625} +03/03/2022 13:59:33 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/03/2022 13:59:36 - INFO - codeparrot_training - Step 486: {'lr': 0.0001215, 'samples': 249344, 'steps': 486, 'loss/train': 6.63435697555542} +03/03/2022 13:59:40 - INFO - codeparrot_training - Step 487: {'lr': 0.00012175, 'samples': 249856, 'steps': 487, 'loss/train': 6.722681999206543} +03/03/2022 13:59:42 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/03/2022 13:59:45 - INFO - codeparrot_training - Step 488: {'lr': 0.000122, 'samples': 250368, 'steps': 488, 'loss/train': 6.130354404449463} +03/03/2022 13:59:48 - INFO - codeparrot_training - Step 489: {'lr': 0.00012225, 'samples': 250880, 'steps': 489, 'loss/train': 6.676987171173096} +03/03/2022 13:59:50 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/03/2022 13:59:54 - INFO - codeparrot_training - Step 490: {'lr': 0.0001225, 'samples': 251392, 'steps': 490, 'loss/train': 6.809943675994873} +03/03/2022 13:59:57 - INFO - codeparrot_training - Step 491: {'lr': 0.00012275, 'samples': 251904, 'steps': 491, 'loss/train': 6.272919178009033} +03/03/2022 14:00:00 - INFO - codeparrot_training - Step 492: {'lr': 0.000123, 'samples': 252416, 'steps': 492, 'loss/train': 6.649359226226807} +03/03/2022 14:00:00 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/03/2022 14:00:05 - INFO - codeparrot_training - Step 493: {'lr': 0.00012325000000000001, 'samples': 252928, 'steps': 493, 'loss/train': 6.54364538192749} +03/03/2022 14:00:08 - INFO - codeparrot_training - Step 494: {'lr': 0.0001235, 'samples': 253440, 'steps': 494, 'loss/train': 6.606354713439941} +03/03/2022 14:00:08 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/03/2022 14:00:14 - INFO - codeparrot_training - Step 495: {'lr': 0.00012375, 'samples': 253952, 'steps': 495, 'loss/train': 6.029935836791992} +03/03/2022 14:00:17 - INFO - codeparrot_training - Step 496: {'lr': 0.000124, 'samples': 254464, 'steps': 496, 'loss/train': 6.439375877380371} +03/03/2022 14:00:17 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/03/2022 14:00:22 - INFO - codeparrot_training - Step 497: {'lr': 0.00012425, 'samples': 254976, 'steps': 497, 'loss/train': 6.189598083496094} +03/03/2022 14:00:25 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/03/2022 14:00:27 - INFO - codeparrot_training - Step 498: {'lr': 0.0001245, 'samples': 255488, 'steps': 498, 'loss/train': 6.953388214111328} +03/03/2022 14:00:31 - INFO - codeparrot_training - Step 499: {'lr': 0.00012475, 'samples': 256000, 'steps': 499, 'loss/train': 6.3105854988098145} +03/03/2022 14:00:33 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/03/2022 14:00:36 - INFO - codeparrot_training - Step 500: {'lr': 0.000125, 'samples': 256512, 'steps': 500, 'loss/train': 7.2298760414123535} +03/03/2022 14:00:39 - INFO - codeparrot_training - Step 501: {'lr': 0.00012525, 'samples': 257024, 'steps': 501, 'loss/train': 5.246858596801758} +03/03/2022 14:00:41 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/03/2022 14:00:44 - INFO - codeparrot_training - Step 502: {'lr': 0.00012550000000000001, 'samples': 257536, 'steps': 502, 'loss/train': 6.187530517578125} +03/03/2022 14:00:47 - INFO - codeparrot_training - Step 503: {'lr': 0.00012575, 'samples': 258048, 'steps': 503, 'loss/train': 3.552276611328125} +03/03/2022 14:00:49 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 14:00:52 - INFO - codeparrot_training - Step 504: {'lr': 0.000126, 'samples': 258560, 'steps': 504, 'loss/train': 7.031077861785889} +03/03/2022 14:00:56 - INFO - codeparrot_training - Step 505: {'lr': 0.00012625, 'samples': 259072, 'steps': 505, 'loss/train': 6.204738616943359} +03/03/2022 14:00:58 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/03/2022 14:01:01 - INFO - codeparrot_training - Step 506: {'lr': 0.0001265, 'samples': 259584, 'steps': 506, 'loss/train': 7.075988292694092} +03/03/2022 14:01:04 - INFO - codeparrot_training - Step 507: {'lr': 0.00012675, 'samples': 260096, 'steps': 507, 'loss/train': 6.6335272789001465} +03/03/2022 14:01:06 - INFO - codeparrot_training - Skipping example with length 440 (seq_length=1024) +03/03/2022 14:01:09 - INFO - codeparrot_training - Step 508: {'lr': 0.000127, 'samples': 260608, 'steps': 508, 'loss/train': 6.588409423828125} +03/03/2022 14:01:12 - INFO - codeparrot_training - Step 509: {'lr': 0.00012725, 'samples': 261120, 'steps': 509, 'loss/train': 7.296545028686523} +03/03/2022 14:01:14 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/03/2022 14:01:18 - INFO - codeparrot_training - Step 510: {'lr': 0.0001275, 'samples': 261632, 'steps': 510, 'loss/train': 7.028717517852783} +03/03/2022 14:01:21 - INFO - codeparrot_training - Step 511: {'lr': 0.00012775000000000002, 'samples': 262144, 'steps': 511, 'loss/train': 5.841274738311768} +03/03/2022 14:01:22 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/03/2022 14:01:26 - INFO - codeparrot_training - Step 512: {'lr': 0.000128, 'samples': 262656, 'steps': 512, 'loss/train': 6.456965923309326} +03/03/2022 14:01:29 - INFO - codeparrot_training - Step 513: {'lr': 0.00012825, 'samples': 263168, 'steps': 513, 'loss/train': 6.273007392883301} +03/03/2022 14:01:35 - INFO - codeparrot_training - Step 514: {'lr': 0.0001285, 'samples': 263680, 'steps': 514, 'loss/train': 4.0798211097717285} +03/03/2022 14:01:38 - INFO - codeparrot_training - Step 515: {'lr': 0.00012875, 'samples': 264192, 'steps': 515, 'loss/train': 6.905279159545898} +03/03/2022 14:01:40 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/03/2022 14:01:43 - INFO - codeparrot_training - Step 516: {'lr': 0.00012900000000000002, 'samples': 264704, 'steps': 516, 'loss/train': 6.346916675567627} +03/03/2022 14:01:46 - INFO - codeparrot_training - Step 517: {'lr': 0.00012925, 'samples': 265216, 'steps': 517, 'loss/train': 5.993963718414307} +03/03/2022 14:01:48 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/03/2022 14:01:51 - INFO - codeparrot_training - Step 518: {'lr': 0.0001295, 'samples': 265728, 'steps': 518, 'loss/train': 8.08281135559082} +03/03/2022 14:01:55 - INFO - codeparrot_training - Step 519: {'lr': 0.00012975, 'samples': 266240, 'steps': 519, 'loss/train': 5.490631580352783} +03/03/2022 14:01:56 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/03/2022 14:02:00 - INFO - codeparrot_training - Step 520: {'lr': 0.00013000000000000002, 'samples': 266752, 'steps': 520, 'loss/train': 6.573332786560059} +03/03/2022 14:02:03 - INFO - codeparrot_training - Step 521: {'lr': 0.00013025, 'samples': 267264, 'steps': 521, 'loss/train': 6.53049898147583} +03/03/2022 14:02:04 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/03/2022 14:02:08 - INFO - codeparrot_training - Step 522: {'lr': 0.0001305, 'samples': 267776, 'steps': 522, 'loss/train': 6.454466819763184} +03/03/2022 14:02:11 - INFO - codeparrot_training - Step 523: {'lr': 0.00013075, 'samples': 268288, 'steps': 523, 'loss/train': 6.2750749588012695} +03/03/2022 14:02:12 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/03/2022 14:02:17 - INFO - codeparrot_training - Step 524: {'lr': 0.000131, 'samples': 268800, 'steps': 524, 'loss/train': 6.3301262855529785} +03/03/2022 14:02:20 - INFO - codeparrot_training - Step 525: {'lr': 0.00013125000000000002, 'samples': 269312, 'steps': 525, 'loss/train': 6.276494979858398} +03/03/2022 14:02:21 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/03/2022 14:02:25 - INFO - codeparrot_training - Step 526: {'lr': 0.0001315, 'samples': 269824, 'steps': 526, 'loss/train': 6.3546600341796875} +03/03/2022 14:02:28 - INFO - codeparrot_training - Step 527: {'lr': 0.00013175, 'samples': 270336, 'steps': 527, 'loss/train': 6.923059463500977} +03/03/2022 14:02:29 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/03/2022 14:02:33 - INFO - codeparrot_training - Step 528: {'lr': 0.000132, 'samples': 270848, 'steps': 528, 'loss/train': 7.054484844207764} +03/03/2022 14:02:37 - INFO - codeparrot_training - Step 529: {'lr': 0.00013225000000000002, 'samples': 271360, 'steps': 529, 'loss/train': 6.102305889129639} +03/03/2022 14:02:37 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/03/2022 14:02:42 - INFO - codeparrot_training - Step 530: {'lr': 0.00013250000000000002, 'samples': 271872, 'steps': 530, 'loss/train': 6.931844711303711} +03/03/2022 14:02:45 - INFO - codeparrot_training - Step 531: {'lr': 0.00013275, 'samples': 272384, 'steps': 531, 'loss/train': 6.86146879196167} +03/03/2022 14:02:46 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/03/2022 14:02:50 - INFO - codeparrot_training - Step 532: {'lr': 0.000133, 'samples': 272896, 'steps': 532, 'loss/train': 6.249301910400391} +03/03/2022 14:02:53 - INFO - codeparrot_training - Step 533: {'lr': 0.00013325, 'samples': 273408, 'steps': 533, 'loss/train': 5.98560094833374} +03/03/2022 14:02:54 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/03/2022 14:02:59 - INFO - codeparrot_training - Step 534: {'lr': 0.00013350000000000002, 'samples': 273920, 'steps': 534, 'loss/train': 5.897489547729492} +03/03/2022 14:03:02 - INFO - codeparrot_training - Step 535: {'lr': 0.00013375, 'samples': 274432, 'steps': 535, 'loss/train': 5.953618049621582} +03/03/2022 14:03:02 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/03/2022 14:03:07 - INFO - codeparrot_training - Step 536: {'lr': 0.000134, 'samples': 274944, 'steps': 536, 'loss/train': 5.412363052368164} +03/03/2022 14:03:11 - INFO - codeparrot_training - Step 537: {'lr': 0.00013425, 'samples': 275456, 'steps': 537, 'loss/train': 6.587615013122559} +03/03/2022 14:03:11 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/03/2022 14:03:16 - INFO - codeparrot_training - Step 538: {'lr': 0.00013450000000000002, 'samples': 275968, 'steps': 538, 'loss/train': 7.666083812713623} +03/03/2022 14:03:19 - INFO - codeparrot_training - Step 539: {'lr': 0.00013475000000000002, 'samples': 276480, 'steps': 539, 'loss/train': 6.328266620635986} +03/03/2022 14:03:19 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/03/2022 14:03:24 - INFO - codeparrot_training - Step 540: {'lr': 0.000135, 'samples': 276992, 'steps': 540, 'loss/train': 6.373903274536133} +03/03/2022 14:03:27 - INFO - codeparrot_training - Step 541: {'lr': 0.00013525, 'samples': 277504, 'steps': 541, 'loss/train': 7.26821756362915} +03/03/2022 14:03:28 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/03/2022 14:03:33 - INFO - codeparrot_training - Step 542: {'lr': 0.00013550000000000001, 'samples': 278016, 'steps': 542, 'loss/train': 6.584914207458496} +03/03/2022 14:03:36 - INFO - codeparrot_training - Step 543: {'lr': 0.00013575000000000002, 'samples': 278528, 'steps': 543, 'loss/train': 5.7850165367126465} +03/03/2022 14:03:36 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/03/2022 14:03:41 - INFO - codeparrot_training - Step 544: {'lr': 0.00013600000000000003, 'samples': 279040, 'steps': 544, 'loss/train': 6.242919921875} +03/03/2022 14:03:44 - INFO - codeparrot_training - Step 545: {'lr': 0.00013625, 'samples': 279552, 'steps': 545, 'loss/train': 5.564044952392578} +03/03/2022 14:03:44 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/03/2022 14:03:49 - INFO - codeparrot_training - Step 546: {'lr': 0.0001365, 'samples': 280064, 'steps': 546, 'loss/train': 6.581683158874512} +03/03/2022 14:03:52 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/03/2022 14:03:55 - INFO - codeparrot_training - Step 547: {'lr': 0.00013675000000000002, 'samples': 280576, 'steps': 547, 'loss/train': 5.945650100708008} +03/03/2022 14:03:58 - INFO - codeparrot_training - Step 548: {'lr': 0.00013700000000000002, 'samples': 281088, 'steps': 548, 'loss/train': 6.196447372436523} +03/03/2022 14:04:01 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/03/2022 14:04:03 - INFO - codeparrot_training - Step 549: {'lr': 0.00013725, 'samples': 281600, 'steps': 549, 'loss/train': 6.543551921844482} +03/03/2022 14:04:06 - INFO - codeparrot_training - Step 550: {'lr': 0.0001375, 'samples': 282112, 'steps': 550, 'loss/train': 6.2957539558410645} +03/03/2022 14:04:09 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/03/2022 14:04:11 - INFO - codeparrot_training - Step 551: {'lr': 0.00013775000000000001, 'samples': 282624, 'steps': 551, 'loss/train': 6.463682651519775} +03/03/2022 14:04:15 - INFO - codeparrot_training - Step 552: {'lr': 0.00013800000000000002, 'samples': 283136, 'steps': 552, 'loss/train': 6.8313374519348145} +03/03/2022 14:04:17 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/03/2022 14:04:20 - INFO - codeparrot_training - Step 553: {'lr': 0.00013825000000000003, 'samples': 283648, 'steps': 553, 'loss/train': 7.100462913513184} +03/03/2022 14:04:23 - INFO - codeparrot_training - Step 554: {'lr': 0.0001385, 'samples': 284160, 'steps': 554, 'loss/train': 6.060208797454834} +03/03/2022 14:04:26 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/03/2022 14:04:28 - INFO - codeparrot_training - Step 555: {'lr': 0.00013875, 'samples': 284672, 'steps': 555, 'loss/train': 6.4385175704956055} +03/03/2022 14:04:32 - INFO - codeparrot_training - Step 556: {'lr': 0.00013900000000000002, 'samples': 285184, 'steps': 556, 'loss/train': 6.447488307952881} +03/03/2022 14:04:34 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/03/2022 14:04:37 - INFO - codeparrot_training - Step 557: {'lr': 0.00013925000000000002, 'samples': 285696, 'steps': 557, 'loss/train': 6.174968719482422} +03/03/2022 14:04:40 - INFO - codeparrot_training - Step 558: {'lr': 0.0001395, 'samples': 286208, 'steps': 558, 'loss/train': 6.187171459197998} +03/03/2022 14:04:43 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/03/2022 14:04:46 - INFO - codeparrot_training - Step 559: {'lr': 0.00013975, 'samples': 286720, 'steps': 559, 'loss/train': 5.922861099243164} +03/03/2022 14:04:49 - INFO - codeparrot_training - Step 560: {'lr': 0.00014000000000000001, 'samples': 287232, 'steps': 560, 'loss/train': 6.676205158233643} +03/03/2022 14:04:52 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/03/2022 14:04:54 - INFO - codeparrot_training - Step 561: {'lr': 0.00014025000000000002, 'samples': 287744, 'steps': 561, 'loss/train': 6.076144218444824} +03/03/2022 14:04:57 - INFO - codeparrot_training - Step 562: {'lr': 0.00014050000000000003, 'samples': 288256, 'steps': 562, 'loss/train': 6.192407131195068} +03/03/2022 14:05:00 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/03/2022 14:05:03 - INFO - codeparrot_training - Step 563: {'lr': 0.00014074999999999998, 'samples': 288768, 'steps': 563, 'loss/train': 5.674139976501465} +03/03/2022 14:05:06 - INFO - codeparrot_training - Step 564: {'lr': 0.00014099999999999998, 'samples': 289280, 'steps': 564, 'loss/train': 6.117628574371338} +03/03/2022 14:05:08 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/03/2022 14:05:11 - INFO - codeparrot_training - Step 565: {'lr': 0.00014125, 'samples': 289792, 'steps': 565, 'loss/train': 6.994811058044434} +03/03/2022 14:05:14 - INFO - codeparrot_training - Step 566: {'lr': 0.0001415, 'samples': 290304, 'steps': 566, 'loss/train': 6.521088123321533} +03/03/2022 14:05:17 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/03/2022 14:05:19 - INFO - codeparrot_training - Step 567: {'lr': 0.00014175, 'samples': 290816, 'steps': 567, 'loss/train': 6.583520412445068} +03/03/2022 14:05:23 - INFO - codeparrot_training - Step 568: {'lr': 0.00014199999999999998, 'samples': 291328, 'steps': 568, 'loss/train': 5.825527191162109} +03/03/2022 14:05:25 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/03/2022 14:05:28 - INFO - codeparrot_training - Step 569: {'lr': 0.00014225, 'samples': 291840, 'steps': 569, 'loss/train': 6.117654323577881} +03/03/2022 14:05:31 - INFO - codeparrot_training - Step 570: {'lr': 0.0001425, 'samples': 292352, 'steps': 570, 'loss/train': 6.853424072265625} +03/03/2022 14:05:33 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/03/2022 14:05:36 - INFO - codeparrot_training - Step 571: {'lr': 0.00014275, 'samples': 292864, 'steps': 571, 'loss/train': 6.277547359466553} +03/03/2022 14:05:39 - INFO - codeparrot_training - Step 572: {'lr': 0.00014299999999999998, 'samples': 293376, 'steps': 572, 'loss/train': 8.810072898864746} +03/03/2022 14:05:41 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/03/2022 14:05:45 - INFO - codeparrot_training - Step 573: {'lr': 0.00014324999999999999, 'samples': 293888, 'steps': 573, 'loss/train': 5.866996765136719} +03/03/2022 14:05:48 - INFO - codeparrot_training - Step 574: {'lr': 0.0001435, 'samples': 294400, 'steps': 574, 'loss/train': 6.329942226409912} +03/03/2022 14:05:49 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/03/2022 14:05:53 - INFO - codeparrot_training - Step 575: {'lr': 0.00014375, 'samples': 294912, 'steps': 575, 'loss/train': 6.773353099822998} +03/03/2022 14:05:56 - INFO - codeparrot_training - Step 576: {'lr': 0.000144, 'samples': 295424, 'steps': 576, 'loss/train': 7.22576904296875} +03/03/2022 14:05:57 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/03/2022 14:06:01 - INFO - codeparrot_training - Step 577: {'lr': 0.00014424999999999998, 'samples': 295936, 'steps': 577, 'loss/train': 5.495701789855957} +03/03/2022 14:06:05 - INFO - codeparrot_training - Step 578: {'lr': 0.0001445, 'samples': 296448, 'steps': 578, 'loss/train': 6.104785442352295} +03/03/2022 14:06:05 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/03/2022 14:06:10 - INFO - codeparrot_training - Step 579: {'lr': 0.00014475, 'samples': 296960, 'steps': 579, 'loss/train': 6.159816741943359} +03/03/2022 14:06:13 - INFO - codeparrot_training - Step 580: {'lr': 0.000145, 'samples': 297472, 'steps': 580, 'loss/train': 7.078535556793213} +03/03/2022 14:06:14 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 14:06:18 - INFO - codeparrot_training - Step 581: {'lr': 0.00014524999999999998, 'samples': 297984, 'steps': 581, 'loss/train': 5.665816307067871} +03/03/2022 14:06:21 - INFO - codeparrot_training - Step 582: {'lr': 0.00014549999999999999, 'samples': 298496, 'steps': 582, 'loss/train': 6.4277167320251465} +03/03/2022 14:06:23 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/03/2022 14:06:27 - INFO - codeparrot_training - Step 583: {'lr': 0.00014575, 'samples': 299008, 'steps': 583, 'loss/train': 6.36889123916626} +03/03/2022 14:06:30 - INFO - codeparrot_training - Step 584: {'lr': 0.000146, 'samples': 299520, 'steps': 584, 'loss/train': 6.408004283905029} +03/03/2022 14:06:31 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/03/2022 14:06:35 - INFO - codeparrot_training - Step 585: {'lr': 0.00014625, 'samples': 300032, 'steps': 585, 'loss/train': 6.266024112701416} +03/03/2022 14:06:38 - INFO - codeparrot_training - Step 586: {'lr': 0.00014649999999999998, 'samples': 300544, 'steps': 586, 'loss/train': 6.211362838745117} +03/03/2022 14:06:39 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/03/2022 14:06:43 - INFO - codeparrot_training - Step 587: {'lr': 0.00014675, 'samples': 301056, 'steps': 587, 'loss/train': 5.916329860687256} +03/03/2022 14:06:47 - INFO - codeparrot_training - Step 588: {'lr': 0.000147, 'samples': 301568, 'steps': 588, 'loss/train': 6.630198955535889} +03/03/2022 14:06:48 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/03/2022 14:06:52 - INFO - codeparrot_training - Step 589: {'lr': 0.00014725, 'samples': 302080, 'steps': 589, 'loss/train': 6.489871025085449} +03/03/2022 14:06:55 - INFO - codeparrot_training - Step 590: {'lr': 0.0001475, 'samples': 302592, 'steps': 590, 'loss/train': 6.236913681030273} +03/03/2022 14:06:56 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/03/2022 14:07:00 - INFO - codeparrot_training - Step 591: {'lr': 0.00014774999999999999, 'samples': 303104, 'steps': 591, 'loss/train': 7.031076431274414} +03/03/2022 14:07:03 - INFO - codeparrot_training - Step 592: {'lr': 0.000148, 'samples': 303616, 'steps': 592, 'loss/train': 6.221190452575684} +03/03/2022 14:07:04 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/03/2022 14:07:09 - INFO - codeparrot_training - Step 593: {'lr': 0.00014825, 'samples': 304128, 'steps': 593, 'loss/train': 2.855438232421875} +03/03/2022 14:07:12 - INFO - codeparrot_training - Step 594: {'lr': 0.0001485, 'samples': 304640, 'steps': 594, 'loss/train': 5.569334983825684} +03/03/2022 14:07:13 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/03/2022 14:07:17 - INFO - codeparrot_training - Step 595: {'lr': 0.00014874999999999998, 'samples': 305152, 'steps': 595, 'loss/train': 5.709835052490234} +03/03/2022 14:07:20 - INFO - codeparrot_training - Step 596: {'lr': 0.000149, 'samples': 305664, 'steps': 596, 'loss/train': 5.010371685028076} +03/03/2022 14:07:21 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/03/2022 14:07:26 - INFO - codeparrot_training - Step 597: {'lr': 0.00014925, 'samples': 306176, 'steps': 597, 'loss/train': 6.1854729652404785} +03/03/2022 14:07:29 - INFO - codeparrot_training - Step 598: {'lr': 0.0001495, 'samples': 306688, 'steps': 598, 'loss/train': 5.076950550079346} +03/03/2022 14:07:29 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/03/2022 14:07:34 - INFO - codeparrot_training - Step 599: {'lr': 0.00014975, 'samples': 307200, 'steps': 599, 'loss/train': 6.055659294128418} +03/03/2022 14:07:37 - INFO - codeparrot_training - Step 600: {'lr': 0.00015, 'samples': 307712, 'steps': 600, 'loss/train': 6.123353481292725} +03/03/2022 14:07:38 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/03/2022 14:07:42 - INFO - codeparrot_training - Step 601: {'lr': 0.00015025, 'samples': 308224, 'steps': 601, 'loss/train': 5.649929523468018} +03/03/2022 14:07:46 - INFO - codeparrot_training - Step 602: {'lr': 0.0001505, 'samples': 308736, 'steps': 602, 'loss/train': 6.700413227081299} +03/03/2022 14:07:46 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/03/2022 14:07:51 - INFO - codeparrot_training - Step 603: {'lr': 0.00015075, 'samples': 309248, 'steps': 603, 'loss/train': 6.63961124420166} +03/03/2022 14:07:54 - INFO - codeparrot_training - Step 604: {'lr': 0.000151, 'samples': 309760, 'steps': 604, 'loss/train': 6.31554651260376} +03/03/2022 14:07:55 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/03/2022 14:07:59 - INFO - codeparrot_training - Step 605: {'lr': 0.00015125, 'samples': 310272, 'steps': 605, 'loss/train': 3.064589738845825} +03/03/2022 14:08:03 - INFO - codeparrot_training - Step 606: {'lr': 0.0001515, 'samples': 310784, 'steps': 606, 'loss/train': 6.4073166847229} +03/03/2022 14:08:03 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/03/2022 14:08:08 - INFO - codeparrot_training - Step 607: {'lr': 0.00015175, 'samples': 311296, 'steps': 607, 'loss/train': 6.360343933105469} +03/03/2022 14:08:11 - INFO - codeparrot_training - Step 608: {'lr': 0.000152, 'samples': 311808, 'steps': 608, 'loss/train': 6.431277275085449} +03/03/2022 14:08:11 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/03/2022 14:08:16 - INFO - codeparrot_training - Step 609: {'lr': 0.00015225, 'samples': 312320, 'steps': 609, 'loss/train': 7.12800931930542} +03/03/2022 14:08:19 - INFO - codeparrot_training - Step 610: {'lr': 0.0001525, 'samples': 312832, 'steps': 610, 'loss/train': 7.39421272277832} +03/03/2022 14:08:20 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/03/2022 14:08:25 - INFO - codeparrot_training - Step 611: {'lr': 0.00015275, 'samples': 313344, 'steps': 611, 'loss/train': 6.497633934020996} +03/03/2022 14:08:28 - INFO - codeparrot_training - Step 612: {'lr': 0.000153, 'samples': 313856, 'steps': 612, 'loss/train': 3.2339026927948} +03/03/2022 14:08:28 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/03/2022 14:08:33 - INFO - codeparrot_training - Step 613: {'lr': 0.00015325, 'samples': 314368, 'steps': 613, 'loss/train': 5.780489444732666} +03/03/2022 14:08:36 - INFO - codeparrot_training - Step 614: {'lr': 0.0001535, 'samples': 314880, 'steps': 614, 'loss/train': 6.6113176345825195} +03/03/2022 14:08:36 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/03/2022 14:08:42 - INFO - codeparrot_training - Step 615: {'lr': 0.00015375, 'samples': 315392, 'steps': 615, 'loss/train': 5.941757678985596} +03/03/2022 14:08:45 - INFO - codeparrot_training - Step 616: {'lr': 0.000154, 'samples': 315904, 'steps': 616, 'loss/train': 5.896623611450195} +03/03/2022 14:08:47 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/03/2022 14:08:51 - INFO - codeparrot_training - Step 617: {'lr': 0.00015425, 'samples': 316416, 'steps': 617, 'loss/train': 6.162133693695068} +03/03/2022 14:08:54 - INFO - codeparrot_training - Step 618: {'lr': 0.00015450000000000001, 'samples': 316928, 'steps': 618, 'loss/train': 6.68670129776001} +03/03/2022 14:08:55 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/03/2022 14:08:59 - INFO - codeparrot_training - Step 619: {'lr': 0.00015475, 'samples': 317440, 'steps': 619, 'loss/train': 5.5909423828125} +03/03/2022 14:09:02 - INFO - codeparrot_training - Step 620: {'lr': 0.000155, 'samples': 317952, 'steps': 620, 'loss/train': 5.869774341583252} +03/03/2022 14:09:03 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/03/2022 14:09:07 - INFO - codeparrot_training - Step 621: {'lr': 0.00015525, 'samples': 318464, 'steps': 621, 'loss/train': 5.421020984649658} +03/03/2022 14:09:11 - INFO - codeparrot_training - Step 622: {'lr': 0.0001555, 'samples': 318976, 'steps': 622, 'loss/train': 6.940159797668457} +03/03/2022 14:09:11 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/03/2022 14:09:16 - INFO - codeparrot_training - Step 623: {'lr': 0.00015575, 'samples': 319488, 'steps': 623, 'loss/train': 6.464077949523926} +03/03/2022 14:09:19 - INFO - codeparrot_training - Step 624: {'lr': 0.000156, 'samples': 320000, 'steps': 624, 'loss/train': 6.078533172607422} +03/03/2022 14:09:20 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/03/2022 14:09:24 - INFO - codeparrot_training - Step 625: {'lr': 0.00015625, 'samples': 320512, 'steps': 625, 'loss/train': 6.872628211975098} +03/03/2022 14:09:27 - INFO - codeparrot_training - Step 626: {'lr': 0.0001565, 'samples': 321024, 'steps': 626, 'loss/train': 5.479709148406982} +03/03/2022 14:09:28 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/03/2022 14:09:33 - INFO - codeparrot_training - Step 627: {'lr': 0.00015675000000000002, 'samples': 321536, 'steps': 627, 'loss/train': 5.674254417419434} +03/03/2022 14:09:36 - INFO - codeparrot_training - Step 628: {'lr': 0.000157, 'samples': 322048, 'steps': 628, 'loss/train': 6.159651756286621} +03/03/2022 14:09:36 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/03/2022 14:09:41 - INFO - codeparrot_training - Step 629: {'lr': 0.00015725, 'samples': 322560, 'steps': 629, 'loss/train': 7.308654308319092} +03/03/2022 14:09:44 - INFO - codeparrot_training - Step 630: {'lr': 0.0001575, 'samples': 323072, 'steps': 630, 'loss/train': 6.169018745422363} +03/03/2022 14:09:44 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/03/2022 14:09:49 - INFO - codeparrot_training - Step 631: {'lr': 0.00015775, 'samples': 323584, 'steps': 631, 'loss/train': 5.524197578430176} +03/03/2022 14:09:53 - INFO - codeparrot_training - Step 632: {'lr': 0.000158, 'samples': 324096, 'steps': 632, 'loss/train': 5.829387187957764} +03/03/2022 14:09:53 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/03/2022 14:09:58 - INFO - codeparrot_training - Step 633: {'lr': 0.00015825, 'samples': 324608, 'steps': 633, 'loss/train': 6.475989818572998} +03/03/2022 14:10:01 - INFO - codeparrot_training - Step 634: {'lr': 0.0001585, 'samples': 325120, 'steps': 634, 'loss/train': 6.106311798095703} +03/03/2022 14:10:01 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/03/2022 14:10:06 - INFO - codeparrot_training - Step 635: {'lr': 0.00015875, 'samples': 325632, 'steps': 635, 'loss/train': 7.287618160247803} +03/03/2022 14:10:09 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/03/2022 14:10:12 - INFO - codeparrot_training - Step 636: {'lr': 0.00015900000000000002, 'samples': 326144, 'steps': 636, 'loss/train': 6.331869125366211} +03/03/2022 14:10:15 - INFO - codeparrot_training - Step 637: {'lr': 0.00015925, 'samples': 326656, 'steps': 637, 'loss/train': 5.934976577758789} +03/03/2022 14:10:18 - INFO - codeparrot_training - Step 638: {'lr': 0.0001595, 'samples': 327168, 'steps': 638, 'loss/train': 6.335379600524902} +03/03/2022 14:10:18 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/03/2022 14:10:23 - INFO - codeparrot_training - Step 639: {'lr': 0.00015975, 'samples': 327680, 'steps': 639, 'loss/train': 6.426903247833252} +03/03/2022 14:10:26 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/03/2022 14:10:28 - INFO - codeparrot_training - Step 640: {'lr': 0.00016, 'samples': 328192, 'steps': 640, 'loss/train': 5.964064598083496} +03/03/2022 14:10:32 - INFO - codeparrot_training - Step 641: {'lr': 0.00016025000000000002, 'samples': 328704, 'steps': 641, 'loss/train': 6.006753444671631} +03/03/2022 14:10:35 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/03/2022 14:10:37 - INFO - codeparrot_training - Step 642: {'lr': 0.0001605, 'samples': 329216, 'steps': 642, 'loss/train': 6.466495513916016} +03/03/2022 14:10:40 - INFO - codeparrot_training - Step 643: {'lr': 0.00016075, 'samples': 329728, 'steps': 643, 'loss/train': 5.993957996368408} +03/03/2022 14:10:43 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/03/2022 14:10:45 - INFO - codeparrot_training - Step 644: {'lr': 0.000161, 'samples': 330240, 'steps': 644, 'loss/train': 5.594142913818359} +03/03/2022 14:10:48 - INFO - codeparrot_training - Step 645: {'lr': 0.00016125000000000002, 'samples': 330752, 'steps': 645, 'loss/train': 6.494534492492676} +03/03/2022 14:10:51 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/03/2022 14:10:54 - INFO - codeparrot_training - Step 646: {'lr': 0.0001615, 'samples': 331264, 'steps': 646, 'loss/train': 6.0859150886535645} +03/03/2022 14:10:57 - INFO - codeparrot_training - Step 647: {'lr': 0.00016175, 'samples': 331776, 'steps': 647, 'loss/train': 5.9677557945251465} +03/03/2022 14:10:59 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/03/2022 14:11:02 - INFO - codeparrot_training - Step 648: {'lr': 0.000162, 'samples': 332288, 'steps': 648, 'loss/train': 6.707947254180908} +03/03/2022 14:11:05 - INFO - codeparrot_training - Step 649: {'lr': 0.00016225000000000001, 'samples': 332800, 'steps': 649, 'loss/train': 6.108962535858154} +03/03/2022 14:11:07 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/03/2022 14:11:10 - INFO - codeparrot_training - Step 650: {'lr': 0.00016250000000000002, 'samples': 333312, 'steps': 650, 'loss/train': 6.814627170562744} +03/03/2022 14:11:14 - INFO - codeparrot_training - Step 651: {'lr': 0.00016275, 'samples': 333824, 'steps': 651, 'loss/train': 5.893065452575684} +03/03/2022 14:11:15 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/03/2022 14:11:19 - INFO - codeparrot_training - Step 652: {'lr': 0.000163, 'samples': 334336, 'steps': 652, 'loss/train': 6.324979305267334} +03/03/2022 14:11:22 - INFO - codeparrot_training - Step 653: {'lr': 0.00016325, 'samples': 334848, 'steps': 653, 'loss/train': 6.214859485626221} +03/03/2022 14:11:24 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/03/2022 14:11:28 - INFO - codeparrot_training - Step 654: {'lr': 0.00016350000000000002, 'samples': 335360, 'steps': 654, 'loss/train': 3.775311231613159} +03/03/2022 14:11:31 - INFO - codeparrot_training - Step 655: {'lr': 0.00016375000000000002, 'samples': 335872, 'steps': 655, 'loss/train': 6.337494850158691} +03/03/2022 14:11:34 - INFO - codeparrot_training - Step 656: {'lr': 0.000164, 'samples': 336384, 'steps': 656, 'loss/train': 5.7109694480896} +03/03/2022 14:11:34 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/03/2022 14:11:40 - INFO - codeparrot_training - Step 657: {'lr': 0.00016425, 'samples': 336896, 'steps': 657, 'loss/train': 6.214146137237549} +03/03/2022 14:11:43 - INFO - codeparrot_training - Step 658: {'lr': 0.00016450000000000001, 'samples': 337408, 'steps': 658, 'loss/train': 5.56549072265625} +03/03/2022 14:11:43 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/03/2022 14:11:48 - INFO - codeparrot_training - Step 659: {'lr': 0.00016475000000000002, 'samples': 337920, 'steps': 659, 'loss/train': 6.0047454833984375} +03/03/2022 14:11:51 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/03/2022 14:11:53 - INFO - codeparrot_training - Step 660: {'lr': 0.000165, 'samples': 338432, 'steps': 660, 'loss/train': 6.162009239196777} +03/03/2022 14:11:56 - INFO - codeparrot_training - Step 661: {'lr': 0.00016525, 'samples': 338944, 'steps': 661, 'loss/train': 6.934346675872803} +03/03/2022 14:11:59 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/03/2022 14:12:01 - INFO - codeparrot_training - Step 662: {'lr': 0.0001655, 'samples': 339456, 'steps': 662, 'loss/train': 5.5325775146484375} +03/03/2022 14:12:05 - INFO - codeparrot_training - Step 663: {'lr': 0.00016575000000000002, 'samples': 339968, 'steps': 663, 'loss/train': 5.783498287200928} +03/03/2022 14:12:07 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/03/2022 14:12:10 - INFO - codeparrot_training - Step 664: {'lr': 0.00016600000000000002, 'samples': 340480, 'steps': 664, 'loss/train': 5.984687805175781} +03/03/2022 14:12:13 - INFO - codeparrot_training - Step 665: {'lr': 0.00016625, 'samples': 340992, 'steps': 665, 'loss/train': 6.424323558807373} +03/03/2022 14:12:16 - INFO - codeparrot_training - Step 666: {'lr': 0.0001665, 'samples': 341504, 'steps': 666, 'loss/train': 5.984019756317139} +03/03/2022 14:12:16 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/03/2022 14:12:22 - INFO - codeparrot_training - Step 667: {'lr': 0.00016675000000000001, 'samples': 342016, 'steps': 667, 'loss/train': 5.237941265106201} +03/03/2022 14:12:24 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/03/2022 14:12:27 - INFO - codeparrot_training - Step 668: {'lr': 0.00016700000000000002, 'samples': 342528, 'steps': 668, 'loss/train': 5.912707805633545} +03/03/2022 14:12:30 - INFO - codeparrot_training - Step 669: {'lr': 0.00016725000000000003, 'samples': 343040, 'steps': 669, 'loss/train': 4.301641464233398} +03/03/2022 14:12:33 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/03/2022 14:12:35 - INFO - codeparrot_training - Step 670: {'lr': 0.0001675, 'samples': 343552, 'steps': 670, 'loss/train': 6.000380039215088} +03/03/2022 14:12:39 - INFO - codeparrot_training - Step 671: {'lr': 0.00016775, 'samples': 344064, 'steps': 671, 'loss/train': 3.8122336864471436} +03/03/2022 14:12:41 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/03/2022 14:12:44 - INFO - codeparrot_training - Step 672: {'lr': 0.00016800000000000002, 'samples': 344576, 'steps': 672, 'loss/train': 6.087907791137695} +03/03/2022 14:12:47 - INFO - codeparrot_training - Step 673: {'lr': 0.00016825000000000002, 'samples': 345088, 'steps': 673, 'loss/train': 5.071622371673584} +03/03/2022 14:12:50 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/03/2022 14:12:52 - INFO - codeparrot_training - Step 674: {'lr': 0.0001685, 'samples': 345600, 'steps': 674, 'loss/train': 2.5481441020965576} +03/03/2022 14:12:55 - INFO - codeparrot_training - Step 675: {'lr': 0.00016875, 'samples': 346112, 'steps': 675, 'loss/train': 4.16834020614624} +03/03/2022 14:12:58 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/03/2022 14:13:01 - INFO - codeparrot_training - Step 676: {'lr': 0.00016900000000000002, 'samples': 346624, 'steps': 676, 'loss/train': 6.831921577453613} +03/03/2022 14:13:04 - INFO - codeparrot_training - Step 677: {'lr': 0.00016925000000000002, 'samples': 347136, 'steps': 677, 'loss/train': 5.582108020782471} +03/03/2022 14:13:06 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/03/2022 14:13:09 - INFO - codeparrot_training - Step 678: {'lr': 0.00016950000000000003, 'samples': 347648, 'steps': 678, 'loss/train': 6.289055824279785} +03/03/2022 14:13:12 - INFO - codeparrot_training - Step 679: {'lr': 0.00016975, 'samples': 348160, 'steps': 679, 'loss/train': 3.1966123580932617} +03/03/2022 14:13:15 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/03/2022 14:13:18 - INFO - codeparrot_training - Step 680: {'lr': 0.00017, 'samples': 348672, 'steps': 680, 'loss/train': 6.146064281463623} +03/03/2022 14:13:21 - INFO - codeparrot_training - Step 681: {'lr': 0.00017025000000000002, 'samples': 349184, 'steps': 681, 'loss/train': 6.17354679107666} +03/03/2022 14:13:23 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/03/2022 14:13:26 - INFO - codeparrot_training - Step 682: {'lr': 0.00017050000000000002, 'samples': 349696, 'steps': 682, 'loss/train': 5.563101291656494} +03/03/2022 14:13:29 - INFO - codeparrot_training - Step 683: {'lr': 0.00017075, 'samples': 350208, 'steps': 683, 'loss/train': 5.49904727935791} +03/03/2022 14:13:31 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/03/2022 14:13:34 - INFO - codeparrot_training - Step 684: {'lr': 0.000171, 'samples': 350720, 'steps': 684, 'loss/train': 3.3620424270629883} +03/03/2022 14:13:38 - INFO - codeparrot_training - Step 685: {'lr': 0.00017125000000000002, 'samples': 351232, 'steps': 685, 'loss/train': 6.645692825317383} +03/03/2022 14:13:39 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/03/2022 14:13:43 - INFO - codeparrot_training - Step 686: {'lr': 0.00017150000000000002, 'samples': 351744, 'steps': 686, 'loss/train': 6.154056549072266} +03/03/2022 14:13:46 - INFO - codeparrot_training - Step 687: {'lr': 0.00017175000000000003, 'samples': 352256, 'steps': 687, 'loss/train': 5.51598596572876} +03/03/2022 14:13:48 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/03/2022 14:13:51 - INFO - codeparrot_training - Step 688: {'lr': 0.00017199999999999998, 'samples': 352768, 'steps': 688, 'loss/train': 5.821977615356445} +03/03/2022 14:13:55 - INFO - codeparrot_training - Step 689: {'lr': 0.00017224999999999999, 'samples': 353280, 'steps': 689, 'loss/train': 6.13981819152832} +03/03/2022 14:13:57 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/03/2022 14:14:00 - INFO - codeparrot_training - Step 690: {'lr': 0.0001725, 'samples': 353792, 'steps': 690, 'loss/train': 5.183568954467773} +03/03/2022 14:14:03 - INFO - codeparrot_training - Step 691: {'lr': 0.00017275, 'samples': 354304, 'steps': 691, 'loss/train': 6.6873626708984375} +03/03/2022 14:14:05 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/03/2022 14:14:08 - INFO - codeparrot_training - Step 692: {'lr': 0.000173, 'samples': 354816, 'steps': 692, 'loss/train': 6.312127590179443} +03/03/2022 14:14:11 - INFO - codeparrot_training - Step 693: {'lr': 0.00017324999999999998, 'samples': 355328, 'steps': 693, 'loss/train': 8.582944869995117} +03/03/2022 14:14:13 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/03/2022 14:14:17 - INFO - codeparrot_training - Step 694: {'lr': 0.0001735, 'samples': 355840, 'steps': 694, 'loss/train': 6.388099670410156} +03/03/2022 14:14:20 - INFO - codeparrot_training - Step 695: {'lr': 0.00017375, 'samples': 356352, 'steps': 695, 'loss/train': 5.452615261077881} +03/03/2022 14:14:22 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/03/2022 14:14:25 - INFO - codeparrot_training - Step 696: {'lr': 0.000174, 'samples': 356864, 'steps': 696, 'loss/train': 6.11560583114624} +03/03/2022 14:14:28 - INFO - codeparrot_training - Step 697: {'lr': 0.00017424999999999998, 'samples': 357376, 'steps': 697, 'loss/train': 6.420699119567871} +03/03/2022 14:14:30 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/03/2022 14:14:33 - INFO - codeparrot_training - Step 698: {'lr': 0.00017449999999999999, 'samples': 357888, 'steps': 698, 'loss/train': 5.491775035858154} +03/03/2022 14:14:36 - INFO - codeparrot_training - Step 699: {'lr': 0.00017475, 'samples': 358400, 'steps': 699, 'loss/train': 5.5680084228515625} +03/03/2022 14:14:38 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/03/2022 14:14:42 - INFO - codeparrot_training - Step 700: {'lr': 0.000175, 'samples': 358912, 'steps': 700, 'loss/train': 6.3367180824279785} +03/03/2022 14:14:45 - INFO - codeparrot_training - Step 701: {'lr': 0.00017525, 'samples': 359424, 'steps': 701, 'loss/train': 5.854393482208252} +03/03/2022 14:14:46 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/03/2022 14:14:50 - INFO - codeparrot_training - Step 702: {'lr': 0.00017549999999999998, 'samples': 359936, 'steps': 702, 'loss/train': 5.815999507904053} +03/03/2022 14:14:53 - INFO - codeparrot_training - Step 703: {'lr': 0.00017575, 'samples': 360448, 'steps': 703, 'loss/train': 5.308962345123291} +03/03/2022 14:14:54 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/03/2022 14:14:58 - INFO - codeparrot_training - Step 704: {'lr': 0.000176, 'samples': 360960, 'steps': 704, 'loss/train': 6.1263427734375} +03/03/2022 14:15:02 - INFO - codeparrot_training - Step 705: {'lr': 0.00017625, 'samples': 361472, 'steps': 705, 'loss/train': 5.6121625900268555} +03/03/2022 14:15:03 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/03/2022 14:15:07 - INFO - codeparrot_training - Step 706: {'lr': 0.00017649999999999998, 'samples': 361984, 'steps': 706, 'loss/train': 5.607957363128662} +03/03/2022 14:15:10 - INFO - codeparrot_training - Step 707: {'lr': 0.00017675, 'samples': 362496, 'steps': 707, 'loss/train': 5.5100483894348145} +03/03/2022 14:15:11 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/03/2022 14:15:15 - INFO - codeparrot_training - Step 708: {'lr': 0.000177, 'samples': 363008, 'steps': 708, 'loss/train': 5.424149990081787} +03/03/2022 14:15:18 - INFO - codeparrot_training - Step 709: {'lr': 0.00017725, 'samples': 363520, 'steps': 709, 'loss/train': 5.624175071716309} +03/03/2022 14:15:20 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/03/2022 14:15:24 - INFO - codeparrot_training - Step 710: {'lr': 0.0001775, 'samples': 364032, 'steps': 710, 'loss/train': 5.415778636932373} +03/03/2022 14:15:27 - INFO - codeparrot_training - Step 711: {'lr': 0.00017774999999999998, 'samples': 364544, 'steps': 711, 'loss/train': 6.274073123931885} +03/03/2022 14:15:28 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/03/2022 14:15:32 - INFO - codeparrot_training - Step 712: {'lr': 0.000178, 'samples': 365056, 'steps': 712, 'loss/train': 6.082661151885986} +03/03/2022 14:15:35 - INFO - codeparrot_training - Step 713: {'lr': 0.00017825, 'samples': 365568, 'steps': 713, 'loss/train': 4.292624473571777} +03/03/2022 14:15:36 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/03/2022 14:15:41 - INFO - codeparrot_training - Step 714: {'lr': 0.0001785, 'samples': 366080, 'steps': 714, 'loss/train': 5.6145453453063965} +03/03/2022 14:15:44 - INFO - codeparrot_training - Step 715: {'lr': 0.00017875, 'samples': 366592, 'steps': 715, 'loss/train': 5.490015983581543} +03/03/2022 14:15:45 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/03/2022 14:15:49 - INFO - codeparrot_training - Step 716: {'lr': 0.000179, 'samples': 367104, 'steps': 716, 'loss/train': 5.876565456390381} +03/03/2022 14:15:52 - INFO - codeparrot_training - Step 717: {'lr': 0.00017925, 'samples': 367616, 'steps': 717, 'loss/train': 4.843731880187988} +03/03/2022 14:15:53 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/03/2022 14:15:57 - INFO - codeparrot_training - Step 718: {'lr': 0.0001795, 'samples': 368128, 'steps': 718, 'loss/train': 5.822505474090576} +03/03/2022 14:16:01 - INFO - codeparrot_training - Step 719: {'lr': 0.00017975, 'samples': 368640, 'steps': 719, 'loss/train': 6.161307334899902} +03/03/2022 14:16:02 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/03/2022 14:16:06 - INFO - codeparrot_training - Step 720: {'lr': 0.00017999999999999998, 'samples': 369152, 'steps': 720, 'loss/train': 4.712026119232178} +03/03/2022 14:16:09 - INFO - codeparrot_training - Step 721: {'lr': 0.00018025, 'samples': 369664, 'steps': 721, 'loss/train': 5.958030700683594} +03/03/2022 14:16:12 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/03/2022 14:16:14 - INFO - codeparrot_training - Step 722: {'lr': 0.0001805, 'samples': 370176, 'steps': 722, 'loss/train': 7.373122692108154} +03/03/2022 14:16:18 - INFO - codeparrot_training - Step 723: {'lr': 0.00018075, 'samples': 370688, 'steps': 723, 'loss/train': 5.89974308013916} +03/03/2022 14:16:20 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/03/2022 14:16:23 - INFO - codeparrot_training - Step 724: {'lr': 0.000181, 'samples': 371200, 'steps': 724, 'loss/train': 5.497884273529053} +03/03/2022 14:16:26 - INFO - codeparrot_training - Step 725: {'lr': 0.00018125, 'samples': 371712, 'steps': 725, 'loss/train': 6.3494696617126465} +03/03/2022 14:16:28 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/03/2022 14:16:31 - INFO - codeparrot_training - Step 726: {'lr': 0.0001815, 'samples': 372224, 'steps': 726, 'loss/train': 7.18757438659668} +03/03/2022 14:16:35 - INFO - codeparrot_training - Step 727: {'lr': 0.00018175, 'samples': 372736, 'steps': 727, 'loss/train': 5.6792120933532715} +03/03/2022 14:16:37 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/03/2022 14:16:40 - INFO - codeparrot_training - Step 728: {'lr': 0.000182, 'samples': 373248, 'steps': 728, 'loss/train': 5.624897003173828} +03/03/2022 14:16:43 - INFO - codeparrot_training - Step 729: {'lr': 0.00018225, 'samples': 373760, 'steps': 729, 'loss/train': 6.5129618644714355} +03/03/2022 14:16:45 - INFO - codeparrot_training - Skipping example with length 169 (seq_length=1024) +03/03/2022 14:16:48 - INFO - codeparrot_training - Step 730: {'lr': 0.0001825, 'samples': 374272, 'steps': 730, 'loss/train': 6.356884956359863} +03/03/2022 14:16:51 - INFO - codeparrot_training - Step 731: {'lr': 0.00018275, 'samples': 374784, 'steps': 731, 'loss/train': 5.9126877784729} +03/03/2022 14:16:53 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/03/2022 14:16:57 - INFO - codeparrot_training - Step 732: {'lr': 0.000183, 'samples': 375296, 'steps': 732, 'loss/train': 5.680290699005127} +03/03/2022 14:17:00 - INFO - codeparrot_training - Step 733: {'lr': 0.00018325, 'samples': 375808, 'steps': 733, 'loss/train': 6.887684345245361} +03/03/2022 14:17:01 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/03/2022 14:17:05 - INFO - codeparrot_training - Step 734: {'lr': 0.0001835, 'samples': 376320, 'steps': 734, 'loss/train': 6.368136882781982} +03/03/2022 14:17:08 - INFO - codeparrot_training - Step 735: {'lr': 0.00018375, 'samples': 376832, 'steps': 735, 'loss/train': 6.857802391052246} +03/03/2022 14:17:10 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/03/2022 14:17:14 - INFO - codeparrot_training - Step 736: {'lr': 0.000184, 'samples': 377344, 'steps': 736, 'loss/train': 5.284829616546631} +03/03/2022 14:17:17 - INFO - codeparrot_training - Step 737: {'lr': 0.00018425, 'samples': 377856, 'steps': 737, 'loss/train': 5.682382583618164} +03/03/2022 14:17:18 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/03/2022 14:17:22 - INFO - codeparrot_training - Step 738: {'lr': 0.0001845, 'samples': 378368, 'steps': 738, 'loss/train': 5.7081146240234375} +03/03/2022 14:17:25 - INFO - codeparrot_training - Step 739: {'lr': 0.00018475, 'samples': 378880, 'steps': 739, 'loss/train': 5.002049446105957} +03/03/2022 14:17:27 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/03/2022 14:17:30 - INFO - codeparrot_training - Step 740: {'lr': 0.000185, 'samples': 379392, 'steps': 740, 'loss/train': 5.609866142272949} +03/03/2022 14:17:34 - INFO - codeparrot_training - Step 741: {'lr': 0.00018525, 'samples': 379904, 'steps': 741, 'loss/train': 5.849599361419678} +03/03/2022 14:17:35 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/03/2022 14:17:39 - INFO - codeparrot_training - Step 742: {'lr': 0.0001855, 'samples': 380416, 'steps': 742, 'loss/train': 6.124844074249268} +03/03/2022 14:17:42 - INFO - codeparrot_training - Step 743: {'lr': 0.00018575000000000002, 'samples': 380928, 'steps': 743, 'loss/train': 6.352612495422363} +03/03/2022 14:17:43 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/03/2022 14:17:47 - INFO - codeparrot_training - Step 744: {'lr': 0.000186, 'samples': 381440, 'steps': 744, 'loss/train': 8.90346908569336} +03/03/2022 14:17:50 - INFO - codeparrot_training - Step 745: {'lr': 0.00018625, 'samples': 381952, 'steps': 745, 'loss/train': 5.728419303894043} +03/03/2022 14:17:52 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/03/2022 14:17:56 - INFO - codeparrot_training - Step 746: {'lr': 0.0001865, 'samples': 382464, 'steps': 746, 'loss/train': 6.3111186027526855} +03/03/2022 14:17:59 - INFO - codeparrot_training - Step 747: {'lr': 0.00018675, 'samples': 382976, 'steps': 747, 'loss/train': 5.691380023956299} +03/03/2022 14:18:00 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/03/2022 14:18:04 - INFO - codeparrot_training - Step 748: {'lr': 0.000187, 'samples': 383488, 'steps': 748, 'loss/train': 6.1752495765686035} +03/03/2022 14:18:07 - INFO - codeparrot_training - Step 749: {'lr': 0.00018725, 'samples': 384000, 'steps': 749, 'loss/train': 6.014646053314209} +03/03/2022 14:18:08 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/03/2022 14:18:12 - INFO - codeparrot_training - Step 750: {'lr': 0.0001875, 'samples': 384512, 'steps': 750, 'loss/train': 3.5758557319641113} +03/03/2022 14:18:16 - INFO - codeparrot_training - Step 751: {'lr': 0.00018775, 'samples': 385024, 'steps': 751, 'loss/train': 6.044946670532227} +03/03/2022 14:18:17 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/03/2022 14:18:21 - INFO - codeparrot_training - Step 752: {'lr': 0.00018800000000000002, 'samples': 385536, 'steps': 752, 'loss/train': 7.377934455871582} +03/03/2022 14:18:24 - INFO - codeparrot_training - Step 753: {'lr': 0.00018825, 'samples': 386048, 'steps': 753, 'loss/train': 5.684968948364258} +03/03/2022 14:18:26 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/03/2022 14:18:29 - INFO - codeparrot_training - Step 754: {'lr': 0.0001885, 'samples': 386560, 'steps': 754, 'loss/train': 5.705610275268555} +03/03/2022 14:18:32 - INFO - codeparrot_training - Step 755: {'lr': 0.00018875, 'samples': 387072, 'steps': 755, 'loss/train': 5.378270149230957} +03/03/2022 14:18:34 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 14:18:38 - INFO - codeparrot_training - Step 756: {'lr': 0.000189, 'samples': 387584, 'steps': 756, 'loss/train': 2.837033987045288} +03/03/2022 14:18:41 - INFO - codeparrot_training - Step 757: {'lr': 0.00018925, 'samples': 388096, 'steps': 757, 'loss/train': 2.5040268898010254} +03/03/2022 14:18:42 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/03/2022 14:18:46 - INFO - codeparrot_training - Step 758: {'lr': 0.0001895, 'samples': 388608, 'steps': 758, 'loss/train': 2.4035027027130127} +03/03/2022 14:18:49 - INFO - codeparrot_training - Step 759: {'lr': 0.00018975, 'samples': 389120, 'steps': 759, 'loss/train': 6.120370388031006} +03/03/2022 14:18:50 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/03/2022 14:18:55 - INFO - codeparrot_training - Step 760: {'lr': 0.00019, 'samples': 389632, 'steps': 760, 'loss/train': 6.647356033325195} +03/03/2022 14:18:58 - INFO - codeparrot_training - Step 761: {'lr': 0.00019025000000000002, 'samples': 390144, 'steps': 761, 'loss/train': 6.357416152954102} +03/03/2022 14:18:59 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/03/2022 14:19:03 - INFO - codeparrot_training - Step 762: {'lr': 0.0001905, 'samples': 390656, 'steps': 762, 'loss/train': 5.767858028411865} +03/03/2022 14:19:06 - INFO - codeparrot_training - Step 763: {'lr': 0.00019075, 'samples': 391168, 'steps': 763, 'loss/train': 5.526143550872803} +03/03/2022 14:19:07 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/03/2022 14:19:12 - INFO - codeparrot_training - Step 764: {'lr': 0.000191, 'samples': 391680, 'steps': 764, 'loss/train': 5.360974311828613} +03/03/2022 14:19:15 - INFO - codeparrot_training - Step 765: {'lr': 0.00019125000000000001, 'samples': 392192, 'steps': 765, 'loss/train': 6.272588729858398} +03/03/2022 14:19:16 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/03/2022 14:19:20 - INFO - codeparrot_training - Step 766: {'lr': 0.00019150000000000002, 'samples': 392704, 'steps': 766, 'loss/train': 6.959072589874268} +03/03/2022 14:19:23 - INFO - codeparrot_training - Step 767: {'lr': 0.00019175, 'samples': 393216, 'steps': 767, 'loss/train': 6.202326774597168} +03/03/2022 14:19:24 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/03/2022 14:19:28 - INFO - codeparrot_training - Step 768: {'lr': 0.000192, 'samples': 393728, 'steps': 768, 'loss/train': 5.654978275299072} +03/03/2022 14:19:32 - INFO - codeparrot_training - Step 769: {'lr': 0.00019225, 'samples': 394240, 'steps': 769, 'loss/train': 6.381503582000732} +03/03/2022 14:19:33 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/03/2022 14:19:37 - INFO - codeparrot_training - Step 770: {'lr': 0.00019250000000000002, 'samples': 394752, 'steps': 770, 'loss/train': 6.107685565948486} +03/03/2022 14:19:40 - INFO - codeparrot_training - Step 771: {'lr': 0.00019275, 'samples': 395264, 'steps': 771, 'loss/train': 5.950009822845459} +03/03/2022 14:19:41 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/03/2022 14:19:45 - INFO - codeparrot_training - Step 772: {'lr': 0.000193, 'samples': 395776, 'steps': 772, 'loss/train': 5.617612361907959} +03/03/2022 14:19:48 - INFO - codeparrot_training - Step 773: {'lr': 0.00019325, 'samples': 396288, 'steps': 773, 'loss/train': 6.658962249755859} +03/03/2022 14:19:49 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/03/2022 14:19:54 - INFO - codeparrot_training - Step 774: {'lr': 0.00019350000000000001, 'samples': 396800, 'steps': 774, 'loss/train': 4.896778583526611} +03/03/2022 14:19:57 - INFO - codeparrot_training - Step 775: {'lr': 0.00019375000000000002, 'samples': 397312, 'steps': 775, 'loss/train': 5.210784912109375} +03/03/2022 14:19:58 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/03/2022 14:20:02 - INFO - codeparrot_training - Step 776: {'lr': 0.000194, 'samples': 397824, 'steps': 776, 'loss/train': 6.26840353012085} +03/03/2022 14:20:05 - INFO - codeparrot_training - Step 777: {'lr': 0.00019425, 'samples': 398336, 'steps': 777, 'loss/train': 5.268444061279297} +03/03/2022 14:20:06 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/03/2022 14:20:11 - INFO - codeparrot_training - Step 778: {'lr': 0.0001945, 'samples': 398848, 'steps': 778, 'loss/train': 5.089536190032959} +03/03/2022 14:20:14 - INFO - codeparrot_training - Step 779: {'lr': 0.00019475000000000002, 'samples': 399360, 'steps': 779, 'loss/train': 5.938715934753418} +03/03/2022 14:20:14 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/03/2022 14:20:19 - INFO - codeparrot_training - Step 780: {'lr': 0.00019500000000000002, 'samples': 399872, 'steps': 780, 'loss/train': 2.5215647220611572} +03/03/2022 14:20:22 - INFO - codeparrot_training - Step 781: {'lr': 0.00019525, 'samples': 400384, 'steps': 781, 'loss/train': 6.110848903656006} +03/03/2022 14:20:22 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/03/2022 14:20:27 - INFO - codeparrot_training - Step 782: {'lr': 0.0001955, 'samples': 400896, 'steps': 782, 'loss/train': 5.539410591125488} +03/03/2022 14:20:31 - INFO - codeparrot_training - Step 783: {'lr': 0.00019575000000000001, 'samples': 401408, 'steps': 783, 'loss/train': 5.276137828826904} +03/03/2022 14:20:31 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/03/2022 14:20:36 - INFO - codeparrot_training - Step 784: {'lr': 0.00019600000000000002, 'samples': 401920, 'steps': 784, 'loss/train': 5.616477012634277} +03/03/2022 14:20:39 - INFO - codeparrot_training - Step 785: {'lr': 0.00019625, 'samples': 402432, 'steps': 785, 'loss/train': 5.870465278625488} +03/03/2022 14:20:40 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/03/2022 14:20:44 - INFO - codeparrot_training - Step 786: {'lr': 0.0001965, 'samples': 402944, 'steps': 786, 'loss/train': 5.635030746459961} +03/03/2022 14:20:47 - INFO - codeparrot_training - Step 787: {'lr': 0.00019675, 'samples': 403456, 'steps': 787, 'loss/train': 6.045782566070557} +03/03/2022 14:20:48 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/03/2022 14:20:53 - INFO - codeparrot_training - Step 788: {'lr': 0.00019700000000000002, 'samples': 403968, 'steps': 788, 'loss/train': 5.126619815826416} +03/03/2022 14:20:56 - INFO - codeparrot_training - Step 789: {'lr': 0.00019725000000000002, 'samples': 404480, 'steps': 789, 'loss/train': 5.62884521484375} +03/03/2022 14:20:56 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/03/2022 14:21:01 - INFO - codeparrot_training - Step 790: {'lr': 0.0001975, 'samples': 404992, 'steps': 790, 'loss/train': 6.909239768981934} +03/03/2022 14:21:04 - INFO - codeparrot_training - Step 791: {'lr': 0.00019775, 'samples': 405504, 'steps': 791, 'loss/train': 6.068647861480713} +03/03/2022 14:21:05 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/03/2022 14:21:09 - INFO - codeparrot_training - Step 792: {'lr': 0.00019800000000000002, 'samples': 406016, 'steps': 792, 'loss/train': 6.472655773162842} +03/03/2022 14:21:12 - INFO - codeparrot_training - Step 793: {'lr': 0.00019825000000000002, 'samples': 406528, 'steps': 793, 'loss/train': 5.719446182250977} +03/03/2022 14:21:12 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/03/2022 14:21:18 - INFO - codeparrot_training - Step 794: {'lr': 0.00019850000000000003, 'samples': 407040, 'steps': 794, 'loss/train': 6.148656368255615} +03/03/2022 14:21:21 - INFO - codeparrot_training - Step 795: {'lr': 0.00019875, 'samples': 407552, 'steps': 795, 'loss/train': 5.593094348907471} +03/03/2022 14:21:21 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/03/2022 14:21:26 - INFO - codeparrot_training - Step 796: {'lr': 0.000199, 'samples': 408064, 'steps': 796, 'loss/train': 6.0885443687438965} +03/03/2022 14:21:29 - INFO - codeparrot_training - Step 797: {'lr': 0.00019925000000000002, 'samples': 408576, 'steps': 797, 'loss/train': 5.952763557434082} +03/03/2022 14:21:29 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/03/2022 14:21:34 - INFO - codeparrot_training - Step 798: {'lr': 0.00019950000000000002, 'samples': 409088, 'steps': 798, 'loss/train': 5.247378826141357} +03/03/2022 14:21:37 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/03/2022 14:21:40 - INFO - codeparrot_training - Step 799: {'lr': 0.00019975, 'samples': 409600, 'steps': 799, 'loss/train': 6.183495998382568} +03/03/2022 14:21:43 - INFO - codeparrot_training - Step 800: {'lr': 0.0002, 'samples': 410112, 'steps': 800, 'loss/train': 6.045914649963379} +03/03/2022 14:21:45 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/03/2022 14:21:48 - INFO - codeparrot_training - Step 801: {'lr': 0.00020025000000000002, 'samples': 410624, 'steps': 801, 'loss/train': 5.351278305053711} +03/03/2022 14:21:51 - INFO - codeparrot_training - Step 802: {'lr': 0.00020050000000000002, 'samples': 411136, 'steps': 802, 'loss/train': 5.965078353881836} +03/03/2022 14:21:53 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/03/2022 14:21:56 - INFO - codeparrot_training - Step 803: {'lr': 0.00020075000000000003, 'samples': 411648, 'steps': 803, 'loss/train': 5.071366310119629} +03/03/2022 14:22:00 - INFO - codeparrot_training - Step 804: {'lr': 0.000201, 'samples': 412160, 'steps': 804, 'loss/train': 5.935673236846924} +03/03/2022 14:22:02 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/03/2022 14:22:05 - INFO - codeparrot_training - Step 805: {'lr': 0.00020125, 'samples': 412672, 'steps': 805, 'loss/train': 5.727970600128174} +03/03/2022 14:22:08 - INFO - codeparrot_training - Step 806: {'lr': 0.00020150000000000002, 'samples': 413184, 'steps': 806, 'loss/train': 5.5496931076049805} +03/03/2022 14:22:10 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/03/2022 14:22:13 - INFO - codeparrot_training - Step 807: {'lr': 0.00020175000000000003, 'samples': 413696, 'steps': 807, 'loss/train': 6.029293060302734} +03/03/2022 14:22:16 - INFO - codeparrot_training - Step 808: {'lr': 0.000202, 'samples': 414208, 'steps': 808, 'loss/train': 2.7012906074523926} +03/03/2022 14:22:18 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/03/2022 14:22:22 - INFO - codeparrot_training - Step 809: {'lr': 0.00020225, 'samples': 414720, 'steps': 809, 'loss/train': 5.281220436096191} +03/03/2022 14:22:25 - INFO - codeparrot_training - Step 810: {'lr': 0.00020250000000000002, 'samples': 415232, 'steps': 810, 'loss/train': 5.184370994567871} +03/03/2022 14:22:26 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/03/2022 14:22:30 - INFO - codeparrot_training - Step 811: {'lr': 0.00020275000000000002, 'samples': 415744, 'steps': 811, 'loss/train': 6.637288570404053} +03/03/2022 14:22:33 - INFO - codeparrot_training - Step 812: {'lr': 0.00020300000000000003, 'samples': 416256, 'steps': 812, 'loss/train': 4.509408473968506} +03/03/2022 14:22:35 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/03/2022 14:22:38 - INFO - codeparrot_training - Step 813: {'lr': 0.00020324999999999998, 'samples': 416768, 'steps': 813, 'loss/train': 6.244888782501221} +03/03/2022 14:22:42 - INFO - codeparrot_training - Step 814: {'lr': 0.00020349999999999999, 'samples': 417280, 'steps': 814, 'loss/train': 4.975861072540283} +03/03/2022 14:22:43 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/03/2022 14:22:47 - INFO - codeparrot_training - Step 815: {'lr': 0.00020375, 'samples': 417792, 'steps': 815, 'loss/train': 5.894258975982666} +03/03/2022 14:22:50 - INFO - codeparrot_training - Step 816: {'lr': 0.000204, 'samples': 418304, 'steps': 816, 'loss/train': 5.171676158905029} +03/03/2022 14:22:51 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/03/2022 14:22:55 - INFO - codeparrot_training - Step 817: {'lr': 0.00020425, 'samples': 418816, 'steps': 817, 'loss/train': 6.164328098297119} +03/03/2022 14:22:58 - INFO - codeparrot_training - Step 818: {'lr': 0.00020449999999999998, 'samples': 419328, 'steps': 818, 'loss/train': 4.458515167236328} +03/03/2022 14:23:00 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/03/2022 14:23:04 - INFO - codeparrot_training - Step 819: {'lr': 0.00020475, 'samples': 419840, 'steps': 819, 'loss/train': 5.796923637390137} +03/03/2022 14:23:07 - INFO - codeparrot_training - Step 820: {'lr': 0.000205, 'samples': 420352, 'steps': 820, 'loss/train': 5.776224613189697} +03/03/2022 14:23:09 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/03/2022 14:23:12 - INFO - codeparrot_training - Step 821: {'lr': 0.00020525, 'samples': 420864, 'steps': 821, 'loss/train': 4.670717716217041} +03/03/2022 14:23:15 - INFO - codeparrot_training - Step 822: {'lr': 0.00020549999999999998, 'samples': 421376, 'steps': 822, 'loss/train': 5.6624531745910645} +03/03/2022 14:23:17 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/03/2022 14:23:20 - INFO - codeparrot_training - Step 823: {'lr': 0.00020575, 'samples': 421888, 'steps': 823, 'loss/train': 4.882242202758789} +03/03/2022 14:23:24 - INFO - codeparrot_training - Step 824: {'lr': 0.000206, 'samples': 422400, 'steps': 824, 'loss/train': 5.353019714355469} +03/03/2022 14:23:25 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/03/2022 14:23:29 - INFO - codeparrot_training - Step 825: {'lr': 0.00020625, 'samples': 422912, 'steps': 825, 'loss/train': 4.812110424041748} +03/03/2022 14:23:32 - INFO - codeparrot_training - Step 826: {'lr': 0.0002065, 'samples': 423424, 'steps': 826, 'loss/train': 5.197993755340576} +03/03/2022 14:23:33 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/03/2022 14:23:37 - INFO - codeparrot_training - Step 827: {'lr': 0.00020674999999999998, 'samples': 423936, 'steps': 827, 'loss/train': 6.301405906677246} +03/03/2022 14:23:40 - INFO - codeparrot_training - Step 828: {'lr': 0.000207, 'samples': 424448, 'steps': 828, 'loss/train': 5.564136505126953} +03/03/2022 14:23:41 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/03/2022 14:23:46 - INFO - codeparrot_training - Step 829: {'lr': 0.00020725, 'samples': 424960, 'steps': 829, 'loss/train': 4.753064155578613} +03/03/2022 14:23:49 - INFO - codeparrot_training - Step 830: {'lr': 0.0002075, 'samples': 425472, 'steps': 830, 'loss/train': 4.964693546295166} +03/03/2022 14:23:50 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/03/2022 14:23:54 - INFO - codeparrot_training - Step 831: {'lr': 0.00020774999999999998, 'samples': 425984, 'steps': 831, 'loss/train': 5.069149494171143} +03/03/2022 14:23:57 - INFO - codeparrot_training - Step 832: {'lr': 0.000208, 'samples': 426496, 'steps': 832, 'loss/train': 5.400967597961426} +03/03/2022 14:23:58 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/03/2022 14:24:03 - INFO - codeparrot_training - Step 833: {'lr': 0.00020825, 'samples': 427008, 'steps': 833, 'loss/train': 6.3588104248046875} +03/03/2022 14:24:06 - INFO - codeparrot_training - Step 834: {'lr': 0.0002085, 'samples': 427520, 'steps': 834, 'loss/train': 5.880354404449463} +03/03/2022 14:24:06 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/03/2022 14:24:11 - INFO - codeparrot_training - Step 835: {'lr': 0.00020875, 'samples': 428032, 'steps': 835, 'loss/train': 5.498161792755127} +03/03/2022 14:24:14 - INFO - codeparrot_training - Step 836: {'lr': 0.00020899999999999998, 'samples': 428544, 'steps': 836, 'loss/train': 5.577434062957764} +03/03/2022 14:24:15 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/03/2022 14:24:19 - INFO - codeparrot_training - Step 837: {'lr': 0.00020925, 'samples': 429056, 'steps': 837, 'loss/train': 5.179044723510742} +03/03/2022 14:24:23 - INFO - codeparrot_training - Step 838: {'lr': 0.0002095, 'samples': 429568, 'steps': 838, 'loss/train': 5.296634197235107} +03/03/2022 14:24:23 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/03/2022 14:24:28 - INFO - codeparrot_training - Step 839: {'lr': 0.00020975, 'samples': 430080, 'steps': 839, 'loss/train': 5.37893009185791} +03/03/2022 14:24:31 - INFO - codeparrot_training - Step 840: {'lr': 0.00021, 'samples': 430592, 'steps': 840, 'loss/train': 5.557505130767822} +03/03/2022 14:24:31 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/03/2022 14:24:36 - INFO - codeparrot_training - Step 841: {'lr': 0.00021025, 'samples': 431104, 'steps': 841, 'loss/train': 6.843445777893066} +03/03/2022 14:24:39 - INFO - codeparrot_training - Step 842: {'lr': 0.0002105, 'samples': 431616, 'steps': 842, 'loss/train': 6.06321382522583} +03/03/2022 14:24:40 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/03/2022 14:24:45 - INFO - codeparrot_training - Step 843: {'lr': 0.00021075, 'samples': 432128, 'steps': 843, 'loss/train': 5.441623210906982} +03/03/2022 14:24:48 - INFO - codeparrot_training - Step 844: {'lr': 0.000211, 'samples': 432640, 'steps': 844, 'loss/train': 5.4186506271362305} +03/03/2022 14:24:48 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/03/2022 14:24:53 - INFO - codeparrot_training - Step 845: {'lr': 0.00021124999999999998, 'samples': 433152, 'steps': 845, 'loss/train': 7.605123996734619} +03/03/2022 14:24:57 - INFO - codeparrot_training - Step 846: {'lr': 0.0002115, 'samples': 433664, 'steps': 846, 'loss/train': 6.859156131744385} +03/03/2022 14:24:58 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/03/2022 14:25:02 - INFO - codeparrot_training - Step 847: {'lr': 0.00021175, 'samples': 434176, 'steps': 847, 'loss/train': 4.927143573760986} +03/03/2022 14:25:05 - INFO - codeparrot_training - Step 848: {'lr': 0.000212, 'samples': 434688, 'steps': 848, 'loss/train': 5.4894609451293945} +03/03/2022 14:25:06 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/03/2022 14:25:10 - INFO - codeparrot_training - Step 849: {'lr': 0.00021225, 'samples': 435200, 'steps': 849, 'loss/train': 5.905498027801514} +03/03/2022 14:25:13 - INFO - codeparrot_training - Step 850: {'lr': 0.0002125, 'samples': 435712, 'steps': 850, 'loss/train': 6.350372791290283} +03/03/2022 14:25:15 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/03/2022 14:25:18 - INFO - codeparrot_training - Step 851: {'lr': 0.00021275, 'samples': 436224, 'steps': 851, 'loss/train': 5.40626859664917} +03/03/2022 14:25:22 - INFO - codeparrot_training - Step 852: {'lr': 0.000213, 'samples': 436736, 'steps': 852, 'loss/train': 6.239688396453857} +03/03/2022 14:25:23 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/03/2022 14:25:27 - INFO - codeparrot_training - Step 853: {'lr': 0.00021325, 'samples': 437248, 'steps': 853, 'loss/train': 6.076501369476318} +03/03/2022 14:25:30 - INFO - codeparrot_training - Step 854: {'lr': 0.0002135, 'samples': 437760, 'steps': 854, 'loss/train': 4.657830715179443} +03/03/2022 14:25:31 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/03/2022 14:25:35 - INFO - codeparrot_training - Step 855: {'lr': 0.00021375, 'samples': 438272, 'steps': 855, 'loss/train': 4.943812370300293} +03/03/2022 14:25:39 - INFO - codeparrot_training - Step 856: {'lr': 0.000214, 'samples': 438784, 'steps': 856, 'loss/train': 6.044374942779541} +03/03/2022 14:25:40 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/03/2022 14:25:44 - INFO - codeparrot_training - Step 857: {'lr': 0.00021425, 'samples': 439296, 'steps': 857, 'loss/train': 5.943901062011719} +03/03/2022 14:25:47 - INFO - codeparrot_training - Step 858: {'lr': 0.0002145, 'samples': 439808, 'steps': 858, 'loss/train': 5.431617259979248} +03/03/2022 14:25:50 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/03/2022 14:25:53 - INFO - codeparrot_training - Step 859: {'lr': 0.00021475, 'samples': 440320, 'steps': 859, 'loss/train': 6.063345432281494} +03/03/2022 14:25:56 - INFO - codeparrot_training - Step 860: {'lr': 0.000215, 'samples': 440832, 'steps': 860, 'loss/train': 5.475767612457275} +03/03/2022 14:25:58 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/03/2022 14:26:01 - INFO - codeparrot_training - Step 861: {'lr': 0.00021525, 'samples': 441344, 'steps': 861, 'loss/train': 5.581014633178711} +03/03/2022 14:26:04 - INFO - codeparrot_training - Step 862: {'lr': 0.0002155, 'samples': 441856, 'steps': 862, 'loss/train': 5.2505784034729} +03/03/2022 14:26:06 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/03/2022 14:26:09 - INFO - codeparrot_training - Step 863: {'lr': 0.00021575, 'samples': 442368, 'steps': 863, 'loss/train': 5.044404029846191} +03/03/2022 14:26:13 - INFO - codeparrot_training - Step 864: {'lr': 0.000216, 'samples': 442880, 'steps': 864, 'loss/train': 5.825626850128174} +03/03/2022 14:26:14 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/03/2022 14:26:18 - INFO - codeparrot_training - Step 865: {'lr': 0.00021625, 'samples': 443392, 'steps': 865, 'loss/train': 6.18358850479126} +03/03/2022 14:26:21 - INFO - codeparrot_training - Step 866: {'lr': 0.0002165, 'samples': 443904, 'steps': 866, 'loss/train': 5.078124046325684} +03/03/2022 14:26:23 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/03/2022 14:26:26 - INFO - codeparrot_training - Step 867: {'lr': 0.00021675, 'samples': 444416, 'steps': 867, 'loss/train': 5.569344997406006} +03/03/2022 14:26:29 - INFO - codeparrot_training - Step 868: {'lr': 0.00021700000000000002, 'samples': 444928, 'steps': 868, 'loss/train': 4.874898433685303} +03/03/2022 14:26:31 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/03/2022 14:26:35 - INFO - codeparrot_training - Step 869: {'lr': 0.00021725, 'samples': 445440, 'steps': 869, 'loss/train': 5.113021373748779} +03/03/2022 14:26:38 - INFO - codeparrot_training - Step 870: {'lr': 0.0002175, 'samples': 445952, 'steps': 870, 'loss/train': 5.168470859527588} +03/03/2022 14:26:39 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/03/2022 14:26:43 - INFO - codeparrot_training - Step 871: {'lr': 0.00021775, 'samples': 446464, 'steps': 871, 'loss/train': 5.251348495483398} +03/03/2022 14:26:46 - INFO - codeparrot_training - Step 872: {'lr': 0.000218, 'samples': 446976, 'steps': 872, 'loss/train': 2.1578965187072754} +03/03/2022 14:26:47 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/03/2022 14:26:51 - INFO - codeparrot_training - Step 873: {'lr': 0.00021825, 'samples': 447488, 'steps': 873, 'loss/train': 5.851541519165039} +03/03/2022 14:26:55 - INFO - codeparrot_training - Step 874: {'lr': 0.0002185, 'samples': 448000, 'steps': 874, 'loss/train': 5.197221279144287} +03/03/2022 14:26:56 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/03/2022 14:27:00 - INFO - codeparrot_training - Step 875: {'lr': 0.00021875, 'samples': 448512, 'steps': 875, 'loss/train': 4.8866286277771} +03/03/2022 14:27:03 - INFO - codeparrot_training - Step 876: {'lr': 0.000219, 'samples': 449024, 'steps': 876, 'loss/train': 6.937936305999756} +03/03/2022 14:27:04 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/03/2022 14:27:08 - INFO - codeparrot_training - Step 877: {'lr': 0.00021925000000000002, 'samples': 449536, 'steps': 877, 'loss/train': 6.003185749053955} +03/03/2022 14:27:11 - INFO - codeparrot_training - Step 878: {'lr': 0.0002195, 'samples': 450048, 'steps': 878, 'loss/train': 5.534326076507568} +03/03/2022 14:27:12 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 14:27:17 - INFO - codeparrot_training - Step 879: {'lr': 0.00021975, 'samples': 450560, 'steps': 879, 'loss/train': 7.85198974609375} +03/03/2022 14:27:20 - INFO - codeparrot_training - Step 880: {'lr': 0.00022, 'samples': 451072, 'steps': 880, 'loss/train': 5.007497787475586} +03/03/2022 14:27:21 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/03/2022 14:27:25 - INFO - codeparrot_training - Step 881: {'lr': 0.00022025000000000001, 'samples': 451584, 'steps': 881, 'loss/train': 5.695184230804443} +03/03/2022 14:27:28 - INFO - codeparrot_training - Step 882: {'lr': 0.0002205, 'samples': 452096, 'steps': 882, 'loss/train': 6.506006717681885} +03/03/2022 14:27:29 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/03/2022 14:27:33 - INFO - codeparrot_training - Step 883: {'lr': 0.00022075, 'samples': 452608, 'steps': 883, 'loss/train': 4.965909957885742} +03/03/2022 14:27:37 - INFO - codeparrot_training - Step 884: {'lr': 0.000221, 'samples': 453120, 'steps': 884, 'loss/train': 5.829873561859131} +03/03/2022 14:27:37 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/03/2022 14:27:42 - INFO - codeparrot_training - Step 885: {'lr': 0.00022125, 'samples': 453632, 'steps': 885, 'loss/train': 5.798379421234131} +03/03/2022 14:27:45 - INFO - codeparrot_training - Step 886: {'lr': 0.00022150000000000002, 'samples': 454144, 'steps': 886, 'loss/train': 5.840906143188477} +03/03/2022 14:27:45 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/03/2022 14:27:50 - INFO - codeparrot_training - Step 887: {'lr': 0.00022175, 'samples': 454656, 'steps': 887, 'loss/train': 5.298375129699707} +03/03/2022 14:27:53 - INFO - codeparrot_training - Step 888: {'lr': 0.000222, 'samples': 455168, 'steps': 888, 'loss/train': 5.826112747192383} +03/03/2022 14:27:53 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/03/2022 14:27:58 - INFO - codeparrot_training - Step 889: {'lr': 0.00022225, 'samples': 455680, 'steps': 889, 'loss/train': 5.942397594451904} +03/03/2022 14:28:02 - INFO - codeparrot_training - Step 890: {'lr': 0.00022250000000000001, 'samples': 456192, 'steps': 890, 'loss/train': 5.465619087219238} +03/03/2022 14:28:02 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/03/2022 14:28:07 - INFO - codeparrot_training - Step 891: {'lr': 0.00022275000000000002, 'samples': 456704, 'steps': 891, 'loss/train': 5.768956661224365} +03/03/2022 14:28:10 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/03/2022 14:28:12 - INFO - codeparrot_training - Step 892: {'lr': 0.000223, 'samples': 457216, 'steps': 892, 'loss/train': 6.329216957092285} +03/03/2022 14:28:15 - INFO - codeparrot_training - Step 893: {'lr': 0.00022325, 'samples': 457728, 'steps': 893, 'loss/train': 5.524855613708496} +03/03/2022 14:28:18 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/03/2022 14:28:21 - INFO - codeparrot_training - Step 894: {'lr': 0.0002235, 'samples': 458240, 'steps': 894, 'loss/train': 6.467149257659912} +03/03/2022 14:28:24 - INFO - codeparrot_training - Step 895: {'lr': 0.00022375000000000002, 'samples': 458752, 'steps': 895, 'loss/train': 5.564155101776123} +03/03/2022 14:28:26 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/03/2022 14:28:29 - INFO - codeparrot_training - Step 896: {'lr': 0.000224, 'samples': 459264, 'steps': 896, 'loss/train': 5.399448871612549} +03/03/2022 14:28:32 - INFO - codeparrot_training - Step 897: {'lr': 0.00022425, 'samples': 459776, 'steps': 897, 'loss/train': 4.8847150802612305} +03/03/2022 14:28:35 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/03/2022 14:28:37 - INFO - codeparrot_training - Step 898: {'lr': 0.0002245, 'samples': 460288, 'steps': 898, 'loss/train': 5.0721964836120605} +03/03/2022 14:28:41 - INFO - codeparrot_training - Step 899: {'lr': 0.00022475000000000001, 'samples': 460800, 'steps': 899, 'loss/train': 5.297183990478516} +03/03/2022 14:28:43 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/03/2022 14:28:46 - INFO - codeparrot_training - Step 900: {'lr': 0.00022500000000000002, 'samples': 461312, 'steps': 900, 'loss/train': 6.471324920654297} +03/03/2022 14:28:49 - INFO - codeparrot_training - Step 901: {'lr': 0.00022525, 'samples': 461824, 'steps': 901, 'loss/train': 5.060986042022705} +03/03/2022 14:28:51 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/03/2022 14:28:54 - INFO - codeparrot_training - Step 902: {'lr': 0.0002255, 'samples': 462336, 'steps': 902, 'loss/train': 5.69996452331543} +03/03/2022 14:28:57 - INFO - codeparrot_training - Step 903: {'lr': 0.00022575, 'samples': 462848, 'steps': 903, 'loss/train': 6.508148193359375} +03/03/2022 14:28:59 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 14:29:02 - INFO - codeparrot_training - Step 904: {'lr': 0.00022600000000000002, 'samples': 463360, 'steps': 904, 'loss/train': 5.452254772186279} +03/03/2022 14:29:06 - INFO - codeparrot_training - Step 905: {'lr': 0.00022625000000000002, 'samples': 463872, 'steps': 905, 'loss/train': 6.821559906005859} +03/03/2022 14:29:07 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/03/2022 14:29:11 - INFO - codeparrot_training - Step 906: {'lr': 0.0002265, 'samples': 464384, 'steps': 906, 'loss/train': 5.089369773864746} +03/03/2022 14:29:14 - INFO - codeparrot_training - Step 907: {'lr': 0.00022675, 'samples': 464896, 'steps': 907, 'loss/train': 4.6399054527282715} +03/03/2022 14:29:15 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/03/2022 14:29:19 - INFO - codeparrot_training - Step 908: {'lr': 0.00022700000000000002, 'samples': 465408, 'steps': 908, 'loss/train': 5.006394863128662} +03/03/2022 14:29:22 - INFO - codeparrot_training - Step 909: {'lr': 0.00022725000000000002, 'samples': 465920, 'steps': 909, 'loss/train': 5.468337059020996} +03/03/2022 14:29:24 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/03/2022 14:29:28 - INFO - codeparrot_training - Step 910: {'lr': 0.0002275, 'samples': 466432, 'steps': 910, 'loss/train': 5.463813781738281} +03/03/2022 14:29:31 - INFO - codeparrot_training - Step 911: {'lr': 0.00022775, 'samples': 466944, 'steps': 911, 'loss/train': 5.19062614440918} +03/03/2022 14:29:32 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/03/2022 14:29:36 - INFO - codeparrot_training - Step 912: {'lr': 0.000228, 'samples': 467456, 'steps': 912, 'loss/train': 5.039602756500244} +03/03/2022 14:29:39 - INFO - codeparrot_training - Step 913: {'lr': 0.00022825000000000002, 'samples': 467968, 'steps': 913, 'loss/train': 5.904970169067383} +03/03/2022 14:29:41 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 14:29:44 - INFO - codeparrot_training - Step 914: {'lr': 0.00022850000000000002, 'samples': 468480, 'steps': 914, 'loss/train': 5.654131889343262} +03/03/2022 14:29:48 - INFO - codeparrot_training - Step 915: {'lr': 0.00022875, 'samples': 468992, 'steps': 915, 'loss/train': 5.448190689086914} +03/03/2022 14:29:49 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/03/2022 14:29:53 - INFO - codeparrot_training - Step 916: {'lr': 0.000229, 'samples': 469504, 'steps': 916, 'loss/train': 5.636771202087402} +03/03/2022 14:29:56 - INFO - codeparrot_training - Step 917: {'lr': 0.00022925000000000002, 'samples': 470016, 'steps': 917, 'loss/train': 5.562810897827148} +03/03/2022 14:29:57 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/03/2022 14:30:01 - INFO - codeparrot_training - Step 918: {'lr': 0.00022950000000000002, 'samples': 470528, 'steps': 918, 'loss/train': 5.274580001831055} +03/03/2022 14:30:04 - INFO - codeparrot_training - Step 919: {'lr': 0.00022975000000000003, 'samples': 471040, 'steps': 919, 'loss/train': 4.779916763305664} +03/03/2022 14:30:05 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/03/2022 14:30:10 - INFO - codeparrot_training - Step 920: {'lr': 0.00023, 'samples': 471552, 'steps': 920, 'loss/train': 5.501857280731201} +03/03/2022 14:30:13 - INFO - codeparrot_training - Step 921: {'lr': 0.00023025, 'samples': 472064, 'steps': 921, 'loss/train': 5.448602199554443} +03/03/2022 14:30:14 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/03/2022 14:30:18 - INFO - codeparrot_training - Step 922: {'lr': 0.00023050000000000002, 'samples': 472576, 'steps': 922, 'loss/train': 2.3943774700164795} +03/03/2022 14:30:21 - INFO - codeparrot_training - Step 923: {'lr': 0.00023075000000000003, 'samples': 473088, 'steps': 923, 'loss/train': 2.5441648960113525} +03/03/2022 14:30:22 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/03/2022 14:30:26 - INFO - codeparrot_training - Step 924: {'lr': 0.000231, 'samples': 473600, 'steps': 924, 'loss/train': 5.14992094039917} +03/03/2022 14:30:29 - INFO - codeparrot_training - Step 925: {'lr': 0.00023125, 'samples': 474112, 'steps': 925, 'loss/train': 5.638806343078613} +03/03/2022 14:30:30 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/03/2022 14:30:35 - INFO - codeparrot_training - Step 926: {'lr': 0.00023150000000000002, 'samples': 474624, 'steps': 926, 'loss/train': 5.534430503845215} +03/03/2022 14:30:38 - INFO - codeparrot_training - Step 927: {'lr': 0.00023175000000000002, 'samples': 475136, 'steps': 927, 'loss/train': 5.90189266204834} +03/03/2022 14:30:39 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/03/2022 14:30:43 - INFO - codeparrot_training - Step 928: {'lr': 0.00023200000000000003, 'samples': 475648, 'steps': 928, 'loss/train': 4.602533340454102} +03/03/2022 14:30:46 - INFO - codeparrot_training - Step 929: {'lr': 0.00023225, 'samples': 476160, 'steps': 929, 'loss/train': 5.524892330169678} +03/03/2022 14:30:47 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/03/2022 14:30:51 - INFO - codeparrot_training - Step 930: {'lr': 0.0002325, 'samples': 476672, 'steps': 930, 'loss/train': 5.662743091583252} +03/03/2022 14:30:55 - INFO - codeparrot_training - Step 931: {'lr': 0.00023275000000000002, 'samples': 477184, 'steps': 931, 'loss/train': 4.883127212524414} +03/03/2022 14:30:55 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/03/2022 14:31:00 - INFO - codeparrot_training - Step 932: {'lr': 0.00023300000000000003, 'samples': 477696, 'steps': 932, 'loss/train': 5.51088809967041} +03/03/2022 14:31:03 - INFO - codeparrot_training - Step 933: {'lr': 0.00023325, 'samples': 478208, 'steps': 933, 'loss/train': 4.705718517303467} +03/03/2022 14:31:08 - INFO - codeparrot_training - Step 934: {'lr': 0.0002335, 'samples': 478720, 'steps': 934, 'loss/train': 5.287855625152588} +03/03/2022 14:31:12 - INFO - codeparrot_training - Step 935: {'lr': 0.00023375000000000002, 'samples': 479232, 'steps': 935, 'loss/train': 3.7623660564422607} +03/03/2022 14:31:12 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/03/2022 14:31:17 - INFO - codeparrot_training - Step 936: {'lr': 0.00023400000000000002, 'samples': 479744, 'steps': 936, 'loss/train': 4.8467183113098145} +03/03/2022 14:31:20 - INFO - codeparrot_training - Step 937: {'lr': 0.00023425000000000003, 'samples': 480256, 'steps': 937, 'loss/train': 4.810800075531006} +03/03/2022 14:31:20 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/03/2022 14:31:25 - INFO - codeparrot_training - Step 938: {'lr': 0.00023449999999999998, 'samples': 480768, 'steps': 938, 'loss/train': 5.6370368003845215} +03/03/2022 14:31:28 - INFO - codeparrot_training - Step 939: {'lr': 0.00023475, 'samples': 481280, 'steps': 939, 'loss/train': 4.962849140167236} +03/03/2022 14:31:28 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/03/2022 14:31:34 - INFO - codeparrot_training - Step 940: {'lr': 0.000235, 'samples': 481792, 'steps': 940, 'loss/train': 5.930874824523926} +03/03/2022 14:31:37 - INFO - codeparrot_training - Step 941: {'lr': 0.00023525, 'samples': 482304, 'steps': 941, 'loss/train': 4.297204494476318} +03/03/2022 14:31:37 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/03/2022 14:31:42 - INFO - codeparrot_training - Step 942: {'lr': 0.0002355, 'samples': 482816, 'steps': 942, 'loss/train': 4.700397968292236} +03/03/2022 14:31:45 - INFO - codeparrot_training - Step 943: {'lr': 0.00023574999999999998, 'samples': 483328, 'steps': 943, 'loss/train': 8.059362411499023} +03/03/2022 14:31:45 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/03/2022 14:31:51 - INFO - codeparrot_training - Step 944: {'lr': 0.000236, 'samples': 483840, 'steps': 944, 'loss/train': 5.662092208862305} +03/03/2022 14:31:53 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/03/2022 14:31:56 - INFO - codeparrot_training - Step 945: {'lr': 0.00023625, 'samples': 484352, 'steps': 945, 'loss/train': 5.7917633056640625} +03/03/2022 14:31:59 - INFO - codeparrot_training - Step 946: {'lr': 0.0002365, 'samples': 484864, 'steps': 946, 'loss/train': 6.147116184234619} +03/03/2022 14:32:02 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/03/2022 14:32:04 - INFO - codeparrot_training - Step 947: {'lr': 0.00023674999999999998, 'samples': 485376, 'steps': 947, 'loss/train': 5.417236804962158} +03/03/2022 14:32:07 - INFO - codeparrot_training - Step 948: {'lr': 0.000237, 'samples': 485888, 'steps': 948, 'loss/train': 4.750515937805176} +03/03/2022 14:32:10 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/03/2022 14:32:13 - INFO - codeparrot_training - Step 949: {'lr': 0.00023725, 'samples': 486400, 'steps': 949, 'loss/train': 4.7533040046691895} +03/03/2022 14:32:16 - INFO - codeparrot_training - Step 950: {'lr': 0.0002375, 'samples': 486912, 'steps': 950, 'loss/train': 5.379957675933838} +03/03/2022 14:32:19 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/03/2022 14:32:21 - INFO - codeparrot_training - Step 951: {'lr': 0.00023775, 'samples': 487424, 'steps': 951, 'loss/train': 5.162179470062256} +03/03/2022 14:32:24 - INFO - codeparrot_training - Step 952: {'lr': 0.00023799999999999998, 'samples': 487936, 'steps': 952, 'loss/train': 5.595740795135498} +03/03/2022 14:32:27 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/03/2022 14:32:29 - INFO - codeparrot_training - Step 953: {'lr': 0.00023825, 'samples': 488448, 'steps': 953, 'loss/train': 4.310987949371338} +03/03/2022 14:32:33 - INFO - codeparrot_training - Step 954: {'lr': 0.0002385, 'samples': 488960, 'steps': 954, 'loss/train': 5.940654277801514} +03/03/2022 14:32:35 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/03/2022 14:32:38 - INFO - codeparrot_training - Step 955: {'lr': 0.00023875, 'samples': 489472, 'steps': 955, 'loss/train': 5.359099864959717} +03/03/2022 14:32:41 - INFO - codeparrot_training - Step 956: {'lr': 0.00023899999999999998, 'samples': 489984, 'steps': 956, 'loss/train': 5.056358814239502} +03/03/2022 14:32:44 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/03/2022 14:32:46 - INFO - codeparrot_training - Step 957: {'lr': 0.00023925, 'samples': 490496, 'steps': 957, 'loss/train': 5.649569034576416} +03/03/2022 14:32:49 - INFO - codeparrot_training - Step 958: {'lr': 0.0002395, 'samples': 491008, 'steps': 958, 'loss/train': 5.705789089202881} +03/03/2022 14:32:52 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/03/2022 14:32:55 - INFO - codeparrot_training - Step 959: {'lr': 0.00023975, 'samples': 491520, 'steps': 959, 'loss/train': 4.851657390594482} +03/03/2022 14:32:58 - INFO - codeparrot_training - Step 960: {'lr': 0.00024, 'samples': 492032, 'steps': 960, 'loss/train': 4.245975017547607} +03/03/2022 14:33:00 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/03/2022 14:33:03 - INFO - codeparrot_training - Step 961: {'lr': 0.00024024999999999999, 'samples': 492544, 'steps': 961, 'loss/train': 5.14569616317749} +03/03/2022 14:33:06 - INFO - codeparrot_training - Step 962: {'lr': 0.0002405, 'samples': 493056, 'steps': 962, 'loss/train': 6.001979351043701} +03/03/2022 14:33:08 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/03/2022 14:33:11 - INFO - codeparrot_training - Step 963: {'lr': 0.00024075, 'samples': 493568, 'steps': 963, 'loss/train': 4.987338066101074} +03/03/2022 14:33:15 - INFO - codeparrot_training - Step 964: {'lr': 0.000241, 'samples': 494080, 'steps': 964, 'loss/train': 5.097022533416748} +03/03/2022 14:33:16 - INFO - codeparrot_training - Skipping example with length 285 (seq_length=1024) +03/03/2022 14:33:20 - INFO - codeparrot_training - Step 965: {'lr': 0.00024125, 'samples': 494592, 'steps': 965, 'loss/train': 6.025006294250488} +03/03/2022 14:33:23 - INFO - codeparrot_training - Step 966: {'lr': 0.0002415, 'samples': 495104, 'steps': 966, 'loss/train': 5.418980121612549} +03/03/2022 14:33:25 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/03/2022 14:33:29 - INFO - codeparrot_training - Step 967: {'lr': 0.00024175, 'samples': 495616, 'steps': 967, 'loss/train': 5.099531650543213} +03/03/2022 14:33:32 - INFO - codeparrot_training - Step 968: {'lr': 0.000242, 'samples': 496128, 'steps': 968, 'loss/train': 5.151614665985107} +03/03/2022 14:33:35 - INFO - codeparrot_training - Step 969: {'lr': 0.00024225, 'samples': 496640, 'steps': 969, 'loss/train': 5.159854888916016} +03/03/2022 14:33:35 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/03/2022 14:33:40 - INFO - codeparrot_training - Step 970: {'lr': 0.00024249999999999999, 'samples': 497152, 'steps': 970, 'loss/train': 6.823923110961914} +03/03/2022 14:33:43 - INFO - codeparrot_training - Step 971: {'lr': 0.00024275, 'samples': 497664, 'steps': 971, 'loss/train': 6.1451215744018555} +03/03/2022 14:33:43 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/03/2022 14:33:49 - INFO - codeparrot_training - Step 972: {'lr': 0.000243, 'samples': 498176, 'steps': 972, 'loss/train': 5.431978225708008} +03/03/2022 14:33:52 - INFO - codeparrot_training - Step 973: {'lr': 0.00024325, 'samples': 498688, 'steps': 973, 'loss/train': 4.159397125244141} +03/03/2022 14:33:52 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/03/2022 14:33:57 - INFO - codeparrot_training - Step 974: {'lr': 0.0002435, 'samples': 499200, 'steps': 974, 'loss/train': 4.99053430557251} +03/03/2022 14:34:00 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/03/2022 14:34:02 - INFO - codeparrot_training - Step 975: {'lr': 0.00024375, 'samples': 499712, 'steps': 975, 'loss/train': 5.461595058441162} +03/03/2022 14:34:05 - INFO - codeparrot_training - Step 976: {'lr': 0.000244, 'samples': 500224, 'steps': 976, 'loss/train': 4.606551170349121} +03/03/2022 14:34:08 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/03/2022 14:34:10 - INFO - codeparrot_training - Step 977: {'lr': 0.00024425, 'samples': 500736, 'steps': 977, 'loss/train': 4.558149814605713} +03/03/2022 14:34:14 - INFO - codeparrot_training - Step 978: {'lr': 0.0002445, 'samples': 501248, 'steps': 978, 'loss/train': 5.859238147735596} +03/03/2022 14:34:16 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/03/2022 14:34:19 - INFO - codeparrot_training - Step 979: {'lr': 0.00024475, 'samples': 501760, 'steps': 979, 'loss/train': 5.584682464599609} +03/03/2022 14:34:22 - INFO - codeparrot_training - Step 980: {'lr': 0.000245, 'samples': 502272, 'steps': 980, 'loss/train': 4.898726463317871} +03/03/2022 14:34:24 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/03/2022 14:34:27 - INFO - codeparrot_training - Step 981: {'lr': 0.00024525, 'samples': 502784, 'steps': 981, 'loss/train': 4.736696243286133} +03/03/2022 14:34:30 - INFO - codeparrot_training - Step 982: {'lr': 0.0002455, 'samples': 503296, 'steps': 982, 'loss/train': 5.225622653961182} +03/03/2022 14:34:33 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/03/2022 14:34:36 - INFO - codeparrot_training - Step 983: {'lr': 0.00024575, 'samples': 503808, 'steps': 983, 'loss/train': 5.422540664672852} +03/03/2022 14:34:39 - INFO - codeparrot_training - Step 984: {'lr': 0.000246, 'samples': 504320, 'steps': 984, 'loss/train': 5.052621841430664} +03/03/2022 14:34:41 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/03/2022 14:34:44 - INFO - codeparrot_training - Step 985: {'lr': 0.00024625, 'samples': 504832, 'steps': 985, 'loss/train': 4.097038269042969} +03/03/2022 14:34:47 - INFO - codeparrot_training - Step 986: {'lr': 0.00024650000000000003, 'samples': 505344, 'steps': 986, 'loss/train': 5.570449352264404} +03/03/2022 14:34:49 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/03/2022 14:34:52 - INFO - codeparrot_training - Step 987: {'lr': 0.00024675, 'samples': 505856, 'steps': 987, 'loss/train': 5.631117820739746} +03/03/2022 14:34:56 - INFO - codeparrot_training - Step 988: {'lr': 0.000247, 'samples': 506368, 'steps': 988, 'loss/train': 4.260595798492432} +03/03/2022 14:34:57 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/03/2022 14:35:01 - INFO - codeparrot_training - Step 989: {'lr': 0.00024725, 'samples': 506880, 'steps': 989, 'loss/train': 5.5409040451049805} +03/03/2022 14:35:04 - INFO - codeparrot_training - Step 990: {'lr': 0.0002475, 'samples': 507392, 'steps': 990, 'loss/train': 5.684276580810547} +03/03/2022 14:35:06 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/03/2022 14:35:10 - INFO - codeparrot_training - Step 991: {'lr': 0.00024775, 'samples': 507904, 'steps': 991, 'loss/train': 5.969166278839111} +03/03/2022 14:35:13 - INFO - codeparrot_training - Step 992: {'lr': 0.000248, 'samples': 508416, 'steps': 992, 'loss/train': 4.652510166168213} +03/03/2022 14:35:14 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/03/2022 14:35:18 - INFO - codeparrot_training - Step 993: {'lr': 0.00024825, 'samples': 508928, 'steps': 993, 'loss/train': 5.757342338562012} +03/03/2022 14:35:22 - INFO - codeparrot_training - Step 994: {'lr': 0.0002485, 'samples': 509440, 'steps': 994, 'loss/train': 4.967216491699219} +03/03/2022 14:35:24 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/03/2022 14:35:27 - INFO - codeparrot_training - Step 995: {'lr': 0.00024875, 'samples': 509952, 'steps': 995, 'loss/train': 6.8817057609558105} +03/03/2022 14:35:30 - INFO - codeparrot_training - Step 996: {'lr': 0.000249, 'samples': 510464, 'steps': 996, 'loss/train': 5.365835666656494} +03/03/2022 14:35:33 - INFO - codeparrot_training - Step 997: {'lr': 0.00024925, 'samples': 510976, 'steps': 997, 'loss/train': 5.559195041656494} +03/03/2022 14:35:34 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/03/2022 14:35:39 - INFO - codeparrot_training - Step 998: {'lr': 0.0002495, 'samples': 511488, 'steps': 998, 'loss/train': 5.3885650634765625} +03/03/2022 14:35:42 - INFO - codeparrot_training - Step 999: {'lr': 0.00024975, 'samples': 512000, 'steps': 999, 'loss/train': 5.635467052459717} +03/03/2022 14:35:42 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/03/2022 14:35:47 - INFO - codeparrot_training - Step 1000: {'lr': 0.00025, 'samples': 512512, 'steps': 1000, 'loss/train': 5.574415683746338} +03/03/2022 14:35:50 - INFO - codeparrot_training - Step 1001: {'lr': 0.00025025, 'samples': 513024, 'steps': 1001, 'loss/train': 5.000115394592285} +03/03/2022 14:35:50 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/03/2022 14:35:56 - INFO - codeparrot_training - Step 1002: {'lr': 0.0002505, 'samples': 513536, 'steps': 1002, 'loss/train': 5.251387596130371} +03/03/2022 14:35:59 - INFO - codeparrot_training - Step 1003: {'lr': 0.00025075, 'samples': 514048, 'steps': 1003, 'loss/train': 5.300058364868164} +03/03/2022 14:35:59 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/03/2022 14:36:04 - INFO - codeparrot_training - Step 1004: {'lr': 0.00025100000000000003, 'samples': 514560, 'steps': 1004, 'loss/train': 5.120811462402344} +03/03/2022 14:36:07 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/03/2022 14:36:09 - INFO - codeparrot_training - Step 1005: {'lr': 0.00025124999999999995, 'samples': 515072, 'steps': 1005, 'loss/train': 5.268819808959961} +03/03/2022 14:36:12 - INFO - codeparrot_training - Step 1006: {'lr': 0.0002515, 'samples': 515584, 'steps': 1006, 'loss/train': 4.450544357299805} +03/03/2022 14:36:15 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/03/2022 14:36:18 - INFO - codeparrot_training - Step 1007: {'lr': 0.00025174999999999997, 'samples': 516096, 'steps': 1007, 'loss/train': 4.952118873596191} +03/03/2022 14:36:21 - INFO - codeparrot_training - Step 1008: {'lr': 0.000252, 'samples': 516608, 'steps': 1008, 'loss/train': 5.472934246063232} +03/03/2022 14:36:24 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/03/2022 14:36:26 - INFO - codeparrot_training - Step 1009: {'lr': 0.00025225, 'samples': 517120, 'steps': 1009, 'loss/train': 6.064309120178223} +03/03/2022 14:36:29 - INFO - codeparrot_training - Step 1010: {'lr': 0.0002525, 'samples': 517632, 'steps': 1010, 'loss/train': 5.658780574798584} +03/03/2022 14:36:32 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/03/2022 14:36:34 - INFO - codeparrot_training - Step 1011: {'lr': 0.00025275, 'samples': 518144, 'steps': 1011, 'loss/train': 5.292049407958984} +03/03/2022 14:36:38 - INFO - codeparrot_training - Step 1012: {'lr': 0.000253, 'samples': 518656, 'steps': 1012, 'loss/train': 4.9148993492126465} +03/03/2022 14:36:40 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/03/2022 14:36:43 - INFO - codeparrot_training - Step 1013: {'lr': 0.00025325, 'samples': 519168, 'steps': 1013, 'loss/train': 5.79802942276001} +03/03/2022 14:36:46 - INFO - codeparrot_training - Step 1014: {'lr': 0.0002535, 'samples': 519680, 'steps': 1014, 'loss/train': 4.049562454223633} +03/03/2022 14:36:49 - INFO - codeparrot_training - Step 1015: {'lr': 0.00025374999999999996, 'samples': 520192, 'steps': 1015, 'loss/train': 2.8729851245880127} +03/03/2022 14:36:49 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/03/2022 14:36:55 - INFO - codeparrot_training - Step 1016: {'lr': 0.000254, 'samples': 520704, 'steps': 1016, 'loss/train': 4.415850639343262} +03/03/2022 14:36:57 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/03/2022 14:37:00 - INFO - codeparrot_training - Step 1017: {'lr': 0.00025425, 'samples': 521216, 'steps': 1017, 'loss/train': 4.908716678619385} +03/03/2022 14:37:03 - INFO - codeparrot_training - Step 1018: {'lr': 0.0002545, 'samples': 521728, 'steps': 1018, 'loss/train': 3.306058168411255} +03/03/2022 14:37:05 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/03/2022 14:37:08 - INFO - codeparrot_training - Step 1019: {'lr': 0.00025475, 'samples': 522240, 'steps': 1019, 'loss/train': 5.539009094238281} +03/03/2022 14:37:11 - INFO - codeparrot_training - Step 1020: {'lr': 0.000255, 'samples': 522752, 'steps': 1020, 'loss/train': 4.003036975860596} +03/03/2022 14:37:14 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/03/2022 14:37:16 - INFO - codeparrot_training - Step 1021: {'lr': 0.00025525, 'samples': 523264, 'steps': 1021, 'loss/train': 4.945549964904785} +03/03/2022 14:37:20 - INFO - codeparrot_training - Step 1022: {'lr': 0.00025550000000000003, 'samples': 523776, 'steps': 1022, 'loss/train': 5.16744327545166} +03/03/2022 14:37:22 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/03/2022 14:37:25 - INFO - codeparrot_training - Step 1023: {'lr': 0.00025575, 'samples': 524288, 'steps': 1023, 'loss/train': 4.918542861938477} +03/03/2022 14:37:28 - INFO - codeparrot_training - Step 1024: {'lr': 0.000256, 'samples': 524800, 'steps': 1024, 'loss/train': 4.853889465332031} +03/03/2022 14:37:30 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/03/2022 14:37:33 - INFO - codeparrot_training - Step 1025: {'lr': 0.00025624999999999997, 'samples': 525312, 'steps': 1025, 'loss/train': 4.4365997314453125} +03/03/2022 14:37:37 - INFO - codeparrot_training - Step 1026: {'lr': 0.0002565, 'samples': 525824, 'steps': 1026, 'loss/train': 4.449159622192383} +03/03/2022 14:37:39 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/03/2022 14:37:42 - INFO - codeparrot_training - Step 1027: {'lr': 0.00025675, 'samples': 526336, 'steps': 1027, 'loss/train': 5.204823970794678} +03/03/2022 14:37:45 - INFO - codeparrot_training - Step 1028: {'lr': 0.000257, 'samples': 526848, 'steps': 1028, 'loss/train': 4.834066390991211} +03/03/2022 14:37:47 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/03/2022 14:37:50 - INFO - codeparrot_training - Step 1029: {'lr': 0.00025725, 'samples': 527360, 'steps': 1029, 'loss/train': 5.412440299987793} +03/03/2022 14:37:53 - INFO - codeparrot_training - Step 1030: {'lr': 0.0002575, 'samples': 527872, 'steps': 1030, 'loss/train': 4.39460563659668} +03/03/2022 14:37:56 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/03/2022 14:37:59 - INFO - codeparrot_training - Step 1031: {'lr': 0.00025775, 'samples': 528384, 'steps': 1031, 'loss/train': 5.561583518981934} +03/03/2022 14:38:02 - INFO - codeparrot_training - Step 1032: {'lr': 0.00025800000000000004, 'samples': 528896, 'steps': 1032, 'loss/train': 4.344091415405273} +03/03/2022 14:38:04 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/03/2022 14:38:07 - INFO - codeparrot_training - Step 1033: {'lr': 0.00025824999999999996, 'samples': 529408, 'steps': 1033, 'loss/train': 5.726379871368408} +03/03/2022 14:38:10 - INFO - codeparrot_training - Step 1034: {'lr': 0.0002585, 'samples': 529920, 'steps': 1034, 'loss/train': 5.306400299072266} +03/03/2022 14:38:12 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/03/2022 14:38:15 - INFO - codeparrot_training - Step 1035: {'lr': 0.00025875, 'samples': 530432, 'steps': 1035, 'loss/train': 5.606037139892578} +03/03/2022 14:38:18 - INFO - codeparrot_training - Step 1036: {'lr': 0.000259, 'samples': 530944, 'steps': 1036, 'loss/train': 6.034673690795898} +03/03/2022 14:38:20 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/03/2022 14:38:24 - INFO - codeparrot_training - Step 1037: {'lr': 0.00025925, 'samples': 531456, 'steps': 1037, 'loss/train': 5.3619489669799805} +03/03/2022 14:38:27 - INFO - codeparrot_training - Step 1038: {'lr': 0.0002595, 'samples': 531968, 'steps': 1038, 'loss/train': 5.350738525390625} +03/03/2022 14:38:29 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/03/2022 14:38:32 - INFO - codeparrot_training - Step 1039: {'lr': 0.00025975, 'samples': 532480, 'steps': 1039, 'loss/train': 5.737588405609131} +03/03/2022 14:38:35 - INFO - codeparrot_training - Step 1040: {'lr': 0.00026000000000000003, 'samples': 532992, 'steps': 1040, 'loss/train': 5.76760721206665} +03/03/2022 14:38:37 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/03/2022 14:38:40 - INFO - codeparrot_training - Step 1041: {'lr': 0.00026025, 'samples': 533504, 'steps': 1041, 'loss/train': 4.5547614097595215} +03/03/2022 14:38:44 - INFO - codeparrot_training - Step 1042: {'lr': 0.0002605, 'samples': 534016, 'steps': 1042, 'loss/train': 4.504413604736328} +03/03/2022 14:38:45 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/03/2022 14:38:49 - INFO - codeparrot_training - Step 1043: {'lr': 0.00026074999999999997, 'samples': 534528, 'steps': 1043, 'loss/train': 6.240774154663086} +03/03/2022 14:38:52 - INFO - codeparrot_training - Step 1044: {'lr': 0.000261, 'samples': 535040, 'steps': 1044, 'loss/train': 4.31958532333374} +03/03/2022 14:38:53 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/03/2022 14:38:57 - INFO - codeparrot_training - Step 1045: {'lr': 0.00026125, 'samples': 535552, 'steps': 1045, 'loss/train': 5.091063022613525} +03/03/2022 14:39:00 - INFO - codeparrot_training - Step 1046: {'lr': 0.0002615, 'samples': 536064, 'steps': 1046, 'loss/train': 6.172536373138428} +03/03/2022 14:39:01 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/03/2022 14:39:06 - INFO - codeparrot_training - Step 1047: {'lr': 0.00026175, 'samples': 536576, 'steps': 1047, 'loss/train': 5.628197193145752} +03/03/2022 14:39:09 - INFO - codeparrot_training - Step 1048: {'lr': 0.000262, 'samples': 537088, 'steps': 1048, 'loss/train': 5.0062174797058105} +03/03/2022 14:39:10 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/03/2022 14:39:14 - INFO - codeparrot_training - Step 1049: {'lr': 0.00026225, 'samples': 537600, 'steps': 1049, 'loss/train': 4.725820541381836} +03/03/2022 14:39:17 - INFO - codeparrot_training - Step 1050: {'lr': 0.00026250000000000004, 'samples': 538112, 'steps': 1050, 'loss/train': 4.617496013641357} +03/03/2022 14:39:18 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/03/2022 14:39:22 - INFO - codeparrot_training - Step 1051: {'lr': 0.00026274999999999996, 'samples': 538624, 'steps': 1051, 'loss/train': 4.570369720458984} +03/03/2022 14:39:26 - INFO - codeparrot_training - Step 1052: {'lr': 0.000263, 'samples': 539136, 'steps': 1052, 'loss/train': 4.43674898147583} +03/03/2022 14:39:27 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 14:39:31 - INFO - codeparrot_training - Step 1053: {'lr': 0.00026325, 'samples': 539648, 'steps': 1053, 'loss/train': 4.584609508514404} +03/03/2022 14:39:34 - INFO - codeparrot_training - Step 1054: {'lr': 0.0002635, 'samples': 540160, 'steps': 1054, 'loss/train': 5.5938215255737305} +03/03/2022 14:39:35 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/03/2022 14:39:40 - INFO - codeparrot_training - Step 1055: {'lr': 0.00026375, 'samples': 540672, 'steps': 1055, 'loss/train': 5.205827236175537} +03/03/2022 14:39:43 - INFO - codeparrot_training - Step 1056: {'lr': 0.000264, 'samples': 541184, 'steps': 1056, 'loss/train': 4.771644592285156} +03/03/2022 14:39:44 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 14:39:48 - INFO - codeparrot_training - Step 1057: {'lr': 0.00026425, 'samples': 541696, 'steps': 1057, 'loss/train': 5.346999168395996} +03/03/2022 14:39:51 - INFO - codeparrot_training - Step 1058: {'lr': 0.00026450000000000003, 'samples': 542208, 'steps': 1058, 'loss/train': 4.526518821716309} +03/03/2022 14:39:52 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/03/2022 14:39:57 - INFO - codeparrot_training - Step 1059: {'lr': 0.00026475, 'samples': 542720, 'steps': 1059, 'loss/train': 4.452726364135742} +03/03/2022 14:40:00 - INFO - codeparrot_training - Step 1060: {'lr': 0.00026500000000000004, 'samples': 543232, 'steps': 1060, 'loss/train': 4.657252788543701} +03/03/2022 14:40:00 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/03/2022 14:40:05 - INFO - codeparrot_training - Step 1061: {'lr': 0.00026524999999999997, 'samples': 543744, 'steps': 1061, 'loss/train': 4.880711555480957} +03/03/2022 14:40:08 - INFO - codeparrot_training - Step 1062: {'lr': 0.0002655, 'samples': 544256, 'steps': 1062, 'loss/train': 4.187632083892822} +03/03/2022 14:40:09 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/03/2022 14:40:13 - INFO - codeparrot_training - Step 1063: {'lr': 0.00026575, 'samples': 544768, 'steps': 1063, 'loss/train': 4.795836448669434} +03/03/2022 14:40:17 - INFO - codeparrot_training - Step 1064: {'lr': 0.000266, 'samples': 545280, 'steps': 1064, 'loss/train': 5.463798999786377} +03/03/2022 14:40:17 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/03/2022 14:40:22 - INFO - codeparrot_training - Step 1065: {'lr': 0.00026625, 'samples': 545792, 'steps': 1065, 'loss/train': 4.310492038726807} +03/03/2022 14:40:25 - INFO - codeparrot_training - Step 1066: {'lr': 0.0002665, 'samples': 546304, 'steps': 1066, 'loss/train': 5.243485927581787} +03/03/2022 14:40:25 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/03/2022 14:40:30 - INFO - codeparrot_training - Step 1067: {'lr': 0.00026675, 'samples': 546816, 'steps': 1067, 'loss/train': 4.350306034088135} +03/03/2022 14:40:34 - INFO - codeparrot_training - Step 1068: {'lr': 0.00026700000000000004, 'samples': 547328, 'steps': 1068, 'loss/train': 4.03434944152832} +03/03/2022 14:40:34 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/03/2022 14:40:39 - INFO - codeparrot_training - Step 1069: {'lr': 0.00026725, 'samples': 547840, 'steps': 1069, 'loss/train': 1.940299391746521} +03/03/2022 14:40:42 - INFO - codeparrot_training - Step 1070: {'lr': 0.0002675, 'samples': 548352, 'steps': 1070, 'loss/train': 6.076417922973633} +03/03/2022 14:40:42 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/03/2022 14:40:47 - INFO - codeparrot_training - Step 1071: {'lr': 0.00026775, 'samples': 548864, 'steps': 1071, 'loss/train': 6.043127059936523} +03/03/2022 14:40:50 - INFO - codeparrot_training - Step 1072: {'lr': 0.000268, 'samples': 549376, 'steps': 1072, 'loss/train': 4.990219593048096} +03/03/2022 14:40:50 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/03/2022 14:40:56 - INFO - codeparrot_training - Step 1073: {'lr': 0.00026825, 'samples': 549888, 'steps': 1073, 'loss/train': 4.431066513061523} +03/03/2022 14:40:58 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/03/2022 14:41:01 - INFO - codeparrot_training - Step 1074: {'lr': 0.0002685, 'samples': 550400, 'steps': 1074, 'loss/train': 5.25891637802124} +03/03/2022 14:41:04 - INFO - codeparrot_training - Step 1075: {'lr': 0.00026875, 'samples': 550912, 'steps': 1075, 'loss/train': 5.475607395172119} +03/03/2022 14:41:07 - INFO - codeparrot_training - Step 1076: {'lr': 0.00026900000000000003, 'samples': 551424, 'steps': 1076, 'loss/train': 3.9055325984954834} +03/03/2022 14:41:07 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/03/2022 14:41:12 - INFO - codeparrot_training - Step 1077: {'lr': 0.00026925, 'samples': 551936, 'steps': 1077, 'loss/train': 4.2556915283203125} +03/03/2022 14:41:16 - INFO - codeparrot_training - Step 1078: {'lr': 0.00026950000000000005, 'samples': 552448, 'steps': 1078, 'loss/train': 5.633589744567871} +03/03/2022 14:41:16 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/03/2022 14:41:21 - INFO - codeparrot_training - Step 1079: {'lr': 0.00026974999999999997, 'samples': 552960, 'steps': 1079, 'loss/train': 4.919765949249268} +03/03/2022 14:41:24 - INFO - codeparrot_training - Step 1080: {'lr': 0.00027, 'samples': 553472, 'steps': 1080, 'loss/train': 4.847507953643799} +03/03/2022 14:41:24 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/03/2022 14:41:29 - INFO - codeparrot_training - Step 1081: {'lr': 0.00027025, 'samples': 553984, 'steps': 1081, 'loss/train': 5.138010025024414} +03/03/2022 14:41:32 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/03/2022 14:41:35 - INFO - codeparrot_training - Step 1082: {'lr': 0.0002705, 'samples': 554496, 'steps': 1082, 'loss/train': 4.878818035125732} +03/03/2022 14:41:38 - INFO - codeparrot_training - Step 1083: {'lr': 0.00027075, 'samples': 555008, 'steps': 1083, 'loss/train': 4.895986080169678} +03/03/2022 14:41:41 - INFO - codeparrot_training - Step 1084: {'lr': 0.00027100000000000003, 'samples': 555520, 'steps': 1084, 'loss/train': 5.578668594360352} +03/03/2022 14:41:41 - INFO - codeparrot_training - Skipping example with length 999 (seq_length=1024) +03/03/2022 14:41:46 - INFO - codeparrot_training - Step 1085: {'lr': 0.00027125, 'samples': 556032, 'steps': 1085, 'loss/train': 4.8936004638671875} +03/03/2022 14:41:49 - INFO - codeparrot_training - Step 1086: {'lr': 0.00027150000000000004, 'samples': 556544, 'steps': 1086, 'loss/train': 5.960999011993408} +03/03/2022 14:41:49 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/03/2022 14:41:55 - INFO - codeparrot_training - Step 1087: {'lr': 0.00027175, 'samples': 557056, 'steps': 1087, 'loss/train': 5.246800899505615} +03/03/2022 14:41:58 - INFO - codeparrot_training - Step 1088: {'lr': 0.00027200000000000005, 'samples': 557568, 'steps': 1088, 'loss/train': 5.126637935638428} +03/03/2022 14:41:59 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 14:42:03 - INFO - codeparrot_training - Step 1089: {'lr': 0.00027225, 'samples': 558080, 'steps': 1089, 'loss/train': 4.347425937652588} +03/03/2022 14:42:07 - INFO - codeparrot_training - Step 1090: {'lr': 0.0002725, 'samples': 558592, 'steps': 1090, 'loss/train': 5.3398942947387695} +03/03/2022 14:42:07 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/03/2022 14:42:12 - INFO - codeparrot_training - Step 1091: {'lr': 0.00027275, 'samples': 559104, 'steps': 1091, 'loss/train': 5.132818222045898} +03/03/2022 14:42:15 - INFO - codeparrot_training - Step 1092: {'lr': 0.000273, 'samples': 559616, 'steps': 1092, 'loss/train': 4.986759662628174} +03/03/2022 14:42:15 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/03/2022 14:42:20 - INFO - codeparrot_training - Step 1093: {'lr': 0.00027325, 'samples': 560128, 'steps': 1093, 'loss/train': 4.198845386505127} +03/03/2022 14:42:23 - INFO - codeparrot_training - Step 1094: {'lr': 0.00027350000000000003, 'samples': 560640, 'steps': 1094, 'loss/train': 5.353718280792236} +03/03/2022 14:42:24 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/03/2022 14:42:29 - INFO - codeparrot_training - Step 1095: {'lr': 0.00027375, 'samples': 561152, 'steps': 1095, 'loss/train': 5.235002517700195} +03/03/2022 14:42:32 - INFO - codeparrot_training - Step 1096: {'lr': 0.00027400000000000005, 'samples': 561664, 'steps': 1096, 'loss/train': 3.0480704307556152} +03/03/2022 14:42:32 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/03/2022 14:42:37 - INFO - codeparrot_training - Step 1097: {'lr': 0.00027425, 'samples': 562176, 'steps': 1097, 'loss/train': 4.792338848114014} +03/03/2022 14:42:40 - INFO - codeparrot_training - Step 1098: {'lr': 0.0002745, 'samples': 562688, 'steps': 1098, 'loss/train': 5.226000785827637} +03/03/2022 14:42:40 - INFO - codeparrot_training - Skipping example with length 643 (seq_length=1024) +03/03/2022 14:42:45 - INFO - codeparrot_training - Step 1099: {'lr': 0.00027475, 'samples': 563200, 'steps': 1099, 'loss/train': 4.9972100257873535} +03/03/2022 14:42:49 - INFO - codeparrot_training - Step 1100: {'lr': 0.000275, 'samples': 563712, 'steps': 1100, 'loss/train': 5.435189247131348} +03/03/2022 14:42:49 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/03/2022 14:42:54 - INFO - codeparrot_training - Step 1101: {'lr': 0.00027525, 'samples': 564224, 'steps': 1101, 'loss/train': 4.584694862365723} +03/03/2022 14:42:57 - INFO - codeparrot_training - Step 1102: {'lr': 0.00027550000000000003, 'samples': 564736, 'steps': 1102, 'loss/train': 4.522334098815918} +03/03/2022 14:42:57 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/03/2022 14:43:02 - INFO - codeparrot_training - Step 1103: {'lr': 0.00027575, 'samples': 565248, 'steps': 1103, 'loss/train': 5.064010143280029} +03/03/2022 14:43:05 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 14:43:08 - INFO - codeparrot_training - Step 1104: {'lr': 0.00027600000000000004, 'samples': 565760, 'steps': 1104, 'loss/train': 5.038564682006836} +03/03/2022 14:43:11 - INFO - codeparrot_training - Step 1105: {'lr': 0.00027625, 'samples': 566272, 'steps': 1105, 'loss/train': 4.567838668823242} +03/03/2022 14:43:13 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/03/2022 14:43:16 - INFO - codeparrot_training - Step 1106: {'lr': 0.00027650000000000005, 'samples': 566784, 'steps': 1106, 'loss/train': 4.308528423309326} +03/03/2022 14:43:19 - INFO - codeparrot_training - Step 1107: {'lr': 0.00027675, 'samples': 567296, 'steps': 1107, 'loss/train': 5.4389543533325195} +03/03/2022 14:43:22 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/03/2022 14:43:24 - INFO - codeparrot_training - Step 1108: {'lr': 0.000277, 'samples': 567808, 'steps': 1108, 'loss/train': 4.216660022735596} +03/03/2022 14:43:27 - INFO - codeparrot_training - Step 1109: {'lr': 0.00027725, 'samples': 568320, 'steps': 1109, 'loss/train': 3.7490479946136475} +03/03/2022 14:43:30 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/03/2022 14:43:33 - INFO - codeparrot_training - Step 1110: {'lr': 0.0002775, 'samples': 568832, 'steps': 1110, 'loss/train': 6.123933792114258} +03/03/2022 14:43:36 - INFO - codeparrot_training - Step 1111: {'lr': 0.00027775, 'samples': 569344, 'steps': 1111, 'loss/train': 5.457062244415283} +03/03/2022 14:43:38 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/03/2022 14:43:41 - INFO - codeparrot_training - Step 1112: {'lr': 0.00027800000000000004, 'samples': 569856, 'steps': 1112, 'loss/train': 6.0718607902526855} +03/03/2022 14:43:44 - INFO - codeparrot_training - Step 1113: {'lr': 0.00027825, 'samples': 570368, 'steps': 1113, 'loss/train': 6.278087139129639} +03/03/2022 14:43:46 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/03/2022 14:43:49 - INFO - codeparrot_training - Step 1114: {'lr': 0.00027850000000000005, 'samples': 570880, 'steps': 1114, 'loss/train': 4.37136697769165} +03/03/2022 14:43:53 - INFO - codeparrot_training - Step 1115: {'lr': 0.00027875, 'samples': 571392, 'steps': 1115, 'loss/train': 4.095114707946777} +03/03/2022 14:43:54 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/03/2022 14:43:58 - INFO - codeparrot_training - Step 1116: {'lr': 0.000279, 'samples': 571904, 'steps': 1116, 'loss/train': 4.286388874053955} +03/03/2022 14:44:01 - INFO - codeparrot_training - Step 1117: {'lr': 0.00027925, 'samples': 572416, 'steps': 1117, 'loss/train': 5.543516159057617} +03/03/2022 14:44:02 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/03/2022 14:44:06 - INFO - codeparrot_training - Step 1118: {'lr': 0.0002795, 'samples': 572928, 'steps': 1118, 'loss/train': 6.064958095550537} +03/03/2022 14:44:09 - INFO - codeparrot_training - Step 1119: {'lr': 0.00027975, 'samples': 573440, 'steps': 1119, 'loss/train': 5.2345452308654785} +03/03/2022 14:44:11 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/03/2022 14:44:15 - INFO - codeparrot_training - Step 1120: {'lr': 0.00028000000000000003, 'samples': 573952, 'steps': 1120, 'loss/train': 4.6932501792907715} +03/03/2022 14:44:18 - INFO - codeparrot_training - Step 1121: {'lr': 0.00028025, 'samples': 574464, 'steps': 1121, 'loss/train': 4.8953070640563965} +03/03/2022 14:44:19 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 14:44:23 - INFO - codeparrot_training - Step 1122: {'lr': 0.00028050000000000004, 'samples': 574976, 'steps': 1122, 'loss/train': 5.5702924728393555} +03/03/2022 14:44:26 - INFO - codeparrot_training - Step 1123: {'lr': 0.00028075, 'samples': 575488, 'steps': 1123, 'loss/train': 5.01413106918335} +03/03/2022 14:44:27 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/03/2022 14:44:32 - INFO - codeparrot_training - Step 1124: {'lr': 0.00028100000000000005, 'samples': 576000, 'steps': 1124, 'loss/train': 4.616030216217041} +03/03/2022 14:44:35 - INFO - codeparrot_training - Step 1125: {'lr': 0.00028125000000000003, 'samples': 576512, 'steps': 1125, 'loss/train': 4.766707420349121} +03/03/2022 14:44:36 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/03/2022 14:44:40 - INFO - codeparrot_training - Step 1126: {'lr': 0.00028149999999999996, 'samples': 577024, 'steps': 1126, 'loss/train': 4.632814884185791} +03/03/2022 14:44:43 - INFO - codeparrot_training - Step 1127: {'lr': 0.00028175, 'samples': 577536, 'steps': 1127, 'loss/train': 4.65377140045166} +03/03/2022 14:44:44 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/03/2022 14:44:49 - INFO - codeparrot_training - Step 1128: {'lr': 0.00028199999999999997, 'samples': 578048, 'steps': 1128, 'loss/train': 4.634227275848389} +03/03/2022 14:44:52 - INFO - codeparrot_training - Step 1129: {'lr': 0.00028225, 'samples': 578560, 'steps': 1129, 'loss/train': 5.282841682434082} +03/03/2022 14:44:52 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/03/2022 14:44:57 - INFO - codeparrot_training - Step 1130: {'lr': 0.0002825, 'samples': 579072, 'steps': 1130, 'loss/train': 5.040492534637451} +03/03/2022 14:45:00 - INFO - codeparrot_training - Step 1131: {'lr': 0.00028275, 'samples': 579584, 'steps': 1131, 'loss/train': 5.30776834487915} +03/03/2022 14:45:00 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/03/2022 14:45:05 - INFO - codeparrot_training - Step 1132: {'lr': 0.000283, 'samples': 580096, 'steps': 1132, 'loss/train': 5.669086933135986} +03/03/2022 14:45:09 - INFO - codeparrot_training - Step 1133: {'lr': 0.00028325000000000003, 'samples': 580608, 'steps': 1133, 'loss/train': 4.616725444793701} +03/03/2022 14:45:09 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/03/2022 14:45:14 - INFO - codeparrot_training - Step 1134: {'lr': 0.0002835, 'samples': 581120, 'steps': 1134, 'loss/train': 4.028615951538086} +03/03/2022 14:45:17 - INFO - codeparrot_training - Step 1135: {'lr': 0.00028375, 'samples': 581632, 'steps': 1135, 'loss/train': 4.0377726554870605} +03/03/2022 14:45:17 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/03/2022 14:45:22 - INFO - codeparrot_training - Step 1136: {'lr': 0.00028399999999999996, 'samples': 582144, 'steps': 1136, 'loss/train': 5.683346271514893} +03/03/2022 14:45:25 - INFO - codeparrot_training - Step 1137: {'lr': 0.00028425, 'samples': 582656, 'steps': 1137, 'loss/train': 6.318787574768066} +03/03/2022 14:45:25 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/03/2022 14:45:31 - INFO - codeparrot_training - Step 1138: {'lr': 0.0002845, 'samples': 583168, 'steps': 1138, 'loss/train': 2.4034740924835205} +03/03/2022 14:45:33 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/03/2022 14:45:36 - INFO - codeparrot_training - Step 1139: {'lr': 0.00028475, 'samples': 583680, 'steps': 1139, 'loss/train': 5.280616283416748} +03/03/2022 14:45:39 - INFO - codeparrot_training - Step 1140: {'lr': 0.000285, 'samples': 584192, 'steps': 1140, 'loss/train': 7.985814571380615} +03/03/2022 14:45:42 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/03/2022 14:45:44 - INFO - codeparrot_training - Step 1141: {'lr': 0.00028525, 'samples': 584704, 'steps': 1141, 'loss/train': 3.896122455596924} +03/03/2022 14:45:48 - INFO - codeparrot_training - Step 1142: {'lr': 0.0002855, 'samples': 585216, 'steps': 1142, 'loss/train': 5.168213367462158} +03/03/2022 14:45:50 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/03/2022 14:45:53 - INFO - codeparrot_training - Step 1143: {'lr': 0.00028575000000000003, 'samples': 585728, 'steps': 1143, 'loss/train': 5.325235843658447} +03/03/2022 14:45:56 - INFO - codeparrot_training - Step 1144: {'lr': 0.00028599999999999996, 'samples': 586240, 'steps': 1144, 'loss/train': 5.025601863861084} +03/03/2022 14:45:59 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/03/2022 14:46:01 - INFO - codeparrot_training - Step 1145: {'lr': 0.00028625, 'samples': 586752, 'steps': 1145, 'loss/train': 5.752750873565674} +03/03/2022 14:46:04 - INFO - codeparrot_training - Step 1146: {'lr': 0.00028649999999999997, 'samples': 587264, 'steps': 1146, 'loss/train': 5.020471572875977} +03/03/2022 14:46:07 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/03/2022 14:46:10 - INFO - codeparrot_training - Step 1147: {'lr': 0.00028675, 'samples': 587776, 'steps': 1147, 'loss/train': 4.9661946296691895} +03/03/2022 14:46:13 - INFO - codeparrot_training - Step 1148: {'lr': 0.000287, 'samples': 588288, 'steps': 1148, 'loss/train': 5.066573619842529} +03/03/2022 14:46:16 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/03/2022 14:46:19 - INFO - codeparrot_training - Step 1149: {'lr': 0.00028725, 'samples': 588800, 'steps': 1149, 'loss/train': 4.306407451629639} +03/03/2022 14:46:22 - INFO - codeparrot_training - Step 1150: {'lr': 0.0002875, 'samples': 589312, 'steps': 1150, 'loss/train': 4.3311920166015625} +03/03/2022 14:46:25 - INFO - codeparrot_training - Step 1151: {'lr': 0.00028775000000000003, 'samples': 589824, 'steps': 1151, 'loss/train': 2.07806396484375} +03/03/2022 14:46:25 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/03/2022 14:46:30 - INFO - codeparrot_training - Step 1152: {'lr': 0.000288, 'samples': 590336, 'steps': 1152, 'loss/train': 4.967807769775391} +03/03/2022 14:46:33 - INFO - codeparrot_training - Step 1153: {'lr': 0.00028825, 'samples': 590848, 'steps': 1153, 'loss/train': 4.149472236633301} +03/03/2022 14:46:33 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/03/2022 14:46:39 - INFO - codeparrot_training - Step 1154: {'lr': 0.00028849999999999997, 'samples': 591360, 'steps': 1154, 'loss/train': 4.931750297546387} +03/03/2022 14:46:42 - INFO - codeparrot_training - Step 1155: {'lr': 0.00028875, 'samples': 591872, 'steps': 1155, 'loss/train': 4.433856010437012} +03/03/2022 14:46:42 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/03/2022 14:46:47 - INFO - codeparrot_training - Step 1156: {'lr': 0.000289, 'samples': 592384, 'steps': 1156, 'loss/train': 5.270572662353516} +03/03/2022 14:46:51 - INFO - codeparrot_training - Step 1157: {'lr': 0.00028925, 'samples': 592896, 'steps': 1157, 'loss/train': 4.90167760848999} +03/03/2022 14:46:51 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/03/2022 14:46:56 - INFO - codeparrot_training - Step 1158: {'lr': 0.0002895, 'samples': 593408, 'steps': 1158, 'loss/train': 4.335346221923828} +03/03/2022 14:46:59 - INFO - codeparrot_training - Step 1159: {'lr': 0.00028975, 'samples': 593920, 'steps': 1159, 'loss/train': 4.507898330688477} +03/03/2022 14:46:59 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/03/2022 14:47:04 - INFO - codeparrot_training - Step 1160: {'lr': 0.00029, 'samples': 594432, 'steps': 1160, 'loss/train': 5.678080081939697} +03/03/2022 14:47:08 - INFO - codeparrot_training - Step 1161: {'lr': 0.00029025000000000003, 'samples': 594944, 'steps': 1161, 'loss/train': 4.063498497009277} +03/03/2022 14:47:09 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/03/2022 14:47:13 - INFO - codeparrot_training - Step 1162: {'lr': 0.00029049999999999996, 'samples': 595456, 'steps': 1162, 'loss/train': 5.084910869598389} +03/03/2022 14:47:16 - INFO - codeparrot_training - Step 1163: {'lr': 0.00029075, 'samples': 595968, 'steps': 1163, 'loss/train': 4.5967230796813965} +03/03/2022 14:47:17 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/03/2022 14:47:21 - INFO - codeparrot_training - Step 1164: {'lr': 0.00029099999999999997, 'samples': 596480, 'steps': 1164, 'loss/train': 1.8424994945526123} +03/03/2022 14:47:25 - INFO - codeparrot_training - Step 1165: {'lr': 0.00029125, 'samples': 596992, 'steps': 1165, 'loss/train': 4.506917953491211} +03/03/2022 14:47:25 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/03/2022 14:47:30 - INFO - codeparrot_training - Step 1166: {'lr': 0.0002915, 'samples': 597504, 'steps': 1166, 'loss/train': 5.436644554138184} +03/03/2022 14:47:33 - INFO - codeparrot_training - Step 1167: {'lr': 0.00029175, 'samples': 598016, 'steps': 1167, 'loss/train': 4.689492225646973} +03/03/2022 14:47:34 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/03/2022 14:47:38 - INFO - codeparrot_training - Step 1168: {'lr': 0.000292, 'samples': 598528, 'steps': 1168, 'loss/train': 4.775059223175049} +03/03/2022 14:47:41 - INFO - codeparrot_training - Step 1169: {'lr': 0.00029225000000000003, 'samples': 599040, 'steps': 1169, 'loss/train': 3.9990384578704834} +03/03/2022 14:47:42 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/03/2022 14:47:47 - INFO - codeparrot_training - Step 1170: {'lr': 0.0002925, 'samples': 599552, 'steps': 1170, 'loss/train': 5.478631973266602} +03/03/2022 14:47:50 - INFO - codeparrot_training - Step 1171: {'lr': 0.00029275000000000004, 'samples': 600064, 'steps': 1171, 'loss/train': 4.522140026092529} +03/03/2022 14:47:51 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/03/2022 14:47:55 - INFO - codeparrot_training - Step 1172: {'lr': 0.00029299999999999997, 'samples': 600576, 'steps': 1172, 'loss/train': 4.634247779846191} +03/03/2022 14:47:58 - INFO - codeparrot_training - Step 1173: {'lr': 0.00029325, 'samples': 601088, 'steps': 1173, 'loss/train': 4.749699592590332} +03/03/2022 14:47:59 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/03/2022 14:48:03 - INFO - codeparrot_training - Step 1174: {'lr': 0.0002935, 'samples': 601600, 'steps': 1174, 'loss/train': 5.154363632202148} +03/03/2022 14:48:07 - INFO - codeparrot_training - Step 1175: {'lr': 0.00029375, 'samples': 602112, 'steps': 1175, 'loss/train': 5.280133247375488} +03/03/2022 14:48:07 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/03/2022 14:48:12 - INFO - codeparrot_training - Step 1176: {'lr': 0.000294, 'samples': 602624, 'steps': 1176, 'loss/train': 4.3173604011535645} +03/03/2022 14:48:15 - INFO - codeparrot_training - Step 1177: {'lr': 0.00029425, 'samples': 603136, 'steps': 1177, 'loss/train': 5.089110851287842} +03/03/2022 14:48:15 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/03/2022 14:48:20 - INFO - codeparrot_training - Step 1178: {'lr': 0.0002945, 'samples': 603648, 'steps': 1178, 'loss/train': 5.304769992828369} +03/03/2022 14:48:23 - INFO - codeparrot_training - Step 1179: {'lr': 0.00029475000000000004, 'samples': 604160, 'steps': 1179, 'loss/train': 4.326340198516846} +03/03/2022 14:48:23 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/03/2022 14:48:29 - INFO - codeparrot_training - Step 1180: {'lr': 0.000295, 'samples': 604672, 'steps': 1180, 'loss/train': 5.235725402832031} +03/03/2022 14:48:32 - INFO - codeparrot_training - Step 1181: {'lr': 0.00029525, 'samples': 605184, 'steps': 1181, 'loss/train': 4.663435459136963} +03/03/2022 14:48:32 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/03/2022 14:48:37 - INFO - codeparrot_training - Step 1182: {'lr': 0.00029549999999999997, 'samples': 605696, 'steps': 1182, 'loss/train': 4.434954643249512} +03/03/2022 14:48:40 - INFO - codeparrot_training - Step 1183: {'lr': 0.00029575, 'samples': 606208, 'steps': 1183, 'loss/train': 4.7711710929870605} +03/03/2022 14:48:40 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/03/2022 14:48:45 - INFO - codeparrot_training - Step 1184: {'lr': 0.000296, 'samples': 606720, 'steps': 1184, 'loss/train': 5.68754243850708} +03/03/2022 14:48:49 - INFO - codeparrot_training - Step 1185: {'lr': 0.00029625, 'samples': 607232, 'steps': 1185, 'loss/train': 4.661207675933838} +03/03/2022 14:48:49 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/03/2022 14:48:54 - INFO - codeparrot_training - Step 1186: {'lr': 0.0002965, 'samples': 607744, 'steps': 1186, 'loss/train': 5.469995021820068} +03/03/2022 14:48:57 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/03/2022 14:48:59 - INFO - codeparrot_training - Step 1187: {'lr': 0.00029675000000000003, 'samples': 608256, 'steps': 1187, 'loss/train': 5.257544994354248} +03/03/2022 14:49:02 - INFO - codeparrot_training - Step 1188: {'lr': 0.000297, 'samples': 608768, 'steps': 1188, 'loss/train': 3.3908140659332275} +03/03/2022 14:49:05 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/03/2022 14:49:08 - INFO - codeparrot_training - Step 1189: {'lr': 0.00029725000000000004, 'samples': 609280, 'steps': 1189, 'loss/train': 5.660294055938721} +03/03/2022 14:49:11 - INFO - codeparrot_training - Step 1190: {'lr': 0.00029749999999999997, 'samples': 609792, 'steps': 1190, 'loss/train': 4.143644332885742} +03/03/2022 14:49:13 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/03/2022 14:49:16 - INFO - codeparrot_training - Step 1191: {'lr': 0.00029775, 'samples': 610304, 'steps': 1191, 'loss/train': 4.107789039611816} +03/03/2022 14:49:19 - INFO - codeparrot_training - Step 1192: {'lr': 0.000298, 'samples': 610816, 'steps': 1192, 'loss/train': 3.876453399658203} +03/03/2022 14:49:22 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/03/2022 14:49:25 - INFO - codeparrot_training - Step 1193: {'lr': 0.00029825, 'samples': 611328, 'steps': 1193, 'loss/train': 4.736563205718994} +03/03/2022 14:49:28 - INFO - codeparrot_training - Step 1194: {'lr': 0.0002985, 'samples': 611840, 'steps': 1194, 'loss/train': 4.076013565063477} +03/03/2022 14:49:31 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/03/2022 14:49:33 - INFO - codeparrot_training - Step 1195: {'lr': 0.00029875, 'samples': 612352, 'steps': 1195, 'loss/train': 5.8624067306518555} +03/03/2022 14:49:36 - INFO - codeparrot_training - Step 1196: {'lr': 0.000299, 'samples': 612864, 'steps': 1196, 'loss/train': 5.197993278503418} +03/03/2022 14:49:39 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/03/2022 14:49:41 - INFO - codeparrot_training - Step 1197: {'lr': 0.00029925000000000004, 'samples': 613376, 'steps': 1197, 'loss/train': 3.7101566791534424} +03/03/2022 14:49:45 - INFO - codeparrot_training - Step 1198: {'lr': 0.0002995, 'samples': 613888, 'steps': 1198, 'loss/train': 4.696809768676758} +03/03/2022 14:49:47 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/03/2022 14:49:50 - INFO - codeparrot_training - Step 1199: {'lr': 0.00029975000000000005, 'samples': 614400, 'steps': 1199, 'loss/train': 4.844315528869629} +03/03/2022 14:49:53 - INFO - codeparrot_training - Step 1200: {'lr': 0.0003, 'samples': 614912, 'steps': 1200, 'loss/train': 4.472458839416504} +03/03/2022 14:49:56 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/03/2022 14:49:58 - INFO - codeparrot_training - Step 1201: {'lr': 0.00030025, 'samples': 615424, 'steps': 1201, 'loss/train': 3.9490694999694824} +03/03/2022 14:50:01 - INFO - codeparrot_training - Step 1202: {'lr': 0.0003005, 'samples': 615936, 'steps': 1202, 'loss/train': 3.626290798187256} +03/03/2022 14:50:04 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/03/2022 14:50:07 - INFO - codeparrot_training - Step 1203: {'lr': 0.00030075, 'samples': 616448, 'steps': 1203, 'loss/train': 4.3813018798828125} +03/03/2022 14:50:10 - INFO - codeparrot_training - Step 1204: {'lr': 0.000301, 'samples': 616960, 'steps': 1204, 'loss/train': 4.078187942504883} +03/03/2022 14:50:12 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/03/2022 14:50:15 - INFO - codeparrot_training - Step 1205: {'lr': 0.00030125000000000003, 'samples': 617472, 'steps': 1205, 'loss/train': 4.892630577087402} +03/03/2022 14:50:18 - INFO - codeparrot_training - Step 1206: {'lr': 0.0003015, 'samples': 617984, 'steps': 1206, 'loss/train': 4.29644775390625} +03/03/2022 14:50:21 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/03/2022 14:50:23 - INFO - codeparrot_training - Step 1207: {'lr': 0.00030175000000000004, 'samples': 618496, 'steps': 1207, 'loss/train': 4.6893815994262695} +03/03/2022 14:50:27 - INFO - codeparrot_training - Step 1208: {'lr': 0.000302, 'samples': 619008, 'steps': 1208, 'loss/train': 1.2830274105072021} +03/03/2022 14:50:29 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/03/2022 14:50:32 - INFO - codeparrot_training - Step 1209: {'lr': 0.00030225, 'samples': 619520, 'steps': 1209, 'loss/train': 4.533934593200684} +03/03/2022 14:50:35 - INFO - codeparrot_training - Step 1210: {'lr': 0.0003025, 'samples': 620032, 'steps': 1210, 'loss/train': 4.98966121673584} +03/03/2022 14:50:38 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/03/2022 14:50:40 - INFO - codeparrot_training - Step 1211: {'lr': 0.00030275, 'samples': 620544, 'steps': 1211, 'loss/train': 4.9488630294799805} +03/03/2022 14:50:44 - INFO - codeparrot_training - Step 1212: {'lr': 0.000303, 'samples': 621056, 'steps': 1212, 'loss/train': 4.870970249176025} +03/03/2022 14:50:46 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/03/2022 14:50:49 - INFO - codeparrot_training - Step 1213: {'lr': 0.00030325, 'samples': 621568, 'steps': 1213, 'loss/train': 3.9525699615478516} +03/03/2022 14:50:52 - INFO - codeparrot_training - Step 1214: {'lr': 0.0003035, 'samples': 622080, 'steps': 1214, 'loss/train': 3.038872003555298} +03/03/2022 14:50:57 - INFO - codeparrot_training - Step 1215: {'lr': 0.00030375000000000004, 'samples': 622592, 'steps': 1215, 'loss/train': 4.5737481117248535} +03/03/2022 14:51:00 - INFO - codeparrot_training - Step 1216: {'lr': 0.000304, 'samples': 623104, 'steps': 1216, 'loss/train': 4.942699432373047} +03/03/2022 14:51:02 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/03/2022 14:51:06 - INFO - codeparrot_training - Step 1217: {'lr': 0.00030425000000000005, 'samples': 623616, 'steps': 1217, 'loss/train': 3.656524181365967} +03/03/2022 14:51:09 - INFO - codeparrot_training - Step 1218: {'lr': 0.0003045, 'samples': 624128, 'steps': 1218, 'loss/train': 4.410141468048096} +03/03/2022 14:51:11 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/03/2022 14:51:14 - INFO - codeparrot_training - Step 1219: {'lr': 0.00030475, 'samples': 624640, 'steps': 1219, 'loss/train': 4.2339558601379395} +03/03/2022 14:51:17 - INFO - codeparrot_training - Step 1220: {'lr': 0.000305, 'samples': 625152, 'steps': 1220, 'loss/train': 4.279458999633789} +03/03/2022 14:51:19 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 14:51:22 - INFO - codeparrot_training - Step 1221: {'lr': 0.00030525, 'samples': 625664, 'steps': 1221, 'loss/train': 4.509191513061523} +03/03/2022 14:51:26 - INFO - codeparrot_training - Step 1222: {'lr': 0.0003055, 'samples': 626176, 'steps': 1222, 'loss/train': 3.9416255950927734} +03/03/2022 14:51:27 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/03/2022 14:51:31 - INFO - codeparrot_training - Step 1223: {'lr': 0.00030575000000000003, 'samples': 626688, 'steps': 1223, 'loss/train': 4.671177387237549} +03/03/2022 14:51:34 - INFO - codeparrot_training - Step 1224: {'lr': 0.000306, 'samples': 627200, 'steps': 1224, 'loss/train': 5.080960273742676} +03/03/2022 14:51:36 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/03/2022 14:51:39 - INFO - codeparrot_training - Step 1225: {'lr': 0.00030625000000000004, 'samples': 627712, 'steps': 1225, 'loss/train': 4.823641777038574} +03/03/2022 14:51:42 - INFO - codeparrot_training - Step 1226: {'lr': 0.0003065, 'samples': 628224, 'steps': 1226, 'loss/train': 5.284897804260254} +03/03/2022 14:51:44 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/03/2022 14:51:48 - INFO - codeparrot_training - Step 1227: {'lr': 0.00030675, 'samples': 628736, 'steps': 1227, 'loss/train': 4.29206657409668} +03/03/2022 14:51:51 - INFO - codeparrot_training - Step 1228: {'lr': 0.000307, 'samples': 629248, 'steps': 1228, 'loss/train': 3.6068408489227295} +03/03/2022 14:51:53 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/03/2022 14:51:56 - INFO - codeparrot_training - Step 1229: {'lr': 0.00030725, 'samples': 629760, 'steps': 1229, 'loss/train': 5.08143949508667} +03/03/2022 14:51:59 - INFO - codeparrot_training - Step 1230: {'lr': 0.0003075, 'samples': 630272, 'steps': 1230, 'loss/train': 3.951544761657715} +03/03/2022 14:52:01 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/03/2022 14:52:05 - INFO - codeparrot_training - Step 1231: {'lr': 0.00030775, 'samples': 630784, 'steps': 1231, 'loss/train': 4.961446285247803} +03/03/2022 14:52:08 - INFO - codeparrot_training - Step 1232: {'lr': 0.000308, 'samples': 631296, 'steps': 1232, 'loss/train': 3.8938488960266113} +03/03/2022 14:52:09 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/03/2022 14:52:13 - INFO - codeparrot_training - Step 1233: {'lr': 0.00030825000000000004, 'samples': 631808, 'steps': 1233, 'loss/train': 5.353794574737549} +03/03/2022 14:52:16 - INFO - codeparrot_training - Step 1234: {'lr': 0.0003085, 'samples': 632320, 'steps': 1234, 'loss/train': 4.088746547698975} +03/03/2022 14:52:18 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/03/2022 14:52:22 - INFO - codeparrot_training - Step 1235: {'lr': 0.00030875000000000005, 'samples': 632832, 'steps': 1235, 'loss/train': 3.818260669708252} +03/03/2022 14:52:25 - INFO - codeparrot_training - Step 1236: {'lr': 0.00030900000000000003, 'samples': 633344, 'steps': 1236, 'loss/train': 5.052768707275391} +03/03/2022 14:52:28 - INFO - codeparrot_training - Step 1237: {'lr': 0.00030925, 'samples': 633856, 'steps': 1237, 'loss/train': 4.26099967956543} +03/03/2022 14:52:28 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/03/2022 14:52:33 - INFO - codeparrot_training - Step 1238: {'lr': 0.0003095, 'samples': 634368, 'steps': 1238, 'loss/train': 3.146836996078491} +03/03/2022 14:52:36 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/03/2022 14:52:38 - INFO - codeparrot_training - Step 1239: {'lr': 0.00030975, 'samples': 634880, 'steps': 1239, 'loss/train': 4.403883457183838} +03/03/2022 14:52:42 - INFO - codeparrot_training - Step 1240: {'lr': 0.00031, 'samples': 635392, 'steps': 1240, 'loss/train': 4.727677345275879} +03/03/2022 14:52:45 - INFO - codeparrot_training - Step 1241: {'lr': 0.00031025000000000003, 'samples': 635904, 'steps': 1241, 'loss/train': 4.085188388824463} +03/03/2022 14:52:45 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/03/2022 14:52:50 - INFO - codeparrot_training - Step 1242: {'lr': 0.0003105, 'samples': 636416, 'steps': 1242, 'loss/train': 4.715884685516357} +03/03/2022 14:52:53 - INFO - codeparrot_training - Step 1243: {'lr': 0.00031075000000000005, 'samples': 636928, 'steps': 1243, 'loss/train': 4.942862033843994} +03/03/2022 14:52:53 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/03/2022 14:52:59 - INFO - codeparrot_training - Step 1244: {'lr': 0.000311, 'samples': 637440, 'steps': 1244, 'loss/train': 4.894168853759766} +03/03/2022 14:53:02 - INFO - codeparrot_training - Step 1245: {'lr': 0.00031125000000000006, 'samples': 637952, 'steps': 1245, 'loss/train': 4.579377174377441} +03/03/2022 14:53:02 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/03/2022 14:53:07 - INFO - codeparrot_training - Step 1246: {'lr': 0.0003115, 'samples': 638464, 'steps': 1246, 'loss/train': 3.6056180000305176} +03/03/2022 14:53:10 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/03/2022 14:53:12 - INFO - codeparrot_training - Step 1247: {'lr': 0.00031175, 'samples': 638976, 'steps': 1247, 'loss/train': 4.984847068786621} +03/03/2022 14:53:15 - INFO - codeparrot_training - Step 1248: {'lr': 0.000312, 'samples': 639488, 'steps': 1248, 'loss/train': 3.766439437866211} +03/03/2022 14:53:18 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/03/2022 14:53:21 - INFO - codeparrot_training - Step 1249: {'lr': 0.00031225000000000003, 'samples': 640000, 'steps': 1249, 'loss/train': 4.63666296005249} +03/03/2022 14:53:24 - INFO - codeparrot_training - Step 1250: {'lr': 0.0003125, 'samples': 640512, 'steps': 1250, 'loss/train': 4.066371917724609} +03/03/2022 14:53:27 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/03/2022 14:53:29 - INFO - codeparrot_training - Step 1251: {'lr': 0.00031275, 'samples': 641024, 'steps': 1251, 'loss/train': 4.966028213500977} +03/03/2022 14:53:32 - INFO - codeparrot_training - Step 1252: {'lr': 0.000313, 'samples': 641536, 'steps': 1252, 'loss/train': 4.340587615966797} +03/03/2022 14:53:35 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/03/2022 14:53:37 - INFO - codeparrot_training - Step 1253: {'lr': 0.00031325, 'samples': 642048, 'steps': 1253, 'loss/train': 4.897738933563232} +03/03/2022 14:53:41 - INFO - codeparrot_training - Step 1254: {'lr': 0.00031350000000000003, 'samples': 642560, 'steps': 1254, 'loss/train': 4.782534122467041} +03/03/2022 14:53:44 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/03/2022 14:53:46 - INFO - codeparrot_training - Step 1255: {'lr': 0.00031374999999999996, 'samples': 643072, 'steps': 1255, 'loss/train': 4.573090553283691} +03/03/2022 14:53:49 - INFO - codeparrot_training - Step 1256: {'lr': 0.000314, 'samples': 643584, 'steps': 1256, 'loss/train': 4.770345211029053} +03/03/2022 14:53:52 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/03/2022 14:53:55 - INFO - codeparrot_training - Step 1257: {'lr': 0.00031424999999999997, 'samples': 644096, 'steps': 1257, 'loss/train': 3.4803128242492676} +03/03/2022 14:53:58 - INFO - codeparrot_training - Step 1258: {'lr': 0.0003145, 'samples': 644608, 'steps': 1258, 'loss/train': 2.8410775661468506} +03/03/2022 14:54:00 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/03/2022 14:54:03 - INFO - codeparrot_training - Step 1259: {'lr': 0.00031475, 'samples': 645120, 'steps': 1259, 'loss/train': 4.5956902503967285} +03/03/2022 14:54:06 - INFO - codeparrot_training - Step 1260: {'lr': 0.000315, 'samples': 645632, 'steps': 1260, 'loss/train': 3.090806722640991} +03/03/2022 14:54:09 - INFO - codeparrot_training - Step 1261: {'lr': 0.00031525, 'samples': 646144, 'steps': 1261, 'loss/train': 3.7441775798797607} +03/03/2022 14:54:09 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/03/2022 14:54:15 - INFO - codeparrot_training - Step 1262: {'lr': 0.0003155, 'samples': 646656, 'steps': 1262, 'loss/train': 4.384487152099609} +03/03/2022 14:54:17 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/03/2022 14:54:20 - INFO - codeparrot_training - Step 1263: {'lr': 0.00031575, 'samples': 647168, 'steps': 1263, 'loss/train': 3.6381921768188477} +03/03/2022 14:54:23 - INFO - codeparrot_training - Step 1264: {'lr': 0.000316, 'samples': 647680, 'steps': 1264, 'loss/train': 4.461718559265137} +03/03/2022 14:54:26 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/03/2022 14:54:28 - INFO - codeparrot_training - Step 1265: {'lr': 0.00031624999999999996, 'samples': 648192, 'steps': 1265, 'loss/train': 4.316349983215332} +03/03/2022 14:54:31 - INFO - codeparrot_training - Step 1266: {'lr': 0.0003165, 'samples': 648704, 'steps': 1266, 'loss/train': 3.2537786960601807} +03/03/2022 14:54:34 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 14:54:37 - INFO - codeparrot_training - Step 1267: {'lr': 0.00031675, 'samples': 649216, 'steps': 1267, 'loss/train': 4.303836345672607} +03/03/2022 14:54:40 - INFO - codeparrot_training - Step 1268: {'lr': 0.000317, 'samples': 649728, 'steps': 1268, 'loss/train': 4.006771564483643} +03/03/2022 14:54:42 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/03/2022 14:54:45 - INFO - codeparrot_training - Step 1269: {'lr': 0.00031725, 'samples': 650240, 'steps': 1269, 'loss/train': 4.61845064163208} +03/03/2022 14:54:48 - INFO - codeparrot_training - Step 1270: {'lr': 0.0003175, 'samples': 650752, 'steps': 1270, 'loss/train': 4.1503729820251465} +03/03/2022 14:54:50 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/03/2022 14:54:54 - INFO - codeparrot_training - Step 1271: {'lr': 0.00031775, 'samples': 651264, 'steps': 1271, 'loss/train': 4.021304130554199} +03/03/2022 14:54:57 - INFO - codeparrot_training - Step 1272: {'lr': 0.00031800000000000003, 'samples': 651776, 'steps': 1272, 'loss/train': 4.838562488555908} +03/03/2022 14:55:00 - INFO - codeparrot_training - Step 1273: {'lr': 0.00031825, 'samples': 652288, 'steps': 1273, 'loss/train': 5.103586673736572} +03/03/2022 14:55:01 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/03/2022 14:55:05 - INFO - codeparrot_training - Step 1274: {'lr': 0.0003185, 'samples': 652800, 'steps': 1274, 'loss/train': 3.72918963432312} +03/03/2022 14:55:09 - INFO - codeparrot_training - Step 1275: {'lr': 0.00031874999999999997, 'samples': 653312, 'steps': 1275, 'loss/train': 4.720226287841797} +03/03/2022 14:55:09 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/03/2022 14:55:14 - INFO - codeparrot_training - Step 1276: {'lr': 0.000319, 'samples': 653824, 'steps': 1276, 'loss/train': 5.096245288848877} +03/03/2022 14:55:17 - INFO - codeparrot_training - Step 1277: {'lr': 0.00031925, 'samples': 654336, 'steps': 1277, 'loss/train': 4.87737512588501} +03/03/2022 14:55:18 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/03/2022 14:55:22 - INFO - codeparrot_training - Step 1278: {'lr': 0.0003195, 'samples': 654848, 'steps': 1278, 'loss/train': 4.559388637542725} +03/03/2022 14:55:26 - INFO - codeparrot_training - Step 1279: {'lr': 0.00031975, 'samples': 655360, 'steps': 1279, 'loss/train': 4.643110752105713} +03/03/2022 14:55:26 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/03/2022 14:55:31 - INFO - codeparrot_training - Step 1280: {'lr': 0.00032, 'samples': 655872, 'steps': 1280, 'loss/train': 5.080116271972656} +03/03/2022 14:55:34 - INFO - codeparrot_training - Step 1281: {'lr': 0.00032025, 'samples': 656384, 'steps': 1281, 'loss/train': 4.069483280181885} +03/03/2022 14:55:35 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/03/2022 14:55:40 - INFO - codeparrot_training - Step 1282: {'lr': 0.00032050000000000004, 'samples': 656896, 'steps': 1282, 'loss/train': 4.1711602210998535} +03/03/2022 14:55:43 - INFO - codeparrot_training - Step 1283: {'lr': 0.00032074999999999996, 'samples': 657408, 'steps': 1283, 'loss/train': 8.397176742553711} +03/03/2022 14:55:44 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/03/2022 14:55:48 - INFO - codeparrot_training - Step 1284: {'lr': 0.000321, 'samples': 657920, 'steps': 1284, 'loss/train': 4.191243648529053} +03/03/2022 14:55:51 - INFO - codeparrot_training - Step 1285: {'lr': 0.00032125, 'samples': 658432, 'steps': 1285, 'loss/train': 3.708660840988159} +03/03/2022 14:55:53 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 14:55:57 - INFO - codeparrot_training - Step 1286: {'lr': 0.0003215, 'samples': 658944, 'steps': 1286, 'loss/train': 4.2151970863342285} +03/03/2022 14:56:00 - INFO - codeparrot_training - Step 1287: {'lr': 0.00032175, 'samples': 659456, 'steps': 1287, 'loss/train': 4.977806091308594} +03/03/2022 14:56:01 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/03/2022 14:56:05 - INFO - codeparrot_training - Step 1288: {'lr': 0.000322, 'samples': 659968, 'steps': 1288, 'loss/train': 3.4825994968414307} +03/03/2022 14:56:08 - INFO - codeparrot_training - Step 1289: {'lr': 0.00032225, 'samples': 660480, 'steps': 1289, 'loss/train': 4.8441691398620605} +03/03/2022 14:56:09 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 14:56:13 - INFO - codeparrot_training - Step 1290: {'lr': 0.00032250000000000003, 'samples': 660992, 'steps': 1290, 'loss/train': 4.4799323081970215} +03/03/2022 14:56:16 - INFO - codeparrot_training - Step 1291: {'lr': 0.00032275, 'samples': 661504, 'steps': 1291, 'loss/train': 4.314608573913574} +03/03/2022 14:56:17 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 14:56:22 - INFO - codeparrot_training - Step 1292: {'lr': 0.000323, 'samples': 662016, 'steps': 1292, 'loss/train': 4.211328983306885} +03/03/2022 14:56:25 - INFO - codeparrot_training - Step 1293: {'lr': 0.00032324999999999997, 'samples': 662528, 'steps': 1293, 'loss/train': 4.245683670043945} +03/03/2022 14:56:26 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/03/2022 14:56:30 - INFO - codeparrot_training - Step 1294: {'lr': 0.0003235, 'samples': 663040, 'steps': 1294, 'loss/train': 4.791468620300293} +03/03/2022 14:56:33 - INFO - codeparrot_training - Step 1295: {'lr': 0.00032375, 'samples': 663552, 'steps': 1295, 'loss/train': 4.671751499176025} +03/03/2022 14:56:34 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/03/2022 14:56:38 - INFO - codeparrot_training - Step 1296: {'lr': 0.000324, 'samples': 664064, 'steps': 1296, 'loss/train': 1.9223575592041016} +03/03/2022 14:56:42 - INFO - codeparrot_training - Step 1297: {'lr': 0.00032425, 'samples': 664576, 'steps': 1297, 'loss/train': 3.6483588218688965} +03/03/2022 14:56:42 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/03/2022 14:56:47 - INFO - codeparrot_training - Step 1298: {'lr': 0.00032450000000000003, 'samples': 665088, 'steps': 1298, 'loss/train': 4.501202583312988} +03/03/2022 14:56:50 - INFO - codeparrot_training - Step 1299: {'lr': 0.00032475, 'samples': 665600, 'steps': 1299, 'loss/train': 4.025876522064209} +03/03/2022 14:56:50 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/03/2022 14:56:55 - INFO - codeparrot_training - Step 1300: {'lr': 0.00032500000000000004, 'samples': 666112, 'steps': 1300, 'loss/train': 3.9416229724884033} +03/03/2022 14:56:58 - INFO - codeparrot_training - Step 1301: {'lr': 0.00032524999999999996, 'samples': 666624, 'steps': 1301, 'loss/train': 4.250044822692871} +03/03/2022 14:56:58 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 14:57:04 - INFO - codeparrot_training - Step 1302: {'lr': 0.0003255, 'samples': 667136, 'steps': 1302, 'loss/train': 4.65021276473999} +03/03/2022 14:57:07 - INFO - codeparrot_training - Step 1303: {'lr': 0.00032575, 'samples': 667648, 'steps': 1303, 'loss/train': 5.90805721282959} +03/03/2022 14:57:07 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/03/2022 14:57:12 - INFO - codeparrot_training - Step 1304: {'lr': 0.000326, 'samples': 668160, 'steps': 1304, 'loss/train': 1.5903449058532715} +03/03/2022 14:57:15 - INFO - codeparrot_training - Step 1305: {'lr': 0.00032625, 'samples': 668672, 'steps': 1305, 'loss/train': 4.671641826629639} +03/03/2022 14:57:15 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/03/2022 14:57:21 - INFO - codeparrot_training - Step 1306: {'lr': 0.0003265, 'samples': 669184, 'steps': 1306, 'loss/train': 2.0237982273101807} +03/03/2022 14:57:23 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 14:57:26 - INFO - codeparrot_training - Step 1307: {'lr': 0.00032675, 'samples': 669696, 'steps': 1307, 'loss/train': 3.784977674484253} +03/03/2022 14:57:29 - INFO - codeparrot_training - Step 1308: {'lr': 0.00032700000000000003, 'samples': 670208, 'steps': 1308, 'loss/train': 4.29042387008667} +03/03/2022 14:57:31 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/03/2022 14:57:34 - INFO - codeparrot_training - Step 1309: {'lr': 0.00032725, 'samples': 670720, 'steps': 1309, 'loss/train': 5.2600836753845215} +03/03/2022 14:57:37 - INFO - codeparrot_training - Step 1310: {'lr': 0.00032750000000000005, 'samples': 671232, 'steps': 1310, 'loss/train': 4.140377044677734} +03/03/2022 14:57:40 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/03/2022 14:57:43 - INFO - codeparrot_training - Step 1311: {'lr': 0.00032774999999999997, 'samples': 671744, 'steps': 1311, 'loss/train': 4.405689716339111} +03/03/2022 14:57:46 - INFO - codeparrot_training - Step 1312: {'lr': 0.000328, 'samples': 672256, 'steps': 1312, 'loss/train': 4.767686367034912} +03/03/2022 14:57:48 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/03/2022 14:57:51 - INFO - codeparrot_training - Step 1313: {'lr': 0.00032825, 'samples': 672768, 'steps': 1313, 'loss/train': 4.756639003753662} +03/03/2022 14:57:54 - INFO - codeparrot_training - Step 1314: {'lr': 0.0003285, 'samples': 673280, 'steps': 1314, 'loss/train': 4.535913944244385} +03/03/2022 14:57:56 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/03/2022 14:57:59 - INFO - codeparrot_training - Step 1315: {'lr': 0.00032875, 'samples': 673792, 'steps': 1315, 'loss/train': 8.396307945251465} +03/03/2022 14:58:03 - INFO - codeparrot_training - Step 1316: {'lr': 0.00032900000000000003, 'samples': 674304, 'steps': 1316, 'loss/train': 3.8972136974334717} +03/03/2022 14:58:05 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/03/2022 14:58:08 - INFO - codeparrot_training - Step 1317: {'lr': 0.00032925, 'samples': 674816, 'steps': 1317, 'loss/train': 4.135924339294434} +03/03/2022 14:58:11 - INFO - codeparrot_training - Step 1318: {'lr': 0.00032950000000000004, 'samples': 675328, 'steps': 1318, 'loss/train': 4.443324565887451} +03/03/2022 14:58:14 - INFO - codeparrot_training - Step 1319: {'lr': 0.00032975, 'samples': 675840, 'steps': 1319, 'loss/train': 4.235952854156494} +03/03/2022 14:58:14 - INFO - codeparrot_training - Skipping example with length 69 (seq_length=1024) +03/03/2022 14:58:20 - INFO - codeparrot_training - Step 1320: {'lr': 0.00033, 'samples': 676352, 'steps': 1320, 'loss/train': 2.607849597930908} +03/03/2022 14:58:23 - INFO - codeparrot_training - Step 1321: {'lr': 0.00033025, 'samples': 676864, 'steps': 1321, 'loss/train': 4.970389366149902} +03/03/2022 14:58:23 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/03/2022 14:58:28 - INFO - codeparrot_training - Step 1322: {'lr': 0.0003305, 'samples': 677376, 'steps': 1322, 'loss/train': 4.476796627044678} +03/03/2022 14:58:31 - INFO - codeparrot_training - Step 1323: {'lr': 0.00033075, 'samples': 677888, 'steps': 1323, 'loss/train': 4.577625751495361} +03/03/2022 14:58:31 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/03/2022 14:58:37 - INFO - codeparrot_training - Step 1324: {'lr': 0.000331, 'samples': 678400, 'steps': 1324, 'loss/train': 2.9740169048309326} +03/03/2022 14:58:40 - INFO - codeparrot_training - Step 1325: {'lr': 0.00033125, 'samples': 678912, 'steps': 1325, 'loss/train': 4.472623348236084} +03/03/2022 14:58:40 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/03/2022 14:58:45 - INFO - codeparrot_training - Step 1326: {'lr': 0.00033150000000000003, 'samples': 679424, 'steps': 1326, 'loss/train': 3.589266777038574} +03/03/2022 14:58:48 - INFO - codeparrot_training - Step 1327: {'lr': 0.00033175, 'samples': 679936, 'steps': 1327, 'loss/train': 4.105099678039551} +03/03/2022 14:58:48 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/03/2022 14:58:54 - INFO - codeparrot_training - Step 1328: {'lr': 0.00033200000000000005, 'samples': 680448, 'steps': 1328, 'loss/train': 4.997483253479004} +03/03/2022 14:58:56 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/03/2022 14:58:59 - INFO - codeparrot_training - Step 1329: {'lr': 0.00033224999999999997, 'samples': 680960, 'steps': 1329, 'loss/train': 3.461973190307617} +03/03/2022 14:59:02 - INFO - codeparrot_training - Step 1330: {'lr': 0.0003325, 'samples': 681472, 'steps': 1330, 'loss/train': 4.032837867736816} +03/03/2022 14:59:06 - INFO - codeparrot_training - Step 1331: {'lr': 0.00033275, 'samples': 681984, 'steps': 1331, 'loss/train': 3.4902446269989014} +03/03/2022 14:59:07 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/03/2022 14:59:11 - INFO - codeparrot_training - Step 1332: {'lr': 0.000333, 'samples': 682496, 'steps': 1332, 'loss/train': 3.956376791000366} +03/03/2022 14:59:14 - INFO - codeparrot_training - Step 1333: {'lr': 0.00033325, 'samples': 683008, 'steps': 1333, 'loss/train': 4.723476886749268} +03/03/2022 14:59:15 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/03/2022 14:59:19 - INFO - codeparrot_training - Step 1334: {'lr': 0.00033350000000000003, 'samples': 683520, 'steps': 1334, 'loss/train': 2.3966643810272217} +03/03/2022 14:59:22 - INFO - codeparrot_training - Step 1335: {'lr': 0.00033375, 'samples': 684032, 'steps': 1335, 'loss/train': 3.550367593765259} +03/03/2022 14:59:23 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/03/2022 14:59:28 - INFO - codeparrot_training - Step 1336: {'lr': 0.00033400000000000004, 'samples': 684544, 'steps': 1336, 'loss/train': 4.618042945861816} +03/03/2022 14:59:31 - INFO - codeparrot_training - Step 1337: {'lr': 0.00033425, 'samples': 685056, 'steps': 1337, 'loss/train': 3.770845890045166} +03/03/2022 14:59:31 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/03/2022 14:59:36 - INFO - codeparrot_training - Step 1338: {'lr': 0.00033450000000000005, 'samples': 685568, 'steps': 1338, 'loss/train': 3.7352075576782227} +03/03/2022 14:59:39 - INFO - codeparrot_training - Step 1339: {'lr': 0.00033475, 'samples': 686080, 'steps': 1339, 'loss/train': 3.3929390907287598} +03/03/2022 14:59:39 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/03/2022 14:59:44 - INFO - codeparrot_training - Step 1340: {'lr': 0.000335, 'samples': 686592, 'steps': 1340, 'loss/train': 4.196238994598389} +03/03/2022 14:59:47 - INFO - codeparrot_training - Step 1341: {'lr': 0.00033525, 'samples': 687104, 'steps': 1341, 'loss/train': 3.144120216369629} +03/03/2022 14:59:48 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/03/2022 14:59:53 - INFO - codeparrot_training - Step 1342: {'lr': 0.0003355, 'samples': 687616, 'steps': 1342, 'loss/train': 4.511755466461182} +03/03/2022 14:59:56 - INFO - codeparrot_training - Step 1343: {'lr': 0.00033575, 'samples': 688128, 'steps': 1343, 'loss/train': 2.205004930496216} +03/03/2022 14:59:56 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/03/2022 15:00:01 - INFO - codeparrot_training - Step 1344: {'lr': 0.00033600000000000004, 'samples': 688640, 'steps': 1344, 'loss/train': 4.689573764801025} +03/03/2022 15:00:05 - INFO - codeparrot_training - Step 1345: {'lr': 0.00033625, 'samples': 689152, 'steps': 1345, 'loss/train': 3.897254705429077} +03/03/2022 15:00:05 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/03/2022 15:00:10 - INFO - codeparrot_training - Step 1346: {'lr': 0.00033650000000000005, 'samples': 689664, 'steps': 1346, 'loss/train': 4.323125839233398} +03/03/2022 15:00:13 - INFO - codeparrot_training - Step 1347: {'lr': 0.00033675, 'samples': 690176, 'steps': 1347, 'loss/train': 3.3169021606445312} +03/03/2022 15:00:13 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/03/2022 15:00:18 - INFO - codeparrot_training - Step 1348: {'lr': 0.000337, 'samples': 690688, 'steps': 1348, 'loss/train': 4.208149433135986} +03/03/2022 15:00:21 - INFO - codeparrot_training - Step 1349: {'lr': 0.00033725, 'samples': 691200, 'steps': 1349, 'loss/train': 4.077748775482178} +03/03/2022 15:00:21 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 15:00:27 - INFO - codeparrot_training - Step 1350: {'lr': 0.0003375, 'samples': 691712, 'steps': 1350, 'loss/train': 4.038091659545898} +03/03/2022 15:00:30 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/03/2022 15:00:32 - INFO - codeparrot_training - Step 1351: {'lr': 0.00033775, 'samples': 692224, 'steps': 1351, 'loss/train': 4.951350688934326} +03/03/2022 15:00:35 - INFO - codeparrot_training - Step 1352: {'lr': 0.00033800000000000003, 'samples': 692736, 'steps': 1352, 'loss/train': 4.2818803787231445} +03/03/2022 15:00:38 - INFO - codeparrot_training - Step 1353: {'lr': 0.00033825, 'samples': 693248, 'steps': 1353, 'loss/train': 5.575289249420166} +03/03/2022 15:00:38 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/03/2022 15:00:44 - INFO - codeparrot_training - Step 1354: {'lr': 0.00033850000000000004, 'samples': 693760, 'steps': 1354, 'loss/train': 4.848819255828857} +03/03/2022 15:00:46 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/03/2022 15:00:49 - INFO - codeparrot_training - Step 1355: {'lr': 0.00033875, 'samples': 694272, 'steps': 1355, 'loss/train': 5.165854454040527} +03/03/2022 15:00:52 - INFO - codeparrot_training - Step 1356: {'lr': 0.00033900000000000005, 'samples': 694784, 'steps': 1356, 'loss/train': 3.9726078510284424} +03/03/2022 15:00:55 - INFO - codeparrot_training - Step 1357: {'lr': 0.00033925, 'samples': 695296, 'steps': 1357, 'loss/train': 3.788126230239868} +03/03/2022 15:00:55 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/03/2022 15:01:01 - INFO - codeparrot_training - Step 1358: {'lr': 0.0003395, 'samples': 695808, 'steps': 1358, 'loss/train': 3.8103890419006348} +03/03/2022 15:01:04 - INFO - codeparrot_training - Step 1359: {'lr': 0.00033975, 'samples': 696320, 'steps': 1359, 'loss/train': 4.306206703186035} +03/03/2022 15:01:04 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/03/2022 15:01:09 - INFO - codeparrot_training - Step 1360: {'lr': 0.00034, 'samples': 696832, 'steps': 1360, 'loss/train': 4.6508870124816895} +03/03/2022 15:01:12 - INFO - codeparrot_training - Step 1361: {'lr': 0.00034025, 'samples': 697344, 'steps': 1361, 'loss/train': 3.7625675201416016} +03/03/2022 15:01:12 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/03/2022 15:01:17 - INFO - codeparrot_training - Step 1362: {'lr': 0.00034050000000000004, 'samples': 697856, 'steps': 1362, 'loss/train': 4.423110485076904} +03/03/2022 15:01:20 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/03/2022 15:01:23 - INFO - codeparrot_training - Step 1363: {'lr': 0.00034075, 'samples': 698368, 'steps': 1363, 'loss/train': 3.479337692260742} +03/03/2022 15:01:26 - INFO - codeparrot_training - Step 1364: {'lr': 0.00034100000000000005, 'samples': 698880, 'steps': 1364, 'loss/train': 3.825160503387451} +03/03/2022 15:01:28 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/03/2022 15:01:31 - INFO - codeparrot_training - Step 1365: {'lr': 0.00034125000000000003, 'samples': 699392, 'steps': 1365, 'loss/train': 4.07443904876709} +03/03/2022 15:01:34 - INFO - codeparrot_training - Step 1366: {'lr': 0.0003415, 'samples': 699904, 'steps': 1366, 'loss/train': 1.9652504920959473} +03/03/2022 15:01:37 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/03/2022 15:01:39 - INFO - codeparrot_training - Step 1367: {'lr': 0.00034175, 'samples': 700416, 'steps': 1367, 'loss/train': 5.137356281280518} +03/03/2022 15:01:42 - INFO - codeparrot_training - Step 1368: {'lr': 0.000342, 'samples': 700928, 'steps': 1368, 'loss/train': 4.848583698272705} +03/03/2022 15:01:45 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/03/2022 15:01:48 - INFO - codeparrot_training - Step 1369: {'lr': 0.00034225, 'samples': 701440, 'steps': 1369, 'loss/train': 3.256547212600708} +03/03/2022 15:01:51 - INFO - codeparrot_training - Step 1370: {'lr': 0.00034250000000000003, 'samples': 701952, 'steps': 1370, 'loss/train': 3.7308225631713867} +03/03/2022 15:01:54 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/03/2022 15:01:56 - INFO - codeparrot_training - Step 1371: {'lr': 0.00034275, 'samples': 702464, 'steps': 1371, 'loss/train': 4.213438987731934} +03/03/2022 15:01:59 - INFO - codeparrot_training - Step 1372: {'lr': 0.00034300000000000004, 'samples': 702976, 'steps': 1372, 'loss/train': 3.998389959335327} +03/03/2022 15:02:02 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/03/2022 15:02:05 - INFO - codeparrot_training - Step 1373: {'lr': 0.00034325, 'samples': 703488, 'steps': 1373, 'loss/train': 4.89825963973999} +03/03/2022 15:02:08 - INFO - codeparrot_training - Step 1374: {'lr': 0.00034350000000000006, 'samples': 704000, 'steps': 1374, 'loss/train': 4.995420455932617} +03/03/2022 15:02:10 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/03/2022 15:02:13 - INFO - codeparrot_training - Step 1375: {'lr': 0.00034375, 'samples': 704512, 'steps': 1375, 'loss/train': 4.292768955230713} +03/03/2022 15:02:16 - INFO - codeparrot_training - Step 1376: {'lr': 0.00034399999999999996, 'samples': 705024, 'steps': 1376, 'loss/train': 6.702393054962158} +03/03/2022 15:02:19 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/03/2022 15:02:22 - INFO - codeparrot_training - Step 1377: {'lr': 0.00034425, 'samples': 705536, 'steps': 1377, 'loss/train': 3.957270622253418} +03/03/2022 15:02:25 - INFO - codeparrot_training - Step 1378: {'lr': 0.00034449999999999997, 'samples': 706048, 'steps': 1378, 'loss/train': 5.051953315734863} +03/03/2022 15:02:28 - INFO - codeparrot_training - Step 1379: {'lr': 0.00034475, 'samples': 706560, 'steps': 1379, 'loss/train': 4.340051174163818} +03/03/2022 15:02:28 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 15:02:33 - INFO - codeparrot_training - Step 1380: {'lr': 0.000345, 'samples': 707072, 'steps': 1380, 'loss/train': 4.199203968048096} +03/03/2022 15:02:36 - INFO - codeparrot_training - Step 1381: {'lr': 0.00034525, 'samples': 707584, 'steps': 1381, 'loss/train': 4.896386623382568} +03/03/2022 15:02:37 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/03/2022 15:02:42 - INFO - codeparrot_training - Step 1382: {'lr': 0.0003455, 'samples': 708096, 'steps': 1382, 'loss/train': 4.71715784072876} +03/03/2022 15:02:45 - INFO - codeparrot_training - Step 1383: {'lr': 0.00034575000000000003, 'samples': 708608, 'steps': 1383, 'loss/train': 4.2592949867248535} +03/03/2022 15:02:45 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/03/2022 15:02:50 - INFO - codeparrot_training - Step 1384: {'lr': 0.000346, 'samples': 709120, 'steps': 1384, 'loss/train': 4.287826061248779} +03/03/2022 15:02:53 - INFO - codeparrot_training - Step 1385: {'lr': 0.00034625, 'samples': 709632, 'steps': 1385, 'loss/train': 4.505620002746582} +03/03/2022 15:02:53 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/03/2022 15:02:58 - INFO - codeparrot_training - Step 1386: {'lr': 0.00034649999999999997, 'samples': 710144, 'steps': 1386, 'loss/train': 4.012139320373535} +03/03/2022 15:03:02 - INFO - codeparrot_training - Step 1387: {'lr': 0.00034675, 'samples': 710656, 'steps': 1387, 'loss/train': 3.345623016357422} +03/03/2022 15:03:02 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/03/2022 15:03:07 - INFO - codeparrot_training - Step 1388: {'lr': 0.000347, 'samples': 711168, 'steps': 1388, 'loss/train': 4.0962114334106445} +03/03/2022 15:03:10 - INFO - codeparrot_training - Step 1389: {'lr': 0.00034725, 'samples': 711680, 'steps': 1389, 'loss/train': 3.7883925437927246} +03/03/2022 15:03:10 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/03/2022 15:03:15 - INFO - codeparrot_training - Step 1390: {'lr': 0.0003475, 'samples': 712192, 'steps': 1390, 'loss/train': 4.1875529289245605} +03/03/2022 15:03:18 - INFO - codeparrot_training - Step 1391: {'lr': 0.00034775, 'samples': 712704, 'steps': 1391, 'loss/train': 4.096395015716553} +03/03/2022 15:03:18 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/03/2022 15:03:24 - INFO - codeparrot_training - Step 1392: {'lr': 0.000348, 'samples': 713216, 'steps': 1392, 'loss/train': 4.032371520996094} +03/03/2022 15:03:27 - INFO - codeparrot_training - Step 1393: {'lr': 0.00034825000000000004, 'samples': 713728, 'steps': 1393, 'loss/train': 4.76602029800415} +03/03/2022 15:03:27 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/03/2022 15:03:32 - INFO - codeparrot_training - Step 1394: {'lr': 0.00034849999999999996, 'samples': 714240, 'steps': 1394, 'loss/train': 3.944689989089966} +03/03/2022 15:03:36 - INFO - codeparrot_training - Step 1395: {'lr': 0.00034875, 'samples': 714752, 'steps': 1395, 'loss/train': 1.1654375791549683} +03/03/2022 15:03:36 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/03/2022 15:03:41 - INFO - codeparrot_training - Step 1396: {'lr': 0.00034899999999999997, 'samples': 715264, 'steps': 1396, 'loss/train': 5.171820640563965} +03/03/2022 15:03:44 - INFO - codeparrot_training - Step 1397: {'lr': 0.00034925, 'samples': 715776, 'steps': 1397, 'loss/train': 4.422672748565674} +03/03/2022 15:03:44 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/03/2022 15:03:49 - INFO - codeparrot_training - Step 1398: {'lr': 0.0003495, 'samples': 716288, 'steps': 1398, 'loss/train': 3.649049997329712} +03/03/2022 15:03:52 - INFO - codeparrot_training - Step 1399: {'lr': 0.00034975, 'samples': 716800, 'steps': 1399, 'loss/train': 3.8000471591949463} +03/03/2022 15:03:52 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/03/2022 15:03:57 - INFO - codeparrot_training - Step 1400: {'lr': 0.00035, 'samples': 717312, 'steps': 1400, 'loss/train': 3.921113967895508} +03/03/2022 15:04:01 - INFO - codeparrot_training - Step 1401: {'lr': 0.00035025000000000003, 'samples': 717824, 'steps': 1401, 'loss/train': 3.2294576168060303} +03/03/2022 15:04:01 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/03/2022 15:04:06 - INFO - codeparrot_training - Step 1402: {'lr': 0.0003505, 'samples': 718336, 'steps': 1402, 'loss/train': 4.519753932952881} +03/03/2022 15:04:09 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/03/2022 15:04:11 - INFO - codeparrot_training - Step 1403: {'lr': 0.00035075, 'samples': 718848, 'steps': 1403, 'loss/train': 4.289191246032715} +03/03/2022 15:04:14 - INFO - codeparrot_training - Step 1404: {'lr': 0.00035099999999999997, 'samples': 719360, 'steps': 1404, 'loss/train': 4.642714977264404} +03/03/2022 15:04:17 - INFO - codeparrot_training - Skipping example with length 732 (seq_length=1024) +03/03/2022 15:04:19 - INFO - codeparrot_training - Step 1405: {'lr': 0.00035125, 'samples': 719872, 'steps': 1405, 'loss/train': 5.213424205780029} +03/03/2022 15:04:22 - INFO - codeparrot_training - Step 1406: {'lr': 0.0003515, 'samples': 720384, 'steps': 1406, 'loss/train': 4.356109142303467} +03/03/2022 15:04:25 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/03/2022 15:04:28 - INFO - codeparrot_training - Step 1407: {'lr': 0.00035175, 'samples': 720896, 'steps': 1407, 'loss/train': 3.914952516555786} +03/03/2022 15:04:31 - INFO - codeparrot_training - Step 1408: {'lr': 0.000352, 'samples': 721408, 'steps': 1408, 'loss/train': 6.220399856567383} +03/03/2022 15:04:33 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/03/2022 15:04:36 - INFO - codeparrot_training - Step 1409: {'lr': 0.00035225, 'samples': 721920, 'steps': 1409, 'loss/train': 4.541872501373291} +03/03/2022 15:04:39 - INFO - codeparrot_training - Step 1410: {'lr': 0.0003525, 'samples': 722432, 'steps': 1410, 'loss/train': 5.304427623748779} +03/03/2022 15:04:41 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/03/2022 15:04:45 - INFO - codeparrot_training - Step 1411: {'lr': 0.00035275000000000004, 'samples': 722944, 'steps': 1411, 'loss/train': 3.934983253479004} +03/03/2022 15:04:48 - INFO - codeparrot_training - Step 1412: {'lr': 0.00035299999999999996, 'samples': 723456, 'steps': 1412, 'loss/train': 3.3430862426757812} +03/03/2022 15:04:50 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/03/2022 15:04:53 - INFO - codeparrot_training - Step 1413: {'lr': 0.00035325, 'samples': 723968, 'steps': 1413, 'loss/train': 4.663939952850342} +03/03/2022 15:04:56 - INFO - codeparrot_training - Step 1414: {'lr': 0.0003535, 'samples': 724480, 'steps': 1414, 'loss/train': 2.632704019546509} +03/03/2022 15:04:58 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/03/2022 15:05:02 - INFO - codeparrot_training - Step 1415: {'lr': 0.00035375, 'samples': 724992, 'steps': 1415, 'loss/train': 3.939199924468994} +03/03/2022 15:05:05 - INFO - codeparrot_training - Step 1416: {'lr': 0.000354, 'samples': 725504, 'steps': 1416, 'loss/train': 3.894688606262207} +03/03/2022 15:05:08 - INFO - codeparrot_training - Step 1417: {'lr': 0.00035425, 'samples': 726016, 'steps': 1417, 'loss/train': 3.548281669616699} +03/03/2022 15:05:09 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/03/2022 15:05:13 - INFO - codeparrot_training - Step 1418: {'lr': 0.0003545, 'samples': 726528, 'steps': 1418, 'loss/train': 3.9033889770507812} +03/03/2022 15:05:16 - INFO - codeparrot_training - Step 1419: {'lr': 0.00035475000000000003, 'samples': 727040, 'steps': 1419, 'loss/train': 4.490829944610596} +03/03/2022 15:05:17 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/03/2022 15:05:22 - INFO - codeparrot_training - Step 1420: {'lr': 0.000355, 'samples': 727552, 'steps': 1420, 'loss/train': 3.8207790851593018} +03/03/2022 15:05:25 - INFO - codeparrot_training - Step 1421: {'lr': 0.00035525000000000004, 'samples': 728064, 'steps': 1421, 'loss/train': 3.9344375133514404} +03/03/2022 15:05:25 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/03/2022 15:05:30 - INFO - codeparrot_training - Step 1422: {'lr': 0.00035549999999999997, 'samples': 728576, 'steps': 1422, 'loss/train': 4.054946422576904} +03/03/2022 15:05:33 - INFO - codeparrot_training - Step 1423: {'lr': 0.00035575, 'samples': 729088, 'steps': 1423, 'loss/train': 5.404002666473389} +03/03/2022 15:05:33 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/03/2022 15:05:38 - INFO - codeparrot_training - Step 1424: {'lr': 0.000356, 'samples': 729600, 'steps': 1424, 'loss/train': 4.829292297363281} +03/03/2022 15:05:41 - INFO - codeparrot_training - Step 1425: {'lr': 0.00035625, 'samples': 730112, 'steps': 1425, 'loss/train': 4.549272060394287} +03/03/2022 15:05:41 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/03/2022 15:05:47 - INFO - codeparrot_training - Step 1426: {'lr': 0.0003565, 'samples': 730624, 'steps': 1426, 'loss/train': 4.292669296264648} +03/03/2022 15:05:50 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/03/2022 15:05:52 - INFO - codeparrot_training - Step 1427: {'lr': 0.00035675, 'samples': 731136, 'steps': 1427, 'loss/train': 3.107365608215332} +03/03/2022 15:05:55 - INFO - codeparrot_training - Step 1428: {'lr': 0.000357, 'samples': 731648, 'steps': 1428, 'loss/train': 4.563395023345947} +03/03/2022 15:05:58 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/03/2022 15:06:00 - INFO - codeparrot_training - Step 1429: {'lr': 0.00035725000000000004, 'samples': 732160, 'steps': 1429, 'loss/train': 4.344968795776367} +03/03/2022 15:06:04 - INFO - codeparrot_training - Step 1430: {'lr': 0.0003575, 'samples': 732672, 'steps': 1430, 'loss/train': 4.074631214141846} +03/03/2022 15:06:06 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/03/2022 15:06:09 - INFO - codeparrot_training - Step 1431: {'lr': 0.00035775, 'samples': 733184, 'steps': 1431, 'loss/train': 3.411041498184204} +03/03/2022 15:06:12 - INFO - codeparrot_training - Step 1432: {'lr': 0.000358, 'samples': 733696, 'steps': 1432, 'loss/train': 4.681650161743164} +03/03/2022 15:06:15 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/03/2022 15:06:17 - INFO - codeparrot_training - Step 1433: {'lr': 0.00035825, 'samples': 734208, 'steps': 1433, 'loss/train': 3.564584732055664} +03/03/2022 15:06:20 - INFO - codeparrot_training - Step 1434: {'lr': 0.0003585, 'samples': 734720, 'steps': 1434, 'loss/train': 4.753785610198975} +03/03/2022 15:06:23 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/03/2022 15:06:26 - INFO - codeparrot_training - Step 1435: {'lr': 0.00035875, 'samples': 735232, 'steps': 1435, 'loss/train': 4.694465160369873} +03/03/2022 15:06:29 - INFO - codeparrot_training - Step 1436: {'lr': 0.000359, 'samples': 735744, 'steps': 1436, 'loss/train': 3.8891234397888184} +03/03/2022 15:06:34 - INFO - codeparrot_training - Step 1437: {'lr': 0.00035925000000000003, 'samples': 736256, 'steps': 1437, 'loss/train': 3.8811140060424805} +03/03/2022 15:06:37 - INFO - codeparrot_training - Step 1438: {'lr': 0.0003595, 'samples': 736768, 'steps': 1438, 'loss/train': 3.0895650386810303} +03/03/2022 15:06:40 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/03/2022 15:06:42 - INFO - codeparrot_training - Step 1439: {'lr': 0.00035975000000000004, 'samples': 737280, 'steps': 1439, 'loss/train': 4.115229606628418} +03/03/2022 15:06:46 - INFO - codeparrot_training - Step 1440: {'lr': 0.00035999999999999997, 'samples': 737792, 'steps': 1440, 'loss/train': 4.0173187255859375} +03/03/2022 15:06:48 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/03/2022 15:06:51 - INFO - codeparrot_training - Step 1441: {'lr': 0.00036025, 'samples': 738304, 'steps': 1441, 'loss/train': 4.3554840087890625} +03/03/2022 15:06:54 - INFO - codeparrot_training - Step 1442: {'lr': 0.0003605, 'samples': 738816, 'steps': 1442, 'loss/train': 4.1994194984436035} +03/03/2022 15:06:57 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/03/2022 15:06:59 - INFO - codeparrot_training - Step 1443: {'lr': 0.00036075, 'samples': 739328, 'steps': 1443, 'loss/train': 4.311388969421387} +03/03/2022 15:07:03 - INFO - codeparrot_training - Step 1444: {'lr': 0.000361, 'samples': 739840, 'steps': 1444, 'loss/train': 4.153400421142578} +03/03/2022 15:07:05 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/03/2022 15:07:08 - INFO - codeparrot_training - Step 1445: {'lr': 0.00036125, 'samples': 740352, 'steps': 1445, 'loss/train': 3.061067819595337} +03/03/2022 15:07:11 - INFO - codeparrot_training - Step 1446: {'lr': 0.0003615, 'samples': 740864, 'steps': 1446, 'loss/train': 3.8438150882720947} +03/03/2022 15:07:13 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 15:07:16 - INFO - codeparrot_training - Step 1447: {'lr': 0.00036175000000000004, 'samples': 741376, 'steps': 1447, 'loss/train': 4.005690574645996} +03/03/2022 15:07:19 - INFO - codeparrot_training - Step 1448: {'lr': 0.000362, 'samples': 741888, 'steps': 1448, 'loss/train': 4.079996585845947} +03/03/2022 15:07:22 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/03/2022 15:07:25 - INFO - codeparrot_training - Step 1449: {'lr': 0.00036225000000000005, 'samples': 742400, 'steps': 1449, 'loss/train': 4.334934711456299} +03/03/2022 15:07:28 - INFO - codeparrot_training - Step 1450: {'lr': 0.0003625, 'samples': 742912, 'steps': 1450, 'loss/train': 4.11976432800293} +03/03/2022 15:07:30 - INFO - codeparrot_training - Skipping example with length 740 (seq_length=1024) +03/03/2022 15:07:33 - INFO - codeparrot_training - Step 1451: {'lr': 0.00036275, 'samples': 743424, 'steps': 1451, 'loss/train': 2.37908935546875} +03/03/2022 15:07:36 - INFO - codeparrot_training - Step 1452: {'lr': 0.000363, 'samples': 743936, 'steps': 1452, 'loss/train': 3.74288010597229} +03/03/2022 15:07:39 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/03/2022 15:07:42 - INFO - codeparrot_training - Step 1453: {'lr': 0.00036325, 'samples': 744448, 'steps': 1453, 'loss/train': 4.1216254234313965} +03/03/2022 15:07:45 - INFO - codeparrot_training - Step 1454: {'lr': 0.0003635, 'samples': 744960, 'steps': 1454, 'loss/train': 4.451533317565918} +03/03/2022 15:07:47 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/03/2022 15:07:50 - INFO - codeparrot_training - Step 1455: {'lr': 0.00036375000000000003, 'samples': 745472, 'steps': 1455, 'loss/train': 4.489429950714111} +03/03/2022 15:07:53 - INFO - codeparrot_training - Step 1456: {'lr': 0.000364, 'samples': 745984, 'steps': 1456, 'loss/train': 4.745635032653809} +03/03/2022 15:07:55 - INFO - codeparrot_training - Skipping example with length 937 (seq_length=1024) +03/03/2022 15:07:58 - INFO - codeparrot_training - Step 1457: {'lr': 0.00036425000000000004, 'samples': 746496, 'steps': 1457, 'loss/train': 4.129570484161377} +03/03/2022 15:08:01 - INFO - codeparrot_training - Step 1458: {'lr': 0.0003645, 'samples': 747008, 'steps': 1458, 'loss/train': 3.9060328006744385} +03/03/2022 15:08:03 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/03/2022 15:08:07 - INFO - codeparrot_training - Step 1459: {'lr': 0.00036475, 'samples': 747520, 'steps': 1459, 'loss/train': 3.7146434783935547} +03/03/2022 15:08:10 - INFO - codeparrot_training - Step 1460: {'lr': 0.000365, 'samples': 748032, 'steps': 1460, 'loss/train': 4.706860065460205} +03/03/2022 15:08:12 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/03/2022 15:08:15 - INFO - codeparrot_training - Step 1461: {'lr': 0.00036525, 'samples': 748544, 'steps': 1461, 'loss/train': 4.445798873901367} +03/03/2022 15:08:18 - INFO - codeparrot_training - Step 1462: {'lr': 0.0003655, 'samples': 749056, 'steps': 1462, 'loss/train': 4.539623737335205} +03/03/2022 15:08:20 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/03/2022 15:08:23 - INFO - codeparrot_training - Step 1463: {'lr': 0.00036575, 'samples': 749568, 'steps': 1463, 'loss/train': 4.721151828765869} +03/03/2022 15:08:27 - INFO - codeparrot_training - Step 1464: {'lr': 0.000366, 'samples': 750080, 'steps': 1464, 'loss/train': 4.6012797355651855} +03/03/2022 15:08:28 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/03/2022 15:08:32 - INFO - codeparrot_training - Step 1465: {'lr': 0.00036625000000000004, 'samples': 750592, 'steps': 1465, 'loss/train': 4.169576644897461} +03/03/2022 15:08:35 - INFO - codeparrot_training - Step 1466: {'lr': 0.0003665, 'samples': 751104, 'steps': 1466, 'loss/train': 3.8605988025665283} +03/03/2022 15:08:36 - INFO - codeparrot_training - Skipping example with length 1023 (seq_length=1024) +03/03/2022 15:08:40 - INFO - codeparrot_training - Step 1467: {'lr': 0.00036675000000000005, 'samples': 751616, 'steps': 1467, 'loss/train': 4.078520774841309} +03/03/2022 15:08:43 - INFO - codeparrot_training - Step 1468: {'lr': 0.000367, 'samples': 752128, 'steps': 1468, 'loss/train': 4.675118446350098} +03/03/2022 15:08:44 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/03/2022 15:08:48 - INFO - codeparrot_training - Step 1469: {'lr': 0.00036725, 'samples': 752640, 'steps': 1469, 'loss/train': 4.158109188079834} +03/03/2022 15:08:52 - INFO - codeparrot_training - Step 1470: {'lr': 0.0003675, 'samples': 753152, 'steps': 1470, 'loss/train': 3.894350528717041} +03/03/2022 15:08:52 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/03/2022 15:08:57 - INFO - codeparrot_training - Step 1471: {'lr': 0.00036775, 'samples': 753664, 'steps': 1471, 'loss/train': 3.148911476135254} +03/03/2022 15:09:00 - INFO - codeparrot_training - Step 1472: {'lr': 0.000368, 'samples': 754176, 'steps': 1472, 'loss/train': 2.4368736743927} +03/03/2022 15:09:01 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/03/2022 15:09:05 - INFO - codeparrot_training - Step 1473: {'lr': 0.00036825000000000003, 'samples': 754688, 'steps': 1473, 'loss/train': 4.8750319480896} +03/03/2022 15:09:08 - INFO - codeparrot_training - Step 1474: {'lr': 0.0003685, 'samples': 755200, 'steps': 1474, 'loss/train': 4.30271053314209} +03/03/2022 15:09:09 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/03/2022 15:09:14 - INFO - codeparrot_training - Step 1475: {'lr': 0.00036875000000000005, 'samples': 755712, 'steps': 1475, 'loss/train': 3.941826581954956} +03/03/2022 15:09:17 - INFO - codeparrot_training - Step 1476: {'lr': 0.000369, 'samples': 756224, 'steps': 1476, 'loss/train': 4.268701553344727} +03/03/2022 15:09:17 - INFO - codeparrot_training - Skipping example with length 870 (seq_length=1024) +03/03/2022 15:09:22 - INFO - codeparrot_training - Step 1477: {'lr': 0.00036925, 'samples': 756736, 'steps': 1477, 'loss/train': 4.079792499542236} +03/03/2022 15:09:25 - INFO - codeparrot_training - Step 1478: {'lr': 0.0003695, 'samples': 757248, 'steps': 1478, 'loss/train': 3.1567745208740234} +03/03/2022 15:09:26 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/03/2022 15:09:31 - INFO - codeparrot_training - Step 1479: {'lr': 0.00036975, 'samples': 757760, 'steps': 1479, 'loss/train': 3.982180595397949} +03/03/2022 15:09:34 - INFO - codeparrot_training - Step 1480: {'lr': 0.00037, 'samples': 758272, 'steps': 1480, 'loss/train': 3.835810422897339} +03/03/2022 15:09:34 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/03/2022 15:09:39 - INFO - codeparrot_training - Step 1481: {'lr': 0.00037025000000000003, 'samples': 758784, 'steps': 1481, 'loss/train': 3.477461099624634} +03/03/2022 15:09:42 - INFO - codeparrot_training - Step 1482: {'lr': 0.0003705, 'samples': 759296, 'steps': 1482, 'loss/train': 4.739671230316162} +03/03/2022 15:09:42 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/03/2022 15:09:47 - INFO - codeparrot_training - Step 1483: {'lr': 0.00037075000000000004, 'samples': 759808, 'steps': 1483, 'loss/train': 3.8007612228393555} +03/03/2022 15:09:51 - INFO - codeparrot_training - Step 1484: {'lr': 0.000371, 'samples': 760320, 'steps': 1484, 'loss/train': 4.528865814208984} +03/03/2022 15:09:51 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/03/2022 15:09:56 - INFO - codeparrot_training - Step 1485: {'lr': 0.00037125000000000005, 'samples': 760832, 'steps': 1485, 'loss/train': 4.351111888885498} +03/03/2022 15:09:59 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 15:10:01 - INFO - codeparrot_training - Step 1486: {'lr': 0.00037150000000000003, 'samples': 761344, 'steps': 1486, 'loss/train': 4.2383904457092285} +03/03/2022 15:10:04 - INFO - codeparrot_training - Step 1487: {'lr': 0.00037175, 'samples': 761856, 'steps': 1487, 'loss/train': 3.099395751953125} +03/03/2022 15:10:07 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/03/2022 15:10:09 - INFO - codeparrot_training - Step 1488: {'lr': 0.000372, 'samples': 762368, 'steps': 1488, 'loss/train': 3.2797293663024902} +03/03/2022 15:10:13 - INFO - codeparrot_training - Step 1489: {'lr': 0.00037225, 'samples': 762880, 'steps': 1489, 'loss/train': 3.8778297901153564} +03/03/2022 15:10:15 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/03/2022 15:10:18 - INFO - codeparrot_training - Step 1490: {'lr': 0.0003725, 'samples': 763392, 'steps': 1490, 'loss/train': 3.4822680950164795} +03/03/2022 15:10:21 - INFO - codeparrot_training - Step 1491: {'lr': 0.00037275000000000003, 'samples': 763904, 'steps': 1491, 'loss/train': 3.91239857673645} +03/03/2022 15:10:23 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/03/2022 15:10:26 - INFO - codeparrot_training - Step 1492: {'lr': 0.000373, 'samples': 764416, 'steps': 1492, 'loss/train': 3.9077584743499756} +03/03/2022 15:10:30 - INFO - codeparrot_training - Step 1493: {'lr': 0.00037325000000000005, 'samples': 764928, 'steps': 1493, 'loss/train': 4.385066032409668} +03/03/2022 15:10:32 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/03/2022 15:10:35 - INFO - codeparrot_training - Step 1494: {'lr': 0.0003735, 'samples': 765440, 'steps': 1494, 'loss/train': 3.0124974250793457} +03/03/2022 15:10:38 - INFO - codeparrot_training - Step 1495: {'lr': 0.00037375000000000006, 'samples': 765952, 'steps': 1495, 'loss/train': 2.8615899085998535} +03/03/2022 15:10:40 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/03/2022 15:10:43 - INFO - codeparrot_training - Step 1496: {'lr': 0.000374, 'samples': 766464, 'steps': 1496, 'loss/train': 4.435169219970703} +03/03/2022 15:10:46 - INFO - codeparrot_training - Step 1497: {'lr': 0.00037425, 'samples': 766976, 'steps': 1497, 'loss/train': 4.5539751052856445} +03/03/2022 15:10:49 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/03/2022 15:10:52 - INFO - codeparrot_training - Step 1498: {'lr': 0.0003745, 'samples': 767488, 'steps': 1498, 'loss/train': 4.482819557189941} +03/03/2022 15:10:55 - INFO - codeparrot_training - Step 1499: {'lr': 0.00037475000000000003, 'samples': 768000, 'steps': 1499, 'loss/train': 3.6092755794525146} +03/03/2022 15:10:57 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/03/2022 15:11:00 - INFO - codeparrot_training - Step 1500: {'lr': 0.000375, 'samples': 768512, 'steps': 1500, 'loss/train': 3.235640287399292} +03/03/2022 15:11:03 - INFO - codeparrot_training - Step 1501: {'lr': 0.00037525, 'samples': 769024, 'steps': 1501, 'loss/train': 3.5360376834869385} +03/03/2022 15:11:06 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/03/2022 15:11:08 - INFO - codeparrot_training - Step 1502: {'lr': 0.0003755, 'samples': 769536, 'steps': 1502, 'loss/train': 2.3084657192230225} +03/03/2022 15:11:11 - INFO - codeparrot_training - Step 1503: {'lr': 0.00037575, 'samples': 770048, 'steps': 1503, 'loss/train': 3.9656982421875} +03/03/2022 15:11:14 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/03/2022 15:11:17 - INFO - codeparrot_training - Step 1504: {'lr': 0.00037600000000000003, 'samples': 770560, 'steps': 1504, 'loss/train': 4.816207408905029} +03/03/2022 15:11:20 - INFO - codeparrot_training - Step 1505: {'lr': 0.00037624999999999996, 'samples': 771072, 'steps': 1505, 'loss/train': 3.7710113525390625} +03/03/2022 15:11:22 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/03/2022 15:11:25 - INFO - codeparrot_training - Step 1506: {'lr': 0.0003765, 'samples': 771584, 'steps': 1506, 'loss/train': 3.916992664337158} +03/03/2022 15:11:28 - INFO - codeparrot_training - Step 1507: {'lr': 0.00037674999999999997, 'samples': 772096, 'steps': 1507, 'loss/train': 3.842498302459717} +03/03/2022 15:11:30 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/03/2022 15:11:34 - INFO - codeparrot_training - Step 1508: {'lr': 0.000377, 'samples': 772608, 'steps': 1508, 'loss/train': 3.632445812225342} +03/03/2022 15:11:37 - INFO - codeparrot_training - Step 1509: {'lr': 0.00037725, 'samples': 773120, 'steps': 1509, 'loss/train': 4.604870796203613} +03/03/2022 15:11:39 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/03/2022 15:11:42 - INFO - codeparrot_training - Step 1510: {'lr': 0.0003775, 'samples': 773632, 'steps': 1510, 'loss/train': 2.283200740814209} +03/03/2022 15:11:45 - INFO - codeparrot_training - Step 1511: {'lr': 0.00037775, 'samples': 774144, 'steps': 1511, 'loss/train': 3.5791454315185547} +03/03/2022 15:11:48 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/03/2022 15:11:51 - INFO - codeparrot_training - Step 1512: {'lr': 0.000378, 'samples': 774656, 'steps': 1512, 'loss/train': 3.285102605819702} +03/03/2022 15:11:54 - INFO - codeparrot_training - Step 1513: {'lr': 0.00037825, 'samples': 775168, 'steps': 1513, 'loss/train': 4.37850284576416} +03/03/2022 15:11:56 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/03/2022 15:11:59 - INFO - codeparrot_training - Step 1514: {'lr': 0.0003785, 'samples': 775680, 'steps': 1514, 'loss/train': 4.18479585647583} +03/03/2022 15:12:02 - INFO - codeparrot_training - Step 1515: {'lr': 0.00037874999999999996, 'samples': 776192, 'steps': 1515, 'loss/train': 3.8053348064422607} +03/03/2022 15:12:04 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/03/2022 15:12:08 - INFO - codeparrot_training - Step 1516: {'lr': 0.000379, 'samples': 776704, 'steps': 1516, 'loss/train': 4.5604987144470215} +03/03/2022 15:12:11 - INFO - codeparrot_training - Step 1517: {'lr': 0.00037925, 'samples': 777216, 'steps': 1517, 'loss/train': 3.2770392894744873} +03/03/2022 15:12:12 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/03/2022 15:12:16 - INFO - codeparrot_training - Step 1518: {'lr': 0.0003795, 'samples': 777728, 'steps': 1518, 'loss/train': 4.6869425773620605} +03/03/2022 15:12:19 - INFO - codeparrot_training - Step 1519: {'lr': 0.00037975, 'samples': 778240, 'steps': 1519, 'loss/train': 4.943699359893799} +03/03/2022 15:12:21 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/03/2022 15:12:24 - INFO - codeparrot_training - Step 1520: {'lr': 0.00038, 'samples': 778752, 'steps': 1520, 'loss/train': 4.5217742919921875} +03/03/2022 15:12:27 - INFO - codeparrot_training - Step 1521: {'lr': 0.00038025, 'samples': 779264, 'steps': 1521, 'loss/train': 3.4776947498321533} +03/03/2022 15:12:29 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/03/2022 15:12:33 - INFO - codeparrot_training - Step 1522: {'lr': 0.00038050000000000003, 'samples': 779776, 'steps': 1522, 'loss/train': 3.51460862159729} +03/03/2022 15:12:36 - INFO - codeparrot_training - Step 1523: {'lr': 0.00038075, 'samples': 780288, 'steps': 1523, 'loss/train': 4.309046268463135} +03/03/2022 15:12:37 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/03/2022 15:12:41 - INFO - codeparrot_training - Step 1524: {'lr': 0.000381, 'samples': 780800, 'steps': 1524, 'loss/train': 3.4880378246307373} +03/03/2022 15:12:44 - INFO - codeparrot_training - Step 1525: {'lr': 0.00038124999999999997, 'samples': 781312, 'steps': 1525, 'loss/train': 3.7793219089508057} +03/03/2022 15:12:46 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/03/2022 15:12:50 - INFO - codeparrot_training - Step 1526: {'lr': 0.0003815, 'samples': 781824, 'steps': 1526, 'loss/train': 3.9242136478424072} +03/03/2022 15:12:53 - INFO - codeparrot_training - Step 1527: {'lr': 0.00038175, 'samples': 782336, 'steps': 1527, 'loss/train': 3.7659497261047363} +03/03/2022 15:12:56 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/03/2022 15:12:58 - INFO - codeparrot_training - Step 1528: {'lr': 0.000382, 'samples': 782848, 'steps': 1528, 'loss/train': 4.0172271728515625} +03/03/2022 15:13:01 - INFO - codeparrot_training - Step 1529: {'lr': 0.00038225, 'samples': 783360, 'steps': 1529, 'loss/train': 3.6876139640808105} +03/03/2022 15:13:04 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/03/2022 15:13:07 - INFO - codeparrot_training - Step 1530: {'lr': 0.00038250000000000003, 'samples': 783872, 'steps': 1530, 'loss/train': 4.323005199432373} +03/03/2022 15:13:10 - INFO - codeparrot_training - Step 1531: {'lr': 0.00038275, 'samples': 784384, 'steps': 1531, 'loss/train': 2.826673746109009} +03/03/2022 15:13:12 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/03/2022 15:13:15 - INFO - codeparrot_training - Step 1532: {'lr': 0.00038300000000000004, 'samples': 784896, 'steps': 1532, 'loss/train': 4.546938419342041} +03/03/2022 15:13:18 - INFO - codeparrot_training - Step 1533: {'lr': 0.00038324999999999996, 'samples': 785408, 'steps': 1533, 'loss/train': 3.795086145401001} +03/03/2022 15:13:20 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/03/2022 15:13:24 - INFO - codeparrot_training - Step 1534: {'lr': 0.0003835, 'samples': 785920, 'steps': 1534, 'loss/train': 4.857752323150635} +03/03/2022 15:13:27 - INFO - codeparrot_training - Step 1535: {'lr': 0.00038375, 'samples': 786432, 'steps': 1535, 'loss/train': 3.7344937324523926} +03/03/2022 15:13:30 - INFO - codeparrot_training - Step 1536: {'lr': 0.000384, 'samples': 786944, 'steps': 1536, 'loss/train': 4.928196907043457} +03/03/2022 15:13:30 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/03/2022 15:13:35 - INFO - codeparrot_training - Step 1537: {'lr': 0.00038425, 'samples': 787456, 'steps': 1537, 'loss/train': 3.9834160804748535} +03/03/2022 15:13:38 - INFO - codeparrot_training - Step 1538: {'lr': 0.0003845, 'samples': 787968, 'steps': 1538, 'loss/train': 3.138561248779297} +03/03/2022 15:13:38 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/03/2022 15:13:43 - INFO - codeparrot_training - Step 1539: {'lr': 0.00038475, 'samples': 788480, 'steps': 1539, 'loss/train': 3.554269313812256} +03/03/2022 15:13:47 - INFO - codeparrot_training - Step 1540: {'lr': 0.00038500000000000003, 'samples': 788992, 'steps': 1540, 'loss/train': 4.217238903045654} +03/03/2022 15:13:47 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/03/2022 15:13:52 - INFO - codeparrot_training - Step 1541: {'lr': 0.00038525, 'samples': 789504, 'steps': 1541, 'loss/train': 4.2638044357299805} +03/03/2022 15:13:55 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/03/2022 15:13:57 - INFO - codeparrot_training - Step 1542: {'lr': 0.0003855, 'samples': 790016, 'steps': 1542, 'loss/train': 4.403471946716309} +03/03/2022 15:14:00 - INFO - codeparrot_training - Step 1543: {'lr': 0.00038574999999999997, 'samples': 790528, 'steps': 1543, 'loss/train': 3.3588995933532715} +03/03/2022 15:14:03 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/03/2022 15:14:05 - INFO - codeparrot_training - Step 1544: {'lr': 0.000386, 'samples': 791040, 'steps': 1544, 'loss/train': 4.109154224395752} +03/03/2022 15:14:09 - INFO - codeparrot_training - Step 1545: {'lr': 0.00038625, 'samples': 791552, 'steps': 1545, 'loss/train': 3.2273197174072266} +03/03/2022 15:14:11 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/03/2022 15:14:14 - INFO - codeparrot_training - Step 1546: {'lr': 0.0003865, 'samples': 792064, 'steps': 1546, 'loss/train': 4.201442241668701} +03/03/2022 15:14:17 - INFO - codeparrot_training - Step 1547: {'lr': 0.00038675, 'samples': 792576, 'steps': 1547, 'loss/train': 1.5830601453781128} +03/03/2022 15:14:20 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/03/2022 15:14:22 - INFO - codeparrot_training - Step 1548: {'lr': 0.00038700000000000003, 'samples': 793088, 'steps': 1548, 'loss/train': 4.204220294952393} +03/03/2022 15:14:26 - INFO - codeparrot_training - Step 1549: {'lr': 0.00038725, 'samples': 793600, 'steps': 1549, 'loss/train': 3.021789312362671} +03/03/2022 15:14:28 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/03/2022 15:14:31 - INFO - codeparrot_training - Step 1550: {'lr': 0.00038750000000000004, 'samples': 794112, 'steps': 1550, 'loss/train': 3.7322466373443604} +03/03/2022 15:14:34 - INFO - codeparrot_training - Step 1551: {'lr': 0.00038774999999999997, 'samples': 794624, 'steps': 1551, 'loss/train': 3.2621359825134277} +03/03/2022 15:14:37 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/03/2022 15:14:39 - INFO - codeparrot_training - Step 1552: {'lr': 0.000388, 'samples': 795136, 'steps': 1552, 'loss/train': 3.510321855545044} +03/03/2022 15:14:42 - INFO - codeparrot_training - Step 1553: {'lr': 0.00038825, 'samples': 795648, 'steps': 1553, 'loss/train': 3.8624253273010254} +03/03/2022 15:14:45 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/03/2022 15:14:48 - INFO - codeparrot_training - Step 1554: {'lr': 0.0003885, 'samples': 796160, 'steps': 1554, 'loss/train': 4.156026840209961} +03/03/2022 15:14:51 - INFO - codeparrot_training - Step 1555: {'lr': 0.00038875, 'samples': 796672, 'steps': 1555, 'loss/train': 3.379423141479492} +03/03/2022 15:14:53 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/03/2022 15:14:56 - INFO - codeparrot_training - Step 1556: {'lr': 0.000389, 'samples': 797184, 'steps': 1556, 'loss/train': 4.1558098793029785} +03/03/2022 15:14:59 - INFO - codeparrot_training - Step 1557: {'lr': 0.00038925, 'samples': 797696, 'steps': 1557, 'loss/train': 3.1854405403137207} +03/03/2022 15:15:01 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/03/2022 15:15:04 - INFO - codeparrot_training - Step 1558: {'lr': 0.00038950000000000003, 'samples': 798208, 'steps': 1558, 'loss/train': 4.045464515686035} +03/03/2022 15:15:08 - INFO - codeparrot_training - Step 1559: {'lr': 0.00038975, 'samples': 798720, 'steps': 1559, 'loss/train': 7.381725311279297} +03/03/2022 15:15:10 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/03/2022 15:15:13 - INFO - codeparrot_training - Step 1560: {'lr': 0.00039000000000000005, 'samples': 799232, 'steps': 1560, 'loss/train': 3.5645205974578857} +03/03/2022 15:15:16 - INFO - codeparrot_training - Step 1561: {'lr': 0.00039024999999999997, 'samples': 799744, 'steps': 1561, 'loss/train': 2.3492555618286133} +03/03/2022 15:15:18 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/03/2022 15:15:21 - INFO - codeparrot_training - Step 1562: {'lr': 0.0003905, 'samples': 800256, 'steps': 1562, 'loss/train': 4.596646785736084} +03/03/2022 15:15:24 - INFO - codeparrot_training - Step 1563: {'lr': 0.00039075, 'samples': 800768, 'steps': 1563, 'loss/train': 3.2182395458221436} +03/03/2022 15:15:27 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/03/2022 15:15:30 - INFO - codeparrot_training - Step 1564: {'lr': 0.000391, 'samples': 801280, 'steps': 1564, 'loss/train': 4.244230270385742} +03/03/2022 15:15:33 - INFO - codeparrot_training - Step 1565: {'lr': 0.00039125, 'samples': 801792, 'steps': 1565, 'loss/train': 3.9040040969848633} +03/03/2022 15:15:35 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/03/2022 15:15:38 - INFO - codeparrot_training - Step 1566: {'lr': 0.00039150000000000003, 'samples': 802304, 'steps': 1566, 'loss/train': 2.477119207382202} +03/03/2022 15:15:41 - INFO - codeparrot_training - Step 1567: {'lr': 0.00039175, 'samples': 802816, 'steps': 1567, 'loss/train': 4.116785049438477} +03/03/2022 15:15:43 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/03/2022 15:15:46 - INFO - codeparrot_training - Step 1568: {'lr': 0.00039200000000000004, 'samples': 803328, 'steps': 1568, 'loss/train': 3.90573787689209} +03/03/2022 15:15:50 - INFO - codeparrot_training - Step 1569: {'lr': 0.00039225, 'samples': 803840, 'steps': 1569, 'loss/train': 2.4844870567321777} +03/03/2022 15:15:51 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/03/2022 15:15:55 - INFO - codeparrot_training - Step 1570: {'lr': 0.0003925, 'samples': 804352, 'steps': 1570, 'loss/train': 3.338627338409424} +03/03/2022 15:15:58 - INFO - codeparrot_training - Step 1571: {'lr': 0.00039275, 'samples': 804864, 'steps': 1571, 'loss/train': 1.1468610763549805} +03/03/2022 15:16:00 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 15:16:03 - INFO - codeparrot_training - Step 1572: {'lr': 0.000393, 'samples': 805376, 'steps': 1572, 'loss/train': 3.844975471496582} +03/03/2022 15:16:06 - INFO - codeparrot_training - Step 1573: {'lr': 0.00039325, 'samples': 805888, 'steps': 1573, 'loss/train': 3.7398221492767334} +03/03/2022 15:16:08 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/03/2022 15:16:12 - INFO - codeparrot_training - Step 1574: {'lr': 0.0003935, 'samples': 806400, 'steps': 1574, 'loss/train': 4.145874500274658} +03/03/2022 15:16:15 - INFO - codeparrot_training - Step 1575: {'lr': 0.00039375, 'samples': 806912, 'steps': 1575, 'loss/train': 4.008064270019531} +03/03/2022 15:16:16 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/03/2022 15:16:20 - INFO - codeparrot_training - Step 1576: {'lr': 0.00039400000000000004, 'samples': 807424, 'steps': 1576, 'loss/train': 3.282827615737915} +03/03/2022 15:16:23 - INFO - codeparrot_training - Step 1577: {'lr': 0.00039425, 'samples': 807936, 'steps': 1577, 'loss/train': 3.2250897884368896} +03/03/2022 15:16:25 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/03/2022 15:16:29 - INFO - codeparrot_training - Step 1578: {'lr': 0.00039450000000000005, 'samples': 808448, 'steps': 1578, 'loss/train': 3.2866814136505127} +03/03/2022 15:16:32 - INFO - codeparrot_training - Step 1579: {'lr': 0.00039474999999999997, 'samples': 808960, 'steps': 1579, 'loss/train': 1.6143156290054321} +03/03/2022 15:16:34 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/03/2022 15:16:37 - INFO - codeparrot_training - Step 1580: {'lr': 0.000395, 'samples': 809472, 'steps': 1580, 'loss/train': 4.0041704177856445} +03/03/2022 15:16:40 - INFO - codeparrot_training - Step 1581: {'lr': 0.00039525, 'samples': 809984, 'steps': 1581, 'loss/train': 2.606600284576416} +03/03/2022 15:16:42 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/03/2022 15:16:45 - INFO - codeparrot_training - Step 1582: {'lr': 0.0003955, 'samples': 810496, 'steps': 1582, 'loss/train': 4.103724479675293} +03/03/2022 15:16:49 - INFO - codeparrot_training - Step 1583: {'lr': 0.00039575, 'samples': 811008, 'steps': 1583, 'loss/train': 3.899157762527466} +03/03/2022 15:16:50 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/03/2022 15:16:54 - INFO - codeparrot_training - Step 1584: {'lr': 0.00039600000000000003, 'samples': 811520, 'steps': 1584, 'loss/train': 4.094735145568848} +03/03/2022 15:16:57 - INFO - codeparrot_training - Step 1585: {'lr': 0.00039625, 'samples': 812032, 'steps': 1585, 'loss/train': 3.7360873222351074} +03/03/2022 15:16:58 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/03/2022 15:17:02 - INFO - codeparrot_training - Step 1586: {'lr': 0.00039650000000000004, 'samples': 812544, 'steps': 1586, 'loss/train': 2.2661588191986084} +03/03/2022 15:17:05 - INFO - codeparrot_training - Step 1587: {'lr': 0.00039675, 'samples': 813056, 'steps': 1587, 'loss/train': 4.081903457641602} +03/03/2022 15:17:07 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/03/2022 15:17:11 - INFO - codeparrot_training - Step 1588: {'lr': 0.00039700000000000005, 'samples': 813568, 'steps': 1588, 'loss/train': 4.26978063583374} +03/03/2022 15:17:14 - INFO - codeparrot_training - Step 1589: {'lr': 0.00039725, 'samples': 814080, 'steps': 1589, 'loss/train': 4.824879169464111} +03/03/2022 15:17:15 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/03/2022 15:17:19 - INFO - codeparrot_training - Step 1590: {'lr': 0.0003975, 'samples': 814592, 'steps': 1590, 'loss/train': 3.480339527130127} +03/03/2022 15:17:22 - INFO - codeparrot_training - Step 1591: {'lr': 0.00039775, 'samples': 815104, 'steps': 1591, 'loss/train': 4.108184337615967} +03/03/2022 15:17:23 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/03/2022 15:17:28 - INFO - codeparrot_training - Step 1592: {'lr': 0.000398, 'samples': 815616, 'steps': 1592, 'loss/train': 4.285973072052002} +03/03/2022 15:17:31 - INFO - codeparrot_training - Step 1593: {'lr': 0.00039825, 'samples': 816128, 'steps': 1593, 'loss/train': 3.275076150894165} +03/03/2022 15:17:31 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/03/2022 15:17:36 - INFO - codeparrot_training - Step 1594: {'lr': 0.00039850000000000004, 'samples': 816640, 'steps': 1594, 'loss/train': 3.0879600048065186} +03/03/2022 15:17:39 - INFO - codeparrot_training - Step 1595: {'lr': 0.00039875, 'samples': 817152, 'steps': 1595, 'loss/train': 3.174454927444458} +03/03/2022 15:17:40 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/03/2022 15:17:44 - INFO - codeparrot_training - Step 1596: {'lr': 0.00039900000000000005, 'samples': 817664, 'steps': 1596, 'loss/train': 3.5130112171173096} +03/03/2022 15:17:47 - INFO - codeparrot_training - Step 1597: {'lr': 0.00039925000000000003, 'samples': 818176, 'steps': 1597, 'loss/train': 3.571528911590576} +03/03/2022 15:17:48 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/03/2022 15:17:53 - INFO - codeparrot_training - Step 1598: {'lr': 0.0003995, 'samples': 818688, 'steps': 1598, 'loss/train': 6.576632499694824} +03/03/2022 15:17:56 - INFO - codeparrot_training - Step 1599: {'lr': 0.00039975, 'samples': 819200, 'steps': 1599, 'loss/train': 4.470029354095459} +03/03/2022 15:17:58 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/03/2022 15:18:01 - INFO - codeparrot_training - Step 1600: {'lr': 0.0004, 'samples': 819712, 'steps': 1600, 'loss/train': 4.19922399520874} +03/03/2022 15:18:05 - INFO - codeparrot_training - Step 1601: {'lr': 0.00040025, 'samples': 820224, 'steps': 1601, 'loss/train': 4.240340709686279} +03/03/2022 15:18:06 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/03/2022 15:18:10 - INFO - codeparrot_training - Step 1602: {'lr': 0.00040050000000000003, 'samples': 820736, 'steps': 1602, 'loss/train': 3.719184398651123} +03/03/2022 15:18:13 - INFO - codeparrot_training - Step 1603: {'lr': 0.00040075, 'samples': 821248, 'steps': 1603, 'loss/train': 4.493873596191406} +03/03/2022 15:18:15 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/03/2022 15:18:18 - INFO - codeparrot_training - Step 1604: {'lr': 0.00040100000000000004, 'samples': 821760, 'steps': 1604, 'loss/train': 3.7419326305389404} +03/03/2022 15:18:21 - INFO - codeparrot_training - Step 1605: {'lr': 0.00040125, 'samples': 822272, 'steps': 1605, 'loss/train': 4.254170894622803} +03/03/2022 15:18:23 - INFO - codeparrot_training - Skipping example with length 305 (seq_length=1024) +03/03/2022 15:18:27 - INFO - codeparrot_training - Step 1606: {'lr': 0.00040150000000000006, 'samples': 822784, 'steps': 1606, 'loss/train': 4.3019256591796875} +03/03/2022 15:18:30 - INFO - codeparrot_training - Step 1607: {'lr': 0.00040175, 'samples': 823296, 'steps': 1607, 'loss/train': 4.238136291503906} +03/03/2022 15:18:31 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/03/2022 15:18:35 - INFO - codeparrot_training - Step 1608: {'lr': 0.000402, 'samples': 823808, 'steps': 1608, 'loss/train': 3.7276854515075684} +03/03/2022 15:18:38 - INFO - codeparrot_training - Step 1609: {'lr': 0.00040225, 'samples': 824320, 'steps': 1609, 'loss/train': 4.083951950073242} +03/03/2022 15:18:40 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/03/2022 15:18:44 - INFO - codeparrot_training - Step 1610: {'lr': 0.0004025, 'samples': 824832, 'steps': 1610, 'loss/train': 4.121634483337402} +03/03/2022 15:18:47 - INFO - codeparrot_training - Step 1611: {'lr': 0.00040275, 'samples': 825344, 'steps': 1611, 'loss/train': 3.4871816635131836} +03/03/2022 15:18:50 - INFO - codeparrot_training - Step 1612: {'lr': 0.00040300000000000004, 'samples': 825856, 'steps': 1612, 'loss/train': 3.1121580600738525} +03/03/2022 15:18:51 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/03/2022 15:18:56 - INFO - codeparrot_training - Step 1613: {'lr': 0.00040325, 'samples': 826368, 'steps': 1613, 'loss/train': 4.2109150886535645} +03/03/2022 15:18:59 - INFO - codeparrot_training - Step 1614: {'lr': 0.00040350000000000005, 'samples': 826880, 'steps': 1614, 'loss/train': 4.267260551452637} +03/03/2022 15:18:59 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/03/2022 15:19:04 - INFO - codeparrot_training - Step 1615: {'lr': 0.00040375000000000003, 'samples': 827392, 'steps': 1615, 'loss/train': 3.589965581893921} +03/03/2022 15:19:07 - INFO - codeparrot_training - Step 1616: {'lr': 0.000404, 'samples': 827904, 'steps': 1616, 'loss/train': 3.9033656120300293} +03/03/2022 15:19:07 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/03/2022 15:19:12 - INFO - codeparrot_training - Step 1617: {'lr': 0.00040425, 'samples': 828416, 'steps': 1617, 'loss/train': 4.371466159820557} +03/03/2022 15:19:15 - INFO - codeparrot_training - Step 1618: {'lr': 0.0004045, 'samples': 828928, 'steps': 1618, 'loss/train': 3.589682102203369} +03/03/2022 15:19:16 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/03/2022 15:19:21 - INFO - codeparrot_training - Step 1619: {'lr': 0.00040475, 'samples': 829440, 'steps': 1619, 'loss/train': 3.161203384399414} +03/03/2022 15:19:24 - INFO - codeparrot_training - Step 1620: {'lr': 0.00040500000000000003, 'samples': 829952, 'steps': 1620, 'loss/train': 4.257823467254639} +03/03/2022 15:19:24 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/03/2022 15:19:29 - INFO - codeparrot_training - Step 1621: {'lr': 0.00040525, 'samples': 830464, 'steps': 1621, 'loss/train': 4.098337650299072} +03/03/2022 15:19:32 - INFO - codeparrot_training - Step 1622: {'lr': 0.00040550000000000004, 'samples': 830976, 'steps': 1622, 'loss/train': 3.2562239170074463} +03/03/2022 15:19:32 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/03/2022 15:19:37 - INFO - codeparrot_training - Step 1623: {'lr': 0.00040575, 'samples': 831488, 'steps': 1623, 'loss/train': 3.4186604022979736} +03/03/2022 15:19:41 - INFO - codeparrot_training - Step 1624: {'lr': 0.00040600000000000006, 'samples': 832000, 'steps': 1624, 'loss/train': 3.073575258255005} +03/03/2022 15:19:41 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/03/2022 15:19:46 - INFO - codeparrot_training - Step 1625: {'lr': 0.00040625000000000004, 'samples': 832512, 'steps': 1625, 'loss/train': 3.647516965866089} +03/03/2022 15:19:49 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/03/2022 15:19:51 - INFO - codeparrot_training - Step 1626: {'lr': 0.00040649999999999996, 'samples': 833024, 'steps': 1626, 'loss/train': 3.6062047481536865} +03/03/2022 15:19:54 - INFO - codeparrot_training - Step 1627: {'lr': 0.00040675, 'samples': 833536, 'steps': 1627, 'loss/train': 3.5969531536102295} +03/03/2022 15:19:57 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/03/2022 15:19:59 - INFO - codeparrot_training - Step 1628: {'lr': 0.00040699999999999997, 'samples': 834048, 'steps': 1628, 'loss/train': 3.4551644325256348} +03/03/2022 15:20:03 - INFO - codeparrot_training - Step 1629: {'lr': 0.00040725, 'samples': 834560, 'steps': 1629, 'loss/train': 3.528550386428833} +03/03/2022 15:20:05 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/03/2022 15:20:08 - INFO - codeparrot_training - Step 1630: {'lr': 0.0004075, 'samples': 835072, 'steps': 1630, 'loss/train': 3.4015955924987793} +03/03/2022 15:20:11 - INFO - codeparrot_training - Step 1631: {'lr': 0.00040775, 'samples': 835584, 'steps': 1631, 'loss/train': 3.4129886627197266} +03/03/2022 15:20:14 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/03/2022 15:20:16 - INFO - codeparrot_training - Step 1632: {'lr': 0.000408, 'samples': 836096, 'steps': 1632, 'loss/train': 3.9364919662475586} +03/03/2022 15:20:19 - INFO - codeparrot_training - Step 1633: {'lr': 0.00040825000000000003, 'samples': 836608, 'steps': 1633, 'loss/train': 3.2710466384887695} +03/03/2022 15:20:22 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/03/2022 15:20:24 - INFO - codeparrot_training - Step 1634: {'lr': 0.0004085, 'samples': 837120, 'steps': 1634, 'loss/train': 4.535517692565918} +03/03/2022 15:20:28 - INFO - codeparrot_training - Step 1635: {'lr': 0.00040875, 'samples': 837632, 'steps': 1635, 'loss/train': 4.544247627258301} +03/03/2022 15:20:30 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 15:20:33 - INFO - codeparrot_training - Step 1636: {'lr': 0.00040899999999999997, 'samples': 838144, 'steps': 1636, 'loss/train': 4.110836505889893} +03/03/2022 15:20:36 - INFO - codeparrot_training - Step 1637: {'lr': 0.00040925, 'samples': 838656, 'steps': 1637, 'loss/train': 1.8042603731155396} +03/03/2022 15:20:38 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/03/2022 15:20:41 - INFO - codeparrot_training - Step 1638: {'lr': 0.0004095, 'samples': 839168, 'steps': 1638, 'loss/train': 4.259596347808838} +03/03/2022 15:20:44 - INFO - codeparrot_training - Step 1639: {'lr': 0.00040975, 'samples': 839680, 'steps': 1639, 'loss/train': 4.155637264251709} +03/03/2022 15:20:46 - INFO - codeparrot_training - Skipping example with length 860 (seq_length=1024) +03/03/2022 15:20:50 - INFO - codeparrot_training - Step 1640: {'lr': 0.00041, 'samples': 840192, 'steps': 1640, 'loss/train': 2.8649520874023438} +03/03/2022 15:20:53 - INFO - codeparrot_training - Step 1641: {'lr': 0.00041025, 'samples': 840704, 'steps': 1641, 'loss/train': 3.9086496829986572} +03/03/2022 15:20:55 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/03/2022 15:20:58 - INFO - codeparrot_training - Step 1642: {'lr': 0.0004105, 'samples': 841216, 'steps': 1642, 'loss/train': 3.142146348953247} +03/03/2022 15:21:01 - INFO - codeparrot_training - Step 1643: {'lr': 0.00041075000000000004, 'samples': 841728, 'steps': 1643, 'loss/train': 4.127035140991211} +03/03/2022 15:21:03 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/03/2022 15:21:06 - INFO - codeparrot_training - Step 1644: {'lr': 0.00041099999999999996, 'samples': 842240, 'steps': 1644, 'loss/train': 1.2069125175476074} +03/03/2022 15:21:10 - INFO - codeparrot_training - Step 1645: {'lr': 0.00041125, 'samples': 842752, 'steps': 1645, 'loss/train': 3.94893217086792} +03/03/2022 15:21:11 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 15:21:15 - INFO - codeparrot_training - Step 1646: {'lr': 0.0004115, 'samples': 843264, 'steps': 1646, 'loss/train': 2.6652512550354004} +03/03/2022 15:21:18 - INFO - codeparrot_training - Step 1647: {'lr': 0.00041175, 'samples': 843776, 'steps': 1647, 'loss/train': 3.434938907623291} +03/03/2022 15:21:20 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/03/2022 15:21:24 - INFO - codeparrot_training - Step 1648: {'lr': 0.000412, 'samples': 844288, 'steps': 1648, 'loss/train': 3.532137632369995} +03/03/2022 15:21:27 - INFO - codeparrot_training - Step 1649: {'lr': 0.00041225, 'samples': 844800, 'steps': 1649, 'loss/train': 3.8914690017700195} +03/03/2022 15:21:28 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/03/2022 15:21:32 - INFO - codeparrot_training - Step 1650: {'lr': 0.0004125, 'samples': 845312, 'steps': 1650, 'loss/train': 4.398185729980469} +03/03/2022 15:21:35 - INFO - codeparrot_training - Step 1651: {'lr': 0.00041275000000000003, 'samples': 845824, 'steps': 1651, 'loss/train': 2.951935291290283} +03/03/2022 15:21:37 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/03/2022 15:21:41 - INFO - codeparrot_training - Step 1652: {'lr': 0.000413, 'samples': 846336, 'steps': 1652, 'loss/train': 2.694889783859253} +03/03/2022 15:21:44 - INFO - codeparrot_training - Step 1653: {'lr': 0.00041325, 'samples': 846848, 'steps': 1653, 'loss/train': 1.3546243906021118} +03/03/2022 15:21:45 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/03/2022 15:21:49 - INFO - codeparrot_training - Step 1654: {'lr': 0.00041349999999999997, 'samples': 847360, 'steps': 1654, 'loss/train': 3.1578609943389893} +03/03/2022 15:21:52 - INFO - codeparrot_training - Step 1655: {'lr': 0.00041375, 'samples': 847872, 'steps': 1655, 'loss/train': 3.9147822856903076} +03/03/2022 15:21:54 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/03/2022 15:21:57 - INFO - codeparrot_training - Step 1656: {'lr': 0.000414, 'samples': 848384, 'steps': 1656, 'loss/train': 3.3651390075683594} +03/03/2022 15:22:01 - INFO - codeparrot_training - Step 1657: {'lr': 0.00041425, 'samples': 848896, 'steps': 1657, 'loss/train': 4.778948783874512} +03/03/2022 15:22:02 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/03/2022 15:22:06 - INFO - codeparrot_training - Step 1658: {'lr': 0.0004145, 'samples': 849408, 'steps': 1658, 'loss/train': 4.195247650146484} +03/03/2022 15:22:09 - INFO - codeparrot_training - Step 1659: {'lr': 0.00041475, 'samples': 849920, 'steps': 1659, 'loss/train': 4.073681354522705} +03/03/2022 15:22:11 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/03/2022 15:22:14 - INFO - codeparrot_training - Step 1660: {'lr': 0.000415, 'samples': 850432, 'steps': 1660, 'loss/train': 4.1907572746276855} +03/03/2022 15:22:17 - INFO - codeparrot_training - Step 1661: {'lr': 0.00041525000000000004, 'samples': 850944, 'steps': 1661, 'loss/train': 4.558657169342041} +03/03/2022 15:22:19 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/03/2022 15:22:23 - INFO - codeparrot_training - Step 1662: {'lr': 0.00041549999999999996, 'samples': 851456, 'steps': 1662, 'loss/train': 2.9788718223571777} +03/03/2022 15:22:26 - INFO - codeparrot_training - Step 1663: {'lr': 0.00041575, 'samples': 851968, 'steps': 1663, 'loss/train': 3.6902568340301514} +03/03/2022 15:22:29 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/03/2022 15:22:31 - INFO - codeparrot_training - Step 1664: {'lr': 0.000416, 'samples': 852480, 'steps': 1664, 'loss/train': 3.099257230758667} +03/03/2022 15:22:34 - INFO - codeparrot_training - Step 1665: {'lr': 0.00041625, 'samples': 852992, 'steps': 1665, 'loss/train': 4.143247127532959} +03/03/2022 15:22:37 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/03/2022 15:22:40 - INFO - codeparrot_training - Step 1666: {'lr': 0.0004165, 'samples': 853504, 'steps': 1666, 'loss/train': 3.5870015621185303} +03/03/2022 15:22:43 - INFO - codeparrot_training - Step 1667: {'lr': 0.00041675, 'samples': 854016, 'steps': 1667, 'loss/train': 5.4092583656311035} +03/03/2022 15:22:46 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/03/2022 15:22:48 - INFO - codeparrot_training - Step 1668: {'lr': 0.000417, 'samples': 854528, 'steps': 1668, 'loss/train': 3.9599406719207764} +03/03/2022 15:22:51 - INFO - codeparrot_training - Step 1669: {'lr': 0.00041725000000000003, 'samples': 855040, 'steps': 1669, 'loss/train': 3.46134352684021} +03/03/2022 15:22:54 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 15:22:57 - INFO - codeparrot_training - Step 1670: {'lr': 0.0004175, 'samples': 855552, 'steps': 1670, 'loss/train': 5.154891490936279} +03/03/2022 15:23:00 - INFO - codeparrot_training - Step 1671: {'lr': 0.00041775000000000004, 'samples': 856064, 'steps': 1671, 'loss/train': 3.756866931915283} +03/03/2022 15:23:02 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 15:23:05 - INFO - codeparrot_training - Step 1672: {'lr': 0.00041799999999999997, 'samples': 856576, 'steps': 1672, 'loss/train': 2.8966963291168213} +03/03/2022 15:23:08 - INFO - codeparrot_training - Step 1673: {'lr': 0.00041825, 'samples': 857088, 'steps': 1673, 'loss/train': 4.114925861358643} +03/03/2022 15:23:11 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/03/2022 15:23:13 - INFO - codeparrot_training - Step 1674: {'lr': 0.0004185, 'samples': 857600, 'steps': 1674, 'loss/train': 3.2051870822906494} +03/03/2022 15:23:17 - INFO - codeparrot_training - Step 1675: {'lr': 0.00041875, 'samples': 858112, 'steps': 1675, 'loss/train': 3.4225378036499023} +03/03/2022 15:23:19 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/03/2022 15:23:22 - INFO - codeparrot_training - Step 1676: {'lr': 0.000419, 'samples': 858624, 'steps': 1676, 'loss/train': 4.339293956756592} +03/03/2022 15:23:25 - INFO - codeparrot_training - Step 1677: {'lr': 0.00041925, 'samples': 859136, 'steps': 1677, 'loss/train': 6.490478992462158} +03/03/2022 15:23:28 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/03/2022 15:23:30 - INFO - codeparrot_training - Step 1678: {'lr': 0.0004195, 'samples': 859648, 'steps': 1678, 'loss/train': 3.74222731590271} +03/03/2022 15:23:34 - INFO - codeparrot_training - Step 1679: {'lr': 0.00041975000000000004, 'samples': 860160, 'steps': 1679, 'loss/train': 3.736689567565918} +03/03/2022 15:23:36 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/03/2022 15:23:39 - INFO - codeparrot_training - Step 1680: {'lr': 0.00042, 'samples': 860672, 'steps': 1680, 'loss/train': 3.611518144607544} +03/03/2022 15:23:42 - INFO - codeparrot_training - Step 1681: {'lr': 0.00042025, 'samples': 861184, 'steps': 1681, 'loss/train': 4.2163801193237305} +03/03/2022 15:23:44 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/03/2022 15:23:47 - INFO - codeparrot_training - Step 1682: {'lr': 0.0004205, 'samples': 861696, 'steps': 1682, 'loss/train': 3.356593132019043} +03/03/2022 15:23:50 - INFO - codeparrot_training - Step 1683: {'lr': 0.00042075, 'samples': 862208, 'steps': 1683, 'loss/train': 4.491826057434082} +03/03/2022 15:23:53 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/03/2022 15:23:56 - INFO - codeparrot_training - Step 1684: {'lr': 0.000421, 'samples': 862720, 'steps': 1684, 'loss/train': 3.3760108947753906} +03/03/2022 15:23:59 - INFO - codeparrot_training - Step 1685: {'lr': 0.00042125, 'samples': 863232, 'steps': 1685, 'loss/train': 4.584646701812744} +03/03/2022 15:24:01 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 15:24:04 - INFO - codeparrot_training - Step 1686: {'lr': 0.0004215, 'samples': 863744, 'steps': 1686, 'loss/train': 3.885138511657715} +03/03/2022 15:24:07 - INFO - codeparrot_training - Step 1687: {'lr': 0.00042175000000000003, 'samples': 864256, 'steps': 1687, 'loss/train': 3.71675968170166} +03/03/2022 15:24:09 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/03/2022 15:24:12 - INFO - codeparrot_training - Step 1688: {'lr': 0.000422, 'samples': 864768, 'steps': 1688, 'loss/train': 4.1364850997924805} +03/03/2022 15:24:15 - INFO - codeparrot_training - Step 1689: {'lr': 0.00042225000000000005, 'samples': 865280, 'steps': 1689, 'loss/train': 3.7608449459075928} +03/03/2022 15:24:18 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/03/2022 15:24:21 - INFO - codeparrot_training - Step 1690: {'lr': 0.00042249999999999997, 'samples': 865792, 'steps': 1690, 'loss/train': 3.461235523223877} +03/03/2022 15:24:24 - INFO - codeparrot_training - Step 1691: {'lr': 0.00042275, 'samples': 866304, 'steps': 1691, 'loss/train': 3.6422181129455566} +03/03/2022 15:24:27 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/03/2022 15:24:29 - INFO - codeparrot_training - Step 1692: {'lr': 0.000423, 'samples': 866816, 'steps': 1692, 'loss/train': 3.8520140647888184} +03/03/2022 15:24:32 - INFO - codeparrot_training - Step 1693: {'lr': 0.00042325, 'samples': 867328, 'steps': 1693, 'loss/train': 3.8769209384918213} +03/03/2022 15:24:35 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/03/2022 15:24:38 - INFO - codeparrot_training - Step 1694: {'lr': 0.0004235, 'samples': 867840, 'steps': 1694, 'loss/train': 4.164051532745361} +03/03/2022 15:24:41 - INFO - codeparrot_training - Step 1695: {'lr': 0.00042375000000000003, 'samples': 868352, 'steps': 1695, 'loss/train': 3.7627975940704346} +03/03/2022 15:24:43 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/03/2022 15:24:46 - INFO - codeparrot_training - Step 1696: {'lr': 0.000424, 'samples': 868864, 'steps': 1696, 'loss/train': 3.3928215503692627} +03/03/2022 15:24:49 - INFO - codeparrot_training - Step 1697: {'lr': 0.00042425000000000004, 'samples': 869376, 'steps': 1697, 'loss/train': 4.1996283531188965} +03/03/2022 15:24:52 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/03/2022 15:24:55 - INFO - codeparrot_training - Step 1698: {'lr': 0.0004245, 'samples': 869888, 'steps': 1698, 'loss/train': 3.942244052886963} +03/03/2022 15:24:58 - INFO - codeparrot_training - Step 1699: {'lr': 0.00042475000000000005, 'samples': 870400, 'steps': 1699, 'loss/train': 3.671191930770874} +03/03/2022 15:25:00 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/03/2022 15:25:03 - INFO - codeparrot_training - Step 1700: {'lr': 0.000425, 'samples': 870912, 'steps': 1700, 'loss/train': 3.7079269886016846} +03/03/2022 15:25:06 - INFO - codeparrot_training - Step 1701: {'lr': 0.00042525, 'samples': 871424, 'steps': 1701, 'loss/train': 5.224290370941162} +03/03/2022 15:25:09 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/03/2022 15:25:11 - INFO - codeparrot_training - Step 1702: {'lr': 0.0004255, 'samples': 871936, 'steps': 1702, 'loss/train': 3.76531982421875} +03/03/2022 15:25:14 - INFO - codeparrot_training - Step 1703: {'lr': 0.00042575, 'samples': 872448, 'steps': 1703, 'loss/train': 3.520569324493408} +03/03/2022 15:25:17 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/03/2022 15:25:20 - INFO - codeparrot_training - Step 1704: {'lr': 0.000426, 'samples': 872960, 'steps': 1704, 'loss/train': 3.5026602745056152} +03/03/2022 15:25:23 - INFO - codeparrot_training - Step 1705: {'lr': 0.00042625000000000003, 'samples': 873472, 'steps': 1705, 'loss/train': 3.4497320652008057} +03/03/2022 15:25:25 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/03/2022 15:25:28 - INFO - codeparrot_training - Step 1706: {'lr': 0.0004265, 'samples': 873984, 'steps': 1706, 'loss/train': 3.317208766937256} +03/03/2022 15:25:31 - INFO - codeparrot_training - Step 1707: {'lr': 0.00042675000000000005, 'samples': 874496, 'steps': 1707, 'loss/train': 3.737887382507324} +03/03/2022 15:25:34 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/03/2022 15:25:37 - INFO - codeparrot_training - Step 1708: {'lr': 0.000427, 'samples': 875008, 'steps': 1708, 'loss/train': 4.25682258605957} +03/03/2022 15:25:40 - INFO - codeparrot_training - Step 1709: {'lr': 0.00042725, 'samples': 875520, 'steps': 1709, 'loss/train': 4.29536771774292} +03/03/2022 15:25:42 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/03/2022 15:25:45 - INFO - codeparrot_training - Step 1710: {'lr': 0.0004275, 'samples': 876032, 'steps': 1710, 'loss/train': 3.959855318069458} +03/03/2022 15:25:48 - INFO - codeparrot_training - Step 1711: {'lr': 0.00042775, 'samples': 876544, 'steps': 1711, 'loss/train': 3.5018670558929443} +03/03/2022 15:25:50 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/03/2022 15:25:53 - INFO - codeparrot_training - Step 1712: {'lr': 0.000428, 'samples': 877056, 'steps': 1712, 'loss/train': 3.536062479019165} +03/03/2022 15:25:56 - INFO - codeparrot_training - Step 1713: {'lr': 0.00042825000000000003, 'samples': 877568, 'steps': 1713, 'loss/train': 2.2983222007751465} +03/03/2022 15:25:59 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/03/2022 15:26:02 - INFO - codeparrot_training - Step 1714: {'lr': 0.0004285, 'samples': 878080, 'steps': 1714, 'loss/train': 3.3023159503936768} +03/03/2022 15:26:05 - INFO - codeparrot_training - Step 1715: {'lr': 0.00042875000000000004, 'samples': 878592, 'steps': 1715, 'loss/train': 3.105361223220825} +03/03/2022 15:26:07 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/03/2022 15:26:10 - INFO - codeparrot_training - Step 1716: {'lr': 0.000429, 'samples': 879104, 'steps': 1716, 'loss/train': 4.056852340698242} +03/03/2022 15:26:13 - INFO - codeparrot_training - Step 1717: {'lr': 0.00042925000000000005, 'samples': 879616, 'steps': 1717, 'loss/train': 3.6304948329925537} +03/03/2022 15:26:15 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/03/2022 15:26:19 - INFO - codeparrot_training - Step 1718: {'lr': 0.0004295, 'samples': 880128, 'steps': 1718, 'loss/train': 3.6288976669311523} +03/03/2022 15:26:22 - INFO - codeparrot_training - Step 1719: {'lr': 0.00042975, 'samples': 880640, 'steps': 1719, 'loss/train': 4.1409525871276855} +03/03/2022 15:26:24 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/03/2022 15:26:27 - INFO - codeparrot_training - Step 1720: {'lr': 0.00043, 'samples': 881152, 'steps': 1720, 'loss/train': 3.690675973892212} +03/03/2022 15:26:30 - INFO - codeparrot_training - Step 1721: {'lr': 0.00043025, 'samples': 881664, 'steps': 1721, 'loss/train': 4.356791019439697} +03/03/2022 15:26:32 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/03/2022 15:26:35 - INFO - codeparrot_training - Step 1722: {'lr': 0.0004305, 'samples': 882176, 'steps': 1722, 'loss/train': 2.7275469303131104} +03/03/2022 15:26:38 - INFO - codeparrot_training - Step 1723: {'lr': 0.00043075000000000003, 'samples': 882688, 'steps': 1723, 'loss/train': 3.7212345600128174} +03/03/2022 15:26:40 - INFO - codeparrot_training - Skipping example with length 578 (seq_length=1024) +03/03/2022 15:26:44 - INFO - codeparrot_training - Step 1724: {'lr': 0.000431, 'samples': 883200, 'steps': 1724, 'loss/train': 3.3754706382751465} +03/03/2022 15:26:47 - INFO - codeparrot_training - Step 1725: {'lr': 0.00043125000000000005, 'samples': 883712, 'steps': 1725, 'loss/train': 4.620190143585205} +03/03/2022 15:26:48 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/03/2022 15:26:52 - INFO - codeparrot_training - Step 1726: {'lr': 0.0004315, 'samples': 884224, 'steps': 1726, 'loss/train': 4.098268032073975} +03/03/2022 15:26:55 - INFO - codeparrot_training - Step 1727: {'lr': 0.00043175, 'samples': 884736, 'steps': 1727, 'loss/train': 8.18599796295166} +03/03/2022 15:26:58 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/03/2022 15:27:01 - INFO - codeparrot_training - Step 1728: {'lr': 0.000432, 'samples': 885248, 'steps': 1728, 'loss/train': 3.496340274810791} +03/03/2022 15:27:04 - INFO - codeparrot_training - Step 1729: {'lr': 0.00043225, 'samples': 885760, 'steps': 1729, 'loss/train': 3.5704078674316406} +03/03/2022 15:27:06 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/03/2022 15:27:09 - INFO - codeparrot_training - Step 1730: {'lr': 0.0004325, 'samples': 886272, 'steps': 1730, 'loss/train': 3.9318318367004395} +03/03/2022 15:27:13 - INFO - codeparrot_training - Step 1731: {'lr': 0.00043275000000000003, 'samples': 886784, 'steps': 1731, 'loss/train': 3.648881435394287} +03/03/2022 15:27:15 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/03/2022 15:27:18 - INFO - codeparrot_training - Step 1732: {'lr': 0.000433, 'samples': 887296, 'steps': 1732, 'loss/train': 2.6858880519866943} +03/03/2022 15:27:21 - INFO - codeparrot_training - Step 1733: {'lr': 0.00043325000000000004, 'samples': 887808, 'steps': 1733, 'loss/train': 3.021738052368164} +03/03/2022 15:27:23 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/03/2022 15:27:27 - INFO - codeparrot_training - Step 1734: {'lr': 0.0004335, 'samples': 888320, 'steps': 1734, 'loss/train': 3.8737881183624268} +03/03/2022 15:27:30 - INFO - codeparrot_training - Step 1735: {'lr': 0.00043375000000000005, 'samples': 888832, 'steps': 1735, 'loss/train': 4.361507892608643} +03/03/2022 15:27:33 - INFO - codeparrot_training - Step 1736: {'lr': 0.00043400000000000003, 'samples': 889344, 'steps': 1736, 'loss/train': 3.647573947906494} +03/03/2022 15:27:34 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/03/2022 15:27:38 - INFO - codeparrot_training - Step 1737: {'lr': 0.00043425, 'samples': 889856, 'steps': 1737, 'loss/train': 3.487705707550049} +03/03/2022 15:27:41 - INFO - codeparrot_training - Step 1738: {'lr': 0.0004345, 'samples': 890368, 'steps': 1738, 'loss/train': 3.882735252380371} +03/03/2022 15:27:42 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/03/2022 15:27:47 - INFO - codeparrot_training - Step 1739: {'lr': 0.00043475, 'samples': 890880, 'steps': 1739, 'loss/train': 3.165846586227417} +03/03/2022 15:27:50 - INFO - codeparrot_training - Step 1740: {'lr': 0.000435, 'samples': 891392, 'steps': 1740, 'loss/train': 2.254725694656372} +03/03/2022 15:27:50 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/03/2022 15:27:55 - INFO - codeparrot_training - Step 1741: {'lr': 0.00043525000000000004, 'samples': 891904, 'steps': 1741, 'loss/train': 2.0590782165527344} +03/03/2022 15:27:58 - INFO - codeparrot_training - Step 1742: {'lr': 0.0004355, 'samples': 892416, 'steps': 1742, 'loss/train': 4.130198955535889} +03/03/2022 15:27:58 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 15:28:04 - INFO - codeparrot_training - Step 1743: {'lr': 0.00043575000000000005, 'samples': 892928, 'steps': 1743, 'loss/train': 3.8559696674346924} +03/03/2022 15:28:06 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/03/2022 15:28:09 - INFO - codeparrot_training - Step 1744: {'lr': 0.000436, 'samples': 893440, 'steps': 1744, 'loss/train': 3.884993314743042} +03/03/2022 15:28:12 - INFO - codeparrot_training - Step 1745: {'lr': 0.00043625000000000006, 'samples': 893952, 'steps': 1745, 'loss/train': 3.838776111602783} +03/03/2022 15:28:15 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/03/2022 15:28:17 - INFO - codeparrot_training - Step 1746: {'lr': 0.0004365, 'samples': 894464, 'steps': 1746, 'loss/train': 3.2877955436706543} +03/03/2022 15:28:20 - INFO - codeparrot_training - Step 1747: {'lr': 0.00043675, 'samples': 894976, 'steps': 1747, 'loss/train': 4.131059169769287} +03/03/2022 15:28:23 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/03/2022 15:28:25 - INFO - codeparrot_training - Step 1748: {'lr': 0.000437, 'samples': 895488, 'steps': 1748, 'loss/train': 3.5058960914611816} +03/03/2022 15:28:29 - INFO - codeparrot_training - Step 1749: {'lr': 0.00043725000000000003, 'samples': 896000, 'steps': 1749, 'loss/train': 3.5843958854675293} +03/03/2022 15:28:31 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/03/2022 15:28:34 - INFO - codeparrot_training - Step 1750: {'lr': 0.0004375, 'samples': 896512, 'steps': 1750, 'loss/train': 3.9815938472747803} +03/03/2022 15:28:37 - INFO - codeparrot_training - Step 1751: {'lr': 0.00043775, 'samples': 897024, 'steps': 1751, 'loss/train': 3.6060678958892822} +03/03/2022 15:28:39 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/03/2022 15:28:42 - INFO - codeparrot_training - Step 1752: {'lr': 0.000438, 'samples': 897536, 'steps': 1752, 'loss/train': 3.5824193954467773} +03/03/2022 15:28:46 - INFO - codeparrot_training - Step 1753: {'lr': 0.00043825, 'samples': 898048, 'steps': 1753, 'loss/train': 1.004949927330017} +03/03/2022 15:28:48 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/03/2022 15:28:51 - INFO - codeparrot_training - Step 1754: {'lr': 0.00043850000000000003, 'samples': 898560, 'steps': 1754, 'loss/train': 2.629119396209717} +03/03/2022 15:28:54 - INFO - codeparrot_training - Step 1755: {'lr': 0.00043874999999999996, 'samples': 899072, 'steps': 1755, 'loss/train': 4.202991008758545} +03/03/2022 15:28:56 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/03/2022 15:28:59 - INFO - codeparrot_training - Step 1756: {'lr': 0.000439, 'samples': 899584, 'steps': 1756, 'loss/train': 3.62184476852417} +03/03/2022 15:29:03 - INFO - codeparrot_training - Step 1757: {'lr': 0.00043924999999999997, 'samples': 900096, 'steps': 1757, 'loss/train': 3.039698600769043} +03/03/2022 15:29:06 - INFO - codeparrot_training - Step 1758: {'lr': 0.0004395, 'samples': 900608, 'steps': 1758, 'loss/train': 2.670175313949585} +03/03/2022 15:29:06 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/03/2022 15:29:11 - INFO - codeparrot_training - Step 1759: {'lr': 0.00043975, 'samples': 901120, 'steps': 1759, 'loss/train': 3.6741206645965576} +03/03/2022 15:29:14 - INFO - codeparrot_training - Step 1760: {'lr': 0.00044, 'samples': 901632, 'steps': 1760, 'loss/train': 3.6410982608795166} +03/03/2022 15:29:15 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/03/2022 15:29:20 - INFO - codeparrot_training - Step 1761: {'lr': 0.00044025, 'samples': 902144, 'steps': 1761, 'loss/train': 2.68424654006958} +03/03/2022 15:29:23 - INFO - codeparrot_training - Step 1762: {'lr': 0.00044050000000000003, 'samples': 902656, 'steps': 1762, 'loss/train': 4.119108200073242} +03/03/2022 15:29:24 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/03/2022 15:29:28 - INFO - codeparrot_training - Step 1763: {'lr': 0.00044075, 'samples': 903168, 'steps': 1763, 'loss/train': 3.399466037750244} +03/03/2022 15:29:31 - INFO - codeparrot_training - Step 1764: {'lr': 0.000441, 'samples': 903680, 'steps': 1764, 'loss/train': 1.1867115497589111} +03/03/2022 15:29:32 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/03/2022 15:29:37 - INFO - codeparrot_training - Step 1765: {'lr': 0.00044124999999999996, 'samples': 904192, 'steps': 1765, 'loss/train': 3.64756178855896} +03/03/2022 15:29:40 - INFO - codeparrot_training - Step 1766: {'lr': 0.0004415, 'samples': 904704, 'steps': 1766, 'loss/train': 3.3465452194213867} +03/03/2022 15:29:41 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/03/2022 15:29:45 - INFO - codeparrot_training - Step 1767: {'lr': 0.00044175, 'samples': 905216, 'steps': 1767, 'loss/train': 3.5711829662323} +03/03/2022 15:29:48 - INFO - codeparrot_training - Step 1768: {'lr': 0.000442, 'samples': 905728, 'steps': 1768, 'loss/train': 4.265261650085449} +03/03/2022 15:29:49 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/03/2022 15:29:53 - INFO - codeparrot_training - Step 1769: {'lr': 0.00044225, 'samples': 906240, 'steps': 1769, 'loss/train': 5.817237854003906} +03/03/2022 15:29:56 - INFO - codeparrot_training - Step 1770: {'lr': 0.0004425, 'samples': 906752, 'steps': 1770, 'loss/train': 4.571526527404785} +03/03/2022 15:29:57 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/03/2022 15:30:02 - INFO - codeparrot_training - Step 1771: {'lr': 0.00044275, 'samples': 907264, 'steps': 1771, 'loss/train': 3.680689573287964} +03/03/2022 15:30:05 - INFO - codeparrot_training - Step 1772: {'lr': 0.00044300000000000003, 'samples': 907776, 'steps': 1772, 'loss/train': 4.107189655303955} +03/03/2022 15:30:06 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/03/2022 15:30:10 - INFO - codeparrot_training - Step 1773: {'lr': 0.00044325, 'samples': 908288, 'steps': 1773, 'loss/train': 3.7640373706817627} +03/03/2022 15:30:13 - INFO - codeparrot_training - Step 1774: {'lr': 0.0004435, 'samples': 908800, 'steps': 1774, 'loss/train': 3.4179940223693848} +03/03/2022 15:30:14 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 15:30:18 - INFO - codeparrot_training - Step 1775: {'lr': 0.00044374999999999997, 'samples': 909312, 'steps': 1775, 'loss/train': 1.0886516571044922} +03/03/2022 15:30:22 - INFO - codeparrot_training - Step 1776: {'lr': 0.000444, 'samples': 909824, 'steps': 1776, 'loss/train': 4.729548931121826} +03/03/2022 15:30:22 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/03/2022 15:30:27 - INFO - codeparrot_training - Step 1777: {'lr': 0.00044425, 'samples': 910336, 'steps': 1777, 'loss/train': 2.85425066947937} +03/03/2022 15:30:30 - INFO - codeparrot_training - Step 1778: {'lr': 0.0004445, 'samples': 910848, 'steps': 1778, 'loss/train': 2.7307944297790527} +03/03/2022 15:30:31 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/03/2022 15:30:35 - INFO - codeparrot_training - Step 1779: {'lr': 0.00044475, 'samples': 911360, 'steps': 1779, 'loss/train': 3.718137264251709} +03/03/2022 15:30:39 - INFO - codeparrot_training - Step 1780: {'lr': 0.00044500000000000003, 'samples': 911872, 'steps': 1780, 'loss/train': 3.6661293506622314} +03/03/2022 15:30:40 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/03/2022 15:30:44 - INFO - codeparrot_training - Step 1781: {'lr': 0.00044525, 'samples': 912384, 'steps': 1781, 'loss/train': 3.7645955085754395} +03/03/2022 15:30:47 - INFO - codeparrot_training - Step 1782: {'lr': 0.00044550000000000004, 'samples': 912896, 'steps': 1782, 'loss/train': 3.7916455268859863} +03/03/2022 15:30:48 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/03/2022 15:30:52 - INFO - codeparrot_training - Step 1783: {'lr': 0.00044574999999999997, 'samples': 913408, 'steps': 1783, 'loss/train': 3.211923837661743} +03/03/2022 15:30:55 - INFO - codeparrot_training - Step 1784: {'lr': 0.000446, 'samples': 913920, 'steps': 1784, 'loss/train': 3.6987788677215576} +03/03/2022 15:30:56 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/03/2022 15:31:01 - INFO - codeparrot_training - Step 1785: {'lr': 0.00044625, 'samples': 914432, 'steps': 1785, 'loss/train': 2.0741822719573975} +03/03/2022 15:31:04 - INFO - codeparrot_training - Step 1786: {'lr': 0.0004465, 'samples': 914944, 'steps': 1786, 'loss/train': 2.637977123260498} +03/03/2022 15:31:05 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/03/2022 15:31:09 - INFO - codeparrot_training - Step 1787: {'lr': 0.00044675, 'samples': 915456, 'steps': 1787, 'loss/train': 3.3176441192626953} +03/03/2022 15:31:12 - INFO - codeparrot_training - Step 1788: {'lr': 0.000447, 'samples': 915968, 'steps': 1788, 'loss/train': 8.905036926269531} +03/03/2022 15:31:14 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/03/2022 15:31:18 - INFO - codeparrot_training - Step 1789: {'lr': 0.00044725, 'samples': 916480, 'steps': 1789, 'loss/train': 4.793331146240234} +03/03/2022 15:31:21 - INFO - codeparrot_training - Step 1790: {'lr': 0.00044750000000000004, 'samples': 916992, 'steps': 1790, 'loss/train': 3.2318222522735596} +03/03/2022 15:31:23 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/03/2022 15:31:26 - INFO - codeparrot_training - Step 1791: {'lr': 0.00044775, 'samples': 917504, 'steps': 1791, 'loss/train': 1.2780442237854004} +03/03/2022 15:31:29 - INFO - codeparrot_training - Step 1792: {'lr': 0.000448, 'samples': 918016, 'steps': 1792, 'loss/train': 4.4922099113464355} +03/03/2022 15:31:31 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/03/2022 15:31:35 - INFO - codeparrot_training - Step 1793: {'lr': 0.00044824999999999997, 'samples': 918528, 'steps': 1793, 'loss/train': 5.961709022521973} +03/03/2022 15:31:38 - INFO - codeparrot_training - Step 1794: {'lr': 0.0004485, 'samples': 919040, 'steps': 1794, 'loss/train': 4.271733283996582} +03/03/2022 15:31:41 - INFO - codeparrot_training - Step 1795: {'lr': 0.00044875, 'samples': 919552, 'steps': 1795, 'loss/train': 4.202448844909668} +03/03/2022 15:31:41 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 15:31:46 - INFO - codeparrot_training - Step 1796: {'lr': 0.000449, 'samples': 920064, 'steps': 1796, 'loss/train': 3.2963552474975586} +03/03/2022 15:31:49 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/03/2022 15:31:52 - INFO - codeparrot_training - Step 1797: {'lr': 0.00044925, 'samples': 920576, 'steps': 1797, 'loss/train': 3.6156511306762695} +03/03/2022 15:31:55 - INFO - codeparrot_training - Step 1798: {'lr': 0.00044950000000000003, 'samples': 921088, 'steps': 1798, 'loss/train': 3.8857486248016357} +03/03/2022 15:31:58 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/03/2022 15:32:00 - INFO - codeparrot_training - Step 1799: {'lr': 0.00044975, 'samples': 921600, 'steps': 1799, 'loss/train': 3.9517669677734375} +03/03/2022 15:32:03 - INFO - codeparrot_training - Step 1800: {'lr': 0.00045000000000000004, 'samples': 922112, 'steps': 1800, 'loss/train': 3.953028678894043} +03/03/2022 15:32:06 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/03/2022 15:32:08 - INFO - codeparrot_training - Step 1801: {'lr': 0.00045024999999999997, 'samples': 922624, 'steps': 1801, 'loss/train': 5.343773365020752} +03/03/2022 15:32:12 - INFO - codeparrot_training - Step 1802: {'lr': 0.0004505, 'samples': 923136, 'steps': 1802, 'loss/train': 3.9360623359680176} +03/03/2022 15:32:14 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/03/2022 15:32:17 - INFO - codeparrot_training - Step 1803: {'lr': 0.00045075, 'samples': 923648, 'steps': 1803, 'loss/train': 3.6178886890411377} +03/03/2022 15:32:20 - INFO - codeparrot_training - Step 1804: {'lr': 0.000451, 'samples': 924160, 'steps': 1804, 'loss/train': 3.321953535079956} +03/03/2022 15:32:23 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/03/2022 15:32:25 - INFO - codeparrot_training - Step 1805: {'lr': 0.00045125, 'samples': 924672, 'steps': 1805, 'loss/train': 3.360380172729492} +03/03/2022 15:32:28 - INFO - codeparrot_training - Step 1806: {'lr': 0.0004515, 'samples': 925184, 'steps': 1806, 'loss/train': 4.78564977645874} +03/03/2022 15:32:31 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/03/2022 15:32:34 - INFO - codeparrot_training - Step 1807: {'lr': 0.00045175, 'samples': 925696, 'steps': 1807, 'loss/train': 3.573976993560791} +03/03/2022 15:32:37 - INFO - codeparrot_training - Step 1808: {'lr': 0.00045200000000000004, 'samples': 926208, 'steps': 1808, 'loss/train': 4.651330947875977} +03/03/2022 15:32:39 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/03/2022 15:32:42 - INFO - codeparrot_training - Step 1809: {'lr': 0.00045225, 'samples': 926720, 'steps': 1809, 'loss/train': 4.338063716888428} +03/03/2022 15:32:46 - INFO - codeparrot_training - Step 1810: {'lr': 0.00045250000000000005, 'samples': 927232, 'steps': 1810, 'loss/train': 3.876124382019043} +03/03/2022 15:32:48 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/03/2022 15:32:51 - INFO - codeparrot_training - Step 1811: {'lr': 0.00045275, 'samples': 927744, 'steps': 1811, 'loss/train': 3.58648419380188} +03/03/2022 15:32:54 - INFO - codeparrot_training - Step 1812: {'lr': 0.000453, 'samples': 928256, 'steps': 1812, 'loss/train': 3.863163709640503} +03/03/2022 15:32:57 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/03/2022 15:32:59 - INFO - codeparrot_training - Step 1813: {'lr': 0.00045325, 'samples': 928768, 'steps': 1813, 'loss/train': 3.511075496673584} +03/03/2022 15:33:02 - INFO - codeparrot_training - Step 1814: {'lr': 0.0004535, 'samples': 929280, 'steps': 1814, 'loss/train': 3.8160760402679443} +03/03/2022 15:33:05 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/03/2022 15:33:08 - INFO - codeparrot_training - Step 1815: {'lr': 0.00045375, 'samples': 929792, 'steps': 1815, 'loss/train': 3.687553644180298} +03/03/2022 15:33:11 - INFO - codeparrot_training - Step 1816: {'lr': 0.00045400000000000003, 'samples': 930304, 'steps': 1816, 'loss/train': 3.6537318229675293} +03/03/2022 15:33:13 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/03/2022 15:33:16 - INFO - codeparrot_training - Step 1817: {'lr': 0.00045425, 'samples': 930816, 'steps': 1817, 'loss/train': 2.836833953857422} +03/03/2022 15:33:19 - INFO - codeparrot_training - Step 1818: {'lr': 0.00045450000000000004, 'samples': 931328, 'steps': 1818, 'loss/train': 3.969243049621582} +03/03/2022 15:33:21 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 15:33:25 - INFO - codeparrot_training - Step 1819: {'lr': 0.00045475, 'samples': 931840, 'steps': 1819, 'loss/train': 4.125191688537598} +03/03/2022 15:33:28 - INFO - codeparrot_training - Step 1820: {'lr': 0.000455, 'samples': 932352, 'steps': 1820, 'loss/train': 3.797196865081787} +03/03/2022 15:33:30 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/03/2022 15:33:33 - INFO - codeparrot_training - Step 1821: {'lr': 0.00045525, 'samples': 932864, 'steps': 1821, 'loss/train': 3.736868143081665} +03/03/2022 15:33:36 - INFO - codeparrot_training - Step 1822: {'lr': 0.0004555, 'samples': 933376, 'steps': 1822, 'loss/train': 3.7333502769470215} +03/03/2022 15:33:38 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/03/2022 15:33:41 - INFO - codeparrot_training - Step 1823: {'lr': 0.00045575, 'samples': 933888, 'steps': 1823, 'loss/train': 3.956984519958496} +03/03/2022 15:33:45 - INFO - codeparrot_training - Step 1824: {'lr': 0.000456, 'samples': 934400, 'steps': 1824, 'loss/train': 4.5288310050964355} +03/03/2022 15:33:47 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/03/2022 15:33:50 - INFO - codeparrot_training - Step 1825: {'lr': 0.00045625, 'samples': 934912, 'steps': 1825, 'loss/train': 4.2684783935546875} +03/03/2022 15:33:53 - INFO - codeparrot_training - Step 1826: {'lr': 0.00045650000000000004, 'samples': 935424, 'steps': 1826, 'loss/train': 3.128011465072632} +03/03/2022 15:33:56 - INFO - codeparrot_training - Step 1827: {'lr': 0.00045675, 'samples': 935936, 'steps': 1827, 'loss/train': 3.609135627746582} +03/03/2022 15:33:56 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/03/2022 15:34:02 - INFO - codeparrot_training - Step 1828: {'lr': 0.00045700000000000005, 'samples': 936448, 'steps': 1828, 'loss/train': 3.369703769683838} +03/03/2022 15:34:05 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/03/2022 15:34:07 - INFO - codeparrot_training - Step 1829: {'lr': 0.00045725, 'samples': 936960, 'steps': 1829, 'loss/train': 3.9321675300598145} +03/03/2022 15:34:10 - INFO - codeparrot_training - Step 1830: {'lr': 0.0004575, 'samples': 937472, 'steps': 1830, 'loss/train': 3.8493857383728027} +03/03/2022 15:34:12 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/03/2022 15:34:15 - INFO - codeparrot_training - Step 1831: {'lr': 0.00045775, 'samples': 937984, 'steps': 1831, 'loss/train': 4.316078186035156} +03/03/2022 15:34:18 - INFO - codeparrot_training - Step 1832: {'lr': 0.000458, 'samples': 938496, 'steps': 1832, 'loss/train': 3.823058605194092} +03/03/2022 15:34:21 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/03/2022 15:34:24 - INFO - codeparrot_training - Step 1833: {'lr': 0.00045825, 'samples': 939008, 'steps': 1833, 'loss/train': 3.4598371982574463} +03/03/2022 15:34:27 - INFO - codeparrot_training - Step 1834: {'lr': 0.00045850000000000003, 'samples': 939520, 'steps': 1834, 'loss/train': 3.3389430046081543} +03/03/2022 15:34:29 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/03/2022 15:34:32 - INFO - codeparrot_training - Step 1835: {'lr': 0.00045875, 'samples': 940032, 'steps': 1835, 'loss/train': 3.106158494949341} +03/03/2022 15:34:35 - INFO - codeparrot_training - Step 1836: {'lr': 0.00045900000000000004, 'samples': 940544, 'steps': 1836, 'loss/train': 2.701791524887085} +03/03/2022 15:34:37 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/03/2022 15:34:40 - INFO - codeparrot_training - Step 1837: {'lr': 0.00045925, 'samples': 941056, 'steps': 1837, 'loss/train': 3.407944679260254} +03/03/2022 15:34:44 - INFO - codeparrot_training - Step 1838: {'lr': 0.00045950000000000006, 'samples': 941568, 'steps': 1838, 'loss/train': 4.242276191711426} +03/03/2022 15:34:45 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/03/2022 15:34:49 - INFO - codeparrot_training - Step 1839: {'lr': 0.00045975, 'samples': 942080, 'steps': 1839, 'loss/train': 3.5122146606445312} +03/03/2022 15:34:52 - INFO - codeparrot_training - Step 1840: {'lr': 0.00046, 'samples': 942592, 'steps': 1840, 'loss/train': 0.9950838088989258} +03/03/2022 15:34:54 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/03/2022 15:34:57 - INFO - codeparrot_training - Step 1841: {'lr': 0.00046025, 'samples': 943104, 'steps': 1841, 'loss/train': 4.217216491699219} +03/03/2022 15:35:00 - INFO - codeparrot_training - Step 1842: {'lr': 0.0004605, 'samples': 943616, 'steps': 1842, 'loss/train': 2.7910196781158447} +03/03/2022 15:35:02 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/03/2022 15:35:06 - INFO - codeparrot_training - Step 1843: {'lr': 0.00046075, 'samples': 944128, 'steps': 1843, 'loss/train': 3.5309927463531494} +03/03/2022 15:35:09 - INFO - codeparrot_training - Step 1844: {'lr': 0.00046100000000000004, 'samples': 944640, 'steps': 1844, 'loss/train': 3.6949799060821533} +03/03/2022 15:35:10 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/03/2022 15:35:14 - INFO - codeparrot_training - Step 1845: {'lr': 0.00046125, 'samples': 945152, 'steps': 1845, 'loss/train': 3.1792454719543457} +03/03/2022 15:35:17 - INFO - codeparrot_training - Step 1846: {'lr': 0.00046150000000000005, 'samples': 945664, 'steps': 1846, 'loss/train': 4.417724609375} +03/03/2022 15:35:19 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/03/2022 15:35:23 - INFO - codeparrot_training - Step 1847: {'lr': 0.00046175000000000003, 'samples': 946176, 'steps': 1847, 'loss/train': 3.801145315170288} +03/03/2022 15:35:26 - INFO - codeparrot_training - Step 1848: {'lr': 0.000462, 'samples': 946688, 'steps': 1848, 'loss/train': 3.8281137943267822} +03/03/2022 15:35:27 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/03/2022 15:35:31 - INFO - codeparrot_training - Step 1849: {'lr': 0.00046225, 'samples': 947200, 'steps': 1849, 'loss/train': 4.239955425262451} +03/03/2022 15:35:34 - INFO - codeparrot_training - Step 1850: {'lr': 0.0004625, 'samples': 947712, 'steps': 1850, 'loss/train': 2.9147355556488037} +03/03/2022 15:35:35 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/03/2022 15:35:39 - INFO - codeparrot_training - Step 1851: {'lr': 0.00046275, 'samples': 948224, 'steps': 1851, 'loss/train': 4.109053611755371} +03/03/2022 15:35:43 - INFO - codeparrot_training - Step 1852: {'lr': 0.00046300000000000003, 'samples': 948736, 'steps': 1852, 'loss/train': 3.623049736022949} +03/03/2022 15:35:44 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/03/2022 15:35:48 - INFO - codeparrot_training - Step 1853: {'lr': 0.00046325, 'samples': 949248, 'steps': 1853, 'loss/train': 4.153678894042969} +03/03/2022 15:35:51 - INFO - codeparrot_training - Step 1854: {'lr': 0.00046350000000000004, 'samples': 949760, 'steps': 1854, 'loss/train': 2.6546082496643066} +03/03/2022 15:35:52 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/03/2022 15:35:56 - INFO - codeparrot_training - Step 1855: {'lr': 0.00046375, 'samples': 950272, 'steps': 1855, 'loss/train': 3.3636629581451416} +03/03/2022 15:35:59 - INFO - codeparrot_training - Step 1856: {'lr': 0.00046400000000000006, 'samples': 950784, 'steps': 1856, 'loss/train': 3.988624095916748} +03/03/2022 15:36:00 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/03/2022 15:36:05 - INFO - codeparrot_training - Step 1857: {'lr': 0.00046425, 'samples': 951296, 'steps': 1857, 'loss/train': 2.0212666988372803} +03/03/2022 15:36:08 - INFO - codeparrot_training - Step 1858: {'lr': 0.0004645, 'samples': 951808, 'steps': 1858, 'loss/train': 2.6467761993408203} +03/03/2022 15:36:09 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/03/2022 15:36:13 - INFO - codeparrot_training - Step 1859: {'lr': 0.00046475, 'samples': 952320, 'steps': 1859, 'loss/train': 2.9667983055114746} +03/03/2022 15:36:17 - INFO - codeparrot_training - Step 1860: {'lr': 0.000465, 'samples': 952832, 'steps': 1860, 'loss/train': 2.7909224033355713} +03/03/2022 15:36:17 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/03/2022 15:36:22 - INFO - codeparrot_training - Step 1861: {'lr': 0.00046525, 'samples': 953344, 'steps': 1861, 'loss/train': 1.7720826864242554} +03/03/2022 15:36:25 - INFO - codeparrot_training - Step 1862: {'lr': 0.00046550000000000004, 'samples': 953856, 'steps': 1862, 'loss/train': 3.453448534011841} +03/03/2022 15:36:26 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/03/2022 15:36:30 - INFO - codeparrot_training - Step 1863: {'lr': 0.00046575, 'samples': 954368, 'steps': 1863, 'loss/train': 2.8469836711883545} +03/03/2022 15:36:33 - INFO - codeparrot_training - Step 1864: {'lr': 0.00046600000000000005, 'samples': 954880, 'steps': 1864, 'loss/train': 4.5493597984313965} +03/03/2022 15:36:34 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/03/2022 15:36:39 - INFO - codeparrot_training - Step 1865: {'lr': 0.00046625000000000003, 'samples': 955392, 'steps': 1865, 'loss/train': 3.703514337539673} +03/03/2022 15:36:42 - INFO - codeparrot_training - Step 1866: {'lr': 0.0004665, 'samples': 955904, 'steps': 1866, 'loss/train': 4.083889484405518} +03/03/2022 15:36:43 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 15:36:47 - INFO - codeparrot_training - Step 1867: {'lr': 0.00046675, 'samples': 956416, 'steps': 1867, 'loss/train': 2.603015422821045} +03/03/2022 15:36:50 - INFO - codeparrot_training - Step 1868: {'lr': 0.000467, 'samples': 956928, 'steps': 1868, 'loss/train': 3.677750587463379} +03/03/2022 15:36:51 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/03/2022 15:36:56 - INFO - codeparrot_training - Step 1869: {'lr': 0.00046725, 'samples': 957440, 'steps': 1869, 'loss/train': 3.587224006652832} +03/03/2022 15:36:59 - INFO - codeparrot_training - Step 1870: {'lr': 0.00046750000000000003, 'samples': 957952, 'steps': 1870, 'loss/train': 3.7286808490753174} +03/03/2022 15:36:59 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/03/2022 15:37:04 - INFO - codeparrot_training - Step 1871: {'lr': 0.00046775, 'samples': 958464, 'steps': 1871, 'loss/train': 3.8604061603546143} +03/03/2022 15:37:07 - INFO - codeparrot_training - Step 1872: {'lr': 0.00046800000000000005, 'samples': 958976, 'steps': 1872, 'loss/train': 3.772135019302368} +03/03/2022 15:37:08 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/03/2022 15:37:12 - INFO - codeparrot_training - Step 1873: {'lr': 0.00046825, 'samples': 959488, 'steps': 1873, 'loss/train': 3.236870050430298} +03/03/2022 15:37:15 - INFO - codeparrot_training - Step 1874: {'lr': 0.00046850000000000006, 'samples': 960000, 'steps': 1874, 'loss/train': 3.809223175048828} +03/03/2022 15:37:17 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/03/2022 15:37:21 - INFO - codeparrot_training - Step 1875: {'lr': 0.00046875, 'samples': 960512, 'steps': 1875, 'loss/train': 2.7865772247314453} +03/03/2022 15:37:24 - INFO - codeparrot_training - Step 1876: {'lr': 0.00046899999999999996, 'samples': 961024, 'steps': 1876, 'loss/train': 2.8295211791992188} +03/03/2022 15:37:26 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/03/2022 15:37:29 - INFO - codeparrot_training - Step 1877: {'lr': 0.00046925, 'samples': 961536, 'steps': 1877, 'loss/train': 3.8288679122924805} +03/03/2022 15:37:32 - INFO - codeparrot_training - Step 1878: {'lr': 0.0004695, 'samples': 962048, 'steps': 1878, 'loss/train': 3.853792190551758} +03/03/2022 15:37:34 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/03/2022 15:37:38 - INFO - codeparrot_training - Step 1879: {'lr': 0.00046975, 'samples': 962560, 'steps': 1879, 'loss/train': 3.69453501701355} +03/03/2022 15:37:41 - INFO - codeparrot_training - Step 1880: {'lr': 0.00047, 'samples': 963072, 'steps': 1880, 'loss/train': 3.0930612087249756} +03/03/2022 15:37:43 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/03/2022 15:37:46 - INFO - codeparrot_training - Step 1881: {'lr': 0.00047025, 'samples': 963584, 'steps': 1881, 'loss/train': 2.7312281131744385} +03/03/2022 15:37:49 - INFO - codeparrot_training - Step 1882: {'lr': 0.0004705, 'samples': 964096, 'steps': 1882, 'loss/train': 2.825073480606079} +03/03/2022 15:37:51 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/03/2022 15:37:54 - INFO - codeparrot_training - Step 1883: {'lr': 0.00047075000000000003, 'samples': 964608, 'steps': 1883, 'loss/train': 4.372594356536865} +03/03/2022 15:37:58 - INFO - codeparrot_training - Step 1884: {'lr': 0.000471, 'samples': 965120, 'steps': 1884, 'loss/train': 2.83687424659729} +03/03/2022 15:37:59 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/03/2022 15:38:03 - INFO - codeparrot_training - Step 1885: {'lr': 0.00047125, 'samples': 965632, 'steps': 1885, 'loss/train': 3.1486153602600098} +03/03/2022 15:38:06 - INFO - codeparrot_training - Step 1886: {'lr': 0.00047149999999999997, 'samples': 966144, 'steps': 1886, 'loss/train': 4.076030731201172} +03/03/2022 15:38:07 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/03/2022 15:38:11 - INFO - codeparrot_training - Step 1887: {'lr': 0.00047175, 'samples': 966656, 'steps': 1887, 'loss/train': 3.1757619380950928} +03/03/2022 15:38:14 - INFO - codeparrot_training - Step 1888: {'lr': 0.000472, 'samples': 967168, 'steps': 1888, 'loss/train': 2.746028184890747} +03/03/2022 15:38:16 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/03/2022 15:38:20 - INFO - codeparrot_training - Step 1889: {'lr': 0.00047225, 'samples': 967680, 'steps': 1889, 'loss/train': 3.2801096439361572} +03/03/2022 15:38:23 - INFO - codeparrot_training - Step 1890: {'lr': 0.0004725, 'samples': 968192, 'steps': 1890, 'loss/train': 3.9361536502838135} +03/03/2022 15:38:24 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/03/2022 15:38:28 - INFO - codeparrot_training - Step 1891: {'lr': 0.00047275, 'samples': 968704, 'steps': 1891, 'loss/train': 3.272562265396118} +03/03/2022 15:38:31 - INFO - codeparrot_training - Step 1892: {'lr': 0.000473, 'samples': 969216, 'steps': 1892, 'loss/train': 3.3093371391296387} +03/03/2022 15:38:32 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/03/2022 15:38:36 - INFO - codeparrot_training - Step 1893: {'lr': 0.00047325000000000004, 'samples': 969728, 'steps': 1893, 'loss/train': 3.352339744567871} +03/03/2022 15:38:40 - INFO - codeparrot_training - Step 1894: {'lr': 0.00047349999999999996, 'samples': 970240, 'steps': 1894, 'loss/train': 2.718169689178467} +03/03/2022 15:38:40 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/03/2022 15:38:45 - INFO - codeparrot_training - Step 1895: {'lr': 0.00047375, 'samples': 970752, 'steps': 1895, 'loss/train': 4.432314395904541} +03/03/2022 15:38:48 - INFO - codeparrot_training - Step 1896: {'lr': 0.000474, 'samples': 971264, 'steps': 1896, 'loss/train': 3.4355528354644775} +03/03/2022 15:38:49 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/03/2022 15:38:53 - INFO - codeparrot_training - Step 1897: {'lr': 0.00047425, 'samples': 971776, 'steps': 1897, 'loss/train': 3.9646997451782227} +03/03/2022 15:38:57 - INFO - codeparrot_training - Step 1898: {'lr': 0.0004745, 'samples': 972288, 'steps': 1898, 'loss/train': 3.4262611865997314} +03/03/2022 15:38:57 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/03/2022 15:39:02 - INFO - codeparrot_training - Step 1899: {'lr': 0.00047475, 'samples': 972800, 'steps': 1899, 'loss/train': 3.51899790763855} +03/03/2022 15:39:05 - INFO - codeparrot_training - Step 1900: {'lr': 0.000475, 'samples': 973312, 'steps': 1900, 'loss/train': 2.787599563598633} +03/03/2022 15:39:06 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/03/2022 15:39:10 - INFO - codeparrot_training - Step 1901: {'lr': 0.00047525000000000003, 'samples': 973824, 'steps': 1901, 'loss/train': 4.00376033782959} +03/03/2022 15:39:14 - INFO - codeparrot_training - Step 1902: {'lr': 0.0004755, 'samples': 974336, 'steps': 1902, 'loss/train': 3.8105556964874268} +03/03/2022 15:39:14 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/03/2022 15:39:19 - INFO - codeparrot_training - Step 1903: {'lr': 0.00047575, 'samples': 974848, 'steps': 1903, 'loss/train': 3.312718152999878} +03/03/2022 15:39:22 - INFO - codeparrot_training - Step 1904: {'lr': 0.00047599999999999997, 'samples': 975360, 'steps': 1904, 'loss/train': 3.1530582904815674} +03/03/2022 15:39:23 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/03/2022 15:39:27 - INFO - codeparrot_training - Step 1905: {'lr': 0.00047625, 'samples': 975872, 'steps': 1905, 'loss/train': 2.0286476612091064} +03/03/2022 15:39:31 - INFO - codeparrot_training - Step 1906: {'lr': 0.0004765, 'samples': 976384, 'steps': 1906, 'loss/train': 3.4532697200775146} +03/03/2022 15:39:32 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/03/2022 15:39:36 - INFO - codeparrot_training - Step 1907: {'lr': 0.00047675, 'samples': 976896, 'steps': 1907, 'loss/train': 3.871291160583496} +03/03/2022 15:39:39 - INFO - codeparrot_training - Step 1908: {'lr': 0.000477, 'samples': 977408, 'steps': 1908, 'loss/train': 3.295064926147461} +03/03/2022 15:39:40 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/03/2022 15:39:44 - INFO - codeparrot_training - Step 1909: {'lr': 0.00047725, 'samples': 977920, 'steps': 1909, 'loss/train': 3.638679265975952} +03/03/2022 15:39:47 - INFO - codeparrot_training - Step 1910: {'lr': 0.0004775, 'samples': 978432, 'steps': 1910, 'loss/train': 2.9031922817230225} +03/03/2022 15:39:48 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/03/2022 15:39:53 - INFO - codeparrot_training - Step 1911: {'lr': 0.00047775000000000004, 'samples': 978944, 'steps': 1911, 'loss/train': 3.960149049758911} +03/03/2022 15:39:56 - INFO - codeparrot_training - Step 1912: {'lr': 0.00047799999999999996, 'samples': 979456, 'steps': 1912, 'loss/train': 3.321589946746826} +03/03/2022 15:39:57 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/03/2022 15:40:01 - INFO - codeparrot_training - Step 1913: {'lr': 0.00047825, 'samples': 979968, 'steps': 1913, 'loss/train': 3.246011734008789} +03/03/2022 15:40:04 - INFO - codeparrot_training - Step 1914: {'lr': 0.0004785, 'samples': 980480, 'steps': 1914, 'loss/train': 2.6056430339813232} +03/03/2022 15:40:05 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/03/2022 15:40:10 - INFO - codeparrot_training - Step 1915: {'lr': 0.00047875, 'samples': 980992, 'steps': 1915, 'loss/train': 3.8123860359191895} +03/03/2022 15:40:13 - INFO - codeparrot_training - Step 1916: {'lr': 0.000479, 'samples': 981504, 'steps': 1916, 'loss/train': 0.8929714560508728} +03/03/2022 15:40:14 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/03/2022 15:40:18 - INFO - codeparrot_training - Step 1917: {'lr': 0.00047925, 'samples': 982016, 'steps': 1917, 'loss/train': 3.048044204711914} +03/03/2022 15:40:21 - INFO - codeparrot_training - Step 1918: {'lr': 0.0004795, 'samples': 982528, 'steps': 1918, 'loss/train': 3.6655707359313965} +03/03/2022 15:40:26 - INFO - codeparrot_training - Step 1919: {'lr': 0.00047975000000000003, 'samples': 983040, 'steps': 1919, 'loss/train': 3.565509557723999} +03/03/2022 15:40:30 - INFO - codeparrot_training - Step 1920: {'lr': 0.00048, 'samples': 983552, 'steps': 1920, 'loss/train': 2.7780985832214355} +03/03/2022 15:40:30 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/03/2022 15:40:35 - INFO - codeparrot_training - Step 1921: {'lr': 0.00048025000000000005, 'samples': 984064, 'steps': 1921, 'loss/train': 3.146402597427368} +03/03/2022 15:40:38 - INFO - codeparrot_training - Step 1922: {'lr': 0.00048049999999999997, 'samples': 984576, 'steps': 1922, 'loss/train': 4.333209991455078} +03/03/2022 15:40:38 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/03/2022 15:40:43 - INFO - codeparrot_training - Step 1923: {'lr': 0.00048075, 'samples': 985088, 'steps': 1923, 'loss/train': 3.043414354324341} +03/03/2022 15:40:46 - INFO - codeparrot_training - Step 1924: {'lr': 0.000481, 'samples': 985600, 'steps': 1924, 'loss/train': 3.5705604553222656} +03/03/2022 15:40:47 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/03/2022 15:40:52 - INFO - codeparrot_training - Step 1925: {'lr': 0.00048125, 'samples': 986112, 'steps': 1925, 'loss/train': 3.69821834564209} +03/03/2022 15:40:55 - INFO - codeparrot_training - Step 1926: {'lr': 0.0004815, 'samples': 986624, 'steps': 1926, 'loss/train': 3.1315064430236816} +03/03/2022 15:40:55 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/03/2022 15:41:00 - INFO - codeparrot_training - Step 1927: {'lr': 0.00048175000000000003, 'samples': 987136, 'steps': 1927, 'loss/train': 3.3206987380981445} +03/03/2022 15:41:03 - INFO - codeparrot_training - Step 1928: {'lr': 0.000482, 'samples': 987648, 'steps': 1928, 'loss/train': 2.5814661979675293} +03/03/2022 15:41:03 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/03/2022 15:41:08 - INFO - codeparrot_training - Step 1929: {'lr': 0.00048225000000000004, 'samples': 988160, 'steps': 1929, 'loss/train': 2.912954807281494} +03/03/2022 15:41:12 - INFO - codeparrot_training - Step 1930: {'lr': 0.0004825, 'samples': 988672, 'steps': 1930, 'loss/train': 4.695540904998779} +03/03/2022 15:41:12 - INFO - codeparrot_training - Skipping example with length 694 (seq_length=1024) +03/03/2022 15:41:17 - INFO - codeparrot_training - Step 1931: {'lr': 0.00048275, 'samples': 989184, 'steps': 1931, 'loss/train': 4.0926408767700195} +03/03/2022 15:41:20 - INFO - codeparrot_training - Step 1932: {'lr': 0.000483, 'samples': 989696, 'steps': 1932, 'loss/train': 3.6458230018615723} +03/03/2022 15:41:20 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/03/2022 15:41:25 - INFO - codeparrot_training - Step 1933: {'lr': 0.00048325, 'samples': 990208, 'steps': 1933, 'loss/train': 2.4986352920532227} +03/03/2022 15:41:28 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/03/2022 15:41:30 - INFO - codeparrot_training - Step 1934: {'lr': 0.0004835, 'samples': 990720, 'steps': 1934, 'loss/train': 2.9695796966552734} +03/03/2022 15:41:34 - INFO - codeparrot_training - Step 1935: {'lr': 0.00048375, 'samples': 991232, 'steps': 1935, 'loss/train': 3.8234856128692627} +03/03/2022 15:41:36 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/03/2022 15:41:39 - INFO - codeparrot_training - Step 1936: {'lr': 0.000484, 'samples': 991744, 'steps': 1936, 'loss/train': 3.6123037338256836} +03/03/2022 15:41:42 - INFO - codeparrot_training - Step 1937: {'lr': 0.00048425000000000003, 'samples': 992256, 'steps': 1937, 'loss/train': 3.1480844020843506} +03/03/2022 15:41:45 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/03/2022 15:41:47 - INFO - codeparrot_training - Step 1938: {'lr': 0.0004845, 'samples': 992768, 'steps': 1938, 'loss/train': 2.5109102725982666} +03/03/2022 15:41:50 - INFO - codeparrot_training - Step 1939: {'lr': 0.00048475000000000005, 'samples': 993280, 'steps': 1939, 'loss/train': 4.0779266357421875} +03/03/2022 15:41:53 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/03/2022 15:41:56 - INFO - codeparrot_training - Step 1940: {'lr': 0.00048499999999999997, 'samples': 993792, 'steps': 1940, 'loss/train': 3.4988856315612793} +03/03/2022 15:41:59 - INFO - codeparrot_training - Step 1941: {'lr': 0.00048525, 'samples': 994304, 'steps': 1941, 'loss/train': 2.108307123184204} +03/03/2022 15:42:01 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/03/2022 15:42:04 - INFO - codeparrot_training - Step 1942: {'lr': 0.0004855, 'samples': 994816, 'steps': 1942, 'loss/train': 3.5938289165496826} +03/03/2022 15:42:07 - INFO - codeparrot_training - Step 1943: {'lr': 0.00048575, 'samples': 995328, 'steps': 1943, 'loss/train': 2.5148792266845703} +03/03/2022 15:42:10 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/03/2022 15:42:13 - INFO - codeparrot_training - Step 1944: {'lr': 0.000486, 'samples': 995840, 'steps': 1944, 'loss/train': 2.513195037841797} +03/03/2022 15:42:16 - INFO - codeparrot_training - Step 1945: {'lr': 0.00048625000000000003, 'samples': 996352, 'steps': 1945, 'loss/train': 3.3820157051086426} +03/03/2022 15:42:18 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/03/2022 15:42:21 - INFO - codeparrot_training - Step 1946: {'lr': 0.0004865, 'samples': 996864, 'steps': 1946, 'loss/train': 3.358319044113159} +03/03/2022 15:42:24 - INFO - codeparrot_training - Step 1947: {'lr': 0.00048675000000000004, 'samples': 997376, 'steps': 1947, 'loss/train': 3.156923770904541} +03/03/2022 15:42:26 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/03/2022 15:42:29 - INFO - codeparrot_training - Step 1948: {'lr': 0.000487, 'samples': 997888, 'steps': 1948, 'loss/train': 4.169747352600098} +03/03/2022 15:42:33 - INFO - codeparrot_training - Step 1949: {'lr': 0.00048725000000000005, 'samples': 998400, 'steps': 1949, 'loss/train': 2.972259283065796} +03/03/2022 15:42:34 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/03/2022 15:42:38 - INFO - codeparrot_training - Step 1950: {'lr': 0.0004875, 'samples': 998912, 'steps': 1950, 'loss/train': 4.1069865226745605} +03/03/2022 15:42:41 - INFO - codeparrot_training - Step 1951: {'lr': 0.00048775, 'samples': 999424, 'steps': 1951, 'loss/train': 3.1483869552612305} +03/03/2022 15:42:43 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/03/2022 15:42:46 - INFO - codeparrot_training - Step 1952: {'lr': 0.000488, 'samples': 999936, 'steps': 1952, 'loss/train': 2.917391538619995} +03/03/2022 15:42:50 - INFO - codeparrot_training - Step 1953: {'lr': 0.00048825, 'samples': 1000448, 'steps': 1953, 'loss/train': 3.2499265670776367} +03/03/2022 15:42:51 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/03/2022 15:42:55 - INFO - codeparrot_training - Step 1954: {'lr': 0.0004885, 'samples': 1000960, 'steps': 1954, 'loss/train': 4.028998851776123} +03/03/2022 15:42:58 - INFO - codeparrot_training - Step 1955: {'lr': 0.00048875, 'samples': 1001472, 'steps': 1955, 'loss/train': 3.3478951454162598} +03/03/2022 15:43:00 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/03/2022 15:43:03 - INFO - codeparrot_training - Step 1956: {'lr': 0.000489, 'samples': 1001984, 'steps': 1956, 'loss/train': 3.5914785861968994} +03/03/2022 15:43:06 - INFO - codeparrot_training - Step 1957: {'lr': 0.00048925, 'samples': 1002496, 'steps': 1957, 'loss/train': 0.8406473398208618} +03/03/2022 15:43:08 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 15:43:12 - INFO - codeparrot_training - Step 1958: {'lr': 0.0004895, 'samples': 1003008, 'steps': 1958, 'loss/train': 4.742589473724365} +03/03/2022 15:43:15 - INFO - codeparrot_training - Step 1959: {'lr': 0.0004897500000000001, 'samples': 1003520, 'steps': 1959, 'loss/train': 3.799579620361328} +03/03/2022 15:43:16 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/03/2022 15:43:20 - INFO - codeparrot_training - Step 1960: {'lr': 0.00049, 'samples': 1004032, 'steps': 1960, 'loss/train': 0.8839002847671509} +03/03/2022 15:43:23 - INFO - codeparrot_training - Step 1961: {'lr': 0.00049025, 'samples': 1004544, 'steps': 1961, 'loss/train': 3.562156915664673} +03/03/2022 15:43:25 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/03/2022 15:43:29 - INFO - codeparrot_training - Step 1962: {'lr': 0.0004905, 'samples': 1005056, 'steps': 1962, 'loss/train': 3.1393496990203857} +03/03/2022 15:43:32 - INFO - codeparrot_training - Step 1963: {'lr': 0.0004907500000000001, 'samples': 1005568, 'steps': 1963, 'loss/train': 3.6644341945648193} +03/03/2022 15:43:33 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/03/2022 15:43:37 - INFO - codeparrot_training - Step 1964: {'lr': 0.000491, 'samples': 1006080, 'steps': 1964, 'loss/train': 3.0365798473358154} +03/03/2022 15:43:40 - INFO - codeparrot_training - Step 1965: {'lr': 0.00049125, 'samples': 1006592, 'steps': 1965, 'loss/train': 3.524775743484497} +03/03/2022 15:43:41 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/03/2022 15:43:45 - INFO - codeparrot_training - Step 1966: {'lr': 0.0004915, 'samples': 1007104, 'steps': 1966, 'loss/train': 5.5112457275390625} +03/03/2022 15:43:48 - INFO - codeparrot_training - Step 1967: {'lr': 0.00049175, 'samples': 1007616, 'steps': 1967, 'loss/train': 2.93630313873291} +03/03/2022 15:43:50 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/03/2022 15:43:54 - INFO - codeparrot_training - Step 1968: {'lr': 0.000492, 'samples': 1008128, 'steps': 1968, 'loss/train': 3.8050034046173096} +03/03/2022 15:43:57 - INFO - codeparrot_training - Step 1969: {'lr': 0.0004922500000000001, 'samples': 1008640, 'steps': 1969, 'loss/train': 3.544528007507324} +03/03/2022 15:43:58 - INFO - codeparrot_training - Skipping example with length 268 (seq_length=1024) +03/03/2022 15:44:02 - INFO - codeparrot_training - Step 1970: {'lr': 0.0004925, 'samples': 1009152, 'steps': 1970, 'loss/train': 3.3061039447784424} +03/03/2022 15:44:05 - INFO - codeparrot_training - Step 1971: {'lr': 0.00049275, 'samples': 1009664, 'steps': 1971, 'loss/train': 4.317564487457275} +03/03/2022 15:44:06 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/03/2022 15:44:11 - INFO - codeparrot_training - Step 1972: {'lr': 0.0004930000000000001, 'samples': 1010176, 'steps': 1972, 'loss/train': 3.416768789291382} +03/03/2022 15:44:14 - INFO - codeparrot_training - Step 1973: {'lr': 0.00049325, 'samples': 1010688, 'steps': 1973, 'loss/train': 3.5689117908477783} +03/03/2022 15:44:15 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/03/2022 15:44:19 - INFO - codeparrot_training - Step 1974: {'lr': 0.0004935, 'samples': 1011200, 'steps': 1974, 'loss/train': 3.1128177642822266} +03/03/2022 15:44:22 - INFO - codeparrot_training - Step 1975: {'lr': 0.00049375, 'samples': 1011712, 'steps': 1975, 'loss/train': 2.972442626953125} +03/03/2022 15:44:23 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/03/2022 15:44:27 - INFO - codeparrot_training - Step 1976: {'lr': 0.000494, 'samples': 1012224, 'steps': 1976, 'loss/train': 3.124418258666992} +03/03/2022 15:44:30 - INFO - codeparrot_training - Step 1977: {'lr': 0.00049425, 'samples': 1012736, 'steps': 1977, 'loss/train': 2.5030174255371094} +03/03/2022 15:44:31 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/03/2022 15:44:36 - INFO - codeparrot_training - Step 1978: {'lr': 0.0004945, 'samples': 1013248, 'steps': 1978, 'loss/train': 1.2461376190185547} +03/03/2022 15:44:39 - INFO - codeparrot_training - Step 1979: {'lr': 0.0004947500000000001, 'samples': 1013760, 'steps': 1979, 'loss/train': 2.7604634761810303} +03/03/2022 15:44:39 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/03/2022 15:44:44 - INFO - codeparrot_training - Step 1980: {'lr': 0.000495, 'samples': 1014272, 'steps': 1980, 'loss/train': 4.329571723937988} +03/03/2022 15:44:47 - INFO - codeparrot_training - Step 1981: {'lr': 0.00049525, 'samples': 1014784, 'steps': 1981, 'loss/train': 2.192009449005127} +03/03/2022 15:44:48 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/03/2022 15:44:53 - INFO - codeparrot_training - Step 1982: {'lr': 0.0004955, 'samples': 1015296, 'steps': 1982, 'loss/train': 2.292438507080078} +03/03/2022 15:44:56 - INFO - codeparrot_training - Step 1983: {'lr': 0.00049575, 'samples': 1015808, 'steps': 1983, 'loss/train': 3.9385428428649902} +03/03/2022 15:44:56 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/03/2022 15:45:01 - INFO - codeparrot_training - Step 1984: {'lr': 0.000496, 'samples': 1016320, 'steps': 1984, 'loss/train': 2.4057044982910156} +03/03/2022 15:45:04 - INFO - codeparrot_training - Step 1985: {'lr': 0.0004962500000000001, 'samples': 1016832, 'steps': 1985, 'loss/train': 3.314926862716675} +03/03/2022 15:45:04 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/03/2022 15:45:09 - INFO - codeparrot_training - Step 1986: {'lr': 0.0004965, 'samples': 1017344, 'steps': 1986, 'loss/train': 3.1913468837738037} +03/03/2022 15:45:13 - INFO - codeparrot_training - Step 1987: {'lr': 0.00049675, 'samples': 1017856, 'steps': 1987, 'loss/train': 3.137672185897827} +03/03/2022 15:45:13 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/03/2022 15:45:18 - INFO - codeparrot_training - Step 1988: {'lr': 0.000497, 'samples': 1018368, 'steps': 1988, 'loss/train': 3.4764881134033203} +03/03/2022 15:45:21 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/03/2022 15:45:23 - INFO - codeparrot_training - Step 1989: {'lr': 0.0004972500000000001, 'samples': 1018880, 'steps': 1989, 'loss/train': 2.392765522003174} +03/03/2022 15:45:26 - INFO - codeparrot_training - Step 1990: {'lr': 0.0004975, 'samples': 1019392, 'steps': 1990, 'loss/train': 2.664504051208496} +03/03/2022 15:45:29 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/03/2022 15:45:32 - INFO - codeparrot_training - Step 1991: {'lr': 0.00049775, 'samples': 1019904, 'steps': 1991, 'loss/train': 3.0546810626983643} +03/03/2022 15:45:35 - INFO - codeparrot_training - Step 1992: {'lr': 0.000498, 'samples': 1020416, 'steps': 1992, 'loss/train': 2.8693273067474365} +03/03/2022 15:45:38 - INFO - codeparrot_training - Step 1993: {'lr': 0.00049825, 'samples': 1020928, 'steps': 1993, 'loss/train': 2.8332624435424805} +03/03/2022 15:45:39 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/03/2022 15:45:43 - INFO - codeparrot_training - Step 1994: {'lr': 0.0004985, 'samples': 1021440, 'steps': 1994, 'loss/train': 3.599762439727783} +03/03/2022 15:45:46 - INFO - codeparrot_training - Step 1995: {'lr': 0.0004987500000000001, 'samples': 1021952, 'steps': 1995, 'loss/train': 2.9485690593719482} +03/03/2022 15:45:47 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/03/2022 15:45:52 - INFO - codeparrot_training - Step 1996: {'lr': 0.000499, 'samples': 1022464, 'steps': 1996, 'loss/train': 2.9960310459136963} +03/03/2022 15:45:55 - INFO - codeparrot_training - Step 1997: {'lr': 0.00049925, 'samples': 1022976, 'steps': 1997, 'loss/train': 3.438770294189453} +03/03/2022 15:45:55 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/03/2022 15:46:00 - INFO - codeparrot_training - Step 1998: {'lr': 0.0004995, 'samples': 1023488, 'steps': 1998, 'loss/train': 3.730217933654785} +03/03/2022 15:46:03 - INFO - codeparrot_training - Step 1999: {'lr': 0.0004997500000000001, 'samples': 1024000, 'steps': 1999, 'loss/train': 3.169341564178467} +03/03/2022 15:46:04 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/03/2022 15:46:09 - INFO - codeparrot_training - Step 2000: {'lr': 0.0005, 'samples': 1024512, 'steps': 2000, 'loss/train': 3.9695992469787598} +03/03/2022 15:46:12 - INFO - codeparrot_training - Step 2001: {'lr': 0.0004999999999436769, 'samples': 1025024, 'steps': 2001, 'loss/train': 2.9300379753112793} +03/03/2022 15:46:13 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/03/2022 15:46:17 - INFO - codeparrot_training - Step 2002: {'lr': 0.0004999999997747077, 'samples': 1025536, 'steps': 2002, 'loss/train': 3.7071943283081055} +03/03/2022 15:46:20 - INFO - codeparrot_training - Step 2003: {'lr': 0.0004999999994930923, 'samples': 1026048, 'steps': 2003, 'loss/train': 3.0697338581085205} +03/03/2022 15:46:21 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/03/2022 15:46:25 - INFO - codeparrot_training - Step 2004: {'lr': 0.0004999999990988309, 'samples': 1026560, 'steps': 2004, 'loss/train': 4.035585403442383} +03/03/2022 15:46:29 - INFO - codeparrot_training - Step 2005: {'lr': 0.0004999999985919232, 'samples': 1027072, 'steps': 2005, 'loss/train': 3.3016936779022217} +03/03/2022 15:46:30 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/03/2022 15:46:34 - INFO - codeparrot_training - Step 2006: {'lr': 0.0004999999979723695, 'samples': 1027584, 'steps': 2006, 'loss/train': 4.132771968841553} +03/03/2022 15:46:37 - INFO - codeparrot_training - Step 2007: {'lr': 0.0004999999972401696, 'samples': 1028096, 'steps': 2007, 'loss/train': 4.007197856903076} +03/03/2022 15:46:38 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/03/2022 15:46:42 - INFO - codeparrot_training - Step 2008: {'lr': 0.0004999999963953234, 'samples': 1028608, 'steps': 2008, 'loss/train': 1.783637523651123} +03/03/2022 15:46:45 - INFO - codeparrot_training - Step 2009: {'lr': 0.0004999999954378312, 'samples': 1029120, 'steps': 2009, 'loss/train': 3.6063473224639893} +03/03/2022 15:46:46 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/03/2022 15:46:51 - INFO - codeparrot_training - Step 2010: {'lr': 0.000499999994367693, 'samples': 1029632, 'steps': 2010, 'loss/train': 2.82340407371521} +03/03/2022 15:46:54 - INFO - codeparrot_training - Step 2011: {'lr': 0.0004999999931849084, 'samples': 1030144, 'steps': 2011, 'loss/train': 2.665566921234131} +03/03/2022 15:46:54 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/03/2022 15:46:59 - INFO - codeparrot_training - Step 2012: {'lr': 0.0004999999918894778, 'samples': 1030656, 'steps': 2012, 'loss/train': 3.4019744396209717} +03/03/2022 15:47:02 - INFO - codeparrot_training - Step 2013: {'lr': 0.000499999990481401, 'samples': 1031168, 'steps': 2013, 'loss/train': 3.7687528133392334} +03/03/2022 15:47:02 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/03/2022 15:47:08 - INFO - codeparrot_training - Step 2014: {'lr': 0.0004999999889606781, 'samples': 1031680, 'steps': 2014, 'loss/train': 3.2895429134368896} +03/03/2022 15:47:11 - INFO - codeparrot_training - Step 2015: {'lr': 0.0004999999873273091, 'samples': 1032192, 'steps': 2015, 'loss/train': 2.7207114696502686} +03/03/2022 15:47:11 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/03/2022 15:47:16 - INFO - codeparrot_training - Step 2016: {'lr': 0.000499999985581294, 'samples': 1032704, 'steps': 2016, 'loss/train': 3.1622705459594727} +03/03/2022 15:47:19 - INFO - codeparrot_training - Step 2017: {'lr': 0.0004999999837226326, 'samples': 1033216, 'steps': 2017, 'loss/train': 2.5284972190856934} +03/03/2022 15:47:19 - INFO - codeparrot_training - Skipping example with length 285 (seq_length=1024) +03/03/2022 15:47:25 - INFO - codeparrot_training - Step 2018: {'lr': 0.0004999999817513252, 'samples': 1033728, 'steps': 2018, 'loss/train': 4.827317714691162} +03/03/2022 15:47:28 - INFO - codeparrot_training - Step 2019: {'lr': 0.0004999999796673716, 'samples': 1034240, 'steps': 2019, 'loss/train': 3.9731600284576416} +03/03/2022 15:47:29 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/03/2022 15:47:33 - INFO - codeparrot_training - Step 2020: {'lr': 0.0004999999774707719, 'samples': 1034752, 'steps': 2020, 'loss/train': 3.6044795513153076} +03/03/2022 15:47:37 - INFO - codeparrot_training - Step 2021: {'lr': 0.0004999999751615261, 'samples': 1035264, 'steps': 2021, 'loss/train': 5.511720180511475} +03/03/2022 15:47:38 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/03/2022 15:47:42 - INFO - codeparrot_training - Step 2022: {'lr': 0.0004999999727396341, 'samples': 1035776, 'steps': 2022, 'loss/train': 4.11544942855835} +03/03/2022 15:47:45 - INFO - codeparrot_training - Step 2023: {'lr': 0.0004999999702050959, 'samples': 1036288, 'steps': 2023, 'loss/train': 3.4344191551208496} +03/03/2022 15:47:46 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/03/2022 15:47:50 - INFO - codeparrot_training - Step 2024: {'lr': 0.0004999999675579118, 'samples': 1036800, 'steps': 2024, 'loss/train': 3.58003306388855} +03/03/2022 15:47:53 - INFO - codeparrot_training - Step 2025: {'lr': 0.0004999999647980814, 'samples': 1037312, 'steps': 2025, 'loss/train': 4.878102779388428} +03/03/2022 15:47:54 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/03/2022 15:47:59 - INFO - codeparrot_training - Step 2026: {'lr': 0.0004999999619256049, 'samples': 1037824, 'steps': 2026, 'loss/train': 3.9710001945495605} +03/03/2022 15:48:02 - INFO - codeparrot_training - Step 2027: {'lr': 0.0004999999589404822, 'samples': 1038336, 'steps': 2027, 'loss/train': 3.3915817737579346} +03/03/2022 15:48:03 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/03/2022 15:48:07 - INFO - codeparrot_training - Step 2028: {'lr': 0.0004999999558427136, 'samples': 1038848, 'steps': 2028, 'loss/train': 3.528927803039551} +03/03/2022 15:48:10 - INFO - codeparrot_training - Step 2029: {'lr': 0.0004999999526322987, 'samples': 1039360, 'steps': 2029, 'loss/train': 0.5144878625869751} +03/03/2022 15:48:11 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/03/2022 15:48:16 - INFO - codeparrot_training - Step 2030: {'lr': 0.0004999999493092377, 'samples': 1039872, 'steps': 2030, 'loss/train': 4.147453784942627} +03/03/2022 15:48:19 - INFO - codeparrot_training - Step 2031: {'lr': 0.0004999999458735306, 'samples': 1040384, 'steps': 2031, 'loss/train': 2.4205193519592285} +03/03/2022 15:48:19 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/03/2022 15:48:24 - INFO - codeparrot_training - Step 2032: {'lr': 0.0004999999423251774, 'samples': 1040896, 'steps': 2032, 'loss/train': 3.421488046646118} +03/03/2022 15:48:27 - INFO - codeparrot_training - Step 2033: {'lr': 0.0004999999386641781, 'samples': 1041408, 'steps': 2033, 'loss/train': 3.241138458251953} +03/03/2022 15:48:28 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/03/2022 15:48:33 - INFO - codeparrot_training - Step 2034: {'lr': 0.0004999999348905326, 'samples': 1041920, 'steps': 2034, 'loss/train': 4.374093532562256} +03/03/2022 15:48:36 - INFO - codeparrot_training - Step 2035: {'lr': 0.000499999931004241, 'samples': 1042432, 'steps': 2035, 'loss/train': 3.3062870502471924} +03/03/2022 15:48:37 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/03/2022 15:48:41 - INFO - codeparrot_training - Step 2036: {'lr': 0.0004999999270053034, 'samples': 1042944, 'steps': 2036, 'loss/train': 3.2776246070861816} +03/03/2022 15:48:44 - INFO - codeparrot_training - Step 2037: {'lr': 0.0004999999228937196, 'samples': 1043456, 'steps': 2037, 'loss/train': 3.5980310440063477} +03/03/2022 15:48:46 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 15:48:50 - INFO - codeparrot_training - Step 2038: {'lr': 0.0004999999186694897, 'samples': 1043968, 'steps': 2038, 'loss/train': 4.058172225952148} +03/03/2022 15:48:53 - INFO - codeparrot_training - Step 2039: {'lr': 0.0004999999143326137, 'samples': 1044480, 'steps': 2039, 'loss/train': 3.7193925380706787} +03/03/2022 15:48:54 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/03/2022 15:48:58 - INFO - codeparrot_training - Step 2040: {'lr': 0.0004999999098830916, 'samples': 1044992, 'steps': 2040, 'loss/train': 2.873699426651001} +03/03/2022 15:49:01 - INFO - codeparrot_training - Step 2041: {'lr': 0.0004999999053209235, 'samples': 1045504, 'steps': 2041, 'loss/train': 3.7037858963012695} +03/03/2022 15:49:03 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/03/2022 15:49:07 - INFO - codeparrot_training - Step 2042: {'lr': 0.0004999999006461091, 'samples': 1046016, 'steps': 2042, 'loss/train': 2.888669490814209} +03/03/2022 15:49:10 - INFO - codeparrot_training - Step 2043: {'lr': 0.0004999998958586487, 'samples': 1046528, 'steps': 2043, 'loss/train': 4.662944793701172} +03/03/2022 15:49:12 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/03/2022 15:49:15 - INFO - codeparrot_training - Step 2044: {'lr': 0.0004999998909585423, 'samples': 1047040, 'steps': 2044, 'loss/train': 3.401412010192871} +03/03/2022 15:49:18 - INFO - codeparrot_training - Step 2045: {'lr': 0.0004999998859457896, 'samples': 1047552, 'steps': 2045, 'loss/train': 4.20189094543457} +03/03/2022 15:49:20 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/03/2022 15:49:24 - INFO - codeparrot_training - Step 2046: {'lr': 0.0004999998808203909, 'samples': 1048064, 'steps': 2046, 'loss/train': 4.263084888458252} +03/03/2022 15:49:27 - INFO - codeparrot_training - Step 2047: {'lr': 0.0004999998755823462, 'samples': 1048576, 'steps': 2047, 'loss/train': 2.259582042694092} +03/03/2022 15:49:28 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/03/2022 15:49:32 - INFO - codeparrot_training - Step 2048: {'lr': 0.0004999998702316553, 'samples': 1049088, 'steps': 2048, 'loss/train': 1.4496164321899414} +03/03/2022 15:49:35 - INFO - codeparrot_training - Step 2049: {'lr': 0.0004999998647683184, 'samples': 1049600, 'steps': 2049, 'loss/train': 3.107677936553955} +03/03/2022 15:49:37 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/03/2022 15:49:40 - INFO - codeparrot_training - Step 2050: {'lr': 0.0004999998591923353, 'samples': 1050112, 'steps': 2050, 'loss/train': 3.494973659515381} +03/03/2022 15:49:44 - INFO - codeparrot_training - Step 2051: {'lr': 0.0004999998535037063, 'samples': 1050624, 'steps': 2051, 'loss/train': 2.54117751121521} +03/03/2022 15:49:45 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/03/2022 15:49:49 - INFO - codeparrot_training - Step 2052: {'lr': 0.0004999998477024311, 'samples': 1051136, 'steps': 2052, 'loss/train': 2.789486885070801} +03/03/2022 15:49:52 - INFO - codeparrot_training - Step 2053: {'lr': 0.0004999998417885099, 'samples': 1051648, 'steps': 2053, 'loss/train': 3.0454747676849365} +03/03/2022 15:49:53 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/03/2022 15:49:57 - INFO - codeparrot_training - Step 2054: {'lr': 0.0004999998357619425, 'samples': 1052160, 'steps': 2054, 'loss/train': 3.8502964973449707} +03/03/2022 15:50:00 - INFO - codeparrot_training - Step 2055: {'lr': 0.0004999998296227291, 'samples': 1052672, 'steps': 2055, 'loss/train': 1.7430517673492432} +03/03/2022 15:50:01 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/03/2022 15:50:06 - INFO - codeparrot_training - Step 2056: {'lr': 0.0004999998233708697, 'samples': 1053184, 'steps': 2056, 'loss/train': 3.429997444152832} +03/03/2022 15:50:09 - INFO - codeparrot_training - Step 2057: {'lr': 0.0004999998170063642, 'samples': 1053696, 'steps': 2057, 'loss/train': 4.1132307052612305} +03/03/2022 15:50:10 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/03/2022 15:50:14 - INFO - codeparrot_training - Step 2058: {'lr': 0.0004999998105292126, 'samples': 1054208, 'steps': 2058, 'loss/train': 3.997450828552246} +03/03/2022 15:50:17 - INFO - codeparrot_training - Step 2059: {'lr': 0.000499999803939415, 'samples': 1054720, 'steps': 2059, 'loss/train': 3.3158175945281982} +03/03/2022 15:50:18 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/03/2022 15:50:22 - INFO - codeparrot_training - Step 2060: {'lr': 0.0004999997972369713, 'samples': 1055232, 'steps': 2060, 'loss/train': 3.253645896911621} +03/03/2022 15:50:26 - INFO - codeparrot_training - Step 2061: {'lr': 0.0004999997904218816, 'samples': 1055744, 'steps': 2061, 'loss/train': 3.3187222480773926} +03/03/2022 15:50:27 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/03/2022 15:50:31 - INFO - codeparrot_training - Step 2062: {'lr': 0.0004999997834941459, 'samples': 1056256, 'steps': 2062, 'loss/train': 4.691457271575928} +03/03/2022 15:50:34 - INFO - codeparrot_training - Step 2063: {'lr': 0.000499999776453764, 'samples': 1056768, 'steps': 2063, 'loss/train': 4.1678242683410645} +03/03/2022 15:50:35 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/03/2022 15:50:39 - INFO - codeparrot_training - Step 2064: {'lr': 0.0004999997693007361, 'samples': 1057280, 'steps': 2064, 'loss/train': 3.9383790493011475} +03/03/2022 15:50:43 - INFO - codeparrot_training - Step 2065: {'lr': 0.0004999997620350622, 'samples': 1057792, 'steps': 2065, 'loss/train': 3.684743881225586} +03/03/2022 15:50:44 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/03/2022 15:50:48 - INFO - codeparrot_training - Step 2066: {'lr': 0.0004999997546567423, 'samples': 1058304, 'steps': 2066, 'loss/train': 2.53251314163208} +03/03/2022 15:50:51 - INFO - codeparrot_training - Step 2067: {'lr': 0.0004999997471657763, 'samples': 1058816, 'steps': 2067, 'loss/train': 3.7877984046936035} +03/03/2022 15:50:52 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/03/2022 15:50:56 - INFO - codeparrot_training - Step 2068: {'lr': 0.0004999997395621642, 'samples': 1059328, 'steps': 2068, 'loss/train': 1.0695316791534424} +03/03/2022 15:51:00 - INFO - codeparrot_training - Step 2069: {'lr': 0.0004999997318459064, 'samples': 1059840, 'steps': 2069, 'loss/train': 4.3450703620910645} +03/03/2022 15:51:00 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/03/2022 15:51:05 - INFO - codeparrot_training - Step 2070: {'lr': 0.0004999997240170023, 'samples': 1060352, 'steps': 2070, 'loss/train': 4.308434963226318} +03/03/2022 15:51:08 - INFO - codeparrot_training - Step 2071: {'lr': 0.0004999997160754522, 'samples': 1060864, 'steps': 2071, 'loss/train': 2.7694506645202637} +03/03/2022 15:51:09 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/03/2022 15:51:13 - INFO - codeparrot_training - Step 2072: {'lr': 0.0004999997080212561, 'samples': 1061376, 'steps': 2072, 'loss/train': 1.8834242820739746} +03/03/2022 15:51:16 - INFO - codeparrot_training - Step 2073: {'lr': 0.000499999699854414, 'samples': 1061888, 'steps': 2073, 'loss/train': 3.068740129470825} +03/03/2022 15:51:17 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 15:51:22 - INFO - codeparrot_training - Step 2074: {'lr': 0.0004999996915749259, 'samples': 1062400, 'steps': 2074, 'loss/train': 4.012972354888916} +03/03/2022 15:51:25 - INFO - codeparrot_training - Step 2075: {'lr': 0.0004999996831827918, 'samples': 1062912, 'steps': 2075, 'loss/train': 3.255206823348999} +03/03/2022 15:51:26 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/03/2022 15:51:30 - INFO - codeparrot_training - Step 2076: {'lr': 0.0004999996746780117, 'samples': 1063424, 'steps': 2076, 'loss/train': 3.5504531860351562} +03/03/2022 15:51:33 - INFO - codeparrot_training - Step 2077: {'lr': 0.0004999996660605856, 'samples': 1063936, 'steps': 2077, 'loss/train': 3.7585928440093994} +03/03/2022 15:51:34 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/03/2022 15:51:38 - INFO - codeparrot_training - Step 2078: {'lr': 0.0004999996573305135, 'samples': 1064448, 'steps': 2078, 'loss/train': 2.2548882961273193} +03/03/2022 15:51:42 - INFO - codeparrot_training - Step 2079: {'lr': 0.0004999996484877955, 'samples': 1064960, 'steps': 2079, 'loss/train': 4.103363990783691} +03/03/2022 15:51:42 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/03/2022 15:51:47 - INFO - codeparrot_training - Step 2080: {'lr': 0.0004999996395324313, 'samples': 1065472, 'steps': 2080, 'loss/train': 3.4765408039093018} +03/03/2022 15:51:50 - INFO - codeparrot_training - Step 2081: {'lr': 0.0004999996304644213, 'samples': 1065984, 'steps': 2081, 'loss/train': 3.9804258346557617} +03/03/2022 15:51:50 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/03/2022 15:51:55 - INFO - codeparrot_training - Step 2082: {'lr': 0.0004999996212837653, 'samples': 1066496, 'steps': 2082, 'loss/train': 3.8717150688171387} +03/03/2022 15:51:58 - INFO - codeparrot_training - Step 2083: {'lr': 0.0004999996119904633, 'samples': 1067008, 'steps': 2083, 'loss/train': 3.8754525184631348} +03/03/2022 15:51:59 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/03/2022 15:52:04 - INFO - codeparrot_training - Step 2084: {'lr': 0.0004999996025845154, 'samples': 1067520, 'steps': 2084, 'loss/train': 3.614020824432373} +03/03/2022 15:52:07 - INFO - codeparrot_training - Step 2085: {'lr': 0.0004999995930659215, 'samples': 1068032, 'steps': 2085, 'loss/train': 2.7679386138916016} +03/03/2022 15:52:07 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/03/2022 15:52:12 - INFO - codeparrot_training - Step 2086: {'lr': 0.0004999995834346815, 'samples': 1068544, 'steps': 2086, 'loss/train': 3.5978920459747314} +03/03/2022 15:52:15 - INFO - codeparrot_training - Step 2087: {'lr': 0.0004999995736907957, 'samples': 1069056, 'steps': 2087, 'loss/train': 3.90876841545105} +03/03/2022 15:52:15 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/03/2022 15:52:20 - INFO - codeparrot_training - Step 2088: {'lr': 0.000499999563834264, 'samples': 1069568, 'steps': 2088, 'loss/train': 3.1678805351257324} +03/03/2022 15:52:24 - INFO - codeparrot_training - Step 2089: {'lr': 0.0004999995538650862, 'samples': 1070080, 'steps': 2089, 'loss/train': 3.1336045265197754} +03/03/2022 15:52:24 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/03/2022 15:52:29 - INFO - codeparrot_training - Step 2090: {'lr': 0.0004999995437832626, 'samples': 1070592, 'steps': 2090, 'loss/train': 3.454817295074463} +03/03/2022 15:52:32 - INFO - codeparrot_training - Step 2091: {'lr': 0.0004999995335887929, 'samples': 1071104, 'steps': 2091, 'loss/train': 3.1001510620117188} +03/03/2022 15:52:32 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/03/2022 15:52:37 - INFO - codeparrot_training - Step 2092: {'lr': 0.0004999995232816774, 'samples': 1071616, 'steps': 2092, 'loss/train': 3.782294511795044} +03/03/2022 15:52:40 - INFO - codeparrot_training - Step 2093: {'lr': 0.000499999512861916, 'samples': 1072128, 'steps': 2093, 'loss/train': 3.420549154281616} +03/03/2022 15:52:41 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/03/2022 15:52:46 - INFO - codeparrot_training - Step 2094: {'lr': 0.0004999995023295086, 'samples': 1072640, 'steps': 2094, 'loss/train': 3.2693636417388916} +03/03/2022 15:52:49 - INFO - codeparrot_training - Step 2095: {'lr': 0.0004999994916844552, 'samples': 1073152, 'steps': 2095, 'loss/train': 2.378378391265869} +03/03/2022 15:52:49 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/03/2022 15:52:54 - INFO - codeparrot_training - Step 2096: {'lr': 0.0004999994809267561, 'samples': 1073664, 'steps': 2096, 'loss/train': 3.894771099090576} +03/03/2022 15:52:57 - INFO - codeparrot_training - Step 2097: {'lr': 0.0004999994700564109, 'samples': 1074176, 'steps': 2097, 'loss/train': 4.1221466064453125} +03/03/2022 15:52:57 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 15:53:02 - INFO - codeparrot_training - Step 2098: {'lr': 0.0004999994590734199, 'samples': 1074688, 'steps': 2098, 'loss/train': 4.020200252532959} +03/03/2022 15:53:05 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/03/2022 15:53:08 - INFO - codeparrot_training - Step 2099: {'lr': 0.000499999447977783, 'samples': 1075200, 'steps': 2099, 'loss/train': 3.3252177238464355} +03/03/2022 15:53:11 - INFO - codeparrot_training - Step 2100: {'lr': 0.0004999994367695001, 'samples': 1075712, 'steps': 2100, 'loss/train': 3.502760410308838} +03/03/2022 15:53:14 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/03/2022 15:53:16 - INFO - codeparrot_training - Step 2101: {'lr': 0.0004999994254485714, 'samples': 1076224, 'steps': 2101, 'loss/train': 3.02009654045105} +03/03/2022 15:53:19 - INFO - codeparrot_training - Step 2102: {'lr': 0.0004999994140149969, 'samples': 1076736, 'steps': 2102, 'loss/train': 3.3655712604522705} +03/03/2022 15:53:22 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/03/2022 15:53:24 - INFO - codeparrot_training - Step 2103: {'lr': 0.0004999994024687764, 'samples': 1077248, 'steps': 2103, 'loss/train': 2.6672849655151367} +03/03/2022 15:53:28 - INFO - codeparrot_training - Step 2104: {'lr': 0.00049999939080991, 'samples': 1077760, 'steps': 2104, 'loss/train': 3.3088254928588867} +03/03/2022 15:53:30 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/03/2022 15:53:33 - INFO - codeparrot_training - Step 2105: {'lr': 0.0004999993790383978, 'samples': 1078272, 'steps': 2105, 'loss/train': 3.3820853233337402} +03/03/2022 15:53:36 - INFO - codeparrot_training - Step 2106: {'lr': 0.0004999993671542397, 'samples': 1078784, 'steps': 2106, 'loss/train': 3.458866834640503} +03/03/2022 15:53:39 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/03/2022 15:53:41 - INFO - codeparrot_training - Step 2107: {'lr': 0.0004999993551574358, 'samples': 1079296, 'steps': 2107, 'loss/train': 3.018770217895508} +03/03/2022 15:53:45 - INFO - codeparrot_training - Step 2108: {'lr': 0.000499999343047986, 'samples': 1079808, 'steps': 2108, 'loss/train': 3.3291473388671875} +03/03/2022 15:53:47 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/03/2022 15:53:50 - INFO - codeparrot_training - Step 2109: {'lr': 0.0004999993308258904, 'samples': 1080320, 'steps': 2109, 'loss/train': 3.4458365440368652} +03/03/2022 15:53:53 - INFO - codeparrot_training - Step 2110: {'lr': 0.0004999993184911489, 'samples': 1080832, 'steps': 2110, 'loss/train': 2.906193733215332} +03/03/2022 15:53:55 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/03/2022 15:53:58 - INFO - codeparrot_training - Step 2111: {'lr': 0.0004999993060437616, 'samples': 1081344, 'steps': 2111, 'loss/train': 3.3055806159973145} +03/03/2022 15:54:02 - INFO - codeparrot_training - Step 2112: {'lr': 0.0004999992934837284, 'samples': 1081856, 'steps': 2112, 'loss/train': 8.058950424194336} +03/03/2022 15:54:04 - INFO - codeparrot_training - Skipping example with length 59 (seq_length=1024) +03/03/2022 15:54:07 - INFO - codeparrot_training - Step 2113: {'lr': 0.0004999992808110495, 'samples': 1082368, 'steps': 2113, 'loss/train': 3.8844070434570312} +03/03/2022 15:54:10 - INFO - codeparrot_training - Step 2114: {'lr': 0.0004999992680257247, 'samples': 1082880, 'steps': 2114, 'loss/train': 6.860340118408203} +03/03/2022 15:54:13 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 15:54:15 - INFO - codeparrot_training - Step 2115: {'lr': 0.0004999992551277541, 'samples': 1083392, 'steps': 2115, 'loss/train': 2.7332510948181152} +03/03/2022 15:54:18 - INFO - codeparrot_training - Step 2116: {'lr': 0.0004999992421171377, 'samples': 1083904, 'steps': 2116, 'loss/train': 3.278076171875} +03/03/2022 15:54:21 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/03/2022 15:54:24 - INFO - codeparrot_training - Step 2117: {'lr': 0.0004999992289938755, 'samples': 1084416, 'steps': 2117, 'loss/train': 2.1812992095947266} +03/03/2022 15:54:27 - INFO - codeparrot_training - Step 2118: {'lr': 0.0004999992157579676, 'samples': 1084928, 'steps': 2118, 'loss/train': 2.6568429470062256} +03/03/2022 15:54:30 - INFO - codeparrot_training - Step 2119: {'lr': 0.0004999992024094138, 'samples': 1085440, 'steps': 2119, 'loss/train': 3.4700217247009277} +03/03/2022 15:54:30 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/03/2022 15:54:36 - INFO - codeparrot_training - Step 2120: {'lr': 0.0004999991889482142, 'samples': 1085952, 'steps': 2120, 'loss/train': 4.040984153747559} +03/03/2022 15:54:38 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/03/2022 15:54:41 - INFO - codeparrot_training - Step 2121: {'lr': 0.0004999991753743689, 'samples': 1086464, 'steps': 2121, 'loss/train': 2.9417686462402344} +03/03/2022 15:54:44 - INFO - codeparrot_training - Step 2122: {'lr': 0.0004999991616878777, 'samples': 1086976, 'steps': 2122, 'loss/train': 3.1434085369110107} +03/03/2022 15:54:47 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/03/2022 15:54:49 - INFO - codeparrot_training - Step 2123: {'lr': 0.0004999991478887409, 'samples': 1087488, 'steps': 2123, 'loss/train': 2.9659318923950195} +03/03/2022 15:54:52 - INFO - codeparrot_training - Step 2124: {'lr': 0.0004999991339769582, 'samples': 1088000, 'steps': 2124, 'loss/train': 3.1472535133361816} +03/03/2022 15:54:55 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/03/2022 15:54:57 - INFO - codeparrot_training - Step 2125: {'lr': 0.0004999991199525299, 'samples': 1088512, 'steps': 2125, 'loss/train': 2.6963629722595215} +03/03/2022 15:55:01 - INFO - codeparrot_training - Step 2126: {'lr': 0.0004999991058154557, 'samples': 1089024, 'steps': 2126, 'loss/train': 3.4635300636291504} +03/03/2022 15:55:03 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/03/2022 15:55:06 - INFO - codeparrot_training - Step 2127: {'lr': 0.0004999990915657359, 'samples': 1089536, 'steps': 2127, 'loss/train': 4.766872882843018} +03/03/2022 15:55:09 - INFO - codeparrot_training - Step 2128: {'lr': 0.0004999990772033702, 'samples': 1090048, 'steps': 2128, 'loss/train': 2.3895936012268066} +03/03/2022 15:55:11 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/03/2022 15:55:14 - INFO - codeparrot_training - Step 2129: {'lr': 0.000499999062728359, 'samples': 1090560, 'steps': 2129, 'loss/train': 3.4782779216766357} +03/03/2022 15:55:17 - INFO - codeparrot_training - Step 2130: {'lr': 0.0004999990481407018, 'samples': 1091072, 'steps': 2130, 'loss/train': 2.4561641216278076} +03/03/2022 15:55:19 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/03/2022 15:55:23 - INFO - codeparrot_training - Step 2131: {'lr': 0.0004999990334403991, 'samples': 1091584, 'steps': 2131, 'loss/train': 3.1402735710144043} +03/03/2022 15:55:26 - INFO - codeparrot_training - Step 2132: {'lr': 0.0004999990186274506, 'samples': 1092096, 'steps': 2132, 'loss/train': 3.021862268447876} +03/03/2022 15:55:28 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/03/2022 15:55:31 - INFO - codeparrot_training - Step 2133: {'lr': 0.0004999990037018564, 'samples': 1092608, 'steps': 2133, 'loss/train': 2.954002857208252} +03/03/2022 15:55:34 - INFO - codeparrot_training - Step 2134: {'lr': 0.0004999989886636166, 'samples': 1093120, 'steps': 2134, 'loss/train': 3.399930715560913} +03/03/2022 15:55:36 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/03/2022 15:55:40 - INFO - codeparrot_training - Step 2135: {'lr': 0.000499998973512731, 'samples': 1093632, 'steps': 2135, 'loss/train': 4.165732383728027} +03/03/2022 15:55:43 - INFO - codeparrot_training - Step 2136: {'lr': 0.0004999989582491998, 'samples': 1094144, 'steps': 2136, 'loss/train': 2.960538864135742} +03/03/2022 15:55:44 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/03/2022 15:55:48 - INFO - codeparrot_training - Step 2137: {'lr': 0.0004999989428730229, 'samples': 1094656, 'steps': 2137, 'loss/train': 3.5106186866760254} +03/03/2022 15:55:51 - INFO - codeparrot_training - Step 2138: {'lr': 0.0004999989273842003, 'samples': 1095168, 'steps': 2138, 'loss/train': 4.432214260101318} +03/03/2022 15:55:53 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/03/2022 15:55:57 - INFO - codeparrot_training - Step 2139: {'lr': 0.0004999989117827321, 'samples': 1095680, 'steps': 2139, 'loss/train': 2.837758779525757} +03/03/2022 15:56:00 - INFO - codeparrot_training - Step 2140: {'lr': 0.0004999988960686182, 'samples': 1096192, 'steps': 2140, 'loss/train': 3.162780523300171} +03/03/2022 15:56:02 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/03/2022 15:56:05 - INFO - codeparrot_training - Step 2141: {'lr': 0.0004999988802418587, 'samples': 1096704, 'steps': 2141, 'loss/train': 3.241360902786255} +03/03/2022 15:56:08 - INFO - codeparrot_training - Step 2142: {'lr': 0.0004999988643024536, 'samples': 1097216, 'steps': 2142, 'loss/train': 3.9060487747192383} +03/03/2022 15:56:10 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/03/2022 15:56:14 - INFO - codeparrot_training - Step 2143: {'lr': 0.0004999988482504027, 'samples': 1097728, 'steps': 2143, 'loss/train': 3.601203441619873} +03/03/2022 15:56:17 - INFO - codeparrot_training - Step 2144: {'lr': 0.0004999988320857063, 'samples': 1098240, 'steps': 2144, 'loss/train': 1.9985949993133545} +03/03/2022 15:56:18 - INFO - codeparrot_training - Skipping example with length 576 (seq_length=1024) +03/03/2022 15:56:22 - INFO - codeparrot_training - Step 2145: {'lr': 0.0004999988158083643, 'samples': 1098752, 'steps': 2145, 'loss/train': 3.100428819656372} +03/03/2022 15:56:25 - INFO - codeparrot_training - Step 2146: {'lr': 0.0004999987994183766, 'samples': 1099264, 'steps': 2146, 'loss/train': 2.8766708374023438} +03/03/2022 15:56:27 - INFO - codeparrot_training - Skipping example with length 733 (seq_length=1024) +03/03/2022 15:56:30 - INFO - codeparrot_training - Step 2147: {'lr': 0.0004999987829157434, 'samples': 1099776, 'steps': 2147, 'loss/train': 3.1598196029663086} +03/03/2022 15:56:34 - INFO - codeparrot_training - Step 2148: {'lr': 0.0004999987663004646, 'samples': 1100288, 'steps': 2148, 'loss/train': 4.006170749664307} +03/03/2022 15:56:35 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/03/2022 15:56:39 - INFO - codeparrot_training - Step 2149: {'lr': 0.0004999987495725401, 'samples': 1100800, 'steps': 2149, 'loss/train': 3.1935853958129883} +03/03/2022 15:56:42 - INFO - codeparrot_training - Step 2150: {'lr': 0.0004999987327319701, 'samples': 1101312, 'steps': 2150, 'loss/train': 3.8682503700256348} +03/03/2022 15:56:43 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/03/2022 15:56:47 - INFO - codeparrot_training - Step 2151: {'lr': 0.0004999987157787546, 'samples': 1101824, 'steps': 2151, 'loss/train': 3.1055119037628174} +03/03/2022 15:56:50 - INFO - codeparrot_training - Step 2152: {'lr': 0.0004999986987128934, 'samples': 1102336, 'steps': 2152, 'loss/train': 3.349456310272217} +03/03/2022 15:56:52 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/03/2022 15:56:56 - INFO - codeparrot_training - Step 2153: {'lr': 0.0004999986815343867, 'samples': 1102848, 'steps': 2153, 'loss/train': 3.3752105236053467} +03/03/2022 15:56:59 - INFO - codeparrot_training - Step 2154: {'lr': 0.0004999986642432345, 'samples': 1103360, 'steps': 2154, 'loss/train': 3.236268997192383} +03/03/2022 15:57:02 - INFO - codeparrot_training - Skipping example with length 714 (seq_length=1024) +03/03/2022 15:57:04 - INFO - codeparrot_training - Step 2155: {'lr': 0.0004999986468394367, 'samples': 1103872, 'steps': 2155, 'loss/train': 4.51322603225708} +03/03/2022 15:57:08 - INFO - codeparrot_training - Step 2156: {'lr': 0.0004999986293229934, 'samples': 1104384, 'steps': 2156, 'loss/train': 2.779130697250366} +03/03/2022 15:57:10 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/03/2022 15:57:13 - INFO - codeparrot_training - Step 2157: {'lr': 0.0004999986116939045, 'samples': 1104896, 'steps': 2157, 'loss/train': 3.7786474227905273} +03/03/2022 15:57:16 - INFO - codeparrot_training - Step 2158: {'lr': 0.0004999985939521702, 'samples': 1105408, 'steps': 2158, 'loss/train': 3.909266471862793} +03/03/2022 15:57:18 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/03/2022 15:57:21 - INFO - codeparrot_training - Step 2159: {'lr': 0.0004999985760977903, 'samples': 1105920, 'steps': 2159, 'loss/train': 3.5279836654663086} +03/03/2022 15:57:24 - INFO - codeparrot_training - Step 2160: {'lr': 0.000499998558130765, 'samples': 1106432, 'steps': 2160, 'loss/train': 2.9420106410980225} +03/03/2022 15:57:26 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/03/2022 15:57:30 - INFO - codeparrot_training - Step 2161: {'lr': 0.0004999985400510941, 'samples': 1106944, 'steps': 2161, 'loss/train': 4.075507164001465} +03/03/2022 15:57:33 - INFO - codeparrot_training - Step 2162: {'lr': 0.0004999985218587777, 'samples': 1107456, 'steps': 2162, 'loss/train': 3.7804722785949707} +03/03/2022 15:57:34 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/03/2022 15:57:38 - INFO - codeparrot_training - Step 2163: {'lr': 0.0004999985035538159, 'samples': 1107968, 'steps': 2163, 'loss/train': 4.148641109466553} +03/03/2022 15:57:41 - INFO - codeparrot_training - Step 2164: {'lr': 0.0004999984851362086, 'samples': 1108480, 'steps': 2164, 'loss/train': 4.1257781982421875} +03/03/2022 15:57:43 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/03/2022 15:57:47 - INFO - codeparrot_training - Step 2165: {'lr': 0.0004999984666059559, 'samples': 1108992, 'steps': 2165, 'loss/train': 3.3074519634246826} +03/03/2022 15:57:50 - INFO - codeparrot_training - Step 2166: {'lr': 0.0004999984479630577, 'samples': 1109504, 'steps': 2166, 'loss/train': 3.8513736724853516} +03/03/2022 15:57:51 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/03/2022 15:57:55 - INFO - codeparrot_training - Step 2167: {'lr': 0.000499998429207514, 'samples': 1110016, 'steps': 2167, 'loss/train': 2.6395175457000732} +03/03/2022 15:57:58 - INFO - codeparrot_training - Step 2168: {'lr': 0.000499998410339325, 'samples': 1110528, 'steps': 2168, 'loss/train': 2.7602648735046387} +03/03/2022 15:58:00 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/03/2022 15:58:04 - INFO - codeparrot_training - Step 2169: {'lr': 0.0004999983913584904, 'samples': 1111040, 'steps': 2169, 'loss/train': 3.9304921627044678} +03/03/2022 15:58:07 - INFO - codeparrot_training - Step 2170: {'lr': 0.0004999983722650106, 'samples': 1111552, 'steps': 2170, 'loss/train': 3.1235342025756836} +03/03/2022 15:58:08 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/03/2022 15:58:12 - INFO - codeparrot_training - Step 2171: {'lr': 0.0004999983530588853, 'samples': 1112064, 'steps': 2171, 'loss/train': 3.3282878398895264} +03/03/2022 15:58:15 - INFO - codeparrot_training - Step 2172: {'lr': 0.0004999983337401145, 'samples': 1112576, 'steps': 2172, 'loss/train': 1.258226990699768} +03/03/2022 15:58:16 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/03/2022 15:58:20 - INFO - codeparrot_training - Step 2173: {'lr': 0.0004999983143086984, 'samples': 1113088, 'steps': 2173, 'loss/train': 3.6252224445343018} +03/03/2022 15:58:24 - INFO - codeparrot_training - Step 2174: {'lr': 0.0004999982947646368, 'samples': 1113600, 'steps': 2174, 'loss/train': 4.8876729011535645} +03/03/2022 15:58:25 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/03/2022 15:58:29 - INFO - codeparrot_training - Step 2175: {'lr': 0.00049999827510793, 'samples': 1114112, 'steps': 2175, 'loss/train': 3.379680633544922} +03/03/2022 15:58:32 - INFO - codeparrot_training - Step 2176: {'lr': 0.0004999982553385778, 'samples': 1114624, 'steps': 2176, 'loss/train': 2.915555715560913} +03/03/2022 15:58:33 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/03/2022 15:58:37 - INFO - codeparrot_training - Step 2177: {'lr': 0.0004999982354565802, 'samples': 1115136, 'steps': 2177, 'loss/train': 3.180706024169922} +03/03/2022 15:58:41 - INFO - codeparrot_training - Step 2178: {'lr': 0.0004999982154619372, 'samples': 1115648, 'steps': 2178, 'loss/train': 2.8413264751434326} +03/03/2022 15:58:42 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/03/2022 15:58:46 - INFO - codeparrot_training - Step 2179: {'lr': 0.000499998195354649, 'samples': 1116160, 'steps': 2179, 'loss/train': 3.075929641723633} +03/03/2022 15:58:49 - INFO - codeparrot_training - Step 2180: {'lr': 0.0004999981751347153, 'samples': 1116672, 'steps': 2180, 'loss/train': 2.786820411682129} +03/03/2022 15:58:50 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 15:58:54 - INFO - codeparrot_training - Step 2181: {'lr': 0.0004999981548021364, 'samples': 1117184, 'steps': 2181, 'loss/train': 3.5749623775482178} +03/03/2022 15:58:57 - INFO - codeparrot_training - Step 2182: {'lr': 0.0004999981343569122, 'samples': 1117696, 'steps': 2182, 'loss/train': 1.6517219543457031} +03/03/2022 15:58:59 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/03/2022 15:59:03 - INFO - codeparrot_training - Step 2183: {'lr': 0.0004999981137990425, 'samples': 1118208, 'steps': 2183, 'loss/train': 3.5469722747802734} +03/03/2022 15:59:06 - INFO - codeparrot_training - Step 2184: {'lr': 0.0004999980931285278, 'samples': 1118720, 'steps': 2184, 'loss/train': 3.6124532222747803} +03/03/2022 15:59:08 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/03/2022 15:59:11 - INFO - codeparrot_training - Step 2185: {'lr': 0.0004999980723453676, 'samples': 1119232, 'steps': 2185, 'loss/train': 4.047886848449707} +03/03/2022 15:59:14 - INFO - codeparrot_training - Step 2186: {'lr': 0.0004999980514495623, 'samples': 1119744, 'steps': 2186, 'loss/train': 3.0435261726379395} +03/03/2022 15:59:16 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/03/2022 15:59:20 - INFO - codeparrot_training - Step 2187: {'lr': 0.0004999980304411116, 'samples': 1120256, 'steps': 2187, 'loss/train': 3.721737861633301} +03/03/2022 15:59:23 - INFO - codeparrot_training - Step 2188: {'lr': 0.0004999980093200157, 'samples': 1120768, 'steps': 2188, 'loss/train': 2.6698317527770996} +03/03/2022 15:59:25 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/03/2022 15:59:28 - INFO - codeparrot_training - Step 2189: {'lr': 0.0004999979880862745, 'samples': 1121280, 'steps': 2189, 'loss/train': 2.596677541732788} +03/03/2022 15:59:31 - INFO - codeparrot_training - Step 2190: {'lr': 0.0004999979667398882, 'samples': 1121792, 'steps': 2190, 'loss/train': 2.020834445953369} +03/03/2022 15:59:34 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/03/2022 15:59:36 - INFO - codeparrot_training - Step 2191: {'lr': 0.0004999979452808565, 'samples': 1122304, 'steps': 2191, 'loss/train': 3.111593723297119} +03/03/2022 15:59:40 - INFO - codeparrot_training - Step 2192: {'lr': 0.0004999979237091796, 'samples': 1122816, 'steps': 2192, 'loss/train': 2.8774466514587402} +03/03/2022 15:59:42 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/03/2022 15:59:45 - INFO - codeparrot_training - Step 2193: {'lr': 0.0004999979020248577, 'samples': 1123328, 'steps': 2193, 'loss/train': 2.7346956729888916} +03/03/2022 15:59:48 - INFO - codeparrot_training - Step 2194: {'lr': 0.0004999978802278904, 'samples': 1123840, 'steps': 2194, 'loss/train': 4.36392879486084} +03/03/2022 15:59:51 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/03/2022 15:59:53 - INFO - codeparrot_training - Step 2195: {'lr': 0.000499997858318278, 'samples': 1124352, 'steps': 2195, 'loss/train': 3.296250104904175} +03/03/2022 15:59:57 - INFO - codeparrot_training - Step 2196: {'lr': 0.0004999978362960204, 'samples': 1124864, 'steps': 2196, 'loss/train': 3.2620816230773926} +03/03/2022 15:59:59 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/03/2022 16:00:02 - INFO - codeparrot_training - Step 2197: {'lr': 0.0004999978141611176, 'samples': 1125376, 'steps': 2197, 'loss/train': 3.448967933654785} +03/03/2022 16:00:05 - INFO - codeparrot_training - Step 2198: {'lr': 0.0004999977919135696, 'samples': 1125888, 'steps': 2198, 'loss/train': 2.1470861434936523} +03/03/2022 16:00:07 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/03/2022 16:00:10 - INFO - codeparrot_training - Step 2199: {'lr': 0.0004999977695533766, 'samples': 1126400, 'steps': 2199, 'loss/train': 2.2718451023101807} +03/03/2022 16:00:13 - INFO - codeparrot_training - Step 2200: {'lr': 0.0004999977470805383, 'samples': 1126912, 'steps': 2200, 'loss/train': 3.131701946258545} +03/03/2022 16:00:16 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/03/2022 16:00:19 - INFO - codeparrot_training - Step 2201: {'lr': 0.0004999977244950551, 'samples': 1127424, 'steps': 2201, 'loss/train': 3.7374184131622314} +03/03/2022 16:00:22 - INFO - codeparrot_training - Step 2202: {'lr': 0.0004999977017969266, 'samples': 1127936, 'steps': 2202, 'loss/train': 8.167623519897461} +03/03/2022 16:00:25 - INFO - codeparrot_training - Step 2203: {'lr': 0.000499997678986153, 'samples': 1128448, 'steps': 2203, 'loss/train': 1.47736656665802} +03/03/2022 16:00:25 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/03/2022 16:00:30 - INFO - codeparrot_training - Step 2204: {'lr': 0.0004999976560627344, 'samples': 1128960, 'steps': 2204, 'loss/train': 2.690406560897827} +03/03/2022 16:00:34 - INFO - codeparrot_training - Step 2205: {'lr': 0.0004999976330266707, 'samples': 1129472, 'steps': 2205, 'loss/train': 2.537095785140991} +03/03/2022 16:00:34 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 16:00:39 - INFO - codeparrot_training - Step 2206: {'lr': 0.0004999976098779618, 'samples': 1129984, 'steps': 2206, 'loss/train': 3.085176944732666} +03/03/2022 16:00:42 - INFO - codeparrot_training - Step 2207: {'lr': 0.0004999975866166079, 'samples': 1130496, 'steps': 2207, 'loss/train': 1.8430806398391724} +03/03/2022 16:00:42 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/03/2022 16:00:47 - INFO - codeparrot_training - Step 2208: {'lr': 0.000499997563242609, 'samples': 1131008, 'steps': 2208, 'loss/train': 2.9553072452545166} +03/03/2022 16:00:50 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/03/2022 16:00:53 - INFO - codeparrot_training - Step 2209: {'lr': 0.0004999975397559649, 'samples': 1131520, 'steps': 2209, 'loss/train': 2.7070538997650146} +03/03/2022 16:00:56 - INFO - codeparrot_training - Step 2210: {'lr': 0.000499997516156676, 'samples': 1132032, 'steps': 2210, 'loss/train': 3.3697493076324463} +03/03/2022 16:00:58 - INFO - codeparrot_training - Skipping example with length 463 (seq_length=1024) +03/03/2022 16:01:01 - INFO - codeparrot_training - Step 2211: {'lr': 0.000499997492444742, 'samples': 1132544, 'steps': 2211, 'loss/train': 3.843799114227295} +03/03/2022 16:01:04 - INFO - codeparrot_training - Step 2212: {'lr': 0.0004999974686201629, 'samples': 1133056, 'steps': 2212, 'loss/train': 3.4916319847106934} +03/03/2022 16:01:06 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 16:01:09 - INFO - codeparrot_training - Step 2213: {'lr': 0.0004999974446829389, 'samples': 1133568, 'steps': 2213, 'loss/train': 3.0537095069885254} +03/03/2022 16:01:12 - INFO - codeparrot_training - Step 2214: {'lr': 0.0004999974206330698, 'samples': 1134080, 'steps': 2214, 'loss/train': 2.6437153816223145} +03/03/2022 16:01:14 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/03/2022 16:01:18 - INFO - codeparrot_training - Step 2215: {'lr': 0.0004999973964705558, 'samples': 1134592, 'steps': 2215, 'loss/train': 2.0629634857177734} +03/03/2022 16:01:21 - INFO - codeparrot_training - Step 2216: {'lr': 0.0004999973721953968, 'samples': 1135104, 'steps': 2216, 'loss/train': 3.6002323627471924} +03/03/2022 16:01:23 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/03/2022 16:01:26 - INFO - codeparrot_training - Step 2217: {'lr': 0.0004999973478075928, 'samples': 1135616, 'steps': 2217, 'loss/train': 3.738161563873291} +03/03/2022 16:01:29 - INFO - codeparrot_training - Step 2218: {'lr': 0.0004999973233071438, 'samples': 1136128, 'steps': 2218, 'loss/train': 2.3218307495117188} +03/03/2022 16:01:32 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/03/2022 16:01:35 - INFO - codeparrot_training - Step 2219: {'lr': 0.00049999729869405, 'samples': 1136640, 'steps': 2219, 'loss/train': 2.759835720062256} +03/03/2022 16:01:38 - INFO - codeparrot_training - Step 2220: {'lr': 0.0004999972739683113, 'samples': 1137152, 'steps': 2220, 'loss/train': 2.835109233856201} +03/03/2022 16:01:40 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/03/2022 16:01:43 - INFO - codeparrot_training - Step 2221: {'lr': 0.0004999972491299276, 'samples': 1137664, 'steps': 2221, 'loss/train': 3.295903205871582} +03/03/2022 16:01:46 - INFO - codeparrot_training - Step 2222: {'lr': 0.000499997224178899, 'samples': 1138176, 'steps': 2222, 'loss/train': 2.1122236251831055} +03/03/2022 16:01:49 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/03/2022 16:01:52 - INFO - codeparrot_training - Step 2223: {'lr': 0.0004999971991152256, 'samples': 1138688, 'steps': 2223, 'loss/train': 2.486236333847046} +03/03/2022 16:01:55 - INFO - codeparrot_training - Step 2224: {'lr': 0.0004999971739389072, 'samples': 1139200, 'steps': 2224, 'loss/train': 2.6111724376678467} +03/03/2022 16:01:57 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/03/2022 16:02:00 - INFO - codeparrot_training - Step 2225: {'lr': 0.000499997148649944, 'samples': 1139712, 'steps': 2225, 'loss/train': 3.8881101608276367} +03/03/2022 16:02:03 - INFO - codeparrot_training - Step 2226: {'lr': 0.0004999971232483359, 'samples': 1140224, 'steps': 2226, 'loss/train': 3.3411731719970703} +03/03/2022 16:02:06 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/03/2022 16:02:08 - INFO - codeparrot_training - Step 2227: {'lr': 0.0004999970977340829, 'samples': 1140736, 'steps': 2227, 'loss/train': 3.3929550647735596} +03/03/2022 16:02:12 - INFO - codeparrot_training - Step 2228: {'lr': 0.0004999970721071852, 'samples': 1141248, 'steps': 2228, 'loss/train': 3.846820831298828} +03/03/2022 16:02:14 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/03/2022 16:02:17 - INFO - codeparrot_training - Step 2229: {'lr': 0.0004999970463676427, 'samples': 1141760, 'steps': 2229, 'loss/train': 3.200389862060547} +03/03/2022 16:02:20 - INFO - codeparrot_training - Step 2230: {'lr': 0.0004999970205154553, 'samples': 1142272, 'steps': 2230, 'loss/train': 3.928372859954834} +03/03/2022 16:02:22 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/03/2022 16:02:25 - INFO - codeparrot_training - Step 2231: {'lr': 0.000499996994550623, 'samples': 1142784, 'steps': 2231, 'loss/train': 3.8303329944610596} +03/03/2022 16:02:29 - INFO - codeparrot_training - Step 2232: {'lr': 0.000499996968473146, 'samples': 1143296, 'steps': 2232, 'loss/train': 3.860849380493164} +03/03/2022 16:02:31 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/03/2022 16:02:34 - INFO - codeparrot_training - Step 2233: {'lr': 0.0004999969422830242, 'samples': 1143808, 'steps': 2233, 'loss/train': 2.639922857284546} +03/03/2022 16:02:37 - INFO - codeparrot_training - Step 2234: {'lr': 0.0004999969159802577, 'samples': 1144320, 'steps': 2234, 'loss/train': 4.099701404571533} +03/03/2022 16:02:39 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/03/2022 16:02:42 - INFO - codeparrot_training - Step 2235: {'lr': 0.0004999968895648464, 'samples': 1144832, 'steps': 2235, 'loss/train': 3.855062246322632} +03/03/2022 16:02:46 - INFO - codeparrot_training - Step 2236: {'lr': 0.0004999968630367905, 'samples': 1145344, 'steps': 2236, 'loss/train': 3.210245370864868} +03/03/2022 16:02:48 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/03/2022 16:02:51 - INFO - codeparrot_training - Step 2237: {'lr': 0.0004999968363960897, 'samples': 1145856, 'steps': 2237, 'loss/train': 2.9591712951660156} +03/03/2022 16:02:54 - INFO - codeparrot_training - Step 2238: {'lr': 0.0004999968096427443, 'samples': 1146368, 'steps': 2238, 'loss/train': 3.1449475288391113} +03/03/2022 16:02:56 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/03/2022 16:02:59 - INFO - codeparrot_training - Step 2239: {'lr': 0.0004999967827767541, 'samples': 1146880, 'steps': 2239, 'loss/train': 3.80161714553833} +03/03/2022 16:03:02 - INFO - codeparrot_training - Step 2240: {'lr': 0.0004999967557981192, 'samples': 1147392, 'steps': 2240, 'loss/train': 2.181140422821045} +03/03/2022 16:03:04 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/03/2022 16:03:07 - INFO - codeparrot_training - Step 2241: {'lr': 0.0004999967287068396, 'samples': 1147904, 'steps': 2241, 'loss/train': 3.3080360889434814} +03/03/2022 16:03:11 - INFO - codeparrot_training - Step 2242: {'lr': 0.0004999967015029155, 'samples': 1148416, 'steps': 2242, 'loss/train': 1.4108967781066895} +03/03/2022 16:03:12 - INFO - codeparrot_training - Skipping example with length 973 (seq_length=1024) +03/03/2022 16:03:16 - INFO - codeparrot_training - Step 2243: {'lr': 0.0004999966741863467, 'samples': 1148928, 'steps': 2243, 'loss/train': 3.116621971130371} +03/03/2022 16:03:19 - INFO - codeparrot_training - Step 2244: {'lr': 0.000499996646757133, 'samples': 1149440, 'steps': 2244, 'loss/train': 3.128178119659424} +03/03/2022 16:03:20 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/03/2022 16:03:24 - INFO - codeparrot_training - Step 2245: {'lr': 0.0004999966192152749, 'samples': 1149952, 'steps': 2245, 'loss/train': 3.1809072494506836} +03/03/2022 16:03:27 - INFO - codeparrot_training - Step 2246: {'lr': 0.0004999965915607722, 'samples': 1150464, 'steps': 2246, 'loss/train': 3.7514734268188477} +03/03/2022 16:03:29 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/03/2022 16:03:33 - INFO - codeparrot_training - Step 2247: {'lr': 0.0004999965637936248, 'samples': 1150976, 'steps': 2247, 'loss/train': 1.9753257036209106} +03/03/2022 16:03:36 - INFO - codeparrot_training - Step 2248: {'lr': 0.0004999965359138329, 'samples': 1151488, 'steps': 2248, 'loss/train': 2.962155342102051} +03/03/2022 16:03:37 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/03/2022 16:03:41 - INFO - codeparrot_training - Step 2249: {'lr': 0.0004999965079213964, 'samples': 1152000, 'steps': 2249, 'loss/train': 3.8691513538360596} +03/03/2022 16:03:44 - INFO - codeparrot_training - Step 2250: {'lr': 0.0004999964798163152, 'samples': 1152512, 'steps': 2250, 'loss/train': 3.998168468475342} +03/03/2022 16:03:45 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/03/2022 16:03:49 - INFO - codeparrot_training - Step 2251: {'lr': 0.0004999964515985896, 'samples': 1153024, 'steps': 2251, 'loss/train': 3.000925064086914} +03/03/2022 16:03:53 - INFO - codeparrot_training - Step 2252: {'lr': 0.0004999964232682194, 'samples': 1153536, 'steps': 2252, 'loss/train': 2.8140668869018555} +03/03/2022 16:03:54 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/03/2022 16:03:58 - INFO - codeparrot_training - Step 2253: {'lr': 0.0004999963948252046, 'samples': 1154048, 'steps': 2253, 'loss/train': 2.92730975151062} +03/03/2022 16:04:01 - INFO - codeparrot_training - Step 2254: {'lr': 0.0004999963662695453, 'samples': 1154560, 'steps': 2254, 'loss/train': 2.7133889198303223} +03/03/2022 16:04:02 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/03/2022 16:04:06 - INFO - codeparrot_training - Step 2255: {'lr': 0.0004999963376012416, 'samples': 1155072, 'steps': 2255, 'loss/train': 3.4704363346099854} +03/03/2022 16:04:09 - INFO - codeparrot_training - Step 2256: {'lr': 0.0004999963088202934, 'samples': 1155584, 'steps': 2256, 'loss/train': 3.760922431945801} +03/03/2022 16:04:10 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/03/2022 16:04:15 - INFO - codeparrot_training - Step 2257: {'lr': 0.0004999962799267006, 'samples': 1156096, 'steps': 2257, 'loss/train': 2.528806686401367} +03/03/2022 16:04:18 - INFO - codeparrot_training - Step 2258: {'lr': 0.0004999962509204634, 'samples': 1156608, 'steps': 2258, 'loss/train': 2.3950648307800293} +03/03/2022 16:04:19 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/03/2022 16:04:23 - INFO - codeparrot_training - Step 2259: {'lr': 0.0004999962218015818, 'samples': 1157120, 'steps': 2259, 'loss/train': 0.7431287169456482} +03/03/2022 16:04:26 - INFO - codeparrot_training - Step 2260: {'lr': 0.0004999961925700557, 'samples': 1157632, 'steps': 2260, 'loss/train': 3.814455032348633} +03/03/2022 16:04:27 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/03/2022 16:04:32 - INFO - codeparrot_training - Step 2261: {'lr': 0.0004999961632258851, 'samples': 1158144, 'steps': 2261, 'loss/train': 3.390331268310547} +03/03/2022 16:04:35 - INFO - codeparrot_training - Step 2262: {'lr': 0.0004999961337690703, 'samples': 1158656, 'steps': 2262, 'loss/train': 3.006119728088379} +03/03/2022 16:04:36 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 16:04:40 - INFO - codeparrot_training - Step 2263: {'lr': 0.0004999961041996109, 'samples': 1159168, 'steps': 2263, 'loss/train': 2.743804454803467} +03/03/2022 16:04:43 - INFO - codeparrot_training - Step 2264: {'lr': 0.0004999960745175071, 'samples': 1159680, 'steps': 2264, 'loss/train': 3.8200066089630127} +03/03/2022 16:04:45 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/03/2022 16:04:49 - INFO - codeparrot_training - Step 2265: {'lr': 0.0004999960447227591, 'samples': 1160192, 'steps': 2265, 'loss/train': 3.8612003326416016} +03/03/2022 16:04:52 - INFO - codeparrot_training - Step 2266: {'lr': 0.0004999960148153667, 'samples': 1160704, 'steps': 2266, 'loss/train': 3.1429362297058105} +03/03/2022 16:04:54 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/03/2022 16:04:57 - INFO - codeparrot_training - Step 2267: {'lr': 0.0004999959847953299, 'samples': 1161216, 'steps': 2267, 'loss/train': 2.8557565212249756} +03/03/2022 16:05:00 - INFO - codeparrot_training - Step 2268: {'lr': 0.0004999959546626487, 'samples': 1161728, 'steps': 2268, 'loss/train': 2.1531596183776855} +03/03/2022 16:05:02 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/03/2022 16:05:06 - INFO - codeparrot_training - Step 2269: {'lr': 0.0004999959244173232, 'samples': 1162240, 'steps': 2269, 'loss/train': 3.010319948196411} +03/03/2022 16:05:09 - INFO - codeparrot_training - Step 2270: {'lr': 0.0004999958940593535, 'samples': 1162752, 'steps': 2270, 'loss/train': 3.9224822521209717} +03/03/2022 16:05:11 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/03/2022 16:05:14 - INFO - codeparrot_training - Step 2271: {'lr': 0.0004999958635887394, 'samples': 1163264, 'steps': 2271, 'loss/train': 3.962085008621216} +03/03/2022 16:05:17 - INFO - codeparrot_training - Step 2272: {'lr': 0.0004999958330054811, 'samples': 1163776, 'steps': 2272, 'loss/train': 3.341320514678955} +03/03/2022 16:05:19 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/03/2022 16:05:23 - INFO - codeparrot_training - Step 2273: {'lr': 0.0004999958023095785, 'samples': 1164288, 'steps': 2273, 'loss/train': 0.7658462524414062} +03/03/2022 16:05:26 - INFO - codeparrot_training - Step 2274: {'lr': 0.0004999957715010317, 'samples': 1164800, 'steps': 2274, 'loss/train': 3.0417842864990234} +03/03/2022 16:05:28 - INFO - codeparrot_training - Skipping example with length 1004 (seq_length=1024) +03/03/2022 16:05:31 - INFO - codeparrot_training - Step 2275: {'lr': 0.0004999957405798405, 'samples': 1165312, 'steps': 2275, 'loss/train': 3.515624761581421} +03/03/2022 16:05:34 - INFO - codeparrot_training - Step 2276: {'lr': 0.0004999957095460052, 'samples': 1165824, 'steps': 2276, 'loss/train': 2.811504602432251} +03/03/2022 16:05:36 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/03/2022 16:05:40 - INFO - codeparrot_training - Step 2277: {'lr': 0.0004999956783995257, 'samples': 1166336, 'steps': 2277, 'loss/train': 3.7728817462921143} +03/03/2022 16:05:43 - INFO - codeparrot_training - Step 2278: {'lr': 0.0004999956471404021, 'samples': 1166848, 'steps': 2278, 'loss/train': 3.2592594623565674} +03/03/2022 16:05:45 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 16:05:48 - INFO - codeparrot_training - Step 2279: {'lr': 0.0004999956157686341, 'samples': 1167360, 'steps': 2279, 'loss/train': 3.566458225250244} +03/03/2022 16:05:51 - INFO - codeparrot_training - Step 2280: {'lr': 0.0004999955842842222, 'samples': 1167872, 'steps': 2280, 'loss/train': 2.734273672103882} +03/03/2022 16:05:53 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/03/2022 16:05:56 - INFO - codeparrot_training - Step 2281: {'lr': 0.0004999955526871659, 'samples': 1168384, 'steps': 2281, 'loss/train': 3.0264272689819336} +03/03/2022 16:06:00 - INFO - codeparrot_training - Step 2282: {'lr': 0.0004999955209774656, 'samples': 1168896, 'steps': 2282, 'loss/train': 3.1493284702301025} +03/03/2022 16:06:01 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/03/2022 16:06:05 - INFO - codeparrot_training - Step 2283: {'lr': 0.0004999954891551211, 'samples': 1169408, 'steps': 2283, 'loss/train': 1.7484716176986694} +03/03/2022 16:06:08 - INFO - codeparrot_training - Step 2284: {'lr': 0.0004999954572201326, 'samples': 1169920, 'steps': 2284, 'loss/train': 3.4496495723724365} +03/03/2022 16:06:09 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/03/2022 16:06:13 - INFO - codeparrot_training - Step 2285: {'lr': 0.0004999954251724999, 'samples': 1170432, 'steps': 2285, 'loss/train': 2.7517075538635254} +03/03/2022 16:06:16 - INFO - codeparrot_training - Step 2286: {'lr': 0.0004999953930122231, 'samples': 1170944, 'steps': 2286, 'loss/train': 2.4651145935058594} +03/03/2022 16:06:17 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/03/2022 16:06:22 - INFO - codeparrot_training - Step 2287: {'lr': 0.0004999953607393023, 'samples': 1171456, 'steps': 2287, 'loss/train': 3.3342177867889404} +03/03/2022 16:06:25 - INFO - codeparrot_training - Step 2288: {'lr': 0.0004999953283537374, 'samples': 1171968, 'steps': 2288, 'loss/train': 4.049373626708984} +03/03/2022 16:06:26 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/03/2022 16:06:30 - INFO - codeparrot_training - Step 2289: {'lr': 0.0004999952958555285, 'samples': 1172480, 'steps': 2289, 'loss/train': 3.1396403312683105} +03/03/2022 16:06:33 - INFO - codeparrot_training - Step 2290: {'lr': 0.0004999952632446756, 'samples': 1172992, 'steps': 2290, 'loss/train': 2.876293420791626} +03/03/2022 16:06:34 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/03/2022 16:06:38 - INFO - codeparrot_training - Step 2291: {'lr': 0.0004999952305211786, 'samples': 1173504, 'steps': 2291, 'loss/train': 3.5791354179382324} +03/03/2022 16:06:42 - INFO - codeparrot_training - Step 2292: {'lr': 0.0004999951976850377, 'samples': 1174016, 'steps': 2292, 'loss/train': 2.5373010635375977} +03/03/2022 16:06:42 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/03/2022 16:06:47 - INFO - codeparrot_training - Step 2293: {'lr': 0.0004999951647362527, 'samples': 1174528, 'steps': 2293, 'loss/train': 3.0804197788238525} +03/03/2022 16:06:50 - INFO - codeparrot_training - Step 2294: {'lr': 0.0004999951316748239, 'samples': 1175040, 'steps': 2294, 'loss/train': 2.5079517364501953} +03/03/2022 16:06:51 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/03/2022 16:06:55 - INFO - codeparrot_training - Step 2295: {'lr': 0.0004999950985007511, 'samples': 1175552, 'steps': 2295, 'loss/train': 2.9973175525665283} +03/03/2022 16:06:58 - INFO - codeparrot_training - Step 2296: {'lr': 0.0004999950652140343, 'samples': 1176064, 'steps': 2296, 'loss/train': 4.560308933258057} +03/03/2022 16:06:58 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/03/2022 16:07:04 - INFO - codeparrot_training - Step 2297: {'lr': 0.0004999950318146737, 'samples': 1176576, 'steps': 2297, 'loss/train': 2.290208101272583} +03/03/2022 16:07:07 - INFO - codeparrot_training - Step 2298: {'lr': 0.0004999949983026691, 'samples': 1177088, 'steps': 2298, 'loss/train': 3.1492295265197754} +03/03/2022 16:07:07 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/03/2022 16:07:12 - INFO - codeparrot_training - Step 2299: {'lr': 0.0004999949646780205, 'samples': 1177600, 'steps': 2299, 'loss/train': 3.4125776290893555} +03/03/2022 16:07:15 - INFO - codeparrot_training - Step 2300: {'lr': 0.0004999949309407283, 'samples': 1178112, 'steps': 2300, 'loss/train': 3.329164981842041} +03/03/2022 16:07:16 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/03/2022 16:07:20 - INFO - codeparrot_training - Step 2301: {'lr': 0.0004999948970907921, 'samples': 1178624, 'steps': 2301, 'loss/train': 3.2822911739349365} +03/03/2022 16:07:24 - INFO - codeparrot_training - Step 2302: {'lr': 0.0004999948631282119, 'samples': 1179136, 'steps': 2302, 'loss/train': 2.9314568042755127} +03/03/2022 16:07:24 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/03/2022 16:07:29 - INFO - codeparrot_training - Step 2303: {'lr': 0.0004999948290529881, 'samples': 1179648, 'steps': 2303, 'loss/train': 2.7405691146850586} +03/03/2022 16:07:32 - INFO - codeparrot_training - Step 2304: {'lr': 0.0004999947948651204, 'samples': 1180160, 'steps': 2304, 'loss/train': 3.455448627471924} +03/03/2022 16:07:32 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/03/2022 16:07:37 - INFO - codeparrot_training - Step 2305: {'lr': 0.0004999947605646089, 'samples': 1180672, 'steps': 2305, 'loss/train': 3.519378900527954} +03/03/2022 16:07:40 - INFO - codeparrot_training - Step 2306: {'lr': 0.0004999947261514537, 'samples': 1181184, 'steps': 2306, 'loss/train': 2.651277780532837} +03/03/2022 16:07:40 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/03/2022 16:07:46 - INFO - codeparrot_training - Step 2307: {'lr': 0.0004999946916256547, 'samples': 1181696, 'steps': 2307, 'loss/train': 2.6726553440093994} +03/03/2022 16:07:49 - INFO - codeparrot_training - Step 2308: {'lr': 0.0004999946569872118, 'samples': 1182208, 'steps': 2308, 'loss/train': 3.4526588916778564} +03/03/2022 16:07:49 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/03/2022 16:07:54 - INFO - codeparrot_training - Step 2309: {'lr': 0.0004999946222361254, 'samples': 1182720, 'steps': 2309, 'loss/train': 2.297178268432617} +03/03/2022 16:07:57 - INFO - codeparrot_training - Step 2310: {'lr': 0.0004999945873723951, 'samples': 1183232, 'steps': 2310, 'loss/train': 1.8463554382324219} +03/03/2022 16:07:57 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/03/2022 16:08:02 - INFO - codeparrot_training - Step 2311: {'lr': 0.0004999945523960212, 'samples': 1183744, 'steps': 2311, 'loss/train': 2.822561264038086} +03/03/2022 16:08:05 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/03/2022 16:08:08 - INFO - codeparrot_training - Step 2312: {'lr': 0.0004999945173070035, 'samples': 1184256, 'steps': 2312, 'loss/train': 2.0279057025909424} +03/03/2022 16:08:11 - INFO - codeparrot_training - Step 2313: {'lr': 0.0004999944821053422, 'samples': 1184768, 'steps': 2313, 'loss/train': 4.771055698394775} +03/03/2022 16:08:14 - INFO - codeparrot_training - Step 2314: {'lr': 0.0004999944467910372, 'samples': 1185280, 'steps': 2314, 'loss/train': 2.7460744380950928} +03/03/2022 16:08:15 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/03/2022 16:08:19 - INFO - codeparrot_training - Step 2315: {'lr': 0.0004999944113640887, 'samples': 1185792, 'steps': 2315, 'loss/train': 2.4521260261535645} +03/03/2022 16:08:23 - INFO - codeparrot_training - Step 2316: {'lr': 0.0004999943758244964, 'samples': 1186304, 'steps': 2316, 'loss/train': 2.9871323108673096} +03/03/2022 16:08:23 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/03/2022 16:08:28 - INFO - codeparrot_training - Step 2317: {'lr': 0.0004999943401722606, 'samples': 1186816, 'steps': 2317, 'loss/train': 3.333430290222168} +03/03/2022 16:08:31 - INFO - codeparrot_training - Step 2318: {'lr': 0.0004999943044073813, 'samples': 1187328, 'steps': 2318, 'loss/train': 4.098447322845459} +03/03/2022 16:08:31 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/03/2022 16:08:36 - INFO - codeparrot_training - Step 2319: {'lr': 0.0004999942685298582, 'samples': 1187840, 'steps': 2319, 'loss/train': 3.556797504425049} +03/03/2022 16:08:39 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/03/2022 16:08:42 - INFO - codeparrot_training - Step 2320: {'lr': 0.0004999942325396916, 'samples': 1188352, 'steps': 2320, 'loss/train': 3.6084952354431152} +03/03/2022 16:08:45 - INFO - codeparrot_training - Step 2321: {'lr': 0.0004999941964368817, 'samples': 1188864, 'steps': 2321, 'loss/train': 2.39994740486145} +03/03/2022 16:08:47 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/03/2022 16:08:50 - INFO - codeparrot_training - Step 2322: {'lr': 0.000499994160221428, 'samples': 1189376, 'steps': 2322, 'loss/train': 3.695596933364868} +03/03/2022 16:08:53 - INFO - codeparrot_training - Step 2323: {'lr': 0.0004999941238933308, 'samples': 1189888, 'steps': 2323, 'loss/train': 2.4125816822052} +03/03/2022 16:08:56 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/03/2022 16:08:59 - INFO - codeparrot_training - Step 2324: {'lr': 0.0004999940874525902, 'samples': 1190400, 'steps': 2324, 'loss/train': 3.0763771533966064} +03/03/2022 16:09:02 - INFO - codeparrot_training - Step 2325: {'lr': 0.0004999940508992061, 'samples': 1190912, 'steps': 2325, 'loss/train': 2.733271360397339} +03/03/2022 16:09:05 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/03/2022 16:09:07 - INFO - codeparrot_training - Step 2326: {'lr': 0.0004999940142331785, 'samples': 1191424, 'steps': 2326, 'loss/train': 3.4050912857055664} +03/03/2022 16:09:10 - INFO - codeparrot_training - Step 2327: {'lr': 0.0004999939774545074, 'samples': 1191936, 'steps': 2327, 'loss/train': 2.9216370582580566} +03/03/2022 16:09:13 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/03/2022 16:09:16 - INFO - codeparrot_training - Step 2328: {'lr': 0.000499993940563193, 'samples': 1192448, 'steps': 2328, 'loss/train': 3.3163259029388428} +03/03/2022 16:09:19 - INFO - codeparrot_training - Step 2329: {'lr': 0.0004999939035592351, 'samples': 1192960, 'steps': 2329, 'loss/train': 6.4422688484191895} +03/03/2022 16:09:21 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/03/2022 16:09:24 - INFO - codeparrot_training - Step 2330: {'lr': 0.0004999938664426339, 'samples': 1193472, 'steps': 2330, 'loss/train': 3.1382224559783936} +03/03/2022 16:09:27 - INFO - codeparrot_training - Step 2331: {'lr': 0.0004999938292133894, 'samples': 1193984, 'steps': 2331, 'loss/train': 3.230679750442505} +03/03/2022 16:09:31 - INFO - codeparrot_training - Step 2332: {'lr': 0.0004999937918715013, 'samples': 1194496, 'steps': 2332, 'loss/train': 1.8205024003982544} +03/03/2022 16:09:31 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 16:09:36 - INFO - codeparrot_training - Step 2333: {'lr': 0.00049999375441697, 'samples': 1195008, 'steps': 2333, 'loss/train': 3.3296782970428467} +03/03/2022 16:09:39 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 16:09:41 - INFO - codeparrot_training - Step 2334: {'lr': 0.0004999937168497954, 'samples': 1195520, 'steps': 2334, 'loss/train': 3.1814863681793213} +03/03/2022 16:09:44 - INFO - codeparrot_training - Step 2335: {'lr': 0.0004999936791699773, 'samples': 1196032, 'steps': 2335, 'loss/train': 3.746454954147339} +03/03/2022 16:09:47 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/03/2022 16:09:50 - INFO - codeparrot_training - Step 2336: {'lr': 0.0004999936413775161, 'samples': 1196544, 'steps': 2336, 'loss/train': 4.3374152183532715} +03/03/2022 16:09:53 - INFO - codeparrot_training - Step 2337: {'lr': 0.0004999936034724115, 'samples': 1197056, 'steps': 2337, 'loss/train': 3.145212173461914} +03/03/2022 16:09:55 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/03/2022 16:09:58 - INFO - codeparrot_training - Step 2338: {'lr': 0.0004999935654546638, 'samples': 1197568, 'steps': 2338, 'loss/train': 3.6831605434417725} +03/03/2022 16:10:01 - INFO - codeparrot_training - Step 2339: {'lr': 0.0004999935273242727, 'samples': 1198080, 'steps': 2339, 'loss/train': 2.7442963123321533} +03/03/2022 16:10:04 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/03/2022 16:10:07 - INFO - codeparrot_training - Step 2340: {'lr': 0.0004999934890812384, 'samples': 1198592, 'steps': 2340, 'loss/train': 2.606112241744995} +03/03/2022 16:10:10 - INFO - codeparrot_training - Step 2341: {'lr': 0.0004999934507255609, 'samples': 1199104, 'steps': 2341, 'loss/train': 2.9907941818237305} +03/03/2022 16:10:12 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/03/2022 16:10:15 - INFO - codeparrot_training - Step 2342: {'lr': 0.0004999934122572403, 'samples': 1199616, 'steps': 2342, 'loss/train': 3.5362229347229004} +03/03/2022 16:10:18 - INFO - codeparrot_training - Step 2343: {'lr': 0.0004999933736762763, 'samples': 1200128, 'steps': 2343, 'loss/train': 1.0581634044647217} +03/03/2022 16:10:20 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/03/2022 16:10:23 - INFO - codeparrot_training - Step 2344: {'lr': 0.0004999933349826694, 'samples': 1200640, 'steps': 2344, 'loss/train': 3.1751739978790283} +03/03/2022 16:10:27 - INFO - codeparrot_training - Step 2345: {'lr': 0.0004999932961764192, 'samples': 1201152, 'steps': 2345, 'loss/train': 3.5930683612823486} +03/03/2022 16:10:29 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/03/2022 16:10:32 - INFO - codeparrot_training - Step 2346: {'lr': 0.000499993257257526, 'samples': 1201664, 'steps': 2346, 'loss/train': 2.397047758102417} +03/03/2022 16:10:35 - INFO - codeparrot_training - Step 2347: {'lr': 0.0004999932182259897, 'samples': 1202176, 'steps': 2347, 'loss/train': 2.6743717193603516} +03/03/2022 16:10:37 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/03/2022 16:10:40 - INFO - codeparrot_training - Step 2348: {'lr': 0.0004999931790818102, 'samples': 1202688, 'steps': 2348, 'loss/train': 0.2999705970287323} +03/03/2022 16:10:44 - INFO - codeparrot_training - Step 2349: {'lr': 0.0004999931398249876, 'samples': 1203200, 'steps': 2349, 'loss/train': 4.60331916809082} +03/03/2022 16:10:46 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/03/2022 16:10:49 - INFO - codeparrot_training - Step 2350: {'lr': 0.0004999931004555221, 'samples': 1203712, 'steps': 2350, 'loss/train': 3.645582675933838} +03/03/2022 16:10:52 - INFO - codeparrot_training - Step 2351: {'lr': 0.0004999930609734135, 'samples': 1204224, 'steps': 2351, 'loss/train': 2.5348284244537354} +03/03/2022 16:10:55 - INFO - codeparrot_training - Step 2352: {'lr': 0.0004999930213786619, 'samples': 1204736, 'steps': 2352, 'loss/train': 1.9799997806549072} +03/03/2022 16:10:56 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/03/2022 16:11:01 - INFO - codeparrot_training - Step 2353: {'lr': 0.0004999929816712672, 'samples': 1205248, 'steps': 2353, 'loss/train': 2.9346108436584473} +03/03/2022 16:11:04 - INFO - codeparrot_training - Step 2354: {'lr': 0.0004999929418512296, 'samples': 1205760, 'steps': 2354, 'loss/train': 2.96004581451416} +03/03/2022 16:11:05 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/03/2022 16:11:09 - INFO - codeparrot_training - Step 2355: {'lr': 0.0004999929019185491, 'samples': 1206272, 'steps': 2355, 'loss/train': 2.9823479652404785} +03/03/2022 16:11:12 - INFO - codeparrot_training - Step 2356: {'lr': 0.0004999928618732256, 'samples': 1206784, 'steps': 2356, 'loss/train': 2.6017024517059326} +03/03/2022 16:11:13 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/03/2022 16:11:19 - INFO - codeparrot_training - Step 2357: {'lr': 0.0004999928217152591, 'samples': 1207296, 'steps': 2357, 'loss/train': 3.004572868347168} +03/03/2022 16:11:22 - INFO - codeparrot_training - Step 2358: {'lr': 0.0004999927814446498, 'samples': 1207808, 'steps': 2358, 'loss/train': 3.047327995300293} +03/03/2022 16:11:25 - INFO - codeparrot_training - Step 2359: {'lr': 0.0004999927410613975, 'samples': 1208320, 'steps': 2359, 'loss/train': 2.6660752296447754} +03/03/2022 16:11:26 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/03/2022 16:11:30 - INFO - codeparrot_training - Step 2360: {'lr': 0.0004999927005655024, 'samples': 1208832, 'steps': 2360, 'loss/train': 5.5060224533081055} +03/03/2022 16:11:33 - INFO - codeparrot_training - Step 2361: {'lr': 0.0004999926599569644, 'samples': 1209344, 'steps': 2361, 'loss/train': 3.2518301010131836} +03/03/2022 16:11:34 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/03/2022 16:11:39 - INFO - codeparrot_training - Step 2362: {'lr': 0.0004999926192357836, 'samples': 1209856, 'steps': 2362, 'loss/train': 3.4132797718048096} +03/03/2022 16:11:42 - INFO - codeparrot_training - Step 2363: {'lr': 0.00049999257840196, 'samples': 1210368, 'steps': 2363, 'loss/train': 3.439847230911255} +03/03/2022 16:11:42 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/03/2022 16:11:47 - INFO - codeparrot_training - Step 2364: {'lr': 0.0004999925374554936, 'samples': 1210880, 'steps': 2364, 'loss/train': 2.345672130584717} +03/03/2022 16:11:50 - INFO - codeparrot_training - Step 2365: {'lr': 0.0004999924963963845, 'samples': 1211392, 'steps': 2365, 'loss/train': 3.1876866817474365} +03/03/2022 16:11:51 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/03/2022 16:11:55 - INFO - codeparrot_training - Step 2366: {'lr': 0.0004999924552246324, 'samples': 1211904, 'steps': 2366, 'loss/train': 3.294334650039673} +03/03/2022 16:11:58 - INFO - codeparrot_training - Step 2367: {'lr': 0.0004999924139402378, 'samples': 1212416, 'steps': 2367, 'loss/train': 4.286800384521484} +03/03/2022 16:11:59 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/03/2022 16:12:04 - INFO - codeparrot_training - Step 2368: {'lr': 0.0004999923725432004, 'samples': 1212928, 'steps': 2368, 'loss/train': 2.7459046840667725} +03/03/2022 16:12:07 - INFO - codeparrot_training - Step 2369: {'lr': 0.0004999923310335202, 'samples': 1213440, 'steps': 2369, 'loss/train': 3.5260021686553955} +03/03/2022 16:12:07 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/03/2022 16:12:12 - INFO - codeparrot_training - Step 2370: {'lr': 0.0004999922894111975, 'samples': 1213952, 'steps': 2370, 'loss/train': 3.4140329360961914} +03/03/2022 16:12:15 - INFO - codeparrot_training - Step 2371: {'lr': 0.000499992247676232, 'samples': 1214464, 'steps': 2371, 'loss/train': 3.0296790599823} +03/03/2022 16:12:16 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/03/2022 16:12:20 - INFO - codeparrot_training - Step 2372: {'lr': 0.0004999922058286238, 'samples': 1214976, 'steps': 2372, 'loss/train': 3.218442440032959} +03/03/2022 16:12:24 - INFO - codeparrot_training - Step 2373: {'lr': 0.0004999921638683731, 'samples': 1215488, 'steps': 2373, 'loss/train': 2.8287227153778076} +03/03/2022 16:12:24 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/03/2022 16:12:29 - INFO - codeparrot_training - Step 2374: {'lr': 0.0004999921217954797, 'samples': 1216000, 'steps': 2374, 'loss/train': 3.5401194095611572} +03/03/2022 16:12:32 - INFO - codeparrot_training - Step 2375: {'lr': 0.0004999920796099437, 'samples': 1216512, 'steps': 2375, 'loss/train': 2.5211355686187744} +03/03/2022 16:12:33 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/03/2022 16:12:37 - INFO - codeparrot_training - Step 2376: {'lr': 0.0004999920373117652, 'samples': 1217024, 'steps': 2376, 'loss/train': 2.916353940963745} +03/03/2022 16:12:41 - INFO - codeparrot_training - Step 2377: {'lr': 0.0004999919949009442, 'samples': 1217536, 'steps': 2377, 'loss/train': 2.8482859134674072} +03/03/2022 16:12:41 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/03/2022 16:12:46 - INFO - codeparrot_training - Step 2378: {'lr': 0.0004999919523774806, 'samples': 1218048, 'steps': 2378, 'loss/train': 3.3612005710601807} +03/03/2022 16:12:49 - INFO - codeparrot_training - Step 2379: {'lr': 0.0004999919097413743, 'samples': 1218560, 'steps': 2379, 'loss/train': 4.168346881866455} +03/03/2022 16:12:50 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/03/2022 16:12:54 - INFO - codeparrot_training - Step 2380: {'lr': 0.0004999918669926258, 'samples': 1219072, 'steps': 2380, 'loss/train': 4.019054412841797} +03/03/2022 16:12:57 - INFO - codeparrot_training - Step 2381: {'lr': 0.0004999918241312346, 'samples': 1219584, 'steps': 2381, 'loss/train': 3.4647421836853027} +03/03/2022 16:12:58 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/03/2022 16:13:03 - INFO - codeparrot_training - Step 2382: {'lr': 0.0004999917811572011, 'samples': 1220096, 'steps': 2382, 'loss/train': 3.70270037651062} +03/03/2022 16:13:06 - INFO - codeparrot_training - Step 2383: {'lr': 0.000499991738070525, 'samples': 1220608, 'steps': 2383, 'loss/train': 3.242283582687378} +03/03/2022 16:13:06 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/03/2022 16:13:11 - INFO - codeparrot_training - Step 2384: {'lr': 0.0004999916948712066, 'samples': 1221120, 'steps': 2384, 'loss/train': 4.30795955657959} +03/03/2022 16:13:14 - INFO - codeparrot_training - Step 2385: {'lr': 0.0004999916515592458, 'samples': 1221632, 'steps': 2385, 'loss/train': 3.269681692123413} +03/03/2022 16:13:15 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 16:13:20 - INFO - codeparrot_training - Step 2386: {'lr': 0.0004999916081346426, 'samples': 1222144, 'steps': 2386, 'loss/train': 3.6740543842315674} +03/03/2022 16:13:23 - INFO - codeparrot_training - Step 2387: {'lr': 0.000499991564597397, 'samples': 1222656, 'steps': 2387, 'loss/train': 2.9508981704711914} +03/03/2022 16:13:23 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/03/2022 16:13:28 - INFO - codeparrot_training - Step 2388: {'lr': 0.0004999915209475091, 'samples': 1223168, 'steps': 2388, 'loss/train': 2.972585439682007} +03/03/2022 16:13:31 - INFO - codeparrot_training - Step 2389: {'lr': 0.0004999914771849788, 'samples': 1223680, 'steps': 2389, 'loss/train': 3.5003321170806885} +03/03/2022 16:13:31 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/03/2022 16:13:36 - INFO - codeparrot_training - Step 2390: {'lr': 0.0004999914333098063, 'samples': 1224192, 'steps': 2390, 'loss/train': 2.90128231048584} +03/03/2022 16:13:39 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/03/2022 16:13:42 - INFO - codeparrot_training - Step 2391: {'lr': 0.0004999913893219915, 'samples': 1224704, 'steps': 2391, 'loss/train': 2.4618241786956787} +03/03/2022 16:13:45 - INFO - codeparrot_training - Step 2392: {'lr': 0.0004999913452215345, 'samples': 1225216, 'steps': 2392, 'loss/train': 2.814356565475464} +03/03/2022 16:13:48 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 16:13:50 - INFO - codeparrot_training - Step 2393: {'lr': 0.0004999913010084351, 'samples': 1225728, 'steps': 2393, 'loss/train': 3.2441394329071045} +03/03/2022 16:13:53 - INFO - codeparrot_training - Step 2394: {'lr': 0.0004999912566826935, 'samples': 1226240, 'steps': 2394, 'loss/train': 3.7821013927459717} +03/03/2022 16:13:56 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 16:13:58 - INFO - codeparrot_training - Step 2395: {'lr': 0.0004999912122443098, 'samples': 1226752, 'steps': 2395, 'loss/train': 2.7253780364990234} +03/03/2022 16:14:01 - INFO - codeparrot_training - Step 2396: {'lr': 0.0004999911676932838, 'samples': 1227264, 'steps': 2396, 'loss/train': 2.9002113342285156} +03/03/2022 16:14:04 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/03/2022 16:14:07 - INFO - codeparrot_training - Step 2397: {'lr': 0.0004999911230296158, 'samples': 1227776, 'steps': 2397, 'loss/train': 3.4079055786132812} +03/03/2022 16:14:10 - INFO - codeparrot_training - Step 2398: {'lr': 0.0004999910782533055, 'samples': 1228288, 'steps': 2398, 'loss/train': 3.3085381984710693} +03/03/2022 16:14:12 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/03/2022 16:14:15 - INFO - codeparrot_training - Step 2399: {'lr': 0.0004999910333643531, 'samples': 1228800, 'steps': 2399, 'loss/train': 3.7810471057891846} +03/03/2022 16:14:18 - INFO - codeparrot_training - Step 2400: {'lr': 0.0004999909883627587, 'samples': 1229312, 'steps': 2400, 'loss/train': 2.359142780303955} +03/03/2022 16:14:20 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/03/2022 16:14:24 - INFO - codeparrot_training - Step 2401: {'lr': 0.0004999909432485221, 'samples': 1229824, 'steps': 2401, 'loss/train': 2.2719218730926514} +03/03/2022 16:14:27 - INFO - codeparrot_training - Step 2402: {'lr': 0.0004999908980216436, 'samples': 1230336, 'steps': 2402, 'loss/train': 2.815001964569092} +03/03/2022 16:14:29 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/03/2022 16:14:32 - INFO - codeparrot_training - Step 2403: {'lr': 0.0004999908526821229, 'samples': 1230848, 'steps': 2403, 'loss/train': 3.1026618480682373} +03/03/2022 16:14:35 - INFO - codeparrot_training - Step 2404: {'lr': 0.0004999908072299602, 'samples': 1231360, 'steps': 2404, 'loss/train': 2.900294542312622} +03/03/2022 16:14:37 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/03/2022 16:14:40 - INFO - codeparrot_training - Step 2405: {'lr': 0.0004999907616651556, 'samples': 1231872, 'steps': 2405, 'loss/train': 2.3728840351104736} +03/03/2022 16:14:43 - INFO - codeparrot_training - Step 2406: {'lr': 0.000499990715987709, 'samples': 1232384, 'steps': 2406, 'loss/train': 3.0262739658355713} +03/03/2022 16:14:46 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/03/2022 16:14:49 - INFO - codeparrot_training - Step 2407: {'lr': 0.0004999906701976203, 'samples': 1232896, 'steps': 2407, 'loss/train': 2.347027063369751} +03/03/2022 16:14:52 - INFO - codeparrot_training - Step 2408: {'lr': 0.0004999906242948898, 'samples': 1233408, 'steps': 2408, 'loss/train': 2.5089776515960693} +03/03/2022 16:14:54 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/03/2022 16:14:57 - INFO - codeparrot_training - Step 2409: {'lr': 0.0004999905782795173, 'samples': 1233920, 'steps': 2409, 'loss/train': 3.1692869663238525} +03/03/2022 16:15:00 - INFO - codeparrot_training - Step 2410: {'lr': 0.000499990532151503, 'samples': 1234432, 'steps': 2410, 'loss/train': 3.394524335861206} +03/03/2022 16:15:02 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/03/2022 16:15:06 - INFO - codeparrot_training - Step 2411: {'lr': 0.0004999904859108467, 'samples': 1234944, 'steps': 2411, 'loss/train': 3.1552481651306152} +03/03/2022 16:15:09 - INFO - codeparrot_training - Step 2412: {'lr': 0.0004999904395575486, 'samples': 1235456, 'steps': 2412, 'loss/train': 3.102290630340576} +03/03/2022 16:15:11 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/03/2022 16:15:14 - INFO - codeparrot_training - Step 2413: {'lr': 0.0004999903930916087, 'samples': 1235968, 'steps': 2413, 'loss/train': 3.4390149116516113} +03/03/2022 16:15:17 - INFO - codeparrot_training - Step 2414: {'lr': 0.000499990346513027, 'samples': 1236480, 'steps': 2414, 'loss/train': 2.2866628170013428} +03/03/2022 16:15:19 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/03/2022 16:15:22 - INFO - codeparrot_training - Step 2415: {'lr': 0.0004999902998218034, 'samples': 1236992, 'steps': 2415, 'loss/train': 3.4210660457611084} +03/03/2022 16:15:26 - INFO - codeparrot_training - Step 2416: {'lr': 0.000499990253017938, 'samples': 1237504, 'steps': 2416, 'loss/train': 1.953756332397461} +03/03/2022 16:15:27 - INFO - codeparrot_training - Skipping example with length 370 (seq_length=1024) +03/03/2022 16:15:31 - INFO - codeparrot_training - Step 2417: {'lr': 0.0004999902061014311, 'samples': 1238016, 'steps': 2417, 'loss/train': 2.254668712615967} +03/03/2022 16:15:34 - INFO - codeparrot_training - Step 2418: {'lr': 0.0004999901590722823, 'samples': 1238528, 'steps': 2418, 'loss/train': 3.59061336517334} +03/03/2022 16:15:35 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/03/2022 16:15:39 - INFO - codeparrot_training - Step 2419: {'lr': 0.0004999901119304919, 'samples': 1239040, 'steps': 2419, 'loss/train': 2.7408499717712402} +03/03/2022 16:15:42 - INFO - codeparrot_training - Step 2420: {'lr': 0.0004999900646760597, 'samples': 1239552, 'steps': 2420, 'loss/train': 3.4129180908203125} +03/03/2022 16:15:44 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/03/2022 16:15:47 - INFO - codeparrot_training - Step 2421: {'lr': 0.0004999900173089858, 'samples': 1240064, 'steps': 2421, 'loss/train': 3.2427544593811035} +03/03/2022 16:15:51 - INFO - codeparrot_training - Step 2422: {'lr': 0.0004999899698292703, 'samples': 1240576, 'steps': 2422, 'loss/train': 3.2978503704071045} +03/03/2022 16:15:52 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/03/2022 16:15:56 - INFO - codeparrot_training - Step 2423: {'lr': 0.0004999899222369132, 'samples': 1241088, 'steps': 2423, 'loss/train': 3.648098945617676} +03/03/2022 16:15:59 - INFO - codeparrot_training - Step 2424: {'lr': 0.0004999898745319145, 'samples': 1241600, 'steps': 2424, 'loss/train': 2.8868560791015625} +03/03/2022 16:16:00 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/03/2022 16:16:04 - INFO - codeparrot_training - Step 2425: {'lr': 0.0004999898267142741, 'samples': 1242112, 'steps': 2425, 'loss/train': 3.0442099571228027} +03/03/2022 16:16:08 - INFO - codeparrot_training - Step 2426: {'lr': 0.0004999897787839923, 'samples': 1242624, 'steps': 2426, 'loss/train': 2.8172223567962646} +03/03/2022 16:16:08 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/03/2022 16:16:13 - INFO - codeparrot_training - Step 2427: {'lr': 0.000499989730741069, 'samples': 1243136, 'steps': 2427, 'loss/train': 2.9016196727752686} +03/03/2022 16:16:16 - INFO - codeparrot_training - Step 2428: {'lr': 0.000499989682585504, 'samples': 1243648, 'steps': 2428, 'loss/train': 2.6202454566955566} +03/03/2022 16:16:17 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/03/2022 16:16:21 - INFO - codeparrot_training - Step 2429: {'lr': 0.0004999896343172976, 'samples': 1244160, 'steps': 2429, 'loss/train': 3.309250593185425} +03/03/2022 16:16:24 - INFO - codeparrot_training - Step 2430: {'lr': 0.0004999895859364498, 'samples': 1244672, 'steps': 2430, 'loss/train': 3.224417209625244} +03/03/2022 16:16:25 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 16:16:30 - INFO - codeparrot_training - Step 2431: {'lr': 0.0004999895374429605, 'samples': 1245184, 'steps': 2431, 'loss/train': 2.3102617263793945} +03/03/2022 16:16:33 - INFO - codeparrot_training - Step 2432: {'lr': 0.0004999894888368297, 'samples': 1245696, 'steps': 2432, 'loss/train': 3.905405282974243} +03/03/2022 16:16:33 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 16:16:38 - INFO - codeparrot_training - Step 2433: {'lr': 0.0004999894401180576, 'samples': 1246208, 'steps': 2433, 'loss/train': 1.721686601638794} +03/03/2022 16:16:41 - INFO - codeparrot_training - Step 2434: {'lr': 0.0004999893912866441, 'samples': 1246720, 'steps': 2434, 'loss/train': 2.388075828552246} +03/03/2022 16:16:41 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/03/2022 16:16:46 - INFO - codeparrot_training - Step 2435: {'lr': 0.0004999893423425892, 'samples': 1247232, 'steps': 2435, 'loss/train': 2.9567904472351074} +03/03/2022 16:16:50 - INFO - codeparrot_training - Step 2436: {'lr': 0.0004999892932858929, 'samples': 1247744, 'steps': 2436, 'loss/train': 1.9273074865341187} +03/03/2022 16:16:50 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/03/2022 16:16:55 - INFO - codeparrot_training - Step 2437: {'lr': 0.0004999892441165554, 'samples': 1248256, 'steps': 2437, 'loss/train': 3.0093343257904053} +03/03/2022 16:16:58 - INFO - codeparrot_training - Step 2438: {'lr': 0.0004999891948345765, 'samples': 1248768, 'steps': 2438, 'loss/train': 2.2821545600891113} +03/03/2022 16:16:59 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/03/2022 16:17:03 - INFO - codeparrot_training - Step 2439: {'lr': 0.0004999891454399565, 'samples': 1249280, 'steps': 2439, 'loss/train': 3.1050679683685303} +03/03/2022 16:17:07 - INFO - codeparrot_training - Step 2440: {'lr': 0.000499989095932695, 'samples': 1249792, 'steps': 2440, 'loss/train': 2.4375011920928955} +03/03/2022 16:17:07 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/03/2022 16:17:12 - INFO - codeparrot_training - Step 2441: {'lr': 0.0004999890463127924, 'samples': 1250304, 'steps': 2441, 'loss/train': 2.7024149894714355} +03/03/2022 16:17:15 - INFO - codeparrot_training - Step 2442: {'lr': 0.0004999889965802486, 'samples': 1250816, 'steps': 2442, 'loss/train': 3.8853774070739746} +03/03/2022 16:17:16 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/03/2022 16:17:20 - INFO - codeparrot_training - Step 2443: {'lr': 0.0004999889467350636, 'samples': 1251328, 'steps': 2443, 'loss/train': 2.303218364715576} +03/03/2022 16:17:24 - INFO - codeparrot_training - Step 2444: {'lr': 0.0004999888967772375, 'samples': 1251840, 'steps': 2444, 'loss/train': 2.5290753841400146} +03/03/2022 16:17:24 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/03/2022 16:17:29 - INFO - codeparrot_training - Step 2445: {'lr': 0.0004999888467067702, 'samples': 1252352, 'steps': 2445, 'loss/train': 2.7982707023620605} +03/03/2022 16:17:32 - INFO - codeparrot_training - Step 2446: {'lr': 0.0004999887965236617, 'samples': 1252864, 'steps': 2446, 'loss/train': 1.5639216899871826} +03/03/2022 16:17:33 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/03/2022 16:17:37 - INFO - codeparrot_training - Step 2447: {'lr': 0.0004999887462279123, 'samples': 1253376, 'steps': 2447, 'loss/train': 3.469240188598633} +03/03/2022 16:17:40 - INFO - codeparrot_training - Step 2448: {'lr': 0.0004999886958195216, 'samples': 1253888, 'steps': 2448, 'loss/train': 2.5281832218170166} +03/03/2022 16:17:41 - INFO - codeparrot_training - Skipping example with length 589 (seq_length=1024) +03/03/2022 16:17:46 - INFO - codeparrot_training - Step 2449: {'lr': 0.00049998864529849, 'samples': 1254400, 'steps': 2449, 'loss/train': 3.104919195175171} +03/03/2022 16:17:49 - INFO - codeparrot_training - Step 2450: {'lr': 0.0004999885946648174, 'samples': 1254912, 'steps': 2450, 'loss/train': 2.9260053634643555} +03/03/2022 16:17:49 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/03/2022 16:17:54 - INFO - codeparrot_training - Step 2451: {'lr': 0.0004999885439185037, 'samples': 1255424, 'steps': 2451, 'loss/train': 2.8842074871063232} +03/03/2022 16:17:57 - INFO - codeparrot_training - Step 2452: {'lr': 0.0004999884930595491, 'samples': 1255936, 'steps': 2452, 'loss/train': 4.428753852844238} +03/03/2022 16:17:58 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/03/2022 16:18:03 - INFO - codeparrot_training - Step 2453: {'lr': 0.0004999884420879534, 'samples': 1256448, 'steps': 2453, 'loss/train': 3.476101875305176} +03/03/2022 16:18:06 - INFO - codeparrot_training - Step 2454: {'lr': 0.000499988391003717, 'samples': 1256960, 'steps': 2454, 'loss/train': 3.281400203704834} +03/03/2022 16:18:06 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/03/2022 16:18:11 - INFO - codeparrot_training - Step 2455: {'lr': 0.0004999883398068396, 'samples': 1257472, 'steps': 2455, 'loss/train': 2.3167288303375244} +03/03/2022 16:18:14 - INFO - codeparrot_training - Step 2456: {'lr': 0.0004999882884973212, 'samples': 1257984, 'steps': 2456, 'loss/train': 3.890521287918091} +03/03/2022 16:18:14 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/03/2022 16:18:19 - INFO - codeparrot_training - Step 2457: {'lr': 0.000499988237075162, 'samples': 1258496, 'steps': 2457, 'loss/train': 0.8238630294799805} +03/03/2022 16:18:23 - INFO - codeparrot_training - Step 2458: {'lr': 0.000499988185540362, 'samples': 1259008, 'steps': 2458, 'loss/train': 2.5662012100219727} +03/03/2022 16:18:23 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 16:18:28 - INFO - codeparrot_training - Step 2459: {'lr': 0.0004999881338929211, 'samples': 1259520, 'steps': 2459, 'loss/train': 2.84232497215271} +03/03/2022 16:18:31 - INFO - codeparrot_training - Step 2460: {'lr': 0.0004999880821328395, 'samples': 1260032, 'steps': 2460, 'loss/train': 3.193075656890869} +03/03/2022 16:18:31 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/03/2022 16:18:36 - INFO - codeparrot_training - Step 2461: {'lr': 0.000499988030260117, 'samples': 1260544, 'steps': 2461, 'loss/train': 3.0208098888397217} +03/03/2022 16:18:40 - INFO - codeparrot_training - Step 2462: {'lr': 0.0004999879782747539, 'samples': 1261056, 'steps': 2462, 'loss/train': 2.596449851989746} +03/03/2022 16:18:40 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/03/2022 16:18:46 - INFO - codeparrot_training - Step 2463: {'lr': 0.00049998792617675, 'samples': 1261568, 'steps': 2463, 'loss/train': 3.271831750869751} +03/03/2022 16:18:49 - INFO - codeparrot_training - Step 2464: {'lr': 0.0004999878739661053, 'samples': 1262080, 'steps': 2464, 'loss/train': 3.159120798110962} +03/03/2022 16:18:50 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 16:18:54 - INFO - codeparrot_training - Step 2465: {'lr': 0.0004999878216428201, 'samples': 1262592, 'steps': 2465, 'loss/train': 2.5441982746124268} +03/03/2022 16:18:57 - INFO - codeparrot_training - Step 2466: {'lr': 0.0004999877692068942, 'samples': 1263104, 'steps': 2466, 'loss/train': 2.4670543670654297} +03/03/2022 16:18:59 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/03/2022 16:19:02 - INFO - codeparrot_training - Step 2467: {'lr': 0.0004999877166583276, 'samples': 1263616, 'steps': 2467, 'loss/train': 3.816309928894043} +03/03/2022 16:19:06 - INFO - codeparrot_training - Step 2468: {'lr': 0.0004999876639971204, 'samples': 1264128, 'steps': 2468, 'loss/train': 2.9453892707824707} +03/03/2022 16:19:07 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/03/2022 16:19:11 - INFO - codeparrot_training - Step 2469: {'lr': 0.0004999876112232726, 'samples': 1264640, 'steps': 2469, 'loss/train': 3.4072937965393066} +03/03/2022 16:19:14 - INFO - codeparrot_training - Step 2470: {'lr': 0.0004999875583367844, 'samples': 1265152, 'steps': 2470, 'loss/train': 2.753385543823242} +03/03/2022 16:19:15 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/03/2022 16:19:19 - INFO - codeparrot_training - Step 2471: {'lr': 0.0004999875053376555, 'samples': 1265664, 'steps': 2471, 'loss/train': 3.0363874435424805} +03/03/2022 16:19:22 - INFO - codeparrot_training - Step 2472: {'lr': 0.0004999874522258861, 'samples': 1266176, 'steps': 2472, 'loss/train': 2.5519156455993652} +03/03/2022 16:19:23 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 16:19:28 - INFO - codeparrot_training - Step 2473: {'lr': 0.0004999873990014763, 'samples': 1266688, 'steps': 2473, 'loss/train': 3.166799545288086} +03/03/2022 16:19:31 - INFO - codeparrot_training - Step 2474: {'lr': 0.0004999873456644259, 'samples': 1267200, 'steps': 2474, 'loss/train': 2.9901912212371826} +03/03/2022 16:19:32 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/03/2022 16:19:37 - INFO - codeparrot_training - Step 2475: {'lr': 0.0004999872922147352, 'samples': 1267712, 'steps': 2475, 'loss/train': 2.8614206314086914} +03/03/2022 16:19:40 - INFO - codeparrot_training - Step 2476: {'lr': 0.0004999872386524041, 'samples': 1268224, 'steps': 2476, 'loss/train': 2.3810527324676514} +03/03/2022 16:19:43 - INFO - codeparrot_training - Step 2477: {'lr': 0.0004999871849774325, 'samples': 1268736, 'steps': 2477, 'loss/train': 3.614152669906616} +03/03/2022 16:19:43 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/03/2022 16:19:48 - INFO - codeparrot_training - Step 2478: {'lr': 0.0004999871311898205, 'samples': 1269248, 'steps': 2478, 'loss/train': 3.7695107460021973} +03/03/2022 16:19:51 - INFO - codeparrot_training - Step 2479: {'lr': 0.0004999870772895683, 'samples': 1269760, 'steps': 2479, 'loss/train': 2.80676007270813} +03/03/2022 16:19:51 - INFO - codeparrot_training - Skipping example with length 255 (seq_length=1024) +03/03/2022 16:19:57 - INFO - codeparrot_training - Step 2480: {'lr': 0.0004999870232766756, 'samples': 1270272, 'steps': 2480, 'loss/train': 2.938058376312256} +03/03/2022 16:20:00 - INFO - codeparrot_training - Step 2481: {'lr': 0.0004999869691511428, 'samples': 1270784, 'steps': 2481, 'loss/train': 3.4740536212921143} +03/03/2022 16:20:00 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/03/2022 16:20:05 - INFO - codeparrot_training - Step 2482: {'lr': 0.0004999869149129696, 'samples': 1271296, 'steps': 2482, 'loss/train': 3.245323657989502} +03/03/2022 16:20:08 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 16:20:10 - INFO - codeparrot_training - Step 2483: {'lr': 0.0004999868605621563, 'samples': 1271808, 'steps': 2483, 'loss/train': 3.025552749633789} +03/03/2022 16:20:13 - INFO - codeparrot_training - Step 2484: {'lr': 0.0004999868060987027, 'samples': 1272320, 'steps': 2484, 'loss/train': 3.455415725708008} +03/03/2022 16:20:17 - INFO - codeparrot_training - Step 2485: {'lr': 0.0004999867515226088, 'samples': 1272832, 'steps': 2485, 'loss/train': 2.180940866470337} +03/03/2022 16:20:17 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/03/2022 16:20:22 - INFO - codeparrot_training - Step 2486: {'lr': 0.0004999866968338748, 'samples': 1273344, 'steps': 2486, 'loss/train': 0.6444083452224731} +03/03/2022 16:20:25 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/03/2022 16:20:27 - INFO - codeparrot_training - Step 2487: {'lr': 0.0004999866420325006, 'samples': 1273856, 'steps': 2487, 'loss/train': 3.20237135887146} +03/03/2022 16:20:30 - INFO - codeparrot_training - Step 2488: {'lr': 0.0004999865871184863, 'samples': 1274368, 'steps': 2488, 'loss/train': 4.782764911651611} +03/03/2022 16:20:33 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/03/2022 16:20:36 - INFO - codeparrot_training - Step 2489: {'lr': 0.000499986532091832, 'samples': 1274880, 'steps': 2489, 'loss/train': 2.8954098224639893} +03/03/2022 16:20:39 - INFO - codeparrot_training - Step 2490: {'lr': 0.0004999864769525375, 'samples': 1275392, 'steps': 2490, 'loss/train': 1.8068829774856567} +03/03/2022 16:20:42 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/03/2022 16:20:44 - INFO - codeparrot_training - Step 2491: {'lr': 0.000499986421700603, 'samples': 1275904, 'steps': 2491, 'loss/train': 2.3099799156188965} +03/03/2022 16:20:47 - INFO - codeparrot_training - Step 2492: {'lr': 0.0004999863663360285, 'samples': 1276416, 'steps': 2492, 'loss/train': 3.421651840209961} +03/03/2022 16:20:50 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/03/2022 16:20:53 - INFO - codeparrot_training - Step 2493: {'lr': 0.000499986310858814, 'samples': 1276928, 'steps': 2493, 'loss/train': 3.2742176055908203} +03/03/2022 16:20:56 - INFO - codeparrot_training - Step 2494: {'lr': 0.0004999862552689595, 'samples': 1277440, 'steps': 2494, 'loss/train': 2.8373236656188965} +03/03/2022 16:20:58 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/03/2022 16:21:01 - INFO - codeparrot_training - Step 2495: {'lr': 0.000499986199566465, 'samples': 1277952, 'steps': 2495, 'loss/train': 2.910813093185425} +03/03/2022 16:21:04 - INFO - codeparrot_training - Step 2496: {'lr': 0.0004999861437513306, 'samples': 1278464, 'steps': 2496, 'loss/train': 3.323831558227539} +03/03/2022 16:21:06 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/03/2022 16:21:09 - INFO - codeparrot_training - Step 2497: {'lr': 0.0004999860878235564, 'samples': 1278976, 'steps': 2497, 'loss/train': 2.974306583404541} +03/03/2022 16:21:13 - INFO - codeparrot_training - Step 2498: {'lr': 0.0004999860317831423, 'samples': 1279488, 'steps': 2498, 'loss/train': 3.1286723613739014} +03/03/2022 16:21:14 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/03/2022 16:21:18 - INFO - codeparrot_training - Step 2499: {'lr': 0.0004999859756300883, 'samples': 1280000, 'steps': 2499, 'loss/train': 3.0235705375671387} +03/03/2022 16:21:21 - INFO - codeparrot_training - Step 2500: {'lr': 0.0004999859193643945, 'samples': 1280512, 'steps': 2500, 'loss/train': 3.4169411659240723} +03/03/2022 16:21:23 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/03/2022 16:21:26 - INFO - codeparrot_training - Step 2501: {'lr': 0.0004999858629860609, 'samples': 1281024, 'steps': 2501, 'loss/train': 2.4314191341400146} +03/03/2022 16:21:29 - INFO - codeparrot_training - Step 2502: {'lr': 0.0004999858064950875, 'samples': 1281536, 'steps': 2502, 'loss/train': 2.833728313446045} +03/03/2022 16:21:31 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/03/2022 16:21:35 - INFO - codeparrot_training - Step 2503: {'lr': 0.0004999857498914744, 'samples': 1282048, 'steps': 2503, 'loss/train': 3.0542643070220947} +03/03/2022 16:21:38 - INFO - codeparrot_training - Step 2504: {'lr': 0.0004999856931752215, 'samples': 1282560, 'steps': 2504, 'loss/train': 4.078123569488525} +03/03/2022 16:21:40 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/03/2022 16:21:43 - INFO - codeparrot_training - Step 2505: {'lr': 0.000499985636346329, 'samples': 1283072, 'steps': 2505, 'loss/train': 0.897447407245636} +03/03/2022 16:21:46 - INFO - codeparrot_training - Step 2506: {'lr': 0.0004999855794047968, 'samples': 1283584, 'steps': 2506, 'loss/train': 3.264345645904541} +03/03/2022 16:21:48 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/03/2022 16:21:52 - INFO - codeparrot_training - Step 2507: {'lr': 0.000499985522350625, 'samples': 1284096, 'steps': 2507, 'loss/train': 2.348832130432129} +03/03/2022 16:21:55 - INFO - codeparrot_training - Step 2508: {'lr': 0.0004999854651838134, 'samples': 1284608, 'steps': 2508, 'loss/train': 3.6561119556427} +03/03/2022 16:21:57 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/03/2022 16:22:00 - INFO - codeparrot_training - Step 2509: {'lr': 0.0004999854079043624, 'samples': 1285120, 'steps': 2509, 'loss/train': 4.345146179199219} +03/03/2022 16:22:03 - INFO - codeparrot_training - Step 2510: {'lr': 0.0004999853505122718, 'samples': 1285632, 'steps': 2510, 'loss/train': 2.546726703643799} +03/03/2022 16:22:05 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/03/2022 16:22:08 - INFO - codeparrot_training - Step 2511: {'lr': 0.0004999852930075416, 'samples': 1286144, 'steps': 2511, 'loss/train': 2.6213409900665283} +03/03/2022 16:22:12 - INFO - codeparrot_training - Step 2512: {'lr': 0.0004999852353901719, 'samples': 1286656, 'steps': 2512, 'loss/train': 3.1558749675750732} +03/03/2022 16:22:13 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/03/2022 16:22:17 - INFO - codeparrot_training - Step 2513: {'lr': 0.0004999851776601627, 'samples': 1287168, 'steps': 2513, 'loss/train': 3.087672233581543} +03/03/2022 16:22:20 - INFO - codeparrot_training - Step 2514: {'lr': 0.0004999851198175141, 'samples': 1287680, 'steps': 2514, 'loss/train': 3.7087156772613525} +03/03/2022 16:22:22 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/03/2022 16:22:25 - INFO - codeparrot_training - Step 2515: {'lr': 0.0004999850618622259, 'samples': 1288192, 'steps': 2515, 'loss/train': 0.6918866038322449} +03/03/2022 16:22:28 - INFO - codeparrot_training - Step 2516: {'lr': 0.0004999850037942984, 'samples': 1288704, 'steps': 2516, 'loss/train': 6.471134662628174} +03/03/2022 16:22:30 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/03/2022 16:22:34 - INFO - codeparrot_training - Step 2517: {'lr': 0.0004999849456137316, 'samples': 1289216, 'steps': 2517, 'loss/train': 3.3622610569000244} +03/03/2022 16:22:37 - INFO - codeparrot_training - Step 2518: {'lr': 0.0004999848873205254, 'samples': 1289728, 'steps': 2518, 'loss/train': 3.737034559249878} +03/03/2022 16:22:38 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/03/2022 16:22:42 - INFO - codeparrot_training - Step 2519: {'lr': 0.0004999848289146798, 'samples': 1290240, 'steps': 2519, 'loss/train': 2.79614520072937} +03/03/2022 16:22:45 - INFO - codeparrot_training - Step 2520: {'lr': 0.0004999847703961948, 'samples': 1290752, 'steps': 2520, 'loss/train': 1.873476505279541} +03/03/2022 16:22:47 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/03/2022 16:22:50 - INFO - codeparrot_training - Step 2521: {'lr': 0.0004999847117650708, 'samples': 1291264, 'steps': 2521, 'loss/train': 2.7374868392944336} +03/03/2022 16:22:54 - INFO - codeparrot_training - Step 2522: {'lr': 0.0004999846530213074, 'samples': 1291776, 'steps': 2522, 'loss/train': 2.8232455253601074} +03/03/2022 16:22:55 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/03/2022 16:22:59 - INFO - codeparrot_training - Step 2523: {'lr': 0.0004999845941649048, 'samples': 1292288, 'steps': 2523, 'loss/train': 2.8543264865875244} +03/03/2022 16:23:02 - INFO - codeparrot_training - Step 2524: {'lr': 0.0004999845351958629, 'samples': 1292800, 'steps': 2524, 'loss/train': 2.803300619125366} +03/03/2022 16:23:03 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/03/2022 16:23:07 - INFO - codeparrot_training - Step 2525: {'lr': 0.0004999844761141818, 'samples': 1293312, 'steps': 2525, 'loss/train': 3.112408399581909} +03/03/2022 16:23:11 - INFO - codeparrot_training - Step 2526: {'lr': 0.0004999844169198617, 'samples': 1293824, 'steps': 2526, 'loss/train': 2.4133479595184326} +03/03/2022 16:23:12 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/03/2022 16:23:16 - INFO - codeparrot_training - Step 2527: {'lr': 0.0004999843576129024, 'samples': 1294336, 'steps': 2527, 'loss/train': 3.1589486598968506} +03/03/2022 16:23:19 - INFO - codeparrot_training - Step 2528: {'lr': 0.000499984298193304, 'samples': 1294848, 'steps': 2528, 'loss/train': 2.494608163833618} +03/03/2022 16:23:20 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/03/2022 16:23:24 - INFO - codeparrot_training - Step 2529: {'lr': 0.0004999842386610666, 'samples': 1295360, 'steps': 2529, 'loss/train': 3.9454216957092285} +03/03/2022 16:23:28 - INFO - codeparrot_training - Step 2530: {'lr': 0.0004999841790161901, 'samples': 1295872, 'steps': 2530, 'loss/train': 2.254082441329956} +03/03/2022 16:23:29 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/03/2022 16:23:33 - INFO - codeparrot_training - Step 2531: {'lr': 0.0004999841192586746, 'samples': 1296384, 'steps': 2531, 'loss/train': 4.019567012786865} +03/03/2022 16:23:36 - INFO - codeparrot_training - Step 2532: {'lr': 0.0004999840593885201, 'samples': 1296896, 'steps': 2532, 'loss/train': 3.2863903045654297} +03/03/2022 16:23:37 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/03/2022 16:23:41 - INFO - codeparrot_training - Step 2533: {'lr': 0.0004999839994057266, 'samples': 1297408, 'steps': 2533, 'loss/train': 1.4264503717422485} +03/03/2022 16:23:45 - INFO - codeparrot_training - Step 2534: {'lr': 0.0004999839393102943, 'samples': 1297920, 'steps': 2534, 'loss/train': 3.4209253787994385} +03/03/2022 16:23:46 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/03/2022 16:23:50 - INFO - codeparrot_training - Step 2535: {'lr': 0.0004999838791022229, 'samples': 1298432, 'steps': 2535, 'loss/train': 3.1111221313476562} +03/03/2022 16:23:53 - INFO - codeparrot_training - Step 2536: {'lr': 0.0004999838187815128, 'samples': 1298944, 'steps': 2536, 'loss/train': 3.1084094047546387} +03/03/2022 16:23:54 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/03/2022 16:23:58 - INFO - codeparrot_training - Step 2537: {'lr': 0.0004999837583481638, 'samples': 1299456, 'steps': 2537, 'loss/train': 3.6611745357513428} +03/03/2022 16:24:01 - INFO - codeparrot_training - Step 2538: {'lr': 0.000499983697802176, 'samples': 1299968, 'steps': 2538, 'loss/train': 3.2353146076202393} +03/03/2022 16:24:03 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/03/2022 16:24:07 - INFO - codeparrot_training - Step 2539: {'lr': 0.0004999836371435494, 'samples': 1300480, 'steps': 2539, 'loss/train': 3.1310691833496094} +03/03/2022 16:24:10 - INFO - codeparrot_training - Step 2540: {'lr': 0.000499983576372284, 'samples': 1300992, 'steps': 2540, 'loss/train': 3.1072871685028076} +03/03/2022 16:24:11 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 16:24:15 - INFO - codeparrot_training - Step 2541: {'lr': 0.0004999835154883798, 'samples': 1301504, 'steps': 2541, 'loss/train': 3.006399631500244} +03/03/2022 16:24:18 - INFO - codeparrot_training - Step 2542: {'lr': 0.0004999834544918369, 'samples': 1302016, 'steps': 2542, 'loss/train': 3.5297250747680664} +03/03/2022 16:24:19 - INFO - codeparrot_training - Skipping example with length 5 (seq_length=1024) +03/03/2022 16:24:23 - INFO - codeparrot_training - Step 2543: {'lr': 0.0004999833933826554, 'samples': 1302528, 'steps': 2543, 'loss/train': 3.0366342067718506} +03/03/2022 16:24:27 - INFO - codeparrot_training - Step 2544: {'lr': 0.0004999833321608351, 'samples': 1303040, 'steps': 2544, 'loss/train': 3.1857190132141113} +03/03/2022 16:24:27 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/03/2022 16:24:32 - INFO - codeparrot_training - Step 2545: {'lr': 0.0004999832708263764, 'samples': 1303552, 'steps': 2545, 'loss/train': 3.755324363708496} +03/03/2022 16:24:35 - INFO - codeparrot_training - Step 2546: {'lr': 0.000499983209379279, 'samples': 1304064, 'steps': 2546, 'loss/train': 3.7012033462524414} +03/03/2022 16:24:36 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/03/2022 16:24:40 - INFO - codeparrot_training - Step 2547: {'lr': 0.0004999831478195429, 'samples': 1304576, 'steps': 2547, 'loss/train': 3.20994234085083} +03/03/2022 16:24:43 - INFO - codeparrot_training - Step 2548: {'lr': 0.0004999830861471684, 'samples': 1305088, 'steps': 2548, 'loss/train': 0.6826435923576355} +03/03/2022 16:24:44 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 16:24:49 - INFO - codeparrot_training - Step 2549: {'lr': 0.0004999830243621553, 'samples': 1305600, 'steps': 2549, 'loss/train': 2.4455618858337402} +03/03/2022 16:24:52 - INFO - codeparrot_training - Step 2550: {'lr': 0.0004999829624645037, 'samples': 1306112, 'steps': 2550, 'loss/train': 2.742499589920044} +03/03/2022 16:24:52 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/03/2022 16:24:57 - INFO - codeparrot_training - Step 2551: {'lr': 0.0004999829004542136, 'samples': 1306624, 'steps': 2551, 'loss/train': 3.267723321914673} +03/03/2022 16:25:00 - INFO - codeparrot_training - Step 2552: {'lr': 0.0004999828383312851, 'samples': 1307136, 'steps': 2552, 'loss/train': 3.2674925327301025} +03/03/2022 16:25:01 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/03/2022 16:25:06 - INFO - codeparrot_training - Step 2553: {'lr': 0.0004999827760957182, 'samples': 1307648, 'steps': 2553, 'loss/train': 0.8292660117149353} +03/03/2022 16:25:09 - INFO - codeparrot_training - Step 2554: {'lr': 0.000499982713747513, 'samples': 1308160, 'steps': 2554, 'loss/train': 3.120907783508301} +03/03/2022 16:25:09 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/03/2022 16:25:14 - INFO - codeparrot_training - Step 2555: {'lr': 0.0004999826512866693, 'samples': 1308672, 'steps': 2555, 'loss/train': 3.702171564102173} +03/03/2022 16:25:17 - INFO - codeparrot_training - Step 2556: {'lr': 0.0004999825887131874, 'samples': 1309184, 'steps': 2556, 'loss/train': 3.663010597229004} +03/03/2022 16:25:18 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/03/2022 16:25:23 - INFO - codeparrot_training - Step 2557: {'lr': 0.0004999825260270671, 'samples': 1309696, 'steps': 2557, 'loss/train': 3.624185562133789} +03/03/2022 16:25:26 - INFO - codeparrot_training - Step 2558: {'lr': 0.0004999824632283086, 'samples': 1310208, 'steps': 2558, 'loss/train': 2.0385353565216064} +03/03/2022 16:25:26 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/03/2022 16:25:31 - INFO - codeparrot_training - Step 2559: {'lr': 0.0004999824003169119, 'samples': 1310720, 'steps': 2559, 'loss/train': 1.7916879653930664} +03/03/2022 16:25:34 - INFO - codeparrot_training - Step 2560: {'lr': 0.000499982337292877, 'samples': 1311232, 'steps': 2560, 'loss/train': 1.947210431098938} +03/03/2022 16:25:34 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/03/2022 16:25:39 - INFO - codeparrot_training - Step 2561: {'lr': 0.0004999822741562038, 'samples': 1311744, 'steps': 2561, 'loss/train': 2.9803450107574463} +03/03/2022 16:25:43 - INFO - codeparrot_training - Step 2562: {'lr': 0.0004999822109068925, 'samples': 1312256, 'steps': 2562, 'loss/train': 1.7667006254196167} +03/03/2022 16:25:43 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/03/2022 16:25:48 - INFO - codeparrot_training - Step 2563: {'lr': 0.000499982147544943, 'samples': 1312768, 'steps': 2563, 'loss/train': 3.544752836227417} +03/03/2022 16:25:51 - INFO - codeparrot_training - Step 2564: {'lr': 0.0004999820840703554, 'samples': 1313280, 'steps': 2564, 'loss/train': 2.7020463943481445} +03/03/2022 16:25:51 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/03/2022 16:25:56 - INFO - codeparrot_training - Step 2565: {'lr': 0.0004999820204831298, 'samples': 1313792, 'steps': 2565, 'loss/train': 2.3768131732940674} +03/03/2022 16:25:59 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/03/2022 16:26:01 - INFO - codeparrot_training - Step 2566: {'lr': 0.0004999819567832661, 'samples': 1314304, 'steps': 2566, 'loss/train': 3.2334578037261963} +03/03/2022 16:26:05 - INFO - codeparrot_training - Step 2567: {'lr': 0.0004999818929707645, 'samples': 1314816, 'steps': 2567, 'loss/train': 3.352979898452759} +03/03/2022 16:26:07 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 16:26:10 - INFO - codeparrot_training - Step 2568: {'lr': 0.0004999818290456249, 'samples': 1315328, 'steps': 2568, 'loss/train': 2.679513931274414} +03/03/2022 16:26:13 - INFO - codeparrot_training - Step 2569: {'lr': 0.0004999817650078474, 'samples': 1315840, 'steps': 2569, 'loss/train': 2.85469126701355} +03/03/2022 16:26:15 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 16:26:18 - INFO - codeparrot_training - Step 2570: {'lr': 0.0004999817008574318, 'samples': 1316352, 'steps': 2570, 'loss/train': 3.305773973464966} +03/03/2022 16:26:21 - INFO - codeparrot_training - Step 2571: {'lr': 0.0004999816365943784, 'samples': 1316864, 'steps': 2571, 'loss/train': 2.1476075649261475} +03/03/2022 16:26:24 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/03/2022 16:26:27 - INFO - codeparrot_training - Step 2572: {'lr': 0.000499981572218687, 'samples': 1317376, 'steps': 2572, 'loss/train': 2.854360342025757} +03/03/2022 16:26:30 - INFO - codeparrot_training - Step 2573: {'lr': 0.0004999815077303579, 'samples': 1317888, 'steps': 2573, 'loss/train': 2.993514060974121} +03/03/2022 16:26:32 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/03/2022 16:26:35 - INFO - codeparrot_training - Step 2574: {'lr': 0.000499981443129391, 'samples': 1318400, 'steps': 2574, 'loss/train': 3.482952356338501} +03/03/2022 16:26:38 - INFO - codeparrot_training - Step 2575: {'lr': 0.0004999813784157863, 'samples': 1318912, 'steps': 2575, 'loss/train': 2.784579277038574} +03/03/2022 16:26:40 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/03/2022 16:26:43 - INFO - codeparrot_training - Step 2576: {'lr': 0.0004999813135895438, 'samples': 1319424, 'steps': 2576, 'loss/train': 2.7337288856506348} +03/03/2022 16:26:47 - INFO - codeparrot_training - Step 2577: {'lr': 0.0004999812486506637, 'samples': 1319936, 'steps': 2577, 'loss/train': 3.3158535957336426} +03/03/2022 16:26:49 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/03/2022 16:26:52 - INFO - codeparrot_training - Step 2578: {'lr': 0.0004999811835991457, 'samples': 1320448, 'steps': 2578, 'loss/train': 0.747119128704071} +03/03/2022 16:26:55 - INFO - codeparrot_training - Step 2579: {'lr': 0.0004999811184349902, 'samples': 1320960, 'steps': 2579, 'loss/train': 3.4974818229675293} +03/03/2022 16:26:57 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/03/2022 16:27:00 - INFO - codeparrot_training - Step 2580: {'lr': 0.000499981053158197, 'samples': 1321472, 'steps': 2580, 'loss/train': 2.9436874389648438} +03/03/2022 16:27:03 - INFO - codeparrot_training - Step 2581: {'lr': 0.0004999809877687662, 'samples': 1321984, 'steps': 2581, 'loss/train': 3.299527168273926} +03/03/2022 16:27:06 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/03/2022 16:27:09 - INFO - codeparrot_training - Step 2582: {'lr': 0.0004999809222666978, 'samples': 1322496, 'steps': 2582, 'loss/train': 3.9506969451904297} +03/03/2022 16:27:12 - INFO - codeparrot_training - Step 2583: {'lr': 0.0004999808566519919, 'samples': 1323008, 'steps': 2583, 'loss/train': 3.3544812202453613} +03/03/2022 16:27:14 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/03/2022 16:27:17 - INFO - codeparrot_training - Step 2584: {'lr': 0.0004999807909246485, 'samples': 1323520, 'steps': 2584, 'loss/train': 3.4369499683380127} +03/03/2022 16:27:20 - INFO - codeparrot_training - Step 2585: {'lr': 0.0004999807250846676, 'samples': 1324032, 'steps': 2585, 'loss/train': 1.846390724182129} +03/03/2022 16:27:22 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/03/2022 16:27:26 - INFO - codeparrot_training - Step 2586: {'lr': 0.0004999806591320492, 'samples': 1324544, 'steps': 2586, 'loss/train': 3.332951307296753} +03/03/2022 16:27:29 - INFO - codeparrot_training - Step 2587: {'lr': 0.0004999805930667934, 'samples': 1325056, 'steps': 2587, 'loss/train': 2.301896572113037} +03/03/2022 16:27:31 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/03/2022 16:27:34 - INFO - codeparrot_training - Step 2588: {'lr': 0.0004999805268889003, 'samples': 1325568, 'steps': 2588, 'loss/train': 2.26432466506958} +03/03/2022 16:27:37 - INFO - codeparrot_training - Step 2589: {'lr': 0.0004999804605983697, 'samples': 1326080, 'steps': 2589, 'loss/train': 1.6968886852264404} +03/03/2022 16:27:39 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 16:27:42 - INFO - codeparrot_training - Step 2590: {'lr': 0.0004999803941952018, 'samples': 1326592, 'steps': 2590, 'loss/train': 3.207984447479248} +03/03/2022 16:27:46 - INFO - codeparrot_training - Step 2591: {'lr': 0.0004999803276793965, 'samples': 1327104, 'steps': 2591, 'loss/train': 3.6978676319122314} +03/03/2022 16:27:47 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/03/2022 16:27:51 - INFO - codeparrot_training - Step 2592: {'lr': 0.0004999802610509541, 'samples': 1327616, 'steps': 2592, 'loss/train': 2.540597915649414} +03/03/2022 16:27:54 - INFO - codeparrot_training - Step 2593: {'lr': 0.0004999801943098743, 'samples': 1328128, 'steps': 2593, 'loss/train': 3.3277461528778076} +03/03/2022 16:27:56 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/03/2022 16:27:59 - INFO - codeparrot_training - Step 2594: {'lr': 0.0004999801274561573, 'samples': 1328640, 'steps': 2594, 'loss/train': 3.0864200592041016} +03/03/2022 16:28:02 - INFO - codeparrot_training - Step 2595: {'lr': 0.0004999800604898032, 'samples': 1329152, 'steps': 2595, 'loss/train': 2.9830515384674072} +03/03/2022 16:28:04 - INFO - codeparrot_training - Skipping example with length 703 (seq_length=1024) +03/03/2022 16:28:08 - INFO - codeparrot_training - Step 2596: {'lr': 0.000499979993410812, 'samples': 1329664, 'steps': 2596, 'loss/train': 2.583662748336792} +03/03/2022 16:28:11 - INFO - codeparrot_training - Step 2597: {'lr': 0.0004999799262191835, 'samples': 1330176, 'steps': 2597, 'loss/train': 3.0441434383392334} +03/03/2022 16:28:12 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/03/2022 16:28:16 - INFO - codeparrot_training - Step 2598: {'lr': 0.0004999798589149179, 'samples': 1330688, 'steps': 2598, 'loss/train': 3.271484851837158} +03/03/2022 16:28:19 - INFO - codeparrot_training - Step 2599: {'lr': 0.0004999797914980154, 'samples': 1331200, 'steps': 2599, 'loss/train': 2.115464448928833} +03/03/2022 16:28:21 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 16:28:24 - INFO - codeparrot_training - Step 2600: {'lr': 0.0004999797239684757, 'samples': 1331712, 'steps': 2600, 'loss/train': 2.7303597927093506} +03/03/2022 16:28:28 - INFO - codeparrot_training - Step 2601: {'lr': 0.0004999796563262991, 'samples': 1332224, 'steps': 2601, 'loss/train': 2.560098886489868} +03/03/2022 16:28:29 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/03/2022 16:28:33 - INFO - codeparrot_training - Step 2602: {'lr': 0.0004999795885714855, 'samples': 1332736, 'steps': 2602, 'loss/train': 2.6737420558929443} +03/03/2022 16:28:36 - INFO - codeparrot_training - Step 2603: {'lr': 0.0004999795207040349, 'samples': 1333248, 'steps': 2603, 'loss/train': 4.220717430114746} +03/03/2022 16:28:37 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/03/2022 16:28:41 - INFO - codeparrot_training - Step 2604: {'lr': 0.0004999794527239474, 'samples': 1333760, 'steps': 2604, 'loss/train': 3.3827965259552} +03/03/2022 16:28:44 - INFO - codeparrot_training - Step 2605: {'lr': 0.000499979384631223, 'samples': 1334272, 'steps': 2605, 'loss/train': 3.9132180213928223} +03/03/2022 16:28:46 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/03/2022 16:28:50 - INFO - codeparrot_training - Step 2606: {'lr': 0.000499979316425862, 'samples': 1334784, 'steps': 2606, 'loss/train': 3.231592893600464} +03/03/2022 16:28:53 - INFO - codeparrot_training - Step 2607: {'lr': 0.0004999792481078639, 'samples': 1335296, 'steps': 2607, 'loss/train': 2.7241311073303223} +03/03/2022 16:28:54 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/03/2022 16:28:58 - INFO - codeparrot_training - Step 2608: {'lr': 0.000499979179677229, 'samples': 1335808, 'steps': 2608, 'loss/train': 2.4818124771118164} +03/03/2022 16:29:01 - INFO - codeparrot_training - Step 2609: {'lr': 0.0004999791111339574, 'samples': 1336320, 'steps': 2609, 'loss/train': 1.6871732473373413} +03/03/2022 16:29:02 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 16:29:06 - INFO - codeparrot_training - Step 2610: {'lr': 0.0004999790424780492, 'samples': 1336832, 'steps': 2610, 'loss/train': 2.696526050567627} +03/03/2022 16:29:10 - INFO - codeparrot_training - Step 2611: {'lr': 0.0004999789737095041, 'samples': 1337344, 'steps': 2611, 'loss/train': 2.5918705463409424} +03/03/2022 16:29:11 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/03/2022 16:29:15 - INFO - codeparrot_training - Step 2612: {'lr': 0.0004999789048283224, 'samples': 1337856, 'steps': 2612, 'loss/train': 3.523407220840454} +03/03/2022 16:29:18 - INFO - codeparrot_training - Step 2613: {'lr': 0.0004999788358345041, 'samples': 1338368, 'steps': 2613, 'loss/train': 3.5299386978149414} +03/03/2022 16:29:19 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 16:29:23 - INFO - codeparrot_training - Step 2614: {'lr': 0.0004999787667280492, 'samples': 1338880, 'steps': 2614, 'loss/train': 2.904710292816162} +03/03/2022 16:29:27 - INFO - codeparrot_training - Step 2615: {'lr': 0.0004999786975089577, 'samples': 1339392, 'steps': 2615, 'loss/train': 3.1387832164764404} +03/03/2022 16:29:28 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/03/2022 16:29:32 - INFO - codeparrot_training - Step 2616: {'lr': 0.0004999786281772296, 'samples': 1339904, 'steps': 2616, 'loss/train': 2.571301221847534} +03/03/2022 16:29:35 - INFO - codeparrot_training - Step 2617: {'lr': 0.0004999785587328651, 'samples': 1340416, 'steps': 2617, 'loss/train': 3.431955337524414} +03/03/2022 16:29:37 - INFO - codeparrot_training - Skipping example with length 255 (seq_length=1024) +03/03/2022 16:29:40 - INFO - codeparrot_training - Step 2618: {'lr': 0.0004999784891758641, 'samples': 1340928, 'steps': 2618, 'loss/train': 3.766864538192749} +03/03/2022 16:29:43 - INFO - codeparrot_training - Step 2619: {'lr': 0.0004999784195062266, 'samples': 1341440, 'steps': 2619, 'loss/train': 2.716116428375244} +03/03/2022 16:29:45 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/03/2022 16:29:49 - INFO - codeparrot_training - Step 2620: {'lr': 0.0004999783497239526, 'samples': 1341952, 'steps': 2620, 'loss/train': 3.0643069744110107} +03/03/2022 16:29:52 - INFO - codeparrot_training - Step 2621: {'lr': 0.0004999782798290424, 'samples': 1342464, 'steps': 2621, 'loss/train': 3.4522390365600586} +03/03/2022 16:29:54 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/03/2022 16:29:57 - INFO - codeparrot_training - Step 2622: {'lr': 0.0004999782098214957, 'samples': 1342976, 'steps': 2622, 'loss/train': 3.551976442337036} +03/03/2022 16:30:00 - INFO - codeparrot_training - Step 2623: {'lr': 0.0004999781397013127, 'samples': 1343488, 'steps': 2623, 'loss/train': 2.9192144870758057} +03/03/2022 16:30:02 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/03/2022 16:30:06 - INFO - codeparrot_training - Step 2624: {'lr': 0.0004999780694684934, 'samples': 1344000, 'steps': 2624, 'loss/train': 2.217301607131958} +03/03/2022 16:30:09 - INFO - codeparrot_training - Step 2625: {'lr': 0.000499977999123038, 'samples': 1344512, 'steps': 2625, 'loss/train': 2.3937151432037354} +03/03/2022 16:30:11 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/03/2022 16:30:14 - INFO - codeparrot_training - Step 2626: {'lr': 0.0004999779286649461, 'samples': 1345024, 'steps': 2626, 'loss/train': 4.238019943237305} +03/03/2022 16:30:17 - INFO - codeparrot_training - Step 2627: {'lr': 0.0004999778580942183, 'samples': 1345536, 'steps': 2627, 'loss/train': 1.5297744274139404} +03/03/2022 16:30:19 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/03/2022 16:30:23 - INFO - codeparrot_training - Step 2628: {'lr': 0.000499977787410854, 'samples': 1346048, 'steps': 2628, 'loss/train': 2.5261788368225098} +03/03/2022 16:30:26 - INFO - codeparrot_training - Step 2629: {'lr': 0.0004999777166148539, 'samples': 1346560, 'steps': 2629, 'loss/train': 2.9928596019744873} +03/03/2022 16:30:28 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 16:30:31 - INFO - codeparrot_training - Step 2630: {'lr': 0.0004999776457062175, 'samples': 1347072, 'steps': 2630, 'loss/train': 2.845942258834839} +03/03/2022 16:30:34 - INFO - codeparrot_training - Step 2631: {'lr': 0.0004999775746849451, 'samples': 1347584, 'steps': 2631, 'loss/train': 3.638514518737793} +03/03/2022 16:30:36 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 16:30:39 - INFO - codeparrot_training - Step 2632: {'lr': 0.0004999775035510367, 'samples': 1348096, 'steps': 2632, 'loss/train': 3.0251049995422363} +03/03/2022 16:30:43 - INFO - codeparrot_training - Step 2633: {'lr': 0.0004999774323044922, 'samples': 1348608, 'steps': 2633, 'loss/train': 2.5025033950805664} +03/03/2022 16:30:45 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/03/2022 16:30:48 - INFO - codeparrot_training - Step 2634: {'lr': 0.0004999773609453118, 'samples': 1349120, 'steps': 2634, 'loss/train': 2.459221124649048} +03/03/2022 16:30:51 - INFO - codeparrot_training - Step 2635: {'lr': 0.0004999772894734954, 'samples': 1349632, 'steps': 2635, 'loss/train': 3.168429136276245} +03/03/2022 16:30:53 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/03/2022 16:30:56 - INFO - codeparrot_training - Step 2636: {'lr': 0.000499977217889043, 'samples': 1350144, 'steps': 2636, 'loss/train': 3.164872884750366} +03/03/2022 16:31:00 - INFO - codeparrot_training - Step 2637: {'lr': 0.0004999771461919549, 'samples': 1350656, 'steps': 2637, 'loss/train': 3.768165111541748} +03/03/2022 16:31:01 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/03/2022 16:31:05 - INFO - codeparrot_training - Step 2638: {'lr': 0.0004999770743822309, 'samples': 1351168, 'steps': 2638, 'loss/train': 1.4939980506896973} +03/03/2022 16:31:08 - INFO - codeparrot_training - Step 2639: {'lr': 0.0004999770024598711, 'samples': 1351680, 'steps': 2639, 'loss/train': 2.271815538406372} +03/03/2022 16:31:10 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/03/2022 16:31:13 - INFO - codeparrot_training - Step 2640: {'lr': 0.0004999769304248754, 'samples': 1352192, 'steps': 2640, 'loss/train': 3.0449881553649902} +03/03/2022 16:31:17 - INFO - codeparrot_training - Step 2641: {'lr': 0.0004999768582772442, 'samples': 1352704, 'steps': 2641, 'loss/train': 2.447634220123291} +03/03/2022 16:31:19 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/03/2022 16:31:22 - INFO - codeparrot_training - Step 2642: {'lr': 0.000499976786016977, 'samples': 1353216, 'steps': 2642, 'loss/train': 2.848057985305786} +03/03/2022 16:31:25 - INFO - codeparrot_training - Step 2643: {'lr': 0.0004999767136440742, 'samples': 1353728, 'steps': 2643, 'loss/train': 4.213784694671631} +03/03/2022 16:31:27 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/03/2022 16:31:30 - INFO - codeparrot_training - Step 2644: {'lr': 0.0004999766411585359, 'samples': 1354240, 'steps': 2644, 'loss/train': 3.698878288269043} +03/03/2022 16:31:33 - INFO - codeparrot_training - Step 2645: {'lr': 0.0004999765685603618, 'samples': 1354752, 'steps': 2645, 'loss/train': 3.135502815246582} +03/03/2022 16:31:35 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/03/2022 16:31:39 - INFO - codeparrot_training - Step 2646: {'lr': 0.0004999764958495522, 'samples': 1355264, 'steps': 2646, 'loss/train': 3.0132973194122314} +03/03/2022 16:31:42 - INFO - codeparrot_training - Step 2647: {'lr': 0.0004999764230261072, 'samples': 1355776, 'steps': 2647, 'loss/train': 2.5124495029449463} +03/03/2022 16:31:44 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/03/2022 16:31:47 - INFO - codeparrot_training - Step 2648: {'lr': 0.0004999763500900265, 'samples': 1356288, 'steps': 2648, 'loss/train': 3.5623955726623535} +03/03/2022 16:31:50 - INFO - codeparrot_training - Step 2649: {'lr': 0.0004999762770413103, 'samples': 1356800, 'steps': 2649, 'loss/train': 2.7302751541137695} +03/03/2022 16:31:52 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/03/2022 16:31:55 - INFO - codeparrot_training - Step 2650: {'lr': 0.0004999762038799587, 'samples': 1357312, 'steps': 2650, 'loss/train': 4.946469306945801} +03/03/2022 16:31:59 - INFO - codeparrot_training - Step 2651: {'lr': 0.0004999761306059717, 'samples': 1357824, 'steps': 2651, 'loss/train': 2.629087448120117} +03/03/2022 16:32:00 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/03/2022 16:32:04 - INFO - codeparrot_training - Step 2652: {'lr': 0.0004999760572193492, 'samples': 1358336, 'steps': 2652, 'loss/train': 2.986102819442749} +03/03/2022 16:32:07 - INFO - codeparrot_training - Step 2653: {'lr': 0.0004999759837200914, 'samples': 1358848, 'steps': 2653, 'loss/train': 3.6453475952148438} +03/03/2022 16:32:09 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/03/2022 16:32:12 - INFO - codeparrot_training - Step 2654: {'lr': 0.0004999759101081984, 'samples': 1359360, 'steps': 2654, 'loss/train': 2.356199026107788} +03/03/2022 16:32:16 - INFO - codeparrot_training - Step 2655: {'lr': 0.0004999758363836701, 'samples': 1359872, 'steps': 2655, 'loss/train': 3.1962454319000244} +03/03/2022 16:32:18 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/03/2022 16:32:21 - INFO - codeparrot_training - Step 2656: {'lr': 0.0004999757625465063, 'samples': 1360384, 'steps': 2656, 'loss/train': 3.558591604232788} +03/03/2022 16:32:24 - INFO - codeparrot_training - Step 2657: {'lr': 0.0004999756885967075, 'samples': 1360896, 'steps': 2657, 'loss/train': 3.0056636333465576} +03/03/2022 16:32:26 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/03/2022 16:32:29 - INFO - codeparrot_training - Step 2658: {'lr': 0.0004999756145342735, 'samples': 1361408, 'steps': 2658, 'loss/train': 1.05905020236969} +03/03/2022 16:32:32 - INFO - codeparrot_training - Step 2659: {'lr': 0.0004999755403592043, 'samples': 1361920, 'steps': 2659, 'loss/train': 2.734844923019409} +03/03/2022 16:32:34 - INFO - codeparrot_training - Skipping example with length 719 (seq_length=1024) +03/03/2022 16:32:38 - INFO - codeparrot_training - Step 2660: {'lr': 0.0004999754660714999, 'samples': 1362432, 'steps': 2660, 'loss/train': 3.336033821105957} +03/03/2022 16:32:41 - INFO - codeparrot_training - Step 2661: {'lr': 0.0004999753916711606, 'samples': 1362944, 'steps': 2661, 'loss/train': 2.3727469444274902} +03/03/2022 16:32:43 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/03/2022 16:32:46 - INFO - codeparrot_training - Step 2662: {'lr': 0.0004999753171581862, 'samples': 1363456, 'steps': 2662, 'loss/train': 3.810101270675659} +03/03/2022 16:32:49 - INFO - codeparrot_training - Step 2663: {'lr': 0.0004999752425325766, 'samples': 1363968, 'steps': 2663, 'loss/train': 3.6662099361419678} +03/03/2022 16:32:52 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/03/2022 16:32:55 - INFO - codeparrot_training - Step 2664: {'lr': 0.0004999751677943322, 'samples': 1364480, 'steps': 2664, 'loss/train': 2.4257819652557373} +03/03/2022 16:32:58 - INFO - codeparrot_training - Step 2665: {'lr': 0.0004999750929434527, 'samples': 1364992, 'steps': 2665, 'loss/train': 2.9750545024871826} +03/03/2022 16:33:00 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/03/2022 16:33:03 - INFO - codeparrot_training - Step 2666: {'lr': 0.0004999750179799383, 'samples': 1365504, 'steps': 2666, 'loss/train': 3.2500622272491455} +03/03/2022 16:33:06 - INFO - codeparrot_training - Step 2667: {'lr': 0.0004999749429037892, 'samples': 1366016, 'steps': 2667, 'loss/train': 2.81709885597229} +03/03/2022 16:33:08 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/03/2022 16:33:12 - INFO - codeparrot_training - Step 2668: {'lr': 0.0004999748677150051, 'samples': 1366528, 'steps': 2668, 'loss/train': 3.947962522506714} +03/03/2022 16:33:15 - INFO - codeparrot_training - Step 2669: {'lr': 0.0004999747924135862, 'samples': 1367040, 'steps': 2669, 'loss/train': 3.3503713607788086} +03/03/2022 16:33:17 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/03/2022 16:33:20 - INFO - codeparrot_training - Step 2670: {'lr': 0.0004999747169995325, 'samples': 1367552, 'steps': 2670, 'loss/train': 2.945610284805298} +03/03/2022 16:33:23 - INFO - codeparrot_training - Step 2671: {'lr': 0.0004999746414728441, 'samples': 1368064, 'steps': 2671, 'loss/train': 3.689152479171753} +03/03/2022 16:33:25 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/03/2022 16:33:29 - INFO - codeparrot_training - Step 2672: {'lr': 0.0004999745658335209, 'samples': 1368576, 'steps': 2672, 'loss/train': 3.413879871368408} +03/03/2022 16:33:32 - INFO - codeparrot_training - Step 2673: {'lr': 0.000499974490081563, 'samples': 1369088, 'steps': 2673, 'loss/train': 3.0831363201141357} +03/03/2022 16:33:34 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/03/2022 16:33:37 - INFO - codeparrot_training - Step 2674: {'lr': 0.0004999744142169707, 'samples': 1369600, 'steps': 2674, 'loss/train': 3.054553747177124} +03/03/2022 16:33:40 - INFO - codeparrot_training - Step 2675: {'lr': 0.0004999743382397435, 'samples': 1370112, 'steps': 2675, 'loss/train': 3.3434205055236816} +03/03/2022 16:33:42 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/03/2022 16:33:45 - INFO - codeparrot_training - Step 2676: {'lr': 0.0004999742621498818, 'samples': 1370624, 'steps': 2676, 'loss/train': 1.5339998006820679} +03/03/2022 16:33:49 - INFO - codeparrot_training - Step 2677: {'lr': 0.0004999741859473857, 'samples': 1371136, 'steps': 2677, 'loss/train': 3.1449472904205322} +03/03/2022 16:33:50 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/03/2022 16:33:54 - INFO - codeparrot_training - Step 2678: {'lr': 0.0004999741096322549, 'samples': 1371648, 'steps': 2678, 'loss/train': 2.4214611053466797} +03/03/2022 16:33:57 - INFO - codeparrot_training - Step 2679: {'lr': 0.0004999740332044898, 'samples': 1372160, 'steps': 2679, 'loss/train': 3.1184234619140625} +03/03/2022 16:33:58 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/03/2022 16:34:02 - INFO - codeparrot_training - Step 2680: {'lr': 0.0004999739566640901, 'samples': 1372672, 'steps': 2680, 'loss/train': 2.306725263595581} +03/03/2022 16:34:06 - INFO - codeparrot_training - Step 2681: {'lr': 0.000499973880011056, 'samples': 1373184, 'steps': 2681, 'loss/train': 1.848206639289856} +03/03/2022 16:34:07 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/03/2022 16:34:11 - INFO - codeparrot_training - Step 2682: {'lr': 0.0004999738032453876, 'samples': 1373696, 'steps': 2682, 'loss/train': 2.607447862625122} +03/03/2022 16:34:14 - INFO - codeparrot_training - Step 2683: {'lr': 0.0004999737263670848, 'samples': 1374208, 'steps': 2683, 'loss/train': 3.085094451904297} +03/03/2022 16:34:15 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/03/2022 16:34:19 - INFO - codeparrot_training - Step 2684: {'lr': 0.0004999736493761477, 'samples': 1374720, 'steps': 2684, 'loss/train': 2.721832275390625} +03/03/2022 16:34:22 - INFO - codeparrot_training - Step 2685: {'lr': 0.0004999735722725765, 'samples': 1375232, 'steps': 2685, 'loss/train': 2.966313600540161} +03/03/2022 16:34:23 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/03/2022 16:34:28 - INFO - codeparrot_training - Step 2686: {'lr': 0.0004999734950563709, 'samples': 1375744, 'steps': 2686, 'loss/train': 0.4267406463623047} +03/03/2022 16:34:31 - INFO - codeparrot_training - Step 2687: {'lr': 0.0004999734177275311, 'samples': 1376256, 'steps': 2687, 'loss/train': 2.8467860221862793} +03/03/2022 16:34:32 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/03/2022 16:34:36 - INFO - codeparrot_training - Step 2688: {'lr': 0.0004999733402860572, 'samples': 1376768, 'steps': 2688, 'loss/train': 2.1725759506225586} +03/03/2022 16:34:39 - INFO - codeparrot_training - Step 2689: {'lr': 0.0004999732627319491, 'samples': 1377280, 'steps': 2689, 'loss/train': 2.5292203426361084} +03/03/2022 16:34:40 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/03/2022 16:34:44 - INFO - codeparrot_training - Step 2690: {'lr': 0.000499973185065207, 'samples': 1377792, 'steps': 2690, 'loss/train': 2.9473001956939697} +03/03/2022 16:34:48 - INFO - codeparrot_training - Step 2691: {'lr': 0.0004999731072858307, 'samples': 1378304, 'steps': 2691, 'loss/train': 2.802412986755371} +03/03/2022 16:34:49 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/03/2022 16:34:53 - INFO - codeparrot_training - Step 2692: {'lr': 0.0004999730293938205, 'samples': 1378816, 'steps': 2692, 'loss/train': 2.6749649047851562} +03/03/2022 16:34:56 - INFO - codeparrot_training - Step 2693: {'lr': 0.0004999729513891762, 'samples': 1379328, 'steps': 2693, 'loss/train': 2.759756565093994} +03/03/2022 16:34:57 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/03/2022 16:35:01 - INFO - codeparrot_training - Step 2694: {'lr': 0.000499972873271898, 'samples': 1379840, 'steps': 2694, 'loss/train': 2.8685598373413086} +03/03/2022 16:35:04 - INFO - codeparrot_training - Step 2695: {'lr': 0.0004999727950419859, 'samples': 1380352, 'steps': 2695, 'loss/train': 3.7282521724700928} +03/03/2022 16:35:06 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/03/2022 16:35:10 - INFO - codeparrot_training - Step 2696: {'lr': 0.0004999727166994399, 'samples': 1380864, 'steps': 2696, 'loss/train': 2.929319381713867} +03/03/2022 16:35:13 - INFO - codeparrot_training - Step 2697: {'lr': 0.0004999726382442601, 'samples': 1381376, 'steps': 2697, 'loss/train': 3.113121509552002} +03/03/2022 16:35:14 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/03/2022 16:35:18 - INFO - codeparrot_training - Step 2698: {'lr': 0.0004999725596764465, 'samples': 1381888, 'steps': 2698, 'loss/train': 2.7707581520080566} +03/03/2022 16:35:21 - INFO - codeparrot_training - Step 2699: {'lr': 0.000499972480995999, 'samples': 1382400, 'steps': 2699, 'loss/train': 0.7133868932723999} +03/03/2022 16:35:23 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/03/2022 16:35:27 - INFO - codeparrot_training - Step 2700: {'lr': 0.0004999724022029179, 'samples': 1382912, 'steps': 2700, 'loss/train': 1.3908557891845703} +03/03/2022 16:35:30 - INFO - codeparrot_training - Step 2701: {'lr': 0.000499972323297203, 'samples': 1383424, 'steps': 2701, 'loss/train': 1.9161713123321533} +03/03/2022 16:35:31 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/03/2022 16:35:35 - INFO - codeparrot_training - Step 2702: {'lr': 0.0004999722442788544, 'samples': 1383936, 'steps': 2702, 'loss/train': 2.9433400630950928} +03/03/2022 16:35:38 - INFO - codeparrot_training - Step 2703: {'lr': 0.0004999721651478723, 'samples': 1384448, 'steps': 2703, 'loss/train': 2.624702215194702} +03/03/2022 16:35:39 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/03/2022 16:35:43 - INFO - codeparrot_training - Step 2704: {'lr': 0.0004999720859042565, 'samples': 1384960, 'steps': 2704, 'loss/train': 3.163682222366333} +03/03/2022 16:35:47 - INFO - codeparrot_training - Step 2705: {'lr': 0.0004999720065480071, 'samples': 1385472, 'steps': 2705, 'loss/train': 3.0142343044281006} +03/03/2022 16:35:47 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/03/2022 16:35:52 - INFO - codeparrot_training - Step 2706: {'lr': 0.0004999719270791242, 'samples': 1385984, 'steps': 2706, 'loss/train': 2.339606761932373} +03/03/2022 16:35:55 - INFO - codeparrot_training - Step 2707: {'lr': 0.0004999718474976078, 'samples': 1386496, 'steps': 2707, 'loss/train': 3.7225284576416016} +03/03/2022 16:35:55 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/03/2022 16:36:01 - INFO - codeparrot_training - Step 2708: {'lr': 0.000499971767803458, 'samples': 1387008, 'steps': 2708, 'loss/train': 2.9600157737731934} +03/03/2022 16:36:04 - INFO - codeparrot_training - Step 2709: {'lr': 0.0004999716879966747, 'samples': 1387520, 'steps': 2709, 'loss/train': 0.5095680952072144} +03/03/2022 16:36:06 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/03/2022 16:36:09 - INFO - codeparrot_training - Step 2710: {'lr': 0.000499971608077258, 'samples': 1388032, 'steps': 2710, 'loss/train': 3.024923324584961} +03/03/2022 16:36:12 - INFO - codeparrot_training - Step 2711: {'lr': 0.000499971528045208, 'samples': 1388544, 'steps': 2711, 'loss/train': 2.553765296936035} +03/03/2022 16:36:14 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/03/2022 16:36:18 - INFO - codeparrot_training - Step 2712: {'lr': 0.0004999714479005248, 'samples': 1389056, 'steps': 2712, 'loss/train': 2.1832149028778076} +03/03/2022 16:36:21 - INFO - codeparrot_training - Step 2713: {'lr': 0.0004999713676432082, 'samples': 1389568, 'steps': 2713, 'loss/train': 2.8498430252075195} +03/03/2022 16:36:22 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/03/2022 16:36:26 - INFO - codeparrot_training - Step 2714: {'lr': 0.0004999712872732584, 'samples': 1390080, 'steps': 2714, 'loss/train': 3.45271372795105} +03/03/2022 16:36:29 - INFO - codeparrot_training - Step 2715: {'lr': 0.0004999712067906754, 'samples': 1390592, 'steps': 2715, 'loss/train': 3.0970587730407715} +03/03/2022 16:36:30 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/03/2022 16:36:34 - INFO - codeparrot_training - Step 2716: {'lr': 0.0004999711261954591, 'samples': 1391104, 'steps': 2716, 'loss/train': 2.373814105987549} +03/03/2022 16:36:38 - INFO - codeparrot_training - Step 2717: {'lr': 0.0004999710454876099, 'samples': 1391616, 'steps': 2717, 'loss/train': 0.6093522906303406} +03/03/2022 16:36:39 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/03/2022 16:36:43 - INFO - codeparrot_training - Step 2718: {'lr': 0.0004999709646671274, 'samples': 1392128, 'steps': 2718, 'loss/train': 2.97921085357666} +03/03/2022 16:36:46 - INFO - codeparrot_training - Step 2719: {'lr': 0.0004999708837340119, 'samples': 1392640, 'steps': 2719, 'loss/train': 3.300020217895508} +03/03/2022 16:36:47 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/03/2022 16:36:51 - INFO - codeparrot_training - Step 2720: {'lr': 0.0004999708026882635, 'samples': 1393152, 'steps': 2720, 'loss/train': 2.6183254718780518} +03/03/2022 16:36:54 - INFO - codeparrot_training - Step 2721: {'lr': 0.000499970721529882, 'samples': 1393664, 'steps': 2721, 'loss/train': 3.246802806854248} +03/03/2022 16:36:56 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/03/2022 16:37:00 - INFO - codeparrot_training - Step 2722: {'lr': 0.0004999706402588675, 'samples': 1394176, 'steps': 2722, 'loss/train': 1.8369256258010864} +03/03/2022 16:37:03 - INFO - codeparrot_training - Step 2723: {'lr': 0.0004999705588752202, 'samples': 1394688, 'steps': 2723, 'loss/train': 3.219496726989746} +03/03/2022 16:37:04 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/03/2022 16:37:08 - INFO - codeparrot_training - Step 2724: {'lr': 0.00049997047737894, 'samples': 1395200, 'steps': 2724, 'loss/train': 2.1128249168395996} +03/03/2022 16:37:11 - INFO - codeparrot_training - Step 2725: {'lr': 0.0004999703957700269, 'samples': 1395712, 'steps': 2725, 'loss/train': 2.07346773147583} +03/03/2022 16:37:13 - INFO - codeparrot_training - Skipping example with length 24 (seq_length=1024) +03/03/2022 16:37:17 - INFO - codeparrot_training - Step 2726: {'lr': 0.000499970314048481, 'samples': 1396224, 'steps': 2726, 'loss/train': 3.2113945484161377} +03/03/2022 16:37:20 - INFO - codeparrot_training - Step 2727: {'lr': 0.0004999702322143023, 'samples': 1396736, 'steps': 2727, 'loss/train': 3.701845169067383} +03/03/2022 16:37:21 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/03/2022 16:37:25 - INFO - codeparrot_training - Step 2728: {'lr': 0.000499970150267491, 'samples': 1397248, 'steps': 2728, 'loss/train': 2.8003170490264893} +03/03/2022 16:37:28 - INFO - codeparrot_training - Step 2729: {'lr': 0.0004999700682080469, 'samples': 1397760, 'steps': 2729, 'loss/train': 2.227933406829834} +03/03/2022 16:37:29 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/03/2022 16:37:33 - INFO - codeparrot_training - Step 2730: {'lr': 0.0004999699860359702, 'samples': 1398272, 'steps': 2730, 'loss/train': 1.0050252676010132} +03/03/2022 16:37:37 - INFO - codeparrot_training - Step 2731: {'lr': 0.0004999699037512608, 'samples': 1398784, 'steps': 2731, 'loss/train': 3.0615792274475098} +03/03/2022 16:37:38 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/03/2022 16:37:42 - INFO - codeparrot_training - Step 2732: {'lr': 0.000499969821353919, 'samples': 1399296, 'steps': 2732, 'loss/train': 2.2257916927337646} +03/03/2022 16:37:45 - INFO - codeparrot_training - Step 2733: {'lr': 0.0004999697388439444, 'samples': 1399808, 'steps': 2733, 'loss/train': 2.260690450668335} +03/03/2022 16:37:46 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/03/2022 16:37:50 - INFO - codeparrot_training - Step 2734: {'lr': 0.0004999696562213375, 'samples': 1400320, 'steps': 2734, 'loss/train': 3.423987865447998} +03/03/2022 16:37:53 - INFO - codeparrot_training - Step 2735: {'lr': 0.0004999695734860981, 'samples': 1400832, 'steps': 2735, 'loss/train': 2.2401645183563232} +03/03/2022 16:37:54 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/03/2022 16:37:59 - INFO - codeparrot_training - Step 2736: {'lr': 0.0004999694906382262, 'samples': 1401344, 'steps': 2736, 'loss/train': 3.383301019668579} +03/03/2022 16:38:02 - INFO - codeparrot_training - Step 2737: {'lr': 0.0004999694076777219, 'samples': 1401856, 'steps': 2737, 'loss/train': 2.5983786582946777} +03/03/2022 16:38:02 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/03/2022 16:38:07 - INFO - codeparrot_training - Step 2738: {'lr': 0.0004999693246045854, 'samples': 1402368, 'steps': 2738, 'loss/train': 3.214512348175049} +03/03/2022 16:38:10 - INFO - codeparrot_training - Step 2739: {'lr': 0.0004999692414188164, 'samples': 1402880, 'steps': 2739, 'loss/train': 3.0131938457489014} +03/03/2022 16:38:11 - INFO - codeparrot_training - Skipping example with length 54 (seq_length=1024) +03/03/2022 16:38:16 - INFO - codeparrot_training - Step 2740: {'lr': 0.0004999691581204152, 'samples': 1403392, 'steps': 2740, 'loss/train': 3.7400827407836914} +03/03/2022 16:38:19 - INFO - codeparrot_training - Step 2741: {'lr': 0.0004999690747093816, 'samples': 1403904, 'steps': 2741, 'loss/train': 2.461756706237793} +03/03/2022 16:38:19 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/03/2022 16:38:24 - INFO - codeparrot_training - Step 2742: {'lr': 0.000499968991185716, 'samples': 1404416, 'steps': 2742, 'loss/train': 2.7894327640533447} +03/03/2022 16:38:27 - INFO - codeparrot_training - Step 2743: {'lr': 0.0004999689075494182, 'samples': 1404928, 'steps': 2743, 'loss/train': 2.5563488006591797} +03/03/2022 16:38:27 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/03/2022 16:38:32 - INFO - codeparrot_training - Step 2744: {'lr': 0.0004999688238004882, 'samples': 1405440, 'steps': 2744, 'loss/train': 4.033098220825195} +03/03/2022 16:38:36 - INFO - codeparrot_training - Step 2745: {'lr': 0.0004999687399389262, 'samples': 1405952, 'steps': 2745, 'loss/train': 2.51008939743042} +03/03/2022 16:38:37 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/03/2022 16:38:41 - INFO - codeparrot_training - Step 2746: {'lr': 0.0004999686559647319, 'samples': 1406464, 'steps': 2746, 'loss/train': 0.8866392374038696} +03/03/2022 16:38:44 - INFO - codeparrot_training - Step 2747: {'lr': 0.0004999685718779058, 'samples': 1406976, 'steps': 2747, 'loss/train': 3.109419345855713} +03/03/2022 16:38:45 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/03/2022 16:38:49 - INFO - codeparrot_training - Step 2748: {'lr': 0.0004999684876784477, 'samples': 1407488, 'steps': 2748, 'loss/train': 3.017601251602173} +03/03/2022 16:38:52 - INFO - codeparrot_training - Step 2749: {'lr': 0.0004999684033663576, 'samples': 1408000, 'steps': 2749, 'loss/train': 2.993380546569824} +03/03/2022 16:38:53 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/03/2022 16:38:58 - INFO - codeparrot_training - Step 2750: {'lr': 0.0004999683189416356, 'samples': 1408512, 'steps': 2750, 'loss/train': 3.0072054862976074} +03/03/2022 16:39:01 - INFO - codeparrot_training - Step 2751: {'lr': 0.0004999682344042817, 'samples': 1409024, 'steps': 2751, 'loss/train': 2.9468133449554443} +03/03/2022 16:39:01 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/03/2022 16:39:06 - INFO - codeparrot_training - Step 2752: {'lr': 0.000499968149754296, 'samples': 1409536, 'steps': 2752, 'loss/train': 3.357618570327759} +03/03/2022 16:39:09 - INFO - codeparrot_training - Step 2753: {'lr': 0.0004999680649916786, 'samples': 1410048, 'steps': 2753, 'loss/train': 2.3924074172973633} +03/03/2022 16:39:09 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 16:39:15 - INFO - codeparrot_training - Step 2754: {'lr': 0.0004999679801164295, 'samples': 1410560, 'steps': 2754, 'loss/train': 2.7252907752990723} +03/03/2022 16:39:18 - INFO - codeparrot_training - Step 2755: {'lr': 0.0004999678951285485, 'samples': 1411072, 'steps': 2755, 'loss/train': 2.8945701122283936} +03/03/2022 16:39:18 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/03/2022 16:39:23 - INFO - codeparrot_training - Step 2756: {'lr': 0.0004999678100280358, 'samples': 1411584, 'steps': 2756, 'loss/train': 0.596611499786377} +03/03/2022 16:39:26 - INFO - codeparrot_training - Step 2757: {'lr': 0.0004999677248148916, 'samples': 1412096, 'steps': 2757, 'loss/train': 2.9982056617736816} +03/03/2022 16:39:26 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/03/2022 16:39:32 - INFO - codeparrot_training - Step 2758: {'lr': 0.0004999676394891158, 'samples': 1412608, 'steps': 2758, 'loss/train': 3.010190486907959} +03/03/2022 16:39:35 - INFO - codeparrot_training - Step 2759: {'lr': 0.0004999675540507083, 'samples': 1413120, 'steps': 2759, 'loss/train': 2.8700668811798096} +03/03/2022 16:39:35 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 16:39:40 - INFO - codeparrot_training - Step 2760: {'lr': 0.0004999674684996694, 'samples': 1413632, 'steps': 2760, 'loss/train': 3.8052008152008057} +03/03/2022 16:39:43 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/03/2022 16:39:45 - INFO - codeparrot_training - Step 2761: {'lr': 0.0004999673828359989, 'samples': 1414144, 'steps': 2761, 'loss/train': 2.5783233642578125} +03/03/2022 16:39:48 - INFO - codeparrot_training - Step 2762: {'lr': 0.0004999672970596971, 'samples': 1414656, 'steps': 2762, 'loss/train': 2.9074954986572266} +03/03/2022 16:39:51 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/03/2022 16:39:54 - INFO - codeparrot_training - Step 2763: {'lr': 0.0004999672111707639, 'samples': 1415168, 'steps': 2763, 'loss/train': 3.0255846977233887} +03/03/2022 16:39:57 - INFO - codeparrot_training - Step 2764: {'lr': 0.0004999671251691991, 'samples': 1415680, 'steps': 2764, 'loss/train': 2.850754737854004} +03/03/2022 16:39:59 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/03/2022 16:40:02 - INFO - codeparrot_training - Step 2765: {'lr': 0.0004999670390550032, 'samples': 1416192, 'steps': 2765, 'loss/train': 0.5259968638420105} +03/03/2022 16:40:05 - INFO - codeparrot_training - Step 2766: {'lr': 0.000499966952828176, 'samples': 1416704, 'steps': 2766, 'loss/train': 2.8810441493988037} +03/03/2022 16:40:11 - INFO - codeparrot_training - Step 2767: {'lr': 0.0004999668664887175, 'samples': 1417216, 'steps': 2767, 'loss/train': 2.586190938949585} +03/03/2022 16:40:14 - INFO - codeparrot_training - Step 2768: {'lr': 0.0004999667800366278, 'samples': 1417728, 'steps': 2768, 'loss/train': 3.123504161834717} +03/03/2022 16:40:16 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/03/2022 16:40:19 - INFO - codeparrot_training - Step 2769: {'lr': 0.0004999666934719069, 'samples': 1418240, 'steps': 2769, 'loss/train': 2.135603427886963} +03/03/2022 16:40:22 - INFO - codeparrot_training - Step 2770: {'lr': 0.0004999666067945548, 'samples': 1418752, 'steps': 2770, 'loss/train': 2.5394155979156494} +03/03/2022 16:40:24 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/03/2022 16:40:28 - INFO - codeparrot_training - Step 2771: {'lr': 0.0004999665200045716, 'samples': 1419264, 'steps': 2771, 'loss/train': 2.546546459197998} +03/03/2022 16:40:31 - INFO - codeparrot_training - Step 2772: {'lr': 0.0004999664331019574, 'samples': 1419776, 'steps': 2772, 'loss/train': 2.88893723487854} +03/03/2022 16:40:33 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/03/2022 16:40:36 - INFO - codeparrot_training - Step 2773: {'lr': 0.0004999663460867123, 'samples': 1420288, 'steps': 2773, 'loss/train': 2.6379425525665283} +03/03/2022 16:40:39 - INFO - codeparrot_training - Step 2774: {'lr': 0.000499966258958836, 'samples': 1420800, 'steps': 2774, 'loss/train': 2.9634621143341064} +03/03/2022 16:40:41 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/03/2022 16:40:44 - INFO - codeparrot_training - Step 2775: {'lr': 0.000499966171718329, 'samples': 1421312, 'steps': 2775, 'loss/train': 2.962170124053955} +03/03/2022 16:40:48 - INFO - codeparrot_training - Step 2776: {'lr': 0.000499966084365191, 'samples': 1421824, 'steps': 2776, 'loss/train': 2.5851809978485107} +03/03/2022 16:40:49 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/03/2022 16:40:53 - INFO - codeparrot_training - Step 2777: {'lr': 0.0004999659968994221, 'samples': 1422336, 'steps': 2777, 'loss/train': 2.1692118644714355} +03/03/2022 16:40:56 - INFO - codeparrot_training - Step 2778: {'lr': 0.0004999659093210223, 'samples': 1422848, 'steps': 2778, 'loss/train': 6.008566379547119} +03/03/2022 16:40:58 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/03/2022 16:41:01 - INFO - codeparrot_training - Step 2779: {'lr': 0.0004999658216299919, 'samples': 1423360, 'steps': 2779, 'loss/train': 4.314851760864258} +03/03/2022 16:41:04 - INFO - codeparrot_training - Step 2780: {'lr': 0.0004999657338263308, 'samples': 1423872, 'steps': 2780, 'loss/train': 3.9941909313201904} +03/03/2022 16:41:06 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/03/2022 16:41:10 - INFO - codeparrot_training - Step 2781: {'lr': 0.0004999656459100388, 'samples': 1424384, 'steps': 2781, 'loss/train': 3.216099500656128} +03/03/2022 16:41:13 - INFO - codeparrot_training - Step 2782: {'lr': 0.0004999655578811161, 'samples': 1424896, 'steps': 2782, 'loss/train': 2.0455474853515625} +03/03/2022 16:41:14 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/03/2022 16:41:18 - INFO - codeparrot_training - Step 2783: {'lr': 0.0004999654697395629, 'samples': 1425408, 'steps': 2783, 'loss/train': 3.5764999389648438} +03/03/2022 16:41:21 - INFO - codeparrot_training - Step 2784: {'lr': 0.0004999653814853791, 'samples': 1425920, 'steps': 2784, 'loss/train': 3.778637409210205} +03/03/2022 16:41:23 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/03/2022 16:41:26 - INFO - codeparrot_training - Step 2785: {'lr': 0.0004999652931185648, 'samples': 1426432, 'steps': 2785, 'loss/train': 2.3404812812805176} +03/03/2022 16:41:30 - INFO - codeparrot_training - Step 2786: {'lr': 0.00049996520463912, 'samples': 1426944, 'steps': 2786, 'loss/train': 2.825230360031128} +03/03/2022 16:41:32 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/03/2022 16:41:35 - INFO - codeparrot_training - Step 2787: {'lr': 0.0004999651160470447, 'samples': 1427456, 'steps': 2787, 'loss/train': 2.9974162578582764} +03/03/2022 16:41:38 - INFO - codeparrot_training - Step 2788: {'lr': 0.0004999650273423389, 'samples': 1427968, 'steps': 2788, 'loss/train': 3.308454990386963} +03/03/2022 16:41:40 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/03/2022 16:41:43 - INFO - codeparrot_training - Step 2789: {'lr': 0.0004999649385250028, 'samples': 1428480, 'steps': 2789, 'loss/train': 2.9546446800231934} +03/03/2022 16:41:46 - INFO - codeparrot_training - Step 2790: {'lr': 0.0004999648495950363, 'samples': 1428992, 'steps': 2790, 'loss/train': 3.3561437129974365} +03/03/2022 16:41:48 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/03/2022 16:41:52 - INFO - codeparrot_training - Step 2791: {'lr': 0.0004999647605524396, 'samples': 1429504, 'steps': 2791, 'loss/train': 0.43039724230766296} +03/03/2022 16:41:55 - INFO - codeparrot_training - Step 2792: {'lr': 0.0004999646713972126, 'samples': 1430016, 'steps': 2792, 'loss/train': 3.791672468185425} +03/03/2022 16:41:56 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/03/2022 16:42:00 - INFO - codeparrot_training - Step 2793: {'lr': 0.0004999645821293552, 'samples': 1430528, 'steps': 2793, 'loss/train': 2.333115577697754} +03/03/2022 16:42:03 - INFO - codeparrot_training - Step 2794: {'lr': 0.0004999644927488678, 'samples': 1431040, 'steps': 2794, 'loss/train': 2.9044437408447266} +03/03/2022 16:42:05 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/03/2022 16:42:08 - INFO - codeparrot_training - Step 2795: {'lr': 0.0004999644032557503, 'samples': 1431552, 'steps': 2795, 'loss/train': 2.2309656143188477} +03/03/2022 16:42:12 - INFO - codeparrot_training - Step 2796: {'lr': 0.0004999643136500027, 'samples': 1432064, 'steps': 2796, 'loss/train': 3.2770564556121826} +03/03/2022 16:42:13 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/03/2022 16:42:17 - INFO - codeparrot_training - Step 2797: {'lr': 0.0004999642239316249, 'samples': 1432576, 'steps': 2797, 'loss/train': 3.998497247695923} +03/03/2022 16:42:20 - INFO - codeparrot_training - Step 2798: {'lr': 0.000499964134100617, 'samples': 1433088, 'steps': 2798, 'loss/train': 3.8539581298828125} +03/03/2022 16:42:21 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/03/2022 16:42:25 - INFO - codeparrot_training - Step 2799: {'lr': 0.0004999640441569793, 'samples': 1433600, 'steps': 2799, 'loss/train': 3.7501556873321533} +03/03/2022 16:42:28 - INFO - codeparrot_training - Step 2800: {'lr': 0.0004999639541007116, 'samples': 1434112, 'steps': 2800, 'loss/train': 2.857802152633667} +03/03/2022 16:42:30 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/03/2022 16:42:34 - INFO - codeparrot_training - Step 2801: {'lr': 0.0004999638639318141, 'samples': 1434624, 'steps': 2801, 'loss/train': 1.9306632280349731} +03/03/2022 16:42:37 - INFO - codeparrot_training - Step 2802: {'lr': 0.0004999637736502866, 'samples': 1435136, 'steps': 2802, 'loss/train': 2.8554866313934326} +03/03/2022 16:42:38 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 16:42:42 - INFO - codeparrot_training - Step 2803: {'lr': 0.0004999636832561293, 'samples': 1435648, 'steps': 2803, 'loss/train': 2.6029152870178223} +03/03/2022 16:42:46 - INFO - codeparrot_training - Step 2804: {'lr': 0.0004999635927493423, 'samples': 1436160, 'steps': 2804, 'loss/train': 2.9625725746154785} +03/03/2022 16:42:47 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/03/2022 16:42:51 - INFO - codeparrot_training - Step 2805: {'lr': 0.0004999635021299255, 'samples': 1436672, 'steps': 2805, 'loss/train': 2.49249267578125} +03/03/2022 16:42:54 - INFO - codeparrot_training - Step 2806: {'lr': 0.0004999634113978791, 'samples': 1437184, 'steps': 2806, 'loss/train': 1.568454623222351} +03/03/2022 16:42:56 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/03/2022 16:43:00 - INFO - codeparrot_training - Step 2807: {'lr': 0.0004999633205532029, 'samples': 1437696, 'steps': 2807, 'loss/train': 3.222116231918335} +03/03/2022 16:43:03 - INFO - codeparrot_training - Step 2808: {'lr': 0.0004999632295958972, 'samples': 1438208, 'steps': 2808, 'loss/train': 3.8004493713378906} +03/03/2022 16:43:04 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/03/2022 16:43:08 - INFO - codeparrot_training - Step 2809: {'lr': 0.0004999631385259617, 'samples': 1438720, 'steps': 2809, 'loss/train': 3.1872613430023193} +03/03/2022 16:43:11 - INFO - codeparrot_training - Step 2810: {'lr': 0.000499963047343397, 'samples': 1439232, 'steps': 2810, 'loss/train': 2.936629056930542} +03/03/2022 16:43:13 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/03/2022 16:43:17 - INFO - codeparrot_training - Step 2811: {'lr': 0.0004999629560482026, 'samples': 1439744, 'steps': 2811, 'loss/train': 2.819894790649414} +03/03/2022 16:43:20 - INFO - codeparrot_training - Step 2812: {'lr': 0.0004999628646403788, 'samples': 1440256, 'steps': 2812, 'loss/train': 2.7867915630340576} +03/03/2022 16:43:21 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/03/2022 16:43:25 - INFO - codeparrot_training - Step 2813: {'lr': 0.0004999627731199256, 'samples': 1440768, 'steps': 2813, 'loss/train': 2.9790804386138916} +03/03/2022 16:43:28 - INFO - codeparrot_training - Step 2814: {'lr': 0.0004999626814868429, 'samples': 1441280, 'steps': 2814, 'loss/train': 3.0541512966156006} +03/03/2022 16:43:29 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/03/2022 16:43:33 - INFO - codeparrot_training - Step 2815: {'lr': 0.0004999625897411311, 'samples': 1441792, 'steps': 2815, 'loss/train': 3.0761306285858154} +03/03/2022 16:43:37 - INFO - codeparrot_training - Step 2816: {'lr': 0.0004999624978827899, 'samples': 1442304, 'steps': 2816, 'loss/train': 2.8112967014312744} +03/03/2022 16:43:38 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/03/2022 16:43:42 - INFO - codeparrot_training - Step 2817: {'lr': 0.0004999624059118194, 'samples': 1442816, 'steps': 2817, 'loss/train': 3.1284847259521484} +03/03/2022 16:43:45 - INFO - codeparrot_training - Step 2818: {'lr': 0.0004999623138282198, 'samples': 1443328, 'steps': 2818, 'loss/train': 3.0432567596435547} +03/03/2022 16:43:46 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/03/2022 16:43:50 - INFO - codeparrot_training - Step 2819: {'lr': 0.000499962221631991, 'samples': 1443840, 'steps': 2819, 'loss/train': 2.4113528728485107} +03/03/2022 16:43:53 - INFO - codeparrot_training - Step 2820: {'lr': 0.0004999621293231331, 'samples': 1444352, 'steps': 2820, 'loss/train': 2.8052361011505127} +03/03/2022 16:43:54 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/03/2022 16:43:59 - INFO - codeparrot_training - Step 2821: {'lr': 0.0004999620369016461, 'samples': 1444864, 'steps': 2821, 'loss/train': 2.4694979190826416} +03/03/2022 16:44:02 - INFO - codeparrot_training - Step 2822: {'lr': 0.00049996194436753, 'samples': 1445376, 'steps': 2822, 'loss/train': 3.619378089904785} +03/03/2022 16:44:03 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/03/2022 16:44:07 - INFO - codeparrot_training - Step 2823: {'lr': 0.000499961851720785, 'samples': 1445888, 'steps': 2823, 'loss/train': 1.5173958539962769} +03/03/2022 16:44:10 - INFO - codeparrot_training - Step 2824: {'lr': 0.000499961758961411, 'samples': 1446400, 'steps': 2824, 'loss/train': 1.1305230855941772} +03/03/2022 16:44:11 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/03/2022 16:44:15 - INFO - codeparrot_training - Step 2825: {'lr': 0.0004999616660894081, 'samples': 1446912, 'steps': 2825, 'loss/train': 2.3258564472198486} +03/03/2022 16:44:19 - INFO - codeparrot_training - Step 2826: {'lr': 0.0004999615731047762, 'samples': 1447424, 'steps': 2826, 'loss/train': 2.947964668273926} +03/03/2022 16:44:19 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/03/2022 16:44:24 - INFO - codeparrot_training - Step 2827: {'lr': 0.0004999614800075158, 'samples': 1447936, 'steps': 2827, 'loss/train': 3.4644880294799805} +03/03/2022 16:44:27 - INFO - codeparrot_training - Step 2828: {'lr': 0.0004999613867976264, 'samples': 1448448, 'steps': 2828, 'loss/train': 2.9566638469696045} +03/03/2022 16:44:27 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/03/2022 16:44:32 - INFO - codeparrot_training - Step 2829: {'lr': 0.0004999612934751082, 'samples': 1448960, 'steps': 2829, 'loss/train': 3.086613416671753} +03/03/2022 16:44:36 - INFO - codeparrot_training - Step 2830: {'lr': 0.0004999612000399614, 'samples': 1449472, 'steps': 2830, 'loss/train': 1.5432345867156982} +03/03/2022 16:44:36 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/03/2022 16:44:41 - INFO - codeparrot_training - Step 2831: {'lr': 0.0004999611064921859, 'samples': 1449984, 'steps': 2831, 'loss/train': 3.0353586673736572} +03/03/2022 16:44:44 - INFO - codeparrot_training - Step 2832: {'lr': 0.0004999610128317818, 'samples': 1450496, 'steps': 2832, 'loss/train': 3.705756902694702} +03/03/2022 16:44:44 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/03/2022 16:44:49 - INFO - codeparrot_training - Step 2833: {'lr': 0.0004999609190587492, 'samples': 1451008, 'steps': 2833, 'loss/train': 2.6160504817962646} +03/03/2022 16:44:52 - INFO - codeparrot_training - Step 2834: {'lr': 0.000499960825173088, 'samples': 1451520, 'steps': 2834, 'loss/train': 2.667778253555298} +03/03/2022 16:44:53 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/03/2022 16:44:58 - INFO - codeparrot_training - Step 2835: {'lr': 0.0004999607311747983, 'samples': 1452032, 'steps': 2835, 'loss/train': 2.8666133880615234} +03/03/2022 16:45:01 - INFO - codeparrot_training - Step 2836: {'lr': 0.0004999606370638801, 'samples': 1452544, 'steps': 2836, 'loss/train': 1.6883126497268677} +03/03/2022 16:45:01 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 16:45:06 - INFO - codeparrot_training - Step 2837: {'lr': 0.0004999605428403336, 'samples': 1453056, 'steps': 2837, 'loss/train': 2.7956125736236572} +03/03/2022 16:45:09 - INFO - codeparrot_training - Step 2838: {'lr': 0.0004999604485041585, 'samples': 1453568, 'steps': 2838, 'loss/train': 0.4438847303390503} +03/03/2022 16:45:09 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/03/2022 16:45:15 - INFO - codeparrot_training - Step 2839: {'lr': 0.0004999603540553554, 'samples': 1454080, 'steps': 2839, 'loss/train': 3.6946747303009033} +03/03/2022 16:45:17 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/03/2022 16:45:20 - INFO - codeparrot_training - Step 2840: {'lr': 0.0004999602594939238, 'samples': 1454592, 'steps': 2840, 'loss/train': 2.51926589012146} +03/03/2022 16:45:23 - INFO - codeparrot_training - Step 2841: {'lr': 0.0004999601648198641, 'samples': 1455104, 'steps': 2841, 'loss/train': 3.2881579399108887} +03/03/2022 16:45:26 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/03/2022 16:45:28 - INFO - codeparrot_training - Step 2842: {'lr': 0.0004999600700331761, 'samples': 1455616, 'steps': 2842, 'loss/train': 2.6718385219573975} +03/03/2022 16:45:31 - INFO - codeparrot_training - Step 2843: {'lr': 0.0004999599751338601, 'samples': 1456128, 'steps': 2843, 'loss/train': 2.1170666217803955} +03/03/2022 16:45:34 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/03/2022 16:45:37 - INFO - codeparrot_training - Step 2844: {'lr': 0.0004999598801219158, 'samples': 1456640, 'steps': 2844, 'loss/train': 2.5187900066375732} +03/03/2022 16:45:40 - INFO - codeparrot_training - Step 2845: {'lr': 0.0004999597849973435, 'samples': 1457152, 'steps': 2845, 'loss/train': 3.298952579498291} +03/03/2022 16:45:42 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/03/2022 16:45:45 - INFO - codeparrot_training - Step 2846: {'lr': 0.0004999596897601432, 'samples': 1457664, 'steps': 2846, 'loss/train': 3.2442617416381836} +03/03/2022 16:45:48 - INFO - codeparrot_training - Step 2847: {'lr': 0.0004999595944103149, 'samples': 1458176, 'steps': 2847, 'loss/train': 3.1593821048736572} +03/03/2022 16:45:51 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/03/2022 16:45:54 - INFO - codeparrot_training - Step 2848: {'lr': 0.0004999594989478587, 'samples': 1458688, 'steps': 2848, 'loss/train': 2.7550294399261475} +03/03/2022 16:45:57 - INFO - codeparrot_training - Step 2849: {'lr': 0.0004999594033727747, 'samples': 1459200, 'steps': 2849, 'loss/train': 2.896057605743408} +03/03/2022 16:45:59 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/03/2022 16:46:02 - INFO - codeparrot_training - Step 2850: {'lr': 0.0004999593076850627, 'samples': 1459712, 'steps': 2850, 'loss/train': 0.38432011008262634} +03/03/2022 16:46:05 - INFO - codeparrot_training - Step 2851: {'lr': 0.0004999592118847229, 'samples': 1460224, 'steps': 2851, 'loss/train': 2.7161507606506348} +03/03/2022 16:46:07 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/03/2022 16:46:10 - INFO - codeparrot_training - Step 2852: {'lr': 0.0004999591159717554, 'samples': 1460736, 'steps': 2852, 'loss/train': 2.5032246112823486} +03/03/2022 16:46:14 - INFO - codeparrot_training - Step 2853: {'lr': 0.0004999590199461602, 'samples': 1461248, 'steps': 2853, 'loss/train': 3.5437097549438477} +03/03/2022 16:46:16 - INFO - codeparrot_training - Skipping example with length 917 (seq_length=1024) +03/03/2022 16:46:19 - INFO - codeparrot_training - Step 2854: {'lr': 0.0004999589238079373, 'samples': 1461760, 'steps': 2854, 'loss/train': 2.760153293609619} +03/03/2022 16:46:22 - INFO - codeparrot_training - Step 2855: {'lr': 0.0004999588275570868, 'samples': 1462272, 'steps': 2855, 'loss/train': 2.4435343742370605} +03/03/2022 16:46:24 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/03/2022 16:46:27 - INFO - codeparrot_training - Step 2856: {'lr': 0.0004999587311936086, 'samples': 1462784, 'steps': 2856, 'loss/train': 2.5376758575439453} +03/03/2022 16:46:30 - INFO - codeparrot_training - Step 2857: {'lr': 0.000499958634717503, 'samples': 1463296, 'steps': 2857, 'loss/train': 3.098510503768921} +03/03/2022 16:46:32 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/03/2022 16:46:36 - INFO - codeparrot_training - Step 2858: {'lr': 0.0004999585381287696, 'samples': 1463808, 'steps': 2858, 'loss/train': 2.388951539993286} +03/03/2022 16:46:39 - INFO - codeparrot_training - Step 2859: {'lr': 0.000499958441427409, 'samples': 1464320, 'steps': 2859, 'loss/train': 1.6173213720321655} +03/03/2022 16:46:40 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/03/2022 16:46:44 - INFO - codeparrot_training - Step 2860: {'lr': 0.0004999583446134209, 'samples': 1464832, 'steps': 2860, 'loss/train': 3.3134214878082275} +03/03/2022 16:46:47 - INFO - codeparrot_training - Step 2861: {'lr': 0.0004999582476868055, 'samples': 1465344, 'steps': 2861, 'loss/train': 2.238408088684082} +03/03/2022 16:46:49 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/03/2022 16:46:53 - INFO - codeparrot_training - Step 2862: {'lr': 0.0004999581506475627, 'samples': 1465856, 'steps': 2862, 'loss/train': 2.844252109527588} +03/03/2022 16:46:56 - INFO - codeparrot_training - Step 2863: {'lr': 0.0004999580534956927, 'samples': 1466368, 'steps': 2863, 'loss/train': 2.686821460723877} +03/03/2022 16:46:57 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/03/2022 16:47:01 - INFO - codeparrot_training - Step 2864: {'lr': 0.0004999579562311953, 'samples': 1466880, 'steps': 2864, 'loss/train': 2.783883810043335} +03/03/2022 16:47:04 - INFO - codeparrot_training - Step 2865: {'lr': 0.0004999578588540709, 'samples': 1467392, 'steps': 2865, 'loss/train': 2.1759700775146484} +03/03/2022 16:47:06 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/03/2022 16:47:10 - INFO - codeparrot_training - Step 2866: {'lr': 0.0004999577613643192, 'samples': 1467904, 'steps': 2866, 'loss/train': 2.838135242462158} +03/03/2022 16:47:13 - INFO - codeparrot_training - Step 2867: {'lr': 0.0004999576637619404, 'samples': 1468416, 'steps': 2867, 'loss/train': 2.4004790782928467} +03/03/2022 16:47:15 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/03/2022 16:47:18 - INFO - codeparrot_training - Step 2868: {'lr': 0.0004999575660469347, 'samples': 1468928, 'steps': 2868, 'loss/train': 3.1228396892547607} +03/03/2022 16:47:21 - INFO - codeparrot_training - Step 2869: {'lr': 0.0004999574682193017, 'samples': 1469440, 'steps': 2869, 'loss/train': 2.6474180221557617} +03/03/2022 16:47:24 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/03/2022 16:47:26 - INFO - codeparrot_training - Step 2870: {'lr': 0.0004999573702790419, 'samples': 1469952, 'steps': 2870, 'loss/train': 2.027031183242798} +03/03/2022 16:47:30 - INFO - codeparrot_training - Step 2871: {'lr': 0.0004999572722261551, 'samples': 1470464, 'steps': 2871, 'loss/train': 3.0093166828155518} +03/03/2022 16:47:32 - INFO - codeparrot_training - Skipping example with length 178 (seq_length=1024) +03/03/2022 16:47:35 - INFO - codeparrot_training - Step 2872: {'lr': 0.0004999571740606415, 'samples': 1470976, 'steps': 2872, 'loss/train': 2.5463478565216064} +03/03/2022 16:47:38 - INFO - codeparrot_training - Step 2873: {'lr': 0.000499957075782501, 'samples': 1471488, 'steps': 2873, 'loss/train': 3.1903157234191895} +03/03/2022 16:47:40 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/03/2022 16:47:43 - INFO - codeparrot_training - Step 2874: {'lr': 0.0004999569773917337, 'samples': 1472000, 'steps': 2874, 'loss/train': 2.2402002811431885} +03/03/2022 16:47:46 - INFO - codeparrot_training - Step 2875: {'lr': 0.0004999568788883397, 'samples': 1472512, 'steps': 2875, 'loss/train': 1.9795204401016235} +03/03/2022 16:47:49 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/03/2022 16:47:52 - INFO - codeparrot_training - Step 2876: {'lr': 0.0004999567802723188, 'samples': 1473024, 'steps': 2876, 'loss/train': 2.8544304370880127} +03/03/2022 16:47:55 - INFO - codeparrot_training - Step 2877: {'lr': 0.0004999566815436715, 'samples': 1473536, 'steps': 2877, 'loss/train': 3.3672308921813965} +03/03/2022 16:47:57 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/03/2022 16:48:00 - INFO - codeparrot_training - Step 2878: {'lr': 0.0004999565827023974, 'samples': 1474048, 'steps': 2878, 'loss/train': 2.9266796112060547} +03/03/2022 16:48:03 - INFO - codeparrot_training - Step 2879: {'lr': 0.0004999564837484967, 'samples': 1474560, 'steps': 2879, 'loss/train': 2.1751248836517334} +03/03/2022 16:48:05 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/03/2022 16:48:08 - INFO - codeparrot_training - Step 2880: {'lr': 0.0004999563846819696, 'samples': 1475072, 'steps': 2880, 'loss/train': 3.251425266265869} +03/03/2022 16:48:12 - INFO - codeparrot_training - Step 2881: {'lr': 0.0004999562855028159, 'samples': 1475584, 'steps': 2881, 'loss/train': 2.9958724975585938} +03/03/2022 16:48:13 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/03/2022 16:48:17 - INFO - codeparrot_training - Step 2882: {'lr': 0.0004999561862110358, 'samples': 1476096, 'steps': 2882, 'loss/train': 3.046231269836426} +03/03/2022 16:48:20 - INFO - codeparrot_training - Step 2883: {'lr': 0.0004999560868066293, 'samples': 1476608, 'steps': 2883, 'loss/train': 3.035357713699341} +03/03/2022 16:48:22 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/03/2022 16:48:25 - INFO - codeparrot_training - Step 2884: {'lr': 0.0004999559872895964, 'samples': 1477120, 'steps': 2884, 'loss/train': 2.7119312286376953} +03/03/2022 16:48:28 - INFO - codeparrot_training - Step 2885: {'lr': 0.0004999558876599373, 'samples': 1477632, 'steps': 2885, 'loss/train': 2.8161816596984863} +03/03/2022 16:48:30 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/03/2022 16:48:34 - INFO - codeparrot_training - Step 2886: {'lr': 0.0004999557879176518, 'samples': 1478144, 'steps': 2886, 'loss/train': 2.352759838104248} +03/03/2022 16:48:37 - INFO - codeparrot_training - Step 2887: {'lr': 0.0004999556880627401, 'samples': 1478656, 'steps': 2887, 'loss/train': 2.4657838344573975} +03/03/2022 16:48:38 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/03/2022 16:48:42 - INFO - codeparrot_training - Step 2888: {'lr': 0.0004999555880952023, 'samples': 1479168, 'steps': 2888, 'loss/train': 3.347620964050293} +03/03/2022 16:48:45 - INFO - codeparrot_training - Step 2889: {'lr': 0.0004999554880150383, 'samples': 1479680, 'steps': 2889, 'loss/train': 1.623335599899292} +03/03/2022 16:48:47 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/03/2022 16:48:50 - INFO - codeparrot_training - Step 2890: {'lr': 0.0004999553878222482, 'samples': 1480192, 'steps': 2890, 'loss/train': 2.8661653995513916} +03/03/2022 16:48:54 - INFO - codeparrot_training - Step 2891: {'lr': 0.0004999552875168321, 'samples': 1480704, 'steps': 2891, 'loss/train': 3.1229605674743652} +03/03/2022 16:48:55 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/03/2022 16:48:59 - INFO - codeparrot_training - Step 2892: {'lr': 0.0004999551870987901, 'samples': 1481216, 'steps': 2892, 'loss/train': 1.4061732292175293} +03/03/2022 16:49:02 - INFO - codeparrot_training - Step 2893: {'lr': 0.000499955086568122, 'samples': 1481728, 'steps': 2893, 'loss/train': 3.0606513023376465} +03/03/2022 16:49:04 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/03/2022 16:49:07 - INFO - codeparrot_training - Step 2894: {'lr': 0.000499954985924828, 'samples': 1482240, 'steps': 2894, 'loss/train': 1.5282270908355713} +03/03/2022 16:49:10 - INFO - codeparrot_training - Step 2895: {'lr': 0.0004999548851689082, 'samples': 1482752, 'steps': 2895, 'loss/train': 2.9949023723602295} +03/03/2022 16:49:12 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/03/2022 16:49:16 - INFO - codeparrot_training - Step 2896: {'lr': 0.0004999547843003627, 'samples': 1483264, 'steps': 2896, 'loss/train': 0.6338028907775879} +03/03/2022 16:49:19 - INFO - codeparrot_training - Step 2897: {'lr': 0.0004999546833191912, 'samples': 1483776, 'steps': 2897, 'loss/train': 2.896749973297119} +03/03/2022 16:49:21 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/03/2022 16:49:24 - INFO - codeparrot_training - Step 2898: {'lr': 0.0004999545822253941, 'samples': 1484288, 'steps': 2898, 'loss/train': 2.1511001586914062} +03/03/2022 16:49:27 - INFO - codeparrot_training - Step 2899: {'lr': 0.0004999544810189713, 'samples': 1484800, 'steps': 2899, 'loss/train': 4.156933784484863} +03/03/2022 16:49:30 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/03/2022 16:49:33 - INFO - codeparrot_training - Step 2900: {'lr': 0.0004999543796999228, 'samples': 1485312, 'steps': 2900, 'loss/train': 3.070873498916626} +03/03/2022 16:49:36 - INFO - codeparrot_training - Step 2901: {'lr': 0.0004999542782682489, 'samples': 1485824, 'steps': 2901, 'loss/train': 2.656186580657959} +03/03/2022 16:49:38 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/03/2022 16:49:41 - INFO - codeparrot_training - Step 2902: {'lr': 0.0004999541767239493, 'samples': 1486336, 'steps': 2902, 'loss/train': 1.978148102760315} +03/03/2022 16:49:44 - INFO - codeparrot_training - Step 2903: {'lr': 0.0004999540750670243, 'samples': 1486848, 'steps': 2903, 'loss/train': 1.9795753955841064} +03/03/2022 16:49:46 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/03/2022 16:49:50 - INFO - codeparrot_training - Step 2904: {'lr': 0.0004999539732974738, 'samples': 1487360, 'steps': 2904, 'loss/train': 6.763665199279785} +03/03/2022 16:49:53 - INFO - codeparrot_training - Step 2905: {'lr': 0.0004999538714152978, 'samples': 1487872, 'steps': 2905, 'loss/train': 3.782104969024658} +03/03/2022 16:49:56 - INFO - codeparrot_training - Step 2906: {'lr': 0.0004999537694204966, 'samples': 1488384, 'steps': 2906, 'loss/train': 1.9802603721618652} +03/03/2022 16:49:56 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/03/2022 16:50:01 - INFO - codeparrot_training - Step 2907: {'lr': 0.0004999536673130701, 'samples': 1488896, 'steps': 2907, 'loss/train': 1.8991543054580688} +03/03/2022 16:50:04 - INFO - codeparrot_training - Step 2908: {'lr': 0.0004999535650930182, 'samples': 1489408, 'steps': 2908, 'loss/train': 2.644139051437378} +03/03/2022 16:50:05 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/03/2022 16:50:09 - INFO - codeparrot_training - Step 2909: {'lr': 0.0004999534627603411, 'samples': 1489920, 'steps': 2909, 'loss/train': 3.088216543197632} +03/03/2022 16:50:13 - INFO - codeparrot_training - Step 2910: {'lr': 0.0004999533603150389, 'samples': 1490432, 'steps': 2910, 'loss/train': 3.5622875690460205} +03/03/2022 16:50:13 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/03/2022 16:50:18 - INFO - codeparrot_training - Step 2911: {'lr': 0.0004999532577571116, 'samples': 1490944, 'steps': 2911, 'loss/train': 2.801483154296875} +03/03/2022 16:50:21 - INFO - codeparrot_training - Step 2912: {'lr': 0.0004999531550865592, 'samples': 1491456, 'steps': 2912, 'loss/train': 2.2697737216949463} +03/03/2022 16:50:21 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/03/2022 16:50:26 - INFO - codeparrot_training - Step 2913: {'lr': 0.0004999530523033817, 'samples': 1491968, 'steps': 2913, 'loss/train': 2.3897781372070312} +03/03/2022 16:50:30 - INFO - codeparrot_training - Step 2914: {'lr': 0.0004999529494075792, 'samples': 1492480, 'steps': 2914, 'loss/train': 2.510364532470703} +03/03/2022 16:50:30 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/03/2022 16:50:35 - INFO - codeparrot_training - Step 2915: {'lr': 0.0004999528463991518, 'samples': 1492992, 'steps': 2915, 'loss/train': 2.54477858543396} +03/03/2022 16:50:37 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/03/2022 16:50:40 - INFO - codeparrot_training - Step 2916: {'lr': 0.0004999527432780995, 'samples': 1493504, 'steps': 2916, 'loss/train': 3.213799238204956} +03/03/2022 16:50:43 - INFO - codeparrot_training - Step 2917: {'lr': 0.0004999526400444223, 'samples': 1494016, 'steps': 2917, 'loss/train': 2.6195068359375} +03/03/2022 16:50:46 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/03/2022 16:50:48 - INFO - codeparrot_training - Step 2918: {'lr': 0.0004999525366981204, 'samples': 1494528, 'steps': 2918, 'loss/train': 2.4569740295410156} +03/03/2022 16:50:52 - INFO - codeparrot_training - Step 2919: {'lr': 0.0004999524332391937, 'samples': 1495040, 'steps': 2919, 'loss/train': 2.6555092334747314} +03/03/2022 16:50:54 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/03/2022 16:50:57 - INFO - codeparrot_training - Step 2920: {'lr': 0.0004999523296676423, 'samples': 1495552, 'steps': 2920, 'loss/train': 3.5851943492889404} +03/03/2022 16:51:00 - INFO - codeparrot_training - Step 2921: {'lr': 0.0004999522259834662, 'samples': 1496064, 'steps': 2921, 'loss/train': 1.0952709913253784} +03/03/2022 16:51:02 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/03/2022 16:51:05 - INFO - codeparrot_training - Step 2922: {'lr': 0.0004999521221866655, 'samples': 1496576, 'steps': 2922, 'loss/train': 2.8582754135131836} +03/03/2022 16:51:09 - INFO - codeparrot_training - Step 2923: {'lr': 0.0004999520182772402, 'samples': 1497088, 'steps': 2923, 'loss/train': 0.3613796830177307} +03/03/2022 16:51:11 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/03/2022 16:51:14 - INFO - codeparrot_training - Step 2924: {'lr': 0.0004999519142551905, 'samples': 1497600, 'steps': 2924, 'loss/train': 2.3661694526672363} +03/03/2022 16:51:17 - INFO - codeparrot_training - Step 2925: {'lr': 0.0004999518101205162, 'samples': 1498112, 'steps': 2925, 'loss/train': 2.803722858428955} +03/03/2022 16:51:19 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/03/2022 16:51:22 - INFO - codeparrot_training - Step 2926: {'lr': 0.0004999517058732175, 'samples': 1498624, 'steps': 2926, 'loss/train': 2.7775557041168213} +03/03/2022 16:51:25 - INFO - codeparrot_training - Step 2927: {'lr': 0.0004999516015132945, 'samples': 1499136, 'steps': 2927, 'loss/train': 3.014251947402954} +03/03/2022 16:51:27 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/03/2022 16:51:31 - INFO - codeparrot_training - Step 2928: {'lr': 0.0004999514970407471, 'samples': 1499648, 'steps': 2928, 'loss/train': 3.1578376293182373} +03/03/2022 16:51:34 - INFO - codeparrot_training - Step 2929: {'lr': 0.0004999513924555754, 'samples': 1500160, 'steps': 2929, 'loss/train': 3.245116710662842} +03/03/2022 16:51:36 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 16:51:39 - INFO - codeparrot_training - Step 2930: {'lr': 0.0004999512877577794, 'samples': 1500672, 'steps': 2930, 'loss/train': 3.635124444961548} +03/03/2022 16:51:42 - INFO - codeparrot_training - Step 2931: {'lr': 0.0004999511829473593, 'samples': 1501184, 'steps': 2931, 'loss/train': 3.4634807109832764} +03/03/2022 16:51:44 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/03/2022 16:51:48 - INFO - codeparrot_training - Step 2932: {'lr': 0.0004999510780243151, 'samples': 1501696, 'steps': 2932, 'loss/train': 3.018321990966797} +03/03/2022 16:51:51 - INFO - codeparrot_training - Step 2933: {'lr': 0.0004999509729886467, 'samples': 1502208, 'steps': 2933, 'loss/train': 1.8695935010910034} +03/03/2022 16:51:53 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/03/2022 16:51:56 - INFO - codeparrot_training - Step 2934: {'lr': 0.0004999508678403542, 'samples': 1502720, 'steps': 2934, 'loss/train': 2.63712215423584} +03/03/2022 16:51:59 - INFO - codeparrot_training - Step 2935: {'lr': 0.0004999507625794378, 'samples': 1503232, 'steps': 2935, 'loss/train': 2.7933175563812256} +03/03/2022 16:52:01 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/03/2022 16:52:04 - INFO - codeparrot_training - Step 2936: {'lr': 0.0004999506572058974, 'samples': 1503744, 'steps': 2936, 'loss/train': 2.4611945152282715} +03/03/2022 16:52:08 - INFO - codeparrot_training - Step 2937: {'lr': 0.0004999505517197331, 'samples': 1504256, 'steps': 2937, 'loss/train': 3.586103677749634} +03/03/2022 16:52:09 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/03/2022 16:52:13 - INFO - codeparrot_training - Step 2938: {'lr': 0.000499950446120945, 'samples': 1504768, 'steps': 2938, 'loss/train': 1.9715590476989746} +03/03/2022 16:52:16 - INFO - codeparrot_training - Step 2939: {'lr': 0.000499950340409533, 'samples': 1505280, 'steps': 2939, 'loss/train': 1.2664121389389038} +03/03/2022 16:52:17 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/03/2022 16:52:21 - INFO - codeparrot_training - Step 2940: {'lr': 0.0004999502345854973, 'samples': 1505792, 'steps': 2940, 'loss/train': 2.706735134124756} +03/03/2022 16:52:24 - INFO - codeparrot_training - Step 2941: {'lr': 0.0004999501286488378, 'samples': 1506304, 'steps': 2941, 'loss/train': 2.703542470932007} +03/03/2022 16:52:26 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/03/2022 16:52:30 - INFO - codeparrot_training - Step 2942: {'lr': 0.0004999500225995547, 'samples': 1506816, 'steps': 2942, 'loss/train': 2.9381766319274902} +03/03/2022 16:52:33 - INFO - codeparrot_training - Step 2943: {'lr': 0.000499949916437648, 'samples': 1507328, 'steps': 2943, 'loss/train': 2.9742491245269775} +03/03/2022 16:52:34 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/03/2022 16:52:38 - INFO - codeparrot_training - Step 2944: {'lr': 0.0004999498101631177, 'samples': 1507840, 'steps': 2944, 'loss/train': 2.764361619949341} +03/03/2022 16:52:41 - INFO - codeparrot_training - Step 2945: {'lr': 0.0004999497037759638, 'samples': 1508352, 'steps': 2945, 'loss/train': 2.016622304916382} +03/03/2022 16:52:42 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/03/2022 16:52:46 - INFO - codeparrot_training - Step 2946: {'lr': 0.0004999495972761865, 'samples': 1508864, 'steps': 2946, 'loss/train': 2.1625781059265137} +03/03/2022 16:52:50 - INFO - codeparrot_training - Step 2947: {'lr': 0.0004999494906637857, 'samples': 1509376, 'steps': 2947, 'loss/train': 1.55034339427948} +03/03/2022 16:52:51 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/03/2022 16:52:55 - INFO - codeparrot_training - Step 2948: {'lr': 0.0004999493839387615, 'samples': 1509888, 'steps': 2948, 'loss/train': 2.8589117527008057} +03/03/2022 16:52:58 - INFO - codeparrot_training - Step 2949: {'lr': 0.000499949277101114, 'samples': 1510400, 'steps': 2949, 'loss/train': 2.5486247539520264} +03/03/2022 16:52:59 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/03/2022 16:53:03 - INFO - codeparrot_training - Step 2950: {'lr': 0.0004999491701508433, 'samples': 1510912, 'steps': 2950, 'loss/train': 2.798234224319458} +03/03/2022 16:53:06 - INFO - codeparrot_training - Step 2951: {'lr': 0.0004999490630879493, 'samples': 1511424, 'steps': 2951, 'loss/train': 3.435621738433838} +03/03/2022 16:53:07 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/03/2022 16:53:12 - INFO - codeparrot_training - Step 2952: {'lr': 0.0004999489559124321, 'samples': 1511936, 'steps': 2952, 'loss/train': 3.4545528888702393} +03/03/2022 16:53:15 - INFO - codeparrot_training - Step 2953: {'lr': 0.0004999488486242918, 'samples': 1512448, 'steps': 2953, 'loss/train': 2.7492282390594482} +03/03/2022 16:53:15 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/03/2022 16:53:20 - INFO - codeparrot_training - Step 2954: {'lr': 0.0004999487412235284, 'samples': 1512960, 'steps': 2954, 'loss/train': 2.9947216510772705} +03/03/2022 16:53:23 - INFO - codeparrot_training - Step 2955: {'lr': 0.0004999486337101419, 'samples': 1513472, 'steps': 2955, 'loss/train': 1.8972951173782349} +03/03/2022 16:53:24 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/03/2022 16:53:28 - INFO - codeparrot_training - Step 2956: {'lr': 0.0004999485260841324, 'samples': 1513984, 'steps': 2956, 'loss/train': 2.149407148361206} +03/03/2022 16:53:32 - INFO - codeparrot_training - Step 2957: {'lr': 0.0004999484183455, 'samples': 1514496, 'steps': 2957, 'loss/train': 1.9423972368240356} +03/03/2022 16:53:32 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/03/2022 16:53:37 - INFO - codeparrot_training - Step 2958: {'lr': 0.0004999483104942446, 'samples': 1515008, 'steps': 2958, 'loss/train': 3.0708155632019043} +03/03/2022 16:53:40 - INFO - codeparrot_training - Step 2959: {'lr': 0.0004999482025303665, 'samples': 1515520, 'steps': 2959, 'loss/train': 3.2037572860717773} +03/03/2022 16:53:40 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/03/2022 16:53:45 - INFO - codeparrot_training - Step 2960: {'lr': 0.0004999480944538655, 'samples': 1516032, 'steps': 2960, 'loss/train': 3.3366873264312744} +03/03/2022 16:53:48 - INFO - codeparrot_training - Step 2961: {'lr': 0.0004999479862647417, 'samples': 1516544, 'steps': 2961, 'loss/train': 1.7511944770812988} +03/03/2022 16:53:49 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 16:53:54 - INFO - codeparrot_training - Step 2962: {'lr': 0.0004999478779629953, 'samples': 1517056, 'steps': 2962, 'loss/train': 0.9921990633010864} +03/03/2022 16:53:57 - INFO - codeparrot_training - Step 2963: {'lr': 0.0004999477695486261, 'samples': 1517568, 'steps': 2963, 'loss/train': 3.249025583267212} +03/03/2022 16:53:58 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/03/2022 16:54:02 - INFO - codeparrot_training - Step 2964: {'lr': 0.0004999476610216345, 'samples': 1518080, 'steps': 2964, 'loss/train': 3.530367612838745} +03/03/2022 16:54:05 - INFO - codeparrot_training - Step 2965: {'lr': 0.0004999475523820203, 'samples': 1518592, 'steps': 2965, 'loss/train': 2.7569520473480225} +03/03/2022 16:54:06 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/03/2022 16:54:11 - INFO - codeparrot_training - Step 2966: {'lr': 0.0004999474436297835, 'samples': 1519104, 'steps': 2966, 'loss/train': 2.4646494388580322} +03/03/2022 16:54:14 - INFO - codeparrot_training - Step 2967: {'lr': 0.0004999473347649242, 'samples': 1519616, 'steps': 2967, 'loss/train': 2.6931183338165283} +03/03/2022 16:54:15 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/03/2022 16:54:19 - INFO - codeparrot_training - Step 2968: {'lr': 0.0004999472257874426, 'samples': 1520128, 'steps': 2968, 'loss/train': 3.742434024810791} +03/03/2022 16:54:22 - INFO - codeparrot_training - Step 2969: {'lr': 0.0004999471166973385, 'samples': 1520640, 'steps': 2969, 'loss/train': 2.9466493129730225} +03/03/2022 16:54:23 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/03/2022 16:54:28 - INFO - codeparrot_training - Step 2970: {'lr': 0.0004999470074946122, 'samples': 1521152, 'steps': 2970, 'loss/train': 3.7005269527435303} +03/03/2022 16:54:31 - INFO - codeparrot_training - Step 2971: {'lr': 0.0004999468981792636, 'samples': 1521664, 'steps': 2971, 'loss/train': 2.7872354984283447} +03/03/2022 16:54:33 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/03/2022 16:54:36 - INFO - codeparrot_training - Step 2972: {'lr': 0.0004999467887512928, 'samples': 1522176, 'steps': 2972, 'loss/train': 2.5354108810424805} +03/03/2022 16:54:39 - INFO - codeparrot_training - Step 2973: {'lr': 0.0004999466792106998, 'samples': 1522688, 'steps': 2973, 'loss/train': 3.665862798690796} +03/03/2022 16:54:41 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/03/2022 16:54:44 - INFO - codeparrot_training - Step 2974: {'lr': 0.0004999465695574848, 'samples': 1523200, 'steps': 2974, 'loss/train': 2.738384485244751} +03/03/2022 16:54:48 - INFO - codeparrot_training - Step 2975: {'lr': 0.0004999464597916476, 'samples': 1523712, 'steps': 2975, 'loss/train': 2.64980411529541} +03/03/2022 16:54:49 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/03/2022 16:54:53 - INFO - codeparrot_training - Step 2976: {'lr': 0.0004999463499131884, 'samples': 1524224, 'steps': 2976, 'loss/train': 3.256869077682495} +03/03/2022 16:54:56 - INFO - codeparrot_training - Step 2977: {'lr': 0.0004999462399221073, 'samples': 1524736, 'steps': 2977, 'loss/train': 0.622014582157135} +03/03/2022 16:54:58 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/03/2022 16:55:01 - INFO - codeparrot_training - Step 2978: {'lr': 0.0004999461298184042, 'samples': 1525248, 'steps': 2978, 'loss/train': 2.186582088470459} +03/03/2022 16:55:05 - INFO - codeparrot_training - Step 2979: {'lr': 0.0004999460196020793, 'samples': 1525760, 'steps': 2979, 'loss/train': 2.4215495586395264} +03/03/2022 16:55:06 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/03/2022 16:55:10 - INFO - codeparrot_training - Step 2980: {'lr': 0.0004999459092731326, 'samples': 1526272, 'steps': 2980, 'loss/train': 3.043511390686035} +03/03/2022 16:55:13 - INFO - codeparrot_training - Step 2981: {'lr': 0.000499945798831564, 'samples': 1526784, 'steps': 2981, 'loss/train': 0.34505370259284973} +03/03/2022 16:55:15 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/03/2022 16:55:19 - INFO - codeparrot_training - Step 2982: {'lr': 0.0004999456882773737, 'samples': 1527296, 'steps': 2982, 'loss/train': 2.4173309803009033} +03/03/2022 16:55:22 - INFO - codeparrot_training - Step 2983: {'lr': 0.0004999455776105618, 'samples': 1527808, 'steps': 2983, 'loss/train': 2.212392568588257} +03/03/2022 16:55:24 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/03/2022 16:55:27 - INFO - codeparrot_training - Step 2984: {'lr': 0.0004999454668311283, 'samples': 1528320, 'steps': 2984, 'loss/train': 2.320594072341919} +03/03/2022 16:55:30 - INFO - codeparrot_training - Step 2985: {'lr': 0.0004999453559390731, 'samples': 1528832, 'steps': 2985, 'loss/train': 3.268564224243164} +03/03/2022 16:55:33 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/03/2022 16:55:36 - INFO - codeparrot_training - Step 2986: {'lr': 0.0004999452449343967, 'samples': 1529344, 'steps': 2986, 'loss/train': 2.425050973892212} +03/03/2022 16:55:39 - INFO - codeparrot_training - Step 2987: {'lr': 0.0004999451338170985, 'samples': 1529856, 'steps': 2987, 'loss/train': 1.8262228965759277} +03/03/2022 16:55:41 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/03/2022 16:55:44 - INFO - codeparrot_training - Step 2988: {'lr': 0.000499945022587179, 'samples': 1530368, 'steps': 2988, 'loss/train': 1.778611660003662} +03/03/2022 16:55:47 - INFO - codeparrot_training - Step 2989: {'lr': 0.0004999449112446381, 'samples': 1530880, 'steps': 2989, 'loss/train': 2.0941693782806396} +03/03/2022 16:55:49 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/03/2022 16:55:53 - INFO - codeparrot_training - Step 2990: {'lr': 0.000499944799789476, 'samples': 1531392, 'steps': 2990, 'loss/train': 0.45699453353881836} +03/03/2022 16:55:56 - INFO - codeparrot_training - Step 2991: {'lr': 0.0004999446882216925, 'samples': 1531904, 'steps': 2991, 'loss/train': 3.459641218185425} +03/03/2022 16:55:57 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/03/2022 16:56:01 - INFO - codeparrot_training - Step 2992: {'lr': 0.0004999445765412878, 'samples': 1532416, 'steps': 2992, 'loss/train': 2.763507127761841} +03/03/2022 16:56:04 - INFO - codeparrot_training - Step 2993: {'lr': 0.0004999444647482619, 'samples': 1532928, 'steps': 2993, 'loss/train': 3.3254709243774414} +03/03/2022 16:56:06 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/03/2022 16:56:09 - INFO - codeparrot_training - Step 2994: {'lr': 0.0004999443528426149, 'samples': 1533440, 'steps': 2994, 'loss/train': 3.735809087753296} +03/03/2022 16:56:13 - INFO - codeparrot_training - Step 2995: {'lr': 0.0004999442408243469, 'samples': 1533952, 'steps': 2995, 'loss/train': 2.520660877227783} +03/03/2022 16:56:14 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/03/2022 16:56:18 - INFO - codeparrot_training - Step 2996: {'lr': 0.0004999441286934578, 'samples': 1534464, 'steps': 2996, 'loss/train': 2.5852878093719482} +03/03/2022 16:56:21 - INFO - codeparrot_training - Step 2997: {'lr': 0.0004999440164499478, 'samples': 1534976, 'steps': 2997, 'loss/train': 2.4575204849243164} +03/03/2022 16:56:23 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/03/2022 16:56:26 - INFO - codeparrot_training - Step 2998: {'lr': 0.0004999439040938168, 'samples': 1535488, 'steps': 2998, 'loss/train': 2.7370760440826416} +03/03/2022 16:56:29 - INFO - codeparrot_training - Step 2999: {'lr': 0.000499943791625065, 'samples': 1536000, 'steps': 2999, 'loss/train': 3.0840344429016113} +03/03/2022 16:56:31 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/03/2022 16:56:35 - INFO - codeparrot_training - Step 3000: {'lr': 0.0004999436790436923, 'samples': 1536512, 'steps': 3000, 'loss/train': 2.73323130607605} +03/03/2022 16:56:38 - INFO - codeparrot_training - Step 3001: {'lr': 0.000499943566349699, 'samples': 1537024, 'steps': 3001, 'loss/train': 3.658524751663208} +03/03/2022 16:56:39 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/03/2022 16:56:44 - INFO - codeparrot_training - Step 3002: {'lr': 0.0004999434535430848, 'samples': 1537536, 'steps': 3002, 'loss/train': 2.86822772026062} +03/03/2022 16:56:47 - INFO - codeparrot_training - Step 3003: {'lr': 0.0004999433406238501, 'samples': 1538048, 'steps': 3003, 'loss/train': 3.4883062839508057} +03/03/2022 16:56:48 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/03/2022 16:56:52 - INFO - codeparrot_training - Step 3004: {'lr': 0.0004999432275919947, 'samples': 1538560, 'steps': 3004, 'loss/train': 3.6083505153656006} +03/03/2022 16:56:55 - INFO - codeparrot_training - Step 3005: {'lr': 0.0004999431144475187, 'samples': 1539072, 'steps': 3005, 'loss/train': 3.043722152709961} +03/03/2022 16:56:56 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/03/2022 16:57:00 - INFO - codeparrot_training - Step 3006: {'lr': 0.0004999430011904222, 'samples': 1539584, 'steps': 3006, 'loss/train': 1.712800145149231} +03/03/2022 16:57:04 - INFO - codeparrot_training - Step 3007: {'lr': 0.0004999428878207054, 'samples': 1540096, 'steps': 3007, 'loss/train': 4.08502197265625} +03/03/2022 16:57:05 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/03/2022 16:57:09 - INFO - codeparrot_training - Step 3008: {'lr': 0.000499942774338368, 'samples': 1540608, 'steps': 3008, 'loss/train': 2.389301061630249} +03/03/2022 16:57:12 - INFO - codeparrot_training - Step 3009: {'lr': 0.0004999426607434104, 'samples': 1541120, 'steps': 3009, 'loss/train': 1.6993309259414673} +03/03/2022 16:57:13 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/03/2022 16:57:17 - INFO - codeparrot_training - Step 3010: {'lr': 0.0004999425470358324, 'samples': 1541632, 'steps': 3010, 'loss/train': 2.3994078636169434} +03/03/2022 16:57:20 - INFO - codeparrot_training - Step 3011: {'lr': 0.0004999424332156341, 'samples': 1542144, 'steps': 3011, 'loss/train': 2.4164786338806152} +03/03/2022 16:57:22 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/03/2022 16:57:26 - INFO - codeparrot_training - Step 3012: {'lr': 0.0004999423192828156, 'samples': 1542656, 'steps': 3012, 'loss/train': 1.4398820400238037} +03/03/2022 16:57:29 - INFO - codeparrot_training - Step 3013: {'lr': 0.0004999422052373771, 'samples': 1543168, 'steps': 3013, 'loss/train': 3.3699848651885986} +03/03/2022 16:57:30 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/03/2022 16:57:34 - INFO - codeparrot_training - Step 3014: {'lr': 0.0004999420910793183, 'samples': 1543680, 'steps': 3014, 'loss/train': 3.2891273498535156} +03/03/2022 16:57:37 - INFO - codeparrot_training - Step 3015: {'lr': 0.0004999419768086397, 'samples': 1544192, 'steps': 3015, 'loss/train': 2.9098665714263916} +03/03/2022 16:57:38 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/03/2022 16:57:42 - INFO - codeparrot_training - Step 3016: {'lr': 0.0004999418624253408, 'samples': 1544704, 'steps': 3016, 'loss/train': 2.594470977783203} +03/03/2022 16:57:46 - INFO - codeparrot_training - Step 3017: {'lr': 0.0004999417479294221, 'samples': 1545216, 'steps': 3017, 'loss/train': 3.1557273864746094} +03/03/2022 16:57:46 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/03/2022 16:57:51 - INFO - codeparrot_training - Step 3018: {'lr': 0.0004999416333208835, 'samples': 1545728, 'steps': 3018, 'loss/train': 2.074286460876465} +03/03/2022 16:57:54 - INFO - codeparrot_training - Step 3019: {'lr': 0.0004999415185997252, 'samples': 1546240, 'steps': 3019, 'loss/train': 2.579888105392456} +03/03/2022 16:57:55 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/03/2022 16:57:59 - INFO - codeparrot_training - Step 3020: {'lr': 0.0004999414037659468, 'samples': 1546752, 'steps': 3020, 'loss/train': 3.6203603744506836} +03/03/2022 16:58:02 - INFO - codeparrot_training - Step 3021: {'lr': 0.000499941288819549, 'samples': 1547264, 'steps': 3021, 'loss/train': 3.199841260910034} +03/03/2022 16:58:03 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/03/2022 16:58:08 - INFO - codeparrot_training - Step 3022: {'lr': 0.0004999411737605313, 'samples': 1547776, 'steps': 3022, 'loss/train': 2.1176857948303223} +03/03/2022 16:58:11 - INFO - codeparrot_training - Step 3023: {'lr': 0.000499941058588894, 'samples': 1548288, 'steps': 3023, 'loss/train': 1.9793611764907837} +03/03/2022 16:58:11 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/03/2022 16:58:16 - INFO - codeparrot_training - Step 3024: {'lr': 0.0004999409433046371, 'samples': 1548800, 'steps': 3024, 'loss/train': 2.9934186935424805} +03/03/2022 16:58:19 - INFO - codeparrot_training - Step 3025: {'lr': 0.0004999408279077607, 'samples': 1549312, 'steps': 3025, 'loss/train': 4.017479419708252} +03/03/2022 16:58:20 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/03/2022 16:58:25 - INFO - codeparrot_training - Step 3026: {'lr': 0.0004999407123982649, 'samples': 1549824, 'steps': 3026, 'loss/train': 0.5607479214668274} +03/03/2022 16:58:28 - INFO - codeparrot_training - Step 3027: {'lr': 0.0004999405967761495, 'samples': 1550336, 'steps': 3027, 'loss/train': 3.239078998565674} +03/03/2022 16:58:28 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/03/2022 16:58:33 - INFO - codeparrot_training - Step 3028: {'lr': 0.0004999404810414149, 'samples': 1550848, 'steps': 3028, 'loss/train': 1.772896647453308} +03/03/2022 16:58:36 - INFO - codeparrot_training - Step 3029: {'lr': 0.0004999403651940608, 'samples': 1551360, 'steps': 3029, 'loss/train': 2.721193790435791} +03/03/2022 16:58:37 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/03/2022 16:58:42 - INFO - codeparrot_training - Step 3030: {'lr': 0.0004999402492340875, 'samples': 1551872, 'steps': 3030, 'loss/train': 2.8103930950164795} +03/03/2022 16:58:45 - INFO - codeparrot_training - Step 3031: {'lr': 0.000499940133161495, 'samples': 1552384, 'steps': 3031, 'loss/train': 2.6185073852539062} +03/03/2022 16:58:45 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/03/2022 16:58:50 - INFO - codeparrot_training - Step 3032: {'lr': 0.0004999400169762834, 'samples': 1552896, 'steps': 3032, 'loss/train': 2.8803467750549316} +03/03/2022 16:58:53 - INFO - codeparrot_training - Step 3033: {'lr': 0.0004999399006784525, 'samples': 1553408, 'steps': 3033, 'loss/train': 3.338804244995117} +03/03/2022 16:58:54 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/03/2022 16:58:59 - INFO - codeparrot_training - Step 3034: {'lr': 0.0004999397842680027, 'samples': 1553920, 'steps': 3034, 'loss/train': 3.1873862743377686} +03/03/2022 16:59:02 - INFO - codeparrot_training - Step 3035: {'lr': 0.0004999396677449338, 'samples': 1554432, 'steps': 3035, 'loss/train': 3.9947705268859863} +03/03/2022 16:59:03 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/03/2022 16:59:07 - INFO - codeparrot_training - Step 3036: {'lr': 0.000499939551109246, 'samples': 1554944, 'steps': 3036, 'loss/train': 3.17539119720459} +03/03/2022 16:59:10 - INFO - codeparrot_training - Step 3037: {'lr': 0.0004999394343609393, 'samples': 1555456, 'steps': 3037, 'loss/train': 2.4144952297210693} +03/03/2022 16:59:11 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/03/2022 16:59:16 - INFO - codeparrot_training - Step 3038: {'lr': 0.0004999393175000137, 'samples': 1555968, 'steps': 3038, 'loss/train': 0.7590366005897522} +03/03/2022 16:59:19 - INFO - codeparrot_training - Step 3039: {'lr': 0.0004999392005264694, 'samples': 1556480, 'steps': 3039, 'loss/train': 3.3694674968719482} +03/03/2022 16:59:19 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/03/2022 16:59:24 - INFO - codeparrot_training - Step 3040: {'lr': 0.0004999390834403062, 'samples': 1556992, 'steps': 3040, 'loss/train': 3.011796474456787} +03/03/2022 16:59:27 - INFO - codeparrot_training - Step 3041: {'lr': 0.0004999389662415244, 'samples': 1557504, 'steps': 3041, 'loss/train': 2.1363580226898193} +03/03/2022 16:59:27 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/03/2022 16:59:32 - INFO - codeparrot_training - Step 3042: {'lr': 0.000499938848930124, 'samples': 1558016, 'steps': 3042, 'loss/train': 2.9906158447265625} +03/03/2022 16:59:36 - INFO - codeparrot_training - Step 3043: {'lr': 0.0004999387315061049, 'samples': 1558528, 'steps': 3043, 'loss/train': 2.3319952487945557} +03/03/2022 16:59:36 - INFO - codeparrot_training - Skipping example with length 292 (seq_length=1024) +03/03/2022 16:59:41 - INFO - codeparrot_training - Step 3044: {'lr': 0.0004999386139694673, 'samples': 1559040, 'steps': 3044, 'loss/train': 2.926527500152588} +03/03/2022 16:59:44 - INFO - codeparrot_training - Step 3045: {'lr': 0.0004999384963202113, 'samples': 1559552, 'steps': 3045, 'loss/train': 3.160982847213745} +03/03/2022 16:59:44 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/03/2022 16:59:49 - INFO - codeparrot_training - Step 3046: {'lr': 0.0004999383785583368, 'samples': 1560064, 'steps': 3046, 'loss/train': 2.948795795440674} +03/03/2022 16:59:52 - INFO - codeparrot_training - Step 3047: {'lr': 0.0004999382606838439, 'samples': 1560576, 'steps': 3047, 'loss/train': 4.0511298179626465} +03/03/2022 16:59:53 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/03/2022 16:59:58 - INFO - codeparrot_training - Step 3048: {'lr': 0.0004999381426967327, 'samples': 1561088, 'steps': 3048, 'loss/train': 3.3037965297698975} +03/03/2022 17:00:01 - INFO - codeparrot_training - Step 3049: {'lr': 0.0004999380245970033, 'samples': 1561600, 'steps': 3049, 'loss/train': 3.782954692840576} +03/03/2022 17:00:02 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/03/2022 17:00:06 - INFO - codeparrot_training - Step 3050: {'lr': 0.0004999379063846555, 'samples': 1562112, 'steps': 3050, 'loss/train': 2.4499409198760986} +03/03/2022 17:00:10 - INFO - codeparrot_training - Step 3051: {'lr': 0.0004999377880596897, 'samples': 1562624, 'steps': 3051, 'loss/train': 3.1150593757629395} +03/03/2022 17:00:11 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/03/2022 17:00:15 - INFO - codeparrot_training - Step 3052: {'lr': 0.0004999376696221057, 'samples': 1563136, 'steps': 3052, 'loss/train': 2.4729442596435547} +03/03/2022 17:00:18 - INFO - codeparrot_training - Step 3053: {'lr': 0.0004999375510719037, 'samples': 1563648, 'steps': 3053, 'loss/train': 3.3743228912353516} +03/03/2022 17:00:20 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/03/2022 17:00:23 - INFO - codeparrot_training - Step 3054: {'lr': 0.0004999374324090837, 'samples': 1564160, 'steps': 3054, 'loss/train': 2.0660548210144043} +03/03/2022 17:00:26 - INFO - codeparrot_training - Step 3055: {'lr': 0.0004999373136336457, 'samples': 1564672, 'steps': 3055, 'loss/train': 2.4760117530822754} +03/03/2022 17:00:28 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/03/2022 17:00:32 - INFO - codeparrot_training - Step 3056: {'lr': 0.0004999371947455899, 'samples': 1565184, 'steps': 3056, 'loss/train': 3.62672758102417} +03/03/2022 17:00:35 - INFO - codeparrot_training - Step 3057: {'lr': 0.0004999370757449162, 'samples': 1565696, 'steps': 3057, 'loss/train': 2.5714051723480225} +03/03/2022 17:00:36 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 17:00:40 - INFO - codeparrot_training - Step 3058: {'lr': 0.0004999369566316247, 'samples': 1566208, 'steps': 3058, 'loss/train': 1.9593223333358765} +03/03/2022 17:00:43 - INFO - codeparrot_training - Step 3059: {'lr': 0.0004999368374057155, 'samples': 1566720, 'steps': 3059, 'loss/train': 2.8943517208099365} +03/03/2022 17:00:45 - INFO - codeparrot_training - Skipping example with length 405 (seq_length=1024) +03/03/2022 17:00:49 - INFO - codeparrot_training - Step 3060: {'lr': 0.0004999367180671886, 'samples': 1567232, 'steps': 3060, 'loss/train': 2.2934627532958984} +03/03/2022 17:00:52 - INFO - codeparrot_training - Step 3061: {'lr': 0.000499936598616044, 'samples': 1567744, 'steps': 3061, 'loss/train': 2.4961822032928467} +03/03/2022 17:00:54 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/03/2022 17:00:57 - INFO - codeparrot_training - Step 3062: {'lr': 0.0004999364790522819, 'samples': 1568256, 'steps': 3062, 'loss/train': 2.7870068550109863} +03/03/2022 17:01:00 - INFO - codeparrot_training - Step 3063: {'lr': 0.0004999363593759022, 'samples': 1568768, 'steps': 3063, 'loss/train': 2.2881481647491455} +03/03/2022 17:01:02 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/03/2022 17:01:05 - INFO - codeparrot_training - Step 3064: {'lr': 0.0004999362395869052, 'samples': 1569280, 'steps': 3064, 'loss/train': 3.8400609493255615} +03/03/2022 17:01:09 - INFO - codeparrot_training - Step 3065: {'lr': 0.0004999361196852906, 'samples': 1569792, 'steps': 3065, 'loss/train': 1.588464617729187} +03/03/2022 17:01:10 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 17:01:14 - INFO - codeparrot_training - Step 3066: {'lr': 0.0004999359996710588, 'samples': 1570304, 'steps': 3066, 'loss/train': 2.2829205989837646} +03/03/2022 17:01:17 - INFO - codeparrot_training - Step 3067: {'lr': 0.0004999358795442096, 'samples': 1570816, 'steps': 3067, 'loss/train': 2.5555667877197266} +03/03/2022 17:01:18 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/03/2022 17:01:22 - INFO - codeparrot_training - Step 3068: {'lr': 0.0004999357593047431, 'samples': 1571328, 'steps': 3068, 'loss/train': 2.7500314712524414} +03/03/2022 17:01:25 - INFO - codeparrot_training - Step 3069: {'lr': 0.0004999356389526595, 'samples': 1571840, 'steps': 3069, 'loss/train': 2.743537664413452} +03/03/2022 17:01:27 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/03/2022 17:01:31 - INFO - codeparrot_training - Step 3070: {'lr': 0.0004999355184879587, 'samples': 1572352, 'steps': 3070, 'loss/train': 3.9839508533477783} +03/03/2022 17:01:34 - INFO - codeparrot_training - Step 3071: {'lr': 0.0004999353979106409, 'samples': 1572864, 'steps': 3071, 'loss/train': 1.6620782613754272} +03/03/2022 17:01:35 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/03/2022 17:01:39 - INFO - codeparrot_training - Step 3072: {'lr': 0.000499935277220706, 'samples': 1573376, 'steps': 3072, 'loss/train': 3.1344823837280273} +03/03/2022 17:01:42 - INFO - codeparrot_training - Step 3073: {'lr': 0.0004999351564181541, 'samples': 1573888, 'steps': 3073, 'loss/train': 1.925447702407837} +03/03/2022 17:01:44 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/03/2022 17:01:47 - INFO - codeparrot_training - Step 3074: {'lr': 0.0004999350355029854, 'samples': 1574400, 'steps': 3074, 'loss/train': 3.8054072856903076} +03/03/2022 17:01:51 - INFO - codeparrot_training - Step 3075: {'lr': 0.0004999349144751997, 'samples': 1574912, 'steps': 3075, 'loss/train': 3.0133216381073} +03/03/2022 17:01:52 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/03/2022 17:01:56 - INFO - codeparrot_training - Step 3076: {'lr': 0.0004999347933347972, 'samples': 1575424, 'steps': 3076, 'loss/train': 2.225146770477295} +03/03/2022 17:01:59 - INFO - codeparrot_training - Step 3077: {'lr': 0.0004999346720817779, 'samples': 1575936, 'steps': 3077, 'loss/train': 1.9959720373153687} +03/03/2022 17:02:00 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/03/2022 17:02:04 - INFO - codeparrot_training - Step 3078: {'lr': 0.000499934550716142, 'samples': 1576448, 'steps': 3078, 'loss/train': 3.034642219543457} +03/03/2022 17:02:08 - INFO - codeparrot_training - Step 3079: {'lr': 0.0004999344292378893, 'samples': 1576960, 'steps': 3079, 'loss/train': 2.573922634124756} +03/03/2022 17:02:09 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/03/2022 17:02:13 - INFO - codeparrot_training - Step 3080: {'lr': 0.0004999343076470202, 'samples': 1577472, 'steps': 3080, 'loss/train': 2.871983051300049} +03/03/2022 17:02:16 - INFO - codeparrot_training - Step 3081: {'lr': 0.0004999341859435345, 'samples': 1577984, 'steps': 3081, 'loss/train': 1.8925114870071411} +03/03/2022 17:02:17 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/03/2022 17:02:21 - INFO - codeparrot_training - Step 3082: {'lr': 0.0004999340641274322, 'samples': 1578496, 'steps': 3082, 'loss/train': 3.5696139335632324} +03/03/2022 17:02:24 - INFO - codeparrot_training - Step 3083: {'lr': 0.0004999339421987136, 'samples': 1579008, 'steps': 3083, 'loss/train': 2.7413642406463623} +03/03/2022 17:02:25 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/03/2022 17:02:30 - INFO - codeparrot_training - Step 3084: {'lr': 0.0004999338201573786, 'samples': 1579520, 'steps': 3084, 'loss/train': 2.511333465576172} +03/03/2022 17:02:33 - INFO - codeparrot_training - Step 3085: {'lr': 0.0004999336980034271, 'samples': 1580032, 'steps': 3085, 'loss/train': 3.681885242462158} +03/03/2022 17:02:34 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/03/2022 17:02:38 - INFO - codeparrot_training - Step 3086: {'lr': 0.0004999335757368595, 'samples': 1580544, 'steps': 3086, 'loss/train': 3.395083427429199} +03/03/2022 17:02:41 - INFO - codeparrot_training - Step 3087: {'lr': 0.0004999334533576757, 'samples': 1581056, 'steps': 3087, 'loss/train': 3.583638906478882} +03/03/2022 17:02:42 - INFO - codeparrot_training - Skipping example with length 156 (seq_length=1024) +03/03/2022 17:02:46 - INFO - codeparrot_training - Step 3088: {'lr': 0.0004999333308658756, 'samples': 1581568, 'steps': 3088, 'loss/train': 2.0097591876983643} +03/03/2022 17:02:49 - INFO - codeparrot_training - Step 3089: {'lr': 0.0004999332082614597, 'samples': 1582080, 'steps': 3089, 'loss/train': 2.4555861949920654} +03/03/2022 17:02:50 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/03/2022 17:02:55 - INFO - codeparrot_training - Step 3090: {'lr': 0.0004999330855444274, 'samples': 1582592, 'steps': 3090, 'loss/train': 2.084200382232666} +03/03/2022 17:02:58 - INFO - codeparrot_training - Step 3091: {'lr': 0.0004999329627147792, 'samples': 1583104, 'steps': 3091, 'loss/train': 2.766651153564453} +03/03/2022 17:02:59 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/03/2022 17:03:03 - INFO - codeparrot_training - Step 3092: {'lr': 0.0004999328397725152, 'samples': 1583616, 'steps': 3092, 'loss/train': 2.2201409339904785} +03/03/2022 17:03:06 - INFO - codeparrot_training - Step 3093: {'lr': 0.0004999327167176352, 'samples': 1584128, 'steps': 3093, 'loss/train': 2.795578956604004} +03/03/2022 17:03:07 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/03/2022 17:03:12 - INFO - codeparrot_training - Step 3094: {'lr': 0.0004999325935501395, 'samples': 1584640, 'steps': 3094, 'loss/train': 2.4546406269073486} +03/03/2022 17:03:15 - INFO - codeparrot_training - Step 3095: {'lr': 0.0004999324702700279, 'samples': 1585152, 'steps': 3095, 'loss/train': 1.952446460723877} +03/03/2022 17:03:16 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/03/2022 17:03:20 - INFO - codeparrot_training - Step 3096: {'lr': 0.0004999323468773007, 'samples': 1585664, 'steps': 3096, 'loss/train': 2.9668383598327637} +03/03/2022 17:03:24 - INFO - codeparrot_training - Step 3097: {'lr': 0.0004999322233719578, 'samples': 1586176, 'steps': 3097, 'loss/train': 2.368140935897827} +03/03/2022 17:03:26 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/03/2022 17:03:29 - INFO - codeparrot_training - Step 3098: {'lr': 0.0004999320997539992, 'samples': 1586688, 'steps': 3098, 'loss/train': 3.236647129058838} +03/03/2022 17:03:32 - INFO - codeparrot_training - Step 3099: {'lr': 0.0004999319760234251, 'samples': 1587200, 'steps': 3099, 'loss/train': 3.8558356761932373} +03/03/2022 17:03:35 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/03/2022 17:03:37 - INFO - codeparrot_training - Step 3100: {'lr': 0.0004999318521802356, 'samples': 1587712, 'steps': 3100, 'loss/train': 0.6512963175773621} +03/03/2022 17:03:40 - INFO - codeparrot_training - Step 3101: {'lr': 0.0004999317282244305, 'samples': 1588224, 'steps': 3101, 'loss/train': 2.5115411281585693} +03/03/2022 17:03:43 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/03/2022 17:03:46 - INFO - codeparrot_training - Step 3102: {'lr': 0.0004999316041560102, 'samples': 1588736, 'steps': 3102, 'loss/train': 2.5713682174682617} +03/03/2022 17:03:49 - INFO - codeparrot_training - Step 3103: {'lr': 0.0004999314799749745, 'samples': 1589248, 'steps': 3103, 'loss/train': 2.4353208541870117} +03/03/2022 17:03:51 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/03/2022 17:03:54 - INFO - codeparrot_training - Step 3104: {'lr': 0.0004999313556813235, 'samples': 1589760, 'steps': 3104, 'loss/train': 3.4859204292297363} +03/03/2022 17:03:57 - INFO - codeparrot_training - Step 3105: {'lr': 0.0004999312312750573, 'samples': 1590272, 'steps': 3105, 'loss/train': 2.447747230529785} +03/03/2022 17:03:59 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/03/2022 17:04:02 - INFO - codeparrot_training - Step 3106: {'lr': 0.000499931106756176, 'samples': 1590784, 'steps': 3106, 'loss/train': 2.5023605823516846} +03/03/2022 17:04:06 - INFO - codeparrot_training - Step 3107: {'lr': 0.0004999309821246795, 'samples': 1591296, 'steps': 3107, 'loss/train': 3.1312501430511475} +03/03/2022 17:04:07 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 17:04:11 - INFO - codeparrot_training - Step 3108: {'lr': 0.000499930857380568, 'samples': 1591808, 'steps': 3108, 'loss/train': 3.1182613372802734} +03/03/2022 17:04:14 - INFO - codeparrot_training - Step 3109: {'lr': 0.0004999307325238416, 'samples': 1592320, 'steps': 3109, 'loss/train': 2.5728583335876465} +03/03/2022 17:04:16 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/03/2022 17:04:19 - INFO - codeparrot_training - Step 3110: {'lr': 0.0004999306075545002, 'samples': 1592832, 'steps': 3110, 'loss/train': 3.1181764602661133} +03/03/2022 17:04:22 - INFO - codeparrot_training - Step 3111: {'lr': 0.0004999304824725439, 'samples': 1593344, 'steps': 3111, 'loss/train': 2.513261318206787} +03/03/2022 17:04:24 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/03/2022 17:04:28 - INFO - codeparrot_training - Step 3112: {'lr': 0.0004999303572779727, 'samples': 1593856, 'steps': 3112, 'loss/train': 2.324193000793457} +03/03/2022 17:04:31 - INFO - codeparrot_training - Step 3113: {'lr': 0.0004999302319707869, 'samples': 1594368, 'steps': 3113, 'loss/train': 3.271068811416626} +03/03/2022 17:04:32 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/03/2022 17:04:36 - INFO - codeparrot_training - Step 3114: {'lr': 0.0004999301065509863, 'samples': 1594880, 'steps': 3114, 'loss/train': 3.3310585021972656} +03/03/2022 17:04:39 - INFO - codeparrot_training - Step 3115: {'lr': 0.0004999299810185712, 'samples': 1595392, 'steps': 3115, 'loss/train': 1.0331887006759644} +03/03/2022 17:04:41 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/03/2022 17:04:44 - INFO - codeparrot_training - Step 3116: {'lr': 0.0004999298553735413, 'samples': 1595904, 'steps': 3116, 'loss/train': 3.413482189178467} +03/03/2022 17:04:48 - INFO - codeparrot_training - Step 3117: {'lr': 0.000499929729615897, 'samples': 1596416, 'steps': 3117, 'loss/train': 3.7765398025512695} +03/03/2022 17:04:49 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/03/2022 17:04:53 - INFO - codeparrot_training - Step 3118: {'lr': 0.0004999296037456381, 'samples': 1596928, 'steps': 3118, 'loss/train': 1.8707209825515747} +03/03/2022 17:04:56 - INFO - codeparrot_training - Step 3119: {'lr': 0.0004999294777627649, 'samples': 1597440, 'steps': 3119, 'loss/train': 1.862858772277832} +03/03/2022 17:04:58 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/03/2022 17:05:02 - INFO - codeparrot_training - Step 3120: {'lr': 0.0004999293516672773, 'samples': 1597952, 'steps': 3120, 'loss/train': 2.5494027137756348} +03/03/2022 17:05:05 - INFO - codeparrot_training - Step 3121: {'lr': 0.0004999292254591754, 'samples': 1598464, 'steps': 3121, 'loss/train': 7.465449333190918} +03/03/2022 17:05:08 - INFO - codeparrot_training - Step 3122: {'lr': 0.0004999290991384591, 'samples': 1598976, 'steps': 3122, 'loss/train': 2.28774356842041} +03/03/2022 17:05:08 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/03/2022 17:05:13 - INFO - codeparrot_training - Step 3123: {'lr': 0.0004999289727051289, 'samples': 1599488, 'steps': 3123, 'loss/train': 1.5419032573699951} +03/03/2022 17:05:16 - INFO - codeparrot_training - Step 3124: {'lr': 0.0004999288461591842, 'samples': 1600000, 'steps': 3124, 'loss/train': 3.3454294204711914} +03/03/2022 17:05:17 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/03/2022 17:05:22 - INFO - codeparrot_training - Step 3125: {'lr': 0.0004999287195006257, 'samples': 1600512, 'steps': 3125, 'loss/train': 3.6345365047454834} +03/03/2022 17:05:25 - INFO - codeparrot_training - Step 3126: {'lr': 0.000499928592729453, 'samples': 1601024, 'steps': 3126, 'loss/train': 2.8627607822418213} +03/03/2022 17:05:25 - INFO - codeparrot_training - Skipping example with length 519 (seq_length=1024) +03/03/2022 17:05:30 - INFO - codeparrot_training - Step 3127: {'lr': 0.0004999284658456665, 'samples': 1601536, 'steps': 3127, 'loss/train': 1.328304648399353} +03/03/2022 17:05:33 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/03/2022 17:05:36 - INFO - codeparrot_training - Step 3128: {'lr': 0.000499928338849266, 'samples': 1602048, 'steps': 3128, 'loss/train': 2.903782606124878} +03/03/2022 17:05:39 - INFO - codeparrot_training - Step 3129: {'lr': 0.0004999282117402516, 'samples': 1602560, 'steps': 3129, 'loss/train': 3.107649087905884} +03/03/2022 17:05:41 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/03/2022 17:05:44 - INFO - codeparrot_training - Step 3130: {'lr': 0.0004999280845186235, 'samples': 1603072, 'steps': 3130, 'loss/train': 4.006110668182373} +03/03/2022 17:05:47 - INFO - codeparrot_training - Step 3131: {'lr': 0.0004999279571843816, 'samples': 1603584, 'steps': 3131, 'loss/train': 0.8248851895332336} +03/03/2022 17:05:50 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/03/2022 17:05:53 - INFO - codeparrot_training - Step 3132: {'lr': 0.000499927829737526, 'samples': 1604096, 'steps': 3132, 'loss/train': 2.6746115684509277} +03/03/2022 17:05:56 - INFO - codeparrot_training - Step 3133: {'lr': 0.0004999277021780569, 'samples': 1604608, 'steps': 3133, 'loss/train': 3.0486955642700195} +03/03/2022 17:05:58 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/03/2022 17:06:01 - INFO - codeparrot_training - Step 3134: {'lr': 0.0004999275745059741, 'samples': 1605120, 'steps': 3134, 'loss/train': 3.2297000885009766} +03/03/2022 17:06:04 - INFO - codeparrot_training - Step 3135: {'lr': 0.0004999274467212779, 'samples': 1605632, 'steps': 3135, 'loss/train': 2.3956644535064697} +03/03/2022 17:06:07 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/03/2022 17:06:09 - INFO - codeparrot_training - Step 3136: {'lr': 0.0004999273188239681, 'samples': 1606144, 'steps': 3136, 'loss/train': 2.932687282562256} +03/03/2022 17:06:13 - INFO - codeparrot_training - Step 3137: {'lr': 0.0004999271908140451, 'samples': 1606656, 'steps': 3137, 'loss/train': 2.4833178520202637} +03/03/2022 17:06:16 - INFO - codeparrot_training - Step 3138: {'lr': 0.0004999270626915086, 'samples': 1607168, 'steps': 3138, 'loss/train': 2.874952793121338} +03/03/2022 17:06:16 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/03/2022 17:06:21 - INFO - codeparrot_training - Step 3139: {'lr': 0.0004999269344563589, 'samples': 1607680, 'steps': 3139, 'loss/train': 2.971289873123169} +03/03/2022 17:06:24 - INFO - codeparrot_training - Step 3140: {'lr': 0.0004999268061085959, 'samples': 1608192, 'steps': 3140, 'loss/train': 2.131265163421631} +03/03/2022 17:06:25 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/03/2022 17:06:29 - INFO - codeparrot_training - Step 3141: {'lr': 0.0004999266776482199, 'samples': 1608704, 'steps': 3141, 'loss/train': 3.173490047454834} +03/03/2022 17:06:33 - INFO - codeparrot_training - Step 3142: {'lr': 0.0004999265490752306, 'samples': 1609216, 'steps': 3142, 'loss/train': 2.1478161811828613} +03/03/2022 17:06:33 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/03/2022 17:06:38 - INFO - codeparrot_training - Step 3143: {'lr': 0.0004999264203896284, 'samples': 1609728, 'steps': 3143, 'loss/train': 3.464094400405884} +03/03/2022 17:06:41 - INFO - codeparrot_training - Step 3144: {'lr': 0.0004999262915914132, 'samples': 1610240, 'steps': 3144, 'loss/train': 3.17561411857605} +03/03/2022 17:06:41 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 17:06:46 - INFO - codeparrot_training - Step 3145: {'lr': 0.000499926162680585, 'samples': 1610752, 'steps': 3145, 'loss/train': 2.610203266143799} +03/03/2022 17:06:49 - INFO - codeparrot_training - Step 3146: {'lr': 0.000499926033657144, 'samples': 1611264, 'steps': 3146, 'loss/train': 2.3648221492767334} +03/03/2022 17:06:50 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/03/2022 17:06:55 - INFO - codeparrot_training - Step 3147: {'lr': 0.0004999259045210901, 'samples': 1611776, 'steps': 3147, 'loss/train': 3.393726348876953} +03/03/2022 17:06:58 - INFO - codeparrot_training - Step 3148: {'lr': 0.0004999257752724234, 'samples': 1612288, 'steps': 3148, 'loss/train': 0.5157548785209656} +03/03/2022 17:06:58 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/03/2022 17:07:03 - INFO - codeparrot_training - Step 3149: {'lr': 0.0004999256459111443, 'samples': 1612800, 'steps': 3149, 'loss/train': 2.7473878860473633} +03/03/2022 17:07:06 - INFO - codeparrot_training - Step 3150: {'lr': 0.0004999255164372523, 'samples': 1613312, 'steps': 3150, 'loss/train': 1.5548590421676636} +03/03/2022 17:07:07 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/03/2022 17:07:12 - INFO - codeparrot_training - Step 3151: {'lr': 0.0004999253868507476, 'samples': 1613824, 'steps': 3151, 'loss/train': 3.867713689804077} +03/03/2022 17:07:15 - INFO - codeparrot_training - Step 3152: {'lr': 0.0004999252571516306, 'samples': 1614336, 'steps': 3152, 'loss/train': 2.6200544834136963} +03/03/2022 17:07:15 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/03/2022 17:07:20 - INFO - codeparrot_training - Step 3153: {'lr': 0.0004999251273399011, 'samples': 1614848, 'steps': 3153, 'loss/train': 2.2708542346954346} +03/03/2022 17:07:23 - INFO - codeparrot_training - Step 3154: {'lr': 0.0004999249974155592, 'samples': 1615360, 'steps': 3154, 'loss/train': 3.3140199184417725} +03/03/2022 17:07:23 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/03/2022 17:07:28 - INFO - codeparrot_training - Step 3155: {'lr': 0.0004999248673786049, 'samples': 1615872, 'steps': 3155, 'loss/train': 3.1553444862365723} +03/03/2022 17:07:31 - INFO - codeparrot_training - Step 3156: {'lr': 0.0004999247372290383, 'samples': 1616384, 'steps': 3156, 'loss/train': 2.6321499347686768} +03/03/2022 17:07:32 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/03/2022 17:07:37 - INFO - codeparrot_training - Step 3157: {'lr': 0.0004999246069668596, 'samples': 1616896, 'steps': 3157, 'loss/train': 3.4026570320129395} +03/03/2022 17:07:40 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/03/2022 17:07:42 - INFO - codeparrot_training - Step 3158: {'lr': 0.0004999244765920687, 'samples': 1617408, 'steps': 3158, 'loss/train': 0.6521148085594177} +03/03/2022 17:07:45 - INFO - codeparrot_training - Step 3159: {'lr': 0.0004999243461046656, 'samples': 1617920, 'steps': 3159, 'loss/train': 4.805545330047607} +03/03/2022 17:07:49 - INFO - codeparrot_training - Step 3160: {'lr': 0.0004999242155046504, 'samples': 1618432, 'steps': 3160, 'loss/train': 2.479771614074707} +03/03/2022 17:07:49 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 17:07:54 - INFO - codeparrot_training - Step 3161: {'lr': 0.0004999240847920233, 'samples': 1618944, 'steps': 3161, 'loss/train': 1.5201314687728882} +03/03/2022 17:07:57 - INFO - codeparrot_training - Step 3162: {'lr': 0.0004999239539667842, 'samples': 1619456, 'steps': 3162, 'loss/train': 3.331648111343384} +03/03/2022 17:07:57 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/03/2022 17:08:02 - INFO - codeparrot_training - Step 3163: {'lr': 0.0004999238230289333, 'samples': 1619968, 'steps': 3163, 'loss/train': 3.1433770656585693} +03/03/2022 17:08:05 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/03/2022 17:08:07 - INFO - codeparrot_training - Step 3164: {'lr': 0.0004999236919784705, 'samples': 1620480, 'steps': 3164, 'loss/train': 2.3067240715026855} +03/03/2022 17:08:11 - INFO - codeparrot_training - Step 3165: {'lr': 0.0004999235608153961, 'samples': 1620992, 'steps': 3165, 'loss/train': 2.0710079669952393} +03/03/2022 17:08:13 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/03/2022 17:08:16 - INFO - codeparrot_training - Step 3166: {'lr': 0.0004999234295397098, 'samples': 1621504, 'steps': 3166, 'loss/train': 1.5704458951950073} +03/03/2022 17:08:19 - INFO - codeparrot_training - Step 3167: {'lr': 0.000499923298151412, 'samples': 1622016, 'steps': 3167, 'loss/train': 2.52276349067688} +03/03/2022 17:08:22 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/03/2022 17:08:24 - INFO - codeparrot_training - Step 3168: {'lr': 0.0004999231666505025, 'samples': 1622528, 'steps': 3168, 'loss/train': 3.7464592456817627} +03/03/2022 17:08:27 - INFO - codeparrot_training - Step 3169: {'lr': 0.0004999230350369816, 'samples': 1623040, 'steps': 3169, 'loss/train': 2.637087106704712} +03/03/2022 17:08:30 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/03/2022 17:08:33 - INFO - codeparrot_training - Step 3170: {'lr': 0.0004999229033108492, 'samples': 1623552, 'steps': 3170, 'loss/train': 2.5652010440826416} +03/03/2022 17:08:36 - INFO - codeparrot_training - Step 3171: {'lr': 0.0004999227714721054, 'samples': 1624064, 'steps': 3171, 'loss/train': 2.8615496158599854} +03/03/2022 17:08:38 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/03/2022 17:08:41 - INFO - codeparrot_training - Step 3172: {'lr': 0.0004999226395207501, 'samples': 1624576, 'steps': 3172, 'loss/train': 1.5188332796096802} +03/03/2022 17:08:44 - INFO - codeparrot_training - Step 3173: {'lr': 0.0004999225074567837, 'samples': 1625088, 'steps': 3173, 'loss/train': 1.6403237581253052} +03/03/2022 17:08:47 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/03/2022 17:08:49 - INFO - codeparrot_training - Step 3174: {'lr': 0.000499922375280206, 'samples': 1625600, 'steps': 3174, 'loss/train': 3.164886236190796} +03/03/2022 17:08:53 - INFO - codeparrot_training - Step 3175: {'lr': 0.0004999222429910171, 'samples': 1626112, 'steps': 3175, 'loss/train': 2.994744300842285} +03/03/2022 17:08:55 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/03/2022 17:08:58 - INFO - codeparrot_training - Step 3176: {'lr': 0.0004999221105892172, 'samples': 1626624, 'steps': 3176, 'loss/train': 3.450744390487671} +03/03/2022 17:09:01 - INFO - codeparrot_training - Step 3177: {'lr': 0.0004999219780748062, 'samples': 1627136, 'steps': 3177, 'loss/train': 3.842543601989746} +03/03/2022 17:09:04 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 17:09:06 - INFO - codeparrot_training - Step 3178: {'lr': 0.0004999218454477843, 'samples': 1627648, 'steps': 3178, 'loss/train': 1.6521320343017578} +03/03/2022 17:09:10 - INFO - codeparrot_training - Step 3179: {'lr': 0.0004999217127081514, 'samples': 1628160, 'steps': 3179, 'loss/train': 2.400299549102783} +03/03/2022 17:09:12 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 17:09:15 - INFO - codeparrot_training - Step 3180: {'lr': 0.0004999215798559076, 'samples': 1628672, 'steps': 3180, 'loss/train': 2.1372478008270264} +03/03/2022 17:09:18 - INFO - codeparrot_training - Step 3181: {'lr': 0.000499921446891053, 'samples': 1629184, 'steps': 3181, 'loss/train': 3.0301592350006104} +03/03/2022 17:09:20 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/03/2022 17:09:23 - INFO - codeparrot_training - Step 3182: {'lr': 0.0004999213138135877, 'samples': 1629696, 'steps': 3182, 'loss/train': 3.164119005203247} +03/03/2022 17:09:27 - INFO - codeparrot_training - Step 3183: {'lr': 0.0004999211806235117, 'samples': 1630208, 'steps': 3183, 'loss/train': 0.4649476110935211} +03/03/2022 17:09:29 - INFO - codeparrot_training - Skipping example with length 610 (seq_length=1024) +03/03/2022 17:09:32 - INFO - codeparrot_training - Step 3184: {'lr': 0.000499921047320825, 'samples': 1630720, 'steps': 3184, 'loss/train': 2.762665271759033} +03/03/2022 17:09:35 - INFO - codeparrot_training - Step 3185: {'lr': 0.0004999209139055278, 'samples': 1631232, 'steps': 3185, 'loss/train': 3.1531805992126465} +03/03/2022 17:09:37 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/03/2022 17:09:40 - INFO - codeparrot_training - Step 3186: {'lr': 0.0004999207803776201, 'samples': 1631744, 'steps': 3186, 'loss/train': 3.335547924041748} +03/03/2022 17:09:43 - INFO - codeparrot_training - Step 3187: {'lr': 0.000499920646737102, 'samples': 1632256, 'steps': 3187, 'loss/train': 3.408154010772705} +03/03/2022 17:09:46 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/03/2022 17:09:49 - INFO - codeparrot_training - Step 3188: {'lr': 0.0004999205129839734, 'samples': 1632768, 'steps': 3188, 'loss/train': 3.2741801738739014} +03/03/2022 17:09:52 - INFO - codeparrot_training - Step 3189: {'lr': 0.0004999203791182345, 'samples': 1633280, 'steps': 3189, 'loss/train': 2.234330415725708} +03/03/2022 17:09:54 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/03/2022 17:09:57 - INFO - codeparrot_training - Step 3190: {'lr': 0.0004999202451398853, 'samples': 1633792, 'steps': 3190, 'loss/train': 0.2653330862522125} +03/03/2022 17:10:00 - INFO - codeparrot_training - Step 3191: {'lr': 0.000499920111048926, 'samples': 1634304, 'steps': 3191, 'loss/train': 2.2467868328094482} +03/03/2022 17:10:02 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/03/2022 17:10:06 - INFO - codeparrot_training - Step 3192: {'lr': 0.0004999199768453565, 'samples': 1634816, 'steps': 3192, 'loss/train': 1.5938605070114136} +03/03/2022 17:10:09 - INFO - codeparrot_training - Step 3193: {'lr': 0.0004999198425291769, 'samples': 1635328, 'steps': 3193, 'loss/train': 2.2746238708496094} +03/03/2022 17:10:11 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/03/2022 17:10:14 - INFO - codeparrot_training - Step 3194: {'lr': 0.0004999197081003873, 'samples': 1635840, 'steps': 3194, 'loss/train': 2.664440155029297} +03/03/2022 17:10:17 - INFO - codeparrot_training - Step 3195: {'lr': 0.0004999195735589877, 'samples': 1636352, 'steps': 3195, 'loss/train': 2.3604633808135986} +03/03/2022 17:10:19 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/03/2022 17:10:22 - INFO - codeparrot_training - Step 3196: {'lr': 0.0004999194389049783, 'samples': 1636864, 'steps': 3196, 'loss/train': 3.0940935611724854} +03/03/2022 17:10:25 - INFO - codeparrot_training - Step 3197: {'lr': 0.0004999193041383588, 'samples': 1637376, 'steps': 3197, 'loss/train': 3.4353840351104736} +03/03/2022 17:10:27 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/03/2022 17:10:31 - INFO - codeparrot_training - Step 3198: {'lr': 0.0004999191692591299, 'samples': 1637888, 'steps': 3198, 'loss/train': 1.9620518684387207} +03/03/2022 17:10:34 - INFO - codeparrot_training - Step 3199: {'lr': 0.000499919034267291, 'samples': 1638400, 'steps': 3199, 'loss/train': 2.629711389541626} +03/03/2022 17:10:37 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/03/2022 17:10:39 - INFO - codeparrot_training - Step 3200: {'lr': 0.0004999188991628425, 'samples': 1638912, 'steps': 3200, 'loss/train': 2.821241855621338} +03/03/2022 17:10:43 - INFO - codeparrot_training - Step 3201: {'lr': 0.0004999187639457844, 'samples': 1639424, 'steps': 3201, 'loss/train': 3.0432631969451904} +03/03/2022 17:10:45 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/03/2022 17:10:48 - INFO - codeparrot_training - Step 3202: {'lr': 0.0004999186286161169, 'samples': 1639936, 'steps': 3202, 'loss/train': 3.304659128189087} +03/03/2022 17:10:51 - INFO - codeparrot_training - Step 3203: {'lr': 0.0004999184931738397, 'samples': 1640448, 'steps': 3203, 'loss/train': 4.19053840637207} +03/03/2022 17:10:54 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/03/2022 17:10:56 - INFO - codeparrot_training - Step 3204: {'lr': 0.0004999183576189532, 'samples': 1640960, 'steps': 3204, 'loss/train': 2.628227710723877} +03/03/2022 17:10:59 - INFO - codeparrot_training - Step 3205: {'lr': 0.0004999182219514573, 'samples': 1641472, 'steps': 3205, 'loss/train': 3.6766059398651123} +03/03/2022 17:11:02 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/03/2022 17:11:05 - INFO - codeparrot_training - Step 3206: {'lr': 0.0004999180861713522, 'samples': 1641984, 'steps': 3206, 'loss/train': 1.6787306070327759} +03/03/2022 17:11:08 - INFO - codeparrot_training - Step 3207: {'lr': 0.0004999179502786377, 'samples': 1642496, 'steps': 3207, 'loss/train': 3.987818479537964} +03/03/2022 17:11:11 - INFO - codeparrot_training - Step 3208: {'lr': 0.0004999178142733141, 'samples': 1643008, 'steps': 3208, 'loss/train': 4.15583610534668} +03/03/2022 17:11:11 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/03/2022 17:11:16 - INFO - codeparrot_training - Step 3209: {'lr': 0.0004999176781553815, 'samples': 1643520, 'steps': 3209, 'loss/train': 3.3124165534973145} +03/03/2022 17:11:19 - INFO - codeparrot_training - Step 3210: {'lr': 0.0004999175419248398, 'samples': 1644032, 'steps': 3210, 'loss/train': 2.627556562423706} +03/03/2022 17:11:19 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/03/2022 17:11:25 - INFO - codeparrot_training - Step 3211: {'lr': 0.0004999174055816891, 'samples': 1644544, 'steps': 3211, 'loss/train': 2.08768892288208} +03/03/2022 17:11:28 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/03/2022 17:11:30 - INFO - codeparrot_training - Step 3212: {'lr': 0.0004999172691259293, 'samples': 1645056, 'steps': 3212, 'loss/train': 2.2124674320220947} +03/03/2022 17:11:33 - INFO - codeparrot_training - Step 3213: {'lr': 0.0004999171325575609, 'samples': 1645568, 'steps': 3213, 'loss/train': 2.686856985092163} +03/03/2022 17:11:36 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/03/2022 17:11:38 - INFO - codeparrot_training - Step 3214: {'lr': 0.0004999169958765836, 'samples': 1646080, 'steps': 3214, 'loss/train': 2.803110361099243} +03/03/2022 17:11:41 - INFO - codeparrot_training - Step 3215: {'lr': 0.0004999168590829975, 'samples': 1646592, 'steps': 3215, 'loss/train': 3.0305893421173096} +03/03/2022 17:11:44 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/03/2022 17:11:47 - INFO - codeparrot_training - Step 3216: {'lr': 0.0004999167221768028, 'samples': 1647104, 'steps': 3216, 'loss/train': 2.773348093032837} +03/03/2022 17:11:50 - INFO - codeparrot_training - Step 3217: {'lr': 0.0004999165851579994, 'samples': 1647616, 'steps': 3217, 'loss/train': 2.8608086109161377} +03/03/2022 17:11:52 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/03/2022 17:11:55 - INFO - codeparrot_training - Step 3218: {'lr': 0.0004999164480265875, 'samples': 1648128, 'steps': 3218, 'loss/train': 2.5137412548065186} +03/03/2022 17:11:58 - INFO - codeparrot_training - Step 3219: {'lr': 0.0004999163107825671, 'samples': 1648640, 'steps': 3219, 'loss/train': 2.9388837814331055} +03/03/2022 17:12:00 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/03/2022 17:12:03 - INFO - codeparrot_training - Step 3220: {'lr': 0.0004999161734259383, 'samples': 1649152, 'steps': 3220, 'loss/train': 1.962165355682373} +03/03/2022 17:12:07 - INFO - codeparrot_training - Step 3221: {'lr': 0.0004999160359567011, 'samples': 1649664, 'steps': 3221, 'loss/train': 2.2112157344818115} +03/03/2022 17:12:08 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/03/2022 17:12:12 - INFO - codeparrot_training - Step 3222: {'lr': 0.0004999158983748555, 'samples': 1650176, 'steps': 3222, 'loss/train': 2.206014394760132} +03/03/2022 17:12:15 - INFO - codeparrot_training - Step 3223: {'lr': 0.0004999157606804018, 'samples': 1650688, 'steps': 3223, 'loss/train': 1.0711002349853516} +03/03/2022 17:12:17 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/03/2022 17:12:20 - INFO - codeparrot_training - Step 3224: {'lr': 0.0004999156228733398, 'samples': 1651200, 'steps': 3224, 'loss/train': 3.438539743423462} +03/03/2022 17:12:23 - INFO - codeparrot_training - Step 3225: {'lr': 0.0004999154849536698, 'samples': 1651712, 'steps': 3225, 'loss/train': 2.9586122035980225} +03/03/2022 17:12:25 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/03/2022 17:12:29 - INFO - codeparrot_training - Step 3226: {'lr': 0.0004999153469213917, 'samples': 1652224, 'steps': 3226, 'loss/train': 3.1565017700195312} +03/03/2022 17:12:32 - INFO - codeparrot_training - Step 3227: {'lr': 0.0004999152087765055, 'samples': 1652736, 'steps': 3227, 'loss/train': 2.9213156700134277} +03/03/2022 17:12:33 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/03/2022 17:12:37 - INFO - codeparrot_training - Step 3228: {'lr': 0.0004999150705190114, 'samples': 1653248, 'steps': 3228, 'loss/train': 3.5583932399749756} +03/03/2022 17:12:40 - INFO - codeparrot_training - Step 3229: {'lr': 0.0004999149321489095, 'samples': 1653760, 'steps': 3229, 'loss/train': 1.9859654903411865} +03/03/2022 17:12:41 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/03/2022 17:12:46 - INFO - codeparrot_training - Step 3230: {'lr': 0.0004999147936661997, 'samples': 1654272, 'steps': 3230, 'loss/train': 2.050604820251465} +03/03/2022 17:12:49 - INFO - codeparrot_training - Step 3231: {'lr': 0.0004999146550708822, 'samples': 1654784, 'steps': 3231, 'loss/train': 2.4777016639709473} +03/03/2022 17:12:52 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/03/2022 17:12:54 - INFO - codeparrot_training - Step 3232: {'lr': 0.000499914516362957, 'samples': 1655296, 'steps': 3232, 'loss/train': 2.9253621101379395} +03/03/2022 17:12:57 - INFO - codeparrot_training - Step 3233: {'lr': 0.0004999143775424241, 'samples': 1655808, 'steps': 3233, 'loss/train': 3.603736162185669} +03/03/2022 17:13:00 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/03/2022 17:13:02 - INFO - codeparrot_training - Step 3234: {'lr': 0.0004999142386092838, 'samples': 1656320, 'steps': 3234, 'loss/train': 3.0794525146484375} +03/03/2022 17:13:06 - INFO - codeparrot_training - Step 3235: {'lr': 0.000499914099563536, 'samples': 1656832, 'steps': 3235, 'loss/train': 2.2751059532165527} +03/03/2022 17:13:08 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/03/2022 17:13:11 - INFO - codeparrot_training - Step 3236: {'lr': 0.0004999139604051806, 'samples': 1657344, 'steps': 3236, 'loss/train': 2.004234552383423} +03/03/2022 17:13:14 - INFO - codeparrot_training - Step 3237: {'lr': 0.0004999138211342179, 'samples': 1657856, 'steps': 3237, 'loss/train': 2.1669678688049316} +03/03/2022 17:13:16 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/03/2022 17:13:19 - INFO - codeparrot_training - Step 3238: {'lr': 0.0004999136817506478, 'samples': 1658368, 'steps': 3238, 'loss/train': 3.01165509223938} +03/03/2022 17:13:22 - INFO - codeparrot_training - Step 3239: {'lr': 0.0004999135422544707, 'samples': 1658880, 'steps': 3239, 'loss/train': 2.7612557411193848} +03/03/2022 17:13:25 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/03/2022 17:13:28 - INFO - codeparrot_training - Step 3240: {'lr': 0.0004999134026456862, 'samples': 1659392, 'steps': 3240, 'loss/train': 1.7467108964920044} +03/03/2022 17:13:31 - INFO - codeparrot_training - Step 3241: {'lr': 0.0004999132629242946, 'samples': 1659904, 'steps': 3241, 'loss/train': 2.286334753036499} +03/03/2022 17:13:33 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/03/2022 17:13:36 - INFO - codeparrot_training - Step 3242: {'lr': 0.000499913123090296, 'samples': 1660416, 'steps': 3242, 'loss/train': 2.9461050033569336} +03/03/2022 17:13:39 - INFO - codeparrot_training - Step 3243: {'lr': 0.0004999129831436904, 'samples': 1660928, 'steps': 3243, 'loss/train': 1.7647240161895752} +03/03/2022 17:13:41 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 17:13:44 - INFO - codeparrot_training - Step 3244: {'lr': 0.0004999128430844778, 'samples': 1661440, 'steps': 3244, 'loss/train': 2.9348437786102295} +03/03/2022 17:13:48 - INFO - codeparrot_training - Step 3245: {'lr': 0.0004999127029126585, 'samples': 1661952, 'steps': 3245, 'loss/train': 2.5803918838500977} +03/03/2022 17:13:50 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/03/2022 17:13:53 - INFO - codeparrot_training - Step 3246: {'lr': 0.0004999125626282322, 'samples': 1662464, 'steps': 3246, 'loss/train': 2.541604995727539} +03/03/2022 17:13:56 - INFO - codeparrot_training - Step 3247: {'lr': 0.0004999124222311993, 'samples': 1662976, 'steps': 3247, 'loss/train': 4.035691261291504} +03/03/2022 17:13:58 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/03/2022 17:14:01 - INFO - codeparrot_training - Step 3248: {'lr': 0.0004999122817215595, 'samples': 1663488, 'steps': 3248, 'loss/train': 3.107337236404419} +03/03/2022 17:14:04 - INFO - codeparrot_training - Step 3249: {'lr': 0.0004999121410993133, 'samples': 1664000, 'steps': 3249, 'loss/train': 2.2093987464904785} +03/03/2022 17:14:06 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/03/2022 17:14:10 - INFO - codeparrot_training - Step 3250: {'lr': 0.0004999120003644604, 'samples': 1664512, 'steps': 3250, 'loss/train': 2.8178553581237793} +03/03/2022 17:14:13 - INFO - codeparrot_training - Step 3251: {'lr': 0.0004999118595170011, 'samples': 1665024, 'steps': 3251, 'loss/train': 3.4205546379089355} +03/03/2022 17:14:15 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/03/2022 17:14:18 - INFO - codeparrot_training - Step 3252: {'lr': 0.0004999117185569354, 'samples': 1665536, 'steps': 3252, 'loss/train': 2.919506072998047} +03/03/2022 17:14:21 - INFO - codeparrot_training - Step 3253: {'lr': 0.0004999115774842633, 'samples': 1666048, 'steps': 3253, 'loss/train': 3.2676570415496826} +03/03/2022 17:14:23 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/03/2022 17:14:27 - INFO - codeparrot_training - Step 3254: {'lr': 0.0004999114362989849, 'samples': 1666560, 'steps': 3254, 'loss/train': 3.099179267883301} +03/03/2022 17:14:30 - INFO - codeparrot_training - Step 3255: {'lr': 0.0004999112950011002, 'samples': 1667072, 'steps': 3255, 'loss/train': 2.8482823371887207} +03/03/2022 17:14:35 - INFO - codeparrot_training - Step 3256: {'lr': 0.0004999111535906094, 'samples': 1667584, 'steps': 3256, 'loss/train': 2.8414599895477295} +03/03/2022 17:14:38 - INFO - codeparrot_training - Step 3257: {'lr': 0.0004999110120675125, 'samples': 1668096, 'steps': 3257, 'loss/train': 2.3864519596099854} +03/03/2022 17:14:40 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/03/2022 17:14:44 - INFO - codeparrot_training - Step 3258: {'lr': 0.0004999108704318095, 'samples': 1668608, 'steps': 3258, 'loss/train': 2.384385347366333} +03/03/2022 17:14:47 - INFO - codeparrot_training - Step 3259: {'lr': 0.0004999107286835006, 'samples': 1669120, 'steps': 3259, 'loss/train': 2.52323317527771} +03/03/2022 17:14:48 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/03/2022 17:14:52 - INFO - codeparrot_training - Step 3260: {'lr': 0.0004999105868225858, 'samples': 1669632, 'steps': 3260, 'loss/train': 1.8758745193481445} +03/03/2022 17:14:55 - INFO - codeparrot_training - Step 3261: {'lr': 0.0004999104448490649, 'samples': 1670144, 'steps': 3261, 'loss/train': 3.7754602432250977} +03/03/2022 17:14:56 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/03/2022 17:15:00 - INFO - codeparrot_training - Step 3262: {'lr': 0.0004999103027629384, 'samples': 1670656, 'steps': 3262, 'loss/train': 2.2947566509246826} +03/03/2022 17:15:04 - INFO - codeparrot_training - Step 3263: {'lr': 0.0004999101605642061, 'samples': 1671168, 'steps': 3263, 'loss/train': 2.9705259799957275} +03/03/2022 17:15:05 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/03/2022 17:15:09 - INFO - codeparrot_training - Step 3264: {'lr': 0.0004999100182528683, 'samples': 1671680, 'steps': 3264, 'loss/train': 2.902282476425171} +03/03/2022 17:15:12 - INFO - codeparrot_training - Step 3265: {'lr': 0.0004999098758289248, 'samples': 1672192, 'steps': 3265, 'loss/train': 2.4553070068359375} +03/03/2022 17:15:13 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/03/2022 17:15:17 - INFO - codeparrot_training - Step 3266: {'lr': 0.0004999097332923758, 'samples': 1672704, 'steps': 3266, 'loss/train': 2.07495379447937} +03/03/2022 17:15:21 - INFO - codeparrot_training - Step 3267: {'lr': 0.0004999095906432213, 'samples': 1673216, 'steps': 3267, 'loss/train': 2.7839205265045166} +03/03/2022 17:15:22 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/03/2022 17:15:26 - INFO - codeparrot_training - Step 3268: {'lr': 0.0004999094478814613, 'samples': 1673728, 'steps': 3268, 'loss/train': 2.388890027999878} +03/03/2022 17:15:29 - INFO - codeparrot_training - Step 3269: {'lr': 0.0004999093050070961, 'samples': 1674240, 'steps': 3269, 'loss/train': 3.7101285457611084} +03/03/2022 17:15:30 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/03/2022 17:15:34 - INFO - codeparrot_training - Step 3270: {'lr': 0.0004999091620201255, 'samples': 1674752, 'steps': 3270, 'loss/train': 3.1778390407562256} +03/03/2022 17:15:37 - INFO - codeparrot_training - Step 3271: {'lr': 0.0004999090189205498, 'samples': 1675264, 'steps': 3271, 'loss/train': 3.498061180114746} +03/03/2022 17:15:38 - INFO - codeparrot_training - Skipping example with length 604 (seq_length=1024) +03/03/2022 17:15:43 - INFO - codeparrot_training - Step 3272: {'lr': 0.0004999088757083689, 'samples': 1675776, 'steps': 3272, 'loss/train': 2.9312329292297363} +03/03/2022 17:15:46 - INFO - codeparrot_training - Step 3273: {'lr': 0.0004999087323835829, 'samples': 1676288, 'steps': 3273, 'loss/train': 3.227250576019287} +03/03/2022 17:15:47 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/03/2022 17:15:51 - INFO - codeparrot_training - Step 3274: {'lr': 0.0004999085889461919, 'samples': 1676800, 'steps': 3274, 'loss/train': 2.545597791671753} +03/03/2022 17:15:54 - INFO - codeparrot_training - Step 3275: {'lr': 0.0004999084453961959, 'samples': 1677312, 'steps': 3275, 'loss/train': 1.9713518619537354} +03/03/2022 17:15:55 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/03/2022 17:15:59 - INFO - codeparrot_training - Step 3276: {'lr': 0.0004999083017335951, 'samples': 1677824, 'steps': 3276, 'loss/train': 3.0922610759735107} +03/03/2022 17:16:03 - INFO - codeparrot_training - Step 3277: {'lr': 0.0004999081579583895, 'samples': 1678336, 'steps': 3277, 'loss/train': 1.9257805347442627} +03/03/2022 17:16:03 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/03/2022 17:16:08 - INFO - codeparrot_training - Step 3278: {'lr': 0.0004999080140705791, 'samples': 1678848, 'steps': 3278, 'loss/train': 1.7623577117919922} +03/03/2022 17:16:11 - INFO - codeparrot_training - Step 3279: {'lr': 0.0004999078700701639, 'samples': 1679360, 'steps': 3279, 'loss/train': 2.6713383197784424} +03/03/2022 17:16:11 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/03/2022 17:16:16 - INFO - codeparrot_training - Step 3280: {'lr': 0.0004999077259571442, 'samples': 1679872, 'steps': 3280, 'loss/train': 3.537552833557129} +03/03/2022 17:16:19 - INFO - codeparrot_training - Step 3281: {'lr': 0.0004999075817315199, 'samples': 1680384, 'steps': 3281, 'loss/train': 3.077975034713745} +03/03/2022 17:16:20 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/03/2022 17:16:24 - INFO - codeparrot_training - Step 3282: {'lr': 0.0004999074373932911, 'samples': 1680896, 'steps': 3282, 'loss/train': 2.3269410133361816} +03/03/2022 17:16:28 - INFO - codeparrot_training - Step 3283: {'lr': 0.0004999072929424579, 'samples': 1681408, 'steps': 3283, 'loss/train': 1.5639630556106567} +03/03/2022 17:16:28 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/03/2022 17:16:33 - INFO - codeparrot_training - Step 3284: {'lr': 0.0004999071483790203, 'samples': 1681920, 'steps': 3284, 'loss/train': 2.3655848503112793} +03/03/2022 17:16:36 - INFO - codeparrot_training - Step 3285: {'lr': 0.0004999070037029783, 'samples': 1682432, 'steps': 3285, 'loss/train': 3.358809232711792} +03/03/2022 17:16:37 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/03/2022 17:16:41 - INFO - codeparrot_training - Step 3286: {'lr': 0.0004999068589143322, 'samples': 1682944, 'steps': 3286, 'loss/train': 0.729758620262146} +03/03/2022 17:16:45 - INFO - codeparrot_training - Step 3287: {'lr': 0.0004999067140130819, 'samples': 1683456, 'steps': 3287, 'loss/train': 2.011780023574829} +03/03/2022 17:16:45 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/03/2022 17:16:50 - INFO - codeparrot_training - Step 3288: {'lr': 0.0004999065689992273, 'samples': 1683968, 'steps': 3288, 'loss/train': 2.709617853164673} +03/03/2022 17:16:53 - INFO - codeparrot_training - Step 3289: {'lr': 0.0004999064238727689, 'samples': 1684480, 'steps': 3289, 'loss/train': 3.4925050735473633} +03/03/2022 17:16:53 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/03/2022 17:16:58 - INFO - codeparrot_training - Step 3290: {'lr': 0.0004999062786337064, 'samples': 1684992, 'steps': 3290, 'loss/train': 2.6360971927642822} +03/03/2022 17:17:02 - INFO - codeparrot_training - Step 3291: {'lr': 0.0004999061332820401, 'samples': 1685504, 'steps': 3291, 'loss/train': 3.1296236515045166} +03/03/2022 17:17:02 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/03/2022 17:17:07 - INFO - codeparrot_training - Step 3292: {'lr': 0.0004999059878177699, 'samples': 1686016, 'steps': 3292, 'loss/train': 3.0777924060821533} +03/03/2022 17:17:10 - INFO - codeparrot_training - Step 3293: {'lr': 0.0004999058422408959, 'samples': 1686528, 'steps': 3293, 'loss/train': 2.3218016624450684} +03/03/2022 17:17:10 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/03/2022 17:17:15 - INFO - codeparrot_training - Step 3294: {'lr': 0.0004999056965514181, 'samples': 1687040, 'steps': 3294, 'loss/train': 3.278587818145752} +03/03/2022 17:17:18 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/03/2022 17:17:20 - INFO - codeparrot_training - Step 3295: {'lr': 0.0004999055507493368, 'samples': 1687552, 'steps': 3295, 'loss/train': 3.443573474884033} +03/03/2022 17:17:24 - INFO - codeparrot_training - Step 3296: {'lr': 0.0004999054048346517, 'samples': 1688064, 'steps': 3296, 'loss/train': 2.9336061477661133} +03/03/2022 17:17:26 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/03/2022 17:17:29 - INFO - codeparrot_training - Step 3297: {'lr': 0.0004999052588073633, 'samples': 1688576, 'steps': 3297, 'loss/train': 2.5668258666992188} +03/03/2022 17:17:32 - INFO - codeparrot_training - Step 3298: {'lr': 0.0004999051126674714, 'samples': 1689088, 'steps': 3298, 'loss/train': 4.688945770263672} +03/03/2022 17:17:35 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/03/2022 17:17:37 - INFO - codeparrot_training - Step 3299: {'lr': 0.0004999049664149761, 'samples': 1689600, 'steps': 3299, 'loss/train': 2.8824422359466553} +03/03/2022 17:17:41 - INFO - codeparrot_training - Step 3300: {'lr': 0.0004999048200498774, 'samples': 1690112, 'steps': 3300, 'loss/train': 2.3504159450531006} +03/03/2022 17:17:43 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/03/2022 17:17:46 - INFO - codeparrot_training - Step 3301: {'lr': 0.0004999046735721755, 'samples': 1690624, 'steps': 3301, 'loss/train': 2.3504092693328857} +03/03/2022 17:17:49 - INFO - codeparrot_training - Step 3302: {'lr': 0.0004999045269818704, 'samples': 1691136, 'steps': 3302, 'loss/train': 1.819130301475525} +03/03/2022 17:17:52 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/03/2022 17:17:54 - INFO - codeparrot_training - Step 3303: {'lr': 0.0004999043802789622, 'samples': 1691648, 'steps': 3303, 'loss/train': 3.09501314163208} +03/03/2022 17:17:57 - INFO - codeparrot_training - Step 3304: {'lr': 0.000499904233463451, 'samples': 1692160, 'steps': 3304, 'loss/train': 3.1654160022735596} +03/03/2022 17:18:00 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/03/2022 17:18:03 - INFO - codeparrot_training - Step 3305: {'lr': 0.0004999040865353367, 'samples': 1692672, 'steps': 3305, 'loss/train': 2.178650379180908} +03/03/2022 17:18:06 - INFO - codeparrot_training - Step 3306: {'lr': 0.0004999039394946196, 'samples': 1693184, 'steps': 3306, 'loss/train': 2.0084445476531982} +03/03/2022 17:18:08 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/03/2022 17:18:11 - INFO - codeparrot_training - Step 3307: {'lr': 0.0004999037923412995, 'samples': 1693696, 'steps': 3307, 'loss/train': 2.2655982971191406} +03/03/2022 17:18:14 - INFO - codeparrot_training - Step 3308: {'lr': 0.0004999036450753767, 'samples': 1694208, 'steps': 3308, 'loss/train': 1.6685303449630737} +03/03/2022 17:18:16 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/03/2022 17:18:20 - INFO - codeparrot_training - Step 3309: {'lr': 0.0004999034976968511, 'samples': 1694720, 'steps': 3309, 'loss/train': 2.7072291374206543} +03/03/2022 17:18:23 - INFO - codeparrot_training - Step 3310: {'lr': 0.0004999033502057228, 'samples': 1695232, 'steps': 3310, 'loss/train': 3.3253469467163086} +03/03/2022 17:18:26 - INFO - codeparrot_training - Step 3311: {'lr': 0.000499903202601992, 'samples': 1695744, 'steps': 3311, 'loss/train': 2.6117329597473145} +03/03/2022 17:18:27 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/03/2022 17:18:31 - INFO - codeparrot_training - Step 3312: {'lr': 0.0004999030548856586, 'samples': 1696256, 'steps': 3312, 'loss/train': 3.7277019023895264} +03/03/2022 17:18:34 - INFO - codeparrot_training - Step 3313: {'lr': 0.0004999029070567229, 'samples': 1696768, 'steps': 3313, 'loss/train': 2.4026546478271484} +03/03/2022 17:18:35 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/03/2022 17:18:39 - INFO - codeparrot_training - Step 3314: {'lr': 0.0004999027591151847, 'samples': 1697280, 'steps': 3314, 'loss/train': 2.4240620136260986} +03/03/2022 17:18:43 - INFO - codeparrot_training - Step 3315: {'lr': 0.0004999026110610442, 'samples': 1697792, 'steps': 3315, 'loss/train': 3.1103720664978027} +03/03/2022 17:18:43 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/03/2022 17:18:48 - INFO - codeparrot_training - Step 3316: {'lr': 0.0004999024628943014, 'samples': 1698304, 'steps': 3316, 'loss/train': 2.6764280796051025} +03/03/2022 17:18:51 - INFO - codeparrot_training - Step 3317: {'lr': 0.0004999023146149565, 'samples': 1698816, 'steps': 3317, 'loss/train': 3.0690629482269287} +03/03/2022 17:18:51 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/03/2022 17:18:56 - INFO - codeparrot_training - Step 3318: {'lr': 0.0004999021662230093, 'samples': 1699328, 'steps': 3318, 'loss/train': 3.241624355316162} +03/03/2022 17:18:59 - INFO - codeparrot_training - Step 3319: {'lr': 0.0004999020177184601, 'samples': 1699840, 'steps': 3319, 'loss/train': 2.1815922260284424} +03/03/2022 17:19:00 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/03/2022 17:19:05 - INFO - codeparrot_training - Step 3320: {'lr': 0.000499901869101309, 'samples': 1700352, 'steps': 3320, 'loss/train': 2.7197413444519043} +03/03/2022 17:19:08 - INFO - codeparrot_training - Step 3321: {'lr': 0.0004999017203715559, 'samples': 1700864, 'steps': 3321, 'loss/train': 2.1207735538482666} +03/03/2022 17:19:08 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/03/2022 17:19:13 - INFO - codeparrot_training - Step 3322: {'lr': 0.000499901571529201, 'samples': 1701376, 'steps': 3322, 'loss/train': 0.3533053398132324} +03/03/2022 17:19:17 - INFO - codeparrot_training - Step 3323: {'lr': 0.0004999014225742442, 'samples': 1701888, 'steps': 3323, 'loss/train': 1.6861681938171387} +03/03/2022 17:19:17 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/03/2022 17:19:22 - INFO - codeparrot_training - Step 3324: {'lr': 0.0004999012735066858, 'samples': 1702400, 'steps': 3324, 'loss/train': 2.591144561767578} +03/03/2022 17:19:25 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/03/2022 17:19:27 - INFO - codeparrot_training - Step 3325: {'lr': 0.0004999011243265257, 'samples': 1702912, 'steps': 3325, 'loss/train': 3.132512331008911} +03/03/2022 17:19:30 - INFO - codeparrot_training - Step 3326: {'lr': 0.000499900975033764, 'samples': 1703424, 'steps': 3326, 'loss/train': 2.485368013381958} +03/03/2022 17:19:33 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/03/2022 17:19:35 - INFO - codeparrot_training - Step 3327: {'lr': 0.0004999008256284008, 'samples': 1703936, 'steps': 3327, 'loss/train': 1.8475751876831055} +03/03/2022 17:19:39 - INFO - codeparrot_training - Step 3328: {'lr': 0.0004999006761104361, 'samples': 1704448, 'steps': 3328, 'loss/train': 1.615195393562317} +03/03/2022 17:19:41 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/03/2022 17:19:44 - INFO - codeparrot_training - Step 3329: {'lr': 0.0004999005264798701, 'samples': 1704960, 'steps': 3329, 'loss/train': 3.016690969467163} +03/03/2022 17:19:47 - INFO - codeparrot_training - Step 3330: {'lr': 0.0004999003767367027, 'samples': 1705472, 'steps': 3330, 'loss/train': 2.9630043506622314} +03/03/2022 17:19:50 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/03/2022 17:19:52 - INFO - codeparrot_training - Step 3331: {'lr': 0.0004999002268809339, 'samples': 1705984, 'steps': 3331, 'loss/train': 1.8617148399353027} +03/03/2022 17:19:56 - INFO - codeparrot_training - Step 3332: {'lr': 0.0004999000769125642, 'samples': 1706496, 'steps': 3332, 'loss/train': 3.054851531982422} +03/03/2022 17:19:58 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/03/2022 17:20:01 - INFO - codeparrot_training - Step 3333: {'lr': 0.0004998999268315932, 'samples': 1707008, 'steps': 3333, 'loss/train': 2.727586269378662} +03/03/2022 17:20:04 - INFO - codeparrot_training - Step 3334: {'lr': 0.0004998997766380212, 'samples': 1707520, 'steps': 3334, 'loss/train': 2.11360239982605} +03/03/2022 17:20:06 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/03/2022 17:20:09 - INFO - codeparrot_training - Step 3335: {'lr': 0.0004998996263318482, 'samples': 1708032, 'steps': 3335, 'loss/train': 2.2568912506103516} +03/03/2022 17:20:12 - INFO - codeparrot_training - Step 3336: {'lr': 0.0004998994759130743, 'samples': 1708544, 'steps': 3336, 'loss/train': 2.80450439453125} +03/03/2022 17:20:15 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/03/2022 17:20:18 - INFO - codeparrot_training - Step 3337: {'lr': 0.0004998993253816996, 'samples': 1709056, 'steps': 3337, 'loss/train': 2.2485573291778564} +03/03/2022 17:20:21 - INFO - codeparrot_training - Step 3338: {'lr': 0.000499899174737724, 'samples': 1709568, 'steps': 3338, 'loss/train': 2.7095296382904053} +03/03/2022 17:20:23 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/03/2022 17:20:26 - INFO - codeparrot_training - Step 3339: {'lr': 0.0004998990239811477, 'samples': 1710080, 'steps': 3339, 'loss/train': 0.8227948546409607} +03/03/2022 17:20:29 - INFO - codeparrot_training - Step 3340: {'lr': 0.0004998988731119709, 'samples': 1710592, 'steps': 3340, 'loss/train': 1.7955102920532227} +03/03/2022 17:20:31 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/03/2022 17:20:35 - INFO - codeparrot_training - Step 3341: {'lr': 0.0004998987221301935, 'samples': 1711104, 'steps': 3341, 'loss/train': 3.065826892852783} +03/03/2022 17:20:38 - INFO - codeparrot_training - Step 3342: {'lr': 0.0004998985710358155, 'samples': 1711616, 'steps': 3342, 'loss/train': 2.9800052642822266} +03/03/2022 17:20:39 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/03/2022 17:20:43 - INFO - codeparrot_training - Step 3343: {'lr': 0.0004998984198288371, 'samples': 1712128, 'steps': 3343, 'loss/train': 2.2378668785095215} +03/03/2022 17:20:46 - INFO - codeparrot_training - Step 3344: {'lr': 0.0004998982685092583, 'samples': 1712640, 'steps': 3344, 'loss/train': 2.862046241760254} +03/03/2022 17:20:48 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/03/2022 17:20:51 - INFO - codeparrot_training - Step 3345: {'lr': 0.0004998981170770792, 'samples': 1713152, 'steps': 3345, 'loss/train': 2.491856575012207} +03/03/2022 17:20:55 - INFO - codeparrot_training - Step 3346: {'lr': 0.0004998979655323, 'samples': 1713664, 'steps': 3346, 'loss/train': 2.3223211765289307} +03/03/2022 17:20:56 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/03/2022 17:21:00 - INFO - codeparrot_training - Step 3347: {'lr': 0.0004998978138749204, 'samples': 1714176, 'steps': 3347, 'loss/train': 2.613044023513794} +03/03/2022 17:21:03 - INFO - codeparrot_training - Step 3348: {'lr': 0.0004998976621049408, 'samples': 1714688, 'steps': 3348, 'loss/train': 3.2470545768737793} +03/03/2022 17:21:04 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/03/2022 17:21:08 - INFO - codeparrot_training - Step 3349: {'lr': 0.0004998975102223612, 'samples': 1715200, 'steps': 3349, 'loss/train': 2.890226364135742} +03/03/2022 17:21:11 - INFO - codeparrot_training - Step 3350: {'lr': 0.0004998973582271817, 'samples': 1715712, 'steps': 3350, 'loss/train': 1.8736419677734375} +03/03/2022 17:21:13 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/03/2022 17:21:17 - INFO - codeparrot_training - Step 3351: {'lr': 0.0004998972061194022, 'samples': 1716224, 'steps': 3351, 'loss/train': 2.136707067489624} +03/03/2022 17:21:20 - INFO - codeparrot_training - Step 3352: {'lr': 0.0004998970538990228, 'samples': 1716736, 'steps': 3352, 'loss/train': 2.641378879547119} +03/03/2022 17:21:21 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/03/2022 17:21:25 - INFO - codeparrot_training - Step 3353: {'lr': 0.0004998969015660438, 'samples': 1717248, 'steps': 3353, 'loss/train': 2.1359269618988037} +03/03/2022 17:21:28 - INFO - codeparrot_training - Step 3354: {'lr': 0.0004998967491204651, 'samples': 1717760, 'steps': 3354, 'loss/train': 2.473081111907959} +03/03/2022 17:21:29 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 17:21:33 - INFO - codeparrot_training - Step 3355: {'lr': 0.0004998965965622867, 'samples': 1718272, 'steps': 3355, 'loss/train': 2.6167824268341064} +03/03/2022 17:21:37 - INFO - codeparrot_training - Step 3356: {'lr': 0.0004998964438915088, 'samples': 1718784, 'steps': 3356, 'loss/train': 2.4331297874450684} +03/03/2022 17:21:37 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/03/2022 17:21:42 - INFO - codeparrot_training - Step 3357: {'lr': 0.0004998962911081314, 'samples': 1719296, 'steps': 3357, 'loss/train': 2.593017101287842} +03/03/2022 17:21:45 - INFO - codeparrot_training - Step 3358: {'lr': 0.0004998961382121546, 'samples': 1719808, 'steps': 3358, 'loss/train': 3.2228899002075195} +03/03/2022 17:21:46 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/03/2022 17:21:50 - INFO - codeparrot_training - Step 3359: {'lr': 0.0004998959852035785, 'samples': 1720320, 'steps': 3359, 'loss/train': 2.2014858722686768} +03/03/2022 17:21:53 - INFO - codeparrot_training - Step 3360: {'lr': 0.0004998958320824031, 'samples': 1720832, 'steps': 3360, 'loss/train': 2.5461223125457764} +03/03/2022 17:21:54 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/03/2022 17:21:59 - INFO - codeparrot_training - Step 3361: {'lr': 0.0004998956788486284, 'samples': 1721344, 'steps': 3361, 'loss/train': 2.9290778636932373} +03/03/2022 17:22:02 - INFO - codeparrot_training - Step 3362: {'lr': 0.0004998955255022547, 'samples': 1721856, 'steps': 3362, 'loss/train': 3.1084821224212646} +03/03/2022 17:22:02 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/03/2022 17:22:07 - INFO - codeparrot_training - Step 3363: {'lr': 0.0004998953720432818, 'samples': 1722368, 'steps': 3363, 'loss/train': 3.2487435340881348} +03/03/2022 17:22:10 - INFO - codeparrot_training - Step 3364: {'lr': 0.00049989521847171, 'samples': 1722880, 'steps': 3364, 'loss/train': 2.6276304721832275} +03/03/2022 17:22:11 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/03/2022 17:22:15 - INFO - codeparrot_training - Step 3365: {'lr': 0.0004998950647875392, 'samples': 1723392, 'steps': 3365, 'loss/train': 3.2136082649230957} +03/03/2022 17:22:18 - INFO - codeparrot_training - Step 3366: {'lr': 0.0004998949109907697, 'samples': 1723904, 'steps': 3366, 'loss/train': 2.3309714794158936} +03/03/2022 17:22:19 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/03/2022 17:22:24 - INFO - codeparrot_training - Step 3367: {'lr': 0.0004998947570814012, 'samples': 1724416, 'steps': 3367, 'loss/train': 0.5572906136512756} +03/03/2022 17:22:27 - INFO - codeparrot_training - Step 3368: {'lr': 0.0004998946030594341, 'samples': 1724928, 'steps': 3368, 'loss/train': 2.8445613384246826} +03/03/2022 17:22:27 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/03/2022 17:22:32 - INFO - codeparrot_training - Step 3369: {'lr': 0.0004998944489248683, 'samples': 1725440, 'steps': 3369, 'loss/train': 2.5794687271118164} +03/03/2022 17:22:35 - INFO - codeparrot_training - Step 3370: {'lr': 0.000499894294677704, 'samples': 1725952, 'steps': 3370, 'loss/train': 3.1697585582733154} +03/03/2022 17:22:35 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/03/2022 17:22:41 - INFO - codeparrot_training - Step 3371: {'lr': 0.000499894140317941, 'samples': 1726464, 'steps': 3371, 'loss/train': 1.9900901317596436} +03/03/2022 17:22:44 - INFO - codeparrot_training - Step 3372: {'lr': 0.0004998939858455798, 'samples': 1726976, 'steps': 3372, 'loss/train': 2.4157989025115967} +03/03/2022 17:22:44 - INFO - codeparrot_training - Skipping example with length 585 (seq_length=1024) +03/03/2022 17:22:49 - INFO - codeparrot_training - Step 3373: {'lr': 0.0004998938312606201, 'samples': 1727488, 'steps': 3373, 'loss/train': 2.314059019088745} +03/03/2022 17:22:52 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/03/2022 17:22:54 - INFO - codeparrot_training - Step 3374: {'lr': 0.000499893676563062, 'samples': 1728000, 'steps': 3374, 'loss/train': 2.3428850173950195} +03/03/2022 17:22:58 - INFO - codeparrot_training - Step 3375: {'lr': 0.0004998935217529058, 'samples': 1728512, 'steps': 3375, 'loss/train': 2.808596611022949} +03/03/2022 17:23:00 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/03/2022 17:23:03 - INFO - codeparrot_training - Step 3376: {'lr': 0.0004998933668301514, 'samples': 1729024, 'steps': 3376, 'loss/train': 2.693678379058838} +03/03/2022 17:23:06 - INFO - codeparrot_training - Step 3377: {'lr': 0.0004998932117947989, 'samples': 1729536, 'steps': 3377, 'loss/train': 3.662250518798828} +03/03/2022 17:23:09 - INFO - codeparrot_training - Step 3378: {'lr': 0.0004998930566468484, 'samples': 1730048, 'steps': 3378, 'loss/train': 2.135911226272583} +03/03/2022 17:23:09 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/03/2022 17:23:15 - INFO - codeparrot_training - Step 3379: {'lr': 0.0004998929013863, 'samples': 1730560, 'steps': 3379, 'loss/train': 3.202366352081299} +03/03/2022 17:23:18 - INFO - codeparrot_training - Step 3380: {'lr': 0.0004998927460131535, 'samples': 1731072, 'steps': 3380, 'loss/train': 2.319014072418213} +03/03/2022 17:23:18 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/03/2022 17:23:23 - INFO - codeparrot_training - Step 3381: {'lr': 0.0004998925905274094, 'samples': 1731584, 'steps': 3381, 'loss/train': 2.2633581161499023} +03/03/2022 17:23:26 - INFO - codeparrot_training - Step 3382: {'lr': 0.0004998924349290674, 'samples': 1732096, 'steps': 3382, 'loss/train': 3.1386866569519043} +03/03/2022 17:23:26 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/03/2022 17:23:31 - INFO - codeparrot_training - Step 3383: {'lr': 0.0004998922792181278, 'samples': 1732608, 'steps': 3383, 'loss/train': 2.434248447418213} +03/03/2022 17:23:35 - INFO - codeparrot_training - Step 3384: {'lr': 0.0004998921233945907, 'samples': 1733120, 'steps': 3384, 'loss/train': 1.3909858465194702} +03/03/2022 17:23:35 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/03/2022 17:23:40 - INFO - codeparrot_training - Step 3385: {'lr': 0.0004998919674584559, 'samples': 1733632, 'steps': 3385, 'loss/train': 4.128971576690674} +03/03/2022 17:23:43 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/03/2022 17:23:45 - INFO - codeparrot_training - Step 3386: {'lr': 0.0004998918114097237, 'samples': 1734144, 'steps': 3386, 'loss/train': 2.916940450668335} +03/03/2022 17:23:48 - INFO - codeparrot_training - Step 3387: {'lr': 0.0004998916552483941, 'samples': 1734656, 'steps': 3387, 'loss/train': 1.928485631942749} +03/03/2022 17:23:51 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 17:23:54 - INFO - codeparrot_training - Step 3388: {'lr': 0.0004998914989744671, 'samples': 1735168, 'steps': 3388, 'loss/train': 3.1142492294311523} +03/03/2022 17:23:57 - INFO - codeparrot_training - Step 3389: {'lr': 0.000499891342587943, 'samples': 1735680, 'steps': 3389, 'loss/train': 4.308945655822754} +03/03/2022 17:24:00 - INFO - codeparrot_training - Step 3390: {'lr': 0.0004998911860888217, 'samples': 1736192, 'steps': 3390, 'loss/train': 2.3879692554473877} +03/03/2022 17:24:00 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/03/2022 17:24:05 - INFO - codeparrot_training - Step 3391: {'lr': 0.0004998910294771032, 'samples': 1736704, 'steps': 3391, 'loss/train': 2.4640536308288574} +03/03/2022 17:24:08 - INFO - codeparrot_training - Step 3392: {'lr': 0.0004998908727527877, 'samples': 1737216, 'steps': 3392, 'loss/train': 2.474311590194702} +03/03/2022 17:24:08 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/03/2022 17:24:14 - INFO - codeparrot_training - Step 3393: {'lr': 0.0004998907159158752, 'samples': 1737728, 'steps': 3393, 'loss/train': 0.6039132475852966} +03/03/2022 17:24:17 - INFO - codeparrot_training - Step 3394: {'lr': 0.0004998905589663658, 'samples': 1738240, 'steps': 3394, 'loss/train': 2.794804096221924} +03/03/2022 17:24:17 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/03/2022 17:24:22 - INFO - codeparrot_training - Step 3395: {'lr': 0.0004998904019042596, 'samples': 1738752, 'steps': 3395, 'loss/train': 2.3736462593078613} +03/03/2022 17:24:25 - INFO - codeparrot_training - Step 3396: {'lr': 0.0004998902447295567, 'samples': 1739264, 'steps': 3396, 'loss/train': 3.3609683513641357} +03/03/2022 17:24:25 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/03/2022 17:24:30 - INFO - codeparrot_training - Step 3397: {'lr': 0.000499890087442257, 'samples': 1739776, 'steps': 3397, 'loss/train': 1.1259238719940186} +03/03/2022 17:24:34 - INFO - codeparrot_training - Step 3398: {'lr': 0.0004998899300423607, 'samples': 1740288, 'steps': 3398, 'loss/train': 4.190059661865234} +03/03/2022 17:24:34 - INFO - codeparrot_training - Skipping example with length 242 (seq_length=1024) +03/03/2022 17:24:39 - INFO - codeparrot_training - Step 3399: {'lr': 0.0004998897725298679, 'samples': 1740800, 'steps': 3399, 'loss/train': 0.8685932755470276} +03/03/2022 17:24:42 - INFO - codeparrot_training - Step 3400: {'lr': 0.0004998896149047786, 'samples': 1741312, 'steps': 3400, 'loss/train': 2.2184441089630127} +03/03/2022 17:24:42 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/03/2022 17:24:48 - INFO - codeparrot_training - Step 3401: {'lr': 0.0004998894571670929, 'samples': 1741824, 'steps': 3401, 'loss/train': 2.9940412044525146} +03/03/2022 17:24:51 - INFO - codeparrot_training - Step 3402: {'lr': 0.0004998892993168109, 'samples': 1742336, 'steps': 3402, 'loss/train': 3.181220531463623} +03/03/2022 17:24:53 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/03/2022 17:24:56 - INFO - codeparrot_training - Step 3403: {'lr': 0.0004998891413539326, 'samples': 1742848, 'steps': 3403, 'loss/train': 2.095587730407715} +03/03/2022 17:24:59 - INFO - codeparrot_training - Step 3404: {'lr': 0.0004998889832784581, 'samples': 1743360, 'steps': 3404, 'loss/train': 3.191856622695923} +03/03/2022 17:25:01 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/03/2022 17:25:05 - INFO - codeparrot_training - Step 3405: {'lr': 0.0004998888250903875, 'samples': 1743872, 'steps': 3405, 'loss/train': 3.3809690475463867} +03/03/2022 17:25:08 - INFO - codeparrot_training - Step 3406: {'lr': 0.0004998886667897209, 'samples': 1744384, 'steps': 3406, 'loss/train': 2.7711968421936035} +03/03/2022 17:25:09 - INFO - codeparrot_training - Skipping example with length 198 (seq_length=1024) +03/03/2022 17:25:13 - INFO - codeparrot_training - Step 3407: {'lr': 0.0004998885083764582, 'samples': 1744896, 'steps': 3407, 'loss/train': 3.3071417808532715} +03/03/2022 17:25:16 - INFO - codeparrot_training - Step 3408: {'lr': 0.0004998883498505996, 'samples': 1745408, 'steps': 3408, 'loss/train': 3.456636428833008} +03/03/2022 17:25:18 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/03/2022 17:25:21 - INFO - codeparrot_training - Step 3409: {'lr': 0.0004998881912121453, 'samples': 1745920, 'steps': 3409, 'loss/train': 2.7034947872161865} +03/03/2022 17:25:25 - INFO - codeparrot_training - Step 3410: {'lr': 0.0004998880324610952, 'samples': 1746432, 'steps': 3410, 'loss/train': 2.239018201828003} +03/03/2022 17:25:26 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/03/2022 17:25:30 - INFO - codeparrot_training - Step 3411: {'lr': 0.0004998878735974493, 'samples': 1746944, 'steps': 3411, 'loss/train': 2.7509021759033203} +03/03/2022 17:25:33 - INFO - codeparrot_training - Step 3412: {'lr': 0.0004998877146212079, 'samples': 1747456, 'steps': 3412, 'loss/train': 2.139158248901367} +03/03/2022 17:25:34 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/03/2022 17:25:38 - INFO - codeparrot_training - Step 3413: {'lr': 0.0004998875555323708, 'samples': 1747968, 'steps': 3413, 'loss/train': 2.25270676612854} +03/03/2022 17:25:41 - INFO - codeparrot_training - Step 3414: {'lr': 0.0004998873963309384, 'samples': 1748480, 'steps': 3414, 'loss/train': 3.3153390884399414} +03/03/2022 17:25:42 - INFO - codeparrot_training - Skipping example with length 757 (seq_length=1024) +03/03/2022 17:25:47 - INFO - codeparrot_training - Step 3415: {'lr': 0.0004998872370169105, 'samples': 1748992, 'steps': 3415, 'loss/train': 2.900937557220459} +03/03/2022 17:25:50 - INFO - codeparrot_training - Step 3416: {'lr': 0.0004998870775902872, 'samples': 1749504, 'steps': 3416, 'loss/train': 1.87582528591156} +03/03/2022 17:25:51 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/03/2022 17:25:55 - INFO - codeparrot_training - Step 3417: {'lr': 0.0004998869180510688, 'samples': 1750016, 'steps': 3417, 'loss/train': 3.2035417556762695} +03/03/2022 17:25:58 - INFO - codeparrot_training - Step 3418: {'lr': 0.0004998867583992551, 'samples': 1750528, 'steps': 3418, 'loss/train': 2.7804815769195557} +03/03/2022 17:25:59 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/03/2022 17:26:04 - INFO - codeparrot_training - Step 3419: {'lr': 0.0004998865986348464, 'samples': 1751040, 'steps': 3419, 'loss/train': 2.191013813018799} +03/03/2022 17:26:07 - INFO - codeparrot_training - Step 3420: {'lr': 0.0004998864387578426, 'samples': 1751552, 'steps': 3420, 'loss/train': 1.1928843259811401} +03/03/2022 17:26:07 - INFO - codeparrot_training - Skipping example with length 266 (seq_length=1024) +03/03/2022 17:26:12 - INFO - codeparrot_training - Step 3421: {'lr': 0.0004998862787682438, 'samples': 1752064, 'steps': 3421, 'loss/train': 2.590466260910034} +03/03/2022 17:26:15 - INFO - codeparrot_training - Step 3422: {'lr': 0.00049988611866605, 'samples': 1752576, 'steps': 3422, 'loss/train': 2.749896287918091} +03/03/2022 17:26:17 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/03/2022 17:26:21 - INFO - codeparrot_training - Step 3423: {'lr': 0.0004998859584512615, 'samples': 1753088, 'steps': 3423, 'loss/train': 3.0882656574249268} +03/03/2022 17:26:24 - INFO - codeparrot_training - Step 3424: {'lr': 0.0004998857981238782, 'samples': 1753600, 'steps': 3424, 'loss/train': 3.1641132831573486} +03/03/2022 17:26:29 - INFO - codeparrot_training - Step 3425: {'lr': 0.0004998856376839003, 'samples': 1754112, 'steps': 3425, 'loss/train': 1.9951564073562622} +03/03/2022 17:26:32 - INFO - codeparrot_training - Step 3426: {'lr': 0.0004998854771313277, 'samples': 1754624, 'steps': 3426, 'loss/train': 2.7978129386901855} +03/03/2022 17:26:34 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/03/2022 17:26:38 - INFO - codeparrot_training - Step 3427: {'lr': 0.0004998853164661606, 'samples': 1755136, 'steps': 3427, 'loss/train': 2.788806438446045} +03/03/2022 17:26:41 - INFO - codeparrot_training - Step 3428: {'lr': 0.000499885155688399, 'samples': 1755648, 'steps': 3428, 'loss/train': 2.3447115421295166} +03/03/2022 17:26:42 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/03/2022 17:26:46 - INFO - codeparrot_training - Step 3429: {'lr': 0.000499884994798043, 'samples': 1756160, 'steps': 3429, 'loss/train': 3.077827215194702} +03/03/2022 17:26:49 - INFO - codeparrot_training - Step 3430: {'lr': 0.0004998848337950927, 'samples': 1756672, 'steps': 3430, 'loss/train': 2.3189892768859863} +03/03/2022 17:26:51 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/03/2022 17:26:54 - INFO - codeparrot_training - Step 3431: {'lr': 0.0004998846726795482, 'samples': 1757184, 'steps': 3431, 'loss/train': 2.3812575340270996} +03/03/2022 17:26:58 - INFO - codeparrot_training - Step 3432: {'lr': 0.0004998845114514095, 'samples': 1757696, 'steps': 3432, 'loss/train': 1.064099669456482} +03/03/2022 17:26:59 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/03/2022 17:27:03 - INFO - codeparrot_training - Step 3433: {'lr': 0.0004998843501106766, 'samples': 1758208, 'steps': 3433, 'loss/train': 3.1903083324432373} +03/03/2022 17:27:06 - INFO - codeparrot_training - Step 3434: {'lr': 0.0004998841886573496, 'samples': 1758720, 'steps': 3434, 'loss/train': 2.3210856914520264} +03/03/2022 17:27:08 - INFO - codeparrot_training - Skipping example with length 64 (seq_length=1024) +03/03/2022 17:27:11 - INFO - codeparrot_training - Step 3435: {'lr': 0.0004998840270914288, 'samples': 1759232, 'steps': 3435, 'loss/train': 2.2028391361236572} +03/03/2022 17:27:15 - INFO - codeparrot_training - Step 3436: {'lr': 0.0004998838654129142, 'samples': 1759744, 'steps': 3436, 'loss/train': 2.7622439861297607} +03/03/2022 17:27:17 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/03/2022 17:27:20 - INFO - codeparrot_training - Step 3437: {'lr': 0.0004998837036218056, 'samples': 1760256, 'steps': 3437, 'loss/train': 2.1658856868743896} +03/03/2022 17:27:23 - INFO - codeparrot_training - Step 3438: {'lr': 0.0004998835417181033, 'samples': 1760768, 'steps': 3438, 'loss/train': 2.2480814456939697} +03/03/2022 17:27:25 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/03/2022 17:27:28 - INFO - codeparrot_training - Step 3439: {'lr': 0.0004998833797018074, 'samples': 1761280, 'steps': 3439, 'loss/train': 2.6411871910095215} +03/03/2022 17:27:31 - INFO - codeparrot_training - Step 3440: {'lr': 0.0004998832175729179, 'samples': 1761792, 'steps': 3440, 'loss/train': 1.5373883247375488} +03/03/2022 17:27:33 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/03/2022 17:27:37 - INFO - codeparrot_training - Step 3441: {'lr': 0.0004998830553314349, 'samples': 1762304, 'steps': 3441, 'loss/train': 2.9242169857025146} +03/03/2022 17:27:40 - INFO - codeparrot_training - Step 3442: {'lr': 0.0004998828929773583, 'samples': 1762816, 'steps': 3442, 'loss/train': 2.364250659942627} +03/03/2022 17:27:41 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/03/2022 17:27:45 - INFO - codeparrot_training - Step 3443: {'lr': 0.0004998827305106884, 'samples': 1763328, 'steps': 3443, 'loss/train': 2.7832021713256836} +03/03/2022 17:27:48 - INFO - codeparrot_training - Step 3444: {'lr': 0.0004998825679314253, 'samples': 1763840, 'steps': 3444, 'loss/train': 2.560842514038086} +03/03/2022 17:27:50 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/03/2022 17:27:53 - INFO - codeparrot_training - Step 3445: {'lr': 0.0004998824052395689, 'samples': 1764352, 'steps': 3445, 'loss/train': 2.861851930618286} +03/03/2022 17:27:57 - INFO - codeparrot_training - Step 3446: {'lr': 0.0004998822424351193, 'samples': 1764864, 'steps': 3446, 'loss/train': 2.9265940189361572} +03/03/2022 17:27:58 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/03/2022 17:28:02 - INFO - codeparrot_training - Step 3447: {'lr': 0.0004998820795180766, 'samples': 1765376, 'steps': 3447, 'loss/train': 1.185547113418579} +03/03/2022 17:28:05 - INFO - codeparrot_training - Step 3448: {'lr': 0.000499881916488441, 'samples': 1765888, 'steps': 3448, 'loss/train': 2.6167521476745605} +03/03/2022 17:28:06 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/03/2022 17:28:10 - INFO - codeparrot_training - Step 3449: {'lr': 0.0004998817533462123, 'samples': 1766400, 'steps': 3449, 'loss/train': 2.8428573608398438} +03/03/2022 17:28:13 - INFO - codeparrot_training - Step 3450: {'lr': 0.0004998815900913909, 'samples': 1766912, 'steps': 3450, 'loss/train': 2.921534538269043} +03/03/2022 17:28:15 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/03/2022 17:28:19 - INFO - codeparrot_training - Step 3451: {'lr': 0.0004998814267239767, 'samples': 1767424, 'steps': 3451, 'loss/train': 2.1076900959014893} +03/03/2022 17:28:22 - INFO - codeparrot_training - Step 3452: {'lr': 0.0004998812632439697, 'samples': 1767936, 'steps': 3452, 'loss/train': 2.8642594814300537} +03/03/2022 17:28:23 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/03/2022 17:28:27 - INFO - codeparrot_training - Step 3453: {'lr': 0.00049988109965137, 'samples': 1768448, 'steps': 3453, 'loss/train': 2.299715280532837} +03/03/2022 17:28:30 - INFO - codeparrot_training - Step 3454: {'lr': 0.000499880935946178, 'samples': 1768960, 'steps': 3454, 'loss/train': 2.4853639602661133} +03/03/2022 17:28:31 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/03/2022 17:28:36 - INFO - codeparrot_training - Step 3455: {'lr': 0.0004998807721283932, 'samples': 1769472, 'steps': 3455, 'loss/train': 0.5581620335578918} +03/03/2022 17:28:39 - INFO - codeparrot_training - Step 3456: {'lr': 0.0004998806081980162, 'samples': 1769984, 'steps': 3456, 'loss/train': 2.44799542427063} +03/03/2022 17:28:40 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/03/2022 17:28:44 - INFO - codeparrot_training - Step 3457: {'lr': 0.0004998804441550467, 'samples': 1770496, 'steps': 3457, 'loss/train': 2.511789083480835} +03/03/2022 17:28:47 - INFO - codeparrot_training - Step 3458: {'lr': 0.000499880279999485, 'samples': 1771008, 'steps': 3458, 'loss/train': 2.9691922664642334} +03/03/2022 17:28:48 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/03/2022 17:28:53 - INFO - codeparrot_training - Step 3459: {'lr': 0.0004998801157313311, 'samples': 1771520, 'steps': 3459, 'loss/train': 2.8180603981018066} +03/03/2022 17:28:56 - INFO - codeparrot_training - Step 3460: {'lr': 0.0004998799513505851, 'samples': 1772032, 'steps': 3460, 'loss/train': 2.8695521354675293} +03/03/2022 17:28:57 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/03/2022 17:29:01 - INFO - codeparrot_training - Step 3461: {'lr': 0.000499879786857247, 'samples': 1772544, 'steps': 3461, 'loss/train': 0.26493769884109497} +03/03/2022 17:29:04 - INFO - codeparrot_training - Step 3462: {'lr': 0.0004998796222513169, 'samples': 1773056, 'steps': 3462, 'loss/train': 2.8650240898132324} +03/03/2022 17:29:05 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/03/2022 17:29:09 - INFO - codeparrot_training - Step 3463: {'lr': 0.000499879457532795, 'samples': 1773568, 'steps': 3463, 'loss/train': 3.003499984741211} +03/03/2022 17:29:13 - INFO - codeparrot_training - Step 3464: {'lr': 0.0004998792927016812, 'samples': 1774080, 'steps': 3464, 'loss/train': 2.320399045944214} +03/03/2022 17:29:13 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/03/2022 17:29:18 - INFO - codeparrot_training - Step 3465: {'lr': 0.0004998791277579757, 'samples': 1774592, 'steps': 3465, 'loss/train': 3.08478045463562} +03/03/2022 17:29:21 - INFO - codeparrot_training - Step 3466: {'lr': 0.0004998789627016784, 'samples': 1775104, 'steps': 3466, 'loss/train': 1.549597144126892} +03/03/2022 17:29:22 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/03/2022 17:29:26 - INFO - codeparrot_training - Step 3467: {'lr': 0.0004998787975327896, 'samples': 1775616, 'steps': 3467, 'loss/train': 0.4559893012046814} +03/03/2022 17:29:30 - INFO - codeparrot_training - Step 3468: {'lr': 0.0004998786322513093, 'samples': 1776128, 'steps': 3468, 'loss/train': 3.2301816940307617} +03/03/2022 17:29:30 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/03/2022 17:29:35 - INFO - codeparrot_training - Step 3469: {'lr': 0.0004998784668572375, 'samples': 1776640, 'steps': 3469, 'loss/train': 2.6614420413970947} +03/03/2022 17:29:38 - INFO - codeparrot_training - Step 3470: {'lr': 0.0004998783013505743, 'samples': 1777152, 'steps': 3470, 'loss/train': 2.472599983215332} +03/03/2022 17:29:38 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/03/2022 17:29:43 - INFO - codeparrot_training - Step 3471: {'lr': 0.0004998781357313198, 'samples': 1777664, 'steps': 3471, 'loss/train': 2.3214733600616455} +03/03/2022 17:29:46 - INFO - codeparrot_training - Step 3472: {'lr': 0.0004998779699994741, 'samples': 1778176, 'steps': 3472, 'loss/train': 3.0650954246520996} +03/03/2022 17:29:47 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/03/2022 17:29:52 - INFO - codeparrot_training - Step 3473: {'lr': 0.0004998778041550372, 'samples': 1778688, 'steps': 3473, 'loss/train': 2.4158334732055664} +03/03/2022 17:29:55 - INFO - codeparrot_training - Step 3474: {'lr': 0.0004998776381980092, 'samples': 1779200, 'steps': 3474, 'loss/train': 1.8305494785308838} +03/03/2022 17:29:55 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/03/2022 17:30:00 - INFO - codeparrot_training - Step 3475: {'lr': 0.0004998774721283903, 'samples': 1779712, 'steps': 3475, 'loss/train': 1.4764806032180786} +03/03/2022 17:30:03 - INFO - codeparrot_training - Step 3476: {'lr': 0.0004998773059461803, 'samples': 1780224, 'steps': 3476, 'loss/train': 2.9446611404418945} +03/03/2022 17:30:05 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/03/2022 17:30:09 - INFO - codeparrot_training - Step 3477: {'lr': 0.0004998771396513796, 'samples': 1780736, 'steps': 3477, 'loss/train': 1.8397433757781982} +03/03/2022 17:30:12 - INFO - codeparrot_training - Step 3478: {'lr': 0.000499876973243988, 'samples': 1781248, 'steps': 3478, 'loss/train': 1.8445950746536255} +03/03/2022 17:30:13 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/03/2022 17:30:17 - INFO - codeparrot_training - Step 3479: {'lr': 0.0004998768067240059, 'samples': 1781760, 'steps': 3479, 'loss/train': 3.2472801208496094} +03/03/2022 17:30:20 - INFO - codeparrot_training - Step 3480: {'lr': 0.0004998766400914329, 'samples': 1782272, 'steps': 3480, 'loss/train': 2.1465084552764893} +03/03/2022 17:30:21 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/03/2022 17:30:26 - INFO - codeparrot_training - Step 3481: {'lr': 0.0004998764733462694, 'samples': 1782784, 'steps': 3481, 'loss/train': 2.122545003890991} +03/03/2022 17:30:29 - INFO - codeparrot_training - Step 3482: {'lr': 0.0004998763064885155, 'samples': 1783296, 'steps': 3482, 'loss/train': 2.131110906600952} +03/03/2022 17:30:30 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/03/2022 17:30:34 - INFO - codeparrot_training - Step 3483: {'lr': 0.0004998761395181712, 'samples': 1783808, 'steps': 3483, 'loss/train': 3.1917903423309326} +03/03/2022 17:30:37 - INFO - codeparrot_training - Step 3484: {'lr': 0.0004998759724352365, 'samples': 1784320, 'steps': 3484, 'loss/train': 2.5262393951416016} +03/03/2022 17:30:39 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/03/2022 17:30:42 - INFO - codeparrot_training - Step 3485: {'lr': 0.0004998758052397115, 'samples': 1784832, 'steps': 3485, 'loss/train': 3.0092735290527344} +03/03/2022 17:30:46 - INFO - codeparrot_training - Step 3486: {'lr': 0.0004998756379315964, 'samples': 1785344, 'steps': 3486, 'loss/train': 1.8495444059371948} +03/03/2022 17:30:47 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/03/2022 17:30:51 - INFO - codeparrot_training - Step 3487: {'lr': 0.0004998754705108912, 'samples': 1785856, 'steps': 3487, 'loss/train': 1.8278295993804932} +03/03/2022 17:30:54 - INFO - codeparrot_training - Step 3488: {'lr': 0.000499875302977596, 'samples': 1786368, 'steps': 3488, 'loss/train': 1.983155369758606} +03/03/2022 17:30:55 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/03/2022 17:30:59 - INFO - codeparrot_training - Step 3489: {'lr': 0.0004998751353317108, 'samples': 1786880, 'steps': 3489, 'loss/train': 3.1049907207489014} +03/03/2022 17:31:02 - INFO - codeparrot_training - Step 3490: {'lr': 0.0004998749675732357, 'samples': 1787392, 'steps': 3490, 'loss/train': 3.100144863128662} +03/03/2022 17:31:04 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/03/2022 17:31:08 - INFO - codeparrot_training - Step 3491: {'lr': 0.0004998747997021708, 'samples': 1787904, 'steps': 3491, 'loss/train': 2.7815253734588623} +03/03/2022 17:31:11 - INFO - codeparrot_training - Step 3492: {'lr': 0.0004998746317185162, 'samples': 1788416, 'steps': 3492, 'loss/train': 1.3102047443389893} +03/03/2022 17:31:12 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/03/2022 17:31:16 - INFO - codeparrot_training - Step 3493: {'lr': 0.000499874463622272, 'samples': 1788928, 'steps': 3493, 'loss/train': 2.865513324737549} +03/03/2022 17:31:19 - INFO - codeparrot_training - Step 3494: {'lr': 0.000499874295413438, 'samples': 1789440, 'steps': 3494, 'loss/train': 3.174488067626953} +03/03/2022 17:31:20 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/03/2022 17:31:25 - INFO - codeparrot_training - Step 3495: {'lr': 0.0004998741270920147, 'samples': 1789952, 'steps': 3495, 'loss/train': 1.6792330741882324} +03/03/2022 17:31:28 - INFO - codeparrot_training - Step 3496: {'lr': 0.0004998739586580019, 'samples': 1790464, 'steps': 3496, 'loss/train': 2.017700672149658} +03/03/2022 17:31:29 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/03/2022 17:31:33 - INFO - codeparrot_training - Step 3497: {'lr': 0.0004998737901113999, 'samples': 1790976, 'steps': 3497, 'loss/train': 2.2834866046905518} +03/03/2022 17:31:36 - INFO - codeparrot_training - Step 3498: {'lr': 0.0004998736214522084, 'samples': 1791488, 'steps': 3498, 'loss/train': 2.2111284732818604} +03/03/2022 17:31:37 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/03/2022 17:31:41 - INFO - codeparrot_training - Step 3499: {'lr': 0.0004998734526804278, 'samples': 1792000, 'steps': 3499, 'loss/train': 3.3585121631622314} +03/03/2022 17:31:45 - INFO - codeparrot_training - Step 3500: {'lr': 0.0004998732837960581, 'samples': 1792512, 'steps': 3500, 'loss/train': 3.1115927696228027} +03/03/2022 17:31:46 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/03/2022 17:31:50 - INFO - codeparrot_training - Step 3501: {'lr': 0.0004998731147990993, 'samples': 1793024, 'steps': 3501, 'loss/train': 2.718416452407837} +03/03/2022 17:31:53 - INFO - codeparrot_training - Step 3502: {'lr': 0.0004998729456895516, 'samples': 1793536, 'steps': 3502, 'loss/train': 2.7886691093444824} +03/03/2022 17:31:54 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/03/2022 17:31:58 - INFO - codeparrot_training - Step 3503: {'lr': 0.0004998727764674149, 'samples': 1794048, 'steps': 3503, 'loss/train': 3.472165584564209} +03/03/2022 17:32:01 - INFO - codeparrot_training - Step 3504: {'lr': 0.0004998726071326896, 'samples': 1794560, 'steps': 3504, 'loss/train': 1.2720261812210083} +03/03/2022 17:32:03 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/03/2022 17:32:07 - INFO - codeparrot_training - Step 3505: {'lr': 0.0004998724376853754, 'samples': 1795072, 'steps': 3505, 'loss/train': 2.7412705421447754} +03/03/2022 17:32:10 - INFO - codeparrot_training - Step 3506: {'lr': 0.0004998722681254725, 'samples': 1795584, 'steps': 3506, 'loss/train': 2.27960205078125} +03/03/2022 17:32:11 - INFO - codeparrot_training - Skipping example with length 540 (seq_length=1024) +03/03/2022 17:32:15 - INFO - codeparrot_training - Step 3507: {'lr': 0.0004998720984529811, 'samples': 1796096, 'steps': 3507, 'loss/train': 3.05230450630188} +03/03/2022 17:32:18 - INFO - codeparrot_training - Step 3508: {'lr': 0.0004998719286679011, 'samples': 1796608, 'steps': 3508, 'loss/train': 2.459235429763794} +03/03/2022 17:32:19 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/03/2022 17:32:24 - INFO - codeparrot_training - Step 3509: {'lr': 0.0004998717587702328, 'samples': 1797120, 'steps': 3509, 'loss/train': 2.190256118774414} +03/03/2022 17:32:27 - INFO - codeparrot_training - Step 3510: {'lr': 0.0004998715887599759, 'samples': 1797632, 'steps': 3510, 'loss/train': 2.57688307762146} +03/03/2022 17:32:28 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/03/2022 17:32:32 - INFO - codeparrot_training - Step 3511: {'lr': 0.000499871418637131, 'samples': 1798144, 'steps': 3511, 'loss/train': 1.5449107885360718} +03/03/2022 17:32:35 - INFO - codeparrot_training - Step 3512: {'lr': 0.0004998712484016977, 'samples': 1798656, 'steps': 3512, 'loss/train': 2.0804619789123535} +03/03/2022 17:32:36 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/03/2022 17:32:40 - INFO - codeparrot_training - Step 3513: {'lr': 0.0004998710780536763, 'samples': 1799168, 'steps': 3513, 'loss/train': 1.6403422355651855} +03/03/2022 17:32:44 - INFO - codeparrot_training - Step 3514: {'lr': 0.0004998709075930669, 'samples': 1799680, 'steps': 3514, 'loss/train': 2.2240114212036133} +03/03/2022 17:32:44 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/03/2022 17:32:49 - INFO - codeparrot_training - Step 3515: {'lr': 0.0004998707370198695, 'samples': 1800192, 'steps': 3515, 'loss/train': 2.609682559967041} +03/03/2022 17:32:52 - INFO - codeparrot_training - Step 3516: {'lr': 0.0004998705663340843, 'samples': 1800704, 'steps': 3516, 'loss/train': 2.486140012741089} +03/03/2022 17:32:54 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/03/2022 17:32:58 - INFO - codeparrot_training - Step 3517: {'lr': 0.0004998703955357111, 'samples': 1801216, 'steps': 3517, 'loss/train': 2.108114242553711} +03/03/2022 17:33:01 - INFO - codeparrot_training - Step 3518: {'lr': 0.0004998702246247502, 'samples': 1801728, 'steps': 3518, 'loss/train': 2.6833527088165283} +03/03/2022 17:33:02 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/03/2022 17:33:06 - INFO - codeparrot_training - Step 3519: {'lr': 0.0004998700536012017, 'samples': 1802240, 'steps': 3519, 'loss/train': 2.221470355987549} +03/03/2022 17:33:09 - INFO - codeparrot_training - Step 3520: {'lr': 0.0004998698824650655, 'samples': 1802752, 'steps': 3520, 'loss/train': 2.5528666973114014} +03/03/2022 17:33:11 - INFO - codeparrot_training - Skipping example with length 220 (seq_length=1024) +03/03/2022 17:33:14 - INFO - codeparrot_training - Step 3521: {'lr': 0.000499869711216342, 'samples': 1803264, 'steps': 3521, 'loss/train': 2.0246126651763916} +03/03/2022 17:33:18 - INFO - codeparrot_training - Step 3522: {'lr': 0.0004998695398550309, 'samples': 1803776, 'steps': 3522, 'loss/train': 1.979175090789795} +03/03/2022 17:33:19 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/03/2022 17:33:23 - INFO - codeparrot_training - Step 3523: {'lr': 0.0004998693683811325, 'samples': 1804288, 'steps': 3523, 'loss/train': 2.033844232559204} +03/03/2022 17:33:26 - INFO - codeparrot_training - Step 3524: {'lr': 0.0004998691967946468, 'samples': 1804800, 'steps': 3524, 'loss/train': 2.557607412338257} +03/03/2022 17:33:27 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/03/2022 17:33:31 - INFO - codeparrot_training - Step 3525: {'lr': 0.000499869025095574, 'samples': 1805312, 'steps': 3525, 'loss/train': 3.400775194168091} +03/03/2022 17:33:35 - INFO - codeparrot_training - Step 3526: {'lr': 0.0004998688532839139, 'samples': 1805824, 'steps': 3526, 'loss/train': 2.724454879760742} +03/03/2022 17:33:36 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/03/2022 17:33:40 - INFO - codeparrot_training - Step 3527: {'lr': 0.0004998686813596668, 'samples': 1806336, 'steps': 3527, 'loss/train': 2.373878240585327} +03/03/2022 17:33:43 - INFO - codeparrot_training - Step 3528: {'lr': 0.0004998685093228327, 'samples': 1806848, 'steps': 3528, 'loss/train': 2.1089589595794678} +03/03/2022 17:33:44 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/03/2022 17:33:48 - INFO - codeparrot_training - Step 3529: {'lr': 0.0004998683371734118, 'samples': 1807360, 'steps': 3529, 'loss/train': 2.7946763038635254} +03/03/2022 17:33:51 - INFO - codeparrot_training - Step 3530: {'lr': 0.000499868164911404, 'samples': 1807872, 'steps': 3530, 'loss/train': 2.7737417221069336} +03/03/2022 17:33:52 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/03/2022 17:33:57 - INFO - codeparrot_training - Step 3531: {'lr': 0.0004998679925368094, 'samples': 1808384, 'steps': 3531, 'loss/train': 2.004412889480591} +03/03/2022 17:34:00 - INFO - codeparrot_training - Step 3532: {'lr': 0.0004998678200496283, 'samples': 1808896, 'steps': 3532, 'loss/train': 2.3877594470977783} +03/03/2022 17:34:01 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/03/2022 17:34:05 - INFO - codeparrot_training - Step 3533: {'lr': 0.0004998676474498606, 'samples': 1809408, 'steps': 3533, 'loss/train': 1.9091302156448364} +03/03/2022 17:34:08 - INFO - codeparrot_training - Step 3534: {'lr': 0.0004998674747375063, 'samples': 1809920, 'steps': 3534, 'loss/train': 2.0079824924468994} +03/03/2022 17:34:09 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/03/2022 17:34:14 - INFO - codeparrot_training - Step 3535: {'lr': 0.0004998673019125657, 'samples': 1810432, 'steps': 3535, 'loss/train': 0.7680908441543579} +03/03/2022 17:34:17 - INFO - codeparrot_training - Step 3536: {'lr': 0.0004998671289750386, 'samples': 1810944, 'steps': 3536, 'loss/train': 2.1504931449890137} +03/03/2022 17:34:18 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 17:34:22 - INFO - codeparrot_training - Step 3537: {'lr': 0.0004998669559249252, 'samples': 1811456, 'steps': 3537, 'loss/train': 1.0805754661560059} +03/03/2022 17:34:25 - INFO - codeparrot_training - Step 3538: {'lr': 0.0004998667827622258, 'samples': 1811968, 'steps': 3538, 'loss/train': 3.1109695434570312} +03/03/2022 17:34:26 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/03/2022 17:34:30 - INFO - codeparrot_training - Step 3539: {'lr': 0.0004998666094869402, 'samples': 1812480, 'steps': 3539, 'loss/train': 1.6236883401870728} +03/03/2022 17:34:34 - INFO - codeparrot_training - Step 3540: {'lr': 0.0004998664360990685, 'samples': 1812992, 'steps': 3540, 'loss/train': 2.8824963569641113} +03/03/2022 17:34:35 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/03/2022 17:34:39 - INFO - codeparrot_training - Step 3541: {'lr': 0.0004998662625986109, 'samples': 1813504, 'steps': 3541, 'loss/train': 1.8157204389572144} +03/03/2022 17:34:42 - INFO - codeparrot_training - Step 3542: {'lr': 0.0004998660889855674, 'samples': 1814016, 'steps': 3542, 'loss/train': 2.892594337463379} +03/03/2022 17:34:43 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/03/2022 17:34:47 - INFO - codeparrot_training - Step 3543: {'lr': 0.0004998659152599381, 'samples': 1814528, 'steps': 3543, 'loss/train': 2.030524253845215} +03/03/2022 17:34:51 - INFO - codeparrot_training - Step 3544: {'lr': 0.000499865741421723, 'samples': 1815040, 'steps': 3544, 'loss/train': 3.1669063568115234} +03/03/2022 17:34:51 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/03/2022 17:34:56 - INFO - codeparrot_training - Step 3545: {'lr': 0.0004998655674709224, 'samples': 1815552, 'steps': 3545, 'loss/train': 2.5481038093566895} +03/03/2022 17:34:59 - INFO - codeparrot_training - Step 3546: {'lr': 0.0004998653934075361, 'samples': 1816064, 'steps': 3546, 'loss/train': 2.2129180431365967} +03/03/2022 17:35:00 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/03/2022 17:35:04 - INFO - codeparrot_training - Step 3547: {'lr': 0.0004998652192315644, 'samples': 1816576, 'steps': 3547, 'loss/train': 2.811291456222534} +03/03/2022 17:35:07 - INFO - codeparrot_training - Step 3548: {'lr': 0.0004998650449430073, 'samples': 1817088, 'steps': 3548, 'loss/train': 2.5717151165008545} +03/03/2022 17:35:08 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 17:35:13 - INFO - codeparrot_training - Step 3549: {'lr': 0.0004998648705418648, 'samples': 1817600, 'steps': 3549, 'loss/train': 3.0319743156433105} +03/03/2022 17:35:16 - INFO - codeparrot_training - Step 3550: {'lr': 0.000499864696028137, 'samples': 1818112, 'steps': 3550, 'loss/train': 2.6602559089660645} +03/03/2022 17:35:16 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/03/2022 17:35:21 - INFO - codeparrot_training - Step 3551: {'lr': 0.000499864521401824, 'samples': 1818624, 'steps': 3551, 'loss/train': 1.9295809268951416} +03/03/2022 17:35:24 - INFO - codeparrot_training - Step 3552: {'lr': 0.000499864346662926, 'samples': 1819136, 'steps': 3552, 'loss/train': 3.327582359313965} +03/03/2022 17:35:25 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/03/2022 17:35:30 - INFO - codeparrot_training - Step 3553: {'lr': 0.000499864171811443, 'samples': 1819648, 'steps': 3553, 'loss/train': 2.9110333919525146} +03/03/2022 17:35:33 - INFO - codeparrot_training - Step 3554: {'lr': 0.0004998639968473751, 'samples': 1820160, 'steps': 3554, 'loss/train': 2.0936439037323} +03/03/2022 17:35:34 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/03/2022 17:35:38 - INFO - codeparrot_training - Step 3555: {'lr': 0.0004998638217707222, 'samples': 1820672, 'steps': 3555, 'loss/train': 0.25007060170173645} +03/03/2022 17:35:41 - INFO - codeparrot_training - Step 3556: {'lr': 0.0004998636465814846, 'samples': 1821184, 'steps': 3556, 'loss/train': 2.5801384449005127} +03/03/2022 17:35:42 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/03/2022 17:35:47 - INFO - codeparrot_training - Step 3557: {'lr': 0.0004998634712796622, 'samples': 1821696, 'steps': 3557, 'loss/train': 2.2404232025146484} +03/03/2022 17:35:50 - INFO - codeparrot_training - Step 3558: {'lr': 0.0004998632958652554, 'samples': 1822208, 'steps': 3558, 'loss/train': 3.1668248176574707} +03/03/2022 17:35:50 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/03/2022 17:35:55 - INFO - codeparrot_training - Step 3559: {'lr': 0.0004998631203382639, 'samples': 1822720, 'steps': 3559, 'loss/train': 1.8602008819580078} +03/03/2022 17:35:58 - INFO - codeparrot_training - Step 3560: {'lr': 0.0004998629446986879, 'samples': 1823232, 'steps': 3560, 'loss/train': 3.4576563835144043} +03/03/2022 17:35:58 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/03/2022 17:36:03 - INFO - codeparrot_training - Step 3561: {'lr': 0.0004998627689465276, 'samples': 1823744, 'steps': 3561, 'loss/train': 2.7182540893554688} +03/03/2022 17:36:07 - INFO - codeparrot_training - Step 3562: {'lr': 0.0004998625930817829, 'samples': 1824256, 'steps': 3562, 'loss/train': 2.6915667057037354} +03/03/2022 17:36:07 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/03/2022 17:36:12 - INFO - codeparrot_training - Step 3563: {'lr': 0.0004998624171044541, 'samples': 1824768, 'steps': 3563, 'loss/train': 2.676105260848999} +03/03/2022 17:36:15 - INFO - codeparrot_training - Step 3564: {'lr': 0.000499862241014541, 'samples': 1825280, 'steps': 3564, 'loss/train': 2.7108681201934814} +03/03/2022 17:36:15 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/03/2022 17:36:20 - INFO - codeparrot_training - Step 3565: {'lr': 0.0004998620648120439, 'samples': 1825792, 'steps': 3565, 'loss/train': 2.5601260662078857} +03/03/2022 17:36:24 - INFO - codeparrot_training - Step 3566: {'lr': 0.0004998618884969628, 'samples': 1826304, 'steps': 3566, 'loss/train': 2.4409899711608887} +03/03/2022 17:36:24 - INFO - codeparrot_training - Skipping example with length 892 (seq_length=1024) +03/03/2022 17:36:29 - INFO - codeparrot_training - Step 3567: {'lr': 0.0004998617120692977, 'samples': 1826816, 'steps': 3567, 'loss/train': 2.8623995780944824} +03/03/2022 17:36:32 - INFO - codeparrot_training - Step 3568: {'lr': 0.0004998615355290489, 'samples': 1827328, 'steps': 3568, 'loss/train': 3.122307777404785} +03/03/2022 17:36:32 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/03/2022 17:36:37 - INFO - codeparrot_training - Step 3569: {'lr': 0.0004998613588762163, 'samples': 1827840, 'steps': 3569, 'loss/train': 4.345600128173828} +03/03/2022 17:36:40 - INFO - codeparrot_training - Step 3570: {'lr': 0.0004998611821108001, 'samples': 1828352, 'steps': 3570, 'loss/train': 3.649526357650757} +03/03/2022 17:36:41 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/03/2022 17:36:46 - INFO - codeparrot_training - Step 3571: {'lr': 0.0004998610052328002, 'samples': 1828864, 'steps': 3571, 'loss/train': 2.1949987411499023} +03/03/2022 17:36:49 - INFO - codeparrot_training - Step 3572: {'lr': 0.0004998608282422169, 'samples': 1829376, 'steps': 3572, 'loss/train': 2.33331561088562} +03/03/2022 17:36:49 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/03/2022 17:36:54 - INFO - codeparrot_training - Step 3573: {'lr': 0.0004998606511390501, 'samples': 1829888, 'steps': 3573, 'loss/train': 2.4240610599517822} +03/03/2022 17:36:57 - INFO - codeparrot_training - Step 3574: {'lr': 0.0004998604739232999, 'samples': 1830400, 'steps': 3574, 'loss/train': 2.1821587085723877} +03/03/2022 17:36:57 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/03/2022 17:37:03 - INFO - codeparrot_training - Step 3575: {'lr': 0.0004998602965949664, 'samples': 1830912, 'steps': 3575, 'loss/train': 2.5694711208343506} +03/03/2022 17:37:06 - INFO - codeparrot_training - Step 3576: {'lr': 0.0004998601191540499, 'samples': 1831424, 'steps': 3576, 'loss/train': 2.2584760189056396} +03/03/2022 17:37:07 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/03/2022 17:37:11 - INFO - codeparrot_training - Step 3577: {'lr': 0.0004998599416005502, 'samples': 1831936, 'steps': 3577, 'loss/train': 2.4998300075531006} +03/03/2022 17:37:15 - INFO - codeparrot_training - Step 3578: {'lr': 0.0004998597639344674, 'samples': 1832448, 'steps': 3578, 'loss/train': 4.368144512176514} +03/03/2022 17:37:16 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/03/2022 17:37:20 - INFO - codeparrot_training - Step 3579: {'lr': 0.0004998595861558016, 'samples': 1832960, 'steps': 3579, 'loss/train': 2.849193572998047} +03/03/2022 17:37:23 - INFO - codeparrot_training - Step 3580: {'lr': 0.000499859408264553, 'samples': 1833472, 'steps': 3580, 'loss/train': 2.3457655906677246} +03/03/2022 17:37:24 - INFO - codeparrot_training - Skipping example with length 904 (seq_length=1024) +03/03/2022 17:37:28 - INFO - codeparrot_training - Step 3581: {'lr': 0.0004998592302607217, 'samples': 1833984, 'steps': 3581, 'loss/train': 2.8259127140045166} +03/03/2022 17:37:31 - INFO - codeparrot_training - Step 3582: {'lr': 0.0004998590521443075, 'samples': 1834496, 'steps': 3582, 'loss/train': 3.155935525894165} +03/03/2022 17:37:33 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/03/2022 17:37:37 - INFO - codeparrot_training - Step 3583: {'lr': 0.0004998588739153108, 'samples': 1835008, 'steps': 3583, 'loss/train': 2.9264156818389893} +03/03/2022 17:37:40 - INFO - codeparrot_training - Step 3584: {'lr': 0.0004998586955737316, 'samples': 1835520, 'steps': 3584, 'loss/train': 2.0631113052368164} +03/03/2022 17:37:41 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/03/2022 17:37:45 - INFO - codeparrot_training - Step 3585: {'lr': 0.0004998585171195698, 'samples': 1836032, 'steps': 3585, 'loss/train': 2.686034917831421} +03/03/2022 17:37:48 - INFO - codeparrot_training - Step 3586: {'lr': 0.0004998583385528256, 'samples': 1836544, 'steps': 3586, 'loss/train': 2.0968966484069824} +03/03/2022 17:37:49 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/03/2022 17:37:53 - INFO - codeparrot_training - Step 3587: {'lr': 0.0004998581598734991, 'samples': 1837056, 'steps': 3587, 'loss/train': 2.2153120040893555} +03/03/2022 17:37:57 - INFO - codeparrot_training - Step 3588: {'lr': 0.0004998579810815905, 'samples': 1837568, 'steps': 3588, 'loss/train': 2.6685640811920166} +03/03/2022 17:37:58 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/03/2022 17:38:02 - INFO - codeparrot_training - Step 3589: {'lr': 0.0004998578021770995, 'samples': 1838080, 'steps': 3589, 'loss/train': 2.742213249206543} +03/03/2022 17:38:05 - INFO - codeparrot_training - Step 3590: {'lr': 0.0004998576231600267, 'samples': 1838592, 'steps': 3590, 'loss/train': 3.1950418949127197} +03/03/2022 17:38:07 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/03/2022 17:38:10 - INFO - codeparrot_training - Step 3591: {'lr': 0.0004998574440303718, 'samples': 1839104, 'steps': 3591, 'loss/train': 1.4914791584014893} +03/03/2022 17:38:13 - INFO - codeparrot_training - Step 3592: {'lr': 0.0004998572647881349, 'samples': 1839616, 'steps': 3592, 'loss/train': 3.2210724353790283} +03/03/2022 17:38:15 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/03/2022 17:38:19 - INFO - codeparrot_training - Step 3593: {'lr': 0.0004998570854333163, 'samples': 1840128, 'steps': 3593, 'loss/train': 2.1720919609069824} +03/03/2022 17:38:22 - INFO - codeparrot_training - Step 3594: {'lr': 0.0004998569059659158, 'samples': 1840640, 'steps': 3594, 'loss/train': 3.3248465061187744} +03/03/2022 17:38:23 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/03/2022 17:38:27 - INFO - codeparrot_training - Step 3595: {'lr': 0.0004998567263859338, 'samples': 1841152, 'steps': 3595, 'loss/train': 2.3522305488586426} +03/03/2022 17:38:30 - INFO - codeparrot_training - Step 3596: {'lr': 0.0004998565466933702, 'samples': 1841664, 'steps': 3596, 'loss/train': 2.2768325805664062} +03/03/2022 17:38:32 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/03/2022 17:38:36 - INFO - codeparrot_training - Step 3597: {'lr': 0.000499856366888225, 'samples': 1842176, 'steps': 3597, 'loss/train': 2.3901896476745605} +03/03/2022 17:38:39 - INFO - codeparrot_training - Step 3598: {'lr': 0.0004998561869704983, 'samples': 1842688, 'steps': 3598, 'loss/train': 2.52305269241333} +03/03/2022 17:38:40 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/03/2022 17:38:44 - INFO - codeparrot_training - Step 3599: {'lr': 0.0004998560069401905, 'samples': 1843200, 'steps': 3599, 'loss/train': 3.3277816772460938} +03/03/2022 17:38:47 - INFO - codeparrot_training - Step 3600: {'lr': 0.0004998558267973013, 'samples': 1843712, 'steps': 3600, 'loss/train': 3.032680034637451} +03/03/2022 17:38:48 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/03/2022 17:38:52 - INFO - codeparrot_training - Step 3601: {'lr': 0.0004998556465418309, 'samples': 1844224, 'steps': 3601, 'loss/train': 2.336735248565674} +03/03/2022 17:38:56 - INFO - codeparrot_training - Step 3602: {'lr': 0.0004998554661737795, 'samples': 1844736, 'steps': 3602, 'loss/train': 2.719419002532959} +03/03/2022 17:38:57 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/03/2022 17:39:01 - INFO - codeparrot_training - Step 3603: {'lr': 0.000499855285693147, 'samples': 1845248, 'steps': 3603, 'loss/train': 2.1824147701263428} +03/03/2022 17:39:04 - INFO - codeparrot_training - Step 3604: {'lr': 0.0004998551050999336, 'samples': 1845760, 'steps': 3604, 'loss/train': 2.0732715129852295} +03/03/2022 17:39:05 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/03/2022 17:39:09 - INFO - codeparrot_training - Step 3605: {'lr': 0.0004998549243941393, 'samples': 1846272, 'steps': 3605, 'loss/train': 2.9126882553100586} +03/03/2022 17:39:12 - INFO - codeparrot_training - Step 3606: {'lr': 0.0004998547435757643, 'samples': 1846784, 'steps': 3606, 'loss/train': 3.208984613418579} +03/03/2022 17:39:14 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/03/2022 17:39:18 - INFO - codeparrot_training - Step 3607: {'lr': 0.0004998545626448087, 'samples': 1847296, 'steps': 3607, 'loss/train': 2.5929415225982666} +03/03/2022 17:39:21 - INFO - codeparrot_training - Step 3608: {'lr': 0.0004998543816012723, 'samples': 1847808, 'steps': 3608, 'loss/train': 1.4198228120803833} +03/03/2022 17:39:22 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/03/2022 17:39:26 - INFO - codeparrot_training - Step 3609: {'lr': 0.0004998542004451554, 'samples': 1848320, 'steps': 3609, 'loss/train': 2.305907964706421} +03/03/2022 17:39:29 - INFO - codeparrot_training - Step 3610: {'lr': 0.000499854019176458, 'samples': 1848832, 'steps': 3610, 'loss/train': 2.23753023147583} +03/03/2022 17:39:30 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/03/2022 17:39:34 - INFO - codeparrot_training - Step 3611: {'lr': 0.0004998538377951803, 'samples': 1849344, 'steps': 3611, 'loss/train': 2.7183055877685547} +03/03/2022 17:39:37 - INFO - codeparrot_training - Step 3612: {'lr': 0.0004998536563013224, 'samples': 1849856, 'steps': 3612, 'loss/train': 1.9926756620407104} +03/03/2022 17:39:39 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/03/2022 17:39:43 - INFO - codeparrot_training - Step 3613: {'lr': 0.0004998534746948843, 'samples': 1850368, 'steps': 3613, 'loss/train': 2.6904778480529785} +03/03/2022 17:39:46 - INFO - codeparrot_training - Step 3614: {'lr': 0.000499853292975866, 'samples': 1850880, 'steps': 3614, 'loss/train': 3.7864110469818115} +03/03/2022 17:39:47 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/03/2022 17:39:51 - INFO - codeparrot_training - Step 3615: {'lr': 0.0004998531111442676, 'samples': 1851392, 'steps': 3615, 'loss/train': 2.306504487991333} +03/03/2022 17:39:54 - INFO - codeparrot_training - Step 3616: {'lr': 0.0004998529292000893, 'samples': 1851904, 'steps': 3616, 'loss/train': 2.8009467124938965} +03/03/2022 17:39:56 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/03/2022 17:39:59 - INFO - codeparrot_training - Step 3617: {'lr': 0.0004998527471433312, 'samples': 1852416, 'steps': 3617, 'loss/train': 2.683246612548828} +03/03/2022 17:40:03 - INFO - codeparrot_training - Step 3618: {'lr': 0.0004998525649739932, 'samples': 1852928, 'steps': 3618, 'loss/train': 2.416942596435547} +03/03/2022 17:40:04 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/03/2022 17:40:08 - INFO - codeparrot_training - Step 3619: {'lr': 0.0004998523826920756, 'samples': 1853440, 'steps': 3619, 'loss/train': 1.7753640413284302} +03/03/2022 17:40:11 - INFO - codeparrot_training - Step 3620: {'lr': 0.0004998522002975783, 'samples': 1853952, 'steps': 3620, 'loss/train': 2.5840494632720947} +03/03/2022 17:40:12 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/03/2022 17:40:16 - INFO - codeparrot_training - Step 3621: {'lr': 0.0004998520177905015, 'samples': 1854464, 'steps': 3621, 'loss/train': 2.44264817237854} +03/03/2022 17:40:19 - INFO - codeparrot_training - Step 3622: {'lr': 0.0004998518351708452, 'samples': 1854976, 'steps': 3622, 'loss/train': 3.1505048274993896} +03/03/2022 17:40:20 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/03/2022 17:40:25 - INFO - codeparrot_training - Step 3623: {'lr': 0.0004998516524386095, 'samples': 1855488, 'steps': 3623, 'loss/train': 2.8222768306732178} +03/03/2022 17:40:28 - INFO - codeparrot_training - Step 3624: {'lr': 0.0004998514695937945, 'samples': 1856000, 'steps': 3624, 'loss/train': 2.9943392276763916} +03/03/2022 17:40:29 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/03/2022 17:40:33 - INFO - codeparrot_training - Step 3625: {'lr': 0.0004998512866364003, 'samples': 1856512, 'steps': 3625, 'loss/train': 2.179144859313965} +03/03/2022 17:40:36 - INFO - codeparrot_training - Step 3626: {'lr': 0.000499851103566427, 'samples': 1857024, 'steps': 3626, 'loss/train': 2.710876226425171} +03/03/2022 17:40:37 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/03/2022 17:40:42 - INFO - codeparrot_training - Step 3627: {'lr': 0.0004998509203838746, 'samples': 1857536, 'steps': 3627, 'loss/train': 3.0475049018859863} +03/03/2022 17:40:45 - INFO - codeparrot_training - Step 3628: {'lr': 0.0004998507370887433, 'samples': 1858048, 'steps': 3628, 'loss/train': 1.7123950719833374} +03/03/2022 17:40:45 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/03/2022 17:40:50 - INFO - codeparrot_training - Step 3629: {'lr': 0.000499850553681033, 'samples': 1858560, 'steps': 3629, 'loss/train': 3.6757304668426514} +03/03/2022 17:40:53 - INFO - codeparrot_training - Step 3630: {'lr': 0.000499850370160744, 'samples': 1859072, 'steps': 3630, 'loss/train': 3.588217258453369} +03/03/2022 17:40:54 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 17:40:59 - INFO - codeparrot_training - Step 3631: {'lr': 0.0004998501865278762, 'samples': 1859584, 'steps': 3631, 'loss/train': 3.2438197135925293} +03/03/2022 17:41:02 - INFO - codeparrot_training - Step 3632: {'lr': 0.0004998500027824298, 'samples': 1860096, 'steps': 3632, 'loss/train': 2.3159561157226562} +03/03/2022 17:41:02 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/03/2022 17:41:07 - INFO - codeparrot_training - Step 3633: {'lr': 0.0004998498189244049, 'samples': 1860608, 'steps': 3633, 'loss/train': 2.9345972537994385} +03/03/2022 17:41:10 - INFO - codeparrot_training - Step 3634: {'lr': 0.0004998496349538015, 'samples': 1861120, 'steps': 3634, 'loss/train': 2.375861644744873} +03/03/2022 17:41:10 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/03/2022 17:41:15 - INFO - codeparrot_training - Step 3635: {'lr': 0.0004998494508706196, 'samples': 1861632, 'steps': 3635, 'loss/train': 2.841984987258911} +03/03/2022 17:41:19 - INFO - codeparrot_training - Step 3636: {'lr': 0.0004998492666748594, 'samples': 1862144, 'steps': 3636, 'loss/train': 2.4892513751983643} +03/03/2022 17:41:19 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/03/2022 17:41:24 - INFO - codeparrot_training - Step 3637: {'lr': 0.0004998490823665211, 'samples': 1862656, 'steps': 3637, 'loss/train': 1.2803422212600708} +03/03/2022 17:41:27 - INFO - codeparrot_training - Step 3638: {'lr': 0.0004998488979456046, 'samples': 1863168, 'steps': 3638, 'loss/train': 2.3221004009246826} +03/03/2022 17:41:27 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/03/2022 17:41:32 - INFO - codeparrot_training - Step 3639: {'lr': 0.00049984871341211, 'samples': 1863680, 'steps': 3639, 'loss/train': 3.4515297412872314} +03/03/2022 17:41:36 - INFO - codeparrot_training - Step 3640: {'lr': 0.0004998485287660375, 'samples': 1864192, 'steps': 3640, 'loss/train': 2.875380516052246} +03/03/2022 17:41:36 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/03/2022 17:41:41 - INFO - codeparrot_training - Step 3641: {'lr': 0.0004998483440073871, 'samples': 1864704, 'steps': 3641, 'loss/train': 1.632257103919983} +03/03/2022 17:41:44 - INFO - codeparrot_training - Step 3642: {'lr': 0.0004998481591361589, 'samples': 1865216, 'steps': 3642, 'loss/train': 1.3579598665237427} +03/03/2022 17:41:45 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/03/2022 17:41:50 - INFO - codeparrot_training - Step 3643: {'lr': 0.000499847974152353, 'samples': 1865728, 'steps': 3643, 'loss/train': 2.599026918411255} +03/03/2022 17:41:53 - INFO - codeparrot_training - Step 3644: {'lr': 0.0004998477890559693, 'samples': 1866240, 'steps': 3644, 'loss/train': 2.9226536750793457} +03/03/2022 17:41:56 - INFO - codeparrot_training - Skipping example with length 148 (seq_length=1024) +03/03/2022 17:41:58 - INFO - codeparrot_training - Step 3645: {'lr': 0.0004998476038470082, 'samples': 1866752, 'steps': 3645, 'loss/train': 2.3888001441955566} +03/03/2022 17:42:01 - INFO - codeparrot_training - Step 3646: {'lr': 0.0004998474185254696, 'samples': 1867264, 'steps': 3646, 'loss/train': 2.4278995990753174} +03/03/2022 17:42:04 - INFO - codeparrot_training - Skipping example with length 799 (seq_length=1024) +03/03/2022 17:42:07 - INFO - codeparrot_training - Step 3647: {'lr': 0.0004998472330913535, 'samples': 1867776, 'steps': 3647, 'loss/train': 3.1469688415527344} +03/03/2022 17:42:10 - INFO - codeparrot_training - Step 3648: {'lr': 0.0004998470475446603, 'samples': 1868288, 'steps': 3648, 'loss/train': 2.0364415645599365} +03/03/2022 17:42:12 - INFO - codeparrot_training - Skipping example with length 255 (seq_length=1024) +03/03/2022 17:42:15 - INFO - codeparrot_training - Step 3649: {'lr': 0.0004998468618853896, 'samples': 1868800, 'steps': 3649, 'loss/train': 2.8296844959259033} +03/03/2022 17:42:19 - INFO - codeparrot_training - Step 3650: {'lr': 0.000499846676113542, 'samples': 1869312, 'steps': 3650, 'loss/train': 3.0041654109954834} +03/03/2022 17:42:21 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/03/2022 17:42:24 - INFO - codeparrot_training - Step 3651: {'lr': 0.0004998464902291173, 'samples': 1869824, 'steps': 3651, 'loss/train': 1.721150279045105} +03/03/2022 17:42:27 - INFO - codeparrot_training - Step 3652: {'lr': 0.0004998463042321155, 'samples': 1870336, 'steps': 3652, 'loss/train': 2.6826014518737793} +03/03/2022 17:42:30 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/03/2022 17:42:32 - INFO - codeparrot_training - Step 3653: {'lr': 0.0004998461181225369, 'samples': 1870848, 'steps': 3653, 'loss/train': 3.217963695526123} +03/03/2022 17:42:35 - INFO - codeparrot_training - Step 3654: {'lr': 0.0004998459319003815, 'samples': 1871360, 'steps': 3654, 'loss/train': 2.749781608581543} +03/03/2022 17:42:38 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/03/2022 17:42:40 - INFO - codeparrot_training - Step 3655: {'lr': 0.0004998457455656493, 'samples': 1871872, 'steps': 3655, 'loss/train': 2.6557867527008057} +03/03/2022 17:42:44 - INFO - codeparrot_training - Step 3656: {'lr': 0.0004998455591183406, 'samples': 1872384, 'steps': 3656, 'loss/train': 2.878848075866699} +03/03/2022 17:42:46 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/03/2022 17:42:49 - INFO - codeparrot_training - Step 3657: {'lr': 0.0004998453725584552, 'samples': 1872896, 'steps': 3657, 'loss/train': 2.1072981357574463} +03/03/2022 17:42:52 - INFO - codeparrot_training - Step 3658: {'lr': 0.0004998451858859934, 'samples': 1873408, 'steps': 3658, 'loss/train': 1.8208065032958984} +03/03/2022 17:42:55 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/03/2022 17:42:57 - INFO - codeparrot_training - Step 3659: {'lr': 0.0004998449991009552, 'samples': 1873920, 'steps': 3659, 'loss/train': 2.472975730895996} +03/03/2022 17:43:01 - INFO - codeparrot_training - Step 3660: {'lr': 0.0004998448122033408, 'samples': 1874432, 'steps': 3660, 'loss/train': 2.201519727706909} +03/03/2022 17:43:03 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/03/2022 17:43:06 - INFO - codeparrot_training - Step 3661: {'lr': 0.00049984462519315, 'samples': 1874944, 'steps': 3661, 'loss/train': 2.484926700592041} +03/03/2022 17:43:09 - INFO - codeparrot_training - Step 3662: {'lr': 0.0004998444380703832, 'samples': 1875456, 'steps': 3662, 'loss/train': 4.3101420402526855} +03/03/2022 17:43:11 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/03/2022 17:43:14 - INFO - codeparrot_training - Step 3663: {'lr': 0.0004998442508350404, 'samples': 1875968, 'steps': 3663, 'loss/train': 3.4521312713623047} +03/03/2022 17:43:18 - INFO - codeparrot_training - Step 3664: {'lr': 0.0004998440634871215, 'samples': 1876480, 'steps': 3664, 'loss/train': 2.967343807220459} +03/03/2022 17:43:20 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/03/2022 17:43:23 - INFO - codeparrot_training - Step 3665: {'lr': 0.0004998438760266267, 'samples': 1876992, 'steps': 3665, 'loss/train': 2.482686758041382} +03/03/2022 17:43:26 - INFO - codeparrot_training - Step 3666: {'lr': 0.0004998436884535562, 'samples': 1877504, 'steps': 3666, 'loss/train': 2.7415034770965576} +03/03/2022 17:43:28 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/03/2022 17:43:31 - INFO - codeparrot_training - Step 3667: {'lr': 0.00049984350076791, 'samples': 1878016, 'steps': 3667, 'loss/train': 1.9611742496490479} +03/03/2022 17:43:34 - INFO - codeparrot_training - Step 3668: {'lr': 0.0004998433129696882, 'samples': 1878528, 'steps': 3668, 'loss/train': 2.9182839393615723} +03/03/2022 17:43:36 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/03/2022 17:43:40 - INFO - codeparrot_training - Step 3669: {'lr': 0.0004998431250588907, 'samples': 1879040, 'steps': 3669, 'loss/train': 3.628209352493286} +03/03/2022 17:43:43 - INFO - codeparrot_training - Step 3670: {'lr': 0.0004998429370355179, 'samples': 1879552, 'steps': 3670, 'loss/train': 1.954303503036499} +03/03/2022 17:43:45 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/03/2022 17:43:48 - INFO - codeparrot_training - Step 3671: {'lr': 0.0004998427488995697, 'samples': 1880064, 'steps': 3671, 'loss/train': 2.8552260398864746} +03/03/2022 17:43:51 - INFO - codeparrot_training - Step 3672: {'lr': 0.0004998425606510461, 'samples': 1880576, 'steps': 3672, 'loss/train': 2.551632881164551} +03/03/2022 17:43:54 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/03/2022 17:43:57 - INFO - codeparrot_training - Step 3673: {'lr': 0.0004998423722899475, 'samples': 1881088, 'steps': 3673, 'loss/train': 2.644566774368286} +03/03/2022 17:44:00 - INFO - codeparrot_training - Step 3674: {'lr': 0.0004998421838162735, 'samples': 1881600, 'steps': 3674, 'loss/train': 2.391247272491455} +03/03/2022 17:44:02 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 17:44:05 - INFO - codeparrot_training - Step 3675: {'lr': 0.0004998419952300247, 'samples': 1882112, 'steps': 3675, 'loss/train': 3.183044195175171} +03/03/2022 17:44:08 - INFO - codeparrot_training - Step 3676: {'lr': 0.0004998418065312009, 'samples': 1882624, 'steps': 3676, 'loss/train': 1.800328254699707} +03/03/2022 17:44:10 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/03/2022 17:44:13 - INFO - codeparrot_training - Step 3677: {'lr': 0.0004998416177198022, 'samples': 1883136, 'steps': 3677, 'loss/train': 2.083582639694214} +03/03/2022 17:44:17 - INFO - codeparrot_training - Step 3678: {'lr': 0.0004998414287958288, 'samples': 1883648, 'steps': 3678, 'loss/train': 1.6754897832870483} +03/03/2022 17:44:18 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/03/2022 17:44:22 - INFO - codeparrot_training - Step 3679: {'lr': 0.0004998412397592807, 'samples': 1884160, 'steps': 3679, 'loss/train': 2.243828773498535} +03/03/2022 17:44:25 - INFO - codeparrot_training - Step 3680: {'lr': 0.0004998410506101579, 'samples': 1884672, 'steps': 3680, 'loss/train': 2.387927293777466} +03/03/2022 17:44:27 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/03/2022 17:44:30 - INFO - codeparrot_training - Step 3681: {'lr': 0.0004998408613484605, 'samples': 1885184, 'steps': 3681, 'loss/train': 3.237916946411133} +03/03/2022 17:44:33 - INFO - codeparrot_training - Step 3682: {'lr': 0.0004998406719741888, 'samples': 1885696, 'steps': 3682, 'loss/train': 2.312106132507324} +03/03/2022 17:44:35 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/03/2022 17:44:39 - INFO - codeparrot_training - Step 3683: {'lr': 0.0004998404824873428, 'samples': 1886208, 'steps': 3683, 'loss/train': 2.2623579502105713} +03/03/2022 17:44:42 - INFO - codeparrot_training - Step 3684: {'lr': 0.0004998402928879225, 'samples': 1886720, 'steps': 3684, 'loss/train': 2.575990676879883} +03/03/2022 17:44:43 - INFO - codeparrot_training - Skipping example with length 832 (seq_length=1024) +03/03/2022 17:44:47 - INFO - codeparrot_training - Step 3685: {'lr': 0.000499840103175928, 'samples': 1887232, 'steps': 3685, 'loss/train': 3.730891227722168} +03/03/2022 17:44:50 - INFO - codeparrot_training - Step 3686: {'lr': 0.0004998399133513594, 'samples': 1887744, 'steps': 3686, 'loss/train': 2.1866838932037354} +03/03/2022 17:44:52 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/03/2022 17:44:56 - INFO - codeparrot_training - Step 3687: {'lr': 0.0004998397234142167, 'samples': 1888256, 'steps': 3687, 'loss/train': 2.3116037845611572} +03/03/2022 17:44:59 - INFO - codeparrot_training - Step 3688: {'lr': 0.0004998395333645002, 'samples': 1888768, 'steps': 3688, 'loss/train': 2.7817230224609375} +03/03/2022 17:45:00 - INFO - codeparrot_training - Skipping example with length 403 (seq_length=1024) +03/03/2022 17:45:04 - INFO - codeparrot_training - Step 3689: {'lr': 0.0004998393432022098, 'samples': 1889280, 'steps': 3689, 'loss/train': 2.8956661224365234} +03/03/2022 17:45:07 - INFO - codeparrot_training - Step 3690: {'lr': 0.0004998391529273457, 'samples': 1889792, 'steps': 3690, 'loss/train': 2.9790821075439453} +03/03/2022 17:45:09 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/03/2022 17:45:12 - INFO - codeparrot_training - Step 3691: {'lr': 0.0004998389625399079, 'samples': 1890304, 'steps': 3691, 'loss/train': 2.958409309387207} +03/03/2022 17:45:15 - INFO - codeparrot_training - Step 3692: {'lr': 0.0004998387720398965, 'samples': 1890816, 'steps': 3692, 'loss/train': 2.0681912899017334} +03/03/2022 17:45:17 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/03/2022 17:45:21 - INFO - codeparrot_training - Step 3693: {'lr': 0.0004998385814273116, 'samples': 1891328, 'steps': 3693, 'loss/train': 2.9830071926116943} +03/03/2022 17:45:24 - INFO - codeparrot_training - Step 3694: {'lr': 0.0004998383907021533, 'samples': 1891840, 'steps': 3694, 'loss/train': 2.9512157440185547} +03/03/2022 17:45:25 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/03/2022 17:45:29 - INFO - codeparrot_training - Step 3695: {'lr': 0.0004998381998644217, 'samples': 1892352, 'steps': 3695, 'loss/train': 1.9748598337173462} +03/03/2022 17:45:32 - INFO - codeparrot_training - Step 3696: {'lr': 0.0004998380089141169, 'samples': 1892864, 'steps': 3696, 'loss/train': 3.3202672004699707} +03/03/2022 17:45:34 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/03/2022 17:45:38 - INFO - codeparrot_training - Step 3697: {'lr': 0.0004998378178512388, 'samples': 1893376, 'steps': 3697, 'loss/train': 2.688002347946167} +03/03/2022 17:45:41 - INFO - codeparrot_training - Step 3698: {'lr': 0.0004998376266757878, 'samples': 1893888, 'steps': 3698, 'loss/train': 2.5726189613342285} +03/03/2022 17:45:42 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/03/2022 17:45:46 - INFO - codeparrot_training - Step 3699: {'lr': 0.0004998374353877638, 'samples': 1894400, 'steps': 3699, 'loss/train': 1.4164992570877075} +03/03/2022 17:45:49 - INFO - codeparrot_training - Step 3700: {'lr': 0.0004998372439871668, 'samples': 1894912, 'steps': 3700, 'loss/train': 1.0202736854553223} +03/03/2022 17:45:50 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/03/2022 17:45:54 - INFO - codeparrot_training - Step 3701: {'lr': 0.000499837052473997, 'samples': 1895424, 'steps': 3701, 'loss/train': 2.804818868637085} +03/03/2022 17:45:58 - INFO - codeparrot_training - Step 3702: {'lr': 0.0004998368608482546, 'samples': 1895936, 'steps': 3702, 'loss/train': 3.662398099899292} +03/03/2022 17:45:59 - INFO - codeparrot_training - Skipping example with length 465 (seq_length=1024) +03/03/2022 17:46:03 - INFO - codeparrot_training - Step 3703: {'lr': 0.0004998366691099395, 'samples': 1896448, 'steps': 3703, 'loss/train': 1.1836762428283691} +03/03/2022 17:46:06 - INFO - codeparrot_training - Step 3704: {'lr': 0.0004998364772590518, 'samples': 1896960, 'steps': 3704, 'loss/train': 2.834601879119873} +03/03/2022 17:46:07 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/03/2022 17:46:11 - INFO - codeparrot_training - Step 3705: {'lr': 0.0004998362852955918, 'samples': 1897472, 'steps': 3705, 'loss/train': 2.514988899230957} +03/03/2022 17:46:14 - INFO - codeparrot_training - Step 3706: {'lr': 0.0004998360932195593, 'samples': 1897984, 'steps': 3706, 'loss/train': 3.4431533813476562} +03/03/2022 17:46:16 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/03/2022 17:46:20 - INFO - codeparrot_training - Step 3707: {'lr': 0.0004998359010309544, 'samples': 1898496, 'steps': 3707, 'loss/train': 1.8733627796173096} +03/03/2022 17:46:23 - INFO - codeparrot_training - Step 3708: {'lr': 0.0004998357087297775, 'samples': 1899008, 'steps': 3708, 'loss/train': 1.8023738861083984} +03/03/2022 17:46:25 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/03/2022 17:46:28 - INFO - codeparrot_training - Step 3709: {'lr': 0.0004998355163160285, 'samples': 1899520, 'steps': 3709, 'loss/train': 2.1604526042938232} +03/03/2022 17:46:32 - INFO - codeparrot_training - Step 3710: {'lr': 0.0004998353237897073, 'samples': 1900032, 'steps': 3710, 'loss/train': 2.415114641189575} +03/03/2022 17:46:34 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/03/2022 17:46:37 - INFO - codeparrot_training - Step 3711: {'lr': 0.0004998351311508143, 'samples': 1900544, 'steps': 3711, 'loss/train': 1.8773698806762695} +03/03/2022 17:46:40 - INFO - codeparrot_training - Step 3712: {'lr': 0.0004998349383993493, 'samples': 1901056, 'steps': 3712, 'loss/train': 2.5502567291259766} +03/03/2022 17:46:42 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/03/2022 17:46:46 - INFO - codeparrot_training - Step 3713: {'lr': 0.0004998347455353126, 'samples': 1901568, 'steps': 3713, 'loss/train': 1.7980543375015259} +03/03/2022 17:46:49 - INFO - codeparrot_training - Step 3714: {'lr': 0.0004998345525587042, 'samples': 1902080, 'steps': 3714, 'loss/train': 2.639075756072998} +03/03/2022 17:46:51 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/03/2022 17:46:54 - INFO - codeparrot_training - Step 3715: {'lr': 0.0004998343594695242, 'samples': 1902592, 'steps': 3715, 'loss/train': 2.9284801483154297} +03/03/2022 17:46:57 - INFO - codeparrot_training - Step 3716: {'lr': 0.0004998341662677728, 'samples': 1903104, 'steps': 3716, 'loss/train': 1.6907099485397339} +03/03/2022 17:46:59 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/03/2022 17:47:03 - INFO - codeparrot_training - Step 3717: {'lr': 0.0004998339729534499, 'samples': 1903616, 'steps': 3717, 'loss/train': 2.285964012145996} +03/03/2022 17:47:06 - INFO - codeparrot_training - Step 3718: {'lr': 0.0004998337795265557, 'samples': 1904128, 'steps': 3718, 'loss/train': 2.3662631511688232} +03/03/2022 17:47:08 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/03/2022 17:47:11 - INFO - codeparrot_training - Step 3719: {'lr': 0.0004998335859870903, 'samples': 1904640, 'steps': 3719, 'loss/train': 1.7086601257324219} +03/03/2022 17:47:14 - INFO - codeparrot_training - Step 3720: {'lr': 0.0004998333923350536, 'samples': 1905152, 'steps': 3720, 'loss/train': 1.6167089939117432} +03/03/2022 17:47:16 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/03/2022 17:47:20 - INFO - codeparrot_training - Step 3721: {'lr': 0.000499833198570446, 'samples': 1905664, 'steps': 3721, 'loss/train': 2.410611629486084} +03/03/2022 17:47:23 - INFO - codeparrot_training - Step 3722: {'lr': 0.0004998330046932672, 'samples': 1906176, 'steps': 3722, 'loss/train': 2.5066184997558594} +03/03/2022 17:47:25 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/03/2022 17:47:28 - INFO - codeparrot_training - Step 3723: {'lr': 0.0004998328107035176, 'samples': 1906688, 'steps': 3723, 'loss/train': 3.44197154045105} +03/03/2022 17:47:31 - INFO - codeparrot_training - Step 3724: {'lr': 0.0004998326166011973, 'samples': 1907200, 'steps': 3724, 'loss/train': 2.433289051055908} +03/03/2022 17:47:34 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/03/2022 17:47:37 - INFO - codeparrot_training - Step 3725: {'lr': 0.0004998324223863061, 'samples': 1907712, 'steps': 3725, 'loss/train': 4.985229015350342} +03/03/2022 17:47:40 - INFO - codeparrot_training - Step 3726: {'lr': 0.0004998322280588445, 'samples': 1908224, 'steps': 3726, 'loss/train': 1.6632026433944702} +03/03/2022 17:47:42 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/03/2022 17:47:45 - INFO - codeparrot_training - Step 3727: {'lr': 0.0004998320336188121, 'samples': 1908736, 'steps': 3727, 'loss/train': 3.0993289947509766} +03/03/2022 17:47:48 - INFO - codeparrot_training - Step 3728: {'lr': 0.0004998318390662095, 'samples': 1909248, 'steps': 3728, 'loss/train': 2.6630921363830566} +03/03/2022 17:47:50 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 17:47:54 - INFO - codeparrot_training - Step 3729: {'lr': 0.0004998316444010363, 'samples': 1909760, 'steps': 3729, 'loss/train': 0.6879110336303711} +03/03/2022 17:47:57 - INFO - codeparrot_training - Step 3730: {'lr': 0.0004998314496232929, 'samples': 1910272, 'steps': 3730, 'loss/train': 2.419485092163086} +03/03/2022 17:47:59 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/03/2022 17:48:02 - INFO - codeparrot_training - Step 3731: {'lr': 0.0004998312547329793, 'samples': 1910784, 'steps': 3731, 'loss/train': 2.544678211212158} +03/03/2022 17:48:05 - INFO - codeparrot_training - Step 3732: {'lr': 0.0004998310597300956, 'samples': 1911296, 'steps': 3732, 'loss/train': 2.6967203617095947} +03/03/2022 17:48:07 - INFO - codeparrot_training - Skipping example with length 43 (seq_length=1024) +03/03/2022 17:48:10 - INFO - codeparrot_training - Step 3733: {'lr': 0.0004998308646146419, 'samples': 1911808, 'steps': 3733, 'loss/train': 1.7263741493225098} +03/03/2022 17:48:14 - INFO - codeparrot_training - Step 3734: {'lr': 0.0004998306693866181, 'samples': 1912320, 'steps': 3734, 'loss/train': 2.4978933334350586} +03/03/2022 17:48:16 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/03/2022 17:48:19 - INFO - codeparrot_training - Step 3735: {'lr': 0.0004998304740460247, 'samples': 1912832, 'steps': 3735, 'loss/train': 2.487455129623413} +03/03/2022 17:48:22 - INFO - codeparrot_training - Step 3736: {'lr': 0.0004998302785928614, 'samples': 1913344, 'steps': 3736, 'loss/train': 1.291659951210022} +03/03/2022 17:48:24 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/03/2022 17:48:27 - INFO - codeparrot_training - Step 3737: {'lr': 0.0004998300830271285, 'samples': 1913856, 'steps': 3737, 'loss/train': 2.6324472427368164} +03/03/2022 17:48:30 - INFO - codeparrot_training - Step 3738: {'lr': 0.000499829887348826, 'samples': 1914368, 'steps': 3738, 'loss/train': 2.5330216884613037} +03/03/2022 17:48:32 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/03/2022 17:48:36 - INFO - codeparrot_training - Step 3739: {'lr': 0.0004998296915579539, 'samples': 1914880, 'steps': 3739, 'loss/train': 2.3425724506378174} +03/03/2022 17:48:39 - INFO - codeparrot_training - Step 3740: {'lr': 0.0004998294956545125, 'samples': 1915392, 'steps': 3740, 'loss/train': 3.2945218086242676} +03/03/2022 17:48:41 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/03/2022 17:48:44 - INFO - codeparrot_training - Step 3741: {'lr': 0.0004998292996385019, 'samples': 1915904, 'steps': 3741, 'loss/train': 1.7640777826309204} +03/03/2022 17:48:47 - INFO - codeparrot_training - Step 3742: {'lr': 0.0004998291035099219, 'samples': 1916416, 'steps': 3742, 'loss/train': 2.349073648452759} +03/03/2022 17:48:49 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/03/2022 17:48:52 - INFO - codeparrot_training - Step 3743: {'lr': 0.0004998289072687728, 'samples': 1916928, 'steps': 3743, 'loss/train': 2.297445058822632} +03/03/2022 17:48:56 - INFO - codeparrot_training - Step 3744: {'lr': 0.0004998287109150547, 'samples': 1917440, 'steps': 3744, 'loss/train': 2.437101125717163} +03/03/2022 17:48:57 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/03/2022 17:49:01 - INFO - codeparrot_training - Step 3745: {'lr': 0.0004998285144487676, 'samples': 1917952, 'steps': 3745, 'loss/train': 2.081667423248291} +03/03/2022 17:49:04 - INFO - codeparrot_training - Step 3746: {'lr': 0.0004998283178699116, 'samples': 1918464, 'steps': 3746, 'loss/train': 2.67838191986084} +03/03/2022 17:49:06 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/03/2022 17:49:09 - INFO - codeparrot_training - Step 3747: {'lr': 0.0004998281211784869, 'samples': 1918976, 'steps': 3747, 'loss/train': 2.2773544788360596} +03/03/2022 17:49:12 - INFO - codeparrot_training - Step 3748: {'lr': 0.0004998279243744934, 'samples': 1919488, 'steps': 3748, 'loss/train': 2.441211700439453} +03/03/2022 17:49:14 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/03/2022 17:49:18 - INFO - codeparrot_training - Step 3749: {'lr': 0.0004998277274579313, 'samples': 1920000, 'steps': 3749, 'loss/train': 3.0278847217559814} +03/03/2022 17:49:21 - INFO - codeparrot_training - Step 3750: {'lr': 0.0004998275304288007, 'samples': 1920512, 'steps': 3750, 'loss/train': 2.970712184906006} +03/03/2022 17:49:22 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/03/2022 17:49:26 - INFO - codeparrot_training - Step 3751: {'lr': 0.0004998273332871017, 'samples': 1921024, 'steps': 3751, 'loss/train': 1.4337462186813354} +03/03/2022 17:49:29 - INFO - codeparrot_training - Step 3752: {'lr': 0.0004998271360328344, 'samples': 1921536, 'steps': 3752, 'loss/train': 3.0126447677612305} +03/03/2022 17:49:32 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/03/2022 17:49:35 - INFO - codeparrot_training - Step 3753: {'lr': 0.0004998269386659988, 'samples': 1922048, 'steps': 3753, 'loss/train': 2.8830173015594482} +03/03/2022 17:49:38 - INFO - codeparrot_training - Step 3754: {'lr': 0.000499826741186595, 'samples': 1922560, 'steps': 3754, 'loss/train': 2.7315151691436768} +03/03/2022 17:49:40 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/03/2022 17:49:43 - INFO - codeparrot_training - Step 3755: {'lr': 0.0004998265435946232, 'samples': 1923072, 'steps': 3755, 'loss/train': 2.9850118160247803} +03/03/2022 17:49:46 - INFO - codeparrot_training - Step 3756: {'lr': 0.0004998263458900833, 'samples': 1923584, 'steps': 3756, 'loss/train': 2.8998255729675293} +03/03/2022 17:49:49 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/03/2022 17:49:51 - INFO - codeparrot_training - Step 3757: {'lr': 0.0004998261480729755, 'samples': 1924096, 'steps': 3757, 'loss/train': 3.4218661785125732} +03/03/2022 17:49:55 - INFO - codeparrot_training - Step 3758: {'lr': 0.0004998259501433, 'samples': 1924608, 'steps': 3758, 'loss/train': 2.27099609375} +03/03/2022 17:49:57 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 17:50:00 - INFO - codeparrot_training - Step 3759: {'lr': 0.0004998257521010567, 'samples': 1925120, 'steps': 3759, 'loss/train': 2.9374964237213135} +03/03/2022 17:50:03 - INFO - codeparrot_training - Step 3760: {'lr': 0.0004998255539462459, 'samples': 1925632, 'steps': 3760, 'loss/train': 2.132153272628784} +03/03/2022 17:50:05 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/03/2022 17:50:08 - INFO - codeparrot_training - Step 3761: {'lr': 0.0004998253556788675, 'samples': 1926144, 'steps': 3761, 'loss/train': 2.714040994644165} +03/03/2022 17:50:12 - INFO - codeparrot_training - Step 3762: {'lr': 0.0004998251572989217, 'samples': 1926656, 'steps': 3762, 'loss/train': 2.861882448196411} +03/03/2022 17:50:14 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/03/2022 17:50:17 - INFO - codeparrot_training - Step 3763: {'lr': 0.0004998249588064085, 'samples': 1927168, 'steps': 3763, 'loss/train': 3.266000509262085} +03/03/2022 17:50:20 - INFO - codeparrot_training - Step 3764: {'lr': 0.0004998247602013278, 'samples': 1927680, 'steps': 3764, 'loss/train': 2.7360594272613525} +03/03/2022 17:50:22 - INFO - codeparrot_training - Skipping example with length 150 (seq_length=1024) +03/03/2022 17:50:25 - INFO - codeparrot_training - Step 3765: {'lr': 0.0004998245614836802, 'samples': 1928192, 'steps': 3765, 'loss/train': 2.908205270767212} +03/03/2022 17:50:28 - INFO - codeparrot_training - Step 3766: {'lr': 0.0004998243626534655, 'samples': 1928704, 'steps': 3766, 'loss/train': 2.2812976837158203} +03/03/2022 17:50:31 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/03/2022 17:50:34 - INFO - codeparrot_training - Step 3767: {'lr': 0.0004998241637106836, 'samples': 1929216, 'steps': 3767, 'loss/train': 2.5964865684509277} +03/03/2022 17:50:37 - INFO - codeparrot_training - Step 3768: {'lr': 0.0004998239646553349, 'samples': 1929728, 'steps': 3768, 'loss/train': 2.180405616760254} +03/03/2022 17:50:39 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/03/2022 17:50:43 - INFO - codeparrot_training - Step 3769: {'lr': 0.0004998237654874195, 'samples': 1930240, 'steps': 3769, 'loss/train': 3.5195956230163574} +03/03/2022 17:50:46 - INFO - codeparrot_training - Step 3770: {'lr': 0.0004998235662069372, 'samples': 1930752, 'steps': 3770, 'loss/train': 1.7296557426452637} +03/03/2022 17:50:49 - INFO - codeparrot_training - Step 3771: {'lr': 0.0004998233668138883, 'samples': 1931264, 'steps': 3771, 'loss/train': 2.9200501441955566} +03/03/2022 17:50:52 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/03/2022 17:50:55 - INFO - codeparrot_training - Step 3772: {'lr': 0.0004998231673082729, 'samples': 1931776, 'steps': 3772, 'loss/train': 3.0159971714019775} +03/03/2022 17:50:58 - INFO - codeparrot_training - Step 3773: {'lr': 0.000499822967690091, 'samples': 1932288, 'steps': 3773, 'loss/train': 2.8896706104278564} +03/03/2022 17:51:00 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/03/2022 17:51:03 - INFO - codeparrot_training - Step 3774: {'lr': 0.0004998227679593426, 'samples': 1932800, 'steps': 3774, 'loss/train': 2.543423652648926} +03/03/2022 17:51:06 - INFO - codeparrot_training - Step 3775: {'lr': 0.0004998225681160281, 'samples': 1933312, 'steps': 3775, 'loss/train': 2.5930213928222656} +03/03/2022 17:51:09 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/03/2022 17:51:11 - INFO - codeparrot_training - Step 3776: {'lr': 0.0004998223681601474, 'samples': 1933824, 'steps': 3776, 'loss/train': 2.8261871337890625} +03/03/2022 17:51:15 - INFO - codeparrot_training - Step 3777: {'lr': 0.0004998221680917004, 'samples': 1934336, 'steps': 3777, 'loss/train': 1.3928354978561401} +03/03/2022 17:51:18 - INFO - codeparrot_training - Step 3778: {'lr': 0.0004998219679106876, 'samples': 1934848, 'steps': 3778, 'loss/train': 2.0770843029022217} +03/03/2022 17:51:18 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/03/2022 17:51:23 - INFO - codeparrot_training - Step 3779: {'lr': 0.0004998217676171088, 'samples': 1935360, 'steps': 3779, 'loss/train': 2.2479074001312256} +03/03/2022 17:51:26 - INFO - codeparrot_training - Step 3780: {'lr': 0.0004998215672109641, 'samples': 1935872, 'steps': 3780, 'loss/train': 2.635571002960205} +03/03/2022 17:51:26 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/03/2022 17:51:32 - INFO - codeparrot_training - Step 3781: {'lr': 0.0004998213666922537, 'samples': 1936384, 'steps': 3781, 'loss/train': 2.527639627456665} +03/03/2022 17:51:34 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/03/2022 17:51:37 - INFO - codeparrot_training - Step 3782: {'lr': 0.0004998211660609777, 'samples': 1936896, 'steps': 3782, 'loss/train': 2.7919180393218994} +03/03/2022 17:51:40 - INFO - codeparrot_training - Step 3783: {'lr': 0.0004998209653171361, 'samples': 1937408, 'steps': 3783, 'loss/train': 2.786320209503174} +03/03/2022 17:51:42 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/03/2022 17:51:46 - INFO - codeparrot_training - Step 3784: {'lr': 0.0004998207644607291, 'samples': 1937920, 'steps': 3784, 'loss/train': 1.423477053642273} +03/03/2022 17:51:49 - INFO - codeparrot_training - Step 3785: {'lr': 0.0004998205634917566, 'samples': 1938432, 'steps': 3785, 'loss/train': 2.622469663619995} +03/03/2022 17:51:52 - INFO - codeparrot_training - Step 3786: {'lr': 0.0004998203624102188, 'samples': 1938944, 'steps': 3786, 'loss/train': 2.874709367752075} +03/03/2022 17:51:52 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/03/2022 17:51:57 - INFO - codeparrot_training - Step 3787: {'lr': 0.0004998201612161159, 'samples': 1939456, 'steps': 3787, 'loss/train': 2.402019500732422} +03/03/2022 17:52:00 - INFO - codeparrot_training - Step 3788: {'lr': 0.0004998199599094478, 'samples': 1939968, 'steps': 3788, 'loss/train': 2.9784326553344727} +03/03/2022 17:52:01 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/03/2022 17:52:05 - INFO - codeparrot_training - Step 3789: {'lr': 0.0004998197584902147, 'samples': 1940480, 'steps': 3789, 'loss/train': 2.2643918991088867} +03/03/2022 17:52:09 - INFO - codeparrot_training - Step 3790: {'lr': 0.0004998195569584168, 'samples': 1940992, 'steps': 3790, 'loss/train': 2.488447666168213} +03/03/2022 17:52:09 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/03/2022 17:52:14 - INFO - codeparrot_training - Step 3791: {'lr': 0.0004998193553140539, 'samples': 1941504, 'steps': 3791, 'loss/train': 2.5369577407836914} +03/03/2022 17:52:17 - INFO - codeparrot_training - Step 3792: {'lr': 0.0004998191535571264, 'samples': 1942016, 'steps': 3792, 'loss/train': 3.69472336769104} +03/03/2022 17:52:17 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/03/2022 17:52:22 - INFO - codeparrot_training - Step 3793: {'lr': 0.0004998189516876342, 'samples': 1942528, 'steps': 3793, 'loss/train': 2.4994587898254395} +03/03/2022 17:52:25 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/03/2022 17:52:28 - INFO - codeparrot_training - Step 3794: {'lr': 0.0004998187497055773, 'samples': 1943040, 'steps': 3794, 'loss/train': 3.082371234893799} +03/03/2022 17:52:31 - INFO - codeparrot_training - Step 3795: {'lr': 0.000499818547610956, 'samples': 1943552, 'steps': 3795, 'loss/train': 3.0740747451782227} +03/03/2022 17:52:34 - INFO - codeparrot_training - Step 3796: {'lr': 0.0004998183454037703, 'samples': 1944064, 'steps': 3796, 'loss/train': 2.5580010414123535} +03/03/2022 17:52:34 - INFO - codeparrot_training - Skipping example with length 158 (seq_length=1024) +03/03/2022 17:52:39 - INFO - codeparrot_training - Step 3797: {'lr': 0.0004998181430840204, 'samples': 1944576, 'steps': 3797, 'loss/train': 2.9097588062286377} +03/03/2022 17:52:42 - INFO - codeparrot_training - Step 3798: {'lr': 0.0004998179406517063, 'samples': 1945088, 'steps': 3798, 'loss/train': 2.571601152420044} +03/03/2022 17:52:43 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/03/2022 17:52:48 - INFO - codeparrot_training - Step 3799: {'lr': 0.000499817738106828, 'samples': 1945600, 'steps': 3799, 'loss/train': 2.3581387996673584} +03/03/2022 17:52:51 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/03/2022 17:52:53 - INFO - codeparrot_training - Step 3800: {'lr': 0.0004998175354493857, 'samples': 1946112, 'steps': 3800, 'loss/train': 3.021977186203003} +03/03/2022 17:52:56 - INFO - codeparrot_training - Step 3801: {'lr': 0.0004998173326793795, 'samples': 1946624, 'steps': 3801, 'loss/train': 3.525871515274048} +03/03/2022 17:52:59 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/03/2022 17:53:02 - INFO - codeparrot_training - Step 3802: {'lr': 0.0004998171297968095, 'samples': 1947136, 'steps': 3802, 'loss/train': 1.9012943506240845} +03/03/2022 17:53:05 - INFO - codeparrot_training - Step 3803: {'lr': 0.0004998169268016757, 'samples': 1947648, 'steps': 3803, 'loss/train': 1.8721492290496826} +03/03/2022 17:53:08 - INFO - codeparrot_training - Step 3804: {'lr': 0.0004998167236939783, 'samples': 1948160, 'steps': 3804, 'loss/train': 3.524681806564331} +03/03/2022 17:53:08 - INFO - codeparrot_training - Skipping example with length 103 (seq_length=1024) +03/03/2022 17:53:13 - INFO - codeparrot_training - Step 3805: {'lr': 0.0004998165204737173, 'samples': 1948672, 'steps': 3805, 'loss/train': 2.6027891635894775} +03/03/2022 17:53:16 - INFO - codeparrot_training - Step 3806: {'lr': 0.0004998163171408928, 'samples': 1949184, 'steps': 3806, 'loss/train': 2.059025287628174} +03/03/2022 17:53:16 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/03/2022 17:53:21 - INFO - codeparrot_training - Step 3807: {'lr': 0.000499816113695505, 'samples': 1949696, 'steps': 3807, 'loss/train': 2.747498035430908} +03/03/2022 17:53:25 - INFO - codeparrot_training - Step 3808: {'lr': 0.0004998159101375538, 'samples': 1950208, 'steps': 3808, 'loss/train': 3.2909584045410156} +03/03/2022 17:53:25 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 17:53:30 - INFO - codeparrot_training - Step 3809: {'lr': 0.0004998157064670395, 'samples': 1950720, 'steps': 3809, 'loss/train': 2.8653724193573} +03/03/2022 17:53:33 - INFO - codeparrot_training - Step 3810: {'lr': 0.0004998155026839621, 'samples': 1951232, 'steps': 3810, 'loss/train': 2.820488214492798} +03/03/2022 17:53:34 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/03/2022 17:53:38 - INFO - codeparrot_training - Step 3811: {'lr': 0.0004998152987883217, 'samples': 1951744, 'steps': 3811, 'loss/train': 2.5435593128204346} +03/03/2022 17:53:41 - INFO - codeparrot_training - Step 3812: {'lr': 0.0004998150947801182, 'samples': 1952256, 'steps': 3812, 'loss/train': 1.3595515489578247} +03/03/2022 17:53:42 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/03/2022 17:53:47 - INFO - codeparrot_training - Step 3813: {'lr': 0.000499814890659352, 'samples': 1952768, 'steps': 3813, 'loss/train': 1.7683148384094238} +03/03/2022 17:53:50 - INFO - codeparrot_training - Step 3814: {'lr': 0.0004998146864260231, 'samples': 1953280, 'steps': 3814, 'loss/train': 2.640709638595581} +03/03/2022 17:53:50 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/03/2022 17:53:55 - INFO - codeparrot_training - Step 3815: {'lr': 0.0004998144820801316, 'samples': 1953792, 'steps': 3815, 'loss/train': 2.3973660469055176} +03/03/2022 17:53:58 - INFO - codeparrot_training - Step 3816: {'lr': 0.0004998142776216775, 'samples': 1954304, 'steps': 3816, 'loss/train': 3.3172028064727783} +03/03/2022 17:53:59 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/03/2022 17:54:04 - INFO - codeparrot_training - Step 3817: {'lr': 0.0004998140730506609, 'samples': 1954816, 'steps': 3817, 'loss/train': 3.212907314300537} +03/03/2022 17:54:07 - INFO - codeparrot_training - Step 3818: {'lr': 0.000499813868367082, 'samples': 1955328, 'steps': 3818, 'loss/train': 2.394165277481079} +03/03/2022 17:54:08 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/03/2022 17:54:12 - INFO - codeparrot_training - Step 3819: {'lr': 0.0004998136635709408, 'samples': 1955840, 'steps': 3819, 'loss/train': 2.162062406539917} +03/03/2022 17:54:15 - INFO - codeparrot_training - Step 3820: {'lr': 0.0004998134586622374, 'samples': 1956352, 'steps': 3820, 'loss/train': 2.444108724594116} +03/03/2022 17:54:17 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/03/2022 17:54:21 - INFO - codeparrot_training - Step 3821: {'lr': 0.0004998132536409718, 'samples': 1956864, 'steps': 3821, 'loss/train': 2.7850687503814697} +03/03/2022 17:54:24 - INFO - codeparrot_training - Step 3822: {'lr': 0.0004998130485071444, 'samples': 1957376, 'steps': 3822, 'loss/train': 3.0810601711273193} +03/03/2022 17:54:25 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/03/2022 17:54:29 - INFO - codeparrot_training - Step 3823: {'lr': 0.000499812843260755, 'samples': 1957888, 'steps': 3823, 'loss/train': 3.0379748344421387} +03/03/2022 17:54:32 - INFO - codeparrot_training - Step 3824: {'lr': 0.0004998126379018038, 'samples': 1958400, 'steps': 3824, 'loss/train': 2.047121524810791} +03/03/2022 17:54:33 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/03/2022 17:54:37 - INFO - codeparrot_training - Step 3825: {'lr': 0.000499812432430291, 'samples': 1958912, 'steps': 3825, 'loss/train': 0.3185496926307678} +03/03/2022 17:54:41 - INFO - codeparrot_training - Step 3826: {'lr': 0.0004998122268462164, 'samples': 1959424, 'steps': 3826, 'loss/train': 1.8392993211746216} +03/03/2022 17:54:42 - INFO - codeparrot_training - Skipping example with length 140 (seq_length=1024) +03/03/2022 17:54:46 - INFO - codeparrot_training - Step 3827: {'lr': 0.0004998120211495803, 'samples': 1959936, 'steps': 3827, 'loss/train': 2.8945508003234863} +03/03/2022 17:54:49 - INFO - codeparrot_training - Step 3828: {'lr': 0.0004998118153403827, 'samples': 1960448, 'steps': 3828, 'loss/train': 3.3008294105529785} +03/03/2022 17:54:50 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 17:54:54 - INFO - codeparrot_training - Step 3829: {'lr': 0.0004998116094186239, 'samples': 1960960, 'steps': 3829, 'loss/train': 2.9008419513702393} +03/03/2022 17:54:57 - INFO - codeparrot_training - Step 3830: {'lr': 0.0004998114033843038, 'samples': 1961472, 'steps': 3830, 'loss/train': 2.5601296424865723} +03/03/2022 17:54:59 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/03/2022 17:55:03 - INFO - codeparrot_training - Step 3831: {'lr': 0.0004998111972374225, 'samples': 1961984, 'steps': 3831, 'loss/train': 2.9485204219818115} +03/03/2022 17:55:06 - INFO - codeparrot_training - Step 3832: {'lr': 0.0004998109909779801, 'samples': 1962496, 'steps': 3832, 'loss/train': 3.3430233001708984} +03/03/2022 17:55:07 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/03/2022 17:55:11 - INFO - codeparrot_training - Step 3833: {'lr': 0.0004998107846059768, 'samples': 1963008, 'steps': 3833, 'loss/train': 2.8167989253997803} +03/03/2022 17:55:14 - INFO - codeparrot_training - Step 3834: {'lr': 0.0004998105781214126, 'samples': 1963520, 'steps': 3834, 'loss/train': 2.5883312225341797} +03/03/2022 17:55:15 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/03/2022 17:55:20 - INFO - codeparrot_training - Step 3835: {'lr': 0.0004998103715242875, 'samples': 1964032, 'steps': 3835, 'loss/train': 1.0971472263336182} +03/03/2022 17:55:23 - INFO - codeparrot_training - Step 3836: {'lr': 0.0004998101648146018, 'samples': 1964544, 'steps': 3836, 'loss/train': 2.96157169342041} +03/03/2022 17:55:24 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/03/2022 17:55:28 - INFO - codeparrot_training - Step 3837: {'lr': 0.0004998099579923555, 'samples': 1965056, 'steps': 3837, 'loss/train': 3.068044662475586} +03/03/2022 17:55:31 - INFO - codeparrot_training - Step 3838: {'lr': 0.0004998097510575487, 'samples': 1965568, 'steps': 3838, 'loss/train': 3.1705379486083984} +03/03/2022 17:55:32 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/03/2022 17:55:36 - INFO - codeparrot_training - Step 3839: {'lr': 0.0004998095440101815, 'samples': 1966080, 'steps': 3839, 'loss/train': 2.1022558212280273} +03/03/2022 17:55:40 - INFO - codeparrot_training - Step 3840: {'lr': 0.0004998093368502539, 'samples': 1966592, 'steps': 3840, 'loss/train': 1.828479528427124} +03/03/2022 17:55:40 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/03/2022 17:55:45 - INFO - codeparrot_training - Step 3841: {'lr': 0.000499809129577766, 'samples': 1967104, 'steps': 3841, 'loss/train': 3.0751326084136963} +03/03/2022 17:55:48 - INFO - codeparrot_training - Step 3842: {'lr': 0.0004998089221927182, 'samples': 1967616, 'steps': 3842, 'loss/train': 3.1886119842529297} +03/03/2022 17:55:49 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/03/2022 17:55:53 - INFO - codeparrot_training - Step 3843: {'lr': 0.0004998087146951101, 'samples': 1968128, 'steps': 3843, 'loss/train': 2.654568910598755} +03/03/2022 17:55:56 - INFO - codeparrot_training - Step 3844: {'lr': 0.0004998085070849422, 'samples': 1968640, 'steps': 3844, 'loss/train': 2.970553398132324} +03/03/2022 17:55:57 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/03/2022 17:56:02 - INFO - codeparrot_training - Step 3845: {'lr': 0.0004998082993622144, 'samples': 1969152, 'steps': 3845, 'loss/train': 2.4194540977478027} +03/03/2022 17:56:05 - INFO - codeparrot_training - Step 3846: {'lr': 0.0004998080915269268, 'samples': 1969664, 'steps': 3846, 'loss/train': 2.6159262657165527} +03/03/2022 17:56:05 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/03/2022 17:56:10 - INFO - codeparrot_training - Step 3847: {'lr': 0.0004998078835790796, 'samples': 1970176, 'steps': 3847, 'loss/train': 2.8778598308563232} +03/03/2022 17:56:13 - INFO - codeparrot_training - Step 3848: {'lr': 0.0004998076755186727, 'samples': 1970688, 'steps': 3848, 'loss/train': 1.1561795473098755} +03/03/2022 17:56:14 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/03/2022 17:56:18 - INFO - codeparrot_training - Step 3849: {'lr': 0.0004998074673457064, 'samples': 1971200, 'steps': 3849, 'loss/train': 2.9131720066070557} +03/03/2022 17:56:22 - INFO - codeparrot_training - Step 3850: {'lr': 0.0004998072590601808, 'samples': 1971712, 'steps': 3850, 'loss/train': 2.3436501026153564} +03/03/2022 17:56:22 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/03/2022 17:56:27 - INFO - codeparrot_training - Step 3851: {'lr': 0.0004998070506620957, 'samples': 1972224, 'steps': 3851, 'loss/train': 3.272664785385132} +03/03/2022 17:56:30 - INFO - codeparrot_training - Step 3852: {'lr': 0.0004998068421514515, 'samples': 1972736, 'steps': 3852, 'loss/train': 1.823011040687561} +03/03/2022 17:56:30 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/03/2022 17:56:35 - INFO - codeparrot_training - Step 3853: {'lr': 0.0004998066335282483, 'samples': 1973248, 'steps': 3853, 'loss/train': 2.1942403316497803} +03/03/2022 17:56:39 - INFO - codeparrot_training - Step 3854: {'lr': 0.0004998064247924859, 'samples': 1973760, 'steps': 3854, 'loss/train': 2.3646399974823} +03/03/2022 17:56:39 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/03/2022 17:56:44 - INFO - codeparrot_training - Step 3855: {'lr': 0.0004998062159441648, 'samples': 1974272, 'steps': 3855, 'loss/train': 2.4217517375946045} +03/03/2022 17:56:47 - INFO - codeparrot_training - Step 3856: {'lr': 0.0004998060069832846, 'samples': 1974784, 'steps': 3856, 'loss/train': 2.312204599380493} +03/03/2022 17:56:47 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/03/2022 17:56:53 - INFO - codeparrot_training - Step 3857: {'lr': 0.0004998057979098459, 'samples': 1975296, 'steps': 3857, 'loss/train': 2.196375608444214} +03/03/2022 17:56:56 - INFO - codeparrot_training - Step 3858: {'lr': 0.0004998055887238485, 'samples': 1975808, 'steps': 3858, 'loss/train': 2.5646989345550537} +03/03/2022 17:56:56 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/03/2022 17:57:01 - INFO - codeparrot_training - Step 3859: {'lr': 0.0004998053794252925, 'samples': 1976320, 'steps': 3859, 'loss/train': 2.4142074584960938} +03/03/2022 17:57:04 - INFO - codeparrot_training - Step 3860: {'lr': 0.0004998051700141781, 'samples': 1976832, 'steps': 3860, 'loss/train': 1.6763968467712402} +03/03/2022 17:57:05 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/03/2022 17:57:09 - INFO - codeparrot_training - Step 3861: {'lr': 0.0004998049604905052, 'samples': 1977344, 'steps': 3861, 'loss/train': 2.0136899948120117} +03/03/2022 17:57:13 - INFO - codeparrot_training - Step 3862: {'lr': 0.0004998047508542742, 'samples': 1977856, 'steps': 3862, 'loss/train': 2.0031094551086426} +03/03/2022 17:57:13 - INFO - codeparrot_training - Skipping example with length 628 (seq_length=1024) +03/03/2022 17:57:18 - INFO - codeparrot_training - Step 3863: {'lr': 0.000499804541105485, 'samples': 1978368, 'steps': 3863, 'loss/train': 2.8693439960479736} +03/03/2022 17:57:21 - INFO - codeparrot_training - Step 3864: {'lr': 0.0004998043312441378, 'samples': 1978880, 'steps': 3864, 'loss/train': 1.8378208875656128} +03/03/2022 17:57:21 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/03/2022 17:57:26 - INFO - codeparrot_training - Step 3865: {'lr': 0.0004998041212702325, 'samples': 1979392, 'steps': 3865, 'loss/train': 3.6412179470062256} +03/03/2022 17:57:29 - INFO - codeparrot_training - Step 3866: {'lr': 0.0004998039111837694, 'samples': 1979904, 'steps': 3866, 'loss/train': 3.0094830989837646} +03/03/2022 17:57:30 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/03/2022 17:57:35 - INFO - codeparrot_training - Step 3867: {'lr': 0.0004998037009847485, 'samples': 1980416, 'steps': 3867, 'loss/train': 2.71376895904541} +03/03/2022 17:57:38 - INFO - codeparrot_training - Step 3868: {'lr': 0.0004998034906731699, 'samples': 1980928, 'steps': 3868, 'loss/train': 1.7196274995803833} +03/03/2022 17:57:38 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/03/2022 17:57:43 - INFO - codeparrot_training - Step 3869: {'lr': 0.0004998032802490337, 'samples': 1981440, 'steps': 3869, 'loss/train': 3.5652031898498535} +03/03/2022 17:57:46 - INFO - codeparrot_training - Step 3870: {'lr': 0.0004998030697123399, 'samples': 1981952, 'steps': 3870, 'loss/train': 2.986319065093994} +03/03/2022 17:57:47 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/03/2022 17:57:52 - INFO - codeparrot_training - Step 3871: {'lr': 0.0004998028590630887, 'samples': 1982464, 'steps': 3871, 'loss/train': 3.1613736152648926} +03/03/2022 17:57:55 - INFO - codeparrot_training - Step 3872: {'lr': 0.0004998026483012803, 'samples': 1982976, 'steps': 3872, 'loss/train': 2.6749696731567383} +03/03/2022 17:57:56 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 17:58:00 - INFO - codeparrot_training - Step 3873: {'lr': 0.0004998024374269147, 'samples': 1983488, 'steps': 3873, 'loss/train': 3.0051774978637695} +03/03/2022 17:58:03 - INFO - codeparrot_training - Step 3874: {'lr': 0.000499802226439992, 'samples': 1984000, 'steps': 3874, 'loss/train': 1.8445782661437988} +03/03/2022 17:58:04 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/03/2022 17:58:09 - INFO - codeparrot_training - Step 3875: {'lr': 0.0004998020153405121, 'samples': 1984512, 'steps': 3875, 'loss/train': 2.854663848876953} +03/03/2022 17:58:12 - INFO - codeparrot_training - Step 3876: {'lr': 0.0004998018041284754, 'samples': 1985024, 'steps': 3876, 'loss/train': 2.5668838024139404} +03/03/2022 17:58:13 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/03/2022 17:58:17 - INFO - codeparrot_training - Step 3877: {'lr': 0.0004998015928038819, 'samples': 1985536, 'steps': 3877, 'loss/train': 2.474461317062378} +03/03/2022 17:58:20 - INFO - codeparrot_training - Step 3878: {'lr': 0.0004998013813667315, 'samples': 1986048, 'steps': 3878, 'loss/train': 2.9251227378845215} +03/03/2022 17:58:21 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/03/2022 17:58:26 - INFO - codeparrot_training - Step 3879: {'lr': 0.0004998011698170245, 'samples': 1986560, 'steps': 3879, 'loss/train': 0.40676942467689514} +03/03/2022 17:58:29 - INFO - codeparrot_training - Step 3880: {'lr': 0.000499800958154761, 'samples': 1987072, 'steps': 3880, 'loss/train': 2.4557044506073} +03/03/2022 17:58:29 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/03/2022 17:58:34 - INFO - codeparrot_training - Step 3881: {'lr': 0.000499800746379941, 'samples': 1987584, 'steps': 3881, 'loss/train': 2.256380796432495} +03/03/2022 17:58:37 - INFO - codeparrot_training - Step 3882: {'lr': 0.0004998005344925647, 'samples': 1988096, 'steps': 3882, 'loss/train': 2.2377755641937256} +03/03/2022 17:58:38 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 17:58:42 - INFO - codeparrot_training - Step 3883: {'lr': 0.0004998003224926321, 'samples': 1988608, 'steps': 3883, 'loss/train': 0.9866228103637695} +03/03/2022 17:58:46 - INFO - codeparrot_training - Step 3884: {'lr': 0.0004998001103801433, 'samples': 1989120, 'steps': 3884, 'loss/train': 2.723963499069214} +03/03/2022 17:58:46 - INFO - codeparrot_training - Skipping example with length 933 (seq_length=1024) +03/03/2022 17:58:51 - INFO - codeparrot_training - Step 3885: {'lr': 0.0004997998981550985, 'samples': 1989632, 'steps': 3885, 'loss/train': 2.6290078163146973} +03/03/2022 17:58:54 - INFO - codeparrot_training - Step 3886: {'lr': 0.0004997996858174976, 'samples': 1990144, 'steps': 3886, 'loss/train': 2.634025812149048} +03/03/2022 17:58:54 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/03/2022 17:58:59 - INFO - codeparrot_training - Step 3887: {'lr': 0.0004997994733673409, 'samples': 1990656, 'steps': 3887, 'loss/train': 2.3399627208709717} +03/03/2022 17:59:02 - INFO - codeparrot_training - Step 3888: {'lr': 0.0004997992608046283, 'samples': 1991168, 'steps': 3888, 'loss/train': 2.6446616649627686} +03/03/2022 17:59:03 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/03/2022 17:59:08 - INFO - codeparrot_training - Step 3889: {'lr': 0.0004997990481293602, 'samples': 1991680, 'steps': 3889, 'loss/train': 2.5382840633392334} +03/03/2022 17:59:11 - INFO - codeparrot_training - Step 3890: {'lr': 0.0004997988353415364, 'samples': 1992192, 'steps': 3890, 'loss/train': 2.7250001430511475} +03/03/2022 17:59:11 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/03/2022 17:59:16 - INFO - codeparrot_training - Step 3891: {'lr': 0.0004997986224411571, 'samples': 1992704, 'steps': 3891, 'loss/train': 2.8453969955444336} +03/03/2022 17:59:19 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/03/2022 17:59:21 - INFO - codeparrot_training - Step 3892: {'lr': 0.0004997984094282224, 'samples': 1993216, 'steps': 3892, 'loss/train': 3.0485970973968506} +03/03/2022 17:59:25 - INFO - codeparrot_training - Step 3893: {'lr': 0.0004997981963027324, 'samples': 1993728, 'steps': 3893, 'loss/train': 2.819335460662842} +03/03/2022 17:59:27 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/03/2022 17:59:30 - INFO - codeparrot_training - Step 3894: {'lr': 0.0004997979830646871, 'samples': 1994240, 'steps': 3894, 'loss/train': 2.3712048530578613} +03/03/2022 17:59:33 - INFO - codeparrot_training - Step 3895: {'lr': 0.0004997977697140868, 'samples': 1994752, 'steps': 3895, 'loss/train': 3.0491459369659424} +03/03/2022 17:59:36 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/03/2022 17:59:38 - INFO - codeparrot_training - Step 3896: {'lr': 0.0004997975562509315, 'samples': 1995264, 'steps': 3896, 'loss/train': 2.762273073196411} +03/03/2022 17:59:41 - INFO - codeparrot_training - Step 3897: {'lr': 0.0004997973426752212, 'samples': 1995776, 'steps': 3897, 'loss/train': 2.7730660438537598} +03/03/2022 17:59:44 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/03/2022 17:59:47 - INFO - codeparrot_training - Step 3898: {'lr': 0.0004997971289869561, 'samples': 1996288, 'steps': 3898, 'loss/train': 2.130523204803467} +03/03/2022 17:59:50 - INFO - codeparrot_training - Step 3899: {'lr': 0.0004997969151861362, 'samples': 1996800, 'steps': 3899, 'loss/train': 2.766228199005127} +03/03/2022 17:59:52 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/03/2022 17:59:55 - INFO - codeparrot_training - Step 3900: {'lr': 0.0004997967012727618, 'samples': 1997312, 'steps': 3900, 'loss/train': 2.325099468231201} +03/03/2022 17:59:58 - INFO - codeparrot_training - Step 3901: {'lr': 0.0004997964872468327, 'samples': 1997824, 'steps': 3901, 'loss/train': 3.279083490371704} +03/03/2022 18:00:01 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/03/2022 18:00:04 - INFO - codeparrot_training - Step 3902: {'lr': 0.0004997962731083492, 'samples': 1998336, 'steps': 3902, 'loss/train': 2.196179151535034} +03/03/2022 18:00:07 - INFO - codeparrot_training - Step 3903: {'lr': 0.0004997960588573115, 'samples': 1998848, 'steps': 3903, 'loss/train': 2.9831697940826416} +03/03/2022 18:00:09 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/03/2022 18:00:12 - INFO - codeparrot_training - Step 3904: {'lr': 0.0004997958444937193, 'samples': 1999360, 'steps': 3904, 'loss/train': 2.906731128692627} +03/03/2022 18:00:15 - INFO - codeparrot_training - Step 3905: {'lr': 0.0004997956300175732, 'samples': 1999872, 'steps': 3905, 'loss/train': 2.383371114730835} +03/03/2022 18:00:18 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/03/2022 18:00:21 - INFO - codeparrot_training - Step 3906: {'lr': 0.000499795415428873, 'samples': 2000384, 'steps': 3906, 'loss/train': 3.246926784515381} +03/03/2022 18:00:24 - INFO - codeparrot_training - Step 3907: {'lr': 0.0004997952007276187, 'samples': 2000896, 'steps': 3907, 'loss/train': 3.258622407913208} +03/03/2022 18:00:26 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/03/2022 18:00:29 - INFO - codeparrot_training - Step 3908: {'lr': 0.0004997949859138106, 'samples': 2001408, 'steps': 3908, 'loss/train': 2.2974727153778076} +03/03/2022 18:00:32 - INFO - codeparrot_training - Step 3909: {'lr': 0.0004997947709874487, 'samples': 2001920, 'steps': 3909, 'loss/train': 2.8245277404785156} +03/03/2022 18:00:34 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/03/2022 18:00:37 - INFO - codeparrot_training - Step 3910: {'lr': 0.0004997945559485333, 'samples': 2002432, 'steps': 3910, 'loss/train': 2.5938360691070557} +03/03/2022 18:00:41 - INFO - codeparrot_training - Step 3911: {'lr': 0.0004997943407970642, 'samples': 2002944, 'steps': 3911, 'loss/train': 2.3936986923217773} +03/03/2022 18:00:43 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/03/2022 18:00:46 - INFO - codeparrot_training - Step 3912: {'lr': 0.0004997941255330416, 'samples': 2003456, 'steps': 3912, 'loss/train': 2.5845417976379395} +03/03/2022 18:00:49 - INFO - codeparrot_training - Step 3913: {'lr': 0.0004997939101564656, 'samples': 2003968, 'steps': 3913, 'loss/train': 2.7111804485321045} +03/03/2022 18:00:52 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/03/2022 18:00:55 - INFO - codeparrot_training - Step 3914: {'lr': 0.0004997936946673365, 'samples': 2004480, 'steps': 3914, 'loss/train': 2.2483110427856445} +03/03/2022 18:00:58 - INFO - codeparrot_training - Step 3915: {'lr': 0.000499793479065654, 'samples': 2004992, 'steps': 3915, 'loss/train': 2.495793342590332} +03/03/2022 18:01:00 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/03/2022 18:01:03 - INFO - codeparrot_training - Step 3916: {'lr': 0.0004997932633514185, 'samples': 2005504, 'steps': 3916, 'loss/train': 2.491365671157837} +03/03/2022 18:01:06 - INFO - codeparrot_training - Step 3917: {'lr': 0.00049979304752463, 'samples': 2006016, 'steps': 3917, 'loss/train': 2.1895015239715576} +03/03/2022 18:01:09 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/03/2022 18:01:11 - INFO - codeparrot_training - Step 3918: {'lr': 0.0004997928315852887, 'samples': 2006528, 'steps': 3918, 'loss/train': 2.1218948364257812} +03/03/2022 18:01:15 - INFO - codeparrot_training - Step 3919: {'lr': 0.0004997926155333944, 'samples': 2007040, 'steps': 3919, 'loss/train': 3.1267402172088623} +03/03/2022 18:01:17 - INFO - codeparrot_training - Skipping example with length 910 (seq_length=1024) +03/03/2022 18:01:20 - INFO - codeparrot_training - Step 3920: {'lr': 0.0004997923993689476, 'samples': 2007552, 'steps': 3920, 'loss/train': 1.7067797183990479} +03/03/2022 18:01:23 - INFO - codeparrot_training - Step 3921: {'lr': 0.0004997921830919481, 'samples': 2008064, 'steps': 3921, 'loss/train': 3.328880548477173} +03/03/2022 18:01:26 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/03/2022 18:01:28 - INFO - codeparrot_training - Step 3922: {'lr': 0.0004997919667023962, 'samples': 2008576, 'steps': 3922, 'loss/train': 1.8148462772369385} +03/03/2022 18:01:32 - INFO - codeparrot_training - Step 3923: {'lr': 0.0004997917502002917, 'samples': 2009088, 'steps': 3923, 'loss/train': 3.037026882171631} +03/03/2022 18:01:34 - INFO - codeparrot_training - Skipping example with length 774 (seq_length=1024) +03/03/2022 18:01:37 - INFO - codeparrot_training - Step 3924: {'lr': 0.000499791533585635, 'samples': 2009600, 'steps': 3924, 'loss/train': 1.572440505027771} +03/03/2022 18:01:40 - INFO - codeparrot_training - Step 3925: {'lr': 0.0004997913168584262, 'samples': 2010112, 'steps': 3925, 'loss/train': 2.2495689392089844} +03/03/2022 18:01:43 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/03/2022 18:01:45 - INFO - codeparrot_training - Step 3926: {'lr': 0.0004997911000186651, 'samples': 2010624, 'steps': 3926, 'loss/train': 2.544018268585205} +03/03/2022 18:01:49 - INFO - codeparrot_training - Step 3927: {'lr': 0.0004997908830663521, 'samples': 2011136, 'steps': 3927, 'loss/train': 2.3300535678863525} +03/03/2022 18:01:51 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/03/2022 18:01:54 - INFO - codeparrot_training - Step 3928: {'lr': 0.0004997906660014871, 'samples': 2011648, 'steps': 3928, 'loss/train': 2.1153769493103027} +03/03/2022 18:01:57 - INFO - codeparrot_training - Step 3929: {'lr': 0.0004997904488240704, 'samples': 2012160, 'steps': 3929, 'loss/train': 2.5201823711395264} +03/03/2022 18:02:00 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/03/2022 18:02:02 - INFO - codeparrot_training - Step 3930: {'lr': 0.0004997902315341019, 'samples': 2012672, 'steps': 3930, 'loss/train': 3.689603567123413} +03/03/2022 18:02:06 - INFO - codeparrot_training - Step 3931: {'lr': 0.0004997900141315817, 'samples': 2013184, 'steps': 3931, 'loss/train': 3.0747292041778564} +03/03/2022 18:02:08 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/03/2022 18:02:11 - INFO - codeparrot_training - Step 3932: {'lr': 0.0004997897966165101, 'samples': 2013696, 'steps': 3932, 'loss/train': 3.1622369289398193} +03/03/2022 18:02:14 - INFO - codeparrot_training - Step 3933: {'lr': 0.000499789578988887, 'samples': 2014208, 'steps': 3933, 'loss/train': 2.250472068786621} +03/03/2022 18:02:16 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/03/2022 18:02:19 - INFO - codeparrot_training - Step 3934: {'lr': 0.0004997893612487126, 'samples': 2014720, 'steps': 3934, 'loss/train': 2.3587052822113037} +03/03/2022 18:02:22 - INFO - codeparrot_training - Step 3935: {'lr': 0.000499789143395987, 'samples': 2015232, 'steps': 3935, 'loss/train': 2.693073272705078} +03/03/2022 18:02:24 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/03/2022 18:02:28 - INFO - codeparrot_training - Step 3936: {'lr': 0.0004997889254307103, 'samples': 2015744, 'steps': 3936, 'loss/train': 2.833967447280884} +03/03/2022 18:02:31 - INFO - codeparrot_training - Step 3937: {'lr': 0.0004997887073528825, 'samples': 2016256, 'steps': 3937, 'loss/train': 1.5553349256515503} +03/03/2022 18:02:33 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/03/2022 18:02:36 - INFO - codeparrot_training - Step 3938: {'lr': 0.0004997884891625037, 'samples': 2016768, 'steps': 3938, 'loss/train': 2.2556614875793457} +03/03/2022 18:02:39 - INFO - codeparrot_training - Step 3939: {'lr': 0.0004997882708595742, 'samples': 2017280, 'steps': 3939, 'loss/train': 2.909503698348999} +03/03/2022 18:02:41 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/03/2022 18:02:45 - INFO - codeparrot_training - Step 3940: {'lr': 0.0004997880524440939, 'samples': 2017792, 'steps': 3940, 'loss/train': 2.662753105163574} +03/03/2022 18:02:48 - INFO - codeparrot_training - Step 3941: {'lr': 0.0004997878339160628, 'samples': 2018304, 'steps': 3941, 'loss/train': 2.884737253189087} +03/03/2022 18:02:51 - INFO - codeparrot_training - Step 3942: {'lr': 0.0004997876152754814, 'samples': 2018816, 'steps': 3942, 'loss/train': 3.0503275394439697} +03/03/2022 18:02:52 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/03/2022 18:02:58 - INFO - codeparrot_training - Step 3943: {'lr': 0.0004997873965223495, 'samples': 2019328, 'steps': 3943, 'loss/train': 2.9102585315704346} +03/03/2022 18:03:01 - INFO - codeparrot_training - Step 3944: {'lr': 0.0004997871776566672, 'samples': 2019840, 'steps': 3944, 'loss/train': 3.42290997505188} +03/03/2022 18:03:04 - INFO - codeparrot_training - Step 3945: {'lr': 0.0004997869586784346, 'samples': 2020352, 'steps': 3945, 'loss/train': 1.8006359338760376} +03/03/2022 18:03:05 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/03/2022 18:03:09 - INFO - codeparrot_training - Step 3946: {'lr': 0.0004997867395876519, 'samples': 2020864, 'steps': 3946, 'loss/train': 3.318176031112671} +03/03/2022 18:03:12 - INFO - codeparrot_training - Step 3947: {'lr': 0.0004997865203843192, 'samples': 2021376, 'steps': 3947, 'loss/train': 2.8333096504211426} +03/03/2022 18:03:13 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/03/2022 18:03:17 - INFO - codeparrot_training - Step 3948: {'lr': 0.0004997863010684365, 'samples': 2021888, 'steps': 3948, 'loss/train': 3.0892834663391113} +03/03/2022 18:03:21 - INFO - codeparrot_training - Step 3949: {'lr': 0.0004997860816400039, 'samples': 2022400, 'steps': 3949, 'loss/train': 4.059104919433594} +03/03/2022 18:03:21 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/03/2022 18:03:26 - INFO - codeparrot_training - Step 3950: {'lr': 0.0004997858620990217, 'samples': 2022912, 'steps': 3950, 'loss/train': 3.6839346885681152} +03/03/2022 18:03:29 - INFO - codeparrot_training - Step 3951: {'lr': 0.0004997856424454897, 'samples': 2023424, 'steps': 3951, 'loss/train': 2.158196210861206} +03/03/2022 18:03:32 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/03/2022 18:03:34 - INFO - codeparrot_training - Step 3952: {'lr': 0.0004997854226794082, 'samples': 2023936, 'steps': 3952, 'loss/train': 3.6836612224578857} +03/03/2022 18:03:38 - INFO - codeparrot_training - Step 3953: {'lr': 0.0004997852028007772, 'samples': 2024448, 'steps': 3953, 'loss/train': 2.3645546436309814} +03/03/2022 18:03:40 - INFO - codeparrot_training - Skipping example with length 925 (seq_length=1024) +03/03/2022 18:03:43 - INFO - codeparrot_training - Step 3954: {'lr': 0.0004997849828095969, 'samples': 2024960, 'steps': 3954, 'loss/train': 2.4522507190704346} +03/03/2022 18:03:46 - INFO - codeparrot_training - Step 3955: {'lr': 0.0004997847627058673, 'samples': 2025472, 'steps': 3955, 'loss/train': 2.9364266395568848} +03/03/2022 18:03:48 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/03/2022 18:03:51 - INFO - codeparrot_training - Step 3956: {'lr': 0.0004997845424895886, 'samples': 2025984, 'steps': 3956, 'loss/train': 4.080404758453369} +03/03/2022 18:03:54 - INFO - codeparrot_training - Step 3957: {'lr': 0.0004997843221607607, 'samples': 2026496, 'steps': 3957, 'loss/train': 1.346103310585022} +03/03/2022 18:03:57 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/03/2022 18:04:00 - INFO - codeparrot_training - Step 3958: {'lr': 0.0004997841017193841, 'samples': 2027008, 'steps': 3958, 'loss/train': 2.2125799655914307} +03/03/2022 18:04:03 - INFO - codeparrot_training - Step 3959: {'lr': 0.0004997838811654584, 'samples': 2027520, 'steps': 3959, 'loss/train': 2.544726848602295} +03/03/2022 18:04:05 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/03/2022 18:04:08 - INFO - codeparrot_training - Step 3960: {'lr': 0.000499783660498984, 'samples': 2028032, 'steps': 3960, 'loss/train': 2.5979676246643066} +03/03/2022 18:04:11 - INFO - codeparrot_training - Step 3961: {'lr': 0.0004997834397199609, 'samples': 2028544, 'steps': 3961, 'loss/train': 2.790780544281006} +03/03/2022 18:04:13 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/03/2022 18:04:17 - INFO - codeparrot_training - Step 3962: {'lr': 0.0004997832188283893, 'samples': 2029056, 'steps': 3962, 'loss/train': 3.1926486492156982} +03/03/2022 18:04:20 - INFO - codeparrot_training - Step 3963: {'lr': 0.0004997829978242693, 'samples': 2029568, 'steps': 3963, 'loss/train': 1.3696067333221436} +03/03/2022 18:04:21 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/03/2022 18:04:25 - INFO - codeparrot_training - Step 3964: {'lr': 0.0004997827767076008, 'samples': 2030080, 'steps': 3964, 'loss/train': 2.9188528060913086} +03/03/2022 18:04:28 - INFO - codeparrot_training - Step 3965: {'lr': 0.0004997825554783841, 'samples': 2030592, 'steps': 3965, 'loss/train': 2.740131378173828} +03/03/2022 18:04:30 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/03/2022 18:04:33 - INFO - codeparrot_training - Step 3966: {'lr': 0.0004997823341366192, 'samples': 2031104, 'steps': 3966, 'loss/train': 2.3014016151428223} +03/03/2022 18:04:37 - INFO - codeparrot_training - Step 3967: {'lr': 0.0004997821126823062, 'samples': 2031616, 'steps': 3967, 'loss/train': 2.4409942626953125} +03/03/2022 18:04:38 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/03/2022 18:04:42 - INFO - codeparrot_training - Step 3968: {'lr': 0.0004997818911154454, 'samples': 2032128, 'steps': 3968, 'loss/train': 2.6989142894744873} +03/03/2022 18:04:45 - INFO - codeparrot_training - Step 3969: {'lr': 0.0004997816694360367, 'samples': 2032640, 'steps': 3969, 'loss/train': 3.1979548931121826} +03/03/2022 18:04:47 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/03/2022 18:04:50 - INFO - codeparrot_training - Step 3970: {'lr': 0.00049978144764408, 'samples': 2033152, 'steps': 3970, 'loss/train': 2.3690061569213867} +03/03/2022 18:04:53 - INFO - codeparrot_training - Step 3971: {'lr': 0.0004997812257395758, 'samples': 2033664, 'steps': 3971, 'loss/train': 2.812025547027588} +03/03/2022 18:04:55 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/03/2022 18:04:59 - INFO - codeparrot_training - Step 3972: {'lr': 0.0004997810037225241, 'samples': 2034176, 'steps': 3972, 'loss/train': 3.2775442600250244} +03/03/2022 18:05:02 - INFO - codeparrot_training - Step 3973: {'lr': 0.0004997807815929248, 'samples': 2034688, 'steps': 3973, 'loss/train': 2.1329185962677} +03/03/2022 18:05:04 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/03/2022 18:05:07 - INFO - codeparrot_training - Step 3974: {'lr': 0.0004997805593507783, 'samples': 2035200, 'steps': 3974, 'loss/train': 2.7244060039520264} +03/03/2022 18:05:10 - INFO - codeparrot_training - Step 3975: {'lr': 0.0004997803369960844, 'samples': 2035712, 'steps': 3975, 'loss/train': 1.5863265991210938} +03/03/2022 18:05:13 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/03/2022 18:05:16 - INFO - codeparrot_training - Step 3976: {'lr': 0.0004997801145288433, 'samples': 2036224, 'steps': 3976, 'loss/train': 2.3796982765197754} +03/03/2022 18:05:19 - INFO - codeparrot_training - Step 3977: {'lr': 0.0004997798919490553, 'samples': 2036736, 'steps': 3977, 'loss/train': 2.716003894805908} +03/03/2022 18:05:21 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/03/2022 18:05:24 - INFO - codeparrot_training - Step 3978: {'lr': 0.0004997796692567202, 'samples': 2037248, 'steps': 3978, 'loss/train': 2.5770249366760254} +03/03/2022 18:05:27 - INFO - codeparrot_training - Step 3979: {'lr': 0.0004997794464518383, 'samples': 2037760, 'steps': 3979, 'loss/train': 1.193472981452942} +03/03/2022 18:05:29 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/03/2022 18:05:33 - INFO - codeparrot_training - Step 3980: {'lr': 0.0004997792235344096, 'samples': 2038272, 'steps': 3980, 'loss/train': 1.7371487617492676} +03/03/2022 18:05:36 - INFO - codeparrot_training - Step 3981: {'lr': 0.0004997790005044343, 'samples': 2038784, 'steps': 3981, 'loss/train': 3.5910770893096924} +03/03/2022 18:05:38 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/03/2022 18:05:41 - INFO - codeparrot_training - Step 3982: {'lr': 0.0004997787773619123, 'samples': 2039296, 'steps': 3982, 'loss/train': 3.1053221225738525} +03/03/2022 18:05:44 - INFO - codeparrot_training - Step 3983: {'lr': 0.0004997785541068439, 'samples': 2039808, 'steps': 3983, 'loss/train': 2.070878744125366} +03/03/2022 18:05:46 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/03/2022 18:05:49 - INFO - codeparrot_training - Step 3984: {'lr': 0.0004997783307392292, 'samples': 2040320, 'steps': 3984, 'loss/train': 2.9818992614746094} +03/03/2022 18:05:52 - INFO - codeparrot_training - Step 3985: {'lr': 0.0004997781072590683, 'samples': 2040832, 'steps': 3985, 'loss/train': 2.1180307865142822} +03/03/2022 18:05:55 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/03/2022 18:05:58 - INFO - codeparrot_training - Step 3986: {'lr': 0.000499777883666361, 'samples': 2041344, 'steps': 3986, 'loss/train': 3.313664436340332} +03/03/2022 18:06:01 - INFO - codeparrot_training - Step 3987: {'lr': 0.0004997776599611078, 'samples': 2041856, 'steps': 3987, 'loss/train': 2.0892179012298584} +03/03/2022 18:06:03 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/03/2022 18:06:06 - INFO - codeparrot_training - Step 3988: {'lr': 0.0004997774361433086, 'samples': 2042368, 'steps': 3988, 'loss/train': 2.44980788230896} +03/03/2022 18:06:09 - INFO - codeparrot_training - Step 3989: {'lr': 0.0004997772122129635, 'samples': 2042880, 'steps': 3989, 'loss/train': 2.897059202194214} +03/03/2022 18:06:11 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/03/2022 18:06:14 - INFO - codeparrot_training - Step 3990: {'lr': 0.0004997769881700727, 'samples': 2043392, 'steps': 3990, 'loss/train': 2.5682404041290283} +03/03/2022 18:06:18 - INFO - codeparrot_training - Step 3991: {'lr': 0.0004997767640146363, 'samples': 2043904, 'steps': 3991, 'loss/train': 3.12326717376709} +03/03/2022 18:06:19 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/03/2022 18:06:23 - INFO - codeparrot_training - Step 3992: {'lr': 0.0004997765397466543, 'samples': 2044416, 'steps': 3992, 'loss/train': 2.292527914047241} +03/03/2022 18:06:26 - INFO - codeparrot_training - Step 3993: {'lr': 0.0004997763153661269, 'samples': 2044928, 'steps': 3993, 'loss/train': 1.6879278421401978} +03/03/2022 18:06:27 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/03/2022 18:06:31 - INFO - codeparrot_training - Step 3994: {'lr': 0.000499776090873054, 'samples': 2045440, 'steps': 3994, 'loss/train': 2.0581037998199463} +03/03/2022 18:06:34 - INFO - codeparrot_training - Step 3995: {'lr': 0.000499775866267436, 'samples': 2045952, 'steps': 3995, 'loss/train': 2.7551822662353516} +03/03/2022 18:06:35 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/03/2022 18:06:40 - INFO - codeparrot_training - Step 3996: {'lr': 0.0004997756415492727, 'samples': 2046464, 'steps': 3996, 'loss/train': 1.3939646482467651} +03/03/2022 18:06:43 - INFO - codeparrot_training - Step 3997: {'lr': 0.0004997754167185644, 'samples': 2046976, 'steps': 3997, 'loss/train': 2.505709409713745} +03/03/2022 18:06:44 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/03/2022 18:06:48 - INFO - codeparrot_training - Step 3998: {'lr': 0.0004997751917753113, 'samples': 2047488, 'steps': 3998, 'loss/train': 3.190295934677124} +03/03/2022 18:06:51 - INFO - codeparrot_training - Step 3999: {'lr': 0.0004997749667195132, 'samples': 2048000, 'steps': 3999, 'loss/train': 3.65911602973938} +03/03/2022 18:06:53 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 18:06:56 - INFO - codeparrot_training - Step 4000: {'lr': 0.0004997747415511704, 'samples': 2048512, 'steps': 4000, 'loss/train': 3.273041009902954} +03/03/2022 18:07:00 - INFO - codeparrot_training - Step 4001: {'lr': 0.000499774516270283, 'samples': 2049024, 'steps': 4001, 'loss/train': 0.6735871434211731} +03/03/2022 18:07:01 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/03/2022 18:07:05 - INFO - codeparrot_training - Step 4002: {'lr': 0.0004997742908768508, 'samples': 2049536, 'steps': 4002, 'loss/train': 3.193909168243408} +03/03/2022 18:07:08 - INFO - codeparrot_training - Step 4003: {'lr': 0.0004997740653708744, 'samples': 2050048, 'steps': 4003, 'loss/train': 1.8106073141098022} +03/03/2022 18:07:09 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/03/2022 18:07:13 - INFO - codeparrot_training - Step 4004: {'lr': 0.0004997738397523537, 'samples': 2050560, 'steps': 4004, 'loss/train': 2.9387056827545166} +03/03/2022 18:07:16 - INFO - codeparrot_training - Step 4005: {'lr': 0.0004997736140212887, 'samples': 2051072, 'steps': 4005, 'loss/train': 2.4872539043426514} +03/03/2022 18:07:17 - INFO - codeparrot_training - Skipping example with length 411 (seq_length=1024) +03/03/2022 18:07:22 - INFO - codeparrot_training - Step 4006: {'lr': 0.0004997733881776796, 'samples': 2051584, 'steps': 4006, 'loss/train': 3.0138444900512695} +03/03/2022 18:07:25 - INFO - codeparrot_training - Step 4007: {'lr': 0.0004997731622215264, 'samples': 2052096, 'steps': 4007, 'loss/train': 3.176285743713379} +03/03/2022 18:07:26 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/03/2022 18:07:30 - INFO - codeparrot_training - Step 4008: {'lr': 0.0004997729361528292, 'samples': 2052608, 'steps': 4008, 'loss/train': 2.647770881652832} +03/03/2022 18:07:33 - INFO - codeparrot_training - Step 4009: {'lr': 0.0004997727099715882, 'samples': 2053120, 'steps': 4009, 'loss/train': 2.273289442062378} +03/03/2022 18:07:34 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/03/2022 18:07:38 - INFO - codeparrot_training - Step 4010: {'lr': 0.0004997724836778036, 'samples': 2053632, 'steps': 4010, 'loss/train': 2.4431509971618652} +03/03/2022 18:07:41 - INFO - codeparrot_training - Step 4011: {'lr': 0.0004997722572714753, 'samples': 2054144, 'steps': 4011, 'loss/train': 3.5274946689605713} +03/03/2022 18:07:42 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/03/2022 18:07:47 - INFO - codeparrot_training - Step 4012: {'lr': 0.0004997720307526034, 'samples': 2054656, 'steps': 4012, 'loss/train': 2.285733222961426} +03/03/2022 18:07:50 - INFO - codeparrot_training - Step 4013: {'lr': 0.0004997718041211881, 'samples': 2055168, 'steps': 4013, 'loss/train': 3.925359010696411} +03/03/2022 18:07:50 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/03/2022 18:07:55 - INFO - codeparrot_training - Step 4014: {'lr': 0.0004997715773772296, 'samples': 2055680, 'steps': 4014, 'loss/train': 2.8097586631774902} +03/03/2022 18:07:58 - INFO - codeparrot_training - Step 4015: {'lr': 0.0004997713505207278, 'samples': 2056192, 'steps': 4015, 'loss/train': 2.6443235874176025} +03/03/2022 18:07:58 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/03/2022 18:08:04 - INFO - codeparrot_training - Step 4016: {'lr': 0.0004997711235516829, 'samples': 2056704, 'steps': 4016, 'loss/train': 2.697261333465576} +03/03/2022 18:08:07 - INFO - codeparrot_training - Step 4017: {'lr': 0.000499770896470095, 'samples': 2057216, 'steps': 4017, 'loss/train': 2.3651421070098877} +03/03/2022 18:08:07 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/03/2022 18:08:12 - INFO - codeparrot_training - Step 4018: {'lr': 0.0004997706692759642, 'samples': 2057728, 'steps': 4018, 'loss/train': 2.331338405609131} +03/03/2022 18:08:15 - INFO - codeparrot_training - Step 4019: {'lr': 0.0004997704419692905, 'samples': 2058240, 'steps': 4019, 'loss/train': 2.53157114982605} +03/03/2022 18:08:16 - INFO - codeparrot_training - Skipping example with length 1005 (seq_length=1024) +03/03/2022 18:08:21 - INFO - codeparrot_training - Step 4020: {'lr': 0.0004997702145500741, 'samples': 2058752, 'steps': 4020, 'loss/train': 1.9059734344482422} +03/03/2022 18:08:24 - INFO - codeparrot_training - Step 4021: {'lr': 0.0004997699870183151, 'samples': 2059264, 'steps': 4021, 'loss/train': 1.3047473430633545} +03/03/2022 18:08:24 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/03/2022 18:08:29 - INFO - codeparrot_training - Step 4022: {'lr': 0.0004997697593740137, 'samples': 2059776, 'steps': 4022, 'loss/train': 2.946004629135132} +03/03/2022 18:08:32 - INFO - codeparrot_training - Step 4023: {'lr': 0.0004997695316171698, 'samples': 2060288, 'steps': 4023, 'loss/train': 1.339981198310852} +03/03/2022 18:08:33 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 18:08:37 - INFO - codeparrot_training - Step 4024: {'lr': 0.0004997693037477837, 'samples': 2060800, 'steps': 4024, 'loss/train': 2.8004448413848877} +03/03/2022 18:08:41 - INFO - codeparrot_training - Step 4025: {'lr': 0.0004997690757658552, 'samples': 2061312, 'steps': 4025, 'loss/train': 2.155595541000366} +03/03/2022 18:08:41 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/03/2022 18:08:46 - INFO - codeparrot_training - Step 4026: {'lr': 0.0004997688476713848, 'samples': 2061824, 'steps': 4026, 'loss/train': 2.260622024536133} +03/03/2022 18:08:49 - INFO - codeparrot_training - Step 4027: {'lr': 0.0004997686194643724, 'samples': 2062336, 'steps': 4027, 'loss/train': 2.6317644119262695} +03/03/2022 18:08:49 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/03/2022 18:08:54 - INFO - codeparrot_training - Step 4028: {'lr': 0.0004997683911448181, 'samples': 2062848, 'steps': 4028, 'loss/train': 2.65738582611084} +03/03/2022 18:08:57 - INFO - codeparrot_training - Step 4029: {'lr': 0.000499768162712722, 'samples': 2063360, 'steps': 4029, 'loss/train': 2.319352865219116} +03/03/2022 18:08:58 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/03/2022 18:09:03 - INFO - codeparrot_training - Step 4030: {'lr': 0.0004997679341680843, 'samples': 2063872, 'steps': 4030, 'loss/train': 2.180222272872925} +03/03/2022 18:09:06 - INFO - codeparrot_training - Step 4031: {'lr': 0.0004997677055109049, 'samples': 2064384, 'steps': 4031, 'loss/train': 3.0376524925231934} +03/03/2022 18:09:06 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/03/2022 18:09:11 - INFO - codeparrot_training - Step 4032: {'lr': 0.0004997674767411841, 'samples': 2064896, 'steps': 4032, 'loss/train': 2.1492767333984375} +03/03/2022 18:09:14 - INFO - codeparrot_training - Step 4033: {'lr': 0.0004997672478589219, 'samples': 2065408, 'steps': 4033, 'loss/train': 2.4172158241271973} +03/03/2022 18:09:14 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/03/2022 18:09:20 - INFO - codeparrot_training - Step 4034: {'lr': 0.0004997670188641183, 'samples': 2065920, 'steps': 4034, 'loss/train': 2.6885666847229004} +03/03/2022 18:09:23 - INFO - codeparrot_training - Step 4035: {'lr': 0.0004997667897567738, 'samples': 2066432, 'steps': 4035, 'loss/train': 3.269563913345337} +03/03/2022 18:09:23 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/03/2022 18:09:28 - INFO - codeparrot_training - Step 4036: {'lr': 0.0004997665605368881, 'samples': 2066944, 'steps': 4036, 'loss/train': 2.86141037940979} +03/03/2022 18:09:31 - INFO - codeparrot_training - Step 4037: {'lr': 0.0004997663312044614, 'samples': 2067456, 'steps': 4037, 'loss/train': 2.443502187728882} +03/03/2022 18:09:31 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/03/2022 18:09:36 - INFO - codeparrot_training - Step 4038: {'lr': 0.0004997661017594939, 'samples': 2067968, 'steps': 4038, 'loss/train': 2.5220706462860107} +03/03/2022 18:09:40 - INFO - codeparrot_training - Step 4039: {'lr': 0.0004997658722019857, 'samples': 2068480, 'steps': 4039, 'loss/train': 2.915877103805542} +03/03/2022 18:09:40 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/03/2022 18:09:45 - INFO - codeparrot_training - Step 4040: {'lr': 0.0004997656425319367, 'samples': 2068992, 'steps': 4040, 'loss/train': 2.5245325565338135} +03/03/2022 18:09:48 - INFO - codeparrot_training - Step 4041: {'lr': 0.0004997654127493473, 'samples': 2069504, 'steps': 4041, 'loss/train': 2.562852382659912} +03/03/2022 18:09:48 - INFO - codeparrot_training - Skipping example with length 934 (seq_length=1024) +03/03/2022 18:09:53 - INFO - codeparrot_training - Step 4042: {'lr': 0.0004997651828542173, 'samples': 2070016, 'steps': 4042, 'loss/train': 2.7951602935791016} +03/03/2022 18:09:56 - INFO - codeparrot_training - Step 4043: {'lr': 0.0004997649528465471, 'samples': 2070528, 'steps': 4043, 'loss/train': 2.896421432495117} +03/03/2022 18:09:56 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/03/2022 18:10:02 - INFO - codeparrot_training - Step 4044: {'lr': 0.0004997647227263367, 'samples': 2071040, 'steps': 4044, 'loss/train': 2.4040536880493164} +03/03/2022 18:10:05 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/03/2022 18:10:07 - INFO - codeparrot_training - Step 4045: {'lr': 0.000499764492493586, 'samples': 2071552, 'steps': 4045, 'loss/train': 2.78424334526062} +03/03/2022 18:10:10 - INFO - codeparrot_training - Step 4046: {'lr': 0.0004997642621482955, 'samples': 2072064, 'steps': 4046, 'loss/train': 1.5856965780258179} +03/03/2022 18:10:13 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/03/2022 18:10:15 - INFO - codeparrot_training - Step 4047: {'lr': 0.0004997640316904649, 'samples': 2072576, 'steps': 4047, 'loss/train': 2.806013584136963} +03/03/2022 18:10:19 - INFO - codeparrot_training - Step 4048: {'lr': 0.0004997638011200946, 'samples': 2073088, 'steps': 4048, 'loss/train': 1.2127141952514648} +03/03/2022 18:10:21 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/03/2022 18:10:24 - INFO - codeparrot_training - Step 4049: {'lr': 0.0004997635704371844, 'samples': 2073600, 'steps': 4049, 'loss/train': 3.2219486236572266} +03/03/2022 18:10:27 - INFO - codeparrot_training - Step 4050: {'lr': 0.0004997633396417348, 'samples': 2074112, 'steps': 4050, 'loss/train': 3.620973825454712} +03/03/2022 18:10:30 - INFO - codeparrot_training - Step 4051: {'lr': 0.0004997631087337456, 'samples': 2074624, 'steps': 4051, 'loss/train': 2.690335988998413} +03/03/2022 18:10:31 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/03/2022 18:10:36 - INFO - codeparrot_training - Step 4052: {'lr': 0.000499762877713217, 'samples': 2075136, 'steps': 4052, 'loss/train': 2.6117894649505615} +03/03/2022 18:10:39 - INFO - codeparrot_training - Step 4053: {'lr': 0.0004997626465801492, 'samples': 2075648, 'steps': 4053, 'loss/train': 2.7060298919677734} +03/03/2022 18:10:39 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/03/2022 18:10:44 - INFO - codeparrot_training - Step 4054: {'lr': 0.000499762415334542, 'samples': 2076160, 'steps': 4054, 'loss/train': 1.8638306856155396} +03/03/2022 18:10:47 - INFO - codeparrot_training - Step 4055: {'lr': 0.0004997621839763958, 'samples': 2076672, 'steps': 4055, 'loss/train': 2.9767332077026367} +03/03/2022 18:10:48 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/03/2022 18:10:52 - INFO - codeparrot_training - Step 4056: {'lr': 0.0004997619525057106, 'samples': 2077184, 'steps': 4056, 'loss/train': 2.5807266235351562} +03/03/2022 18:10:56 - INFO - codeparrot_training - Step 4057: {'lr': 0.0004997617209224866, 'samples': 2077696, 'steps': 4057, 'loss/train': 2.995727062225342} +03/03/2022 18:10:56 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/03/2022 18:11:01 - INFO - codeparrot_training - Step 4058: {'lr': 0.0004997614892267238, 'samples': 2078208, 'steps': 4058, 'loss/train': 2.310736656188965} +03/03/2022 18:11:04 - INFO - codeparrot_training - Step 4059: {'lr': 0.0004997612574184223, 'samples': 2078720, 'steps': 4059, 'loss/train': 2.6918468475341797} +03/03/2022 18:11:05 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/03/2022 18:11:09 - INFO - codeparrot_training - Step 4060: {'lr': 0.0004997610254975823, 'samples': 2079232, 'steps': 4060, 'loss/train': 3.2785415649414062} +03/03/2022 18:11:12 - INFO - codeparrot_training - Step 4061: {'lr': 0.0004997607934642038, 'samples': 2079744, 'steps': 4061, 'loss/train': 2.231865406036377} +03/03/2022 18:11:13 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/03/2022 18:11:18 - INFO - codeparrot_training - Step 4062: {'lr': 0.0004997605613182868, 'samples': 2080256, 'steps': 4062, 'loss/train': 2.827054977416992} +03/03/2022 18:11:21 - INFO - codeparrot_training - Step 4063: {'lr': 0.0004997603290598317, 'samples': 2080768, 'steps': 4063, 'loss/train': 2.4032063484191895} +03/03/2022 18:11:22 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/03/2022 18:11:26 - INFO - codeparrot_training - Step 4064: {'lr': 0.0004997600966888384, 'samples': 2081280, 'steps': 4064, 'loss/train': 2.884215831756592} +03/03/2022 18:11:29 - INFO - codeparrot_training - Step 4065: {'lr': 0.000499759864205307, 'samples': 2081792, 'steps': 4065, 'loss/train': 2.158616781234741} +03/03/2022 18:11:30 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/03/2022 18:11:35 - INFO - codeparrot_training - Step 4066: {'lr': 0.0004997596316092378, 'samples': 2082304, 'steps': 4066, 'loss/train': 2.028759241104126} +03/03/2022 18:11:38 - INFO - codeparrot_training - Step 4067: {'lr': 0.0004997593989006306, 'samples': 2082816, 'steps': 4067, 'loss/train': 2.340603828430176} +03/03/2022 18:11:38 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 18:11:43 - INFO - codeparrot_training - Step 4068: {'lr': 0.0004997591660794858, 'samples': 2083328, 'steps': 4068, 'loss/train': 2.3579812049865723} +03/03/2022 18:11:46 - INFO - codeparrot_training - Step 4069: {'lr': 0.0004997589331458034, 'samples': 2083840, 'steps': 4069, 'loss/train': 2.315962553024292} +03/03/2022 18:11:46 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/03/2022 18:11:52 - INFO - codeparrot_training - Step 4070: {'lr': 0.0004997587000995833, 'samples': 2084352, 'steps': 4070, 'loss/train': 2.3112103939056396} +03/03/2022 18:11:55 - INFO - codeparrot_training - Step 4071: {'lr': 0.000499758466940826, 'samples': 2084864, 'steps': 4071, 'loss/train': 3.152911901473999} +03/03/2022 18:11:55 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/03/2022 18:12:00 - INFO - codeparrot_training - Step 4072: {'lr': 0.0004997582336695312, 'samples': 2085376, 'steps': 4072, 'loss/train': 2.1941304206848145} +03/03/2022 18:12:03 - INFO - codeparrot_training - Step 4073: {'lr': 0.0004997580002856993, 'samples': 2085888, 'steps': 4073, 'loss/train': 2.8319485187530518} +03/03/2022 18:12:03 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/03/2022 18:12:08 - INFO - codeparrot_training - Step 4074: {'lr': 0.0004997577667893303, 'samples': 2086400, 'steps': 4074, 'loss/train': 2.8888895511627197} +03/03/2022 18:12:11 - INFO - codeparrot_training - Step 4075: {'lr': 0.0004997575331804243, 'samples': 2086912, 'steps': 4075, 'loss/train': 2.258307695388794} +03/03/2022 18:12:12 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/03/2022 18:12:17 - INFO - codeparrot_training - Step 4076: {'lr': 0.0004997572994589812, 'samples': 2087424, 'steps': 4076, 'loss/train': 2.183765172958374} +03/03/2022 18:12:20 - INFO - codeparrot_training - Step 4077: {'lr': 0.0004997570656250016, 'samples': 2087936, 'steps': 4077, 'loss/train': 3.7469561100006104} +03/03/2022 18:12:20 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/03/2022 18:12:25 - INFO - codeparrot_training - Step 4078: {'lr': 0.0004997568316784852, 'samples': 2088448, 'steps': 4078, 'loss/train': 2.8327484130859375} +03/03/2022 18:12:28 - INFO - codeparrot_training - Step 4079: {'lr': 0.0004997565976194323, 'samples': 2088960, 'steps': 4079, 'loss/train': 2.171255111694336} +03/03/2022 18:12:29 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/03/2022 18:12:34 - INFO - codeparrot_training - Step 4080: {'lr': 0.0004997563634478429, 'samples': 2089472, 'steps': 4080, 'loss/train': 1.2639844417572021} +03/03/2022 18:12:37 - INFO - codeparrot_training - Step 4081: {'lr': 0.000499756129163717, 'samples': 2089984, 'steps': 4081, 'loss/train': 3.2736730575561523} +03/03/2022 18:12:37 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/03/2022 18:12:42 - INFO - codeparrot_training - Step 4082: {'lr': 0.000499755894767055, 'samples': 2090496, 'steps': 4082, 'loss/train': 2.5604846477508545} +03/03/2022 18:12:45 - INFO - codeparrot_training - Step 4083: {'lr': 0.0004997556602578568, 'samples': 2091008, 'steps': 4083, 'loss/train': 2.595517873764038} +03/03/2022 18:12:46 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 18:12:50 - INFO - codeparrot_training - Step 4084: {'lr': 0.0004997554256361225, 'samples': 2091520, 'steps': 4084, 'loss/train': 2.2262682914733887} +03/03/2022 18:12:54 - INFO - codeparrot_training - Step 4085: {'lr': 0.0004997551909018524, 'samples': 2092032, 'steps': 4085, 'loss/train': 1.6804790496826172} +03/03/2022 18:12:54 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/03/2022 18:12:59 - INFO - codeparrot_training - Step 4086: {'lr': 0.0004997549560550464, 'samples': 2092544, 'steps': 4086, 'loss/train': 2.4673213958740234} +03/03/2022 18:13:02 - INFO - codeparrot_training - Step 4087: {'lr': 0.0004997547210957047, 'samples': 2093056, 'steps': 4087, 'loss/train': 0.6997495889663696} +03/03/2022 18:13:02 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/03/2022 18:13:07 - INFO - codeparrot_training - Step 4088: {'lr': 0.0004997544860238272, 'samples': 2093568, 'steps': 4088, 'loss/train': 1.7764511108398438} +03/03/2022 18:13:10 - INFO - codeparrot_training - Step 4089: {'lr': 0.0004997542508394144, 'samples': 2094080, 'steps': 4089, 'loss/train': 1.9796948432922363} +03/03/2022 18:13:10 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/03/2022 18:13:16 - INFO - codeparrot_training - Step 4090: {'lr': 0.000499754015542466, 'samples': 2094592, 'steps': 4090, 'loss/train': 3.0019431114196777} +03/03/2022 18:13:19 - INFO - codeparrot_training - Step 4091: {'lr': 0.0004997537801329824, 'samples': 2095104, 'steps': 4091, 'loss/train': 1.7092125415802002} +03/03/2022 18:13:19 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/03/2022 18:13:24 - INFO - codeparrot_training - Step 4092: {'lr': 0.0004997535446109637, 'samples': 2095616, 'steps': 4092, 'loss/train': 2.9902400970458984} +03/03/2022 18:13:27 - INFO - codeparrot_training - Step 4093: {'lr': 0.0004997533089764097, 'samples': 2096128, 'steps': 4093, 'loss/train': 2.423766613006592} +03/03/2022 18:13:28 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/03/2022 18:13:33 - INFO - codeparrot_training - Step 4094: {'lr': 0.0004997530732293209, 'samples': 2096640, 'steps': 4094, 'loss/train': 1.3834996223449707} +03/03/2022 18:13:36 - INFO - codeparrot_training - Step 4095: {'lr': 0.000499752837369697, 'samples': 2097152, 'steps': 4095, 'loss/train': 2.4921278953552246} +03/03/2022 18:13:37 - INFO - codeparrot_training - Skipping example with length 660 (seq_length=1024) +03/03/2022 18:13:41 - INFO - codeparrot_training - Step 4096: {'lr': 0.0004997526013975385, 'samples': 2097664, 'steps': 4096, 'loss/train': 2.3582775592803955} +03/03/2022 18:13:44 - INFO - codeparrot_training - Step 4097: {'lr': 0.0004997523653128453, 'samples': 2098176, 'steps': 4097, 'loss/train': 2.5054030418395996} +03/03/2022 18:13:45 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/03/2022 18:13:49 - INFO - codeparrot_training - Step 4098: {'lr': 0.0004997521291156175, 'samples': 2098688, 'steps': 4098, 'loss/train': 2.687840700149536} +03/03/2022 18:13:53 - INFO - codeparrot_training - Step 4099: {'lr': 0.0004997518928058553, 'samples': 2099200, 'steps': 4099, 'loss/train': 2.318593978881836} +03/03/2022 18:13:54 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/03/2022 18:13:58 - INFO - codeparrot_training - Step 4100: {'lr': 0.0004997516563835587, 'samples': 2099712, 'steps': 4100, 'loss/train': 3.0710105895996094} +03/03/2022 18:14:01 - INFO - codeparrot_training - Step 4101: {'lr': 0.0004997514198487279, 'samples': 2100224, 'steps': 4101, 'loss/train': 2.9276015758514404} +03/03/2022 18:14:02 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/03/2022 18:14:06 - INFO - codeparrot_training - Step 4102: {'lr': 0.0004997511832013629, 'samples': 2100736, 'steps': 4102, 'loss/train': 2.666696548461914} +03/03/2022 18:14:09 - INFO - codeparrot_training - Step 4103: {'lr': 0.0004997509464414639, 'samples': 2101248, 'steps': 4103, 'loss/train': 3.1703107357025146} +03/03/2022 18:14:10 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/03/2022 18:14:15 - INFO - codeparrot_training - Step 4104: {'lr': 0.000499750709569031, 'samples': 2101760, 'steps': 4104, 'loss/train': 0.9635540843009949} +03/03/2022 18:14:18 - INFO - codeparrot_training - Step 4105: {'lr': 0.0004997504725840644, 'samples': 2102272, 'steps': 4105, 'loss/train': 1.584394097328186} +03/03/2022 18:14:20 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/03/2022 18:14:23 - INFO - codeparrot_training - Step 4106: {'lr': 0.0004997502354865639, 'samples': 2102784, 'steps': 4106, 'loss/train': 2.15683913230896} +03/03/2022 18:14:26 - INFO - codeparrot_training - Step 4107: {'lr': 0.0004997499982765299, 'samples': 2103296, 'steps': 4107, 'loss/train': 2.5185112953186035} +03/03/2022 18:14:28 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 18:14:32 - INFO - codeparrot_training - Step 4108: {'lr': 0.0004997497609539623, 'samples': 2103808, 'steps': 4108, 'loss/train': 2.416644811630249} +03/03/2022 18:14:35 - INFO - codeparrot_training - Step 4109: {'lr': 0.0004997495235188614, 'samples': 2104320, 'steps': 4109, 'loss/train': 2.281923770904541} +03/03/2022 18:14:36 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/03/2022 18:14:40 - INFO - codeparrot_training - Step 4110: {'lr': 0.0004997492859712272, 'samples': 2104832, 'steps': 4110, 'loss/train': 1.5101902484893799} +03/03/2022 18:14:43 - INFO - codeparrot_training - Step 4111: {'lr': 0.0004997490483110599, 'samples': 2105344, 'steps': 4111, 'loss/train': 0.526434600353241} +03/03/2022 18:14:44 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/03/2022 18:14:48 - INFO - codeparrot_training - Step 4112: {'lr': 0.0004997488105383594, 'samples': 2105856, 'steps': 4112, 'loss/train': 1.918593168258667} +03/03/2022 18:14:52 - INFO - codeparrot_training - Step 4113: {'lr': 0.000499748572653126, 'samples': 2106368, 'steps': 4113, 'loss/train': 2.5217132568359375} +03/03/2022 18:14:53 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 18:14:57 - INFO - codeparrot_training - Step 4114: {'lr': 0.0004997483346553597, 'samples': 2106880, 'steps': 4114, 'loss/train': 2.4129765033721924} +03/03/2022 18:15:00 - INFO - codeparrot_training - Step 4115: {'lr': 0.0004997480965450607, 'samples': 2107392, 'steps': 4115, 'loss/train': 2.6270291805267334} +03/03/2022 18:15:01 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/03/2022 18:15:05 - INFO - codeparrot_training - Step 4116: {'lr': 0.0004997478583222291, 'samples': 2107904, 'steps': 4116, 'loss/train': 1.7648855447769165} +03/03/2022 18:15:08 - INFO - codeparrot_training - Step 4117: {'lr': 0.0004997476199868649, 'samples': 2108416, 'steps': 4117, 'loss/train': 1.9991642236709595} +03/03/2022 18:15:09 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/03/2022 18:15:14 - INFO - codeparrot_training - Step 4118: {'lr': 0.0004997473815389683, 'samples': 2108928, 'steps': 4118, 'loss/train': 2.1075313091278076} +03/03/2022 18:15:17 - INFO - codeparrot_training - Step 4119: {'lr': 0.0004997471429785394, 'samples': 2109440, 'steps': 4119, 'loss/train': 1.0505720376968384} +03/03/2022 18:15:18 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/03/2022 18:15:22 - INFO - codeparrot_training - Step 4120: {'lr': 0.0004997469043055784, 'samples': 2109952, 'steps': 4120, 'loss/train': 2.572298049926758} +03/03/2022 18:15:26 - INFO - codeparrot_training - Step 4121: {'lr': 0.000499746665520085, 'samples': 2110464, 'steps': 4121, 'loss/train': 2.1575143337249756} +03/03/2022 18:15:27 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/03/2022 18:15:31 - INFO - codeparrot_training - Step 4122: {'lr': 0.0004997464266220599, 'samples': 2110976, 'steps': 4122, 'loss/train': 2.810858964920044} +03/03/2022 18:15:34 - INFO - codeparrot_training - Step 4123: {'lr': 0.0004997461876115029, 'samples': 2111488, 'steps': 4123, 'loss/train': 2.393360137939453} +03/03/2022 18:15:35 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/03/2022 18:15:39 - INFO - codeparrot_training - Step 4124: {'lr': 0.0004997459484884139, 'samples': 2112000, 'steps': 4124, 'loss/train': 2.3642711639404297} +03/03/2022 18:15:42 - INFO - codeparrot_training - Step 4125: {'lr': 0.0004997457092527934, 'samples': 2112512, 'steps': 4125, 'loss/train': 1.894789695739746} +03/03/2022 18:15:43 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/03/2022 18:15:48 - INFO - codeparrot_training - Step 4126: {'lr': 0.0004997454699046412, 'samples': 2113024, 'steps': 4126, 'loss/train': 3.0609028339385986} +03/03/2022 18:15:51 - INFO - codeparrot_training - Step 4127: {'lr': 0.0004997452304439577, 'samples': 2113536, 'steps': 4127, 'loss/train': 2.4769256114959717} +03/03/2022 18:15:53 - INFO - codeparrot_training - Skipping example with length 138 (seq_length=1024) +03/03/2022 18:15:56 - INFO - codeparrot_training - Step 4128: {'lr': 0.0004997449908707428, 'samples': 2114048, 'steps': 4128, 'loss/train': 2.600919246673584} +03/03/2022 18:15:59 - INFO - codeparrot_training - Step 4129: {'lr': 0.0004997447511849966, 'samples': 2114560, 'steps': 4129, 'loss/train': 2.6774415969848633} +03/03/2022 18:16:01 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/03/2022 18:16:05 - INFO - codeparrot_training - Step 4130: {'lr': 0.0004997445113867193, 'samples': 2115072, 'steps': 4130, 'loss/train': 3.0764505863189697} +03/03/2022 18:16:08 - INFO - codeparrot_training - Step 4131: {'lr': 0.000499744271475911, 'samples': 2115584, 'steps': 4131, 'loss/train': 1.4300533533096313} +03/03/2022 18:16:09 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/03/2022 18:16:13 - INFO - codeparrot_training - Step 4132: {'lr': 0.0004997440314525718, 'samples': 2116096, 'steps': 4132, 'loss/train': 3.59360408782959} +03/03/2022 18:16:16 - INFO - codeparrot_training - Step 4133: {'lr': 0.0004997437913167018, 'samples': 2116608, 'steps': 4133, 'loss/train': 1.883839726448059} +03/03/2022 18:16:18 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/03/2022 18:16:22 - INFO - codeparrot_training - Step 4134: {'lr': 0.0004997435510683011, 'samples': 2117120, 'steps': 4134, 'loss/train': 2.075192451477051} +03/03/2022 18:16:25 - INFO - codeparrot_training - Step 4135: {'lr': 0.0004997433107073697, 'samples': 2117632, 'steps': 4135, 'loss/train': 2.5948169231414795} +03/03/2022 18:16:26 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/03/2022 18:16:30 - INFO - codeparrot_training - Step 4136: {'lr': 0.000499743070233908, 'samples': 2118144, 'steps': 4136, 'loss/train': 2.7506139278411865} +03/03/2022 18:16:33 - INFO - codeparrot_training - Step 4137: {'lr': 0.0004997428296479158, 'samples': 2118656, 'steps': 4137, 'loss/train': 2.5044801235198975} +03/03/2022 18:16:35 - INFO - codeparrot_training - Skipping example with length 756 (seq_length=1024) +03/03/2022 18:16:39 - INFO - codeparrot_training - Step 4138: {'lr': 0.0004997425889493933, 'samples': 2119168, 'steps': 4138, 'loss/train': 2.9602603912353516} +03/03/2022 18:16:42 - INFO - codeparrot_training - Step 4139: {'lr': 0.0004997423481383407, 'samples': 2119680, 'steps': 4139, 'loss/train': 3.964172124862671} +03/03/2022 18:16:43 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/03/2022 18:16:47 - INFO - codeparrot_training - Step 4140: {'lr': 0.0004997421072147581, 'samples': 2120192, 'steps': 4140, 'loss/train': 2.4455370903015137} +03/03/2022 18:16:50 - INFO - codeparrot_training - Step 4141: {'lr': 0.0004997418661786455, 'samples': 2120704, 'steps': 4141, 'loss/train': 1.569913387298584} +03/03/2022 18:16:52 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/03/2022 18:16:56 - INFO - codeparrot_training - Step 4142: {'lr': 0.0004997416250300031, 'samples': 2121216, 'steps': 4142, 'loss/train': 1.0883207321166992} +03/03/2022 18:16:59 - INFO - codeparrot_training - Step 4143: {'lr': 0.0004997413837688309, 'samples': 2121728, 'steps': 4143, 'loss/train': 2.9094440937042236} +03/03/2022 18:17:00 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/03/2022 18:17:04 - INFO - codeparrot_training - Step 4144: {'lr': 0.0004997411423951292, 'samples': 2122240, 'steps': 4144, 'loss/train': 2.9646050930023193} +03/03/2022 18:17:07 - INFO - codeparrot_training - Step 4145: {'lr': 0.0004997409009088979, 'samples': 2122752, 'steps': 4145, 'loss/train': 2.3445956707000732} +03/03/2022 18:17:09 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/03/2022 18:17:13 - INFO - codeparrot_training - Step 4146: {'lr': 0.0004997406593101373, 'samples': 2123264, 'steps': 4146, 'loss/train': 1.9361507892608643} +03/03/2022 18:17:16 - INFO - codeparrot_training - Step 4147: {'lr': 0.0004997404175988474, 'samples': 2123776, 'steps': 4147, 'loss/train': 2.9918205738067627} +03/03/2022 18:17:17 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/03/2022 18:17:21 - INFO - codeparrot_training - Step 4148: {'lr': 0.0004997401757750282, 'samples': 2124288, 'steps': 4148, 'loss/train': 2.99582839012146} +03/03/2022 18:17:24 - INFO - codeparrot_training - Step 4149: {'lr': 0.00049973993383868, 'samples': 2124800, 'steps': 4149, 'loss/train': 3.0628974437713623} +03/03/2022 18:17:26 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/03/2022 18:17:30 - INFO - codeparrot_training - Step 4150: {'lr': 0.0004997396917898029, 'samples': 2125312, 'steps': 4150, 'loss/train': 3.2338919639587402} +03/03/2022 18:17:33 - INFO - codeparrot_training - Step 4151: {'lr': 0.0004997394496283969, 'samples': 2125824, 'steps': 4151, 'loss/train': 2.5238869190216064} +03/03/2022 18:17:34 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/03/2022 18:17:38 - INFO - codeparrot_training - Step 4152: {'lr': 0.0004997392073544622, 'samples': 2126336, 'steps': 4152, 'loss/train': 3.1680476665496826} +03/03/2022 18:17:41 - INFO - codeparrot_training - Step 4153: {'lr': 0.0004997389649679987, 'samples': 2126848, 'steps': 4153, 'loss/train': 1.4233876466751099} +03/03/2022 18:17:42 - INFO - codeparrot_training - Skipping example with length 107 (seq_length=1024) +03/03/2022 18:17:46 - INFO - codeparrot_training - Step 4154: {'lr': 0.0004997387224690068, 'samples': 2127360, 'steps': 4154, 'loss/train': 2.5396082401275635} +03/03/2022 18:17:49 - INFO - codeparrot_training - Step 4155: {'lr': 0.0004997384798574865, 'samples': 2127872, 'steps': 4155, 'loss/train': 2.4975132942199707} +03/03/2022 18:17:50 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/03/2022 18:17:55 - INFO - codeparrot_training - Step 4156: {'lr': 0.0004997382371334379, 'samples': 2128384, 'steps': 4156, 'loss/train': 3.6286377906799316} +03/03/2022 18:17:58 - INFO - codeparrot_training - Step 4157: {'lr': 0.0004997379942968611, 'samples': 2128896, 'steps': 4157, 'loss/train': 2.523087501525879} +03/03/2022 18:17:59 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/03/2022 18:18:03 - INFO - codeparrot_training - Step 4158: {'lr': 0.0004997377513477562, 'samples': 2129408, 'steps': 4158, 'loss/train': 3.208761215209961} +03/03/2022 18:18:06 - INFO - codeparrot_training - Step 4159: {'lr': 0.0004997375082861234, 'samples': 2129920, 'steps': 4159, 'loss/train': 0.6377323865890503} +03/03/2022 18:18:07 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/03/2022 18:18:11 - INFO - codeparrot_training - Step 4160: {'lr': 0.0004997372651119626, 'samples': 2130432, 'steps': 4160, 'loss/train': 2.6335766315460205} +03/03/2022 18:18:15 - INFO - codeparrot_training - Step 4161: {'lr': 0.0004997370218252741, 'samples': 2130944, 'steps': 4161, 'loss/train': 2.5048093795776367} +03/03/2022 18:18:15 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/03/2022 18:18:20 - INFO - codeparrot_training - Step 4162: {'lr': 0.000499736778426058, 'samples': 2131456, 'steps': 4162, 'loss/train': 2.977962017059326} +03/03/2022 18:18:23 - INFO - codeparrot_training - Step 4163: {'lr': 0.0004997365349143142, 'samples': 2131968, 'steps': 4163, 'loss/train': 0.8089989423751831} +03/03/2022 18:18:24 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 18:18:29 - INFO - codeparrot_training - Step 4164: {'lr': 0.0004997362912900432, 'samples': 2132480, 'steps': 4164, 'loss/train': 0.4729238450527191} +03/03/2022 18:18:32 - INFO - codeparrot_training - Step 4165: {'lr': 0.0004997360475532447, 'samples': 2132992, 'steps': 4165, 'loss/train': 2.9363718032836914} +03/03/2022 18:18:32 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/03/2022 18:18:37 - INFO - codeparrot_training - Step 4166: {'lr': 0.000499735803703919, 'samples': 2133504, 'steps': 4166, 'loss/train': 1.6260018348693848} +03/03/2022 18:18:40 - INFO - codeparrot_training - Step 4167: {'lr': 0.0004997355597420663, 'samples': 2134016, 'steps': 4167, 'loss/train': 2.8573882579803467} +03/03/2022 18:18:41 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/03/2022 18:18:45 - INFO - codeparrot_training - Step 4168: {'lr': 0.0004997353156676866, 'samples': 2134528, 'steps': 4168, 'loss/train': 2.9690592288970947} +03/03/2022 18:18:48 - INFO - codeparrot_training - Step 4169: {'lr': 0.0004997350714807799, 'samples': 2135040, 'steps': 4169, 'loss/train': 2.972532272338867} +03/03/2022 18:18:49 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/03/2022 18:18:54 - INFO - codeparrot_training - Step 4170: {'lr': 0.0004997348271813466, 'samples': 2135552, 'steps': 4170, 'loss/train': 3.199317455291748} +03/03/2022 18:18:57 - INFO - codeparrot_training - Step 4171: {'lr': 0.0004997345827693865, 'samples': 2136064, 'steps': 4171, 'loss/train': 2.829935073852539} +03/03/2022 18:18:57 - INFO - codeparrot_training - Skipping example with length 260 (seq_length=1024) +03/03/2022 18:19:02 - INFO - codeparrot_training - Step 4172: {'lr': 0.0004997343382448999, 'samples': 2136576, 'steps': 4172, 'loss/train': 2.5150444507598877} +03/03/2022 18:19:05 - INFO - codeparrot_training - Step 4173: {'lr': 0.0004997340936078869, 'samples': 2137088, 'steps': 4173, 'loss/train': 2.6708381175994873} +03/03/2022 18:19:05 - INFO - codeparrot_training - Skipping example with length 988 (seq_length=1024) +03/03/2022 18:19:10 - INFO - codeparrot_training - Step 4174: {'lr': 0.0004997338488583475, 'samples': 2137600, 'steps': 4174, 'loss/train': 1.084150791168213} +03/03/2022 18:19:14 - INFO - codeparrot_training - Step 4175: {'lr': 0.000499733603996282, 'samples': 2138112, 'steps': 4175, 'loss/train': 3.284475326538086} +03/03/2022 18:19:14 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/03/2022 18:19:19 - INFO - codeparrot_training - Step 4176: {'lr': 0.0004997333590216902, 'samples': 2138624, 'steps': 4176, 'loss/train': 2.739633798599243} +03/03/2022 18:19:22 - INFO - codeparrot_training - Step 4177: {'lr': 0.0004997331139345725, 'samples': 2139136, 'steps': 4177, 'loss/train': 3.0070743560791016} +03/03/2022 18:19:22 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/03/2022 18:19:27 - INFO - codeparrot_training - Step 4178: {'lr': 0.000499732868734929, 'samples': 2139648, 'steps': 4178, 'loss/train': 1.8760261535644531} +03/03/2022 18:19:30 - INFO - codeparrot_training - Step 4179: {'lr': 0.0004997326234227596, 'samples': 2140160, 'steps': 4179, 'loss/train': 1.5933669805526733} +03/03/2022 18:19:31 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/03/2022 18:19:36 - INFO - codeparrot_training - Step 4180: {'lr': 0.0004997323779980646, 'samples': 2140672, 'steps': 4180, 'loss/train': 2.30065655708313} +03/03/2022 18:19:39 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/03/2022 18:19:41 - INFO - codeparrot_training - Step 4181: {'lr': 0.0004997321324608441, 'samples': 2141184, 'steps': 4181, 'loss/train': 2.3813674449920654} +03/03/2022 18:19:44 - INFO - codeparrot_training - Step 4182: {'lr': 0.0004997318868110981, 'samples': 2141696, 'steps': 4182, 'loss/train': 1.6555471420288086} +03/03/2022 18:19:48 - INFO - codeparrot_training - Step 4183: {'lr': 0.0004997316410488267, 'samples': 2142208, 'steps': 4183, 'loss/train': 1.8753160238265991} +03/03/2022 18:19:48 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/03/2022 18:19:53 - INFO - codeparrot_training - Step 4184: {'lr': 0.0004997313951740301, 'samples': 2142720, 'steps': 4184, 'loss/train': 2.5217244625091553} +03/03/2022 18:19:56 - INFO - codeparrot_training - Step 4185: {'lr': 0.0004997311491867083, 'samples': 2143232, 'steps': 4185, 'loss/train': 1.9755817651748657} +03/03/2022 18:19:56 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/03/2022 18:20:01 - INFO - codeparrot_training - Step 4186: {'lr': 0.0004997309030868617, 'samples': 2143744, 'steps': 4186, 'loss/train': 2.2549116611480713} +03/03/2022 18:20:05 - INFO - codeparrot_training - Step 4187: {'lr': 0.0004997306568744901, 'samples': 2144256, 'steps': 4187, 'loss/train': 0.9781758189201355} +03/03/2022 18:20:05 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/03/2022 18:20:10 - INFO - codeparrot_training - Step 4188: {'lr': 0.0004997304105495938, 'samples': 2144768, 'steps': 4188, 'loss/train': 3.3931400775909424} +03/03/2022 18:20:13 - INFO - codeparrot_training - Step 4189: {'lr': 0.0004997301641121727, 'samples': 2145280, 'steps': 4189, 'loss/train': 1.42490816116333} +03/03/2022 18:20:13 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/03/2022 18:20:18 - INFO - codeparrot_training - Step 4190: {'lr': 0.0004997299175622271, 'samples': 2145792, 'steps': 4190, 'loss/train': 1.298801064491272} +03/03/2022 18:20:21 - INFO - codeparrot_training - Step 4191: {'lr': 0.000499729670899757, 'samples': 2146304, 'steps': 4191, 'loss/train': 0.4252501428127289} +03/03/2022 18:20:22 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/03/2022 18:20:27 - INFO - codeparrot_training - Step 4192: {'lr': 0.0004997294241247627, 'samples': 2146816, 'steps': 4192, 'loss/train': 2.6930501461029053} +03/03/2022 18:20:30 - INFO - codeparrot_training - Step 4193: {'lr': 0.0004997291772372441, 'samples': 2147328, 'steps': 4193, 'loss/train': 2.5323684215545654} +03/03/2022 18:20:31 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/03/2022 18:20:35 - INFO - codeparrot_training - Step 4194: {'lr': 0.0004997289302372014, 'samples': 2147840, 'steps': 4194, 'loss/train': 2.235337257385254} +03/03/2022 18:20:38 - INFO - codeparrot_training - Step 4195: {'lr': 0.0004997286831246347, 'samples': 2148352, 'steps': 4195, 'loss/train': 1.4446816444396973} +03/03/2022 18:20:39 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/03/2022 18:20:44 - INFO - codeparrot_training - Step 4196: {'lr': 0.0004997284358995441, 'samples': 2148864, 'steps': 4196, 'loss/train': 2.681225299835205} +03/03/2022 18:20:47 - INFO - codeparrot_training - Step 4197: {'lr': 0.0004997281885619297, 'samples': 2149376, 'steps': 4197, 'loss/train': 3.0499494075775146} +03/03/2022 18:20:47 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/03/2022 18:20:52 - INFO - codeparrot_training - Step 4198: {'lr': 0.0004997279411117916, 'samples': 2149888, 'steps': 4198, 'loss/train': 2.9221158027648926} +03/03/2022 18:20:55 - INFO - codeparrot_training - Step 4199: {'lr': 0.00049972769354913, 'samples': 2150400, 'steps': 4199, 'loss/train': 2.7829251289367676} +03/03/2022 18:20:56 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/03/2022 18:21:00 - INFO - codeparrot_training - Step 4200: {'lr': 0.0004997274458739449, 'samples': 2150912, 'steps': 4200, 'loss/train': 2.388589382171631} +03/03/2022 18:21:04 - INFO - codeparrot_training - Step 4201: {'lr': 0.0004997271980862366, 'samples': 2151424, 'steps': 4201, 'loss/train': 2.827019214630127} +03/03/2022 18:21:04 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/03/2022 18:21:09 - INFO - codeparrot_training - Step 4202: {'lr': 0.000499726950186005, 'samples': 2151936, 'steps': 4202, 'loss/train': 1.8681377172470093} +03/03/2022 18:21:12 - INFO - codeparrot_training - Step 4203: {'lr': 0.0004997267021732502, 'samples': 2152448, 'steps': 4203, 'loss/train': 2.7481272220611572} +03/03/2022 18:21:13 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/03/2022 18:21:17 - INFO - codeparrot_training - Step 4204: {'lr': 0.0004997264540479724, 'samples': 2152960, 'steps': 4204, 'loss/train': 2.2508490085601807} +03/03/2022 18:21:21 - INFO - codeparrot_training - Step 4205: {'lr': 0.0004997262058101719, 'samples': 2153472, 'steps': 4205, 'loss/train': 2.915381908416748} +03/03/2022 18:21:22 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/03/2022 18:21:26 - INFO - codeparrot_training - Step 4206: {'lr': 0.0004997259574598485, 'samples': 2153984, 'steps': 4206, 'loss/train': 2.1661908626556396} +03/03/2022 18:21:29 - INFO - codeparrot_training - Step 4207: {'lr': 0.0004997257089970024, 'samples': 2154496, 'steps': 4207, 'loss/train': 2.371553421020508} +03/03/2022 18:21:30 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/03/2022 18:21:34 - INFO - codeparrot_training - Step 4208: {'lr': 0.0004997254604216338, 'samples': 2155008, 'steps': 4208, 'loss/train': 2.215244770050049} +03/03/2022 18:21:37 - INFO - codeparrot_training - Step 4209: {'lr': 0.0004997252117337428, 'samples': 2155520, 'steps': 4209, 'loss/train': 2.8784711360931396} +03/03/2022 18:21:38 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/03/2022 18:21:43 - INFO - codeparrot_training - Step 4210: {'lr': 0.0004997249629333294, 'samples': 2156032, 'steps': 4210, 'loss/train': 1.9061493873596191} +03/03/2022 18:21:46 - INFO - codeparrot_training - Step 4211: {'lr': 0.0004997247140203939, 'samples': 2156544, 'steps': 4211, 'loss/train': 2.4784083366394043} +03/03/2022 18:21:46 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/03/2022 18:21:51 - INFO - codeparrot_training - Step 4212: {'lr': 0.0004997244649949362, 'samples': 2157056, 'steps': 4212, 'loss/train': 2.107531785964966} +03/03/2022 18:21:54 - INFO - codeparrot_training - Step 4213: {'lr': 0.0004997242158569564, 'samples': 2157568, 'steps': 4213, 'loss/train': 3.0238053798675537} +03/03/2022 18:21:55 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/03/2022 18:21:59 - INFO - codeparrot_training - Step 4214: {'lr': 0.0004997239666064549, 'samples': 2158080, 'steps': 4214, 'loss/train': 2.926823377609253} +03/03/2022 18:22:03 - INFO - codeparrot_training - Step 4215: {'lr': 0.0004997237172434316, 'samples': 2158592, 'steps': 4215, 'loss/train': 2.6826536655426025} +03/03/2022 18:22:04 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/03/2022 18:22:08 - INFO - codeparrot_training - Step 4216: {'lr': 0.0004997234677678867, 'samples': 2159104, 'steps': 4216, 'loss/train': 1.9293338060379028} +03/03/2022 18:22:11 - INFO - codeparrot_training - Step 4217: {'lr': 0.0004997232181798201, 'samples': 2159616, 'steps': 4217, 'loss/train': 2.3123693466186523} +03/03/2022 18:22:12 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/03/2022 18:22:16 - INFO - codeparrot_training - Step 4218: {'lr': 0.0004997229684792322, 'samples': 2160128, 'steps': 4218, 'loss/train': 2.6831119060516357} +03/03/2022 18:22:19 - INFO - codeparrot_training - Step 4219: {'lr': 0.000499722718666123, 'samples': 2160640, 'steps': 4219, 'loss/train': 2.3265035152435303} +03/03/2022 18:22:20 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/03/2022 18:22:25 - INFO - codeparrot_training - Step 4220: {'lr': 0.0004997224687404926, 'samples': 2161152, 'steps': 4220, 'loss/train': 3.8408584594726562} +03/03/2022 18:22:28 - INFO - codeparrot_training - Step 4221: {'lr': 0.0004997222187023409, 'samples': 2161664, 'steps': 4221, 'loss/train': 2.347179651260376} +03/03/2022 18:22:28 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/03/2022 18:22:33 - INFO - codeparrot_training - Step 4222: {'lr': 0.0004997219685516684, 'samples': 2162176, 'steps': 4222, 'loss/train': 0.5493170619010925} +03/03/2022 18:22:36 - INFO - codeparrot_training - Step 4223: {'lr': 0.000499721718288475, 'samples': 2162688, 'steps': 4223, 'loss/train': 3.4518625736236572} +03/03/2022 18:22:36 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/03/2022 18:22:41 - INFO - codeparrot_training - Step 4224: {'lr': 0.0004997214679127609, 'samples': 2163200, 'steps': 4224, 'loss/train': 7.402772903442383} +03/03/2022 18:22:45 - INFO - codeparrot_training - Step 4225: {'lr': 0.000499721217424526, 'samples': 2163712, 'steps': 4225, 'loss/train': 1.6710703372955322} +03/03/2022 18:22:46 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/03/2022 18:22:50 - INFO - codeparrot_training - Step 4226: {'lr': 0.0004997209668237707, 'samples': 2164224, 'steps': 4226, 'loss/train': 2.7258856296539307} +03/03/2022 18:22:53 - INFO - codeparrot_training - Step 4227: {'lr': 0.0004997207161104951, 'samples': 2164736, 'steps': 4227, 'loss/train': 3.111036777496338} +03/03/2022 18:22:54 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/03/2022 18:22:58 - INFO - codeparrot_training - Step 4228: {'lr': 0.0004997204652846991, 'samples': 2165248, 'steps': 4228, 'loss/train': 2.2086572647094727} +03/03/2022 18:23:02 - INFO - codeparrot_training - Step 4229: {'lr': 0.0004997202143463828, 'samples': 2165760, 'steps': 4229, 'loss/train': 1.5683060884475708} +03/03/2022 18:23:02 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/03/2022 18:23:07 - INFO - codeparrot_training - Step 4230: {'lr': 0.0004997199632955464, 'samples': 2166272, 'steps': 4230, 'loss/train': 1.8688592910766602} +03/03/2022 18:23:10 - INFO - codeparrot_training - Step 4231: {'lr': 0.0004997197121321903, 'samples': 2166784, 'steps': 4231, 'loss/train': 2.0556914806365967} +03/03/2022 18:23:10 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/03/2022 18:23:15 - INFO - codeparrot_training - Step 4232: {'lr': 0.0004997194608563142, 'samples': 2167296, 'steps': 4232, 'loss/train': 2.1599605083465576} +03/03/2022 18:23:18 - INFO - codeparrot_training - Step 4233: {'lr': 0.0004997192094679183, 'samples': 2167808, 'steps': 4233, 'loss/train': 7.5924601554870605} +03/03/2022 18:23:20 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/03/2022 18:23:24 - INFO - codeparrot_training - Step 4234: {'lr': 0.0004997189579670028, 'samples': 2168320, 'steps': 4234, 'loss/train': 1.3628270626068115} +03/03/2022 18:23:28 - INFO - codeparrot_training - Step 4235: {'lr': 0.0004997187063535679, 'samples': 2168832, 'steps': 4235, 'loss/train': 3.244776725769043} +03/03/2022 18:23:31 - INFO - codeparrot_training - Step 4236: {'lr': 0.0004997184546276135, 'samples': 2169344, 'steps': 4236, 'loss/train': 3.347801685333252} +03/03/2022 18:23:31 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/03/2022 18:23:36 - INFO - codeparrot_training - Step 4237: {'lr': 0.0004997182027891399, 'samples': 2169856, 'steps': 4237, 'loss/train': 2.7080979347229004} +03/03/2022 18:23:39 - INFO - codeparrot_training - Step 4238: {'lr': 0.000499717950838147, 'samples': 2170368, 'steps': 4238, 'loss/train': 2.261035203933716} +03/03/2022 18:23:40 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 18:23:45 - INFO - codeparrot_training - Step 4239: {'lr': 0.0004997176987746352, 'samples': 2170880, 'steps': 4239, 'loss/train': 0.747605562210083} +03/03/2022 18:23:48 - INFO - codeparrot_training - Step 4240: {'lr': 0.0004997174465986043, 'samples': 2171392, 'steps': 4240, 'loss/train': 1.8044626712799072} +03/03/2022 18:23:49 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/03/2022 18:23:53 - INFO - codeparrot_training - Step 4241: {'lr': 0.0004997171943100547, 'samples': 2171904, 'steps': 4241, 'loss/train': 2.823612928390503} +03/03/2022 18:23:56 - INFO - codeparrot_training - Step 4242: {'lr': 0.0004997169419089863, 'samples': 2172416, 'steps': 4242, 'loss/train': 1.7056597471237183} +03/03/2022 18:23:57 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/03/2022 18:24:01 - INFO - codeparrot_training - Step 4243: {'lr': 0.0004997166893953994, 'samples': 2172928, 'steps': 4243, 'loss/train': 2.243332624435425} +03/03/2022 18:24:04 - INFO - codeparrot_training - Step 4244: {'lr': 0.000499716436769294, 'samples': 2173440, 'steps': 4244, 'loss/train': 2.3363187313079834} +03/03/2022 18:24:05 - INFO - codeparrot_training - Skipping example with length 750 (seq_length=1024) +03/03/2022 18:24:10 - INFO - codeparrot_training - Step 4245: {'lr': 0.0004997161840306701, 'samples': 2173952, 'steps': 4245, 'loss/train': 2.5776519775390625} +03/03/2022 18:24:13 - INFO - codeparrot_training - Step 4246: {'lr': 0.0004997159311795281, 'samples': 2174464, 'steps': 4246, 'loss/train': 2.3468308448791504} +03/03/2022 18:24:13 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/03/2022 18:24:18 - INFO - codeparrot_training - Step 4247: {'lr': 0.0004997156782158679, 'samples': 2174976, 'steps': 4247, 'loss/train': 2.2870025634765625} +03/03/2022 18:24:21 - INFO - codeparrot_training - Step 4248: {'lr': 0.0004997154251396896, 'samples': 2175488, 'steps': 4248, 'loss/train': 3.344219207763672} +03/03/2022 18:24:21 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/03/2022 18:24:27 - INFO - codeparrot_training - Step 4249: {'lr': 0.0004997151719509935, 'samples': 2176000, 'steps': 4249, 'loss/train': 1.333770990371704} +03/03/2022 18:24:30 - INFO - codeparrot_training - Step 4250: {'lr': 0.0004997149186497795, 'samples': 2176512, 'steps': 4250, 'loss/train': 2.816612958908081} +03/03/2022 18:24:30 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/03/2022 18:24:35 - INFO - codeparrot_training - Step 4251: {'lr': 0.0004997146652360478, 'samples': 2177024, 'steps': 4251, 'loss/train': 2.37959885597229} +03/03/2022 18:24:38 - INFO - codeparrot_training - Step 4252: {'lr': 0.0004997144117097986, 'samples': 2177536, 'steps': 4252, 'loss/train': 2.9103994369506836} +03/03/2022 18:24:39 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/03/2022 18:24:43 - INFO - codeparrot_training - Step 4253: {'lr': 0.0004997141580710318, 'samples': 2178048, 'steps': 4253, 'loss/train': 2.08974289894104} +03/03/2022 18:24:47 - INFO - codeparrot_training - Step 4254: {'lr': 0.0004997139043197478, 'samples': 2178560, 'steps': 4254, 'loss/train': 2.9145236015319824} +03/03/2022 18:24:47 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 18:24:52 - INFO - codeparrot_training - Step 4255: {'lr': 0.0004997136504559465, 'samples': 2179072, 'steps': 4255, 'loss/train': 2.6321043968200684} +03/03/2022 18:24:55 - INFO - codeparrot_training - Step 4256: {'lr': 0.0004997133964796281, 'samples': 2179584, 'steps': 4256, 'loss/train': 2.3172450065612793} +03/03/2022 18:24:55 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/03/2022 18:25:01 - INFO - codeparrot_training - Step 4257: {'lr': 0.0004997131423907927, 'samples': 2180096, 'steps': 4257, 'loss/train': 3.576275110244751} +03/03/2022 18:25:04 - INFO - codeparrot_training - Step 4258: {'lr': 0.0004997128881894404, 'samples': 2180608, 'steps': 4258, 'loss/train': 3.177367687225342} +03/03/2022 18:25:06 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/03/2022 18:25:09 - INFO - codeparrot_training - Step 4259: {'lr': 0.0004997126338755714, 'samples': 2181120, 'steps': 4259, 'loss/train': 3.177424669265747} +03/03/2022 18:25:13 - INFO - codeparrot_training - Step 4260: {'lr': 0.0004997123794491856, 'samples': 2181632, 'steps': 4260, 'loss/train': 1.8041192293167114} +03/03/2022 18:25:14 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/03/2022 18:25:18 - INFO - codeparrot_training - Step 4261: {'lr': 0.0004997121249102834, 'samples': 2182144, 'steps': 4261, 'loss/train': 2.2417562007904053} +03/03/2022 18:25:21 - INFO - codeparrot_training - Step 4262: {'lr': 0.0004997118702588647, 'samples': 2182656, 'steps': 4262, 'loss/train': 2.5560426712036133} +03/03/2022 18:25:23 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/03/2022 18:25:26 - INFO - codeparrot_training - Step 4263: {'lr': 0.0004997116154949297, 'samples': 2183168, 'steps': 4263, 'loss/train': 2.5086123943328857} +03/03/2022 18:25:29 - INFO - codeparrot_training - Step 4264: {'lr': 0.0004997113606184785, 'samples': 2183680, 'steps': 4264, 'loss/train': 3.7005512714385986} +03/03/2022 18:25:31 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/03/2022 18:25:35 - INFO - codeparrot_training - Step 4265: {'lr': 0.0004997111056295111, 'samples': 2184192, 'steps': 4265, 'loss/train': 2.2525722980499268} +03/03/2022 18:25:38 - INFO - codeparrot_training - Step 4266: {'lr': 0.0004997108505280279, 'samples': 2184704, 'steps': 4266, 'loss/train': 2.904784917831421} +03/03/2022 18:25:39 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/03/2022 18:25:43 - INFO - codeparrot_training - Step 4267: {'lr': 0.0004997105953140288, 'samples': 2185216, 'steps': 4267, 'loss/train': 2.262169361114502} +03/03/2022 18:25:46 - INFO - codeparrot_training - Step 4268: {'lr': 0.0004997103399875139, 'samples': 2185728, 'steps': 4268, 'loss/train': 2.8045146465301514} +03/03/2022 18:25:47 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/03/2022 18:25:51 - INFO - codeparrot_training - Step 4269: {'lr': 0.0004997100845484834, 'samples': 2186240, 'steps': 4269, 'loss/train': 2.2560219764709473} +03/03/2022 18:25:55 - INFO - codeparrot_training - Step 4270: {'lr': 0.0004997098289969374, 'samples': 2186752, 'steps': 4270, 'loss/train': 2.0662131309509277} +03/03/2022 18:25:56 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 18:26:00 - INFO - codeparrot_training - Step 4271: {'lr': 0.0004997095733328761, 'samples': 2187264, 'steps': 4271, 'loss/train': 1.531733751296997} +03/03/2022 18:26:03 - INFO - codeparrot_training - Step 4272: {'lr': 0.0004997093175562994, 'samples': 2187776, 'steps': 4272, 'loss/train': 2.7360663414001465} +03/03/2022 18:26:04 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/03/2022 18:26:08 - INFO - codeparrot_training - Step 4273: {'lr': 0.0004997090616672076, 'samples': 2188288, 'steps': 4273, 'loss/train': 2.1304633617401123} +03/03/2022 18:26:11 - INFO - codeparrot_training - Step 4274: {'lr': 0.0004997088056656006, 'samples': 2188800, 'steps': 4274, 'loss/train': 2.0623817443847656} +03/03/2022 18:26:12 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/03/2022 18:26:17 - INFO - codeparrot_training - Step 4275: {'lr': 0.0004997085495514788, 'samples': 2189312, 'steps': 4275, 'loss/train': 2.2538864612579346} +03/03/2022 18:26:20 - INFO - codeparrot_training - Step 4276: {'lr': 0.0004997082933248421, 'samples': 2189824, 'steps': 4276, 'loss/train': 3.050964832305908} +03/03/2022 18:26:21 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/03/2022 18:26:25 - INFO - codeparrot_training - Step 4277: {'lr': 0.0004997080369856907, 'samples': 2190336, 'steps': 4277, 'loss/train': 2.590604066848755} +03/03/2022 18:26:28 - INFO - codeparrot_training - Step 4278: {'lr': 0.0004997077805340248, 'samples': 2190848, 'steps': 4278, 'loss/train': 0.72682785987854} +03/03/2022 18:26:29 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/03/2022 18:26:34 - INFO - codeparrot_training - Step 4279: {'lr': 0.0004997075239698445, 'samples': 2191360, 'steps': 4279, 'loss/train': 1.6795930862426758} +03/03/2022 18:26:37 - INFO - codeparrot_training - Step 4280: {'lr': 0.0004997072672931497, 'samples': 2191872, 'steps': 4280, 'loss/train': 0.8268131017684937} +03/03/2022 18:26:38 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/03/2022 18:26:42 - INFO - codeparrot_training - Step 4281: {'lr': 0.0004997070105039407, 'samples': 2192384, 'steps': 4281, 'loss/train': 2.489441394805908} +03/03/2022 18:26:45 - INFO - codeparrot_training - Step 4282: {'lr': 0.0004997067536022176, 'samples': 2192896, 'steps': 4282, 'loss/train': 2.314330577850342} +03/03/2022 18:26:46 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/03/2022 18:26:51 - INFO - codeparrot_training - Step 4283: {'lr': 0.0004997064965879804, 'samples': 2193408, 'steps': 4283, 'loss/train': 2.8974599838256836} +03/03/2022 18:26:54 - INFO - codeparrot_training - Step 4284: {'lr': 0.0004997062394612293, 'samples': 2193920, 'steps': 4284, 'loss/train': 1.5091400146484375} +03/03/2022 18:26:55 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/03/2022 18:26:59 - INFO - codeparrot_training - Step 4285: {'lr': 0.0004997059822219645, 'samples': 2194432, 'steps': 4285, 'loss/train': 1.181900143623352} +03/03/2022 18:27:02 - INFO - codeparrot_training - Step 4286: {'lr': 0.000499705724870186, 'samples': 2194944, 'steps': 4286, 'loss/train': 2.6882920265197754} +03/03/2022 18:27:03 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/03/2022 18:27:07 - INFO - codeparrot_training - Step 4287: {'lr': 0.0004997054674058941, 'samples': 2195456, 'steps': 4287, 'loss/train': 2.496251344680786} +03/03/2022 18:27:11 - INFO - codeparrot_training - Step 4288: {'lr': 0.0004997052098290886, 'samples': 2195968, 'steps': 4288, 'loss/train': 2.141219139099121} +03/03/2022 18:27:11 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/03/2022 18:27:16 - INFO - codeparrot_training - Step 4289: {'lr': 0.0004997049521397698, 'samples': 2196480, 'steps': 4289, 'loss/train': 3.410372495651245} +03/03/2022 18:27:19 - INFO - codeparrot_training - Step 4290: {'lr': 0.0004997046943379379, 'samples': 2196992, 'steps': 4290, 'loss/train': 2.416654348373413} +03/03/2022 18:27:20 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/03/2022 18:27:24 - INFO - codeparrot_training - Step 4291: {'lr': 0.0004997044364235928, 'samples': 2197504, 'steps': 4291, 'loss/train': 1.5918540954589844} +03/03/2022 18:27:27 - INFO - codeparrot_training - Step 4292: {'lr': 0.0004997041783967348, 'samples': 2198016, 'steps': 4292, 'loss/train': 2.547029733657837} +03/03/2022 18:27:28 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/03/2022 18:27:33 - INFO - codeparrot_training - Step 4293: {'lr': 0.0004997039202573639, 'samples': 2198528, 'steps': 4293, 'loss/train': 0.7370705604553223} +03/03/2022 18:27:36 - INFO - codeparrot_training - Step 4294: {'lr': 0.0004997036620054803, 'samples': 2199040, 'steps': 4294, 'loss/train': 3.696993350982666} +03/03/2022 18:27:41 - INFO - codeparrot_training - Step 4295: {'lr': 0.0004997034036410841, 'samples': 2199552, 'steps': 4295, 'loss/train': 3.2373900413513184} +03/03/2022 18:27:44 - INFO - codeparrot_training - Step 4296: {'lr': 0.0004997031451641754, 'samples': 2200064, 'steps': 4296, 'loss/train': 2.544260025024414} +03/03/2022 18:27:45 - INFO - codeparrot_training - Skipping example with length 285 (seq_length=1024) +03/03/2022 18:27:49 - INFO - codeparrot_training - Step 4297: {'lr': 0.0004997028865747542, 'samples': 2200576, 'steps': 4297, 'loss/train': 2.820240020751953} +03/03/2022 18:27:53 - INFO - codeparrot_training - Step 4298: {'lr': 0.0004997026278728209, 'samples': 2201088, 'steps': 4298, 'loss/train': 2.4059300422668457} +03/03/2022 18:27:53 - INFO - codeparrot_training - Skipping example with length 644 (seq_length=1024) +03/03/2022 18:27:58 - INFO - codeparrot_training - Step 4299: {'lr': 0.0004997023690583753, 'samples': 2201600, 'steps': 4299, 'loss/train': 2.955502986907959} +03/03/2022 18:28:01 - INFO - codeparrot_training - Step 4300: {'lr': 0.0004997021101314179, 'samples': 2202112, 'steps': 4300, 'loss/train': 1.7855418920516968} +03/03/2022 18:28:01 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/03/2022 18:28:06 - INFO - codeparrot_training - Step 4301: {'lr': 0.0004997018510919483, 'samples': 2202624, 'steps': 4301, 'loss/train': 1.8956694602966309} +03/03/2022 18:28:09 - INFO - codeparrot_training - Step 4302: {'lr': 0.0004997015919399671, 'samples': 2203136, 'steps': 4302, 'loss/train': 2.6902828216552734} +03/03/2022 18:28:09 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/03/2022 18:28:15 - INFO - codeparrot_training - Step 4303: {'lr': 0.0004997013326754742, 'samples': 2203648, 'steps': 4303, 'loss/train': 2.759458541870117} +03/03/2022 18:28:18 - INFO - codeparrot_training - Step 4304: {'lr': 0.0004997010732984696, 'samples': 2204160, 'steps': 4304, 'loss/train': 2.708376884460449} +03/03/2022 18:28:18 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/03/2022 18:28:23 - INFO - codeparrot_training - Step 4305: {'lr': 0.0004997008138089536, 'samples': 2204672, 'steps': 4305, 'loss/train': 2.6910762786865234} +03/03/2022 18:28:26 - INFO - codeparrot_training - Step 4306: {'lr': 0.0004997005542069263, 'samples': 2205184, 'steps': 4306, 'loss/train': 2.1699793338775635} +03/03/2022 18:28:26 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 18:28:31 - INFO - codeparrot_training - Step 4307: {'lr': 0.0004997002944923878, 'samples': 2205696, 'steps': 4307, 'loss/train': 3.3660695552825928} +03/03/2022 18:28:35 - INFO - codeparrot_training - Step 4308: {'lr': 0.0004997000346653381, 'samples': 2206208, 'steps': 4308, 'loss/train': 2.6844048500061035} +03/03/2022 18:28:35 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/03/2022 18:28:40 - INFO - codeparrot_training - Step 4309: {'lr': 0.0004996997747257775, 'samples': 2206720, 'steps': 4309, 'loss/train': 3.1445083618164062} +03/03/2022 18:28:43 - INFO - codeparrot_training - Step 4310: {'lr': 0.000499699514673706, 'samples': 2207232, 'steps': 4310, 'loss/train': 4.650846481323242} +03/03/2022 18:28:48 - INFO - codeparrot_training - Step 4311: {'lr': 0.0004996992545091239, 'samples': 2207744, 'steps': 4311, 'loss/train': 2.638298988342285} +03/03/2022 18:28:52 - INFO - codeparrot_training - Step 4312: {'lr': 0.000499698994232031, 'samples': 2208256, 'steps': 4312, 'loss/train': 2.455766439437866} +03/03/2022 18:28:52 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/03/2022 18:28:57 - INFO - codeparrot_training - Step 4313: {'lr': 0.0004996987338424276, 'samples': 2208768, 'steps': 4313, 'loss/train': 2.582677125930786} +03/03/2022 18:29:00 - INFO - codeparrot_training - Step 4314: {'lr': 0.0004996984733403138, 'samples': 2209280, 'steps': 4314, 'loss/train': 2.7016289234161377} +03/03/2022 18:29:00 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/03/2022 18:29:05 - INFO - codeparrot_training - Step 4315: {'lr': 0.0004996982127256898, 'samples': 2209792, 'steps': 4315, 'loss/train': 2.1626553535461426} +03/03/2022 18:29:08 - INFO - codeparrot_training - Step 4316: {'lr': 0.0004996979519985556, 'samples': 2210304, 'steps': 4316, 'loss/train': 1.4530099630355835} +03/03/2022 18:29:09 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/03/2022 18:29:14 - INFO - codeparrot_training - Step 4317: {'lr': 0.0004996976911589114, 'samples': 2210816, 'steps': 4317, 'loss/train': 1.4317450523376465} +03/03/2022 18:29:17 - INFO - codeparrot_training - Step 4318: {'lr': 0.0004996974302067572, 'samples': 2211328, 'steps': 4318, 'loss/train': 2.0460665225982666} +03/03/2022 18:29:17 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/03/2022 18:29:22 - INFO - codeparrot_training - Step 4319: {'lr': 0.0004996971691420931, 'samples': 2211840, 'steps': 4319, 'loss/train': 2.122131824493408} +03/03/2022 18:29:25 - INFO - codeparrot_training - Step 4320: {'lr': 0.0004996969079649195, 'samples': 2212352, 'steps': 4320, 'loss/train': 2.228632926940918} +03/03/2022 18:29:25 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/03/2022 18:29:30 - INFO - codeparrot_training - Step 4321: {'lr': 0.0004996966466752362, 'samples': 2212864, 'steps': 4321, 'loss/train': 2.2341220378875732} +03/03/2022 18:29:34 - INFO - codeparrot_training - Step 4322: {'lr': 0.0004996963852730436, 'samples': 2213376, 'steps': 4322, 'loss/train': 3.526921510696411} +03/03/2022 18:29:34 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/03/2022 18:29:39 - INFO - codeparrot_training - Step 4323: {'lr': 0.0004996961237583415, 'samples': 2213888, 'steps': 4323, 'loss/train': 2.496530294418335} +03/03/2022 18:29:42 - INFO - codeparrot_training - Step 4324: {'lr': 0.0004996958621311302, 'samples': 2214400, 'steps': 4324, 'loss/train': 3.1492295265197754} +03/03/2022 18:29:42 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/03/2022 18:29:47 - INFO - codeparrot_training - Step 4325: {'lr': 0.00049969560039141, 'samples': 2214912, 'steps': 4325, 'loss/train': 1.7919201850891113} +03/03/2022 18:29:51 - INFO - codeparrot_training - Step 4326: {'lr': 0.0004996953385391806, 'samples': 2215424, 'steps': 4326, 'loss/train': 2.5024709701538086} +03/03/2022 18:29:51 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 18:29:56 - INFO - codeparrot_training - Step 4327: {'lr': 0.0004996950765744424, 'samples': 2215936, 'steps': 4327, 'loss/train': 2.5253944396972656} +03/03/2022 18:29:59 - INFO - codeparrot_training - Step 4328: {'lr': 0.0004996948144971953, 'samples': 2216448, 'steps': 4328, 'loss/train': 0.5469165444374084} +03/03/2022 18:29:59 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/03/2022 18:30:05 - INFO - codeparrot_training - Step 4329: {'lr': 0.0004996945523074398, 'samples': 2216960, 'steps': 4329, 'loss/train': 2.994957208633423} +03/03/2022 18:30:08 - INFO - codeparrot_training - Step 4330: {'lr': 0.0004996942900051757, 'samples': 2217472, 'steps': 4330, 'loss/train': 2.8696177005767822} +03/03/2022 18:30:08 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/03/2022 18:30:13 - INFO - codeparrot_training - Step 4331: {'lr': 0.0004996940275904031, 'samples': 2217984, 'steps': 4331, 'loss/train': 2.6124939918518066} +03/03/2022 18:30:16 - INFO - codeparrot_training - Step 4332: {'lr': 0.0004996937650631224, 'samples': 2218496, 'steps': 4332, 'loss/train': 1.142934799194336} +03/03/2022 18:30:16 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/03/2022 18:30:22 - INFO - codeparrot_training - Step 4333: {'lr': 0.0004996935024233335, 'samples': 2219008, 'steps': 4333, 'loss/train': 2.541126012802124} +03/03/2022 18:30:25 - INFO - codeparrot_training - Step 4334: {'lr': 0.0004996932396710365, 'samples': 2219520, 'steps': 4334, 'loss/train': 2.222437858581543} +03/03/2022 18:30:25 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/03/2022 18:30:30 - INFO - codeparrot_training - Step 4335: {'lr': 0.0004996929768062316, 'samples': 2220032, 'steps': 4335, 'loss/train': 1.3417575359344482} +03/03/2022 18:30:33 - INFO - codeparrot_training - Step 4336: {'lr': 0.0004996927138289189, 'samples': 2220544, 'steps': 4336, 'loss/train': 2.144819736480713} +03/03/2022 18:30:33 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/03/2022 18:30:38 - INFO - codeparrot_training - Step 4337: {'lr': 0.0004996924507390985, 'samples': 2221056, 'steps': 4337, 'loss/train': 3.5478854179382324} +03/03/2022 18:30:42 - INFO - codeparrot_training - Step 4338: {'lr': 0.0004996921875367705, 'samples': 2221568, 'steps': 4338, 'loss/train': 1.6496244668960571} +03/03/2022 18:30:42 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/03/2022 18:30:47 - INFO - codeparrot_training - Step 4339: {'lr': 0.0004996919242219352, 'samples': 2222080, 'steps': 4339, 'loss/train': 2.7958552837371826} +03/03/2022 18:30:50 - INFO - codeparrot_training - Step 4340: {'lr': 0.0004996916607945925, 'samples': 2222592, 'steps': 4340, 'loss/train': 2.4317195415496826} +03/03/2022 18:30:51 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/03/2022 18:30:55 - INFO - codeparrot_training - Step 4341: {'lr': 0.0004996913972547426, 'samples': 2223104, 'steps': 4341, 'loss/train': 2.8689799308776855} +03/03/2022 18:30:59 - INFO - codeparrot_training - Step 4342: {'lr': 0.0004996911336023855, 'samples': 2223616, 'steps': 4342, 'loss/train': 2.3515217304229736} +03/03/2022 18:30:59 - INFO - codeparrot_training - Skipping example with length 889 (seq_length=1024) +03/03/2022 18:31:04 - INFO - codeparrot_training - Step 4343: {'lr': 0.0004996908698375216, 'samples': 2224128, 'steps': 4343, 'loss/train': 2.992849111557007} +03/03/2022 18:31:07 - INFO - codeparrot_training - Step 4344: {'lr': 0.0004996906059601507, 'samples': 2224640, 'steps': 4344, 'loss/train': 2.367928981781006} +03/03/2022 18:31:07 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/03/2022 18:31:12 - INFO - codeparrot_training - Step 4345: {'lr': 0.0004996903419702731, 'samples': 2225152, 'steps': 4345, 'loss/train': 1.823011875152588} +03/03/2022 18:31:15 - INFO - codeparrot_training - Step 4346: {'lr': 0.0004996900778678889, 'samples': 2225664, 'steps': 4346, 'loss/train': 2.4864914417266846} +03/03/2022 18:31:16 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/03/2022 18:31:21 - INFO - codeparrot_training - Step 4347: {'lr': 0.0004996898136529982, 'samples': 2226176, 'steps': 4347, 'loss/train': 2.3876285552978516} +03/03/2022 18:31:24 - INFO - codeparrot_training - Step 4348: {'lr': 0.0004996895493256012, 'samples': 2226688, 'steps': 4348, 'loss/train': 2.6978161334991455} +03/03/2022 18:31:25 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/03/2022 18:31:29 - INFO - codeparrot_training - Step 4349: {'lr': 0.0004996892848856978, 'samples': 2227200, 'steps': 4349, 'loss/train': 2.39406418800354} +03/03/2022 18:31:32 - INFO - codeparrot_training - Step 4350: {'lr': 0.0004996890203332883, 'samples': 2227712, 'steps': 4350, 'loss/train': 2.8484408855438232} +03/03/2022 18:31:33 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/03/2022 18:31:37 - INFO - codeparrot_training - Step 4351: {'lr': 0.0004996887556683729, 'samples': 2228224, 'steps': 4351, 'loss/train': 2.828001022338867} +03/03/2022 18:31:41 - INFO - codeparrot_training - Step 4352: {'lr': 0.0004996884908909515, 'samples': 2228736, 'steps': 4352, 'loss/train': 2.1774840354919434} +03/03/2022 18:31:41 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/03/2022 18:31:46 - INFO - codeparrot_training - Step 4353: {'lr': 0.0004996882260010243, 'samples': 2229248, 'steps': 4353, 'loss/train': 3.037933349609375} +03/03/2022 18:31:49 - INFO - codeparrot_training - Step 4354: {'lr': 0.0004996879609985915, 'samples': 2229760, 'steps': 4354, 'loss/train': 2.3908474445343018} +03/03/2022 18:31:49 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/03/2022 18:31:54 - INFO - codeparrot_training - Step 4355: {'lr': 0.0004996876958836532, 'samples': 2230272, 'steps': 4355, 'loss/train': 2.869760513305664} +03/03/2022 18:31:57 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/03/2022 18:32:00 - INFO - codeparrot_training - Step 4356: {'lr': 0.0004996874306562093, 'samples': 2230784, 'steps': 4356, 'loss/train': 2.3235960006713867} +03/03/2022 18:32:03 - INFO - codeparrot_training - Step 4357: {'lr': 0.0004996871653162602, 'samples': 2231296, 'steps': 4357, 'loss/train': 2.08998441696167} +03/03/2022 18:32:06 - INFO - codeparrot_training - Step 4358: {'lr': 0.0004996868998638059, 'samples': 2231808, 'steps': 4358, 'loss/train': 2.722283363342285} +03/03/2022 18:32:06 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/03/2022 18:32:11 - INFO - codeparrot_training - Step 4359: {'lr': 0.0004996866342988467, 'samples': 2232320, 'steps': 4359, 'loss/train': 1.4162204265594482} +03/03/2022 18:32:15 - INFO - codeparrot_training - Step 4360: {'lr': 0.0004996863686213823, 'samples': 2232832, 'steps': 4360, 'loss/train': 0.8815621137619019} +03/03/2022 18:32:15 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/03/2022 18:32:20 - INFO - codeparrot_training - Step 4361: {'lr': 0.0004996861028314133, 'samples': 2233344, 'steps': 4361, 'loss/train': 2.506401300430298} +03/03/2022 18:32:23 - INFO - codeparrot_training - Step 4362: {'lr': 0.0004996858369289394, 'samples': 2233856, 'steps': 4362, 'loss/train': 2.2106435298919678} +03/03/2022 18:32:23 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/03/2022 18:32:28 - INFO - codeparrot_training - Step 4363: {'lr': 0.000499685570913961, 'samples': 2234368, 'steps': 4363, 'loss/train': 0.6344096660614014} +03/03/2022 18:32:31 - INFO - codeparrot_training - Step 4364: {'lr': 0.0004996853047864781, 'samples': 2234880, 'steps': 4364, 'loss/train': 2.9758496284484863} +03/03/2022 18:32:32 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/03/2022 18:32:37 - INFO - codeparrot_training - Step 4365: {'lr': 0.0004996850385464909, 'samples': 2235392, 'steps': 4365, 'loss/train': 1.7241847515106201} +03/03/2022 18:32:40 - INFO - codeparrot_training - Step 4366: {'lr': 0.0004996847721939994, 'samples': 2235904, 'steps': 4366, 'loss/train': 1.9942525625228882} +03/03/2022 18:32:40 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/03/2022 18:32:45 - INFO - codeparrot_training - Step 4367: {'lr': 0.0004996845057290039, 'samples': 2236416, 'steps': 4367, 'loss/train': 2.5172691345214844} +03/03/2022 18:32:48 - INFO - codeparrot_training - Step 4368: {'lr': 0.0004996842391515044, 'samples': 2236928, 'steps': 4368, 'loss/train': 1.4673107862472534} +03/03/2022 18:32:48 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/03/2022 18:32:54 - INFO - codeparrot_training - Step 4369: {'lr': 0.000499683972461501, 'samples': 2237440, 'steps': 4369, 'loss/train': 2.821680784225464} +03/03/2022 18:32:57 - INFO - codeparrot_training - Step 4370: {'lr': 0.0004996837056589938, 'samples': 2237952, 'steps': 4370, 'loss/train': 2.858996868133545} +03/03/2022 18:32:59 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/03/2022 18:33:02 - INFO - codeparrot_training - Step 4371: {'lr': 0.0004996834387439831, 'samples': 2238464, 'steps': 4371, 'loss/train': 3.237086772918701} +03/03/2022 18:33:05 - INFO - codeparrot_training - Step 4372: {'lr': 0.0004996831717164689, 'samples': 2238976, 'steps': 4372, 'loss/train': 1.8775861263275146} +03/03/2022 18:33:07 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/03/2022 18:33:11 - INFO - codeparrot_training - Step 4373: {'lr': 0.0004996829045764512, 'samples': 2239488, 'steps': 4373, 'loss/train': 2.146308183670044} +03/03/2022 18:33:14 - INFO - codeparrot_training - Step 4374: {'lr': 0.0004996826373239303, 'samples': 2240000, 'steps': 4374, 'loss/train': 2.7931647300720215} +03/03/2022 18:33:15 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/03/2022 18:33:19 - INFO - codeparrot_training - Step 4375: {'lr': 0.0004996823699589062, 'samples': 2240512, 'steps': 4375, 'loss/train': 2.416057825088501} +03/03/2022 18:33:22 - INFO - codeparrot_training - Step 4376: {'lr': 0.0004996821024813791, 'samples': 2241024, 'steps': 4376, 'loss/train': 3.0219902992248535} +03/03/2022 18:33:23 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/03/2022 18:33:27 - INFO - codeparrot_training - Step 4377: {'lr': 0.0004996818348913491, 'samples': 2241536, 'steps': 4377, 'loss/train': 1.6127864122390747} +03/03/2022 18:33:31 - INFO - codeparrot_training - Step 4378: {'lr': 0.0004996815671888163, 'samples': 2242048, 'steps': 4378, 'loss/train': 2.1656272411346436} +03/03/2022 18:33:32 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/03/2022 18:33:36 - INFO - codeparrot_training - Step 4379: {'lr': 0.000499681299373781, 'samples': 2242560, 'steps': 4379, 'loss/train': 2.604130506515503} +03/03/2022 18:33:39 - INFO - codeparrot_training - Step 4380: {'lr': 0.0004996810314462429, 'samples': 2243072, 'steps': 4380, 'loss/train': 3.3166394233703613} +03/03/2022 18:33:40 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/03/2022 18:33:44 - INFO - codeparrot_training - Step 4381: {'lr': 0.0004996807634062025, 'samples': 2243584, 'steps': 4381, 'loss/train': 2.961524724960327} +03/03/2022 18:33:47 - INFO - codeparrot_training - Step 4382: {'lr': 0.0004996804952536599, 'samples': 2244096, 'steps': 4382, 'loss/train': 1.5539356470108032} +03/03/2022 18:33:48 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/03/2022 18:33:53 - INFO - codeparrot_training - Step 4383: {'lr': 0.0004996802269886149, 'samples': 2244608, 'steps': 4383, 'loss/train': 1.9402815103530884} +03/03/2022 18:33:56 - INFO - codeparrot_training - Step 4384: {'lr': 0.0004996799586110681, 'samples': 2245120, 'steps': 4384, 'loss/train': 1.4335135221481323} +03/03/2022 18:33:57 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/03/2022 18:34:01 - INFO - codeparrot_training - Step 4385: {'lr': 0.0004996796901210192, 'samples': 2245632, 'steps': 4385, 'loss/train': 3.063713550567627} +03/03/2022 18:34:04 - INFO - codeparrot_training - Step 4386: {'lr': 0.0004996794215184685, 'samples': 2246144, 'steps': 4386, 'loss/train': 2.110746145248413} +03/03/2022 18:34:05 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/03/2022 18:34:10 - INFO - codeparrot_training - Step 4387: {'lr': 0.0004996791528034161, 'samples': 2246656, 'steps': 4387, 'loss/train': 2.998420000076294} +03/03/2022 18:34:13 - INFO - codeparrot_training - Step 4388: {'lr': 0.0004996788839758622, 'samples': 2247168, 'steps': 4388, 'loss/train': 2.4892635345458984} +03/03/2022 18:34:14 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/03/2022 18:34:18 - INFO - codeparrot_training - Step 4389: {'lr': 0.0004996786150358068, 'samples': 2247680, 'steps': 4389, 'loss/train': 4.476348876953125} +03/03/2022 18:34:21 - INFO - codeparrot_training - Step 4390: {'lr': 0.00049967834598325, 'samples': 2248192, 'steps': 4390, 'loss/train': 2.510578155517578} +03/03/2022 18:34:22 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/03/2022 18:34:26 - INFO - codeparrot_training - Step 4391: {'lr': 0.0004996780768181921, 'samples': 2248704, 'steps': 4391, 'loss/train': 1.3088515996932983} +03/03/2022 18:34:30 - INFO - codeparrot_training - Step 4392: {'lr': 0.0004996778075406331, 'samples': 2249216, 'steps': 4392, 'loss/train': 2.9308276176452637} +03/03/2022 18:34:30 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/03/2022 18:34:35 - INFO - codeparrot_training - Step 4393: {'lr': 0.0004996775381505731, 'samples': 2249728, 'steps': 4393, 'loss/train': 2.195002555847168} +03/03/2022 18:34:38 - INFO - codeparrot_training - Step 4394: {'lr': 0.0004996772686480122, 'samples': 2250240, 'steps': 4394, 'loss/train': 2.6623852252960205} +03/03/2022 18:34:39 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/03/2022 18:34:43 - INFO - codeparrot_training - Step 4395: {'lr': 0.0004996769990329507, 'samples': 2250752, 'steps': 4395, 'loss/train': 2.9987738132476807} +03/03/2022 18:34:46 - INFO - codeparrot_training - Step 4396: {'lr': 0.0004996767293053885, 'samples': 2251264, 'steps': 4396, 'loss/train': 2.5764143466949463} +03/03/2022 18:34:47 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/03/2022 18:34:52 - INFO - codeparrot_training - Step 4397: {'lr': 0.0004996764594653258, 'samples': 2251776, 'steps': 4397, 'loss/train': 2.5048840045928955} +03/03/2022 18:34:55 - INFO - codeparrot_training - Step 4398: {'lr': 0.0004996761895127628, 'samples': 2252288, 'steps': 4398, 'loss/train': 2.230863332748413} +03/03/2022 18:34:55 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/03/2022 18:35:00 - INFO - codeparrot_training - Step 4399: {'lr': 0.0004996759194476996, 'samples': 2252800, 'steps': 4399, 'loss/train': 2.132049560546875} +03/03/2022 18:35:03 - INFO - codeparrot_training - Step 4400: {'lr': 0.0004996756492701362, 'samples': 2253312, 'steps': 4400, 'loss/train': 2.3715033531188965} +03/03/2022 18:35:03 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/03/2022 18:35:08 - INFO - codeparrot_training - Step 4401: {'lr': 0.0004996753789800729, 'samples': 2253824, 'steps': 4401, 'loss/train': 3.0586087703704834} +03/03/2022 18:35:12 - INFO - codeparrot_training - Step 4402: {'lr': 0.0004996751085775096, 'samples': 2254336, 'steps': 4402, 'loss/train': 2.5790276527404785} +03/03/2022 18:35:12 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/03/2022 18:35:17 - INFO - codeparrot_training - Step 4403: {'lr': 0.0004996748380624467, 'samples': 2254848, 'steps': 4403, 'loss/train': 2.3327598571777344} +03/03/2022 18:35:20 - INFO - codeparrot_training - Step 4404: {'lr': 0.000499674567434884, 'samples': 2255360, 'steps': 4404, 'loss/train': 2.396493434906006} +03/03/2022 18:35:20 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/03/2022 18:35:25 - INFO - codeparrot_training - Step 4405: {'lr': 0.0004996742966948219, 'samples': 2255872, 'steps': 4405, 'loss/train': 2.6866250038146973} +03/03/2022 18:35:29 - INFO - codeparrot_training - Step 4406: {'lr': 0.0004996740258422604, 'samples': 2256384, 'steps': 4406, 'loss/train': 2.257866859436035} +03/03/2022 18:35:29 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/03/2022 18:35:34 - INFO - codeparrot_training - Step 4407: {'lr': 0.0004996737548771997, 'samples': 2256896, 'steps': 4407, 'loss/train': 2.4264907836914062} +03/03/2022 18:35:37 - INFO - codeparrot_training - Step 4408: {'lr': 0.0004996734837996397, 'samples': 2257408, 'steps': 4408, 'loss/train': 2.066406726837158} +03/03/2022 18:35:37 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/03/2022 18:35:43 - INFO - codeparrot_training - Step 4409: {'lr': 0.0004996732126095807, 'samples': 2257920, 'steps': 4409, 'loss/train': 3.6866579055786133} +03/03/2022 18:35:46 - INFO - codeparrot_training - Step 4410: {'lr': 0.0004996729413070229, 'samples': 2258432, 'steps': 4410, 'loss/train': 2.640519618988037} +03/03/2022 18:35:46 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/03/2022 18:35:51 - INFO - codeparrot_training - Step 4411: {'lr': 0.0004996726698919664, 'samples': 2258944, 'steps': 4411, 'loss/train': 2.6126174926757812} +03/03/2022 18:35:54 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/03/2022 18:35:56 - INFO - codeparrot_training - Step 4412: {'lr': 0.0004996723983644112, 'samples': 2259456, 'steps': 4412, 'loss/train': 2.0176353454589844} +03/03/2022 18:35:59 - INFO - codeparrot_training - Step 4413: {'lr': 0.0004996721267243573, 'samples': 2259968, 'steps': 4413, 'loss/train': 2.9792444705963135} +03/03/2022 18:36:02 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/03/2022 18:36:05 - INFO - codeparrot_training - Step 4414: {'lr': 0.0004996718549718051, 'samples': 2260480, 'steps': 4414, 'loss/train': 1.6411380767822266} +03/03/2022 18:36:08 - INFO - codeparrot_training - Step 4415: {'lr': 0.0004996715831067546, 'samples': 2260992, 'steps': 4415, 'loss/train': 2.9626359939575195} +03/03/2022 18:36:10 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/03/2022 18:36:13 - INFO - codeparrot_training - Step 4416: {'lr': 0.000499671311129206, 'samples': 2261504, 'steps': 4416, 'loss/train': 2.6969351768493652} +03/03/2022 18:36:16 - INFO - codeparrot_training - Step 4417: {'lr': 0.0004996710390391593, 'samples': 2262016, 'steps': 4417, 'loss/train': 2.5846803188323975} +03/03/2022 18:36:19 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/03/2022 18:36:21 - INFO - codeparrot_training - Step 4418: {'lr': 0.0004996707668366147, 'samples': 2262528, 'steps': 4418, 'loss/train': 1.8627187013626099} +03/03/2022 18:36:25 - INFO - codeparrot_training - Step 4419: {'lr': 0.0004996704945215724, 'samples': 2263040, 'steps': 4419, 'loss/train': 2.7960450649261475} +03/03/2022 18:36:27 - INFO - codeparrot_training - Skipping example with length 309 (seq_length=1024) +03/03/2022 18:36:30 - INFO - codeparrot_training - Step 4420: {'lr': 0.0004996702220940322, 'samples': 2263552, 'steps': 4420, 'loss/train': 2.8446831703186035} +03/03/2022 18:36:33 - INFO - codeparrot_training - Step 4421: {'lr': 0.0004996699495539947, 'samples': 2264064, 'steps': 4421, 'loss/train': 1.2441693544387817} +03/03/2022 18:36:37 - INFO - codeparrot_training - Step 4422: {'lr': 0.0004996696769014596, 'samples': 2264576, 'steps': 4422, 'loss/train': 2.26054048538208} +03/03/2022 18:36:37 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 18:36:42 - INFO - codeparrot_training - Step 4423: {'lr': 0.0004996694041364272, 'samples': 2265088, 'steps': 4423, 'loss/train': 1.3591479063034058} +03/03/2022 18:36:45 - INFO - codeparrot_training - Step 4424: {'lr': 0.0004996691312588977, 'samples': 2265600, 'steps': 4424, 'loss/train': 2.7090351581573486} +03/03/2022 18:36:45 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/03/2022 18:36:50 - INFO - codeparrot_training - Step 4425: {'lr': 0.0004996688582688711, 'samples': 2266112, 'steps': 4425, 'loss/train': 1.4263428449630737} +03/03/2022 18:36:53 - INFO - codeparrot_training - Step 4426: {'lr': 0.0004996685851663477, 'samples': 2266624, 'steps': 4426, 'loss/train': 3.3216335773468018} +03/03/2022 18:36:54 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/03/2022 18:36:59 - INFO - codeparrot_training - Step 4427: {'lr': 0.0004996683119513274, 'samples': 2267136, 'steps': 4427, 'loss/train': 2.856020450592041} +03/03/2022 18:37:02 - INFO - codeparrot_training - Step 4428: {'lr': 0.0004996680386238103, 'samples': 2267648, 'steps': 4428, 'loss/train': 2.2716636657714844} +03/03/2022 18:37:03 - INFO - codeparrot_training - Skipping example with length 551 (seq_length=1024) +03/03/2022 18:37:07 - INFO - codeparrot_training - Step 4429: {'lr': 0.0004996677651837967, 'samples': 2268160, 'steps': 4429, 'loss/train': 2.9824678897857666} +03/03/2022 18:37:11 - INFO - codeparrot_training - Step 4430: {'lr': 0.0004996674916312867, 'samples': 2268672, 'steps': 4430, 'loss/train': 2.500814437866211} +03/03/2022 18:37:11 - INFO - codeparrot_training - Skipping example with length 470 (seq_length=1024) +03/03/2022 18:37:16 - INFO - codeparrot_training - Step 4431: {'lr': 0.0004996672179662803, 'samples': 2269184, 'steps': 4431, 'loss/train': 2.5786080360412598} +03/03/2022 18:37:19 - INFO - codeparrot_training - Step 4432: {'lr': 0.0004996669441887778, 'samples': 2269696, 'steps': 4432, 'loss/train': 2.8381242752075195} +03/03/2022 18:37:20 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/03/2022 18:37:24 - INFO - codeparrot_training - Step 4433: {'lr': 0.0004996666702987791, 'samples': 2270208, 'steps': 4433, 'loss/train': 1.6283460855484009} +03/03/2022 18:37:27 - INFO - codeparrot_training - Step 4434: {'lr': 0.0004996663962962846, 'samples': 2270720, 'steps': 4434, 'loss/train': 3.303316831588745} +03/03/2022 18:37:28 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/03/2022 18:37:33 - INFO - codeparrot_training - Step 4435: {'lr': 0.0004996661221812942, 'samples': 2271232, 'steps': 4435, 'loss/train': 3.173570156097412} +03/03/2022 18:37:36 - INFO - codeparrot_training - Step 4436: {'lr': 0.0004996658479538081, 'samples': 2271744, 'steps': 4436, 'loss/train': 1.5844521522521973} +03/03/2022 18:37:36 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/03/2022 18:37:41 - INFO - codeparrot_training - Step 4437: {'lr': 0.0004996655736138265, 'samples': 2272256, 'steps': 4437, 'loss/train': 1.9391813278198242} +03/03/2022 18:37:44 - INFO - codeparrot_training - Step 4438: {'lr': 0.0004996652991613494, 'samples': 2272768, 'steps': 4438, 'loss/train': 2.306976795196533} +03/03/2022 18:37:45 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/03/2022 18:37:49 - INFO - codeparrot_training - Step 4439: {'lr': 0.0004996650245963768, 'samples': 2273280, 'steps': 4439, 'loss/train': 2.8830103874206543} +03/03/2022 18:37:52 - INFO - codeparrot_training - Step 4440: {'lr': 0.0004996647499189092, 'samples': 2273792, 'steps': 4440, 'loss/train': 2.678385019302368} +03/03/2022 18:37:53 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/03/2022 18:37:58 - INFO - codeparrot_training - Step 4441: {'lr': 0.0004996644751289464, 'samples': 2274304, 'steps': 4441, 'loss/train': 2.832144260406494} +03/03/2022 18:38:01 - INFO - codeparrot_training - Step 4442: {'lr': 0.0004996642002264887, 'samples': 2274816, 'steps': 4442, 'loss/train': 2.8010382652282715} +03/03/2022 18:38:01 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/03/2022 18:38:06 - INFO - codeparrot_training - Step 4443: {'lr': 0.0004996639252115362, 'samples': 2275328, 'steps': 4443, 'loss/train': 1.863884449005127} +03/03/2022 18:38:09 - INFO - codeparrot_training - Step 4444: {'lr': 0.000499663650084089, 'samples': 2275840, 'steps': 4444, 'loss/train': 1.97604238986969} +03/03/2022 18:38:10 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/03/2022 18:38:15 - INFO - codeparrot_training - Step 4445: {'lr': 0.0004996633748441472, 'samples': 2276352, 'steps': 4445, 'loss/train': 3.2015700340270996} +03/03/2022 18:38:18 - INFO - codeparrot_training - Step 4446: {'lr': 0.0004996630994917108, 'samples': 2276864, 'steps': 4446, 'loss/train': 3.1272215843200684} +03/03/2022 18:38:18 - INFO - codeparrot_training - Skipping example with length 409 (seq_length=1024) +03/03/2022 18:38:23 - INFO - codeparrot_training - Step 4447: {'lr': 0.0004996628240267802, 'samples': 2277376, 'steps': 4447, 'loss/train': 1.6743178367614746} +03/03/2022 18:38:26 - INFO - codeparrot_training - Step 4448: {'lr': 0.0004996625484493554, 'samples': 2277888, 'steps': 4448, 'loss/train': 2.4432613849639893} +03/03/2022 18:38:27 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/03/2022 18:38:31 - INFO - codeparrot_training - Step 4449: {'lr': 0.0004996622727594363, 'samples': 2278400, 'steps': 4449, 'loss/train': 2.6805787086486816} +03/03/2022 18:38:35 - INFO - codeparrot_training - Step 4450: {'lr': 0.0004996619969570234, 'samples': 2278912, 'steps': 4450, 'loss/train': 2.8536782264709473} +03/03/2022 18:38:35 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/03/2022 18:38:40 - INFO - codeparrot_training - Step 4451: {'lr': 0.0004996617210421166, 'samples': 2279424, 'steps': 4451, 'loss/train': 2.7872047424316406} +03/03/2022 18:38:43 - INFO - codeparrot_training - Step 4452: {'lr': 0.0004996614450147161, 'samples': 2279936, 'steps': 4452, 'loss/train': 1.790632963180542} +03/03/2022 18:38:43 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/03/2022 18:38:48 - INFO - codeparrot_training - Step 4453: {'lr': 0.0004996611688748221, 'samples': 2280448, 'steps': 4453, 'loss/train': 1.1693707704544067} +03/03/2022 18:38:52 - INFO - codeparrot_training - Step 4454: {'lr': 0.0004996608926224345, 'samples': 2280960, 'steps': 4454, 'loss/train': 3.115797519683838} +03/03/2022 18:38:52 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/03/2022 18:38:57 - INFO - codeparrot_training - Step 4455: {'lr': 0.0004996606162575536, 'samples': 2281472, 'steps': 4455, 'loss/train': 2.2257065773010254} +03/03/2022 18:39:00 - INFO - codeparrot_training - Step 4456: {'lr': 0.0004996603397801795, 'samples': 2281984, 'steps': 4456, 'loss/train': 2.2013580799102783} +03/03/2022 18:39:00 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/03/2022 18:39:05 - INFO - codeparrot_training - Step 4457: {'lr': 0.0004996600631903123, 'samples': 2282496, 'steps': 4457, 'loss/train': 2.145881414413452} +03/03/2022 18:39:09 - INFO - codeparrot_training - Step 4458: {'lr': 0.0004996597864879521, 'samples': 2283008, 'steps': 4458, 'loss/train': 0.4779275059700012} +03/03/2022 18:39:09 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/03/2022 18:39:14 - INFO - codeparrot_training - Step 4459: {'lr': 0.000499659509673099, 'samples': 2283520, 'steps': 4459, 'loss/train': 3.163132905960083} +03/03/2022 18:39:17 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/03/2022 18:39:19 - INFO - codeparrot_training - Step 4460: {'lr': 0.0004996592327457533, 'samples': 2284032, 'steps': 4460, 'loss/train': 1.8647692203521729} +03/03/2022 18:39:22 - INFO - codeparrot_training - Step 4461: {'lr': 0.000499658955705915, 'samples': 2284544, 'steps': 4461, 'loss/train': 2.1909308433532715} +03/03/2022 18:39:25 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/03/2022 18:39:27 - INFO - codeparrot_training - Step 4462: {'lr': 0.0004996586785535841, 'samples': 2285056, 'steps': 4462, 'loss/train': 2.2605512142181396} +03/03/2022 18:39:31 - INFO - codeparrot_training - Step 4463: {'lr': 0.000499658401288761, 'samples': 2285568, 'steps': 4463, 'loss/train': 2.2403509616851807} +03/03/2022 18:39:33 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/03/2022 18:39:36 - INFO - codeparrot_training - Step 4464: {'lr': 0.0004996581239114456, 'samples': 2286080, 'steps': 4464, 'loss/train': 1.7435789108276367} +03/03/2022 18:39:39 - INFO - codeparrot_training - Step 4465: {'lr': 0.0004996578464216381, 'samples': 2286592, 'steps': 4465, 'loss/train': 2.196474552154541} +03/03/2022 18:39:41 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/03/2022 18:39:44 - INFO - codeparrot_training - Step 4466: {'lr': 0.0004996575688193386, 'samples': 2287104, 'steps': 4466, 'loss/train': 2.4311423301696777} +03/03/2022 18:39:48 - INFO - codeparrot_training - Step 4467: {'lr': 0.0004996572911045473, 'samples': 2287616, 'steps': 4467, 'loss/train': 2.4050772190093994} +03/03/2022 18:39:50 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/03/2022 18:39:53 - INFO - codeparrot_training - Step 4468: {'lr': 0.0004996570132772642, 'samples': 2288128, 'steps': 4468, 'loss/train': 0.3285658359527588} +03/03/2022 18:39:56 - INFO - codeparrot_training - Step 4469: {'lr': 0.0004996567353374896, 'samples': 2288640, 'steps': 4469, 'loss/train': 1.9346344470977783} +03/03/2022 18:39:59 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/03/2022 18:40:01 - INFO - codeparrot_training - Step 4470: {'lr': 0.0004996564572852235, 'samples': 2289152, 'steps': 4470, 'loss/train': 1.9485158920288086} +03/03/2022 18:40:05 - INFO - codeparrot_training - Step 4471: {'lr': 0.000499656179120466, 'samples': 2289664, 'steps': 4471, 'loss/train': 1.926476240158081} +03/03/2022 18:40:08 - INFO - codeparrot_training - Step 4472: {'lr': 0.0004996559008432173, 'samples': 2290176, 'steps': 4472, 'loss/train': 0.333122581243515} +03/03/2022 18:40:08 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/03/2022 18:40:13 - INFO - codeparrot_training - Step 4473: {'lr': 0.0004996556224534776, 'samples': 2290688, 'steps': 4473, 'loss/train': 2.306248903274536} +03/03/2022 18:40:16 - INFO - codeparrot_training - Skipping example with length 621 (seq_length=1024) +03/03/2022 18:40:18 - INFO - codeparrot_training - Step 4474: {'lr': 0.0004996553439512468, 'samples': 2291200, 'steps': 4474, 'loss/train': 2.319051742553711} +03/03/2022 18:40:21 - INFO - codeparrot_training - Step 4475: {'lr': 0.0004996550653365253, 'samples': 2291712, 'steps': 4475, 'loss/train': 1.7853162288665771} +03/03/2022 18:40:24 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/03/2022 18:40:27 - INFO - codeparrot_training - Step 4476: {'lr': 0.0004996547866093129, 'samples': 2292224, 'steps': 4476, 'loss/train': 3.0089235305786133} +03/03/2022 18:40:30 - INFO - codeparrot_training - Step 4477: {'lr': 0.00049965450776961, 'samples': 2292736, 'steps': 4477, 'loss/train': 2.5537867546081543} +03/03/2022 18:40:33 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/03/2022 18:40:35 - INFO - codeparrot_training - Step 4478: {'lr': 0.0004996542288174166, 'samples': 2293248, 'steps': 4478, 'loss/train': 2.556941509246826} +03/03/2022 18:40:38 - INFO - codeparrot_training - Step 4479: {'lr': 0.0004996539497527329, 'samples': 2293760, 'steps': 4479, 'loss/train': 2.5110392570495605} +03/03/2022 18:40:41 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/03/2022 18:40:44 - INFO - codeparrot_training - Step 4480: {'lr': 0.000499653670575559, 'samples': 2294272, 'steps': 4480, 'loss/train': 2.400174140930176} +03/03/2022 18:40:47 - INFO - codeparrot_training - Step 4481: {'lr': 0.0004996533912858949, 'samples': 2294784, 'steps': 4481, 'loss/train': 4.368938446044922} +03/03/2022 18:40:50 - INFO - codeparrot_training - Step 4482: {'lr': 0.000499653111883741, 'samples': 2295296, 'steps': 4482, 'loss/train': 2.521679162979126} +03/03/2022 18:40:51 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/03/2022 18:40:55 - INFO - codeparrot_training - Step 4483: {'lr': 0.0004996528323690971, 'samples': 2295808, 'steps': 4483, 'loss/train': 2.7446067333221436} +03/03/2022 18:40:59 - INFO - codeparrot_training - Step 4484: {'lr': 0.0004996525527419636, 'samples': 2296320, 'steps': 4484, 'loss/train': 2.9803435802459717} +03/03/2022 18:40:59 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/03/2022 18:41:04 - INFO - codeparrot_training - Step 4485: {'lr': 0.0004996522730023404, 'samples': 2296832, 'steps': 4485, 'loss/train': 2.4842233657836914} +03/03/2022 18:41:07 - INFO - codeparrot_training - Step 4486: {'lr': 0.0004996519931502279, 'samples': 2297344, 'steps': 4486, 'loss/train': 2.374891996383667} +03/03/2022 18:41:09 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/03/2022 18:41:13 - INFO - codeparrot_training - Step 4487: {'lr': 0.0004996517131856259, 'samples': 2297856, 'steps': 4487, 'loss/train': 2.179633855819702} +03/03/2022 18:41:16 - INFO - codeparrot_training - Step 4488: {'lr': 0.0004996514331085348, 'samples': 2298368, 'steps': 4488, 'loss/train': 2.2711615562438965} +03/03/2022 18:41:18 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/03/2022 18:41:21 - INFO - codeparrot_training - Step 4489: {'lr': 0.0004996511529189546, 'samples': 2298880, 'steps': 4489, 'loss/train': 2.0145022869110107} +03/03/2022 18:41:24 - INFO - codeparrot_training - Step 4490: {'lr': 0.0004996508726168854, 'samples': 2299392, 'steps': 4490, 'loss/train': 3.5286483764648438} +03/03/2022 18:41:26 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/03/2022 18:41:30 - INFO - codeparrot_training - Step 4491: {'lr': 0.0004996505922023274, 'samples': 2299904, 'steps': 4491, 'loss/train': 3.4150900840759277} +03/03/2022 18:41:33 - INFO - codeparrot_training - Step 4492: {'lr': 0.0004996503116752807, 'samples': 2300416, 'steps': 4492, 'loss/train': 0.40494608879089355} +03/03/2022 18:41:35 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/03/2022 18:41:38 - INFO - codeparrot_training - Step 4493: {'lr': 0.0004996500310357454, 'samples': 2300928, 'steps': 4493, 'loss/train': 2.3771004676818848} +03/03/2022 18:41:41 - INFO - codeparrot_training - Step 4494: {'lr': 0.0004996497502837217, 'samples': 2301440, 'steps': 4494, 'loss/train': 4.1367387771606445} +03/03/2022 18:41:44 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/03/2022 18:41:47 - INFO - codeparrot_training - Step 4495: {'lr': 0.0004996494694192096, 'samples': 2301952, 'steps': 4495, 'loss/train': 2.4400084018707275} +03/03/2022 18:41:50 - INFO - codeparrot_training - Step 4496: {'lr': 0.0004996491884422092, 'samples': 2302464, 'steps': 4496, 'loss/train': 2.199230909347534} +03/03/2022 18:41:53 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/03/2022 18:41:55 - INFO - codeparrot_training - Step 4497: {'lr': 0.0004996489073527208, 'samples': 2302976, 'steps': 4497, 'loss/train': 3.448881149291992} +03/03/2022 18:41:58 - INFO - codeparrot_training - Step 4498: {'lr': 0.0004996486261507445, 'samples': 2303488, 'steps': 4498, 'loss/train': 2.253598213195801} +03/03/2022 18:42:01 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/03/2022 18:42:04 - INFO - codeparrot_training - Step 4499: {'lr': 0.0004996483448362805, 'samples': 2304000, 'steps': 4499, 'loss/train': 3.679954767227173} +03/03/2022 18:42:07 - INFO - codeparrot_training - Step 4500: {'lr': 0.0004996480634093287, 'samples': 2304512, 'steps': 4500, 'loss/train': 2.651301383972168} +03/03/2022 18:42:10 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/03/2022 18:42:12 - INFO - codeparrot_training - Step 4501: {'lr': 0.0004996477818698893, 'samples': 2305024, 'steps': 4501, 'loss/train': 2.139702320098877} +03/03/2022 18:42:15 - INFO - codeparrot_training - Step 4502: {'lr': 0.0004996475002179625, 'samples': 2305536, 'steps': 4502, 'loss/train': 0.20497223734855652} +03/03/2022 18:42:18 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/03/2022 18:42:20 - INFO - codeparrot_training - Step 4503: {'lr': 0.0004996472184535484, 'samples': 2306048, 'steps': 4503, 'loss/train': 2.7946314811706543} +03/03/2022 18:42:24 - INFO - codeparrot_training - Step 4504: {'lr': 0.0004996469365766471, 'samples': 2306560, 'steps': 4504, 'loss/train': 2.1938178539276123} +03/03/2022 18:42:26 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/03/2022 18:42:29 - INFO - codeparrot_training - Step 4505: {'lr': 0.0004996466545872588, 'samples': 2307072, 'steps': 4505, 'loss/train': 2.826042890548706} +03/03/2022 18:42:32 - INFO - codeparrot_training - Step 4506: {'lr': 0.0004996463724853834, 'samples': 2307584, 'steps': 4506, 'loss/train': 2.7833034992218018} +03/03/2022 18:42:34 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/03/2022 18:42:37 - INFO - codeparrot_training - Step 4507: {'lr': 0.0004996460902710214, 'samples': 2308096, 'steps': 4507, 'loss/train': 2.5797717571258545} +03/03/2022 18:42:40 - INFO - codeparrot_training - Step 4508: {'lr': 0.0004996458079441727, 'samples': 2308608, 'steps': 4508, 'loss/train': 1.3006548881530762} +03/03/2022 18:42:43 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/03/2022 18:42:46 - INFO - codeparrot_training - Step 4509: {'lr': 0.0004996455255048373, 'samples': 2309120, 'steps': 4509, 'loss/train': 2.9387824535369873} +03/03/2022 18:42:49 - INFO - codeparrot_training - Step 4510: {'lr': 0.0004996452429530156, 'samples': 2309632, 'steps': 4510, 'loss/train': 2.245065689086914} +03/03/2022 18:42:52 - INFO - codeparrot_training - Step 4511: {'lr': 0.0004996449602887075, 'samples': 2310144, 'steps': 4511, 'loss/train': 2.9967188835144043} +03/03/2022 18:42:52 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/03/2022 18:42:58 - INFO - codeparrot_training - Step 4512: {'lr': 0.0004996446775119134, 'samples': 2310656, 'steps': 4512, 'loss/train': 2.7684106826782227} +03/03/2022 18:43:01 - INFO - codeparrot_training - Step 4513: {'lr': 0.0004996443946226331, 'samples': 2311168, 'steps': 4513, 'loss/train': 1.9037456512451172} +03/03/2022 18:43:01 - INFO - codeparrot_training - Skipping example with length 160 (seq_length=1024) +03/03/2022 18:43:06 - INFO - codeparrot_training - Step 4514: {'lr': 0.000499644111620867, 'samples': 2311680, 'steps': 4514, 'loss/train': 1.1991430521011353} +03/03/2022 18:43:09 - INFO - codeparrot_training - Skipping example with length 674 (seq_length=1024) +03/03/2022 18:43:11 - INFO - codeparrot_training - Step 4515: {'lr': 0.000499643828506615, 'samples': 2312192, 'steps': 4515, 'loss/train': 2.886251449584961} +03/03/2022 18:43:14 - INFO - codeparrot_training - Step 4516: {'lr': 0.0004996435452798775, 'samples': 2312704, 'steps': 4516, 'loss/train': 2.7607433795928955} +03/03/2022 18:43:17 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/03/2022 18:43:20 - INFO - codeparrot_training - Step 4517: {'lr': 0.0004996432619406543, 'samples': 2313216, 'steps': 4517, 'loss/train': 2.562838554382324} +03/03/2022 18:43:23 - INFO - codeparrot_training - Step 4518: {'lr': 0.0004996429784889458, 'samples': 2313728, 'steps': 4518, 'loss/train': 2.6622464656829834} +03/03/2022 18:43:26 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/03/2022 18:43:28 - INFO - codeparrot_training - Step 4519: {'lr': 0.000499642694924752, 'samples': 2314240, 'steps': 4519, 'loss/train': 1.2066340446472168} +03/03/2022 18:43:31 - INFO - codeparrot_training - Step 4520: {'lr': 0.000499642411248073, 'samples': 2314752, 'steps': 4520, 'loss/train': 1.6766794919967651} +03/03/2022 18:43:34 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/03/2022 18:43:36 - INFO - codeparrot_training - Step 4521: {'lr': 0.0004996421274589091, 'samples': 2315264, 'steps': 4521, 'loss/train': 3.1136581897735596} +03/03/2022 18:43:40 - INFO - codeparrot_training - Step 4522: {'lr': 0.0004996418435572603, 'samples': 2315776, 'steps': 4522, 'loss/train': 1.0299172401428223} +03/03/2022 18:43:42 - INFO - codeparrot_training - Skipping example with length 982 (seq_length=1024) +03/03/2022 18:43:45 - INFO - codeparrot_training - Step 4523: {'lr': 0.0004996415595431267, 'samples': 2316288, 'steps': 4523, 'loss/train': 3.028358221054077} +03/03/2022 18:43:48 - INFO - codeparrot_training - Step 4524: {'lr': 0.0004996412754165084, 'samples': 2316800, 'steps': 4524, 'loss/train': 2.931763172149658} +03/03/2022 18:43:51 - INFO - codeparrot_training - Step 4525: {'lr': 0.0004996409911774056, 'samples': 2317312, 'steps': 4525, 'loss/train': 2.7905900478363037} +03/03/2022 18:43:51 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/03/2022 18:43:57 - INFO - codeparrot_training - Step 4526: {'lr': 0.0004996407068258186, 'samples': 2317824, 'steps': 4526, 'loss/train': 2.5149190425872803} +03/03/2022 18:43:59 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/03/2022 18:44:02 - INFO - codeparrot_training - Step 4527: {'lr': 0.0004996404223617471, 'samples': 2318336, 'steps': 4527, 'loss/train': 1.9550740718841553} +03/03/2022 18:44:05 - INFO - codeparrot_training - Step 4528: {'lr': 0.0004996401377851917, 'samples': 2318848, 'steps': 4528, 'loss/train': 2.70468807220459} +03/03/2022 18:44:07 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/03/2022 18:44:10 - INFO - codeparrot_training - Step 4529: {'lr': 0.0004996398530961522, 'samples': 2319360, 'steps': 4529, 'loss/train': 2.895120143890381} +03/03/2022 18:44:13 - INFO - codeparrot_training - Step 4530: {'lr': 0.0004996395682946288, 'samples': 2319872, 'steps': 4530, 'loss/train': 1.2683780193328857} +03/03/2022 18:44:16 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/03/2022 18:44:18 - INFO - codeparrot_training - Step 4531: {'lr': 0.0004996392833806217, 'samples': 2320384, 'steps': 4531, 'loss/train': 2.7207252979278564} +03/03/2022 18:44:22 - INFO - codeparrot_training - Step 4532: {'lr': 0.000499638998354131, 'samples': 2320896, 'steps': 4532, 'loss/train': 2.7755935192108154} +03/03/2022 18:44:24 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/03/2022 18:44:27 - INFO - codeparrot_training - Step 4533: {'lr': 0.0004996387132151567, 'samples': 2321408, 'steps': 4533, 'loss/train': 3.1805593967437744} +03/03/2022 18:44:30 - INFO - codeparrot_training - Step 4534: {'lr': 0.0004996384279636993, 'samples': 2321920, 'steps': 4534, 'loss/train': 1.7802833318710327} +03/03/2022 18:44:33 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/03/2022 18:44:35 - INFO - codeparrot_training - Step 4535: {'lr': 0.0004996381425997584, 'samples': 2322432, 'steps': 4535, 'loss/train': 2.9403076171875} +03/03/2022 18:44:39 - INFO - codeparrot_training - Step 4536: {'lr': 0.0004996378571233347, 'samples': 2322944, 'steps': 4536, 'loss/train': 1.889412522315979} +03/03/2022 18:44:41 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/03/2022 18:44:44 - INFO - codeparrot_training - Step 4537: {'lr': 0.0004996375715344278, 'samples': 2323456, 'steps': 4537, 'loss/train': 2.134777069091797} +03/03/2022 18:44:47 - INFO - codeparrot_training - Step 4538: {'lr': 0.0004996372858330382, 'samples': 2323968, 'steps': 4538, 'loss/train': 3.698913812637329} +03/03/2022 18:44:49 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/03/2022 18:44:52 - INFO - codeparrot_training - Step 4539: {'lr': 0.0004996370000191657, 'samples': 2324480, 'steps': 4539, 'loss/train': 2.852271556854248} +03/03/2022 18:44:56 - INFO - codeparrot_training - Step 4540: {'lr': 0.0004996367140928107, 'samples': 2324992, 'steps': 4540, 'loss/train': 2.746784210205078} +03/03/2022 18:44:58 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/03/2022 18:45:01 - INFO - codeparrot_training - Step 4541: {'lr': 0.0004996364280539734, 'samples': 2325504, 'steps': 4541, 'loss/train': 1.9466984272003174} +03/03/2022 18:45:04 - INFO - codeparrot_training - Step 4542: {'lr': 0.0004996361419026537, 'samples': 2326016, 'steps': 4542, 'loss/train': 2.6892871856689453} +03/03/2022 18:45:07 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/03/2022 18:45:09 - INFO - codeparrot_training - Step 4543: {'lr': 0.0004996358556388518, 'samples': 2326528, 'steps': 4543, 'loss/train': 1.1311993598937988} +03/03/2022 18:45:12 - INFO - codeparrot_training - Step 4544: {'lr': 0.0004996355692625678, 'samples': 2327040, 'steps': 4544, 'loss/train': 2.6714491844177246} +03/03/2022 18:45:15 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/03/2022 18:45:18 - INFO - codeparrot_training - Step 4545: {'lr': 0.0004996352827738018, 'samples': 2327552, 'steps': 4545, 'loss/train': 1.9413790702819824} +03/03/2022 18:45:21 - INFO - codeparrot_training - Step 4546: {'lr': 0.0004996349961725542, 'samples': 2328064, 'steps': 4546, 'loss/train': 2.4526164531707764} +03/03/2022 18:45:24 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/03/2022 18:45:26 - INFO - codeparrot_training - Step 4547: {'lr': 0.0004996347094588247, 'samples': 2328576, 'steps': 4547, 'loss/train': 2.6600139141082764} +03/03/2022 18:45:29 - INFO - codeparrot_training - Step 4548: {'lr': 0.0004996344226326137, 'samples': 2329088, 'steps': 4548, 'loss/train': 1.528720498085022} +03/03/2022 18:45:32 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/03/2022 18:45:35 - INFO - codeparrot_training - Step 4549: {'lr': 0.0004996341356939214, 'samples': 2329600, 'steps': 4549, 'loss/train': 2.263441562652588} +03/03/2022 18:45:38 - INFO - codeparrot_training - Step 4550: {'lr': 0.0004996338486427477, 'samples': 2330112, 'steps': 4550, 'loss/train': 2.2804174423217773} +03/03/2022 18:45:40 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/03/2022 18:45:43 - INFO - codeparrot_training - Step 4551: {'lr': 0.0004996335614790929, 'samples': 2330624, 'steps': 4551, 'loss/train': 2.821113109588623} +03/03/2022 18:45:46 - INFO - codeparrot_training - Step 4552: {'lr': 0.0004996332742029571, 'samples': 2331136, 'steps': 4552, 'loss/train': 2.5891807079315186} +03/03/2022 18:45:48 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/03/2022 18:45:51 - INFO - codeparrot_training - Step 4553: {'lr': 0.0004996329868143404, 'samples': 2331648, 'steps': 4553, 'loss/train': 2.240842342376709} +03/03/2022 18:45:54 - INFO - codeparrot_training - Step 4554: {'lr': 0.0004996326993132428, 'samples': 2332160, 'steps': 4554, 'loss/train': 3.165534019470215} +03/03/2022 18:45:56 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/03/2022 18:46:00 - INFO - codeparrot_training - Step 4555: {'lr': 0.0004996324116996647, 'samples': 2332672, 'steps': 4555, 'loss/train': 2.1791093349456787} +03/03/2022 18:46:03 - INFO - codeparrot_training - Step 4556: {'lr': 0.0004996321239736059, 'samples': 2333184, 'steps': 4556, 'loss/train': 1.8402019739151} +03/03/2022 18:46:05 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/03/2022 18:46:08 - INFO - codeparrot_training - Step 4557: {'lr': 0.000499631836135067, 'samples': 2333696, 'steps': 4557, 'loss/train': 2.265331745147705} +03/03/2022 18:46:12 - INFO - codeparrot_training - Step 4558: {'lr': 0.0004996315481840476, 'samples': 2334208, 'steps': 4558, 'loss/train': 2.242522716522217} +03/03/2022 18:46:13 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/03/2022 18:46:17 - INFO - codeparrot_training - Step 4559: {'lr': 0.0004996312601205482, 'samples': 2334720, 'steps': 4559, 'loss/train': 3.1619279384613037} +03/03/2022 18:46:20 - INFO - codeparrot_training - Step 4560: {'lr': 0.0004996309719445687, 'samples': 2335232, 'steps': 4560, 'loss/train': 1.8038603067398071} +03/03/2022 18:46:22 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/03/2022 18:46:25 - INFO - codeparrot_training - Step 4561: {'lr': 0.0004996306836561094, 'samples': 2335744, 'steps': 4561, 'loss/train': 2.522491216659546} +03/03/2022 18:46:28 - INFO - codeparrot_training - Step 4562: {'lr': 0.0004996303952551704, 'samples': 2336256, 'steps': 4562, 'loss/train': 1.6704250574111938} +03/03/2022 18:46:30 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/03/2022 18:46:33 - INFO - codeparrot_training - Step 4563: {'lr': 0.0004996301067417517, 'samples': 2336768, 'steps': 4563, 'loss/train': 1.854622721672058} +03/03/2022 18:46:37 - INFO - codeparrot_training - Step 4564: {'lr': 0.0004996298181158536, 'samples': 2337280, 'steps': 4564, 'loss/train': 2.4415104389190674} +03/03/2022 18:46:38 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/03/2022 18:46:42 - INFO - codeparrot_training - Step 4565: {'lr': 0.0004996295293774762, 'samples': 2337792, 'steps': 4565, 'loss/train': 1.9747360944747925} +03/03/2022 18:46:45 - INFO - codeparrot_training - Step 4566: {'lr': 0.0004996292405266195, 'samples': 2338304, 'steps': 4566, 'loss/train': 2.942199468612671} +03/03/2022 18:46:46 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/03/2022 18:46:50 - INFO - codeparrot_training - Step 4567: {'lr': 0.0004996289515632838, 'samples': 2338816, 'steps': 4567, 'loss/train': 2.2934954166412354} +03/03/2022 18:46:54 - INFO - codeparrot_training - Step 4568: {'lr': 0.0004996286624874691, 'samples': 2339328, 'steps': 4568, 'loss/train': 3.1125307083129883} +03/03/2022 18:46:55 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/03/2022 18:46:59 - INFO - codeparrot_training - Step 4569: {'lr': 0.0004996283732991755, 'samples': 2339840, 'steps': 4569, 'loss/train': 2.3876988887786865} +03/03/2022 18:47:02 - INFO - codeparrot_training - Step 4570: {'lr': 0.0004996280839984033, 'samples': 2340352, 'steps': 4570, 'loss/train': 2.622971773147583} +03/03/2022 18:47:03 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/03/2022 18:47:07 - INFO - codeparrot_training - Step 4571: {'lr': 0.0004996277945851525, 'samples': 2340864, 'steps': 4571, 'loss/train': 2.5220556259155273} +03/03/2022 18:47:10 - INFO - codeparrot_training - Step 4572: {'lr': 0.0004996275050594233, 'samples': 2341376, 'steps': 4572, 'loss/train': 1.807889461517334} +03/03/2022 18:47:11 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/03/2022 18:47:16 - INFO - codeparrot_training - Step 4573: {'lr': 0.0004996272154212158, 'samples': 2341888, 'steps': 4573, 'loss/train': 2.5448601245880127} +03/03/2022 18:47:19 - INFO - codeparrot_training - Step 4574: {'lr': 0.0004996269256705301, 'samples': 2342400, 'steps': 4574, 'loss/train': 4.194021701812744} +03/03/2022 18:47:22 - INFO - codeparrot_training - Step 4575: {'lr': 0.0004996266358073664, 'samples': 2342912, 'steps': 4575, 'loss/train': 2.7334179878234863} +03/03/2022 18:47:24 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/03/2022 18:47:28 - INFO - codeparrot_training - Step 4576: {'lr': 0.0004996263458317248, 'samples': 2343424, 'steps': 4576, 'loss/train': 3.9706997871398926} +03/03/2022 18:47:31 - INFO - codeparrot_training - Step 4577: {'lr': 0.0004996260557436053, 'samples': 2343936, 'steps': 4577, 'loss/train': 2.762892246246338} +03/03/2022 18:47:33 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/03/2022 18:47:36 - INFO - codeparrot_training - Step 4578: {'lr': 0.0004996257655430083, 'samples': 2344448, 'steps': 4578, 'loss/train': 2.164797306060791} +03/03/2022 18:47:39 - INFO - codeparrot_training - Step 4579: {'lr': 0.0004996254752299337, 'samples': 2344960, 'steps': 4579, 'loss/train': 2.627582311630249} +03/03/2022 18:47:41 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/03/2022 18:47:44 - INFO - codeparrot_training - Step 4580: {'lr': 0.0004996251848043817, 'samples': 2345472, 'steps': 4580, 'loss/train': 1.6247608661651611} +03/03/2022 18:47:48 - INFO - codeparrot_training - Step 4581: {'lr': 0.0004996248942663525, 'samples': 2345984, 'steps': 4581, 'loss/train': 2.698594808578491} +03/03/2022 18:47:49 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/03/2022 18:47:53 - INFO - codeparrot_training - Step 4582: {'lr': 0.000499624603615846, 'samples': 2346496, 'steps': 4582, 'loss/train': 2.708056688308716} +03/03/2022 18:47:56 - INFO - codeparrot_training - Step 4583: {'lr': 0.0004996243128528628, 'samples': 2347008, 'steps': 4583, 'loss/train': 1.2114841938018799} +03/03/2022 18:47:58 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/03/2022 18:48:01 - INFO - codeparrot_training - Step 4584: {'lr': 0.0004996240219774025, 'samples': 2347520, 'steps': 4584, 'loss/train': 3.1986730098724365} +03/03/2022 18:48:04 - INFO - codeparrot_training - Step 4585: {'lr': 0.0004996237309894656, 'samples': 2348032, 'steps': 4585, 'loss/train': 3.2915306091308594} +03/03/2022 18:48:06 - INFO - codeparrot_training - Skipping example with length 450 (seq_length=1024) +03/03/2022 18:48:10 - INFO - codeparrot_training - Step 4586: {'lr': 0.0004996234398890521, 'samples': 2348544, 'steps': 4586, 'loss/train': 3.125734567642212} +03/03/2022 18:48:13 - INFO - codeparrot_training - Step 4587: {'lr': 0.000499623148676162, 'samples': 2349056, 'steps': 4587, 'loss/train': 1.5774887800216675} +03/03/2022 18:48:14 - INFO - codeparrot_training - Skipping example with length 639 (seq_length=1024) +03/03/2022 18:48:18 - INFO - codeparrot_training - Step 4588: {'lr': 0.0004996228573507957, 'samples': 2349568, 'steps': 4588, 'loss/train': 2.4835472106933594} +03/03/2022 18:48:21 - INFO - codeparrot_training - Step 4589: {'lr': 0.0004996225659129531, 'samples': 2350080, 'steps': 4589, 'loss/train': 1.9856977462768555} +03/03/2022 18:48:23 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/03/2022 18:48:27 - INFO - codeparrot_training - Step 4590: {'lr': 0.0004996222743626345, 'samples': 2350592, 'steps': 4590, 'loss/train': 2.5355513095855713} +03/03/2022 18:48:30 - INFO - codeparrot_training - Step 4591: {'lr': 0.0004996219826998399, 'samples': 2351104, 'steps': 4591, 'loss/train': 2.6369247436523438} +03/03/2022 18:48:32 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/03/2022 18:48:35 - INFO - codeparrot_training - Step 4592: {'lr': 0.0004996216909245695, 'samples': 2351616, 'steps': 4592, 'loss/train': 2.4580135345458984} +03/03/2022 18:48:38 - INFO - codeparrot_training - Step 4593: {'lr': 0.0004996213990368234, 'samples': 2352128, 'steps': 4593, 'loss/train': 1.5065810680389404} +03/03/2022 18:48:40 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/03/2022 18:48:43 - INFO - codeparrot_training - Step 4594: {'lr': 0.0004996211070366018, 'samples': 2352640, 'steps': 4594, 'loss/train': 3.6997439861297607} +03/03/2022 18:48:47 - INFO - codeparrot_training - Step 4595: {'lr': 0.0004996208149239047, 'samples': 2353152, 'steps': 4595, 'loss/train': 2.375065326690674} +03/03/2022 18:48:48 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/03/2022 18:48:52 - INFO - codeparrot_training - Step 4596: {'lr': 0.0004996205226987324, 'samples': 2353664, 'steps': 4596, 'loss/train': 2.5042741298675537} +03/03/2022 18:48:55 - INFO - codeparrot_training - Step 4597: {'lr': 0.0004996202303610849, 'samples': 2354176, 'steps': 4597, 'loss/train': 2.3357157707214355} +03/03/2022 18:48:57 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/03/2022 18:49:00 - INFO - codeparrot_training - Step 4598: {'lr': 0.0004996199379109624, 'samples': 2354688, 'steps': 4598, 'loss/train': 2.4726715087890625} +03/03/2022 18:49:04 - INFO - codeparrot_training - Step 4599: {'lr': 0.000499619645348365, 'samples': 2355200, 'steps': 4599, 'loss/train': 1.3293640613555908} +03/03/2022 18:49:06 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/03/2022 18:49:09 - INFO - codeparrot_training - Step 4600: {'lr': 0.0004996193526732929, 'samples': 2355712, 'steps': 4600, 'loss/train': 2.5870630741119385} +03/03/2022 18:49:12 - INFO - codeparrot_training - Step 4601: {'lr': 0.0004996190598857461, 'samples': 2356224, 'steps': 4601, 'loss/train': 2.7861487865448} +03/03/2022 18:49:15 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/03/2022 18:49:17 - INFO - codeparrot_training - Step 4602: {'lr': 0.0004996187669857247, 'samples': 2356736, 'steps': 4602, 'loss/train': 2.3623220920562744} +03/03/2022 18:49:21 - INFO - codeparrot_training - Step 4603: {'lr': 0.0004996184739732291, 'samples': 2357248, 'steps': 4603, 'loss/train': 3.0617871284484863} +03/03/2022 18:49:23 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/03/2022 18:49:26 - INFO - codeparrot_training - Step 4604: {'lr': 0.0004996181808482592, 'samples': 2357760, 'steps': 4604, 'loss/train': 2.1376233100891113} +03/03/2022 18:49:29 - INFO - codeparrot_training - Step 4605: {'lr': 0.0004996178876108152, 'samples': 2358272, 'steps': 4605, 'loss/train': 2.2154245376586914} +03/03/2022 18:49:32 - INFO - codeparrot_training - Step 4606: {'lr': 0.0004996175942608973, 'samples': 2358784, 'steps': 4606, 'loss/train': 3.100008010864258} +03/03/2022 18:49:32 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/03/2022 18:49:37 - INFO - codeparrot_training - Step 4607: {'lr': 0.0004996173007985055, 'samples': 2359296, 'steps': 4607, 'loss/train': 2.7818808555603027} +03/03/2022 18:49:41 - INFO - codeparrot_training - Step 4608: {'lr': 0.00049961700722364, 'samples': 2359808, 'steps': 4608, 'loss/train': 2.177035093307495} +03/03/2022 18:49:41 - INFO - codeparrot_training - Skipping example with length 433 (seq_length=1024) +03/03/2022 18:49:46 - INFO - codeparrot_training - Step 4609: {'lr': 0.0004996167135363009, 'samples': 2360320, 'steps': 4609, 'loss/train': 0.4171116352081299} +03/03/2022 18:49:49 - INFO - codeparrot_training - Step 4610: {'lr': 0.0004996164197364884, 'samples': 2360832, 'steps': 4610, 'loss/train': 2.6499948501586914} +03/03/2022 18:49:49 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/03/2022 18:49:54 - INFO - codeparrot_training - Step 4611: {'lr': 0.0004996161258242025, 'samples': 2361344, 'steps': 4611, 'loss/train': 2.4323244094848633} +03/03/2022 18:49:57 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/03/2022 18:50:00 - INFO - codeparrot_training - Step 4612: {'lr': 0.0004996158317994436, 'samples': 2361856, 'steps': 4612, 'loss/train': 2.278235673904419} +03/03/2022 18:50:03 - INFO - codeparrot_training - Step 4613: {'lr': 0.0004996155376622115, 'samples': 2362368, 'steps': 4613, 'loss/train': 2.5719165802001953} +03/03/2022 18:50:05 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/03/2022 18:50:08 - INFO - codeparrot_training - Step 4614: {'lr': 0.0004996152434125066, 'samples': 2362880, 'steps': 4614, 'loss/train': 2.966566324234009} +03/03/2022 18:50:11 - INFO - codeparrot_training - Step 4615: {'lr': 0.0004996149490503289, 'samples': 2363392, 'steps': 4615, 'loss/train': 2.5710880756378174} +03/03/2022 18:50:14 - INFO - codeparrot_training - Skipping example with length 972 (seq_length=1024) +03/03/2022 18:50:17 - INFO - codeparrot_training - Step 4616: {'lr': 0.0004996146545756786, 'samples': 2363904, 'steps': 4616, 'loss/train': 2.8298099040985107} +03/03/2022 18:50:20 - INFO - codeparrot_training - Step 4617: {'lr': 0.0004996143599885557, 'samples': 2364416, 'steps': 4617, 'loss/train': 2.8046791553497314} +03/03/2022 18:50:22 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/03/2022 18:50:25 - INFO - codeparrot_training - Step 4618: {'lr': 0.0004996140652889603, 'samples': 2364928, 'steps': 4618, 'loss/train': 2.1006786823272705} +03/03/2022 18:50:28 - INFO - codeparrot_training - Step 4619: {'lr': 0.0004996137704768929, 'samples': 2365440, 'steps': 4619, 'loss/train': 2.9494175910949707} +03/03/2022 18:50:31 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/03/2022 18:50:33 - INFO - codeparrot_training - Step 4620: {'lr': 0.0004996134755523532, 'samples': 2365952, 'steps': 4620, 'loss/train': 1.224858045578003} +03/03/2022 18:50:37 - INFO - codeparrot_training - Step 4621: {'lr': 0.0004996131805153417, 'samples': 2366464, 'steps': 4621, 'loss/train': 2.76505184173584} +03/03/2022 18:50:39 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/03/2022 18:50:42 - INFO - codeparrot_training - Step 4622: {'lr': 0.0004996128853658583, 'samples': 2366976, 'steps': 4622, 'loss/train': 3.0009052753448486} +03/03/2022 18:50:45 - INFO - codeparrot_training - Step 4623: {'lr': 0.0004996125901039031, 'samples': 2367488, 'steps': 4623, 'loss/train': 2.5275073051452637} +03/03/2022 18:50:47 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/03/2022 18:50:51 - INFO - codeparrot_training - Step 4624: {'lr': 0.0004996122947294764, 'samples': 2368000, 'steps': 4624, 'loss/train': 2.499239921569824} +03/03/2022 18:50:54 - INFO - codeparrot_training - Step 4625: {'lr': 0.0004996119992425782, 'samples': 2368512, 'steps': 4625, 'loss/train': 5.90408182144165} +03/03/2022 18:50:56 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/03/2022 18:50:59 - INFO - codeparrot_training - Step 4626: {'lr': 0.0004996117036432087, 'samples': 2369024, 'steps': 4626, 'loss/train': 2.400402307510376} +03/03/2022 18:51:02 - INFO - codeparrot_training - Step 4627: {'lr': 0.000499611407931368, 'samples': 2369536, 'steps': 4627, 'loss/train': 2.334341526031494} +03/03/2022 18:51:04 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/03/2022 18:51:07 - INFO - codeparrot_training - Step 4628: {'lr': 0.0004996111121070562, 'samples': 2370048, 'steps': 4628, 'loss/train': 2.2049238681793213} +03/03/2022 18:51:10 - INFO - codeparrot_training - Step 4629: {'lr': 0.0004996108161702736, 'samples': 2370560, 'steps': 4629, 'loss/train': 2.459738254547119} +03/03/2022 18:51:12 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/03/2022 18:51:16 - INFO - codeparrot_training - Step 4630: {'lr': 0.0004996105201210202, 'samples': 2371072, 'steps': 4630, 'loss/train': 2.579355478286743} +03/03/2022 18:51:19 - INFO - codeparrot_training - Step 4631: {'lr': 0.0004996102239592961, 'samples': 2371584, 'steps': 4631, 'loss/train': 3.162139892578125} +03/03/2022 18:51:21 - INFO - codeparrot_training - Skipping example with length 680 (seq_length=1024) +03/03/2022 18:51:24 - INFO - codeparrot_training - Step 4632: {'lr': 0.0004996099276851015, 'samples': 2372096, 'steps': 4632, 'loss/train': 1.9149963855743408} +03/03/2022 18:51:28 - INFO - codeparrot_training - Step 4633: {'lr': 0.0004996096312984365, 'samples': 2372608, 'steps': 4633, 'loss/train': 1.9362866878509521} +03/03/2022 18:51:33 - INFO - codeparrot_training - Step 4634: {'lr': 0.0004996093347993013, 'samples': 2373120, 'steps': 4634, 'loss/train': 2.3835883140563965} +03/03/2022 18:51:36 - INFO - codeparrot_training - Step 4635: {'lr': 0.000499609038187696, 'samples': 2373632, 'steps': 4635, 'loss/train': 1.4204899072647095} +03/03/2022 18:51:38 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/03/2022 18:51:42 - INFO - codeparrot_training - Step 4636: {'lr': 0.0004996087414636207, 'samples': 2374144, 'steps': 4636, 'loss/train': 2.190681219100952} +03/03/2022 18:51:45 - INFO - codeparrot_training - Step 4637: {'lr': 0.0004996084446270755, 'samples': 2374656, 'steps': 4637, 'loss/train': 2.3916056156158447} +03/03/2022 18:51:47 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/03/2022 18:51:50 - INFO - codeparrot_training - Step 4638: {'lr': 0.0004996081476780607, 'samples': 2375168, 'steps': 4638, 'loss/train': 1.886127233505249} +03/03/2022 18:51:53 - INFO - codeparrot_training - Step 4639: {'lr': 0.0004996078506165762, 'samples': 2375680, 'steps': 4639, 'loss/train': 2.6743874549865723} +03/03/2022 18:51:55 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/03/2022 18:51:58 - INFO - codeparrot_training - Step 4640: {'lr': 0.0004996075534426222, 'samples': 2376192, 'steps': 4640, 'loss/train': 2.8635878562927246} +03/03/2022 18:52:02 - INFO - codeparrot_training - Step 4641: {'lr': 0.000499607256156199, 'samples': 2376704, 'steps': 4641, 'loss/train': 2.204378128051758} +03/03/2022 18:52:03 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/03/2022 18:52:07 - INFO - codeparrot_training - Step 4642: {'lr': 0.0004996069587573067, 'samples': 2377216, 'steps': 4642, 'loss/train': 2.8208365440368652} +03/03/2022 18:52:10 - INFO - codeparrot_training - Step 4643: {'lr': 0.0004996066612459452, 'samples': 2377728, 'steps': 4643, 'loss/train': 2.909043550491333} +03/03/2022 18:52:11 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/03/2022 18:52:15 - INFO - codeparrot_training - Step 4644: {'lr': 0.0004996063636221148, 'samples': 2378240, 'steps': 4644, 'loss/train': 3.126006603240967} +03/03/2022 18:52:18 - INFO - codeparrot_training - Step 4645: {'lr': 0.0004996060658858158, 'samples': 2378752, 'steps': 4645, 'loss/train': 2.2166192531585693} +03/03/2022 18:52:20 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/03/2022 18:52:24 - INFO - codeparrot_training - Step 4646: {'lr': 0.000499605768037048, 'samples': 2379264, 'steps': 4646, 'loss/train': 1.477970004081726} +03/03/2022 18:52:27 - INFO - codeparrot_training - Step 4647: {'lr': 0.0004996054700758117, 'samples': 2379776, 'steps': 4647, 'loss/train': 2.9668641090393066} +03/03/2022 18:52:28 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/03/2022 18:52:32 - INFO - codeparrot_training - Step 4648: {'lr': 0.0004996051720021071, 'samples': 2380288, 'steps': 4648, 'loss/train': 2.741767406463623} +03/03/2022 18:52:35 - INFO - codeparrot_training - Step 4649: {'lr': 0.0004996048738159342, 'samples': 2380800, 'steps': 4649, 'loss/train': 2.473820924758911} +03/03/2022 18:52:36 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/03/2022 18:52:41 - INFO - codeparrot_training - Step 4650: {'lr': 0.0004996045755172932, 'samples': 2381312, 'steps': 4650, 'loss/train': 3.226134777069092} +03/03/2022 18:52:44 - INFO - codeparrot_training - Step 4651: {'lr': 0.0004996042771061843, 'samples': 2381824, 'steps': 4651, 'loss/train': 2.2292158603668213} +03/03/2022 18:52:45 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/03/2022 18:52:49 - INFO - codeparrot_training - Step 4652: {'lr': 0.0004996039785826075, 'samples': 2382336, 'steps': 4652, 'loss/train': 2.717466354370117} +03/03/2022 18:52:53 - INFO - codeparrot_training - Step 4653: {'lr': 0.000499603679946563, 'samples': 2382848, 'steps': 4653, 'loss/train': 1.880379557609558} +03/03/2022 18:52:55 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/03/2022 18:52:58 - INFO - codeparrot_training - Step 4654: {'lr': 0.0004996033811980509, 'samples': 2383360, 'steps': 4654, 'loss/train': 2.5817008018493652} +03/03/2022 18:53:01 - INFO - codeparrot_training - Step 4655: {'lr': 0.0004996030823370715, 'samples': 2383872, 'steps': 4655, 'loss/train': 2.0912535190582275} +03/03/2022 18:53:03 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/03/2022 18:53:06 - INFO - codeparrot_training - Step 4656: {'lr': 0.0004996027833636247, 'samples': 2384384, 'steps': 4656, 'loss/train': 2.473559856414795} +03/03/2022 18:53:09 - INFO - codeparrot_training - Step 4657: {'lr': 0.0004996024842777106, 'samples': 2384896, 'steps': 4657, 'loss/train': 3.0933618545532227} +03/03/2022 18:53:12 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/03/2022 18:53:15 - INFO - codeparrot_training - Step 4658: {'lr': 0.0004996021850793297, 'samples': 2385408, 'steps': 4658, 'loss/train': 1.241376280784607} +03/03/2022 18:53:18 - INFO - codeparrot_training - Step 4659: {'lr': 0.0004996018857684818, 'samples': 2385920, 'steps': 4659, 'loss/train': 2.125493049621582} +03/03/2022 18:53:20 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/03/2022 18:53:23 - INFO - codeparrot_training - Step 4660: {'lr': 0.0004996015863451672, 'samples': 2386432, 'steps': 4660, 'loss/train': 2.280150890350342} +03/03/2022 18:53:26 - INFO - codeparrot_training - Step 4661: {'lr': 0.0004996012868093859, 'samples': 2386944, 'steps': 4661, 'loss/train': 2.247298002243042} +03/03/2022 18:53:29 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/03/2022 18:53:32 - INFO - codeparrot_training - Step 4662: {'lr': 0.0004996009871611382, 'samples': 2387456, 'steps': 4662, 'loss/train': 2.3099477291107178} +03/03/2022 18:53:35 - INFO - codeparrot_training - Step 4663: {'lr': 0.0004996006874004241, 'samples': 2387968, 'steps': 4663, 'loss/train': 2.141946792602539} +03/03/2022 18:53:37 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/03/2022 18:53:40 - INFO - codeparrot_training - Step 4664: {'lr': 0.0004996003875272438, 'samples': 2388480, 'steps': 4664, 'loss/train': 2.521242141723633} +03/03/2022 18:53:43 - INFO - codeparrot_training - Step 4665: {'lr': 0.0004996000875415973, 'samples': 2388992, 'steps': 4665, 'loss/train': 2.3308358192443848} +03/03/2022 18:53:45 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/03/2022 18:53:48 - INFO - codeparrot_training - Step 4666: {'lr': 0.000499599787443485, 'samples': 2389504, 'steps': 4666, 'loss/train': 2.503063440322876} +03/03/2022 18:53:52 - INFO - codeparrot_training - Step 4667: {'lr': 0.0004995994872329069, 'samples': 2390016, 'steps': 4667, 'loss/train': 2.1684467792510986} +03/03/2022 18:53:54 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/03/2022 18:53:57 - INFO - codeparrot_training - Step 4668: {'lr': 0.000499599186909863, 'samples': 2390528, 'steps': 4668, 'loss/train': 1.9804948568344116} +03/03/2022 18:54:00 - INFO - codeparrot_training - Step 4669: {'lr': 0.0004995988864743536, 'samples': 2391040, 'steps': 4669, 'loss/train': 2.4766082763671875} +03/03/2022 18:54:02 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/03/2022 18:54:05 - INFO - codeparrot_training - Step 4670: {'lr': 0.0004995985859263789, 'samples': 2391552, 'steps': 4670, 'loss/train': 2.580853223800659} +03/03/2022 18:54:08 - INFO - codeparrot_training - Step 4671: {'lr': 0.0004995982852659388, 'samples': 2392064, 'steps': 4671, 'loss/train': 1.9907152652740479} +03/03/2022 18:54:10 - INFO - codeparrot_training - Skipping example with length 959 (seq_length=1024) +03/03/2022 18:54:14 - INFO - codeparrot_training - Step 4672: {'lr': 0.0004995979844930336, 'samples': 2392576, 'steps': 4672, 'loss/train': 2.404788017272949} +03/03/2022 18:54:17 - INFO - codeparrot_training - Step 4673: {'lr': 0.0004995976836076635, 'samples': 2393088, 'steps': 4673, 'loss/train': 2.285843849182129} +03/03/2022 18:54:19 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/03/2022 18:54:22 - INFO - codeparrot_training - Step 4674: {'lr': 0.0004995973826098283, 'samples': 2393600, 'steps': 4674, 'loss/train': 1.7763363122940063} +03/03/2022 18:54:25 - INFO - codeparrot_training - Step 4675: {'lr': 0.0004995970814995285, 'samples': 2394112, 'steps': 4675, 'loss/train': 2.27932071685791} +03/03/2022 18:54:27 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/03/2022 18:54:31 - INFO - codeparrot_training - Step 4676: {'lr': 0.0004995967802767641, 'samples': 2394624, 'steps': 4676, 'loss/train': 2.321117401123047} +03/03/2022 18:54:34 - INFO - codeparrot_training - Step 4677: {'lr': 0.0004995964789415353, 'samples': 2395136, 'steps': 4677, 'loss/train': 2.4822885990142822} +03/03/2022 18:54:35 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 18:54:39 - INFO - codeparrot_training - Step 4678: {'lr': 0.0004995961774938423, 'samples': 2395648, 'steps': 4678, 'loss/train': 2.2400808334350586} +03/03/2022 18:54:42 - INFO - codeparrot_training - Step 4679: {'lr': 0.0004995958759336849, 'samples': 2396160, 'steps': 4679, 'loss/train': 1.6393972635269165} +03/03/2022 18:54:43 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/03/2022 18:54:47 - INFO - codeparrot_training - Step 4680: {'lr': 0.0004995955742610635, 'samples': 2396672, 'steps': 4680, 'loss/train': 2.05308198928833} +03/03/2022 18:54:50 - INFO - codeparrot_training - Step 4681: {'lr': 0.0004995952724759781, 'samples': 2397184, 'steps': 4681, 'loss/train': 2.454460859298706} +03/03/2022 18:54:52 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/03/2022 18:54:56 - INFO - codeparrot_training - Step 4682: {'lr': 0.0004995949705784291, 'samples': 2397696, 'steps': 4682, 'loss/train': 3.408576250076294} +03/03/2022 18:54:59 - INFO - codeparrot_training - Step 4683: {'lr': 0.0004995946685684164, 'samples': 2398208, 'steps': 4683, 'loss/train': 0.8699386715888977} +03/03/2022 18:55:00 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/03/2022 18:55:04 - INFO - codeparrot_training - Step 4684: {'lr': 0.0004995943664459401, 'samples': 2398720, 'steps': 4684, 'loss/train': 2.600980281829834} +03/03/2022 18:55:07 - INFO - codeparrot_training - Step 4685: {'lr': 0.0004995940642110005, 'samples': 2399232, 'steps': 4685, 'loss/train': 2.7256598472595215} +03/03/2022 18:55:08 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/03/2022 18:55:12 - INFO - codeparrot_training - Step 4686: {'lr': 0.0004995937618635977, 'samples': 2399744, 'steps': 4686, 'loss/train': 2.7518720626831055} +03/03/2022 18:55:16 - INFO - codeparrot_training - Step 4687: {'lr': 0.0004995934594037316, 'samples': 2400256, 'steps': 4687, 'loss/train': 2.600630283355713} +03/03/2022 18:55:16 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/03/2022 18:55:21 - INFO - codeparrot_training - Step 4688: {'lr': 0.0004995931568314028, 'samples': 2400768, 'steps': 4688, 'loss/train': 2.7087504863739014} +03/03/2022 18:55:24 - INFO - codeparrot_training - Step 4689: {'lr': 0.0004995928541466111, 'samples': 2401280, 'steps': 4689, 'loss/train': 1.108820915222168} +03/03/2022 18:55:25 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/03/2022 18:55:29 - INFO - codeparrot_training - Step 4690: {'lr': 0.0004995925513493567, 'samples': 2401792, 'steps': 4690, 'loss/train': 3.3378891944885254} +03/03/2022 18:55:32 - INFO - codeparrot_training - Step 4691: {'lr': 0.0004995922484396397, 'samples': 2402304, 'steps': 4691, 'loss/train': 2.256744861602783} +03/03/2022 18:55:34 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/03/2022 18:55:38 - INFO - codeparrot_training - Step 4692: {'lr': 0.0004995919454174603, 'samples': 2402816, 'steps': 4692, 'loss/train': 2.5554239749908447} +03/03/2022 18:55:41 - INFO - codeparrot_training - Step 4693: {'lr': 0.0004995916422828187, 'samples': 2403328, 'steps': 4693, 'loss/train': 2.00566029548645} +03/03/2022 18:55:42 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/03/2022 18:55:46 - INFO - codeparrot_training - Step 4694: {'lr': 0.0004995913390357148, 'samples': 2403840, 'steps': 4694, 'loss/train': 1.5068752765655518} +03/03/2022 18:55:49 - INFO - codeparrot_training - Step 4695: {'lr': 0.0004995910356761491, 'samples': 2404352, 'steps': 4695, 'loss/train': 2.8116512298583984} +03/03/2022 18:55:50 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/03/2022 18:55:55 - INFO - codeparrot_training - Step 4696: {'lr': 0.0004995907322041214, 'samples': 2404864, 'steps': 4696, 'loss/train': 2.4675779342651367} +03/03/2022 18:55:58 - INFO - codeparrot_training - Step 4697: {'lr': 0.000499590428619632, 'samples': 2405376, 'steps': 4697, 'loss/train': 2.3261730670928955} +03/03/2022 18:55:59 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/03/2022 18:56:03 - INFO - codeparrot_training - Step 4698: {'lr': 0.000499590124922681, 'samples': 2405888, 'steps': 4698, 'loss/train': 1.7899770736694336} +03/03/2022 18:56:06 - INFO - codeparrot_training - Step 4699: {'lr': 0.0004995898211132685, 'samples': 2406400, 'steps': 4699, 'loss/train': 1.6092798709869385} +03/03/2022 18:56:07 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/03/2022 18:56:11 - INFO - codeparrot_training - Step 4700: {'lr': 0.0004995895171913947, 'samples': 2406912, 'steps': 4700, 'loss/train': 2.0648248195648193} +03/03/2022 18:56:15 - INFO - codeparrot_training - Step 4701: {'lr': 0.0004995892131570598, 'samples': 2407424, 'steps': 4701, 'loss/train': 1.7186914682388306} +03/03/2022 18:56:15 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/03/2022 18:56:20 - INFO - codeparrot_training - Step 4702: {'lr': 0.0004995889090102638, 'samples': 2407936, 'steps': 4702, 'loss/train': 0.5497411489486694} +03/03/2022 18:56:23 - INFO - codeparrot_training - Step 4703: {'lr': 0.0004995886047510068, 'samples': 2408448, 'steps': 4703, 'loss/train': 2.1507561206817627} +03/03/2022 18:56:24 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/03/2022 18:56:28 - INFO - codeparrot_training - Step 4704: {'lr': 0.0004995883003792891, 'samples': 2408960, 'steps': 4704, 'loss/train': 1.8343905210494995} +03/03/2022 18:56:32 - INFO - codeparrot_training - Step 4705: {'lr': 0.0004995879958951107, 'samples': 2409472, 'steps': 4705, 'loss/train': 2.727137565612793} +03/03/2022 18:56:32 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/03/2022 18:56:37 - INFO - codeparrot_training - Step 4706: {'lr': 0.0004995876912984719, 'samples': 2409984, 'steps': 4706, 'loss/train': 2.7634663581848145} +03/03/2022 18:56:40 - INFO - codeparrot_training - Step 4707: {'lr': 0.0004995873865893727, 'samples': 2410496, 'steps': 4707, 'loss/train': 1.4441975355148315} +03/03/2022 18:56:43 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/03/2022 18:56:46 - INFO - codeparrot_training - Step 4708: {'lr': 0.0004995870817678133, 'samples': 2411008, 'steps': 4708, 'loss/train': 2.786498546600342} +03/03/2022 18:56:49 - INFO - codeparrot_training - Step 4709: {'lr': 0.0004995867768337938, 'samples': 2411520, 'steps': 4709, 'loss/train': 2.1342484951019287} +03/03/2022 18:56:51 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/03/2022 18:56:54 - INFO - codeparrot_training - Step 4710: {'lr': 0.0004995864717873143, 'samples': 2412032, 'steps': 4710, 'loss/train': 2.513976573944092} +03/03/2022 18:56:57 - INFO - codeparrot_training - Step 4711: {'lr': 0.000499586166628375, 'samples': 2412544, 'steps': 4711, 'loss/train': 1.603917121887207} +03/03/2022 18:57:00 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/03/2022 18:57:03 - INFO - codeparrot_training - Step 4712: {'lr': 0.0004995858613569761, 'samples': 2413056, 'steps': 4712, 'loss/train': 1.8005483150482178} +03/03/2022 18:57:06 - INFO - codeparrot_training - Step 4713: {'lr': 0.0004995855559731176, 'samples': 2413568, 'steps': 4713, 'loss/train': 2.355597734451294} +03/03/2022 18:57:08 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/03/2022 18:57:11 - INFO - codeparrot_training - Step 4714: {'lr': 0.0004995852504767997, 'samples': 2414080, 'steps': 4714, 'loss/train': 2.3837673664093018} +03/03/2022 18:57:14 - INFO - codeparrot_training - Step 4715: {'lr': 0.0004995849448680225, 'samples': 2414592, 'steps': 4715, 'loss/train': 3.0911576747894287} +03/03/2022 18:57:16 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/03/2022 18:57:19 - INFO - codeparrot_training - Step 4716: {'lr': 0.0004995846391467862, 'samples': 2415104, 'steps': 4716, 'loss/train': 1.7520031929016113} +03/03/2022 18:57:23 - INFO - codeparrot_training - Step 4717: {'lr': 0.000499584333313091, 'samples': 2415616, 'steps': 4717, 'loss/train': 2.256087303161621} +03/03/2022 18:57:24 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/03/2022 18:57:28 - INFO - codeparrot_training - Step 4718: {'lr': 0.0004995840273669369, 'samples': 2416128, 'steps': 4718, 'loss/train': 2.5394482612609863} +03/03/2022 18:57:31 - INFO - codeparrot_training - Step 4719: {'lr': 0.0004995837213083241, 'samples': 2416640, 'steps': 4719, 'loss/train': 2.3791534900665283} +03/03/2022 18:57:33 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/03/2022 18:57:36 - INFO - codeparrot_training - Step 4720: {'lr': 0.0004995834151372526, 'samples': 2417152, 'steps': 4720, 'loss/train': 2.025820255279541} +03/03/2022 18:57:39 - INFO - codeparrot_training - Step 4721: {'lr': 0.0004995831088537229, 'samples': 2417664, 'steps': 4721, 'loss/train': 0.9099211096763611} +03/03/2022 18:57:41 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/03/2022 18:57:45 - INFO - codeparrot_training - Step 4722: {'lr': 0.0004995828024577346, 'samples': 2418176, 'steps': 4722, 'loss/train': 2.8879313468933105} +03/03/2022 18:57:48 - INFO - codeparrot_training - Step 4723: {'lr': 0.0004995824959492884, 'samples': 2418688, 'steps': 4723, 'loss/train': 1.881589651107788} +03/03/2022 18:57:49 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/03/2022 18:57:53 - INFO - codeparrot_training - Step 4724: {'lr': 0.0004995821893283841, 'samples': 2419200, 'steps': 4724, 'loss/train': 1.7407132387161255} +03/03/2022 18:57:56 - INFO - codeparrot_training - Step 4725: {'lr': 0.0004995818825950218, 'samples': 2419712, 'steps': 4725, 'loss/train': 2.2677977085113525} +03/03/2022 18:57:58 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/03/2022 18:58:01 - INFO - codeparrot_training - Step 4726: {'lr': 0.0004995815757492019, 'samples': 2420224, 'steps': 4726, 'loss/train': 0.9499351382255554} +03/03/2022 18:58:05 - INFO - codeparrot_training - Step 4727: {'lr': 0.0004995812687909243, 'samples': 2420736, 'steps': 4727, 'loss/train': 2.478179693222046} +03/03/2022 18:58:06 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/03/2022 18:58:10 - INFO - codeparrot_training - Step 4728: {'lr': 0.0004995809617201894, 'samples': 2421248, 'steps': 4728, 'loss/train': 2.4390244483947754} +03/03/2022 18:58:13 - INFO - codeparrot_training - Step 4729: {'lr': 0.000499580654536997, 'samples': 2421760, 'steps': 4729, 'loss/train': 1.2902790307998657} +03/03/2022 18:58:15 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/03/2022 18:58:18 - INFO - codeparrot_training - Step 4730: {'lr': 0.0004995803472413474, 'samples': 2422272, 'steps': 4730, 'loss/train': 2.216912031173706} +03/03/2022 18:58:22 - INFO - codeparrot_training - Step 4731: {'lr': 0.0004995800398332409, 'samples': 2422784, 'steps': 4731, 'loss/train': 1.9274173974990845} +03/03/2022 18:58:23 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/03/2022 18:58:27 - INFO - codeparrot_training - Step 4732: {'lr': 0.0004995797323126774, 'samples': 2423296, 'steps': 4732, 'loss/train': 2.1699960231781006} +03/03/2022 18:58:30 - INFO - codeparrot_training - Step 4733: {'lr': 0.0004995794246796571, 'samples': 2423808, 'steps': 4733, 'loss/train': 2.2564847469329834} +03/03/2022 18:58:31 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/03/2022 18:58:35 - INFO - codeparrot_training - Step 4734: {'lr': 0.0004995791169341801, 'samples': 2424320, 'steps': 4734, 'loss/train': 2.8803675174713135} +03/03/2022 18:58:39 - INFO - codeparrot_training - Step 4735: {'lr': 0.0004995788090762467, 'samples': 2424832, 'steps': 4735, 'loss/train': 2.483232021331787} +03/03/2022 18:58:40 - INFO - codeparrot_training - Skipping example with length 340 (seq_length=1024) +03/03/2022 18:58:44 - INFO - codeparrot_training - Step 4736: {'lr': 0.000499578501105857, 'samples': 2425344, 'steps': 4736, 'loss/train': 1.0061744451522827} +03/03/2022 18:58:47 - INFO - codeparrot_training - Step 4737: {'lr': 0.000499578193023011, 'samples': 2425856, 'steps': 4737, 'loss/train': 1.9147300720214844} +03/03/2022 18:58:48 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/03/2022 18:58:52 - INFO - codeparrot_training - Step 4738: {'lr': 0.0004995778848277088, 'samples': 2426368, 'steps': 4738, 'loss/train': 1.797553539276123} +03/03/2022 18:58:55 - INFO - codeparrot_training - Step 4739: {'lr': 0.0004995775765199509, 'samples': 2426880, 'steps': 4739, 'loss/train': 2.743924856185913} +03/03/2022 18:58:57 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/03/2022 18:59:01 - INFO - codeparrot_training - Step 4740: {'lr': 0.000499577268099737, 'samples': 2427392, 'steps': 4740, 'loss/train': 1.9076218605041504} +03/03/2022 18:59:04 - INFO - codeparrot_training - Step 4741: {'lr': 0.0004995769595670675, 'samples': 2427904, 'steps': 4741, 'loss/train': 3.0042145252227783} +03/03/2022 18:59:05 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/03/2022 18:59:09 - INFO - codeparrot_training - Step 4742: {'lr': 0.0004995766509219425, 'samples': 2428416, 'steps': 4742, 'loss/train': 2.3705785274505615} +03/03/2022 18:59:12 - INFO - codeparrot_training - Step 4743: {'lr': 0.0004995763421643621, 'samples': 2428928, 'steps': 4743, 'loss/train': 2.731111526489258} +03/03/2022 18:59:13 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/03/2022 18:59:17 - INFO - codeparrot_training - Step 4744: {'lr': 0.0004995760332943264, 'samples': 2429440, 'steps': 4744, 'loss/train': 2.951591730117798} +03/03/2022 18:59:21 - INFO - codeparrot_training - Step 4745: {'lr': 0.0004995757243118356, 'samples': 2429952, 'steps': 4745, 'loss/train': 1.689826250076294} +03/03/2022 18:59:22 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/03/2022 18:59:26 - INFO - codeparrot_training - Step 4746: {'lr': 0.0004995754152168899, 'samples': 2430464, 'steps': 4746, 'loss/train': 5.022709846496582} +03/03/2022 18:59:29 - INFO - codeparrot_training - Step 4747: {'lr': 0.0004995751060094893, 'samples': 2430976, 'steps': 4747, 'loss/train': 1.5847760438919067} +03/03/2022 18:59:30 - INFO - codeparrot_training - Skipping example with length 25 (seq_length=1024) +03/03/2022 18:59:34 - INFO - codeparrot_training - Step 4748: {'lr': 0.000499574796689634, 'samples': 2431488, 'steps': 4748, 'loss/train': 1.7551151514053345} +03/03/2022 18:59:37 - INFO - codeparrot_training - Step 4749: {'lr': 0.0004995744872573242, 'samples': 2432000, 'steps': 4749, 'loss/train': 4.218952655792236} +03/03/2022 18:59:39 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/03/2022 18:59:43 - INFO - codeparrot_training - Step 4750: {'lr': 0.00049957417771256, 'samples': 2432512, 'steps': 4750, 'loss/train': 2.422407388687134} +03/03/2022 18:59:46 - INFO - codeparrot_training - Step 4751: {'lr': 0.0004995738680553415, 'samples': 2433024, 'steps': 4751, 'loss/train': 2.4572670459747314} +03/03/2022 18:59:47 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/03/2022 18:59:51 - INFO - codeparrot_training - Step 4752: {'lr': 0.0004995735582856689, 'samples': 2433536, 'steps': 4752, 'loss/train': 2.680219888687134} +03/03/2022 18:59:54 - INFO - codeparrot_training - Step 4753: {'lr': 0.0004995732484035422, 'samples': 2434048, 'steps': 4753, 'loss/train': 1.9186408519744873} +03/03/2022 18:59:55 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/03/2022 19:00:00 - INFO - codeparrot_training - Step 4754: {'lr': 0.0004995729384089618, 'samples': 2434560, 'steps': 4754, 'loss/train': 2.4876461029052734} +03/03/2022 19:00:03 - INFO - codeparrot_training - Step 4755: {'lr': 0.0004995726283019275, 'samples': 2435072, 'steps': 4755, 'loss/train': 2.374783754348755} +03/03/2022 19:00:05 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/03/2022 19:00:08 - INFO - codeparrot_training - Step 4756: {'lr': 0.0004995723180824397, 'samples': 2435584, 'steps': 4756, 'loss/train': 2.2229275703430176} +03/03/2022 19:00:11 - INFO - codeparrot_training - Step 4757: {'lr': 0.0004995720077504986, 'samples': 2436096, 'steps': 4757, 'loss/train': 2.4879696369171143} +03/03/2022 19:00:13 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/03/2022 19:00:16 - INFO - codeparrot_training - Step 4758: {'lr': 0.0004995716973061041, 'samples': 2436608, 'steps': 4758, 'loss/train': 0.3748067021369934} +03/03/2022 19:00:20 - INFO - codeparrot_training - Step 4759: {'lr': 0.0004995713867492564, 'samples': 2437120, 'steps': 4759, 'loss/train': 7.180379867553711} +03/03/2022 19:00:21 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/03/2022 19:00:25 - INFO - codeparrot_training - Step 4760: {'lr': 0.0004995710760799557, 'samples': 2437632, 'steps': 4760, 'loss/train': 2.4887943267822266} +03/03/2022 19:00:28 - INFO - codeparrot_training - Step 4761: {'lr': 0.0004995707652982022, 'samples': 2438144, 'steps': 4761, 'loss/train': 1.8410297632217407} +03/03/2022 19:00:30 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/03/2022 19:00:33 - INFO - codeparrot_training - Step 4762: {'lr': 0.0004995704544039958, 'samples': 2438656, 'steps': 4762, 'loss/train': 2.4743151664733887} +03/03/2022 19:00:36 - INFO - codeparrot_training - Step 4763: {'lr': 0.0004995701433973369, 'samples': 2439168, 'steps': 4763, 'loss/train': 2.673358917236328} +03/03/2022 19:00:38 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/03/2022 19:00:42 - INFO - codeparrot_training - Step 4764: {'lr': 0.0004995698322782257, 'samples': 2439680, 'steps': 4764, 'loss/train': 2.596189022064209} +03/03/2022 19:00:45 - INFO - codeparrot_training - Step 4765: {'lr': 0.0004995695210466619, 'samples': 2440192, 'steps': 4765, 'loss/train': 2.173036575317383} +03/03/2022 19:00:46 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/03/2022 19:00:50 - INFO - codeparrot_training - Step 4766: {'lr': 0.0004995692097026461, 'samples': 2440704, 'steps': 4766, 'loss/train': 1.7914903163909912} +03/03/2022 19:00:53 - INFO - codeparrot_training - Step 4767: {'lr': 0.0004995688982461783, 'samples': 2441216, 'steps': 4767, 'loss/train': 3.139613628387451} +03/03/2022 19:00:54 - INFO - codeparrot_training - Skipping example with length 548 (seq_length=1024) +03/03/2022 19:00:58 - INFO - codeparrot_training - Step 4768: {'lr': 0.0004995685866772586, 'samples': 2441728, 'steps': 4768, 'loss/train': 2.7471354007720947} +03/03/2022 19:01:02 - INFO - codeparrot_training - Step 4769: {'lr': 0.000499568274995887, 'samples': 2442240, 'steps': 4769, 'loss/train': 1.7421013116836548} +03/03/2022 19:01:02 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/03/2022 19:01:07 - INFO - codeparrot_training - Step 4770: {'lr': 0.0004995679632020639, 'samples': 2442752, 'steps': 4770, 'loss/train': 2.3011398315429688} +03/03/2022 19:01:10 - INFO - codeparrot_training - Step 4771: {'lr': 0.0004995676512957892, 'samples': 2443264, 'steps': 4771, 'loss/train': 1.4951660633087158} +03/03/2022 19:01:10 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/03/2022 19:01:16 - INFO - codeparrot_training - Step 4772: {'lr': 0.0004995673392770634, 'samples': 2443776, 'steps': 4772, 'loss/train': 1.858750343322754} +03/03/2022 19:01:19 - INFO - codeparrot_training - Step 4773: {'lr': 0.0004995670271458863, 'samples': 2444288, 'steps': 4773, 'loss/train': 2.9087467193603516} +03/03/2022 19:01:21 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/03/2022 19:01:24 - INFO - codeparrot_training - Step 4774: {'lr': 0.0004995667149022581, 'samples': 2444800, 'steps': 4774, 'loss/train': 3.1615138053894043} +03/03/2022 19:01:27 - INFO - codeparrot_training - Step 4775: {'lr': 0.000499566402546179, 'samples': 2445312, 'steps': 4775, 'loss/train': 2.610400915145874} +03/03/2022 19:01:30 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/03/2022 19:01:33 - INFO - codeparrot_training - Step 4776: {'lr': 0.0004995660900776491, 'samples': 2445824, 'steps': 4776, 'loss/train': 1.832546353340149} +03/03/2022 19:01:36 - INFO - codeparrot_training - Step 4777: {'lr': 0.0004995657774966686, 'samples': 2446336, 'steps': 4777, 'loss/train': 1.2755085229873657} +03/03/2022 19:01:38 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/03/2022 19:01:41 - INFO - codeparrot_training - Step 4778: {'lr': 0.0004995654648032377, 'samples': 2446848, 'steps': 4778, 'loss/train': 2.5259299278259277} +03/03/2022 19:01:44 - INFO - codeparrot_training - Step 4779: {'lr': 0.0004995651519973563, 'samples': 2447360, 'steps': 4779, 'loss/train': 1.94992196559906} +03/03/2022 19:01:46 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/03/2022 19:01:49 - INFO - codeparrot_training - Step 4780: {'lr': 0.0004995648390790249, 'samples': 2447872, 'steps': 4780, 'loss/train': 1.9459383487701416} +03/03/2022 19:01:52 - INFO - codeparrot_training - Step 4781: {'lr': 0.0004995645260482432, 'samples': 2448384, 'steps': 4781, 'loss/train': 2.969061851501465} +03/03/2022 19:01:54 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/03/2022 19:01:58 - INFO - codeparrot_training - Step 4782: {'lr': 0.0004995642129050117, 'samples': 2448896, 'steps': 4782, 'loss/train': 2.0243520736694336} +03/03/2022 19:02:01 - INFO - codeparrot_training - Step 4783: {'lr': 0.0004995638996493304, 'samples': 2449408, 'steps': 4783, 'loss/train': 2.1596310138702393} +03/03/2022 19:02:03 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/03/2022 19:02:06 - INFO - codeparrot_training - Step 4784: {'lr': 0.0004995635862811994, 'samples': 2449920, 'steps': 4784, 'loss/train': 3.034994125366211} +03/03/2022 19:02:09 - INFO - codeparrot_training - Step 4785: {'lr': 0.000499563272800619, 'samples': 2450432, 'steps': 4785, 'loss/train': 0.6463490724563599} +03/03/2022 19:02:11 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/03/2022 19:02:15 - INFO - codeparrot_training - Step 4786: {'lr': 0.0004995629592075892, 'samples': 2450944, 'steps': 4786, 'loss/train': 2.113098382949829} +03/03/2022 19:02:18 - INFO - codeparrot_training - Step 4787: {'lr': 0.0004995626455021101, 'samples': 2451456, 'steps': 4787, 'loss/train': 3.029111385345459} +03/03/2022 19:02:19 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/03/2022 19:02:23 - INFO - codeparrot_training - Step 4788: {'lr': 0.0004995623316841821, 'samples': 2451968, 'steps': 4788, 'loss/train': 3.180652379989624} +03/03/2022 19:02:26 - INFO - codeparrot_training - Step 4789: {'lr': 0.0004995620177538051, 'samples': 2452480, 'steps': 4789, 'loss/train': 7.288547992706299} +03/03/2022 19:02:29 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/03/2022 19:02:32 - INFO - codeparrot_training - Step 4790: {'lr': 0.0004995617037109792, 'samples': 2452992, 'steps': 4790, 'loss/train': 2.0536251068115234} +03/03/2022 19:02:35 - INFO - codeparrot_training - Step 4791: {'lr': 0.0004995613895557048, 'samples': 2453504, 'steps': 4791, 'loss/train': 2.681994676589966} +03/03/2022 19:02:37 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/03/2022 19:02:40 - INFO - codeparrot_training - Step 4792: {'lr': 0.0004995610752879818, 'samples': 2454016, 'steps': 4792, 'loss/train': 2.03291654586792} +03/03/2022 19:02:43 - INFO - codeparrot_training - Step 4793: {'lr': 0.0004995607609078104, 'samples': 2454528, 'steps': 4793, 'loss/train': 2.808248519897461} +03/03/2022 19:02:45 - INFO - codeparrot_training - Skipping example with length 297 (seq_length=1024) +03/03/2022 19:02:48 - INFO - codeparrot_training - Step 4794: {'lr': 0.0004995604464151908, 'samples': 2455040, 'steps': 4794, 'loss/train': 2.305968999862671} +03/03/2022 19:02:52 - INFO - codeparrot_training - Step 4795: {'lr': 0.0004995601318101231, 'samples': 2455552, 'steps': 4795, 'loss/train': 1.9428397417068481} +03/03/2022 19:02:53 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/03/2022 19:02:57 - INFO - codeparrot_training - Step 4796: {'lr': 0.0004995598170926074, 'samples': 2456064, 'steps': 4796, 'loss/train': 2.3756942749023438} +03/03/2022 19:03:00 - INFO - codeparrot_training - Step 4797: {'lr': 0.000499559502262644, 'samples': 2456576, 'steps': 4797, 'loss/train': 7.142023086547852} +03/03/2022 19:03:04 - INFO - codeparrot_training - Step 4798: {'lr': 0.000499559187320233, 'samples': 2457088, 'steps': 4798, 'loss/train': 2.333749771118164} +03/03/2022 19:03:04 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/03/2022 19:03:09 - INFO - codeparrot_training - Step 4799: {'lr': 0.0004995588722653743, 'samples': 2457600, 'steps': 4799, 'loss/train': 5.182779312133789} +03/03/2022 19:03:12 - INFO - codeparrot_training - Step 4800: {'lr': 0.0004995585570980684, 'samples': 2458112, 'steps': 4800, 'loss/train': 2.502042770385742} +03/03/2022 19:03:12 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/03/2022 19:03:17 - INFO - codeparrot_training - Step 4801: {'lr': 0.0004995582418183151, 'samples': 2458624, 'steps': 4801, 'loss/train': 2.4904863834381104} +03/03/2022 19:03:21 - INFO - codeparrot_training - Step 4802: {'lr': 0.0004995579264261148, 'samples': 2459136, 'steps': 4802, 'loss/train': 2.4681832790374756} +03/03/2022 19:03:22 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/03/2022 19:03:26 - INFO - codeparrot_training - Step 4803: {'lr': 0.0004995576109214676, 'samples': 2459648, 'steps': 4803, 'loss/train': 2.12237548828125} +03/03/2022 19:03:29 - INFO - codeparrot_training - Step 4804: {'lr': 0.0004995572953043736, 'samples': 2460160, 'steps': 4804, 'loss/train': 2.4620602130889893} +03/03/2022 19:03:30 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/03/2022 19:03:34 - INFO - codeparrot_training - Step 4805: {'lr': 0.0004995569795748328, 'samples': 2460672, 'steps': 4805, 'loss/train': 3.420135259628296} +03/03/2022 19:03:38 - INFO - codeparrot_training - Step 4806: {'lr': 0.0004995566637328456, 'samples': 2461184, 'steps': 4806, 'loss/train': 2.24116587638855} +03/03/2022 19:03:39 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/03/2022 19:03:43 - INFO - codeparrot_training - Step 4807: {'lr': 0.0004995563477784119, 'samples': 2461696, 'steps': 4807, 'loss/train': 2.304032802581787} +03/03/2022 19:03:46 - INFO - codeparrot_training - Step 4808: {'lr': 0.000499556031711532, 'samples': 2462208, 'steps': 4808, 'loss/train': 3.5310094356536865} +03/03/2022 19:03:48 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/03/2022 19:03:51 - INFO - codeparrot_training - Step 4809: {'lr': 0.000499555715532206, 'samples': 2462720, 'steps': 4809, 'loss/train': 1.3793209791183472} +03/03/2022 19:03:55 - INFO - codeparrot_training - Step 4810: {'lr': 0.0004995553992404342, 'samples': 2463232, 'steps': 4810, 'loss/train': 2.970479965209961} +03/03/2022 19:03:57 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/03/2022 19:04:00 - INFO - codeparrot_training - Step 4811: {'lr': 0.0004995550828362163, 'samples': 2463744, 'steps': 4811, 'loss/train': 2.346778392791748} +03/03/2022 19:04:03 - INFO - codeparrot_training - Step 4812: {'lr': 0.000499554766319553, 'samples': 2464256, 'steps': 4812, 'loss/train': 2.7982091903686523} +03/03/2022 19:04:08 - INFO - codeparrot_training - Step 4813: {'lr': 0.0004995544496904441, 'samples': 2464768, 'steps': 4813, 'loss/train': 2.5287725925445557} +03/03/2022 19:04:11 - INFO - codeparrot_training - Step 4814: {'lr': 0.0004995541329488897, 'samples': 2465280, 'steps': 4814, 'loss/train': 3.1618282794952393} +03/03/2022 19:04:13 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/03/2022 19:04:17 - INFO - codeparrot_training - Step 4815: {'lr': 0.0004995538160948901, 'samples': 2465792, 'steps': 4815, 'loss/train': 3.0996172428131104} +03/03/2022 19:04:20 - INFO - codeparrot_training - Step 4816: {'lr': 0.0004995534991284455, 'samples': 2466304, 'steps': 4816, 'loss/train': 2.564525842666626} +03/03/2022 19:04:22 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/03/2022 19:04:25 - INFO - codeparrot_training - Step 4817: {'lr': 0.0004995531820495559, 'samples': 2466816, 'steps': 4817, 'loss/train': 2.790700912475586} +03/03/2022 19:04:28 - INFO - codeparrot_training - Step 4818: {'lr': 0.0004995528648582214, 'samples': 2467328, 'steps': 4818, 'loss/train': 2.151323080062866} +03/03/2022 19:04:30 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/03/2022 19:04:33 - INFO - codeparrot_training - Step 4819: {'lr': 0.0004995525475544423, 'samples': 2467840, 'steps': 4819, 'loss/train': 2.2183685302734375} +03/03/2022 19:04:37 - INFO - codeparrot_training - Step 4820: {'lr': 0.0004995522301382187, 'samples': 2468352, 'steps': 4820, 'loss/train': 2.680931806564331} +03/03/2022 19:04:38 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/03/2022 19:04:42 - INFO - codeparrot_training - Step 4821: {'lr': 0.0004995519126095506, 'samples': 2468864, 'steps': 4821, 'loss/train': 2.698070764541626} +03/03/2022 19:04:45 - INFO - codeparrot_training - Step 4822: {'lr': 0.0004995515949684384, 'samples': 2469376, 'steps': 4822, 'loss/train': 3.038058042526245} +03/03/2022 19:04:47 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/03/2022 19:04:50 - INFO - codeparrot_training - Step 4823: {'lr': 0.000499551277214882, 'samples': 2469888, 'steps': 4823, 'loss/train': 2.282282590866089} +03/03/2022 19:04:54 - INFO - codeparrot_training - Step 4824: {'lr': 0.0004995509593488818, 'samples': 2470400, 'steps': 4824, 'loss/train': 3.021301746368408} +03/03/2022 19:04:56 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/03/2022 19:04:59 - INFO - codeparrot_training - Step 4825: {'lr': 0.0004995506413704376, 'samples': 2470912, 'steps': 4825, 'loss/train': 0.31424516439437866} +03/03/2022 19:05:02 - INFO - codeparrot_training - Step 4826: {'lr': 0.0004995503232795498, 'samples': 2471424, 'steps': 4826, 'loss/train': 2.6228997707366943} +03/03/2022 19:05:04 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/03/2022 19:05:07 - INFO - codeparrot_training - Step 4827: {'lr': 0.0004995500050762185, 'samples': 2471936, 'steps': 4827, 'loss/train': 1.3924195766448975} +03/03/2022 19:05:10 - INFO - codeparrot_training - Step 4828: {'lr': 0.0004995496867604438, 'samples': 2472448, 'steps': 4828, 'loss/train': 2.4656717777252197} +03/03/2022 19:05:12 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/03/2022 19:05:16 - INFO - codeparrot_training - Step 4829: {'lr': 0.0004995493683322259, 'samples': 2472960, 'steps': 4829, 'loss/train': 2.274223804473877} +03/03/2022 19:05:19 - INFO - codeparrot_training - Step 4830: {'lr': 0.0004995490497915649, 'samples': 2473472, 'steps': 4830, 'loss/train': 2.323464870452881} +03/03/2022 19:05:21 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/03/2022 19:05:24 - INFO - codeparrot_training - Step 4831: {'lr': 0.0004995487311384609, 'samples': 2473984, 'steps': 4831, 'loss/train': 2.9853515625} +03/03/2022 19:05:27 - INFO - codeparrot_training - Step 4832: {'lr': 0.0004995484123729141, 'samples': 2474496, 'steps': 4832, 'loss/train': 1.8002382516860962} +03/03/2022 19:05:30 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/03/2022 19:05:33 - INFO - codeparrot_training - Step 4833: {'lr': 0.0004995480934949247, 'samples': 2475008, 'steps': 4833, 'loss/train': 2.5132851600646973} +03/03/2022 19:05:36 - INFO - codeparrot_training - Step 4834: {'lr': 0.0004995477745044927, 'samples': 2475520, 'steps': 4834, 'loss/train': 1.5053669214248657} +03/03/2022 19:05:38 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/03/2022 19:05:41 - INFO - codeparrot_training - Step 4835: {'lr': 0.0004995474554016184, 'samples': 2476032, 'steps': 4835, 'loss/train': 1.6865235567092896} +03/03/2022 19:05:44 - INFO - codeparrot_training - Step 4836: {'lr': 0.0004995471361863017, 'samples': 2476544, 'steps': 4836, 'loss/train': 2.26845383644104} +03/03/2022 19:05:46 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/03/2022 19:05:49 - INFO - codeparrot_training - Step 4837: {'lr': 0.0004995468168585431, 'samples': 2477056, 'steps': 4837, 'loss/train': 3.041250467300415} +03/03/2022 19:05:53 - INFO - codeparrot_training - Step 4838: {'lr': 0.0004995464974183424, 'samples': 2477568, 'steps': 4838, 'loss/train': 1.528280258178711} +03/03/2022 19:05:55 - INFO - codeparrot_training - Skipping example with length 807 (seq_length=1024) +03/03/2022 19:05:58 - INFO - codeparrot_training - Step 4839: {'lr': 0.0004995461778657002, 'samples': 2478080, 'steps': 4839, 'loss/train': 1.6582547426223755} +03/03/2022 19:06:01 - INFO - codeparrot_training - Step 4840: {'lr': 0.000499545858200616, 'samples': 2478592, 'steps': 4840, 'loss/train': 1.2643731832504272} +03/03/2022 19:06:03 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/03/2022 19:06:06 - INFO - codeparrot_training - Step 4841: {'lr': 0.0004995455384230904, 'samples': 2479104, 'steps': 4841, 'loss/train': 3.444776773452759} +03/03/2022 19:06:09 - INFO - codeparrot_training - Step 4842: {'lr': 0.0004995452185331235, 'samples': 2479616, 'steps': 4842, 'loss/train': 3.337554693222046} +03/03/2022 19:06:11 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/03/2022 19:06:15 - INFO - codeparrot_training - Step 4843: {'lr': 0.0004995448985307153, 'samples': 2480128, 'steps': 4843, 'loss/train': 2.9425690174102783} +03/03/2022 19:06:18 - INFO - codeparrot_training - Step 4844: {'lr': 0.0004995445784158661, 'samples': 2480640, 'steps': 4844, 'loss/train': 2.7902324199676514} +03/03/2022 19:06:20 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/03/2022 19:06:24 - INFO - codeparrot_training - Step 4845: {'lr': 0.0004995442581885759, 'samples': 2481152, 'steps': 4845, 'loss/train': 1.8444856405258179} +03/03/2022 19:06:27 - INFO - codeparrot_training - Step 4846: {'lr': 0.0004995439378488449, 'samples': 2481664, 'steps': 4846, 'loss/train': 1.2752681970596313} +03/03/2022 19:06:29 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/03/2022 19:06:32 - INFO - codeparrot_training - Step 4847: {'lr': 0.0004995436173966733, 'samples': 2482176, 'steps': 4847, 'loss/train': 2.3915936946868896} +03/03/2022 19:06:35 - INFO - codeparrot_training - Step 4848: {'lr': 0.0004995432968320611, 'samples': 2482688, 'steps': 4848, 'loss/train': 2.089434862136841} +03/03/2022 19:06:38 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/03/2022 19:06:40 - INFO - codeparrot_training - Step 4849: {'lr': 0.0004995429761550086, 'samples': 2483200, 'steps': 4849, 'loss/train': 2.34389066696167} +03/03/2022 19:06:44 - INFO - codeparrot_training - Step 4850: {'lr': 0.0004995426553655159, 'samples': 2483712, 'steps': 4850, 'loss/train': 1.768385648727417} +03/03/2022 19:06:46 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/03/2022 19:06:49 - INFO - codeparrot_training - Step 4851: {'lr': 0.0004995423344635831, 'samples': 2484224, 'steps': 4851, 'loss/train': 2.706448793411255} +03/03/2022 19:06:52 - INFO - codeparrot_training - Step 4852: {'lr': 0.0004995420134492105, 'samples': 2484736, 'steps': 4852, 'loss/train': 2.4971652030944824} +03/03/2022 19:06:54 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/03/2022 19:06:57 - INFO - codeparrot_training - Step 4853: {'lr': 0.0004995416923223979, 'samples': 2485248, 'steps': 4853, 'loss/train': 2.418558120727539} +03/03/2022 19:07:00 - INFO - codeparrot_training - Step 4854: {'lr': 0.0004995413710831458, 'samples': 2485760, 'steps': 4854, 'loss/train': 2.712454319000244} +03/03/2022 19:07:02 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/03/2022 19:07:06 - INFO - codeparrot_training - Step 4855: {'lr': 0.0004995410497314542, 'samples': 2486272, 'steps': 4855, 'loss/train': 3.9014358520507812} +03/03/2022 19:07:09 - INFO - codeparrot_training - Step 4856: {'lr': 0.0004995407282673232, 'samples': 2486784, 'steps': 4856, 'loss/train': 1.6873059272766113} +03/03/2022 19:07:10 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/03/2022 19:07:14 - INFO - codeparrot_training - Step 4857: {'lr': 0.000499540406690753, 'samples': 2487296, 'steps': 4857, 'loss/train': 2.746579885482788} +03/03/2022 19:07:17 - INFO - codeparrot_training - Step 4858: {'lr': 0.0004995400850017438, 'samples': 2487808, 'steps': 4858, 'loss/train': 1.8538373708724976} +03/03/2022 19:07:19 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/03/2022 19:07:22 - INFO - codeparrot_training - Step 4859: {'lr': 0.0004995397632002957, 'samples': 2488320, 'steps': 4859, 'loss/train': 2.714437246322632} +03/03/2022 19:07:26 - INFO - codeparrot_training - Step 4860: {'lr': 0.0004995394412864088, 'samples': 2488832, 'steps': 4860, 'loss/train': 2.37245512008667} +03/03/2022 19:07:28 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/03/2022 19:07:31 - INFO - codeparrot_training - Step 4861: {'lr': 0.0004995391192600834, 'samples': 2489344, 'steps': 4861, 'loss/train': 2.2520837783813477} +03/03/2022 19:07:34 - INFO - codeparrot_training - Step 4862: {'lr': 0.0004995387971213194, 'samples': 2489856, 'steps': 4862, 'loss/train': 3.3185737133026123} +03/03/2022 19:07:36 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/03/2022 19:07:39 - INFO - codeparrot_training - Step 4863: {'lr': 0.000499538474870117, 'samples': 2490368, 'steps': 4863, 'loss/train': 2.0023157596588135} +03/03/2022 19:07:43 - INFO - codeparrot_training - Step 4864: {'lr': 0.0004995381525064765, 'samples': 2490880, 'steps': 4864, 'loss/train': 2.583448886871338} +03/03/2022 19:07:44 - INFO - codeparrot_training - Skipping example with length 354 (seq_length=1024) +03/03/2022 19:07:48 - INFO - codeparrot_training - Step 4865: {'lr': 0.0004995378300303979, 'samples': 2491392, 'steps': 4865, 'loss/train': 2.6357576847076416} +03/03/2022 19:07:51 - INFO - codeparrot_training - Step 4866: {'lr': 0.0004995375074418815, 'samples': 2491904, 'steps': 4866, 'loss/train': 2.7243049144744873} +03/03/2022 19:07:53 - INFO - codeparrot_training - Skipping example with length 534 (seq_length=1024) +03/03/2022 19:07:56 - INFO - codeparrot_training - Step 4867: {'lr': 0.0004995371847409273, 'samples': 2492416, 'steps': 4867, 'loss/train': 2.1891427040100098} +03/03/2022 19:07:59 - INFO - codeparrot_training - Step 4868: {'lr': 0.0004995368619275355, 'samples': 2492928, 'steps': 4868, 'loss/train': 2.1025922298431396} +03/03/2022 19:08:01 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/03/2022 19:08:05 - INFO - codeparrot_training - Step 4869: {'lr': 0.0004995365390017062, 'samples': 2493440, 'steps': 4869, 'loss/train': 2.3074140548706055} +03/03/2022 19:08:08 - INFO - codeparrot_training - Step 4870: {'lr': 0.0004995362159634396, 'samples': 2493952, 'steps': 4870, 'loss/train': 2.1589725017547607} +03/03/2022 19:08:10 - INFO - codeparrot_training - Skipping example with length 371 (seq_length=1024) +03/03/2022 19:08:13 - INFO - codeparrot_training - Step 4871: {'lr': 0.0004995358928127359, 'samples': 2494464, 'steps': 4871, 'loss/train': 2.175736665725708} +03/03/2022 19:08:16 - INFO - codeparrot_training - Step 4872: {'lr': 0.0004995355695495952, 'samples': 2494976, 'steps': 4872, 'loss/train': 1.2226284742355347} +03/03/2022 19:08:18 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/03/2022 19:08:22 - INFO - codeparrot_training - Step 4873: {'lr': 0.0004995352461740174, 'samples': 2495488, 'steps': 4873, 'loss/train': 2.1534829139709473} +03/03/2022 19:08:25 - INFO - codeparrot_training - Step 4874: {'lr': 0.0004995349226860031, 'samples': 2496000, 'steps': 4874, 'loss/train': 2.9266910552978516} +03/03/2022 19:08:26 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/03/2022 19:08:30 - INFO - codeparrot_training - Step 4875: {'lr': 0.0004995345990855522, 'samples': 2496512, 'steps': 4875, 'loss/train': 3.0570971965789795} +03/03/2022 19:08:33 - INFO - codeparrot_training - Step 4876: {'lr': 0.0004995342753726647, 'samples': 2497024, 'steps': 4876, 'loss/train': 2.609173536300659} +03/03/2022 19:08:35 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/03/2022 19:08:38 - INFO - codeparrot_training - Step 4877: {'lr': 0.0004995339515473411, 'samples': 2497536, 'steps': 4877, 'loss/train': 2.687882900238037} +03/03/2022 19:08:42 - INFO - codeparrot_training - Step 4878: {'lr': 0.0004995336276095812, 'samples': 2498048, 'steps': 4878, 'loss/train': 3.0205888748168945} +03/03/2022 19:08:43 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/03/2022 19:08:47 - INFO - codeparrot_training - Step 4879: {'lr': 0.0004995333035593853, 'samples': 2498560, 'steps': 4879, 'loss/train': 2.140230178833008} +03/03/2022 19:08:50 - INFO - codeparrot_training - Step 4880: {'lr': 0.0004995329793967537, 'samples': 2499072, 'steps': 4880, 'loss/train': 1.8657763004302979} +03/03/2022 19:08:52 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/03/2022 19:08:55 - INFO - codeparrot_training - Step 4881: {'lr': 0.0004995326551216862, 'samples': 2499584, 'steps': 4881, 'loss/train': 2.8468210697174072} +03/03/2022 19:08:58 - INFO - codeparrot_training - Step 4882: {'lr': 0.0004995323307341832, 'samples': 2500096, 'steps': 4882, 'loss/train': 2.36456036567688} +03/03/2022 19:09:00 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/03/2022 19:09:04 - INFO - codeparrot_training - Step 4883: {'lr': 0.0004995320062342449, 'samples': 2500608, 'steps': 4883, 'loss/train': 1.5441758632659912} +03/03/2022 19:09:07 - INFO - codeparrot_training - Step 4884: {'lr': 0.0004995316816218712, 'samples': 2501120, 'steps': 4884, 'loss/train': 2.314457893371582} +03/03/2022 19:09:08 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/03/2022 19:09:12 - INFO - codeparrot_training - Step 4885: {'lr': 0.0004995313568970625, 'samples': 2501632, 'steps': 4885, 'loss/train': 2.21091628074646} +03/03/2022 19:09:15 - INFO - codeparrot_training - Step 4886: {'lr': 0.0004995310320598187, 'samples': 2502144, 'steps': 4886, 'loss/train': 2.2402615547180176} +03/03/2022 19:09:17 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/03/2022 19:09:21 - INFO - codeparrot_training - Step 4887: {'lr': 0.0004995307071101401, 'samples': 2502656, 'steps': 4887, 'loss/train': 2.9875502586364746} +03/03/2022 19:09:24 - INFO - codeparrot_training - Step 4888: {'lr': 0.0004995303820480268, 'samples': 2503168, 'steps': 4888, 'loss/train': 2.417663812637329} +03/03/2022 19:09:25 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/03/2022 19:09:29 - INFO - codeparrot_training - Step 4889: {'lr': 0.000499530056873479, 'samples': 2503680, 'steps': 4889, 'loss/train': 2.6566038131713867} +03/03/2022 19:09:32 - INFO - codeparrot_training - Step 4890: {'lr': 0.0004995297315864968, 'samples': 2504192, 'steps': 4890, 'loss/train': 2.1093435287475586} +03/03/2022 19:09:34 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/03/2022 19:09:37 - INFO - codeparrot_training - Step 4891: {'lr': 0.0004995294061870802, 'samples': 2504704, 'steps': 4891, 'loss/train': 2.0911667346954346} +03/03/2022 19:09:41 - INFO - codeparrot_training - Step 4892: {'lr': 0.0004995290806752297, 'samples': 2505216, 'steps': 4892, 'loss/train': 3.0593526363372803} +03/03/2022 19:09:42 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/03/2022 19:09:46 - INFO - codeparrot_training - Step 4893: {'lr': 0.0004995287550509452, 'samples': 2505728, 'steps': 4893, 'loss/train': 3.0033910274505615} +03/03/2022 19:09:49 - INFO - codeparrot_training - Step 4894: {'lr': 0.0004995284293142268, 'samples': 2506240, 'steps': 4894, 'loss/train': 1.8219630718231201} +03/03/2022 19:09:50 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/03/2022 19:09:54 - INFO - codeparrot_training - Step 4895: {'lr': 0.0004995281034650748, 'samples': 2506752, 'steps': 4895, 'loss/train': 2.043574333190918} +03/03/2022 19:09:57 - INFO - codeparrot_training - Step 4896: {'lr': 0.0004995277775034894, 'samples': 2507264, 'steps': 4896, 'loss/train': 3.157118320465088} +03/03/2022 19:09:59 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/03/2022 19:10:03 - INFO - codeparrot_training - Step 4897: {'lr': 0.0004995274514294706, 'samples': 2507776, 'steps': 4897, 'loss/train': 3.0801634788513184} +03/03/2022 19:10:06 - INFO - codeparrot_training - Step 4898: {'lr': 0.0004995271252430184, 'samples': 2508288, 'steps': 4898, 'loss/train': 0.7655453085899353} +03/03/2022 19:10:07 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/03/2022 19:10:11 - INFO - codeparrot_training - Step 4899: {'lr': 0.0004995267989441332, 'samples': 2508800, 'steps': 4899, 'loss/train': 1.375827431678772} +03/03/2022 19:10:14 - INFO - codeparrot_training - Step 4900: {'lr': 0.0004995264725328151, 'samples': 2509312, 'steps': 4900, 'loss/train': 2.1573596000671387} +03/03/2022 19:10:15 - INFO - codeparrot_training - Skipping example with length 117 (seq_length=1024) +03/03/2022 19:10:19 - INFO - codeparrot_training - Step 4901: {'lr': 0.0004995261460090644, 'samples': 2509824, 'steps': 4901, 'loss/train': 2.4019925594329834} +03/03/2022 19:10:22 - INFO - codeparrot_training - Step 4902: {'lr': 0.0004995258193728809, 'samples': 2510336, 'steps': 4902, 'loss/train': 2.480454683303833} +03/03/2022 19:10:23 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/03/2022 19:10:28 - INFO - codeparrot_training - Step 4903: {'lr': 0.0004995254926242649, 'samples': 2510848, 'steps': 4903, 'loss/train': 2.215315341949463} +03/03/2022 19:10:31 - INFO - codeparrot_training - Step 4904: {'lr': 0.0004995251657632165, 'samples': 2511360, 'steps': 4904, 'loss/train': 2.805739164352417} +03/03/2022 19:10:32 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/03/2022 19:10:36 - INFO - codeparrot_training - Step 4905: {'lr': 0.000499524838789736, 'samples': 2511872, 'steps': 4905, 'loss/train': 1.9561092853546143} +03/03/2022 19:10:39 - INFO - codeparrot_training - Step 4906: {'lr': 0.0004995245117038235, 'samples': 2512384, 'steps': 4906, 'loss/train': 1.8768481016159058} +03/03/2022 19:10:40 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/03/2022 19:10:45 - INFO - codeparrot_training - Step 4907: {'lr': 0.0004995241845054791, 'samples': 2512896, 'steps': 4907, 'loss/train': 2.1988232135772705} +03/03/2022 19:10:48 - INFO - codeparrot_training - Step 4908: {'lr': 0.0004995238571947029, 'samples': 2513408, 'steps': 4908, 'loss/train': 2.6251220703125} +03/03/2022 19:10:48 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/03/2022 19:10:53 - INFO - codeparrot_training - Step 4909: {'lr': 0.0004995235297714951, 'samples': 2513920, 'steps': 4909, 'loss/train': 2.3381741046905518} +03/03/2022 19:10:56 - INFO - codeparrot_training - Step 4910: {'lr': 0.0004995232022358559, 'samples': 2514432, 'steps': 4910, 'loss/train': 1.9262293577194214} +03/03/2022 19:10:57 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/03/2022 19:11:01 - INFO - codeparrot_training - Step 4911: {'lr': 0.0004995228745877853, 'samples': 2514944, 'steps': 4911, 'loss/train': 2.5077426433563232} +03/03/2022 19:11:05 - INFO - codeparrot_training - Step 4912: {'lr': 0.0004995225468272836, 'samples': 2515456, 'steps': 4912, 'loss/train': 2.658615827560425} +03/03/2022 19:11:05 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/03/2022 19:11:10 - INFO - codeparrot_training - Step 4913: {'lr': 0.0004995222189543509, 'samples': 2515968, 'steps': 4913, 'loss/train': 0.8045946955680847} +03/03/2022 19:11:13 - INFO - codeparrot_training - Step 4914: {'lr': 0.0004995218909689873, 'samples': 2516480, 'steps': 4914, 'loss/train': 2.7647016048431396} +03/03/2022 19:11:13 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/03/2022 19:11:18 - INFO - codeparrot_training - Step 4915: {'lr': 0.0004995215628711931, 'samples': 2516992, 'steps': 4915, 'loss/train': 2.483281373977661} +03/03/2022 19:11:22 - INFO - codeparrot_training - Step 4916: {'lr': 0.0004995212346609682, 'samples': 2517504, 'steps': 4916, 'loss/train': 2.2260591983795166} +03/03/2022 19:11:22 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/03/2022 19:11:27 - INFO - codeparrot_training - Step 4917: {'lr': 0.0004995209063383129, 'samples': 2518016, 'steps': 4917, 'loss/train': 2.569035530090332} +03/03/2022 19:11:30 - INFO - codeparrot_training - Step 4918: {'lr': 0.0004995205779032274, 'samples': 2518528, 'steps': 4918, 'loss/train': 2.897451877593994} +03/03/2022 19:11:30 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/03/2022 19:11:35 - INFO - codeparrot_training - Step 4919: {'lr': 0.0004995202493557118, 'samples': 2519040, 'steps': 4919, 'loss/train': 3.023164987564087} +03/03/2022 19:11:38 - INFO - codeparrot_training - Step 4920: {'lr': 0.0004995199206957662, 'samples': 2519552, 'steps': 4920, 'loss/train': 1.6149410009384155} +03/03/2022 19:11:39 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/03/2022 19:11:44 - INFO - codeparrot_training - Step 4921: {'lr': 0.0004995195919233906, 'samples': 2520064, 'steps': 4921, 'loss/train': 1.6233185529708862} +03/03/2022 19:11:46 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/03/2022 19:11:49 - INFO - codeparrot_training - Step 4922: {'lr': 0.0004995192630385855, 'samples': 2520576, 'steps': 4922, 'loss/train': 2.3325858116149902} +03/03/2022 19:11:52 - INFO - codeparrot_training - Step 4923: {'lr': 0.0004995189340413509, 'samples': 2521088, 'steps': 4923, 'loss/train': 3.023954153060913} +03/03/2022 19:11:55 - INFO - codeparrot_training - Skipping example with length 683 (seq_length=1024) +03/03/2022 19:11:57 - INFO - codeparrot_training - Step 4924: {'lr': 0.0004995186049316868, 'samples': 2521600, 'steps': 4924, 'loss/train': 2.960425615310669} +03/03/2022 19:12:01 - INFO - codeparrot_training - Step 4925: {'lr': 0.0004995182757095935, 'samples': 2522112, 'steps': 4925, 'loss/train': 2.6881051063537598} +03/03/2022 19:12:03 - INFO - codeparrot_training - Skipping example with length 553 (seq_length=1024) +03/03/2022 19:12:06 - INFO - codeparrot_training - Step 4926: {'lr': 0.0004995179463750712, 'samples': 2522624, 'steps': 4926, 'loss/train': 2.5338778495788574} +03/03/2022 19:12:09 - INFO - codeparrot_training - Step 4927: {'lr': 0.0004995176169281199, 'samples': 2523136, 'steps': 4927, 'loss/train': 2.42183518409729} +03/03/2022 19:12:12 - INFO - codeparrot_training - Step 4928: {'lr': 0.0004995172873687398, 'samples': 2523648, 'steps': 4928, 'loss/train': 2.3432347774505615} +03/03/2022 19:12:13 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/03/2022 19:12:18 - INFO - codeparrot_training - Step 4929: {'lr': 0.0004995169576969311, 'samples': 2524160, 'steps': 4929, 'loss/train': 0.531111478805542} +03/03/2022 19:12:21 - INFO - codeparrot_training - Step 4930: {'lr': 0.0004995166279126938, 'samples': 2524672, 'steps': 4930, 'loss/train': 2.643454074859619} +03/03/2022 19:12:21 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/03/2022 19:12:26 - INFO - codeparrot_training - Step 4931: {'lr': 0.0004995162980160283, 'samples': 2525184, 'steps': 4931, 'loss/train': 2.7847843170166016} +03/03/2022 19:12:30 - INFO - codeparrot_training - Step 4932: {'lr': 0.0004995159680069346, 'samples': 2525696, 'steps': 4932, 'loss/train': 1.917352318763733} +03/03/2022 19:12:30 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/03/2022 19:12:35 - INFO - codeparrot_training - Step 4933: {'lr': 0.0004995156378854127, 'samples': 2526208, 'steps': 4933, 'loss/train': 2.3337295055389404} +03/03/2022 19:12:38 - INFO - codeparrot_training - Step 4934: {'lr': 0.000499515307651463, 'samples': 2526720, 'steps': 4934, 'loss/train': 2.6071813106536865} +03/03/2022 19:12:40 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/03/2022 19:12:43 - INFO - codeparrot_training - Step 4935: {'lr': 0.0004995149773050857, 'samples': 2527232, 'steps': 4935, 'loss/train': 2.3921566009521484} +03/03/2022 19:12:47 - INFO - codeparrot_training - Step 4936: {'lr': 0.0004995146468462806, 'samples': 2527744, 'steps': 4936, 'loss/train': 2.373450756072998} +03/03/2022 19:12:48 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/03/2022 19:12:52 - INFO - codeparrot_training - Step 4937: {'lr': 0.0004995143162750481, 'samples': 2528256, 'steps': 4937, 'loss/train': 2.156949520111084} +03/03/2022 19:12:55 - INFO - codeparrot_training - Step 4938: {'lr': 0.0004995139855913883, 'samples': 2528768, 'steps': 4938, 'loss/train': 2.20689058303833} +03/03/2022 19:12:56 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/03/2022 19:13:00 - INFO - codeparrot_training - Step 4939: {'lr': 0.0004995136547953014, 'samples': 2529280, 'steps': 4939, 'loss/train': 2.0212583541870117} +03/03/2022 19:13:04 - INFO - codeparrot_training - Step 4940: {'lr': 0.0004995133238867874, 'samples': 2529792, 'steps': 4940, 'loss/train': 2.652263879776001} +03/03/2022 19:13:05 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/03/2022 19:13:09 - INFO - codeparrot_training - Step 4941: {'lr': 0.0004995129928658466, 'samples': 2530304, 'steps': 4941, 'loss/train': 2.6729049682617188} +03/03/2022 19:13:12 - INFO - codeparrot_training - Step 4942: {'lr': 0.0004995126617324791, 'samples': 2530816, 'steps': 4942, 'loss/train': 2.372443675994873} +03/03/2022 19:13:14 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/03/2022 19:13:18 - INFO - codeparrot_training - Step 4943: {'lr': 0.000499512330486685, 'samples': 2531328, 'steps': 4943, 'loss/train': 1.6827476024627686} +03/03/2022 19:13:21 - INFO - codeparrot_training - Step 4944: {'lr': 0.0004995119991284645, 'samples': 2531840, 'steps': 4944, 'loss/train': 1.4622911214828491} +03/03/2022 19:13:22 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/03/2022 19:13:26 - INFO - codeparrot_training - Step 4945: {'lr': 0.0004995116676578178, 'samples': 2532352, 'steps': 4945, 'loss/train': 1.1457879543304443} +03/03/2022 19:13:29 - INFO - codeparrot_training - Step 4946: {'lr': 0.000499511336074745, 'samples': 2532864, 'steps': 4946, 'loss/train': 2.403461217880249} +03/03/2022 19:13:30 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/03/2022 19:13:34 - INFO - codeparrot_training - Step 4947: {'lr': 0.0004995110043792462, 'samples': 2533376, 'steps': 4947, 'loss/train': 1.7617229223251343} +03/03/2022 19:13:38 - INFO - codeparrot_training - Step 4948: {'lr': 0.0004995106725713217, 'samples': 2533888, 'steps': 4948, 'loss/train': 2.4262309074401855} +03/03/2022 19:13:39 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/03/2022 19:13:43 - INFO - codeparrot_training - Step 4949: {'lr': 0.0004995103406509713, 'samples': 2534400, 'steps': 4949, 'loss/train': 2.4595015048980713} +03/03/2022 19:13:46 - INFO - codeparrot_training - Step 4950: {'lr': 0.0004995100086181957, 'samples': 2534912, 'steps': 4950, 'loss/train': 2.8011679649353027} +03/03/2022 19:13:47 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/03/2022 19:13:51 - INFO - codeparrot_training - Step 4951: {'lr': 0.0004995096764729945, 'samples': 2535424, 'steps': 4951, 'loss/train': 1.997688889503479} +03/03/2022 19:13:55 - INFO - codeparrot_training - Step 4952: {'lr': 0.0004995093442153681, 'samples': 2535936, 'steps': 4952, 'loss/train': 2.8641645908355713} +03/03/2022 19:13:56 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/03/2022 19:14:00 - INFO - codeparrot_training - Step 4953: {'lr': 0.0004995090118453167, 'samples': 2536448, 'steps': 4953, 'loss/train': 1.8954434394836426} +03/03/2022 19:14:03 - INFO - codeparrot_training - Step 4954: {'lr': 0.0004995086793628405, 'samples': 2536960, 'steps': 4954, 'loss/train': 2.0424671173095703} +03/03/2022 19:14:04 - INFO - codeparrot_training - Skipping example with length 227 (seq_length=1024) +03/03/2022 19:14:08 - INFO - codeparrot_training - Step 4955: {'lr': 0.0004995083467679394, 'samples': 2537472, 'steps': 4955, 'loss/train': 2.7899105548858643} +03/03/2022 19:14:11 - INFO - codeparrot_training - Step 4956: {'lr': 0.0004995080140606137, 'samples': 2537984, 'steps': 4956, 'loss/train': 2.1049716472625732} +03/03/2022 19:14:13 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/03/2022 19:14:17 - INFO - codeparrot_training - Step 4957: {'lr': 0.0004995076812408636, 'samples': 2538496, 'steps': 4957, 'loss/train': 2.0607383251190186} +03/03/2022 19:14:20 - INFO - codeparrot_training - Step 4958: {'lr': 0.0004995073483086891, 'samples': 2539008, 'steps': 4958, 'loss/train': 1.9795573949813843} +03/03/2022 19:14:21 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/03/2022 19:14:25 - INFO - codeparrot_training - Step 4959: {'lr': 0.0004995070152640905, 'samples': 2539520, 'steps': 4959, 'loss/train': 2.205944061279297} +03/03/2022 19:14:28 - INFO - codeparrot_training - Step 4960: {'lr': 0.0004995066821070679, 'samples': 2540032, 'steps': 4960, 'loss/train': 1.8774482011795044} +03/03/2022 19:14:31 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/03/2022 19:14:34 - INFO - codeparrot_training - Step 4961: {'lr': 0.0004995063488376214, 'samples': 2540544, 'steps': 4961, 'loss/train': 2.5545992851257324} +03/03/2022 19:14:37 - INFO - codeparrot_training - Step 4962: {'lr': 0.0004995060154557513, 'samples': 2541056, 'steps': 4962, 'loss/train': 2.2972452640533447} +03/03/2022 19:14:39 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/03/2022 19:14:42 - INFO - codeparrot_training - Step 4963: {'lr': 0.0004995056819614575, 'samples': 2541568, 'steps': 4963, 'loss/train': 2.35840106010437} +03/03/2022 19:14:45 - INFO - codeparrot_training - Step 4964: {'lr': 0.0004995053483547404, 'samples': 2542080, 'steps': 4964, 'loss/train': 2.625316858291626} +03/03/2022 19:14:47 - INFO - codeparrot_training - Skipping example with length 121 (seq_length=1024) +03/03/2022 19:14:50 - INFO - codeparrot_training - Step 4965: {'lr': 0.0004995050146355999, 'samples': 2542592, 'steps': 4965, 'loss/train': 3.5701122283935547} +03/03/2022 19:14:53 - INFO - codeparrot_training - Step 4966: {'lr': 0.0004995046808040363, 'samples': 2543104, 'steps': 4966, 'loss/train': 2.6742842197418213} +03/03/2022 19:14:55 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/03/2022 19:14:59 - INFO - codeparrot_training - Step 4967: {'lr': 0.0004995043468600499, 'samples': 2543616, 'steps': 4967, 'loss/train': 2.32631516456604} +03/03/2022 19:15:02 - INFO - codeparrot_training - Step 4968: {'lr': 0.0004995040128036405, 'samples': 2544128, 'steps': 4968, 'loss/train': 2.5201194286346436} +03/03/2022 19:15:03 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/03/2022 19:15:07 - INFO - codeparrot_training - Step 4969: {'lr': 0.0004995036786348086, 'samples': 2544640, 'steps': 4969, 'loss/train': 1.6076056957244873} +03/03/2022 19:15:10 - INFO - codeparrot_training - Step 4970: {'lr': 0.0004995033443535541, 'samples': 2545152, 'steps': 4970, 'loss/train': 1.6770422458648682} +03/03/2022 19:15:11 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/03/2022 19:15:15 - INFO - codeparrot_training - Step 4971: {'lr': 0.0004995030099598773, 'samples': 2545664, 'steps': 4971, 'loss/train': 1.7019637823104858} +03/03/2022 19:15:18 - INFO - codeparrot_training - Step 4972: {'lr': 0.0004995026754537783, 'samples': 2546176, 'steps': 4972, 'loss/train': 0.24787460267543793} +03/03/2022 19:15:20 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/03/2022 19:15:24 - INFO - codeparrot_training - Step 4973: {'lr': 0.0004995023408352572, 'samples': 2546688, 'steps': 4973, 'loss/train': 2.1225314140319824} +03/03/2022 19:15:27 - INFO - codeparrot_training - Step 4974: {'lr': 0.0004995020061043142, 'samples': 2547200, 'steps': 4974, 'loss/train': 2.090137481689453} +03/03/2022 19:15:28 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/03/2022 19:15:32 - INFO - codeparrot_training - Step 4975: {'lr': 0.0004995016712609495, 'samples': 2547712, 'steps': 4975, 'loss/train': 3.2171671390533447} +03/03/2022 19:15:35 - INFO - codeparrot_training - Step 4976: {'lr': 0.0004995013363051631, 'samples': 2548224, 'steps': 4976, 'loss/train': 1.3078683614730835} +03/03/2022 19:15:36 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/03/2022 19:15:41 - INFO - codeparrot_training - Step 4977: {'lr': 0.0004995010012369554, 'samples': 2548736, 'steps': 4977, 'loss/train': 2.4288623332977295} +03/03/2022 19:15:44 - INFO - codeparrot_training - Step 4978: {'lr': 0.0004995006660563262, 'samples': 2549248, 'steps': 4978, 'loss/train': 0.9987608194351196} +03/03/2022 19:15:45 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/03/2022 19:15:49 - INFO - codeparrot_training - Step 4979: {'lr': 0.000499500330763276, 'samples': 2549760, 'steps': 4979, 'loss/train': 2.5851078033447266} +03/03/2022 19:15:52 - INFO - codeparrot_training - Step 4980: {'lr': 0.0004994999953578048, 'samples': 2550272, 'steps': 4980, 'loss/train': 3.1460390090942383} +03/03/2022 19:15:53 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/03/2022 19:15:57 - INFO - codeparrot_training - Step 4981: {'lr': 0.0004994996598399127, 'samples': 2550784, 'steps': 4981, 'loss/train': 2.649510622024536} +03/03/2022 19:16:01 - INFO - codeparrot_training - Step 4982: {'lr': 0.0004994993242095999, 'samples': 2551296, 'steps': 4982, 'loss/train': 2.756120443344116} +03/03/2022 19:16:02 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/03/2022 19:16:06 - INFO - codeparrot_training - Step 4983: {'lr': 0.0004994989884668665, 'samples': 2551808, 'steps': 4983, 'loss/train': 2.4659817218780518} +03/03/2022 19:16:09 - INFO - codeparrot_training - Step 4984: {'lr': 0.0004994986526117127, 'samples': 2552320, 'steps': 4984, 'loss/train': 2.997540235519409} +03/03/2022 19:16:10 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/03/2022 19:16:14 - INFO - codeparrot_training - Step 4985: {'lr': 0.0004994983166441388, 'samples': 2552832, 'steps': 4985, 'loss/train': 3.7428548336029053} +03/03/2022 19:16:17 - INFO - codeparrot_training - Step 4986: {'lr': 0.0004994979805641448, 'samples': 2553344, 'steps': 4986, 'loss/train': 2.352506160736084} +03/03/2022 19:16:19 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/03/2022 19:16:23 - INFO - codeparrot_training - Step 4987: {'lr': 0.0004994976443717308, 'samples': 2553856, 'steps': 4987, 'loss/train': 2.735555410385132} +03/03/2022 19:16:26 - INFO - codeparrot_training - Step 4988: {'lr': 0.000499497308066897, 'samples': 2554368, 'steps': 4988, 'loss/train': 2.138292074203491} +03/03/2022 19:16:27 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/03/2022 19:16:31 - INFO - codeparrot_training - Step 4989: {'lr': 0.0004994969716496435, 'samples': 2554880, 'steps': 4989, 'loss/train': 2.1425745487213135} +03/03/2022 19:16:34 - INFO - codeparrot_training - Step 4990: {'lr': 0.0004994966351199706, 'samples': 2555392, 'steps': 4990, 'loss/train': 2.332216739654541} +03/03/2022 19:16:35 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/03/2022 19:16:39 - INFO - codeparrot_training - Step 4991: {'lr': 0.0004994962984778784, 'samples': 2555904, 'steps': 4991, 'loss/train': 2.833285331726074} +03/03/2022 19:16:43 - INFO - codeparrot_training - Step 4992: {'lr': 0.0004994959617233669, 'samples': 2556416, 'steps': 4992, 'loss/train': 1.6718634366989136} +03/03/2022 19:16:43 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/03/2022 19:16:48 - INFO - codeparrot_training - Step 4993: {'lr': 0.0004994956248564364, 'samples': 2556928, 'steps': 4993, 'loss/train': 2.1394989490509033} +03/03/2022 19:16:51 - INFO - codeparrot_training - Step 4994: {'lr': 0.000499495287877087, 'samples': 2557440, 'steps': 4994, 'loss/train': 2.591257333755493} +03/03/2022 19:16:52 - INFO - codeparrot_training - Skipping example with length 795 (seq_length=1024) +03/03/2022 19:16:56 - INFO - codeparrot_training - Step 4995: {'lr': 0.000499494950785319, 'samples': 2557952, 'steps': 4995, 'loss/train': 2.4716546535491943} +03/03/2022 19:17:00 - INFO - codeparrot_training - Step 4996: {'lr': 0.0004994946135811324, 'samples': 2558464, 'steps': 4996, 'loss/train': 1.958532452583313} +03/03/2022 19:17:01 - INFO - codeparrot_training - Skipping example with length 37 (seq_length=1024) +03/03/2022 19:17:05 - INFO - codeparrot_training - Step 4997: {'lr': 0.0004994942762645274, 'samples': 2558976, 'steps': 4997, 'loss/train': 1.6212142705917358} +03/03/2022 19:17:08 - INFO - codeparrot_training - Step 4998: {'lr': 0.000499493938835504, 'samples': 2559488, 'steps': 4998, 'loss/train': 2.0565834045410156} +03/03/2022 19:17:09 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/03/2022 19:17:13 - INFO - codeparrot_training - Step 4999: {'lr': 0.0004994936012940626, 'samples': 2560000, 'steps': 4999, 'loss/train': 2.9388248920440674} +03/03/2022 19:17:13 - INFO - codeparrot_training - Evaluating and saving model checkpoint