Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

.gitattributes +1 -0
.summary/0/events.out.tfevents.1711742223.ip-172-31-79-185.ec2.internal +2 -2
checkpoint_p0/checkpoint_000041439_678936576.pth +3 -0
checkpoint_p0/checkpoint_000041750_684032000.pth +3 -0
replay.mp4 +3 -0
sf_log.txt +192 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 env_a100_100x100.mp4 filter=lfs diff=lfs merge=lfs -text
 env_a20_40x40.mp4 filter=lfs diff=lfs merge=lfs -text
 env_a5_25x25.mp4 filter=lfs diff=lfs merge=lfs -text

 env_a100_100x100.mp4 filter=lfs diff=lfs merge=lfs -text
 env_a20_40x40.mp4 filter=lfs diff=lfs merge=lfs -text
 env_a5_25x25.mp4 filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text

.summary/0/events.out.tfevents.1711742223.ip-172-31-79-185.ec2.internal CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c34a56081bc81ce84e226e9f8b2602235817f949c8274fc23fd8b817ac47092
-size 12345477

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ff0e2db331626f4bbb98a8323c77070ee1fda92c477c74048e6be700ba36168
+size 12548713

checkpoint_p0/checkpoint_000041439_678936576.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:040754cfb8ac5d31cf45879758b3906cd289bd0b537e4dcadcae57ab1139d4db
+size 76479020

checkpoint_p0/checkpoint_000041750_684032000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d8bbfd7349be28fbacd83e5de2f98fe6b6bf3c292047428f8a4f07b9dc30358
+size 76479020

replay.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d6ac88750b89110511d737c7040357fff3569f5100ec39dc3ea9e74bfadc211
+size 6493147

sf_log.txt CHANGED Viewed

@@ -14128,3 +14128,195 @@
 [2024-03-29 16:40:23,839][00126] Fps is (10 sec: 45874.7, 60 sec: 42325.2, 300 sec: 42154.1). Total num frames: 677265408. Throughput: 0: 41884.8. Samples: 559431480. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
 [2024-03-29 16:40:23,840][00126] Avg episode reward: [(0, '0.522')]
 [2024-03-29 16:40:27,212][00497] Updated weights for policy 0, policy_version 41345 (0.0025)

 [2024-03-29 16:40:23,839][00126] Fps is (10 sec: 45874.7, 60 sec: 42325.2, 300 sec: 42154.1). Total num frames: 677265408. Throughput: 0: 41884.8. Samples: 559431480. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
 [2024-03-29 16:40:23,840][00126] Avg episode reward: [(0, '0.522')]
 [2024-03-29 16:40:27,212][00497] Updated weights for policy 0, policy_version 41345 (0.0025)
+[2024-03-29 16:40:28,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42052.3, 300 sec: 42154.1). Total num frames: 677445632. Throughput: 0: 41759.5. Samples: 559682740. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
+[2024-03-29 16:40:28,840][00126] Avg episode reward: [(0, '0.514')]
+[2024-03-29 16:40:31,165][00497] Updated weights for policy 0, policy_version 41355 (0.0021)
+[2024-03-29 16:40:33,839][00126] Fps is (10 sec: 39322.2, 60 sec: 41779.3, 300 sec: 42209.6). Total num frames: 677658624. Throughput: 0: 42043.6. Samples: 559821560. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
+[2024-03-29 16:40:33,841][00126] Avg episode reward: [(0, '0.541')]
+[2024-03-29 16:40:35,055][00497] Updated weights for policy 0, policy_version 41365 (0.0024)
+[2024-03-29 16:40:38,519][00497] Updated weights for policy 0, policy_version 41375 (0.0018)
+[2024-03-29 16:40:38,839][00126] Fps is (10 sec: 45874.6, 60 sec: 42052.2, 300 sec: 42209.6). Total num frames: 677904384. Throughput: 0: 42435.1. Samples: 560072920. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
+[2024-03-29 16:40:38,840][00126] Avg episode reward: [(0, '0.472')]
+[2024-03-29 16:40:42,773][00497] Updated weights for policy 0, policy_version 41385 (0.0022)
+[2024-03-29 16:40:43,839][00126] Fps is (10 sec: 44236.9, 60 sec: 42598.4, 300 sec: 42265.2). Total num frames: 678100992. Throughput: 0: 42070.3. Samples: 560322440. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
+[2024-03-29 16:40:43,840][00126] Avg episode reward: [(0, '0.519')]
+[2024-03-29 16:40:46,653][00497] Updated weights for policy 0, policy_version 41395 (0.0033)
+[2024-03-29 16:40:48,059][00476] Signal inference workers to stop experience collection... (19950 times)
+[2024-03-29 16:40:48,059][00476] Signal inference workers to resume experience collection... (19950 times)
+[2024-03-29 16:40:48,100][00497] InferenceWorker_p0-w0: stopping experience collection (19950 times)
+[2024-03-29 16:40:48,100][00497] InferenceWorker_p0-w0: resuming experience collection (19950 times)
+[2024-03-29 16:40:48,839][00126] Fps is (10 sec: 37683.3, 60 sec: 41779.1, 300 sec: 42154.1). Total num frames: 678281216. Throughput: 0: 42167.9. Samples: 560454860. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
+[2024-03-29 16:40:48,840][00126] Avg episode reward: [(0, '0.518')]
+[2024-03-29 16:40:50,489][00497] Updated weights for policy 0, policy_version 41405 (0.0028)
+[2024-03-29 16:40:53,839][00126] Fps is (10 sec: 42598.0, 60 sec: 42052.3, 300 sec: 42265.2). Total num frames: 678526976. Throughput: 0: 42150.2. Samples: 560703100. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
+[2024-03-29 16:40:53,842][00126] Avg episode reward: [(0, '0.545')]
+[2024-03-29 16:40:53,953][00497] Updated weights for policy 0, policy_version 41415 (0.0029)
+[2024-03-29 16:40:58,051][00497] Updated weights for policy 0, policy_version 41425 (0.0019)
+[2024-03-29 16:40:58,839][00126] Fps is (10 sec: 45875.9, 60 sec: 42871.6, 300 sec: 42265.2). Total num frames: 678739968. Throughput: 0: 42561.0. Samples: 560970480. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
+[2024-03-29 16:40:58,840][00126] Avg episode reward: [(0, '0.586')]
+[2024-03-29 16:41:02,142][00497] Updated weights for policy 0, policy_version 41435 (0.0026)
+[2024-03-29 16:41:03,839][00126] Fps is (10 sec: 40960.2, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 678936576. Throughput: 0: 42432.9. Samples: 561092280. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
+[2024-03-29 16:41:03,840][00126] Avg episode reward: [(0, '0.487')]
+[2024-03-29 16:41:03,860][00476] Saving /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000041439_678936576.pth...
+[2024-03-29 16:41:04,176][00476] Removing /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000040822_668827648.pth
+[2024-03-29 16:41:05,970][00497] Updated weights for policy 0, policy_version 41445 (0.0035)
+[2024-03-29 16:41:08,839][00126] Fps is (10 sec: 40960.1, 60 sec: 41779.2, 300 sec: 42209.7). Total num frames: 679149568. Throughput: 0: 42307.3. Samples: 561335300. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
+[2024-03-29 16:41:08,840][00126] Avg episode reward: [(0, '0.512')]
+[2024-03-29 16:41:09,654][00497] Updated weights for policy 0, policy_version 41455 (0.0031)
+[2024-03-29 16:41:13,758][00497] Updated weights for policy 0, policy_version 41465 (0.0027)
+[2024-03-29 16:41:13,839][00126] Fps is (10 sec: 42598.7, 60 sec: 42598.5, 300 sec: 42209.6). Total num frames: 679362560. Throughput: 0: 42430.7. Samples: 561592120. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
+[2024-03-29 16:41:13,840][00126] Avg episode reward: [(0, '0.578')]
+[2024-03-29 16:41:17,860][00497] Updated weights for policy 0, policy_version 41475 (0.0022)
+[2024-03-29 16:41:18,839][00126] Fps is (10 sec: 40960.1, 60 sec: 42325.4, 300 sec: 42209.6). Total num frames: 679559168. Throughput: 0: 42113.8. Samples: 561716680. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
+[2024-03-29 16:41:18,840][00126] Avg episode reward: [(0, '0.552')]
+[2024-03-29 16:41:21,625][00497] Updated weights for policy 0, policy_version 41485 (0.0018)
+[2024-03-29 16:41:23,839][00126] Fps is (10 sec: 42598.5, 60 sec: 42052.4, 300 sec: 42209.7). Total num frames: 679788544. Throughput: 0: 42313.5. Samples: 561977020. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
+[2024-03-29 16:41:23,841][00126] Avg episode reward: [(0, '0.544')]
+[2024-03-29 16:41:24,554][00476] Signal inference workers to stop experience collection... (20000 times)
+[2024-03-29 16:41:24,599][00497] InferenceWorker_p0-w0: stopping experience collection (20000 times)
+[2024-03-29 16:41:24,717][00476] Signal inference workers to resume experience collection... (20000 times)
+[2024-03-29 16:41:24,718][00497] InferenceWorker_p0-w0: resuming experience collection (20000 times)
+[2024-03-29 16:41:24,985][00497] Updated weights for policy 0, policy_version 41495 (0.0031)
+[2024-03-29 16:41:28,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42325.4, 300 sec: 42209.6). Total num frames: 679985152. Throughput: 0: 42177.4. Samples: 562220420. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
+[2024-03-29 16:41:28,840][00126] Avg episode reward: [(0, '0.477')]
+[2024-03-29 16:41:29,258][00497] Updated weights for policy 0, policy_version 41505 (0.0026)
+[2024-03-29 16:41:33,268][00497] Updated weights for policy 0, policy_version 41515 (0.0019)
+[2024-03-29 16:41:33,839][00126] Fps is (10 sec: 40959.6, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 680198144. Throughput: 0: 42196.5. Samples: 562353700. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
+[2024-03-29 16:41:33,840][00126] Avg episode reward: [(0, '0.538')]
+[2024-03-29 16:41:37,278][00497] Updated weights for policy 0, policy_version 41525 (0.0024)
+[2024-03-29 16:41:38,839][00126] Fps is (10 sec: 44236.8, 60 sec: 42052.4, 300 sec: 42209.6). Total num frames: 680427520. Throughput: 0: 42255.7. Samples: 562604600. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
+[2024-03-29 16:41:38,840][00126] Avg episode reward: [(0, '0.520')]
+[2024-03-29 16:41:40,762][00497] Updated weights for policy 0, policy_version 41535 (0.0024)
+[2024-03-29 16:41:43,839][00126] Fps is (10 sec: 42599.0, 60 sec: 42052.3, 300 sec: 42209.6). Total num frames: 680624128. Throughput: 0: 41717.8. Samples: 562847780. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
+[2024-03-29 16:41:43,840][00126] Avg episode reward: [(0, '0.525')]
+[2024-03-29 16:41:44,999][00497] Updated weights for policy 0, policy_version 41545 (0.0024)
+[2024-03-29 16:41:48,839][00126] Fps is (10 sec: 39321.6, 60 sec: 42325.5, 300 sec: 42209.6). Total num frames: 680820736. Throughput: 0: 41759.7. Samples: 562971460. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
+[2024-03-29 16:41:48,840][00126] Avg episode reward: [(0, '0.491')]
+[2024-03-29 16:41:48,984][00497] Updated weights for policy 0, policy_version 41555 (0.0022)
+[2024-03-29 16:41:52,661][00497] Updated weights for policy 0, policy_version 41565 (0.0022)
+[2024-03-29 16:41:53,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42052.4, 300 sec: 42209.6). Total num frames: 681050112. Throughput: 0: 42443.1. Samples: 563245240. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
+[2024-03-29 16:41:53,840][00126] Avg episode reward: [(0, '0.485')]
+[2024-03-29 16:41:55,986][00497] Updated weights for policy 0, policy_version 41575 (0.0027)
+[2024-03-29 16:41:58,839][00126] Fps is (10 sec: 45874.7, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 681279488. Throughput: 0: 42145.7. Samples: 563488680. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
+[2024-03-29 16:41:58,840][00126] Avg episode reward: [(0, '0.564')]
+[2024-03-29 16:42:00,205][00497] Updated weights for policy 0, policy_version 41585 (0.0021)
+[2024-03-29 16:42:00,509][00476] Signal inference workers to stop experience collection... (20050 times)
+[2024-03-29 16:42:00,531][00497] InferenceWorker_p0-w0: stopping experience collection (20050 times)
+[2024-03-29 16:42:00,720][00476] Signal inference workers to resume experience collection... (20050 times)
+[2024-03-29 16:42:00,720][00497] InferenceWorker_p0-w0: resuming experience collection (20050 times)
+[2024-03-29 16:42:03,839][00126] Fps is (10 sec: 40959.9, 60 sec: 42052.3, 300 sec: 42265.2). Total num frames: 681459712. Throughput: 0: 42084.9. Samples: 563610500. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
+[2024-03-29 16:42:03,840][00126] Avg episode reward: [(0, '0.522')]
+[2024-03-29 16:42:04,244][00497] Updated weights for policy 0, policy_version 41595 (0.0025)
+[2024-03-29 16:42:08,007][00497] Updated weights for policy 0, policy_version 41605 (0.0019)
+[2024-03-29 16:42:08,839][00126] Fps is (10 sec: 40960.6, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 681689088. Throughput: 0: 42428.9. Samples: 563886320. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
+[2024-03-29 16:42:08,840][00126] Avg episode reward: [(0, '0.498')]
+[2024-03-29 16:42:11,398][00497] Updated weights for policy 0, policy_version 41615 (0.0023)
+[2024-03-29 16:42:13,839][00126] Fps is (10 sec: 45875.2, 60 sec: 42598.4, 300 sec: 42265.2). Total num frames: 681918464. Throughput: 0: 42315.6. Samples: 564124620. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
+[2024-03-29 16:42:13,840][00126] Avg episode reward: [(0, '0.540')]
+[2024-03-29 16:42:15,624][00497] Updated weights for policy 0, policy_version 41625 (0.0022)
+[2024-03-29 16:42:18,839][00126] Fps is (10 sec: 42598.1, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 682115072. Throughput: 0: 42218.3. Samples: 564253520. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
+[2024-03-29 16:42:18,840][00126] Avg episode reward: [(0, '0.478')]
+[2024-03-29 16:42:19,788][00497] Updated weights for policy 0, policy_version 41635 (0.0019)
+[2024-03-29 16:42:23,451][00497] Updated weights for policy 0, policy_version 41645 (0.0019)
+[2024-03-29 16:42:23,839][00126] Fps is (10 sec: 40959.3, 60 sec: 42325.2, 300 sec: 42265.2). Total num frames: 682328064. Throughput: 0: 42678.1. Samples: 564525120. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
+[2024-03-29 16:42:23,840][00126] Avg episode reward: [(0, '0.539')]
+[2024-03-29 16:42:26,669][00497] Updated weights for policy 0, policy_version 41655 (0.0022)
+[2024-03-29 16:42:28,839][00126] Fps is (10 sec: 44236.6, 60 sec: 42871.4, 300 sec: 42265.2). Total num frames: 682557440. Throughput: 0: 42624.8. Samples: 564765900. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
+[2024-03-29 16:42:28,840][00126] Avg episode reward: [(0, '0.446')]
+[2024-03-29 16:42:30,866][00497] Updated weights for policy 0, policy_version 41665 (0.0018)
+[2024-03-29 16:42:33,839][00126] Fps is (10 sec: 42598.9, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 682754048. Throughput: 0: 42987.5. Samples: 564905900. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
+[2024-03-29 16:42:33,840][00126] Avg episode reward: [(0, '0.455')]
+[2024-03-29 16:42:34,855][00497] Updated weights for policy 0, policy_version 41675 (0.0022)
+[2024-03-29 16:42:38,671][00497] Updated weights for policy 0, policy_version 41685 (0.0028)
+[2024-03-29 16:42:38,839][00126] Fps is (10 sec: 40960.1, 60 sec: 42325.3, 300 sec: 42320.7). Total num frames: 682967040. Throughput: 0: 42818.1. Samples: 565172060. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
+[2024-03-29 16:42:38,840][00126] Avg episode reward: [(0, '0.565')]
+[2024-03-29 16:42:41,176][00476] Signal inference workers to stop experience collection... (20100 times)
+[2024-03-29 16:42:41,221][00497] InferenceWorker_p0-w0: stopping experience collection (20100 times)
+[2024-03-29 16:42:41,268][00476] Signal inference workers to resume experience collection... (20100 times)
+[2024-03-29 16:42:41,291][00497] InferenceWorker_p0-w0: resuming experience collection (20100 times)
+[2024-03-29 16:42:41,947][00497] Updated weights for policy 0, policy_version 41695 (0.0033)
+[2024-03-29 16:42:43,839][00126] Fps is (10 sec: 44236.8, 60 sec: 42871.4, 300 sec: 42265.2). Total num frames: 683196416. Throughput: 0: 42752.9. Samples: 565412560. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
+[2024-03-29 16:42:43,840][00126] Avg episode reward: [(0, '0.519')]
+[2024-03-29 16:42:46,218][00497] Updated weights for policy 0, policy_version 41705 (0.0019)
+[2024-03-29 16:42:48,839][00126] Fps is (10 sec: 42598.5, 60 sec: 42871.4, 300 sec: 42320.7). Total num frames: 683393024. Throughput: 0: 43124.0. Samples: 565551080. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
+[2024-03-29 16:42:48,840][00126] Avg episode reward: [(0, '0.517')]
+[2024-03-29 16:42:50,112][00497] Updated weights for policy 0, policy_version 41715 (0.0025)
+[2024-03-29 16:42:53,839][00126] Fps is (10 sec: 40960.0, 60 sec: 42598.4, 300 sec: 42376.3). Total num frames: 683606016. Throughput: 0: 42952.4. Samples: 565819180. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
+[2024-03-29 16:42:53,840][00126] Avg episode reward: [(0, '0.405')]
+[2024-03-29 16:42:53,861][00497] Updated weights for policy 0, policy_version 41725 (0.0024)
+[2024-03-29 16:42:57,201][00497] Updated weights for policy 0, policy_version 41735 (0.0020)
+[2024-03-29 16:42:58,839][00126] Fps is (10 sec: 45875.2, 60 sec: 42871.5, 300 sec: 42320.7). Total num frames: 683851776. Throughput: 0: 42947.1. Samples: 566057240. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
+[2024-03-29 16:42:58,840][00126] Avg episode reward: [(0, '0.530')]
+[2024-03-29 16:43:01,480][00497] Updated weights for policy 0, policy_version 41745 (0.0021)
+[2024-03-29 16:43:03,839][00126] Fps is (10 sec: 42597.8, 60 sec: 42871.3, 300 sec: 42320.7). Total num frames: 684032000. Throughput: 0: 43021.2. Samples: 566189480. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
+[2024-03-29 16:43:03,840][00126] Avg episode reward: [(0, '0.517')]
+[2024-03-29 16:43:03,862][00476] Saving /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000041750_684032000.pth...
+[2024-03-29 16:43:04,183][00476] Removing /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000041133_673923072.pth
+[2024-03-29 16:43:05,654][00497] Updated weights for policy 0, policy_version 41755 (0.0019)
+[2024-03-29 16:43:08,839][00126] Fps is (10 sec: 39321.6, 60 sec: 42598.3, 300 sec: 42376.3). Total num frames: 684244992. Throughput: 0: 42878.8. Samples: 566454660. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
+[2024-03-29 16:43:08,841][00126] Avg episode reward: [(0, '0.469')]
+[2024-03-29 16:43:09,374][00497] Updated weights for policy 0, policy_version 41765 (0.0024)
+[2024-03-29 16:43:12,545][00497] Updated weights for policy 0, policy_version 41775 (0.0019)
+[2024-03-29 16:43:13,653][00476] Signal inference workers to stop experience collection... (20150 times)
+[2024-03-29 16:43:13,703][00497] InferenceWorker_p0-w0: stopping experience collection (20150 times)
+[2024-03-29 16:43:13,834][00476] Signal inference workers to resume experience collection... (20150 times)
+[2024-03-29 16:43:13,834][00497] InferenceWorker_p0-w0: resuming experience collection (20150 times)
+[2024-03-29 16:43:13,839][00126] Fps is (10 sec: 45875.6, 60 sec: 42871.4, 300 sec: 42320.7). Total num frames: 684490752. Throughput: 0: 42767.5. Samples: 566690440. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
+[2024-03-29 16:43:13,840][00126] Avg episode reward: [(0, '0.536')]
+[2024-03-29 16:43:17,068][00497] Updated weights for policy 0, policy_version 41785 (0.0021)
+[2024-03-29 16:43:18,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 684670976. Throughput: 0: 42478.7. Samples: 566817440. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
+[2024-03-29 16:43:18,840][00126] Avg episode reward: [(0, '0.504')]
+[2024-03-29 16:43:21,132][00497] Updated weights for policy 0, policy_version 41795 (0.0022)
+[2024-03-29 16:43:23,839][00126] Fps is (10 sec: 39321.6, 60 sec: 42598.5, 300 sec: 42320.7). Total num frames: 684883968. Throughput: 0: 42453.3. Samples: 567082460. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
+[2024-03-29 16:43:23,840][00126] Avg episode reward: [(0, '0.614')]
+[2024-03-29 16:43:24,923][00497] Updated weights for policy 0, policy_version 41805 (0.0029)
+[2024-03-29 16:43:28,069][00497] Updated weights for policy 0, policy_version 41815 (0.0030)
+[2024-03-29 16:43:28,839][00126] Fps is (10 sec: 45875.4, 60 sec: 42871.5, 300 sec: 42320.7). Total num frames: 685129728. Throughput: 0: 42478.3. Samples: 567324080. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
+[2024-03-29 16:43:28,840][00126] Avg episode reward: [(0, '0.509')]
+[2024-03-29 16:43:32,415][00497] Updated weights for policy 0, policy_version 41825 (0.0019)
+[2024-03-29 16:43:33,839][00126] Fps is (10 sec: 44236.6, 60 sec: 42871.4, 300 sec: 42376.3). Total num frames: 685326336. Throughput: 0: 42300.8. Samples: 567454620. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
+[2024-03-29 16:43:33,840][00126] Avg episode reward: [(0, '0.570')]
+[2024-03-29 16:43:36,562][00497] Updated weights for policy 0, policy_version 41835 (0.0018)
+[2024-03-29 16:43:38,839][00126] Fps is (10 sec: 37683.0, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 685506560. Throughput: 0: 42140.0. Samples: 567715480. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
+[2024-03-29 16:43:38,840][00126] Avg episode reward: [(0, '0.523')]
+[2024-03-29 16:43:40,285][00497] Updated weights for policy 0, policy_version 41845 (0.0033)
+[2024-03-29 16:43:43,508][00497] Updated weights for policy 0, policy_version 41855 (0.0028)
+[2024-03-29 16:43:43,839][00126] Fps is (10 sec: 42598.7, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 685752320. Throughput: 0: 42272.9. Samples: 567959520. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
+[2024-03-29 16:43:43,841][00126] Avg episode reward: [(0, '0.409')]
+[2024-03-29 16:43:47,834][00497] Updated weights for policy 0, policy_version 41865 (0.0024)
+[2024-03-29 16:43:48,839][00126] Fps is (10 sec: 45874.9, 60 sec: 42871.4, 300 sec: 42376.3). Total num frames: 685965312. Throughput: 0: 42126.3. Samples: 568085160. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
+[2024-03-29 16:43:48,840][00126] Avg episode reward: [(0, '0.483')]
+[2024-03-29 16:43:50,885][00476] Signal inference workers to stop experience collection... (20200 times)
+[2024-03-29 16:43:50,886][00476] Signal inference workers to resume experience collection... (20200 times)
+[2024-03-29 16:43:50,919][00497] InferenceWorker_p0-w0: stopping experience collection (20200 times)
+[2024-03-29 16:43:50,920][00497] InferenceWorker_p0-w0: resuming experience collection (20200 times)
+[2024-03-29 16:43:52,115][00497] Updated weights for policy 0, policy_version 41875 (0.0024)
+[2024-03-29 16:43:53,839][00126] Fps is (10 sec: 39321.4, 60 sec: 42325.3, 300 sec: 42320.7). Total num frames: 686145536. Throughput: 0: 42307.0. Samples: 568358480. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
+[2024-03-29 16:43:53,840][00126] Avg episode reward: [(0, '0.547')]
+[2024-03-29 16:43:55,706][00497] Updated weights for policy 0, policy_version 41885 (0.0033)
+[2024-03-29 16:43:58,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42325.3, 300 sec: 42320.7). Total num frames: 686391296. Throughput: 0: 42537.3. Samples: 568604620. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
+[2024-03-29 16:43:58,840][00126] Avg episode reward: [(0, '0.548')]
+[2024-03-29 16:43:58,870][00497] Updated weights for policy 0, policy_version 41895 (0.0022)
+[2024-03-29 16:44:03,523][00497] Updated weights for policy 0, policy_version 41905 (0.0021)
+[2024-03-29 16:44:03,839][00126] Fps is (10 sec: 44236.7, 60 sec: 42598.4, 300 sec: 42376.2). Total num frames: 686587904. Throughput: 0: 42040.8. Samples: 568709280. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
+[2024-03-29 16:44:03,840][00126] Avg episode reward: [(0, '0.560')]
+[2024-03-29 16:44:07,753][00497] Updated weights for policy 0, policy_version 41915 (0.0026)
+[2024-03-29 16:44:08,839][00126] Fps is (10 sec: 37683.2, 60 sec: 42052.2, 300 sec: 42209.6). Total num frames: 686768128. Throughput: 0: 42192.0. Samples: 568981100. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
+[2024-03-29 16:44:08,840][00126] Avg episode reward: [(0, '0.487')]
+[2024-03-29 16:44:11,387][00497] Updated weights for policy 0, policy_version 41925 (0.0025)
+[2024-03-29 16:44:13,839][00126] Fps is (10 sec: 42598.7, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 687013888. Throughput: 0: 42369.7. Samples: 569230720. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
+[2024-03-29 16:44:13,840][00126] Avg episode reward: [(0, '0.412')]
+[2024-03-29 16:44:14,672][00497] Updated weights for policy 0, policy_version 41935 (0.0030)
+[2024-03-29 16:44:18,839][00126] Fps is (10 sec: 44237.4, 60 sec: 42325.4, 300 sec: 42320.7). Total num frames: 687210496. Throughput: 0: 41945.9. Samples: 569342180. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
+[2024-03-29 16:44:18,840][00126] Avg episode reward: [(0, '0.531')]
+[2024-03-29 16:44:19,134][00497] Updated weights for policy 0, policy_version 41945 (0.0020)
+[2024-03-29 16:44:23,135][00497] Updated weights for policy 0, policy_version 41955 (0.0022)
+[2024-03-29 16:44:23,450][00476] Signal inference workers to stop experience collection... (20250 times)
+[2024-03-29 16:44:23,451][00476] Signal inference workers to resume experience collection... (20250 times)
+[2024-03-29 16:44:23,495][00497] InferenceWorker_p0-w0: stopping experience collection (20250 times)
+[2024-03-29 16:44:23,495][00497] InferenceWorker_p0-w0: resuming experience collection (20250 times)
+[2024-03-29 16:44:23,839][00126] Fps is (10 sec: 40959.9, 60 sec: 42325.3, 300 sec: 42376.3). Total num frames: 687423488. Throughput: 0: 42243.0. Samples: 569616420. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
+[2024-03-29 16:44:23,840][00126] Avg episode reward: [(0, '0.511')]
+[2024-03-29 16:44:26,841][00497] Updated weights for policy 0, policy_version 41965 (0.0019)
+[2024-03-29 16:44:28,839][00126] Fps is (10 sec: 44236.4, 60 sec: 42052.2, 300 sec: 42376.3). Total num frames: 687652864. Throughput: 0: 42609.8. Samples: 569876960. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
+[2024-03-29 16:44:28,840][00126] Avg episode reward: [(0, '0.622')]