daveey commited on
Commit
f3c91ed
1 Parent(s): 2980854

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  env_a100_100x100.mp4 filter=lfs diff=lfs merge=lfs -text
37
  env_a20_40x40.mp4 filter=lfs diff=lfs merge=lfs -text
38
  env_a5_25x25.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
36
  env_a100_100x100.mp4 filter=lfs diff=lfs merge=lfs -text
37
  env_a20_40x40.mp4 filter=lfs diff=lfs merge=lfs -text
38
  env_a5_25x25.mp4 filter=lfs diff=lfs merge=lfs -text
39
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1711742223.ip-172-31-79-185.ec2.internal CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c34a56081bc81ce84e226e9f8b2602235817f949c8274fc23fd8b817ac47092
3
- size 12345477
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ff0e2db331626f4bbb98a8323c77070ee1fda92c477c74048e6be700ba36168
3
+ size 12548713
checkpoint_p0/checkpoint_000041439_678936576.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:040754cfb8ac5d31cf45879758b3906cd289bd0b537e4dcadcae57ab1139d4db
3
+ size 76479020
checkpoint_p0/checkpoint_000041750_684032000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d8bbfd7349be28fbacd83e5de2f98fe6b6bf3c292047428f8a4f07b9dc30358
3
+ size 76479020
replay.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d6ac88750b89110511d737c7040357fff3569f5100ec39dc3ea9e74bfadc211
3
+ size 6493147
sf_log.txt CHANGED
@@ -14128,3 +14128,195 @@
14128
  [2024-03-29 16:40:23,839][00126] Fps is (10 sec: 45874.7, 60 sec: 42325.2, 300 sec: 42154.1). Total num frames: 677265408. Throughput: 0: 41884.8. Samples: 559431480. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
14129
  [2024-03-29 16:40:23,840][00126] Avg episode reward: [(0, '0.522')]
14130
  [2024-03-29 16:40:27,212][00497] Updated weights for policy 0, policy_version 41345 (0.0025)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14128
  [2024-03-29 16:40:23,839][00126] Fps is (10 sec: 45874.7, 60 sec: 42325.2, 300 sec: 42154.1). Total num frames: 677265408. Throughput: 0: 41884.8. Samples: 559431480. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
14129
  [2024-03-29 16:40:23,840][00126] Avg episode reward: [(0, '0.522')]
14130
  [2024-03-29 16:40:27,212][00497] Updated weights for policy 0, policy_version 41345 (0.0025)
14131
+ [2024-03-29 16:40:28,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42052.3, 300 sec: 42154.1). Total num frames: 677445632. Throughput: 0: 41759.5. Samples: 559682740. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
14132
+ [2024-03-29 16:40:28,840][00126] Avg episode reward: [(0, '0.514')]
14133
+ [2024-03-29 16:40:31,165][00497] Updated weights for policy 0, policy_version 41355 (0.0021)
14134
+ [2024-03-29 16:40:33,839][00126] Fps is (10 sec: 39322.2, 60 sec: 41779.3, 300 sec: 42209.6). Total num frames: 677658624. Throughput: 0: 42043.6. Samples: 559821560. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
14135
+ [2024-03-29 16:40:33,841][00126] Avg episode reward: [(0, '0.541')]
14136
+ [2024-03-29 16:40:35,055][00497] Updated weights for policy 0, policy_version 41365 (0.0024)
14137
+ [2024-03-29 16:40:38,519][00497] Updated weights for policy 0, policy_version 41375 (0.0018)
14138
+ [2024-03-29 16:40:38,839][00126] Fps is (10 sec: 45874.6, 60 sec: 42052.2, 300 sec: 42209.6). Total num frames: 677904384. Throughput: 0: 42435.1. Samples: 560072920. Policy #0 lag: (min: 0.0, avg: 21.0, max: 42.0)
14139
+ [2024-03-29 16:40:38,840][00126] Avg episode reward: [(0, '0.472')]
14140
+ [2024-03-29 16:40:42,773][00497] Updated weights for policy 0, policy_version 41385 (0.0022)
14141
+ [2024-03-29 16:40:43,839][00126] Fps is (10 sec: 44236.9, 60 sec: 42598.4, 300 sec: 42265.2). Total num frames: 678100992. Throughput: 0: 42070.3. Samples: 560322440. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
14142
+ [2024-03-29 16:40:43,840][00126] Avg episode reward: [(0, '0.519')]
14143
+ [2024-03-29 16:40:46,653][00497] Updated weights for policy 0, policy_version 41395 (0.0033)
14144
+ [2024-03-29 16:40:48,059][00476] Signal inference workers to stop experience collection... (19950 times)
14145
+ [2024-03-29 16:40:48,059][00476] Signal inference workers to resume experience collection... (19950 times)
14146
+ [2024-03-29 16:40:48,100][00497] InferenceWorker_p0-w0: stopping experience collection (19950 times)
14147
+ [2024-03-29 16:40:48,100][00497] InferenceWorker_p0-w0: resuming experience collection (19950 times)
14148
+ [2024-03-29 16:40:48,839][00126] Fps is (10 sec: 37683.3, 60 sec: 41779.1, 300 sec: 42154.1). Total num frames: 678281216. Throughput: 0: 42167.9. Samples: 560454860. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
14149
+ [2024-03-29 16:40:48,840][00126] Avg episode reward: [(0, '0.518')]
14150
+ [2024-03-29 16:40:50,489][00497] Updated weights for policy 0, policy_version 41405 (0.0028)
14151
+ [2024-03-29 16:40:53,839][00126] Fps is (10 sec: 42598.0, 60 sec: 42052.3, 300 sec: 42265.2). Total num frames: 678526976. Throughput: 0: 42150.2. Samples: 560703100. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
14152
+ [2024-03-29 16:40:53,842][00126] Avg episode reward: [(0, '0.545')]
14153
+ [2024-03-29 16:40:53,953][00497] Updated weights for policy 0, policy_version 41415 (0.0029)
14154
+ [2024-03-29 16:40:58,051][00497] Updated weights for policy 0, policy_version 41425 (0.0019)
14155
+ [2024-03-29 16:40:58,839][00126] Fps is (10 sec: 45875.9, 60 sec: 42871.6, 300 sec: 42265.2). Total num frames: 678739968. Throughput: 0: 42561.0. Samples: 560970480. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
14156
+ [2024-03-29 16:40:58,840][00126] Avg episode reward: [(0, '0.586')]
14157
+ [2024-03-29 16:41:02,142][00497] Updated weights for policy 0, policy_version 41435 (0.0026)
14158
+ [2024-03-29 16:41:03,839][00126] Fps is (10 sec: 40960.2, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 678936576. Throughput: 0: 42432.9. Samples: 561092280. Policy #0 lag: (min: 2.0, avg: 22.9, max: 43.0)
14159
+ [2024-03-29 16:41:03,840][00126] Avg episode reward: [(0, '0.487')]
14160
+ [2024-03-29 16:41:03,860][00476] Saving /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000041439_678936576.pth...
14161
+ [2024-03-29 16:41:04,176][00476] Removing /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000040822_668827648.pth
14162
+ [2024-03-29 16:41:05,970][00497] Updated weights for policy 0, policy_version 41445 (0.0035)
14163
+ [2024-03-29 16:41:08,839][00126] Fps is (10 sec: 40960.1, 60 sec: 41779.2, 300 sec: 42209.7). Total num frames: 679149568. Throughput: 0: 42307.3. Samples: 561335300. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
14164
+ [2024-03-29 16:41:08,840][00126] Avg episode reward: [(0, '0.512')]
14165
+ [2024-03-29 16:41:09,654][00497] Updated weights for policy 0, policy_version 41455 (0.0031)
14166
+ [2024-03-29 16:41:13,758][00497] Updated weights for policy 0, policy_version 41465 (0.0027)
14167
+ [2024-03-29 16:41:13,839][00126] Fps is (10 sec: 42598.7, 60 sec: 42598.5, 300 sec: 42209.6). Total num frames: 679362560. Throughput: 0: 42430.7. Samples: 561592120. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
14168
+ [2024-03-29 16:41:13,840][00126] Avg episode reward: [(0, '0.578')]
14169
+ [2024-03-29 16:41:17,860][00497] Updated weights for policy 0, policy_version 41475 (0.0022)
14170
+ [2024-03-29 16:41:18,839][00126] Fps is (10 sec: 40960.1, 60 sec: 42325.4, 300 sec: 42209.6). Total num frames: 679559168. Throughput: 0: 42113.8. Samples: 561716680. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
14171
+ [2024-03-29 16:41:18,840][00126] Avg episode reward: [(0, '0.552')]
14172
+ [2024-03-29 16:41:21,625][00497] Updated weights for policy 0, policy_version 41485 (0.0018)
14173
+ [2024-03-29 16:41:23,839][00126] Fps is (10 sec: 42598.5, 60 sec: 42052.4, 300 sec: 42209.7). Total num frames: 679788544. Throughput: 0: 42313.5. Samples: 561977020. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
14174
+ [2024-03-29 16:41:23,841][00126] Avg episode reward: [(0, '0.544')]
14175
+ [2024-03-29 16:41:24,554][00476] Signal inference workers to stop experience collection... (20000 times)
14176
+ [2024-03-29 16:41:24,599][00497] InferenceWorker_p0-w0: stopping experience collection (20000 times)
14177
+ [2024-03-29 16:41:24,717][00476] Signal inference workers to resume experience collection... (20000 times)
14178
+ [2024-03-29 16:41:24,718][00497] InferenceWorker_p0-w0: resuming experience collection (20000 times)
14179
+ [2024-03-29 16:41:24,985][00497] Updated weights for policy 0, policy_version 41495 (0.0031)
14180
+ [2024-03-29 16:41:28,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42325.4, 300 sec: 42209.6). Total num frames: 679985152. Throughput: 0: 42177.4. Samples: 562220420. Policy #0 lag: (min: 1.0, avg: 19.6, max: 43.0)
14181
+ [2024-03-29 16:41:28,840][00126] Avg episode reward: [(0, '0.477')]
14182
+ [2024-03-29 16:41:29,258][00497] Updated weights for policy 0, policy_version 41505 (0.0026)
14183
+ [2024-03-29 16:41:33,268][00497] Updated weights for policy 0, policy_version 41515 (0.0019)
14184
+ [2024-03-29 16:41:33,839][00126] Fps is (10 sec: 40959.6, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 680198144. Throughput: 0: 42196.5. Samples: 562353700. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
14185
+ [2024-03-29 16:41:33,840][00126] Avg episode reward: [(0, '0.538')]
14186
+ [2024-03-29 16:41:37,278][00497] Updated weights for policy 0, policy_version 41525 (0.0024)
14187
+ [2024-03-29 16:41:38,839][00126] Fps is (10 sec: 44236.8, 60 sec: 42052.4, 300 sec: 42209.6). Total num frames: 680427520. Throughput: 0: 42255.7. Samples: 562604600. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
14188
+ [2024-03-29 16:41:38,840][00126] Avg episode reward: [(0, '0.520')]
14189
+ [2024-03-29 16:41:40,762][00497] Updated weights for policy 0, policy_version 41535 (0.0024)
14190
+ [2024-03-29 16:41:43,839][00126] Fps is (10 sec: 42599.0, 60 sec: 42052.3, 300 sec: 42209.6). Total num frames: 680624128. Throughput: 0: 41717.8. Samples: 562847780. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
14191
+ [2024-03-29 16:41:43,840][00126] Avg episode reward: [(0, '0.525')]
14192
+ [2024-03-29 16:41:44,999][00497] Updated weights for policy 0, policy_version 41545 (0.0024)
14193
+ [2024-03-29 16:41:48,839][00126] Fps is (10 sec: 39321.6, 60 sec: 42325.5, 300 sec: 42209.6). Total num frames: 680820736. Throughput: 0: 41759.7. Samples: 562971460. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
14194
+ [2024-03-29 16:41:48,840][00126] Avg episode reward: [(0, '0.491')]
14195
+ [2024-03-29 16:41:48,984][00497] Updated weights for policy 0, policy_version 41555 (0.0022)
14196
+ [2024-03-29 16:41:52,661][00497] Updated weights for policy 0, policy_version 41565 (0.0022)
14197
+ [2024-03-29 16:41:53,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42052.4, 300 sec: 42209.6). Total num frames: 681050112. Throughput: 0: 42443.1. Samples: 563245240. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
14198
+ [2024-03-29 16:41:53,840][00126] Avg episode reward: [(0, '0.485')]
14199
+ [2024-03-29 16:41:55,986][00497] Updated weights for policy 0, policy_version 41575 (0.0027)
14200
+ [2024-03-29 16:41:58,839][00126] Fps is (10 sec: 45874.7, 60 sec: 42325.3, 300 sec: 42265.2). Total num frames: 681279488. Throughput: 0: 42145.7. Samples: 563488680. Policy #0 lag: (min: 0.0, avg: 19.9, max: 41.0)
14201
+ [2024-03-29 16:41:58,840][00126] Avg episode reward: [(0, '0.564')]
14202
+ [2024-03-29 16:42:00,205][00497] Updated weights for policy 0, policy_version 41585 (0.0021)
14203
+ [2024-03-29 16:42:00,509][00476] Signal inference workers to stop experience collection... (20050 times)
14204
+ [2024-03-29 16:42:00,531][00497] InferenceWorker_p0-w0: stopping experience collection (20050 times)
14205
+ [2024-03-29 16:42:00,720][00476] Signal inference workers to resume experience collection... (20050 times)
14206
+ [2024-03-29 16:42:00,720][00497] InferenceWorker_p0-w0: resuming experience collection (20050 times)
14207
+ [2024-03-29 16:42:03,839][00126] Fps is (10 sec: 40959.9, 60 sec: 42052.3, 300 sec: 42265.2). Total num frames: 681459712. Throughput: 0: 42084.9. Samples: 563610500. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
14208
+ [2024-03-29 16:42:03,840][00126] Avg episode reward: [(0, '0.522')]
14209
+ [2024-03-29 16:42:04,244][00497] Updated weights for policy 0, policy_version 41595 (0.0025)
14210
+ [2024-03-29 16:42:08,007][00497] Updated weights for policy 0, policy_version 41605 (0.0019)
14211
+ [2024-03-29 16:42:08,839][00126] Fps is (10 sec: 40960.6, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 681689088. Throughput: 0: 42428.9. Samples: 563886320. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
14212
+ [2024-03-29 16:42:08,840][00126] Avg episode reward: [(0, '0.498')]
14213
+ [2024-03-29 16:42:11,398][00497] Updated weights for policy 0, policy_version 41615 (0.0023)
14214
+ [2024-03-29 16:42:13,839][00126] Fps is (10 sec: 45875.2, 60 sec: 42598.4, 300 sec: 42265.2). Total num frames: 681918464. Throughput: 0: 42315.6. Samples: 564124620. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
14215
+ [2024-03-29 16:42:13,840][00126] Avg episode reward: [(0, '0.540')]
14216
+ [2024-03-29 16:42:15,624][00497] Updated weights for policy 0, policy_version 41625 (0.0022)
14217
+ [2024-03-29 16:42:18,839][00126] Fps is (10 sec: 42598.1, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 682115072. Throughput: 0: 42218.3. Samples: 564253520. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
14218
+ [2024-03-29 16:42:18,840][00126] Avg episode reward: [(0, '0.478')]
14219
+ [2024-03-29 16:42:19,788][00497] Updated weights for policy 0, policy_version 41635 (0.0019)
14220
+ [2024-03-29 16:42:23,451][00497] Updated weights for policy 0, policy_version 41645 (0.0019)
14221
+ [2024-03-29 16:42:23,839][00126] Fps is (10 sec: 40959.3, 60 sec: 42325.2, 300 sec: 42265.2). Total num frames: 682328064. Throughput: 0: 42678.1. Samples: 564525120. Policy #0 lag: (min: 0.0, avg: 20.6, max: 40.0)
14222
+ [2024-03-29 16:42:23,840][00126] Avg episode reward: [(0, '0.539')]
14223
+ [2024-03-29 16:42:26,669][00497] Updated weights for policy 0, policy_version 41655 (0.0022)
14224
+ [2024-03-29 16:42:28,839][00126] Fps is (10 sec: 44236.6, 60 sec: 42871.4, 300 sec: 42265.2). Total num frames: 682557440. Throughput: 0: 42624.8. Samples: 564765900. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
14225
+ [2024-03-29 16:42:28,840][00126] Avg episode reward: [(0, '0.446')]
14226
+ [2024-03-29 16:42:30,866][00497] Updated weights for policy 0, policy_version 41665 (0.0018)
14227
+ [2024-03-29 16:42:33,839][00126] Fps is (10 sec: 42598.9, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 682754048. Throughput: 0: 42987.5. Samples: 564905900. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
14228
+ [2024-03-29 16:42:33,840][00126] Avg episode reward: [(0, '0.455')]
14229
+ [2024-03-29 16:42:34,855][00497] Updated weights for policy 0, policy_version 41675 (0.0022)
14230
+ [2024-03-29 16:42:38,671][00497] Updated weights for policy 0, policy_version 41685 (0.0028)
14231
+ [2024-03-29 16:42:38,839][00126] Fps is (10 sec: 40960.1, 60 sec: 42325.3, 300 sec: 42320.7). Total num frames: 682967040. Throughput: 0: 42818.1. Samples: 565172060. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
14232
+ [2024-03-29 16:42:38,840][00126] Avg episode reward: [(0, '0.565')]
14233
+ [2024-03-29 16:42:41,176][00476] Signal inference workers to stop experience collection... (20100 times)
14234
+ [2024-03-29 16:42:41,221][00497] InferenceWorker_p0-w0: stopping experience collection (20100 times)
14235
+ [2024-03-29 16:42:41,268][00476] Signal inference workers to resume experience collection... (20100 times)
14236
+ [2024-03-29 16:42:41,291][00497] InferenceWorker_p0-w0: resuming experience collection (20100 times)
14237
+ [2024-03-29 16:42:41,947][00497] Updated weights for policy 0, policy_version 41695 (0.0033)
14238
+ [2024-03-29 16:42:43,839][00126] Fps is (10 sec: 44236.8, 60 sec: 42871.4, 300 sec: 42265.2). Total num frames: 683196416. Throughput: 0: 42752.9. Samples: 565412560. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
14239
+ [2024-03-29 16:42:43,840][00126] Avg episode reward: [(0, '0.519')]
14240
+ [2024-03-29 16:42:46,218][00497] Updated weights for policy 0, policy_version 41705 (0.0019)
14241
+ [2024-03-29 16:42:48,839][00126] Fps is (10 sec: 42598.5, 60 sec: 42871.4, 300 sec: 42320.7). Total num frames: 683393024. Throughput: 0: 43124.0. Samples: 565551080. Policy #0 lag: (min: 1.0, avg: 21.4, max: 41.0)
14242
+ [2024-03-29 16:42:48,840][00126] Avg episode reward: [(0, '0.517')]
14243
+ [2024-03-29 16:42:50,112][00497] Updated weights for policy 0, policy_version 41715 (0.0025)
14244
+ [2024-03-29 16:42:53,839][00126] Fps is (10 sec: 40960.0, 60 sec: 42598.4, 300 sec: 42376.3). Total num frames: 683606016. Throughput: 0: 42952.4. Samples: 565819180. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
14245
+ [2024-03-29 16:42:53,840][00126] Avg episode reward: [(0, '0.405')]
14246
+ [2024-03-29 16:42:53,861][00497] Updated weights for policy 0, policy_version 41725 (0.0024)
14247
+ [2024-03-29 16:42:57,201][00497] Updated weights for policy 0, policy_version 41735 (0.0020)
14248
+ [2024-03-29 16:42:58,839][00126] Fps is (10 sec: 45875.2, 60 sec: 42871.5, 300 sec: 42320.7). Total num frames: 683851776. Throughput: 0: 42947.1. Samples: 566057240. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
14249
+ [2024-03-29 16:42:58,840][00126] Avg episode reward: [(0, '0.530')]
14250
+ [2024-03-29 16:43:01,480][00497] Updated weights for policy 0, policy_version 41745 (0.0021)
14251
+ [2024-03-29 16:43:03,839][00126] Fps is (10 sec: 42597.8, 60 sec: 42871.3, 300 sec: 42320.7). Total num frames: 684032000. Throughput: 0: 43021.2. Samples: 566189480. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
14252
+ [2024-03-29 16:43:03,840][00126] Avg episode reward: [(0, '0.517')]
14253
+ [2024-03-29 16:43:03,862][00476] Saving /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000041750_684032000.pth...
14254
+ [2024-03-29 16:43:04,183][00476] Removing /workspace/metta/train_dir/b.a20.20x20_40x40.norm/checkpoint_p0/checkpoint_000041133_673923072.pth
14255
+ [2024-03-29 16:43:05,654][00497] Updated weights for policy 0, policy_version 41755 (0.0019)
14256
+ [2024-03-29 16:43:08,839][00126] Fps is (10 sec: 39321.6, 60 sec: 42598.3, 300 sec: 42376.3). Total num frames: 684244992. Throughput: 0: 42878.8. Samples: 566454660. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
14257
+ [2024-03-29 16:43:08,841][00126] Avg episode reward: [(0, '0.469')]
14258
+ [2024-03-29 16:43:09,374][00497] Updated weights for policy 0, policy_version 41765 (0.0024)
14259
+ [2024-03-29 16:43:12,545][00497] Updated weights for policy 0, policy_version 41775 (0.0019)
14260
+ [2024-03-29 16:43:13,653][00476] Signal inference workers to stop experience collection... (20150 times)
14261
+ [2024-03-29 16:43:13,703][00497] InferenceWorker_p0-w0: stopping experience collection (20150 times)
14262
+ [2024-03-29 16:43:13,834][00476] Signal inference workers to resume experience collection... (20150 times)
14263
+ [2024-03-29 16:43:13,834][00497] InferenceWorker_p0-w0: resuming experience collection (20150 times)
14264
+ [2024-03-29 16:43:13,839][00126] Fps is (10 sec: 45875.6, 60 sec: 42871.4, 300 sec: 42320.7). Total num frames: 684490752. Throughput: 0: 42767.5. Samples: 566690440. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
14265
+ [2024-03-29 16:43:13,840][00126] Avg episode reward: [(0, '0.536')]
14266
+ [2024-03-29 16:43:17,068][00497] Updated weights for policy 0, policy_version 41785 (0.0021)
14267
+ [2024-03-29 16:43:18,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 684670976. Throughput: 0: 42478.7. Samples: 566817440. Policy #0 lag: (min: 1.0, avg: 19.0, max: 41.0)
14268
+ [2024-03-29 16:43:18,840][00126] Avg episode reward: [(0, '0.504')]
14269
+ [2024-03-29 16:43:21,132][00497] Updated weights for policy 0, policy_version 41795 (0.0022)
14270
+ [2024-03-29 16:43:23,839][00126] Fps is (10 sec: 39321.6, 60 sec: 42598.5, 300 sec: 42320.7). Total num frames: 684883968. Throughput: 0: 42453.3. Samples: 567082460. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
14271
+ [2024-03-29 16:43:23,840][00126] Avg episode reward: [(0, '0.614')]
14272
+ [2024-03-29 16:43:24,923][00497] Updated weights for policy 0, policy_version 41805 (0.0029)
14273
+ [2024-03-29 16:43:28,069][00497] Updated weights for policy 0, policy_version 41815 (0.0030)
14274
+ [2024-03-29 16:43:28,839][00126] Fps is (10 sec: 45875.4, 60 sec: 42871.5, 300 sec: 42320.7). Total num frames: 685129728. Throughput: 0: 42478.3. Samples: 567324080. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
14275
+ [2024-03-29 16:43:28,840][00126] Avg episode reward: [(0, '0.509')]
14276
+ [2024-03-29 16:43:32,415][00497] Updated weights for policy 0, policy_version 41825 (0.0019)
14277
+ [2024-03-29 16:43:33,839][00126] Fps is (10 sec: 44236.6, 60 sec: 42871.4, 300 sec: 42376.3). Total num frames: 685326336. Throughput: 0: 42300.8. Samples: 567454620. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
14278
+ [2024-03-29 16:43:33,840][00126] Avg episode reward: [(0, '0.570')]
14279
+ [2024-03-29 16:43:36,562][00497] Updated weights for policy 0, policy_version 41835 (0.0018)
14280
+ [2024-03-29 16:43:38,839][00126] Fps is (10 sec: 37683.0, 60 sec: 42325.4, 300 sec: 42265.2). Total num frames: 685506560. Throughput: 0: 42140.0. Samples: 567715480. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
14281
+ [2024-03-29 16:43:38,840][00126] Avg episode reward: [(0, '0.523')]
14282
+ [2024-03-29 16:43:40,285][00497] Updated weights for policy 0, policy_version 41845 (0.0033)
14283
+ [2024-03-29 16:43:43,508][00497] Updated weights for policy 0, policy_version 41855 (0.0028)
14284
+ [2024-03-29 16:43:43,839][00126] Fps is (10 sec: 42598.7, 60 sec: 42598.4, 300 sec: 42320.7). Total num frames: 685752320. Throughput: 0: 42272.9. Samples: 567959520. Policy #0 lag: (min: 0.0, avg: 20.4, max: 41.0)
14285
+ [2024-03-29 16:43:43,841][00126] Avg episode reward: [(0, '0.409')]
14286
+ [2024-03-29 16:43:47,834][00497] Updated weights for policy 0, policy_version 41865 (0.0024)
14287
+ [2024-03-29 16:43:48,839][00126] Fps is (10 sec: 45874.9, 60 sec: 42871.4, 300 sec: 42376.3). Total num frames: 685965312. Throughput: 0: 42126.3. Samples: 568085160. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
14288
+ [2024-03-29 16:43:48,840][00126] Avg episode reward: [(0, '0.483')]
14289
+ [2024-03-29 16:43:50,885][00476] Signal inference workers to stop experience collection... (20200 times)
14290
+ [2024-03-29 16:43:50,886][00476] Signal inference workers to resume experience collection... (20200 times)
14291
+ [2024-03-29 16:43:50,919][00497] InferenceWorker_p0-w0: stopping experience collection (20200 times)
14292
+ [2024-03-29 16:43:50,920][00497] InferenceWorker_p0-w0: resuming experience collection (20200 times)
14293
+ [2024-03-29 16:43:52,115][00497] Updated weights for policy 0, policy_version 41875 (0.0024)
14294
+ [2024-03-29 16:43:53,839][00126] Fps is (10 sec: 39321.4, 60 sec: 42325.3, 300 sec: 42320.7). Total num frames: 686145536. Throughput: 0: 42307.0. Samples: 568358480. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
14295
+ [2024-03-29 16:43:53,840][00126] Avg episode reward: [(0, '0.547')]
14296
+ [2024-03-29 16:43:55,706][00497] Updated weights for policy 0, policy_version 41885 (0.0033)
14297
+ [2024-03-29 16:43:58,839][00126] Fps is (10 sec: 42598.4, 60 sec: 42325.3, 300 sec: 42320.7). Total num frames: 686391296. Throughput: 0: 42537.3. Samples: 568604620. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
14298
+ [2024-03-29 16:43:58,840][00126] Avg episode reward: [(0, '0.548')]
14299
+ [2024-03-29 16:43:58,870][00497] Updated weights for policy 0, policy_version 41895 (0.0022)
14300
+ [2024-03-29 16:44:03,523][00497] Updated weights for policy 0, policy_version 41905 (0.0021)
14301
+ [2024-03-29 16:44:03,839][00126] Fps is (10 sec: 44236.7, 60 sec: 42598.4, 300 sec: 42376.2). Total num frames: 686587904. Throughput: 0: 42040.8. Samples: 568709280. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
14302
+ [2024-03-29 16:44:03,840][00126] Avg episode reward: [(0, '0.560')]
14303
+ [2024-03-29 16:44:07,753][00497] Updated weights for policy 0, policy_version 41915 (0.0026)
14304
+ [2024-03-29 16:44:08,839][00126] Fps is (10 sec: 37683.2, 60 sec: 42052.2, 300 sec: 42209.6). Total num frames: 686768128. Throughput: 0: 42192.0. Samples: 568981100. Policy #0 lag: (min: 0.0, avg: 22.5, max: 42.0)
14305
+ [2024-03-29 16:44:08,840][00126] Avg episode reward: [(0, '0.487')]
14306
+ [2024-03-29 16:44:11,387][00497] Updated weights for policy 0, policy_version 41925 (0.0025)
14307
+ [2024-03-29 16:44:13,839][00126] Fps is (10 sec: 42598.7, 60 sec: 42052.3, 300 sec: 42320.7). Total num frames: 687013888. Throughput: 0: 42369.7. Samples: 569230720. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
14308
+ [2024-03-29 16:44:13,840][00126] Avg episode reward: [(0, '0.412')]
14309
+ [2024-03-29 16:44:14,672][00497] Updated weights for policy 0, policy_version 41935 (0.0030)
14310
+ [2024-03-29 16:44:18,839][00126] Fps is (10 sec: 44237.4, 60 sec: 42325.4, 300 sec: 42320.7). Total num frames: 687210496. Throughput: 0: 41945.9. Samples: 569342180. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
14311
+ [2024-03-29 16:44:18,840][00126] Avg episode reward: [(0, '0.531')]
14312
+ [2024-03-29 16:44:19,134][00497] Updated weights for policy 0, policy_version 41945 (0.0020)
14313
+ [2024-03-29 16:44:23,135][00497] Updated weights for policy 0, policy_version 41955 (0.0022)
14314
+ [2024-03-29 16:44:23,450][00476] Signal inference workers to stop experience collection... (20250 times)
14315
+ [2024-03-29 16:44:23,451][00476] Signal inference workers to resume experience collection... (20250 times)
14316
+ [2024-03-29 16:44:23,495][00497] InferenceWorker_p0-w0: stopping experience collection (20250 times)
14317
+ [2024-03-29 16:44:23,495][00497] InferenceWorker_p0-w0: resuming experience collection (20250 times)
14318
+ [2024-03-29 16:44:23,839][00126] Fps is (10 sec: 40959.9, 60 sec: 42325.3, 300 sec: 42376.3). Total num frames: 687423488. Throughput: 0: 42243.0. Samples: 569616420. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
14319
+ [2024-03-29 16:44:23,840][00126] Avg episode reward: [(0, '0.511')]
14320
+ [2024-03-29 16:44:26,841][00497] Updated weights for policy 0, policy_version 41965 (0.0019)
14321
+ [2024-03-29 16:44:28,839][00126] Fps is (10 sec: 44236.4, 60 sec: 42052.2, 300 sec: 42376.3). Total num frames: 687652864. Throughput: 0: 42609.8. Samples: 569876960. Policy #0 lag: (min: 1.0, avg: 19.8, max: 41.0)
14322
+ [2024-03-29 16:44:28,840][00126] Avg episode reward: [(0, '0.622')]