diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..521784494aef90a6c65d93d8560bfde82a3b06d6 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
+sf_log.txt filter=lfs diff=lfs merge=lfs -text
diff --git a/.summary/0/events.out.tfevents.1700740633.rhmmedcatt-proliant-ml350-gen10 b/.summary/0/events.out.tfevents.1700740633.rhmmedcatt-proliant-ml350-gen10
new file mode 100644
index 0000000000000000000000000000000000000000..157f0d5cc26aa1c340af5c0b3006d8dc177bae37
--- /dev/null
+++ b/.summary/0/events.out.tfevents.1700740633.rhmmedcatt-proliant-ml350-gen10
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06a5b8bddcdc270b19caef67b33c45b599db22e6e6d1e78352f5d7678dc1811d
+size 100366978
diff --git a/.summary/1/events.out.tfevents.1700740633.rhmmedcatt-proliant-ml350-gen10 b/.summary/1/events.out.tfevents.1700740633.rhmmedcatt-proliant-ml350-gen10
new file mode 100644
index 0000000000000000000000000000000000000000..1e8dd531003c1a314d7a87e1b41c0cf662836876
--- /dev/null
+++ b/.summary/1/events.out.tfevents.1700740633.rhmmedcatt-proliant-ml350-gen10
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b92211605ccc41570ed64d13917d0f6838d0e9c0d9ff16501c21d4b0073944a
+size 52668533
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..81803f570d5c652d3465371e529b6c1a15bac516
--- /dev/null
+++ b/README.md
@@ -0,0 +1,189 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: atari_robotank
+      type: atari_robotank
+    metrics:
+    - type: mean_reward
+      value: 76.20 +/- 10.64
+      name: mean_reward
+      verified: false
+---
+
+## About the Project
+
+This project is an attempt to maximise the performance of high-sample-throughput APPO RL models in Atari environments in as carbon-efficient a manner as possible, using a single, not particularly high-performance machine. It is about demonstrating the generalisability of on-policy algorithms to create good performance quickly (by sacrificing sample efficiency) while also proving that this route to RL production is accessible even to hobbyists like me (I am a gastroenterologist, not a computer scientist).
+
+In terms of throughput, I am managing to reach 2,500 - 3,000 across both policies with Sample Factory on two Quadro P2200s (not particularly powerful GPUs), each loaded up to about 60% (3GB). Previously, using the Stable Baselines3 (SB3) implementation of PPO, it would take about a week to train an Atari agent to 100 million timesteps synchronously. By comparison, the Sample Factory async implementation takes only just over 2 hours to achieve the same result. That is about 84 times faster, with typically only a 21 watt burn per GPU. I am thus very grateful to Alex Petrenko and all the Sample Factory team for their work on this.
+
+## Project Aims
+
+This model, as with all the others in the benchmarks, was initially trained asynchronously and un-seeded to 10 million steps for the purposes of setting a Sample Factory async baseline for this model on this environment, but only 3/57 made it anywhere near SOTA performance.
+
+I then re-trained the models with 100 million timesteps. At this point 2 environments maxed out at SOTA performance (Pong and Freeway), with four approaching SOTA performance (Atlantis, Boxing, Tennis and FishingDerby), i.e. 6/57 near SOTA.
+
+The aim now is to try and reach state-of-the-art (SOTA) performance on a further block of Atari environments using up to 1 billion training timesteps, initially with APPO. I will flag the models with SOTA when they reach at or near these levels.
+
+After this I will switch on V-Trace to see if the IMPALA variations perform any better with the same seed (I have seeded '1234').
+
+
+## About the Model
+
+The hyperparameters used in the model are described in my shell script on my fork of sample-factory: https://github.com/MattStammers/sample-factory. Given that https://huggingface.co/edbeeching has kindly shared his parameters, I saved time and energy by using many of his tuned hyperparameters to reduce carbon inefficiency:
+```
+hyperparameters =  {
+  "help": false,
+  "algo": "APPO",
+  "env": "atari_asteroid",
+  "experiment": "atari_asteroid_APPO",
+  "train_dir": "./train_atari",
+  "restart_behavior": "restart",
+  "device": "gpu",
+  "seed": 1234,
+  "num_policies": 2,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": true,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 1,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 16,
+  "num_envs_per_worker": 2,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 8,
+  "num_epochs": 4,
+  "rollout": 128,
+  "recurrence": 1,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.0004677351413,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "entropy",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 1.0,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-05,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 0.0,
+  "learning_rate": 0.0003033891184,
+  "lr_schedule": "linear_decay",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": [
+    "obs"
+  ],
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 3,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 10,
+  "heartbeat_reporting_interval": 60,
+  "train_for_env_steps": 100000000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": 1200,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_atari",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": false,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "relu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": false,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 4,
+  "pixel_format": "CHW"
+}
+
+  ```
+
+
+    
+An **APPO** model trained on the **atari_robotank** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory. Sample Factory is a
+high-throughput on-policy RL framework, which I have been using for this project.
+Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r MattStammers/APPO-atari_robotank
+```
+
+    
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m sf_examples.atari.enjoy_atari --algo=APPO --env=atari_robotank --train_dir=./train_dir --experiment=APPO-atari_robotank
+```
+
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+    
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m sf_examples.atari.train_atari --algo=APPO --env=atari_robotank --train_dir=./train_dir --experiment=APPO-atari_robotank --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
+    
\ No newline at end of file
diff --git a/checkpoint_p0/best_001559680_399278080_reward_14.320.pth b/checkpoint_p0/best_001559680_399278080_reward_14.320.pth
new file mode 100644
index 0000000000000000000000000000000000000000..93320347a082c755c563267e3f3a01b38af4f783
--- /dev/null
+++ b/checkpoint_p0/best_001559680_399278080_reward_14.320.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d51e975b5df4b94b206264b87b79f1246dc69320ef9d73017919ceb7c17fe6a8
+size 20795763
diff --git a/checkpoint_p0/checkpoint_001953056_499982336.pth b/checkpoint_p0/checkpoint_001953056_499982336.pth
new file mode 100644
index 0000000000000000000000000000000000000000..92aaba9c006038594fb0f41a7850adc8f022460d
--- /dev/null
+++ b/checkpoint_p0/checkpoint_001953056_499982336.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a489b06c470db138a2674509d23b38cf3fc2b87489a21922f8275427e05cab6b
+size 20796099
diff --git a/checkpoint_p0/checkpoint_001953120_500006912.pth b/checkpoint_p0/checkpoint_001953120_500006912.pth
new file mode 100644
index 0000000000000000000000000000000000000000..75971f05e7201aa081a1cbd891651940fad089a7
--- /dev/null
+++ b/checkpoint_p0/checkpoint_001953120_500006912.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:997c90a71ce6397857331bb6dbba216abda95118e8f0a5163fc8e08ebb1f6f5e
+size 20796099
diff --git a/checkpoint_p0/milestones/checkpoint_000011104_2842624.pth b/checkpoint_p0/milestones/checkpoint_000011104_2842624.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7ff858f8ffc0b43191ce7ef2c294e0fa44aa18a2
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000011104_2842624.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9f3e59db1c1dbc9738e46f75779c25385d8053e6777544e975a90fe6800cca0
+size 20796955
diff --git a/checkpoint_p0/milestones/checkpoint_000022464_5750784.pth b/checkpoint_p0/milestones/checkpoint_000022464_5750784.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a9a23e869b13e9d6cb91eb75d16fa9bcbc817cb8
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000022464_5750784.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bdf6f27a8892dbb380c132620f0efa50a28f2b2128ea1cfd262512b8c97e60c
+size 20796955
diff --git a/checkpoint_p0/milestones/checkpoint_000033824_8658944.pth b/checkpoint_p0/milestones/checkpoint_000033824_8658944.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0cc7de0266026553438fbf861d90d5a12846fdb
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000033824_8658944.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd8880402cb3805e30fd8e1d9ad73170425fce1ed0da3d4b94770c1a32041c95
+size 20796955
diff --git a/checkpoint_p0/milestones/checkpoint_000045216_11575296.pth b/checkpoint_p0/milestones/checkpoint_000045216_11575296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94cccd10bdb4cfa4841f0fb9137f9f0da3f2e9ae
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000045216_11575296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c24bd58314ddb19fa250688d6ef4d201c7efdd0ecb144fffa5d1a5b6540e4ae
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000056704_14516224.pth b/checkpoint_p0/milestones/checkpoint_000056704_14516224.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ddf0d70e415701556cab839c40a0f6a3649b9680
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000056704_14516224.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df330c013df4b2158c8657cc23018fd13b3e0f757c7bc041f9e50016ef7263cf
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000068224_17465344.pth b/checkpoint_p0/milestones/checkpoint_000068224_17465344.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f6e9c02e10e284a8d9048546b52722bbba84bc4f
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000068224_17465344.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82799eb3c27a7a994a9578f257d1c54ee5eaee57bf4bee1afee130053d41200a
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000079712_20406272.pth b/checkpoint_p0/milestones/checkpoint_000079712_20406272.pth
new file mode 100644
index 0000000000000000000000000000000000000000..33dbaf4e88617a02ddbd3af0b142ea5e4defa138
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000079712_20406272.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6592816a520d6990b95249421126adb9ac3458f838cc4b06a4292104a97d92dd
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000091136_23330816.pth b/checkpoint_p0/milestones/checkpoint_000091136_23330816.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c6605a5b3495509e82682184c18a95b20085e8f8
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000091136_23330816.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8edf1a4bec05a2a3314cdcaf83fda793572d7a61c3d6c764a647b44db4509b0e
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000102560_26255360.pth b/checkpoint_p0/milestones/checkpoint_000102560_26255360.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e7d435b3dd5d6b96d427c7c7bacf7f1a7be6f5ec
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000102560_26255360.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92273369e2ff271b12ce1e5343c55ecf934e60c42fb598ed74ff22b9ce104106
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000113920_29163520.pth b/checkpoint_p0/milestones/checkpoint_000113920_29163520.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f8a685cbd3e9caa0fba15e4f8a5673267b84df74
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000113920_29163520.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff996eecc15f90a8bd7899de592c53357098d3a8bbbec611dbf398f556165436
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000125312_32079872.pth b/checkpoint_p0/milestones/checkpoint_000125312_32079872.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7699e7fd1ff5c4dd17e5adee90f86fea5ba2a6ad
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000125312_32079872.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80002b4db22a46d4e6710899e6f46b04dee84ef3c9a492fe39a6c856a859cdd8
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000136704_34996224.pth b/checkpoint_p0/milestones/checkpoint_000136704_34996224.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c8a6be90a396007fcf8cc7c8cd6afe729e06ac38
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000136704_34996224.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04169e8360e8acb7315d8d1defed61e62c2900c9d7d8949b458d5150d930d49b
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000148064_37904384.pth b/checkpoint_p0/milestones/checkpoint_000148064_37904384.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bbb6c4b930dbf64272a8e2891caa576850eb3091
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000148064_37904384.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e7a9817b44379de04ae9fe6e043efedf7a6c0b9381d7ef836968fa62dc3ec91
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000159360_40796160.pth b/checkpoint_p0/milestones/checkpoint_000159360_40796160.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c4d8a65b4ef3a1a2d96880345b51f60eb9dfec76
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000159360_40796160.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db8a2709ee2310e604dcba7a71139404c19c5a70347cbec3f5d753dba805396a
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000170656_43687936.pth b/checkpoint_p0/milestones/checkpoint_000170656_43687936.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d22f126b2dff3ace20ddfcdc62cac8bc85d76938
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000170656_43687936.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:793c7199903c2fcd1b4b92acb22a3a89fc9c276a6169b856d81a0fa9c6d5cb7e
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000182016_46596096.pth b/checkpoint_p0/milestones/checkpoint_000182016_46596096.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5e0d5237a7389f628f5d652f1788e162ad9d813b
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000182016_46596096.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bcdcc90d02639a353acea63d9871f8d1a129b6486ff5742b3907722b6e19170
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000193376_49504256.pth b/checkpoint_p0/milestones/checkpoint_000193376_49504256.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bbf1a455bff900588b631d06ee97fe115b1b13c4
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000193376_49504256.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b16df79c56a33c375cfd63e040037836a6c9a0f2cf0501d34f09a6e4ef3715b
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000204864_52445184.pth b/checkpoint_p0/milestones/checkpoint_000204864_52445184.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e538478dff5d30e4bc8eb981ee1e2d74553f8be5
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000204864_52445184.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfe3eece94e10bdf7452a9635c3f09e8b004c31541f5428f779482d211e9904d
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000216416_55402496.pth b/checkpoint_p0/milestones/checkpoint_000216416_55402496.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2fc082a0a5289b9a9f21cf0c4779f53819059183
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000216416_55402496.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4227e525880422d3818a6a091b9ee4906bac664574af705296d6a27a13a0e733
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000227872_58335232.pth b/checkpoint_p0/milestones/checkpoint_000227872_58335232.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7b66d50248e3b317525426c1f61e8bfe32384a89
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000227872_58335232.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19e7c0a3fd2ad72cc9aef363083a47957b56705d112208e22949ef6a547554b0
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000239328_61267968.pth b/checkpoint_p0/milestones/checkpoint_000239328_61267968.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6a002c195cb4334a21ac3c13ec3820cb9a54585d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000239328_61267968.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fcfde5bc0993491eecc9e0efe163ceb6af5626748792c8f0c312e34050e5950
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000250880_64225280.pth b/checkpoint_p0/milestones/checkpoint_000250880_64225280.pth
new file mode 100644
index 0000000000000000000000000000000000000000..645a0aa944de28b2151dcf07037c2243f63848fa
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000250880_64225280.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb0ff6b5b5716a654c51a9bd8a42c3b622eaa52a2ca714a3fb933e2a7cf0d8a
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000262432_67182592.pth b/checkpoint_p0/milestones/checkpoint_000262432_67182592.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4c9418a018ee6e16e021c707a5e892ff36334a10
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000262432_67182592.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12e04808392555744abc34048a2022ea99d88500437fb90b8a2e0bf30cfaaebc
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000273920_70123520.pth b/checkpoint_p0/milestones/checkpoint_000273920_70123520.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c8faa7e312dc0fceb8c877c463a44ed59d43cff4
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000273920_70123520.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9d2ca5ddd1e4540a59c40dc577730854a4667b25d08c84cf408c90f75c7aa4a
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000285408_73064448.pth b/checkpoint_p0/milestones/checkpoint_000285408_73064448.pth
new file mode 100644
index 0000000000000000000000000000000000000000..03f2102caad84361556a7d2c301a75c36d6ec0cf
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000285408_73064448.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02fba063e73c9421ea753d10165760320178ee89a39470bca221e1a8998f54cb
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000296896_76005376.pth b/checkpoint_p0/milestones/checkpoint_000296896_76005376.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ded8cf1aa78665cfca2b455d6b43c7794ea7577c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000296896_76005376.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04d5b39cc7e23f90b1506c2992123e0b4738ece0728ae83025fcccdb38e7560c
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000308352_78938112.pth b/checkpoint_p0/milestones/checkpoint_000308352_78938112.pth
new file mode 100644
index 0000000000000000000000000000000000000000..db8ecc7cbf912c028343cb3be7297db2ebecd8d4
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000308352_78938112.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9710ea6ce9cb9a053d8e71f426e0a35f0bea0461cb4a16ab8e20674f1885be36
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000319808_81870848.pth b/checkpoint_p0/milestones/checkpoint_000319808_81870848.pth
new file mode 100644
index 0000000000000000000000000000000000000000..de99da8b6d7ace097102b3722a7e58d5a0f60754
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000319808_81870848.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a552323ecd7980d8aa3c2feabe093bf2303a4a299bcb9d62fa32488f372415e
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000331264_84803584.pth b/checkpoint_p0/milestones/checkpoint_000331264_84803584.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5cb7fe04f7b386fcf16718d74e1bbdb8e9133765
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000331264_84803584.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f092f75d5456eaab8651c041c9e49ec29a41dec718338e11449d02eb1afa590c
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000342688_87728128.pth b/checkpoint_p0/milestones/checkpoint_000342688_87728128.pth
new file mode 100644
index 0000000000000000000000000000000000000000..facf80057edf18634b916f8a69767ebb3b6182b8
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000342688_87728128.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5987fd680b7e6ce8c9aa9766511472de58a405ce82c5ba60fbde282d91987343
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000354144_90660864.pth b/checkpoint_p0/milestones/checkpoint_000354144_90660864.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c6f395d7380f903929d37a262ce944e17de0ad77
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000354144_90660864.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f99f022cb34105cce73e1fe9f5d00be98fa15c4824d4c0fc0ce2e6a4cc032a8
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000365568_93585408.pth b/checkpoint_p0/milestones/checkpoint_000365568_93585408.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cc7e3cf2e42ba8a0e07f9d2bffdfcfb0b7a7d0ba
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000365568_93585408.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53bc43d2b7f50ffadfcbe1517f49d0528b8403c5a662ef0b1047f0e7f90df961
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000377088_96534528.pth b/checkpoint_p0/milestones/checkpoint_000377088_96534528.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f2fa4c66e9b0248d002a97c0af65bb9b279f2b4e
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000377088_96534528.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dc5849c9b0f7e7cfaa11a99b40232ba1f739fd9d908c017c31392a08f83f3d2
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000388544_99467264.pth b/checkpoint_p0/milestones/checkpoint_000388544_99467264.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a707714b8df5bcbfd8cc744457c40215645cc415
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000388544_99467264.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18db326bf1be7c133252c77a9e9f046b8397e02fc7422f9f11ef1532718dce23
+size 20797011
diff --git a/checkpoint_p0/milestones/checkpoint_000400000_102400000.pth b/checkpoint_p0/milestones/checkpoint_000400000_102400000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8bd35e876e8f0dcf4bee8fb81733239164942a79
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000400000_102400000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57e1da769980932a86663ff757390d4d5fe170d5bd0b7b32ba6c7002e9540bed
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000411488_105340928.pth b/checkpoint_p0/milestones/checkpoint_000411488_105340928.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e4b2513e66c7ece2bcb0492d9f7225b6c800a314
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000411488_105340928.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59c9105e2bf40b2c638548dde6d377e6046d81ef19f5dc6b0088ad712389d3a2
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000422592_108183552.pth b/checkpoint_p0/milestones/checkpoint_000422592_108183552.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6738ac3a38eb8ec2d984ec0d17a0165fb63a11c5
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000422592_108183552.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d21a5fc5477cfdf88088d1efc64049db0026c054680c2ea72af3a2400481d817
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000434048_111116288.pth b/checkpoint_p0/milestones/checkpoint_000434048_111116288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f3d59c4e8bbdff777e178101cda6ce8e7b8370a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000434048_111116288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b91274b5225d93570fff8cbac768857727aeab383bde1506a9b0cfa71feb136
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000445440_114032640.pth b/checkpoint_p0/milestones/checkpoint_000445440_114032640.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1697276fc39bbd93b02171a77da09a5e7fd19803
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000445440_114032640.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:253de8a52041191076f224e75fb9094d47cf96227f3eddf26e011902fe5928a0
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000456896_116965376.pth b/checkpoint_p0/milestones/checkpoint_000456896_116965376.pth
new file mode 100644
index 0000000000000000000000000000000000000000..307183f2207e35f6bc9ae93f902bb6e58b42f68e
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000456896_116965376.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de6aa6736e61814944fac75281fd165a2221a1b062097452e8317fdeafdc59e3
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000468384_119906304.pth b/checkpoint_p0/milestones/checkpoint_000468384_119906304.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d1baec38abf331cd05aa16b1092d1665f2ebba7a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000468384_119906304.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ffb44a6a0d69593014125eb484299b2748e9acfb3c3f045dce7f9d6b33066c1
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000479872_122847232.pth b/checkpoint_p0/milestones/checkpoint_000479872_122847232.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dad9eead029e2e235f3904f99db20e1439c6c61a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000479872_122847232.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf73216dc045481f937ffe1db18191bd5c29aa9ac067450f8631ce80e4dd38a8
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000491424_125804544.pth b/checkpoint_p0/milestones/checkpoint_000491424_125804544.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9fd38d4af5fdb9cb33be48ebd75f6c0776cc5675
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000491424_125804544.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b3847da08fc7e2f10a79cc6723262552baa92b50b954ef18aea1458f35c49c5
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000502976_128761856.pth b/checkpoint_p0/milestones/checkpoint_000502976_128761856.pth
new file mode 100644
index 0000000000000000000000000000000000000000..771fb374e28cecb2cd9da3993f60281eca74dfcf
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000502976_128761856.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6041e3326342c0da7aae003ef6ccf8c2ffd1af51061ce51b2093be66f2cdedc
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000514496_131710976.pth b/checkpoint_p0/milestones/checkpoint_000514496_131710976.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9ae0b41ae1d64967e730489b5abe63dd394c5045
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000514496_131710976.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b78e0e066a7835166bccc7ede2d71caca6e49545b90e6e6a1c51ebbbd1b91750
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000525984_134651904.pth b/checkpoint_p0/milestones/checkpoint_000525984_134651904.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dace3bd855f74e1898742ef98d56088e85fb0c26
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000525984_134651904.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8627d53be2b3ee6b0ec49ffa5ce659a999d301c5eb343c7fe626758efe470d62
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000537504_137601024.pth b/checkpoint_p0/milestones/checkpoint_000537504_137601024.pth
new file mode 100644
index 0000000000000000000000000000000000000000..af258f22f49c10bd4dd2df943f1254a71c6a3e96
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000537504_137601024.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3761a4f0ff7700bb87f8208b8501e3ec44f31eb4cf91aeec1358e2b677de932
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000549024_140550144.pth b/checkpoint_p0/milestones/checkpoint_000549024_140550144.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4f49703577ad4c4814fa961633b40e0296f2b0fb
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000549024_140550144.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d718393b64482132b6c1979bb7003a10c005380b9d96ed8f16d7b3f3906c6c80
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000560512_143491072.pth b/checkpoint_p0/milestones/checkpoint_000560512_143491072.pth
new file mode 100644
index 0000000000000000000000000000000000000000..428c6c6968f18f2b68c5dd2d09069767775ddae6
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000560512_143491072.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:019c3900c4cc844d27bfd401b09ad16dca62960b3af13a634bca5f4c357ed7df
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000572064_146448384.pth b/checkpoint_p0/milestones/checkpoint_000572064_146448384.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a676b47bdab6070004ba4c82e9ccc264b3778e02
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000572064_146448384.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e33c0ca566d3dfb4b1c2b6f17938701e9cc23ce3be99f47a6906147fea3f5b
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000583552_149389312.pth b/checkpoint_p0/milestones/checkpoint_000583552_149389312.pth
new file mode 100644
index 0000000000000000000000000000000000000000..334dfad1187ec04716bdde03b836ad6e57f95095
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000583552_149389312.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac7a6b0d268ac22226d3f27d8c71b0f01b31d5251317efc8a06d5e350bc4b8f1
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000595008_152322048.pth b/checkpoint_p0/milestones/checkpoint_000595008_152322048.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e13bead04f718728fa9c92338bba073fac2d7467
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000595008_152322048.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:137249fdd51e3a7b59e50b4c2e658bbc8f6ae6cd51d9ac84c9cf0049f9250699
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000606560_155279360.pth b/checkpoint_p0/milestones/checkpoint_000606560_155279360.pth
new file mode 100644
index 0000000000000000000000000000000000000000..02ab2440d105e0c8383237efa05bc0b94d111279
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000606560_155279360.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33198eb4f86dc5334f9da93cf1303573ed513be432fc13f4f52bd49ad16d54a9
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000618112_158236672.pth b/checkpoint_p0/milestones/checkpoint_000618112_158236672.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6a0de78f90ba7fca7d3031a180f2b44751b7ca9d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000618112_158236672.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8db3d648ebe8bede34273d0211861aaa537559abb00544fc1afda1f8ae5cf37
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000629600_161177600.pth b/checkpoint_p0/milestones/checkpoint_000629600_161177600.pth
new file mode 100644
index 0000000000000000000000000000000000000000..740c44c23cc4f6911d5dc922654266c6706ce52a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000629600_161177600.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00f984249c828f2a4467715f7a3430fb5cb0e34d3ff62cc2cb4faad5b73f174c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000641120_164126720.pth b/checkpoint_p0/milestones/checkpoint_000641120_164126720.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2bbaef65809696f4a20bf22bc7bee84079da8098
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000641120_164126720.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2402d6bdb2e4c7954799237147e16971bfca074dc3c59aa9bca12a00127d1c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000652672_167084032.pth b/checkpoint_p0/milestones/checkpoint_000652672_167084032.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bdbc9bbf7c02b9c3942db431736966b8e7fea662
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000652672_167084032.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:152f071376b8bb0a3656b9e03ff74f635b0a159c94a58240e253f13bd4c5a36d
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000664192_170033152.pth b/checkpoint_p0/milestones/checkpoint_000664192_170033152.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eb3554087e881703abf7ae5d4165db02eaa49fe4
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000664192_170033152.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5ba9d073657cae78d016f20cf8f0017b1b0adb3518afeb6f96dd8d2015bea96
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000675712_172982272.pth b/checkpoint_p0/milestones/checkpoint_000675712_172982272.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf64f3ac248af24322cec16c3f4028618f3628a4
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000675712_172982272.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36ef1536ab02ea817ffb3a3669cae5890db47d707c94ec58f745acdc5c8ff33f
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000687168_175915008.pth b/checkpoint_p0/milestones/checkpoint_000687168_175915008.pth
new file mode 100644
index 0000000000000000000000000000000000000000..440030d6b83442f7ce5b44329554041e67e353a5
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000687168_175915008.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1917181e0fbfc1f5e410b2f260c28c17ccf7a1a4b133500ba5d2c955428c4ed2
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000698752_178880512.pth b/checkpoint_p0/milestones/checkpoint_000698752_178880512.pth
new file mode 100644
index 0000000000000000000000000000000000000000..21b3eacb0b4c9714c582262fddc1dfd9f7dc7d0d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000698752_178880512.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c42cf50d2f1448dee1db33f98d3215e1bfeedbc17977e3f3e979ddc58cf55cda
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000710240_181821440.pth b/checkpoint_p0/milestones/checkpoint_000710240_181821440.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d3a3167f2d60bd29d712731e6bdda60b8bb33f09
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000710240_181821440.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f09d9cef4d2753525c7dcbcbf1b17c5e49410ba2e72bc14788dfb91a332b1a9c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000721728_184762368.pth b/checkpoint_p0/milestones/checkpoint_000721728_184762368.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b97c5b155895a7d2bba14efc3488ac9b7390ad15
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000721728_184762368.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81c5c96c37795d201902da1c1e5fb8c8280831f62505780b6c73cc4cd63a0613
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000733216_187703296.pth b/checkpoint_p0/milestones/checkpoint_000733216_187703296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fe9776ad4cf76af936fedfb9566c200e0ed82620
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000733216_187703296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:053573f0c8914c7cefcb34af16e6029cb57b7f34c5bb96788dfd56526df6e82c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000744704_190644224.pth b/checkpoint_p0/milestones/checkpoint_000744704_190644224.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e265c34dc564c9df6c261aa597b766974c9104ca
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000744704_190644224.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8117ab05eef643726932cdc37ab96a191d2b82d25cdbd97157937ab897c84ce1
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000756192_193585152.pth b/checkpoint_p0/milestones/checkpoint_000756192_193585152.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a58ad7bc981ec182d96e7ce0857af2402b9e9390
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000756192_193585152.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cdb1a803fbff904d6a7c165156aff6001aabfdad6077d1a79c841bddbc9cc7b
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000767648_196517888.pth b/checkpoint_p0/milestones/checkpoint_000767648_196517888.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7e6ddb8241e30c428a8e265709d9be3ba1320189
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000767648_196517888.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7307f258e0663c68e0308ec0959387551442992360ef909381e2b3d5dd6f41f3
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000779040_199434240.pth b/checkpoint_p0/milestones/checkpoint_000779040_199434240.pth
new file mode 100644
index 0000000000000000000000000000000000000000..583d817b74fdea4dd4b2e1096c254851fe529426
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000779040_199434240.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59853155d26bf80c8f7c38d8026c94269cf4d2262829181cb9c115e80f53a386
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000790336_202326016.pth b/checkpoint_p0/milestones/checkpoint_000790336_202326016.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ae3a89c3a8fe0f8db0f888b2e19f4cfbd6dc6203
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000790336_202326016.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d236b5797a906b102e5f7b42b6db4349a1ee7bbc08489ebe46e8adc02c83b0d
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000801600_205209600.pth b/checkpoint_p0/milestones/checkpoint_000801600_205209600.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fc52fd4eb8f3be14321ec59b713476fc4d00f36a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000801600_205209600.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8d0da99065d0f79fee7d8557353d9e70342a031e853eb5bbe3d88ebec7e8f75
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000812896_208101376.pth b/checkpoint_p0/milestones/checkpoint_000812896_208101376.pth
new file mode 100644
index 0000000000000000000000000000000000000000..253a1d870492083097fd8146c0f7f02b4fa9e009
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000812896_208101376.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca24d32b5ff170d1ef991c0ce2dd5a1b7baba0867521179f808eb713c8b1a1b9
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000824224_211001344.pth b/checkpoint_p0/milestones/checkpoint_000824224_211001344.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cb232159432a36165ba47b781bb7a4a5a6165993
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000824224_211001344.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdafc073b04ad2403a3fd2a37c25edc0819dd61a0b27656c5e1d22352cd5ca41
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000835520_213893120.pth b/checkpoint_p0/milestones/checkpoint_000835520_213893120.pth
new file mode 100644
index 0000000000000000000000000000000000000000..94764b54a72a50bcf6d382942ee06f1ae642a328
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000835520_213893120.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8121c58b72ffc9f0e57ce724258aeb910767f141e92ae67af6d4cb5323ad5ff
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000846816_216784896.pth b/checkpoint_p0/milestones/checkpoint_000846816_216784896.pth
new file mode 100644
index 0000000000000000000000000000000000000000..aa10630b800bb2684bfd8a710b623bd07ef85354
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000846816_216784896.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61bc21ae5cc7fa24e605e0117047c3273745cb0f247abba2000b857fb95956d3
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000858048_219660288.pth b/checkpoint_p0/milestones/checkpoint_000858048_219660288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6c4ca48845dcf9e9dc796f5191bd7747f2f8614c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000858048_219660288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b5a8af244ddfd4de9f6f10f83f3d10c6bad2f47ca02594233a4a4721d9f5e5d
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000869344_222552064.pth b/checkpoint_p0/milestones/checkpoint_000869344_222552064.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9d290a9ea5bfc020b7554fee8cb2d070f93e8cc9
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000869344_222552064.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57cec1a5508ddf8728a6642cddeb9a6572b1610d058ea59d29ba572aa22d9cdd
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000880672_225452032.pth b/checkpoint_p0/milestones/checkpoint_000880672_225452032.pth
new file mode 100644
index 0000000000000000000000000000000000000000..894e329b17b4a349350c98ce5549dcc26a8982b3
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000880672_225452032.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1819a6a81223de405f4d73ff5169eb12b3e9558e3a7ef8cf8882d9028517f3c7
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000891936_228335616.pth b/checkpoint_p0/milestones/checkpoint_000891936_228335616.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6dbe3c0e59c10f046d4bce01f98926fbee8b16c5
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000891936_228335616.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83ac2828e3800bbf2a806d49c568de381a3836ee0d381fb6aefa8e20a40dbd3f
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000903296_231243776.pth b/checkpoint_p0/milestones/checkpoint_000903296_231243776.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8c318058da686d6a14b88dde00264450651286a2
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000903296_231243776.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d80ffe567816c046525ae508442fd02539322b84e09d4bc7ff082c7948a807ba
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000914656_234151936.pth b/checkpoint_p0/milestones/checkpoint_000914656_234151936.pth
new file mode 100644
index 0000000000000000000000000000000000000000..742bcabecf801f170244da574aa2bb033509c244
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000914656_234151936.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71b2388e2002b3e8a8f92f35ecc62a4943073abcfa54e256861f488d6e175c78
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000925952_237043712.pth b/checkpoint_p0/milestones/checkpoint_000925952_237043712.pth
new file mode 100644
index 0000000000000000000000000000000000000000..29c68a97122d3a82b5911e7748de53c0c70018ba
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000925952_237043712.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ff94850834b97e2a300090f2c9a2eec35471a64c94d463154285c4544f4a5b7
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000937216_239927296.pth b/checkpoint_p0/milestones/checkpoint_000937216_239927296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d7fa1b516a9c7063023db700210cb54a9e3450c3
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000937216_239927296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bc2be5e4c7c2ff2a6540d21b9045d1c495b4f9166f5b7daaa3f455d6a66fc66
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000948416_242794496.pth b/checkpoint_p0/milestones/checkpoint_000948416_242794496.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a672181e4c86ca8b0b59c65bb7676591f6b0eb17
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000948416_242794496.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08a1d6e59745fc28f4e138945ae4e97abdab5aae06c17b172370d30c87acb65e
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000959712_245686272.pth b/checkpoint_p0/milestones/checkpoint_000959712_245686272.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ca82ab752417247d88724aaa1ef74806321c4d19
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000959712_245686272.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81eb3691f3d6cfbb337ca79127d29446cac08d0d6a9ea6de2de0d9e0fbacbe85
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000970944_248561664.pth b/checkpoint_p0/milestones/checkpoint_000970944_248561664.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0adc029718bf764c92794ed9f292bb4366facca0
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000970944_248561664.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29b1ddd0b24305d7b3917f525a34c361c6f7c785a90b3085438feab3dd233966
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000982208_251445248.pth b/checkpoint_p0/milestones/checkpoint_000982208_251445248.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9506b210bb6ecda491bacaa9384a710375a0653f
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000982208_251445248.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cc5ff507f821b82775aaf634a028593ae995c2534555a1e1cefe3367531f07f
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_000993536_254345216.pth b/checkpoint_p0/milestones/checkpoint_000993536_254345216.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e9ffb10862004be9c3d782d20e9ead795abf2ee0
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_000993536_254345216.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b12508d5c567d77c7c2ae0ac69e84903e40fbdc15ff2d7fc427db00eea4f3f16
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001004832_257236992.pth b/checkpoint_p0/milestones/checkpoint_001004832_257236992.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf263767ab36c7b9562cd26f5cbe2dafdfc4617c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001004832_257236992.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:881b6b1d722d8a06e62790fce834d83d8cb3e187e11de1e4e90f99f8673febe7
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001016192_260145152.pth b/checkpoint_p0/milestones/checkpoint_001016192_260145152.pth
new file mode 100644
index 0000000000000000000000000000000000000000..659e42489e7dba1b2de5157fd6f74792fe4ee3d0
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001016192_260145152.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e323056bbd02a568d40e812445ffdf5166c7c81db533b51088f5fb7b184a0167
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001027520_263045120.pth b/checkpoint_p0/milestones/checkpoint_001027520_263045120.pth
new file mode 100644
index 0000000000000000000000000000000000000000..82f0c6c77470274448e6e5ad7451c0707115878c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001027520_263045120.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccfe6740925eea2337f34f82181f2b8e73e9cb8088fe1dca021802312a85ed4d
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001038848_265945088.pth b/checkpoint_p0/milestones/checkpoint_001038848_265945088.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f493cdce12164817ad6fe7c3774663ac208261fb
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001038848_265945088.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3158b991e06225b1362526f61f4f7b9c251ecdf586f55ad1a57bd0657d3b0cd4
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001050176_268845056.pth b/checkpoint_p0/milestones/checkpoint_001050176_268845056.pth
new file mode 100644
index 0000000000000000000000000000000000000000..17562d17d5b66bc133e6bd69439fdd8aedf76e5d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001050176_268845056.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a5018a8a207a84b55c7d1b101d2816c6dc66f5a31654cbaf8a7b7cbbc81eda3
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001061504_271745024.pth b/checkpoint_p0/milestones/checkpoint_001061504_271745024.pth
new file mode 100644
index 0000000000000000000000000000000000000000..749eb03687bcb2a0a148fd955bade857a0e0e42f
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001061504_271745024.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c66cfec230e611a4de11d55795ae922d467c5bfcb2f09e6dc286647cb19f61
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001072832_274644992.pth b/checkpoint_p0/milestones/checkpoint_001072832_274644992.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6cf68ea00765fd9b5f84e561410e4ba78ff6b33b
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001072832_274644992.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7da705fb16d18374bda7936af646f425bfa8445a9f39e1988c5404202dd402ea
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001084224_277561344.pth b/checkpoint_p0/milestones/checkpoint_001084224_277561344.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c96f9d1f5647b2cb5e0f3785c1fdf0c085dfa30e
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001084224_277561344.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a16473cacec1a2b89b655afad7a1a3f3cf261c08952643c04b54a8cffde3a53
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001095584_280469504.pth b/checkpoint_p0/milestones/checkpoint_001095584_280469504.pth
new file mode 100644
index 0000000000000000000000000000000000000000..907079612e35c95521a3c8c777c2179a087fbb43
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001095584_280469504.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff60eca23f478da62c041626315d742f143c89f4970ed5b37e5d33a201f02bcf
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001107072_283410432.pth b/checkpoint_p0/milestones/checkpoint_001107072_283410432.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d88884c2c9d1c68ea4025ef2156a58110e7fb0f6
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001107072_283410432.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db09662e5c6426affd32d886d5ac5fd3c19cd0c47b81543829b6f741f2adbb79
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001118496_286334976.pth b/checkpoint_p0/milestones/checkpoint_001118496_286334976.pth
new file mode 100644
index 0000000000000000000000000000000000000000..05c92c420a4189b5d8f39cd4ae2fd0f7d39af4d0
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001118496_286334976.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4388c369885f10e7f48fcee568d02ae649cca1750127ee48c0bcb1cbbe2a6aee
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001129984_289275904.pth b/checkpoint_p0/milestones/checkpoint_001129984_289275904.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5bd7ef1f68a53c4d66300d47083b815d540d0e45
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001129984_289275904.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b02d9ffb1c528f50c6d5a9bb1438ceaf2913b6fb03a3ca8e6fffff3d14cae2b
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001141408_292200448.pth b/checkpoint_p0/milestones/checkpoint_001141408_292200448.pth
new file mode 100644
index 0000000000000000000000000000000000000000..575665d02cbafe8266efd7248616fcb82b22cb33
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001141408_292200448.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3de93d60cac6840fc3ccf2383085ccb9973ededb3f7143f4b314b5e9ce520a33
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001152832_295124992.pth b/checkpoint_p0/milestones/checkpoint_001152832_295124992.pth
new file mode 100644
index 0000000000000000000000000000000000000000..48e0b6ae74d15fc5bd068cf896f1508171c1fe8d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001152832_295124992.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96dfee64a26968976a6ddc7544d529cd077d60bb23e2b2dc057397f9345aa47a
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001164320_298065920.pth b/checkpoint_p0/milestones/checkpoint_001164320_298065920.pth
new file mode 100644
index 0000000000000000000000000000000000000000..aec3141a131469cd15c08d6342091236fd184e1c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001164320_298065920.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80ba2508b763256fc506496a77e46dfaddddcff9bed7a457e38faa4a78fb5e98
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001175680_300974080.pth b/checkpoint_p0/milestones/checkpoint_001175680_300974080.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0cf33dbf929d8a5af2cbf7b26ed8c909b887b21d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001175680_300974080.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0294f7eb2fae5ac8687c74ade59d582f77630f99bc5fb49a4dfa6515d2c78d5
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001187104_303898624.pth b/checkpoint_p0/milestones/checkpoint_001187104_303898624.pth
new file mode 100644
index 0000000000000000000000000000000000000000..42165d0382b6bb6cdced60defe9335c66682dfae
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001187104_303898624.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68b3d87233ea72a7f80ef5f692d6b27a1def917368862e16fd6cf1b206d05624
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001198528_306823168.pth b/checkpoint_p0/milestones/checkpoint_001198528_306823168.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c64c5faed432127159c444c9001074395caf59c5
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001198528_306823168.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3461c8e308f9194db3d38c7ad0d002c4884383c1a974eeccd6f702acb2d96495
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001210016_309764096.pth b/checkpoint_p0/milestones/checkpoint_001210016_309764096.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5f1e4f840f8dda19d5f87debfdf432d50d04ed77
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001210016_309764096.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93a6d880526fe954e53d044b248bcb2e06a20a661f7b0af287b91dc03f0ca3f1
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001221504_312705024.pth b/checkpoint_p0/milestones/checkpoint_001221504_312705024.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d66e0cdcfd0be7f5033c34f1a7f66b5a97d9c83d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001221504_312705024.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5a08bc5b4c54d699cccca1f603c0bd74de756d44b2bde508bd3fa56e93fb752
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001232960_315637760.pth b/checkpoint_p0/milestones/checkpoint_001232960_315637760.pth
new file mode 100644
index 0000000000000000000000000000000000000000..14cdedd63f082101605146d741fa8d9d686925a8
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001232960_315637760.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddfeca45cbdd2e3e4fe3819c057c369b5ae98f9f036543dbb5fe96e49faf6c3f
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001244096_318488576.pth b/checkpoint_p0/milestones/checkpoint_001244096_318488576.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3a148f239fca4639bf96633667ce606ea95ec839
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001244096_318488576.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccb28b9e2986dae893cb91f510cbee5e5ac429877253bb63e571060258042e2c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001255584_321429504.pth b/checkpoint_p0/milestones/checkpoint_001255584_321429504.pth
new file mode 100644
index 0000000000000000000000000000000000000000..16ab54c6133c3650ee61fe03b7e8580cf53c3fb2
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001255584_321429504.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c6eccd4df8de4ae5e384bede5f91e5dc70a9aa6a4f570d9a6eb549b6a1bae3f
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001267040_324362240.pth b/checkpoint_p0/milestones/checkpoint_001267040_324362240.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f7da53a3b39d18c87476722c2adf5b2576ebdd70
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001267040_324362240.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc201feb17e1601ce591cd0936922c5ef4dc93aa8e2f476705b0fa0ac65f7fac
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001278592_327319552.pth b/checkpoint_p0/milestones/checkpoint_001278592_327319552.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3822230db83473df8d3eec9f006e896faf8e2d24
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001278592_327319552.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a0b956433415c1bcf8823324c2fe8fd487f9db15bb08cee3cfab87d9ead8396
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001290176_330285056.pth b/checkpoint_p0/milestones/checkpoint_001290176_330285056.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf464b16f9917f537098d296c82402e0a7a3019e
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001290176_330285056.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:362e9a9b835839fab4b186b943743fce93a613a66e5c7660dcb934320d258912
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001301696_333234176.pth b/checkpoint_p0/milestones/checkpoint_001301696_333234176.pth
new file mode 100644
index 0000000000000000000000000000000000000000..01dfc2845b39a83591f2f4fab3fd117297f10827
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001301696_333234176.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b04063eee8bb23f4f178d03aa726ad532eb08ecd9c5dc64fc6b0a0a092b33560
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001313184_336175104.pth b/checkpoint_p0/milestones/checkpoint_001313184_336175104.pth
new file mode 100644
index 0000000000000000000000000000000000000000..af61d1435d71a223b793e6fcf699bcdb3d182e65
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001313184_336175104.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a32e9f5659dc9f075418a36911524f1697ae78df340dc0e373440c7abb72100
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001324768_339140608.pth b/checkpoint_p0/milestones/checkpoint_001324768_339140608.pth
new file mode 100644
index 0000000000000000000000000000000000000000..977c1a766984880583649217fa5de2c1b4f23ef2
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001324768_339140608.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b750f0814d2c3ea141c69ca60ddcae09105f57ef5af4c9baa64e73fc33c2841e
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001336256_342081536.pth b/checkpoint_p0/milestones/checkpoint_001336256_342081536.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5ef896494630d278de822c9a28ebdeaaceb8b39b
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001336256_342081536.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e05034752c86436ee9575a7f2074f88dcefd0c9351e65d10dc0fd8f11187fa57
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001347808_345038848.pth b/checkpoint_p0/milestones/checkpoint_001347808_345038848.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bb149cb0659edb56fae1ae8ca4adcb7a56a9ff85
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001347808_345038848.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d946c022b5f84adfb483f0be8543f13026b364e67409bf2bb80a2cf2a7e7067
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001359296_347979776.pth b/checkpoint_p0/milestones/checkpoint_001359296_347979776.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7ada0d528068e883c60739d24ccc3b5a72f92c89
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001359296_347979776.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcb18d199fc070383b16f99baca9a9ab23f5b296cb028325e87cb627358a5367
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001370848_350937088.pth b/checkpoint_p0/milestones/checkpoint_001370848_350937088.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ac8e86054cf0a873b2f2c0b43436d48c3679817b
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001370848_350937088.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83dc3bb36da324509d1a4820d535c996ec9c244a42e72cef379021de75f94263
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001382368_353886208.pth b/checkpoint_p0/milestones/checkpoint_001382368_353886208.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2b20e2b0ff8042401f4a31c1594b8c4a7e72bc8c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001382368_353886208.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a16dbf913caa51eed4b1bf9aefba2effbe772ba4a1a3831a8e019fb9f853418c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001393856_356827136.pth b/checkpoint_p0/milestones/checkpoint_001393856_356827136.pth
new file mode 100644
index 0000000000000000000000000000000000000000..88bfbfd116bd638bcf210b512ee44d8b33c97afb
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001393856_356827136.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fbf969e2fdf672d0ac32dd370d545b73183fb55cee5d622b1cb048be5d314e7
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001405280_359751680.pth b/checkpoint_p0/milestones/checkpoint_001405280_359751680.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9ebe9bcceecc3f19437846c18f237a8407f68c16
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001405280_359751680.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4c77cd48121a4b6cf10d21484865fb146d87892f29e9abd7c2415ac04bcaa8e
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001416736_362684416.pth b/checkpoint_p0/milestones/checkpoint_001416736_362684416.pth
new file mode 100644
index 0000000000000000000000000000000000000000..587fd2cbb433af7c2f9974fdd59a3f54d6b77d9a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001416736_362684416.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f29dd4d80f0e76e112fa0ac7a03edd60aa7c065e04d6d6037618eee156e7ed29
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001428288_365641728.pth b/checkpoint_p0/milestones/checkpoint_001428288_365641728.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f7e5cfe34dd0ea94e3e08d9b84f4c54c15bcb8ba
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001428288_365641728.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3bfef6d9d7a665c4517c676561494fb9c4d10fd18a3b878643bcd740dad06a9
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001439712_368566272.pth b/checkpoint_p0/milestones/checkpoint_001439712_368566272.pth
new file mode 100644
index 0000000000000000000000000000000000000000..da957c587de49f697e5fee5ceb3d16a95b5d9c5a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001439712_368566272.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4669148e60785442eb145ebbec1c1e18db3fcb672d28179405d526494f82c54a
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001451200_371507200.pth b/checkpoint_p0/milestones/checkpoint_001451200_371507200.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c73a42b0ad99b7abae42bcfd8767f4169c1bf1a5
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001451200_371507200.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0eb6b6c6ee7f68efc42c4605a909993d1f3568c2af4ed333fcd788b4097cf72a
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001462720_374456320.pth b/checkpoint_p0/milestones/checkpoint_001462720_374456320.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ad9ae6ae0abd66903e3eb07013bc3ff39d973220
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001462720_374456320.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bb4002946149d6c5a01823b50212029a4c1dc90c4d1a4036378288cf3997035
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001474240_377405440.pth b/checkpoint_p0/milestones/checkpoint_001474240_377405440.pth
new file mode 100644
index 0000000000000000000000000000000000000000..79efd74f0b218d9e90f2bb2981c7f2315f28adb3
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001474240_377405440.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2edc2a12e1225d0d21bc528c3af3466bbd7b731acd6f36d0c1a2ecdf11d1d2e6
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001485728_380346368.pth b/checkpoint_p0/milestones/checkpoint_001485728_380346368.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b4e9c9d852f64a7fddee98c3bf0569d10cb840ad
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001485728_380346368.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944d1e9713b05d6d9dca58aecfe7f2b659489a470560ecef928de5c5f72282f3
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001497216_383287296.pth b/checkpoint_p0/milestones/checkpoint_001497216_383287296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a4dff25cd29bc50cb7535dbf5cdf9d08c5977ef
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001497216_383287296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a6796fa437b40c3e4d7a247502d2877e69a2fb12edb5b5627ebb3a9431f8ec3
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001508704_386228224.pth b/checkpoint_p0/milestones/checkpoint_001508704_386228224.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6c2300ef41f45bd3f7803aaeebb9bf63273d1a39
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001508704_386228224.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ebfbae36e73adf31e12354ac56fd29ca0329b8fa8eb4d4ebb2f84c0629955fb
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001520256_389185536.pth b/checkpoint_p0/milestones/checkpoint_001520256_389185536.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6fa213ecc045531b29a3694d2c485597fe557773
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001520256_389185536.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b55561a2c706ecda409c70365f1068facd5cb7771e9cbe71f072607084fc198c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001531776_392134656.pth b/checkpoint_p0/milestones/checkpoint_001531776_392134656.pth
new file mode 100644
index 0000000000000000000000000000000000000000..914101d8e644e13791acd7911c688ad2b1376dc9
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001531776_392134656.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3f696c6d9a888be78f4e087f12f547391efe4bb26bc85541ea8e8f14c7b487c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001543264_395075584.pth b/checkpoint_p0/milestones/checkpoint_001543264_395075584.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c609c24bd7d1623eed5d23cade68d978d1dc1e52
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001543264_395075584.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27c750c3d1fe532365212fc337d561b901f7800db51c2ef47019d3f69d9d3ed7
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001554848_398041088.pth b/checkpoint_p0/milestones/checkpoint_001554848_398041088.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2f4bd946e4e14718b0f3d65338296202043f8bd1
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001554848_398041088.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a05b6a7241ca9741b3ca116eb2a70bce2b96943345249c6c69102426e91ab89c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001566368_400990208.pth b/checkpoint_p0/milestones/checkpoint_001566368_400990208.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d2a58c2c4694fddde955d3442043e590374f07aa
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001566368_400990208.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07bae98d559ecac2bdda6cfcaae8632d253235dcc40c994c8447ea4ff87cac9b
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001577920_403947520.pth b/checkpoint_p0/milestones/checkpoint_001577920_403947520.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c1a13bcee6529c5f4d299d44fab1fcbd1d6a846e
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001577920_403947520.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b741da1ab4ba3fbbad28cdbddd3f06ea50bfbe4a4e76bc309612a720265e6e2
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001589440_406896640.pth b/checkpoint_p0/milestones/checkpoint_001589440_406896640.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1db1166e9f73027538d586e59146d8ff0aa9be7c
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001589440_406896640.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e98c7cacf021f5857bcc5e0900c0dab4bd27a1f02ac384dc12fe0d2422600819
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001600896_409829376.pth b/checkpoint_p0/milestones/checkpoint_001600896_409829376.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b9b5f0e13a5155ecc5573a0591e28e00f5d26125
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001600896_409829376.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1a5d512109e1741cace1e9756355153cf5f991dd7ba3299fba74c6ac68b31ca
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001612384_412770304.pth b/checkpoint_p0/milestones/checkpoint_001612384_412770304.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0b0cabafcee9c8bfc2fb952efeba5c7d59dcd824
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001612384_412770304.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a49f6197fd239d15c21d6c5ff39d129683b5a4e564009da3e219c053a9cb8bce
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001623808_415694848.pth b/checkpoint_p0/milestones/checkpoint_001623808_415694848.pth
new file mode 100644
index 0000000000000000000000000000000000000000..622e9e5aac23c1edb50cd4a5cc207c89478f32b0
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001623808_415694848.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb3249fb9ba1b79247f87bcc88e78b050ea0d12729bfaea279a101846011cd86
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001635296_418635776.pth b/checkpoint_p0/milestones/checkpoint_001635296_418635776.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cdb4da4629554a18c7307befdd88e140971026cd
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001635296_418635776.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07f77633a43cb56bd62a2dc0b24b1eef891bc72bccb8fa41e8f376c7a0a999c4
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001646688_421552128.pth b/checkpoint_p0/milestones/checkpoint_001646688_421552128.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e60f8e3c7145e4767807271942f1956d85f38810
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001646688_421552128.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9243e91ebb5e92b6ccb1b1b852527d83d8e91947d9dea732a2eafa0475eb129
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001658048_424460288.pth b/checkpoint_p0/milestones/checkpoint_001658048_424460288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ab8ebcd001756fa7b20600296b9758853aa66022
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001658048_424460288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20655183438602732373f84ee956466440a2d26d5e7fd809010bc0e5f97e5056
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001669472_427384832.pth b/checkpoint_p0/milestones/checkpoint_001669472_427384832.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bf4a1a23c4484d659ec2cf54e063a3e702667690
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001669472_427384832.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5914fc32bde4c605d555461a89e57a3b29209be5d6d4243168d59fc424ead0d
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001680896_430309376.pth b/checkpoint_p0/milestones/checkpoint_001680896_430309376.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c7eb881105f77d03195a4801578c52f817249301
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001680896_430309376.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09dc22784de819ff6bed37ad6d7fe6813eb095b5ee488a1ff663a068f34b92a7
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001692352_433242112.pth b/checkpoint_p0/milestones/checkpoint_001692352_433242112.pth
new file mode 100644
index 0000000000000000000000000000000000000000..29afc4cb2f470cc9c2c5ce55477bca6d400ce6d2
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001692352_433242112.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ffe6dd550d5a9186d085d37d5c2d53b56ee1ead34160e389f50c23c3785e7f1
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001703840_436183040.pth b/checkpoint_p0/milestones/checkpoint_001703840_436183040.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2e7de52d174ab176a2fe27411fb201f7705554be
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001703840_436183040.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b494c8e93982ccb0f036bd15689d418d57723aad827b1bd2fe500b82e847beae
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001715232_439099392.pth b/checkpoint_p0/milestones/checkpoint_001715232_439099392.pth
new file mode 100644
index 0000000000000000000000000000000000000000..70aedae8fb125c1b0f63cc9069d9dfd7d6e53a15
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001715232_439099392.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71a4f16c1948ef957c09d321bb9730451e62cc83ee3fa273f0534e4de2cef3d9
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001726688_442032128.pth b/checkpoint_p0/milestones/checkpoint_001726688_442032128.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7e4416b044f2103d4800402027378634b5ae42ac
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001726688_442032128.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad143ba0ffab4c1dc6b44d5c5cf87598b475546a627ce46b7072c3a44aadbb6
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001738144_444964864.pth b/checkpoint_p0/milestones/checkpoint_001738144_444964864.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ea5acaed9b772c92d481951694129adeb1e6bfbe
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001738144_444964864.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40698c609ca1698e0d5d73dbb03c22b551457260a748da1c14ed58090f1281d4
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001749632_447905792.pth b/checkpoint_p0/milestones/checkpoint_001749632_447905792.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2c1f835067bce7bab2215084aaa49910f11fadc0
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001749632_447905792.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db1a2b9b4bbf31affbc3729e5897f99ddf1a67453731957522d7adc8e518e969
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001761024_450822144.pth b/checkpoint_p0/milestones/checkpoint_001761024_450822144.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f631842427e343ce14f3ede0f459a9b3ab4cf5cc
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001761024_450822144.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55e03386ec52cdd794222538b8cf960ecb97b8e3669471d6ee1d0c551c7cc125
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001772448_453746688.pth b/checkpoint_p0/milestones/checkpoint_001772448_453746688.pth
new file mode 100644
index 0000000000000000000000000000000000000000..18203d4a37530c47165e9581ac47aededfde3fc6
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001772448_453746688.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:730e6638745174e01cfb062d118bcb5029d55c443dcc209f5e11b4465b0a7ead
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001783904_456679424.pth b/checkpoint_p0/milestones/checkpoint_001783904_456679424.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6360a60e2c908356423bce49e6bf68329484f2e1
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001783904_456679424.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e4ee9257a912cfbd8283c9b59c3379286cd205cdccee0a831eb947b5729018e
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001795424_459628544.pth b/checkpoint_p0/milestones/checkpoint_001795424_459628544.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7c51d7cde2aab7678ce128a97af73b2a2d51d699
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001795424_459628544.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f567dead3c90c305679f2b9589bf7756a0460272c151e8da371144ef5befe97
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001806912_462569472.pth b/checkpoint_p0/milestones/checkpoint_001806912_462569472.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b28989e9565ad56f075c846cbb1665926087cc60
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001806912_462569472.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba6bd6e67110422f538fbd8970be773368fd4c8a96803b3586ce0e37590563f4
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001818368_465502208.pth b/checkpoint_p0/milestones/checkpoint_001818368_465502208.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d53063ab178cde3afe8670652de1757a24294b1e
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001818368_465502208.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:252d27538fec9096ac3966ec985a3c11fe7d1ed14c70c59fb5b9c759721e9352
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001829888_468451328.pth b/checkpoint_p0/milestones/checkpoint_001829888_468451328.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1745fc3a495802c8ca137809f62e789c256afadb
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001829888_468451328.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:800a30aa5f22d45bfed96a4cca1d0cbbaa36a27030012f460dc527b60bab70c2
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001841408_471400448.pth b/checkpoint_p0/milestones/checkpoint_001841408_471400448.pth
new file mode 100644
index 0000000000000000000000000000000000000000..26111440f72dedec0735a947f9ab4d8b4f871a8a
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001841408_471400448.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb95d8f5deefcfce2a73dff5a050e2e8805664acaeaed58648cf25a82ab6e701
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001852896_474341376.pth b/checkpoint_p0/milestones/checkpoint_001852896_474341376.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b1fca906b7bbf51adc89f0210334641eff4294b6
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001852896_474341376.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b2dca62e15b1f826253f0ef1f8eb7a4c314d00a94dd3207181fb9c40e9c7636
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001864416_477290496.pth b/checkpoint_p0/milestones/checkpoint_001864416_477290496.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b7d3846ca69db2e4bceb7d4887bf70db8987dc40
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001864416_477290496.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b91cc6dceb1bd2b179a95619f6d64bcc07df32c591c0724773d391f796ec63c
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001876000_480256000.pth b/checkpoint_p0/milestones/checkpoint_001876000_480256000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5c72ef9a44b5eb74c26256d366e0f8564e793f74
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001876000_480256000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecf716adad8b5b863f77b5679ba052d261d8c6d6d59ac2d212f6944e17b4dd1b
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001887520_483205120.pth b/checkpoint_p0/milestones/checkpoint_001887520_483205120.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b314c9320c56a848c0932927df8f3894b4119265
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001887520_483205120.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b4c7593f7daf01df8f1034f71472a15c3ca97f9045bfda3915539d0e0148770
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001899040_486154240.pth b/checkpoint_p0/milestones/checkpoint_001899040_486154240.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4d068d6e8b8d6953f76d34b73bcea23dc43ebeb9
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001899040_486154240.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1adbb9ee5462f5b46a74647b18f7d9241f994f91ac06680098d5cebc5a8cfcbc
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001910592_489111552.pth b/checkpoint_p0/milestones/checkpoint_001910592_489111552.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5ab316b1b6001b417672849e6a41116a18bcaf73
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001910592_489111552.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13112659b469f8145487834b476a501c2ef14a5793d53ba6f750b63044e725c4
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001922112_492060672.pth b/checkpoint_p0/milestones/checkpoint_001922112_492060672.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9612d7c6cd47182b3b5b43b9ab199344ac9af76d
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001922112_492060672.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4e311941b38803c1b712dc27aa636d8649e223536d8af1f55a8ed9e5d7a55e2
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001933600_495001600.pth b/checkpoint_p0/milestones/checkpoint_001933600_495001600.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ba4cd085b2ba1eee5e863eb52e01f5134bb3e559
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001933600_495001600.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b362faeacfa2b815ad719e6ba48594ef098f3a61845465d9e94cdb018eff8ee5
+size 20797067
diff --git a/checkpoint_p0/milestones/checkpoint_001945056_497934336.pth b/checkpoint_p0/milestones/checkpoint_001945056_497934336.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f2672a57ba3dfd5d2bfdb9b2ad24babdca025965
--- /dev/null
+++ b/checkpoint_p0/milestones/checkpoint_001945056_497934336.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3954f5f50f3398da8cc4dacc5c567c6a839aa769a47ca7aa98f3f0a38487c30a
+size 20797067
diff --git a/checkpoint_p1/best_001370240_350781440_reward_14.990.pth b/checkpoint_p1/best_001370240_350781440_reward_14.990.pth
new file mode 100644
index 0000000000000000000000000000000000000000..324014a24b5cf35dbf10ec7986a247e93878f830
--- /dev/null
+++ b/checkpoint_p1/best_001370240_350781440_reward_14.990.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33a709df4ade524a9708ba6f16d28d366964ffbb75ead62a0c7ed6130af9ac26
+size 20795763
diff --git a/checkpoint_p1/checkpoint_001968576_507920384.pth b/checkpoint_p1/checkpoint_001968576_507920384.pth
new file mode 100644
index 0000000000000000000000000000000000000000..efa738fff114f81335837ba36bb099d265aa7ddc
--- /dev/null
+++ b/checkpoint_p1/checkpoint_001968576_507920384.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d3cc67ec4a641965b9d6ac9652a767ac52357da63e4576f171cf97cda103177
+size 20796099
diff --git a/checkpoint_p1/checkpoint_001968640_507953152.pth b/checkpoint_p1/checkpoint_001968640_507953152.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bd65abd3f6e0bb0936a975c4240d138b0b41c94e
--- /dev/null
+++ b/checkpoint_p1/checkpoint_001968640_507953152.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:558ec8c5fd533599b44f5a96b49bce54c4752b9cdcec3090e128156b6512b0ea
+size 20796099
diff --git a/checkpoint_p1/milestones/checkpoint_000011328_2899968.pth b/checkpoint_p1/milestones/checkpoint_000011328_2899968.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3484822b6dcb9f2d7c9801a76d4364c10911bf31
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000011328_2899968.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fd6c198062accb37ad4d2cea0ff7b69b297c8dbd94a6bd67a4e96785eb6730c
+size 20796955
diff --git a/checkpoint_p1/milestones/checkpoint_000022848_5849088.pth b/checkpoint_p1/milestones/checkpoint_000022848_5849088.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b49308f15e70d6d7eb1aacc409398618f641a03a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000022848_5849088.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4fe70a86d1245b732154a49d642adfd8516cc5c43dfac18165c06b704fc616d
+size 20796955
diff --git a/checkpoint_p1/milestones/checkpoint_000034464_8822784.pth b/checkpoint_p1/milestones/checkpoint_000034464_8822784.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e058f9205fc9c13a7322ebe950c84e2c95da7664
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000034464_8822784.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39ff4ddb9e67f015e2f4ba138113e22aa5c1a1b6704ca606c92d892a24f9910c
+size 20796955
diff --git a/checkpoint_p1/milestones/checkpoint_000046016_11780096.pth b/checkpoint_p1/milestones/checkpoint_000046016_11780096.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5572292b9f4a899798b3123569aa80b011543043
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000046016_11780096.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23210ecec6a9bfc4ef65eb1a18694f0446571009487eec4b4c7914b5bb806436
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000057536_14729216.pth b/checkpoint_p1/milestones/checkpoint_000057536_14729216.pth
new file mode 100644
index 0000000000000000000000000000000000000000..acd0a2d141989b8e187e23999913c9761b7591cd
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000057536_14729216.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bea9c8f5652c2cb3ea8cc3844a2c93a3d584da3c694dfa0cf663215c334e93b9
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000069216_17719296.pth b/checkpoint_p1/milestones/checkpoint_000069216_17719296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f42478e24dd1902245e1d52faf865c14f417a558
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000069216_17719296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e131dd5c71c7b4ddd49d1300be535da66092c314a9d4e881230cc04cea85272
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000080800_20684800.pth b/checkpoint_p1/milestones/checkpoint_000080800_20684800.pth
new file mode 100644
index 0000000000000000000000000000000000000000..251d626fbe328a137c1e7e2752a1431e39f1967c
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000080800_20684800.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44c32d649c7394e16925b1efc44c7a24ed14d444310018b40acce73b3d13e32b
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000092480_23674880.pth b/checkpoint_p1/milestones/checkpoint_000092480_23674880.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd710d64b25e401232a5c614a76854f163ccf57b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000092480_23674880.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:673c3e917480a5159902263731b3636926ace7ed9c256a835b093f4018fa85ae
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000104032_26632192.pth b/checkpoint_p1/milestones/checkpoint_000104032_26632192.pth
new file mode 100644
index 0000000000000000000000000000000000000000..21264628dbcb920e019bf69652f45ec15e195e09
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000104032_26632192.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c199f5242e52fe29fb56f1828c1a69d226dbd84110ad7b8cf9b9e4ba2d90cbd
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000115680_29614080.pth b/checkpoint_p1/milestones/checkpoint_000115680_29614080.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bee301702672dfc97c78fe58447a759a8979139b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000115680_29614080.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335e95974f9275d6c2c9d927f08616f907b1c8c41c4caf59782d6265dddaeb49
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000127200_32563200.pth b/checkpoint_p1/milestones/checkpoint_000127200_32563200.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c783e3ca06d13a12c319eb9c7274f13c135b0f77
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000127200_32563200.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8737b4008760df494befea553d38d4d5f2af60369949c5561105c4517aafdc1d
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000138720_35512320.pth b/checkpoint_p1/milestones/checkpoint_000138720_35512320.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a2e939d48e92aaf9b222f52860bbb90cebb3d202
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000138720_35512320.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bccebf5b28d26c86e3e3d69101f192af2260b724b588d7d6ef697f3cb591cad
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000150208_38453248.pth b/checkpoint_p1/milestones/checkpoint_000150208_38453248.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dafc56a56f8844466820a44c527bb21904eab073
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000150208_38453248.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4650895adf25990381f0474e202089a9e21bcd956d462ebd709949245ad7d62
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000161728_41402368.pth b/checkpoint_p1/milestones/checkpoint_000161728_41402368.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3441474b75c12b91f68760c9a3261e02414aea0f
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000161728_41402368.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edf772df1d44c31ee127f9509a13b60d65ed030a13af26ad76b685aa14db491a
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000173248_44351488.pth b/checkpoint_p1/milestones/checkpoint_000173248_44351488.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d361840aab0b10c3b130f608d616f25d3a9c6a2b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000173248_44351488.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cdc3e44e039c56f0aaa527580cda99cee00b90ce8332ab454a031b2fb195c8d
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000184736_47292416.pth b/checkpoint_p1/milestones/checkpoint_000184736_47292416.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d65eb84f86a9966f2dc462aedd231e4b069fbba0
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000184736_47292416.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca0407505397fed6a1356678e60c62c7993072f502def80a728ff7ef2f1ad71a
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000196224_50233344.pth b/checkpoint_p1/milestones/checkpoint_000196224_50233344.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8fd4820fe5257b3966f00bca245f75afe6642031
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000196224_50233344.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:499faf0855c8d0ef6d256b59b6afc92a155a733555b0cf488c8eb86d7c946d59
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000207840_53207040.pth b/checkpoint_p1/milestones/checkpoint_000207840_53207040.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5850f7a63d34759cd6bdac02bc42b52d3fe1e932
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000207840_53207040.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9070253eee92ee2b27f808eb992308cddb87e80c09eb95cb046f5bb94188aff2
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000219488_56188928.pth b/checkpoint_p1/milestones/checkpoint_000219488_56188928.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9452471f5b14d7b098135efb165f25a4c5f81d72
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000219488_56188928.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c56876ad845c2257b445990ef5dcab630ad743d479b52998376ea8d8c70f7f39
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000231232_59195392.pth b/checkpoint_p1/milestones/checkpoint_000231232_59195392.pth
new file mode 100644
index 0000000000000000000000000000000000000000..963bfbb9cd9da1a9b1eff4dad013bd872abc06f5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000231232_59195392.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677af78613439439805683bc175c0b595026211d9744b06a199328cf54b4716d
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000242944_62193664.pth b/checkpoint_p1/milestones/checkpoint_000242944_62193664.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a0af98b5a3a3c6bb01e02a322b5917e992d5decb
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000242944_62193664.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0b7e2c1ee89e095cee4f26c224df080eab892722643ca1d0603f62b48dfae58
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000254560_65167360.pth b/checkpoint_p1/milestones/checkpoint_000254560_65167360.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ddc21e281cf4a4ae1b394e9478354180534fe109
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000254560_65167360.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b240069efc1c611d7772eeb36e8c4a400999e0289dd82a178ff990b72c8a0c90
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000266240_68157440.pth b/checkpoint_p1/milestones/checkpoint_000266240_68157440.pth
new file mode 100644
index 0000000000000000000000000000000000000000..397393485d29b8bbf21aa1bd43879b4fe6622547
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000266240_68157440.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b43d3a248fa2fba2d1a5514e15683462c0bc8ae70192b75072b4fae97c7bfc91
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000277952_71155712.pth b/checkpoint_p1/milestones/checkpoint_000277952_71155712.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cf693e2c072c061939bd9d937faddf950199aa92
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000277952_71155712.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58dc88db0626ce0a7b44b9c7c096d9c3ccd3e72f5bd412365bdb0580cb0cd52f
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000289664_74153984.pth b/checkpoint_p1/milestones/checkpoint_000289664_74153984.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5ecaa1926df706c3e89915e4267b8bdfbce152f6
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000289664_74153984.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6dc824618480fac0bb34f4eb4f339157fd77b7d5aecaf88a241b3fa52094b8a
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000301408_77160448.pth b/checkpoint_p1/milestones/checkpoint_000301408_77160448.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ec6937927304531009360d7e635fefd755178112
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000301408_77160448.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c87930b48009f7c3211e07a662934e427022725315e02376fb38776b76ea1aab
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000313088_80150528.pth b/checkpoint_p1/milestones/checkpoint_000313088_80150528.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c7e367944e969309e9f8125a1659c9ebed9cf724
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000313088_80150528.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edff6344db4c8aa56521e6a560648a737571e10368649babb17a556591984742
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000324704_83124224.pth b/checkpoint_p1/milestones/checkpoint_000324704_83124224.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0856e36f6240342e4e5b902ed0b720e47c47c2cd
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000324704_83124224.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64e41a7d0c634b9a06f5382ed2d06ba6b1e7d6e8bd7939545c279448ecbca09c
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000336320_86097920.pth b/checkpoint_p1/milestones/checkpoint_000336320_86097920.pth
new file mode 100644
index 0000000000000000000000000000000000000000..72eaac2f3916dae0fe6a5982d8c0a308967fc21b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000336320_86097920.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b426463ffd68d0a56588fde9206d65aa1feb933de1f3e4af24c81c313dcb093e
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000347904_89063424.pth b/checkpoint_p1/milestones/checkpoint_000347904_89063424.pth
new file mode 100644
index 0000000000000000000000000000000000000000..752e5465c03d574e2f363024020d042843b5912f
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000347904_89063424.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:048211502ff3a9a7f269e49c36e36638bc1bbe613e6e1ec0ae4de77af79f12b7
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000359488_92028928.pth b/checkpoint_p1/milestones/checkpoint_000359488_92028928.pth
new file mode 100644
index 0000000000000000000000000000000000000000..487bcec0259a156783eebe4dc47c1efd5fb27c46
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000359488_92028928.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1ae33555a734d37a4555913a3b21672864626b6084af9afec3d56e19ed60bd2
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000371040_94986240.pth b/checkpoint_p1/milestones/checkpoint_000371040_94986240.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a695d3e9342c1a558ab4e74becc7b0f22b20cbf
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000371040_94986240.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ac9fc96726012c3a7b975333cd264c0bd6b3910c615fa9d512c76b28cf2916a
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000382656_97959936.pth b/checkpoint_p1/milestones/checkpoint_000382656_97959936.pth
new file mode 100644
index 0000000000000000000000000000000000000000..beda6ac924a4b7f61fe5bccedd94ea6aebda28c1
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000382656_97959936.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36524257bde16897f12cd9e6759056888a5acaf2a58e32cece167148e6afe956
+size 20797011
diff --git a/checkpoint_p1/milestones/checkpoint_000394240_100925440.pth b/checkpoint_p1/milestones/checkpoint_000394240_100925440.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65c28fa4e54cd6088eac52c6d2582dd69e918ef6
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000394240_100925440.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82533a26a18c80e915d017c3f8647cdc23bee9466ce6a1857c0b672048c8addc
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000405824_103890944.pth b/checkpoint_p1/milestones/checkpoint_000405824_103890944.pth
new file mode 100644
index 0000000000000000000000000000000000000000..16e7ffd8b83edef49d1e9de33d65aefb4d03a440
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000405824_103890944.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff96c73ae7b07ea439461cf64de468706aedce1c08a6e69643bed26c578042b7
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000417344_106840064.pth b/checkpoint_p1/milestones/checkpoint_000417344_106840064.pth
new file mode 100644
index 0000000000000000000000000000000000000000..43d40d6136c458e0791c73757625be882161940b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000417344_106840064.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:800117088a233f5c6d1766a6f31ee553c5f777dcebeeacaade0b16879f8e81c8
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000428608_109723648.pth b/checkpoint_p1/milestones/checkpoint_000428608_109723648.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c8859b9f4d317ec7d9672af22db50bda13e30ebc
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000428608_109723648.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f679064f7a9570dd01956107a3772a6d43b160d1e77535c8ca6d8f1f14eb6d4
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000440224_112697344.pth b/checkpoint_p1/milestones/checkpoint_000440224_112697344.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d28a5069b31d09c61f3be48bf2810e4ee60c6e1
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000440224_112697344.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca172de9e0f5be12a0b8a0700720dbc84d9b494a3e46008cdd4604516f68ccf4
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000451776_115654656.pth b/checkpoint_p1/milestones/checkpoint_000451776_115654656.pth
new file mode 100644
index 0000000000000000000000000000000000000000..609d8c8237d33271b1e226ffc8740c43b13ebb7c
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000451776_115654656.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9c0c58e50fc3cb9401988d885640f0b30b2e78241ed668985848904e0876552
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000463328_118611968.pth b/checkpoint_p1/milestones/checkpoint_000463328_118611968.pth
new file mode 100644
index 0000000000000000000000000000000000000000..fb47113fa4a31800ea5953dbec5b51c9b3adf93a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000463328_118611968.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9e77c75b6a27d225f85febf3da19edb02215604c356d9d40810cef79e1c0d5
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000475040_121610240.pth b/checkpoint_p1/milestones/checkpoint_000475040_121610240.pth
new file mode 100644
index 0000000000000000000000000000000000000000..61db793017053e40074d2146468d4fce59f72df0
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000475040_121610240.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d17cf0331fe20b1c9d5082260f1fb3cc77f25aa691dca1c19827bcb62590602
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000486784_124616704.pth b/checkpoint_p1/milestones/checkpoint_000486784_124616704.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d89aee212ff29e3649c0ab02332d7af8517f225e
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000486784_124616704.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c21a41b41d63ba7033602312acf9eec5dff6828cfafec7297cd6b11ab4a908
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000498464_127606784.pth b/checkpoint_p1/milestones/checkpoint_000498464_127606784.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1cddcf36b7d8aeb2cb48e3449328ca27064c75f6
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000498464_127606784.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4474aaf475c3485b68c92858e3105d718cfa5f436eaaa0087e8d190429d21b78
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000510144_130596864.pth b/checkpoint_p1/milestones/checkpoint_000510144_130596864.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6df14c30de3683c704b32ad822c2c3779cf7829a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000510144_130596864.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d2a49a373ed42e3fae0ca602211a4a628a4f42bed80f572e3aa3eebe380b68
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000521824_133586944.pth b/checkpoint_p1/milestones/checkpoint_000521824_133586944.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5bc7d97cca993a1108b68af3ce19bfbd47032de0
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000521824_133586944.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3a0096072c333a3f80071937c7874368a310c2e4c93932856406d0187aad3ec
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000533536_136585216.pth b/checkpoint_p1/milestones/checkpoint_000533536_136585216.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0c1690b0f30c66a2c1f9afab71580a5d8a050023
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000533536_136585216.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6a896b453d2b50ffb89baa8e38ac7e7c3398beba20285c6b23acbea89ac3596
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000545216_139575296.pth b/checkpoint_p1/milestones/checkpoint_000545216_139575296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4cf2e0d31dcb8fcefecf38b4bfbefa9553d9ecae
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000545216_139575296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84ac5fc81b062bfb9f3fecdd801a494ad56ed05e3035449f2517f714a238e8ba
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000556864_142557184.pth b/checkpoint_p1/milestones/checkpoint_000556864_142557184.pth
new file mode 100644
index 0000000000000000000000000000000000000000..37cb42bd23cf930ec4205dfcf9c0948a5d661915
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000556864_142557184.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ddaae1a3a1c7c1a31572e59f41002940aa77196fdee60bcdfb65eaa124eba31
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000568576_145555456.pth b/checkpoint_p1/milestones/checkpoint_000568576_145555456.pth
new file mode 100644
index 0000000000000000000000000000000000000000..939f926aa4894a7e58f4c371f5fffdffcf842ed2
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000568576_145555456.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4266ef24e72da2ce8d3e7cc2fc63f1e295a194f1fc654fe9e3391d1ba30aa667
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000580224_148537344.pth b/checkpoint_p1/milestones/checkpoint_000580224_148537344.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d5acd309efc01bae1020054f668297e29b8f5947
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000580224_148537344.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c216b19c59cd38f6c11b259cf4e68223918b7ee619a5e742a808827b0c114734
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000591840_151511040.pth b/checkpoint_p1/milestones/checkpoint_000591840_151511040.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9810e0d4f15f35487957bb3fbb85eb5d371c9961
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000591840_151511040.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f56512a94449d8d75ae1869fa8409120f7e0faf82925c69887b8c3b920cd18ef
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000603584_154517504.pth b/checkpoint_p1/milestones/checkpoint_000603584_154517504.pth
new file mode 100644
index 0000000000000000000000000000000000000000..30c97f01ad690412124c9f53cfea042dc4ad846d
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000603584_154517504.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50acf5eb30274abc1c4094da3401572d06fa4e16e0c256f69a6d2c4dd031173a
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000615232_157499392.pth b/checkpoint_p1/milestones/checkpoint_000615232_157499392.pth
new file mode 100644
index 0000000000000000000000000000000000000000..95b6eacdcb598ccf10d6eda47bbe8c096ab8d857
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000615232_157499392.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd06cfc54c87beb972a890d21b081fb3c123a6857a2017c0aec055c9f97c65c
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000626912_160489472.pth b/checkpoint_p1/milestones/checkpoint_000626912_160489472.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3b317f92ea7997b81ee639d735dfca0a2b62b8ac
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000626912_160489472.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a350e88fa2b7e45b6ea739474e287a95bcea6af44ec5a9be277d8d9298165d
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000638592_163479552.pth b/checkpoint_p1/milestones/checkpoint_000638592_163479552.pth
new file mode 100644
index 0000000000000000000000000000000000000000..50ad870bd0fc0dd7054b0f127a0a94fa49ccb243
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000638592_163479552.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1be823ecc3ebd02ac64fe2c4b997b15962b46f6c0ea3e7ca9301e24691960ca9
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000650272_166469632.pth b/checkpoint_p1/milestones/checkpoint_000650272_166469632.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d4bed9c58927f56a362a82523499be53bdc903fd
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000650272_166469632.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7307ea0ea1ebfab6eb33b27f0269c78413ed32758f0896e8fae10f49b24d0dae
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000662048_169484288.pth b/checkpoint_p1/milestones/checkpoint_000662048_169484288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd6244dcc3de1e81e5888cda229126005aca7f3a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000662048_169484288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a329f85aefa1206ddbb03c9f4e800b503a8d457a482a7da78bae43cad8eb743a
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000673664_172457984.pth b/checkpoint_p1/milestones/checkpoint_000673664_172457984.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6d726bdbf2c600a1016b992b2067d86e75c23d6d
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000673664_172457984.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785a0921cdb51c44c47219b53f55ea3399508e3b693ad0e3b23ae255c61286da
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000685408_175464448.pth b/checkpoint_p1/milestones/checkpoint_000685408_175464448.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a3146e76be0fd83b43c4ecff2a4c708de90f2898
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000685408_175464448.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfcad31a699243d4a0f9fd9aee386bca391ea0285e01dd18ec65abdef6b46c0f
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000697088_178454528.pth b/checkpoint_p1/milestones/checkpoint_000697088_178454528.pth
new file mode 100644
index 0000000000000000000000000000000000000000..77e91278a136ee57dca6a3949899b43a8ebf2a90
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000697088_178454528.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8b4efb9dd44e68240cf32e4096892c06332faecaa386d5eb4675d8b38c46684
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000708800_181452800.pth b/checkpoint_p1/milestones/checkpoint_000708800_181452800.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1e9a11db8c025a931a46cc270712471edd2c4c44
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000708800_181452800.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5641a721a114a81b5807a01f06ddc24a47efa3201fa897d72c6077e73e0eeb11
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000720512_184451072.pth b/checkpoint_p1/milestones/checkpoint_000720512_184451072.pth
new file mode 100644
index 0000000000000000000000000000000000000000..181320d0f8b8fef2e40e40f679631ee87ec3d0b4
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000720512_184451072.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3716a8ad6adb8a35dab60fbdb1cd3d1317d54905640545bd22b3b03efebc928
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000732192_187441152.pth b/checkpoint_p1/milestones/checkpoint_000732192_187441152.pth
new file mode 100644
index 0000000000000000000000000000000000000000..afe47fc78c3d3a52f44ca67b11b782a7dc10e249
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000732192_187441152.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d204d113548c14ea7af69cd1e8e8cced07bc77b6506e3a8af5b44508efb6d22
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000743872_190431232.pth b/checkpoint_p1/milestones/checkpoint_000743872_190431232.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d9cae2e2a0b7c7e413531dc1e33ce9f6bde7a3e8
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000743872_190431232.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4c6321469b3bc94782ace0c29ac7d94fcec48c433e28bae1e86cec187390e09
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000755552_193421312.pth b/checkpoint_p1/milestones/checkpoint_000755552_193421312.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c8bcd646db562d3dd1d994095b5e693162672149
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000755552_193421312.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1a7ecc99b31384115fb7f5573ed01bcaa040a38593a44ef2d88d2731893c5cd
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000767200_196403200.pth b/checkpoint_p1/milestones/checkpoint_000767200_196403200.pth
new file mode 100644
index 0000000000000000000000000000000000000000..00b43225e0148a954cfc753c6f294b4bca5737f5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000767200_196403200.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:214853a7d9aea9402c2021ebb98334fe4f87b5445baba005244eb9a3163aa11b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000778816_199376896.pth b/checkpoint_p1/milestones/checkpoint_000778816_199376896.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2b2d8d156f7334e19ee1feedbdcd0c5ba3469df4
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000778816_199376896.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd73e6cd84ffdaad60ee04725e23a71d97fac13b4657b97ba87db1bae164e787
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000790336_202326016.pth b/checkpoint_p1/milestones/checkpoint_000790336_202326016.pth
new file mode 100644
index 0000000000000000000000000000000000000000..625344d34188707257e2909ccde70f4e24480b25
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000790336_202326016.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:570cf6f818b5213b4304e391f970c11502f816f937cad0747dd5aab63d49c447
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000801792_205258752.pth b/checkpoint_p1/milestones/checkpoint_000801792_205258752.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c40efbcbcc484055f7561912e55d6602ee2c94be
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000801792_205258752.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23ea203d29daf477223cdd542cf7b3cb0e7bcb1a0866d9cfcfb4b6bd52b55775
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000813312_208207872.pth b/checkpoint_p1/milestones/checkpoint_000813312_208207872.pth
new file mode 100644
index 0000000000000000000000000000000000000000..35e3c8a90fb5803bddd4ae78dd5f8308047b1849
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000813312_208207872.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76a6b858183145587e0f194d6b4ce76cbca6be7999e69faf891fe7020df647cc
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000824864_211165184.pth b/checkpoint_p1/milestones/checkpoint_000824864_211165184.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0931d3f2482da4d20532790e7534e22aa023f3fd
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000824864_211165184.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37abac90c01f2cd4b0d00ea961172dcb2426f530bebfeb62f2160a5da487678b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000836352_214106112.pth b/checkpoint_p1/milestones/checkpoint_000836352_214106112.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d83aafd2c9e69a1117ac2e7b870533ef33bc4816
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000836352_214106112.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd79588c404a8531e39f7763b124b7b0f2dd1d54dd0b8e4ef3f4069ffb130864
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000847872_217055232.pth b/checkpoint_p1/milestones/checkpoint_000847872_217055232.pth
new file mode 100644
index 0000000000000000000000000000000000000000..845c3dd261a86f9135b15a6ac37aa0370c19c859
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000847872_217055232.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46dcd7da1a84b14e8c430b34c016bb22f32e180042b824901572cf031e8c2f91
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000859392_220004352.pth b/checkpoint_p1/milestones/checkpoint_000859392_220004352.pth
new file mode 100644
index 0000000000000000000000000000000000000000..903a92d117b81a87da0454af4deb86cf62b41cdc
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000859392_220004352.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:741800f3926e6a62f3cb482df2601a1729647fae3b1954b8208e9bdb4e902873
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000870848_222937088.pth b/checkpoint_p1/milestones/checkpoint_000870848_222937088.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d094752d450c1f9d6cad6a5ce5a8f7d9b5fe87b0
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000870848_222937088.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3412bc056af8b34f6763bb4f2d5b6dd155859009740c51c857ba0292e49cd581
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000882464_225910784.pth b/checkpoint_p1/milestones/checkpoint_000882464_225910784.pth
new file mode 100644
index 0000000000000000000000000000000000000000..92b6e7de622f2cf7c1f61d39f8b465724a41e2fd
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000882464_225910784.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20e03cb06cf4edc5cbba2691a6ccf4ef950f00263f9954bc0158127f9c99bc35
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000893920_228843520.pth b/checkpoint_p1/milestones/checkpoint_000893920_228843520.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8397e20337ccabe72b8593b378c76000ae6aa929
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000893920_228843520.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a11e5c338feb54253259d3abb5f2fd7e9403d84d40f1130d76cd770613b1f829
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000905376_231776256.pth b/checkpoint_p1/milestones/checkpoint_000905376_231776256.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6a98885274f90182f1ccc5077c4c6e30c42aedf7
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000905376_231776256.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:958ea78911844dc46e50a237b2ce48b03bc8d87828289c493d78f53b388bdc4e
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000916928_234733568.pth b/checkpoint_p1/milestones/checkpoint_000916928_234733568.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3896645022d330c96e33a6f7a5a9a3af5c29e912
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000916928_234733568.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9b20c5f9a68ab91348e5bef1af6a8c1bbaca132fbc50adee6e6cde58721ddb1
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000928448_237682688.pth b/checkpoint_p1/milestones/checkpoint_000928448_237682688.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0db8eeebb19579b6cb65664df546cf5444838079
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000928448_237682688.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d04c841e75f522b6b4baab72875ba11b17d1e448130424ee11124f85fc5b4295
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000939904_240615424.pth b/checkpoint_p1/milestones/checkpoint_000939904_240615424.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b880cbdbd5c4aac3cef34b33eb39392c135aa40b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000939904_240615424.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79e5948b87bc53a8e69ecf38d8141a43c6a21c6ed8680b7a9a52d0f97710d979
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000951424_243564544.pth b/checkpoint_p1/milestones/checkpoint_000951424_243564544.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f6c362585f9939805f84cbb601348d87c9ebf960
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000951424_243564544.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5002747b15840c2650bbf2cac20476caa0db7ea192971aac67855efaaba0ac21
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000962784_246472704.pth b/checkpoint_p1/milestones/checkpoint_000962784_246472704.pth
new file mode 100644
index 0000000000000000000000000000000000000000..13109475fca168d57c82de949612711cd6084f3b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000962784_246472704.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad29793c907fb87805ef85e65169aca4a217054cb73841ed1b9f87b5744a5c3e
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000974240_249405440.pth b/checkpoint_p1/milestones/checkpoint_000974240_249405440.pth
new file mode 100644
index 0000000000000000000000000000000000000000..05c04b549e61625a8eb59f36c6dad4366900ef0e
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000974240_249405440.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aaae252983bd26e6bd6a67a98fbe1a21e485c24d2358bf3af38805a90ff3b21a
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000985600_252313600.pth b/checkpoint_p1/milestones/checkpoint_000985600_252313600.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f6085b770ce95a1ac5f3e869497270ccdf51387d
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000985600_252313600.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8264b1c559b1f4779bcda24cd18c5bd6f92fe61425a5ae1e2b3796fa17b0b38b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_000997088_255254528.pth b/checkpoint_p1/milestones/checkpoint_000997088_255254528.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8193f5f5ceb3e89582191065c99deaf9e8fefbfd
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_000997088_255254528.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44e6d9de347d18515a42a7c5683809f541b15fe560b23496def0cc38c9c54185
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001008576_258195456.pth b/checkpoint_p1/milestones/checkpoint_001008576_258195456.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e72fc0bb68cdd6fea1ff0d9b93ff3cd0ae42e297
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001008576_258195456.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4befe99ce2de45b2d85375f2a8a6bc00c64df229493a1f6177d6d4d53ace8866
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001020096_261144576.pth b/checkpoint_p1/milestones/checkpoint_001020096_261144576.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1c57d1c8a522362cb296db684cbc2ba97650c1cc
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001020096_261144576.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:704f497ca1215b7df88e6fe013fe98ec83d6b4d4713c56794217b71b3a3eb2e9
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001031648_264101888.pth b/checkpoint_p1/milestones/checkpoint_001031648_264101888.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cce361f325a3feddb8b5f8effdfd77b466d61396
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001031648_264101888.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5679deb6d78dd8719e2236c888ea07531e00dc8542c99592050d85dc3a3dcae
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001043104_267034624.pth b/checkpoint_p1/milestones/checkpoint_001043104_267034624.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eb3695db7818b5a0eeb1c8f9cc3ce2564d5e97bf
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001043104_267034624.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae4c8d9d8771251bb62c59fe12cdafc5acb842fad82fbab8cec0418c33a8a0d3
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001054592_269975552.pth b/checkpoint_p1/milestones/checkpoint_001054592_269975552.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9f2c23ba361071802c9ed081a9da88696931652a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001054592_269975552.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:119d04e7635a6f732c22b248274cabd3a22df74b43f073276d783ff01a49e97b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001066048_272908288.pth b/checkpoint_p1/milestones/checkpoint_001066048_272908288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..16b7e5f897971a4080c25d6303ac21ea9d4e4835
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001066048_272908288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaeac167c9f0c99deb59b70396483b57234b9def66d1296034fb8375b6b00d1f
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001077568_275857408.pth b/checkpoint_p1/milestones/checkpoint_001077568_275857408.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1eca6e2f760fe7f7bb34e9451cbf26685a3c3779
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001077568_275857408.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d66d35a74119c9948aba0dbb818d7e4a0a03958fb29ed976d044a658f4b54220
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001089088_278806528.pth b/checkpoint_p1/milestones/checkpoint_001089088_278806528.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dabe7f9ba9f26f3dcef1661d248cb093aa1e57a6
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001089088_278806528.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:382274d7ae5d0f0e40de3e0b9d7743aef4e7763113eb70ef76d00a828d14f646
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001100768_281796608.pth b/checkpoint_p1/milestones/checkpoint_001100768_281796608.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e42f441122cc22192a5e7ec1c8e254d4c11253c7
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001100768_281796608.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd46cbd04f389ff379bf5fb2ae1f32203734f63642d4d963b62d8f60598d25eb
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001112320_284753920.pth b/checkpoint_p1/milestones/checkpoint_001112320_284753920.pth
new file mode 100644
index 0000000000000000000000000000000000000000..76003b4ea6d3a04d3474fba055ac33d59e4854bf
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001112320_284753920.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aacd59c567c1b055271466f29a3336daf1c52e9f5852d4d29183a7a8a2228c4
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001123936_287727616.pth b/checkpoint_p1/milestones/checkpoint_001123936_287727616.pth
new file mode 100644
index 0000000000000000000000000000000000000000..848c4dd066b1cca05320078829c1026fcff95757
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001123936_287727616.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3413fcd05b6c598121199cb3723650def376440171925d13347356f42bbe7888
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001135584_290709504.pth b/checkpoint_p1/milestones/checkpoint_001135584_290709504.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0bc24fb761451a3d35cfc5383ceffb43d7f077a9
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001135584_290709504.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d75b52b65a6c58ee45092b317ecfd13fec69e8d75345c0153a2fca3d40af728
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001147168_293675008.pth b/checkpoint_p1/milestones/checkpoint_001147168_293675008.pth
new file mode 100644
index 0000000000000000000000000000000000000000..be8fdfd70975ddb2dc8d014e3d9233a7a031bc74
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001147168_293675008.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:839f89bfef663e452076e6937df7c9b3e76664df75b132e5afeb8fde25f16afc
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001158752_296640512.pth b/checkpoint_p1/milestones/checkpoint_001158752_296640512.pth
new file mode 100644
index 0000000000000000000000000000000000000000..20600f72ae7792fad153956766a43724b18faa2f
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001158752_296640512.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e39fd487de963b5cc9763231d8e450b9b8761bf75370973a93a51d809985afd
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001170400_299622400.pth b/checkpoint_p1/milestones/checkpoint_001170400_299622400.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bb2a663308fb17cf61e6c74e4e6a312a5a30b5ab
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001170400_299622400.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e66a5ccff3edf2be28743521719d8ce2f271e329d092e7dc3deb99fbd05d5102
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001182048_302604288.pth b/checkpoint_p1/milestones/checkpoint_001182048_302604288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3dde63d5c4f6f730d0197e22f0bb8b5437ed0226
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001182048_302604288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f85a8dfbf880cf481380fac18ff1f94c32a3aff845f7cfa857017a6fb082f43
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001193664_305577984.pth b/checkpoint_p1/milestones/checkpoint_001193664_305577984.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e79401f6b97d2175d10d46f7b1a70be4d95b28ae
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001193664_305577984.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e5953e568f3c06c568827d642203480e51e835252740f73b8d0d620112027ae
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001205216_308535296.pth b/checkpoint_p1/milestones/checkpoint_001205216_308535296.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d6978da82b4117d25f99dbe2e866c6cc4708d9fe
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001205216_308535296.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:886465e1c6cee95b76b0163368dbd1bd564ffa46c7c50d6f49266ebc3a1e212d
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001216864_311517184.pth b/checkpoint_p1/milestones/checkpoint_001216864_311517184.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9459cfe2e6dcfcc583f4d592abcdd2de701a95f3
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001216864_311517184.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d5bff2d4f1e762e8a3742a7586ff06a080aa631e1293fcffe996d2f2f9fb3cc
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001228512_314499072.pth b/checkpoint_p1/milestones/checkpoint_001228512_314499072.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b0d5c4a7a162ea49b1ecb9018b76e5b806ba33df
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001228512_314499072.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:205507c4c6524102deb861b16ff0419f7447ddf039ed28eceaf3813459afc622
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001240128_317472768.pth b/checkpoint_p1/milestones/checkpoint_001240128_317472768.pth
new file mode 100644
index 0000000000000000000000000000000000000000..31ef24e5a89a933b1d851da71bdee2e01c6d3482
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001240128_317472768.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:799d47abeddfdd9f2318a36c7127d07ca56b6bbe4c2a6b1827f726d12096acca
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001251744_320446464.pth b/checkpoint_p1/milestones/checkpoint_001251744_320446464.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0b01dbfae05ff14229257989282049fc0e01f08b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001251744_320446464.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad77667445048578808da47a4402f90c1e9ab5897ec71a35639d3e0d64912821
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001263008_323330048.pth b/checkpoint_p1/milestones/checkpoint_001263008_323330048.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c141b3cc9c045b1a978dbbf3a247244cdf4bc90c
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001263008_323330048.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbc0d483b681d917466a80a44ccc7ecd5fe4987b7d1277c63d2fbe41577ee567
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001274752_326336512.pth b/checkpoint_p1/milestones/checkpoint_001274752_326336512.pth
new file mode 100644
index 0000000000000000000000000000000000000000..693b7ae298f904fcd362b3f5b87249971076e8c4
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001274752_326336512.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cda186e501bb8cfd83c0fe664d4746f5a93868e4321b45f7d52617f9f3e2a18a
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001286368_329310208.pth b/checkpoint_p1/milestones/checkpoint_001286368_329310208.pth
new file mode 100644
index 0000000000000000000000000000000000000000..599c9af5ce8dd334ced559ec2178331cd510ac7c
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001286368_329310208.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd9fc90969b99d849a926c05f3108629ba994bc522657548ce8ce6a0df6fecc0
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001298048_332300288.pth b/checkpoint_p1/milestones/checkpoint_001298048_332300288.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8a5e1d755af132c070285e3970f7c61783524c06
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001298048_332300288.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c78045eaa0a585bf59010ac8adb6597957886dae9f96faf14ef623049742618
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001309792_335306752.pth b/checkpoint_p1/milestones/checkpoint_001309792_335306752.pth
new file mode 100644
index 0000000000000000000000000000000000000000..54100b8b6d8658f14c28b6753c4dcca2c86fa5e0
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001309792_335306752.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c7a15bce6a3867445a98856b86f7c6e8c3a604db1b913f2a72c99e5030f2ed4
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001321472_338296832.pth b/checkpoint_p1/milestones/checkpoint_001321472_338296832.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2722c4679f08ccae33e8640aba3cdefe5d1b685f
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001321472_338296832.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:343f2d83ec038c5eb7f5a2b326853da96fa6d6f0a452b5aa3cfe6e51cadf3941
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001333152_341286912.pth b/checkpoint_p1/milestones/checkpoint_001333152_341286912.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d411d0511feb255a54a17f1aeb1f678875f391f5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001333152_341286912.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25abb4d4755e6bbc42fbf8c552b8f26123871078e06714c80fa6405d78e35db0
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001344864_344285184.pth b/checkpoint_p1/milestones/checkpoint_001344864_344285184.pth
new file mode 100644
index 0000000000000000000000000000000000000000..148859876d05e03d870cf4bf151e61f5c9599621
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001344864_344285184.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e20e3abb7291d67f6280f2632db971d5cad372d01b600ce531ddf3efe06c931
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001356544_347275264.pth b/checkpoint_p1/milestones/checkpoint_001356544_347275264.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0515acec253aa983e42bf77c64065a6944b873bf
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001356544_347275264.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be6b6ae7d66a7a9a64f7130a31b51bd5a4b50e50d5138082867207f961673f98
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001368288_350281728.pth b/checkpoint_p1/milestones/checkpoint_001368288_350281728.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8378a96e571b9e1c6384578897285f6c9cc8ab93
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001368288_350281728.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eeaae1ec66df78697a4e81cebd9c0dbdb9870f41ee6a3f1b1d988b3a3117642f
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001380000_353280000.pth b/checkpoint_p1/milestones/checkpoint_001380000_353280000.pth
new file mode 100644
index 0000000000000000000000000000000000000000..30b617583d98b26a5b2aaaf25f7ec4f7928e67a4
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001380000_353280000.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdb9164a8fb4d54728a3e8058766492d865926e23a216f55512e022a2a7dc9c9
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001391616_356253696.pth b/checkpoint_p1/milestones/checkpoint_001391616_356253696.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bfc96e2c75860a42d48fb2f78b246a4b22f3e9d9
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001391616_356253696.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e69acb6c7a73eabe2b7eddedff40bdf98fedf319bec859171bac216e14da5f32
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001403264_359235584.pth b/checkpoint_p1/milestones/checkpoint_001403264_359235584.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4e556dd383cee930b303e3f155e5dbe691de07bc
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001403264_359235584.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61b168cfa0e3af9677b11b5557f347b1f16c5248c75812bbab4778d41541d86e
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001414976_362233856.pth b/checkpoint_p1/milestones/checkpoint_001414976_362233856.pth
new file mode 100644
index 0000000000000000000000000000000000000000..838780311ae8559cd4191e2b284a9c1b57c31f5d
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001414976_362233856.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dbfcf374d0fcce620eeaf5c324af74512fa7b40fa7375ff3b796d1ff21fa69b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001426624_365215744.pth b/checkpoint_p1/milestones/checkpoint_001426624_365215744.pth
new file mode 100644
index 0000000000000000000000000000000000000000..87ba0a3cabfffbfc5daf6f31f7c4da91449a46de
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001426624_365215744.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5f37dd7c1e2420823ba8f378a0318ece22741cd08faaf58af0bde0554ff8656
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001438304_368205824.pth b/checkpoint_p1/milestones/checkpoint_001438304_368205824.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a559bf28ee28fb6cd0c90873aed55d8a54cae20
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001438304_368205824.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87b7bdbf6d6756e2d09f9565670400e6fc06aaced1c38daf618778376f8912b5
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001449984_371195904.pth b/checkpoint_p1/milestones/checkpoint_001449984_371195904.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6d3190ff5761ce46d22d996dacf4dbcdfe2e3306
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001449984_371195904.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:603d9281f8e2f81a65de9d3088b0fb95754426ae06478df21ddb71161b3ac2c2
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001461664_374185984.pth b/checkpoint_p1/milestones/checkpoint_001461664_374185984.pth
new file mode 100644
index 0000000000000000000000000000000000000000..26d92bc9e5b154cc7519876aad8c2b17f5b622a9
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001461664_374185984.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5eccba7b27bdc311d8e2d5d626e0423d5f8166e23f66ec78fae76c7ac53d6c7
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001473376_377184256.pth b/checkpoint_p1/milestones/checkpoint_001473376_377184256.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0094173ab4e4dc9fc92dfa22b1bdd3ebc15564fb
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001473376_377184256.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe77794df2956be1e4b52f095d233a3fb83a12cd58a8070a3201e45fff796f6
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001485024_380166144.pth b/checkpoint_p1/milestones/checkpoint_001485024_380166144.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1e91de7a8b399b7c49a4c7e1e2d47debb4ff131b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001485024_380166144.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c94d8602f95345e2755085b90b3d4afec67678826e705ed95197928767c3c4bf
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001496672_383148032.pth b/checkpoint_p1/milestones/checkpoint_001496672_383148032.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5b3f6917a7fdd82b9528a122c47a0831003375a7
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001496672_383148032.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1170570223a71369210e5e7538670f55b7f70f2a7862d12d80694d48eff0e975
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001508384_386146304.pth b/checkpoint_p1/milestones/checkpoint_001508384_386146304.pth
new file mode 100644
index 0000000000000000000000000000000000000000..310934d6c19be3c9b675a6b62ccf5c31c5af5f95
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001508384_386146304.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a7df622634280f896d65c8705b5fa85da9230132a202793edc616526891ed21
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001520128_389152768.pth b/checkpoint_p1/milestones/checkpoint_001520128_389152768.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a4f8c72502ef50479d82d495683ddc15aadec9b9
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001520128_389152768.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f92519f12addaf246909e8130da8c0885fea5ba7a8cf4c33340f5b2fcfcb7b76
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001531840_392151040.pth b/checkpoint_p1/milestones/checkpoint_001531840_392151040.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2a2650a76b50e18fb3d871f0d0ada26cc24b884f
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001531840_392151040.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad68d415cdf065c2bc1f0e417ed9f60d47a213666e4a931ed041b09638412c7c
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001543520_395141120.pth b/checkpoint_p1/milestones/checkpoint_001543520_395141120.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a4107dfadfd7d9fd481555cf860b7c420e52f638
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001543520_395141120.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3709495b4a305bcc2b6049c8512c118d912ef40241509843c71682a102b59a29
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001555232_398139392.pth b/checkpoint_p1/milestones/checkpoint_001555232_398139392.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd1a09f79bc369b8251fab070fce5e2e2f98137a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001555232_398139392.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68ff4f23bbb42a75bfb9d13a2cf94cb9882f8aa1ac96faefa9e5766fc6b5a42d
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001566944_401137664.pth b/checkpoint_p1/milestones/checkpoint_001566944_401137664.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f31c963ec94107f2ca262b40e42a02747aa78137
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001566944_401137664.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22152b1b80432c6732cf281613b7d5d1ae27543fa3f897fe77946e5764a4036a
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001578624_404127744.pth b/checkpoint_p1/milestones/checkpoint_001578624_404127744.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d1bb8517b069f00466dd54fc2c88c0ed8d2c4372
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001578624_404127744.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf0a0db1c2b4ce057d8fc1a46f841ad75e66960b0fb220a90f001653a1cb6856
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001590336_407126016.pth b/checkpoint_p1/milestones/checkpoint_001590336_407126016.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3dc6addc1d8e3bea88a122db4122169cc988de02
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001590336_407126016.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8467777739492c38dc3c19c3047ae90a83561e88208a7dadfee8324a0738a4b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001602080_410132480.pth b/checkpoint_p1/milestones/checkpoint_001602080_410132480.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6d87e3b73c826f20e0bb2de9dd6361c05ac4962c
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001602080_410132480.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:582c7b99610fda74f81855cb5aae4e16becc38c3a8d0fddce4b992a6ad2790da
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001613728_413114368.pth b/checkpoint_p1/milestones/checkpoint_001613728_413114368.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e41f3a7c60775d2648c64a808677737216c5145b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001613728_413114368.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a03003d878d6bf185c1bf3e289514fba6014826a5c79151832a8c35322a1563
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001625440_416112640.pth b/checkpoint_p1/milestones/checkpoint_001625440_416112640.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c6d09e0b12d8adf7f3def5f63f7be87982880205
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001625440_416112640.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ccff9e6b51089b239ee29010681f8d240dba988bb1046f1503a6ca1961aa46
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001637120_419102720.pth b/checkpoint_p1/milestones/checkpoint_001637120_419102720.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e45695adea1543240dc8294b1ab7f6916f22619f
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001637120_419102720.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1052474beb5549dd2e64008b64fe1db08f4ba20f5fccc0eb1f34a4414099d1e0
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001648640_422051840.pth b/checkpoint_p1/milestones/checkpoint_001648640_422051840.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6e833f2b59e0640c59e806d69d13bfd69adc8241
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001648640_422051840.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fc5e39ae84ab03d78e61883de47138b649d3c5b67e52277c888212a65fbf608
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001660288_425033728.pth b/checkpoint_p1/milestones/checkpoint_001660288_425033728.pth
new file mode 100644
index 0000000000000000000000000000000000000000..154667a6fdb6f13b16c242874c0c86d2d738f48a
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001660288_425033728.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:481e842cf87c4d2f48c47ecefd12005a335560e08f567db3681c4e089d6aad30
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001671840_427991040.pth b/checkpoint_p1/milestones/checkpoint_001671840_427991040.pth
new file mode 100644
index 0000000000000000000000000000000000000000..ded315c4a925c187ed53a931429ef355f56d185c
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001671840_427991040.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eec0eefcdf4a334935fb555a657ceaf13222e15e33e0dd9be0bea1a8eaac817
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001683456_430964736.pth b/checkpoint_p1/milestones/checkpoint_001683456_430964736.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dbc701a322fbf72f57415f64b7c7029170271154
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001683456_430964736.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5238c2f280a70ddbefc9beb114d3cb22f3432ed5da506987b0ede29418716f7f
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001695072_433938432.pth b/checkpoint_p1/milestones/checkpoint_001695072_433938432.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d7a7d7e36488ae1b93092619b83bac2c566a37a5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001695072_433938432.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75ba177978d3a649cd0ceccec6dff039f123168401bf6460fe6c97116a133b64
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001706720_436920320.pth b/checkpoint_p1/milestones/checkpoint_001706720_436920320.pth
new file mode 100644
index 0000000000000000000000000000000000000000..79110aeae9e22e74ece843d6d383b4fafef60447
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001706720_436920320.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef90ee9c2a878e67689f3230ee4f5071925596117fb00ed9abf0789fa2975cc9
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001718304_439885824.pth b/checkpoint_p1/milestones/checkpoint_001718304_439885824.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4e1ced1fe62590b4d2a726954cbede456e71515b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001718304_439885824.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e5432aa5dab82bfe7e5eac4add15a565dec1a5ba428fa8fcb101e073c610bf9
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001729888_442851328.pth b/checkpoint_p1/milestones/checkpoint_001729888_442851328.pth
new file mode 100644
index 0000000000000000000000000000000000000000..65a8110eeae3f5ad857e3c4071f2aaea2f64c905
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001729888_442851328.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e18c8747eecf25d8343f03b7ab50f782d3db3b0775a5fb8759832f52f8cd3655
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001741504_445825024.pth b/checkpoint_p1/milestones/checkpoint_001741504_445825024.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d53a893da414c434f4bce37aa6991544801975cb
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001741504_445825024.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b26ceadcaaef358a5a6d5716130f4018b3e7ec7091b3a4f99d434186787da29c
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001753152_448806912.pth b/checkpoint_p1/milestones/checkpoint_001753152_448806912.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c1c1cdfd034e05ced5ec6965d9f9de9c07188507
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001753152_448806912.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef888ed2b9fc1577b0e21513809e5e00bbd9e8d0644721739433493cc2efc2a8
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001764704_451764224.pth b/checkpoint_p1/milestones/checkpoint_001764704_451764224.pth
new file mode 100644
index 0000000000000000000000000000000000000000..eeb5f4a4f470718c389e8b287f20053520f9e194
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001764704_451764224.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bf408602221ffbc4a8417fe355e6fd02bf12e9366a378ad024f5ea4fdc6bfa0
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001776320_454737920.pth b/checkpoint_p1/milestones/checkpoint_001776320_454737920.pth
new file mode 100644
index 0000000000000000000000000000000000000000..53d8f01c971c553ac72062cdb4431079e73e9fc5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001776320_454737920.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a483906e9828ae249033a1a9b68f81598d3b65965ae0414d10127053c625e15
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001787904_457703424.pth b/checkpoint_p1/milestones/checkpoint_001787904_457703424.pth
new file mode 100644
index 0000000000000000000000000000000000000000..969795ea763f036f38a2b949317c88c68e55f8c5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001787904_457703424.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba99a3ea16aeddf161f434ebb9e0b5192eba339538922c2760fb87fcbd0fd92b
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001799552_460685312.pth b/checkpoint_p1/milestones/checkpoint_001799552_460685312.pth
new file mode 100644
index 0000000000000000000000000000000000000000..608a52ddc73c249ef85e8009d77104f329c60816
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001799552_460685312.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec20adb7a60d0ea815e0a32801d2eddbad1fafac40117359e4ee4456ea287cb6
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001811200_463667200.pth b/checkpoint_p1/milestones/checkpoint_001811200_463667200.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1f28402a3e1287050f53360291767b6f29568dd1
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001811200_463667200.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b9213e01d355d865a73ec1828f987aeaddde261077542aaf00a7d215914b214
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001822912_466665472.pth b/checkpoint_p1/milestones/checkpoint_001822912_466665472.pth
new file mode 100644
index 0000000000000000000000000000000000000000..089aa74a14b640c730d5fd12d7bbd92fbbab2576
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001822912_466665472.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7d4b9b11322e0d71f8a16328df127776881aa7e0a50d1e5cb17de2f4d8030cb
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001834656_469671936.pth b/checkpoint_p1/milestones/checkpoint_001834656_469671936.pth
new file mode 100644
index 0000000000000000000000000000000000000000..8cf5be38dfa299a80d95b1f7ece8bff4755824c5
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001834656_469671936.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d26f25c53a9fd72592756b748c6fcd495df9822f324f63bafd027a3ad83fa79d
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001846336_472662016.pth b/checkpoint_p1/milestones/checkpoint_001846336_472662016.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e3ea6ecaa2f925d82aeeb6a2ed2d9b79331e0768
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001846336_472662016.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cae4d0629d7f4e4661b35c68c06b719f4cc8f979751c9eb2fc4640409c6c4670
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001858016_475652096.pth b/checkpoint_p1/milestones/checkpoint_001858016_475652096.pth
new file mode 100644
index 0000000000000000000000000000000000000000..a96996f1760656982793e548aa276c9ff2081934
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001858016_475652096.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f41f2d998582c2a8f2074f68bd5a9185b3639625d6e2fc7496293bde3cc649e3
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001869760_478658560.pth b/checkpoint_p1/milestones/checkpoint_001869760_478658560.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e43911508141accc8aec718b19b187c1b4ad691d
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001869760_478658560.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebe3b111d0e0ae88d56a0b0475152c7dbb65e9c6f1667dd11b78c5d23536c765
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001881408_481640448.pth b/checkpoint_p1/milestones/checkpoint_001881408_481640448.pth
new file mode 100644
index 0000000000000000000000000000000000000000..52a2d9c3a309c6e480be8678aa653429fe44bf78
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001881408_481640448.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f08831f287253d232a5ca7a7c481a322e704118b3a7fdc3baaf6892d0109b434
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001893088_484630528.pth b/checkpoint_p1/milestones/checkpoint_001893088_484630528.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b677a18771ada3fd990f67537a48f85c0d521687
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001893088_484630528.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b81df50cc2b37f6aeb3f9aec089e06dc9e21185e6aaa603578b7bc99214900b2
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001904768_487620608.pth b/checkpoint_p1/milestones/checkpoint_001904768_487620608.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0577e18ae6536ca3ceade144763afe154f729b34
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001904768_487620608.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56eddf17453b3de53eb7616c24dff5a79215fca5a4398e196249e1bd05a178d8
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001916544_490635264.pth b/checkpoint_p1/milestones/checkpoint_001916544_490635264.pth
new file mode 100644
index 0000000000000000000000000000000000000000..37306517d9706d66712c92f7fbe42552ec4eb408
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001916544_490635264.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:265548a13a7dcac76a725012c79105a48ee9395d2e3215a1d8d7045ccd84bfb2
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001928192_493617152.pth b/checkpoint_p1/milestones/checkpoint_001928192_493617152.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3d7fd6f4fd6724f6458a1b99f878dfbdec2ff169
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001928192_493617152.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e23491bb1e4acabad26e18f6abb23100607b03992a0ea319fd23af6dd846b7d
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001939904_496615424.pth b/checkpoint_p1/milestones/checkpoint_001939904_496615424.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e980e084eb454ce38e519f605e980489e6ecae67
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001939904_496615424.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37a97c0e58813783e7868fd105edb58f6715882561ebc479cced41097815873f
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001951584_499605504.pth b/checkpoint_p1/milestones/checkpoint_001951584_499605504.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd7c948a4932907542ce597b858d55966ada4f83
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001951584_499605504.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db030c453ac84992bd52a785399b91d74b4998a52ebd974571df7fbe86e1128d
+size 20797067
diff --git a/checkpoint_p1/milestones/checkpoint_001958368_502693888.pth b/checkpoint_p1/milestones/checkpoint_001958368_502693888.pth
new file mode 100644
index 0000000000000000000000000000000000000000..f718d7e5baeafbfebf578b8aa78dd8b58d26662b
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001958368_502693888.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:910d88b605b2ad9c85be1ed31298f1ca4ac3e7cd2c799cc9b2e979648b027470
+size 20797003
diff --git a/checkpoint_p1/milestones/checkpoint_001964384_505774080.pth b/checkpoint_p1/milestones/checkpoint_001964384_505774080.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5dc183cb0c1c04af160a3470c882b8fe0b2935f8
--- /dev/null
+++ b/checkpoint_p1/milestones/checkpoint_001964384_505774080.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a8be43243294e34537e0557eaaa58e26f773cc98d0b197af2125244d0e51774
+size 20797003
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..83b6f270413a698f7a1fd74c8c3fa4be81515d22
--- /dev/null
+++ b/config.json
@@ -0,0 +1,164 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "atari_robotank",
+  "experiment": "atari_robotank_APPO",
+  "train_dir": "./train_atari",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": 1234,
+  "num_policies": 2,
+  "async_rl": true,
+  "serial_mode": false,
+  "batched_sampling": true,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 2,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 16,
+  "num_envs_per_worker": 8,
+  "batch_size": 1024,
+  "num_batches_per_epoch": 8,
+  "num_epochs": 4,
+  "rollout": 128,
+  "recurrence": 1,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 1.0,
+  "reward_clip": 1000.0,
+  "value_bootstrap": false,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.0004677351413,
+  "value_loss_coeff": 0.5,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "entropy",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.1,
+  "ppo_clip_value": 1.0,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-05,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 0.0,
+  "learning_rate": 0.0003033891184,
+  "lr_schedule": "linear_decay",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.01,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 255.0,
+  "normalize_input": true,
+  "normalize_input_keys": [
+    "obs"
+  ],
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 3,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": false,
+  "heartbeat_interval": 10,
+  "heartbeat_reporting_interval": 60,
+  "train_for_env_steps": 500000000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": 1200,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 100000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    512,
+    512
+  ],
+  "encoder_conv_architecture": "convnet_atari",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": false,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "relu",
+  "policy_initialization": "orthogonal",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": false,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": false,
+  "env_gpu_observations": true,
+  "env_frameskip": 4,
+  "env_framestack": 4,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": true,
+  "with_wandb": true,
+  "wandb_user": "matt-stammers",
+  "wandb_project": "atari_APPO",
+  "wandb_group": "atari_robotank",
+  "wandb_job_type": "SF",
+  "wandb_tags": [
+    "atari"
+  ],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "command_line": "--algo=APPO --env=atari_robotank --experiment=atari_robotank_APPO --num_policies=2 --restart_behavior=resume --train_dir=./train_atari --train_for_env_steps=500000000 --seed=1234 --num_workers=16 --num_envs_per_worker=8 --num_batches_per_epoch=8 --worker_num_splits=2 --async_rl=true --batched_sampling=true --batch_size=1024 --max_grad_norm=0 --learning_rate=0.0003033891184 --heartbeat_interval=10 --heartbeat_reporting_interval=60 --save_milestones_sec=1200 --num_epochs=4 --exploration_loss_coeff=0.0004677351413 --summaries_use_frameskip=False --with_wandb=true --wandb_user=matt-stammers --wandb_project=atari_APPO --wandb_group=atari_robotank --wandb_job_type=SF --wandb_tags=atari",
+  "cli_args": {
+    "algo": "APPO",
+    "env": "atari_robotank",
+    "experiment": "atari_robotank_APPO",
+    "train_dir": "./train_atari",
+    "restart_behavior": "resume",
+    "seed": 1234,
+    "num_policies": 2,
+    "async_rl": true,
+    "batched_sampling": true,
+    "worker_num_splits": 2,
+    "num_workers": 16,
+    "num_envs_per_worker": 8,
+    "batch_size": 1024,
+    "num_batches_per_epoch": 8,
+    "num_epochs": 4,
+    "exploration_loss_coeff": 0.0004677351413,
+    "max_grad_norm": 0.0,
+    "learning_rate": 0.0003033891184,
+    "summaries_use_frameskip": false,
+    "heartbeat_interval": 10,
+    "heartbeat_reporting_interval": 60,
+    "train_for_env_steps": 500000000,
+    "save_milestones_sec": 1200,
+    "with_wandb": true,
+    "wandb_user": "matt-stammers",
+    "wandb_project": "atari_APPO",
+    "wandb_group": "atari_robotank",
+    "wandb_job_type": "SF",
+    "wandb_tags": [
+      "atari"
+    ]
+  },
+  "git_hash": "5fff97c2f535da5987d358cdbe6927cccd43621e",
+  "git_repo_name": "not a git repository",
+  "wandb_unique_id": "atari_robotank_APPO_20231123_115710_641997"
+}
\ No newline at end of file
diff --git a/git.diff b/git.diff
new file mode 100644
index 0000000000000000000000000000000000000000..9cf0f6792bd54243f70657987210adf15e33d282
--- /dev/null
+++ b/git.diff
@@ -0,0 +1,3470 @@
+diff --git a/train_dir/Standup/.summary/0/events.out.tfevents.1695118333.rhmmedcatt-ProLiant-ML350-Gen10 b/train_dir/Standup/.summary/0/events.out.tfevents.1695118333.rhmmedcatt-ProLiant-ML350-Gen10
+deleted file mode 100644
+index ce9a72a..0000000
+Binary files a/train_dir/Standup/.summary/0/events.out.tfevents.1695118333.rhmmedcatt-ProLiant-ML350-Gen10 and /dev/null differ
+diff --git a/train_dir/Standup/.summary/0/events.out.tfevents.1695118395.rhmmedcatt-ProLiant-ML350-Gen10 b/train_dir/Standup/.summary/0/events.out.tfevents.1695118395.rhmmedcatt-ProLiant-ML350-Gen10
+deleted file mode 100644
+index 85ffbb3..0000000
+Binary files a/train_dir/Standup/.summary/0/events.out.tfevents.1695118395.rhmmedcatt-ProLiant-ML350-Gen10 and /dev/null differ
+diff --git a/train_dir/Standup/.summary/0/events.out.tfevents.1695118777.rhmmedcatt-ProLiant-ML350-Gen10 b/train_dir/Standup/.summary/0/events.out.tfevents.1695118777.rhmmedcatt-ProLiant-ML350-Gen10
+deleted file mode 100644
+index 6b68289..0000000
+Binary files a/train_dir/Standup/.summary/0/events.out.tfevents.1695118777.rhmmedcatt-ProLiant-ML350-Gen10 and /dev/null differ
+diff --git a/train_dir/Standup/.summary/1/events.out.tfevents.1695118395.rhmmedcatt-ProLiant-ML350-Gen10 b/train_dir/Standup/.summary/1/events.out.tfevents.1695118395.rhmmedcatt-ProLiant-ML350-Gen10
+deleted file mode 100644
+index 7e65434..0000000
+Binary files a/train_dir/Standup/.summary/1/events.out.tfevents.1695118395.rhmmedcatt-ProLiant-ML350-Gen10 and /dev/null differ
+diff --git a/train_dir/Standup/.summary/1/events.out.tfevents.1695118777.rhmmedcatt-ProLiant-ML350-Gen10 b/train_dir/Standup/.summary/1/events.out.tfevents.1695118777.rhmmedcatt-ProLiant-ML350-Gen10
+deleted file mode 100644
+index e2184e2..0000000
+Binary files a/train_dir/Standup/.summary/1/events.out.tfevents.1695118777.rhmmedcatt-ProLiant-ML350-Gen10 and /dev/null differ
+diff --git a/train_dir/Standup/README.md b/train_dir/Standup/README.md
+index 59b4eea..2dc15b6 100644
+--- a/train_dir/Standup/README.md
++++ b/train_dir/Standup/README.md
+@@ -5,7 +5,7 @@ tags:
+ - reinforcement-learning
+ - sample-factory
+ model-index:
+-- name: APPO
++- name: ATD3
+   results:
+   - task:
+       type: reinforcement-learning
+@@ -15,12 +15,12 @@ model-index:
+       type: mujoco_standup
+     metrics:
+     - type: mean_reward
+-      value: 160842.81 +/- 49335.32
++      value: 157750.89 +/- 30990.47
+       name: mean_reward
+       verified: false
+ ---
+ 
+-A(n) **APPO** model trained on the **mujoco_standup** environment.
++A(n) **ATD3** model trained on the **mujoco_standup** environment.
+ 
+ This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+ Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/
+@@ -30,7 +30,7 @@ Documentation for how to use Sample-Factory can be found at https://www.samplefa
+ 
+ After installing Sample-Factory, download the model with:
+ ```
+-python -m sample_factory.huggingface.load_from_hub -r MattStammers/appo-mujoco-Standup
++python -m sample_factory.huggingface.load_from_hub -r MattStammers/atd3-mujoco-standup
+ ```
+ 
+     
+@@ -38,7 +38,7 @@ python -m sample_factory.huggingface.load_from_hub -r MattStammers/appo-mujoco-S
+ 
+ To run the model after download, use the `enjoy` script corresponding to this environment:
+ ```
+-python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_standup --train_dir=./train_dir --experiment=appo-mujoco-Standup
++python -m sf_examples.mujoco.enjoy_mujoco --algo=ATD3 --env=mujoco_standup --train_dir=./train_dir --experiment=atd3-mujoco-standup
+ ```
+ 
+ 
+@@ -49,7 +49,7 @@ See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
+ 
+ To continue training with this model, use the `train` script corresponding to this environment:
+ ```
+-python -m sf_examples.mujoco.train_mujoco --algo=APPO --env=mujoco_standup --train_dir=./train_dir --experiment=appo-mujoco-Standup --restart_behavior=resume --train_for_env_steps=10000000000
++python -m sf_examples.mujoco.train_mujoco --algo=ATD3 --env=mujoco_standup --train_dir=./train_dir --experiment=atd3-mujoco-standup --restart_behavior=resume --train_for_env_steps=10000000000
+ ```
+ 
+ Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
+diff --git a/train_dir/Standup/checkpoint_p0/best_000008160_4177920_reward_162764.036.pth b/train_dir/Standup/checkpoint_p0/best_000008160_4177920_reward_162764.036.pth
+deleted file mode 100644
+index 7b4f077..0000000
+Binary files a/train_dir/Standup/checkpoint_p0/best_000008160_4177920_reward_162764.036.pth and /dev/null differ
+diff --git a/train_dir/Standup/checkpoint_p0/checkpoint_000014336_7340032.pth b/train_dir/Standup/checkpoint_p0/checkpoint_000014336_7340032.pth
+deleted file mode 100644
+index d1f1336..0000000
+Binary files a/train_dir/Standup/checkpoint_p0/checkpoint_000014336_7340032.pth and /dev/null differ
+diff --git a/train_dir/Standup/checkpoint_p0/checkpoint_000014408_7376896.pth b/train_dir/Standup/checkpoint_p0/checkpoint_000014408_7376896.pth
+deleted file mode 100644
+index f4340e0..0000000
+Binary files a/train_dir/Standup/checkpoint_p0/checkpoint_000014408_7376896.pth and /dev/null differ
+diff --git a/train_dir/Standup/checkpoint_p1/best_000013232_6774784_reward_164168.870.pth b/train_dir/Standup/checkpoint_p1/best_000013232_6774784_reward_164168.870.pth
+deleted file mode 100644
+index af623df..0000000
+Binary files a/train_dir/Standup/checkpoint_p1/best_000013232_6774784_reward_164168.870.pth and /dev/null differ
+diff --git a/train_dir/Standup/checkpoint_p1/checkpoint_000014296_7319552.pth b/train_dir/Standup/checkpoint_p1/checkpoint_000014296_7319552.pth
+deleted file mode 100644
+index f875926..0000000
+Binary files a/train_dir/Standup/checkpoint_p1/checkpoint_000014296_7319552.pth and /dev/null differ
+diff --git a/train_dir/Standup/checkpoint_p1/checkpoint_000014368_7356416.pth b/train_dir/Standup/checkpoint_p1/checkpoint_000014368_7356416.pth
+deleted file mode 100644
+index 0916341..0000000
+Binary files a/train_dir/Standup/checkpoint_p1/checkpoint_000014368_7356416.pth and /dev/null differ
+diff --git a/train_dir/Standup/config.json b/train_dir/Standup/config.json
+index 638783d..22fa3bb 100644
+--- a/train_dir/Standup/config.json
++++ b/train_dir/Standup/config.json
+@@ -1,10 +1,10 @@
+ {
+   "help": false,
+-  "algo": "APPO",
++  "algo": "ATD3",
+   "env": "mujoco_standup",
+   "experiment": "Standup",
+   "train_dir": "./train_dir",
+-  "restart_behavior": "resume",
++  "restart_behavior": "restart",
+   "device": "gpu",
+   "seed": null,
+   "num_policies": 2,
+@@ -104,8 +104,8 @@
+   "use_record_episode_statistics": false,
+   "with_wandb": true,
+   "wandb_user": "matt-stammers",
+-  "wandb_project": "sample_factory",
+-  "wandb_group": "mujoco_standup",
++  "wandb_project": "mujoco",
++  "wandb_group": "mujoco_standup3",
+   "wandb_job_type": "SF",
+   "wandb_tags": [
+     "mujoco"
+diff --git a/train_dir/Standup/replay.mp4 b/train_dir/Standup/replay.mp4
+index 51d7026..f9ddacb 100644
+Binary files a/train_dir/Standup/replay.mp4 and b/train_dir/Standup/replay.mp4 differ
+diff --git a/train_dir/Standup/sf_log.txt b/train_dir/Standup/sf_log.txt
+index c3ddd83..0d60e70 100644
+--- a/train_dir/Standup/sf_log.txt
++++ b/train_dir/Standup/sf_log.txt
+@@ -1,46 +1,48 @@
+-[2023-09-19 11:12:17,416][35316] Saving configuration to ./train_dir/Standup/config.json...
+-[2023-09-19 11:12:17,417][35316] Rollout worker 0 uses device cpu
+-[2023-09-19 11:12:17,418][35316] Rollout worker 1 uses device cpu
+-[2023-09-19 11:12:17,418][35316] Rollout worker 2 uses device cpu
+-[2023-09-19 11:12:17,418][35316] Rollout worker 3 uses device cpu
+-[2023-09-19 11:12:17,418][35316] Rollout worker 4 uses device cpu
+-[2023-09-19 11:12:17,419][35316] Rollout worker 5 uses device cpu
+-[2023-09-19 11:12:17,419][35316] Rollout worker 6 uses device cpu
+-[2023-09-19 11:12:17,419][35316] Rollout worker 7 uses device cpu
+-[2023-09-19 11:12:17,419][35316] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
+-[2023-09-19 11:12:17,463][35316] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:12:17,463][35316] InferenceWorker_p0-w0: min num requests: 2
+-[2023-09-19 11:12:17,487][35316] Starting all processes...
+-[2023-09-19 11:12:17,488][35316] Starting process learner_proc0
+-[2023-09-19 11:12:17,492][35316] Starting all processes...
+-[2023-09-19 11:12:17,504][35316] Starting process inference_proc0-0
+-[2023-09-19 11:12:17,504][35316] Starting process rollout_proc0
+-[2023-09-19 11:12:17,505][35316] Starting process rollout_proc1
+-[2023-09-19 11:12:17,505][35316] Starting process rollout_proc2
+-[2023-09-19 11:12:17,507][35316] Starting process rollout_proc3
+-[2023-09-19 11:12:17,507][35316] Starting process rollout_proc4
+-[2023-09-19 11:12:17,508][35316] Starting process rollout_proc5
+-[2023-09-19 11:12:17,508][35316] Starting process rollout_proc6
+-[2023-09-19 11:12:17,508][35316] Starting process rollout_proc7
+-[2023-09-19 11:12:19,355][36026] Worker 6 uses CPU cores [24, 25, 26, 27]
+-[2023-09-19 11:12:19,356][36006] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:12:19,356][36006] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+-[2023-09-19 11:12:19,370][36022] Worker 0 uses CPU cores [0, 1, 2, 3]
+-[2023-09-19 11:12:19,373][36023] Worker 5 uses CPU cores [20, 21, 22, 23]
+-[2023-09-19 11:12:19,378][36006] Num visible devices: 1
+-[2023-09-19 11:12:19,396][36020] Worker 1 uses CPU cores [4, 5, 6, 7]
+-[2023-09-19 11:12:19,404][36021] Worker 2 uses CPU cores [8, 9, 10, 11]
+-[2023-09-19 11:12:19,410][36027] Worker 4 uses CPU cores [16, 17, 18, 19]
+-[2023-09-19 11:12:19,436][36025] Worker 7 uses CPU cores [28, 29, 30, 31]
+-[2023-09-19 11:12:19,436][36006] Starting seed is not provided
+-[2023-09-19 11:12:19,436][36006] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:12:19,436][36006] Initializing actor-critic model on device cuda:0
+-[2023-09-19 11:12:19,437][36006] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:12:19,437][36006] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:12:19,528][36019] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:12:19,529][36019] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+-[2023-09-19 11:12:19,530][36006] Created Actor Critic model with architecture:
+-[2023-09-19 11:12:19,530][36006] ActorCriticSharedWeights(
++[2023-09-21 15:10:43,177][99566] Saving configuration to ./train_dir/Standup/config.json...
++[2023-09-21 15:10:43,343][99566] Rollout worker 0 uses device cpu
++[2023-09-21 15:10:43,344][99566] Rollout worker 1 uses device cpu
++[2023-09-21 15:10:43,345][99566] Rollout worker 2 uses device cpu
++[2023-09-21 15:10:43,345][99566] Rollout worker 3 uses device cpu
++[2023-09-21 15:10:43,346][99566] Rollout worker 4 uses device cpu
++[2023-09-21 15:10:43,346][99566] Rollout worker 5 uses device cpu
++[2023-09-21 15:10:43,346][99566] Rollout worker 6 uses device cpu
++[2023-09-21 15:10:43,347][99566] Rollout worker 7 uses device cpu
++[2023-09-21 15:10:43,347][99566] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
++[2023-09-21 15:10:43,408][99566] Using GPUs [0] for process 0 (actually maps to GPUs [0])
++[2023-09-21 15:10:43,408][99566] InferenceWorker_p0-w0: min num requests: 1
++[2023-09-21 15:10:43,411][99566] Using GPUs [1] for process 1 (actually maps to GPUs [1])
++[2023-09-21 15:10:43,412][99566] InferenceWorker_p1-w0: min num requests: 1
++[2023-09-21 15:10:43,436][99566] Starting all processes...
++[2023-09-21 15:10:43,437][99566] Starting process learner_proc0
++[2023-09-21 15:10:43,439][99566] Starting process learner_proc1
++[2023-09-21 15:10:43,486][99566] Starting all processes...
++[2023-09-21 15:10:43,493][99566] Starting process inference_proc0-0
++[2023-09-21 15:10:43,493][99566] Starting process inference_proc1-0
++[2023-09-21 15:10:43,494][99566] Starting process rollout_proc0
++[2023-09-21 15:10:43,494][99566] Starting process rollout_proc1
++[2023-09-21 15:10:43,494][99566] Starting process rollout_proc2
++[2023-09-21 15:10:43,495][99566] Starting process rollout_proc3
++[2023-09-21 15:10:43,495][99566] Starting process rollout_proc4
++[2023-09-21 15:10:43,505][99566] Starting process rollout_proc5
++[2023-09-21 15:10:43,508][99566] Starting process rollout_proc6
++[2023-09-21 15:10:43,514][99566] Starting process rollout_proc7
++[2023-09-21 15:10:45,312][101035] Using GPUs [1] for process 1 (actually maps to GPUs [1])
++[2023-09-21 15:10:45,312][101035] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [1]) for learning process 1
++[2023-09-21 15:10:45,328][101117] Using GPUs [1] for process 1 (actually maps to GPUs [1])
++[2023-09-21 15:10:45,328][101117] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [1]) for inference process 1
++[2023-09-21 15:10:45,330][101035] Num visible devices: 1
++[2023-09-21 15:10:45,346][101117] Num visible devices: 1
++[2023-09-21 15:10:45,369][101035] Starting seed is not provided
++[2023-09-21 15:10:45,370][101035] Using GPUs [0] for process 1 (actually maps to GPUs [1])
++[2023-09-21 15:10:45,370][101035] Initializing actor-critic model on device cuda:0
++[2023-09-21 15:10:45,370][101035] RunningMeanStd input shape: (376,)
++[2023-09-21 15:10:45,371][101035] RunningMeanStd input shape: (1,)
++[2023-09-21 15:10:45,373][101122] Worker 4 uses CPU cores [16, 17, 18, 19]
++[2023-09-21 15:10:45,392][101119] Worker 2 uses CPU cores [8, 9, 10, 11]
++[2023-09-21 15:10:45,415][101120] Worker 3 uses CPU cores [12, 13, 14, 15]
++[2023-09-21 15:10:45,415][101121] Worker 5 uses CPU cores [20, 21, 22, 23]
++[2023-09-21 15:10:45,421][101035] Created Actor Critic model with architecture:
++[2023-09-21 15:10:45,421][101035] ActorCriticSharedWeights(
+   (obs_normalizer): ObservationNormalizer(
+     (running_mean_std): RunningMeanStdDictInPlace(
+       (running_mean_std): ModuleDict(
+@@ -71,167 +73,21 @@
+     (distribution_linear): Linear(in_features=64, out_features=17, bias=True)
+   )
+ )
+-[2023-09-19 11:12:19,571][36019] Num visible devices: 1
+-[2023-09-19 11:12:19,598][36024] Worker 3 uses CPU cores [12, 13, 14, 15]
+-[2023-09-19 11:12:20,100][36006] Using optimizer <class 'torch.optim.adam.Adam'>
+-[2023-09-19 11:12:20,101][36006] No checkpoints found
+-[2023-09-19 11:12:20,101][36006] Did not load from checkpoint, starting from scratch!
+-[2023-09-19 11:12:20,101][36006] Initialized policy 0 weights for model version 0
+-[2023-09-19 11:12:20,103][36006] LearnerWorker_p0 finished initialization!
+-[2023-09-19 11:12:20,103][36006] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:12:20,715][36019] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:12:20,716][36019] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:12:20,748][35316] Inference worker 0-0 is ready!
+-[2023-09-19 11:12:20,749][35316] All inference workers are ready! Signal rollout workers to start!
+-[2023-09-19 11:12:20,854][36024] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,855][36024] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,857][36025] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,858][36025] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,858][36026] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,858][36021] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,859][36026] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,859][36021] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,872][36027] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,873][36027] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,881][36022] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,882][36022] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,899][36020] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,899][36023] Decorrelating experience for 0 frames...
+-[2023-09-19 11:12:20,900][36023] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,900][36020] Decorrelating experience for 64 frames...
+-[2023-09-19 11:12:20,908][36024] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,910][36026] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,913][36025] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,915][36021] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,926][36027] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,938][36022] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,984][36023] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:20,986][36020] Decorrelating experience for 128 frames...
+-[2023-09-19 11:12:21,016][36026] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,017][36024] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,018][36021] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,021][36025] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,033][36027] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,055][36022] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,144][36023] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,150][36020] Decorrelating experience for 192 frames...
+-[2023-09-19 11:12:21,191][36021] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,191][36024] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,198][36025] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,200][36026] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,204][36027] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,236][36022] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,312][36023] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,320][36020] Decorrelating experience for 256 frames...
+-[2023-09-19 11:12:21,387][36021] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,402][36024] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,408][36027] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,411][36025] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,449][36026] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,456][36022] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,516][36023] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,526][36020] Decorrelating experience for 320 frames...
+-[2023-09-19 11:12:21,639][36021] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,667][36024] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,672][36027] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,679][36025] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,690][36026] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,727][36022] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,766][36020] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,777][36023] Decorrelating experience for 384 frames...
+-[2023-09-19 11:12:21,966][36021] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:21,974][36027] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:21,978][36024] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:21,991][36025] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:22,000][36026] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:22,066][36022] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:22,100][36020] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:22,150][36023] Decorrelating experience for 448 frames...
+-[2023-09-19 11:12:23,541][35316] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+-[2023-09-19 11:12:28,541][35316] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 16384. Throughput: 0: 2365.6. Samples: 11828. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:12:28,544][36006] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000032_16384.pth...
+-[2023-09-19 11:12:29,287][35316] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 35316], exiting...
+-[2023-09-19 11:12:29,288][35316] Runner profile tree view:
+-main_loop: 11.8008
+-[2023-09-19 11:12:29,288][35316] Collected {0: 20480}, FPS: 1735.5
+-[2023-09-19 11:12:29,288][36006] Stopping Batcher_0...
+-[2023-09-19 11:12:29,289][36026] Stopping RolloutWorker_w6...
+-[2023-09-19 11:12:29,290][36026] Loop rollout_proc6_evt_loop terminating...
+-[2023-09-19 11:12:29,289][36006] Loop batcher_evt_loop terminating...
+-[2023-09-19 11:12:29,290][36027] Stopping RolloutWorker_w4...
+-[2023-09-19 11:12:29,290][36027] Loop rollout_proc4_evt_loop terminating...
+-[2023-09-19 11:12:29,290][36020] Stopping RolloutWorker_w1...
+-[2023-09-19 11:12:29,290][36023] Stopping RolloutWorker_w5...
+-[2023-09-19 11:12:29,290][36020] Loop rollout_proc1_evt_loop terminating...
+-[2023-09-19 11:12:29,290][36023] Loop rollout_proc5_evt_loop terminating...
+-[2023-09-19 11:12:29,290][36006] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000040_20480.pth...
+-[2023-09-19 11:12:29,291][36022] Stopping RolloutWorker_w0...
+-[2023-09-19 11:12:29,291][36022] Loop rollout_proc0_evt_loop terminating...
+-[2023-09-19 11:12:29,291][36024] Stopping RolloutWorker_w3...
+-[2023-09-19 11:12:29,292][36024] Loop rollout_proc3_evt_loop terminating...
+-[2023-09-19 11:12:29,292][36021] Stopping RolloutWorker_w2...
+-[2023-09-19 11:12:29,292][36021] Loop rollout_proc2_evt_loop terminating...
+-[2023-09-19 11:12:29,294][36025] Stopping RolloutWorker_w7...
+-[2023-09-19 11:12:29,295][36025] Loop rollout_proc7_evt_loop terminating...
+-[2023-09-19 11:12:29,299][36006] Stopping LearnerWorker_p0...
+-[2023-09-19 11:12:29,300][36006] Loop learner_proc0_evt_loop terminating...
+-[2023-09-19 11:12:29,303][36019] Weights refcount: 2 0
+-[2023-09-19 11:12:29,304][36019] Stopping InferenceWorker_p0-w0...
+-[2023-09-19 11:12:29,304][36019] Loop inference_proc0-0_evt_loop terminating...
+-[2023-09-19 11:13:18,923][40303] Saving configuration to ./train_dir/Standup/config.json...
+-[2023-09-19 11:13:18,925][40303] Rollout worker 0 uses device cpu
+-[2023-09-19 11:13:18,926][40303] Rollout worker 1 uses device cpu
+-[2023-09-19 11:13:18,926][40303] Rollout worker 2 uses device cpu
+-[2023-09-19 11:13:18,927][40303] Rollout worker 3 uses device cpu
+-[2023-09-19 11:13:18,928][40303] Rollout worker 4 uses device cpu
+-[2023-09-19 11:13:18,928][40303] Rollout worker 5 uses device cpu
+-[2023-09-19 11:13:18,929][40303] Rollout worker 6 uses device cpu
+-[2023-09-19 11:13:18,929][40303] Rollout worker 7 uses device cpu
+-[2023-09-19 11:13:18,930][40303] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
+-[2023-09-19 11:13:18,986][40303] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:13:18,986][40303] InferenceWorker_p0-w0: min num requests: 1
+-[2023-09-19 11:13:18,990][40303] Using GPUs [1] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:13:18,990][40303] InferenceWorker_p1-w0: min num requests: 1
+-[2023-09-19 11:13:19,015][40303] Starting all processes...
+-[2023-09-19 11:13:19,015][40303] Starting process learner_proc0
+-[2023-09-19 11:13:19,018][40303] Starting process learner_proc1
+-[2023-09-19 11:13:19,065][40303] Starting all processes...
+-[2023-09-19 11:13:19,071][40303] Starting process inference_proc0-0
+-[2023-09-19 11:13:19,071][40303] Starting process inference_proc1-0
+-[2023-09-19 11:13:19,071][40303] Starting process rollout_proc0
+-[2023-09-19 11:13:19,071][40303] Starting process rollout_proc1
+-[2023-09-19 11:13:19,072][40303] Starting process rollout_proc2
+-[2023-09-19 11:13:19,072][40303] Starting process rollout_proc3
+-[2023-09-19 11:13:19,073][40303] Starting process rollout_proc4
+-[2023-09-19 11:13:19,074][40303] Starting process rollout_proc5
+-[2023-09-19 11:13:19,080][40303] Starting process rollout_proc6
+-[2023-09-19 11:13:19,081][40303] Starting process rollout_proc7
+-[2023-09-19 11:13:21,055][41278] Worker 2 uses CPU cores [8, 9, 10, 11]
+-[2023-09-19 11:13:21,063][41246] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:13:21,063][41246] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+-[2023-09-19 11:13:21,068][41284] Worker 3 uses CPU cores [12, 13, 14, 15]
+-[2023-09-19 11:13:21,080][41271] Using GPUs [1] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:13:21,080][41271] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [1]) for inference process 1
+-[2023-09-19 11:13:21,083][41246] Num visible devices: 1
+-[2023-09-19 11:13:21,088][41292] Worker 5 uses CPU cores [20, 21, 22, 23]
+-[2023-09-19 11:13:21,100][41271] Num visible devices: 1
+-[2023-09-19 11:13:21,157][41272] Worker 0 uses CPU cores [0, 1, 2, 3]
+-[2023-09-19 11:13:21,187][41276] Worker 1 uses CPU cores [4, 5, 6, 7]
+-[2023-09-19 11:13:21,291][41291] Worker 7 uses CPU cores [28, 29, 30, 31]
+-[2023-09-19 11:13:21,319][41290] Worker 6 uses CPU cores [24, 25, 26, 27]
+-[2023-09-19 11:13:21,326][41287] Worker 4 uses CPU cores [16, 17, 18, 19]
+-[2023-09-19 11:13:21,373][41187] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:13:21,373][41187] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+-[2023-09-19 11:13:21,391][41187] Num visible devices: 1
+-[2023-09-19 11:13:21,412][41187] Starting seed is not provided
+-[2023-09-19 11:13:21,412][41187] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:13:21,412][41187] Initializing actor-critic model on device cuda:0
+-[2023-09-19 11:13:21,413][41187] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:13:21,413][41187] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:13:21,450][41188] Using GPUs [1] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:13:21,450][41188] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [1]) for learning process 1
+-[2023-09-19 11:13:21,461][41187] Created Actor Critic model with architecture:
+-[2023-09-19 11:13:21,462][41187] ActorCriticSharedWeights(
++[2023-09-21 15:10:45,459][101034] Using GPUs [0] for process 0 (actually maps to GPUs [0])
++[2023-09-21 15:10:45,459][101034] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
++[2023-09-21 15:10:45,470][101124] Worker 7 uses CPU cores [28, 29, 30, 31]
++[2023-09-21 15:10:45,484][101123] Worker 6 uses CPU cores [24, 25, 26, 27]
++[2023-09-21 15:10:45,490][101034] Num visible devices: 1
++[2023-09-21 15:10:45,528][101034] Starting seed is not provided
++[2023-09-21 15:10:45,528][101034] Using GPUs [0] for process 0 (actually maps to GPUs [0])
++[2023-09-21 15:10:45,528][101034] Initializing actor-critic model on device cuda:0
++[2023-09-21 15:10:45,529][101034] RunningMeanStd input shape: (376,)
++[2023-09-21 15:10:45,530][101034] RunningMeanStd input shape: (1,)
++[2023-09-21 15:10:45,552][101115] Using GPUs [0] for process 0 (actually maps to GPUs [0])
++[2023-09-21 15:10:45,552][101115] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
++[2023-09-21 15:10:45,570][101115] Num visible devices: 1
++[2023-09-21 15:10:45,580][101034] Created Actor Critic model with architecture:
++[2023-09-21 15:10:45,581][101034] ActorCriticSharedWeights(
+   (obs_normalizer): ObservationNormalizer(
+     (running_mean_std): RunningMeanStdDictInPlace(
+       (running_mean_std): ModuleDict(
+@@ -262,2573 +118,537 @@ main_loop: 11.8008
+     (distribution_linear): Linear(in_features=64, out_features=17, bias=True)
+   )
+ )
+-[2023-09-19 11:13:21,478][41188] Num visible devices: 1
+-[2023-09-19 11:13:21,500][41188] Starting seed is not provided
+-[2023-09-19 11:13:21,500][41188] Using GPUs [0] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:13:21,500][41188] Initializing actor-critic model on device cuda:0
+-[2023-09-19 11:13:21,501][41188] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:13:21,501][41188] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:13:21,548][41188] Created Actor Critic model with architecture:
+-[2023-09-19 11:13:21,548][41188] ActorCriticSharedWeights(
+-  (obs_normalizer): ObservationNormalizer(
+-    (running_mean_std): RunningMeanStdDictInPlace(
+-      (running_mean_std): ModuleDict(
+-        (obs): RunningMeanStdInPlace()
+-      )
+-    )
+-  )
+-  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+-  (encoder): MultiInputEncoder(
+-    (encoders): ModuleDict(
+-      (obs): MlpEncoder(
+-        (mlp_head): RecursiveScriptModule(
+-          original_name=Sequential
+-          (0): RecursiveScriptModule(original_name=Linear)
+-          (1): RecursiveScriptModule(original_name=Tanh)
+-          (2): RecursiveScriptModule(original_name=Linear)
+-          (3): RecursiveScriptModule(original_name=Tanh)
+-        )
+-      )
+-    )
+-  )
+-  (core): ModelCoreIdentity()
+-  (decoder): MlpDecoder(
+-    (mlp): Identity()
+-  )
+-  (critic_linear): Linear(in_features=64, out_features=1, bias=True)
+-  (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
+-    (distribution_linear): Linear(in_features=64, out_features=17, bias=True)
+-  )
+-)
+-[2023-09-19 11:13:22,080][41187] Using optimizer <class 'torch.optim.adam.Adam'>
+-[2023-09-19 11:13:22,081][41187] Loading state from checkpoint ./train_dir/Standup/checkpoint_p0/checkpoint_000000040_20480.pth...
+-[2023-09-19 11:13:22,087][41187] Loading model from checkpoint
+-[2023-09-19 11:13:22,089][41187] Loaded experiment state at self.train_step=40, self.env_steps=20480
+-[2023-09-19 11:13:22,090][41187] Initialized policy 0 weights for model version 40
+-[2023-09-19 11:13:22,091][41187] LearnerWorker_p0 finished initialization!
+-[2023-09-19 11:13:22,092][41187] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:13:22,122][41188] Using optimizer <class 'torch.optim.adam.Adam'>
+-[2023-09-19 11:13:22,123][41188] No checkpoints found
+-[2023-09-19 11:13:22,123][41188] Did not load from checkpoint, starting from scratch!
+-[2023-09-19 11:13:22,124][41188] Initialized policy 1 weights for model version 0
+-[2023-09-19 11:13:22,142][41188] LearnerWorker_p1 finished initialization!
+-[2023-09-19 11:13:22,142][41188] Using GPUs [0] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:13:22,704][41246] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:13:22,705][41246] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:13:22,718][41271] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:13:22,718][41271] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:13:22,737][40303] Inference worker 0-0 is ready!
+-[2023-09-19 11:13:22,750][40303] Inference worker 1-0 is ready!
+-[2023-09-19 11:13:22,751][40303] All inference workers are ready! Signal rollout workers to start!
+-[2023-09-19 11:13:22,845][41278] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,846][41278] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,852][41290] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,853][41290] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,872][41287] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,873][41287] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,886][41276] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,885][41272] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,886][41276] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,886][41272] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,893][41292] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,894][41292] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,899][41278] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:22,905][41291] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,905][41284] Decorrelating experience for 0 frames...
+-[2023-09-19 11:13:22,907][41284] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,907][41291] Decorrelating experience for 64 frames...
+-[2023-09-19 11:13:22,907][41290] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:22,939][41276] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:22,944][41287] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:22,957][41292] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:22,974][41272] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:22,990][41291] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:23,000][41284] Decorrelating experience for 128 frames...
+-[2023-09-19 11:13:23,003][41278] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,051][41290] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,056][41287] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,057][41292] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,102][41276] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,147][41272] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,170][41291] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,177][41278] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,180][41284] Decorrelating experience for 192 frames...
+-[2023-09-19 11:13:23,229][41292] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,230][41287] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,248][41290] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,374][41276] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,378][41278] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,426][41287] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,428][41272] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,429][41292] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,433][41291] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,434][41284] Decorrelating experience for 256 frames...
+-[2023-09-19 11:13:23,481][41290] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,626][41291] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,675][41278] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:23,680][41284] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,685][41292] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:23,686][41287] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:23,711][41276] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,748][41272] Decorrelating experience for 320 frames...
+-[2023-09-19 11:13:23,770][41290] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:23,868][41291] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:23,948][41284] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:23,986][41278] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:23,993][41292] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:23,995][41287] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:24,071][41290] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:24,113][41276] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:24,140][41272] Decorrelating experience for 384 frames...
+-[2023-09-19 11:13:24,170][41291] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:24,250][41284] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:24,433][41276] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:24,471][41272] Decorrelating experience for 448 frames...
+-[2023-09-19 11:13:25,197][40303] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 20480. Throughput: 0: nan, 1: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+-[2023-09-19 11:13:30,198][40303] Fps is (10 sec: 3276.7, 60 sec: 3276.7, 300 sec: 3276.7). Total num frames: 36864. Throughput: 0: 1638.4, 1: 1638.4. Samples: 16384. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:13:30,200][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000056_28672.pth...
+-[2023-09-19 11:13:30,201][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000016_8192.pth...
+-[2023-09-19 11:13:30,211][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000032_16384.pth
+-[2023-09-19 11:13:35,197][40303] Fps is (10 sec: 4915.2, 60 sec: 4915.2, 300 sec: 4915.2). Total num frames: 69632. Throughput: 0: 2621.6, 1: 2627.4. Samples: 52490. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:13:35,198][40303] Avg episode reward: [(0, '31454.083'), (1, '27038.960')]
+-[2023-09-19 11:13:38,652][41271] Updated weights for policy 1, policy_version 80 (0.0016)
+-[2023-09-19 11:13:38,652][41246] Updated weights for policy 0, policy_version 120 (0.0015)
+-[2023-09-19 11:13:38,973][40303] Heartbeat connected on Batcher_0
+-[2023-09-19 11:13:38,976][40303] Heartbeat connected on LearnerWorker_p0
+-[2023-09-19 11:13:38,979][40303] Heartbeat connected on Batcher_1
+-[2023-09-19 11:13:38,982][40303] Heartbeat connected on LearnerWorker_p1
+-[2023-09-19 11:13:38,989][40303] Heartbeat connected on InferenceWorker_p0-w0
+-[2023-09-19 11:13:38,992][40303] Heartbeat connected on InferenceWorker_p1-w0
+-[2023-09-19 11:13:38,998][40303] Heartbeat connected on RolloutWorker_w0
+-[2023-09-19 11:13:39,001][40303] Heartbeat connected on RolloutWorker_w1
+-[2023-09-19 11:13:39,004][40303] Heartbeat connected on RolloutWorker_w2
+-[2023-09-19 11:13:39,006][40303] Heartbeat connected on RolloutWorker_w3
+-[2023-09-19 11:13:39,008][40303] Heartbeat connected on RolloutWorker_w4
+-[2023-09-19 11:13:39,014][40303] Heartbeat connected on RolloutWorker_w5
+-[2023-09-19 11:13:39,018][40303] Heartbeat connected on RolloutWorker_w7
+-[2023-09-19 11:13:39,018][40303] Heartbeat connected on RolloutWorker_w6
+-[2023-09-19 11:13:40,198][40303] Fps is (10 sec: 7372.8, 60 sec: 6007.4, 300 sec: 6007.4). Total num frames: 110592. Throughput: 0: 2508.5, 1: 2511.7. Samples: 75304. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:13:40,199][40303] Avg episode reward: [(0, '34634.079'), (1, '30037.595')]
+-[2023-09-19 11:13:45,197][40303] Fps is (10 sec: 7372.8, 60 sec: 6144.0, 300 sec: 6144.0). Total num frames: 143360. Throughput: 0: 2989.8, 1: 2992.6. Samples: 119648. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:13:45,198][40303] Avg episode reward: [(0, '43292.280'), (1, '42584.158')]
+-[2023-09-19 11:13:45,201][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000160_81920.pth...
+-[2023-09-19 11:13:45,201][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000120_61440.pth...
+-[2023-09-19 11:13:45,207][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000040_20480.pth
+-[2023-09-19 11:13:49,800][41271] Updated weights for policy 1, policy_version 160 (0.0015)
+-[2023-09-19 11:13:49,801][41246] Updated weights for policy 0, policy_version 200 (0.0014)
+-[2023-09-19 11:13:50,198][40303] Fps is (10 sec: 7372.8, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 184320. Throughput: 0: 3276.9, 1: 3276.9. Samples: 163844. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:13:50,199][40303] Avg episode reward: [(0, '43292.280'), (1, '44801.351')]
+-[2023-09-19 11:13:50,200][41187] Saving new best policy, reward=43292.280!
+-[2023-09-19 11:13:55,197][40303] Fps is (10 sec: 7372.8, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 217088. Throughput: 0: 3089.5, 1: 3091.3. Samples: 185422. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:13:55,198][40303] Avg episode reward: [(0, '50284.168'), (1, '51051.259')]
+-[2023-09-19 11:13:55,199][41187] Saving new best policy, reward=50284.168!
+-[2023-09-19 11:14:00,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 249856. Throughput: 0: 3196.1, 1: 3197.6. Samples: 223782. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:14:00,199][40303] Avg episode reward: [(0, '53046.991'), (1, '53883.351')]
+-[2023-09-19 11:14:00,208][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000264_135168.pth...
+-[2023-09-19 11:14:00,208][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000224_114688.pth...
+-[2023-09-19 11:14:00,217][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000016_8192.pth
+-[2023-09-19 11:14:00,217][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000056_28672.pth
+-[2023-09-19 11:14:00,218][41188] Saving new best policy, reward=53883.351!
+-[2023-09-19 11:14:00,218][41187] Saving new best policy, reward=53046.991!
+-[2023-09-19 11:14:01,592][41271] Updated weights for policy 1, policy_version 240 (0.0012)
+-[2023-09-19 11:14:01,592][41246] Updated weights for policy 0, policy_version 280 (0.0013)
+-[2023-09-19 11:14:05,198][40303] Fps is (10 sec: 7372.7, 60 sec: 6758.4, 300 sec: 6758.4). Total num frames: 290816. Throughput: 0: 3357.1, 1: 3358.6. Samples: 268630. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:14:05,199][40303] Avg episode reward: [(0, '62279.408'), (1, '63904.672')]
+-[2023-09-19 11:14:05,200][41187] Saving new best policy, reward=62279.408!
+-[2023-09-19 11:14:05,200][41188] Saving new best policy, reward=63904.672!
+-[2023-09-19 11:14:10,197][40303] Fps is (10 sec: 7372.9, 60 sec: 6735.6, 300 sec: 6735.6). Total num frames: 323584. Throughput: 0: 3232.0, 1: 3233.1. Samples: 290930. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:10,198][40303] Avg episode reward: [(0, '66478.184'), (1, '70755.397')]
+-[2023-09-19 11:14:10,199][41188] Saving new best policy, reward=70755.397!
+-[2023-09-19 11:14:10,199][41187] Saving new best policy, reward=66478.184!
+-[2023-09-19 11:14:12,751][41271] Updated weights for policy 1, policy_version 320 (0.0010)
+-[2023-09-19 11:14:12,752][41246] Updated weights for policy 0, policy_version 360 (0.0015)
+-[2023-09-19 11:14:15,197][40303] Fps is (10 sec: 7372.9, 60 sec: 6881.3, 300 sec: 6881.3). Total num frames: 364544. Throughput: 0: 3545.5, 1: 3546.1. Samples: 335508. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:15,198][40303] Avg episode reward: [(0, '69427.017'), (1, '72462.244')]
+-[2023-09-19 11:14:15,204][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000376_192512.pth...
+-[2023-09-19 11:14:15,204][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000336_172032.pth...
+-[2023-09-19 11:14:15,208][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000160_81920.pth
+-[2023-09-19 11:14:15,209][41187] Saving new best policy, reward=69427.017!
+-[2023-09-19 11:14:15,211][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000120_61440.pth
+-[2023-09-19 11:14:15,211][41188] Saving new best policy, reward=72462.244!
+-[2023-09-19 11:14:20,197][40303] Fps is (10 sec: 7372.9, 60 sec: 6851.5, 300 sec: 6851.5). Total num frames: 397312. Throughput: 0: 3635.0, 1: 3635.0. Samples: 379638. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:20,198][40303] Avg episode reward: [(0, '73711.169'), (1, '77171.232')]
+-[2023-09-19 11:14:20,199][41187] Saving new best policy, reward=73711.169!
+-[2023-09-19 11:14:20,199][41188] Saving new best policy, reward=77171.232!
+-[2023-09-19 11:14:23,933][41246] Updated weights for policy 0, policy_version 440 (0.0013)
+-[2023-09-19 11:14:23,934][41271] Updated weights for policy 1, policy_version 400 (0.0011)
+-[2023-09-19 11:14:25,197][40303] Fps is (10 sec: 6553.6, 60 sec: 6826.7, 300 sec: 6826.7). Total num frames: 430080. Throughput: 0: 3632.2, 1: 3632.4. Samples: 402210. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:25,198][40303] Avg episode reward: [(0, '73688.883'), (1, '77171.232')]
+-[2023-09-19 11:14:30,198][40303] Fps is (10 sec: 7372.7, 60 sec: 7236.3, 300 sec: 6931.7). Total num frames: 471040. Throughput: 0: 3606.1, 1: 3606.2. Samples: 444202. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:30,198][40303] Avg episode reward: [(0, '76128.666'), (1, '83391.647')]
+-[2023-09-19 11:14:30,204][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000480_245760.pth...
+-[2023-09-19 11:14:30,205][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000440_225280.pth...
+-[2023-09-19 11:14:30,211][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000264_135168.pth
+-[2023-09-19 11:14:30,212][41187] Saving new best policy, reward=76128.666!
+-[2023-09-19 11:14:30,213][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000224_114688.pth
+-[2023-09-19 11:14:30,214][41188] Saving new best policy, reward=83391.647!
+-[2023-09-19 11:14:35,005][41271] Updated weights for policy 1, policy_version 480 (0.0015)
+-[2023-09-19 11:14:35,006][41246] Updated weights for policy 0, policy_version 520 (0.0013)
+-[2023-09-19 11:14:35,198][40303] Fps is (10 sec: 8191.9, 60 sec: 7372.8, 300 sec: 7021.7). Total num frames: 512000. Throughput: 0: 3626.8, 1: 3628.0. Samples: 490312. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:14:35,199][40303] Avg episode reward: [(0, '76490.813'), (1, '86333.937')]
+-[2023-09-19 11:14:35,200][41187] Saving new best policy, reward=76490.813!
+-[2023-09-19 11:14:35,200][41188] Saving new best policy, reward=86333.937!
+-[2023-09-19 11:14:40,198][40303] Fps is (10 sec: 7372.6, 60 sec: 7236.2, 300 sec: 6990.5). Total num frames: 544768. Throughput: 0: 3630.3, 1: 3630.3. Samples: 512152. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:40,199][40303] Avg episode reward: [(0, '78215.963'), (1, '88825.825')]
+-[2023-09-19 11:14:40,201][41187] Saving new best policy, reward=78215.963!
+-[2023-09-19 11:14:40,201][41188] Saving new best policy, reward=88825.825!
+-[2023-09-19 11:14:45,197][40303] Fps is (10 sec: 7372.9, 60 sec: 7372.8, 300 sec: 7065.6). Total num frames: 585728. Throughput: 0: 3707.3, 1: 3707.2. Samples: 557434. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:14:45,198][40303] Avg episode reward: [(0, '80179.767'), (1, '93784.048')]
+-[2023-09-19 11:14:45,208][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000552_282624.pth...
+-[2023-09-19 11:14:45,207][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000592_303104.pth...
+-[2023-09-19 11:14:45,216][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000336_172032.pth
+-[2023-09-19 11:14:45,216][41188] Saving new best policy, reward=93784.048!
+-[2023-09-19 11:14:45,219][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000376_192512.pth
+-[2023-09-19 11:14:45,220][41187] Saving new best policy, reward=80179.767!
+-[2023-09-19 11:14:46,154][41271] Updated weights for policy 1, policy_version 560 (0.0013)
+-[2023-09-19 11:14:46,154][41246] Updated weights for policy 0, policy_version 600 (0.0015)
+-[2023-09-19 11:14:50,198][40303] Fps is (10 sec: 7372.9, 60 sec: 7236.3, 300 sec: 7035.5). Total num frames: 618496. Throughput: 0: 3695.9, 1: 3695.8. Samples: 601256. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:14:50,199][40303] Avg episode reward: [(0, '81224.789'), (1, '95410.430')]
+-[2023-09-19 11:14:50,200][41187] Saving new best policy, reward=81224.789!
+-[2023-09-19 11:14:50,200][41188] Saving new best policy, reward=95410.430!
+-[2023-09-19 11:14:55,215][40303] Fps is (10 sec: 7359.6, 60 sec: 7370.6, 300 sec: 7098.3). Total num frames: 659456. Throughput: 0: 3656.8, 1: 3656.9. Samples: 620178. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:14:55,218][40303] Avg episode reward: [(0, '81804.261'), (1, '100729.706')]
+-[2023-09-19 11:14:55,219][41187] Saving new best policy, reward=81804.261!
+-[2023-09-19 11:14:55,219][41188] Saving new best policy, reward=100729.706!
+-[2023-09-19 11:14:57,288][41246] Updated weights for policy 0, policy_version 680 (0.0016)
+-[2023-09-19 11:14:57,288][41271] Updated weights for policy 1, policy_version 640 (0.0013)
+-[2023-09-19 11:15:00,198][40303] Fps is (10 sec: 7372.8, 60 sec: 7372.8, 300 sec: 7071.0). Total num frames: 692224. Throughput: 0: 3672.5, 1: 3673.2. Samples: 666064. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:15:00,198][40303] Avg episode reward: [(0, '81842.550'), (1, '102461.265')]
+-[2023-09-19 11:15:00,204][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000656_335872.pth...
+-[2023-09-19 11:15:00,204][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000696_356352.pth...
+-[2023-09-19 11:15:00,208][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000440_225280.pth
+-[2023-09-19 11:15:00,208][41188] Saving new best policy, reward=102461.265!
+-[2023-09-19 11:15:00,211][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000480_245760.pth
+-[2023-09-19 11:15:00,211][41187] Saving new best policy, reward=81842.550!
+-[2023-09-19 11:15:05,198][40303] Fps is (10 sec: 5744.7, 60 sec: 7099.7, 300 sec: 6963.2). Total num frames: 716800. Throughput: 0: 3578.5, 1: 3578.2. Samples: 701690. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:05,199][40303] Avg episode reward: [(0, '82742.948'), (1, '108945.356')]
+-[2023-09-19 11:15:05,200][41187] Saving new best policy, reward=82742.948!
+-[2023-09-19 11:15:05,200][41188] Saving new best policy, reward=108945.356!
+-[2023-09-19 11:15:09,873][41271] Updated weights for policy 1, policy_version 720 (0.0015)
+-[2023-09-19 11:15:09,873][41246] Updated weights for policy 0, policy_version 760 (0.0015)
+-[2023-09-19 11:15:10,198][40303] Fps is (10 sec: 6553.6, 60 sec: 7236.3, 300 sec: 7021.7). Total num frames: 757760. Throughput: 0: 3542.3, 1: 3541.1. Samples: 720964. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:15:10,199][40303] Avg episode reward: [(0, '83013.951'), (1, '110482.262')]
+-[2023-09-19 11:15:10,200][41187] Saving new best policy, reward=83013.951!
+-[2023-09-19 11:15:10,200][41188] Saving new best policy, reward=110482.262!
+-[2023-09-19 11:15:15,198][40303] Fps is (10 sec: 7372.7, 60 sec: 7099.7, 300 sec: 7000.4). Total num frames: 790528. Throughput: 0: 3537.8, 1: 3537.8. Samples: 762602. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:15:15,199][40303] Avg episode reward: [(0, '84790.627'), (1, '117040.476')]
+-[2023-09-19 11:15:15,208][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000792_405504.pth...
+-[2023-09-19 11:15:15,209][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000752_385024.pth...
+-[2023-09-19 11:15:15,215][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000552_282624.pth
+-[2023-09-19 11:15:15,216][41188] Saving new best policy, reward=117040.476!
+-[2023-09-19 11:15:15,218][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000592_303104.pth
+-[2023-09-19 11:15:15,219][41187] Saving new best policy, reward=84790.627!
+-[2023-09-19 11:15:20,197][40303] Fps is (10 sec: 6553.7, 60 sec: 7099.7, 300 sec: 6981.0). Total num frames: 823296. Throughput: 0: 3525.8, 1: 3526.0. Samples: 807640. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:20,198][40303] Avg episode reward: [(0, '86690.838'), (1, '119342.411')]
+-[2023-09-19 11:15:20,199][41188] Saving new best policy, reward=119342.411!
+-[2023-09-19 11:15:20,199][41187] Saving new best policy, reward=86690.838!
+-[2023-09-19 11:15:21,652][41271] Updated weights for policy 1, policy_version 800 (0.0011)
+-[2023-09-19 11:15:21,653][41246] Updated weights for policy 0, policy_version 840 (0.0015)
+-[2023-09-19 11:15:25,198][40303] Fps is (10 sec: 7372.9, 60 sec: 7236.2, 300 sec: 7031.5). Total num frames: 864256. Throughput: 0: 3502.0, 1: 3501.0. Samples: 827284. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:25,199][40303] Avg episode reward: [(0, '90539.601'), (1, '125734.908')]
+-[2023-09-19 11:15:25,200][41187] Saving new best policy, reward=90539.601!
+-[2023-09-19 11:15:25,200][41188] Saving new best policy, reward=125734.908!
+-[2023-09-19 11:15:30,198][40303] Fps is (10 sec: 8191.9, 60 sec: 7236.3, 300 sec: 7077.9). Total num frames: 905216. Throughput: 0: 3517.5, 1: 3518.0. Samples: 874032. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:30,198][40303] Avg episode reward: [(0, '92852.762'), (1, '132358.210')]
+-[2023-09-19 11:15:30,207][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000904_462848.pth...
+-[2023-09-19 11:15:30,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000864_442368.pth...
+-[2023-09-19 11:15:30,213][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000656_335872.pth
+-[2023-09-19 11:15:30,214][41188] Saving new best policy, reward=132358.210!
+-[2023-09-19 11:15:30,216][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000696_356352.pth
+-[2023-09-19 11:15:30,217][41187] Saving new best policy, reward=92852.762!
+-[2023-09-19 11:15:32,850][41271] Updated weights for policy 1, policy_version 880 (0.0013)
+-[2023-09-19 11:15:32,851][41246] Updated weights for policy 0, policy_version 920 (0.0013)
+-[2023-09-19 11:15:35,197][40303] Fps is (10 sec: 7372.9, 60 sec: 7099.7, 300 sec: 7057.7). Total num frames: 937984. Throughput: 0: 3497.4, 1: 3497.6. Samples: 916030. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:35,198][40303] Avg episode reward: [(0, '96297.528'), (1, '134666.372')]
+-[2023-09-19 11:15:35,199][41187] Saving new best policy, reward=96297.528!
+-[2023-09-19 11:15:35,200][41188] Saving new best policy, reward=134666.372!
+-[2023-09-19 11:15:40,198][40303] Fps is (10 sec: 6553.5, 60 sec: 7099.7, 300 sec: 7039.0). Total num frames: 970752. Throughput: 0: 3537.6, 1: 3537.8. Samples: 938444. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:40,199][40303] Avg episode reward: [(0, '101172.898'), (1, '137965.048')]
+-[2023-09-19 11:15:40,200][41187] Saving new best policy, reward=101172.898!
+-[2023-09-19 11:15:40,200][41188] Saving new best policy, reward=137965.048!
+-[2023-09-19 11:15:44,038][41246] Updated weights for policy 0, policy_version 1000 (0.0014)
+-[2023-09-19 11:15:44,038][41271] Updated weights for policy 1, policy_version 960 (0.0014)
+-[2023-09-19 11:15:45,197][40303] Fps is (10 sec: 6553.7, 60 sec: 6963.2, 300 sec: 7021.7). Total num frames: 1003520. Throughput: 0: 3519.5, 1: 3518.3. Samples: 982764. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:15:45,198][40303] Avg episode reward: [(0, '103392.205'), (1, '137965.048')]
+-[2023-09-19 11:15:45,205][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000968_495616.pth...
+-[2023-09-19 11:15:45,208][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000752_385024.pth
+-[2023-09-19 11:15:45,213][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001008_516096.pth...
+-[2023-09-19 11:15:45,217][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000792_405504.pth
+-[2023-09-19 11:15:45,218][41187] Saving new best policy, reward=103392.205!
+-[2023-09-19 11:15:50,198][40303] Fps is (10 sec: 7372.8, 60 sec: 7099.7, 300 sec: 7062.1). Total num frames: 1044480. Throughput: 0: 3604.8, 1: 3604.9. Samples: 1026124. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:15:50,199][40303] Avg episode reward: [(0, '112090.239'), (1, '141479.122')]
+-[2023-09-19 11:15:50,200][41187] Saving new best policy, reward=112090.239!
+-[2023-09-19 11:15:50,200][41188] Saving new best policy, reward=141479.122!
+-[2023-09-19 11:15:55,198][40303] Fps is (10 sec: 7372.6, 60 sec: 6965.3, 300 sec: 7045.1). Total num frames: 1077248. Throughput: 0: 3640.0, 1: 3640.1. Samples: 1048568. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:15:55,199][40303] Avg episode reward: [(0, '115037.292'), (1, '141360.147')]
+-[2023-09-19 11:15:55,200][41187] Saving new best policy, reward=115037.292!
+-[2023-09-19 11:15:55,356][41271] Updated weights for policy 1, policy_version 1040 (0.0016)
+-[2023-09-19 11:15:55,356][41246] Updated weights for policy 0, policy_version 1080 (0.0011)
+-[2023-09-19 11:16:00,198][40303] Fps is (10 sec: 7372.8, 60 sec: 7099.7, 300 sec: 7082.1). Total num frames: 1118208. Throughput: 0: 3634.6, 1: 3634.6. Samples: 1089716. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:16:00,199][40303] Avg episode reward: [(0, '121696.345'), (1, '143516.477')]
+-[2023-09-19 11:16:00,207][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001112_569344.pth...
+-[2023-09-19 11:16:00,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001072_548864.pth...
+-[2023-09-19 11:16:00,213][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000904_462848.pth
+-[2023-09-19 11:16:00,214][41187] Saving new best policy, reward=121696.345!
+-[2023-09-19 11:16:00,218][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000864_442368.pth
+-[2023-09-19 11:16:00,218][41188] Saving new best policy, reward=143516.477!
+-[2023-09-19 11:16:05,198][40303] Fps is (10 sec: 8192.0, 60 sec: 7372.8, 300 sec: 7116.8). Total num frames: 1159168. Throughput: 0: 3656.8, 1: 3657.0. Samples: 1136764. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:16:05,199][40303] Avg episode reward: [(0, '126501.030'), (1, '144413.674')]
+-[2023-09-19 11:16:05,200][41187] Saving new best policy, reward=126501.030!
+-[2023-09-19 11:16:05,200][41188] Saving new best policy, reward=144413.674!
+-[2023-09-19 11:16:06,100][41271] Updated weights for policy 1, policy_version 1120 (0.0013)
+-[2023-09-19 11:16:06,100][41246] Updated weights for policy 0, policy_version 1160 (0.0016)
+-[2023-09-19 11:16:10,197][40303] Fps is (10 sec: 7372.9, 60 sec: 7236.3, 300 sec: 7099.7). Total num frames: 1191936. Throughput: 0: 3691.9, 1: 3692.5. Samples: 1159578. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:16:10,198][40303] Avg episode reward: [(0, '132111.068'), (1, '146024.408')]
+-[2023-09-19 11:16:10,199][41187] Saving new best policy, reward=132111.068!
+-[2023-09-19 11:16:10,200][41188] Saving new best policy, reward=146024.408!
+-[2023-09-19 11:16:15,198][40303] Fps is (10 sec: 5734.4, 60 sec: 7099.7, 300 sec: 7035.5). Total num frames: 1216512. Throughput: 0: 3566.4, 1: 3565.5. Samples: 1194970. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:16:15,198][40303] Avg episode reward: [(0, '135452.407'), (1, '145133.020')]
+-[2023-09-19 11:16:15,205][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001208_618496.pth...
+-[2023-09-19 11:16:15,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001168_598016.pth...
+-[2023-09-19 11:16:15,217][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000968_495616.pth
+-[2023-09-19 11:16:15,218][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001008_516096.pth
+-[2023-09-19 11:16:15,218][41187] Saving new best policy, reward=135452.407!
+-[2023-09-19 11:16:19,680][41271] Updated weights for policy 1, policy_version 1200 (0.0013)
+-[2023-09-19 11:16:19,680][41246] Updated weights for policy 0, policy_version 1240 (0.0011)
+-[2023-09-19 11:16:20,198][40303] Fps is (10 sec: 5734.3, 60 sec: 7099.7, 300 sec: 7021.7). Total num frames: 1249280. Throughput: 0: 3480.7, 1: 3480.4. Samples: 1229280. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:16:20,199][40303] Avg episode reward: [(0, '138970.413'), (1, '144367.334')]
+-[2023-09-19 11:16:20,200][41187] Saving new best policy, reward=138970.413!
+-[2023-09-19 11:16:25,198][40303] Fps is (10 sec: 7372.8, 60 sec: 7099.7, 300 sec: 7054.2). Total num frames: 1290240. Throughput: 0: 3490.3, 1: 3490.3. Samples: 1252572. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:16:25,199][40303] Avg episode reward: [(0, '138954.797'), (1, '143024.613')]
+-[2023-09-19 11:16:30,198][40303] Fps is (10 sec: 7372.7, 60 sec: 6963.2, 300 sec: 7040.7). Total num frames: 1323008. Throughput: 0: 3514.6, 1: 3515.8. Samples: 1299130. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:16:30,199][40303] Avg episode reward: [(0, '138602.655'), (1, '141826.181')]
+-[2023-09-19 11:16:30,206][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001312_671744.pth...
+-[2023-09-19 11:16:30,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001272_651264.pth...
+-[2023-09-19 11:16:30,216][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001112_569344.pth
+-[2023-09-19 11:16:30,216][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001072_548864.pth
+-[2023-09-19 11:16:30,719][41246] Updated weights for policy 0, policy_version 1320 (0.0010)
+-[2023-09-19 11:16:30,720][41271] Updated weights for policy 1, policy_version 1280 (0.0015)
+-[2023-09-19 11:16:35,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 7027.9). Total num frames: 1355776. Throughput: 0: 3463.6, 1: 3463.8. Samples: 1337854. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:16:35,199][40303] Avg episode reward: [(0, '139704.889'), (1, '143172.712')]
+-[2023-09-19 11:16:35,200][41187] Saving new best policy, reward=139704.889!
+-[2023-09-19 11:16:40,197][40303] Fps is (10 sec: 6553.8, 60 sec: 6963.2, 300 sec: 7015.7). Total num frames: 1388544. Throughput: 0: 3470.0, 1: 3470.8. Samples: 1360900. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:16:40,198][40303] Avg episode reward: [(0, '140044.752'), (1, '143172.712')]
+-[2023-09-19 11:16:40,199][41187] Saving new best policy, reward=140044.752!
+-[2023-09-19 11:16:42,534][41246] Updated weights for policy 0, policy_version 1400 (0.0015)
+-[2023-09-19 11:16:42,534][41271] Updated weights for policy 1, policy_version 1360 (0.0013)
+-[2023-09-19 11:16:45,198][40303] Fps is (10 sec: 7372.7, 60 sec: 7099.7, 300 sec: 7045.1). Total num frames: 1429504. Throughput: 0: 3467.3, 1: 3466.8. Samples: 1401752. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:16:45,199][40303] Avg episode reward: [(0, '142593.411'), (1, '143339.303')]
+-[2023-09-19 11:16:45,209][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001416_724992.pth...
+-[2023-09-19 11:16:45,211][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001376_704512.pth...
+-[2023-09-19 11:16:45,219][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001208_618496.pth
+-[2023-09-19 11:16:45,220][41187] Saving new best policy, reward=142593.411!
+-[2023-09-19 11:16:45,221][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001168_598016.pth
+-[2023-09-19 11:16:50,198][40303] Fps is (10 sec: 7372.6, 60 sec: 6963.2, 300 sec: 7033.1). Total num frames: 1462272. Throughput: 0: 3406.2, 1: 3406.0. Samples: 1443310. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:16:50,199][40303] Avg episode reward: [(0, '142593.411'), (1, '143931.524')]
+-[2023-09-19 11:16:53,925][41271] Updated weights for policy 1, policy_version 1440 (0.0013)
+-[2023-09-19 11:16:53,925][41246] Updated weights for policy 0, policy_version 1480 (0.0014)
+-[2023-09-19 11:16:55,197][40303] Fps is (10 sec: 6553.8, 60 sec: 6963.2, 300 sec: 7021.7). Total num frames: 1495040. Throughput: 0: 3409.2, 1: 3408.4. Samples: 1466370. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:16:55,198][40303] Avg episode reward: [(0, '149470.139'), (1, '145636.804')]
+-[2023-09-19 11:16:55,217][41187] Saving new best policy, reward=149470.139!
+-[2023-09-19 11:17:00,198][40303] Fps is (10 sec: 7372.9, 60 sec: 6963.2, 300 sec: 7048.9). Total num frames: 1536000. Throughput: 0: 3492.7, 1: 3493.2. Samples: 1509334. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:17:00,198][40303] Avg episode reward: [(0, '153538.121'), (1, '146540.869')]
+-[2023-09-19 11:17:00,207][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001520_778240.pth...
+-[2023-09-19 11:17:00,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001480_757760.pth...
+-[2023-09-19 11:17:00,214][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001272_651264.pth
+-[2023-09-19 11:17:00,215][41188] Saving new best policy, reward=146540.869!
+-[2023-09-19 11:17:00,215][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001312_671744.pth
+-[2023-09-19 11:17:00,216][41187] Saving new best policy, reward=153538.121!
+-[2023-09-19 11:17:05,198][40303] Fps is (10 sec: 7372.6, 60 sec: 6826.7, 300 sec: 7037.7). Total num frames: 1568768. Throughput: 0: 3593.8, 1: 3594.0. Samples: 1552730. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:17:05,199][40303] Avg episode reward: [(0, '153219.886'), (1, '146009.885')]
+-[2023-09-19 11:17:05,296][41271] Updated weights for policy 1, policy_version 1520 (0.0013)
+-[2023-09-19 11:17:05,298][41246] Updated weights for policy 0, policy_version 1560 (0.0011)
+-[2023-09-19 11:17:10,197][40303] Fps is (10 sec: 7372.9, 60 sec: 6963.2, 300 sec: 7063.3). Total num frames: 1609728. Throughput: 0: 3560.3, 1: 3559.7. Samples: 1572972. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:17:10,198][40303] Avg episode reward: [(0, '152679.736'), (1, '148353.321')]
+-[2023-09-19 11:17:10,199][41188] Saving new best policy, reward=148353.321!
+-[2023-09-19 11:17:15,197][40303] Fps is (10 sec: 7372.9, 60 sec: 7099.7, 300 sec: 7052.2). Total num frames: 1642496. Throughput: 0: 3533.3, 1: 3532.9. Samples: 1617104. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:17:15,198][40303] Avg episode reward: [(0, '152608.632'), (1, '147785.111')]
+-[2023-09-19 11:17:15,204][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001624_831488.pth...
+-[2023-09-19 11:17:15,204][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001584_811008.pth...
+-[2023-09-19 11:17:15,211][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001376_704512.pth
+-[2023-09-19 11:17:15,212][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001416_724992.pth
+-[2023-09-19 11:17:17,037][41271] Updated weights for policy 1, policy_version 1600 (0.0014)
+-[2023-09-19 11:17:17,037][41246] Updated weights for policy 0, policy_version 1640 (0.0013)
+-[2023-09-19 11:17:20,198][40303] Fps is (10 sec: 6553.5, 60 sec: 7099.7, 300 sec: 7041.6). Total num frames: 1675264. Throughput: 0: 3521.5, 1: 3520.4. Samples: 1654740. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:17:20,199][40303] Avg episode reward: [(0, '153339.116'), (1, '146902.121')]
+-[2023-09-19 11:17:25,198][40303] Fps is (10 sec: 5734.3, 60 sec: 6826.7, 300 sec: 6997.3). Total num frames: 1699840. Throughput: 0: 3454.4, 1: 3454.3. Samples: 1671790. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:17:25,199][40303] Avg episode reward: [(0, '153197.923'), (1, '148121.368')]
+-[2023-09-19 11:17:29,838][41246] Updated weights for policy 0, policy_version 1720 (0.0013)
+-[2023-09-19 11:17:29,839][41271] Updated weights for policy 1, policy_version 1680 (0.0014)
+-[2023-09-19 11:17:30,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6963.2, 300 sec: 7021.7). Total num frames: 1740800. Throughput: 0: 3465.9, 1: 3466.4. Samples: 1713704. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:17:30,199][40303] Avg episode reward: [(0, '149499.415'), (1, '148156.690')]
+-[2023-09-19 11:17:30,206][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001720_880640.pth...
+-[2023-09-19 11:17:30,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001680_860160.pth...
+-[2023-09-19 11:17:30,213][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001520_778240.pth
+-[2023-09-19 11:17:30,215][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001480_757760.pth
+-[2023-09-19 11:17:35,198][40303] Fps is (10 sec: 7372.8, 60 sec: 6963.2, 300 sec: 7012.3). Total num frames: 1773568. Throughput: 0: 3480.4, 1: 3480.4. Samples: 1756542. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:17:35,199][40303] Avg episode reward: [(0, '148300.205'), (1, '148607.536')]
+-[2023-09-19 11:17:35,200][41188] Saving new best policy, reward=148607.536!
+-[2023-09-19 11:17:40,198][40303] Fps is (10 sec: 7372.8, 60 sec: 7099.7, 300 sec: 7035.5). Total num frames: 1814528. Throughput: 0: 3459.8, 1: 3460.8. Samples: 1777800. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:17:40,199][40303] Avg episode reward: [(0, '148265.013'), (1, '149557.427')]
+-[2023-09-19 11:17:40,201][41188] Saving new best policy, reward=149557.427!
+-[2023-09-19 11:17:41,639][41271] Updated weights for policy 1, policy_version 1760 (0.0011)
+-[2023-09-19 11:17:41,639][41246] Updated weights for policy 0, policy_version 1800 (0.0015)
+-[2023-09-19 11:17:45,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6826.7, 300 sec: 6994.7). Total num frames: 1839104. Throughput: 0: 3421.1, 1: 3421.1. Samples: 1817234. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:17:45,199][40303] Avg episode reward: [(0, '148265.013'), (1, '149557.427')]
+-[2023-09-19 11:17:45,233][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001784_913408.pth...
+-[2023-09-19 11:17:45,236][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001584_811008.pth
+-[2023-09-19 11:17:45,238][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001824_933888.pth...
+-[2023-09-19 11:17:45,242][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001624_831488.pth
+-[2023-09-19 11:17:50,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 7017.3). Total num frames: 1880064. Throughput: 0: 3389.4, 1: 3389.2. Samples: 1857766. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:17:50,199][40303] Avg episode reward: [(0, '148074.607'), (1, '152409.695')]
+-[2023-09-19 11:17:50,201][41188] Saving new best policy, reward=152409.695!
+-[2023-09-19 11:17:53,620][41246] Updated weights for policy 0, policy_version 1880 (0.0014)
+-[2023-09-19 11:17:53,621][41271] Updated weights for policy 1, policy_version 1840 (0.0015)
+-[2023-09-19 11:17:55,198][40303] Fps is (10 sec: 7372.9, 60 sec: 6963.2, 300 sec: 7008.7). Total num frames: 1912832. Throughput: 0: 3389.5, 1: 3390.0. Samples: 1878050. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:17:55,199][40303] Avg episode reward: [(0, '148074.607'), (1, '152273.875')]
+-[2023-09-19 11:18:00,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6826.7, 300 sec: 7000.4). Total num frames: 1945600. Throughput: 0: 3345.4, 1: 3345.7. Samples: 1918202. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:18:00,198][40303] Avg episode reward: [(0, '153050.644'), (1, '152274.335')]
+-[2023-09-19 11:18:00,206][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001880_962560.pth...
+-[2023-09-19 11:18:00,206][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001920_983040.pth...
+-[2023-09-19 11:18:00,212][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001680_860160.pth
+-[2023-09-19 11:18:00,216][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001720_880640.pth
+-[2023-09-19 11:18:05,198][40303] Fps is (10 sec: 6553.7, 60 sec: 6826.7, 300 sec: 6992.5). Total num frames: 1978368. Throughput: 0: 3388.8, 1: 3389.7. Samples: 1959770. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:18:05,199][40303] Avg episode reward: [(0, '154101.029'), (1, '152166.485')]
+-[2023-09-19 11:18:05,200][41187] Saving new best policy, reward=154101.029!
+-[2023-09-19 11:18:05,611][41246] Updated weights for policy 0, policy_version 1960 (0.0016)
+-[2023-09-19 11:18:05,611][41271] Updated weights for policy 1, policy_version 1920 (0.0015)
+-[2023-09-19 11:18:10,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6690.1, 300 sec: 6984.8). Total num frames: 2011136. Throughput: 0: 3449.6, 1: 3449.8. Samples: 1982264. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:18:10,199][40303] Avg episode reward: [(0, '156034.676'), (1, '151366.137')]
+-[2023-09-19 11:18:10,200][41187] Saving new best policy, reward=156034.676!
+-[2023-09-19 11:18:15,198][40303] Fps is (10 sec: 6553.4, 60 sec: 6690.1, 300 sec: 6977.3). Total num frames: 2043904. Throughput: 0: 3381.2, 1: 3381.2. Samples: 2018014. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:18:15,199][40303] Avg episode reward: [(0, '155164.894'), (1, '150838.954')]
+-[2023-09-19 11:18:15,207][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002016_1032192.pth...
+-[2023-09-19 11:18:15,207][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001976_1011712.pth...
+-[2023-09-19 11:18:15,217][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001824_933888.pth
+-[2023-09-19 11:18:15,217][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001784_913408.pth
+-[2023-09-19 11:18:18,102][41246] Updated weights for policy 0, policy_version 2040 (0.0012)
+-[2023-09-19 11:18:18,102][41271] Updated weights for policy 1, policy_version 2000 (0.0012)
+-[2023-09-19 11:18:20,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6690.1, 300 sec: 6970.1). Total num frames: 2076672. Throughput: 0: 3371.1, 1: 3370.6. Samples: 2059918. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:18:20,199][40303] Avg episode reward: [(0, '155231.770'), (1, '150958.366')]
+-[2023-09-19 11:18:25,198][40303] Fps is (10 sec: 5734.5, 60 sec: 6690.1, 300 sec: 6997.9). Total num frames: 2101248. Throughput: 0: 3299.0, 1: 3299.1. Samples: 2074716. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:18:25,199][40303] Avg episode reward: [(0, '154273.430'), (1, '151002.496')]
+-[2023-09-19 11:18:30,198][40303] Fps is (10 sec: 5734.4, 60 sec: 6553.6, 300 sec: 6997.9). Total num frames: 2134016. Throughput: 0: 3270.6, 1: 3270.6. Samples: 2111590. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:18:30,198][40303] Avg episode reward: [(0, '155007.997'), (1, '150084.194')]
+-[2023-09-19 11:18:30,205][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002064_1056768.pth...
+-[2023-09-19 11:18:30,205][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002104_1077248.pth...
+-[2023-09-19 11:18:30,211][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001880_962560.pth
+-[2023-09-19 11:18:30,214][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001920_983040.pth
+-[2023-09-19 11:18:32,089][41271] Updated weights for policy 1, policy_version 2080 (0.0011)
+-[2023-09-19 11:18:32,090][41246] Updated weights for policy 0, policy_version 2120 (0.0012)
+-[2023-09-19 11:18:35,197][40303] Fps is (10 sec: 6553.7, 60 sec: 6553.6, 300 sec: 6970.1). Total num frames: 2166784. Throughput: 0: 3205.9, 1: 3205.1. Samples: 2146258. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:18:35,198][40303] Avg episode reward: [(0, '155007.997'), (1, '150624.279')]
+-[2023-09-19 11:18:40,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6970.1). Total num frames: 2199552. Throughput: 0: 3184.1, 1: 3184.0. Samples: 2164612. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:18:40,199][40303] Avg episode reward: [(0, '145580.524'), (1, '151090.148')]
+-[2023-09-19 11:18:44,767][41271] Updated weights for policy 1, policy_version 2160 (0.0013)
+-[2023-09-19 11:18:44,768][41246] Updated weights for policy 0, policy_version 2200 (0.0014)
+-[2023-09-19 11:18:45,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6553.6, 300 sec: 6942.4). Total num frames: 2232320. Throughput: 0: 3191.7, 1: 3191.8. Samples: 2205460. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:18:45,199][40303] Avg episode reward: [(0, '145607.879'), (1, '152391.555')]
+-[2023-09-19 11:18:45,208][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002200_1126400.pth...
+-[2023-09-19 11:18:45,208][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002160_1105920.pth...
+-[2023-09-19 11:18:45,214][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001976_1011712.pth
+-[2023-09-19 11:18:45,217][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002016_1032192.pth
+-[2023-09-19 11:18:50,197][40303] Fps is (10 sec: 6553.7, 60 sec: 6417.1, 300 sec: 6942.4). Total num frames: 2265088. Throughput: 0: 3173.6, 1: 3173.4. Samples: 2245384. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:18:50,198][40303] Avg episode reward: [(0, '141531.277'), (1, '153070.074')]
+-[2023-09-19 11:18:50,199][41188] Saving new best policy, reward=153070.074!
+-[2023-09-19 11:18:55,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6942.4). Total num frames: 2297856. Throughput: 0: 3144.3, 1: 3144.4. Samples: 2265256. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:18:55,199][40303] Avg episode reward: [(0, '141531.277'), (1, '153134.366')]
+-[2023-09-19 11:18:55,200][41188] Saving new best policy, reward=153134.366!
+-[2023-09-19 11:18:57,266][41271] Updated weights for policy 1, policy_version 2240 (0.0014)
+-[2023-09-19 11:18:57,267][41246] Updated weights for policy 0, policy_version 2280 (0.0013)
+-[2023-09-19 11:19:00,198][40303] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6914.6). Total num frames: 2330624. Throughput: 0: 3181.9, 1: 3181.2. Samples: 2304352. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:19:00,198][40303] Avg episode reward: [(0, '140084.973'), (1, '153499.154')]
+-[2023-09-19 11:19:00,205][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002296_1175552.pth...
+-[2023-09-19 11:19:00,206][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002256_1155072.pth...
+-[2023-09-19 11:19:00,213][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002064_1056768.pth
+-[2023-09-19 11:19:00,213][41188] Saving new best policy, reward=153499.154!
+-[2023-09-19 11:19:00,214][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002104_1077248.pth
+-[2023-09-19 11:19:05,197][40303] Fps is (10 sec: 6553.7, 60 sec: 6417.1, 300 sec: 6914.6). Total num frames: 2363392. Throughput: 0: 3174.0, 1: 3174.3. Samples: 2345590. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:19:05,198][40303] Avg episode reward: [(0, '140163.997'), (1, '154850.945')]
+-[2023-09-19 11:19:05,199][41188] Saving new best policy, reward=154850.945!
+-[2023-09-19 11:19:09,431][41271] Updated weights for policy 1, policy_version 2320 (0.0012)
+-[2023-09-19 11:19:09,432][41246] Updated weights for policy 0, policy_version 2360 (0.0015)
+-[2023-09-19 11:19:10,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6886.8). Total num frames: 2396160. Throughput: 0: 3226.4, 1: 3226.4. Samples: 2365092. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:19:10,199][40303] Avg episode reward: [(0, '146252.432'), (1, '155854.149')]
+-[2023-09-19 11:19:10,200][41188] Saving new best policy, reward=155854.149!
+-[2023-09-19 11:19:15,198][40303] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6886.8). Total num frames: 2428928. Throughput: 0: 3251.3, 1: 3251.4. Samples: 2404212. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:19:15,198][40303] Avg episode reward: [(0, '146450.850'), (1, '155450.882')]
+-[2023-09-19 11:19:15,205][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002392_1224704.pth...
+-[2023-09-19 11:19:15,206][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002352_1204224.pth...
+-[2023-09-19 11:19:15,216][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002200_1126400.pth
+-[2023-09-19 11:19:15,217][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002160_1105920.pth
+-[2023-09-19 11:19:16,564][40303] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 40303], exiting...
+-[2023-09-19 11:19:16,565][40303] Runner profile tree view:
+-main_loop: 357.5508
+-[2023-09-19 11:19:16,566][41187] Stopping Batcher_0...
+-[2023-09-19 11:19:16,566][41187] Loop batcher_evt_loop terminating...
+-[2023-09-19 11:19:16,566][40303] Collected {0: 1228800, 1: 1208320}, FPS: 6758.9
+-[2023-09-19 11:19:16,566][41188] Stopping Batcher_1...
+-[2023-09-19 11:19:16,566][41188] Loop batcher_evt_loop terminating...
+-[2023-09-19 11:19:16,567][41188] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002360_1208320.pth...
+-[2023-09-19 11:19:16,567][41287] Stopping RolloutWorker_w4...
+-[2023-09-19 11:19:16,567][41187] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002400_1228800.pth...
+-[2023-09-19 11:19:16,567][41287] Loop rollout_proc4_evt_loop terminating...
+-[2023-09-19 11:19:16,569][41278] Stopping RolloutWorker_w2...
+-[2023-09-19 11:19:16,569][41276] Stopping RolloutWorker_w1...
+-[2023-09-19 11:19:16,569][41278] Loop rollout_proc2_evt_loop terminating...
+-[2023-09-19 11:19:16,569][41276] Loop rollout_proc1_evt_loop terminating...
+-[2023-09-19 11:19:16,570][41291] Stopping RolloutWorker_w7...
+-[2023-09-19 11:19:16,570][41291] Loop rollout_proc7_evt_loop terminating...
+-[2023-09-19 11:19:16,570][41272] Stopping RolloutWorker_w0...
+-[2023-09-19 11:19:16,570][41272] Loop rollout_proc0_evt_loop terminating...
+-[2023-09-19 11:19:16,571][41290] Stopping RolloutWorker_w6...
+-[2023-09-19 11:19:16,571][41290] Loop rollout_proc6_evt_loop terminating...
+-[2023-09-19 11:19:16,571][41284] Stopping RolloutWorker_w3...
+-[2023-09-19 11:19:16,571][41188] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002256_1155072.pth
+-[2023-09-19 11:19:16,571][41284] Loop rollout_proc3_evt_loop terminating...
+-[2023-09-19 11:19:16,571][41292] Stopping RolloutWorker_w5...
+-[2023-09-19 11:19:16,572][41188] Stopping LearnerWorker_p1...
+-[2023-09-19 11:19:16,572][41292] Loop rollout_proc5_evt_loop terminating...
+-[2023-09-19 11:19:16,572][41188] Loop learner_proc1_evt_loop terminating...
+-[2023-09-19 11:19:16,575][41187] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002296_1175552.pth
+-[2023-09-19 11:19:16,576][41187] Stopping LearnerWorker_p0...
+-[2023-09-19 11:19:16,576][41187] Loop learner_proc0_evt_loop terminating...
+-[2023-09-19 11:19:16,580][41246] Weights refcount: 2 0
+-[2023-09-19 11:19:16,581][41246] Stopping InferenceWorker_p0-w0...
+-[2023-09-19 11:19:16,581][41246] Loop inference_proc0-0_evt_loop terminating...
+-[2023-09-19 11:19:16,583][41271] Weights refcount: 2 0
+-[2023-09-19 11:19:16,584][41271] Stopping InferenceWorker_p1-w0...
+-[2023-09-19 11:19:16,584][41271] Loop inference_proc1-0_evt_loop terminating...
+-[2023-09-19 11:19:40,691][72530] Saving configuration to ./train_dir/Standup/config.json...
+-[2023-09-19 11:19:40,693][72530] Rollout worker 0 uses device cpu
+-[2023-09-19 11:19:40,694][72530] Rollout worker 1 uses device cpu
+-[2023-09-19 11:19:40,694][72530] Rollout worker 2 uses device cpu
+-[2023-09-19 11:19:40,695][72530] Rollout worker 3 uses device cpu
+-[2023-09-19 11:19:40,695][72530] Rollout worker 4 uses device cpu
+-[2023-09-19 11:19:40,695][72530] Rollout worker 5 uses device cpu
+-[2023-09-19 11:19:40,696][72530] Rollout worker 6 uses device cpu
+-[2023-09-19 11:19:40,696][72530] Rollout worker 7 uses device cpu
+-[2023-09-19 11:19:40,696][72530] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
+-[2023-09-19 11:19:40,753][72530] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:19:40,753][72530] InferenceWorker_p0-w0: min num requests: 1
+-[2023-09-19 11:19:40,757][72530] Using GPUs [1] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:19:40,757][72530] InferenceWorker_p1-w0: min num requests: 1
+-[2023-09-19 11:19:40,783][72530] Starting all processes...
+-[2023-09-19 11:19:40,783][72530] Starting process learner_proc0
+-[2023-09-19 11:19:40,786][72530] Starting process learner_proc1
+-[2023-09-19 11:19:40,832][72530] Starting all processes...
+-[2023-09-19 11:19:40,838][72530] Starting process inference_proc0-0
+-[2023-09-19 11:19:40,838][72530] Starting process inference_proc1-0
+-[2023-09-19 11:19:40,838][72530] Starting process rollout_proc0
+-[2023-09-19 11:19:40,839][72530] Starting process rollout_proc1
+-[2023-09-19 11:19:40,839][72530] Starting process rollout_proc2
+-[2023-09-19 11:19:40,839][72530] Starting process rollout_proc3
+-[2023-09-19 11:19:40,840][72530] Starting process rollout_proc4
+-[2023-09-19 11:19:40,843][72530] Starting process rollout_proc5
+-[2023-09-19 11:19:40,843][72530] Starting process rollout_proc6
+-[2023-09-19 11:19:40,844][72530] Starting process rollout_proc7
+-[2023-09-19 11:19:42,632][73131] Using GPUs [1] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:19:42,632][73131] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [1]) for learning process 1
+-[2023-09-19 11:19:42,651][73131] Num visible devices: 1
+-[2023-09-19 11:19:42,668][73131] Starting seed is not provided
+-[2023-09-19 11:19:42,669][73131] Using GPUs [0] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:19:42,669][73131] Initializing actor-critic model on device cuda:0
+-[2023-09-19 11:19:42,670][73131] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:19:42,670][73131] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:19:42,680][73130] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:19:42,681][73130] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+-[2023-09-19 11:19:42,683][73226] Worker 7 uses CPU cores [28, 29, 30, 31]
+-[2023-09-19 11:19:42,694][73220] Worker 2 uses CPU cores [8, 9, 10, 11]
+-[2023-09-19 11:19:42,701][73130] Num visible devices: 1
+-[2023-09-19 11:19:42,723][73222] Worker 6 uses CPU cores [24, 25, 26, 27]
+-[2023-09-19 11:19:42,725][73130] Starting seed is not provided
+-[2023-09-19 11:19:42,725][73130] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:19:42,725][73130] Initializing actor-critic model on device cuda:0
+-[2023-09-19 11:19:42,725][73130] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:19:42,726][73130] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:19:42,731][73224] Worker 4 uses CPU cores [16, 17, 18, 19]
+-[2023-09-19 11:19:42,733][73131] Created Actor Critic model with architecture:
+-[2023-09-19 11:19:42,734][73131] ActorCriticSharedWeights(
+-  (obs_normalizer): ObservationNormalizer(
+-    (running_mean_std): RunningMeanStdDictInPlace(
+-      (running_mean_std): ModuleDict(
+-        (obs): RunningMeanStdInPlace()
+-      )
+-    )
+-  )
+-  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+-  (encoder): MultiInputEncoder(
+-    (encoders): ModuleDict(
+-      (obs): MlpEncoder(
+-        (mlp_head): RecursiveScriptModule(
+-          original_name=Sequential
+-          (0): RecursiveScriptModule(original_name=Linear)
+-          (1): RecursiveScriptModule(original_name=Tanh)
+-          (2): RecursiveScriptModule(original_name=Linear)
+-          (3): RecursiveScriptModule(original_name=Tanh)
+-        )
+-      )
+-    )
+-  )
+-  (core): ModelCoreIdentity()
+-  (decoder): MlpDecoder(
+-    (mlp): Identity()
+-  )
+-  (critic_linear): Linear(in_features=64, out_features=1, bias=True)
+-  (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
+-    (distribution_linear): Linear(in_features=64, out_features=17, bias=True)
+-  )
+-)
+-[2023-09-19 11:19:42,740][73219] Using GPUs [1] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:19:42,740][73219] Set environment var CUDA_VISIBLE_DEVICES to '1' (GPU indices [1]) for inference process 1
+-[2023-09-19 11:19:42,756][73145] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:19:42,756][73145] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+-[2023-09-19 11:19:42,787][73145] Num visible devices: 1
+-[2023-09-19 11:19:42,787][73219] Num visible devices: 1
+-[2023-09-19 11:19:42,797][73130] Created Actor Critic model with architecture:
+-[2023-09-19 11:19:42,797][73130] ActorCriticSharedWeights(
+-  (obs_normalizer): ObservationNormalizer(
+-    (running_mean_std): RunningMeanStdDictInPlace(
+-      (running_mean_std): ModuleDict(
+-        (obs): RunningMeanStdInPlace()
+-      )
+-    )
+-  )
+-  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+-  (encoder): MultiInputEncoder(
+-    (encoders): ModuleDict(
+-      (obs): MlpEncoder(
+-        (mlp_head): RecursiveScriptModule(
+-          original_name=Sequential
+-          (0): RecursiveScriptModule(original_name=Linear)
+-          (1): RecursiveScriptModule(original_name=Tanh)
+-          (2): RecursiveScriptModule(original_name=Linear)
+-          (3): RecursiveScriptModule(original_name=Tanh)
+-        )
+-      )
+-    )
+-  )
+-  (core): ModelCoreIdentity()
+-  (decoder): MlpDecoder(
+-    (mlp): Identity()
+-  )
+-  (critic_linear): Linear(in_features=64, out_features=1, bias=True)
+-  (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
+-    (distribution_linear): Linear(in_features=64, out_features=17, bias=True)
+-  )
+-)
+-[2023-09-19 11:19:42,798][73221] Worker 1 uses CPU cores [4, 5, 6, 7]
+-[2023-09-19 11:19:42,939][73223] Worker 3 uses CPU cores [12, 13, 14, 15]
+-[2023-09-19 11:19:43,116][73229] Worker 5 uses CPU cores [20, 21, 22, 23]
+-[2023-09-19 11:19:43,205][73218] Worker 0 uses CPU cores [0, 1, 2, 3]
+-[2023-09-19 11:19:43,373][73131] Using optimizer <class 'torch.optim.adam.Adam'>
+-[2023-09-19 11:19:43,373][73131] Loading state from checkpoint ./train_dir/Standup/checkpoint_p1/checkpoint_000002360_1208320.pth...
+-[2023-09-19 11:19:43,379][73131] Loading model from checkpoint
+-[2023-09-19 11:19:43,381][73131] Loaded experiment state at self.train_step=2360, self.env_steps=1208320
+-[2023-09-19 11:19:43,382][73131] Initialized policy 1 weights for model version 2360
+-[2023-09-19 11:19:43,383][73131] LearnerWorker_p1 finished initialization!
+-[2023-09-19 11:19:43,383][73131] Using GPUs [0] for process 1 (actually maps to GPUs [1])
+-[2023-09-19 11:19:43,409][73130] Using optimizer <class 'torch.optim.adam.Adam'>
+-[2023-09-19 11:19:43,410][73130] Loading state from checkpoint ./train_dir/Standup/checkpoint_p0/checkpoint_000002400_1228800.pth...
+-[2023-09-19 11:19:43,416][73130] Loading model from checkpoint
+-[2023-09-19 11:19:43,419][73130] Loaded experiment state at self.train_step=2400, self.env_steps=1228800
+-[2023-09-19 11:19:43,419][73130] Initialized policy 0 weights for model version 2400
+-[2023-09-19 11:19:43,427][73130] LearnerWorker_p0 finished initialization!
+-[2023-09-19 11:19:43,427][73130] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+-[2023-09-19 11:19:43,971][73219] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:19:43,971][73219] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:19:43,987][73145] RunningMeanStd input shape: (376,)
+-[2023-09-19 11:19:43,987][73145] RunningMeanStd input shape: (1,)
+-[2023-09-19 11:19:44,004][72530] Inference worker 1-0 is ready!
+-[2023-09-19 11:19:44,021][72530] Inference worker 0-0 is ready!
+-[2023-09-19 11:19:44,022][72530] All inference workers are ready! Signal rollout workers to start!
+-[2023-09-19 11:19:44,118][73223] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,119][73223] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,120][73229] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,121][73229] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,122][73221] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,123][73221] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,128][73226] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,128][73226] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,132][73220] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,133][73220] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,141][73222] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,142][73222] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,157][73224] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,158][73224] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,161][73218] Decorrelating experience for 0 frames...
+-[2023-09-19 11:19:44,162][73218] Decorrelating experience for 64 frames...
+-[2023-09-19 11:19:44,168][73223] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,171][73229] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,175][73221] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,181][73226] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,199][73220] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,200][73222] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,222][73224] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,243][73218] Decorrelating experience for 128 frames...
+-[2023-09-19 11:19:44,273][73223] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,277][73229] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,281][73221] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,284][73226] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,299][73222] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,304][73220] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,319][73224] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,368][73218] Decorrelating experience for 192 frames...
+-[2023-09-19 11:19:44,444][73223] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,447][73229] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,457][73226] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,462][73221] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,476][73222] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,492][73220] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,499][73224] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,575][73218] Decorrelating experience for 256 frames...
+-[2023-09-19 11:19:44,651][73223] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,657][73229] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,666][73226] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,681][73221] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,689][73222] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,707][73220] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,714][73224] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,803][73218] Decorrelating experience for 320 frames...
+-[2023-09-19 11:19:44,907][73223] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:44,915][73229] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:44,916][73226] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:44,933][73221] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:44,939][73222] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:44,969][73224] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:45,006][73220] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:45,067][73218] Decorrelating experience for 384 frames...
+-[2023-09-19 11:19:45,209][73223] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,221][73226] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,227][73229] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,253][73222] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,261][73221] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,302][73224] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,325][73220] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:45,392][73218] Decorrelating experience for 448 frames...
+-[2023-09-19 11:19:47,043][72530] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 2437120. Throughput: 0: nan, 1: nan. Samples: 5818. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+-[2023-09-19 11:19:52,043][72530] Fps is (10 sec: 3276.7, 60 sec: 3276.7, 300 sec: 3276.7). Total num frames: 2453504. Throughput: 0: 1049.2, 1: 1064.0. Samples: 16384. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:19:52,340][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002424_1241088.pth...
+-[2023-09-19 11:19:52,343][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002384_1220608.pth...
+-[2023-09-19 11:19:52,345][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002392_1224704.pth
+-[2023-09-19 11:19:52,348][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002352_1204224.pth
+-[2023-09-19 11:19:57,043][72530] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 2478080. Throughput: 0: 1910.2, 1: 1911.2. Samples: 44032. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:19:57,045][72530] Avg episode reward: [(0, '67743.941'), (1, '43579.059')]
+-[2023-09-19 11:20:00,740][72530] Heartbeat connected on Batcher_0
+-[2023-09-19 11:20:00,743][72530] Heartbeat connected on LearnerWorker_p0
+-[2023-09-19 11:20:00,746][72530] Heartbeat connected on Batcher_1
+-[2023-09-19 11:20:00,749][72530] Heartbeat connected on LearnerWorker_p1
+-[2023-09-19 11:20:00,755][72530] Heartbeat connected on InferenceWorker_p0-w0
+-[2023-09-19 11:20:00,760][72530] Heartbeat connected on InferenceWorker_p1-w0
+-[2023-09-19 11:20:00,761][72530] Heartbeat connected on RolloutWorker_w0
+-[2023-09-19 11:20:00,767][72530] Heartbeat connected on RolloutWorker_w2
+-[2023-09-19 11:20:00,769][72530] Heartbeat connected on RolloutWorker_w3
+-[2023-09-19 11:20:00,771][72530] Heartbeat connected on RolloutWorker_w1
+-[2023-09-19 11:20:00,772][72530] Heartbeat connected on RolloutWorker_w4
+-[2023-09-19 11:20:00,779][72530] Heartbeat connected on RolloutWorker_w6
+-[2023-09-19 11:20:00,782][72530] Heartbeat connected on RolloutWorker_w7
+-[2023-09-19 11:20:00,783][72530] Heartbeat connected on RolloutWorker_w5
+-[2023-09-19 11:20:02,043][72530] Fps is (10 sec: 6144.1, 60 sec: 5188.3, 300 sec: 5188.3). Total num frames: 2514944. Throughput: 0: 1864.9, 1: 1865.5. Samples: 61774. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:20:02,043][72530] Avg episode reward: [(0, '99962.574'), (1, '74154.644')]
+-[2023-09-19 11:20:02,049][73145] Updated weights for policy 0, policy_version 2480 (0.0015)
+-[2023-09-19 11:20:02,049][73219] Updated weights for policy 1, policy_version 2440 (0.0013)
+-[2023-09-19 11:20:07,043][72530] Fps is (10 sec: 7372.7, 60 sec: 5734.3, 300 sec: 5734.3). Total num frames: 2551808. Throughput: 0: 2437.4, 1: 2437.7. Samples: 103320. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:20:07,045][72530] Avg episode reward: [(0, '117948.527'), (1, '103367.832')]
+-[2023-09-19 11:20:07,048][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002512_1286144.pth...
+-[2023-09-19 11:20:07,048][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002472_1265664.pth...
+-[2023-09-19 11:20:07,055][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002400_1228800.pth
+-[2023-09-19 11:20:07,057][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002360_1208320.pth
+-[2023-09-19 11:20:12,043][72530] Fps is (10 sec: 6963.0, 60 sec: 5898.2, 300 sec: 5898.2). Total num frames: 2584576. Throughput: 0: 2806.0, 1: 2806.1. Samples: 146122. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:20:12,045][72530] Avg episode reward: [(0, '124390.108'), (1, '110149.801')]
+-[2023-09-19 11:20:13,613][73219] Updated weights for policy 1, policy_version 2520 (0.0012)
+-[2023-09-19 11:20:13,613][73145] Updated weights for policy 0, policy_version 2560 (0.0014)
+-[2023-09-19 11:20:17,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6007.5, 300 sec: 6007.5). Total num frames: 2617344. Throughput: 0: 3030.0, 1: 3030.3. Samples: 187626. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:20:17,047][72530] Avg episode reward: [(0, '133247.381'), (1, '124425.608')]
+-[2023-09-19 11:20:22,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6085.4, 300 sec: 6085.4). Total num frames: 2650112. Throughput: 0: 2890.3, 1: 2890.4. Samples: 208144. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:20:22,044][72530] Avg episode reward: [(0, '134256.432'), (1, '126597.151')]
+-[2023-09-19 11:20:22,047][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002568_1314816.pth...
+-[2023-09-19 11:20:22,049][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002608_1335296.pth...
+-[2023-09-19 11:20:22,056][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002384_1220608.pth
+-[2023-09-19 11:20:22,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002424_1241088.pth
+-[2023-09-19 11:20:26,328][73219] Updated weights for policy 1, policy_version 2600 (0.0014)
+-[2023-09-19 11:20:26,329][73145] Updated weights for policy 0, policy_version 2640 (0.0014)
+-[2023-09-19 11:20:27,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6144.0, 300 sec: 6144.0). Total num frames: 2682880. Throughput: 0: 2979.6, 1: 2979.8. Samples: 244194. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:20:27,044][72530] Avg episode reward: [(0, '152920.116'), (1, '149441.210')]
+-[2023-09-19 11:20:32,043][72530] Fps is (10 sec: 6553.8, 60 sec: 6189.5, 300 sec: 6189.5). Total num frames: 2715648. Throughput: 0: 3119.2, 1: 3119.5. Samples: 286558. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:20:32,044][72530] Avg episode reward: [(0, '154007.595'), (1, '152818.612')]
+-[2023-09-19 11:20:37,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6225.9, 300 sec: 6225.9). Total num frames: 2748416. Throughput: 0: 3203.5, 1: 3202.2. Samples: 304640. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:20:37,044][72530] Avg episode reward: [(0, '152855.520'), (1, '157930.094')]
+-[2023-09-19 11:20:37,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002664_1363968.pth...
+-[2023-09-19 11:20:37,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002704_1384448.pth...
+-[2023-09-19 11:20:37,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002472_1265664.pth
+-[2023-09-19 11:20:37,059][73131] Saving new best policy, reward=157930.094!
+-[2023-09-19 11:20:37,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002512_1286144.pth
+-[2023-09-19 11:20:39,045][73219] Updated weights for policy 1, policy_version 2680 (0.0013)
+-[2023-09-19 11:20:39,046][73145] Updated weights for policy 0, policy_version 2720 (0.0015)
+-[2023-09-19 11:20:42,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6255.7, 300 sec: 6255.7). Total num frames: 2781184. Throughput: 0: 3306.8, 1: 3306.8. Samples: 341646. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:20:42,044][72530] Avg episode reward: [(0, '151664.402'), (1, '158564.220')]
+-[2023-09-19 11:20:42,045][73131] Saving new best policy, reward=158564.220!
+-[2023-09-19 11:20:47,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6280.5). Total num frames: 2813952. Throughput: 0: 3562.3, 1: 3562.2. Samples: 382380. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:20:47,044][72530] Avg episode reward: [(0, '145105.303'), (1, '155856.908')]
+-[2023-09-19 11:20:51,525][73145] Updated weights for policy 0, policy_version 2800 (0.0016)
+-[2023-09-19 11:20:51,526][73219] Updated weights for policy 1, policy_version 2760 (0.0014)
+-[2023-09-19 11:20:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6301.5). Total num frames: 2846720. Throughput: 0: 3312.4, 1: 3313.6. Samples: 401490. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:20:52,044][72530] Avg episode reward: [(0, '144121.200'), (1, '155856.908')]
+-[2023-09-19 11:20:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002800_1433600.pth...
+-[2023-09-19 11:20:52,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002760_1413120.pth...
+-[2023-09-19 11:20:52,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002608_1335296.pth
+-[2023-09-19 11:20:52,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002568_1314816.pth
+-[2023-09-19 11:20:57,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6690.1, 300 sec: 6319.5). Total num frames: 2879488. Throughput: 0: 3240.4, 1: 3240.4. Samples: 437758. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:20:57,044][72530] Avg episode reward: [(0, '142988.097'), (1, '155707.131')]
+-[2023-09-19 11:21:02,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6621.8, 300 sec: 6335.1). Total num frames: 2912256. Throughput: 0: 3233.5, 1: 3233.3. Samples: 478634. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:21:02,045][72530] Avg episode reward: [(0, '144052.838'), (1, '155707.131')]
+-[2023-09-19 11:21:04,172][73219] Updated weights for policy 1, policy_version 2840 (0.0013)
+-[2023-09-19 11:21:04,172][73145] Updated weights for policy 0, policy_version 2880 (0.0010)
+-[2023-09-19 11:21:07,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6348.8). Total num frames: 2945024. Throughput: 0: 3220.4, 1: 3220.5. Samples: 497982. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:21:07,044][72530] Avg episode reward: [(0, '145667.627'), (1, '156028.379')]
+-[2023-09-19 11:21:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002856_1462272.pth...
+-[2023-09-19 11:21:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002896_1482752.pth...
+-[2023-09-19 11:21:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002664_1363968.pth
+-[2023-09-19 11:21:07,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002704_1384448.pth
+-[2023-09-19 11:21:12,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6360.8). Total num frames: 2977792. Throughput: 0: 3292.6, 1: 3293.6. Samples: 540574. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:21:12,045][72530] Avg episode reward: [(0, '147376.857'), (1, '156100.448')]
+-[2023-09-19 11:21:16,935][73219] Updated weights for policy 1, policy_version 2920 (0.0012)
+-[2023-09-19 11:21:16,935][73145] Updated weights for policy 0, policy_version 2960 (0.0013)
+-[2023-09-19 11:21:17,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6371.5). Total num frames: 3010560. Throughput: 0: 3192.5, 1: 3192.8. Samples: 573896. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:21:17,044][72530] Avg episode reward: [(0, '153646.249'), (1, '157825.450')]
+-[2023-09-19 11:21:22,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6417.1, 300 sec: 6294.9). Total num frames: 3035136. Throughput: 0: 3218.0, 1: 3217.9. Samples: 594256. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:21:22,044][72530] Avg episode reward: [(0, '156927.936'), (1, '157825.450')]
+-[2023-09-19 11:21:22,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002944_1507328.pth...
+-[2023-09-19 11:21:22,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002984_1527808.pth...
+-[2023-09-19 11:21:22,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002800_1433600.pth
+-[2023-09-19 11:21:22,060][73130] Saving new best policy, reward=156927.936!
+-[2023-09-19 11:21:22,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002760_1413120.pth
+-[2023-09-19 11:21:27,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6417.1, 300 sec: 6307.8). Total num frames: 3067904. Throughput: 0: 3210.2, 1: 3210.0. Samples: 630558. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:21:27,045][72530] Avg episode reward: [(0, '158809.152'), (1, '159261.146')]
+-[2023-09-19 11:21:27,046][73130] Saving new best policy, reward=158809.152!
+-[2023-09-19 11:21:27,046][73131] Saving new best policy, reward=159261.146!
+-[2023-09-19 11:21:29,937][73145] Updated weights for policy 0, policy_version 3040 (0.0014)
+-[2023-09-19 11:21:29,937][73219] Updated weights for policy 1, policy_version 3000 (0.0015)
+-[2023-09-19 11:21:32,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6417.0, 300 sec: 6319.5). Total num frames: 3100672. Throughput: 0: 3202.6, 1: 3202.7. Samples: 670618. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:21:32,044][72530] Avg episode reward: [(0, '158669.703'), (1, '159261.146')]
+-[2023-09-19 11:21:37,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6330.2). Total num frames: 3133440. Throughput: 0: 3194.8, 1: 3193.6. Samples: 688972. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:21:37,044][72530] Avg episode reward: [(0, '160207.000'), (1, '158900.446')]
+-[2023-09-19 11:21:37,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003040_1556480.pth...
+-[2023-09-19 11:21:37,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003080_1576960.pth...
+-[2023-09-19 11:21:37,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002856_1462272.pth
+-[2023-09-19 11:21:37,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002896_1482752.pth
+-[2023-09-19 11:21:37,063][73130] Saving new best policy, reward=160207.000!
+-[2023-09-19 11:21:42,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6417.1, 300 sec: 6339.9). Total num frames: 3166208. Throughput: 0: 3201.6, 1: 3201.7. Samples: 725908. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:21:42,044][72530] Avg episode reward: [(0, '160167.920'), (1, '158900.446')]
+-[2023-09-19 11:21:43,450][73219] Updated weights for policy 1, policy_version 3080 (0.0013)
+-[2023-09-19 11:21:43,451][73145] Updated weights for policy 0, policy_version 3120 (0.0018)
+-[2023-09-19 11:21:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6280.5). Total num frames: 3190784. Throughput: 0: 3112.9, 1: 3113.1. Samples: 758804. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:21:47,045][72530] Avg episode reward: [(0, '160649.680'), (1, '159123.148')]
+-[2023-09-19 11:21:47,046][73130] Saving new best policy, reward=160649.680!
+-[2023-09-19 11:21:52,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6291.4). Total num frames: 3223552. Throughput: 0: 3114.0, 1: 3115.4. Samples: 778308. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:21:52,044][72530] Avg episode reward: [(0, '159415.266'), (1, '160065.227')]
+-[2023-09-19 11:21:52,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003168_1622016.pth...
+-[2023-09-19 11:21:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003128_1601536.pth...
+-[2023-09-19 11:21:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002944_1507328.pth
+-[2023-09-19 11:21:52,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002984_1527808.pth
+-[2023-09-19 11:21:52,062][73131] Saving new best policy, reward=160065.227!
+-[2023-09-19 11:21:56,561][73145] Updated weights for policy 0, policy_version 3200 (0.0015)
+-[2023-09-19 11:21:56,561][73219] Updated weights for policy 1, policy_version 3160 (0.0015)
+-[2023-09-19 11:21:57,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6301.5). Total num frames: 3256320. Throughput: 0: 3061.8, 1: 3060.8. Samples: 816090. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:21:57,044][72530] Avg episode reward: [(0, '159883.769'), (1, '159603.128')]
+-[2023-09-19 11:22:02,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6310.9). Total num frames: 3289088. Throughput: 0: 3110.9, 1: 3110.3. Samples: 853850. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:22:02,044][72530] Avg episode reward: [(0, '160168.162'), (1, '158805.251')]
+-[2023-09-19 11:22:07,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6319.5). Total num frames: 3321856. Throughput: 0: 3100.6, 1: 3100.8. Samples: 873320. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:22:07,044][72530] Avg episode reward: [(0, '159368.135'), (1, '158166.073')]
+-[2023-09-19 11:22:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003224_1650688.pth...
+-[2023-09-19 11:22:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003264_1671168.pth...
+-[2023-09-19 11:22:07,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003040_1556480.pth
+-[2023-09-19 11:22:07,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003080_1576960.pth
+-[2023-09-19 11:22:09,339][73145] Updated weights for policy 0, policy_version 3280 (0.0016)
+-[2023-09-19 11:22:09,339][73219] Updated weights for policy 1, policy_version 3240 (0.0013)
+-[2023-09-19 11:22:12,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6327.6). Total num frames: 3354624. Throughput: 0: 3134.1, 1: 3134.3. Samples: 912634. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:12,044][72530] Avg episode reward: [(0, '154750.825'), (1, '157929.657')]
+-[2023-09-19 11:22:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6280.5). Total num frames: 3379200. Throughput: 0: 3098.0, 1: 3098.2. Samples: 949448. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:17,045][72530] Avg episode reward: [(0, '152601.903'), (1, '153832.123')]
+-[2023-09-19 11:22:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6289.3). Total num frames: 3411968. Throughput: 0: 3112.9, 1: 3113.0. Samples: 969138. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:22:22,044][72530] Avg episode reward: [(0, '152992.401'), (1, '152373.243')]
+-[2023-09-19 11:22:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003312_1695744.pth...
+-[2023-09-19 11:22:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003352_1716224.pth...
+-[2023-09-19 11:22:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003128_1601536.pth
+-[2023-09-19 11:22:22,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003168_1622016.pth
+-[2023-09-19 11:22:22,748][73219] Updated weights for policy 1, policy_version 3320 (0.0013)
+-[2023-09-19 11:22:22,749][73145] Updated weights for policy 0, policy_version 3360 (0.0014)
+-[2023-09-19 11:22:27,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6297.6). Total num frames: 3444736. Throughput: 0: 3104.4, 1: 3104.3. Samples: 1005296. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:27,044][72530] Avg episode reward: [(0, '152409.297'), (1, '152400.063')]
+-[2023-09-19 11:22:32,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6305.4). Total num frames: 3477504. Throughput: 0: 3183.3, 1: 3183.3. Samples: 1045302. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:22:32,044][72530] Avg episode reward: [(0, '150571.798'), (1, '152330.295')]
+-[2023-09-19 11:22:35,267][73145] Updated weights for policy 0, policy_version 3440 (0.0013)
+-[2023-09-19 11:22:35,268][73219] Updated weights for policy 1, policy_version 3400 (0.0014)
+-[2023-09-19 11:22:37,043][72530] Fps is (10 sec: 6553.4, 60 sec: 6280.5, 300 sec: 6312.6). Total num frames: 3510272. Throughput: 0: 3180.8, 1: 3180.2. Samples: 1064554. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:37,044][72530] Avg episode reward: [(0, '150024.495'), (1, '153833.309')]
+-[2023-09-19 11:22:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003408_1744896.pth...
+-[2023-09-19 11:22:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003448_1765376.pth...
+-[2023-09-19 11:22:37,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003264_1671168.pth
+-[2023-09-19 11:22:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003224_1650688.pth
+-[2023-09-19 11:22:42,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6319.5). Total num frames: 3543040. Throughput: 0: 3161.4, 1: 3161.5. Samples: 1100622. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:42,044][72530] Avg episode reward: [(0, '153738.816'), (1, '154391.331')]
+-[2023-09-19 11:22:47,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6280.5, 300 sec: 6280.5). Total num frames: 3567616. Throughput: 0: 3163.8, 1: 3165.1. Samples: 1138652. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:47,044][72530] Avg episode reward: [(0, '154735.807'), (1, '155587.330')]
+-[2023-09-19 11:22:48,507][73145] Updated weights for policy 0, policy_version 3520 (0.0012)
+-[2023-09-19 11:22:48,508][73219] Updated weights for policy 1, policy_version 3480 (0.0013)
+-[2023-09-19 11:22:52,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6287.9). Total num frames: 3600384. Throughput: 0: 3151.2, 1: 3151.2. Samples: 1156926. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:22:52,044][72530] Avg episode reward: [(0, '155558.003'), (1, '158444.382')]
+-[2023-09-19 11:22:52,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003496_1789952.pth...
+-[2023-09-19 11:22:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003536_1810432.pth...
+-[2023-09-19 11:22:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003312_1695744.pth
+-[2023-09-19 11:22:52,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003352_1716224.pth
+-[2023-09-19 11:22:57,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6251.8). Total num frames: 3624960. Throughput: 0: 3076.0, 1: 3076.3. Samples: 1189490. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:22:57,044][72530] Avg episode reward: [(0, '156213.048'), (1, '159905.417')]
+-[2023-09-19 11:23:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6144.0, 300 sec: 6259.5). Total num frames: 3657728. Throughput: 0: 3051.2, 1: 3051.2. Samples: 1224052. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:02,044][72530] Avg episode reward: [(0, '154952.583'), (1, '160034.837')]
+-[2023-09-19 11:23:03,139][73219] Updated weights for policy 1, policy_version 3560 (0.0011)
+-[2023-09-19 11:23:03,140][73145] Updated weights for policy 0, policy_version 3600 (0.0014)
+-[2023-09-19 11:23:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6225.9). Total num frames: 3682304. Throughput: 0: 3014.6, 1: 3014.8. Samples: 1240462. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:23:07,045][72530] Avg episode reward: [(0, '154952.583'), (1, '160050.141')]
+-[2023-09-19 11:23:07,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003576_1830912.pth...
+-[2023-09-19 11:23:07,057][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003616_1851392.pth...
+-[2023-09-19 11:23:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003448_1765376.pth
+-[2023-09-19 11:23:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003408_1744896.pth
+-[2023-09-19 11:23:12,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6007.5, 300 sec: 6233.9). Total num frames: 3715072. Throughput: 0: 3031.9, 1: 3032.3. Samples: 1278182. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:12,045][72530] Avg episode reward: [(0, '151313.141'), (1, '159602.087')]
+-[2023-09-19 11:23:16,373][73219] Updated weights for policy 1, policy_version 3640 (0.0012)
+-[2023-09-19 11:23:16,374][73145] Updated weights for policy 0, policy_version 3680 (0.0011)
+-[2023-09-19 11:23:17,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6144.0, 300 sec: 6241.5). Total num frames: 3747840. Throughput: 0: 2998.0, 1: 2998.0. Samples: 1315120. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:17,044][72530] Avg episode reward: [(0, '151313.141'), (1, '159523.019')]
+-[2023-09-19 11:23:22,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6144.0, 300 sec: 6248.8). Total num frames: 3780608. Throughput: 0: 3006.6, 1: 3007.0. Samples: 1335164. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:23:22,044][72530] Avg episode reward: [(0, '152570.175'), (1, '158236.314')]
+-[2023-09-19 11:23:22,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003712_1900544.pth...
+-[2023-09-19 11:23:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003672_1880064.pth...
+-[2023-09-19 11:23:22,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003536_1810432.pth
+-[2023-09-19 11:23:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003496_1789952.pth
+-[2023-09-19 11:23:27,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6144.0, 300 sec: 6255.7). Total num frames: 3813376. Throughput: 0: 2986.7, 1: 2986.5. Samples: 1369418. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:27,045][72530] Avg episode reward: [(0, '152570.175'), (1, '158236.314')]
+-[2023-09-19 11:23:29,330][73219] Updated weights for policy 1, policy_version 3720 (0.0013)
+-[2023-09-19 11:23:29,331][73145] Updated weights for policy 0, policy_version 3760 (0.0014)
+-[2023-09-19 11:23:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6225.9). Total num frames: 3837952. Throughput: 0: 2803.1, 1: 2802.0. Samples: 1390884. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:23:32,044][72530] Avg episode reward: [(0, '152939.553'), (1, '157598.636')]
+-[2023-09-19 11:23:37,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6007.5, 300 sec: 6233.0). Total num frames: 3870720. Throughput: 0: 3009.5, 1: 3009.5. Samples: 1427778. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:23:37,044][72530] Avg episode reward: [(0, '152939.553'), (1, '157598.636')]
+-[2023-09-19 11:23:37,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003800_1945600.pth...
+-[2023-09-19 11:23:37,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003760_1925120.pth...
+-[2023-09-19 11:23:37,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003616_1851392.pth
+-[2023-09-19 11:23:37,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003576_1830912.pth
+-[2023-09-19 11:23:42,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6007.5, 300 sec: 6239.9). Total num frames: 3903488. Throughput: 0: 3062.4, 1: 3061.9. Samples: 1465082. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:42,044][72530] Avg episode reward: [(0, '155045.334'), (1, '157877.174')]
+-[2023-09-19 11:23:42,591][73219] Updated weights for policy 1, policy_version 3800 (0.0013)
+-[2023-09-19 11:23:42,592][73145] Updated weights for policy 0, policy_version 3840 (0.0013)
+-[2023-09-19 11:23:47,043][72530] Fps is (10 sec: 5734.2, 60 sec: 6007.4, 300 sec: 6212.3). Total num frames: 3928064. Throughput: 0: 3055.4, 1: 3056.6. Samples: 1499094. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:47,044][72530] Avg episode reward: [(0, '155518.647'), (1, '157877.174')]
+-[2023-09-19 11:23:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6219.2). Total num frames: 3960832. Throughput: 0: 3090.5, 1: 3090.4. Samples: 1518604. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:23:52,044][72530] Avg episode reward: [(0, '156419.731'), (1, '159794.628')]
+-[2023-09-19 11:23:52,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003848_1970176.pth...
+-[2023-09-19 11:23:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003888_1990656.pth...
+-[2023-09-19 11:23:52,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003672_1880064.pth
+-[2023-09-19 11:23:52,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003712_1900544.pth
+-[2023-09-19 11:23:56,865][73219] Updated weights for policy 1, policy_version 3880 (0.0012)
+-[2023-09-19 11:23:56,866][73145] Updated weights for policy 0, policy_version 3920 (0.0014)
+-[2023-09-19 11:23:57,043][72530] Fps is (10 sec: 6553.8, 60 sec: 6144.0, 300 sec: 6225.9). Total num frames: 3993600. Throughput: 0: 3037.8, 1: 3037.7. Samples: 1551576. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:23:57,044][72530] Avg episode reward: [(0, '155910.674'), (1, '159794.628')]
+-[2023-09-19 11:24:02,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6007.4, 300 sec: 6200.2). Total num frames: 4018176. Throughput: 0: 2986.6, 1: 2986.6. Samples: 1583914. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:02,045][72530] Avg episode reward: [(0, '155254.236'), (1, '160650.698')]
+-[2023-09-19 11:24:02,046][73131] Saving new best policy, reward=160650.698!
+-[2023-09-19 11:24:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6207.0). Total num frames: 4050944. Throughput: 0: 2980.8, 1: 2979.8. Samples: 1603390. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:07,044][72530] Avg episode reward: [(0, '155932.263'), (1, '160981.958')]
+-[2023-09-19 11:24:07,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000003936_2015232.pth...
+-[2023-09-19 11:24:07,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000003976_2035712.pth...
+-[2023-09-19 11:24:07,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003800_1945600.pth
+-[2023-09-19 11:24:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003760_1925120.pth
+-[2023-09-19 11:24:07,062][73131] Saving new best policy, reward=160981.958!
+-[2023-09-19 11:24:10,788][73145] Updated weights for policy 0, policy_version 4000 (0.0014)
+-[2023-09-19 11:24:10,788][73219] Updated weights for policy 1, policy_version 3960 (0.0013)
+-[2023-09-19 11:24:12,043][72530] Fps is (10 sec: 5734.6, 60 sec: 6007.5, 300 sec: 6182.6). Total num frames: 4075520. Throughput: 0: 3006.8, 1: 3006.8. Samples: 1640028. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:12,044][72530] Avg episode reward: [(0, '156741.192'), (1, '161232.794')]
+-[2023-09-19 11:24:12,051][73131] Saving new best policy, reward=161232.794!
+-[2023-09-19 11:24:17,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6144.0, 300 sec: 6219.9). Total num frames: 4116480. Throughput: 0: 3213.3, 1: 3213.3. Samples: 1680080. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:17,044][72530] Avg episode reward: [(0, '158187.015'), (1, '161349.690')]
+-[2023-09-19 11:24:17,045][73131] Saving new best policy, reward=161349.690!
+-[2023-09-19 11:24:22,043][72530] Fps is (10 sec: 7372.6, 60 sec: 6144.0, 300 sec: 6225.9). Total num frames: 4149248. Throughput: 0: 3037.6, 1: 3037.4. Samples: 1701154. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:22,044][72530] Avg episode reward: [(0, '158725.033'), (1, '161643.895')]
+-[2023-09-19 11:24:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004032_2064384.pth...
+-[2023-09-19 11:24:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004072_2084864.pth...
+-[2023-09-19 11:24:22,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003848_1970176.pth
+-[2023-09-19 11:24:22,061][73131] Saving new best policy, reward=161643.895!
+-[2023-09-19 11:24:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003888_1990656.pth
+-[2023-09-19 11:24:23,177][73219] Updated weights for policy 1, policy_version 4040 (0.0014)
+-[2023-09-19 11:24:23,178][73145] Updated weights for policy 0, policy_version 4080 (0.0016)
+-[2023-09-19 11:24:27,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6007.5, 300 sec: 6202.5). Total num frames: 4173824. Throughput: 0: 3023.4, 1: 3023.4. Samples: 1737188. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:27,044][72530] Avg episode reward: [(0, '155903.476'), (1, '161815.854')]
+-[2023-09-19 11:24:27,045][73131] Saving new best policy, reward=161815.854!
+-[2023-09-19 11:24:32,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6237.4). Total num frames: 4214784. Throughput: 0: 3098.6, 1: 3097.1. Samples: 1777900. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:24:32,044][72530] Avg episode reward: [(0, '154152.668'), (1, '161862.881')]
+-[2023-09-19 11:24:32,045][73131] Saving new best policy, reward=161862.881!
+-[2023-09-19 11:24:36,067][73145] Updated weights for policy 0, policy_version 4160 (0.0013)
+-[2023-09-19 11:24:36,069][73219] Updated weights for policy 1, policy_version 4120 (0.0014)
+-[2023-09-19 11:24:37,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6144.0, 300 sec: 6214.6). Total num frames: 4239360. Throughput: 0: 3068.9, 1: 3068.7. Samples: 1794800. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:24:37,044][72530] Avg episode reward: [(0, '151755.204'), (1, '161941.149')]
+-[2023-09-19 11:24:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004160_2129920.pth...
+-[2023-09-19 11:24:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004120_2109440.pth...
+-[2023-09-19 11:24:37,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000003976_2035712.pth
+-[2023-09-19 11:24:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000003936_2015232.pth
+-[2023-09-19 11:24:37,065][73131] Saving new best policy, reward=161941.149!
+-[2023-09-19 11:24:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6220.4). Total num frames: 4272128. Throughput: 0: 3155.4, 1: 3155.3. Samples: 1835558. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:24:42,044][72530] Avg episode reward: [(0, '150680.731'), (1, '161901.771')]
+-[2023-09-19 11:24:47,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.6, 300 sec: 6275.9). Total num frames: 4304896. Throughput: 0: 3207.8, 1: 3207.7. Samples: 1872614. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:47,044][72530] Avg episode reward: [(0, '148141.874'), (1, '161823.095')]
+-[2023-09-19 11:24:48,828][73219] Updated weights for policy 1, policy_version 4200 (0.0010)
+-[2023-09-19 11:24:48,829][73145] Updated weights for policy 0, policy_version 4240 (0.0014)
+-[2023-09-19 11:24:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6303.7). Total num frames: 4337664. Throughput: 0: 3210.0, 1: 3211.3. Samples: 1892352. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:52,044][72530] Avg episode reward: [(0, '144621.889'), (1, '160527.319')]
+-[2023-09-19 11:24:52,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004216_2158592.pth...
+-[2023-09-19 11:24:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004256_2179072.pth...
+-[2023-09-19 11:24:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004032_2064384.pth
+-[2023-09-19 11:24:52,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004072_2084864.pth
+-[2023-09-19 11:24:57,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6289.8). Total num frames: 4370432. Throughput: 0: 3240.7, 1: 3240.7. Samples: 1931690. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:24:57,044][72530] Avg episode reward: [(0, '144800.686'), (1, '160295.845')]
+-[2023-09-19 11:25:01,923][73219] Updated weights for policy 1, policy_version 4280 (0.0013)
+-[2023-09-19 11:25:01,923][73145] Updated weights for policy 0, policy_version 4320 (0.0013)
+-[2023-09-19 11:25:02,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6275.9). Total num frames: 4403200. Throughput: 0: 3184.6, 1: 3184.7. Samples: 1966700. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:02,044][72530] Avg episode reward: [(0, '145084.319'), (1, '160219.957')]
+-[2023-09-19 11:25:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 4427776. Throughput: 0: 3125.7, 1: 3127.1. Samples: 1982528. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:07,045][72530] Avg episode reward: [(0, '144470.225'), (1, '159962.850')]
+-[2023-09-19 11:25:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004304_2203648.pth...
+-[2023-09-19 11:25:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004344_2224128.pth...
+-[2023-09-19 11:25:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004120_2109440.pth
+-[2023-09-19 11:25:07,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004160_2129920.pth
+-[2023-09-19 11:25:12,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4460544. Throughput: 0: 3189.5, 1: 3189.4. Samples: 2024240. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:25:12,044][72530] Avg episode reward: [(0, '144911.603'), (1, '159940.569')]
+-[2023-09-19 11:25:14,575][73145] Updated weights for policy 0, policy_version 4400 (0.0013)
+-[2023-09-19 11:25:14,577][73219] Updated weights for policy 1, policy_version 4360 (0.0014)
+-[2023-09-19 11:25:17,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 4493312. Throughput: 0: 3180.8, 1: 3181.4. Samples: 2064198. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:17,044][72530] Avg episode reward: [(0, '140838.640'), (1, '160023.760')]
+-[2023-09-19 11:25:22,043][72530] Fps is (10 sec: 7372.7, 60 sec: 6417.1, 300 sec: 6275.9). Total num frames: 4534272. Throughput: 0: 3210.7, 1: 3210.9. Samples: 2083770. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:22,044][72530] Avg episode reward: [(0, '145736.624'), (1, '161193.551')]
+-[2023-09-19 11:25:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004408_2256896.pth...
+-[2023-09-19 11:25:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004448_2277376.pth...
+-[2023-09-19 11:25:22,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004256_2179072.pth
+-[2023-09-19 11:25:22,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004216_2158592.pth
+-[2023-09-19 11:25:27,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4558848. Throughput: 0: 3177.1, 1: 3178.3. Samples: 2121554. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:27,044][72530] Avg episode reward: [(0, '146210.154'), (1, '161328.801')]
+-[2023-09-19 11:25:27,759][73219] Updated weights for policy 1, policy_version 4440 (0.0013)
+-[2023-09-19 11:25:27,759][73145] Updated weights for policy 0, policy_version 4480 (0.0013)
+-[2023-09-19 11:25:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 4591616. Throughput: 0: 3156.1, 1: 3156.0. Samples: 2156656. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:32,044][72530] Avg episode reward: [(0, '148003.467'), (1, '161381.807')]
+-[2023-09-19 11:25:37,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4624384. Throughput: 0: 3173.7, 1: 3172.3. Samples: 2177924. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:37,044][72530] Avg episode reward: [(0, '152496.039'), (1, '161644.736')]
+-[2023-09-19 11:25:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004496_2301952.pth...
+-[2023-09-19 11:25:37,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004536_2322432.pth...
+-[2023-09-19 11:25:37,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004304_2203648.pth
+-[2023-09-19 11:25:37,071][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004344_2224128.pth
+-[2023-09-19 11:25:40,045][73219] Updated weights for policy 1, policy_version 4520 (0.0014)
+-[2023-09-19 11:25:40,045][73145] Updated weights for policy 0, policy_version 4560 (0.0014)
+-[2023-09-19 11:25:42,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4657152. Throughput: 0: 3196.5, 1: 3196.4. Samples: 2219366. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:25:42,044][72530] Avg episode reward: [(0, '151440.031'), (1, '161613.003')]
+-[2023-09-19 11:25:47,043][72530] Fps is (10 sec: 6553.8, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4689920. Throughput: 0: 3225.5, 1: 3225.4. Samples: 2256988. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:47,044][72530] Avg episode reward: [(0, '155863.155'), (1, '161674.332')]
+-[2023-09-19 11:25:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4722688. Throughput: 0: 3263.8, 1: 3262.1. Samples: 2276194. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:52,044][72530] Avg episode reward: [(0, '154315.726'), (1, '161202.405')]
+-[2023-09-19 11:25:52,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004592_2351104.pth...
+-[2023-09-19 11:25:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004632_2371584.pth...
+-[2023-09-19 11:25:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004408_2256896.pth
+-[2023-09-19 11:25:52,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004448_2277376.pth
+-[2023-09-19 11:25:52,782][73219] Updated weights for policy 1, policy_version 4600 (0.0014)
+-[2023-09-19 11:25:52,782][73145] Updated weights for policy 0, policy_version 4640 (0.0014)
+-[2023-09-19 11:25:57,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4755456. Throughput: 0: 3211.2, 1: 3211.4. Samples: 2313256. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:25:57,044][72530] Avg episode reward: [(0, '150010.060'), (1, '160917.790')]
+-[2023-09-19 11:26:02,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 4788224. Throughput: 0: 3193.2, 1: 3192.7. Samples: 2351560. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:26:02,045][72530] Avg episode reward: [(0, '150263.831'), (1, '161029.917')]
+-[2023-09-19 11:26:05,903][73219] Updated weights for policy 1, policy_version 4680 (0.0013)
+-[2023-09-19 11:26:05,903][73145] Updated weights for policy 0, policy_version 4720 (0.0012)
+-[2023-09-19 11:26:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6417.1, 300 sec: 6220.4). Total num frames: 4812800. Throughput: 0: 3179.7, 1: 3179.7. Samples: 2369942. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:26:07,044][72530] Avg episode reward: [(0, '146564.474'), (1, '161044.989')]
+-[2023-09-19 11:26:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004680_2396160.pth...
+-[2023-09-19 11:26:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004720_2416640.pth...
+-[2023-09-19 11:26:07,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004536_2322432.pth
+-[2023-09-19 11:26:07,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004496_2301952.pth
+-[2023-09-19 11:26:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6417.0, 300 sec: 6220.4). Total num frames: 4845568. Throughput: 0: 3166.9, 1: 3166.1. Samples: 2406540. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:26:12,045][72530] Avg episode reward: [(0, '146583.424'), (1, '161100.378')]
+-[2023-09-19 11:26:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6280.5, 300 sec: 6220.4). Total num frames: 4870144. Throughput: 0: 3143.0, 1: 3143.1. Samples: 2439528. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:26:17,044][72530] Avg episode reward: [(0, '141842.532'), (1, '161215.328')]
+-[2023-09-19 11:26:19,884][73219] Updated weights for policy 1, policy_version 4760 (0.0016)
+-[2023-09-19 11:26:19,884][73145] Updated weights for policy 0, policy_version 4800 (0.0015)
+-[2023-09-19 11:26:22,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6144.0, 300 sec: 6220.4). Total num frames: 4902912. Throughput: 0: 3118.8, 1: 3118.8. Samples: 2458616. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:26:22,044][72530] Avg episode reward: [(0, '142452.610'), (1, '161748.126')]
+-[2023-09-19 11:26:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004768_2441216.pth...
+-[2023-09-19 11:26:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004808_2461696.pth...
+-[2023-09-19 11:26:22,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004592_2351104.pth
+-[2023-09-19 11:26:22,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004632_2371584.pth
+-[2023-09-19 11:26:27,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6220.4). Total num frames: 4935680. Throughput: 0: 3120.4, 1: 3120.6. Samples: 2500210. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:26:27,044][72530] Avg episode reward: [(0, '137895.579'), (1, '162103.130')]
+-[2023-09-19 11:26:27,045][73131] Saving new best policy, reward=162103.130!
+-[2023-09-19 11:26:32,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6220.4). Total num frames: 4968448. Throughput: 0: 3138.1, 1: 3139.4. Samples: 2539476. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:26:32,044][72530] Avg episode reward: [(0, '135551.322'), (1, '162133.015')]
+-[2023-09-19 11:26:32,045][73131] Saving new best policy, reward=162133.015!
+-[2023-09-19 11:26:32,157][73219] Updated weights for policy 1, policy_version 4840 (0.0010)
+-[2023-09-19 11:26:32,158][73145] Updated weights for policy 0, policy_version 4880 (0.0012)
+-[2023-09-19 11:26:37,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6220.4). Total num frames: 5001216. Throughput: 0: 3107.1, 1: 3108.5. Samples: 2555894. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:26:37,044][72530] Avg episode reward: [(0, '131208.771'), (1, '162213.080')]
+-[2023-09-19 11:26:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000004904_2510848.pth...
+-[2023-09-19 11:26:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004864_2490368.pth...
+-[2023-09-19 11:26:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004680_2396160.pth
+-[2023-09-19 11:26:37,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004720_2416640.pth
+-[2023-09-19 11:26:37,065][73131] Saving new best policy, reward=162213.080!
+-[2023-09-19 11:26:42,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5033984. Throughput: 0: 3145.9, 1: 3145.9. Samples: 2596386. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:26:42,044][72530] Avg episode reward: [(0, '130942.941'), (1, '162247.080')]
+-[2023-09-19 11:26:42,045][73131] Saving new best policy, reward=162247.080!
+-[2023-09-19 11:26:44,999][73145] Updated weights for policy 0, policy_version 4960 (0.0011)
+-[2023-09-19 11:26:44,999][73219] Updated weights for policy 1, policy_version 4920 (0.0015)
+-[2023-09-19 11:26:47,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5066752. Throughput: 0: 3150.9, 1: 3150.9. Samples: 2635140. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:26:47,044][72530] Avg episode reward: [(0, '129912.953'), (1, '162118.110')]
+-[2023-09-19 11:26:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5099520. Throughput: 0: 3158.5, 1: 3159.3. Samples: 2654242. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:26:52,044][72530] Avg episode reward: [(0, '128999.147'), (1, '162160.112')]
+-[2023-09-19 11:26:52,050][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005000_2560000.pth...
+-[2023-09-19 11:26:52,050][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000004960_2539520.pth...
+-[2023-09-19 11:26:52,057][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004808_2461696.pth
+-[2023-09-19 11:26:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004768_2441216.pth
+-[2023-09-19 11:26:57,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5132288. Throughput: 0: 3168.7, 1: 3168.5. Samples: 2691716. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:26:57,045][72530] Avg episode reward: [(0, '133141.196'), (1, '161915.344')]
+-[2023-09-19 11:26:57,786][73219] Updated weights for policy 1, policy_version 5000 (0.0015)
+-[2023-09-19 11:26:57,787][73145] Updated weights for policy 0, policy_version 5040 (0.0011)
+-[2023-09-19 11:27:02,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5165056. Throughput: 0: 3210.7, 1: 3210.9. Samples: 2728500. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:27:02,044][72530] Avg episode reward: [(0, '134217.012'), (1, '161927.050')]
+-[2023-09-19 11:27:07,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6280.5, 300 sec: 6220.4). Total num frames: 5189632. Throughput: 0: 3215.4, 1: 3215.5. Samples: 2748004. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:27:07,044][72530] Avg episode reward: [(0, '139484.494'), (1, '161928.042')]
+-[2023-09-19 11:27:07,058][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005056_2588672.pth...
+-[2023-09-19 11:27:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004864_2490368.pth
+-[2023-09-19 11:27:07,070][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005096_2609152.pth...
+-[2023-09-19 11:27:07,073][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000004904_2510848.pth
+-[2023-09-19 11:27:10,937][73145] Updated weights for policy 0, policy_version 5120 (0.0015)
+-[2023-09-19 11:27:10,937][73219] Updated weights for policy 1, policy_version 5080 (0.0015)
+-[2023-09-19 11:27:12,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6280.6, 300 sec: 6248.1). Total num frames: 5222400. Throughput: 0: 3173.6, 1: 3173.6. Samples: 2785834. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:27:12,044][72530] Avg episode reward: [(0, '140906.115'), (1, '161939.285')]
+-[2023-09-19 11:27:17,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 5255168. Throughput: 0: 3151.1, 1: 3149.6. Samples: 2823006. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:17,045][72530] Avg episode reward: [(0, '145817.026'), (1, '161027.902')]
+-[2023-09-19 11:27:22,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 5287936. Throughput: 0: 3185.7, 1: 3185.7. Samples: 2842604. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:22,044][72530] Avg episode reward: [(0, '142531.824'), (1, '161093.623')]
+-[2023-09-19 11:27:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005184_2654208.pth...
+-[2023-09-19 11:27:22,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005144_2633728.pth...
+-[2023-09-19 11:27:22,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005000_2560000.pth
+-[2023-09-19 11:27:22,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000004960_2539520.pth
+-[2023-09-19 11:27:23,721][73145] Updated weights for policy 0, policy_version 5200 (0.0010)
+-[2023-09-19 11:27:23,721][73219] Updated weights for policy 1, policy_version 5160 (0.0013)
+-[2023-09-19 11:27:27,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6417.1, 300 sec: 6248.1). Total num frames: 5320704. Throughput: 0: 3188.8, 1: 3189.0. Samples: 2883390. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:27:27,044][72530] Avg episode reward: [(0, '142889.954'), (1, '161067.955')]
+-[2023-09-19 11:27:32,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.0, 300 sec: 6248.1). Total num frames: 5353472. Throughput: 0: 3127.8, 1: 3128.0. Samples: 2916652. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:32,044][72530] Avg episode reward: [(0, '142889.954'), (1, '161121.140')]
+-[2023-09-19 11:27:37,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6220.4). Total num frames: 5378048. Throughput: 0: 3133.1, 1: 3132.4. Samples: 2936188. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:37,045][72530] Avg episode reward: [(0, '142062.299'), (1, '159603.215')]
+-[2023-09-19 11:27:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005232_2678784.pth...
+-[2023-09-19 11:27:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005272_2699264.pth...
+-[2023-09-19 11:27:37,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005056_2588672.pth
+-[2023-09-19 11:27:37,066][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005096_2609152.pth
+-[2023-09-19 11:27:37,396][73145] Updated weights for policy 0, policy_version 5280 (0.0011)
+-[2023-09-19 11:27:37,396][73219] Updated weights for policy 1, policy_version 5240 (0.0014)
+-[2023-09-19 11:27:42,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5410816. Throughput: 0: 3131.2, 1: 3132.0. Samples: 2973558. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:42,045][72530] Avg episode reward: [(0, '142062.299'), (1, '159574.648')]
+-[2023-09-19 11:27:47,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6248.1). Total num frames: 5443584. Throughput: 0: 3132.0, 1: 3132.0. Samples: 3010382. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:27:47,045][72530] Avg episode reward: [(0, '143548.226'), (1, '160565.889')]
+-[2023-09-19 11:27:50,044][73145] Updated weights for policy 0, policy_version 5360 (0.0013)
+-[2023-09-19 11:27:50,045][73219] Updated weights for policy 1, policy_version 5320 (0.0012)
+-[2023-09-19 11:27:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 5476352. Throughput: 0: 3144.9, 1: 3145.5. Samples: 3031074. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:52,044][72530] Avg episode reward: [(0, '143548.226'), (1, '160563.875')]
+-[2023-09-19 11:27:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005368_2748416.pth...
+-[2023-09-19 11:27:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005328_2727936.pth...
+-[2023-09-19 11:27:52,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005184_2654208.pth
+-[2023-09-19 11:27:52,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005144_2633728.pth
+-[2023-09-19 11:27:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6248.1). Total num frames: 5500928. Throughput: 0: 3089.5, 1: 3090.1. Samples: 3063918. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:27:57,045][72530] Avg episode reward: [(0, '146270.433'), (1, '160418.331')]
+-[2023-09-19 11:28:02,043][72530] Fps is (10 sec: 5734.6, 60 sec: 6144.0, 300 sec: 6275.9). Total num frames: 5533696. Throughput: 0: 3077.2, 1: 3077.4. Samples: 3099964. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:28:02,044][72530] Avg episode reward: [(0, '146167.425'), (1, '160418.331')]
+-[2023-09-19 11:28:04,104][73219] Updated weights for policy 1, policy_version 5400 (0.0013)
+-[2023-09-19 11:28:04,105][73145] Updated weights for policy 0, policy_version 5440 (0.0014)
+-[2023-09-19 11:28:07,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 5566464. Throughput: 0: 3076.4, 1: 3075.6. Samples: 3119446. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:07,044][72530] Avg episode reward: [(0, '146743.667'), (1, '161871.065')]
+-[2023-09-19 11:28:07,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005456_2793472.pth...
+-[2023-09-19 11:28:07,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005416_2772992.pth...
+-[2023-09-19 11:28:07,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005272_2699264.pth
+-[2023-09-19 11:28:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005232_2678784.pth
+-[2023-09-19 11:28:12,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 5599232. Throughput: 0: 3039.7, 1: 3039.5. Samples: 3156956. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:12,044][72530] Avg episode reward: [(0, '146503.770'), (1, '161818.415')]
+-[2023-09-19 11:28:17,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6248.1). Total num frames: 5623808. Throughput: 0: 2895.2, 1: 2895.1. Samples: 3177218. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:28:17,044][72530] Avg episode reward: [(0, '146720.459'), (1, '161769.023')]
+-[2023-09-19 11:28:17,068][73145] Updated weights for policy 0, policy_version 5520 (0.0013)
+-[2023-09-19 11:28:17,069][73219] Updated weights for policy 1, policy_version 5480 (0.0012)
+-[2023-09-19 11:28:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6248.1). Total num frames: 5656576. Throughput: 0: 3086.3, 1: 3086.2. Samples: 3213950. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:28:22,044][72530] Avg episode reward: [(0, '147474.306'), (1, '161768.790')]
+-[2023-09-19 11:28:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005504_2818048.pth...
+-[2023-09-19 11:28:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005544_2838528.pth...
+-[2023-09-19 11:28:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005328_2727936.pth
+-[2023-09-19 11:28:22,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005368_2748416.pth
+-[2023-09-19 11:28:27,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6144.0, 300 sec: 6275.9). Total num frames: 5689344. Throughput: 0: 3045.5, 1: 3044.6. Samples: 3247612. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:27,044][72530] Avg episode reward: [(0, '146171.268'), (1, '161578.478')]
+-[2023-09-19 11:28:30,639][73219] Updated weights for policy 1, policy_version 5560 (0.0007)
+-[2023-09-19 11:28:30,640][73145] Updated weights for policy 0, policy_version 5600 (0.0014)
+-[2023-09-19 11:28:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6248.1). Total num frames: 5713920. Throughput: 0: 3050.4, 1: 3051.6. Samples: 3284968. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:32,044][72530] Avg episode reward: [(0, '143678.262'), (1, '161606.329')]
+-[2023-09-19 11:28:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6248.1). Total num frames: 5746688. Throughput: 0: 3003.7, 1: 3003.7. Samples: 3301410. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:37,044][72530] Avg episode reward: [(0, '147339.327'), (1, '161677.535')]
+-[2023-09-19 11:28:37,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005632_2883584.pth...
+-[2023-09-19 11:28:37,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005592_2863104.pth...
+-[2023-09-19 11:28:37,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005456_2793472.pth
+-[2023-09-19 11:28:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005416_2772992.pth
+-[2023-09-19 11:28:42,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6144.0, 300 sec: 6275.9). Total num frames: 5779456. Throughput: 0: 3087.8, 1: 3087.2. Samples: 3341792. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:28:42,044][72530] Avg episode reward: [(0, '149917.742'), (1, '161865.342')]
+-[2023-09-19 11:28:43,733][73145] Updated weights for policy 0, policy_version 5680 (0.0015)
+-[2023-09-19 11:28:43,734][73219] Updated weights for policy 1, policy_version 5640 (0.0014)
+-[2023-09-19 11:28:47,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6144.0, 300 sec: 6275.9). Total num frames: 5812224. Throughput: 0: 3101.5, 1: 3101.5. Samples: 3379098. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:47,044][72530] Avg episode reward: [(0, '150038.622'), (1, '161982.019')]
+-[2023-09-19 11:28:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6144.0, 300 sec: 6275.9). Total num frames: 5844992. Throughput: 0: 3141.9, 1: 3141.4. Samples: 3402194. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:28:52,044][72530] Avg episode reward: [(0, '144197.749'), (1, '162024.019')]
+-[2023-09-19 11:28:52,065][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005696_2916352.pth...
+-[2023-09-19 11:28:52,067][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005736_2936832.pth...
+-[2023-09-19 11:28:52,069][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005504_2818048.pth
+-[2023-09-19 11:28:52,072][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005544_2838528.pth
+-[2023-09-19 11:28:56,545][73145] Updated weights for policy 0, policy_version 5760 (0.0009)
+-[2023-09-19 11:28:56,546][73219] Updated weights for policy 1, policy_version 5720 (0.0014)
+-[2023-09-19 11:28:57,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6303.7). Total num frames: 5877760. Throughput: 0: 3141.5, 1: 3142.4. Samples: 3439730. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:28:57,044][72530] Avg episode reward: [(0, '138885.538'), (1, '162345.734')]
+-[2023-09-19 11:28:57,045][73131] Saving new best policy, reward=162345.734!
+-[2023-09-19 11:29:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6275.9). Total num frames: 5902336. Throughput: 0: 3289.1, 1: 3289.5. Samples: 3473254. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:02,044][72530] Avg episode reward: [(0, '134517.899'), (1, '162329.045')]
+-[2023-09-19 11:29:07,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6331.4). Total num frames: 5943296. Throughput: 0: 3101.0, 1: 3100.8. Samples: 3493030. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:07,045][72530] Avg episode reward: [(0, '128642.101'), (1, '162318.341')]
+-[2023-09-19 11:29:07,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005784_2961408.pth...
+-[2023-09-19 11:29:07,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005824_2981888.pth...
+-[2023-09-19 11:29:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005592_2863104.pth
+-[2023-09-19 11:29:07,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005632_2883584.pth
+-[2023-09-19 11:29:09,339][73145] Updated weights for policy 0, policy_version 5840 (0.0012)
+-[2023-09-19 11:29:09,340][73219] Updated weights for policy 1, policy_version 5800 (0.0013)
+-[2023-09-19 11:29:12,043][72530] Fps is (10 sec: 7372.7, 60 sec: 6280.5, 300 sec: 6303.7). Total num frames: 5976064. Throughput: 0: 3166.1, 1: 3166.1. Samples: 3532560. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:12,044][72530] Avg episode reward: [(0, '118954.396'), (1, '162291.376')]
+-[2023-09-19 11:29:17,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6417.1, 300 sec: 6303.7). Total num frames: 6008832. Throughput: 0: 3187.8, 1: 3186.9. Samples: 3571832. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:29:17,044][72530] Avg episode reward: [(0, '114412.470'), (1, '162295.847')]
+-[2023-09-19 11:29:21,826][73145] Updated weights for policy 0, policy_version 5920 (0.0012)
+-[2023-09-19 11:29:21,826][73219] Updated weights for policy 1, policy_version 5880 (0.0015)
+-[2023-09-19 11:29:22,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6331.4). Total num frames: 6041600. Throughput: 0: 3238.0, 1: 3237.2. Samples: 3592796. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:22,044][72530] Avg episode reward: [(0, '112785.270'), (1, '162279.161')]
+-[2023-09-19 11:29:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005880_3010560.pth...
+-[2023-09-19 11:29:22,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000005920_3031040.pth...
+-[2023-09-19 11:29:22,057][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005696_2916352.pth
+-[2023-09-19 11:29:22,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005736_2936832.pth
+-[2023-09-19 11:29:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 6066176. Throughput: 0: 3176.2, 1: 3176.2. Samples: 3627652. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:27,044][72530] Avg episode reward: [(0, '112913.414'), (1, '161752.422')]
+-[2023-09-19 11:29:32,053][72530] Fps is (10 sec: 5728.9, 60 sec: 6416.0, 300 sec: 6303.5). Total num frames: 6098944. Throughput: 0: 3140.7, 1: 3142.2. Samples: 3661888. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:32,055][72530] Avg episode reward: [(0, '114352.426'), (1, '160799.718')]
+-[2023-09-19 11:29:36,186][73145] Updated weights for policy 0, policy_version 6000 (0.0013)
+-[2023-09-19 11:29:36,187][73219] Updated weights for policy 1, policy_version 5960 (0.0013)
+-[2023-09-19 11:29:37,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 6123520. Throughput: 0: 3071.6, 1: 3072.0. Samples: 3678658. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:37,044][72530] Avg episode reward: [(0, '114436.532'), (1, '160786.962')]
+-[2023-09-19 11:29:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000005960_3051520.pth...
+-[2023-09-19 11:29:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006000_3072000.pth...
+-[2023-09-19 11:29:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005784_2961408.pth
+-[2023-09-19 11:29:37,067][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005824_2981888.pth
+-[2023-09-19 11:29:42,043][72530] Fps is (10 sec: 5740.0, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 6156288. Throughput: 0: 3094.3, 1: 3094.0. Samples: 3718202. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:42,044][72530] Avg episode reward: [(0, '115826.562'), (1, '160797.026')]
+-[2023-09-19 11:29:47,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6275.9). Total num frames: 6189056. Throughput: 0: 3107.9, 1: 3107.4. Samples: 3752942. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:29:47,044][72530] Avg episode reward: [(0, '116442.200'), (1, '160802.531')]
+-[2023-09-19 11:29:49,303][73219] Updated weights for policy 1, policy_version 6040 (0.0013)
+-[2023-09-19 11:29:49,304][73145] Updated weights for policy 0, policy_version 6080 (0.0015)
+-[2023-09-19 11:29:52,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6248.1). Total num frames: 6213632. Throughput: 0: 3115.6, 1: 3116.0. Samples: 3773450. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:29:52,044][72530] Avg episode reward: [(0, '116782.945'), (1, '160812.704')]
+-[2023-09-19 11:29:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006048_3096576.pth...
+-[2023-09-19 11:29:52,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006088_3117056.pth...
+-[2023-09-19 11:29:52,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005880_3010560.pth
+-[2023-09-19 11:29:52,066][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000005920_3031040.pth
+-[2023-09-19 11:29:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6248.1). Total num frames: 6246400. Throughput: 0: 3059.6, 1: 3059.6. Samples: 3807922. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:29:57,044][72530] Avg episode reward: [(0, '116782.945'), (1, '160801.177')]
+-[2023-09-19 11:30:02,043][72530] Fps is (10 sec: 6144.0, 60 sec: 6212.2, 300 sec: 6262.0). Total num frames: 6275072. Throughput: 0: 2823.3, 1: 2823.1. Samples: 3825920. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:02,045][72530] Avg episode reward: [(0, '113033.870'), (1, '161341.845')]
+-[2023-09-19 11:30:03,323][73145] Updated weights for policy 0, policy_version 6160 (0.0013)
+-[2023-09-19 11:30:03,323][73219] Updated weights for policy 1, policy_version 6120 (0.0015)
+-[2023-09-19 11:30:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6007.5, 300 sec: 6248.1). Total num frames: 6303744. Throughput: 0: 2978.1, 1: 2978.2. Samples: 3860830. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:07,045][72530] Avg episode reward: [(0, '113033.870'), (1, '161341.845')]
+-[2023-09-19 11:30:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006136_3141632.pth...
+-[2023-09-19 11:30:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006176_3162112.pth...
+-[2023-09-19 11:30:07,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006000_3072000.pth
+-[2023-09-19 11:30:07,066][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000005960_3051520.pth
+-[2023-09-19 11:30:12,043][72530] Fps is (10 sec: 6144.1, 60 sec: 6007.5, 300 sec: 6248.1). Total num frames: 6336512. Throughput: 0: 3016.4, 1: 3017.1. Samples: 3899160. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:30:12,044][72530] Avg episode reward: [(0, '110251.349'), (1, '161327.497')]
+-[2023-09-19 11:30:16,823][73219] Updated weights for policy 1, policy_version 6200 (0.0013)
+-[2023-09-19 11:30:16,823][73145] Updated weights for policy 0, policy_version 6240 (0.0011)
+-[2023-09-19 11:30:17,043][72530] Fps is (10 sec: 6553.8, 60 sec: 6007.5, 300 sec: 6220.4). Total num frames: 6369280. Throughput: 0: 3019.8, 1: 3018.5. Samples: 3933552. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:30:17,044][72530] Avg episode reward: [(0, '110867.577'), (1, '161327.497')]
+-[2023-09-19 11:30:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5870.9, 300 sec: 6220.4). Total num frames: 6393856. Throughput: 0: 3036.0, 1: 3036.0. Samples: 3951900. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:22,044][72530] Avg episode reward: [(0, '112712.732'), (1, '161204.256')]
+-[2023-09-19 11:30:22,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006264_3207168.pth...
+-[2023-09-19 11:30:22,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006224_3186688.pth...
+-[2023-09-19 11:30:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006088_3117056.pth
+-[2023-09-19 11:30:22,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006048_3096576.pth
+-[2023-09-19 11:30:27,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5870.9, 300 sec: 6192.6). Total num frames: 6418432. Throughput: 0: 2950.9, 1: 2950.4. Samples: 3983760. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:27,044][72530] Avg episode reward: [(0, '113392.546'), (1, '161125.113')]
+-[2023-09-19 11:30:31,119][73145] Updated weights for policy 0, policy_version 6320 (0.0012)
+-[2023-09-19 11:30:31,119][73219] Updated weights for policy 1, policy_version 6280 (0.0013)
+-[2023-09-19 11:30:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5871.9, 300 sec: 6192.6). Total num frames: 6451200. Throughput: 0: 2960.9, 1: 2961.4. Samples: 4019448. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:32,044][72530] Avg episode reward: [(0, '113475.235'), (1, '161107.375')]
+-[2023-09-19 11:30:37,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5870.9, 300 sec: 6164.8). Total num frames: 6475776. Throughput: 0: 2884.0, 1: 2883.7. Samples: 4032994. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:37,045][72530] Avg episode reward: [(0, '118367.276'), (1, '160625.932')]
+-[2023-09-19 11:30:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006344_3248128.pth...
+-[2023-09-19 11:30:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006304_3227648.pth...
+-[2023-09-19 11:30:37,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006176_3162112.pth
+-[2023-09-19 11:30:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006136_3141632.pth
+-[2023-09-19 11:30:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 6164.8). Total num frames: 6508544. Throughput: 0: 2926.3, 1: 2927.1. Samples: 4071324. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:42,044][72530] Avg episode reward: [(0, '121712.371'), (1, '160963.772')]
+-[2023-09-19 11:30:44,991][73145] Updated weights for policy 0, policy_version 6400 (0.0014)
+-[2023-09-19 11:30:44,991][73219] Updated weights for policy 1, policy_version 6360 (0.0016)
+-[2023-09-19 11:30:47,043][72530] Fps is (10 sec: 6553.7, 60 sec: 5870.9, 300 sec: 6164.8). Total num frames: 6541312. Throughput: 0: 3155.9, 1: 3155.8. Samples: 4109948. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:47,044][72530] Avg episode reward: [(0, '130906.514'), (1, '160996.680')]
+-[2023-09-19 11:30:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6007.5, 300 sec: 6164.8). Total num frames: 6574080. Throughput: 0: 2977.7, 1: 2978.5. Samples: 4128860. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:30:52,044][72530] Avg episode reward: [(0, '132721.030'), (1, '160740.250')]
+-[2023-09-19 11:30:52,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006400_3276800.pth...
+-[2023-09-19 11:30:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006440_3297280.pth...
+-[2023-09-19 11:30:52,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006264_3207168.pth
+-[2023-09-19 11:30:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006224_3186688.pth
+-[2023-09-19 11:30:57,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6007.5, 300 sec: 6164.8). Total num frames: 6606848. Throughput: 0: 2999.1, 1: 2998.3. Samples: 4169040. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:30:57,044][72530] Avg episode reward: [(0, '135319.795'), (1, '160381.100')]
+-[2023-09-19 11:30:57,551][73145] Updated weights for policy 0, policy_version 6480 (0.0013)
+-[2023-09-19 11:30:57,551][73219] Updated weights for policy 1, policy_version 6440 (0.0014)
+-[2023-09-19 11:31:02,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6075.8, 300 sec: 6192.6). Total num frames: 6639616. Throughput: 0: 2999.7, 1: 2999.9. Samples: 4203534. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:31:02,044][72530] Avg episode reward: [(0, '138147.376'), (1, '160443.505')]
+-[2023-09-19 11:31:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6164.8). Total num frames: 6664192. Throughput: 0: 2995.4, 1: 2995.4. Samples: 4221484. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:31:07,044][72530] Avg episode reward: [(0, '140607.383'), (1, '160444.041')]
+-[2023-09-19 11:31:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006488_3321856.pth...
+-[2023-09-19 11:31:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006528_3342336.pth...
+-[2023-09-19 11:31:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006304_3227648.pth
+-[2023-09-19 11:31:07,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006344_3248128.pth
+-[2023-09-19 11:31:11,438][73219] Updated weights for policy 1, policy_version 6520 (0.0012)
+-[2023-09-19 11:31:11,438][73145] Updated weights for policy 0, policy_version 6560 (0.0013)
+-[2023-09-19 11:31:12,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6007.5, 300 sec: 6192.6). Total num frames: 6696960. Throughput: 0: 3048.7, 1: 3048.4. Samples: 4258132. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:31:12,044][72530] Avg episode reward: [(0, '140794.377'), (1, '161324.369')]
+-[2023-09-19 11:31:17,043][72530] Fps is (10 sec: 6553.6, 60 sec: 6007.4, 300 sec: 6192.6). Total num frames: 6729728. Throughput: 0: 3077.7, 1: 3077.3. Samples: 4296424. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:31:17,045][72530] Avg episode reward: [(0, '146158.608'), (1, '161421.999')]
+-[2023-09-19 11:31:22,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6144.0, 300 sec: 6192.6). Total num frames: 6762496. Throughput: 0: 3152.2, 1: 3152.3. Samples: 4316696. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:31:22,044][72530] Avg episode reward: [(0, '147169.785'), (1, '161725.816')]
+-[2023-09-19 11:31:22,057][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006624_3391488.pth...
+-[2023-09-19 11:31:22,057][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006584_3371008.pth...
+-[2023-09-19 11:31:22,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006400_3276800.pth
+-[2023-09-19 11:31:22,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006440_3297280.pth
+-[2023-09-19 11:31:24,435][73145] Updated weights for policy 0, policy_version 6640 (0.0016)
+-[2023-09-19 11:31:24,436][73219] Updated weights for policy 1, policy_version 6600 (0.0016)
+-[2023-09-19 11:31:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6164.8). Total num frames: 6787072. Throughput: 0: 3107.4, 1: 3106.5. Samples: 4350952. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:31:27,044][72530] Avg episode reward: [(0, '147753.216'), (1, '161687.670')]
+-[2023-09-19 11:31:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 6144.0, 300 sec: 6164.8). Total num frames: 6819840. Throughput: 0: 3071.0, 1: 3071.2. Samples: 4386346. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:31:32,044][72530] Avg episode reward: [(0, '147753.216'), (1, '161687.670')]
+-[2023-09-19 11:31:37,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6164.8). Total num frames: 6852608. Throughput: 0: 3060.3, 1: 3059.2. Samples: 4404236. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:31:37,044][72530] Avg episode reward: [(0, '155757.680'), (1, '162057.882')]
+-[2023-09-19 11:31:37,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006672_3416064.pth...
+-[2023-09-19 11:31:37,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006712_3436544.pth...
+-[2023-09-19 11:31:37,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006528_3342336.pth
+-[2023-09-19 11:31:37,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006488_3321856.pth
+-[2023-09-19 11:31:38,241][73145] Updated weights for policy 0, policy_version 6720 (0.0015)
+-[2023-09-19 11:31:38,242][73219] Updated weights for policy 1, policy_version 6680 (0.0014)
+-[2023-09-19 11:31:42,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6137.1). Total num frames: 6877184. Throughput: 0: 3009.5, 1: 3010.6. Samples: 4439946. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:31:42,045][72530] Avg episode reward: [(0, '155757.680'), (1, '162057.882')]
+-[2023-09-19 11:31:47,043][72530] Fps is (10 sec: 5734.6, 60 sec: 6144.0, 300 sec: 6137.1). Total num frames: 6909952. Throughput: 0: 3026.3, 1: 3026.0. Samples: 4475886. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:31:47,044][72530] Avg episode reward: [(0, '156604.047'), (1, '162030.805')]
+-[2023-09-19 11:31:51,596][73145] Updated weights for policy 0, policy_version 6800 (0.0012)
+-[2023-09-19 11:31:51,596][73219] Updated weights for policy 1, policy_version 6760 (0.0015)
+-[2023-09-19 11:31:52,043][72530] Fps is (10 sec: 6553.7, 60 sec: 6144.0, 300 sec: 6137.1). Total num frames: 6942720. Throughput: 0: 3052.5, 1: 3052.3. Samples: 4496200. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:31:52,044][72530] Avg episode reward: [(0, '154864.725'), (1, '162040.931')]
+-[2023-09-19 11:31:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006800_3481600.pth...
+-[2023-09-19 11:31:52,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006760_3461120.pth...
+-[2023-09-19 11:31:52,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006624_3391488.pth
+-[2023-09-19 11:31:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006584_3371008.pth
+-[2023-09-19 11:31:57,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6144.0, 300 sec: 6137.1). Total num frames: 6975488. Throughput: 0: 3054.7, 1: 3054.8. Samples: 4533058. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:31:57,044][72530] Avg episode reward: [(0, '150963.742'), (1, '162180.754')]
+-[2023-09-19 11:32:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6137.1). Total num frames: 7000064. Throughput: 0: 3051.5, 1: 3052.9. Samples: 4571120. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:32:02,044][72530] Avg episode reward: [(0, '150392.522'), (1, '162181.685')]
+-[2023-09-19 11:32:04,785][73145] Updated weights for policy 0, policy_version 6880 (0.0009)
+-[2023-09-19 11:32:04,786][73219] Updated weights for policy 1, policy_version 6840 (0.0010)
+-[2023-09-19 11:32:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6137.1). Total num frames: 7032832. Throughput: 0: 3026.7, 1: 3026.7. Samples: 4589098. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:32:07,044][72530] Avg episode reward: [(0, '145583.491'), (1, '162297.023')]
+-[2023-09-19 11:32:07,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006848_3506176.pth...
+-[2023-09-19 11:32:07,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006888_3526656.pth...
+-[2023-09-19 11:32:07,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006672_3416064.pth
+-[2023-09-19 11:32:07,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006712_3436544.pth
+-[2023-09-19 11:32:12,043][72530] Fps is (10 sec: 6553.5, 60 sec: 6144.0, 300 sec: 6137.1). Total num frames: 7065600. Throughput: 0: 3022.0, 1: 3022.0. Samples: 4622936. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:32:12,045][72530] Avg episode reward: [(0, '144862.441'), (1, '162339.817')]
+-[2023-09-19 11:32:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6109.3). Total num frames: 7090176. Throughput: 0: 3051.5, 1: 3052.3. Samples: 4661014. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:32:17,045][72530] Avg episode reward: [(0, '140160.091'), (1, '162747.216')]
+-[2023-09-19 11:32:17,046][73131] Saving new best policy, reward=162747.216!
+-[2023-09-19 11:32:18,733][73145] Updated weights for policy 0, policy_version 6960 (0.0013)
+-[2023-09-19 11:32:18,734][73219] Updated weights for policy 1, policy_version 6920 (0.0014)
+-[2023-09-19 11:32:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6109.3). Total num frames: 7122944. Throughput: 0: 3035.2, 1: 3036.6. Samples: 4677464. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:32:22,045][72530] Avg episode reward: [(0, '138598.620'), (1, '162798.619')]
+-[2023-09-19 11:32:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000006936_3551232.pth...
+-[2023-09-19 11:32:22,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000006976_3571712.pth...
+-[2023-09-19 11:32:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006760_3461120.pth
+-[2023-09-19 11:32:22,063][73131] Saving new best policy, reward=162798.619!
+-[2023-09-19 11:32:22,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006800_3481600.pth
+-[2023-09-19 11:32:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6081.5). Total num frames: 7147520. Throughput: 0: 3005.6, 1: 3006.0. Samples: 4710464. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:32:27,044][72530] Avg episode reward: [(0, '140889.402'), (1, '162879.076')]
+-[2023-09-19 11:32:27,046][73131] Saving new best policy, reward=162879.076!
+-[2023-09-19 11:32:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 6007.4, 300 sec: 6109.3). Total num frames: 7180288. Throughput: 0: 3006.3, 1: 3006.6. Samples: 4746466. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:32:32,045][72530] Avg episode reward: [(0, '146675.404'), (1, '162955.653')]
+-[2023-09-19 11:32:32,046][73131] Saving new best policy, reward=162955.653!
+-[2023-09-19 11:32:33,165][73219] Updated weights for policy 1, policy_version 7000 (0.0013)
+-[2023-09-19 11:32:33,166][73145] Updated weights for policy 0, policy_version 7040 (0.0014)
+-[2023-09-19 11:32:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 6081.5). Total num frames: 7204864. Throughput: 0: 2926.2, 1: 2927.6. Samples: 4759620. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:32:37,044][72530] Avg episode reward: [(0, '146675.404'), (1, '162959.948')]
+-[2023-09-19 11:32:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007016_3592192.pth...
+-[2023-09-19 11:32:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007056_3612672.pth...
+-[2023-09-19 11:32:37,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006888_3526656.pth
+-[2023-09-19 11:32:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006848_3506176.pth
+-[2023-09-19 11:32:37,061][73131] Saving new best policy, reward=162959.948!
+-[2023-09-19 11:32:42,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5870.9, 300 sec: 6053.7). Total num frames: 7229440. Throughput: 0: 2894.3, 1: 2894.4. Samples: 4793548. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:32:42,045][72530] Avg episode reward: [(0, '152846.566'), (1, '163027.662')]
+-[2023-09-19 11:32:42,046][73131] Saving new best policy, reward=163027.662!
+-[2023-09-19 11:32:47,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5870.9, 300 sec: 6053.8). Total num frames: 7262208. Throughput: 0: 2871.0, 1: 2869.7. Samples: 4829452. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:32:47,044][72530] Avg episode reward: [(0, '152846.566'), (1, '163027.662')]
+-[2023-09-19 11:32:47,786][73145] Updated weights for policy 0, policy_version 7120 (0.0013)
+-[2023-09-19 11:32:47,786][73219] Updated weights for policy 1, policy_version 7080 (0.0013)
+-[2023-09-19 11:32:52,043][72530] Fps is (10 sec: 6553.5, 60 sec: 5870.9, 300 sec: 6081.5). Total num frames: 7294976. Throughput: 0: 2870.4, 1: 2870.4. Samples: 4847430. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:32:52,044][72530] Avg episode reward: [(0, '158683.654'), (1, '163077.657')]
+-[2023-09-19 11:32:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007104_3637248.pth...
+-[2023-09-19 11:32:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007144_3657728.pth...
+-[2023-09-19 11:32:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000006936_3551232.pth
+-[2023-09-19 11:32:52,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000006976_3571712.pth
+-[2023-09-19 11:32:52,062][73131] Saving new best policy, reward=163077.657!
+-[2023-09-19 11:32:57,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 6053.7). Total num frames: 7319552. Throughput: 0: 2884.6, 1: 2885.0. Samples: 4882566. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:32:57,044][72530] Avg episode reward: [(0, '159312.798'), (1, '163099.001')]
+-[2023-09-19 11:32:57,046][73131] Saving new best policy, reward=163099.001!
+-[2023-09-19 11:33:01,325][73145] Updated weights for policy 0, policy_version 7200 (0.0015)
+-[2023-09-19 11:33:01,326][73219] Updated weights for policy 1, policy_version 7160 (0.0014)
+-[2023-09-19 11:33:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5870.9, 300 sec: 6053.8). Total num frames: 7352320. Throughput: 0: 2874.4, 1: 2873.4. Samples: 4919666. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:02,044][72530] Avg episode reward: [(0, '154677.624'), (1, '162411.671')]
+-[2023-09-19 11:33:07,043][72530] Fps is (10 sec: 6553.6, 60 sec: 5870.9, 300 sec: 6053.7). Total num frames: 7385088. Throughput: 0: 2888.5, 1: 2887.3. Samples: 4937376. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:07,044][72530] Avg episode reward: [(0, '154955.709'), (1, '162052.665')]
+-[2023-09-19 11:33:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007232_3702784.pth...
+-[2023-09-19 11:33:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007192_3682304.pth...
+-[2023-09-19 11:33:07,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007056_3612672.pth
+-[2023-09-19 11:33:07,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007016_3592192.pth
+-[2023-09-19 11:33:12,043][72530] Fps is (10 sec: 6553.5, 60 sec: 5870.9, 300 sec: 6081.5). Total num frames: 7417856. Throughput: 0: 2946.5, 1: 2945.3. Samples: 4975594. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:12,044][72530] Avg episode reward: [(0, '152248.569'), (1, '160136.631')]
+-[2023-09-19 11:33:14,671][73219] Updated weights for policy 1, policy_version 7240 (0.0012)
+-[2023-09-19 11:33:14,672][73145] Updated weights for policy 0, policy_version 7280 (0.0010)
+-[2023-09-19 11:33:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 6053.7). Total num frames: 7442432. Throughput: 0: 2951.6, 1: 2951.3. Samples: 5012094. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:17,044][72530] Avg episode reward: [(0, '148744.362'), (1, '160112.522')]
+-[2023-09-19 11:33:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 6053.7). Total num frames: 7475200. Throughput: 0: 3001.9, 1: 3001.6. Samples: 5029778. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:22,044][72530] Avg episode reward: [(0, '143773.891'), (1, '160005.506')]
+-[2023-09-19 11:33:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007280_3727360.pth...
+-[2023-09-19 11:33:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007320_3747840.pth...
+-[2023-09-19 11:33:22,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007144_3657728.pth
+-[2023-09-19 11:33:22,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007104_3637248.pth
+-[2023-09-19 11:33:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 6053.7). Total num frames: 7499776. Throughput: 0: 2989.4, 1: 2990.7. Samples: 5062652. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:33:27,045][72530] Avg episode reward: [(0, '140255.889'), (1, '159095.967')]
+-[2023-09-19 11:33:28,731][73145] Updated weights for policy 0, policy_version 7360 (0.0015)
+-[2023-09-19 11:33:28,731][73219] Updated weights for policy 1, policy_version 7320 (0.0015)
+-[2023-09-19 11:33:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5870.9, 300 sec: 6053.8). Total num frames: 7532544. Throughput: 0: 2981.7, 1: 2982.1. Samples: 5097822. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:33:32,044][72530] Avg episode reward: [(0, '141984.249'), (1, '158018.359')]
+-[2023-09-19 11:33:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 6026.0). Total num frames: 7557120. Throughput: 0: 2948.8, 1: 2948.9. Samples: 5112824. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:33:37,044][72530] Avg episode reward: [(0, '143692.297'), (1, '152337.381')]
+-[2023-09-19 11:33:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007360_3768320.pth...
+-[2023-09-19 11:33:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007400_3788800.pth...
+-[2023-09-19 11:33:37,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007192_3682304.pth
+-[2023-09-19 11:33:37,066][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007232_3702784.pth
+-[2023-09-19 11:33:42,043][72530] Fps is (10 sec: 5734.3, 60 sec: 6007.5, 300 sec: 6026.0). Total num frames: 7589888. Throughput: 0: 2966.8, 1: 2966.4. Samples: 5149558. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:33:42,045][72530] Avg episode reward: [(0, '143692.297'), (1, '151993.886')]
+-[2023-09-19 11:33:43,226][73219] Updated weights for policy 1, policy_version 7400 (0.0014)
+-[2023-09-19 11:33:43,228][73145] Updated weights for policy 0, policy_version 7440 (0.0015)
+-[2023-09-19 11:33:47,043][72530] Fps is (10 sec: 5734.6, 60 sec: 5870.9, 300 sec: 5998.2). Total num frames: 7614464. Throughput: 0: 2915.9, 1: 2916.2. Samples: 5182110. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:33:47,044][72530] Avg episode reward: [(0, '148573.588'), (1, '147997.923')]
+-[2023-09-19 11:33:52,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5871.0, 300 sec: 5998.2). Total num frames: 7647232. Throughput: 0: 2925.6, 1: 2925.8. Samples: 5200686. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:52,044][72530] Avg episode reward: [(0, '148799.645'), (1, '147997.923')]
+-[2023-09-19 11:33:52,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007448_3813376.pth...
+-[2023-09-19 11:33:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007488_3833856.pth...
+-[2023-09-19 11:33:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007280_3727360.pth
+-[2023-09-19 11:33:52,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007320_3747840.pth
+-[2023-09-19 11:33:57,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5870.9, 300 sec: 5998.2). Total num frames: 7671808. Throughput: 0: 2852.0, 1: 2851.8. Samples: 5232264. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:33:57,044][72530] Avg episode reward: [(0, '149325.137'), (1, '139943.474')]
+-[2023-09-19 11:33:57,818][73219] Updated weights for policy 1, policy_version 7480 (0.0015)
+-[2023-09-19 11:33:57,818][73145] Updated weights for policy 0, policy_version 7520 (0.0012)
+-[2023-09-19 11:34:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 5970.4). Total num frames: 7704576. Throughput: 0: 2837.4, 1: 2838.8. Samples: 5267522. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:02,044][72530] Avg episode reward: [(0, '151014.673'), (1, '136184.034')]
+-[2023-09-19 11:34:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 5942.7). Total num frames: 7729152. Throughput: 0: 2822.8, 1: 2822.9. Samples: 5283834. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:07,045][72530] Avg episode reward: [(0, '151810.022'), (1, '129794.580')]
+-[2023-09-19 11:34:07,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007528_3854336.pth...
+-[2023-09-19 11:34:07,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007568_3874816.pth...
+-[2023-09-19 11:34:07,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007400_3788800.pth
+-[2023-09-19 11:34:07,071][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007360_3768320.pth
+-[2023-09-19 11:34:12,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5914.9). Total num frames: 7753728. Throughput: 0: 2825.1, 1: 2824.1. Samples: 5316866. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:34:12,044][72530] Avg episode reward: [(0, '148050.631'), (1, '130670.879')]
+-[2023-09-19 11:34:12,371][73145] Updated weights for policy 0, policy_version 7600 (0.0013)
+-[2023-09-19 11:34:12,371][73219] Updated weights for policy 1, policy_version 7560 (0.0012)
+-[2023-09-19 11:34:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5914.9). Total num frames: 7786496. Throughput: 0: 2825.1, 1: 2824.7. Samples: 5352062. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:17,044][72530] Avg episode reward: [(0, '149579.084'), (1, '130319.380')]
+-[2023-09-19 11:34:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5914.9). Total num frames: 7811072. Throughput: 0: 2840.5, 1: 2840.4. Samples: 5368464. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:22,044][72530] Avg episode reward: [(0, '149283.565'), (1, '126902.166')]
+-[2023-09-19 11:34:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007608_3895296.pth...
+-[2023-09-19 11:34:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007648_3915776.pth...
+-[2023-09-19 11:34:22,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007448_3813376.pth
+-[2023-09-19 11:34:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007488_3833856.pth
+-[2023-09-19 11:34:27,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5887.3). Total num frames: 7835648. Throughput: 0: 2775.7, 1: 2776.0. Samples: 5399384. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:27,045][72530] Avg episode reward: [(0, '149283.565'), (1, '126902.166')]
+-[2023-09-19 11:34:27,264][73219] Updated weights for policy 1, policy_version 7640 (0.0012)
+-[2023-09-19 11:34:27,264][73145] Updated weights for policy 0, policy_version 7680 (0.0015)
+-[2023-09-19 11:34:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5914.9). Total num frames: 7868416. Throughput: 0: 2824.0, 1: 2824.0. Samples: 5436266. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:32,044][72530] Avg episode reward: [(0, '144232.887'), (1, '135739.316')]
+-[2023-09-19 11:34:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5887.1). Total num frames: 7892992. Throughput: 0: 2799.2, 1: 2799.5. Samples: 5452628. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:37,044][72530] Avg episode reward: [(0, '144291.890'), (1, '135739.316')]
+-[2023-09-19 11:34:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007688_3936256.pth...
+-[2023-09-19 11:34:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007728_3956736.pth...
+-[2023-09-19 11:34:37,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007528_3854336.pth
+-[2023-09-19 11:34:37,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007568_3874816.pth
+-[2023-09-19 11:34:42,043][72530] Fps is (10 sec: 5324.8, 60 sec: 5529.6, 300 sec: 5873.3). Total num frames: 7921664. Throughput: 0: 2800.1, 1: 2800.4. Samples: 5484288. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:42,044][72530] Avg episode reward: [(0, '144843.661'), (1, '143710.582')]
+-[2023-09-19 11:34:42,048][73219] Updated weights for policy 1, policy_version 7720 (0.0013)
+-[2023-09-19 11:34:42,048][73145] Updated weights for policy 0, policy_version 7760 (0.0015)
+-[2023-09-19 11:34:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.8, 300 sec: 5887.1). Total num frames: 7950336. Throughput: 0: 2757.9, 1: 2756.6. Samples: 5515674. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:34:47,044][72530] Avg episode reward: [(0, '143205.243'), (1, '147140.750')]
+-[2023-09-19 11:34:52,043][72530] Fps is (10 sec: 5324.6, 60 sec: 5461.3, 300 sec: 5859.4). Total num frames: 7974912. Throughput: 0: 2780.3, 1: 2779.1. Samples: 5534010. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:34:52,044][72530] Avg episode reward: [(0, '141821.454'), (1, '149043.623')]
+-[2023-09-19 11:34:52,061][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007776_3981312.pth...
+-[2023-09-19 11:34:52,063][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007816_4001792.pth...
+-[2023-09-19 11:34:52,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007608_3895296.pth
+-[2023-09-19 11:34:52,066][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007648_3915776.pth
+-[2023-09-19 11:34:56,255][73145] Updated weights for policy 0, policy_version 7840 (0.0012)
+-[2023-09-19 11:34:56,255][73219] Updated weights for policy 1, policy_version 7800 (0.0014)
+-[2023-09-19 11:34:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5873.2). Total num frames: 8007680. Throughput: 0: 2815.3, 1: 2815.3. Samples: 5570242. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:34:57,044][72530] Avg episode reward: [(0, '138372.363'), (1, '156468.221')]
+-[2023-09-19 11:35:02,043][72530] Fps is (10 sec: 5734.6, 60 sec: 5461.3, 300 sec: 5859.4). Total num frames: 8032256. Throughput: 0: 2781.8, 1: 2782.4. Samples: 5602448. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:02,044][72530] Avg episode reward: [(0, '138372.363'), (1, '157445.674')]
+-[2023-09-19 11:35:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5859.4). Total num frames: 8065024. Throughput: 0: 2774.6, 1: 2774.7. Samples: 5618186. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:07,044][72530] Avg episode reward: [(0, '142866.072'), (1, '153809.911')]
+-[2023-09-19 11:35:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007896_4042752.pth...
+-[2023-09-19 11:35:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007856_4022272.pth...
+-[2023-09-19 11:35:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007728_3956736.pth
+-[2023-09-19 11:35:07,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007688_3936256.pth
+-[2023-09-19 11:35:11,540][73219] Updated weights for policy 1, policy_version 7880 (0.0011)
+-[2023-09-19 11:35:11,541][73145] Updated weights for policy 0, policy_version 7920 (0.0013)
+-[2023-09-19 11:35:12,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5831.6). Total num frames: 8089600. Throughput: 0: 2803.7, 1: 2803.8. Samples: 5651722. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:35:12,044][72530] Avg episode reward: [(0, '143261.806'), (1, '153809.911')]
+-[2023-09-19 11:35:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5859.4). Total num frames: 8122368. Throughput: 0: 2571.9, 1: 2571.7. Samples: 5667726. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:17,044][72530] Avg episode reward: [(0, '145757.121'), (1, '154277.285')]
+-[2023-09-19 11:35:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5859.4). Total num frames: 8146944. Throughput: 0: 2785.8, 1: 2785.5. Samples: 5703340. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:22,045][72530] Avg episode reward: [(0, '147899.769'), (1, '154650.090')]
+-[2023-09-19 11:35:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000007936_4063232.pth...
+-[2023-09-19 11:35:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000007976_4083712.pth...
+-[2023-09-19 11:35:22,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007816_4001792.pth
+-[2023-09-19 11:35:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007776_3981312.pth
+-[2023-09-19 11:35:25,473][73145] Updated weights for policy 0, policy_version 8000 (0.0013)
+-[2023-09-19 11:35:25,474][73219] Updated weights for policy 1, policy_version 7960 (0.0013)
+-[2023-09-19 11:35:27,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5859.4). Total num frames: 8179712. Throughput: 0: 2814.4, 1: 2814.4. Samples: 5737588. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:35:27,044][72530] Avg episode reward: [(0, '152749.656'), (1, '154559.667')]
+-[2023-09-19 11:35:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5859.4). Total num frames: 8204288. Throughput: 0: 2811.2, 1: 2811.5. Samples: 5768692. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:35:32,044][72530] Avg episode reward: [(0, '154504.272'), (1, '154707.152')]
+-[2023-09-19 11:35:37,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5831.6). Total num frames: 8228864. Throughput: 0: 2771.9, 1: 2773.0. Samples: 5783532. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:37,044][72530] Avg episode reward: [(0, '154504.272'), (1, '154289.118')]
+-[2023-09-19 11:35:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008056_4124672.pth...
+-[2023-09-19 11:35:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008016_4104192.pth...
+-[2023-09-19 11:35:37,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007896_4042752.pth
+-[2023-09-19 11:35:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007856_4022272.pth
+-[2023-09-19 11:35:41,038][73145] Updated weights for policy 0, policy_version 8080 (0.0011)
+-[2023-09-19 11:35:41,039][73219] Updated weights for policy 1, policy_version 8040 (0.0014)
+-[2023-09-19 11:35:42,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5529.6, 300 sec: 5803.8). Total num frames: 8253440. Throughput: 0: 2734.4, 1: 2734.8. Samples: 5816354. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:42,044][72530] Avg episode reward: [(0, '161395.630'), (1, '158048.524')]
+-[2023-09-19 11:35:42,045][73130] Saving new best policy, reward=161395.630!
+-[2023-09-19 11:35:47,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5803.8). Total num frames: 8286208. Throughput: 0: 2761.4, 1: 2761.2. Samples: 5850962. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:35:47,044][72530] Avg episode reward: [(0, '159754.689'), (1, '158048.524')]
+-[2023-09-19 11:35:52,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5776.1). Total num frames: 8310784. Throughput: 0: 2780.5, 1: 2780.5. Samples: 5868434. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:35:52,045][72530] Avg episode reward: [(0, '160738.664'), (1, '159082.738')]
+-[2023-09-19 11:35:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008096_4145152.pth...
+-[2023-09-19 11:35:52,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008136_4165632.pth...
+-[2023-09-19 11:35:52,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000007936_4063232.pth
+-[2023-09-19 11:35:52,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000007976_4083712.pth
+-[2023-09-19 11:35:55,537][73145] Updated weights for policy 0, policy_version 8160 (0.0015)
+-[2023-09-19 11:35:55,537][73219] Updated weights for policy 1, policy_version 8120 (0.0014)
+-[2023-09-19 11:35:57,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5748.3). Total num frames: 8335360. Throughput: 0: 2770.3, 1: 2770.4. Samples: 5901056. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:35:57,044][72530] Avg episode reward: [(0, '162764.036'), (1, '158805.704')]
+-[2023-09-19 11:35:57,046][73130] Saving new best policy, reward=162764.036!
+-[2023-09-19 11:36:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5776.1). Total num frames: 8368128. Throughput: 0: 2965.8, 1: 2965.9. Samples: 5934650. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:36:02,044][72530] Avg episode reward: [(0, '162349.625'), (1, '156768.108')]
+-[2023-09-19 11:36:07,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.4, 300 sec: 5748.3). Total num frames: 8392704. Throughput: 0: 2770.8, 1: 2770.8. Samples: 5952710. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:36:07,043][72530] Avg episode reward: [(0, '161456.743'), (1, '158066.203')]
+-[2023-09-19 11:36:07,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008176_4186112.pth...
+-[2023-09-19 11:36:07,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008216_4206592.pth...
+-[2023-09-19 11:36:07,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008016_4104192.pth
+-[2023-09-19 11:36:07,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008056_4124672.pth
+-[2023-09-19 11:36:10,148][73145] Updated weights for policy 0, policy_version 8240 (0.0015)
+-[2023-09-19 11:36:10,148][73219] Updated weights for policy 1, policy_version 8200 (0.0015)
+-[2023-09-19 11:36:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.8, 300 sec: 5748.3). Total num frames: 8425472. Throughput: 0: 2751.9, 1: 2751.9. Samples: 5985260. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:36:12,044][72530] Avg episode reward: [(0, '161456.743'), (1, '157423.763')]
+-[2023-09-19 11:36:17,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5720.5). Total num frames: 8450048. Throughput: 0: 2744.1, 1: 2743.7. Samples: 6015644. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:36:17,044][72530] Avg episode reward: [(0, '161135.132'), (1, '158376.754')]
+-[2023-09-19 11:36:22,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5720.5). Total num frames: 8474624. Throughput: 0: 2786.1, 1: 2785.0. Samples: 6034230. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:36:22,044][72530] Avg episode reward: [(0, '160598.162'), (1, '158376.754')]
+-[2023-09-19 11:36:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008256_4227072.pth...
+-[2023-09-19 11:36:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008296_4247552.pth...
+-[2023-09-19 11:36:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008136_4165632.pth
+-[2023-09-19 11:36:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008096_4145152.pth
+-[2023-09-19 11:36:25,677][73219] Updated weights for policy 1, policy_version 8280 (0.0011)
+-[2023-09-19 11:36:25,678][73145] Updated weights for policy 0, policy_version 8320 (0.0011)
+-[2023-09-19 11:36:27,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5692.7). Total num frames: 8499200. Throughput: 0: 2751.8, 1: 2751.2. Samples: 6063990. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:36:27,044][72530] Avg episode reward: [(0, '159747.417'), (1, '153840.543')]
+-[2023-09-19 11:36:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5692.8). Total num frames: 8531968. Throughput: 0: 2749.7, 1: 2749.3. Samples: 6098416. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:36:32,044][72530] Avg episode reward: [(0, '154229.110'), (1, '153123.695')]
+-[2023-09-19 11:36:37,043][72530] Fps is (10 sec: 6553.6, 60 sec: 5597.9, 300 sec: 5720.5). Total num frames: 8564736. Throughput: 0: 2758.1, 1: 2758.1. Samples: 6116658. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:36:37,044][72530] Avg episode reward: [(0, '149061.433'), (1, '154038.880')]
+-[2023-09-19 11:36:37,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008344_4272128.pth...
+-[2023-09-19 11:36:37,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008384_4292608.pth...
+-[2023-09-19 11:36:37,057][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008176_4186112.pth
+-[2023-09-19 11:36:37,057][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008216_4206592.pth
+-[2023-09-19 11:36:39,594][73219] Updated weights for policy 1, policy_version 8360 (0.0015)
+-[2023-09-19 11:36:39,594][73145] Updated weights for policy 0, policy_version 8400 (0.0016)
+-[2023-09-19 11:36:42,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5692.7). Total num frames: 8589312. Throughput: 0: 2789.9, 1: 2790.9. Samples: 6152192. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:36:42,044][72530] Avg episode reward: [(0, '144058.161'), (1, '154059.173')]
+-[2023-09-19 11:36:47,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5692.7). Total num frames: 8622080. Throughput: 0: 2794.4, 1: 2794.6. Samples: 6186154. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:36:47,044][72530] Avg episode reward: [(0, '140921.567'), (1, '153742.312')]
+-[2023-09-19 11:36:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5665.0). Total num frames: 8646656. Throughput: 0: 2793.8, 1: 2793.7. Samples: 6204150. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:36:52,044][72530] Avg episode reward: [(0, '137894.258'), (1, '154893.749')]
+-[2023-09-19 11:36:52,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008424_4313088.pth...
+-[2023-09-19 11:36:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008464_4333568.pth...
+-[2023-09-19 11:36:52,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008256_4227072.pth
+-[2023-09-19 11:36:52,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008296_4247552.pth
+-[2023-09-19 11:36:53,707][73145] Updated weights for policy 0, policy_version 8480 (0.0014)
+-[2023-09-19 11:36:53,707][73219] Updated weights for policy 1, policy_version 8440 (0.0015)
+-[2023-09-19 11:36:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5692.7). Total num frames: 8679424. Throughput: 0: 2832.6, 1: 2832.4. Samples: 6240182. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:36:57,044][72530] Avg episode reward: [(0, '137894.258'), (1, '155112.103')]
+-[2023-09-19 11:37:02,043][72530] Fps is (10 sec: 6553.6, 60 sec: 5734.4, 300 sec: 5692.7). Total num frames: 8712192. Throughput: 0: 2894.2, 1: 2894.2. Samples: 6276120. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:37:02,045][72530] Avg episode reward: [(0, '135885.742'), (1, '160768.230')]
+-[2023-09-19 11:37:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 5665.0). Total num frames: 8736768. Throughput: 0: 2876.4, 1: 2876.3. Samples: 6293104. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:37:07,045][72530] Avg episode reward: [(0, '135885.742'), (1, '160768.230')]
+-[2023-09-19 11:37:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008512_4358144.pth...
+-[2023-09-19 11:37:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008552_4378624.pth...
+-[2023-09-19 11:37:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008344_4272128.pth
+-[2023-09-19 11:37:07,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008384_4292608.pth
+-[2023-09-19 11:37:07,529][73145] Updated weights for policy 0, policy_version 8560 (0.0012)
+-[2023-09-19 11:37:07,529][73219] Updated weights for policy 1, policy_version 8520 (0.0013)
+-[2023-09-19 11:37:12,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5692.7). Total num frames: 8769536. Throughput: 0: 2951.7, 1: 2951.7. Samples: 6329646. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:37:12,044][72530] Avg episode reward: [(0, '142564.431'), (1, '161744.693')]
+-[2023-09-19 11:37:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5665.0). Total num frames: 8794112. Throughput: 0: 2942.3, 1: 2942.5. Samples: 6363234. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:37:17,044][72530] Avg episode reward: [(0, '142915.479'), (1, '161744.693')]
+-[2023-09-19 11:37:21,639][73219] Updated weights for policy 1, policy_version 8600 (0.0011)
+-[2023-09-19 11:37:21,640][73145] Updated weights for policy 0, policy_version 8640 (0.0013)
+-[2023-09-19 11:37:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5870.9, 300 sec: 5692.7). Total num frames: 8826880. Throughput: 0: 2933.3, 1: 2933.3. Samples: 6380658. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:37:22,045][72530] Avg episode reward: [(0, '143397.695'), (1, '160966.343')]
+-[2023-09-19 11:37:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008600_4403200.pth...
+-[2023-09-19 11:37:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008640_4423680.pth...
+-[2023-09-19 11:37:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008424_4313088.pth
+-[2023-09-19 11:37:22,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008464_4333568.pth
+-[2023-09-19 11:37:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 5665.0). Total num frames: 8851456. Throughput: 0: 2899.7, 1: 2898.4. Samples: 6413104. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:37:27,044][72530] Avg episode reward: [(0, '144691.029'), (1, '161003.911')]
+-[2023-09-19 11:37:32,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5734.4, 300 sec: 5665.0). Total num frames: 8876032. Throughput: 0: 2857.8, 1: 2857.5. Samples: 6443340. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:37:32,044][72530] Avg episode reward: [(0, '145505.352'), (1, '159886.354')]
+-[2023-09-19 11:37:37,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5597.8, 300 sec: 5665.0). Total num frames: 8900608. Throughput: 0: 2838.1, 1: 2838.1. Samples: 6459580. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:37:37,045][72530] Avg episode reward: [(0, '148461.800'), (1, '159898.604')]
+-[2023-09-19 11:37:37,054][73145] Updated weights for policy 0, policy_version 8720 (0.0016)
+-[2023-09-19 11:37:37,054][73219] Updated weights for policy 1, policy_version 8680 (0.0016)
+-[2023-09-19 11:37:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008720_4464640.pth...
+-[2023-09-19 11:37:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008680_4444160.pth...
+-[2023-09-19 11:37:37,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008512_4358144.pth
+-[2023-09-19 11:37:37,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008552_4378624.pth
+-[2023-09-19 11:37:42,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 5665.0). Total num frames: 8933376. Throughput: 0: 2844.0, 1: 2844.8. Samples: 6496178. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:37:42,045][72530] Avg episode reward: [(0, '148461.800'), (1, '159908.679')]
+-[2023-09-19 11:37:47,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5637.2). Total num frames: 8957952. Throughput: 0: 2809.0, 1: 2810.2. Samples: 6528986. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:37:47,044][72530] Avg episode reward: [(0, '139457.708'), (1, '160880.552')]
+-[2023-09-19 11:37:52,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5637.2). Total num frames: 8982528. Throughput: 0: 2798.8, 1: 2799.8. Samples: 6545042. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:37:52,045][72530] Avg episode reward: [(0, '139457.708'), (1, '160880.552')]
+-[2023-09-19 11:37:52,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008752_4481024.pth...
+-[2023-09-19 11:37:52,057][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008792_4501504.pth...
+-[2023-09-19 11:37:52,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008600_4403200.pth
+-[2023-09-19 11:37:52,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008640_4423680.pth
+-[2023-09-19 11:37:52,458][73219] Updated weights for policy 1, policy_version 8760 (0.0012)
+-[2023-09-19 11:37:52,459][73145] Updated weights for policy 0, policy_version 8800 (0.0011)
+-[2023-09-19 11:37:57,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 9007104. Throughput: 0: 2704.4, 1: 2704.2. Samples: 6573032. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:37:57,044][72530] Avg episode reward: [(0, '140620.851'), (1, '160021.637')]
+-[2023-09-19 11:38:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 9039872. Throughput: 0: 2697.8, 1: 2697.6. Samples: 6606026. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:02,045][72530] Avg episode reward: [(0, '142260.932'), (1, '161163.491')]
+-[2023-09-19 11:38:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5581.7). Total num frames: 9064448. Throughput: 0: 2677.0, 1: 2676.9. Samples: 6621582. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:38:07,045][72530] Avg episode reward: [(0, '140483.928'), (1, '159989.158')]
+-[2023-09-19 11:38:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008832_4521984.pth...
+-[2023-09-19 11:38:07,057][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008872_4542464.pth...
+-[2023-09-19 11:38:07,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008680_4444160.pth
+-[2023-09-19 11:38:07,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008720_4464640.pth
+-[2023-09-19 11:38:07,390][73145] Updated weights for policy 0, policy_version 8880 (0.0012)
+-[2023-09-19 11:38:07,390][73219] Updated weights for policy 1, policy_version 8840 (0.0013)
+-[2023-09-19 11:38:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 9097216. Throughput: 0: 2721.5, 1: 2721.4. Samples: 6658036. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:12,045][72530] Avg episode reward: [(0, '138787.732'), (1, '159982.414')]
+-[2023-09-19 11:38:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5581.7). Total num frames: 9121792. Throughput: 0: 2759.0, 1: 2758.8. Samples: 6691642. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:17,044][72530] Avg episode reward: [(0, '138787.732'), (1, '159979.750')]
+-[2023-09-19 11:38:22,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5581.7). Total num frames: 9146368. Throughput: 0: 2732.9, 1: 2733.0. Samples: 6705544. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:22,045][72530] Avg episode reward: [(0, '137723.465'), (1, '159412.068')]
+-[2023-09-19 11:38:22,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008912_4562944.pth...
+-[2023-09-19 11:38:22,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000008952_4583424.pth...
+-[2023-09-19 11:38:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008752_4481024.pth
+-[2023-09-19 11:38:22,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008792_4501504.pth
+-[2023-09-19 11:38:22,663][73219] Updated weights for policy 1, policy_version 8920 (0.0014)
+-[2023-09-19 11:38:22,663][73145] Updated weights for policy 0, policy_version 8960 (0.0016)
+-[2023-09-19 11:38:27,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5553.9). Total num frames: 9170944. Throughput: 0: 2668.7, 1: 2667.9. Samples: 6736324. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:27,044][72530] Avg episode reward: [(0, '137729.460'), (1, '159412.184')]
+-[2023-09-19 11:38:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5581.7). Total num frames: 9203712. Throughput: 0: 2658.7, 1: 2657.8. Samples: 6768230. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:32,044][72530] Avg episode reward: [(0, '137016.334'), (1, '160295.806')]
+-[2023-09-19 11:38:37,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5553.9). Total num frames: 9228288. Throughput: 0: 2671.2, 1: 2670.4. Samples: 6785414. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:38:37,044][72530] Avg episode reward: [(0, '135131.016'), (1, '159223.120')]
+-[2023-09-19 11:38:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009032_4624384.pth...
+-[2023-09-19 11:38:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000008992_4603904.pth...
+-[2023-09-19 11:38:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008832_4521984.pth
+-[2023-09-19 11:38:37,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008872_4542464.pth
+-[2023-09-19 11:38:37,583][73145] Updated weights for policy 0, policy_version 9040 (0.0013)
+-[2023-09-19 11:38:37,583][73219] Updated weights for policy 1, policy_version 9000 (0.0014)
+-[2023-09-19 11:38:42,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5553.9). Total num frames: 9252864. Throughput: 0: 2718.7, 1: 2718.9. Samples: 6817726. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:38:42,044][72530] Avg episode reward: [(0, '136734.966'), (1, '159206.896')]
+-[2023-09-19 11:38:47,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9277440. Throughput: 0: 2507.6, 1: 2694.9. Samples: 6840140. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:47,044][72530] Avg episode reward: [(0, '137794.300'), (1, '161324.133')]
+-[2023-09-19 11:38:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5553.9). Total num frames: 9310208. Throughput: 0: 2701.8, 1: 2702.8. Samples: 6864788. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:52,044][72530] Avg episode reward: [(0, '137794.300'), (1, '161324.133')]
+-[2023-09-19 11:38:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009112_4665344.pth...
+-[2023-09-19 11:38:52,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009072_4644864.pth...
+-[2023-09-19 11:38:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008912_4562944.pth
+-[2023-09-19 11:38:52,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000008952_4583424.pth
+-[2023-09-19 11:38:53,194][73219] Updated weights for policy 1, policy_version 9080 (0.0013)
+-[2023-09-19 11:38:53,194][73145] Updated weights for policy 0, policy_version 9120 (0.0011)
+-[2023-09-19 11:38:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 9334784. Throughput: 0: 2646.8, 1: 2646.8. Samples: 6896248. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:38:57,044][72530] Avg episode reward: [(0, '137042.173'), (1, '160811.317')]
+-[2023-09-19 11:39:02,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9359360. Throughput: 0: 2603.1, 1: 2603.4. Samples: 6925932. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:39:02,044][72530] Avg episode reward: [(0, '137522.213'), (1, '160783.376')]
+-[2023-09-19 11:39:07,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9383936. Throughput: 0: 2590.8, 1: 2591.2. Samples: 6938732. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:39:07,044][72530] Avg episode reward: [(0, '138050.348'), (1, '161331.845')]
+-[2023-09-19 11:39:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009184_4702208.pth...
+-[2023-09-19 11:39:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009144_4681728.pth...
+-[2023-09-19 11:39:07,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009032_4624384.pth
+-[2023-09-19 11:39:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000008992_4603904.pth
+-[2023-09-19 11:39:09,053][73219] Updated weights for policy 1, policy_version 9160 (0.0013)
+-[2023-09-19 11:39:09,054][73145] Updated weights for policy 0, policy_version 9200 (0.0011)
+-[2023-09-19 11:39:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9416704. Throughput: 0: 2658.6, 1: 2658.4. Samples: 6975588. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:39:12,044][72530] Avg episode reward: [(0, '140041.122'), (1, '160346.458')]
+-[2023-09-19 11:39:17,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9441280. Throughput: 0: 2631.2, 1: 2630.8. Samples: 7005018. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:17,044][72530] Avg episode reward: [(0, '140041.122'), (1, '160346.458')]
+-[2023-09-19 11:39:22,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9465856. Throughput: 0: 2645.9, 1: 2646.0. Samples: 7023548. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:22,045][72530] Avg episode reward: [(0, '141321.149'), (1, '160469.876')]
+-[2023-09-19 11:39:22,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009264_4743168.pth...
+-[2023-09-19 11:39:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009224_4722688.pth...
+-[2023-09-19 11:39:22,057][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009112_4665344.pth
+-[2023-09-19 11:39:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009072_4644864.pth
+-[2023-09-19 11:39:23,736][73145] Updated weights for policy 0, policy_version 9280 (0.0016)
+-[2023-09-19 11:39:23,736][73219] Updated weights for policy 1, policy_version 9240 (0.0012)
+-[2023-09-19 11:39:27,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 9498624. Throughput: 0: 2682.5, 1: 2682.2. Samples: 7059138. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:27,044][72530] Avg episode reward: [(0, '143117.745'), (1, '160469.876')]
+-[2023-09-19 11:39:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5324.8, 300 sec: 5526.1). Total num frames: 9523200. Throughput: 0: 2906.0, 1: 2718.8. Samples: 7093254. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:32,044][72530] Avg episode reward: [(0, '139677.367'), (1, '160492.361')]
+-[2023-09-19 11:39:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.4, 300 sec: 5540.0). Total num frames: 9555968. Throughput: 0: 2725.9, 1: 2724.9. Samples: 7110074. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:39:37,044][72530] Avg episode reward: [(0, '137931.041'), (1, '160516.950')]
+-[2023-09-19 11:39:37,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009352_4788224.pth...
+-[2023-09-19 11:39:37,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009312_4767744.pth...
+-[2023-09-19 11:39:37,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009184_4702208.pth
+-[2023-09-19 11:39:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009144_4681728.pth
+-[2023-09-19 11:39:38,025][73219] Updated weights for policy 1, policy_version 9320 (0.0014)
+-[2023-09-19 11:39:38,025][73145] Updated weights for policy 0, policy_version 9360 (0.0014)
+-[2023-09-19 11:39:42,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 9580544. Throughput: 0: 2739.1, 1: 2740.1. Samples: 7142812. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:42,045][72530] Avg episode reward: [(0, '135675.016'), (1, '160545.833')]
+-[2023-09-19 11:39:47,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 9605120. Throughput: 0: 2730.0, 1: 2729.9. Samples: 7171628. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:47,044][72530] Avg episode reward: [(0, '134266.068'), (1, '160294.298')]
+-[2023-09-19 11:39:52,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5498.4). Total num frames: 9629696. Throughput: 0: 2788.4, 1: 2787.8. Samples: 7189664. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:52,045][72530] Avg episode reward: [(0, '134266.068'), (1, '160294.298')]
+-[2023-09-19 11:39:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009384_4804608.pth...
+-[2023-09-19 11:39:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009424_4825088.pth...
+-[2023-09-19 11:39:52,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009264_4743168.pth
+-[2023-09-19 11:39:52,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009224_4722688.pth
+-[2023-09-19 11:39:53,740][73145] Updated weights for policy 0, policy_version 9440 (0.0014)
+-[2023-09-19 11:39:53,740][73219] Updated weights for policy 1, policy_version 9400 (0.0011)
+-[2023-09-19 11:39:57,043][72530] Fps is (10 sec: 5734.2, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 9662464. Throughput: 0: 2746.2, 1: 2746.2. Samples: 7222744. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:39:57,045][72530] Avg episode reward: [(0, '127890.525'), (1, '162301.441')]
+-[2023-09-19 11:40:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 9687040. Throughput: 0: 2803.2, 1: 2803.9. Samples: 7257338. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:40:02,044][72530] Avg episode reward: [(0, '126377.631'), (1, '162308.865')]
+-[2023-09-19 11:40:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.8, 300 sec: 5526.1). Total num frames: 9719808. Throughput: 0: 2767.5, 1: 2767.4. Samples: 7272620. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:40:07,045][72530] Avg episode reward: [(0, '126851.345'), (1, '162306.418')]
+-[2023-09-19 11:40:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009512_4870144.pth...
+-[2023-09-19 11:40:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009472_4849664.pth...
+-[2023-09-19 11:40:07,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009352_4788224.pth
+-[2023-09-19 11:40:07,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009312_4767744.pth
+-[2023-09-19 11:40:08,284][73145] Updated weights for policy 0, policy_version 9520 (0.0013)
+-[2023-09-19 11:40:08,284][73219] Updated weights for policy 1, policy_version 9480 (0.0012)
+-[2023-09-19 11:40:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 9744384. Throughput: 0: 2756.3, 1: 2757.5. Samples: 7307258. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:40:12,044][72530] Avg episode reward: [(0, '124049.731'), (1, '162755.710')]
+-[2023-09-19 11:40:17,043][72530] Fps is (10 sec: 5734.6, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 9777152. Throughput: 0: 2743.8, 1: 2743.8. Samples: 7340196. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:40:17,044][72530] Avg episode reward: [(0, '125342.829'), (1, '162760.657')]
+-[2023-09-19 11:40:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 9801728. Throughput: 0: 2757.9, 1: 2758.1. Samples: 7358298. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:40:22,044][72530] Avg episode reward: [(0, '126869.413'), (1, '162774.053')]
+-[2023-09-19 11:40:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009552_4890624.pth...
+-[2023-09-19 11:40:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009592_4911104.pth...
+-[2023-09-19 11:40:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009384_4804608.pth
+-[2023-09-19 11:40:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009424_4825088.pth
+-[2023-09-19 11:40:22,807][73219] Updated weights for policy 1, policy_version 9560 (0.0014)
+-[2023-09-19 11:40:22,808][73145] Updated weights for policy 0, policy_version 9600 (0.0015)
+-[2023-09-19 11:40:27,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 9826304. Throughput: 0: 2762.2, 1: 2761.2. Samples: 7391364. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:40:27,044][72530] Avg episode reward: [(0, '126869.413'), (1, '163199.726')]
+-[2023-09-19 11:40:27,045][73131] Saving new best policy, reward=163199.726!
+-[2023-09-19 11:40:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 9859072. Throughput: 0: 2813.1, 1: 2813.0. Samples: 7424800. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:40:32,044][72530] Avg episode reward: [(0, '127876.969'), (1, '163246.788')]
+-[2023-09-19 11:40:32,045][73131] Saving new best policy, reward=163246.788!
+-[2023-09-19 11:40:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 9883648. Throughput: 0: 2764.8, 1: 2765.1. Samples: 7438508. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:40:37,044][72530] Avg episode reward: [(0, '128261.706'), (1, '163246.788')]
+-[2023-09-19 11:40:37,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009632_4931584.pth...
+-[2023-09-19 11:40:37,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009672_4952064.pth...
+-[2023-09-19 11:40:37,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009472_4849664.pth
+-[2023-09-19 11:40:37,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009512_4870144.pth
+-[2023-09-19 11:40:37,743][73145] Updated weights for policy 0, policy_version 9680 (0.0015)
+-[2023-09-19 11:40:37,744][73219] Updated weights for policy 1, policy_version 9640 (0.0013)
+-[2023-09-19 11:40:42,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.4, 300 sec: 5498.4). Total num frames: 9908224. Throughput: 0: 2790.9, 1: 2791.1. Samples: 7473936. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:40:42,044][72530] Avg episode reward: [(0, '127401.924'), (1, '163340.260')]
+-[2023-09-19 11:40:42,046][73131] Saving new best policy, reward=163340.260!
+-[2023-09-19 11:40:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 9940992. Throughput: 0: 2779.8, 1: 2779.3. Samples: 7507494. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:40:47,044][72530] Avg episode reward: [(0, '126125.877'), (1, '163360.879')]
+-[2023-09-19 11:40:47,045][73131] Saving new best policy, reward=163360.879!
+-[2023-09-19 11:40:51,985][73219] Updated weights for policy 1, policy_version 9720 (0.0013)
+-[2023-09-19 11:40:51,986][73145] Updated weights for policy 0, policy_version 9760 (0.0013)
+-[2023-09-19 11:40:52,043][72530] Fps is (10 sec: 6553.6, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 9973760. Throughput: 0: 2804.9, 1: 2805.1. Samples: 7525070. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:40:52,044][72530] Avg episode reward: [(0, '124501.715'), (1, '163360.926')]
+-[2023-09-19 11:40:52,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009720_4976640.pth...
+-[2023-09-19 11:40:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009760_4997120.pth...
+-[2023-09-19 11:40:52,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009552_4890624.pth
+-[2023-09-19 11:40:52,060][73131] Saving new best policy, reward=163360.926!
+-[2023-09-19 11:40:52,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009592_4911104.pth
+-[2023-09-19 11:40:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 9998336. Throughput: 0: 2820.8, 1: 2820.6. Samples: 7561120. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:40:57,044][72530] Avg episode reward: [(0, '121716.208'), (1, '162980.361')]
+-[2023-09-19 11:41:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 10031104. Throughput: 0: 2839.9, 1: 2840.1. Samples: 7595796. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:41:02,044][72530] Avg episode reward: [(0, '123047.712'), (1, '162118.736')]
+-[2023-09-19 11:41:05,816][73145] Updated weights for policy 0, policy_version 9840 (0.0013)
+-[2023-09-19 11:41:05,816][73219] Updated weights for policy 1, policy_version 9800 (0.0011)
+-[2023-09-19 11:41:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 10055680. Throughput: 0: 2842.4, 1: 2842.3. Samples: 7614106. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:07,044][72530] Avg episode reward: [(0, '121775.268'), (1, '162040.085')]
+-[2023-09-19 11:41:07,093][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009808_5021696.pth...
+-[2023-09-19 11:41:07,097][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009632_4931584.pth
+-[2023-09-19 11:41:07,108][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009848_5042176.pth...
+-[2023-09-19 11:41:07,112][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009672_4952064.pth
+-[2023-09-19 11:41:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 10088448. Throughput: 0: 2884.6, 1: 2885.2. Samples: 7651008. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:12,044][72530] Avg episode reward: [(0, '121120.688'), (1, '162013.153')]
+-[2023-09-19 11:41:17,043][72530] Fps is (10 sec: 6553.6, 60 sec: 5734.4, 300 sec: 5581.7). Total num frames: 10121216. Throughput: 0: 2908.3, 1: 2908.4. Samples: 7686552. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:17,044][72530] Avg episode reward: [(0, '124879.108'), (1, '161974.000')]
+-[2023-09-19 11:41:19,279][73145] Updated weights for policy 0, policy_version 9920 (0.0012)
+-[2023-09-19 11:41:19,279][73219] Updated weights for policy 1, policy_version 9880 (0.0014)
+-[2023-09-19 11:41:22,043][72530] Fps is (10 sec: 6553.6, 60 sec: 5870.9, 300 sec: 5609.4). Total num frames: 10153984. Throughput: 0: 2966.0, 1: 2965.8. Samples: 7705438. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:22,044][72530] Avg episode reward: [(0, '124879.108'), (1, '161966.557')]
+-[2023-09-19 11:41:22,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000009936_5087232.pth...
+-[2023-09-19 11:41:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009896_5066752.pth...
+-[2023-09-19 11:41:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009720_4976640.pth
+-[2023-09-19 11:41:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009760_4997120.pth
+-[2023-09-19 11:41:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 5581.7). Total num frames: 10178560. Throughput: 0: 2936.0, 1: 2936.1. Samples: 7738182. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:27,045][72530] Avg episode reward: [(0, '127104.430'), (1, '161743.941')]
+-[2023-09-19 11:41:32,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 10203136. Throughput: 0: 2947.0, 1: 2947.1. Samples: 7772726. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:32,044][72530] Avg episode reward: [(0, '128493.741'), (1, '161743.941')]
+-[2023-09-19 11:41:33,872][73219] Updated weights for policy 1, policy_version 9960 (0.0008)
+-[2023-09-19 11:41:33,873][73145] Updated weights for policy 0, policy_version 10000 (0.0013)
+-[2023-09-19 11:41:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 5581.7). Total num frames: 10235904. Throughput: 0: 2945.2, 1: 2945.2. Samples: 7790142. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:37,044][72530] Avg episode reward: [(0, '127668.095'), (1, '162203.421')]
+-[2023-09-19 11:41:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000009976_5107712.pth...
+-[2023-09-19 11:41:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010016_5128192.pth...
+-[2023-09-19 11:41:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009808_5021696.pth
+-[2023-09-19 11:41:37,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009848_5042176.pth
+-[2023-09-19 11:41:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5870.9, 300 sec: 5553.9). Total num frames: 10260480. Throughput: 0: 2933.3, 1: 2932.4. Samples: 7825076. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:42,044][72530] Avg episode reward: [(0, '129918.117'), (1, '162258.975')]
+-[2023-09-19 11:41:47,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5870.9, 300 sec: 5581.7). Total num frames: 10293248. Throughput: 0: 2916.2, 1: 2916.0. Samples: 7858242. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:47,044][72530] Avg episode reward: [(0, '127362.808'), (1, '162303.333')]
+-[2023-09-19 11:41:47,993][73219] Updated weights for policy 1, policy_version 10040 (0.0015)
+-[2023-09-19 11:41:47,993][73145] Updated weights for policy 0, policy_version 10080 (0.0013)
+-[2023-09-19 11:41:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 10317824. Throughput: 0: 2920.8, 1: 2920.8. Samples: 7876978. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:52,044][72530] Avg episode reward: [(0, '126406.406'), (1, '162333.274')]
+-[2023-09-19 11:41:52,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010096_5169152.pth...
+-[2023-09-19 11:41:52,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010056_5148672.pth...
+-[2023-09-19 11:41:52,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000009936_5087232.pth
+-[2023-09-19 11:41:52,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009896_5066752.pth
+-[2023-09-19 11:41:57,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5870.9, 300 sec: 5553.9). Total num frames: 10350592. Throughput: 0: 2853.3, 1: 2852.7. Samples: 7907776. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:41:57,044][72530] Avg episode reward: [(0, '123833.586'), (1, '162357.732')]
+-[2023-09-19 11:42:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 10375168. Throughput: 0: 2849.7, 1: 2849.6. Samples: 7943016. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:02,044][72530] Avg episode reward: [(0, '119983.882'), (1, '162619.479')]
+-[2023-09-19 11:42:02,675][73219] Updated weights for policy 1, policy_version 10120 (0.0014)
+-[2023-09-19 11:42:02,676][73145] Updated weights for policy 0, policy_version 10160 (0.0015)
+-[2023-09-19 11:42:07,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5734.4, 300 sec: 5526.1). Total num frames: 10399744. Throughput: 0: 2834.9, 1: 2834.8. Samples: 7960578. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:07,045][72530] Avg episode reward: [(0, '119983.882'), (1, '162619.479')]
+-[2023-09-19 11:42:07,089][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010144_5193728.pth...
+-[2023-09-19 11:42:07,090][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010184_5214208.pth...
+-[2023-09-19 11:42:07,093][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000009976_5107712.pth
+-[2023-09-19 11:42:07,096][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010016_5128192.pth
+-[2023-09-19 11:42:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 10432512. Throughput: 0: 2856.8, 1: 2857.6. Samples: 7995330. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:12,044][72530] Avg episode reward: [(0, '117278.968'), (1, '162975.817')]
+-[2023-09-19 11:42:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 10457088. Throughput: 0: 2837.4, 1: 2838.5. Samples: 8028138. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:17,044][72530] Avg episode reward: [(0, '117278.968'), (1, '162975.817')]
+-[2023-09-19 11:42:17,136][73219] Updated weights for policy 1, policy_version 10200 (0.0013)
+-[2023-09-19 11:42:17,136][73145] Updated weights for policy 0, policy_version 10240 (0.0014)
+-[2023-09-19 11:42:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5553.9). Total num frames: 10489856. Throughput: 0: 2827.2, 1: 2828.1. Samples: 8044630. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:22,044][72530] Avg episode reward: [(0, '121515.315'), (1, '162988.813')]
+-[2023-09-19 11:42:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010224_5234688.pth...
+-[2023-09-19 11:42:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010264_5255168.pth...
+-[2023-09-19 11:42:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010056_5148672.pth
+-[2023-09-19 11:42:22,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010096_5169152.pth
+-[2023-09-19 11:42:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5553.9). Total num frames: 10514432. Throughput: 0: 2792.9, 1: 2792.8. Samples: 8076430. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:42:27,044][72530] Avg episode reward: [(0, '118834.382'), (1, '162994.952')]
+-[2023-09-19 11:42:31,795][73145] Updated weights for policy 0, policy_version 10320 (0.0014)
+-[2023-09-19 11:42:31,795][73219] Updated weights for policy 1, policy_version 10280 (0.0014)
+-[2023-09-19 11:42:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5581.7). Total num frames: 10547200. Throughput: 0: 2815.4, 1: 2815.5. Samples: 8111630. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:42:32,044][72530] Avg episode reward: [(0, '118369.672'), (1, '161883.949')]
+-[2023-09-19 11:42:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5553.9). Total num frames: 10571776. Throughput: 0: 2810.9, 1: 2810.8. Samples: 8129954. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:42:37,044][72530] Avg episode reward: [(0, '121189.235'), (1, '161513.570')]
+-[2023-09-19 11:42:37,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010304_5275648.pth...
+-[2023-09-19 11:42:37,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010344_5296128.pth...
+-[2023-09-19 11:42:37,057][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010144_5193728.pth
+-[2023-09-19 11:42:37,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010184_5214208.pth
+-[2023-09-19 11:42:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5581.7). Total num frames: 10604544. Throughput: 0: 2861.8, 1: 2861.8. Samples: 8165336. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:42,044][72530] Avg episode reward: [(0, '121261.492'), (1, '161513.431')]
+-[2023-09-19 11:42:45,967][73219] Updated weights for policy 1, policy_version 10360 (0.0013)
+-[2023-09-19 11:42:45,968][73145] Updated weights for policy 0, policy_version 10400 (0.0014)
+-[2023-09-19 11:42:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5581.7). Total num frames: 10629120. Throughput: 0: 2844.1, 1: 2844.1. Samples: 8198984. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:47,044][72530] Avg episode reward: [(0, '118032.842'), (1, '157614.308')]
+-[2023-09-19 11:42:52,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5609.4). Total num frames: 10661888. Throughput: 0: 2843.3, 1: 2844.3. Samples: 8216516. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:42:52,044][72530] Avg episode reward: [(0, '118032.842'), (1, '156763.229')]
+-[2023-09-19 11:42:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010392_5320704.pth...
+-[2023-09-19 11:42:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010432_5341184.pth...
+-[2023-09-19 11:42:52,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010224_5234688.pth
+-[2023-09-19 11:42:52,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010264_5255168.pth
+-[2023-09-19 11:42:57,043][72530] Fps is (10 sec: 6553.7, 60 sec: 5734.4, 300 sec: 5609.4). Total num frames: 10694656. Throughput: 0: 2862.2, 1: 2861.2. Samples: 8252880. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:42:57,044][72530] Avg episode reward: [(0, '113836.289'), (1, '152570.666')]
+-[2023-09-19 11:42:59,347][73219] Updated weights for policy 1, policy_version 10440 (0.0013)
+-[2023-09-19 11:42:59,348][73145] Updated weights for policy 0, policy_version 10480 (0.0014)
+-[2023-09-19 11:43:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5609.4). Total num frames: 10719232. Throughput: 0: 2911.6, 1: 2911.6. Samples: 8290182. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:02,044][72530] Avg episode reward: [(0, '113836.289'), (1, '152570.666')]
+-[2023-09-19 11:43:07,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5734.4, 300 sec: 5581.7). Total num frames: 10743808. Throughput: 0: 2894.9, 1: 2894.2. Samples: 8305140. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:43:07,045][72530] Avg episode reward: [(0, '115267.182'), (1, '149740.570')]
+-[2023-09-19 11:43:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010512_5382144.pth...
+-[2023-09-19 11:43:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010472_5361664.pth...
+-[2023-09-19 11:43:07,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010304_5275648.pth
+-[2023-09-19 11:43:07,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010344_5296128.pth
+-[2023-09-19 11:43:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5609.4). Total num frames: 10776576. Throughput: 0: 2920.7, 1: 2921.1. Samples: 8339310. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:43:12,044][72530] Avg episode reward: [(0, '112738.453'), (1, '147363.933')]
+-[2023-09-19 11:43:13,928][73145] Updated weights for policy 0, policy_version 10560 (0.0015)
+-[2023-09-19 11:43:13,928][73219] Updated weights for policy 1, policy_version 10520 (0.0013)
+-[2023-09-19 11:43:17,043][72530] Fps is (10 sec: 6553.7, 60 sec: 5870.9, 300 sec: 5637.2). Total num frames: 10809344. Throughput: 0: 2925.9, 1: 2925.9. Samples: 8374958. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:17,044][72530] Avg episode reward: [(0, '112692.445'), (1, '139212.880')]
+-[2023-09-19 11:43:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 5637.2). Total num frames: 10833920. Throughput: 0: 2885.6, 1: 2885.7. Samples: 8389664. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:22,044][72530] Avg episode reward: [(0, '111524.427'), (1, '138648.058')]
+-[2023-09-19 11:43:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010560_5406720.pth...
+-[2023-09-19 11:43:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010600_5427200.pth...
+-[2023-09-19 11:43:22,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010392_5320704.pth
+-[2023-09-19 11:43:22,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010432_5341184.pth
+-[2023-09-19 11:43:27,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5734.4, 300 sec: 5609.4). Total num frames: 10858496. Throughput: 0: 2801.5, 1: 2801.5. Samples: 8417468. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:27,044][72530] Avg episode reward: [(0, '111524.427'), (1, '136941.524')]
+-[2023-09-19 11:43:29,536][73145] Updated weights for policy 0, policy_version 10640 (0.0016)
+-[2023-09-19 11:43:29,536][73219] Updated weights for policy 1, policy_version 10600 (0.0013)
+-[2023-09-19 11:43:32,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5597.9, 300 sec: 5609.4). Total num frames: 10883072. Throughput: 0: 2830.2, 1: 2830.5. Samples: 8453716. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:32,044][72530] Avg episode reward: [(0, '107385.986'), (1, '135321.775')]
+-[2023-09-19 11:43:37,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5734.4, 300 sec: 5637.2). Total num frames: 10915840. Throughput: 0: 2819.7, 1: 2819.8. Samples: 8470292. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:43:37,044][72530] Avg episode reward: [(0, '107385.986'), (1, '135321.775')]
+-[2023-09-19 11:43:37,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010640_5447680.pth...
+-[2023-09-19 11:43:37,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010680_5468160.pth...
+-[2023-09-19 11:43:37,056][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010512_5382144.pth
+-[2023-09-19 11:43:37,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010472_5361664.pth
+-[2023-09-19 11:43:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5637.2). Total num frames: 10940416. Throughput: 0: 2781.7, 1: 2783.1. Samples: 8503296. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:42,044][72530] Avg episode reward: [(0, '100071.875'), (1, '136457.439')]
+-[2023-09-19 11:43:44,019][73219] Updated weights for policy 1, policy_version 10680 (0.0013)
+-[2023-09-19 11:43:44,019][73145] Updated weights for policy 0, policy_version 10720 (0.0016)
+-[2023-09-19 11:43:47,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5597.9, 300 sec: 5609.4). Total num frames: 10964992. Throughput: 0: 2720.2, 1: 2719.9. Samples: 8534988. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:43:47,044][72530] Avg episode reward: [(0, '99838.586'), (1, '136457.439')]
+-[2023-09-19 11:43:52,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 10989568. Throughput: 0: 2733.8, 1: 2733.7. Samples: 8551178. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:43:52,044][72530] Avg episode reward: [(0, '96058.014'), (1, '144304.212')]
+-[2023-09-19 11:43:52,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010752_5505024.pth...
+-[2023-09-19 11:43:52,051][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010712_5484544.pth...
+-[2023-09-19 11:43:52,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010600_5427200.pth
+-[2023-09-19 11:43:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010560_5406720.pth
+-[2023-09-19 11:43:57,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5609.4). Total num frames: 11014144. Throughput: 0: 2672.1, 1: 2671.8. Samples: 8579786. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:43:57,044][72530] Avg episode reward: [(0, '94439.101'), (1, '146370.302')]
+-[2023-09-19 11:44:00,053][73145] Updated weights for policy 0, policy_version 10800 (0.0011)
+-[2023-09-19 11:44:00,053][73219] Updated weights for policy 1, policy_version 10760 (0.0014)
+-[2023-09-19 11:44:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5637.2). Total num frames: 11046912. Throughput: 0: 2656.6, 1: 2656.7. Samples: 8614056. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:44:02,044][72530] Avg episode reward: [(0, '94439.101'), (1, '149966.737')]
+-[2023-09-19 11:44:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 11071488. Throughput: 0: 2659.8, 1: 2659.7. Samples: 8629044. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:44:07,044][72530] Avg episode reward: [(0, '89650.521'), (1, '155598.656')]
+-[2023-09-19 11:44:07,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010832_5545984.pth...
+-[2023-09-19 11:44:07,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010792_5525504.pth...
+-[2023-09-19 11:44:07,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010680_5468160.pth
+-[2023-09-19 11:44:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010640_5447680.pth
+-[2023-09-19 11:44:12,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5637.2). Total num frames: 11104256. Throughput: 0: 2748.8, 1: 2748.7. Samples: 8664854. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:44:12,044][72530] Avg episode reward: [(0, '89650.521'), (1, '155598.656')]
+-[2023-09-19 11:44:14,562][73219] Updated weights for policy 1, policy_version 10840 (0.0014)
+-[2023-09-19 11:44:14,562][73145] Updated weights for policy 0, policy_version 10880 (0.0013)
+-[2023-09-19 11:44:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5637.2). Total num frames: 11128832. Throughput: 0: 2724.0, 1: 2723.6. Samples: 8698854. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:44:17,044][72530] Avg episode reward: [(0, '87254.510'), (1, '160076.645')]
+-[2023-09-19 11:44:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5637.2). Total num frames: 11161600. Throughput: 0: 2733.2, 1: 2733.3. Samples: 8716288. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:44:22,044][72530] Avg episode reward: [(0, '85923.521'), (1, '161617.846')]
+-[2023-09-19 11:44:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010880_5570560.pth...
+-[2023-09-19 11:44:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000010920_5591040.pth...
+-[2023-09-19 11:44:22,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010712_5484544.pth
+-[2023-09-19 11:44:22,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010752_5505024.pth
+-[2023-09-19 11:44:27,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5637.2). Total num frames: 11186176. Throughput: 0: 2701.6, 1: 2700.4. Samples: 8746388. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:44:27,044][72530] Avg episode reward: [(0, '86413.887'), (1, '161983.883')]
+-[2023-09-19 11:44:29,421][73219] Updated weights for policy 1, policy_version 10920 (0.0014)
+-[2023-09-19 11:44:29,422][73145] Updated weights for policy 0, policy_version 10960 (0.0016)
+-[2023-09-19 11:44:32,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 11210752. Throughput: 0: 2730.4, 1: 2729.5. Samples: 8780682. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:44:32,044][72530] Avg episode reward: [(0, '88162.418'), (1, '162739.322')]
+-[2023-09-19 11:44:37,044][72530] Fps is (10 sec: 5734.2, 60 sec: 5461.3, 300 sec: 5637.2). Total num frames: 11243520. Throughput: 0: 2743.6, 1: 2743.8. Samples: 8798112. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:44:37,045][72530] Avg episode reward: [(0, '89655.979'), (1, '162764.902')]
+-[2023-09-19 11:44:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000010960_5611520.pth...
+-[2023-09-19 11:44:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011000_5632000.pth...
+-[2023-09-19 11:44:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010792_5525504.pth
+-[2023-09-19 11:44:37,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010832_5545984.pth
+-[2023-09-19 11:44:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5637.2). Total num frames: 11268096. Throughput: 0: 2814.7, 1: 2814.5. Samples: 8833096. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:44:42,044][72530] Avg episode reward: [(0, '90951.227'), (1, '163562.679')]
+-[2023-09-19 11:44:42,045][73131] Saving new best policy, reward=163562.679!
+-[2023-09-19 11:44:43,549][73145] Updated weights for policy 0, policy_version 11040 (0.0014)
+-[2023-09-19 11:44:43,549][73219] Updated weights for policy 1, policy_version 11000 (0.0012)
+-[2023-09-19 11:44:47,043][72530] Fps is (10 sec: 5734.6, 60 sec: 5597.9, 300 sec: 5665.0). Total num frames: 11300864. Throughput: 0: 2823.4, 1: 2823.2. Samples: 8868152. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:44:47,044][72530] Avg episode reward: [(0, '90951.227'), (1, '163569.234')]
+-[2023-09-19 11:44:47,045][73131] Saving new best policy, reward=163569.234!
+-[2023-09-19 11:44:52,043][72530] Fps is (10 sec: 6553.4, 60 sec: 5734.4, 300 sec: 5665.0). Total num frames: 11333632. Throughput: 0: 2853.3, 1: 2853.4. Samples: 8885846. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:44:52,044][72530] Avg episode reward: [(0, '93196.735'), (1, '163602.622')]
+-[2023-09-19 11:44:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011048_5656576.pth...
+-[2023-09-19 11:44:52,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011088_5677056.pth...
+-[2023-09-19 11:44:52,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000010920_5591040.pth
+-[2023-09-19 11:44:52,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010880_5570560.pth
+-[2023-09-19 11:44:52,064][73131] Saving new best policy, reward=163602.622!
+-[2023-09-19 11:44:57,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 5665.0). Total num frames: 11358208. Throughput: 0: 2817.7, 1: 2817.8. Samples: 8918454. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:44:57,045][72530] Avg episode reward: [(0, '93196.735'), (1, '163602.622')]
+-[2023-09-19 11:44:58,430][73145] Updated weights for policy 0, policy_version 11120 (0.0014)
+-[2023-09-19 11:44:58,430][73219] Updated weights for policy 1, policy_version 11080 (0.0010)
+-[2023-09-19 11:45:02,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5597.9, 300 sec: 5637.2). Total num frames: 11382784. Throughput: 0: 2747.1, 1: 2747.7. Samples: 8946120. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:02,044][72530] Avg episode reward: [(0, '96123.404'), (1, '163617.376')]
+-[2023-09-19 11:45:02,046][73131] Saving new best policy, reward=163617.376!
+-[2023-09-19 11:45:07,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5597.9, 300 sec: 5637.2). Total num frames: 11407360. Throughput: 0: 2726.9, 1: 2726.6. Samples: 8961696. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:45:07,044][72530] Avg episode reward: [(0, '97205.408'), (1, '163596.670')]
+-[2023-09-19 11:45:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011160_5713920.pth...
+-[2023-09-19 11:45:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011120_5693440.pth...
+-[2023-09-19 11:45:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011000_5632000.pth
+-[2023-09-19 11:45:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000010960_5611520.pth
+-[2023-09-19 11:45:12,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5461.4, 300 sec: 5609.4). Total num frames: 11431936. Throughput: 0: 2703.2, 1: 2703.2. Samples: 8989676. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:45:12,043][72530] Avg episode reward: [(0, '97205.408'), (1, '163596.670')]
+-[2023-09-19 11:45:15,308][73145] Updated weights for policy 0, policy_version 11200 (0.0014)
+-[2023-09-19 11:45:15,308][73219] Updated weights for policy 1, policy_version 11160 (0.0012)
+-[2023-09-19 11:45:17,043][72530] Fps is (10 sec: 4096.1, 60 sec: 5324.8, 300 sec: 5581.7). Total num frames: 11448320. Throughput: 0: 2651.6, 1: 2652.6. Samples: 9019372. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:45:17,044][72530] Avg episode reward: [(0, '102260.462'), (1, '163634.922')]
+-[2023-09-19 11:45:17,045][73131] Saving new best policy, reward=163634.922!
+-[2023-09-19 11:45:22,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5324.8, 300 sec: 5609.4). Total num frames: 11481088. Throughput: 0: 2640.3, 1: 2641.3. Samples: 9035780. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:45:22,044][72530] Avg episode reward: [(0, '105256.610'), (1, '163636.135')]
+-[2023-09-19 11:45:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011192_5730304.pth...
+-[2023-09-19 11:45:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011232_5750784.pth...
+-[2023-09-19 11:45:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011088_5677056.pth
+-[2023-09-19 11:45:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011048_5656576.pth
+-[2023-09-19 11:45:22,064][73131] Saving new best policy, reward=163636.135!
+-[2023-09-19 11:45:27,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5324.8, 300 sec: 5581.7). Total num frames: 11505664. Throughput: 0: 2614.9, 1: 2616.2. Samples: 9068494. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:27,044][72530] Avg episode reward: [(0, '106969.066'), (1, '163623.728')]
+-[2023-09-19 11:45:30,281][73145] Updated weights for policy 0, policy_version 11280 (0.0010)
+-[2023-09-19 11:45:30,281][73219] Updated weights for policy 1, policy_version 11240 (0.0014)
+-[2023-09-19 11:45:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 11538432. Throughput: 0: 2590.7, 1: 2592.2. Samples: 9101380. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:32,044][72530] Avg episode reward: [(0, '107549.962'), (1, '163617.201')]
+-[2023-09-19 11:45:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5609.4). Total num frames: 11563008. Throughput: 0: 2590.5, 1: 2590.6. Samples: 9118994. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:37,044][72530] Avg episode reward: [(0, '105811.432'), (1, '163639.722')]
+-[2023-09-19 11:45:37,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011272_5771264.pth...
+-[2023-09-19 11:45:37,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011312_5791744.pth...
+-[2023-09-19 11:45:37,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011120_5693440.pth
+-[2023-09-19 11:45:37,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011160_5713920.pth
+-[2023-09-19 11:45:37,061][73131] Saving new best policy, reward=163639.722!
+-[2023-09-19 11:45:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5609.4). Total num frames: 11595776. Throughput: 0: 2606.7, 1: 2606.6. Samples: 9153050. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:42,044][72530] Avg episode reward: [(0, '107882.450'), (1, '163839.533')]
+-[2023-09-19 11:45:42,044][73131] Saving new best policy, reward=163839.533!
+-[2023-09-19 11:45:44,762][73219] Updated weights for policy 1, policy_version 11320 (0.0013)
+-[2023-09-19 11:45:44,763][73145] Updated weights for policy 0, policy_version 11360 (0.0014)
+-[2023-09-19 11:45:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5581.7). Total num frames: 11620352. Throughput: 0: 2698.0, 1: 2697.3. Samples: 9188910. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:47,044][72530] Avg episode reward: [(0, '107882.450'), (1, '163858.005')]
+-[2023-09-19 11:45:47,046][73131] Saving new best policy, reward=163858.005!
+-[2023-09-19 11:45:52,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5324.8, 300 sec: 5609.4). Total num frames: 11653120. Throughput: 0: 2722.0, 1: 2721.0. Samples: 9206630. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:52,044][72530] Avg episode reward: [(0, '105400.770'), (1, '163575.436')]
+-[2023-09-19 11:45:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011360_5816320.pth...
+-[2023-09-19 11:45:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011400_5836800.pth...
+-[2023-09-19 11:45:52,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011192_5730304.pth
+-[2023-09-19 11:45:52,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011232_5750784.pth
+-[2023-09-19 11:45:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5581.7). Total num frames: 11677696. Throughput: 0: 2764.1, 1: 2764.3. Samples: 9238454. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:45:57,044][72530] Avg episode reward: [(0, '105400.770'), (1, '163575.436')]
+-[2023-09-19 11:45:59,327][73219] Updated weights for policy 1, policy_version 11400 (0.0016)
+-[2023-09-19 11:45:59,327][73145] Updated weights for policy 0, policy_version 11440 (0.0016)
+-[2023-09-19 11:46:02,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5581.7). Total num frames: 11702272. Throughput: 0: 2625.6, 1: 2624.6. Samples: 9255630. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:46:02,044][72530] Avg episode reward: [(0, '104718.135'), (1, '163329.699')]
+-[2023-09-19 11:46:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5581.7). Total num frames: 11735040. Throughput: 0: 2830.1, 1: 2829.0. Samples: 9290438. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:46:07,044][72530] Avg episode reward: [(0, '104541.048'), (1, '163378.382')]
+-[2023-09-19 11:46:07,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011440_5857280.pth...
+-[2023-09-19 11:46:07,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011480_5877760.pth...
+-[2023-09-19 11:46:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011272_5771264.pth
+-[2023-09-19 11:46:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011312_5791744.pth
+-[2023-09-19 11:46:12,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5553.9). Total num frames: 11759616. Throughput: 0: 2839.9, 1: 2838.8. Samples: 9324034. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:46:12,044][72530] Avg episode reward: [(0, '106063.260'), (1, '163399.235')]
+-[2023-09-19 11:46:13,614][73219] Updated weights for policy 1, policy_version 11480 (0.0013)
+-[2023-09-19 11:46:13,614][73145] Updated weights for policy 0, policy_version 11520 (0.0014)
+-[2023-09-19 11:46:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5553.9). Total num frames: 11792384. Throughput: 0: 2832.0, 1: 2830.7. Samples: 9356202. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:46:17,044][72530] Avg episode reward: [(0, '105598.661'), (1, '162236.464')]
+-[2023-09-19 11:46:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5553.9). Total num frames: 11816960. Throughput: 0: 2812.6, 1: 2812.9. Samples: 9372142. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:46:22,044][72530] Avg episode reward: [(0, '105598.661'), (1, '158593.843')]
+-[2023-09-19 11:46:22,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011520_5898240.pth...
+-[2023-09-19 11:46:22,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011560_5918720.pth...
+-[2023-09-19 11:46:22,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011360_5816320.pth
+-[2023-09-19 11:46:22,066][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011400_5836800.pth
+-[2023-09-19 11:46:27,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5553.9). Total num frames: 11841536. Throughput: 0: 2754.3, 1: 2754.3. Samples: 9400936. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:46:27,044][72530] Avg episode reward: [(0, '104505.657'), (1, '155073.280')]
+-[2023-09-19 11:46:30,228][73145] Updated weights for policy 0, policy_version 11600 (0.0013)
+-[2023-09-19 11:46:30,228][73219] Updated weights for policy 1, policy_version 11560 (0.0013)
+-[2023-09-19 11:46:32,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 11866112. Throughput: 0: 2687.7, 1: 2687.8. Samples: 9430808. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:46:32,044][72530] Avg episode reward: [(0, '105201.453'), (1, '155073.280')]
+-[2023-09-19 11:46:37,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 11890688. Throughput: 0: 2652.0, 1: 2653.2. Samples: 9445364. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:46:37,044][72530] Avg episode reward: [(0, '106467.037'), (1, '155392.306')]
+-[2023-09-19 11:46:37,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011592_5935104.pth...
+-[2023-09-19 11:46:37,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011632_5955584.pth...
+-[2023-09-19 11:46:37,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011440_5857280.pth
+-[2023-09-19 11:46:37,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011480_5877760.pth
+-[2023-09-19 11:46:42,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5498.4). Total num frames: 11915264. Throughput: 0: 2662.8, 1: 2663.7. Samples: 9478146. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:46:42,044][72530] Avg episode reward: [(0, '108179.215'), (1, '155385.005')]
+-[2023-09-19 11:46:45,489][73145] Updated weights for policy 0, policy_version 11680 (0.0014)
+-[2023-09-19 11:46:45,490][73219] Updated weights for policy 1, policy_version 11640 (0.0014)
+-[2023-09-19 11:46:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 11948032. Throughput: 0: 2837.7, 1: 2837.9. Samples: 9511032. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:46:47,044][72530] Avg episode reward: [(0, '108179.215'), (1, '155360.629')]
+-[2023-09-19 11:46:52,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5324.8, 300 sec: 5498.4). Total num frames: 11972608. Throughput: 0: 2646.1, 1: 2645.9. Samples: 9528578. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:46:52,044][72530] Avg episode reward: [(0, '110744.067'), (1, '155267.645')]
+-[2023-09-19 11:46:52,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011712_5996544.pth...
+-[2023-09-19 11:46:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011672_5976064.pth...
+-[2023-09-19 11:46:52,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011560_5918720.pth
+-[2023-09-19 11:46:52,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011520_5898240.pth
+-[2023-09-19 11:46:57,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5498.4). Total num frames: 11997184. Throughput: 0: 2648.3, 1: 2648.4. Samples: 9562384. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:46:57,044][72530] Avg episode reward: [(0, '110744.067'), (1, '155267.645')]
+-[2023-09-19 11:46:59,831][73145] Updated weights for policy 0, policy_version 11760 (0.0013)
+-[2023-09-19 11:46:59,832][73219] Updated weights for policy 1, policy_version 11720 (0.0013)
+-[2023-09-19 11:47:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 12029952. Throughput: 0: 2681.6, 1: 2681.6. Samples: 9597548. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:47:02,044][72530] Avg episode reward: [(0, '113492.232'), (1, '163938.746')]
+-[2023-09-19 11:47:02,046][73131] Saving new best policy, reward=163938.746!
+-[2023-09-19 11:47:07,043][72530] Fps is (10 sec: 6553.5, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 12062720. Throughput: 0: 2695.1, 1: 2694.9. Samples: 9614692. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:47:07,044][72530] Avg episode reward: [(0, '114645.759'), (1, '163938.746')]
+-[2023-09-19 11:47:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011760_6021120.pth...
+-[2023-09-19 11:47:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011800_6041600.pth...
+-[2023-09-19 11:47:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011592_5935104.pth
+-[2023-09-19 11:47:07,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011632_5955584.pth
+-[2023-09-19 11:47:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5526.1). Total num frames: 12087296. Throughput: 0: 2755.8, 1: 2755.8. Samples: 9648956. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:47:12,044][72530] Avg episode reward: [(0, '117620.147'), (1, '164058.186')]
+-[2023-09-19 11:47:12,046][73131] Saving new best policy, reward=164058.186!
+-[2023-09-19 11:47:15,016][73145] Updated weights for policy 0, policy_version 11840 (0.0014)
+-[2023-09-19 11:47:15,017][73219] Updated weights for policy 1, policy_version 11800 (0.0013)
+-[2023-09-19 11:47:17,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5498.4). Total num frames: 12111872. Throughput: 0: 2736.9, 1: 2737.0. Samples: 9677132. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:47:17,044][72530] Avg episode reward: [(0, '120342.444'), (1, '164061.858')]
+-[2023-09-19 11:47:17,045][73131] Saving new best policy, reward=164061.858!
+-[2023-09-19 11:47:22,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5498.4). Total num frames: 12136448. Throughput: 0: 2756.4, 1: 2755.3. Samples: 9693392. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:47:22,045][72530] Avg episode reward: [(0, '121613.231'), (1, '164054.108')]
+-[2023-09-19 11:47:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011832_6057984.pth...
+-[2023-09-19 11:47:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011872_6078464.pth...
+-[2023-09-19 11:47:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011672_5976064.pth
+-[2023-09-19 11:47:22,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011712_5996544.pth
+-[2023-09-19 11:47:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 12169216. Throughput: 0: 2794.7, 1: 2793.4. Samples: 9729608. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:47:27,044][72530] Avg episode reward: [(0, '120341.617'), (1, '164136.092')]
+-[2023-09-19 11:47:27,045][73131] Saving new best policy, reward=164136.092!
+-[2023-09-19 11:47:29,294][73145] Updated weights for policy 0, policy_version 11920 (0.0016)
+-[2023-09-19 11:47:29,294][73219] Updated weights for policy 1, policy_version 11880 (0.0016)
+-[2023-09-19 11:47:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 12193792. Throughput: 0: 2811.6, 1: 2812.1. Samples: 9764102. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:47:32,044][72530] Avg episode reward: [(0, '120341.617'), (1, '163669.701')]
+-[2023-09-19 11:47:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 12226560. Throughput: 0: 2781.4, 1: 2781.4. Samples: 9778906. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:47:37,044][72530] Avg episode reward: [(0, '118487.302'), (1, '163451.635')]
+-[2023-09-19 11:47:37,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000011920_6103040.pth...
+-[2023-09-19 11:47:37,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000011960_6123520.pth...
+-[2023-09-19 11:47:37,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011800_6041600.pth
+-[2023-09-19 11:47:37,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011760_6021120.pth
+-[2023-09-19 11:47:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 12251136. Throughput: 0: 2801.6, 1: 2801.3. Samples: 9814516. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:47:42,044][72530] Avg episode reward: [(0, '119270.006'), (1, '163451.635')]
+-[2023-09-19 11:47:43,762][73145] Updated weights for policy 0, policy_version 12000 (0.0016)
+-[2023-09-19 11:47:43,762][73219] Updated weights for policy 1, policy_version 11960 (0.0015)
+-[2023-09-19 11:47:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 12283904. Throughput: 0: 2806.5, 1: 2806.4. Samples: 9850126. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:47:47,044][72530] Avg episode reward: [(0, '118016.428'), (1, '163248.371')]
+-[2023-09-19 11:47:52,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 12308480. Throughput: 0: 2818.1, 1: 2818.1. Samples: 9868322. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:47:52,044][72530] Avg episode reward: [(0, '115308.107'), (1, '163245.962')]
+-[2023-09-19 11:47:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012040_6164480.pth...
+-[2023-09-19 11:47:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012000_6144000.pth...
+-[2023-09-19 11:47:52,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011872_6078464.pth
+-[2023-09-19 11:47:52,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011832_6057984.pth
+-[2023-09-19 11:47:57,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5734.4, 300 sec: 5498.4). Total num frames: 12341248. Throughput: 0: 2811.7, 1: 2811.7. Samples: 9902008. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:47:57,044][72530] Avg episode reward: [(0, '115899.171'), (1, '163286.602')]
+-[2023-09-19 11:47:57,983][73219] Updated weights for policy 1, policy_version 12040 (0.0014)
+-[2023-09-19 11:47:57,985][73145] Updated weights for policy 0, policy_version 12080 (0.0016)
+-[2023-09-19 11:48:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 12365824. Throughput: 0: 2849.0, 1: 2849.1. Samples: 9933548. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:48:02,044][72530] Avg episode reward: [(0, '117424.425'), (1, '163330.945')]
+-[2023-09-19 11:48:07,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12390400. Throughput: 0: 2815.6, 1: 2815.7. Samples: 9946798. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:48:07,044][72530] Avg episode reward: [(0, '117424.425'), (1, '163807.050')]
+-[2023-09-19 11:48:07,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012080_6184960.pth...
+-[2023-09-19 11:48:07,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012120_6205440.pth...
+-[2023-09-19 11:48:07,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000011920_6103040.pth
+-[2023-09-19 11:48:07,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000011960_6123520.pth
+-[2023-09-19 11:48:12,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5442.8). Total num frames: 12414976. Throughput: 0: 2772.0, 1: 2772.1. Samples: 9979096. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:48:12,044][72530] Avg episode reward: [(0, '116643.698'), (1, '164043.715')]
+-[2023-09-19 11:48:13,637][73145] Updated weights for policy 0, policy_version 12160 (0.0014)
+-[2023-09-19 11:48:13,637][73219] Updated weights for policy 1, policy_version 12120 (0.0013)
+-[2023-09-19 11:48:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 12447744. Throughput: 0: 2761.6, 1: 2761.6. Samples: 10012644. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:48:17,044][72530] Avg episode reward: [(0, '116643.698'), (1, '164043.715')]
+-[2023-09-19 11:48:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 12472320. Throughput: 0: 2792.3, 1: 2792.3. Samples: 10030210. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:22,044][72530] Avg episode reward: [(0, '114977.442'), (1, '164100.748')]
+-[2023-09-19 11:48:22,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012200_6246400.pth...
+-[2023-09-19 11:48:22,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012160_6225920.pth...
+-[2023-09-19 11:48:22,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012040_6164480.pth
+-[2023-09-19 11:48:22,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012000_6144000.pth
+-[2023-09-19 11:48:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 12505088. Throughput: 0: 2773.0, 1: 2773.1. Samples: 10064090. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:27,044][72530] Avg episode reward: [(0, '115199.567'), (1, '164068.846')]
+-[2023-09-19 11:48:28,286][73219] Updated weights for policy 1, policy_version 12200 (0.0013)
+-[2023-09-19 11:48:28,286][73145] Updated weights for policy 0, policy_version 12240 (0.0012)
+-[2023-09-19 11:48:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 12529664. Throughput: 0: 2711.3, 1: 2711.5. Samples: 10094152. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:32,044][72530] Avg episode reward: [(0, '111865.477'), (1, '163548.142')]
+-[2023-09-19 11:48:37,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12554240. Throughput: 0: 2682.5, 1: 2682.3. Samples: 10109738. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:37,044][72530] Avg episode reward: [(0, '107535.334'), (1, '163429.327')]
+-[2023-09-19 11:48:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012240_6266880.pth...
+-[2023-09-19 11:48:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012280_6287360.pth...
+-[2023-09-19 11:48:37,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012080_6184960.pth
+-[2023-09-19 11:48:37,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012120_6205440.pth
+-[2023-09-19 11:48:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 12587008. Throughput: 0: 2703.6, 1: 2703.6. Samples: 10145332. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:42,044][72530] Avg episode reward: [(0, '107535.334'), (1, '162298.894')]
+-[2023-09-19 11:48:43,731][73145] Updated weights for policy 0, policy_version 12320 (0.0014)
+-[2023-09-19 11:48:43,732][73219] Updated weights for policy 1, policy_version 12280 (0.0012)
+-[2023-09-19 11:48:47,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 12611584. Throughput: 0: 2702.4, 1: 2702.2. Samples: 10176756. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:47,044][72530] Avg episode reward: [(0, '106316.522'), (1, '161408.497')]
+-[2023-09-19 11:48:52,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 12636160. Throughput: 0: 2750.1, 1: 2749.8. Samples: 10194294. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:52,044][72530] Avg episode reward: [(0, '106316.522'), (1, '161408.497')]
+-[2023-09-19 11:48:52,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012320_6307840.pth...
+-[2023-09-19 11:48:52,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012360_6328320.pth...
+-[2023-09-19 11:48:52,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012160_6225920.pth
+-[2023-09-19 11:48:52,059][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012200_6246400.pth
+-[2023-09-19 11:48:57,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 12660736. Throughput: 0: 2734.8, 1: 2734.8. Samples: 10225232. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:48:57,045][72530] Avg episode reward: [(0, '104226.634'), (1, '160377.599')]
+-[2023-09-19 11:48:58,673][73145] Updated weights for policy 0, policy_version 12400 (0.0012)
+-[2023-09-19 11:48:58,673][73219] Updated weights for policy 1, policy_version 12360 (0.0015)
+-[2023-09-19 11:49:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 12693504. Throughput: 0: 2747.6, 1: 2746.8. Samples: 10259896. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:02,045][72530] Avg episode reward: [(0, '104136.332'), (1, '159195.252')]
+-[2023-09-19 11:49:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12718080. Throughput: 0: 2694.6, 1: 2695.7. Samples: 10272774. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:07,045][72530] Avg episode reward: [(0, '106355.574'), (1, '159230.111')]
+-[2023-09-19 11:49:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012440_6369280.pth...
+-[2023-09-19 11:49:07,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012400_6348800.pth...
+-[2023-09-19 11:49:07,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012280_6287360.pth
+-[2023-09-19 11:49:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012240_6266880.pth
+-[2023-09-19 11:49:12,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12742656. Throughput: 0: 2680.0, 1: 2680.4. Samples: 10305312. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:12,045][72530] Avg episode reward: [(0, '110372.318'), (1, '159771.618')]
+-[2023-09-19 11:49:14,377][73145] Updated weights for policy 0, policy_version 12480 (0.0012)
+-[2023-09-19 11:49:14,378][73219] Updated weights for policy 1, policy_version 12440 (0.0012)
+-[2023-09-19 11:49:17,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5442.8). Total num frames: 12767232. Throughput: 0: 2711.7, 1: 2712.5. Samples: 10338240. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:17,044][72530] Avg episode reward: [(0, '110372.318'), (1, '159771.618')]
+-[2023-09-19 11:49:22,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12800000. Throughput: 0: 2729.1, 1: 2729.3. Samples: 10355370. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:22,045][72530] Avg episode reward: [(0, '110181.871'), (1, '160985.288')]
+-[2023-09-19 11:49:22,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012520_6410240.pth...
+-[2023-09-19 11:49:22,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012480_6389760.pth...
+-[2023-09-19 11:49:22,066][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012320_6307840.pth
+-[2023-09-19 11:49:22,066][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012360_6328320.pth
+-[2023-09-19 11:49:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 12824576. Throughput: 0: 2671.7, 1: 2671.6. Samples: 10385784. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:27,044][72530] Avg episode reward: [(0, '110198.661'), (1, '160985.288')]
+-[2023-09-19 11:49:29,030][73145] Updated weights for policy 0, policy_version 12560 (0.0012)
+-[2023-09-19 11:49:29,031][73219] Updated weights for policy 1, policy_version 12520 (0.0011)
+-[2023-09-19 11:49:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12857344. Throughput: 0: 2719.7, 1: 2720.0. Samples: 10421540. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:32,044][72530] Avg episode reward: [(0, '111544.611'), (1, '162823.390')]
+-[2023-09-19 11:49:37,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 12881920. Throughput: 0: 2719.0, 1: 2719.2. Samples: 10439012. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:49:37,044][72530] Avg episode reward: [(0, '112652.375'), (1, '162884.861')]
+-[2023-09-19 11:49:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012600_6451200.pth...
+-[2023-09-19 11:49:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012560_6430720.pth...
+-[2023-09-19 11:49:37,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012440_6369280.pth
+-[2023-09-19 11:49:37,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012400_6348800.pth
+-[2023-09-19 11:49:42,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5442.8). Total num frames: 12906496. Throughput: 0: 2710.0, 1: 2711.1. Samples: 10469180. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:49:42,045][72530] Avg episode reward: [(0, '112673.657'), (1, '163872.242')]
+-[2023-09-19 11:49:44,447][73219] Updated weights for policy 1, policy_version 12600 (0.0014)
+-[2023-09-19 11:49:44,448][73145] Updated weights for policy 0, policy_version 12640 (0.0013)
+-[2023-09-19 11:49:47,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5415.1). Total num frames: 12931072. Throughput: 0: 2683.4, 1: 2683.4. Samples: 10501404. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:49:47,044][72530] Avg episode reward: [(0, '113377.242'), (1, '163154.750')]
+-[2023-09-19 11:49:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5442.8). Total num frames: 12963840. Throughput: 0: 2727.1, 1: 2727.0. Samples: 10518208. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:49:52,045][72530] Avg episode reward: [(0, '113377.242'), (1, '163154.750')]
+-[2023-09-19 11:49:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012680_6492160.pth...
+-[2023-09-19 11:49:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012640_6471680.pth...
+-[2023-09-19 11:49:52,058][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012520_6410240.pth
+-[2023-09-19 11:49:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012480_6389760.pth
+-[2023-09-19 11:49:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5442.8). Total num frames: 12988416. Throughput: 0: 2708.9, 1: 2708.4. Samples: 10549088. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:49:57,044][72530] Avg episode reward: [(0, '113159.962'), (1, '161958.914')]
+-[2023-09-19 11:49:59,407][73219] Updated weights for policy 1, policy_version 12680 (0.0015)
+-[2023-09-19 11:49:59,407][73145] Updated weights for policy 0, policy_version 12720 (0.0014)
+-[2023-09-19 11:50:02,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5442.8). Total num frames: 13012992. Throughput: 0: 2709.9, 1: 2709.0. Samples: 10582090. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:50:02,044][72530] Avg episode reward: [(0, '113659.562'), (1, '161957.798')]
+-[2023-09-19 11:50:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13045760. Throughput: 0: 2706.5, 1: 2706.4. Samples: 10598952. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:50:07,044][72530] Avg episode reward: [(0, '114392.887'), (1, '161274.669')]
+-[2023-09-19 11:50:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012720_6512640.pth...
+-[2023-09-19 11:50:07,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012760_6533120.pth...
+-[2023-09-19 11:50:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012560_6430720.pth
+-[2023-09-19 11:50:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012600_6451200.pth
+-[2023-09-19 11:50:12,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 13062144. Throughput: 0: 2697.0, 1: 2697.2. Samples: 10628524. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:50:12,044][72530] Avg episode reward: [(0, '113293.487'), (1, '160635.441')]
+-[2023-09-19 11:50:15,160][73145] Updated weights for policy 0, policy_version 12800 (0.0014)
+-[2023-09-19 11:50:15,160][73219] Updated weights for policy 1, policy_version 12760 (0.0016)
+-[2023-09-19 11:50:17,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13094912. Throughput: 0: 2646.0, 1: 2645.9. Samples: 10659676. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:50:17,044][72530] Avg episode reward: [(0, '113293.487'), (1, '160786.247')]
+-[2023-09-19 11:50:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 13119488. Throughput: 0: 2645.4, 1: 2645.3. Samples: 10677092. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:50:22,044][72530] Avg episode reward: [(0, '113047.949'), (1, '161538.199')]
+-[2023-09-19 11:50:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012792_6549504.pth...
+-[2023-09-19 11:50:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012832_6569984.pth...
+-[2023-09-19 11:50:22,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012640_6471680.pth
+-[2023-09-19 11:50:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012680_6492160.pth
+-[2023-09-19 11:50:27,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5442.8). Total num frames: 13144064. Throughput: 0: 2674.3, 1: 2673.4. Samples: 10709824. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:50:27,044][72530] Avg episode reward: [(0, '113325.734'), (1, '161538.199')]
+-[2023-09-19 11:50:30,173][73219] Updated weights for policy 1, policy_version 12840 (0.0011)
+-[2023-09-19 11:50:30,174][73145] Updated weights for policy 0, policy_version 12880 (0.0019)
+-[2023-09-19 11:50:32,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 13176832. Throughput: 0: 2678.6, 1: 2678.5. Samples: 10742474. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:50:32,045][72530] Avg episode reward: [(0, '110953.125'), (1, '162797.965')]
+-[2023-09-19 11:50:37,043][72530] Fps is (10 sec: 5734.2, 60 sec: 5324.8, 300 sec: 5442.8). Total num frames: 13201408. Throughput: 0: 2676.9, 1: 2675.9. Samples: 10759082. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:50:37,044][72530] Avg episode reward: [(0, '111513.061'), (1, '162852.658')]
+-[2023-09-19 11:50:37,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012872_6590464.pth...
+-[2023-09-19 11:50:37,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000012912_6610944.pth...
+-[2023-09-19 11:50:37,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012760_6533120.pth
+-[2023-09-19 11:50:37,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012720_6512640.pth
+-[2023-09-19 11:50:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13234176. Throughput: 0: 2723.3, 1: 2723.3. Samples: 10794184. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:50:42,044][72530] Avg episode reward: [(0, '113571.621'), (1, '162934.778')]
+-[2023-09-19 11:50:44,663][73145] Updated weights for policy 0, policy_version 12960 (0.0011)
+-[2023-09-19 11:50:44,664][73219] Updated weights for policy 1, policy_version 12920 (0.0014)
+-[2023-09-19 11:50:47,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5442.8). Total num frames: 13258752. Throughput: 0: 2728.1, 1: 2728.3. Samples: 10827630. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:50:47,044][72530] Avg episode reward: [(0, '113123.608'), (1, '163033.885')]
+-[2023-09-19 11:50:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13291520. Throughput: 0: 2735.7, 1: 2735.9. Samples: 10845174. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+-[2023-09-19 11:50:52,044][72530] Avg episode reward: [(0, '114036.709'), (1, '163233.390')]
+-[2023-09-19 11:50:52,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000012960_6635520.pth...
+-[2023-09-19 11:50:52,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013000_6656000.pth...
+-[2023-09-19 11:50:52,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012792_6549504.pth
+-[2023-09-19 11:50:52,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012832_6569984.pth
+-[2023-09-19 11:50:57,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13316096. Throughput: 0: 2784.1, 1: 2784.4. Samples: 10879108. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:50:57,044][72530] Avg episode reward: [(0, '114403.782'), (1, '162804.648')]
+-[2023-09-19 11:50:58,834][73219] Updated weights for policy 1, policy_version 13000 (0.0013)
+-[2023-09-19 11:50:58,834][73145] Updated weights for policy 0, policy_version 13040 (0.0013)
+-[2023-09-19 11:51:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 13348864. Throughput: 0: 2823.3, 1: 2823.4. Samples: 10913776. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:02,045][72530] Avg episode reward: [(0, '114403.782'), (1, '162804.648')]
+-[2023-09-19 11:51:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13373440. Throughput: 0: 2802.2, 1: 2802.3. Samples: 10929298. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:07,044][72530] Avg episode reward: [(0, '115552.472'), (1, '161727.982')]
+-[2023-09-19 11:51:07,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013040_6676480.pth...
+-[2023-09-19 11:51:07,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013080_6696960.pth...
+-[2023-09-19 11:51:07,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012872_6590464.pth
+-[2023-09-19 11:51:07,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000012912_6610944.pth
+-[2023-09-19 11:51:12,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5442.8). Total num frames: 13398016. Throughput: 0: 2794.4, 1: 2794.4. Samples: 10961322. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:12,045][72530] Avg episode reward: [(0, '114818.267'), (1, '161727.982')]
+-[2023-09-19 11:51:14,001][73219] Updated weights for policy 1, policy_version 13080 (0.0013)
+-[2023-09-19 11:51:14,001][73145] Updated weights for policy 0, policy_version 13120 (0.0014)
+-[2023-09-19 11:51:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 13430784. Throughput: 0: 2802.5, 1: 2802.7. Samples: 10994708. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:17,044][72530] Avg episode reward: [(0, '112267.470'), (1, '161652.380')]
+-[2023-09-19 11:51:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5470.6). Total num frames: 13455360. Throughput: 0: 2816.0, 1: 2816.1. Samples: 11012526. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:22,044][72530] Avg episode reward: [(0, '113122.257'), (1, '162464.319')]
+-[2023-09-19 11:51:22,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013120_6717440.pth...
+-[2023-09-19 11:51:22,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013160_6737920.pth...
+-[2023-09-19 11:51:22,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013000_6656000.pth
+-[2023-09-19 11:51:22,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000012960_6635520.pth
+-[2023-09-19 11:51:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5498.4). Total num frames: 13488128. Throughput: 0: 2804.3, 1: 2804.5. Samples: 11046580. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:51:27,044][72530] Avg episode reward: [(0, '114512.117'), (1, '162679.576')]
+-[2023-09-19 11:51:28,153][73145] Updated weights for policy 0, policy_version 13200 (0.0012)
+-[2023-09-19 11:51:28,154][73219] Updated weights for policy 1, policy_version 13160 (0.0017)
+-[2023-09-19 11:51:32,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13512704. Throughput: 0: 2817.9, 1: 2818.0. Samples: 11081242. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+-[2023-09-19 11:51:32,044][72530] Avg episode reward: [(0, '115152.477'), (1, '162979.727')]
+-[2023-09-19 11:51:37,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13537280. Throughput: 0: 2793.9, 1: 2793.8. Samples: 11096620. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:51:37,044][72530] Avg episode reward: [(0, '115152.477'), (1, '163119.801')]
+-[2023-09-19 11:51:37,050][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013200_6758400.pth...
+-[2023-09-19 11:51:37,051][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013240_6778880.pth...
+-[2023-09-19 11:51:37,055][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013040_6676480.pth
+-[2023-09-19 11:51:37,060][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013080_6696960.pth
+-[2023-09-19 11:51:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13570048. Throughput: 0: 2789.2, 1: 2788.7. Samples: 11130112. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:51:42,045][72530] Avg episode reward: [(0, '113913.820'), (1, '164168.870')]
+-[2023-09-19 11:51:42,046][73131] Saving new best policy, reward=164168.870!
+-[2023-09-19 11:51:43,105][73219] Updated weights for policy 1, policy_version 13240 (0.0012)
+-[2023-09-19 11:51:43,105][73145] Updated weights for policy 0, policy_version 13280 (0.0013)
+-[2023-09-19 11:51:47,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13594624. Throughput: 0: 2792.0, 1: 2791.7. Samples: 11165042. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:47,045][72530] Avg episode reward: [(0, '113329.232'), (1, '164168.870')]
+-[2023-09-19 11:51:52,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 13627392. Throughput: 0: 2810.2, 1: 2810.3. Samples: 11182220. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:52,044][72530] Avg episode reward: [(0, '115366.353'), (1, '164073.991')]
+-[2023-09-19 11:51:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013288_6803456.pth...
+-[2023-09-19 11:51:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013328_6823936.pth...
+-[2023-09-19 11:51:52,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013160_6737920.pth
+-[2023-09-19 11:51:52,062][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013120_6717440.pth
+-[2023-09-19 11:51:57,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13651968. Throughput: 0: 2834.8, 1: 2834.5. Samples: 11216442. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:51:57,044][72530] Avg episode reward: [(0, '112745.770'), (1, '164094.197')]
+-[2023-09-19 11:51:57,114][73145] Updated weights for policy 0, policy_version 13360 (0.0013)
+-[2023-09-19 11:51:57,114][73219] Updated weights for policy 1, policy_version 13320 (0.0014)
+-[2023-09-19 11:52:02,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13684736. Throughput: 0: 2842.9, 1: 2843.0. Samples: 11250572. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:52:02,044][72530] Avg episode reward: [(0, '112085.582'), (1, '164047.200')]
+-[2023-09-19 11:52:07,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13709312. Throughput: 0: 2825.9, 1: 2825.9. Samples: 11266858. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:52:07,044][72530] Avg episode reward: [(0, '113004.087'), (1, '163717.311')]
+-[2023-09-19 11:52:07,056][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013368_6844416.pth...
+-[2023-09-19 11:52:07,056][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013408_6864896.pth...
+-[2023-09-19 11:52:07,062][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013240_6778880.pth
+-[2023-09-19 11:52:07,063][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013200_6758400.pth
+-[2023-09-19 11:52:11,691][73145] Updated weights for policy 0, policy_version 13440 (0.0009)
+-[2023-09-19 11:52:11,692][73219] Updated weights for policy 1, policy_version 13400 (0.0012)
+-[2023-09-19 11:52:12,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5526.1). Total num frames: 13742080. Throughput: 0: 2834.2, 1: 2834.1. Samples: 11301652. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:52:12,043][72530] Avg episode reward: [(0, '113246.114'), (1, '163818.756')]
+-[2023-09-19 11:52:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 13766656. Throughput: 0: 2831.7, 1: 2831.6. Samples: 11336090. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:52:17,044][72530] Avg episode reward: [(0, '112684.823'), (1, '163893.278')]
+-[2023-09-19 11:52:22,043][72530] Fps is (10 sec: 5734.2, 60 sec: 5734.4, 300 sec: 5526.1). Total num frames: 13799424. Throughput: 0: 2853.1, 1: 2853.3. Samples: 11353408. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:52:22,044][72530] Avg episode reward: [(0, '112684.823'), (1, '163936.233')]
+-[2023-09-19 11:52:22,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013456_6889472.pth...
+-[2023-09-19 11:52:22,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013496_6909952.pth...
+-[2023-09-19 11:52:22,059][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013288_6803456.pth
+-[2023-09-19 11:52:22,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013328_6823936.pth
+-[2023-09-19 11:52:26,737][73219] Updated weights for policy 1, policy_version 13480 (0.0011)
+-[2023-09-19 11:52:26,738][73145] Updated weights for policy 0, policy_version 13520 (0.0013)
+-[2023-09-19 11:52:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 13824000. Throughput: 0: 2835.0, 1: 2835.0. Samples: 11385262. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:52:27,044][72530] Avg episode reward: [(0, '110980.059'), (1, '163809.491')]
+-[2023-09-19 11:52:32,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13848576. Throughput: 0: 2781.7, 1: 2781.7. Samples: 11415394. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:52:32,044][72530] Avg episode reward: [(0, '110632.259'), (1, '163809.491')]
+-[2023-09-19 11:52:37,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13873152. Throughput: 0: 2779.5, 1: 2779.4. Samples: 11432370. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+-[2023-09-19 11:52:37,044][72530] Avg episode reward: [(0, '110937.608'), (1, '162945.052')]
+-[2023-09-19 11:52:37,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013528_6926336.pth...
+-[2023-09-19 11:52:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013568_6946816.pth...
+-[2023-09-19 11:52:37,065][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013368_6844416.pth
+-[2023-09-19 11:52:37,070][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013408_6864896.pth
+-[2023-09-19 11:52:41,868][73145] Updated weights for policy 0, policy_version 13600 (0.0011)
+-[2023-09-19 11:52:41,868][73219] Updated weights for policy 1, policy_version 13560 (0.0013)
+-[2023-09-19 11:52:42,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13905920. Throughput: 0: 2761.7, 1: 2761.8. Samples: 11464998. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:52:42,044][72530] Avg episode reward: [(0, '108749.531'), (1, '162877.908')]
+-[2023-09-19 11:52:47,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13930496. Throughput: 0: 2736.3, 1: 2736.0. Samples: 11496824. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:52:47,044][72530] Avg episode reward: [(0, '107670.741'), (1, '161974.202')]
+-[2023-09-19 11:52:52,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5461.3, 300 sec: 5470.6). Total num frames: 13955072. Throughput: 0: 2755.3, 1: 2755.3. Samples: 11514836. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:52:52,044][72530] Avg episode reward: [(0, '109926.959'), (1, '161647.451')]
+-[2023-09-19 11:52:52,084][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013656_6991872.pth...
+-[2023-09-19 11:52:52,087][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013616_6971392.pth...
+-[2023-09-19 11:52:52,090][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013496_6909952.pth
+-[2023-09-19 11:52:52,092][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013456_6889472.pth
+-[2023-09-19 11:52:56,529][73219] Updated weights for policy 1, policy_version 13640 (0.0015)
+-[2023-09-19 11:52:56,529][73145] Updated weights for policy 0, policy_version 13680 (0.0013)
+-[2023-09-19 11:52:57,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 13987840. Throughput: 0: 2750.2, 1: 2750.3. Samples: 11549176. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:52:57,044][72530] Avg episode reward: [(0, '109926.959'), (1, '161647.451')]
+-[2023-09-19 11:53:02,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 14012416. Throughput: 0: 2745.7, 1: 2746.6. Samples: 11583242. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:02,044][72530] Avg episode reward: [(0, '108812.138'), (1, '160242.020')]
+-[2023-09-19 11:53:07,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 14045184. Throughput: 0: 2738.7, 1: 2739.8. Samples: 11599938. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:07,044][72530] Avg episode reward: [(0, '109280.885'), (1, '160242.020')]
+-[2023-09-19 11:53:07,053][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013696_7012352.pth...
+-[2023-09-19 11:53:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013736_7032832.pth...
+-[2023-09-19 11:53:07,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013528_6926336.pth
+-[2023-09-19 11:53:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013568_6946816.pth
+-[2023-09-19 11:53:10,573][73145] Updated weights for policy 0, policy_version 13760 (0.0015)
+-[2023-09-19 11:53:10,573][73219] Updated weights for policy 1, policy_version 13720 (0.0015)
+-[2023-09-19 11:53:12,043][72530] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 14069760. Throughput: 0: 2777.5, 1: 2777.6. Samples: 11635244. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:53:12,044][72530] Avg episode reward: [(0, '107640.671'), (1, '160924.342')]
+-[2023-09-19 11:53:17,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 14102528. Throughput: 0: 2813.6, 1: 2813.8. Samples: 11668628. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+-[2023-09-19 11:53:17,045][72530] Avg episode reward: [(0, '106086.364'), (1, '160888.486')]
+-[2023-09-19 11:53:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 14127104. Throughput: 0: 2820.8, 1: 2820.8. Samples: 11686240. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:22,044][72530] Avg episode reward: [(0, '105829.988'), (1, '160815.863')]
+-[2023-09-19 11:53:22,079][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013824_7077888.pth...
+-[2023-09-19 11:53:22,082][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013656_6991872.pth
+-[2023-09-19 11:53:22,084][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013784_7057408.pth...
+-[2023-09-19 11:53:22,091][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013616_6971392.pth
+-[2023-09-19 11:53:25,330][73145] Updated weights for policy 0, policy_version 13840 (0.0015)
+-[2023-09-19 11:53:25,331][73219] Updated weights for policy 1, policy_version 13800 (0.0013)
+-[2023-09-19 11:53:27,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 14159872. Throughput: 0: 2819.4, 1: 2819.5. Samples: 11718750. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:27,044][72530] Avg episode reward: [(0, '105059.149'), (1, '160896.515')]
+-[2023-09-19 11:53:32,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5597.9, 300 sec: 5526.1). Total num frames: 14184448. Throughput: 0: 2847.1, 1: 2847.4. Samples: 11753076. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:32,044][72530] Avg episode reward: [(0, '104051.203'), (1, '160838.692')]
+-[2023-09-19 11:53:37,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5597.9, 300 sec: 5498.4). Total num frames: 14209024. Throughput: 0: 2817.1, 1: 2817.1. Samples: 11768374. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:37,044][72530] Avg episode reward: [(0, '108523.798'), (1, '161743.570')]
+-[2023-09-19 11:53:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013856_7094272.pth...
+-[2023-09-19 11:53:37,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013896_7114752.pth...
+-[2023-09-19 11:53:37,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013736_7032832.pth
+-[2023-09-19 11:53:37,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013696_7012352.pth
+-[2023-09-19 11:53:40,930][73145] Updated weights for policy 0, policy_version 13920 (0.0017)
+-[2023-09-19 11:53:40,930][73219] Updated weights for policy 1, policy_version 13880 (0.0016)
+-[2023-09-19 11:53:42,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 14233600. Throughput: 0: 2753.8, 1: 2753.5. Samples: 11797004. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:42,044][72530] Avg episode reward: [(0, '108523.798'), (1, '161743.570')]
+-[2023-09-19 11:53:47,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 14258176. Throughput: 0: 2724.0, 1: 2723.2. Samples: 11828370. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:47,044][72530] Avg episode reward: [(0, '108688.999'), (1, '160237.416')]
+-[2023-09-19 11:53:52,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5461.3, 300 sec: 5498.4). Total num frames: 14282752. Throughput: 0: 2709.0, 1: 2708.0. Samples: 11843704. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:52,045][72530] Avg episode reward: [(0, '112242.915'), (1, '159716.483')]
+-[2023-09-19 11:53:52,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000013928_7131136.pth...
+-[2023-09-19 11:53:52,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000013968_7151616.pth...
+-[2023-09-19 11:53:52,064][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013784_7057408.pth
+-[2023-09-19 11:53:52,064][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013824_7077888.pth
+-[2023-09-19 11:53:57,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 14307328. Throughput: 0: 2611.5, 1: 2612.1. Samples: 11870304. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:53:57,044][72530] Avg episode reward: [(0, '112242.915'), (1, '159730.340')]
+-[2023-09-19 11:53:57,810][73145] Updated weights for policy 0, policy_version 14000 (0.0013)
+-[2023-09-19 11:53:57,810][73219] Updated weights for policy 1, policy_version 13960 (0.0012)
+-[2023-09-19 11:54:02,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5324.8, 300 sec: 5470.6). Total num frames: 14331904. Throughput: 0: 2594.1, 1: 2593.9. Samples: 11902086. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:54:02,044][72530] Avg episode reward: [(0, '116633.173'), (1, '156732.668')]
+-[2023-09-19 11:54:07,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5188.3, 300 sec: 5470.6). Total num frames: 14356480. Throughput: 0: 2554.6, 1: 2554.5. Samples: 11916152. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:54:07,044][72530] Avg episode reward: [(0, '119968.454'), (1, '156732.668')]
+-[2023-09-19 11:54:07,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014000_7168000.pth...
+-[2023-09-19 11:54:07,054][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014040_7188480.pth...
+-[2023-09-19 11:54:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013856_7094272.pth
+-[2023-09-19 11:54:07,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013896_7114752.pth
+-[2023-09-19 11:54:12,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5188.3, 300 sec: 5470.6). Total num frames: 14381056. Throughput: 0: 2531.6, 1: 2531.5. Samples: 11946590. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:54:12,044][72530] Avg episode reward: [(0, '126021.713'), (1, '155674.672')]
+-[2023-09-19 11:54:13,622][73219] Updated weights for policy 1, policy_version 14040 (0.0013)
+-[2023-09-19 11:54:13,623][73145] Updated weights for policy 0, policy_version 14080 (0.0014)
+-[2023-09-19 11:54:17,043][72530] Fps is (10 sec: 5734.5, 60 sec: 5188.3, 300 sec: 5470.6). Total num frames: 14413824. Throughput: 0: 2509.2, 1: 2509.1. Samples: 11978896. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:54:17,044][72530] Avg episode reward: [(0, '128184.575'), (1, '154083.119')]
+-[2023-09-19 11:54:22,043][72530] Fps is (10 sec: 5734.4, 60 sec: 5188.3, 300 sec: 5470.6). Total num frames: 14438400. Throughput: 0: 2497.2, 1: 2498.2. Samples: 11993164. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:54:22,044][72530] Avg episode reward: [(0, '128184.575'), (1, '154040.425')]
+-[2023-09-19 11:54:22,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014080_7208960.pth...
+-[2023-09-19 11:54:22,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014120_7229440.pth...
+-[2023-09-19 11:54:22,058][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000013928_7131136.pth
+-[2023-09-19 11:54:22,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000013968_7151616.pth
+-[2023-09-19 11:54:27,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5051.7, 300 sec: 5442.8). Total num frames: 14462976. Throughput: 0: 2535.1, 1: 2535.4. Samples: 12025176. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:54:27,044][72530] Avg episode reward: [(0, '140472.932'), (1, '154651.060')]
+-[2023-09-19 11:54:29,535][73145] Updated weights for policy 0, policy_version 14160 (0.0011)
+-[2023-09-19 11:54:29,536][73219] Updated weights for policy 1, policy_version 14120 (0.0014)
+-[2023-09-19 11:54:32,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5051.7, 300 sec: 5442.8). Total num frames: 14487552. Throughput: 0: 2522.9, 1: 2522.9. Samples: 12055430. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:54:32,044][72530] Avg episode reward: [(0, '141718.503'), (1, '154651.060')]
+-[2023-09-19 11:54:37,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5051.7, 300 sec: 5442.8). Total num frames: 14512128. Throughput: 0: 2513.4, 1: 2513.2. Samples: 12069904. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:54:37,044][72530] Avg episode reward: [(0, '150806.538'), (1, '155291.907')]
+-[2023-09-19 11:54:37,054][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014152_7245824.pth...
+-[2023-09-19 11:54:37,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014192_7266304.pth...
+-[2023-09-19 11:54:37,060][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000014000_7168000.pth
+-[2023-09-19 11:54:37,063][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000014040_7188480.pth
+-[2023-09-19 11:54:42,043][72530] Fps is (10 sec: 4915.1, 60 sec: 5051.7, 300 sec: 5442.8). Total num frames: 14536704. Throughput: 0: 2545.0, 1: 2545.5. Samples: 12099376. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:54:42,045][72530] Avg episode reward: [(0, '151204.698'), (1, '156645.148')]
+-[2023-09-19 11:54:46,277][73145] Updated weights for policy 0, policy_version 14240 (0.0011)
+-[2023-09-19 11:54:46,277][73219] Updated weights for policy 1, policy_version 14200 (0.0011)
+-[2023-09-19 11:54:47,043][72530] Fps is (10 sec: 4915.3, 60 sec: 5051.7, 300 sec: 5415.1). Total num frames: 14561280. Throughput: 0: 2519.7, 1: 2520.0. Samples: 12128874. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+-[2023-09-19 11:54:47,044][72530] Avg episode reward: [(0, '151204.698'), (1, '156645.148')]
+-[2023-09-19 11:54:52,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5051.7, 300 sec: 5415.0). Total num frames: 14585856. Throughput: 0: 2545.9, 1: 2545.9. Samples: 12145282. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:54:52,045][72530] Avg episode reward: [(0, '150668.353'), (1, '159741.496')]
+-[2023-09-19 11:54:52,055][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014224_7282688.pth...
+-[2023-09-19 11:54:52,055][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014264_7303168.pth...
+-[2023-09-19 11:54:52,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000014080_7208960.pth
+-[2023-09-19 11:54:52,065][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000014120_7229440.pth
+-[2023-09-19 11:54:57,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5051.7, 300 sec: 5415.1). Total num frames: 14610432. Throughput: 0: 2537.3, 1: 2537.6. Samples: 12174958. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+-[2023-09-19 11:54:57,045][72530] Avg episode reward: [(0, '151344.969'), (1, '159741.496')]
+-[2023-09-19 11:55:02,044][72530] Fps is (10 sec: 4914.7, 60 sec: 5051.6, 300 sec: 5387.3). Total num frames: 14635008. Throughput: 0: 2521.2, 1: 2521.7. Samples: 12205832. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:55:02,045][72530] Avg episode reward: [(0, '151628.001'), (1, '160794.096')]
+-[2023-09-19 11:55:02,322][73219] Updated weights for policy 1, policy_version 14280 (0.0013)
+-[2023-09-19 11:55:02,323][73145] Updated weights for policy 0, policy_version 14320 (0.0013)
+-[2023-09-19 11:55:07,043][72530] Fps is (10 sec: 4915.2, 60 sec: 5051.7, 300 sec: 5415.1). Total num frames: 14659584. Throughput: 0: 2513.9, 1: 2512.9. Samples: 12219370. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+-[2023-09-19 11:55:07,044][72530] Avg episode reward: [(0, '148176.544'), (1, '161684.229')]
+-[2023-09-19 11:55:07,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014296_7319552.pth...
+-[2023-09-19 11:55:07,053][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014336_7340032.pth...
+-[2023-09-19 11:55:07,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000014152_7245824.pth
+-[2023-09-19 11:55:07,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000014192_7266304.pth
+-[2023-09-19 11:55:12,043][72530] Fps is (10 sec: 4915.8, 60 sec: 5051.7, 300 sec: 5387.3). Total num frames: 14684160. Throughput: 0: 2444.2, 1: 2444.4. Samples: 12245164. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:55:12,045][72530] Avg episode reward: [(0, '148176.544'), (1, '161684.229')]
+-[2023-09-19 11:55:17,043][72530] Fps is (10 sec: 4915.2, 60 sec: 4915.2, 300 sec: 5387.3). Total num frames: 14708736. Throughput: 0: 2444.5, 1: 2444.5. Samples: 12275438. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:55:17,045][72530] Avg episode reward: [(0, '148556.520'), (1, '161543.907')]
+-[2023-09-19 11:55:19,505][73219] Updated weights for policy 1, policy_version 14360 (0.0011)
+-[2023-09-19 11:55:19,506][73145] Updated weights for policy 0, policy_version 14400 (0.0014)
+-[2023-09-19 11:55:22,043][72530] Fps is (10 sec: 4915.2, 60 sec: 4915.2, 300 sec: 5387.3). Total num frames: 14733312. Throughput: 0: 2457.9, 1: 2457.9. Samples: 12291112. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+-[2023-09-19 11:55:22,044][72530] Avg episode reward: [(0, '146305.866'), (1, '161543.272')]
+-[2023-09-19 11:55:22,052][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014368_7356416.pth...
+-[2023-09-19 11:55:22,052][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014408_7376896.pth...
+-[2023-09-19 11:55:22,061][73131] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000014224_7282688.pth
+-[2023-09-19 11:55:22,061][73130] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000014264_7303168.pth
+-[2023-09-19 11:55:22,280][72530] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 72530], exiting...
+-[2023-09-19 11:55:22,281][72530] Runner profile tree view:
+-main_loop: 2141.4988
+-[2023-09-19 11:55:22,282][72530] Collected {1: 7356416, 0: 7376896}, FPS: 5741.9
+-[2023-09-19 11:55:22,281][73130] Stopping Batcher_0...
+-[2023-09-19 11:55:22,281][73131] Stopping Batcher_1...
+-[2023-09-19 11:55:22,282][73130] Loop batcher_evt_loop terminating...
+-[2023-09-19 11:55:22,282][73131] Loop batcher_evt_loop terminating...
+-[2023-09-19 11:55:22,282][73130] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000014408_7376896.pth...
+-[2023-09-19 11:55:22,283][73131] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000014368_7356416.pth...
+-[2023-09-19 11:55:22,285][73221] Stopping RolloutWorker_w1...
+-[2023-09-19 11:55:22,285][73221] Loop rollout_proc1_evt_loop terminating...
+-[2023-09-19 11:55:22,288][73220] Stopping RolloutWorker_w2...
+-[2023-09-19 11:55:22,288][73130] Stopping LearnerWorker_p0...
+-[2023-09-19 11:55:22,288][73220] Loop rollout_proc2_evt_loop terminating...
+-[2023-09-19 11:55:22,288][73130] Loop learner_proc0_evt_loop terminating...
+-[2023-09-19 11:55:22,288][73222] Stopping RolloutWorker_w6...
+-[2023-09-19 11:55:22,288][73131] Stopping LearnerWorker_p1...
+-[2023-09-19 11:55:22,289][73222] Loop rollout_proc6_evt_loop terminating...
+-[2023-09-19 11:55:22,289][73131] Loop learner_proc1_evt_loop terminating...
+-[2023-09-19 11:55:22,290][73218] Stopping RolloutWorker_w0...
+-[2023-09-19 11:55:22,290][73224] Stopping RolloutWorker_w4...
+-[2023-09-19 11:55:22,291][73218] Loop rollout_proc0_evt_loop terminating...
+-[2023-09-19 11:55:22,291][73223] Stopping RolloutWorker_w3...
+-[2023-09-19 11:55:22,291][73224] Loop rollout_proc4_evt_loop terminating...
+-[2023-09-19 11:55:22,291][73223] Loop rollout_proc3_evt_loop terminating...
+-[2023-09-19 11:55:22,292][73226] Stopping RolloutWorker_w7...
+-[2023-09-19 11:55:22,292][73226] Loop rollout_proc7_evt_loop terminating...
+-[2023-09-19 11:55:22,295][73229] Stopping RolloutWorker_w5...
+-[2023-09-19 11:55:22,295][73229] Loop rollout_proc5_evt_loop terminating...
+-[2023-09-19 11:55:22,296][73145] Weights refcount: 2 0
+-[2023-09-19 11:55:22,297][73145] Stopping InferenceWorker_p0-w0...
+-[2023-09-19 11:55:22,297][73145] Loop inference_proc0-0_evt_loop terminating...
+-[2023-09-19 11:55:22,301][73219] Weights refcount: 2 0
+-[2023-09-19 11:55:22,303][73219] Stopping InferenceWorker_p1-w0...
+-[2023-09-19 11:55:22,303][73219] Loop inference_proc1-0_evt_loop terminating...
++[2023-09-21 15:10:45,648][101116] Worker 0 uses CPU cores [0, 1, 2, 3]
++[2023-09-21 15:10:45,774][101118] Worker 1 uses CPU cores [4, 5, 6, 7]
++[2023-09-21 15:10:46,022][101035] Using optimizer <class 'torch.optim.adam.Adam'>
++[2023-09-21 15:10:46,023][101035] No checkpoints found
++[2023-09-21 15:10:46,023][101035] Did not load from checkpoint, starting from scratch!
++[2023-09-21 15:10:46,023][101035] Initialized policy 1 weights for model version 0
++[2023-09-21 15:10:46,025][101035] LearnerWorker_p1 finished initialization!
++[2023-09-21 15:10:46,025][101035] Using GPUs [0] for process 1 (actually maps to GPUs [1])
++[2023-09-21 15:10:46,158][101034] Using optimizer <class 'torch.optim.adam.Adam'>
++[2023-09-21 15:10:46,159][101034] No checkpoints found
++[2023-09-21 15:10:46,159][101034] Did not load from checkpoint, starting from scratch!
++[2023-09-21 15:10:46,159][101034] Initialized policy 0 weights for model version 0
++[2023-09-21 15:10:46,161][101034] LearnerWorker_p0 finished initialization!
++[2023-09-21 15:10:46,161][101034] Using GPUs [0] for process 0 (actually maps to GPUs [0])
++[2023-09-21 15:10:46,569][101117] RunningMeanStd input shape: (376,)
++[2023-09-21 15:10:46,570][101117] RunningMeanStd input shape: (1,)
++[2023-09-21 15:10:46,602][99566] Inference worker 1-0 is ready!
++[2023-09-21 15:10:46,707][101115] RunningMeanStd input shape: (376,)
++[2023-09-21 15:10:46,707][101115] RunningMeanStd input shape: (1,)
++[2023-09-21 15:10:46,739][99566] Inference worker 0-0 is ready!
++[2023-09-21 15:10:46,740][99566] All inference workers are ready! Signal rollout workers to start!
++[2023-09-21 15:10:46,835][101122] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,836][101122] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,836][101120] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,837][101120] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,838][101118] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,838][101116] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,839][101118] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,839][101119] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,839][101116] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,840][101119] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,840][101121] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,841][101121] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,843][101124] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,844][101124] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,851][101123] Decorrelating experience for 0 frames...
++[2023-09-21 15:10:46,852][101123] Decorrelating experience for 64 frames...
++[2023-09-21 15:10:46,887][101120] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,887][101122] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,888][101116] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,892][101119] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,893][101118] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,893][101121] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,897][101124] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,903][101123] Decorrelating experience for 128 frames...
++[2023-09-21 15:10:46,985][101116] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:46,985][101122] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:46,985][101120] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:46,989][101119] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:46,990][101118] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:46,995][101121] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:46,998][101124] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:47,003][101123] Decorrelating experience for 192 frames...
++[2023-09-21 15:10:47,149][101119] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,153][101120] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,154][101116] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,156][101122] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,157][101118] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,163][101121] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,164][101123] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,170][101124] Decorrelating experience for 256 frames...
++[2023-09-21 15:10:47,345][101119] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,348][101120] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,349][101118] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,359][101123] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,363][101122] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,370][101121] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,372][101124] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,377][101116] Decorrelating experience for 320 frames...
++[2023-09-21 15:10:47,595][101123] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,596][101120] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,600][101118] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,603][101119] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,609][101122] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,611][101121] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,632][101116] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,633][101124] Decorrelating experience for 384 frames...
++[2023-09-21 15:10:47,892][101118] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,903][101119] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,910][101122] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,915][101121] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,915][101123] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,917][101120] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,930][101124] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:47,947][101116] Decorrelating experience for 448 frames...
++[2023-09-21 15:10:49,496][99566] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan, 1: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
++[2023-09-21 15:10:54,497][99566] Fps is (10 sec: 3276.5, 60 sec: 3276.5, 300 sec: 3276.5). Total num frames: 16384. Throughput: 0: 1638.3, 1: 1638.3. Samples: 16384. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:10:54,861][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000024_12288.pth...
++[2023-09-21 15:10:54,871][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000024_12288.pth...
++[2023-09-21 15:10:59,497][99566] Fps is (10 sec: 5734.3, 60 sec: 5734.3, 300 sec: 5734.3). Total num frames: 57344. Throughput: 0: 2677.6, 1: 2619.8. Samples: 52974. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
++[2023-09-21 15:10:59,498][99566] Avg episode reward: [(0, '29111.123'), (1, '26500.965')]
++[2023-09-21 15:11:03,368][101115] Updated weights for policy 0, policy_version 80 (0.0014)
++[2023-09-21 15:11:03,369][101117] Updated weights for policy 1, policy_version 80 (0.0013)
++[2023-09-21 15:11:03,395][99566] Heartbeat connected on Batcher_0
++[2023-09-21 15:11:03,398][99566] Heartbeat connected on LearnerWorker_p0
++[2023-09-21 15:11:03,401][99566] Heartbeat connected on Batcher_1
++[2023-09-21 15:11:03,404][99566] Heartbeat connected on LearnerWorker_p1
++[2023-09-21 15:11:03,410][99566] Heartbeat connected on InferenceWorker_p0-w0
++[2023-09-21 15:11:03,415][99566] Heartbeat connected on InferenceWorker_p1-w0
++[2023-09-21 15:11:03,419][99566] Heartbeat connected on RolloutWorker_w0
++[2023-09-21 15:11:03,420][99566] Heartbeat connected on RolloutWorker_w1
++[2023-09-21 15:11:03,425][99566] Heartbeat connected on RolloutWorker_w2
++[2023-09-21 15:11:03,430][99566] Heartbeat connected on RolloutWorker_w5
++[2023-09-21 15:11:03,432][99566] Heartbeat connected on RolloutWorker_w3
++[2023-09-21 15:11:03,435][99566] Heartbeat connected on RolloutWorker_w4
++[2023-09-21 15:11:03,435][99566] Heartbeat connected on RolloutWorker_w6
++[2023-09-21 15:11:03,441][99566] Heartbeat connected on RolloutWorker_w7
++[2023-09-21 15:11:04,497][99566] Fps is (10 sec: 6553.8, 60 sec: 5461.3, 300 sec: 5461.3). Total num frames: 81920. Throughput: 0: 2445.3, 1: 2433.3. Samples: 73180. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:04,498][99566] Avg episode reward: [(0, '32653.580'), (1, '29531.158')]
++[2023-09-21 15:11:09,497][99566] Fps is (10 sec: 5734.4, 60 sec: 5734.4, 300 sec: 5734.4). Total num frames: 114688. Throughput: 0: 2741.4, 1: 2711.0. Samples: 109048. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:11:09,497][99566] Avg episode reward: [(0, '42202.736'), (1, '37345.618')]
++[2023-09-21 15:11:09,500][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000112_57344.pth...
++[2023-09-21 15:11:09,500][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000112_57344.pth...
++[2023-09-21 15:11:14,496][99566] Fps is (10 sec: 7372.8, 60 sec: 6225.9, 300 sec: 6225.9). Total num frames: 155648. Throughput: 0: 3059.7, 1: 3035.7. Samples: 152388. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:11:14,498][99566] Avg episode reward: [(0, '44269.193'), (1, '39095.771')]
++[2023-09-21 15:11:15,281][101117] Updated weights for policy 1, policy_version 160 (0.0014)
++[2023-09-21 15:11:15,282][101115] Updated weights for policy 0, policy_version 160 (0.0014)
++[2023-09-21 15:11:19,496][99566] Fps is (10 sec: 7372.8, 60 sec: 6280.5, 300 sec: 6280.5). Total num frames: 188416. Throughput: 0: 2891.8, 1: 2871.1. Samples: 172888. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
++[2023-09-21 15:11:19,498][99566] Avg episode reward: [(0, '52608.069'), (1, '46035.948')]
++[2023-09-21 15:11:24,497][99566] Fps is (10 sec: 7372.7, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 229376. Throughput: 0: 3103.4, 1: 3086.0. Samples: 216632. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:24,498][99566] Avg episode reward: [(0, '52608.069'), (1, '49544.367')]
++[2023-09-21 15:11:24,502][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000224_114688.pth...
++[2023-09-21 15:11:24,502][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000224_114688.pth...
++[2023-09-21 15:11:24,508][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000024_12288.pth
++[2023-09-21 15:11:24,509][101035] Saving new best policy, reward=49544.367!
++[2023-09-21 15:11:24,510][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000024_12288.pth
++[2023-09-21 15:11:24,511][101034] Saving new best policy, reward=52608.069!
++[2023-09-21 15:11:26,547][101117] Updated weights for policy 1, policy_version 240 (0.0011)
++[2023-09-21 15:11:26,548][101115] Updated weights for policy 0, policy_version 240 (0.0012)
++[2023-09-21 15:11:29,496][99566] Fps is (10 sec: 7372.8, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 262144. Throughput: 0: 3277.6, 1: 3275.7. Samples: 262134. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
++[2023-09-21 15:11:29,497][99566] Avg episode reward: [(0, '66016.832'), (1, '59449.959')]
++[2023-09-21 15:11:29,498][101034] Saving new best policy, reward=66016.832!
++[2023-09-21 15:11:29,498][101035] Saving new best policy, reward=59449.959!
++[2023-09-21 15:11:34,496][99566] Fps is (10 sec: 6963.4, 60 sec: 6644.6, 300 sec: 6644.6). Total num frames: 299008. Throughput: 0: 3152.0, 1: 3139.7. Samples: 283126. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:34,497][99566] Avg episode reward: [(0, '68820.170'), (1, '64166.988')]
++[2023-09-21 15:11:34,497][101034] Saving new best policy, reward=68820.170!
++[2023-09-21 15:11:34,499][101035] Saving new best policy, reward=64166.988!
++[2023-09-21 15:11:37,925][101117] Updated weights for policy 1, policy_version 320 (0.0015)
++[2023-09-21 15:11:37,925][101115] Updated weights for policy 0, policy_version 320 (0.0013)
++[2023-09-21 15:11:39,497][99566] Fps is (10 sec: 7372.6, 60 sec: 6717.4, 300 sec: 6717.4). Total num frames: 335872. Throughput: 0: 3456.7, 1: 3448.0. Samples: 327094. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:39,498][99566] Avg episode reward: [(0, '75159.121'), (1, '68598.547')]
++[2023-09-21 15:11:39,506][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000328_167936.pth...
++[2023-09-21 15:11:39,506][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000328_167936.pth...
++[2023-09-21 15:11:39,513][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000112_57344.pth
++[2023-09-21 15:11:39,513][101035] Saving new best policy, reward=68598.547!
++[2023-09-21 15:11:39,515][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000112_57344.pth
++[2023-09-21 15:11:39,516][101034] Saving new best policy, reward=75159.121!
++[2023-09-21 15:11:44,497][99566] Fps is (10 sec: 6963.0, 60 sec: 6702.5, 300 sec: 6702.5). Total num frames: 368640. Throughput: 0: 3484.0, 1: 3483.4. Samples: 366510. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:44,498][99566] Avg episode reward: [(0, '77287.138'), (1, '73006.291')]
++[2023-09-21 15:11:44,499][101034] Saving new best policy, reward=77287.138!
++[2023-09-21 15:11:44,499][101035] Saving new best policy, reward=73006.291!
++[2023-09-21 15:11:49,496][99566] Fps is (10 sec: 6553.8, 60 sec: 6690.1, 300 sec: 6690.1). Total num frames: 401408. Throughput: 0: 3500.4, 1: 3490.9. Samples: 387786. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:49,497][99566] Avg episode reward: [(0, '79728.904'), (1, '73989.310')]
++[2023-09-21 15:11:49,498][101034] Saving new best policy, reward=79728.904!
++[2023-09-21 15:11:49,498][101035] Saving new best policy, reward=73989.310!
++[2023-09-21 15:11:49,630][101117] Updated weights for policy 1, policy_version 400 (0.0014)
++[2023-09-21 15:11:49,630][101115] Updated weights for policy 0, policy_version 400 (0.0015)
++[2023-09-21 15:11:54,496][99566] Fps is (10 sec: 7372.8, 60 sec: 7099.8, 300 sec: 6805.7). Total num frames: 442368. Throughput: 0: 3564.5, 1: 3565.1. Samples: 429880. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:54,497][99566] Avg episode reward: [(0, '83640.081'), (1, '77200.239')]
++[2023-09-21 15:11:54,506][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000432_221184.pth...
++[2023-09-21 15:11:54,506][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000432_221184.pth...
++[2023-09-21 15:11:54,512][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000224_114688.pth
++[2023-09-21 15:11:54,512][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000224_114688.pth
++[2023-09-21 15:11:54,513][101034] Saving new best policy, reward=83640.081!
++[2023-09-21 15:11:54,513][101035] Saving new best policy, reward=77200.239!
++[2023-09-21 15:11:59,497][99566] Fps is (10 sec: 6553.5, 60 sec: 6826.7, 300 sec: 6670.6). Total num frames: 466944. Throughput: 0: 3511.4, 1: 3516.1. Samples: 468628. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:11:59,498][99566] Avg episode reward: [(0, '84922.994'), (1, '77768.201')]
++[2023-09-21 15:11:59,499][101034] Saving new best policy, reward=84922.994!
++[2023-09-21 15:11:59,499][101035] Saving new best policy, reward=77768.201!
++[2023-09-21 15:12:02,658][101117] Updated weights for policy 1, policy_version 480 (0.0016)
++[2023-09-21 15:12:02,658][101115] Updated weights for policy 0, policy_version 480 (0.0011)
++[2023-09-21 15:12:04,496][99566] Fps is (10 sec: 5734.4, 60 sec: 6963.2, 300 sec: 6662.8). Total num frames: 499712. Throughput: 0: 3466.4, 1: 3468.6. Samples: 484962. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:12:04,497][99566] Avg episode reward: [(0, '88681.435'), (1, '80463.062')]
++[2023-09-21 15:12:04,498][101034] Saving new best policy, reward=88681.435!
++[2023-09-21 15:12:04,498][101035] Saving new best policy, reward=80463.062!
++[2023-09-21 15:12:09,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 6656.0). Total num frames: 532480. Throughput: 0: 3427.1, 1: 3427.7. Samples: 525100. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:12:09,498][99566] Avg episode reward: [(0, '89644.782'), (1, '81336.452')]
++[2023-09-21 15:12:09,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000520_266240.pth...
++[2023-09-21 15:12:09,509][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000520_266240.pth...
++[2023-09-21 15:12:09,515][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000328_167936.pth
++[2023-09-21 15:12:09,516][101035] Saving new best policy, reward=81336.452!
++[2023-09-21 15:12:09,517][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000328_167936.pth
++[2023-09-21 15:12:09,517][101034] Saving new best policy, reward=89644.782!
++[2023-09-21 15:12:14,497][99566] Fps is (10 sec: 6553.5, 60 sec: 6826.7, 300 sec: 6650.0). Total num frames: 565248. Throughput: 0: 3353.4, 1: 3345.7. Samples: 563594. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
++[2023-09-21 15:12:14,498][99566] Avg episode reward: [(0, '93904.727'), (1, '84817.215')]
++[2023-09-21 15:12:14,499][101034] Saving new best policy, reward=93904.727!
++[2023-09-21 15:12:14,499][101035] Saving new best policy, reward=84817.215!
++[2023-09-21 15:12:15,959][101117] Updated weights for policy 1, policy_version 560 (0.0014)
++[2023-09-21 15:12:15,960][101115] Updated weights for policy 0, policy_version 560 (0.0012)
++[2023-09-21 15:12:19,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6826.7, 300 sec: 6644.6). Total num frames: 598016. Throughput: 0: 3291.8, 1: 3291.1. Samples: 579360. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:12:19,498][99566] Avg episode reward: [(0, '95010.164'), (1, '85718.694')]
++[2023-09-21 15:12:19,499][101034] Saving new best policy, reward=95010.164!
++[2023-09-21 15:12:19,499][101035] Saving new best policy, reward=85718.694!
++[2023-09-21 15:12:24,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6690.1, 300 sec: 6639.8). Total num frames: 630784. Throughput: 0: 3273.7, 1: 3269.4. Samples: 621534. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:12:24,498][99566] Avg episode reward: [(0, '98412.703'), (1, '89341.648')]
++[2023-09-21 15:12:24,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000616_315392.pth...
++[2023-09-21 15:12:24,508][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000616_315392.pth...
++[2023-09-21 15:12:24,515][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000432_221184.pth
++[2023-09-21 15:12:24,516][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000432_221184.pth
++[2023-09-21 15:12:24,516][101034] Saving new best policy, reward=98412.703!
++[2023-09-21 15:12:24,517][101035] Saving new best policy, reward=89341.648!
++[2023-09-21 15:12:27,494][101115] Updated weights for policy 0, policy_version 640 (0.0013)
++[2023-09-21 15:12:27,495][101117] Updated weights for policy 1, policy_version 640 (0.0015)
++[2023-09-21 15:12:29,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6690.1, 300 sec: 6635.5). Total num frames: 663552. Throughput: 0: 3307.6, 1: 3308.7. Samples: 664240. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:12:29,497][99566] Avg episode reward: [(0, '100510.374'), (1, '91250.829')]
++[2023-09-21 15:12:29,498][101034] Saving new best policy, reward=100510.374!
++[2023-09-21 15:12:29,498][101035] Saving new best policy, reward=91250.829!
++[2023-09-21 15:12:34,496][99566] Fps is (10 sec: 7372.9, 60 sec: 6758.4, 300 sec: 6709.6). Total num frames: 704512. Throughput: 0: 3325.3, 1: 3326.3. Samples: 687108. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
++[2023-09-21 15:12:34,498][99566] Avg episode reward: [(0, '106969.058'), (1, '95836.961')]
++[2023-09-21 15:12:34,499][101034] Saving new best policy, reward=106969.058!
++[2023-09-21 15:12:34,499][101035] Saving new best policy, reward=95836.961!
++[2023-09-21 15:12:38,780][101115] Updated weights for policy 0, policy_version 720 (0.0012)
++[2023-09-21 15:12:38,780][101117] Updated weights for policy 1, policy_version 720 (0.0015)
++[2023-09-21 15:12:39,497][99566] Fps is (10 sec: 7372.7, 60 sec: 6690.1, 300 sec: 6702.5). Total num frames: 737280. Throughput: 0: 3329.2, 1: 3331.7. Samples: 729624. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
++[2023-09-21 15:12:39,498][99566] Avg episode reward: [(0, '106969.058'), (1, '97182.862')]
++[2023-09-21 15:12:39,508][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000720_368640.pth...
++[2023-09-21 15:12:39,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000720_368640.pth...
++[2023-09-21 15:12:39,517][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000520_266240.pth
++[2023-09-21 15:12:39,517][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000520_266240.pth
++[2023-09-21 15:12:39,517][101035] Saving new best policy, reward=97182.862!
++[2023-09-21 15:12:44,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6690.1, 300 sec: 6696.1). Total num frames: 770048. Throughput: 0: 3327.6, 1: 3329.0. Samples: 768176. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
++[2023-09-21 15:12:44,497][99566] Avg episode reward: [(0, '113117.062'), (1, '101505.450')]
++[2023-09-21 15:12:44,498][101034] Saving new best policy, reward=113117.062!
++[2023-09-21 15:12:44,498][101035] Saving new best policy, reward=101505.450!
++[2023-09-21 15:12:49,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6690.1, 300 sec: 6690.1). Total num frames: 802816. Throughput: 0: 3349.2, 1: 3355.5. Samples: 786672. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
++[2023-09-21 15:12:49,497][99566] Avg episode reward: [(0, '113117.062'), (1, '101932.079')]
++[2023-09-21 15:12:49,499][101035] Saving new best policy, reward=101932.079!
++[2023-09-21 15:12:51,683][101115] Updated weights for policy 0, policy_version 800 (0.0014)
++[2023-09-21 15:12:51,683][101117] Updated weights for policy 1, policy_version 800 (0.0013)
++[2023-09-21 15:12:54,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6684.7). Total num frames: 835584. Throughput: 0: 3348.4, 1: 3351.9. Samples: 826612. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:12:54,497][99566] Avg episode reward: [(0, '116085.972'), (1, '103659.764')]
++[2023-09-21 15:12:54,505][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000816_417792.pth...
++[2023-09-21 15:12:54,505][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000816_417792.pth...
++[2023-09-21 15:12:54,509][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000616_315392.pth
++[2023-09-21 15:12:54,509][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000616_315392.pth
++[2023-09-21 15:12:54,509][101034] Saving new best policy, reward=116085.972!
++[2023-09-21 15:12:54,509][101035] Saving new best policy, reward=103659.764!
++[2023-09-21 15:12:59,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6690.1, 300 sec: 6679.6). Total num frames: 868352. Throughput: 0: 3349.9, 1: 3347.9. Samples: 864992. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
++[2023-09-21 15:12:59,497][99566] Avg episode reward: [(0, '116085.972'), (1, '104473.905')]
++[2023-09-21 15:12:59,498][101035] Saving new best policy, reward=104473.905!
++[2023-09-21 15:13:04,444][101115] Updated weights for policy 0, policy_version 880 (0.0016)
++[2023-09-21 15:13:04,444][101117] Updated weights for policy 1, policy_version 880 (0.0015)
++[2023-09-21 15:13:04,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6690.1, 300 sec: 6675.0). Total num frames: 901120. Throughput: 0: 3359.1, 1: 3359.9. Samples: 881714. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
++[2023-09-21 15:13:04,497][99566] Avg episode reward: [(0, '120958.463'), (1, '109086.335')]
++[2023-09-21 15:13:04,498][101034] Saving new best policy, reward=120958.463!
++[2023-09-21 15:13:04,498][101035] Saving new best policy, reward=109086.335!
++[2023-09-21 15:13:09,497][99566] Fps is (10 sec: 6553.5, 60 sec: 6690.1, 300 sec: 6670.6). Total num frames: 933888. Throughput: 0: 3373.1, 1: 3378.0. Samples: 925336. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:09,497][99566] Avg episode reward: [(0, '121585.003'), (1, '109071.902')]
++[2023-09-21 15:13:09,507][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000000912_466944.pth...
++[2023-09-21 15:13:09,507][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000000912_466944.pth...
++[2023-09-21 15:13:09,516][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000720_368640.pth
++[2023-09-21 15:13:09,516][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000720_368640.pth
++[2023-09-21 15:13:09,517][101034] Saving new best policy, reward=121585.003!
++[2023-09-21 15:13:14,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6690.1, 300 sec: 6666.6). Total num frames: 966656. Throughput: 0: 3353.0, 1: 3356.2. Samples: 966154. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
++[2023-09-21 15:13:14,497][99566] Avg episode reward: [(0, '122500.165'), (1, '112494.272')]
++[2023-09-21 15:13:14,498][101034] Saving new best policy, reward=122500.165!
++[2023-09-21 15:13:14,498][101035] Saving new best policy, reward=112494.272!
++[2023-09-21 15:13:16,904][101117] Updated weights for policy 1, policy_version 960 (0.0014)
++[2023-09-21 15:13:16,904][101115] Updated weights for policy 0, policy_version 960 (0.0012)
++[2023-09-21 15:13:19,497][99566] Fps is (10 sec: 5734.4, 60 sec: 6553.6, 300 sec: 6608.2). Total num frames: 991232. Throughput: 0: 3280.2, 1: 3286.4. Samples: 982602. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:19,497][99566] Avg episode reward: [(0, '124457.495'), (1, '113067.463')]
++[2023-09-21 15:13:19,499][101034] Saving new best policy, reward=124457.495!
++[2023-09-21 15:13:19,499][101035] Saving new best policy, reward=113067.463!
++[2023-09-21 15:13:24,497][99566] Fps is (10 sec: 5734.3, 60 sec: 6553.6, 300 sec: 6606.4). Total num frames: 1024000. Throughput: 0: 3215.8, 1: 3213.7. Samples: 1018952. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:24,498][99566] Avg episode reward: [(0, '128172.103'), (1, '116190.090')]
++[2023-09-21 15:13:24,506][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001000_512000.pth...
++[2023-09-21 15:13:24,506][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001000_512000.pth...
++[2023-09-21 15:13:24,513][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000816_417792.pth
++[2023-09-21 15:13:24,513][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000816_417792.pth
++[2023-09-21 15:13:24,513][101035] Saving new best policy, reward=116190.090!
++[2023-09-21 15:13:24,514][101034] Saving new best policy, reward=128172.103!
++[2023-09-21 15:13:29,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6553.6, 300 sec: 6604.8). Total num frames: 1056768. Throughput: 0: 3251.9, 1: 3247.1. Samples: 1060632. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:29,498][99566] Avg episode reward: [(0, '129655.387'), (1, '116680.591')]
++[2023-09-21 15:13:29,499][101034] Saving new best policy, reward=129655.387!
++[2023-09-21 15:13:29,499][101035] Saving new best policy, reward=116680.591!
++[2023-09-21 15:13:29,751][101117] Updated weights for policy 1, policy_version 1040 (0.0013)
++[2023-09-21 15:13:29,751][101115] Updated weights for policy 0, policy_version 1040 (0.0013)
++[2023-09-21 15:13:34,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6603.2). Total num frames: 1089536. Throughput: 0: 3241.6, 1: 3235.9. Samples: 1078160. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:34,498][99566] Avg episode reward: [(0, '131544.446'), (1, '116603.723')]
++[2023-09-21 15:13:34,499][101034] Saving new best policy, reward=131544.446!
++[2023-09-21 15:13:39,497][99566] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6601.8). Total num frames: 1122304. Throughput: 0: 3209.9, 1: 3206.6. Samples: 1115360. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
++[2023-09-21 15:13:39,498][99566] Avg episode reward: [(0, '132903.044'), (1, '116397.835')]
++[2023-09-21 15:13:39,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001096_561152.pth...
++[2023-09-21 15:13:39,508][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001096_561152.pth...
++[2023-09-21 15:13:39,514][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000000912_466944.pth
++[2023-09-21 15:13:39,516][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000000912_466944.pth
++[2023-09-21 15:13:39,517][101034] Saving new best policy, reward=132903.044!
++[2023-09-21 15:13:42,911][101115] Updated weights for policy 0, policy_version 1120 (0.0014)
++[2023-09-21 15:13:42,911][101117] Updated weights for policy 1, policy_version 1120 (0.0013)
++[2023-09-21 15:13:44,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6600.4). Total num frames: 1155072. Throughput: 0: 3195.6, 1: 3195.7. Samples: 1152604. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:44,498][99566] Avg episode reward: [(0, '137104.414'), (1, '119496.691')]
++[2023-09-21 15:13:44,499][101034] Saving new best policy, reward=137104.414!
++[2023-09-21 15:13:44,499][101035] Saving new best policy, reward=119496.691!
++[2023-09-21 15:13:49,496][99566] Fps is (10 sec: 5734.5, 60 sec: 6280.5, 300 sec: 6553.6). Total num frames: 1179648. Throughput: 0: 3200.2, 1: 3200.0. Samples: 1169724. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:49,497][99566] Avg episode reward: [(0, '137104.414'), (1, '121701.677')]
++[2023-09-21 15:13:49,526][101035] Saving new best policy, reward=121701.677!
++[2023-09-21 15:13:54,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6417.0, 300 sec: 6597.9). Total num frames: 1220608. Throughput: 0: 3163.7, 1: 3159.2. Samples: 1209864. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:54,498][99566] Avg episode reward: [(0, '142552.888'), (1, '126028.832')]
++[2023-09-21 15:13:54,508][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001192_610304.pth...
++[2023-09-21 15:13:54,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001192_610304.pth...
++[2023-09-21 15:13:54,515][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001000_512000.pth
++[2023-09-21 15:13:54,515][101035] Saving new best policy, reward=126028.832!
++[2023-09-21 15:13:54,516][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001000_512000.pth
++[2023-09-21 15:13:54,516][101034] Saving new best policy, reward=142552.888!
++[2023-09-21 15:13:55,395][101117] Updated weights for policy 1, policy_version 1200 (0.0013)
++[2023-09-21 15:13:55,395][101115] Updated weights for policy 0, policy_version 1200 (0.0014)
++[2023-09-21 15:13:59,496][99566] Fps is (10 sec: 7372.9, 60 sec: 6417.1, 300 sec: 6596.7). Total num frames: 1253376. Throughput: 0: 3178.4, 1: 3175.2. Samples: 1252066. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:13:59,497][99566] Avg episode reward: [(0, '143172.574'), (1, '126257.348')]
++[2023-09-21 15:13:59,498][101034] Saving new best policy, reward=143172.574!
++[2023-09-21 15:13:59,498][101035] Saving new best policy, reward=126257.348!
++[2023-09-21 15:14:04,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6417.1, 300 sec: 6595.6). Total num frames: 1286144. Throughput: 0: 3190.2, 1: 3193.8. Samples: 1269880. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:14:04,497][99566] Avg episode reward: [(0, '143312.733'), (1, '133847.080')]
++[2023-09-21 15:14:04,498][101035] Saving new best policy, reward=133847.080!
++[2023-09-21 15:14:04,498][101034] Saving new best policy, reward=143312.733!
++[2023-09-21 15:14:07,603][101117] Updated weights for policy 1, policy_version 1280 (0.0015)
++[2023-09-21 15:14:07,604][101115] Updated weights for policy 0, policy_version 1280 (0.0013)
++[2023-09-21 15:14:09,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6594.6). Total num frames: 1318912. Throughput: 0: 3247.3, 1: 3248.9. Samples: 1311276. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:14:09,497][99566] Avg episode reward: [(0, '142024.998'), (1, '135803.021')]
++[2023-09-21 15:14:09,506][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001288_659456.pth...
++[2023-09-21 15:14:09,506][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001288_659456.pth...
++[2023-09-21 15:14:09,514][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001096_561152.pth
++[2023-09-21 15:14:09,514][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001096_561152.pth
++[2023-09-21 15:14:09,514][101035] Saving new best policy, reward=135803.021!
++[2023-09-21 15:14:14,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6417.1, 300 sec: 6593.6). Total num frames: 1351680. Throughput: 0: 3252.1, 1: 3252.0. Samples: 1353318. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
++[2023-09-21 15:14:14,497][99566] Avg episode reward: [(0, '137567.192'), (1, '137580.324')]
++[2023-09-21 15:14:14,498][101035] Saving new best policy, reward=137580.324!
++[2023-09-21 15:14:19,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6553.6, 300 sec: 6592.6). Total num frames: 1384448. Throughput: 0: 3300.0, 1: 3298.9. Samples: 1375110. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:14:19,497][99566] Avg episode reward: [(0, '135167.354'), (1, '138853.897')]
++[2023-09-21 15:14:19,498][101035] Saving new best policy, reward=138853.897!
++[2023-09-21 15:14:19,579][101115] Updated weights for policy 0, policy_version 1360 (0.0013)
++[2023-09-21 15:14:19,580][101117] Updated weights for policy 1, policy_version 1360 (0.0011)
++[2023-09-21 15:14:24,497][99566] Fps is (10 sec: 5734.3, 60 sec: 6417.1, 300 sec: 6553.6). Total num frames: 1409024. Throughput: 0: 3251.1, 1: 3259.6. Samples: 1408344. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
++[2023-09-21 15:14:24,497][99566] Avg episode reward: [(0, '133135.888'), (1, '140760.635')]
++[2023-09-21 15:14:24,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001376_704512.pth...
++[2023-09-21 15:14:24,508][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001376_704512.pth...
++[2023-09-21 15:14:24,519][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001192_610304.pth
++[2023-09-21 15:14:24,519][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001192_610304.pth
++[2023-09-21 15:14:24,520][101035] Saving new best policy, reward=140760.635!
++[2023-09-21 15:14:29,497][99566] Fps is (10 sec: 5734.3, 60 sec: 6417.1, 300 sec: 6553.6). Total num frames: 1441792. Throughput: 0: 3224.6, 1: 3224.8. Samples: 1442826. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
++[2023-09-21 15:14:29,498][99566] Avg episode reward: [(0, '133811.659'), (1, '141653.024')]
++[2023-09-21 15:14:29,499][101035] Saving new best policy, reward=141653.024!
++[2023-09-21 15:14:33,167][101115] Updated weights for policy 0, policy_version 1440 (0.0014)
++[2023-09-21 15:14:33,167][101117] Updated weights for policy 1, policy_version 1440 (0.0011)
++[2023-09-21 15:14:34,496][99566] Fps is (10 sec: 7372.8, 60 sec: 6553.6, 300 sec: 6590.0). Total num frames: 1482752. Throughput: 0: 3281.4, 1: 3281.6. Samples: 1465058. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:14:34,498][99566] Avg episode reward: [(0, '137138.699'), (1, '143246.225')]
++[2023-09-21 15:14:34,499][101035] Saving new best policy, reward=143246.225!
++[2023-09-21 15:14:39,496][99566] Fps is (10 sec: 7372.9, 60 sec: 6553.6, 300 sec: 6589.2). Total num frames: 1515520. Throughput: 0: 3298.9, 1: 3303.3. Samples: 1506962. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:14:39,497][99566] Avg episode reward: [(0, '138323.470'), (1, '146247.815')]
++[2023-09-21 15:14:39,505][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001480_757760.pth...
++[2023-09-21 15:14:39,505][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001480_757760.pth...
++[2023-09-21 15:14:39,514][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001288_659456.pth
++[2023-09-21 15:14:39,515][101035] Saving new best policy, reward=146247.815!
++[2023-09-21 15:14:39,515][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001288_659456.pth
++[2023-09-21 15:14:44,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6553.6, 300 sec: 6588.5). Total num frames: 1548288. Throughput: 0: 3242.7, 1: 3242.9. Samples: 1543918. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:14:44,497][99566] Avg episode reward: [(0, '146316.476'), (1, '147786.818')]
++[2023-09-21 15:14:44,498][101034] Saving new best policy, reward=146316.476!
++[2023-09-21 15:14:44,498][101035] Saving new best policy, reward=147786.818!
++[2023-09-21 15:14:46,115][101115] Updated weights for policy 0, policy_version 1520 (0.0014)
++[2023-09-21 15:14:46,115][101117] Updated weights for policy 1, policy_version 1520 (0.0015)
++[2023-09-21 15:14:49,496][99566] Fps is (10 sec: 5734.4, 60 sec: 6553.6, 300 sec: 6553.6). Total num frames: 1572864. Throughput: 0: 3228.2, 1: 3221.9. Samples: 1560134. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:14:49,497][99566] Avg episode reward: [(0, '148683.548'), (1, '148519.388')]
++[2023-09-21 15:14:49,498][101034] Saving new best policy, reward=148683.548!
++[2023-09-21 15:14:49,498][101035] Saving new best policy, reward=148519.388!
++[2023-09-21 15:14:54,496][99566] Fps is (10 sec: 5734.4, 60 sec: 6417.1, 300 sec: 6553.6). Total num frames: 1605632. Throughput: 0: 3169.4, 1: 3167.6. Samples: 1596444. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:14:54,497][99566] Avg episode reward: [(0, '154239.879'), (1, '148233.580')]
++[2023-09-21 15:14:54,506][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001568_802816.pth...
++[2023-09-21 15:14:54,506][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001568_802816.pth...
++[2023-09-21 15:14:54,512][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001376_704512.pth
++[2023-09-21 15:14:54,514][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001376_704512.pth
++[2023-09-21 15:14:54,514][101034] Saving new best policy, reward=154239.879!
++[2023-09-21 15:14:59,496][99566] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6520.8). Total num frames: 1630208. Throughput: 0: 3099.2, 1: 3100.9. Samples: 1632322. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
++[2023-09-21 15:14:59,498][99566] Avg episode reward: [(0, '154239.879'), (1, '148584.617')]
++[2023-09-21 15:14:59,499][101035] Saving new best policy, reward=148584.617!
++[2023-09-21 15:14:59,981][101117] Updated weights for policy 1, policy_version 1600 (0.0013)
++[2023-09-21 15:14:59,981][101115] Updated weights for policy 0, policy_version 1600 (0.0013)
++[2023-09-21 15:15:04,496][99566] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6521.5). Total num frames: 1662976. Throughput: 0: 3053.0, 1: 3054.0. Samples: 1649926. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:15:04,498][99566] Avg episode reward: [(0, '155604.981'), (1, '151719.185')]
++[2023-09-21 15:15:04,499][101034] Saving new best policy, reward=155604.981!
++[2023-09-21 15:15:04,499][101035] Saving new best policy, reward=151719.185!
++[2023-09-21 15:15:09,497][99566] Fps is (10 sec: 6553.5, 60 sec: 6280.5, 300 sec: 6522.1). Total num frames: 1695744. Throughput: 0: 3104.8, 1: 3104.4. Samples: 1687760. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:15:09,498][99566] Avg episode reward: [(0, '155604.981'), (1, '151719.185')]
++[2023-09-21 15:15:09,508][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001656_847872.pth...
++[2023-09-21 15:15:09,508][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001656_847872.pth...
++[2023-09-21 15:15:09,514][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001480_757760.pth
++[2023-09-21 15:15:09,517][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001480_757760.pth
++[2023-09-21 15:15:12,938][101115] Updated weights for policy 0, policy_version 1680 (0.0015)
++[2023-09-21 15:15:12,938][101117] Updated weights for policy 1, policy_version 1680 (0.0013)
++[2023-09-21 15:15:14,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6522.7). Total num frames: 1728512. Throughput: 0: 3141.0, 1: 3140.8. Samples: 1725508. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
++[2023-09-21 15:15:14,497][99566] Avg episode reward: [(0, '155183.801'), (1, '155086.429')]
++[2023-09-21 15:15:14,498][101035] Saving new best policy, reward=155086.429!
++[2023-09-21 15:15:19,497][99566] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 6492.9). Total num frames: 1753088. Throughput: 0: 3084.1, 1: 3084.4. Samples: 1742640. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
++[2023-09-21 15:15:19,498][99566] Avg episode reward: [(0, '155183.801'), (1, '155086.429')]
++[2023-09-21 15:15:24,497][99566] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6494.0). Total num frames: 1785856. Throughput: 0: 3047.1, 1: 3042.9. Samples: 1781016. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:15:24,497][99566] Avg episode reward: [(0, '156547.028'), (1, '156937.777')]
++[2023-09-21 15:15:24,507][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001744_892928.pth...
++[2023-09-21 15:15:24,507][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001744_892928.pth...
++[2023-09-21 15:15:24,514][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001568_802816.pth
++[2023-09-21 15:15:24,514][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001568_802816.pth
++[2023-09-21 15:15:24,515][101035] Saving new best policy, reward=156937.777!
++[2023-09-21 15:15:24,515][101034] Saving new best policy, reward=156547.028!
++[2023-09-21 15:15:25,925][101115] Updated weights for policy 0, policy_version 1760 (0.0016)
++[2023-09-21 15:15:25,926][101117] Updated weights for policy 1, policy_version 1760 (0.0016)
++[2023-09-21 15:15:29,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6280.6, 300 sec: 6495.1). Total num frames: 1818624. Throughput: 0: 3045.6, 1: 3055.0. Samples: 1818448. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:15:29,497][99566] Avg episode reward: [(0, '156547.028'), (1, '156937.777')]
++[2023-09-21 15:15:34,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6144.0, 300 sec: 6496.1). Total num frames: 1851392. Throughput: 0: 3056.4, 1: 3058.8. Samples: 1835316. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:15:34,497][99566] Avg episode reward: [(0, '157397.752'), (1, '159312.801')]
++[2023-09-21 15:15:34,498][101034] Saving new best policy, reward=157397.752!
++[2023-09-21 15:15:34,498][101035] Saving new best policy, reward=159312.801!
++[2023-09-21 15:15:39,496][99566] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6468.9). Total num frames: 1875968. Throughput: 0: 3089.1, 1: 3090.0. Samples: 1874502. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:15:39,497][99566] Avg episode reward: [(0, '156903.730'), (1, '159312.801')]
++[2023-09-21 15:15:39,554][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001840_942080.pth...
++[2023-09-21 15:15:39,555][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001840_942080.pth...
++[2023-09-21 15:15:39,558][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001656_847872.pth
++[2023-09-21 15:15:39,559][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001656_847872.pth
++[2023-09-21 15:15:39,559][101117] Updated weights for policy 1, policy_version 1840 (0.0011)
++[2023-09-21 15:15:39,560][101115] Updated weights for policy 0, policy_version 1840 (0.0013)
++[2023-09-21 15:15:44,496][99566] Fps is (10 sec: 6553.5, 60 sec: 6144.0, 300 sec: 6498.1). Total num frames: 1916928. Throughput: 0: 3111.8, 1: 3110.3. Samples: 1912318. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:15:44,498][99566] Avg episode reward: [(0, '157395.614'), (1, '159854.521')]
++[2023-09-21 15:15:44,499][101035] Saving new best policy, reward=159854.521!
++[2023-09-21 15:15:49,496][99566] Fps is (10 sec: 7372.8, 60 sec: 6280.5, 300 sec: 6553.6). Total num frames: 1949696. Throughput: 0: 3142.5, 1: 3145.9. Samples: 1932904. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
++[2023-09-21 15:15:49,497][99566] Avg episode reward: [(0, '157502.563'), (1, '159854.521')]
++[2023-09-21 15:15:49,498][101034] Saving new best policy, reward=157502.563!
++[2023-09-21 15:15:51,986][101115] Updated weights for policy 0, policy_version 1920 (0.0012)
++[2023-09-21 15:15:51,986][101117] Updated weights for policy 1, policy_version 1920 (0.0012)
++[2023-09-21 15:15:54,496][99566] Fps is (10 sec: 5734.5, 60 sec: 6144.0, 300 sec: 6498.1). Total num frames: 1974272. Throughput: 0: 3146.4, 1: 3140.3. Samples: 1970662. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:15:54,497][99566] Avg episode reward: [(0, '154117.366'), (1, '160110.948')]
++[2023-09-21 15:15:54,503][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000001928_987136.pth...
++[2023-09-21 15:15:54,503][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000001928_987136.pth...
++[2023-09-21 15:15:54,507][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001744_892928.pth
++[2023-09-21 15:15:54,507][101035] Saving new best policy, reward=160110.948!
++[2023-09-21 15:15:54,512][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001744_892928.pth
++[2023-09-21 15:15:59,497][99566] Fps is (10 sec: 6553.5, 60 sec: 6417.1, 300 sec: 6553.6). Total num frames: 2015232. Throughput: 0: 3161.0, 1: 3160.7. Samples: 2009984. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:15:59,498][99566] Avg episode reward: [(0, '152354.094'), (1, '160110.948')]
++[2023-09-21 15:16:04,496][99566] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6525.8). Total num frames: 2039808. Throughput: 0: 3201.8, 1: 3202.4. Samples: 2030826. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:16:04,497][99566] Avg episode reward: [(0, '145305.518'), (1, '161252.171')]
++[2023-09-21 15:16:04,498][101035] Saving new best policy, reward=161252.171!
++[2023-09-21 15:16:04,810][101117] Updated weights for policy 1, policy_version 2000 (0.0015)
++[2023-09-21 15:16:04,810][101115] Updated weights for policy 0, policy_version 2000 (0.0012)
++[2023-09-21 15:16:09,497][99566] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6498.1). Total num frames: 2072576. Throughput: 0: 3152.8, 1: 3155.5. Samples: 2064886. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:16:09,498][99566] Avg episode reward: [(0, '144668.785'), (1, '161252.171')]
++[2023-09-21 15:16:09,506][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002024_1036288.pth...
++[2023-09-21 15:16:09,506][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002024_1036288.pth...
++[2023-09-21 15:16:09,511][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001840_942080.pth
++[2023-09-21 15:16:09,513][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001840_942080.pth
++[2023-09-21 15:16:14,497][99566] Fps is (10 sec: 5734.3, 60 sec: 6144.0, 300 sec: 6470.3). Total num frames: 2097152. Throughput: 0: 3118.9, 1: 3111.8. Samples: 2098830. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
++[2023-09-21 15:16:14,498][99566] Avg episode reward: [(0, '143485.830'), (1, '162275.771')]
++[2023-09-21 15:16:14,500][101035] Saving new best policy, reward=162275.771!
++[2023-09-21 15:16:18,984][101115] Updated weights for policy 0, policy_version 2080 (0.0014)
++[2023-09-21 15:16:18,985][101117] Updated weights for policy 1, policy_version 2080 (0.0014)
++[2023-09-21 15:16:19,497][99566] Fps is (10 sec: 5734.4, 60 sec: 6280.5, 300 sec: 6442.5). Total num frames: 2129920. Throughput: 0: 3111.4, 1: 3107.0. Samples: 2115142. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:16:19,498][99566] Avg episode reward: [(0, '141474.267'), (1, '162345.285')]
++[2023-09-21 15:16:19,499][101035] Saving new best policy, reward=162345.285!
++[2023-09-21 15:16:24,497][99566] Fps is (10 sec: 6553.6, 60 sec: 6280.5, 300 sec: 6442.5). Total num frames: 2162688. Throughput: 0: 3106.0, 1: 3115.1. Samples: 2154452. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:16:24,498][99566] Avg episode reward: [(0, '140839.183'), (1, '163464.763')]
++[2023-09-21 15:16:24,507][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002112_1081344.pth...
++[2023-09-21 15:16:24,507][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002112_1081344.pth...
++[2023-09-21 15:16:24,516][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000001928_987136.pth
++[2023-09-21 15:16:24,516][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000001928_987136.pth
++[2023-09-21 15:16:24,517][101035] Saving new best policy, reward=163464.763!
++[2023-09-21 15:16:29,496][99566] Fps is (10 sec: 6553.7, 60 sec: 6280.5, 300 sec: 6428.6). Total num frames: 2195456. Throughput: 0: 3110.6, 1: 3111.2. Samples: 2192296. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
++[2023-09-21 15:16:29,497][99566] Avg episode reward: [(0, '141474.326'), (1, '163612.163')]
++[2023-09-21 15:16:29,498][101035] Saving new best policy, reward=163612.163!
++[2023-09-21 15:16:32,534][101115] Updated weights for policy 0, policy_version 2160 (0.0013)
++[2023-09-21 15:16:32,534][101117] Updated weights for policy 1, policy_version 2160 (0.0015)
++[2023-09-21 15:16:34,496][99566] Fps is (10 sec: 5734.5, 60 sec: 6144.0, 300 sec: 6387.0). Total num frames: 2220032. Throughput: 0: 3060.2, 1: 3058.9. Samples: 2208266. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:16:34,497][99566] Avg episode reward: [(0, '143510.663'), (1, '161964.692')]
++[2023-09-21 15:16:39,497][99566] Fps is (10 sec: 5734.3, 60 sec: 6280.5, 300 sec: 6387.0). Total num frames: 2252800. Throughput: 0: 3029.4, 1: 3028.4. Samples: 2243264. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:16:39,498][99566] Avg episode reward: [(0, '144449.158'), (1, '159709.222')]
++[2023-09-21 15:16:39,507][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002200_1126400.pth...
++[2023-09-21 15:16:39,507][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002200_1126400.pth...
++[2023-09-21 15:16:39,513][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002024_1036288.pth
++[2023-09-21 15:16:39,514][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002024_1036288.pth
++[2023-09-21 15:16:44,496][99566] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 6359.2). Total num frames: 2277376. Throughput: 0: 2993.8, 1: 2994.1. Samples: 2279440. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:16:44,497][99566] Avg episode reward: [(0, '144500.336'), (1, '159960.034')]
++[2023-09-21 15:16:46,205][101117] Updated weights for policy 1, policy_version 2240 (0.0015)
++[2023-09-21 15:16:46,206][101115] Updated weights for policy 0, policy_version 2240 (0.0013)
++[2023-09-21 15:16:49,496][99566] Fps is (10 sec: 5734.5, 60 sec: 6007.5, 300 sec: 6331.4). Total num frames: 2310144. Throughput: 0: 2960.0, 1: 2959.9. Samples: 2297222. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
++[2023-09-21 15:16:49,497][99566] Avg episode reward: [(0, '148419.870'), (1, '157559.143')]
++[2023-09-21 15:16:54,497][99566] Fps is (10 sec: 6553.4, 60 sec: 6144.0, 300 sec: 6359.2). Total num frames: 2342912. Throughput: 0: 2975.2, 1: 2973.6. Samples: 2332580. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
++[2023-09-21 15:16:54,498][99566] Avg episode reward: [(0, '148104.147'), (1, '157533.887')]
++[2023-09-21 15:16:54,507][101034] Saving ./train_dir/Standup/checkpoint_p0/checkpoint_000002288_1171456.pth...
++[2023-09-21 15:16:54,507][101035] Saving ./train_dir/Standup/checkpoint_p1/checkpoint_000002288_1171456.pth...
++[2023-09-21 15:16:54,512][101034] Removing ./train_dir/Standup/checkpoint_p0/checkpoint_000002112_1081344.pth
++[2023-09-21 15:16:54,513][101035] Removing ./train_dir/Standup/checkpoint_p1/checkpoint_000002112_1081344.pth
++[2023-09-21 15:16:59,496][99566] Fps is (10 sec: 5734.5, 60 sec: 5871.0, 300 sec: 6331.4). Total num frames: 2367488. Throughput: 0: 3009.8, 1: 3008.9. Samples: 2369672. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
++[2023-09-21 15:16:59,497][99566] Avg episode reward: [(0, '149878.068'), (1, '156400.338')]
++[2023-09-21 15:16:59,928][101115] Updated weights for policy 0, policy_vers
\ No newline at end of file
diff --git a/replay.mp4 b/replay.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..2b6693c681e295607c024719632ad235b7041605
--- /dev/null
+++ b/replay.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07cc06d5a05379a9f7be5b80814fd258727b07c89cd1934984be29ed9af72ed4
+size 25278935
diff --git a/sf_log.txt b/sf_log.txt
new file mode 100644
index 0000000000000000000000000000000000000000..30120eae2ae918cdbe9c9c453bfca91fecf1cc78
--- /dev/null
+++ b/sf_log.txt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3963309d393b038234936883664516c60f3b46984686e38e0a3012e87b244ae3
+size 50558873