diff --git a/README.md b/README.md index 602f74c4bf1e7c240f4a7a558e1ba7b95a57c63e..4038bc10ca6a9f9efeecdee49c3d99a95379d64e 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ model-index: type: Humanoid-v4 metrics: - type: mean_reward - value: 599.05 +/- 83.59 + value: 690.72 +/- 172.79 name: mean_reward verified: false --- diff --git a/events.out.tfevents.1705698231.3090-172.2590103.0 b/events.out.tfevents.1705698231.3090-172.2590103.0 deleted file mode 100644 index fa80b84d06159788a9213aa2aabe5b307e7c9e08..0000000000000000000000000000000000000000 --- a/events.out.tfevents.1705698231.3090-172.2590103.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c0e1bc1db010114cac6ae64631dc0bbdffb7a18a16c30a4333e6e3f38518083 -size 1596905 diff --git a/events.out.tfevents.1705733551.4090-171.2660473.0 b/events.out.tfevents.1705733551.4090-171.2660473.0 new file mode 100644 index 0000000000000000000000000000000000000000..1320074ee1a499045b5b22371e55d7b3a0106572 --- /dev/null +++ b/events.out.tfevents.1705733551.4090-171.2660473.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08157c5d573487b6ec26b68f9ea15e51dce97a0843161e3218141322143f4e8d +size 1576463 diff --git a/ppo_fix_continuous_action-10000.cleanrl_model b/ppo_fix_continuous_action-10000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..37e98eaf2cc21154eb1903c0528c340dcf763321 Binary files /dev/null and b/ppo_fix_continuous_action-10000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-100000.cleanrl_model b/ppo_fix_continuous_action-100000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c2dcf2035f9614c790113d260cd2b4114f32c794 Binary files /dev/null and b/ppo_fix_continuous_action-100000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-105000.cleanrl_model b/ppo_fix_continuous_action-105000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..297643aefbedd025da8ba6027f262fc720e98eb8 Binary files /dev/null and b/ppo_fix_continuous_action-105000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-110000.cleanrl_model b/ppo_fix_continuous_action-110000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d56d5a6013380e3193dd444d38fca4a9317fff6f Binary files /dev/null and b/ppo_fix_continuous_action-110000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-115000.cleanrl_model b/ppo_fix_continuous_action-115000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5804db9474786eac920fdf8b7cee1f3f17ca2383 Binary files /dev/null and b/ppo_fix_continuous_action-115000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-120000.cleanrl_model b/ppo_fix_continuous_action-120000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cb71073d992c93299169f61ff26bfb1c6224ee17 Binary files /dev/null and b/ppo_fix_continuous_action-120000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-125000.cleanrl_model b/ppo_fix_continuous_action-125000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3db424691b7b4ef774f8cbc541cb30b99d353960 Binary files /dev/null and b/ppo_fix_continuous_action-125000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-130000.cleanrl_model b/ppo_fix_continuous_action-130000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ee0637b085dc0dcaafee58e5e3071ff74f4299a1 Binary files /dev/null and b/ppo_fix_continuous_action-130000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-135000.cleanrl_model b/ppo_fix_continuous_action-135000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c43a6cb0fe97ffb35ffc9b6feae111861c2dbfbf Binary files /dev/null and b/ppo_fix_continuous_action-135000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-140000.cleanrl_model b/ppo_fix_continuous_action-140000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5b19ed840b1c57463e26708b7fd14ddeb30f9537 Binary files /dev/null and b/ppo_fix_continuous_action-140000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-145000.cleanrl_model b/ppo_fix_continuous_action-145000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bc85753e0fe49c29228a8cb552a5fab9ff2046ad Binary files /dev/null and b/ppo_fix_continuous_action-145000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-15000.cleanrl_model b/ppo_fix_continuous_action-15000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a207e59290c084c8037ec78439da5d8f1823228c Binary files /dev/null and b/ppo_fix_continuous_action-15000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-150000.cleanrl_model b/ppo_fix_continuous_action-150000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7176bd0cf754b56f629cd6b0302c0594fd5c7f69 Binary files /dev/null and b/ppo_fix_continuous_action-150000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-155000.cleanrl_model b/ppo_fix_continuous_action-155000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c1cd413fbfe3dc672e2a6f2c4268f43cd2c6015f Binary files /dev/null and b/ppo_fix_continuous_action-155000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-160000.cleanrl_model b/ppo_fix_continuous_action-160000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e335babb5ccd0665be81b17e5b0868732aebc100 Binary files /dev/null and b/ppo_fix_continuous_action-160000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-165000.cleanrl_model b/ppo_fix_continuous_action-165000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..37d5fa50490004ef9fbc97c8383b0fbc8dcc31d3 Binary files /dev/null and b/ppo_fix_continuous_action-165000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-170000.cleanrl_model b/ppo_fix_continuous_action-170000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3c573cd4a61a45a7dec74b37d724af89d7c5650e Binary files /dev/null and b/ppo_fix_continuous_action-170000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-175000.cleanrl_model b/ppo_fix_continuous_action-175000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f58172bf3074a14119bbd5b8b70714e8ff0fa6de Binary files /dev/null and b/ppo_fix_continuous_action-175000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-180000.cleanrl_model b/ppo_fix_continuous_action-180000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..53245e5b7d92995ee6779125d30f53ec49e6eece Binary files /dev/null and b/ppo_fix_continuous_action-180000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-185000.cleanrl_model b/ppo_fix_continuous_action-185000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3b0e797bee630f5b4d146a0da02459817cd6e98f Binary files /dev/null and b/ppo_fix_continuous_action-185000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-190000.cleanrl_model b/ppo_fix_continuous_action-190000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e9b4ecddcc5907e54567c48c503b6c15840d4c07 Binary files /dev/null and b/ppo_fix_continuous_action-190000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-195000.cleanrl_model b/ppo_fix_continuous_action-195000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d61d88b3e29256969614daba1bf6e8a29c1c0336 Binary files /dev/null and b/ppo_fix_continuous_action-195000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-20000.cleanrl_model b/ppo_fix_continuous_action-20000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..41edee8a65acfba19340d5b4508f060377f1c282 Binary files /dev/null and b/ppo_fix_continuous_action-20000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-200000.cleanrl_model b/ppo_fix_continuous_action-200000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d3b77dd46a42457b1f51eb4419b14ae4951828da Binary files /dev/null and b/ppo_fix_continuous_action-200000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-205000.cleanrl_model b/ppo_fix_continuous_action-205000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d1347e424a549101a47056dbbcb8d3ed53972758 Binary files /dev/null and b/ppo_fix_continuous_action-205000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-210000.cleanrl_model b/ppo_fix_continuous_action-210000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d22c160471c1c5e9442afa399f75680c77ec2d97 Binary files /dev/null and b/ppo_fix_continuous_action-210000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-215000.cleanrl_model b/ppo_fix_continuous_action-215000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9a92e100a1235dd53310899be1e0e703fc3aa6ec Binary files /dev/null and b/ppo_fix_continuous_action-215000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-220000.cleanrl_model b/ppo_fix_continuous_action-220000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c6966cdd3db9d34d35b8214459ab00056b1f71e9 Binary files /dev/null and b/ppo_fix_continuous_action-220000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-225000.cleanrl_model b/ppo_fix_continuous_action-225000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5c5f95a721ebd8b38d23575e74c964414c0d1909 Binary files /dev/null and b/ppo_fix_continuous_action-225000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-230000.cleanrl_model b/ppo_fix_continuous_action-230000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..93dfc2da7578e74585e0ba67bce702281e857c68 Binary files /dev/null and b/ppo_fix_continuous_action-230000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-235000.cleanrl_model b/ppo_fix_continuous_action-235000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f44a50f391e2c5a32dd82252d6b85fd28ae4cf5e Binary files /dev/null and b/ppo_fix_continuous_action-235000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-240000.cleanrl_model b/ppo_fix_continuous_action-240000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..44856187cadbdcb16fbb32251777c8a46bf37a09 Binary files /dev/null and b/ppo_fix_continuous_action-240000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-245000.cleanrl_model b/ppo_fix_continuous_action-245000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..278b5f433fdc3a51186321929d8aa1b19250d90f Binary files /dev/null and b/ppo_fix_continuous_action-245000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-25000.cleanrl_model b/ppo_fix_continuous_action-25000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d00cf40ad1b41a200f86a3cea2615a65352d2b82 Binary files /dev/null and b/ppo_fix_continuous_action-25000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-250000.cleanrl_model b/ppo_fix_continuous_action-250000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..64e41ac4e1aabd47843bc5cbda8c810e1dd2e7d0 Binary files /dev/null and b/ppo_fix_continuous_action-250000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-255000.cleanrl_model b/ppo_fix_continuous_action-255000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7266f123d9a89a4ddf172f969590a3213f5c9350 Binary files /dev/null and b/ppo_fix_continuous_action-255000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-260000.cleanrl_model b/ppo_fix_continuous_action-260000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..58ee9e267f720d092ca0f3c6b688bc19ea5b224e Binary files /dev/null and b/ppo_fix_continuous_action-260000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-265000.cleanrl_model b/ppo_fix_continuous_action-265000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e0cf810cc9f79950bfe63def371f4515f60cb88d Binary files /dev/null and b/ppo_fix_continuous_action-265000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-270000.cleanrl_model b/ppo_fix_continuous_action-270000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..72e853b2fd49ab681289e58d5da4d57ecc5c0f95 Binary files /dev/null and b/ppo_fix_continuous_action-270000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-275000.cleanrl_model b/ppo_fix_continuous_action-275000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e0a35e23d9263b0392eb2371b3e5642d90de16f9 Binary files /dev/null and b/ppo_fix_continuous_action-275000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-280000.cleanrl_model b/ppo_fix_continuous_action-280000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7e6c6a1159a0ee5bd6f499f35d22a9ea7c368b03 Binary files /dev/null and b/ppo_fix_continuous_action-280000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-285000.cleanrl_model b/ppo_fix_continuous_action-285000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..679bae78ab67bfd25dfa409878329378c0057911 Binary files /dev/null and b/ppo_fix_continuous_action-285000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-290000.cleanrl_model b/ppo_fix_continuous_action-290000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..228cb03ec991ad5b1cb04c692504fb607da84d68 Binary files /dev/null and b/ppo_fix_continuous_action-290000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-295000.cleanrl_model b/ppo_fix_continuous_action-295000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e46fc3a7e01c06cd963c5ea3f0c99d6e83c3e764 Binary files /dev/null and b/ppo_fix_continuous_action-295000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-30000.cleanrl_model b/ppo_fix_continuous_action-30000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..279401924e5aa17aef7a72778888cc19c9df0651 Binary files /dev/null and b/ppo_fix_continuous_action-30000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-300000.cleanrl_model b/ppo_fix_continuous_action-300000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c5a3c067c79930b87d09751ad2ec8079a2f8b174 Binary files /dev/null and b/ppo_fix_continuous_action-300000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-305000.cleanrl_model b/ppo_fix_continuous_action-305000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6199948a0a5223ae3841e6fa9370fd833259b7b6 Binary files /dev/null and b/ppo_fix_continuous_action-305000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-310000.cleanrl_model b/ppo_fix_continuous_action-310000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c3bc2d620e33b8a4f39971d803fb5b726315e626 Binary files /dev/null and b/ppo_fix_continuous_action-310000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-315000.cleanrl_model b/ppo_fix_continuous_action-315000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4e9815e77055099dcb0206f86190392f026b3e48 Binary files /dev/null and b/ppo_fix_continuous_action-315000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-320000.cleanrl_model b/ppo_fix_continuous_action-320000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..691be384365a13794559d7d2fde10ed9d05c715e Binary files /dev/null and b/ppo_fix_continuous_action-320000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-325000.cleanrl_model b/ppo_fix_continuous_action-325000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cdd80b7ffbd22546f9fad0780642ffa789019f73 Binary files /dev/null and b/ppo_fix_continuous_action-325000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-330000.cleanrl_model b/ppo_fix_continuous_action-330000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..859a97ab5a18914f63ea2f135bbeb0e0f2c34dd7 Binary files /dev/null and b/ppo_fix_continuous_action-330000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-335000.cleanrl_model b/ppo_fix_continuous_action-335000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7dda64f628712f2a625307b8bac64d04af2a0f80 Binary files /dev/null and b/ppo_fix_continuous_action-335000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-340000.cleanrl_model b/ppo_fix_continuous_action-340000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..feff4e8ef82dc32ad56d5c0a408f13e3d39400c1 Binary files /dev/null and b/ppo_fix_continuous_action-340000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-345000.cleanrl_model b/ppo_fix_continuous_action-345000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..44ec7cac5014f2cc9563c74f8dceaf74a55598ec Binary files /dev/null and b/ppo_fix_continuous_action-345000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-35000.cleanrl_model b/ppo_fix_continuous_action-35000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8ec4a6f2d464ecf544e0f6d7e28ebef94c1f8742 Binary files /dev/null and b/ppo_fix_continuous_action-35000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-350000.cleanrl_model b/ppo_fix_continuous_action-350000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9ef1b5baf44c0c95437c4a44f1f78278f3e3cc3d Binary files /dev/null and b/ppo_fix_continuous_action-350000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-355000.cleanrl_model b/ppo_fix_continuous_action-355000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..09536afa38c8e3ead373a3e24c265dba232ee577 Binary files /dev/null and b/ppo_fix_continuous_action-355000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-360000.cleanrl_model b/ppo_fix_continuous_action-360000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d149ccd2b9643e3561ee0c804dc07326fa730d41 Binary files /dev/null and b/ppo_fix_continuous_action-360000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-365000.cleanrl_model b/ppo_fix_continuous_action-365000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b91ec8a0f0e4ba1ef1419c75828ca8f6fba18f64 Binary files /dev/null and b/ppo_fix_continuous_action-365000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-370000.cleanrl_model b/ppo_fix_continuous_action-370000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..90dfd126b16ffdd081973e8438b64203f2e75378 Binary files /dev/null and b/ppo_fix_continuous_action-370000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-375000.cleanrl_model b/ppo_fix_continuous_action-375000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d1ceb8ba4a94e90526b30ece415e7fba53033d27 Binary files /dev/null and b/ppo_fix_continuous_action-375000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-380000.cleanrl_model b/ppo_fix_continuous_action-380000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..89956978e1f62c6cbf26fafb4f0a7aebb919fb07 Binary files /dev/null and b/ppo_fix_continuous_action-380000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-385000.cleanrl_model b/ppo_fix_continuous_action-385000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2aacadd0feda08efe95204d67e556d159e006f95 Binary files /dev/null and b/ppo_fix_continuous_action-385000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-390000.cleanrl_model b/ppo_fix_continuous_action-390000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..567f1cf8d5f3c4241f6bc41da139187b26bdadff Binary files /dev/null and b/ppo_fix_continuous_action-390000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-395000.cleanrl_model b/ppo_fix_continuous_action-395000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4de2a02acc2e6f93dca5c1f6fca9866319ac44f6 Binary files /dev/null and b/ppo_fix_continuous_action-395000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-40000.cleanrl_model b/ppo_fix_continuous_action-40000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d9762a5dfaf31e6055b949b580de0918a2c5cda1 Binary files /dev/null and b/ppo_fix_continuous_action-40000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-400000.cleanrl_model b/ppo_fix_continuous_action-400000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29d357e125c02fec3b7d70e966acc11e8433065e Binary files /dev/null and b/ppo_fix_continuous_action-400000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-405000.cleanrl_model b/ppo_fix_continuous_action-405000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c8cdd666334f7d0d24a87cb48d1fe2f2c09241c4 Binary files /dev/null and b/ppo_fix_continuous_action-405000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-410000.cleanrl_model b/ppo_fix_continuous_action-410000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c41e9c51630711f55d8374e6c6bff84edb150426 Binary files /dev/null and b/ppo_fix_continuous_action-410000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-415000.cleanrl_model b/ppo_fix_continuous_action-415000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bd9a74bcc240005c058af5e61688c3b4db6e8f23 Binary files /dev/null and b/ppo_fix_continuous_action-415000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-420000.cleanrl_model b/ppo_fix_continuous_action-420000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dbbe3152d38b906656fa0ddcb172a5633a73336e Binary files /dev/null and b/ppo_fix_continuous_action-420000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-425000.cleanrl_model b/ppo_fix_continuous_action-425000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..26461131640bc42bdddf97d75a3004908055dd22 Binary files /dev/null and b/ppo_fix_continuous_action-425000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-430000.cleanrl_model b/ppo_fix_continuous_action-430000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2ac29e7367c76f826efb3722123983fda72e2b97 Binary files /dev/null and b/ppo_fix_continuous_action-430000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-435000.cleanrl_model b/ppo_fix_continuous_action-435000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6a629401a9d580c4729c8d3963ca92924b4fdf0d Binary files /dev/null and b/ppo_fix_continuous_action-435000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-440000.cleanrl_model b/ppo_fix_continuous_action-440000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6f5cafb24c463e1a7a5a00a778b289b02d0cdde8 Binary files /dev/null and b/ppo_fix_continuous_action-440000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-445000.cleanrl_model b/ppo_fix_continuous_action-445000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2bf27405481f31f07f5734288ac6563d4fb53e76 Binary files /dev/null and b/ppo_fix_continuous_action-445000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-45000.cleanrl_model b/ppo_fix_continuous_action-45000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7423e2305db2cae7bdfa63abbccedada72da734b Binary files /dev/null and b/ppo_fix_continuous_action-45000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-450000.cleanrl_model b/ppo_fix_continuous_action-450000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..19c6acec7e8c98b957632692c41a379c3af94640 Binary files /dev/null and b/ppo_fix_continuous_action-450000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-455000.cleanrl_model b/ppo_fix_continuous_action-455000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b210224ef9f9a1945cab9936d9c4df29a08d343c Binary files /dev/null and b/ppo_fix_continuous_action-455000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-460000.cleanrl_model b/ppo_fix_continuous_action-460000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1a0cdef37400fea47ad6ba6884e142b68f840711 Binary files /dev/null and b/ppo_fix_continuous_action-460000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-465000.cleanrl_model b/ppo_fix_continuous_action-465000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..96c1b351d242a5af37f3632ca9a7441783b722d0 Binary files /dev/null and b/ppo_fix_continuous_action-465000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-470000.cleanrl_model b/ppo_fix_continuous_action-470000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7f8481ac0dd3afdcb94fa1ebb3452e807e1f2182 Binary files /dev/null and b/ppo_fix_continuous_action-470000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-475000.cleanrl_model b/ppo_fix_continuous_action-475000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e6f5dce7d094b368ce4dbf014fe42310f674c7db Binary files /dev/null and b/ppo_fix_continuous_action-475000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-480000.cleanrl_model b/ppo_fix_continuous_action-480000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f3a4df168d78128bffa83b980015f33fad8e18b2 Binary files /dev/null and b/ppo_fix_continuous_action-480000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-485000.cleanrl_model b/ppo_fix_continuous_action-485000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..01edd43827be778ed8eeffcf50644411209cf300 Binary files /dev/null and b/ppo_fix_continuous_action-485000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-490000.cleanrl_model b/ppo_fix_continuous_action-490000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a05214a4344a49fc06879d6c0575e29c7f2728f5 Binary files /dev/null and b/ppo_fix_continuous_action-490000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-495000.cleanrl_model b/ppo_fix_continuous_action-495000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7098707abb0473b80f319635d762a4e02011fe5b Binary files /dev/null and b/ppo_fix_continuous_action-495000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-5000.cleanrl_model b/ppo_fix_continuous_action-5000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f5da039545049f8a4e15db93843a73a713522210 Binary files /dev/null and b/ppo_fix_continuous_action-5000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-50000.cleanrl_model b/ppo_fix_continuous_action-50000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7a40a4da4320ac97232d2f498b219910f3b5239a Binary files /dev/null and b/ppo_fix_continuous_action-50000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-500000.cleanrl_model b/ppo_fix_continuous_action-500000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..93dc335750a6f2ec0879f0e3a48662d0fc53f2d6 Binary files /dev/null and b/ppo_fix_continuous_action-500000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-505000.cleanrl_model b/ppo_fix_continuous_action-505000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6e131b770943c801bda0b3f81bbec061f6a95e12 Binary files /dev/null and b/ppo_fix_continuous_action-505000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-510000.cleanrl_model b/ppo_fix_continuous_action-510000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f6b06d27bbf2552176a7158c8cc34e939c24d6d4 Binary files /dev/null and b/ppo_fix_continuous_action-510000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-515000.cleanrl_model b/ppo_fix_continuous_action-515000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2897eeecebc7226e5ff13debfe022fff84317cc1 Binary files /dev/null and b/ppo_fix_continuous_action-515000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-520000.cleanrl_model b/ppo_fix_continuous_action-520000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c2929f172149fcbe18639835cdd9e5a20dd3ddf5 Binary files /dev/null and b/ppo_fix_continuous_action-520000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-525000.cleanrl_model b/ppo_fix_continuous_action-525000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..50ab379a0f9bbf6bd21113a3e003aff40fec3dcd Binary files /dev/null and b/ppo_fix_continuous_action-525000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-530000.cleanrl_model b/ppo_fix_continuous_action-530000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..65e5cd1425953bfb387d3e2e68c9fbb02df5606a Binary files /dev/null and b/ppo_fix_continuous_action-530000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-535000.cleanrl_model b/ppo_fix_continuous_action-535000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e710ad1d165718888d8392624297c1026dc65517 Binary files /dev/null and b/ppo_fix_continuous_action-535000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-540000.cleanrl_model b/ppo_fix_continuous_action-540000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c14d83662befec31b63e4c3354ecdf25c947ea84 Binary files /dev/null and b/ppo_fix_continuous_action-540000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-545000.cleanrl_model b/ppo_fix_continuous_action-545000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3dbdce9273b186d14739ce8b6fcde1be14cc64ab Binary files /dev/null and b/ppo_fix_continuous_action-545000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-55000.cleanrl_model b/ppo_fix_continuous_action-55000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..698bf499ef7c1a341a0de3cc95f5705909e97431 Binary files /dev/null and b/ppo_fix_continuous_action-55000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-550000.cleanrl_model b/ppo_fix_continuous_action-550000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f05d3381a75de8be098ef8f3ab72092004a212aa Binary files /dev/null and b/ppo_fix_continuous_action-550000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-555000.cleanrl_model b/ppo_fix_continuous_action-555000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0a5271dc82266f18a4a97050b2e9ed68a602871a Binary files /dev/null and b/ppo_fix_continuous_action-555000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-560000.cleanrl_model b/ppo_fix_continuous_action-560000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c059adc16b1ce351ebd97ec2e6ebb7219348da72 Binary files /dev/null and b/ppo_fix_continuous_action-560000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-565000.cleanrl_model b/ppo_fix_continuous_action-565000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2839b7e20948c2937f540404a9a2a05dbb2bb60a Binary files /dev/null and b/ppo_fix_continuous_action-565000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-570000.cleanrl_model b/ppo_fix_continuous_action-570000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4777345217d1913fe735ba9ff83724147048c1b3 Binary files /dev/null and b/ppo_fix_continuous_action-570000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-575000.cleanrl_model b/ppo_fix_continuous_action-575000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b06f26eed808f4d5c547387848cbbfbca91c6173 Binary files /dev/null and b/ppo_fix_continuous_action-575000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-580000.cleanrl_model b/ppo_fix_continuous_action-580000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0ac9d179b6dc65ea4c0d23211f4f10a1657082e3 Binary files /dev/null and b/ppo_fix_continuous_action-580000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-585000.cleanrl_model b/ppo_fix_continuous_action-585000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5b5f141e8f2238d975285d6bdba2d092d8ab806a Binary files /dev/null and b/ppo_fix_continuous_action-585000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-590000.cleanrl_model b/ppo_fix_continuous_action-590000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e86fb64f9a3c97c2bdbc4e7c5b52d843a3ca72c9 Binary files /dev/null and b/ppo_fix_continuous_action-590000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-595000.cleanrl_model b/ppo_fix_continuous_action-595000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b4a99809e4fb8e8b67199fdc5b7319666a0d5331 Binary files /dev/null and b/ppo_fix_continuous_action-595000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-60000.cleanrl_model b/ppo_fix_continuous_action-60000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fe9d1de85f31a3696193d2a6fdcdcf81b94cb6d6 Binary files /dev/null and b/ppo_fix_continuous_action-60000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-600000.cleanrl_model b/ppo_fix_continuous_action-600000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5fec87bf53301121248ab7f8211e3db9587769c5 Binary files /dev/null and b/ppo_fix_continuous_action-600000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-605000.cleanrl_model b/ppo_fix_continuous_action-605000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b204c65edf8a026b3042efca3ff88a62abcfa2c2 Binary files /dev/null and b/ppo_fix_continuous_action-605000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-610000.cleanrl_model b/ppo_fix_continuous_action-610000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a918afe38874ab3009e2b9e7784975fb0462de42 Binary files /dev/null and b/ppo_fix_continuous_action-610000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-615000.cleanrl_model b/ppo_fix_continuous_action-615000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..76cb1d42b447a45e3fb7f32df8532fdb4339f354 Binary files /dev/null and b/ppo_fix_continuous_action-615000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-620000.cleanrl_model b/ppo_fix_continuous_action-620000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7d10a5b4e3e348914b69f585a5359ab178dc5606 Binary files /dev/null and b/ppo_fix_continuous_action-620000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-625000.cleanrl_model b/ppo_fix_continuous_action-625000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..842971cd8a68547d5219525dfedc6bec012a4389 Binary files /dev/null and b/ppo_fix_continuous_action-625000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-630000.cleanrl_model b/ppo_fix_continuous_action-630000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..751912c24c2bf3421458976637f99fee1ec444d0 Binary files /dev/null and b/ppo_fix_continuous_action-630000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-635000.cleanrl_model b/ppo_fix_continuous_action-635000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..db5f003842aefaa86185f260550c170069b28111 Binary files /dev/null and b/ppo_fix_continuous_action-635000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-640000.cleanrl_model b/ppo_fix_continuous_action-640000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..861aba4443f30bc243a1828aa942f37dc0c0ce04 Binary files /dev/null and b/ppo_fix_continuous_action-640000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-645000.cleanrl_model b/ppo_fix_continuous_action-645000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b328d49559323866235c5342cbf4843bb2499b40 Binary files /dev/null and b/ppo_fix_continuous_action-645000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-65000.cleanrl_model b/ppo_fix_continuous_action-65000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6d6ddbfd12230852df9af808f35b538331af14df Binary files /dev/null and b/ppo_fix_continuous_action-65000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-650000.cleanrl_model b/ppo_fix_continuous_action-650000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a90f4e5fab020e7a6f5359910bfb9ff799896c8e Binary files /dev/null and b/ppo_fix_continuous_action-650000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-655000.cleanrl_model b/ppo_fix_continuous_action-655000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c5c7913ba485dc111c99bbba937983e794a83993 Binary files /dev/null and b/ppo_fix_continuous_action-655000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-660000.cleanrl_model b/ppo_fix_continuous_action-660000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..59d582f02e709382bd01ddf719c9ac0f25b1263f Binary files /dev/null and b/ppo_fix_continuous_action-660000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-665000.cleanrl_model b/ppo_fix_continuous_action-665000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fa994c032418aa3ace11a69b39b37c5ef5f5aa9c Binary files /dev/null and b/ppo_fix_continuous_action-665000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-670000.cleanrl_model b/ppo_fix_continuous_action-670000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f6523706b01e001cedf201dc6f0d43af80f4be33 Binary files /dev/null and b/ppo_fix_continuous_action-670000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-675000.cleanrl_model b/ppo_fix_continuous_action-675000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cbbbb43ce495a122fe1151069cf7a46551b0564b Binary files /dev/null and b/ppo_fix_continuous_action-675000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-680000.cleanrl_model b/ppo_fix_continuous_action-680000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6e40b796ab0162f7c8addab723d067e223c2735a Binary files /dev/null and b/ppo_fix_continuous_action-680000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-685000.cleanrl_model b/ppo_fix_continuous_action-685000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e0164cceb27ed7e66aaf951a6122e50f91780782 Binary files /dev/null and b/ppo_fix_continuous_action-685000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-690000.cleanrl_model b/ppo_fix_continuous_action-690000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b74f78f6693519c2feda7592c0953fe8cb199d02 Binary files /dev/null and b/ppo_fix_continuous_action-690000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-695000.cleanrl_model b/ppo_fix_continuous_action-695000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..322f3af275fcf7d15a0c17ed18f95f53a8be27f8 Binary files /dev/null and b/ppo_fix_continuous_action-695000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-70000.cleanrl_model b/ppo_fix_continuous_action-70000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..598a7fe24832f61d6a45508e38ad5158f127da61 Binary files /dev/null and b/ppo_fix_continuous_action-70000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-700000.cleanrl_model b/ppo_fix_continuous_action-700000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..95fd9f154db509602cb9da444a47cc355b94e114 Binary files /dev/null and b/ppo_fix_continuous_action-700000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-705000.cleanrl_model b/ppo_fix_continuous_action-705000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..51e64b079b475338964936d7879ed237d58a438d Binary files /dev/null and b/ppo_fix_continuous_action-705000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-710000.cleanrl_model b/ppo_fix_continuous_action-710000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5bed2c46152a722b5e711723e33ccaa51c50d648 Binary files /dev/null and b/ppo_fix_continuous_action-710000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-715000.cleanrl_model b/ppo_fix_continuous_action-715000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e0da8672225df1dc34e8d9c5663ad3e1b8bd6e34 Binary files /dev/null and b/ppo_fix_continuous_action-715000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-720000.cleanrl_model b/ppo_fix_continuous_action-720000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f17138ad9d26fe94db6bcda4fdc67d3fe3ce615c Binary files /dev/null and b/ppo_fix_continuous_action-720000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-725000.cleanrl_model b/ppo_fix_continuous_action-725000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d62487b2e6dba03bde2f66ef7ee83b254cc089b5 Binary files /dev/null and b/ppo_fix_continuous_action-725000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-730000.cleanrl_model b/ppo_fix_continuous_action-730000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..648c2d933c4f4cf7c99d180b494718f2149221cf Binary files /dev/null and b/ppo_fix_continuous_action-730000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-735000.cleanrl_model b/ppo_fix_continuous_action-735000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c4822afe68ee28767f9692605a241045e559e855 Binary files /dev/null and b/ppo_fix_continuous_action-735000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-740000.cleanrl_model b/ppo_fix_continuous_action-740000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6912058679167d4bc2b883f10a71a53c9b694e99 Binary files /dev/null and b/ppo_fix_continuous_action-740000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-745000.cleanrl_model b/ppo_fix_continuous_action-745000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3a6ab2f1f251727a5344998a0298b30882181388 Binary files /dev/null and b/ppo_fix_continuous_action-745000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-75000.cleanrl_model b/ppo_fix_continuous_action-75000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d5e779e484c0ac9ff04658c4545350b55b0460a4 Binary files /dev/null and b/ppo_fix_continuous_action-75000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-750000.cleanrl_model b/ppo_fix_continuous_action-750000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..18c51e98d9f6a83953f390064c02dad5282bb97c Binary files /dev/null and b/ppo_fix_continuous_action-750000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-755000.cleanrl_model b/ppo_fix_continuous_action-755000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a64f0ea7d076000aefb022b8af1ea6a8e5af5bc7 Binary files /dev/null and b/ppo_fix_continuous_action-755000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-760000.cleanrl_model b/ppo_fix_continuous_action-760000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..582e3ee8a73c68931fa4d9622e89f894f9a877e8 Binary files /dev/null and b/ppo_fix_continuous_action-760000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-765000.cleanrl_model b/ppo_fix_continuous_action-765000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..98fd3f30e8d9ab458114360c512985df3845c254 Binary files /dev/null and b/ppo_fix_continuous_action-765000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-770000.cleanrl_model b/ppo_fix_continuous_action-770000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ba3cd548f87ea28685a45d94cbb608bd281593dc Binary files /dev/null and b/ppo_fix_continuous_action-770000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-775000.cleanrl_model b/ppo_fix_continuous_action-775000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..137ec1100f37393937196fcb9b5f4b55446a6f8e Binary files /dev/null and b/ppo_fix_continuous_action-775000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-780000.cleanrl_model b/ppo_fix_continuous_action-780000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..776361279114b3483787349baae32d4f0695f650 Binary files /dev/null and b/ppo_fix_continuous_action-780000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-785000.cleanrl_model b/ppo_fix_continuous_action-785000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ff5b75665d98cfb815ce583ac5d0cd6c1bd46ea5 Binary files /dev/null and b/ppo_fix_continuous_action-785000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-790000.cleanrl_model b/ppo_fix_continuous_action-790000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9153fd8e6eb10f341372ca97c661b3d63b0de2ae Binary files /dev/null and b/ppo_fix_continuous_action-790000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-795000.cleanrl_model b/ppo_fix_continuous_action-795000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d8fb04a81aa7b1fc60b9e616fe5d61f66e160616 Binary files /dev/null and b/ppo_fix_continuous_action-795000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-80000.cleanrl_model b/ppo_fix_continuous_action-80000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3e68792eabd952a271de13e9a0c02afe4fbff5e3 Binary files /dev/null and b/ppo_fix_continuous_action-80000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-800000.cleanrl_model b/ppo_fix_continuous_action-800000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fa5f2320efa5c2cfb3d1aec3c1c811b25fa090ff Binary files /dev/null and b/ppo_fix_continuous_action-800000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-805000.cleanrl_model b/ppo_fix_continuous_action-805000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ceee82cd8769d16fa561cee6c47a4c17c69cfed1 Binary files /dev/null and b/ppo_fix_continuous_action-805000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-810000.cleanrl_model b/ppo_fix_continuous_action-810000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..14ee47ecae3e3954169f70363185c20fd821ac69 Binary files /dev/null and b/ppo_fix_continuous_action-810000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-815000.cleanrl_model b/ppo_fix_continuous_action-815000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0fed5c76f641ee8db03372ef5d851a431fce77c3 Binary files /dev/null and b/ppo_fix_continuous_action-815000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-820000.cleanrl_model b/ppo_fix_continuous_action-820000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..153be72f2ce62c6d5d517f0d9f439cb59fe7feb4 Binary files /dev/null and b/ppo_fix_continuous_action-820000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-825000.cleanrl_model b/ppo_fix_continuous_action-825000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c28a4f453ffb16c98da591f1f8508756cddf9d9b Binary files /dev/null and b/ppo_fix_continuous_action-825000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-830000.cleanrl_model b/ppo_fix_continuous_action-830000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7f57d9f3a3384985d792fc92e72374313489d591 Binary files /dev/null and b/ppo_fix_continuous_action-830000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-835000.cleanrl_model b/ppo_fix_continuous_action-835000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cabb4ef2de935cefd9679cf35e64151ce0b7a918 Binary files /dev/null and b/ppo_fix_continuous_action-835000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-840000.cleanrl_model b/ppo_fix_continuous_action-840000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9b734d7f9ab3a52550f70778843aec8b0ef0a9e2 Binary files /dev/null and b/ppo_fix_continuous_action-840000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-845000.cleanrl_model b/ppo_fix_continuous_action-845000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9b6777761191dc62cf99a3edae3323412ae94936 Binary files /dev/null and b/ppo_fix_continuous_action-845000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-85000.cleanrl_model b/ppo_fix_continuous_action-85000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c5dd4499de2558eccd76da460dc91b68f925393a Binary files /dev/null and b/ppo_fix_continuous_action-85000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-850000.cleanrl_model b/ppo_fix_continuous_action-850000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..566079787d0d6ebceffea08f64b2991c385dec4b Binary files /dev/null and b/ppo_fix_continuous_action-850000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-855000.cleanrl_model b/ppo_fix_continuous_action-855000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5b5a5d806e7b3c71bc20234e51bfe632b0976893 Binary files /dev/null and b/ppo_fix_continuous_action-855000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-860000.cleanrl_model b/ppo_fix_continuous_action-860000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0568e06e97b598048ab6135a025f9d4018eb41b1 Binary files /dev/null and b/ppo_fix_continuous_action-860000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-865000.cleanrl_model b/ppo_fix_continuous_action-865000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b5ff00fe0c9cb60beab1edce5c52b91d61b0a24b Binary files /dev/null and b/ppo_fix_continuous_action-865000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-870000.cleanrl_model b/ppo_fix_continuous_action-870000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0323945bcfa4049f386303aaf21744f64b03a7ef Binary files /dev/null and b/ppo_fix_continuous_action-870000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-875000.cleanrl_model b/ppo_fix_continuous_action-875000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6a9e2d8a8328001f1067a5d8e5e1d856d17067e4 Binary files /dev/null and b/ppo_fix_continuous_action-875000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-880000.cleanrl_model b/ppo_fix_continuous_action-880000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..45dbd15447ce907d70b4dd7ff4e03825c4f55f7d Binary files /dev/null and b/ppo_fix_continuous_action-880000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-885000.cleanrl_model b/ppo_fix_continuous_action-885000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7f0df4e63271981194b977af54690ab03e5e0e5c Binary files /dev/null and b/ppo_fix_continuous_action-885000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-890000.cleanrl_model b/ppo_fix_continuous_action-890000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c54a27c874def98a62679327ece3f24586728123 Binary files /dev/null and b/ppo_fix_continuous_action-890000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-895000.cleanrl_model b/ppo_fix_continuous_action-895000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c53bac73003580fc2cc2e45c26b4324ffa412ea1 Binary files /dev/null and b/ppo_fix_continuous_action-895000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-90000.cleanrl_model b/ppo_fix_continuous_action-90000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..31c332515fde3fa6ba1d24b1501a45ec2bbf7f05 Binary files /dev/null and b/ppo_fix_continuous_action-90000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-900000.cleanrl_model b/ppo_fix_continuous_action-900000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bbe30ecf68ba86f9b0ad6a4ec03c39f32631dcd6 Binary files /dev/null and b/ppo_fix_continuous_action-900000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-905000.cleanrl_model b/ppo_fix_continuous_action-905000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8278291f3ed896dab062b8571096f465738dc418 Binary files /dev/null and b/ppo_fix_continuous_action-905000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-910000.cleanrl_model b/ppo_fix_continuous_action-910000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a8d3f952aa682a6453f93a0610bb1e4e83ff7b27 Binary files /dev/null and b/ppo_fix_continuous_action-910000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-915000.cleanrl_model b/ppo_fix_continuous_action-915000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d282b916d97a7af89b2aa7e55af3036a3d447862 Binary files /dev/null and b/ppo_fix_continuous_action-915000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-920000.cleanrl_model b/ppo_fix_continuous_action-920000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c834e14ed84387c7f56e677f91c34e7ab108674b Binary files /dev/null and b/ppo_fix_continuous_action-920000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-925000.cleanrl_model b/ppo_fix_continuous_action-925000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9317507d777e3811bd918e0be699042261cf361c Binary files /dev/null and b/ppo_fix_continuous_action-925000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-930000.cleanrl_model b/ppo_fix_continuous_action-930000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5021e9cefdb7084fc58d64520fbd3769e28b1ff2 Binary files /dev/null and b/ppo_fix_continuous_action-930000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-935000.cleanrl_model b/ppo_fix_continuous_action-935000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..01ce694385ce4359f3bbd6d093b9ded3571a44de Binary files /dev/null and b/ppo_fix_continuous_action-935000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-940000.cleanrl_model b/ppo_fix_continuous_action-940000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..94d92688416e890f2a4a6de1a298ad2572391fac Binary files /dev/null and b/ppo_fix_continuous_action-940000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-945000.cleanrl_model b/ppo_fix_continuous_action-945000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..281f4a8933d125f618e6d8df5493fc49be537a51 Binary files /dev/null and b/ppo_fix_continuous_action-945000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-95000.cleanrl_model b/ppo_fix_continuous_action-95000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f231a2cb7f1995a51e6a73986d45ef500f6462bf Binary files /dev/null and b/ppo_fix_continuous_action-95000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-950000.cleanrl_model b/ppo_fix_continuous_action-950000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bcbae46a235960618a1007df1b43927c8b9579e3 Binary files /dev/null and b/ppo_fix_continuous_action-950000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-955000.cleanrl_model b/ppo_fix_continuous_action-955000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b0907af6c33461a31b173d5d258722b500ab2798 Binary files /dev/null and b/ppo_fix_continuous_action-955000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-960000.cleanrl_model b/ppo_fix_continuous_action-960000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e3082df975bd9ca9b26c5891f78456bdef1a3113 Binary files /dev/null and b/ppo_fix_continuous_action-960000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-965000.cleanrl_model b/ppo_fix_continuous_action-965000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7ac08f34b436babcbd5295cd7c878aeb019b3fb9 Binary files /dev/null and b/ppo_fix_continuous_action-965000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-970000.cleanrl_model b/ppo_fix_continuous_action-970000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..db7041501e2a67a57e7f9b3fecca44b62389839b Binary files /dev/null and b/ppo_fix_continuous_action-970000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-975000.cleanrl_model b/ppo_fix_continuous_action-975000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c425da937f8f3f28b68548a88bd64b2b75e0f894 Binary files /dev/null and b/ppo_fix_continuous_action-975000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-980000.cleanrl_model b/ppo_fix_continuous_action-980000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..741017fc46a236ca2b7644b89970227e623f173c Binary files /dev/null and b/ppo_fix_continuous_action-980000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-985000.cleanrl_model b/ppo_fix_continuous_action-985000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b0843f5bfae87248daf760738eaea7053b635419 Binary files /dev/null and b/ppo_fix_continuous_action-985000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-990000.cleanrl_model b/ppo_fix_continuous_action-990000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b3a737a37d93ffc2e2e17cf0a0511895d4016b8c Binary files /dev/null and b/ppo_fix_continuous_action-990000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-995000.cleanrl_model b/ppo_fix_continuous_action-995000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0c4fc9c767d067ec6d5fb500f65b18dc1dd72b33 Binary files /dev/null and b/ppo_fix_continuous_action-995000.cleanrl_model differ diff --git a/ppo_fix_continuous_action.cleanrl_model b/ppo_fix_continuous_action.cleanrl_model index 76197f029da9500afbd7a2ca864aac2a13599edc..fa9eeacc7eb118162231807d1131e73c5a814c42 100644 Binary files a/ppo_fix_continuous_action.cleanrl_model and b/ppo_fix_continuous_action.cleanrl_model differ diff --git a/ppo_fix_continuous_action.py b/ppo_fix_continuous_action.py index f204e4e83e13f5da11c5e6e100efdc9b35ea4674..0d9b7c61f4d8f53a5f7c70e7065fa07fe9ad7a57 100644 --- a/ppo_fix_continuous_action.py +++ b/ppo_fix_continuous_action.py @@ -229,7 +229,7 @@ def evaluate( envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, capture_video, run_name, agent.obs_rms)]) obs, _ = envs.reset() - episodic_returns = [] + episodic_returns, episodic_lengths = [], [] while len(episodic_returns) < eval_episodes: actions, _, _, _ = agent.get_action_and_value(torch.Tensor(obs).to(device)) next_obs, _, _, _, infos = envs.step(actions.cpu().numpy()) @@ -239,9 +239,10 @@ def evaluate( continue print(f"eval_episode={len(episodic_returns)}, episodic_return={info['episode']['r']}") episodic_returns += [info["episode"]["r"]] + episodic_lengths += [info["episode"]["l"]] obs = next_obs - return episodic_returns + return episodic_returns, episodic_lengths def make_env(env_id, idx, capture_video, run_name, gamma): @@ -436,6 +437,28 @@ if __name__ == "__main__": terminal_value = agent.get_value(torch.Tensor(real_next_obs).to(device)).reshape(1, -1)[0][0] rewards[step][idx] += args.gamma * terminal_value + if global_step % (5000 // args.num_envs * args.num_envs) == 0: + obs_rms, return_rms = get_rms(envs.envs[0]) + agent.obs_rms = copy.deepcopy(get_rms(envs.envs[0])[0]) + model_path = f"runs/{run_name}/{args.exp_name}-{global_step}.cleanrl_model" + torch.save(agent.state_dict(), model_path) + print(f"model saved to {model_path}") + + episodic_returns, episodic_lengths = evaluate( + model_path, + make_eval_env, + args.env_id, + eval_episodes=3, + run_name=f"{run_name}-eval", + Model=Agent, + device=device, + capture_video=False, + ) + + print(episodic_returns, episodic_lengths) + writer.add_scalar("charts/eval/episodic_return", np.mean(episodic_returns), global_step) + writer.add_scalar("charts/eval/episodic_length", np.mean(episodic_lengths), global_step) + # Only print when at least 1 env is done if "final_info" not in infos: continue @@ -549,7 +572,7 @@ if __name__ == "__main__": torch.save(agent.state_dict(), model_path) print(f"model saved to {model_path}") - episodic_returns = evaluate( + episodic_returns, episodic_lengths = evaluate( model_path, make_eval_env, args.env_id, diff --git a/replay.mp4 b/replay.mp4 index 0dfb218db97cc4bf910da22e2f01ea916f73fb6e..4c335b236b4cebb44a89f9456899aa719e8e95c6 100644 Binary files a/replay.mp4 and b/replay.mp4 differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-0.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-0.mp4 deleted file mode 100644 index d9279450e519223a3f8ecbf64ef4ff576638a9a3..0000000000000000000000000000000000000000 Binary files a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-0.mp4 and /dev/null differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-1.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-1.mp4 deleted file mode 100644 index 8a4feacd19e18c72dd31faefe519a14ef95c76e4..0000000000000000000000000000000000000000 Binary files a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-1.mp4 and /dev/null differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-8.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-8.mp4 deleted file mode 100644 index 0dfb218db97cc4bf910da22e2f01ea916f73fb6e..0000000000000000000000000000000000000000 Binary files a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705698224-eval/rl-video-episode-8.mp4 and /dev/null differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-0.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f099fc9f0a1a71f50078cf286b38f4c02799ca1a Binary files /dev/null and b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-0.mp4 differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-1.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8e1901bf85fdf53ab2ce1d346d5f86b0757169b1 Binary files /dev/null and b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-1.mp4 differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-8.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-8.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4c335b236b4cebb44a89f9456899aa719e8e95c6 Binary files /dev/null and b/videos/Humanoid-v4__ppo_fix_continuous_action__2__1705733530-eval/rl-video-episode-8.mp4 differ