diff --git a/.gitattributes b/.gitattributes index e3350f01881eecfd48d71467ed09f174ec47d59b..041c5a432c01c610face74c7c6b3ba7d039c372b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -40,3 +40,4 @@ videos/Hopper-v4__ppo_fix_continuous_action__5__1704452497-eval/rl-video-episode videos/Hopper-v4__ppo_fix_continuous_action__5__1704452497-eval/rl-video-episode-0.mp4 filter=lfs diff=lfs merge=lfs -text replay.mp4 filter=lfs diff=lfs merge=lfs -text videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-8.mp4 filter=lfs diff=lfs merge=lfs -text +videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-1.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index a4b46dd9afdbdb38cc4b3011a052c32b534fd442..c390d805c25063236759b5be682f6b4dc44bdfec 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ model-index: type: Hopper-v4 metrics: - type: mean_reward - value: 2236.19 +/- 642.91 + value: 2504.30 +/- 688.11 name: mean_reward verified: false --- diff --git a/events.out.tfevents.1705691779.3090-172.2535141.0 b/events.out.tfevents.1705691779.3090-172.2535141.0 deleted file mode 100644 index 37cfd2198e04a62f513fe5162049b0694449cdc9..0000000000000000000000000000000000000000 --- a/events.out.tfevents.1705691779.3090-172.2535141.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2b1d7245be99b91a61381464144f47df31278c189db6ad4869e7b980f00cedac -size 599723 diff --git a/events.out.tfevents.1705726116.4090-171.2579631.0 b/events.out.tfevents.1705726116.4090-171.2579631.0 new file mode 100644 index 0000000000000000000000000000000000000000..a7aa62e94b22d07e81719d7a0ab1141d6d1464c9 --- /dev/null +++ b/events.out.tfevents.1705726116.4090-171.2579631.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8ba926ac6f1e47db851e1156729d4ac28f72ca4da1b14c6b17bbfe1b04a4e7 +size 575959 diff --git a/ppo_fix_continuous_action-10000.cleanrl_model b/ppo_fix_continuous_action-10000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..16bd2ed804eb7dfb99753f9401bd1cf47ad7b044 Binary files /dev/null and b/ppo_fix_continuous_action-10000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-100000.cleanrl_model b/ppo_fix_continuous_action-100000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..622beaf54d269c7ba814461c89ed2d143cf2490b Binary files /dev/null and b/ppo_fix_continuous_action-100000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-105000.cleanrl_model b/ppo_fix_continuous_action-105000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..304dd5141c6b28fac12c504148691bf9ff047e1f Binary files /dev/null and b/ppo_fix_continuous_action-105000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-110000.cleanrl_model b/ppo_fix_continuous_action-110000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0827365731ae2ef0ebc18ede98668f018ea98466 Binary files /dev/null and b/ppo_fix_continuous_action-110000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-115000.cleanrl_model b/ppo_fix_continuous_action-115000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fc7ee9d3bfb556b4d439f448af61c50e536ee81f Binary files /dev/null and b/ppo_fix_continuous_action-115000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-120000.cleanrl_model b/ppo_fix_continuous_action-120000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2b5dcaf93ae985b3f14f7b044ab00dd1402b8316 Binary files /dev/null and b/ppo_fix_continuous_action-120000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-125000.cleanrl_model b/ppo_fix_continuous_action-125000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..23e9e1ed77236b6eb90f9dade02f88eb508581de Binary files /dev/null and b/ppo_fix_continuous_action-125000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-130000.cleanrl_model b/ppo_fix_continuous_action-130000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d17a5d0c096c1a4a5334d8985f2b3a832884b292 Binary files /dev/null and b/ppo_fix_continuous_action-130000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-135000.cleanrl_model b/ppo_fix_continuous_action-135000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a0236c5270e41dc2dd84d4742ddb64877e52f2df Binary files /dev/null and b/ppo_fix_continuous_action-135000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-140000.cleanrl_model b/ppo_fix_continuous_action-140000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f0f60f0ff0fa051ff26413902dceff91464a3f58 Binary files /dev/null and b/ppo_fix_continuous_action-140000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-145000.cleanrl_model b/ppo_fix_continuous_action-145000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1d91e8ed8fdad593188e11b4707825ad4aab3020 Binary files /dev/null and b/ppo_fix_continuous_action-145000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-15000.cleanrl_model b/ppo_fix_continuous_action-15000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0164b5b360b7e3be8a4d0ded5fe65c4f57f688f5 Binary files /dev/null and b/ppo_fix_continuous_action-15000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-150000.cleanrl_model b/ppo_fix_continuous_action-150000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..23f18cb0d867801b844e1c5256a1d2807c2e57c3 Binary files /dev/null and b/ppo_fix_continuous_action-150000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-155000.cleanrl_model b/ppo_fix_continuous_action-155000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..79e608e5d2f578a88d1122c2125a1373f881cac7 Binary files /dev/null and b/ppo_fix_continuous_action-155000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-160000.cleanrl_model b/ppo_fix_continuous_action-160000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e025e9295cddc05c6bec434f9b60a21f58398c10 Binary files /dev/null and b/ppo_fix_continuous_action-160000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-165000.cleanrl_model b/ppo_fix_continuous_action-165000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6f7e4efa2638cd5f9c1f31e25e09f988565cc3d3 Binary files /dev/null and b/ppo_fix_continuous_action-165000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-170000.cleanrl_model b/ppo_fix_continuous_action-170000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4c6714e4942d45df4a2d3c2bb962cf6f8c2acf1d Binary files /dev/null and b/ppo_fix_continuous_action-170000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-175000.cleanrl_model b/ppo_fix_continuous_action-175000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6ff2008a95c7a01176e1752a124b82a95c5637ab Binary files /dev/null and b/ppo_fix_continuous_action-175000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-180000.cleanrl_model b/ppo_fix_continuous_action-180000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4735d57fa0865c562011096f8fa6f5ebfcadc18b Binary files /dev/null and b/ppo_fix_continuous_action-180000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-185000.cleanrl_model b/ppo_fix_continuous_action-185000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..370bc5d8dbbf6d57455fae2154e2243a7794268c Binary files /dev/null and b/ppo_fix_continuous_action-185000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-190000.cleanrl_model b/ppo_fix_continuous_action-190000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c10124e91167433352ccb3a79b6ec14315d6f436 Binary files /dev/null and b/ppo_fix_continuous_action-190000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-195000.cleanrl_model b/ppo_fix_continuous_action-195000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1dcb0b6efe4fe3af34079e10bd36130cb47b5037 Binary files /dev/null and b/ppo_fix_continuous_action-195000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-20000.cleanrl_model b/ppo_fix_continuous_action-20000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bd88ce7e95d5712ef8a05afdb6c0947ac6200f12 Binary files /dev/null and b/ppo_fix_continuous_action-20000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-200000.cleanrl_model b/ppo_fix_continuous_action-200000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..77d7fe2e107a384e2964f2d96520db32912bd8e7 Binary files /dev/null and b/ppo_fix_continuous_action-200000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-205000.cleanrl_model b/ppo_fix_continuous_action-205000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bcb4de216f855e5b9b04150d9267722e0c597129 Binary files /dev/null and b/ppo_fix_continuous_action-205000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-210000.cleanrl_model b/ppo_fix_continuous_action-210000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6cd95c4c6c52c228daf8f99c1bcc4e00b9dbfe92 Binary files /dev/null and b/ppo_fix_continuous_action-210000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-215000.cleanrl_model b/ppo_fix_continuous_action-215000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5fdfff3ec61866c10f92caf6c4e05fbd7093164e Binary files /dev/null and b/ppo_fix_continuous_action-215000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-220000.cleanrl_model b/ppo_fix_continuous_action-220000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..40d678d353d326c36e5b99638272c1764f00650a Binary files /dev/null and b/ppo_fix_continuous_action-220000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-225000.cleanrl_model b/ppo_fix_continuous_action-225000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5f031660120fabe5847b9336e53ab89eed072e36 Binary files /dev/null and b/ppo_fix_continuous_action-225000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-230000.cleanrl_model b/ppo_fix_continuous_action-230000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9a0ca435d49d8d13030847b826f8192718093d09 Binary files /dev/null and b/ppo_fix_continuous_action-230000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-235000.cleanrl_model b/ppo_fix_continuous_action-235000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ec548e2297f1fb8825dfa449f2f86c698a388161 Binary files /dev/null and b/ppo_fix_continuous_action-235000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-240000.cleanrl_model b/ppo_fix_continuous_action-240000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..67e18cb64a0290ba4ae73ce752cb45aeecf38a1e Binary files /dev/null and b/ppo_fix_continuous_action-240000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-245000.cleanrl_model b/ppo_fix_continuous_action-245000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..041f4663763afb14bbd7e8c76cfeadc39ebcae99 Binary files /dev/null and b/ppo_fix_continuous_action-245000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-25000.cleanrl_model b/ppo_fix_continuous_action-25000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5629606553fd0bb9d2b1b340984efd3ea2ba7f88 Binary files /dev/null and b/ppo_fix_continuous_action-25000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-250000.cleanrl_model b/ppo_fix_continuous_action-250000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..53b53812578cd070d33ffc8aab232f099f06319e Binary files /dev/null and b/ppo_fix_continuous_action-250000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-255000.cleanrl_model b/ppo_fix_continuous_action-255000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a778e663dcc24ebcb40ab29a11bc03b36a87d12d Binary files /dev/null and b/ppo_fix_continuous_action-255000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-260000.cleanrl_model b/ppo_fix_continuous_action-260000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0062f35ad98a011373c895c4cece86268067aeef Binary files /dev/null and b/ppo_fix_continuous_action-260000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-265000.cleanrl_model b/ppo_fix_continuous_action-265000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..32bbec4291f3a842477108b38af3ba212e1a3152 Binary files /dev/null and b/ppo_fix_continuous_action-265000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-270000.cleanrl_model b/ppo_fix_continuous_action-270000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4fa1d025ef31d2271a5edd04a09709e316c44e18 Binary files /dev/null and b/ppo_fix_continuous_action-270000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-275000.cleanrl_model b/ppo_fix_continuous_action-275000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a8187584fac91ee0cdb25d6cb6d4f2430d074caa Binary files /dev/null and b/ppo_fix_continuous_action-275000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-280000.cleanrl_model b/ppo_fix_continuous_action-280000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d822013d47d78089e7883ee3f26f999e3b79736a Binary files /dev/null and b/ppo_fix_continuous_action-280000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-285000.cleanrl_model b/ppo_fix_continuous_action-285000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..76d9791792785c1fc5dfb5fa0359bb54126fa2f5 Binary files /dev/null and b/ppo_fix_continuous_action-285000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-290000.cleanrl_model b/ppo_fix_continuous_action-290000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4b51b00e660fa22eedf9666daed8b214277d3199 Binary files /dev/null and b/ppo_fix_continuous_action-290000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-295000.cleanrl_model b/ppo_fix_continuous_action-295000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3bef0766e217cc1f1958b6957066578cf291aaeb Binary files /dev/null and b/ppo_fix_continuous_action-295000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-30000.cleanrl_model b/ppo_fix_continuous_action-30000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..05eaa203a38bcb9482bd2663442ca5b01bba9eb9 Binary files /dev/null and b/ppo_fix_continuous_action-30000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-300000.cleanrl_model b/ppo_fix_continuous_action-300000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..67329d35aff4b5ca4721f2d3c50596cc6affe7dd Binary files /dev/null and b/ppo_fix_continuous_action-300000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-305000.cleanrl_model b/ppo_fix_continuous_action-305000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a84d6d1b3cf44639bb34e9951f9aaecb98696855 Binary files /dev/null and b/ppo_fix_continuous_action-305000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-310000.cleanrl_model b/ppo_fix_continuous_action-310000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4bfbc20d894e3cd66039f08f204a54dbf46a97e2 Binary files /dev/null and b/ppo_fix_continuous_action-310000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-315000.cleanrl_model b/ppo_fix_continuous_action-315000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2e09c6d4792da82ef4c1dd5cc6f6000544785e5b Binary files /dev/null and b/ppo_fix_continuous_action-315000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-320000.cleanrl_model b/ppo_fix_continuous_action-320000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..777fee30dc8e06ae1e6a1343d097d9db9fe2c04b Binary files /dev/null and b/ppo_fix_continuous_action-320000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-325000.cleanrl_model b/ppo_fix_continuous_action-325000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1940c5897a67521da243244c07c2345f3402a6b0 Binary files /dev/null and b/ppo_fix_continuous_action-325000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-330000.cleanrl_model b/ppo_fix_continuous_action-330000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f4b3d584a3d016324bdf781328fd67d630cdba54 Binary files /dev/null and b/ppo_fix_continuous_action-330000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-335000.cleanrl_model b/ppo_fix_continuous_action-335000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0c7fbef0f1ad56ee264db5e8c796879eea015fc1 Binary files /dev/null and b/ppo_fix_continuous_action-335000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-340000.cleanrl_model b/ppo_fix_continuous_action-340000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ddcdeef638c9a9e2f6bbc05426dc9a161d8e8262 Binary files /dev/null and b/ppo_fix_continuous_action-340000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-345000.cleanrl_model b/ppo_fix_continuous_action-345000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..10f4b92ea748979972c8069f66989a36735235fe Binary files /dev/null and b/ppo_fix_continuous_action-345000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-35000.cleanrl_model b/ppo_fix_continuous_action-35000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..60a79d64abb368127fd74ea497910dd78c542d15 Binary files /dev/null and b/ppo_fix_continuous_action-35000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-350000.cleanrl_model b/ppo_fix_continuous_action-350000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7baccaa5f7357173abbd01ec47212dcfd03a4cc4 Binary files /dev/null and b/ppo_fix_continuous_action-350000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-355000.cleanrl_model b/ppo_fix_continuous_action-355000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..72f1d01b09f471b70d10024fe3836fac8b150792 Binary files /dev/null and b/ppo_fix_continuous_action-355000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-360000.cleanrl_model b/ppo_fix_continuous_action-360000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..582ecfc14a3eff22f0d01d11fab9548009575938 Binary files /dev/null and b/ppo_fix_continuous_action-360000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-365000.cleanrl_model b/ppo_fix_continuous_action-365000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..64aead98e58ff2b25e45118064a7ac8a66bcbd78 Binary files /dev/null and b/ppo_fix_continuous_action-365000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-370000.cleanrl_model b/ppo_fix_continuous_action-370000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..24744aa8a6809ed57d727eea654c65d019516b4e Binary files /dev/null and b/ppo_fix_continuous_action-370000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-375000.cleanrl_model b/ppo_fix_continuous_action-375000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..66e46a6c6942b96f65aa5dedf3d0f43571a1dbce Binary files /dev/null and b/ppo_fix_continuous_action-375000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-380000.cleanrl_model b/ppo_fix_continuous_action-380000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..24e62f0dde0c72649992f007810651c496976efd Binary files /dev/null and b/ppo_fix_continuous_action-380000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-385000.cleanrl_model b/ppo_fix_continuous_action-385000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..359119603267526b47c8aa21c8acba2e35df3686 Binary files /dev/null and b/ppo_fix_continuous_action-385000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-390000.cleanrl_model b/ppo_fix_continuous_action-390000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bd2cfc133b11b4adbdf3033692932dc2334907fc Binary files /dev/null and b/ppo_fix_continuous_action-390000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-395000.cleanrl_model b/ppo_fix_continuous_action-395000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..52e5336064da8c02c59f89a54abb7c1f5f74e64c Binary files /dev/null and b/ppo_fix_continuous_action-395000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-40000.cleanrl_model b/ppo_fix_continuous_action-40000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..caa382ecf38f9a137e46369894539c5316c601e9 Binary files /dev/null and b/ppo_fix_continuous_action-40000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-400000.cleanrl_model b/ppo_fix_continuous_action-400000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..00b51533e764df76e0f18e676b5cce9423931eca Binary files /dev/null and b/ppo_fix_continuous_action-400000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-405000.cleanrl_model b/ppo_fix_continuous_action-405000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29348db8999474458895eec0815042705ae856a4 Binary files /dev/null and b/ppo_fix_continuous_action-405000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-410000.cleanrl_model b/ppo_fix_continuous_action-410000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7863971e02bcd12ccad1ff1b3e2b4cabfe99f5cd Binary files /dev/null and b/ppo_fix_continuous_action-410000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-415000.cleanrl_model b/ppo_fix_continuous_action-415000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..84fe524515f92de3e0a3e060b01454ecce557683 Binary files /dev/null and b/ppo_fix_continuous_action-415000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-420000.cleanrl_model b/ppo_fix_continuous_action-420000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..207dae652c74bf6a6fce52d684fb3fca3135c9f2 Binary files /dev/null and b/ppo_fix_continuous_action-420000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-425000.cleanrl_model b/ppo_fix_continuous_action-425000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5b5e9a80d0ba8a3ab556da41046fa84ffeca2d85 Binary files /dev/null and b/ppo_fix_continuous_action-425000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-430000.cleanrl_model b/ppo_fix_continuous_action-430000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..672fa54c13264a9f05bd3644983b6743da72259e Binary files /dev/null and b/ppo_fix_continuous_action-430000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-435000.cleanrl_model b/ppo_fix_continuous_action-435000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9adf853893751f0d52730d45e5570c34129f0a2d Binary files /dev/null and b/ppo_fix_continuous_action-435000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-440000.cleanrl_model b/ppo_fix_continuous_action-440000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..27f169d8e0682ec6a7a9db0bb8a1402a0f53dbc5 Binary files /dev/null and b/ppo_fix_continuous_action-440000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-445000.cleanrl_model b/ppo_fix_continuous_action-445000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8ba5092c0d569a386fe04590da486e6405a1348c Binary files /dev/null and b/ppo_fix_continuous_action-445000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-45000.cleanrl_model b/ppo_fix_continuous_action-45000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9b80be2277fa046bf5dccf85e24ff6d8b274b164 Binary files /dev/null and b/ppo_fix_continuous_action-45000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-450000.cleanrl_model b/ppo_fix_continuous_action-450000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c1e3e1b093b76e74ce30c07c3c2f86b845dd8d5b Binary files /dev/null and b/ppo_fix_continuous_action-450000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-455000.cleanrl_model b/ppo_fix_continuous_action-455000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ee22db53a15a0dc988c267afef60004f6182ea09 Binary files /dev/null and b/ppo_fix_continuous_action-455000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-460000.cleanrl_model b/ppo_fix_continuous_action-460000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a43e98fb0d589e7ab46dd86f30eed494ad31026c Binary files /dev/null and b/ppo_fix_continuous_action-460000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-465000.cleanrl_model b/ppo_fix_continuous_action-465000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4ffe2def6f6eed3f3fe57fab29c2e3d6d91335e6 Binary files /dev/null and b/ppo_fix_continuous_action-465000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-470000.cleanrl_model b/ppo_fix_continuous_action-470000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..87fb80118a54cb4a89460522f8c8c68034648e3a Binary files /dev/null and b/ppo_fix_continuous_action-470000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-475000.cleanrl_model b/ppo_fix_continuous_action-475000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..282f143ce56aa995e3cd626d4530b8ab4a6de8fd Binary files /dev/null and b/ppo_fix_continuous_action-475000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-480000.cleanrl_model b/ppo_fix_continuous_action-480000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..be315cbefe9ca6535b5f1501086c3daadecc9118 Binary files /dev/null and b/ppo_fix_continuous_action-480000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-485000.cleanrl_model b/ppo_fix_continuous_action-485000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0c94ea7c3bee0c9c562b159ae2eb7c59aaa12331 Binary files /dev/null and b/ppo_fix_continuous_action-485000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-490000.cleanrl_model b/ppo_fix_continuous_action-490000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c8087cd72deea35c9f8df06e3c3797e11a5060fd Binary files /dev/null and b/ppo_fix_continuous_action-490000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-495000.cleanrl_model b/ppo_fix_continuous_action-495000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d93d9fce90bf0f12fb3a293042546b26084e4b98 Binary files /dev/null and b/ppo_fix_continuous_action-495000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-5000.cleanrl_model b/ppo_fix_continuous_action-5000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..39f92ed87400bc8cead2325e434c0976a0b6ce42 Binary files /dev/null and b/ppo_fix_continuous_action-5000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-50000.cleanrl_model b/ppo_fix_continuous_action-50000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a714b4855829549a1829015dbf8d1adc0c8ac010 Binary files /dev/null and b/ppo_fix_continuous_action-50000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-500000.cleanrl_model b/ppo_fix_continuous_action-500000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d83d27f60a7b95be56bf16c5b7f49fff755ad6f8 Binary files /dev/null and b/ppo_fix_continuous_action-500000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-505000.cleanrl_model b/ppo_fix_continuous_action-505000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c5f9ce70744c1cb30a31aa031f5269ff02375d70 Binary files /dev/null and b/ppo_fix_continuous_action-505000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-510000.cleanrl_model b/ppo_fix_continuous_action-510000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..71d29078cc5e9fabc50e24ecc44cb002e7f3a97d Binary files /dev/null and b/ppo_fix_continuous_action-510000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-515000.cleanrl_model b/ppo_fix_continuous_action-515000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7d1daddf2edf327fd3f106f202c9d9c02f1bf916 Binary files /dev/null and b/ppo_fix_continuous_action-515000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-520000.cleanrl_model b/ppo_fix_continuous_action-520000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..810774d4e86eb7bd5af1565910ee215ed2a69383 Binary files /dev/null and b/ppo_fix_continuous_action-520000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-525000.cleanrl_model b/ppo_fix_continuous_action-525000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9af4f02164ec97b3be1474029751bfe8eaba3d80 Binary files /dev/null and b/ppo_fix_continuous_action-525000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-530000.cleanrl_model b/ppo_fix_continuous_action-530000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7b1c21334355d3f0f08703e8b7883a4b16096f02 Binary files /dev/null and b/ppo_fix_continuous_action-530000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-535000.cleanrl_model b/ppo_fix_continuous_action-535000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..005beb788212ecb8551c5fe5be6074ccbd9103ee Binary files /dev/null and b/ppo_fix_continuous_action-535000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-540000.cleanrl_model b/ppo_fix_continuous_action-540000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..099b209698a74fb82988282cb72f7b8e6371ad06 Binary files /dev/null and b/ppo_fix_continuous_action-540000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-545000.cleanrl_model b/ppo_fix_continuous_action-545000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a9b40268e2da6a750d0318a82431d1068c65f44c Binary files /dev/null and b/ppo_fix_continuous_action-545000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-55000.cleanrl_model b/ppo_fix_continuous_action-55000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..732ea6f41340eb6ac6cb0f308b9ac130f1938086 Binary files /dev/null and b/ppo_fix_continuous_action-55000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-550000.cleanrl_model b/ppo_fix_continuous_action-550000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9ae2b6fe36c62ae7e042f57502098ff51c781734 Binary files /dev/null and b/ppo_fix_continuous_action-550000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-555000.cleanrl_model b/ppo_fix_continuous_action-555000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b4ff5adfcbf2ab2da9e3091c90fbbdfd8b691a16 Binary files /dev/null and b/ppo_fix_continuous_action-555000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-560000.cleanrl_model b/ppo_fix_continuous_action-560000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3bdc8725ec43924ced25f951a4afb902d662c330 Binary files /dev/null and b/ppo_fix_continuous_action-560000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-565000.cleanrl_model b/ppo_fix_continuous_action-565000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f7e4918e2e4e385538d6c461f95eb6761be17274 Binary files /dev/null and b/ppo_fix_continuous_action-565000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-570000.cleanrl_model b/ppo_fix_continuous_action-570000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0c3f29f2ae85029fc3354f39548b3b05d1bfb3e3 Binary files /dev/null and b/ppo_fix_continuous_action-570000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-575000.cleanrl_model b/ppo_fix_continuous_action-575000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d0f1f3a67736e466545fe8301bac640d01851a45 Binary files /dev/null and b/ppo_fix_continuous_action-575000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-580000.cleanrl_model b/ppo_fix_continuous_action-580000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..378c71426c5f22137b396b44f25ecda90f1ae839 Binary files /dev/null and b/ppo_fix_continuous_action-580000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-585000.cleanrl_model b/ppo_fix_continuous_action-585000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e8c3d9d4bee3baad51a0779c2004545047ade15e Binary files /dev/null and b/ppo_fix_continuous_action-585000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-590000.cleanrl_model b/ppo_fix_continuous_action-590000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9a31f4e255a3d6b2948563235fd9a9b8585973eb Binary files /dev/null and b/ppo_fix_continuous_action-590000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-595000.cleanrl_model b/ppo_fix_continuous_action-595000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..523dc93fef381e40adbb720322ac4525b67e111f Binary files /dev/null and b/ppo_fix_continuous_action-595000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-60000.cleanrl_model b/ppo_fix_continuous_action-60000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..30f88387df0ee5a764babd1113c97a9a294456fc Binary files /dev/null and b/ppo_fix_continuous_action-60000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-600000.cleanrl_model b/ppo_fix_continuous_action-600000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f2e0aa254c7dcf888d88b2adbf6643ae1f1c8825 Binary files /dev/null and b/ppo_fix_continuous_action-600000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-605000.cleanrl_model b/ppo_fix_continuous_action-605000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b40007906ebe5046a07d7e5000009a0796f40bdd Binary files /dev/null and b/ppo_fix_continuous_action-605000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-610000.cleanrl_model b/ppo_fix_continuous_action-610000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..eee04faaf321d4602bb54a5c1f0c50f4e200f23e Binary files /dev/null and b/ppo_fix_continuous_action-610000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-615000.cleanrl_model b/ppo_fix_continuous_action-615000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b46f675aa1b11aab5e86feda3c6093217a7dc7cc Binary files /dev/null and b/ppo_fix_continuous_action-615000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-620000.cleanrl_model b/ppo_fix_continuous_action-620000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3ce452fd621c05b783784148a02f3eda7d501708 Binary files /dev/null and b/ppo_fix_continuous_action-620000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-625000.cleanrl_model b/ppo_fix_continuous_action-625000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3d087730c8fa8a300403d88c19dd591df2413c01 Binary files /dev/null and b/ppo_fix_continuous_action-625000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-630000.cleanrl_model b/ppo_fix_continuous_action-630000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1b42df3333b823aed2cc383ef1691620773f3daa Binary files /dev/null and b/ppo_fix_continuous_action-630000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-635000.cleanrl_model b/ppo_fix_continuous_action-635000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d8c0464b8dcbb5f24f7e9abc28248e43c912dd24 Binary files /dev/null and b/ppo_fix_continuous_action-635000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-640000.cleanrl_model b/ppo_fix_continuous_action-640000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3f35b4d6f14d96ec4111683bd7b5f1dba76c7ea0 Binary files /dev/null and b/ppo_fix_continuous_action-640000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-645000.cleanrl_model b/ppo_fix_continuous_action-645000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6ee56e68d0e9ad76868725bb69cb06991a93314e Binary files /dev/null and b/ppo_fix_continuous_action-645000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-65000.cleanrl_model b/ppo_fix_continuous_action-65000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1350e7e3abd22791002ce8856b769de885c2604c Binary files /dev/null and b/ppo_fix_continuous_action-65000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-650000.cleanrl_model b/ppo_fix_continuous_action-650000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..04df0ce474b73067b20e1e63bd6273f1045a06b1 Binary files /dev/null and b/ppo_fix_continuous_action-650000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-655000.cleanrl_model b/ppo_fix_continuous_action-655000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..31a1fcf207df90f0c3f250e18e41826ddf614d78 Binary files /dev/null and b/ppo_fix_continuous_action-655000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-660000.cleanrl_model b/ppo_fix_continuous_action-660000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e6c5ddbad80f22132c316aa8b5796cdc22b02018 Binary files /dev/null and b/ppo_fix_continuous_action-660000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-665000.cleanrl_model b/ppo_fix_continuous_action-665000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..72e7080e60f1c69d160eb668b03a0d0f1122064a Binary files /dev/null and b/ppo_fix_continuous_action-665000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-670000.cleanrl_model b/ppo_fix_continuous_action-670000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..75a762db028951ebaa70aa6f0bfc94e182fcfd2d Binary files /dev/null and b/ppo_fix_continuous_action-670000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-675000.cleanrl_model b/ppo_fix_continuous_action-675000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f327d10d295bb54a86a05dc4df34d62b03bf549a Binary files /dev/null and b/ppo_fix_continuous_action-675000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-680000.cleanrl_model b/ppo_fix_continuous_action-680000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..661e88c38b272e453ef090ceb7ab770b5e918399 Binary files /dev/null and b/ppo_fix_continuous_action-680000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-685000.cleanrl_model b/ppo_fix_continuous_action-685000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..67d3cc57dba90f6b993014259fad7fdb1c8c38e1 Binary files /dev/null and b/ppo_fix_continuous_action-685000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-690000.cleanrl_model b/ppo_fix_continuous_action-690000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..690bc4808a01620381241d321ce480a44dce12c9 Binary files /dev/null and b/ppo_fix_continuous_action-690000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-695000.cleanrl_model b/ppo_fix_continuous_action-695000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..11e5aa3e8b219532591f960399911d224c13f362 Binary files /dev/null and b/ppo_fix_continuous_action-695000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-70000.cleanrl_model b/ppo_fix_continuous_action-70000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b29858bbefdf9dcbba098779d8dcd518767693f2 Binary files /dev/null and b/ppo_fix_continuous_action-70000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-700000.cleanrl_model b/ppo_fix_continuous_action-700000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bc18edf14c240ac4a3f444959102696fc14543c0 Binary files /dev/null and b/ppo_fix_continuous_action-700000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-705000.cleanrl_model b/ppo_fix_continuous_action-705000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f75a34c4f4aad1d593103d186f1c0447eeb1d069 Binary files /dev/null and b/ppo_fix_continuous_action-705000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-710000.cleanrl_model b/ppo_fix_continuous_action-710000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..30c5b956a0d6db65fc22bb9577672c645cf82388 Binary files /dev/null and b/ppo_fix_continuous_action-710000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-715000.cleanrl_model b/ppo_fix_continuous_action-715000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..32c51698273a9c419ad5b38fa143a940095c643d Binary files /dev/null and b/ppo_fix_continuous_action-715000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-720000.cleanrl_model b/ppo_fix_continuous_action-720000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b8b2cfd99f8862e64b8d9ef851ece6fd415a9d85 Binary files /dev/null and b/ppo_fix_continuous_action-720000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-725000.cleanrl_model b/ppo_fix_continuous_action-725000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e9dca48550b689cf00f266ee655c0e750e9731bc Binary files /dev/null and b/ppo_fix_continuous_action-725000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-730000.cleanrl_model b/ppo_fix_continuous_action-730000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d8d60bc31aee6187371e945fe013db5b41d9abbc Binary files /dev/null and b/ppo_fix_continuous_action-730000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-735000.cleanrl_model b/ppo_fix_continuous_action-735000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cf08d9ea826884a31a709b9d573e63d6125eef61 Binary files /dev/null and b/ppo_fix_continuous_action-735000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-740000.cleanrl_model b/ppo_fix_continuous_action-740000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4f432077e38a28ece4ed694f11fd1b6695b99375 Binary files /dev/null and b/ppo_fix_continuous_action-740000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-745000.cleanrl_model b/ppo_fix_continuous_action-745000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7d232759bf42483f1465dfc03da5e1685f60e0b0 Binary files /dev/null and b/ppo_fix_continuous_action-745000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-75000.cleanrl_model b/ppo_fix_continuous_action-75000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bbe68a47d464b58daa128671133f23e01faedcd7 Binary files /dev/null and b/ppo_fix_continuous_action-75000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-750000.cleanrl_model b/ppo_fix_continuous_action-750000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0cf86c1cb2c23cbd586050ad75dcdcf6ab0cf69b Binary files /dev/null and b/ppo_fix_continuous_action-750000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-755000.cleanrl_model b/ppo_fix_continuous_action-755000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d77c049f4962a8b4ea4cee9e3487997ce9996a5b Binary files /dev/null and b/ppo_fix_continuous_action-755000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-760000.cleanrl_model b/ppo_fix_continuous_action-760000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b11dcb0f540f36016b4b672b1d9ddae999269267 Binary files /dev/null and b/ppo_fix_continuous_action-760000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-765000.cleanrl_model b/ppo_fix_continuous_action-765000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..05331aaa8f323af803f7adfbe3ccc4ad1e3a5b06 Binary files /dev/null and b/ppo_fix_continuous_action-765000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-770000.cleanrl_model b/ppo_fix_continuous_action-770000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d18402ec83d2d3f3a8d538058e689016bb591c67 Binary files /dev/null and b/ppo_fix_continuous_action-770000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-775000.cleanrl_model b/ppo_fix_continuous_action-775000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b9f7d8748f4c7e3f6038c1df6ce871d7a4358489 Binary files /dev/null and b/ppo_fix_continuous_action-775000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-780000.cleanrl_model b/ppo_fix_continuous_action-780000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c7f7309b6fb6732523b6c13dfdfaa6d04f35310e Binary files /dev/null and b/ppo_fix_continuous_action-780000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-785000.cleanrl_model b/ppo_fix_continuous_action-785000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3aa8042895d5b5de8ee44c708965434ff41dd9bc Binary files /dev/null and b/ppo_fix_continuous_action-785000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-790000.cleanrl_model b/ppo_fix_continuous_action-790000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1739c551e80179c9ef82e8bf5cdfc94bf5b56db0 Binary files /dev/null and b/ppo_fix_continuous_action-790000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-795000.cleanrl_model b/ppo_fix_continuous_action-795000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..01f2c5cd2ce5d713b378618865356e4e3279d251 Binary files /dev/null and b/ppo_fix_continuous_action-795000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-80000.cleanrl_model b/ppo_fix_continuous_action-80000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e6f501b07ba884a58dbcfbe3c8bc2c9dfa6a723f Binary files /dev/null and b/ppo_fix_continuous_action-80000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-800000.cleanrl_model b/ppo_fix_continuous_action-800000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dc25b17eca1cf62015b8d9fcdceda77101684c37 Binary files /dev/null and b/ppo_fix_continuous_action-800000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-805000.cleanrl_model b/ppo_fix_continuous_action-805000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6276bf99a0346cb9b430bb309283930e4fe01a24 Binary files /dev/null and b/ppo_fix_continuous_action-805000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-810000.cleanrl_model b/ppo_fix_continuous_action-810000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a1a6e5f464fdd26027ed8f5b3319717aaf7a04a9 Binary files /dev/null and b/ppo_fix_continuous_action-810000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-815000.cleanrl_model b/ppo_fix_continuous_action-815000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..15888e6d5932c4e4b9893b247e814322e3363f18 Binary files /dev/null and b/ppo_fix_continuous_action-815000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-820000.cleanrl_model b/ppo_fix_continuous_action-820000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5745bec26488dbb2b870322849b350187f31748f Binary files /dev/null and b/ppo_fix_continuous_action-820000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-825000.cleanrl_model b/ppo_fix_continuous_action-825000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fbbde6eaeba03fefda169200c1208269ff34d805 Binary files /dev/null and b/ppo_fix_continuous_action-825000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-830000.cleanrl_model b/ppo_fix_continuous_action-830000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6bb8fd551179586d7e07578775a3e01856cf26cf Binary files /dev/null and b/ppo_fix_continuous_action-830000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-835000.cleanrl_model b/ppo_fix_continuous_action-835000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4d0016834e0fd02c0d85e14a21472110d5a26d80 Binary files /dev/null and b/ppo_fix_continuous_action-835000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-840000.cleanrl_model b/ppo_fix_continuous_action-840000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2854b92c6d503cb8214ffa1fa943618c7191e39d Binary files /dev/null and b/ppo_fix_continuous_action-840000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-845000.cleanrl_model b/ppo_fix_continuous_action-845000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cd032593e857f1e9e8ab298dcf4baea7ba734fbc Binary files /dev/null and b/ppo_fix_continuous_action-845000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-85000.cleanrl_model b/ppo_fix_continuous_action-85000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8117471ddd36708d088f467b678efa39ece677dd Binary files /dev/null and b/ppo_fix_continuous_action-85000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-850000.cleanrl_model b/ppo_fix_continuous_action-850000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ccecf4e3524671bd09de82e37df2d57af94ed6e1 Binary files /dev/null and b/ppo_fix_continuous_action-850000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-855000.cleanrl_model b/ppo_fix_continuous_action-855000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c1df55648c87944dfb410c9f9ae211fa21090c9a Binary files /dev/null and b/ppo_fix_continuous_action-855000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-860000.cleanrl_model b/ppo_fix_continuous_action-860000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f5e664f64f2c19aed5057ff840782ab9d8092bfa Binary files /dev/null and b/ppo_fix_continuous_action-860000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-865000.cleanrl_model b/ppo_fix_continuous_action-865000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e238f387d7252dff97754be2fcf16826afa3535e Binary files /dev/null and b/ppo_fix_continuous_action-865000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-870000.cleanrl_model b/ppo_fix_continuous_action-870000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..036e5b8833e86b52f49b7fe11bd687e7bea02213 Binary files /dev/null and b/ppo_fix_continuous_action-870000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-875000.cleanrl_model b/ppo_fix_continuous_action-875000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1a3723063b49e6171c994d15b62352aa14fddc59 Binary files /dev/null and b/ppo_fix_continuous_action-875000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-880000.cleanrl_model b/ppo_fix_continuous_action-880000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..052d792f04d441441f9f293b3c4c45591fac8de0 Binary files /dev/null and b/ppo_fix_continuous_action-880000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-885000.cleanrl_model b/ppo_fix_continuous_action-885000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7830178811795fea55ed89e31c327871befd2139 Binary files /dev/null and b/ppo_fix_continuous_action-885000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-890000.cleanrl_model b/ppo_fix_continuous_action-890000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..243621f1a43c15dadce7f53f3a0a28bf82c58fb9 Binary files /dev/null and b/ppo_fix_continuous_action-890000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-895000.cleanrl_model b/ppo_fix_continuous_action-895000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b115c3e4a742da13e135a6fc69f4be8c8bfd34e4 Binary files /dev/null and b/ppo_fix_continuous_action-895000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-90000.cleanrl_model b/ppo_fix_continuous_action-90000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..75adb2a7f540c32e12eb643f1bf430e3af40681e Binary files /dev/null and b/ppo_fix_continuous_action-90000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-900000.cleanrl_model b/ppo_fix_continuous_action-900000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4fffa955a244734ac99c9d28d0257502414baf82 Binary files /dev/null and b/ppo_fix_continuous_action-900000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-905000.cleanrl_model b/ppo_fix_continuous_action-905000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ba611a9a13a3613e03056293be787c88137fb525 Binary files /dev/null and b/ppo_fix_continuous_action-905000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-910000.cleanrl_model b/ppo_fix_continuous_action-910000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b828448deed372830e2b8178a7e642558453b126 Binary files /dev/null and b/ppo_fix_continuous_action-910000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-915000.cleanrl_model b/ppo_fix_continuous_action-915000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c219915f8f95e9f2481bc8c5b05e7255f2ecb518 Binary files /dev/null and b/ppo_fix_continuous_action-915000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-920000.cleanrl_model b/ppo_fix_continuous_action-920000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5ad4d7011e662aca666f785f3648af27a3aad2c9 Binary files /dev/null and b/ppo_fix_continuous_action-920000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-925000.cleanrl_model b/ppo_fix_continuous_action-925000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..77b1269dd5962431c57e41a99e41283b4d77af06 Binary files /dev/null and b/ppo_fix_continuous_action-925000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-930000.cleanrl_model b/ppo_fix_continuous_action-930000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b3f2026aab63884dccf9fcba8310c51885ff1533 Binary files /dev/null and b/ppo_fix_continuous_action-930000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-935000.cleanrl_model b/ppo_fix_continuous_action-935000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e34a23e6d044336d4326eb2ebee469e0159c280b Binary files /dev/null and b/ppo_fix_continuous_action-935000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-940000.cleanrl_model b/ppo_fix_continuous_action-940000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..436b0533a8b1d9a368e898cf26e155d2670d151d Binary files /dev/null and b/ppo_fix_continuous_action-940000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-945000.cleanrl_model b/ppo_fix_continuous_action-945000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..11d36712a54bb6ac6039a3641cf97e416043dc31 Binary files /dev/null and b/ppo_fix_continuous_action-945000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-95000.cleanrl_model b/ppo_fix_continuous_action-95000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d94ce46f33189aef01e189e7ae273740b46b5cbe Binary files /dev/null and b/ppo_fix_continuous_action-95000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-950000.cleanrl_model b/ppo_fix_continuous_action-950000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..554623cc8296d3022af6c07bfde44b2191cbf453 Binary files /dev/null and b/ppo_fix_continuous_action-950000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-955000.cleanrl_model b/ppo_fix_continuous_action-955000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e0c72aa842e13975d41993358e1b6b6883474347 Binary files /dev/null and b/ppo_fix_continuous_action-955000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-960000.cleanrl_model b/ppo_fix_continuous_action-960000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..13d4a2e934a6075cf74cbd5bbfe5d22e1181d998 Binary files /dev/null and b/ppo_fix_continuous_action-960000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-965000.cleanrl_model b/ppo_fix_continuous_action-965000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3767e9d33ca079868c3f6399fe81c4dd51d8337b Binary files /dev/null and b/ppo_fix_continuous_action-965000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-970000.cleanrl_model b/ppo_fix_continuous_action-970000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ce55af45d2b81bd482908ca996f936e1944ea4d6 Binary files /dev/null and b/ppo_fix_continuous_action-970000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-975000.cleanrl_model b/ppo_fix_continuous_action-975000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..81801d3b91b5de91b5449429b89cd9cb2c8979e0 Binary files /dev/null and b/ppo_fix_continuous_action-975000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-980000.cleanrl_model b/ppo_fix_continuous_action-980000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..57f2cc3c9522796b6c2c40c2435daecf85ed2de0 Binary files /dev/null and b/ppo_fix_continuous_action-980000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-985000.cleanrl_model b/ppo_fix_continuous_action-985000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..011b887eec7e44e84516704eb8c4f3f198317523 Binary files /dev/null and b/ppo_fix_continuous_action-985000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-990000.cleanrl_model b/ppo_fix_continuous_action-990000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ba9ffa18670edf53cdf4e5c4cedcd06f450ab787 Binary files /dev/null and b/ppo_fix_continuous_action-990000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-995000.cleanrl_model b/ppo_fix_continuous_action-995000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..da53fca207193321e97e70d54b0d2fdcdca36770 Binary files /dev/null and b/ppo_fix_continuous_action-995000.cleanrl_model differ diff --git a/ppo_fix_continuous_action.cleanrl_model b/ppo_fix_continuous_action.cleanrl_model index 5d977ebd7b6d4aff12e4ec600e38ec1c84907d07..267894f9e9ae6e45c955df019fa7ce6c224d9d60 100644 Binary files a/ppo_fix_continuous_action.cleanrl_model and b/ppo_fix_continuous_action.cleanrl_model differ diff --git a/ppo_fix_continuous_action.py b/ppo_fix_continuous_action.py index f204e4e83e13f5da11c5e6e100efdc9b35ea4674..0d9b7c61f4d8f53a5f7c70e7065fa07fe9ad7a57 100644 --- a/ppo_fix_continuous_action.py +++ b/ppo_fix_continuous_action.py @@ -229,7 +229,7 @@ def evaluate( envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, capture_video, run_name, agent.obs_rms)]) obs, _ = envs.reset() - episodic_returns = [] + episodic_returns, episodic_lengths = [], [] while len(episodic_returns) < eval_episodes: actions, _, _, _ = agent.get_action_and_value(torch.Tensor(obs).to(device)) next_obs, _, _, _, infos = envs.step(actions.cpu().numpy()) @@ -239,9 +239,10 @@ def evaluate( continue print(f"eval_episode={len(episodic_returns)}, episodic_return={info['episode']['r']}") episodic_returns += [info["episode"]["r"]] + episodic_lengths += [info["episode"]["l"]] obs = next_obs - return episodic_returns + return episodic_returns, episodic_lengths def make_env(env_id, idx, capture_video, run_name, gamma): @@ -436,6 +437,28 @@ if __name__ == "__main__": terminal_value = agent.get_value(torch.Tensor(real_next_obs).to(device)).reshape(1, -1)[0][0] rewards[step][idx] += args.gamma * terminal_value + if global_step % (5000 // args.num_envs * args.num_envs) == 0: + obs_rms, return_rms = get_rms(envs.envs[0]) + agent.obs_rms = copy.deepcopy(get_rms(envs.envs[0])[0]) + model_path = f"runs/{run_name}/{args.exp_name}-{global_step}.cleanrl_model" + torch.save(agent.state_dict(), model_path) + print(f"model saved to {model_path}") + + episodic_returns, episodic_lengths = evaluate( + model_path, + make_eval_env, + args.env_id, + eval_episodes=3, + run_name=f"{run_name}-eval", + Model=Agent, + device=device, + capture_video=False, + ) + + print(episodic_returns, episodic_lengths) + writer.add_scalar("charts/eval/episodic_return", np.mean(episodic_returns), global_step) + writer.add_scalar("charts/eval/episodic_length", np.mean(episodic_lengths), global_step) + # Only print when at least 1 env is done if "final_info" not in infos: continue @@ -549,7 +572,7 @@ if __name__ == "__main__": torch.save(agent.state_dict(), model_path) print(f"model saved to {model_path}") - episodic_returns = evaluate( + episodic_returns, episodic_lengths = evaluate( model_path, make_eval_env, args.env_id, diff --git a/replay.mp4 b/replay.mp4 index e7a2d90d581d8ee10cf23f58d766d43fcb5d222b..ae2368e43ec6cfe22f919cc9233f21cd4b42ad71 100644 --- a/replay.mp4 +++ b/replay.mp4 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b8744cadc7072c2574043569012aa9a66807a709d0e92616e203097a76696cc -size 1143651 +oid sha256:1ee0b9b99dbeebad2ff245d3effbd22c72821ccd87cc74d243b3045f7d11aff4 +size 571074 diff --git a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-0.mp4 b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-0.mp4 deleted file mode 100644 index 70d3dd5ab626d678ea761f7b027c64151472976b..0000000000000000000000000000000000000000 Binary files a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-0.mp4 and /dev/null differ diff --git a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-1.mp4 b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-1.mp4 deleted file mode 100644 index b11307e5e15f26036f1bfc60408c977d56f03a1f..0000000000000000000000000000000000000000 Binary files a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-1.mp4 and /dev/null differ diff --git a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-8.mp4 b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-8.mp4 deleted file mode 100644 index e7a2d90d581d8ee10cf23f58d766d43fcb5d222b..0000000000000000000000000000000000000000 --- a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705691768-eval/rl-video-episode-8.mp4 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b8744cadc7072c2574043569012aa9a66807a709d0e92616e203097a76696cc -size 1143651 diff --git a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-0.mp4 b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b6ead6693fbbf8a560bd07a476f141e2f67bd4c8 Binary files /dev/null and b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-0.mp4 differ diff --git a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-1.mp4 b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..49377c7548a973d98083f56d3a8b1ef5bfa432e2 --- /dev/null +++ b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-1.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096b182c251ac929334bcd16888d4dae59d6a0b8e9f53117f5a64f75e54fbe97 +size 1100734 diff --git a/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-8.mp4 b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-8.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1d0a457b54c6f658854e5971b389e3edbbedda7b Binary files /dev/null and b/videos/Hopper-v4__ppo_fix_continuous_action__5__1705726093-eval/rl-video-episode-8.mp4 differ