diff --git a/.gitattributes b/.gitattributes index b405a18b1e442cc8b40b2aa73bbebc3bf6bdea57..251e88becfbd3fad037ed22bad352cc4471f5587 100644 --- a/.gitattributes +++ b/.gitattributes @@ -42,3 +42,6 @@ videos/Ant-v4__ppo_fix_continuous_action__4__1704452176-eval/rl-video-episode-1. videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-0.mp4 filter=lfs diff=lfs merge=lfs -text videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-8.mp4 filter=lfs diff=lfs merge=lfs -text videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-1.mp4 filter=lfs diff=lfs merge=lfs -text +videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-8.mp4 filter=lfs diff=lfs merge=lfs -text +videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-1.mp4 filter=lfs diff=lfs merge=lfs -text +videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-0.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 426994fa00f5ae67ee322e1c9d84b76bdfebd04d..087bc7a0fe9f437d06925b7372a7632f619e4884 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ model-index: type: Ant-v4 metrics: - type: mean_reward - value: 2332.38 +/- 436.43 + value: 3006.10 +/- 815.68 name: mean_reward verified: false --- diff --git a/events.out.tfevents.1705691861.3090-172.2536679.0 b/events.out.tfevents.1705691861.3090-172.2536679.0 deleted file mode 100644 index 98948e0deef4b639c53f260ba56fd1c3c5290788..0000000000000000000000000000000000000000 --- a/events.out.tfevents.1705691861.3090-172.2536679.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:272b55408f00b76275cb6ec5aae033ef65d7ef4d7460a8531440f2b129ca9aca -size 754776 diff --git a/events.out.tfevents.1705733583.4090-171.2661326.0 b/events.out.tfevents.1705733583.4090-171.2661326.0 new file mode 100644 index 0000000000000000000000000000000000000000..d421be21667468c412d674c2486ce2f28f38aaf3 --- /dev/null +++ b/events.out.tfevents.1705733583.4090-171.2661326.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:460d165ecd6b9548d416ade27cbc4a5b8fdd9a247eafb474f426f02da14a3689 +size 770862 diff --git a/ppo_fix_continuous_action-10000.cleanrl_model b/ppo_fix_continuous_action-10000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8f62687430de81e3049b6de60bb21e35b18dda8f Binary files /dev/null and b/ppo_fix_continuous_action-10000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-100000.cleanrl_model b/ppo_fix_continuous_action-100000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a76cdacbe1295adfa2c7f86d26e33a9f269ce3fa Binary files /dev/null and b/ppo_fix_continuous_action-100000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-105000.cleanrl_model b/ppo_fix_continuous_action-105000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a2c00a7aec65c9d59c174b14ab82c581df7e7de1 Binary files /dev/null and b/ppo_fix_continuous_action-105000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-110000.cleanrl_model b/ppo_fix_continuous_action-110000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..83652ee64f92cf17e02814cd2568e87c1687961c Binary files /dev/null and b/ppo_fix_continuous_action-110000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-115000.cleanrl_model b/ppo_fix_continuous_action-115000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..73193d4c89fb6017582f912ec5f695c86795e868 Binary files /dev/null and b/ppo_fix_continuous_action-115000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-120000.cleanrl_model b/ppo_fix_continuous_action-120000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..53e5be23e20f3cca693b2ebcecc8afbdf1ca310d Binary files /dev/null and b/ppo_fix_continuous_action-120000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-125000.cleanrl_model b/ppo_fix_continuous_action-125000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4dd75c58a2f49b8db552e605d4c1defd9ffa4b22 Binary files /dev/null and b/ppo_fix_continuous_action-125000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-130000.cleanrl_model b/ppo_fix_continuous_action-130000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..001ad4f1088a0c43a1384e549b90912e043d94db Binary files /dev/null and b/ppo_fix_continuous_action-130000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-135000.cleanrl_model b/ppo_fix_continuous_action-135000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..98524bc7c7cacb9f1de462268851f5381b1f2ce1 Binary files /dev/null and b/ppo_fix_continuous_action-135000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-140000.cleanrl_model b/ppo_fix_continuous_action-140000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1e58a21efff02e21035c0c8e5ef329db9893fc11 Binary files /dev/null and b/ppo_fix_continuous_action-140000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-145000.cleanrl_model b/ppo_fix_continuous_action-145000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4ab1949e4cddcecc0c24a5a86e28451d67a5f366 Binary files /dev/null and b/ppo_fix_continuous_action-145000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-15000.cleanrl_model b/ppo_fix_continuous_action-15000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b1f18f1eb4c8b2f472da63f72a864c7d70e98314 Binary files /dev/null and b/ppo_fix_continuous_action-15000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-150000.cleanrl_model b/ppo_fix_continuous_action-150000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6bd262abadfbd5ab9f46ef751d0a4701c1a6876a Binary files /dev/null and b/ppo_fix_continuous_action-150000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-155000.cleanrl_model b/ppo_fix_continuous_action-155000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..18eea60f37dd751a23f0b34dff18bfa5da3f19d7 Binary files /dev/null and b/ppo_fix_continuous_action-155000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-160000.cleanrl_model b/ppo_fix_continuous_action-160000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f5b03b10913255114da8d64d87bf73a7cc3b58d0 Binary files /dev/null and b/ppo_fix_continuous_action-160000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-165000.cleanrl_model b/ppo_fix_continuous_action-165000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..466448c1e67e1017fc7e6809b834cfbc927cc92e Binary files /dev/null and b/ppo_fix_continuous_action-165000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-170000.cleanrl_model b/ppo_fix_continuous_action-170000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..31b7598322b09e139dec9f3150e4b11a0f488a90 Binary files /dev/null and b/ppo_fix_continuous_action-170000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-175000.cleanrl_model b/ppo_fix_continuous_action-175000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..aebda4cef595ecd9ceb1ba43612758ee16a352dd Binary files /dev/null and b/ppo_fix_continuous_action-175000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-180000.cleanrl_model b/ppo_fix_continuous_action-180000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f9cfdad13f4f8475cbd853fab28599f9ea796515 Binary files /dev/null and b/ppo_fix_continuous_action-180000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-185000.cleanrl_model b/ppo_fix_continuous_action-185000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9c52358e41d84c31dc0930774f8a374d5892698e Binary files /dev/null and b/ppo_fix_continuous_action-185000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-190000.cleanrl_model b/ppo_fix_continuous_action-190000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..23872a2edb449b4476b1d2b80240b4ba9c97a38f Binary files /dev/null and b/ppo_fix_continuous_action-190000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-195000.cleanrl_model b/ppo_fix_continuous_action-195000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c82e5ca2f29581c8d74b2ec89edbc2d745a760d4 Binary files /dev/null and b/ppo_fix_continuous_action-195000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-20000.cleanrl_model b/ppo_fix_continuous_action-20000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..098d853d93446fbc014f10dfefa46fe8013e1a77 Binary files /dev/null and b/ppo_fix_continuous_action-20000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-200000.cleanrl_model b/ppo_fix_continuous_action-200000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4724abe14c6c4fb24171bca55d19461cdefe4338 Binary files /dev/null and b/ppo_fix_continuous_action-200000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-205000.cleanrl_model b/ppo_fix_continuous_action-205000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..76178bf120182eecc28b7dc567dd82c0703d84ab Binary files /dev/null and b/ppo_fix_continuous_action-205000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-210000.cleanrl_model b/ppo_fix_continuous_action-210000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..858146f7fbec03a52e0dc1a523af965b2c22c94b Binary files /dev/null and b/ppo_fix_continuous_action-210000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-215000.cleanrl_model b/ppo_fix_continuous_action-215000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8dfda77979b2ff57f7e0ade0548cd034f6fe3393 Binary files /dev/null and b/ppo_fix_continuous_action-215000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-220000.cleanrl_model b/ppo_fix_continuous_action-220000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8af59970ab5a8df94afe97d3461f08f8fb1cee50 Binary files /dev/null and b/ppo_fix_continuous_action-220000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-225000.cleanrl_model b/ppo_fix_continuous_action-225000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ddec08209321eef689151c876c5607d413a594f7 Binary files /dev/null and b/ppo_fix_continuous_action-225000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-230000.cleanrl_model b/ppo_fix_continuous_action-230000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..54c514ba4d165ca9bcc38751a2ac0825a78d4e01 Binary files /dev/null and b/ppo_fix_continuous_action-230000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-235000.cleanrl_model b/ppo_fix_continuous_action-235000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..91551a533b1135d4bcd0f5f3f1f8d0566ecac7c9 Binary files /dev/null and b/ppo_fix_continuous_action-235000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-240000.cleanrl_model b/ppo_fix_continuous_action-240000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b63162695eb326cbec7c745284269bd4ac724229 Binary files /dev/null and b/ppo_fix_continuous_action-240000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-245000.cleanrl_model b/ppo_fix_continuous_action-245000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1b465a20e7ecb829eb515835c9cef7926741a79d Binary files /dev/null and b/ppo_fix_continuous_action-245000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-25000.cleanrl_model b/ppo_fix_continuous_action-25000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fb8d181f2e133286a64bf4ba72c95fbf5b1acec8 Binary files /dev/null and b/ppo_fix_continuous_action-25000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-250000.cleanrl_model b/ppo_fix_continuous_action-250000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6677fdbcb078e4f08057e07279f41c1af83c2720 Binary files /dev/null and b/ppo_fix_continuous_action-250000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-255000.cleanrl_model b/ppo_fix_continuous_action-255000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..77c73af5efefd9ad2ce87906d75c9e2c1404edc3 Binary files /dev/null and b/ppo_fix_continuous_action-255000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-260000.cleanrl_model b/ppo_fix_continuous_action-260000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7e25d49b14de7a618704e59240dd66be4ed42531 Binary files /dev/null and b/ppo_fix_continuous_action-260000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-265000.cleanrl_model b/ppo_fix_continuous_action-265000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d61188dfe9b3e3711cd38469e7a4741e141f41d4 Binary files /dev/null and b/ppo_fix_continuous_action-265000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-270000.cleanrl_model b/ppo_fix_continuous_action-270000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..585dd4cc9d55d6bdda7666a744cb5dea9e75222d Binary files /dev/null and b/ppo_fix_continuous_action-270000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-275000.cleanrl_model b/ppo_fix_continuous_action-275000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e1b9570a3c2f2e75ef4c575bc4603ff63ddfa95d Binary files /dev/null and b/ppo_fix_continuous_action-275000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-280000.cleanrl_model b/ppo_fix_continuous_action-280000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..acddb6c674346c4c9739da4d7d58d720a4787455 Binary files /dev/null and b/ppo_fix_continuous_action-280000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-285000.cleanrl_model b/ppo_fix_continuous_action-285000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3b72cc98a0b09cf86fe6b8361b00f5a254b12d43 Binary files /dev/null and b/ppo_fix_continuous_action-285000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-290000.cleanrl_model b/ppo_fix_continuous_action-290000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..218c3ab51cf23e4cf0646d4673eda58ecaabbcfc Binary files /dev/null and b/ppo_fix_continuous_action-290000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-295000.cleanrl_model b/ppo_fix_continuous_action-295000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8601476c3fb45c1c3cb53097913532a05a608792 Binary files /dev/null and b/ppo_fix_continuous_action-295000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-30000.cleanrl_model b/ppo_fix_continuous_action-30000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2af7b6f53fcd3e65dfb01560fecccd3bf224f084 Binary files /dev/null and b/ppo_fix_continuous_action-30000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-300000.cleanrl_model b/ppo_fix_continuous_action-300000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ca1fb6af2d6628849b3c0ac863b48a4c09f6077a Binary files /dev/null and b/ppo_fix_continuous_action-300000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-305000.cleanrl_model b/ppo_fix_continuous_action-305000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2d10a68611d5bb7f6138370bf205d464416231f0 Binary files /dev/null and b/ppo_fix_continuous_action-305000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-310000.cleanrl_model b/ppo_fix_continuous_action-310000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..62abcf96c39635a58f2ec37642fa488f1d127b92 Binary files /dev/null and b/ppo_fix_continuous_action-310000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-315000.cleanrl_model b/ppo_fix_continuous_action-315000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bc57e3bc591b053ae73aa3598e3db241b2024a40 Binary files /dev/null and b/ppo_fix_continuous_action-315000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-320000.cleanrl_model b/ppo_fix_continuous_action-320000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..effba812dd1baab734fb2121c2be9d8c8cb24607 Binary files /dev/null and b/ppo_fix_continuous_action-320000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-325000.cleanrl_model b/ppo_fix_continuous_action-325000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..72b3ffc29fa2024fabc72e5b45c681155ccc8452 Binary files /dev/null and b/ppo_fix_continuous_action-325000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-330000.cleanrl_model b/ppo_fix_continuous_action-330000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..770472040c941ae4f273221a808d1dcbdacc27fe Binary files /dev/null and b/ppo_fix_continuous_action-330000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-335000.cleanrl_model b/ppo_fix_continuous_action-335000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9c594b652ecd7ad4cfeecd2102385a57f68dc1f5 Binary files /dev/null and b/ppo_fix_continuous_action-335000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-340000.cleanrl_model b/ppo_fix_continuous_action-340000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a07418e40c20a5314aa7ff0e4499120244be7c8b Binary files /dev/null and b/ppo_fix_continuous_action-340000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-345000.cleanrl_model b/ppo_fix_continuous_action-345000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8af04c67678cd4970b969171bf6b37f8059e9f98 Binary files /dev/null and b/ppo_fix_continuous_action-345000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-35000.cleanrl_model b/ppo_fix_continuous_action-35000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8a91d381bc92de51b562e00c6112912b2668c1af Binary files /dev/null and b/ppo_fix_continuous_action-35000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-350000.cleanrl_model b/ppo_fix_continuous_action-350000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..636f0e20faab9e57ca611a6a52a4d3e595225491 Binary files /dev/null and b/ppo_fix_continuous_action-350000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-355000.cleanrl_model b/ppo_fix_continuous_action-355000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b356a3e0514b9bae64ba969b7c6a24d57e5b9874 Binary files /dev/null and b/ppo_fix_continuous_action-355000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-360000.cleanrl_model b/ppo_fix_continuous_action-360000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ea213bca950aa47414bc81379c29fe7fad46cd54 Binary files /dev/null and b/ppo_fix_continuous_action-360000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-365000.cleanrl_model b/ppo_fix_continuous_action-365000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c144e49a3f4a61b65650fb16222d7d97e5f67453 Binary files /dev/null and b/ppo_fix_continuous_action-365000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-370000.cleanrl_model b/ppo_fix_continuous_action-370000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..30dc638bd2237547ae4fe594e10e43aca8d5b882 Binary files /dev/null and b/ppo_fix_continuous_action-370000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-375000.cleanrl_model b/ppo_fix_continuous_action-375000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ce3021c7b53029d141f1b66c261c928b835b05f9 Binary files /dev/null and b/ppo_fix_continuous_action-375000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-380000.cleanrl_model b/ppo_fix_continuous_action-380000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c0c6b594faa0f19ded288d5ad717f48e8788bfbb Binary files /dev/null and b/ppo_fix_continuous_action-380000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-385000.cleanrl_model b/ppo_fix_continuous_action-385000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7cb48dd2e40331e951b297a55fc7da9de1bd963f Binary files /dev/null and b/ppo_fix_continuous_action-385000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-390000.cleanrl_model b/ppo_fix_continuous_action-390000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8267ff87ccdb37c0da884638d7942940adbdc986 Binary files /dev/null and b/ppo_fix_continuous_action-390000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-395000.cleanrl_model b/ppo_fix_continuous_action-395000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4f346441f023808fc8bf0db3130f0cfa653e6700 Binary files /dev/null and b/ppo_fix_continuous_action-395000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-40000.cleanrl_model b/ppo_fix_continuous_action-40000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3611819b3539780c84c470a423bee1a9f2bde5f5 Binary files /dev/null and b/ppo_fix_continuous_action-40000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-400000.cleanrl_model b/ppo_fix_continuous_action-400000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c95f2238b34c1c0b787758b03d29ee5acfde78a2 Binary files /dev/null and b/ppo_fix_continuous_action-400000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-405000.cleanrl_model b/ppo_fix_continuous_action-405000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d63219ba5dd8185dfecbaa4d9449f0474eaf853d Binary files /dev/null and b/ppo_fix_continuous_action-405000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-410000.cleanrl_model b/ppo_fix_continuous_action-410000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..936db0347e701d51b033dbd318209471662c5d12 Binary files /dev/null and b/ppo_fix_continuous_action-410000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-415000.cleanrl_model b/ppo_fix_continuous_action-415000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29d2d002ebf57af9bed411aa205207f72d26b241 Binary files /dev/null and b/ppo_fix_continuous_action-415000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-420000.cleanrl_model b/ppo_fix_continuous_action-420000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c22ace2e5a7bac952a798eed74c021b1f0c6bfb8 Binary files /dev/null and b/ppo_fix_continuous_action-420000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-425000.cleanrl_model b/ppo_fix_continuous_action-425000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..eda8702bef9063e55c3ccd6972a212940f57161f Binary files /dev/null and b/ppo_fix_continuous_action-425000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-430000.cleanrl_model b/ppo_fix_continuous_action-430000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b47157070d8c31c14a9a815d52f72e589d43a55b Binary files /dev/null and b/ppo_fix_continuous_action-430000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-435000.cleanrl_model b/ppo_fix_continuous_action-435000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a38f7a2ca6aba1ae5c51afe4957a588fd9046783 Binary files /dev/null and b/ppo_fix_continuous_action-435000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-440000.cleanrl_model b/ppo_fix_continuous_action-440000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b05dc5900400651f0379a6f67d36cdb0202eb196 Binary files /dev/null and b/ppo_fix_continuous_action-440000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-445000.cleanrl_model b/ppo_fix_continuous_action-445000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4be40b49860f3a2046ab8418fd698fa296e9d3ce Binary files /dev/null and b/ppo_fix_continuous_action-445000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-45000.cleanrl_model b/ppo_fix_continuous_action-45000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6eeccbad7b1e6e6dcc087a708195fb074e4a3b7a Binary files /dev/null and b/ppo_fix_continuous_action-45000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-450000.cleanrl_model b/ppo_fix_continuous_action-450000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f0c2c0e1c6cf0852f41944aaea6027ddb140dce7 Binary files /dev/null and b/ppo_fix_continuous_action-450000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-455000.cleanrl_model b/ppo_fix_continuous_action-455000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fd5a255aa9ef626fb878c3c8d6ece3af7b4cd78c Binary files /dev/null and b/ppo_fix_continuous_action-455000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-460000.cleanrl_model b/ppo_fix_continuous_action-460000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..726fed939c97c6ad3f2b628d7735f9280983e826 Binary files /dev/null and b/ppo_fix_continuous_action-460000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-465000.cleanrl_model b/ppo_fix_continuous_action-465000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..985725bb264718c460905e3cc32fbb10426ba43f Binary files /dev/null and b/ppo_fix_continuous_action-465000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-470000.cleanrl_model b/ppo_fix_continuous_action-470000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8e460b34469b2afac93ffe6c29e51cbe774f6dc8 Binary files /dev/null and b/ppo_fix_continuous_action-470000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-475000.cleanrl_model b/ppo_fix_continuous_action-475000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..85b31d3ea198420883def0a4b09ce3e274abf713 Binary files /dev/null and b/ppo_fix_continuous_action-475000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-480000.cleanrl_model b/ppo_fix_continuous_action-480000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..354028242bedc59c555bcce3394679e53f82e5c2 Binary files /dev/null and b/ppo_fix_continuous_action-480000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-485000.cleanrl_model b/ppo_fix_continuous_action-485000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ec3156882dc19aa2f44898f30853517bd6ff9393 Binary files /dev/null and b/ppo_fix_continuous_action-485000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-490000.cleanrl_model b/ppo_fix_continuous_action-490000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..27e4a8ff52d9b39cce12e8c5c3e4c6fab17facbd Binary files /dev/null and b/ppo_fix_continuous_action-490000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-495000.cleanrl_model b/ppo_fix_continuous_action-495000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..abdcf8f49c6ea034d0eb464053e71317b542ef45 Binary files /dev/null and b/ppo_fix_continuous_action-495000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-5000.cleanrl_model b/ppo_fix_continuous_action-5000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1a863c63e49b3bb22a159ad332c6aa17776c3306 Binary files /dev/null and b/ppo_fix_continuous_action-5000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-50000.cleanrl_model b/ppo_fix_continuous_action-50000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..094ab89981c59524df3f858f16ab771c8e0c7895 Binary files /dev/null and b/ppo_fix_continuous_action-50000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-500000.cleanrl_model b/ppo_fix_continuous_action-500000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..96ae41c18e534c335ebb5de5f95d8e9ecccd81f2 Binary files /dev/null and b/ppo_fix_continuous_action-500000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-505000.cleanrl_model b/ppo_fix_continuous_action-505000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7481672c416c4e19c2a96e0e18db6d818985db83 Binary files /dev/null and b/ppo_fix_continuous_action-505000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-510000.cleanrl_model b/ppo_fix_continuous_action-510000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1db9c6f556435befc777bd5dc563cde71467206d Binary files /dev/null and b/ppo_fix_continuous_action-510000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-515000.cleanrl_model b/ppo_fix_continuous_action-515000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..252d8a7a1ce8901b8c436e5619b241eb71d57d8b Binary files /dev/null and b/ppo_fix_continuous_action-515000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-520000.cleanrl_model b/ppo_fix_continuous_action-520000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d472ab8aa8b5a9c2d027b7d64275497ba2c05c1a Binary files /dev/null and b/ppo_fix_continuous_action-520000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-525000.cleanrl_model b/ppo_fix_continuous_action-525000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..97281a3e7b957b85dd942aa58a17fba1353e957f Binary files /dev/null and b/ppo_fix_continuous_action-525000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-530000.cleanrl_model b/ppo_fix_continuous_action-530000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2a9cce86a6585cf8fea2cf6c0b9bda2c0b7ff252 Binary files /dev/null and b/ppo_fix_continuous_action-530000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-535000.cleanrl_model b/ppo_fix_continuous_action-535000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5d12535c47eb224ab0b14b86b9f2602ab90279c8 Binary files /dev/null and b/ppo_fix_continuous_action-535000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-540000.cleanrl_model b/ppo_fix_continuous_action-540000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6f9dcddd75473ea538afa88342340077e2a69ea0 Binary files /dev/null and b/ppo_fix_continuous_action-540000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-545000.cleanrl_model b/ppo_fix_continuous_action-545000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5901223f1396b1b4e3b3d947f655b6b8ba3dcf0a Binary files /dev/null and b/ppo_fix_continuous_action-545000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-55000.cleanrl_model b/ppo_fix_continuous_action-55000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..828e829962d4e9e709ebcf01c452472874616ad2 Binary files /dev/null and b/ppo_fix_continuous_action-55000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-550000.cleanrl_model b/ppo_fix_continuous_action-550000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..81c1bf203dc77fa52123ad2abf0b788bdecf3731 Binary files /dev/null and b/ppo_fix_continuous_action-550000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-555000.cleanrl_model b/ppo_fix_continuous_action-555000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2c6e62a7dc72e868e2a733689ad45a690eb3471c Binary files /dev/null and b/ppo_fix_continuous_action-555000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-560000.cleanrl_model b/ppo_fix_continuous_action-560000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ddd458203a8c7ac38e7f128a18325e6c6287ed27 Binary files /dev/null and b/ppo_fix_continuous_action-560000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-565000.cleanrl_model b/ppo_fix_continuous_action-565000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6356a0370c252a83c995497f62a737fa1b75ba94 Binary files /dev/null and b/ppo_fix_continuous_action-565000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-570000.cleanrl_model b/ppo_fix_continuous_action-570000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..00381239e81c38c2240ad9f1906d0f02fcb91095 Binary files /dev/null and b/ppo_fix_continuous_action-570000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-575000.cleanrl_model b/ppo_fix_continuous_action-575000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0883b52aa140d16dfdc46d7d6c9daa719543501f Binary files /dev/null and b/ppo_fix_continuous_action-575000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-580000.cleanrl_model b/ppo_fix_continuous_action-580000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9df5263db93b95b8de899b45b66822178e943b82 Binary files /dev/null and b/ppo_fix_continuous_action-580000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-585000.cleanrl_model b/ppo_fix_continuous_action-585000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..05ba8f65c8e798bed9e5c5b15af2a4a50a14e69f Binary files /dev/null and b/ppo_fix_continuous_action-585000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-590000.cleanrl_model b/ppo_fix_continuous_action-590000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..89fcae8fe50785b0e9329dfe26cec07e63fba6af Binary files /dev/null and b/ppo_fix_continuous_action-590000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-595000.cleanrl_model b/ppo_fix_continuous_action-595000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5e75145456d67389398652d331c2b4edc6ff68f5 Binary files /dev/null and b/ppo_fix_continuous_action-595000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-60000.cleanrl_model b/ppo_fix_continuous_action-60000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f748e9d5394cf2250d9d1a124bb33ef5dd2772ca Binary files /dev/null and b/ppo_fix_continuous_action-60000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-600000.cleanrl_model b/ppo_fix_continuous_action-600000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5016a0a03cfea52eb41d8d1937daa5d8913a0e9e Binary files /dev/null and b/ppo_fix_continuous_action-600000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-605000.cleanrl_model b/ppo_fix_continuous_action-605000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5049029e43f776ec3c73a0a9d47c78248c791d29 Binary files /dev/null and b/ppo_fix_continuous_action-605000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-610000.cleanrl_model b/ppo_fix_continuous_action-610000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a7ec6e0e0d7f61d261edfae6228ed3e43192adb0 Binary files /dev/null and b/ppo_fix_continuous_action-610000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-615000.cleanrl_model b/ppo_fix_continuous_action-615000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a4e522042231f5edc6b60136eab4f2dc50fee4ed Binary files /dev/null and b/ppo_fix_continuous_action-615000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-620000.cleanrl_model b/ppo_fix_continuous_action-620000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c502179b228ed325edcfffde03f301ed85b42840 Binary files /dev/null and b/ppo_fix_continuous_action-620000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-625000.cleanrl_model b/ppo_fix_continuous_action-625000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0e182bebbe6ceeb871bd5f510a936203cccfa426 Binary files /dev/null and b/ppo_fix_continuous_action-625000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-630000.cleanrl_model b/ppo_fix_continuous_action-630000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1cc16f5ea94a5294f797b052335bf30076520dab Binary files /dev/null and b/ppo_fix_continuous_action-630000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-635000.cleanrl_model b/ppo_fix_continuous_action-635000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9aa5eca393eb1c37b0bf344daf673de68ed47419 Binary files /dev/null and b/ppo_fix_continuous_action-635000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-640000.cleanrl_model b/ppo_fix_continuous_action-640000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dc41a6a6e290244f68e0ae3cd63a7822c938b934 Binary files /dev/null and b/ppo_fix_continuous_action-640000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-645000.cleanrl_model b/ppo_fix_continuous_action-645000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a816e680ce2b14022fc525f0febc14f60464d08e Binary files /dev/null and b/ppo_fix_continuous_action-645000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-65000.cleanrl_model b/ppo_fix_continuous_action-65000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d6116700e629579b747c62ba3f51dfdb8d714227 Binary files /dev/null and b/ppo_fix_continuous_action-65000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-650000.cleanrl_model b/ppo_fix_continuous_action-650000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..23950aed4dd6b9f5751fb32efb13e32c58bfed9a Binary files /dev/null and b/ppo_fix_continuous_action-650000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-655000.cleanrl_model b/ppo_fix_continuous_action-655000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2429f9fa006f0ddd56332423a2cfefdf266522e5 Binary files /dev/null and b/ppo_fix_continuous_action-655000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-660000.cleanrl_model b/ppo_fix_continuous_action-660000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..71e42f7346b792bc61d26ce16891f10b25565377 Binary files /dev/null and b/ppo_fix_continuous_action-660000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-665000.cleanrl_model b/ppo_fix_continuous_action-665000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f854d388839f33c6ade2d2015fa1e3da5703b81e Binary files /dev/null and b/ppo_fix_continuous_action-665000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-670000.cleanrl_model b/ppo_fix_continuous_action-670000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ed59079aabf6accb4ff99556aa017cd7b2ef53a0 Binary files /dev/null and b/ppo_fix_continuous_action-670000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-675000.cleanrl_model b/ppo_fix_continuous_action-675000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a506d7ce082c6bb102004553200bcf5553e7759a Binary files /dev/null and b/ppo_fix_continuous_action-675000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-680000.cleanrl_model b/ppo_fix_continuous_action-680000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ff878f3a9b02a53c3f7fc3735a2da94dabb32d82 Binary files /dev/null and b/ppo_fix_continuous_action-680000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-685000.cleanrl_model b/ppo_fix_continuous_action-685000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..be8b90649b57e9728b414e1b9a91b56a5e2bc460 Binary files /dev/null and b/ppo_fix_continuous_action-685000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-690000.cleanrl_model b/ppo_fix_continuous_action-690000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d1e22e5b86d1b093527c5bd62bc63347736ee473 Binary files /dev/null and b/ppo_fix_continuous_action-690000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-695000.cleanrl_model b/ppo_fix_continuous_action-695000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..11607a2926780166cf6c1341e6665dbc0af9607b Binary files /dev/null and b/ppo_fix_continuous_action-695000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-70000.cleanrl_model b/ppo_fix_continuous_action-70000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..216e749f5f58c583fd7212b0b47bde81456d145a Binary files /dev/null and b/ppo_fix_continuous_action-70000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-700000.cleanrl_model b/ppo_fix_continuous_action-700000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cf9207f51783e6a28d2a482ce05352d766d408ed Binary files /dev/null and b/ppo_fix_continuous_action-700000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-705000.cleanrl_model b/ppo_fix_continuous_action-705000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..985bc8d3187d0d3c4ac7cd593e9a450a7651968b Binary files /dev/null and b/ppo_fix_continuous_action-705000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-710000.cleanrl_model b/ppo_fix_continuous_action-710000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..14f3e63dc3c03eb1e4e78f99b811d64695240e01 Binary files /dev/null and b/ppo_fix_continuous_action-710000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-715000.cleanrl_model b/ppo_fix_continuous_action-715000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0845edbd2b93439b7d32845048b34b88f3c2683b Binary files /dev/null and b/ppo_fix_continuous_action-715000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-720000.cleanrl_model b/ppo_fix_continuous_action-720000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a631d4c61aee8b7b1d433b746b1ed9045548803c Binary files /dev/null and b/ppo_fix_continuous_action-720000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-725000.cleanrl_model b/ppo_fix_continuous_action-725000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e340d0690bb35d9d863dfb74d722ca178ab4e032 Binary files /dev/null and b/ppo_fix_continuous_action-725000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-730000.cleanrl_model b/ppo_fix_continuous_action-730000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bc7ec0f7a99cf37bfa6f91b397f4aebd3e93ff40 Binary files /dev/null and b/ppo_fix_continuous_action-730000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-735000.cleanrl_model b/ppo_fix_continuous_action-735000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..86c8bd40adf130696e95ddff6ce45f7eda1aa64a Binary files /dev/null and b/ppo_fix_continuous_action-735000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-740000.cleanrl_model b/ppo_fix_continuous_action-740000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..37e4956db05118c684dfc62187eabf7da97b592e Binary files /dev/null and b/ppo_fix_continuous_action-740000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-745000.cleanrl_model b/ppo_fix_continuous_action-745000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ff6ac4904d187f0e8f264e435878199ad568a7d2 Binary files /dev/null and b/ppo_fix_continuous_action-745000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-75000.cleanrl_model b/ppo_fix_continuous_action-75000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b6326bb81e08626f4224df657712af94682426ef Binary files /dev/null and b/ppo_fix_continuous_action-75000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-750000.cleanrl_model b/ppo_fix_continuous_action-750000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..715446b9626133249c05560b9533582887f563f5 Binary files /dev/null and b/ppo_fix_continuous_action-750000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-755000.cleanrl_model b/ppo_fix_continuous_action-755000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..95d56788e53f7608a505b963fcff9f31a8dce0bf Binary files /dev/null and b/ppo_fix_continuous_action-755000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-760000.cleanrl_model b/ppo_fix_continuous_action-760000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..502b73bbe3581821a6dff56abe602ad67e2d580e Binary files /dev/null and b/ppo_fix_continuous_action-760000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-765000.cleanrl_model b/ppo_fix_continuous_action-765000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..89e8db666398cc7364f07df8e4c03cad57b7968c Binary files /dev/null and b/ppo_fix_continuous_action-765000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-770000.cleanrl_model b/ppo_fix_continuous_action-770000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bdab5bcdfedf6f5d8e3babee643879631b12943e Binary files /dev/null and b/ppo_fix_continuous_action-770000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-775000.cleanrl_model b/ppo_fix_continuous_action-775000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e552dea0fe85c1a423fa2e9fe7c99f4a17aaa39f Binary files /dev/null and b/ppo_fix_continuous_action-775000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-780000.cleanrl_model b/ppo_fix_continuous_action-780000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..aa47acea09c5c13f1632cdb9b6c8e88c12453c51 Binary files /dev/null and b/ppo_fix_continuous_action-780000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-785000.cleanrl_model b/ppo_fix_continuous_action-785000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..03600dfbed1ffe1fbbca583ff75d986f73c844c2 Binary files /dev/null and b/ppo_fix_continuous_action-785000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-790000.cleanrl_model b/ppo_fix_continuous_action-790000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6807651d598b14c575b2d8eed44d936143d66fea Binary files /dev/null and b/ppo_fix_continuous_action-790000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-795000.cleanrl_model b/ppo_fix_continuous_action-795000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d9a04d4f5fc315891b5c843fc4c75787c129a4f6 Binary files /dev/null and b/ppo_fix_continuous_action-795000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-80000.cleanrl_model b/ppo_fix_continuous_action-80000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..437add3a45dd1c0f301ad82255749904c91e34bd Binary files /dev/null and b/ppo_fix_continuous_action-80000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-800000.cleanrl_model b/ppo_fix_continuous_action-800000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9399ada5b0532c45b807e9daaae370c60e8cf8f3 Binary files /dev/null and b/ppo_fix_continuous_action-800000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-805000.cleanrl_model b/ppo_fix_continuous_action-805000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..98369c97df0d68b6057d51582369829214b5d30c Binary files /dev/null and b/ppo_fix_continuous_action-805000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-810000.cleanrl_model b/ppo_fix_continuous_action-810000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7967632351a219330a385a4461159cac77553aa7 Binary files /dev/null and b/ppo_fix_continuous_action-810000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-815000.cleanrl_model b/ppo_fix_continuous_action-815000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ae76926aa4bf17018b2db0767adc523b9b323ebe Binary files /dev/null and b/ppo_fix_continuous_action-815000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-820000.cleanrl_model b/ppo_fix_continuous_action-820000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e94f98d1a58a935e6fa325c511aab1dfa11eeafe Binary files /dev/null and b/ppo_fix_continuous_action-820000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-825000.cleanrl_model b/ppo_fix_continuous_action-825000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bcc9ff7ceab2bdd834a125685581ee9dd759de90 Binary files /dev/null and b/ppo_fix_continuous_action-825000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-830000.cleanrl_model b/ppo_fix_continuous_action-830000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..af3c0343c46088a507a683c93a95bc4664434a68 Binary files /dev/null and b/ppo_fix_continuous_action-830000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-835000.cleanrl_model b/ppo_fix_continuous_action-835000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7a29f335aecf5718a42f0012f59bc1c31ce8b10e Binary files /dev/null and b/ppo_fix_continuous_action-835000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-840000.cleanrl_model b/ppo_fix_continuous_action-840000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..be0e9d8080bdbb573cb083e8a360b1dc5f3d554b Binary files /dev/null and b/ppo_fix_continuous_action-840000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-845000.cleanrl_model b/ppo_fix_continuous_action-845000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..982633015c17e6ccbba567c1250e9bc2e1607642 Binary files /dev/null and b/ppo_fix_continuous_action-845000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-85000.cleanrl_model b/ppo_fix_continuous_action-85000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..33715623027ceb542b8b9f1872009b3b9080797a Binary files /dev/null and b/ppo_fix_continuous_action-85000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-850000.cleanrl_model b/ppo_fix_continuous_action-850000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..93b714dbad138d9ca6f9a65328fe7d223922552b Binary files /dev/null and b/ppo_fix_continuous_action-850000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-855000.cleanrl_model b/ppo_fix_continuous_action-855000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2c48344358dcf816c2e009824d857ce78d64b5ec Binary files /dev/null and b/ppo_fix_continuous_action-855000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-860000.cleanrl_model b/ppo_fix_continuous_action-860000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..31f1b9a4b90906345a71060e4b6a66d4aacbabee Binary files /dev/null and b/ppo_fix_continuous_action-860000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-865000.cleanrl_model b/ppo_fix_continuous_action-865000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b694dbe184b6a11c62324447b37c1b013ef8ad3f Binary files /dev/null and b/ppo_fix_continuous_action-865000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-870000.cleanrl_model b/ppo_fix_continuous_action-870000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a6f7ddbcaf5d94184d02f4cd84c50788f8cb2562 Binary files /dev/null and b/ppo_fix_continuous_action-870000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-875000.cleanrl_model b/ppo_fix_continuous_action-875000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e9359479cf0d238844b81a2b3caa1eef00d2fdfc Binary files /dev/null and b/ppo_fix_continuous_action-875000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-880000.cleanrl_model b/ppo_fix_continuous_action-880000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b960b7896ae043477534d66cd0134a73d8e6d064 Binary files /dev/null and b/ppo_fix_continuous_action-880000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-885000.cleanrl_model b/ppo_fix_continuous_action-885000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e7fc7a0de25e75ac2d5e57060301c6ce373eabc2 Binary files /dev/null and b/ppo_fix_continuous_action-885000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-890000.cleanrl_model b/ppo_fix_continuous_action-890000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..432ff68a62ec4d52f11336396e04208a2eeb8971 Binary files /dev/null and b/ppo_fix_continuous_action-890000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-895000.cleanrl_model b/ppo_fix_continuous_action-895000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0e40a42d35e3a0a430dc16910ead4e28d53b8d9e Binary files /dev/null and b/ppo_fix_continuous_action-895000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-90000.cleanrl_model b/ppo_fix_continuous_action-90000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..80fa12478004e8819d75141d64a853da757e08aa Binary files /dev/null and b/ppo_fix_continuous_action-90000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-900000.cleanrl_model b/ppo_fix_continuous_action-900000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..75b8e0d0c9b3e2741375b9b0bf3574890ca269b7 Binary files /dev/null and b/ppo_fix_continuous_action-900000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-905000.cleanrl_model b/ppo_fix_continuous_action-905000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0221a462bfbd0a6bea33e2a8dbb9378a04762eb5 Binary files /dev/null and b/ppo_fix_continuous_action-905000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-910000.cleanrl_model b/ppo_fix_continuous_action-910000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e8eb3f92ffb01a6deee47e1eaf16d913ee0e3aef Binary files /dev/null and b/ppo_fix_continuous_action-910000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-915000.cleanrl_model b/ppo_fix_continuous_action-915000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29457a0f2f6f04c68e8783a96fd18e2fb671f04e Binary files /dev/null and b/ppo_fix_continuous_action-915000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-920000.cleanrl_model b/ppo_fix_continuous_action-920000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a0c792266d300310db5c96dba82983060263eed6 Binary files /dev/null and b/ppo_fix_continuous_action-920000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-925000.cleanrl_model b/ppo_fix_continuous_action-925000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4cb2bee5045156eae1cc21abf0813a95b9dc2c55 Binary files /dev/null and b/ppo_fix_continuous_action-925000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-930000.cleanrl_model b/ppo_fix_continuous_action-930000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..697571644d6b0b87c319d8f76e6ed183233f4bbf Binary files /dev/null and b/ppo_fix_continuous_action-930000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-935000.cleanrl_model b/ppo_fix_continuous_action-935000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9e5c8c332c0d922c59b4b0308e715e65d3a167f2 Binary files /dev/null and b/ppo_fix_continuous_action-935000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-940000.cleanrl_model b/ppo_fix_continuous_action-940000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fe183e2ddcb24962a10a7993f68bdce63c338bf3 Binary files /dev/null and b/ppo_fix_continuous_action-940000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-945000.cleanrl_model b/ppo_fix_continuous_action-945000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..56e9be0f66f7c72985b05ea4bf9f24d808ae9023 Binary files /dev/null and b/ppo_fix_continuous_action-945000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-95000.cleanrl_model b/ppo_fix_continuous_action-95000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2611fc3233a859636dd21cef1294aa42c073dbe2 Binary files /dev/null and b/ppo_fix_continuous_action-95000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-950000.cleanrl_model b/ppo_fix_continuous_action-950000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ce2c5d5df0f050d32273f7180df073551c64622f Binary files /dev/null and b/ppo_fix_continuous_action-950000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-955000.cleanrl_model b/ppo_fix_continuous_action-955000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0f4d1a5eb2f277d47664f211fea3623d37c41567 Binary files /dev/null and b/ppo_fix_continuous_action-955000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-960000.cleanrl_model b/ppo_fix_continuous_action-960000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..740b435eccb4ff67c0df78f40e37bddd9e2c3a01 Binary files /dev/null and b/ppo_fix_continuous_action-960000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-965000.cleanrl_model b/ppo_fix_continuous_action-965000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e70994191b30e5dad420b5396a69f316411df211 Binary files /dev/null and b/ppo_fix_continuous_action-965000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-970000.cleanrl_model b/ppo_fix_continuous_action-970000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..64ced2a4302154e4a51f1c976cc75748181c0600 Binary files /dev/null and b/ppo_fix_continuous_action-970000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-975000.cleanrl_model b/ppo_fix_continuous_action-975000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f4ca2bce4a052fdb192e48e3f16a4b4067e15d49 Binary files /dev/null and b/ppo_fix_continuous_action-975000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-980000.cleanrl_model b/ppo_fix_continuous_action-980000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..09d3439a5e1f52f206ccef720912cb5afd8ba541 Binary files /dev/null and b/ppo_fix_continuous_action-980000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-985000.cleanrl_model b/ppo_fix_continuous_action-985000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..94ff3f849d20ff0970a1a47f325b5ba53accdafa Binary files /dev/null and b/ppo_fix_continuous_action-985000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-990000.cleanrl_model b/ppo_fix_continuous_action-990000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5603b022f3d897bedd5a97fb693955807969c0c7 Binary files /dev/null and b/ppo_fix_continuous_action-990000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-995000.cleanrl_model b/ppo_fix_continuous_action-995000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..109cf69cba5c37c34be9eab2e5a276d45b1911a0 Binary files /dev/null and b/ppo_fix_continuous_action-995000.cleanrl_model differ diff --git a/ppo_fix_continuous_action.cleanrl_model b/ppo_fix_continuous_action.cleanrl_model index b210c3088313e6ba58e2b9417aad88c3e3ccd9f6..d955a7ac1b90102287b6ed21640d14c9a9b2406b 100644 Binary files a/ppo_fix_continuous_action.cleanrl_model and b/ppo_fix_continuous_action.cleanrl_model differ diff --git a/ppo_fix_continuous_action.py b/ppo_fix_continuous_action.py index f204e4e83e13f5da11c5e6e100efdc9b35ea4674..0d9b7c61f4d8f53a5f7c70e7065fa07fe9ad7a57 100644 --- a/ppo_fix_continuous_action.py +++ b/ppo_fix_continuous_action.py @@ -229,7 +229,7 @@ def evaluate( envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, capture_video, run_name, agent.obs_rms)]) obs, _ = envs.reset() - episodic_returns = [] + episodic_returns, episodic_lengths = [], [] while len(episodic_returns) < eval_episodes: actions, _, _, _ = agent.get_action_and_value(torch.Tensor(obs).to(device)) next_obs, _, _, _, infos = envs.step(actions.cpu().numpy()) @@ -239,9 +239,10 @@ def evaluate( continue print(f"eval_episode={len(episodic_returns)}, episodic_return={info['episode']['r']}") episodic_returns += [info["episode"]["r"]] + episodic_lengths += [info["episode"]["l"]] obs = next_obs - return episodic_returns + return episodic_returns, episodic_lengths def make_env(env_id, idx, capture_video, run_name, gamma): @@ -436,6 +437,28 @@ if __name__ == "__main__": terminal_value = agent.get_value(torch.Tensor(real_next_obs).to(device)).reshape(1, -1)[0][0] rewards[step][idx] += args.gamma * terminal_value + if global_step % (5000 // args.num_envs * args.num_envs) == 0: + obs_rms, return_rms = get_rms(envs.envs[0]) + agent.obs_rms = copy.deepcopy(get_rms(envs.envs[0])[0]) + model_path = f"runs/{run_name}/{args.exp_name}-{global_step}.cleanrl_model" + torch.save(agent.state_dict(), model_path) + print(f"model saved to {model_path}") + + episodic_returns, episodic_lengths = evaluate( + model_path, + make_eval_env, + args.env_id, + eval_episodes=3, + run_name=f"{run_name}-eval", + Model=Agent, + device=device, + capture_video=False, + ) + + print(episodic_returns, episodic_lengths) + writer.add_scalar("charts/eval/episodic_return", np.mean(episodic_returns), global_step) + writer.add_scalar("charts/eval/episodic_length", np.mean(episodic_lengths), global_step) + # Only print when at least 1 env is done if "final_info" not in infos: continue @@ -549,7 +572,7 @@ if __name__ == "__main__": torch.save(agent.state_dict(), model_path) print(f"model saved to {model_path}") - episodic_returns = evaluate( + episodic_returns, episodic_lengths = evaluate( model_path, make_eval_env, args.env_id, diff --git a/replay.mp4 b/replay.mp4 index a43184f260b50f1a3e02f35de41e22079caaa34c..e2e32b4eba69b090e3b8a3ae8be59a04ca94c4ae 100644 --- a/replay.mp4 +++ b/replay.mp4 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f07b5c0312636d451db222d8400b1df16bcdf683d7b132bb3fb4a2fbb0f0e6c3 -size 2193916 +oid sha256:e170289f8d1dcecf08cc3b4faa9fa2c067d0d7fa4d08b75c1f2e0d0391ef0bd1 +size 1920520 diff --git a/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-0.mp4 b/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-0.mp4 deleted file mode 100644 index 712ca82f56da87983900aa3bc69ff9f31bb3298d..0000000000000000000000000000000000000000 --- a/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-0.mp4 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:612917ca09c06e2be757e93da47b3082e86c462c8cd029109d49fdbacc23d362 -size 2166085 diff --git a/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-1.mp4 b/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-1.mp4 deleted file mode 100644 index c14f83b26fa909e558a53c0088c1545c7826f9ed..0000000000000000000000000000000000000000 --- a/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-1.mp4 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:466892d581d9c54620499de54c31ae7e115ea188cf97516691b7308fd4837890 -size 2081984 diff --git a/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-8.mp4 b/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-8.mp4 deleted file mode 100644 index a43184f260b50f1a3e02f35de41e22079caaa34c..0000000000000000000000000000000000000000 --- a/videos/Ant-v4__ppo_fix_continuous_action__4__1705691854-eval/rl-video-episode-8.mp4 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f07b5c0312636d451db222d8400b1df16bcdf683d7b132bb3fb4a2fbb0f0e6c3 -size 2193916 diff --git a/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-0.mp4 b/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1fc2c28d584b100e471daa78e77cc134fadbe031 --- /dev/null +++ b/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-0.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f7f1bf56d9b568adb83ded01185c370017be0aae6eff0c3c093e6f03813406 +size 1845028 diff --git a/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-1.mp4 b/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4524eda0c1a29040657d843675fe75f5fac6d913 --- /dev/null +++ b/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-1.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0d97cd07068717d7c81de0b1079b52c5d5a317ef2a6ed8c25d53009a4d54e9 +size 1905469 diff --git a/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-8.mp4 b/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-8.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e2e32b4eba69b090e3b8a3ae8be59a04ca94c4ae --- /dev/null +++ b/videos/Ant-v4__ppo_fix_continuous_action__4__1705733569-eval/rl-video-episode-8.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e170289f8d1dcecf08cc3b4faa9fa2c067d0d7fa4d08b75c1f2e0d0391ef0bd1 +size 1920520