Retrain RPPO model for CarRacing-v0 v3

Files changed (8) hide show

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ model-index:
   results:
   - metrics:
     - type: mean_reward
-      value: 404.69 +/- 253.19
       name: mean_reward
     task:
       type: reinforcement-learning

   results:
   - metrics:
     - type: mean_reward
+      value: 614.78 +/- 160.84
       name: mean_reward
     task:
       type: reinforcement-learning

RPPO-CarRacing-v0.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:453dc0f4b3d920be75ce425dddf58e5fd33cba7d87e626d223843b9923b29bc2
-size 31312527

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b387e8dbfef4f0caa33fd9b7fa2f74218f2c5a1df508dbf59a8555c4c1476bd
+size 60470983

RPPO-CarRacing-v0/data CHANGED Viewed

The diff for this file is too large to render. See raw diff

RPPO-CarRacing-v0/policy.optimizer.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6475bdcde6f6e4e3f4ff07169833385e2bc1fb87a1158e7baa1101500c3f08a
-size 20558387

 version https://git-lfs.github.com/spec/v1
+oid sha256:24cee0ca43d716df50502427957efe9ec73a23d34783d50b715be36045d79ec7
+size 40055003

RPPO-CarRacing-v0/policy.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb8d103f4551a8a8a3949b242c215184aea37b603e958b0c849e6e7528746b80
-size 10279477

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd6b942242c741b25bbd256e9a8f05a2a4bd440f7e944cf5eb0c6c127bc616a3
+size 20027061

config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ca8e9a77cf09ce3b00e7764d773ea0fb949d352d24220db49f2cb12308eec51
-size 1354174

 version https://git-lfs.github.com/spec/v1
+oid sha256:8852d81ba284721f4421e63a9a4b858399a66af6b6c2839e16686fa9567e35ef
+size 1475634

results.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"mean_reward": 404.68767416924237, "std_reward": 253.19299877757, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-13T22:54:08.578416"}


1	+ {"mean_reward": 614.780129430443, "std_reward": 160.83888510938422, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-14T15:59:34.919009"}