DBusAI committed on
Commit
fdfa894
1 Parent(s): 67eb795

Retrain RPPO model for CarRacing-v0 v3

Browse files
README.md CHANGED
@@ -10,7 +10,7 @@ model-index:
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
- value: 404.69 +/- 253.19
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
 
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
+ value: 614.78 +/- 160.84
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
RPPO-CarRacing-v0.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:453dc0f4b3d920be75ce425dddf58e5fd33cba7d87e626d223843b9923b29bc2
3
- size 31312527
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b387e8dbfef4f0caa33fd9b7fa2f74218f2c5a1df508dbf59a8555c4c1476bd
3
+ size 60470983
RPPO-CarRacing-v0/data CHANGED
The diff for this file is too large to render. See raw diff
 
RPPO-CarRacing-v0/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6475bdcde6f6e4e3f4ff07169833385e2bc1fb87a1158e7baa1101500c3f08a
3
- size 20558387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24cee0ca43d716df50502427957efe9ec73a23d34783d50b715be36045d79ec7
3
+ size 40055003
RPPO-CarRacing-v0/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb8d103f4551a8a8a3949b242c215184aea37b603e958b0c849e6e7528746b80
3
- size 10279477
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd6b942242c741b25bbd256e9a8f05a2a4bd440f7e944cf5eb0c6c127bc616a3
3
+ size 20027061
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ca8e9a77cf09ce3b00e7764d773ea0fb949d352d24220db49f2cb12308eec51
3
- size 1354174
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8852d81ba284721f4421e63a9a4b858399a66af6b6c2839e16686fa9567e35ef
3
+ size 1475634
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 404.68767416924237, "std_reward": 253.19299877757, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-13T22:54:08.578416"}
 
1
+ {"mean_reward": 614.780129430443, "std_reward": 160.83888510938422, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-14T15:59:34.919009"}