Giallar committed on
Commit
2c1873d
1 Parent(s): 83baf4a

Upload PPO LunarLander-v2 trained agent

Browse files
README.md CHANGED
@@ -10,7 +10,7 @@ model-index:
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
- value: -204.84 +/- 56.87
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
 
10
  results:
11
  - metrics:
12
  - type: mean_reward
13
+ value: -141.67 +/- 40.61
14
  name: mean_reward
15
  task:
16
  type: reinforcement-learning
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
ppo-LunarLander-v2.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b56966499411987d4e2159023440e948a7daff012be9277d1ce2da908fc1b55e
3
- size 188334
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76864629afd68ae5fed33dbe55790a6a75e12918ae97eeecb573f6807cbaead
3
+ size 188333
ppo-LunarLander-v2/data CHANGED
The diff for this file is too large to render. See raw diff
 
ppo-LunarLander-v2/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c485128cd1e6ce0c90039958eaec9c65f14a96806c671564162c469b40f473d
3
  size 84893
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b303dba5f95837aabc886446c27ad5b225301d2824e2f3c006c57c00e54988a
3
  size 84893
ppo-LunarLander-v2/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a6a337206b25e3116ec836ff7cf30dd91c54db046078f30e5214e07f9ed95d1
3
  size 43201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a33c08c82f08f3da19f216c2fe27ff798142b8a7943fdb635ad273b3969dcec8
3
  size 43201
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:450b1e58e40bf74297a26580840083fa14167b07a47d9ea2a8e46736f8f44c86
3
- size 247626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f852ea287c2cc0a3ccba4ea012b09721530622c1629cd4c86ad79c81a49e63eb
3
+ size 253943
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": -204.84463142059175, "std_reward": 56.87323539444302, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-05T13:50:21.022000"}
 
1
+ {"mean_reward": -141.66860129119596, "std_reward": 40.6133855839029, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-05T14:16:29.289943"}