a bit more training

Files changed (10) hide show

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ model-index:
   results:
   - metrics:
     - type: mean_reward
-      value: 829.71 +/- 50.26
       name: mean_reward
     task:
       type: reinforcement-learning

   results:
   - metrics:
     - type: mean_reward
+      value: 840.32 +/- 21.17
       name: mean_reward
     task:
       type: reinforcement-learning

ppo-CarRacing-v0_more_steps.zip ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:229cdcac04ce1cf3e51d9c015fbc54df85e971824f986127693249e626df9305
+size 26597790

ppo-CarRacing-v0_more_steps/_stable_baselines3_version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 1.5.0

ppo-CarRacing-v0_more_steps/data ADDED Viewed

The diff for this file is too large to render. See raw diff

ppo-CarRacing-v0_more_steps/policy.optimizer.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8de8baae8611aa6c0cf45023233991451afa58aa346dcd77c9d9d52d36d61fa7
+size 17424599

ppo-CarRacing-v0_more_steps/policy.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9501574d790e9a80ced1b2636b86b7e3377de7e20f25475b993dcd05a993e57c
+size 8713150

ppo-CarRacing-v0_more_steps/pytorch_variables.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:d030ad8db708280fcae77d87e973102039acd23a11bdecc3db8eb6c0ac940ee1
+size 431

ppo-CarRacing-v0_more_steps/system_info.txt ADDED Viewed

+OS: Linux-5.11.0-38-generic-x86_64-with-glibc2.31 #42~20.04.1-Ubuntu SMP Tue Sep 28 20:41:07 UTC 2021
+Python: 3.9.12
+Stable-Baselines3: 1.5.0
+PyTorch: 1.11.0+cu102
+GPU Enabled: True
+Numpy: 1.22.3
+Gym: 0.21.0

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f97b9013556b30d2aefa8c1f2a96bd3aec41dae6d45075e4dd02ed1425f419b6
-size 841747

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc70afa4d1ff12967154ef50574ab24a65ebc05daae849603277bd51c7406bf5
+size 860018

results.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"mean_reward": 829.7085274964571, "std_reward": 50.263206819596014, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-15T09:28:08.203235"}


1	+ {"mean_reward": 840.3218405112624, "std_reward": 21.17108198864501, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-05-15T09:31:13.988958"}