Farbum committed on
Commit
49fd89f
1 Parent(s): 496a09b

Push agent to the Hub

Browse files
Files changed (4) hide show
  1. README.md +8 -8
  2. model_scripted.pt +2 -2
  3. replay.mp4 +0 -0
  4. results.json +1 -1
README.md CHANGED
@@ -1,31 +1,31 @@
1
  ---
2
  tags:
3
- - Pixelcopter-PLE-v0
4
  - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
  - custom-implementation
8
  - deep-rl-course
9
  model-index:
10
- - name: SARSA_QLEARNING
11
  results:
12
  - task:
13
  type: reinforcement-learning
14
  name: reinforcement-learning
15
  dataset:
16
- name: Pixelcopter-PLE-v0
17
- type: Pixelcopter-PLE-v0
18
  metrics:
19
  - type: mean_reward
20
- value: 46.00 +/- 43.67
21
  name: mean_reward
22
  verified: false
23
  ---
24
 
25
- # REINFORCE Agent Playing Pixelcopter-PLE-v0
26
 
27
- This is a trained model of a REINFORCE agent playing Pixelcopter-PLE-v0.
28
 
29
  # Hyperparameters
30
- hp_seed: 1<br />hp_torch_deterministic: True<br />hp_nb_frames: 8<br />hp_total_timesteps: 1005000<br />hp_learning_t: 500<br />hp_num_envs: 12<br />hp_learning_rate: 0.0001<br />hp_gamma: 0.99<br />hp_buffer_size: 10000<br />hp_batch_size: 32
31
 
 
1
  ---
2
  tags:
3
+ - PandaReachDense-v3
4
  - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
  - custom-implementation
8
  - deep-rl-course
9
  model-index:
10
+ - name: Actor-Critic
11
  results:
12
  - task:
13
  type: reinforcement-learning
14
  name: reinforcement-learning
15
  dataset:
16
+ name: PandaReachDense-v3
17
+ type: PandaReachDense-v3
18
  metrics:
19
  - type: mean_reward
20
+ value: -0.15 +/- 0.03
21
  name: mean_reward
22
  verified: false
23
  ---
24
 
25
+ # Actor-Critic Agent Playing PandaReachDense-v3
26
 
27
+ This is a trained model of an AC agent playing PandaReachDense-v3.
28
 
29
  # Hyperparameters
30
+ hp_seed: 2444<br />hp_torch_deterministic: True<br />hp_total_timesteps: 20500<br />hp_critic_nstep: 1<br />hp_num_envs: 12<br />hp_learning_rate_actor: 0.001<br />hp_learning_rate_critic: 0.005<br />hp_minlr_actor: 2e-06<br />hp_minlr_critic: 1e-05<br />hp_gamma: 0.99<br />hp_reg_term: 3<br />hp_batch_size: 64
31
 
model_scripted.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af84a2d5fb993b2d8b56d648a394caef04d58b68bb4a4cf3f2e8927ae141369
3
- size 1186859
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7197c2dc571e18719d98046e282a9b5f3f8c5dd6ea89be308c89a43ea257ea7a
3
+ size 34816
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"env_id": "Pixelcopter-PLE-v0", "mean_reward": 46.0, "std_reward": 43.669211121796096, "n_evaluation_episodes": 10, "eval_datetime": "2024-04-01T09:00:24.807934"}
 
1
+ {"env_id": "PandaReachDense-v3", "mean_reward": "-0.14658388", "std_reward": "0.03386605", "n_evaluation_episodes": 10, "eval_datetime": "2024-04-01T13:22:24.586948"}