Farbum committed on
Commit
c0bfcf0
1 Parent(s): 511d2b0

Push agent to the Hub

Browse files
Files changed (4) hide show
  1. README.md +8 -8
  2. model_scripted.pt +2 -2
  3. replay.mp4 +0 -0
  4. results.json +1 -1
README.md CHANGED
@@ -1,31 +1,31 @@
1
  ---
2
  tags:
3
- - PandaReachDense-v3
4
  - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
  - custom-implementation
8
  - deep-rl-course
9
  model-index:
10
- - name: Actor-Critic
11
  results:
12
  - task:
13
  type: reinforcement-learning
14
  name: reinforcement-learning
15
  dataset:
16
- name: PandaReachDense-v3
17
- type: PandaReachDense-v3
18
  metrics:
19
  - type: mean_reward
20
- value: -0.17 +/- 0.06
21
  name: mean_reward
22
  verified: false
23
  ---
24
 
25
- # Actor-Critic Agent Playing PandaReachDense-v3
26
 
27
- This is a trained model of an AC agent playing PandaReachDense-v3.
28
 
29
  # Hyperparameters
30
- hp_seed: 2444<br />hp_torch_deterministic: True<br />hp_total_timesteps: 20500<br />hp_critic_nstep: 1<br />hp_num_envs: 12<br />hp_learning_rate_actor: 0.001<br />hp_learning_rate_critic: 0.005<br />hp_minlr_actor: 2e-06<br />hp_minlr_critic: 1e-05<br />hp_gamma: 0.99<br />hp_reg_term: 3<br />hp_batch_size: 64
31
 
 
1
  ---
2
  tags:
3
+ - Pixelcopter-PLE-v0
4
  - ppo
5
  - deep-reinforcement-learning
6
  - reinforcement-learning
7
  - custom-implementation
8
  - deep-rl-course
9
  model-index:
10
+ - name: SARSA_QLEARNING
11
  results:
12
  - task:
13
  type: reinforcement-learning
14
  name: reinforcement-learning
15
  dataset:
16
+ name: Pixelcopter-PLE-v0
17
+ type: Pixelcopter-PLE-v0
18
  metrics:
19
  - type: mean_reward
20
+ value: 49.30 +/- 37.85
21
  name: mean_reward
22
  verified: false
23
  ---
24
 
25
+ # REINFORCE Agent Playing Pixelcopter-PLE-v0
26
 
27
+ This is a trained model of a REINFORCE agent playing Pixelcopter-PLE-v0.
28
 
29
  # Hyperparameters
30
+ hp_seed: 1<br />hp_torch_deterministic: True<br />hp_nb_frames: 8<br />hp_total_timesteps: 1005000<br />hp_learning_t: 500<br />hp_num_envs: 12<br />hp_learning_rate: 0.0001<br />hp_gamma: 0.99<br />hp_buffer_size: 10000<br />hp_batch_size: 32
31
 
model_scripted.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0afad2afd781a1dfcf0f0bfa7274986dc8bbbc6eb37a732dc958dc799080cf4
3
- size 34816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea2dc5275339fbb101b04eba7d996be154b35cebac4967a8eb26fa1134f171c
3
+ size 1186823
replay.mp4 CHANGED
Binary files a/replay.mp4 and b/replay.mp4 differ
 
results.json CHANGED
@@ -1 +1 @@
1
- {"env_id": "PandaReachDense-v3", "mean_reward": "-0.1670995", "std_reward": "0.05625326", "n_evaluation_episodes": 10, "eval_datetime": "2024-04-01T13:23:29.887964"}
 
1
+ {"env_id": "Pixelcopter-PLE-v0", "mean_reward": 49.3, "std_reward": 37.84983487414443, "n_evaluation_episodes": 10, "eval_datetime": "2024-04-01T22:25:28.056818"}