zjowowen committed on
Commit
223a428
1 Parent(s): b1372ef

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +18 -15
README.md CHANGED
@@ -21,7 +21,7 @@ model-index:
21
  type: OpenAI/Gym/ClassicControl-Pendulum-v1
22
  metrics:
23
  - type: mean_reward
24
- value: -162.89 +/- 144.82
25
  name: mean_reward
26
  ---
27
 
@@ -67,9 +67,9 @@ import torch
67
 
68
  # Pull model from files which are git cloned from huggingface
69
  policy_state_dict = torch.load("pytorch_model.bin", map_location=torch.device("cpu"))
70
- cfg = EasyDict(Config.file_to_dict("policy_config.py"))
71
  # Instantiate the agent
72
- agent = DDPGAgent(env="pendulum", exp_name="Pendulum-v1-DDPG", cfg=cfg.exp_config, policy_state_dict=policy_state_dict)
73
  # Continue training
74
  agent.train(step=5000)
75
  # Render the new agent performance
@@ -95,7 +95,7 @@ from huggingface_ding import pull_model_from_hub
95
  # Pull model from Hugging Face Hub
96
  policy_state_dict, cfg = pull_model_from_hub(repo_id="OpenDILabCommunity/Pendulum-v1-DDPG")
97
  # Instantiate the agent
98
- agent = DDPGAgent(env="pendulum", exp_name="Pendulum-v1-DDPG", cfg=cfg.exp_config, policy_state_dict=policy_state_dict)
99
  # Continue training
100
  agent.train(step=5000)
101
  # Render the new agent performance
@@ -121,7 +121,7 @@ from ding.bonus import DDPGAgent
121
  from huggingface_ding import push_model_to_hub
122
 
123
  # Instantiate the agent
124
- agent = DDPGAgent("pendulum", exp_name="Pendulum-v1-DDPG")
125
  # Train the agent
126
  return_ = agent.train(step=int(4000000))
127
  # Push model to huggingface hub
@@ -138,7 +138,8 @@ push_model_to_hub(
138
  usage_file_by_git_clone="./ddpg/pendulum_ddpg_deploy.py",
139
  usage_file_by_huggingface_ding="./ddpg/pendulum_ddpg_download.py",
140
  train_file="./ddpg/pendulum_ddpg.py",
141
- repo_id="OpenDILabCommunity/Pendulum-v1-DDPG"
 
142
  )
143
 
144
  ```
@@ -163,10 +164,11 @@ exp_config = {
163
  'cfg_type': 'BaseEnvManagerDict'
164
  },
165
  'stop_value': -250,
 
 
166
  'collector_env_num': 8,
167
  'evaluator_env_num': 5,
168
- 'act_scale': True,
169
- 'n_evaluator_episode': 5
170
  },
171
  'policy': {
172
  'model': {
@@ -215,9 +217,10 @@ exp_config = {
215
  'render_freq': -1,
216
  'mode': 'train_iter'
217
  },
 
218
  'cfg_type': 'InteractionSerialEvaluatorDict',
219
- 'n_episode': 5,
220
- 'stop_value': -250
221
  }
222
  },
223
  'other': {
@@ -257,7 +260,7 @@ exp_config = {
257
 
258
  **Training Procedure**
259
  <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
260
- - **Weights & Biases (wandb):** [monitor link](https://wandb.ai/zhangpaipai/Pendulum-v1-DDPG)
261
 
262
  ## Model Information
263
  <!-- Provide the basic links for the model. -->
@@ -266,14 +269,14 @@ exp_config = {
266
  - **Configuration:** [config link](https://huggingface.co/OpenDILabCommunity/Pendulum-v1-DDPG/blob/main/policy_config.py)
267
  - **Demo:** [video](https://huggingface.co/OpenDILabCommunity/Pendulum-v1-DDPG/blob/main/replay.mp4)
268
  <!-- Provide the size information for the model. -->
269
- - **Parameters total size:** 35.26 KB
270
- - **Last Update Date:** 2023-04-29
271
 
272
  ## Environments
273
  <!-- Address questions around what environment the model is intended to be trained and deployed at, including the necessary information needed to be provided for future users. -->
274
  - **Benchmark:** OpenAI/Gym/ClassicControl
275
  - **Task:** Pendulum-v1
276
  - **Gym version:** 0.25.1
277
- - **DI-engine version:** v0.4.7
278
- - **PyTorch version:** 1.7.1
279
  - **Doc**: [DI-engine-docs Environments link](https://di-engine-docs.readthedocs.io/en/latest/13_envs/pendulum.html)
 
21
  type: OpenAI/Gym/ClassicControl-Pendulum-v1
22
  metrics:
23
  - type: mean_reward
24
+ value: -185.23 +/- 138.05
25
  name: mean_reward
26
  ---
27
 
 
67
 
68
  # Pull model from files which are git cloned from huggingface
69
  policy_state_dict = torch.load("pytorch_model.bin", map_location=torch.device("cpu"))
70
+ cfg = EasyDict(Config.file_to_dict("policy_config.py").cfg_dict)
71
  # Instantiate the agent
72
+ agent = DDPGAgent(env_id="Pendulum-v1", exp_name="Pendulum-v1-DDPG", cfg=cfg.exp_config, policy_state_dict=policy_state_dict)
73
  # Continue training
74
  agent.train(step=5000)
75
  # Render the new agent performance
 
95
  # Pull model from Hugging Face Hub
96
  policy_state_dict, cfg = pull_model_from_hub(repo_id="OpenDILabCommunity/Pendulum-v1-DDPG")
97
  # Instantiate the agent
98
+ agent = DDPGAgent(env_id="Pendulum-v1", exp_name="Pendulum-v1-DDPG", cfg=cfg.exp_config, policy_state_dict=policy_state_dict)
99
  # Continue training
100
  agent.train(step=5000)
101
  # Render the new agent performance
 
121
  from huggingface_ding import push_model_to_hub
122
 
123
  # Instantiate the agent
124
+ agent = DDPGAgent(env_id="Pendulum-v1", exp_name="Pendulum-v1-DDPG")
125
  # Train the agent
126
  return_ = agent.train(step=int(4000000))
127
  # Push model to huggingface hub
 
138
  usage_file_by_git_clone="./ddpg/pendulum_ddpg_deploy.py",
139
  usage_file_by_huggingface_ding="./ddpg/pendulum_ddpg_download.py",
140
  train_file="./ddpg/pendulum_ddpg.py",
141
+ repo_id="OpenDILabCommunity/Pendulum-v1-DDPG",
142
+ create_repo=False
143
  )
144
 
145
  ```
 
164
  'cfg_type': 'BaseEnvManagerDict'
165
  },
166
  'stop_value': -250,
167
+ 'n_evaluator_episode': 5,
168
+ 'env_id': 'Pendulum-v1',
169
  'collector_env_num': 8,
170
  'evaluator_env_num': 5,
171
+ 'act_scale': True
 
172
  },
173
  'policy': {
174
  'model': {
 
217
  'render_freq': -1,
218
  'mode': 'train_iter'
219
  },
220
+ 'figure_path': None,
221
  'cfg_type': 'InteractionSerialEvaluatorDict',
222
+ 'stop_value': -250,
223
+ 'n_episode': 5
224
  }
225
  },
226
  'other': {
 
260
 
261
  **Training Procedure**
262
  <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
263
+ - **Weights & Biases (wandb):** [monitor link](https://wandb.ai/zjowowen/Pendulum-v1-DDPG)
264
 
265
  ## Model Information
266
  <!-- Provide the basic links for the model. -->
 
269
  - **Configuration:** [config link](https://huggingface.co/OpenDILabCommunity/Pendulum-v1-DDPG/blob/main/policy_config.py)
270
  - **Demo:** [video](https://huggingface.co/OpenDILabCommunity/Pendulum-v1-DDPG/blob/main/replay.mp4)
271
  <!-- Provide the size information for the model. -->
272
+ - **Parameters total size:** 70.52 KB
273
+ - **Last Update Date:** 2023-09-22
274
 
275
  ## Environments
276
  <!-- Address questions around what environment the model is intended to be trained and deployed at, including the necessary information needed to be provided for future users. -->
277
  - **Benchmark:** OpenAI/Gym/ClassicControl
278
  - **Task:** Pendulum-v1
279
  - **Gym version:** 0.25.1
280
+ - **DI-engine version:** v0.4.9
281
+ - **PyTorch version:** 2.0.1+cu117
282
  - **Doc**: [DI-engine-docs Environments link](https://di-engine-docs.readthedocs.io/en/latest/13_envs/pendulum.html)