Michele Milesi committed
Commit e184661 • Parent(s): 5d9a387

feat: variable renaming

- agent-dreamer_v3.py +4 -4
- agent-ppo.py +4 -4
agent-dreamer_v3.py
CHANGED
@@ -73,13 +73,13 @@ def main(cfg_path: str, checkpoint_path: str, test=False):
     print("Policy architecture:")
     print(agent)
 
-
+    obs, info = env.reset()
     # Every time you reset the environment, you must reset the initial states of the model
     agent.init_states()
 
     while True:
         # Convert numpy observations into torch observations and normalize image observations
-        torch_obs = prepare_obs(fabric,
+        torch_obs = prepare_obs(fabric, obs, cnn_keys=cnn_keys)
 
         # Select actions, the agent returns a one-hot categorical or
         # more one-hot categorical distributions for multi-discrete action spaces
@@ -87,12 +87,12 @@ def main(cfg_path: str, checkpoint_path: str, test=False):
         # Convert actions from one-hot categorical to categorical
         actions = torch.cat([act.argmax(dim=-1) for act in actions], dim=-1)
 
-
+        obs, _, terminated, truncated, info = env.step(
             actions.cpu().numpy().reshape(env.action_space.shape)
         )
 
         if terminated or truncated:
-
+            obs, info = env.reset()
             # Every time you reset the environment, you must reset the initial states of the model
             agent.init_states()
             if info["env_done"] or test is True:
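
For context, the renamed obs variable threads through the script's whole evaluation loop. Below is a minimal sketch of the loop these two hunks touch, assuming the Gymnasium-style env, the Lightning fabric, the loaded agent, and the prepare_obs/cnn_keys helpers are created earlier in the script (none of that setup is shown in this diff). The action-selection call itself falls outside the hunks, so agent.get_actions(torch_obs, greedy=True) is borrowed from agent-ppo.py and is an assumption for the Dreamer-V3 script:

import torch

# Assumed to exist from earlier in the script (not shown in this diff):
#   env                   - Gymnasium-style environment
#   fabric                - lightning.fabric.Fabric used for device placement
#   agent                 - loaded Dreamer-V3 policy with init_states()
#   prepare_obs, cnn_keys - observation-preprocessing helper and its image keys
#   test                  - the main() flag that forces an early exit

obs, info = env.reset()
# Every time you reset the environment, you must reset the initial states of the model
agent.init_states()

while True:
    # Convert numpy observations into torch observations and normalize image observations
    torch_obs = prepare_obs(fabric, obs, cnn_keys=cnn_keys)

    # One one-hot categorical per action dimension (call name assumed, see above)
    actions = agent.get_actions(torch_obs, greedy=True)

    # Convert the one-hot samples to integer indices, one per action dimension
    actions = torch.cat([act.argmax(dim=-1) for act in actions], dim=-1)

    obs, _, terminated, truncated, info = env.step(
        actions.cpu().numpy().reshape(env.action_space.shape)
    )

    if terminated or truncated:
        obs, info = env.reset()
        agent.init_states()  # the model's states must be re-initialized after every reset
        if info["env_done"] or test is True:
            break
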
agent-ppo.py
CHANGED
@@ -65,21 +65,21 @@ def main(cfg_path: str, checkpoint_path: str, test=False):
     print("Policy architecture:")
     print(agent)
 
-
+    obs, info = env.reset()
 
     while True:
         # Convert numpy observations into torch observations and normalize image observations
-        torch_obs = prepare_obs(fabric,
+        torch_obs = prepare_obs(fabric, obs, cnn_keys=cnn_keys)
 
         actions = agent.get_actions(torch_obs, greedy=True)
         actions = torch.cat([act.argmax(dim=-1) for act in actions], dim=-1)
 
-
+        obs, _, terminated, truncated, info = env.step(
             actions.cpu().numpy().reshape(env.action_space.shape)
         )
 
         if terminated or truncated:
-
+            obs, info = env.reset()
             if info["env_done"] or test is True:
                 break
 
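
The one-hot-to-index conversion shared by both scripts is easy to sanity-check in isolation. A toy example with made-up shapes (a multi-discrete space whose two dimensions have 3 and 4 choices; the tensors are illustrative, not taken from the scripts):

import torch

# One batch element over two dimensions of a multi-discrete action space.
# Each list entry is a one-hot sample of shape (batch, num_choices).
actions = [
    torch.tensor([[0.0, 1.0, 0.0]]),       # dimension 1: 3 choices, index 1 chosen
    torch.tensor([[0.0, 0.0, 0.0, 1.0]]),  # dimension 2: 4 choices, index 3 chosen
]

# argmax recovers each one-hot vector's integer index; cat joins the
# per-dimension indices so they can be reshaped to env.action_space.shape
indices = torch.cat([act.argmax(dim=-1) for act in actions], dim=-1)
print(indices)  # tensor([1, 3])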