refactor: replace logging with print statements for debugging in RLAgent
agent.py
CHANGED
@@ -1,8 +1,3 @@
-"""
-Implementation of the AgentInterface for MetaWorld tasks.
-
-This agent uses the SawyerPickPlaceV2Policy from MetaWorld as an expert policy.
-"""
 
 import logging
 from typing import Any, Dict
@@ -31,17 +26,26 @@ class RLAgent(AgentInterface):
     ):
         super().__init__(observation_space, action_space, seed, **kwargs)
 
-
-
+        print(f"Initializing MetaWorld agent with seed {self.seed}")
+
+        # Log spaces for debugging
+        if observation_space:
+            print(f"Observation space: {observation_space}")
+        if action_space:
+            print(f"Action space: {action_space}")
 
         self.policy = SawyerReachV3Policy()
-
+        print("Successfully initialized SawyerReachV3Policy")
 
         # Track episode state
         self.episode_step = 0
         self.max_episode_steps = kwargs.get("max_episode_steps", 200)
+
+        # Debug flags
+        self.debug_observations = True
+        self.debug_actions = True
 
-
+        print("MetaWorld agent initialized successfully")
 
     def act(self, obs: Dict[str, Any], **kwargs) -> torch.Tensor:
         """
@@ -55,31 +59,60 @@
             action: Action tensor to take in the environment
         """
         try:
+            # Debug observation structure
+            if self.debug_observations and self.episode_step % 20 == 0:
+                print(f"Raw observation structure: {type(obs)}")
+                if isinstance(obs, dict):
+                    print(f"Observation keys: {list(obs.keys())}")
+                    for key, value in obs.items():
+                        if isinstance(value, np.ndarray):
+                            print(f" {key}: shape={value.shape}, dtype={value.dtype}")
+                        else:
+                            print(f" {key}: {type(value)} = {value}")
+
             # Process observation to extract the format needed by the expert policy
             processed_obs = self._process_observation(obs)
 
-            #
-
-
-
+            # Debug processed observation
+            if self.debug_observations and self.episode_step % 20 == 0:
+                print(f"Processed obs: shape={processed_obs.shape}, dtype={processed_obs.dtype}")
+                print(f"Processed obs sample: {processed_obs[:10]}...") # First 10 values
 
-            #
-
-
+            # Use the expert policy
+            action_numpy = self.policy.get_action(processed_obs)
+
+            # Debug raw policy output
+            if self.debug_actions and self.episode_step % 20 == 0:
+                print(f"Raw policy action: {action_numpy}, type: {type(action_numpy)}")
+                print(f"Action shape: {np.array(action_numpy).shape}")
+
+            # Convert to tensor
+            if isinstance(action_numpy, (list, tuple)):
+                action_tensor = torch.tensor(action_numpy, dtype=torch.float32)
+            else:
+                action_tensor = torch.from_numpy(np.array(action_numpy)).float()
+
+            # Ensure correct action dimensionality
+            if self.action_space and hasattr(self.action_space, 'shape'):
+                expected_shape = self.action_space.shape[0]
+                if action_tensor.shape[0] != expected_shape:
+                    print(f"Action shape mismatch: got {action_tensor.shape[0]}, expected {expected_shape}")
+                    # Pad or truncate as needed
+                    if action_tensor.shape[0] < expected_shape:
+                        padding = torch.zeros(expected_shape - action_tensor.shape[0])
+                        action_tensor = torch.cat([action_tensor, padding])
+                    else:
+                        action_tensor = action_tensor[:expected_shape]
+
+            # Debug final action
+            if self.debug_actions and self.episode_step % 20 == 0:
+                print(f"Final action tensor: {action_tensor}")
 
-            # Increment episode step
             self.episode_step += 1
-
-            # Occasionally log actions to avoid spam
-            if self.episode_step % 50 == 0:
-                self.logger.debug(
-                    f"Step {self.episode_step}: Action shape {action_tensor.shape}"
-                )
-
             return action_tensor
 
         except Exception as e:
-
+            print(f"Error in act method: {e}")
             # Return zeros as a fallback
             if isinstance(self.action_space, gym.spaces.Box):
                 return torch.zeros(self.action_space.shape[0], dtype=torch.float32)
@@ -93,48 +126,56 @@ class RLAgent(AgentInterface):
         MetaWorld policies typically expect a specific observation format.
         """
         if isinstance(obs, dict):
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Try different keys that MetaWorld might use
+            possible_keys = [
+                "observation",
+                "obs",
+                "state_observation",
+                "achieved_goal",
+                "state"
+            ]
+
+            processed_obs = None
+            for key in possible_keys:
+                if key in obs:
+                    processed_obs = obs[key]
+                    if self.debug_observations and self.episode_step % 50 == 0:
+                        print(f"Using observation key: {key}")
+                    break
+
+            if processed_obs is None:
+                # If none of the expected keys found, concatenate all numeric values
+                numeric_values = []
+                for key, value in obs.items():
+                    if isinstance(value, (np.ndarray, list, tuple)):
+                        flat_value = np.array(value).flatten()
+                        numeric_values.append(flat_value)
+                        if self.debug_observations and self.episode_step % 50 == 0:
+                            print(f"Concatenating key {key}: shape={flat_value.shape}")
+
+                if numeric_values:
+                    processed_obs = np.concatenate(numeric_values)
+                    if self.debug_observations and self.episode_step % 50 == 0:
+                        print(f"Concatenated observation shape: {processed_obs.shape}")
+                else:
+                    # Last resort: use first value
+                    processed_obs = next(iter(obs.values()))
+                    print("No numeric values found, using first observation value")
         else:
-            # If already a numpy array or similar, use directly
             processed_obs = obs
 
-        # Ensure
+        # Ensure numpy array
        if not isinstance(processed_obs, np.ndarray):
             try:
                 processed_obs = np.array(processed_obs, dtype=np.float32)
             except Exception as e:
-
-                # Return
-
-
-
-
-
-                    processed_obs = np.zeros(
-                        self.observation_space.shape, dtype=np.float32
-                    )
-                else:
-                    # Typical MetaWorld observation dimension if all else fails
-                    processed_obs = np.zeros(39, dtype=np.float32)
+                print(f"Failed to convert observation to numpy array: {e}")
+                # Return default observation size for MetaWorld reach task
+                processed_obs = np.zeros(39, dtype=np.float32)
+
+        # Ensure proper shape for MetaWorld reach policy
+        if processed_obs.ndim > 1:
+            processed_obs = processed_obs.flatten()
 
         return processed_obs
 
@@ -142,9 +183,11 @@ class RLAgent(AgentInterface):
         """
         Reset agent state between episodes.
        """
-
+        print(f"Resetting agent after {self.episode_step} steps")
         self.episode_step = 0
-        #
+        # Reset debug flags if needed
+        self.debug_observations = True
+        self.debug_actions = True
 
     def _build_model(self):
         """
@@ -153,13 +196,4 @@ class RLAgent(AgentInterface):
         This is a placeholder for where you would define your neural network
         architecture using PyTorch, TensorFlow, or another framework.
         """
-
-        # model = torch.nn.Sequential(
-        #     torch.nn.Linear(self.observation_space.shape[0], 128),
-        #     torch.nn.ReLU(),
-        #     torch.nn.Linear(128, 64),
-        #     torch.nn.ReLU(),
-        #     torch.nn.Linear(64, self.action_space.shape[0]),
-        # )
-        # return model
-        pass
+        pass
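
The comment block removed from _build_model above sketched a small feed-forward network. As a reference only, and not part of this commit, a minimal runnable version of that sketch, assuming 1-D gym.spaces.Box observation and action spaces, could look like the following; the expert-policy agent never calls it, so _build_model stays a placeholder.

import torch

def build_model(observation_space, action_space):
    """Hypothetical feed-forward policy network matching the removed comment block.

    Assumes Box spaces with 1-D shapes; for illustration only.
    """
    return torch.nn.Sequential(
        torch.nn.Linear(observation_space.shape[0], 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, action_space.shape[0]),
    )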
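For context on how the new print() diagnostics surface at runtime, here is a minimal, hypothetical smoke test. It assumes the RLAgent constructor mirrors the super().__init__ call above and that the reset method shown is named reset(); it also substitutes a stand-in environment for a real MetaWorld reach task (metaworld must still be installed for SawyerReachV3Policy).

import numpy as np

class DummyReachEnv:
    # Stand-in environment: 39-dim observations in a dict, the layout the Sawyer reach policy expects.
    def reset(self):
        return {"observation": np.zeros(39, dtype=np.float32)}

    def step(self, action):
        return {"observation": np.random.randn(39).astype(np.float32)}, 0.0, False, {}

env = DummyReachEnv()
agent = RLAgent(observation_space=None, action_space=None, seed=0)  # assumed signature

obs = env.reset()
for _ in range(60):  # enough steps for the every-20-step debug prints to fire a few times
    action = agent.act(obs)  # periodically prints raw/processed observation and action details
    obs, reward, done, info = env.step(action.numpy())
agent.reset()  # prints the step count before clearing it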