Try to debug
- app.py (+2, -6)
- interpretable.py (+1, -3)
app.py CHANGED

```diff
@@ -37,7 +37,7 @@ To follow the progress of KAN in RL you can check the repo [kanrl](https://github.com/riiswa/kanrl)
 
 [![riiswa/kanrl - GitHub](https://gh-card.dev/repos/riiswa/kanrl.svg)](https://github.com/riiswa/kanrl)
 
-*Please be patient, as the process may take a few minutes to run, especially in environments with large state/action spaces or with a complex KAN architecture.*
+*Please be patient, as the process may take a few minutes to run, especially in environments with large state/action spaces or with a complex KAN architecture. For optimal performance, default parameters may not suffice. Feel free to experiment with different settings to achieve desired results.*
 """
 
 envs = ["CartPole-v1", "MountainCar-v0", "Acrobot-v1", "Pendulum-v1", "MountainCarContinuous-v0", "LunarLander-v2", "Swimmer-v3", "Hopper-v3", "HalfCheetah-v3", "Walker2d-v3"]
@@ -48,13 +48,9 @@ if __name__ == "__main__":
 
     def load_video_and_dataset(_env_name):
         env_name = _env_name
-        if env_name in ["Swimmer-v3", "Hopper-v3", "HalfCheetah-v3", "Walker2d-v3"]:
-            gr.Warning(
-                "We're currently in the process of adding support for Mujoco environments, so the application may encounter crashes during this phase. We encourage contributors to join us in the repository https://github.com/riiswa/kanrl to assist in the development and support of other environments. Your contributions are invaluable in ensuring a robust and comprehensive framework."
-            )
         agent = "ppo"
         if env_name == "Swimmer-v3" or env_name == "Walker2d-v3":
-            agent ="trpo"
+            agent = "trpo"
 
         dataset_path, video_path = generate_dataset_from_expert(agent, _env_name, 15, 3)
         return video_path, gr.Button("Compute the symbolic policy!", interactive=True), {
```
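For context, `gr.Warning` pops a non-blocking toast in the Gradio UI while the handler keeps running (unlike raising `gr.Error`, which aborts the event); that is the pattern the removed Mujoco notice used. A minimal sketch of it, where the `pick_env` handler and component names are illustrative rather than taken from app.py:

```python
import gradio as gr

MUJOCO_ENVS = ["Swimmer-v3", "Hopper-v3", "HalfCheetah-v3", "Walker2d-v3"]

def pick_env(env_name):
    # gr.Warning shows a toast but lets the handler continue.
    if env_name in MUJOCO_ENVS:
        gr.Warning("Mujoco support is experimental; the app may crash.")
    return env_name

with gr.Blocks() as demo:
    dropdown = gr.Dropdown(MUJOCO_ENVS + ["CartPole-v1"], label="Environment")
    chosen = gr.Textbox(label="Selected")
    dropdown.change(pick_env, dropdown, chosen)

if __name__ == "__main__":
    demo.launch()
```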
interpretable.py CHANGED

```diff
@@ -35,12 +35,10 @@ class InterpretablePolicyExtractor:
         dataset["test_label"] = dataset["test_label"][:, None]
         dataset["train_input"] = dataset["train_input"].float()
         dataset["test_input"] = dataset["test_input"].float()
-        for k,v in dataset.items():
-            print(k, v.shape, v.dtype)
         return self.policy.train(dataset, opt="LBFGS", steps=steps, loss_fn=self.loss_fn)
 
     def forward(self, observation):
-        observation = torch.from_numpy(observation)
+        observation = torch.from_numpy(observation).float()
         action = self.policy(observation.unsqueeze(0))
         if self._action_is_discrete:
             return action.argmax(axis=-1).squeeze().item()
```