Commit 52dd602
Arnas committed
Parent(s): 18ccb0e

Add full model

Files changed:
- README.md +57 -0
- config.yaml +19 -0
- example.py +22 -0
- helpers/__init__.py +0 -0
- helpers/agent.py +50 -0
- helpers/load.py +59 -0
- helpers/wrappers.py +14 -0
- qdqn-FrozenLake-v1.pt +3 -0
README.md
ADDED
@@ -0,0 +1,57 @@
---
library_name:
tags:
- FrozenLake-v1
- deep-reinforcement-learning
- reinforcement-learning
model-index:
- name: QDQN
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: FrozenLake-v1
      type: FrozenLake-v1
    metrics:
    - type: mean_reward
      value: 0.12 +/- 0.0
      name: mean_reward
      verified: false
---

# **QDQN** Agent playing **FrozenLake-v1**

This is a trained model of a **QDQN** agent playing **FrozenLake-v1**,
trained using the [qrl-dqn-gym](https://github.com/qdevpsi3/qrl-dqn-gym) implementation.

This agent was trained for a [research project](https://github.com/agercas/QHack2023_QRL) during the QHack 2023
hackathon. The project explores the use of quantum algorithms in reinforcement learning.
More details about the project and the trained agent can be found in the [project repository](https://github.com/agercas/QHack2023_QRL).

## Usage

```python
import gym
import yaml
import torch
from helpers.load import QuantumNet
from helpers.wrappers import BinaryWrapper
from helpers.agent import Agent

# Environment
env_name = 'FrozenLake-v1'
env = gym.make(env_name)
env = BinaryWrapper(env)

# Network
with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(hparams['n_layers'])
state_dict = torch.load('qdqn-FrozenLake-v1.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent
agent = Agent(net)
```
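The usage snippet stops after constructing the agent. Continuing from it, a minimal rollout sketch, assuming the classic `gym` step API where `env.step` returns `(obs, reward, done, info)` (for `gym>=0.26` / `gymnasium`, `reset` and `step` return extra values):

```python
# Continuation of the usage snippet above; `env` and `agent` are already built.
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = agent(obs)                          # epsilon defaults to 0, so actions are greedy
    obs, reward, done, info = env.step(action)
    total_reward += reward
print('episode reward:', total_reward)
```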
config.yaml
ADDED
@@ -0,0 +1,19 @@
batch_size: 11
device: auto
eps_decay: 0.99
eps_init: 1.0
eps_min: 0.01
gamma: 0.8
log_ckp_freq: 50
log_eval_freq: 20
log_train_freq: 1
logging: true
loss: SmoothL1
lr: 0.001
memory: 10000
n_eval_episodes: 5
n_layers: 5
optimizer: RMSprop
target_freq: 10
total_episodes: 10000
train_freq: 5
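The `eps_*` entries feed the epsilon-greedy schedule implemented by `Agent.update_epsilon` in `helpers/agent.py`. A small sketch of how epsilon decays from `eps_init` towards `eps_min` under these values (no network is needed just to inspect the schedule):

```python
import yaml
from helpers.agent import Agent

with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

agent = Agent(net=None,
              exploration_initial_eps=hparams['eps_init'],
              exploration_decay=hparams['eps_decay'],
              exploration_final_eps=hparams['eps_min'])

for step in (0, 100, 500, 1000):
    # epsilon = max(eps_min, eps_min + (eps_init - eps_min) * eps_decay ** step)
    print(step, round(agent.update_epsilon(step), 3))
```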
example.py
ADDED
@@ -0,0 +1,22 @@
import gym
import yaml
import torch
from helpers.load import QuantumNet
from helpers.wrappers import BinaryWrapper
from helpers.agent import Agent

# Environment
env_name = 'FrozenLake-v1'
env = gym.make(env_name)
env = BinaryWrapper(env)

# Network
with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(hparams['n_layers'])
state_dict = torch.load('qdqn-FrozenLake-v1.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent
agent = Agent(net)
helpers/__init__.py
ADDED
File without changes
helpers/agent.py
ADDED
@@ -0,0 +1,50 @@
# Based on : https://github.com/djbyrne/core_rl/blob/master/algos/dqn/model.py

import numpy as np
import torch


class Agent:
    def __init__(self,
                 net,
                 action_space=None,
                 exploration_initial_eps=None,
                 exploration_decay=None,
                 exploration_final_eps=None):

        self.net = net
        self.action_space = action_space
        self.exploration_initial_eps = exploration_initial_eps
        self.exploration_decay = exploration_decay
        self.exploration_final_eps = exploration_final_eps
        self.epsilon = 0.

    def __call__(self, state, device=torch.device('cpu')):
        if np.random.random() < self.epsilon:
            action = self.get_random_action()
        else:
            action = self.get_action(state, device)

        return action

    def get_random_action(self):
        action = self.action_space.sample()
        return action

    def get_action(self, state, device=torch.device('cpu')):
        if not isinstance(state, torch.Tensor):
            state = torch.tensor([state])

        if device.type != 'cpu':
            state = state.cuda(device)

        q_values = self.net.eval()(state)
        _, action = torch.max(q_values, dim=1)
        return int(action.item())

    def update_epsilon(self, step):
        self.epsilon = max(
            self.exploration_final_eps, self.exploration_final_eps +
            (self.exploration_initial_eps - self.exploration_final_eps) *
            self.exploration_decay**step)
        return self.epsilon
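A small usage sketch for this class, with the exploration values taken from `config.yaml` and the classic single-return `gym` reset API assumed:

```python
import gym
from helpers.agent import Agent
from helpers.load import QuantumNet
from helpers.wrappers import BinaryWrapper

env = BinaryWrapper(gym.make('FrozenLake-v1'))
agent = Agent(QuantumNet(n_layers=5),
              action_space=env.action_space,
              exploration_initial_eps=1.0,
              exploration_decay=0.99,
              exploration_final_eps=0.01)

state = env.reset()       # classic gym API assumed (single return value)
agent.update_epsilon(0)   # epsilon starts at eps_init, so the action is random
print(agent(state))
agent.epsilon = 0.0       # with epsilon at 0 the agent acts greedily through the net
print(agent(state))
```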
helpers/load.py
ADDED
@@ -0,0 +1,59 @@
import numpy as np
import pennylane as qml
import torch.nn as nn


def encode(n_qubits, inputs):
    for wire in range(n_qubits):
        qml.RX(inputs[wire], wires=wire)


def layer(n_qubits, y_weight, z_weight):
    for wire, y_weight in enumerate(y_weight):
        qml.RY(y_weight, wires=wire)
    for wire, z_weight in enumerate(z_weight):
        qml.RZ(z_weight, wires=wire)
    for wire in range(n_qubits):
        qml.CZ(wires=[wire, (wire + 1) % n_qubits])


def measure(n_qubits):
    return [qml.expval(qml.PauliZ(wire)) for wire in range(n_qubits)]


def get_model(n_qubits, n_layers, data_reupload):
    # NOTE: need to select an appropriate device
    # dev = qml.device('lightning.gpu', wires=n_qubits)
    dev = qml.device("default.qubit", wires=n_qubits)
    shapes = {
        "y_weights": (n_layers, n_qubits),
        "z_weights": (n_layers, n_qubits)
    }

    @qml.qnode(dev, interface='torch')
    def circuit(inputs, y_weights, z_weights):
        for layer_idx in range(n_layers):
            if (layer_idx == 0) or data_reupload:
                encode(n_qubits, inputs)
            layer(n_qubits, y_weights[layer_idx], z_weights[layer_idx])
        return measure(n_qubits)

    model = qml.qnn.TorchLayer(circuit, shapes)

    return model


class QuantumNet(nn.Module):
    def __init__(self, n_layers):
        super(QuantumNet, self).__init__()
        self.n_qubits = 4
        self.n_actions = 4
        self.q_layers = get_model(n_qubits=self.n_qubits,
                                  n_layers=n_layers,
                                  data_reupload=False)

    def forward(self, inputs):
        inputs = inputs * np.pi
        outputs = self.q_layers(inputs)
        outputs = (1 + outputs) / 2
        return outputs
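As a quick sanity check of the interface: the network maps a 4-bit binary observation to 4 outputs in [0, 1] (one per FrozenLake action), since each Pauli-Z expectation value in [-1, 1] is rescaled by `(1 + outputs) / 2`. A minimal sketch with untrained weights:

```python
import torch
from helpers.load import QuantumNet

net = QuantumNet(n_layers=5)
obs = torch.tensor([[0., 1., 1., 0.]])  # one binary-encoded FrozenLake state
with torch.no_grad():
    q_values = net(obs)
print(q_values.shape)                   # torch.Size([1, 4])
```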
helpers/wrappers.py
ADDED
@@ -0,0 +1,14 @@
import gym
import gym.spaces
import numpy as np


class BinaryWrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super(BinaryWrapper, self).__init__(env)
        self.bits = int(np.ceil(np.log2(env.observation_space.n)))
        self.observation_space = gym.spaces.MultiBinary(self.bits)

    def observation(self, obs):
        binary = map(float, "{0:b}".format(int(obs)).zfill(self.bits))
        return np.array(list(binary))
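The wrapper turns FrozenLake's 16 discrete states into 4-bit observation vectors, which is exactly what the 4-qubit `QuantumNet` consumes; a small illustration:

```python
import gym
from helpers.wrappers import BinaryWrapper

env = BinaryWrapper(gym.make('FrozenLake-v1'))
print(env.bits)             # 4 bits are enough for 16 states
print(env.observation(6))   # [0. 1. 1. 0.]
print(env.observation(15))  # [1. 1. 1. 1.]
```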
qdqn-FrozenLake-v1.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e2fafc979aaa48d24691a9afaba3e5c4ca2faf99d8caea054fa46451e567dbe6
size 1275