Arnas committed on
Commit
52dd602
1 Parent(s): 18ccb0e

Add full model

Browse files
README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name:
3
+ tags:
4
+ - FrozenLake-v1
5
+ - deep-reinforcement-learning
6
+ - reinforcement-learning
7
+ model-index:
8
+ - name: QDQN
9
+ results:
10
+ - task:
11
+ type: reinforcement-learning
12
+ name: reinforcement-learning
13
+ dataset:
14
+ name: FrozenLake-v1
15
+ type: FrozenLake-v1
16
+ metrics:
17
+ - type: mean_reward
18
+ value: 0.12 +/- 0.0
19
+ name: mean_reward
20
+ verified: false
21
+ ---
22
+
23
+ # **QDQN** Agent playing **FrozenLake-v1**
24
+ This is a trained model of a **QDQN** agent playing **FrozenLake-v1**
25
+ using the [qrl-dqn-gym](https://github.com/qdevpsi3/qrl-dqn-gym) library.
26
+
27
+ This agent was trained for a [research project](https://github.com/agercas/QHack2023_QRL) during the QHack 2023
28
+ hackathon. The project explores the use of quantum algorithms in reinforcement learning.
29
+ More details about the project and the trained agent can be found in the [project repository](https://github.com/agercas/QHack2023_QRL).
30
+
31
+
32
+ ## Usage
33
+
34
+ ```python
35
+ import gym
36
+ import yaml
37
+ import torch
38
+ from helpers.load import QuantumNet
39
+ from helpers.wrappers import BinaryWrapper
40
+ from helpers.agent import Agent
41
+
42
+ # Environment
43
+ env_name = 'FrozenLake-v1'
44
+ env = gym.make(env_name)
45
+ env = BinaryWrapper(env)
46
+
47
+ # Network
48
+ with open('config.yaml', 'r') as f:
49
+ hparams = yaml.safe_load(f)
50
+
51
+ net = QuantumNet(hparams['n_layers'])
52
+ state_dict = torch.load('qdqn-FrozenLake-v1.pt', map_location=torch.device('cpu'))
53
+ net.load_state_dict(state_dict)
54
+
55
+ # Agent
56
+ agent = Agent(net)
57
+ ```
config.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size: 11
2
+ device: auto
3
+ eps_decay: 0.99
4
+ eps_init: 1.0
5
+ eps_min: 0.01
6
+ gamma: 0.8
7
+ log_ckp_freq: 50
8
+ log_eval_freq: 20
9
+ log_train_freq: 1
10
+ logging: true
11
+ loss: SmoothL1
12
+ lr: 0.001
13
+ memory: 10000
14
+ n_eval_episodes: 5
15
+ n_layers: 5
16
+ optimizer: RMSprop
17
+ target_freq: 10
18
+ total_episodes: 10000
19
+ train_freq: 5
example.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import yaml
3
+ import torch
4
+ from helpers.load import QuantumNet
5
+ from helpers.wrappers import BinaryWrapper
6
+ from helpers.agent import Agent
7
+
8
# Environment: FrozenLake whose discrete observations are re-encoded as bit
# vectors by BinaryWrapper.
env_name = 'FrozenLake-v1'
env = BinaryWrapper(gym.make(env_name))

# Network: the number of circuit layers is read from the config file.
with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(hparams['n_layers'])
# NOTE(review): torch.load unpickles the checkpoint file; only load
# checkpoints from a source you trust.
state_dict = torch.load(
    'qdqn-FrozenLake-v1.pt',
    map_location=torch.device('cpu'),
)
net.load_state_dict(state_dict)

# Agent: with the default arguments epsilon is 0, so the agent acts greedily.
agent = Agent(net)
helpers/__init__.py ADDED
File without changes
helpers/agent.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Based on : https://github.com/djbyrne/core_rl/blob/master/algos/dqn/model.py
2
+
3
+ import numpy as np
4
+ import torch
5
+
6
+
7
class Agent:
    """Epsilon-greedy policy built on top of a Q-value network.

    With probability ``epsilon`` a random action is drawn from
    ``action_space``; otherwise the action with the highest Q-value predicted
    by ``net`` is returned.  ``epsilon`` starts at 0 (purely greedy) and only
    changes when ``update_epsilon`` is called.

    Args:
        net: Module exposing ``eval()``; the object returned by ``eval()`` is
            called on a state batch and must return Q-values with the actions
            along dimension 1.
        action_space: Object with a ``sample()`` method, used for random
            actions.  Only required when ``epsilon`` can be non-zero.
        exploration_initial_eps: Epsilon at step 0 of the decay schedule.
        exploration_decay: Multiplicative decay base of the schedule.
        exploration_final_eps: Lower bound of the schedule.
    """

    def __init__(self,
                 net,
                 action_space=None,
                 exploration_initial_eps=None,
                 exploration_decay=None,
                 exploration_final_eps=None):
        self.net = net
        self.action_space = action_space
        self.exploration_initial_eps = exploration_initial_eps
        self.exploration_decay = exploration_decay
        self.exploration_final_eps = exploration_final_eps
        self.epsilon = 0.

    def __call__(self, state, device=torch.device('cpu')):
        """Return a random action with probability ``epsilon``, else greedy."""
        if np.random.random() < self.epsilon:
            return self.get_random_action()
        return self.get_action(state, device)

    def get_random_action(self):
        """Sample an action from ``action_space``.

        Raises:
            ValueError: If the agent was built without an ``action_space``.
        """
        if self.action_space is None:
            raise ValueError(
                "Agent needs an action_space to sample random actions")
        return self.action_space.sample()

    def get_action(self, state, device=torch.device('cpu')):
        """Return the index of the highest Q-value for ``state`` as an int.

        Args:
            state: A ``torch.Tensor`` that is passed to the network as-is, or
                any other array-like, which is converted to a tensor and
                given a leading batch dimension of size 1.
            device: ``torch.device`` (or device string) the state is moved to
                when it is not the CPU.
        """
        device = torch.device(device)

        if not isinstance(state, torch.Tensor):
            # as_tensor keeps the dtype of the input and avoids the slow
            # "list of ndarrays" conversion path of torch.tensor([state]).
            state = torch.as_tensor(state).unsqueeze(0)

        if device.type != 'cpu':
            # .to() works for every accelerator backend, not only CUDA.
            state = state.to(device)

        # Action selection never needs gradients.
        with torch.no_grad():
            q_values = self.net.eval()(state)
        _, action = torch.max(q_values, dim=1)
        return int(action.item())

    def update_epsilon(self, step):
        """Set and return epsilon for ``step`` of the decay schedule.

        epsilon = max(final, final + (initial - final) * decay ** step)

        Raises:
            ValueError: If any of the three schedule parameters is missing.
        """
        schedule = (self.exploration_initial_eps,
                    self.exploration_decay,
                    self.exploration_final_eps)
        if any(value is None for value in schedule):
            raise ValueError(
                "exploration_initial_eps, exploration_decay and "
                "exploration_final_eps must be set to update epsilon")

        initial_eps, decay, final_eps = schedule
        self.epsilon = max(
            final_eps,
            final_eps + (initial_eps - final_eps) * decay**step)
        return self.epsilon
helpers/load.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pennylane as qml
3
+ import torch.nn as nn
4
+
5
+
6
def encode(n_qubits, inputs):
    """Angle-encode ``inputs`` with one RX rotation per wire.

    Wire ``w`` is rotated by the ``w``-th entry along the last axis of
    ``inputs``.

    NOTE(review): indexing the last axis (instead of ``inputs[wire]``) gives
    the same result for 1-D inputs and keeps the encoding correct if the QNode
    is handed batched inputs of shape (batch, n_qubits) -- confirm against the
    TorchLayer behaviour of the installed PennyLane version.
    """
    for wire in range(n_qubits):
        qml.RX(inputs[..., wire], wires=wire)
9
+
10
+
11
def layer(n_qubits, y_weight, z_weight):
    """Apply one variational layer: RY rotations, RZ rotations, then CZ gates.

    The i-th entry of ``y_weight`` / ``z_weight`` is the rotation angle used
    on wire i.  Afterwards a CZ gate is applied between every wire and its
    successor, wrapping around from the last wire to wire 0.
    """
    for wire, ry_angle in enumerate(y_weight):
        qml.RY(ry_angle, wires=wire)

    for wire, rz_angle in enumerate(z_weight):
        qml.RZ(rz_angle, wires=wire)

    for control in range(n_qubits):
        target = (control + 1) % n_qubits
        qml.CZ(wires=[control, target])
18
+
19
+
20
def measure(n_qubits):
    """Return the Pauli-Z expectation value of wires 0 .. n_qubits - 1."""
    expectations = []
    for wire in range(n_qubits):
        expectations.append(qml.expval(qml.PauliZ(wire)))
    return expectations
22
+
23
+
24
def get_model(n_qubits, n_layers, data_reupload):
    """Build the variational circuit and wrap it as a ``qml.qnn.TorchLayer``.

    Args:
        n_qubits: Number of wires of the circuit.
        n_layers: Number of variational layers; the trainable ``y_weights``
            and ``z_weights`` each have shape (n_layers, n_qubits).
        data_reupload: When truthy the inputs are re-encoded before every
            layer; otherwise only before the first one.
    """
    # NOTE: need to select an appropriate device
    # (e.g. qml.device('lightning.gpu', wires=n_qubits)).
    dev = qml.device("default.qubit", wires=n_qubits)

    weight_shape = (n_layers, n_qubits)
    shapes = {"y_weights": weight_shape, "z_weights": weight_shape}

    @qml.qnode(dev, interface='torch')
    def circuit(inputs, y_weights, z_weights):
        for layer_idx, (y_row, z_row) in enumerate(
                zip(y_weights[:n_layers], z_weights[:n_layers])):
            if data_reupload or layer_idx == 0:
                encode(n_qubits, inputs)
            layer(n_qubits, y_row, z_row)
        return measure(n_qubits)

    return qml.qnn.TorchLayer(circuit, shapes)
44
+
45
+
46
class QuantumNet(nn.Module):
    """Q-network whose only layer is the quantum circuit from ``get_model``.

    The circuit uses 4 qubits without data re-uploading.
    """

    def __init__(self, n_layers):
        super().__init__()
        self.n_qubits = 4
        self.n_actions = 4
        self.q_layers = get_model(
            n_qubits=self.n_qubits,
            n_layers=n_layers,
            data_reupload=False,
        )

    def forward(self, inputs):
        """Scale ``inputs`` by pi, run the circuit, return ``(1 + out) / 2``."""
        angles = inputs * np.pi
        expectations = self.q_layers(angles)
        return (1 + expectations) / 2
helpers/wrappers.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import gym.spaces
3
+ import numpy as np
4
+
5
+
6
class BinaryWrapper(gym.ObservationWrapper):
    """Observation wrapper that turns a discrete observation into its bits.

    The number of bits is ``ceil(log2(n))`` where ``n`` is the size of the
    wrapped environment's observation space.
    """

    def __init__(self, env):
        super().__init__(env)
        n_states = env.observation_space.n
        self.bits = int(np.ceil(np.log2(n_states)))
        self.observation_space = gym.spaces.MultiBinary(self.bits)

    def observation(self, obs):
        """Return ``obs`` as a float array of its zero-padded binary digits."""
        bit_string = format(int(obs), "b").zfill(self.bits)
        return np.array([float(bit) for bit in bit_string])
qdqn-FrozenLake-v1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2fafc979aaa48d24691a9afaba3e5c4ca2faf99d8caea054fa46451e567dbe6
3
+ size 1275