Commit 52dd602
Arnas committed
Parent(s): 18ccb0e

Add full model

Files changed:
- README.md +57 -0
- config.yaml +19 -0
- example.py +22 -0
- helpers/__init__.py +0 -0
- helpers/agent.py +50 -0
- helpers/load.py +59 -0
- helpers/wrappers.py +14 -0
- qdqn-FrozenLake-v1.pt +3 -0
README.md
ADDED
@@ -0,0 +1,57 @@
---
library_name:
tags:
- FrozenLake-v1
- deep-reinforcement-learning
- reinforcement-learning
model-index:
- name: QDQN
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: FrozenLake-v1
      type: FrozenLake-v1
    metrics:
    - type: mean_reward
      value: 0.12 +/- 0.0
      name: mean_reward
      verified: false
---

# **QDQN** Agent playing **FrozenLake-v1**

This is a trained model of a **QDQN** agent playing **FrozenLake-v1**,
trained using the [qrl-dqn-gym](https://github.com/qdevpsi3/qrl-dqn-gym) implementation.

This agent was trained for a [research project](https://github.com/agercas/QHack2023_QRL) during the QHack 2023
hackathon. The project explores the use of quantum algorithms in reinforcement learning.
More details about the project and the trained agent can be found in the [project repository](https://github.com/agercas/QHack2023_QRL).

## Usage

```python
import gym
import yaml
import torch
from helpers.load import QuantumNet
from helpers.wrappers import BinaryWrapper
from helpers.agent import Agent

# Environment
env_name = 'FrozenLake-v1'
env = gym.make(env_name)
env = BinaryWrapper(env)

# Network
with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(hparams['n_layers'])
state_dict = torch.load('qdqn-FrozenLake-v1.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent
agent = Agent(net)
```
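The usage snippet stops after constructing the agent. Continuing from it, a minimal rollout sketch, assuming the classic `gym` step API where `env.step` returns `(obs, reward, done, info)` (for `gym>=0.26` / `gymnasium`, `reset` and `step` return extra values):

```python
# Continuation of the usage snippet above; `env` and `agent` are already built.
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = agent(obs)                          # epsilon defaults to 0, so actions are greedy
    obs, reward, done, info = env.step(action)
    total_reward += reward
print('episode reward:', total_reward)
```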
config.yaml
ADDED
@@ -0,0 +1,19 @@
batch_size: 11
device: auto
eps_decay: 0.99
eps_init: 1.0
eps_min: 0.01
gamma: 0.8
log_ckp_freq: 50
log_eval_freq: 20
log_train_freq: 1
logging: true
loss: SmoothL1
lr: 0.001
memory: 10000
n_eval_episodes: 5
n_layers: 5
optimizer: RMSprop
target_freq: 10
total_episodes: 10000
train_freq: 5
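The `eps_*` entries feed the epsilon-greedy schedule implemented by `Agent.update_epsilon` in `helpers/agent.py`. A small sketch of how epsilon decays from `eps_init` towards `eps_min` under these values (no network is needed just to inspect the schedule):

```python
import yaml
from helpers.agent import Agent

with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

agent = Agent(net=None,
              exploration_initial_eps=hparams['eps_init'],
              exploration_decay=hparams['eps_decay'],
              exploration_final_eps=hparams['eps_min'])

for step in (0, 100, 500, 1000):
    # epsilon = max(eps_min, eps_min + (eps_init - eps_min) * eps_decay ** step)
    print(step, round(agent.update_epsilon(step), 3))
```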
example.py
ADDED
@@ -0,0 +1,22 @@
import gym
import yaml
import torch
from helpers.load import QuantumNet
from helpers.wrappers import BinaryWrapper
from helpers.agent import Agent

# Environment
env_name = 'FrozenLake-v1'
env = gym.make(env_name)
env = BinaryWrapper(env)

# Network
with open('config.yaml', 'r') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(hparams['n_layers'])
state_dict = torch.load('qdqn-FrozenLake-v1.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent
agent = Agent(net)
helpers/__init__.py
ADDED
File without changes
helpers/agent.py
ADDED
@@ -0,0 +1,50 @@
# Based on : https://github.com/djbyrne/core_rl/blob/master/algos/dqn/model.py

import numpy as np
import torch


class Agent:
    def __init__(self,
                 net,
                 action_space=None,
                 exploration_initial_eps=None,
                 exploration_decay=None,
                 exploration_final_eps=None):

        self.net = net
        self.action_space = action_space
        self.exploration_initial_eps = exploration_initial_eps
        self.exploration_decay = exploration_decay
        self.exploration_final_eps = exploration_final_eps
        self.epsilon = 0.

    def __call__(self, state, device=torch.device('cpu')):
        if np.random.random() < self.epsilon:
            action = self.get_random_action()
        else:
            action = self.get_action(state, device)

        return action

    def get_random_action(self):
        action = self.action_space.sample()
        return action

    def get_action(self, state, device=torch.device('cpu')):
        if not isinstance(state, torch.Tensor):
            state = torch.tensor([state])

        if device.type != 'cpu':
            state = state.cuda(device)

        q_values = self.net.eval()(state)
        _, action = torch.max(q_values, dim=1)
        return int(action.item())

    def update_epsilon(self, step):
        self.epsilon = max(
            self.exploration_final_eps, self.exploration_final_eps +
            (self.exploration_initial_eps - self.exploration_final_eps) *
            self.exploration_decay**step)
        return self.epsilon
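A small usage sketch for this class, with the exploration values taken from `config.yaml` and the classic single-return `gym` reset API assumed:

```python
import gym
from helpers.agent import Agent
from helpers.load import QuantumNet
from helpers.wrappers import BinaryWrapper

env = BinaryWrapper(gym.make('FrozenLake-v1'))
agent = Agent(QuantumNet(n_layers=5),
              action_space=env.action_space,
              exploration_initial_eps=1.0,
              exploration_decay=0.99,
              exploration_final_eps=0.01)

state = env.reset()       # classic gym API assumed (single return value)
agent.update_epsilon(0)   # epsilon starts at eps_init, so the action is random
print(agent(state))
agent.epsilon = 0.0       # with epsilon at 0 the agent acts greedily through the net
print(agent(state))
```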
helpers/load.py
ADDED
@@ -0,0 +1,59 @@
import numpy as np
import pennylane as qml
import torch.nn as nn


def encode(n_qubits, inputs):
    for wire in range(n_qubits):
        qml.RX(inputs[wire], wires=wire)


def layer(n_qubits, y_weight, z_weight):
    for wire, y_weight in enumerate(y_weight):
        qml.RY(y_weight, wires=wire)
    for wire, z_weight in enumerate(z_weight):
        qml.RZ(z_weight, wires=wire)
    for wire in range(n_qubits):
        qml.CZ(wires=[wire, (wire + 1) % n_qubits])


def measure(n_qubits):
    return [qml.expval(qml.PauliZ(wire)) for wire in range(n_qubits)]


def get_model(n_qubits, n_layers, data_reupload):
    # NOTE: need to select an appropriate device
    # dev = qml.device('lightning.gpu', wires=n_qubits)
    dev = qml.device("default.qubit", wires=n_qubits)
    shapes = {
        "y_weights": (n_layers, n_qubits),
        "z_weights": (n_layers, n_qubits)
    }

    @qml.qnode(dev, interface='torch')
    def circuit(inputs, y_weights, z_weights):
        for layer_idx in range(n_layers):
            if (layer_idx == 0) or data_reupload:
                encode(n_qubits, inputs)
            layer(n_qubits, y_weights[layer_idx], z_weights[layer_idx])
        return measure(n_qubits)

    model = qml.qnn.TorchLayer(circuit, shapes)

    return model


class QuantumNet(nn.Module):
    def __init__(self, n_layers):
        super(QuantumNet, self).__init__()
        self.n_qubits = 4
        self.n_actions = 4
        self.q_layers = get_model(n_qubits=self.n_qubits,
                                  n_layers=n_layers,
                                  data_reupload=False)

    def forward(self, inputs):
        inputs = inputs * np.pi
        outputs = self.q_layers(inputs)
        outputs = (1 + outputs) / 2
        return outputs
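As a quick sanity check of the interface: the network maps a 4-bit binary observation to 4 outputs in [0, 1] (one per FrozenLake action), since each Pauli-Z expectation value in [-1, 1] is rescaled by `(1 + outputs) / 2`. A minimal sketch with untrained weights:

```python
import torch
from helpers.load import QuantumNet

net = QuantumNet(n_layers=5)
obs = torch.tensor([[0., 1., 1., 0.]])  # one binary-encoded FrozenLake state
with torch.no_grad():
    q_values = net(obs)
print(q_values.shape)                   # torch.Size([1, 4])
```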
helpers/wrappers.py
ADDED
@@ -0,0 +1,14 @@
import gym
import gym.spaces
import numpy as np


class BinaryWrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super(BinaryWrapper, self).__init__(env)
        self.bits = int(np.ceil(np.log2(env.observation_space.n)))
        self.observation_space = gym.spaces.MultiBinary(self.bits)

    def observation(self, obs):
        binary = map(float, "{0:b}".format(int(obs)).zfill(self.bits))
        return np.array(list(binary))
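The wrapper turns FrozenLake's 16 discrete states into 4-bit observation vectors, which is exactly what the 4-qubit `QuantumNet` consumes; a small illustration:

```python
import gym
from helpers.wrappers import BinaryWrapper

env = BinaryWrapper(gym.make('FrozenLake-v1'))
print(env.bits)             # 4 bits are enough for 16 states
print(env.observation(6))   # [0. 1. 1. 0.]
print(env.observation(15))  # [1. 1. 1. 1.]
```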
qdqn-FrozenLake-v1.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e2fafc979aaa48d24691a9afaba3e5c4ca2faf99d8caea054fa46451e567dbe6
size 1275