Arnas committed on
Commit
5cb9176
1 Parent(s): 6411c52

Add QDQN Mountain Car agent trained for 600 episodes

Browse files
Files changed (7) hide show
  1. README.md +59 -0
  2. config.yaml +24 -0
  3. example.py +26 -0
  4. model/__init__.py +0 -0
  5. model/agent.py +48 -0
  6. model/qnn.py +82 -0
  7. qdqn-MountainCar-v0.pt +3 -0
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - MountainCar-v0
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ model-index:
7
+ - name: QDQN
8
+ results:
9
+ - task:
10
+ type: reinforcement-learning
11
+ name: reinforcement-learning
12
+ dataset:
13
+ name: MountainCar-v0
14
+ type: MountainCar-v0
15
+ metrics:
16
+ - type: mean_reward
17
+ value: -200.0 +/- 0.0
18
+ name: mean_reward
19
+ verified: false
20
+ ---
21
+
22
+ # **QDQN** Agent playing **MountainCar-v0**
23
+ This is a trained model of a **QDQN** agent playing **MountainCar-v0**
24
+ using the [qrl-dqn-gym](https://github.com/qdevpsi3/qrl-dqn-gym) library.
25
+
26
+ This agent was trained as part of a [research project](https://github.com/agercas/QHack2023_QRL) carried out during the QHack 2023
27
+ hackathon. The project explores the use of quantum algorithms in reinforcement learning.
28
+ More details about the project and the trained agent can be found in the [project repository](https://github.com/agercas/QHack2023_QRL).
29
+
30
+
31
+ ## Usage
32
+
33
+ ```python
34
+ import gym
35
+ import yaml
36
+ import torch
37
+ from model.qnn import QuantumNet
38
+ from model.agent import Agent
39
+
40
# Environment
env_name = 'MountainCar-v0'
env = gym.make(env_name)

# Network: the hyper-parameters used for training are stored in config.yaml.
# An explicit encoding keeps the read independent of the platform locale.
with open('config.yaml', 'r', encoding='utf-8') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(
    n_layers=hparams['n_layers'],
    w_input=hparams['w_input'],
    w_output=hparams['w_output'],
    data_reupload=hparams['data_reupload']
)
# NOTE(security): torch.load unpickles the file, so only load checkpoints
# that come from a source you trust.
state_dict = torch.load('qdqn-MountainCar-v0.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent: passing the action space lets the agent sample random actions
# if exploration (epsilon > 0) is ever enabled.
agent = Agent(net, action_space=env.action_space)
59
+ ```
config.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size: 16
2
+ data_reupload: true
3
+ device: auto
4
+ eps_decay: 0.99
5
+ eps_init: 1.0
6
+ eps_min: 0.01
7
+ gamma: 0.99
8
+ log_ckp_freq: 50
9
+ log_eval_freq: 20
10
+ log_train_freq: 1
11
+ logging: true
12
+ loss: SmoothL1
13
+ lr: 0.001
14
+ lr_input: 0.001
15
+ lr_output: 0.1
16
+ memory: 10000
17
+ n_eval_episodes: 5
18
+ n_layers: 5
19
+ optimizer: RMSprop
20
+ target_freq: 30
21
+ total_episodes: 5000
22
+ train_freq: 10
23
+ w_input: true
24
+ w_output: true
example.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ import torch
3
+ import yaml
4
+
5
+ from model.agent import Agent
6
+ from model.qnn import QuantumNet
7
+
8
# Environment
env_name = 'MountainCar-v0'
env = gym.make(env_name)

# Network: the hyper-parameters used for training are stored in config.yaml.
# An explicit encoding keeps the read independent of the platform locale.
with open('config.yaml', 'r', encoding='utf-8') as f:
    hparams = yaml.safe_load(f)

net = QuantumNet(
    n_layers=hparams['n_layers'],
    w_input=hparams['w_input'],
    w_output=hparams['w_output'],
    data_reupload=hparams['data_reupload']
)
# NOTE(security): torch.load unpickles the file, so only load checkpoints
# that come from a source you trust.
state_dict = torch.load('qdqn-MountainCar-v0.pt', map_location=torch.device('cpu'))
net.load_state_dict(state_dict)

# Agent: passing the action space lets the agent sample random actions
# if exploration (epsilon > 0) is ever enabled.
agent = Agent(net, action_space=env.action_space)
model/__init__.py ADDED
File without changes
model/agent.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+
5
class Agent:
    """Epsilon-greedy action selector built around a Q-value network.

    With probability ``epsilon`` the agent samples a random action from
    ``action_space``; otherwise it returns the action with the largest
    Q-value predicted by ``net``. ``epsilon`` starts at 0, so the agent is
    purely greedy until ``update_epsilon`` is called.

    Args:
        net: Callable module exposing ``eval()`` that maps a batch of states
            to a 2-D tensor of Q-values (one row per state).
        action_space: Object exposing ``sample()``; only needed when random
            actions are drawn (``epsilon > 0``).
        exploration_initial_eps: Epsilon at step 0 of the decay schedule.
        exploration_decay: Multiplicative decay base applied per step.
        exploration_final_eps: Lower bound of the decay schedule.
    """

    def __init__(self,
                 net,
                 action_space=None,
                 exploration_initial_eps=None,
                 exploration_decay=None,
                 exploration_final_eps=None):
        self.net = net
        self.action_space = action_space
        self.exploration_initial_eps = exploration_initial_eps
        self.exploration_decay = exploration_decay
        self.exploration_final_eps = exploration_final_eps
        # Greedy by default; exploration starts once update_epsilon is called.
        self.epsilon = 0.

    def __call__(self, state, device=torch.device('cpu')):
        """Return an epsilon-greedy action for ``state``."""
        if np.random.random() < self.epsilon:
            return self.get_random_action()
        return self.get_action(state, device)

    def get_random_action(self):
        """Sample an action from ``action_space``."""
        return self.action_space.sample()

    def get_action(self, state, device=torch.device('cpu')):
        """Return the greedy action (index of the largest Q-value) as an int.

        Args:
            state: Either a tensor that is already batched, or any value
                accepted by ``torch.tensor``; the latter is wrapped in a
                batch of size one.
            device: Device the state is moved to when it is not the CPU.
        """
        if not isinstance(state, torch.Tensor):
            state = torch.tensor([state])

        if device.type != 'cpu':
            # ``.to`` works for every accelerator type, whereas ``.cuda``
            # only accepts CUDA devices.
            state = state.to(device)

        # Action selection never back-propagates, so skip autograd tracking.
        with torch.no_grad():
            q_values = self.net.eval()(state)
        _, action = torch.max(q_values, dim=1)
        return int(action.item())

    def update_epsilon(self, step):
        """Set and return epsilon for ``step`` of the exponential decay.

        epsilon = final + (initial - final) * decay ** step, never going
        below ``exploration_final_eps``.
        """
        span = self.exploration_initial_eps - self.exploration_final_eps
        decayed = self.exploration_final_eps + span * self.exploration_decay**step
        self.epsilon = max(self.exploration_final_eps, decayed)
        return self.epsilon
model/qnn.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pennylane as qml
2
+ import torch
3
+ import torch.nn as nn
4
+ from torch.nn.parameter import Parameter
5
+
6
+
7
def encode(n_qubits, inputs):
    """Angle-encode the input features with one RX rotation per wire.

    Wire ``i`` is rotated by feature ``i`` of ``inputs``.

    Args:
        n_qubits: Number of wires to encode onto.
        inputs: Tensor whose last axis holds at least ``n_qubits`` features.
    """
    for wire in range(n_qubits):
        # Index the trailing axis: identical to ``inputs[wire]`` for a 1-D
        # input, and it still picks feature ``wire`` when a 2-D batch is
        # passed in one go.
        # NOTE(review): newer PennyLane TorchLayer releases reportedly pass
        # the whole batch to the QNode - confirm against the installed version.
        qml.RX(inputs[..., wire], wires=wire)
10
+
11
+
12
def layer(n_qubits, y_weight, z_weight):
    """Apply one variational layer: RY rotations, RZ rotations, then a CZ ring.

    Args:
        n_qubits: Number of wires spanned by the CZ ring.
        y_weight: Iterable of RY angles; entry ``i`` acts on wire ``i``.
        z_weight: Iterable of RZ angles; entry ``i`` acts on wire ``i``.
    """
    for wire, theta in enumerate(y_weight):
        qml.RY(theta, wires=wire)

    for wire, phi in enumerate(z_weight):
        qml.RZ(phi, wires=wire)

    # Each wire is coupled to its successor, wrapping around at the end.
    # NOTE(review): with n_qubits == 2 this applies CZ to the same pair twice
    # ([0, 1] then [1, 0]), which looks like it cancels out - confirm intent.
    # Left unchanged because existing checkpoints were trained with it.
    for control in range(n_qubits):
        target = (control + 1) % n_qubits
        qml.CZ(wires=[control, target])
19
+
20
+
21
def measure(n_qubits):
    """Return the Pauli-Z expectation value of every wire, in wire order."""
    expectations = []
    for wire in range(n_qubits):
        expectations.append(qml.expval(qml.PauliZ(wire)))
    return expectations
23
+
24
+
25
def get_model(n_qubits, n_layers, data_reupload):
    """Build the variational circuit and wrap it as a ``TorchLayer``.

    Args:
        n_qubits: Number of wires of the simulated device.
        n_layers: Number of variational layers stacked in the circuit.
        data_reupload: When truthy, the inputs are re-encoded before every
            layer; otherwise only before the first one.

    Returns:
        A ``qml.qnn.TorchLayer`` with trainable ``y_weights`` and
        ``z_weights``, each of shape ``(n_layers, n_qubits)``.
    """
    # NOTE: need to select an appropriate device
    # (e.g. 'lightning.gpu' instead of the default simulator).
    dev = qml.device("default.qubit", wires=n_qubits)

    @qml.qnode(dev, interface='torch')
    def circuit(inputs, y_weights, z_weights):
        for layer_idx in range(n_layers):
            is_first_layer = layer_idx == 0
            if is_first_layer or data_reupload:
                encode(n_qubits, inputs)
            layer(n_qubits, y_weights[layer_idx], z_weights[layer_idx])
        return measure(n_qubits)

    # Both weight sets hold one angle per (layer, wire) pair.
    per_layer_shape = (n_layers, n_qubits)
    shapes = {name: per_layer_shape for name in ("y_weights", "z_weights")}

    return qml.qnn.TorchLayer(circuit, shapes)
45
+
46
+
47
class QuantumNet(nn.Module):
    """Q-network: a 2-qubit variational circuit followed by a linear read-out.

    Args:
        n_layers: Number of variational layers in the quantum circuit.
        w_input: When truthy, learn one scaling weight per input feature.
        w_output: When truthy, learn one scaling weight per action; otherwise
            the outputs are multiplied by the constant 90.
        data_reupload: Forwarded to ``get_model``.
    """

    def __init__(self, n_layers, w_input, w_output, data_reupload):
        super().__init__()
        self.n_qubits = 2
        self.n_actions = 3
        self.data_reupload = data_reupload
        self.q_layers = get_model(
            n_qubits=self.n_qubits,
            n_layers=n_layers,
            data_reupload=data_reupload,
        )
        # Maps the 2 qubit read-outs onto the 3 actions. Deliberately kept to
        # a single linear layer so learning happens in the quantum circuit.
        self.layer1 = nn.Linear(self.n_qubits, self.n_actions)

        if not w_input:
            self.register_parameter("w_input", None)
        else:
            self.w_input = Parameter(torch.empty(self.n_qubits))
            nn.init.normal_(self.w_input)

        if not w_output:
            self.register_parameter("w_output", None)
        else:
            self.w_output = Parameter(torch.empty(self.n_actions))
            # Centred on 90, the same value used as the fixed output scale
            # when the output weights are disabled.
            nn.init.normal_(self.w_output, mean=90.0)

    def forward(self, inputs):
        """Compute one Q-value per action for ``inputs``."""
        scaled = inputs if self.w_input is None else inputs * self.w_input
        angles = torch.atan(scaled)

        # Shift the expectation values from [-1, 1] to [0, 1].
        q_outputs = (1 + self.q_layers(angles)) / 2
        outputs = self.layer1(q_outputs)

        if self.w_output is None:
            return 90 * outputs
        return outputs * self.w_output
qdqn-MountainCar-v0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4b523003e5cc160264642e07f71c9353648347fc93ca3f58504969bccecae2
3
+ size 2231