苏泓源 committed on
Commit
a257639
1 Parent(s): c8cf824
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. app.py +92 -22
  2. facility_location/__init__.py +0 -0
  3. facility_location/__pycache__/__init__.cpython-39.pyc +0 -0
  4. facility_location/__pycache__/multi_eval.cpython-39.pyc +0 -0
  5. facility_location/agent/__init__.py +4 -0
  6. facility_location/agent/__pycache__/__init__.cpython-310.pyc +0 -0
  7. facility_location/agent/__pycache__/__init__.cpython-39.pyc +0 -0
  8. facility_location/agent/__pycache__/features_extractor.cpython-310.pyc +0 -0
  9. facility_location/agent/__pycache__/features_extractor.cpython-39.pyc +0 -0
  10. facility_location/agent/__pycache__/ga.cpython-310.pyc +0 -0
  11. facility_location/agent/__pycache__/ga.cpython-39.pyc +0 -0
  12. facility_location/agent/__pycache__/heuristic.cpython-310.pyc +0 -0
  13. facility_location/agent/__pycache__/heuristic.cpython-39.pyc +0 -0
  14. facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc +0 -0
  15. facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc +0 -0
  16. facility_location/agent/__pycache__/policy.cpython-310.pyc +0 -0
  17. facility_location/agent/__pycache__/policy.cpython-39.pyc +0 -0
  18. facility_location/agent/__pycache__/solver.cpython-310.pyc +0 -0
  19. facility_location/agent/__pycache__/solver.cpython-39.pyc +0 -0
  20. facility_location/agent/features_extractor.py +225 -0
  21. facility_location/agent/policy.py +229 -0
  22. facility_location/agent/solver.py +33 -0
  23. facility_location/cfg/__init__.py +0 -0
  24. facility_location/cfg/plot.yaml +64 -0
  25. facility_location/env/__init__.py +3 -0
  26. facility_location/env/__pycache__/__init__.cpython-310.pyc +0 -0
  27. facility_location/env/__pycache__/__init__.cpython-39.pyc +0 -0
  28. facility_location/env/__pycache__/facility_location_client.cpython-310.pyc +0 -0
  29. facility_location/env/__pycache__/facility_location_client.cpython-39.pyc +0 -0
  30. facility_location/env/__pycache__/obs_extractor.cpython-310.pyc +0 -0
  31. facility_location/env/__pycache__/obs_extractor.cpython-39.pyc +0 -0
  32. facility_location/env/__pycache__/pmp.cpython-310.pyc +0 -0
  33. facility_location/env/__pycache__/pmp.cpython-39.pyc +0 -0
  34. facility_location/env/facility_location_client.py +278 -0
  35. facility_location/env/obs_extractor.py +184 -0
  36. facility_location/env/pmp.py +502 -0
  37. facility_location/multi_eval.py +96 -0
  38. facility_location/solutions.pkl +0 -0
  39. facility_location/utils/__init__.py +3 -0
  40. facility_location/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  41. facility_location/utils/__pycache__/__init__.cpython-39.pyc +0 -0
  42. facility_location/utils/__pycache__/config.cpython-310.pyc +0 -0
  43. facility_location/utils/__pycache__/config.cpython-39.pyc +0 -0
  44. facility_location/utils/__pycache__/policy.cpython-310.pyc +0 -0
  45. facility_location/utils/__pycache__/policy.cpython-39.pyc +0 -0
  46. facility_location/utils/config.py +133 -0
  47. facility_location/utils/policy.py +57 -0
  48. final_solutions.pkl +0 -0
  49. model.pth +0 -0
  50. model.py +0 -24
app.py CHANGED
@@ -4,26 +4,98 @@ import plotly.graph_objects as go
4
  import plotly.express as px
5
  from sklearn.metrics import pairwise_distances
6
  import torch
 
 
7
 
8
- def plot_from_npy(npy_data):
9
- fig = go.Figure()
10
 
11
- fig.add_trace(go.Scatter(x=[1, 2, 3, 4], y=[10, 11, 12, 13], mode='lines', name='New York'))
12
- fig.update_layout(title_text="Facility Distribution in Cities")
13
- fig.update_xaxes(title_text="Time")
14
- fig.update_yaxes(title_text="Facility Count")
15
-
16
-
17
- actual_fig = fig # Replace this line with your actual_fig
18
- solution_fig = fig # Replace this line with your solution_fig
19
-
20
- return actual_fig, solution_fig
21
 
22
  def solver_plot(data_npy, boost=False):
23
  actual_fig = go.Figure()
24
  solution_fig = go.Figure()
25
- actual_ac = 0 # Replace this line with your actual_ac
26
- solution_ac = 0 # Replace this line with your solution_ac
27
  return actual_fig, solution_fig, actual_ac, solution_ac
28
 
29
  def demo_plot(city, facility):
@@ -104,7 +176,7 @@ def demo_plot(city, facility):
104
  return actual_fig, solution_fig, actual_ac, solution_ac
105
 
106
 
107
- def solver_plot(data_npy, boost=False):
108
  data = data_npy.split('\n')
109
  n = len(data)
110
  p = int((len(data[0].split(' '))-2) / 2)
@@ -115,7 +187,6 @@ def solver_plot(data_npy, boost=False):
115
  for row in data:
116
  row = row.split(' ')
117
  row = [x for x in row if len(x)]
118
- print(row)
119
 
120
  positions.append([float(row[0]), float(row[1])])
121
 
@@ -132,7 +203,6 @@ def solver_plot(data_npy, boost=False):
132
  demands = np.array(demands)
133
  actual_facilities = np.array(actual_facilities)
134
  solution_facilities = ~actual_facilities
135
- print(actual_facilities)
136
 
137
  actual_fig = go.Figure()
138
  solution_fig = go.Figure()
@@ -193,13 +263,13 @@ def solver_plot(data_npy, boost=False):
193
 
194
  def get_example():
195
  return [
196
- ('40.71 -73.93 213 0\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
197
- ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.61 -73.95 189 264 1 0")
198
  ]
199
 
200
 
201
  def load_npy_file(file_obj):
202
- data = np.load(file_obj.name)
203
  string_array = '\n'.join([' '.join(map(str, row)) for row in data])
204
  return string_array
205
 
@@ -231,8 +301,8 @@ with gr.Blocks() as demo:
231
  gr.Examples(
232
  examples=get_example(),
233
  inputs=[data_npy],
234
- fn=plot_from_npy,
235
- outputs=[actual_map, solution_map],
236
  )
237
  with gr.Row():
238
  boost = gr.Checkbox(label="Turbo Boost (accelerate solution generation with fewer SWAP steps)", value=False)
 
4
  import plotly.express as px
5
  from sklearn.metrics import pairwise_distances
6
  import torch
7
+ from facility_location import multi_eval
8
+ import pickle
9
10
11
 
12
  def solver_plot(data_npy, boost=False):
13
+ multi_eval.main(data_npy, boost)
14
+ all_solutions = pickle.loads(open('./facility_location/solutions.pkl', 'rb').read())
15
+
16
+ data = data_npy.split('\n')
17
+ n = len(data)
18
+ p = int((len(data[0].split(' '))-2) / 2)
19
+
20
+ positions = []
21
+ demands = []
22
+ actual_facilities = []
23
+ for row in data:
24
+ row = row.split(' ')
25
+ row = [x for x in row if len(x)]
26
+
27
+ positions.append([float(row[0]), float(row[1])])
28
+
29
+ demand = []
30
+ for i in range(2, 2+p):
31
+ demand.append(float(row[i]))
32
+ demands.append(demand)
33
+
34
+ actual_facility = []
35
+ for i in range(2+p, 2+2*p):
36
+ actual_facility.append(bool(int(float(row[i]))))
37
+ actual_facilities.append(actual_facility)
38
+ positions = np.array(positions)
39
+ demands = np.array(demands)
40
+ actual_facilities = np.array(actual_facilities)
41
+ solution_facilities = np.array(all_solutions).T
42
+ # print(solution_facilities)
43
+ # print(actual_facilities)
44
+
45
  actual_fig = go.Figure()
46
  solution_fig = go.Figure()
47
+ for i in range(p):
48
+ actual_fig.add_trace(go.Scattermapbox(
49
+ lat=positions[actual_facilities[:, i]][:, 0],
50
+ lon=positions[actual_facilities[:, i]][:, 1],
51
+ mode='markers',
52
+ marker=go.scattermapbox.Marker(
53
+ size=10,
54
+ color=px.colors.qualitative.Plotly[i]
55
+ ),
56
+ name=f'Facility {i+1}'
57
+ ))
58
+ solution_fig.add_trace(go.Scattermapbox(
59
+ lat=positions[solution_facilities[:, i]][:, 0],
60
+ lon=positions[solution_facilities[:, i]][:, 1],
61
+ mode='markers',
62
+ marker=go.scattermapbox.Marker(
63
+ size=10,
64
+ color=px.colors.qualitative.Plotly[i]
65
+ ),
66
+ name=f'Facility {i+1}'
67
+ ))
68
+
69
+ actual_fig.update_layout(
70
+ mapbox=dict(
71
+ style='carto-positron',
72
+ center=dict(lat=np.mean(positions[actual_facilities[:, i]][:, 0]), \
73
+ lon=np.mean(positions[actual_facilities[:, i]][:, 1])),
74
+ zoom=11.0
75
+ ),
76
+ margin=dict(l=0, r=0, b=0, t=0),)
77
+
78
+ solution_fig.update_layout(
79
+ mapbox=dict(
80
+ style='carto-positron',
81
+ center=dict(lat=np.mean(positions[solution_facilities[:, i]][:, 0]), \
82
+ lon=np.mean(positions[solution_facilities[:, i]][:, 1])),
83
+ zoom=11.0
84
+ ),
85
+ margin=dict(l=0, r=0, b=0, t=0),)
86
+ # show legend
87
+ actual_fig.update_layout(showlegend=True)
88
+ solution_fig.update_layout(showlegend=True)
89
+
90
+ positions = np.deg2rad(positions)
91
+ dist = pairwise_distances(positions, metric='haversine') * 6371
92
+ actual_ac = 0
93
+ solution_ac = 0
94
+ for i in range(p):
95
+ ac_matrix = dist * demands[:, i][:, None]
96
+ actual_ac += ac_matrix[:, actual_facilities[:, i]].min(axis=-1).sum()
97
+ solution_ac += ac_matrix[:, solution_facilities[:, i]].min(axis=-1).sum()
98
+
99
  return actual_fig, solution_fig, actual_ac, solution_ac
100
 
101
  def demo_plot(city, facility):
 
176
  return actual_fig, solution_fig, actual_ac, solution_ac
177
 
178
 
179
+ def solver_plot1(data_npy, boost=False):
180
  data = data_npy.split('\n')
181
  n = len(data)
182
  p = int((len(data[0].split(' '))-2) / 2)
 
187
  for row in data:
188
  row = row.split(' ')
189
  row = [x for x in row if len(x)]
 
190
 
191
  positions.append([float(row[0]), float(row[1])])
192
 
 
203
  demands = np.array(demands)
204
  actual_facilities = np.array(actual_facilities)
205
  solution_facilities = ~actual_facilities
 
206
 
207
  actual_fig = go.Figure()
208
  solution_fig = go.Figure()
 
263
 
264
  def get_example():
265
  return [
266
+ ('40.71 -73.93 213 1\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
267
+ ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.60 -73.92 129 214 1 0\n40.61 -73.95 189 264 0 1\n40.63 -73.94 124 164 1 0"),
268
  ]
269
 
270
 
271
  def load_npy_file(file_obj):
272
+ data = np.loadtxt(file_obj.name)
273
  string_array = '\n'.join([' '.join(map(str, row)) for row in data])
274
  return string_array
275
 
 
301
  gr.Examples(
302
  examples=get_example(),
303
  inputs=[data_npy],
304
+ fn=solver_plot1,
305
+ outputs=[actual_map, solution_map, actual_ac, solution_ac],
306
  )
307
  with gr.Row():
308
  boost = gr.Checkbox(label="Turbo Boost (accelerate solution generation with fewer SWAP steps)", value=False)
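
For reference, the example rows consumed by solver_plot above encode "lat lon demand_1..demand_p facility_1..facility_p" per point. Below is a minimal standalone sketch (not part of this commit; function names are illustrative) of parsing such rows and computing the demand-weighted access cost the same way app.py does:

import numpy as np
from sklearn.metrics import pairwise_distances

def parse_rows(data_npy):
    # each row: lat lon demand_1..demand_p facility_1..facility_p
    rows = [r.split() for r in data_npy.strip().split('\n') if r.strip()]
    p = (len(rows[0]) - 2) // 2
    positions = np.array([[float(r[0]), float(r[1])] for r in rows])
    demands = np.array([[float(x) for x in r[2:2 + p]] for r in rows])
    facilities = np.array([[bool(int(float(x))) for x in r[2 + p:2 + 2 * p]] for r in rows])
    return positions, demands, facilities

def total_access_cost(positions, demands, facilities):
    # haversine expects [lat, lon] in radians; scaling by ~6371 km converts to kilometres
    dist = pairwise_distances(np.deg2rad(positions), metric='haversine') * 6371
    return sum((dist * demands[:, i][:, None])[:, facilities[:, i]].min(axis=-1).sum()
               for i in range(facilities.shape[1]))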
facility_location/__init__.py ADDED
File without changes
facility_location/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (145 Bytes).
facility_location/__pycache__/multi_eval.cpython-39.pyc ADDED
Binary file (3.13 kB).
facility_location/agent/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .policy import MaskedFacilityLocationActorCriticPolicy
2
+ from .features_extractor import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
3
+
4
+ __all__ = ['MaskedFacilityLocationActorCriticPolicy', 'FacilityLocationMLPExtractor', 'FacilityLocationGNNExtractor', 'FacilityLocationAttentionGNNExtractor']
facility_location/agent/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (419 Bytes).
facility_location/agent/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (420 Bytes).
facility_location/agent/__pycache__/features_extractor.cpython-310.pyc ADDED
Binary file (7.44 kB).
facility_location/agent/__pycache__/features_extractor.cpython-39.pyc ADDED
Binary file (7.46 kB).
facility_location/agent/__pycache__/ga.cpython-310.pyc ADDED
Binary file (3.2 kB).
facility_location/agent/__pycache__/ga.cpython-39.pyc ADDED
Binary file (3.19 kB).
facility_location/agent/__pycache__/heuristic.cpython-310.pyc ADDED
Binary file (3.07 kB).
facility_location/agent/__pycache__/heuristic.cpython-39.pyc ADDED
Binary file (3.12 kB).
facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc ADDED
Binary file (6.84 kB).
facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc ADDED
Binary file (6.86 kB).
facility_location/agent/__pycache__/policy.cpython-310.pyc ADDED
Binary file (6.36 kB).
facility_location/agent/__pycache__/policy.cpython-39.pyc ADDED
Binary file (6.29 kB).
facility_location/agent/__pycache__/solver.cpython-310.pyc ADDED
Binary file (1.5 kB).
facility_location/agent/__pycache__/solver.cpython-39.pyc ADDED
Binary file (1.5 kB).
facility_location/agent/features_extractor.py ADDED
@@ -0,0 +1,225 @@
1
+ from collections import OrderedDict
2
+ from typing import Tuple
3
+
4
+ from gym import spaces
5
+ import torch as th
6
+ from torch import nn
7
+
8
+ from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
9
+ from stable_baselines3.common.type_aliases import TensorDict
10
+
11
+ import time
12
+
13
+
14
+ def mean_features(h: th.Tensor, mask: th.Tensor):
15
+ float_mask = mask.float()
16
+ mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
17
+ return mean_h
18
+
19
+
20
+ # def compute_state(observations: TensorDict, h_nodes: th.Tensor):
21
+ # node_mask = observations['node_mask'].bool()
22
+ # mean_h_nodes = mean_features(h_nodes, node_mask)
23
+
24
+ # old_facility_mask = observations['old_facility_mask'].bool()
25
+ # h_old_facility = mean_features(h_nodes, old_facility_mask)
26
+ # h_old_facility_repeat = h_old_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
27
+ # state_policy_old_facility = th.cat([
28
+ # h_nodes,
29
+ # h_old_facility_repeat,
30
+ # h_nodes - h_old_facility_repeat,
31
+ # h_nodes * h_old_facility_repeat], dim=-1)
32
+
33
+ # new_facility_mask = observations['new_facility_mask'].bool()
34
+ # h_new_facility = mean_features(h_nodes, new_facility_mask)
35
+ # h_new_facility_repeat = h_new_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
36
+ # state_policy_new_facility = th.cat([
37
+ # h_nodes,
38
+ # h_new_facility_repeat,
39
+ # h_nodes - h_new_facility_repeat,
40
+
41
+ # state_value = th.cat([
42
+ # mean_h_nodes,
43
+ # h_old_facility,
44
+ # h_new_facility], dim=-1)
45
+
46
+ # return state_policy_old_facility, state_policy_new_facility, state_value, old_facility_mask, new_facility_mask
47
+
48
+ def compute_state(observations: TensorDict, h_edges: th.Tensor):
49
+ dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
50
+ mean_h_edges = mean_features(h_edges, dynamic_edge_mask)
51
+
52
+ state_policy_facility_pair = h_edges
53
+ state_value = mean_h_edges
54
+
55
+ return state_policy_facility_pair, state_value, dynamic_edge_mask
56
+
57
+
58
+ class FacilityLocationMLPExtractor(BaseFeaturesExtractor):
59
+ def __init__(
60
+ self,
61
+ observation_space: spaces.Dict,
62
+ hidden_units: Tuple = (32, 32),
63
+ ) -> None:
64
+ super().__init__(observation_space, features_dim=1)
65
+
66
+ self.node_mlp = self.create_mlp(observation_space.spaces['node_features'].shape[1], hidden_units)
67
+
68
+ @staticmethod
69
+ def create_mlp(input_dim: int, hidden_units: Tuple) -> nn.Sequential:
70
+ layers = OrderedDict()
71
+ for i, units in enumerate(hidden_units):
72
+ if i == 0:
73
+ layers[f'mlp-extractor-linear_{i}'] = nn.Linear(input_dim, units)
74
+ else:
75
+ layers[f'mlp-extractor-linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
76
+ layers[f'mlp-extractor-tanh_{i}'] = nn.Tanh()
77
+ return nn.Sequential(layers)
78
+
79
+ def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
80
+ node_features = observations['node_features']
81
+ h_nodes = self.node_mlp(node_features)
82
+ return compute_state(observations, h_nodes)
83
+
84
+ @staticmethod
85
+ def get_policy_feature_dim(node_dim: int) -> int:
86
+ return node_dim * 4
87
+
88
+ @staticmethod
89
+ def get_value_feature_dim(node_dim: int) -> int:
90
+ return node_dim * 3
91
+
92
+
93
+ class FacilityLocationGNNExtractor(BaseFeaturesExtractor):
94
+ def __init__(
95
+ self,
96
+ observation_space: spaces.Dict,
97
+ num_gnn_layers: int = 2,
98
+ node_dim: int = 32,
99
+ ) -> None:
100
+ super().__init__(observation_space, features_dim=1)
101
+
102
+ num_node_features = observation_space.spaces['node_features'].shape[1]
103
+ self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
104
+ self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
105
+ self.single_gnn_layer = self.create_gnn(1, node_dim)[0]
106
+
107
+ @staticmethod
108
+ def create_node_encoder(num_node_features: int, node_dim: int) -> nn.Sequential:
109
+ node_encoder = nn.Sequential(
110
+ nn.Linear(num_node_features, node_dim),
111
+ nn.Tanh())
112
+ return node_encoder
113
+
114
+ @staticmethod
115
+ def create_gnn(num_gnn_layers: int, node_dim: int) -> nn.ModuleList:
116
+ layers = nn.ModuleList()
117
+ for i in range(num_gnn_layers):
118
+ gnn_layer = nn.Sequential(
119
+ nn.Linear(node_dim, node_dim),
120
+ nn.Tanh())
121
+ layers.append(gnn_layer)
122
+ return layers
123
+
124
+ @staticmethod
125
+ def scatter_count(h_edges, indices, edge_mask, max_num_nodes):
126
+ batch_size = h_edges.shape[0]
127
+ num_latents = h_edges.shape[2]
128
+
129
+ h_nodes = th.zeros(batch_size, max_num_nodes, num_latents).to(h_edges.device)
130
+ count_edge = th.zeros_like(h_nodes)
131
+ count = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges.shape).float()
132
+
133
+ idx = indices.unsqueeze(-1).expand(-1, -1, num_latents)
134
+ h_nodes = h_nodes.scatter_add_(1, idx, h_edges)
135
+ count_edge = count_edge.scatter_add_(1, idx, count)
136
+ return h_nodes, count_edge
137
+
138
+ @staticmethod
139
+ def gather_to_edges(h_nodes, edge_index, edge_mask, gnn_layer):
140
+ h_nodes = gnn_layer(h_nodes)
141
+ h_edges_12 = th.gather(h_nodes, 1, edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
142
+ h_edges_21 = th.gather(h_nodes, 1, edge_index[:, :, 1].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
143
+ mask = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges_12.shape)
144
+ h_edges_12 = th.where(mask, h_edges_12, th.zeros_like(h_edges_12))
145
+ h_edges_21 = th.where(mask, h_edges_21, th.zeros_like(h_edges_21))
146
+ return h_edges_12, h_edges_21
147
+
148
+ @classmethod
149
+ def scatter_to_nodes(cls, h_edges, edge_index, edge_mask, node_mask):
150
+ h_edges_12, h_edges_21 = h_edges
151
+ max_num_nodes = node_mask.shape[1]
152
+ h_nodes_1, count_1 = cls.scatter_count(h_edges_21, edge_index[:, :, 0], edge_mask, max_num_nodes)
153
+ h_nodes_2, count_2 = cls.scatter_count(h_edges_12, edge_index[:, :, 1], edge_mask, max_num_nodes)
154
+
155
+ h_nodes_sum = h_nodes_1 + h_nodes_2
156
+
157
+ mask = th.broadcast_to(node_mask.unsqueeze(-1), h_nodes_sum.shape)
158
+ count = count_1 + count_2
159
+ count_padding = th.ones_like(count)
160
+ count = th.where(mask, count, count_padding)
161
+
162
+ h_nodes = h_nodes_sum / count
163
+ return h_nodes
164
+
165
+ def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
166
+ t1 = time.time()
167
+ node_features = observations['node_features']
168
+ h_nodes = self.node_encoder(node_features)
169
+
170
+ edge_static_index = observations['static_adjacency_list'].long()
171
+ edge_dynamic_index = observations['dynamic_adjacency_list'].long()
172
+ node_mask = observations['node_mask'].bool()
173
+ static_edge_mask = observations['static_edge_mask'].bool()
174
+ dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
175
+ for gnn_layer in self.gnn_layers:
176
+ h_edges = self.gather_to_edges(h_nodes, edge_static_index, static_edge_mask, gnn_layer)
177
+ h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, static_edge_mask, node_mask)
178
+ h_nodes = h_nodes + h_nodes_new
179
+ h_edges12 , h_edges21 = self.gather_to_edges(h_nodes, edge_dynamic_index, dynamic_edge_mask, self.single_gnn_layer)
180
+ h_edges = th.cat([h_edges12, h_edges21], dim=-1)
181
+
182
+ t2 = time.time()
183
+ # print('cal embedding time:', t2-t1)
184
+
185
+ return compute_state(observations, h_edges)
186
+
187
+ @staticmethod
188
+ def get_policy_feature_dim(node_dim: int) -> int:
189
+ return node_dim * 2
190
+
191
+ @staticmethod
192
+ def get_value_feature_dim(node_dim: int) -> int:
193
+ return node_dim * 2
194
+
195
+
196
+ class FacilityLocationAttentionGNNExtractor(FacilityLocationGNNExtractor):
197
+ def __init__(
198
+ self,
199
+ observation_space: spaces.Dict,
200
+ num_gnn_layers: int = 2,
201
+ node_dim: int = 32,
202
+ ) -> None:
203
+ super().__init__(observation_space, num_gnn_layers, node_dim)
204
+
205
+ num_node_features = observation_space.spaces['node_features'].shape[1]
206
+ self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
207
+ self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
208
+ self.attention = nn.MultiheadAttention(node_dim, node_dim)
209
+
210
+ def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
211
+ node_features = observations['node_features']
212
+ h_nodes = self.node_encoder(node_features)
213
+
214
+ edge_static_index = observations['static_adjacency_list'].long()
215
+ edge_dynamic_index = observations['dynamic_adjacency_list'].long()
216
+ node_mask = observations['node_mask'].bool()
217
+ edge_mask = observations['edge_mask'].bool()
218
+ for gnn_layer in self.gnn_layers:
219
+ h_edges = self.gather_to_edges(h_nodes, edge_static_index, edge_mask, gnn_layer)
220
+ h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, edge_mask, node_mask)
221
+ h_nodes = h_nodes + h_nodes_new
222
+
223
+ h_nodes = self.attention(h_nodes, h_nodes, h_nodes)[0]
224
+
225
+ return compute_state(observations, h_nodes)
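
A small illustration (not from the commit) of the masked mean used by mean_features above: padded nodes are zeroed out and excluded from the denominator.

import torch as th

h = th.tensor([[[1.0, 2.0], [3.0, 4.0], [9.0, 9.0]]])   # (batch=1, nodes=3, dim=2)
mask = th.tensor([[True, True, False]])                  # third node is padding
float_mask = mask.float()
mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
# mean_h -> tensor([[2., 3.]]); the padded node does not contribute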
facility_location/agent/policy.py ADDED
@@ -0,0 +1,229 @@
1
+ from functools import partial
2
+ from typing import Callable, Tuple, Text, Union
3
+ from collections import OrderedDict
4
+
5
+ import numpy as np
6
+ from gym import spaces
7
+ import torch as th
8
+ from torch import nn
9
+
10
+ from stable_baselines3.common.policies import ActorCriticPolicy
11
+ from stable_baselines3.common.utils import get_device
12
+ from stable_baselines3.common.type_aliases import Schedule
13
+
14
+
15
+ def create_mlp(head: Text, input_dim: int, hidden_units: Tuple) -> nn.Sequential:
16
+ layers = OrderedDict()
17
+ for i, units in enumerate(hidden_units):
18
+ if i == 0:
19
+ layers[f'{head}_linear_{i}'] = nn.Linear(input_dim, units)
20
+ else:
21
+ layers[f'{head}_linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
22
+ if i != len(hidden_units) - 1:
23
+ layers[f'{head}_tanh_{i}'] = nn.Tanh()
24
+ if head.startswith('policy'):
25
+ layers[f'{head}_flatten'] = nn.Flatten()
26
+ return nn.Sequential(layers)
27
+
28
+
29
+ class MaskedFacilityLocationNetwork(nn.Module):
30
+
31
+ def __init__(
32
+ self,
33
+ policy_feature_dim: int,
34
+ value_feature_dim: int,
35
+ policy_hidden_units: Tuple = (32, 32, 1),
36
+ value_hidden_units: Tuple = (32, 32, 1),
37
+ device: Union[th.device, Text] = "auto",
38
+ ):
39
+ super().__init__()
40
+ device = get_device(device)
41
+
42
+ # Policy network
43
+ # self.old_facility_policy_net = create_mlp('policy-old-facility',
44
+ # policy_feature_dim,
45
+ # policy_hidden_units).to(device)
46
+ # self.new_facility_policy_net = create_mlp('policy-new-facility',
47
+ # policy_feature_dim,
48
+ # policy_hidden_units).to(device)
49
+ self.pair_facility_policy_net = create_mlp('policy-pair-facility',
50
+ policy_feature_dim,
51
+ policy_hidden_units).to(device)
52
+ # Value network
53
+ self.value_net = create_mlp('value',
54
+ value_feature_dim,
55
+ value_hidden_units).to(device)
56
+
57
+ def forward(self,
58
+ features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
59
+ return self.forward_actor(features), self.forward_critic(features)
60
+
61
+ # def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
62
+ # state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
63
+
64
+ # old_facility_logits = self.old_facility_policy_net(state_policy_old_facility) # (batch_size, node_range)
65
+ # old_facility_padding = th.full_like(old_facility_mask, -th.inf, dtype=th.float32)
66
+ # masked_old_facility_logits = th.where(old_facility_mask, old_facility_logits, old_facility_padding)
67
+
68
+ # new_facility_logits = self.new_facility_policy_net(state_policy_new_facility) # (batch_size, node_range)
69
+ # new_facility_padding = th.full_like(new_facility_mask, -th.inf, dtype=th.float32)
70
+ # masked_new_facility_logits = th.where(new_facility_mask, new_facility_logits, new_facility_padding)
71
+
72
+ # masked_old_new_facility_logits = th.cat([masked_old_facility_logits, masked_new_facility_logits], dim=1)
73
+ # return masked_old_new_facility_logits
74
+
75
+ def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
76
+ state_policy_pair_facility, _, dynamic_edge_mask = features
77
+ pair_facility_logits = self.pair_facility_policy_net(state_policy_pair_facility)
78
+ pair_facility_padding = th.full_like(dynamic_edge_mask, -th.inf, dtype=th.float32)
79
+ masked_pair_facility_logits = th.where(dynamic_edge_mask, pair_facility_logits, pair_facility_padding)
80
+
81
+ return masked_pair_facility_logits
82
+
83
+ def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
84
+ _, state_value, _ = features
85
+ return self.value_net(state_value)
86
+
87
+
88
+ class POPSTARMaskedFacilityLocationNetwork(nn.Module):
89
+
90
+ def __init__(
91
+ self,
92
+ policy_feature_dim: int,
93
+ value_feature_dim: int,
94
+ policy_hidden_units: Tuple = (32, 32, 1),
95
+ value_hidden_units: Tuple = (32, 32, 1),
96
+ device: Union[th.device, Text] = "auto",
97
+ ):
98
+ super().__init__()
99
+ device = get_device(device)
100
+
101
+ # Policy network
102
+ self.old_facility_policy_net = create_mlp('policy-old-facility',
103
+ policy_feature_dim,
104
+ policy_hidden_units).to(device)
105
+ self.new_facility_policy_net = create_mlp('policy-new-facility',
106
+ policy_feature_dim,
107
+ policy_hidden_units).to(device)
108
+ self.old_new_facility_policy_net = create_mlp('policy-old-new-facility',
109
+ policy_feature_dim * 4,
110
+ policy_hidden_units).to(device)
111
+
112
+ # Value network
113
+ self.value_net = create_mlp('value',
114
+ value_feature_dim,
115
+ value_hidden_units).to(device)
116
+
117
+ def forward(self,
118
+ features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
119
+ return self.forward_actor(features), self.forward_critic(features)
120
+
121
+ def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
122
+ state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
123
+
124
+ node_range = old_facility_mask.shape[1]
125
+
126
+ loss = self.old_facility_policy_net(state_policy_old_facility) # (batch_size, node_range)
127
+ loss = loss.repeat_interleave(node_range, dim=1)
128
+
129
+ gain = self.new_facility_policy_net(state_policy_new_facility) # (batch_size, node_range)
130
+ gain = gain.repeat(1, node_range)
131
+
132
+
133
+ state_policy_old_facility_expand = state_policy_old_facility.unsqueeze(2).expand(-1, -1, node_range, -1)
134
+ state_policy_new_facility_expand = state_policy_new_facility.unsqueeze(1).expand(-1, node_range, -1, -1)
135
+ state_policy_old_new_facility = th.cat(
136
+ [
137
+ state_policy_old_facility_expand,
138
+ state_policy_new_facility_expand,
139
+ state_policy_old_facility_expand - state_policy_new_facility_expand,
140
+ state_policy_old_facility_expand * state_policy_new_facility_expand
141
+ ], dim=-1
142
+ )
143
+ extra = self.old_new_facility_policy_net(state_policy_old_new_facility) # (batch_size, node_range * node_range)
144
+
145
+ logits = gain - loss + extra
146
+
147
+ action_mask = th.logical_and(old_facility_mask.unsqueeze(2), new_facility_mask.unsqueeze(1)).flatten(start_dim=1)
148
+ padding = th.full_like(action_mask, -th.inf, dtype=th.float32)
149
+ masked_logits = th.where(action_mask, logits, padding)
150
+
151
+ return masked_logits
152
+
153
+ def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
154
+ _, _, state_value, _, _ = features
155
+ return self.value_net(state_value)
156
+
157
+
158
+ class MaskedFacilityLocationActorCriticPolicy(ActorCriticPolicy):
159
+ def __init__(
160
+ self,
161
+ observation_space: spaces.Space,
162
+ action_space: spaces.Space,
163
+ lr_schedule: Callable[[float], float],
164
+ *args,
165
+ **kwargs,
166
+ ):
167
+ self.policy_feature_dim = kwargs.pop('policy_feature_dim')
168
+ self.value_feature_dim = kwargs.pop('value_feature_dim')
169
+ self.policy_hidden_units = kwargs.pop('policy_hidden_units')
170
+ self.value_hidden_units = kwargs.pop('value_hidden_units')
171
+
172
+ self.popstar = kwargs.pop('popstar')
173
+
174
+ super().__init__(
175
+ observation_space,
176
+ action_space,
177
+ lr_schedule,
178
+ # Pass remaining arguments to base class
179
+ *args,
180
+ **kwargs,
181
+ )
182
+
183
+ def _build(self, lr_schedule: Schedule) -> None:
184
+ self._build_mlp_extractor()
185
+
186
+ self.action_net = nn.Identity()
187
+ self.value_net = nn.Identity()
188
+
189
+ # Init weights: use orthogonal initialization
190
+ # with small initial weight for the output
191
+ if self.ortho_init:
192
+ # TODO: check for features_extractor
193
+ # Values from stable-baselines.
194
+ # features_extractor/mlp values are
195
+ # originally from openai/baselines (default gains/init_scales).
196
+ module_gains = {
197
+ self.features_extractor: np.sqrt(2),
198
+ self.mlp_extractor: np.sqrt(2),
199
+ }
200
+ # if not self.share_features_extractor:
201
+ # # Note(antonin): this is to keep SB3 results
202
+ # # consistent, see GH#1148
203
+ # del module_gains[self.features_extractor]
204
+ # module_gains[self.pi_features_extractor] = np.sqrt(2)
205
+ # module_gains[self.vf_features_extractor] = np.sqrt(2)
206
+
207
+ for module, gain in module_gains.items():
208
+ module.apply(partial(self.init_weights, gain=gain))
209
+
210
+ # Setup optimizer with initial learning rate
211
+ self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)
212
+
213
+ def _build_mlp_extractor(self) -> None:
214
+ if not self.popstar:
215
+ self.mlp_extractor = MaskedFacilityLocationNetwork(
216
+ self.policy_feature_dim,
217
+ self.value_feature_dim,
218
+ self.policy_hidden_units,
219
+ self.value_hidden_units,
220
+ self.device,
221
+ )
222
+ else:
223
+ self.mlp_extractor = POPSTARMaskedFacilityLocationNetwork(
224
+ self.policy_feature_dim,
225
+ self.value_feature_dim,
226
+ self.policy_hidden_units,
227
+ self.value_hidden_units,
228
+ self.device,
229
+ )
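
A hedged usage sketch (assumptions, not shown in the commit): the policy pops its extra keyword arguments from policy_kwargs before delegating to ActorCriticPolicy, so a PPO model could be assembled roughly as follows; the repo's own facility_location.utils.policy.get_policy_kwargs presumably builds this dictionary.

from stable_baselines3 import PPO
from facility_location.agent import (
    MaskedFacilityLocationActorCriticPolicy,
    FacilityLocationGNNExtractor,
)

node_dim = 32
policy_kwargs = dict(
    features_extractor_class=FacilityLocationGNNExtractor,
    features_extractor_kwargs=dict(num_gnn_layers=2, node_dim=node_dim),
    policy_feature_dim=FacilityLocationGNNExtractor.get_policy_feature_dim(node_dim),
    value_feature_dim=FacilityLocationGNNExtractor.get_value_feature_dim(node_dim),
    policy_hidden_units=(32, 32, 1),
    value_hidden_units=(32, 32, 1),
    popstar=False,
)
# env: a PMPEnv / MULTIPMP instance built from the repo's Config (construction omitted here)
model = PPO(MaskedFacilityLocationActorCriticPolicy, env, policy_kwargs=policy_kwargs)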
facility_location/agent/solver.py ADDED
@@ -0,0 +1,33 @@
1
+ from typing import Text
2
+
3
+ import numpy as np
4
+ import pulp
5
+ from spopt.locate import PMedian
6
+
7
+ from facility_location.env import EvalPMPEnv
8
+
9
+
10
+ class PMPSolver:
11
+ def __init__(self, solver: Text, env: EvalPMPEnv):
12
+ if solver == 'GUROBI':
13
+ self._solver = pulp.GUROBI(msg=False)
14
+ elif solver == 'GUROBI_CMD':
15
+ self._solver = pulp.GUROBI_CMD(msg=False)
16
+ elif solver == 'PULP_CBC_CMD':
17
+ self._solver = pulp.PULP_CBC_CMD(msg=False)
18
+ elif solver == 'GLPK_CMD':
19
+ self._solver = pulp.GLPK_CMD(msg=False)
20
+ elif solver == 'MOSEK':
21
+ self._solver = pulp.MOSEK(msg=False)
22
+ else:
23
+ raise ValueError(f'Solver {solver} not supported.')
24
+
25
+ self.env = env
26
+
27
+ def solve(self):
28
+ _, demands, _, p = self.env.get_instance()
29
+ distance_matrix, _ = self.env.get_distance_and_cost()
30
+ pmedian_from_cost_matrix = PMedian.from_cost_matrix(distance_matrix, demands, p_facilities=p)
31
+ pmedian_from_cost_matrix = pmedian_from_cost_matrix.solve(self._solver)
32
+ solution = np.array([len(temp) > 0 for temp in pmedian_from_cost_matrix.fac2cli], dtype=bool)
33
+ return solution
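
For context, a self-contained toy run of the same spopt call pattern used in PMPSolver.solve (the instance sizes and values here are made up for illustration):

import numpy as np
import pulp
from sklearn.metrics import pairwise_distances
from spopt.locate import PMedian

rng = np.random.default_rng(0)
points = rng.random((10, 2))                 # 10 candidate sites / demand points
demands = rng.integers(1, 100, size=10)      # demand weight per point
cost_matrix = pairwise_distances(points, metric='euclidean')
pmedian = PMedian.from_cost_matrix(cost_matrix, demands, p_facilities=3)
pmedian = pmedian.solve(pulp.PULP_CBC_CMD(msg=False))
# facilities that serve at least one client, as in PMPSolver.solve
solution = np.array([len(clients) > 0 for clients in pmedian.fac2cli], dtype=bool)
print(solution.sum(), 'facilities opened')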
facility_location/cfg/__init__.py ADDED
File without changes
facility_location/cfg/plot.yaml ADDED
@@ -0,0 +1,64 @@
1
+
2
+ env_specs:
3
+ region:
4
+ min_n: 20
5
+ max_n: 50
6
+ min_p_ratio: 0.1
7
+ max_p_ratio: 0.4
8
+ max_steps_scale: 0.5
9
+ tabu_time: 3
10
+ tabu_stable_steps_scale: 0.2
11
+ popstar: false
12
+
13
+ # evaluation
14
+ eval_specs:
15
+ region:
16
+ seed: 12345
17
+ max_nodes: 2488
18
+ max_edges: 5000
19
+ val_num_cases: 100
20
+ test_num_cases: 1
21
+ val_np: !!python/tuple [50,5]
22
+ test_np:
23
+ - !!python/tuple [2214,36]
24
+ - !!python/tuple [2214,189]
25
+ - !!python/tuple [2214,425]
26
+ # agent
27
+ agent_specs:
28
+ policy_feature_dim: 32
29
+ value_feature_dim: 32
30
+ policy_hidden_units: !!python/tuple [32, 32, 1]
31
+ value_hidden_units: !!python/tuple [32, 32, 1]
32
+
33
+ # mlp
34
+ mlp_specs:
35
+ hidden_units: !!python/tuple [32, 32]
36
+
37
+ gnn_specs:
38
+ num_gnn_layers: 2
39
+ node_dim: 32
40
+
41
+
42
+ # ts
43
+ ts_specs:
44
+ max_steps_scale: 2
45
+ stable_iterations_scale: 0.2
46
+
47
+
48
+ # popstar
49
+ popstar_specs:
50
+ graspit: 32
51
+ elite: 10
52
+
53
+
54
+ # ga
55
+ ga_specs:
56
+ num_generations: 100
57
+ num_parents_mating: 50
58
+ sol_per_pop: 100
59
+ parent_selection_type: sss
60
+ crossover_probability: 0.8
61
+ mutation_probability: 0.1
62
+
63
+
64
+
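
A hedged note on consuming this file (an assumption about usage, not shown in the commit): the !!python/tuple tags above are rejected by yaml.safe_load, so the config has to be read with a PyYAML loader that constructs Python tuples, e.g.:

import yaml

with open('facility_location/cfg/plot.yaml') as f:
    cfg = yaml.load(f, Loader=yaml.UnsafeLoader)
print(cfg['agent_specs']['policy_hidden_units'])   # (32, 32, 1)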
facility_location/env/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .pmp import PMPEnv, EvalPMPEnv, MULTIPMP
2
+
3
+ __all__ = ['PMPEnv', 'EvalPMPEnv', 'MULTIPMP']
facility_location/env/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (241 Bytes).
facility_location/env/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (242 Bytes).
facility_location/env/__pycache__/facility_location_client.cpython-310.pyc ADDED
Binary file (10.2 kB).
facility_location/env/__pycache__/facility_location_client.cpython-39.pyc ADDED
Binary file (10.2 kB).
facility_location/env/__pycache__/obs_extractor.cpython-310.pyc ADDED
Binary file (6.73 kB).
facility_location/env/__pycache__/obs_extractor.cpython-39.pyc ADDED
Binary file (6.75 kB).
facility_location/env/__pycache__/pmp.cpython-310.pyc ADDED
Binary file (19.7 kB).
facility_location/env/__pycache__/pmp.cpython-39.pyc ADDED
Binary file (17.8 kB).
facility_location/env/facility_location_client.py ADDED
@@ -0,0 +1,278 @@
1
+ import warnings
2
+ from typing import Tuple, Dict
3
+
4
+ import networkx as nx
5
+ import numpy as np
6
+ from geopandas import GeoDataFrame
7
+ from shapely.geometry import MultiPoint
8
+ from libpysal.weights.contiguity import Voronoi as Voronoi_weights
9
+ from sklearn.neighbors import kneighbors_graph
10
+ from sklearn.metrics import pairwise_distances
11
+
12
+ from facility_location.utils.config import Config
13
+ import time
14
+
15
+ class FacilityLocationClient:
16
+ def __init__(self, cfg: Config, rng: np.random.Generator):
17
+ self.cfg = cfg
18
+ self.rng = rng
19
+ self._cfg_tabu_time = cfg.env_specs['tabu_time']
20
+ self._t = 0
21
+
22
+ def set_instance(self, points: np.ndarray, demands: np.ndarray, n: int, p: int, real: bool) -> None:
23
+ self._points = points
24
+ self._demands = demands
25
+ points_geom = MultiPoint(points)
26
+ self._gdf = GeoDataFrame({
27
+ 'geometry': points_geom.geoms,
28
+ 'demand': demands,
29
+ })
30
+ self._n = n
31
+ self._p = p
32
+ self._old_facility_mask = np.zeros(self._n, dtype=bool)
33
+ self._new_facility_mask = np.zeros(self._n, dtype=bool)
34
+ self._construct_static_graph()
35
+
36
+ if real:
37
+ self._distance_matrix = pairwise_distances(points, metric='haversine')
38
+ else:
39
+ self._distance_matrix = pairwise_distances(points, metric='euclidean')
40
+ self._cost_matrix = self._distance_matrix * self._demands[:, None]
41
+ self._gain = np.zeros(self._n)
42
+ self._loss = np.zeros(self._n)
43
+ self._add_time = np.full(self._n, -np.inf)
44
+ self._drop_time = np.full(self._n, -np.inf)
45
+ self.reset_tabu_time()
46
+
47
+ def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
48
+ return self._points, self._demands, self._n, self._p
49
+
50
+ def get_distance_and_cost_matrix(self) -> Tuple[np.ndarray, np.ndarray]:
51
+ return self._distance_matrix, self._cost_matrix
52
+
53
+ def get_avg_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
54
+ avg_distance = self._distance_matrix.sum(axis=-1)/(self._n - 1)
55
+ avg_cost = self._cost_matrix.sum(axis=-1)/(self._n - 1)
56
+ return avg_distance, avg_cost
57
+
58
+ def _construct_static_graph(self) -> None:
59
+ self._connection_matrix = kneighbors_graph(self._points, n_neighbors=3, mode="connectivity").toarray()
60
+ self._static_graph = nx.from_numpy_matrix(self._connection_matrix)
61
+ self._static_edges = np.array(self._static_graph.edges(), dtype=np.int64)
62
+
63
+ def _construct_dynamic_graph(self) -> None:
64
+ t1 = time.time()
65
+ try:
66
+ solution_distace_min = np.partition(self._distance_matrix[:, self._solution][self._solution, :], 3, axis=-1)[:,2]
67
+ except:
68
+ raise ValueError('stop')
69
+ solution_distance_matrix = np.zeros((self._n, self._n))
70
+ solution_distance_matrix[:, self._solution] = solution_distace_min
71
+ solution_knearest_matrix = np.logical_and(self._distance_matrix < solution_distance_matrix, self._distance_matrix > 0)
72
+ old_tabu_mask, new_tabu_mask = self.get_tabu_mask(self._t)
73
+ solution_matrix = np.logical_and(np.logical_and(self._solution, old_tabu_mask)[:, None], (np.logical_and(~self._solution, new_tabu_mask)[None, :]))
74
+ solution_matrix = np.logical_or(solution_matrix, solution_matrix.T)
75
+ gainloss_matrix = np.logical_and((self._gain[:, None] > self._loss[None, :]), self._loss[None, :] > 0)
76
+ graph_matrix = np.logical_and(solution_matrix, np.logical_or(gainloss_matrix, solution_knearest_matrix))
77
+
78
+ if not np.any(graph_matrix):
79
+ if np.any(solution_matrix):
80
+ graph_matrix = solution_matrix
81
+ if not np.any(graph_matrix):
82
+ raise ValueError('Invalid graph_matrix')
83
+ else:
84
+ graph_matrix = self._solution[:, None] ^ self._solution[None, :]
85
+ self._dynamic_graph = nx.from_numpy_matrix(graph_matrix)
86
+ self._dynamic_edges = np.array(self._dynamic_graph.edges(), dtype=np.int64)
87
+
88
+ t2 = time.time()
89
+ # print('dynamic graph time:',t2-t1)
90
+
91
+
92
+ def get_static_adjacency_list(self) -> np.ndarray:
93
+ return self._static_edges
94
+
95
+ def get_dynamic_adjacency_list(self) -> np.ndarray:
96
+ return self._dynamic_edges
97
+
98
+ def compute_initial_solution(self) -> Tuple[float, np.ndarray]:
99
+ self._solution = np.zeros(self._n, dtype=bool)
100
+ p_0 = self._demands.argmax()
101
+ self._solution[p_0] = True
102
+ for _ in range(self._p - 1):
103
+ p_max_cost = self._cost_matrix[:, self._solution].min(axis=-1).argmax()
104
+ self._solution[p_max_cost] = True
105
+ self._init_gain_and_loss()
106
+ self._construct_dynamic_graph()
107
+ self._old_facility_mask = self._solution
108
+ self._new_facility_mask = ~self._solution
109
+ return self.compute_obj_value(), self._solution
110
+
111
+ def compute_obj_value(self) -> float:
112
+ obj_value = self._cost_matrix[:, self._solution].min(axis=-1).sum()
113
+ return obj_value
114
+
115
+ def compute_obj_value_from_solution(self, solution) -> float:
116
+ self._solution = solution
117
+ self._init_gain_and_loss()
118
+ self._construct_dynamic_graph()
119
+ obj_value = self.compute_obj_value()
120
+ return obj_value
121
+
122
+ # def swap(self, old_facility: int, new_facility: int, t: int) -> Tuple[float, np.ndarray, Dict]:
123
+ # if old_facility >= self._n or not self._solution[old_facility]:
124
+ # warn_msg = f'Old facility {old_facility} is not a facility of the current solution {self._solution}.'
125
+ # warnings.warn(warn_msg)
126
+ # old_facility = self.rng.choice(np.arange(self._n)[self._solution])
127
+ # if new_facility >= self._n or self._solution[new_facility]:
128
+ # warn_msg = f'New facility {new_facility} is already a facility of the current solution {self._solution}.'
129
+ # warnings.warn(warn_msg)
130
+ # new_facility = self.rng.choice(np.arange(self._n)[~self._solution])
131
+ # self._solution[old_facility] = False
132
+ # self._solution[new_facility] = True
133
+ # self._drop_time[old_facility] = t
134
+ # self._add_time[new_facility] = t
135
+ # self._t = t
136
+ # return self.compute_obj_value(), self._solution, {}
137
+
138
+ def swap(self, facility_pair_index: int, t: int) -> Tuple[float, np.ndarray, Dict]:
139
+ facility_pair = self._dynamic_edges[facility_pair_index]
140
+ facility1 = facility_pair[0]
141
+ facility2 = facility_pair[1]
142
+
143
+ if (not self._solution[facility1]) and (self._solution[facility2]):
144
+ new_facility = facility1
145
+ old_facility = facility2
146
+ elif (not self._solution[facility2]) and (self._solution[facility1]):
147
+ new_facility = facility2
148
+ old_facility = facility1
149
+ else:
150
+ raise ValueError('stop')
151
+
152
+ self._solution[old_facility] = False
153
+ self._solution[new_facility] = True
154
+ self._old_facility_mask[new_facility] = True
155
+ self._new_facility_mask[old_facility] = True
156
+ self._drop_time[old_facility] = t
157
+ self._add_time[new_facility] = t
158
+ self._t = t
159
+ self._update_env(new_facility, old_facility)
160
+ # print('st:',self._t)
161
+ return self.compute_obj_value(), self._solution, {}
162
+
163
+ def get_tabu_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
164
+ old_tabu_mask = self._add_time < t - self._drop_tabu_time
165
+ new_tabu_mask = self._drop_time < t - self._add_tabu_time
166
+ return old_tabu_mask, new_tabu_mask
167
+
168
+ def reset_tabu_time(self) -> None:
169
+ self._t = 0
170
+ if self._cfg_tabu_time <= 0:
171
+ self._add_tabu_time = 0
172
+ self._drop_tabu_time = 0
173
+ else:
174
+ self._add_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
175
+ self._drop_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
176
+
177
+ def get_current_solution(self) -> np.ndarray:
178
+ return self._solution
179
+
180
+ def set_solution(self, solution: np.ndarray) -> None:
181
+ self._solution = solution
182
+
183
+ def get_current_distance(self) -> np.ndarray:
184
+ dis2poi = self._distance_matrix[:, self._solution]
185
+ if self._p > 2:
186
+ dis = np.partition(dis2poi, 2, axis=-1)[:,:2]
187
+ else:
188
+ dis = dis2poi.min(axis=-1)
189
+ dis = np.stack([dis, dis], axis=-1)
190
+ return dis
191
+
192
+ def get_current_cost(self) -> np.ndarray:
193
+ cost2poi = self._cost_matrix[:, self._solution]
194
+ if self._p > 2:
195
+ cost = np.partition(cost2poi, 2, axis=-1)[:,:2]
196
+ else:
197
+ cost = cost2poi.min(axis=-1)
198
+ cost = np.stack([cost, cost], axis=-1)
199
+ return cost
200
+
201
+ def get_gain_and_loss(self) -> Tuple[np.ndarray, np.ndarray]:
202
+ return self._gain, self._loss
203
+
204
+ def get_gdf_facilities(self) -> Tuple[GeoDataFrame, np.ndarray]:
205
+ solution = self._solution
206
+ facilities = np.arange(self._n)[solution]
207
+ gdf = self._gdf.copy()
208
+ gdf['facility'] = False
209
+ gdf.loc[facilities, 'facility'] = True
210
+ node2facility = np.arange(self._n)[solution][self._cost_matrix[:, solution].argmin(axis=-1)]
211
+ gdf['assignment'] = node2facility
212
+ return gdf, facilities
213
+
214
+ def _init_env(self):
215
+ self._init_gain_and_loss()
216
+ self._construct_dynamic_graph()
217
+
218
+ def _update_env(self, insert_facility, remove_facility):
219
+ self._update_gain_and_loss(insert_facility, remove_facility)
220
+ self._construct_dynamic_graph()
221
+
222
+ def _init_gain_and_loss(self):
223
+ t1 = time.time()
224
+
225
+ for i in range(self._n):
226
+ _fake_solution = list(self._solution)
227
+ if self._solution[i]:
228
+ _fake_solution[i] = False
229
+ self._loss[i] = self._cost_matrix[:, _fake_solution].min(axis=-1).sum() - self._cost_matrix[:, self._solution].min(axis=-1).sum()
230
+ self._gain[i] = 0
231
+ else:
232
+ _fake_solution[i] = True
233
+ self._gain[i] = self._cost_matrix[:, self._solution].min(axis=-1).sum() - self._cost_matrix[:, _fake_solution].min(axis=-1).sum()
234
+ self._loss[i] = 0
235
+
236
+ self.argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:,:2]
237
+ t2 = time.time()
238
+ # print('init gainloss time:',t2-t1)
239
+
240
+ def _update_gain_and_loss(self, insert_facility, remove_facility):
241
+
242
+ t1 = time.time()
243
+
244
+ _pre_solution = list(self._solution)
245
+ _pre_solution[insert_facility] = False
246
+ _pre_solution[remove_facility] = True
247
+ pre_closest_demands2solution = self._cost_matrix[:, _pre_solution][np.arange(self._n)[:, None], self.argpartition]
248
+ argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:,:2]
249
+ closest_demands2solution = self._cost_matrix[:, self._solution][np.arange(self._n)[:, None], argpartition]
250
+
251
+ pre_solution_idx = np.where(_pre_solution)[0]
252
+ solution_idx = np.where(self._solution)[0]
253
+ for i in range(self._n):
254
+ if remove_facility in self.argpartition[i] or insert_facility in argpartition[i]:
255
+ self._loss[solution_idx[argpartition[i][0]]] += closest_demands2solution[i][1] - closest_demands2solution[i][0]
256
+ self._loss[pre_solution_idx[self.argpartition[i][0]]] -= pre_closest_demands2solution[i][1] - pre_closest_demands2solution[i][0]
257
+ # if self.argpartition[i][0] != argpartition[i][0]:
258
+ # for j in range(self._n):
259
+ # if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][argpartition[i][0]]:
260
+ # self._gain[j] += max(0, closest_demands2solution[i][0] - self._cost_matrix[i, j])
261
+ # if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][self.argpartition[i][0]]:
262
+ # self._gain[j] -= max(0, pre_closest_demands2solution[i][0] - self._cost_matrix[i, j])
263
+
264
+ self._loss[remove_facility] = 0
265
+ self._gain[insert_facility] = 0
266
+
267
+ self.argpartition = list(argpartition)
268
+ # print(self._gain, self._loss)
269
+ t2 = time.time()
270
+ # print('update gainloss time:',t2-t1)
271
+
272
+
273
+ def init_facility_mask(self, old_facility, new_facility):
274
+ self._old_facility_mask[old_facility] = True
275
+ self._new_facility_mask[new_facility] = True
276
+
277
+ def get_facility_mask(self):
278
+ return self._old_facility_mask, self._new_facility_mask
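
One unit detail worth noting (illustrative, not from the commit): scikit-learn's haversine metric assumes [lat, lon] in radians and returns central angles, which is why app.py converts with np.deg2rad and scales by the Earth radius before reporting kilometres:

import numpy as np
from sklearn.metrics import pairwise_distances

latlon_deg = np.array([[40.71, -73.93], [40.72, -73.99]])
dist_km = pairwise_distances(np.deg2rad(latlon_deg), metric='haversine') * 6371
print(round(float(dist_km[0, 1]), 1))   # roughly 5 km between the two points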
facility_location/env/obs_extractor.py ADDED
@@ -0,0 +1,184 @@
1
+ from typing import Dict, Tuple, Text
2
+
3
+ import numpy as np
4
+
5
+ from facility_location.env.facility_location_client import FacilityLocationClient
6
+ from facility_location.utils.config import Config
7
+
8
+
9
+ class ObsExtractor:
10
+ def __init__(self, cfg: Config, flc: FacilityLocationClient, node_range: int, edge_range: int):
11
+ self.cfg = cfg
12
+ self._flc = flc
13
+ self._node_range = node_range
14
+ self._edge_range = edge_range
15
+
16
+ self._construct_virtual_node_feature()
17
+ self._construct_node_features()
18
+ self._construct_action_mask()
19
+
20
+ def _construct_virtual_node_feature(self) -> None:
21
+ virtual_node_facility = 0
22
+ virtual_node_distance_min = 0
23
+ virtual_node_distance_sub_min = 0
24
+ virtual_node_cost_min = 0
25
+ virtual_node_cost_sub_min = 0
26
+ virtual_gain = 0
27
+ virtual_loss = 0
28
+
29
+ virtual_node_x = 0.5
30
+ virtual_node_y = 0.5
31
+ virtual_node_demand = 1
32
+ virtual_node_avg_distance = 0
33
+ virtual_node_avg_cost = 0
34
+ self._virtual_dynamic_node_feature = np.array([
35
+ virtual_node_facility,
36
+ virtual_node_distance_min,
37
+ virtual_node_distance_sub_min,
38
+ virtual_node_cost_min,
39
+ virtual_node_cost_sub_min,
40
+ virtual_gain,
41
+ virtual_loss,
42
+ ], dtype=np.float32)
43
+ self._virtual_static_node_feature = np.array([
44
+ virtual_node_x,
45
+ virtual_node_y,
46
+ virtual_node_demand,
47
+ virtual_node_avg_distance,
48
+ virtual_node_avg_cost,
49
+ ], dtype=np.float32)
50
+ self._virtual_node_feature = np.concatenate([
51
+ self._virtual_dynamic_node_feature,
52
+ self._virtual_static_node_feature,
53
+ ], axis=-1)
54
+
55
+ def _construct_node_features(self) -> None:
56
+ self._node_features = np.zeros((self._node_range, self._virtual_node_feature.size), dtype=np.float32)
57
+
58
+ def _construct_action_mask(self) -> None:
59
+ self._old_facility_mask = np.full(self._node_range, False)
60
+ self._new_facility_mask = np.full(self._node_range, False)
61
+
62
+ def get_node_dim(self) -> int:
63
+ return self._virtual_node_feature.size
64
+
65
+ def reset(self) -> None:
66
+ self._compute_static_obs()
67
+ self._reset_node_features()
68
+ self._reset_action_mask()
69
+
70
+ def _compute_static_obs(self) -> None:
71
+ xy, demands, n, _ = self._flc.get_instance()
72
+ if n + 2 > self._node_range:
73
+ print(n, self._node_range)
74
+ # raise ValueError('The number of nodes exceeds the maximum limit.')
75
+ self._n = n
76
+ avg_distance, avg_cost = self._flc.get_avg_distance_and_cost()
77
+ avg_distance = avg_distance / np.max(avg_distance)
78
+ avg_cost = avg_cost / np.max(avg_cost)
79
+ self._static_node_features = np.stack([
80
+ xy[:, 0],
81
+ xy[:, 1],
82
+ demands,
83
+ avg_distance,
84
+ avg_cost,
85
+ ], axis=-1).astype(np.float32)
86
+ static_adjacency_list = self._flc.get_static_adjacency_list()
87
+
88
+ obs_node_mask = np.full(1 + n, True)
89
+ self._obs_node_mask = self._pad_mask(obs_node_mask, self._node_range, 'nodes')
90
+
91
+ obs_static_edge_mask = np.full(n + static_adjacency_list.shape[0], True)
92
+ self._obs_static_edge_mask = self._pad_mask(obs_static_edge_mask, self._edge_range, 'edges')
93
+
94
+ self._static_adjacency_list = self._pad_edge(static_adjacency_list)
95
+
96
+ def _reset_node_features(self) -> None:
97
+ self._node_features[:, :] = 0
98
+ self._node_features[0] = self._virtual_node_feature
99
+ self._node_features[1:self._n+1, len(self._virtual_dynamic_node_feature):] = self._static_node_features
100
+
101
+ def _reset_action_mask(self) -> None:
102
+ self._old_facility_mask[:] = False
103
+ self._new_facility_mask[:] = False
104
+
105
+ def get_obs(self, t: int) -> Dict:
106
+ obs_nodes, obs_static_edges, obs_dynamic_edges, \
107
+ obs_node_mask, obs_static_edge_mask, obs_dynamic_edges_mask = self._get_obs_graph()
108
+ obs = {
109
+ 'node_features': obs_nodes,
110
+ 'static_adjacency_list': obs_static_edges,
111
+ 'dynamic_adjacency_list': obs_dynamic_edges,
112
+ 'node_mask': obs_node_mask,
113
+ 'static_edge_mask': obs_static_edge_mask,
114
+ 'dynamic_edge_mask': obs_dynamic_edges_mask,
115
+ }
116
+
117
+ return obs
118
+
119
+ def _get_obs_graph(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
120
+ facility = self._flc.get_current_solution().astype(np.float32)
121
+ distance = self._flc.get_current_distance().astype(np.float32)
122
+ distance = distance / np.max(distance)
123
+ cost = self._flc.get_current_cost().astype(np.float32)
124
+ cost = cost / np.max(cost)
125
+ gain, loss = self._flc.get_gain_and_loss()
126
+ gain = gain / np.max(gain)
127
+ loss = loss / np.max(loss)
128
+ dynamic_node_features = np.stack([
129
+ facility,
130
+ distance[:,0],
131
+ distance[:,1],
132
+ cost[:,0],
133
+ cost[:,1],
134
+ gain,
135
+ loss,
136
+ ], axis=-1)
137
+ self._node_features[1:self._n+1, :len(self._virtual_dynamic_node_feature)] = dynamic_node_features
138
+ obs_nodes = self._node_features
139
+ obs_static_edges = self._static_adjacency_list
140
+ obs_dynamic_edges = self._flc.get_dynamic_adjacency_list()
141
+ # print(obs_dynamic_edges.shape)
142
+ obs_dynamic_edge_mask = np.full(obs_dynamic_edges.shape[0], True)
143
+ obs_node_mask = self._obs_node_mask
144
+ obs_static_edge_mask = self._obs_static_edge_mask
145
+ obs_dynamic_edges = self._pad_edge_wo_virtual(obs_dynamic_edges)
146
+ obs_dynamic_edge_mask = self._pad_mask(obs_dynamic_edge_mask, self._edge_range, 'edges')
147
+
148
+ return obs_nodes, obs_static_edges, obs_dynamic_edges, obs_node_mask, obs_static_edge_mask, obs_dynamic_edge_mask
149
+ # return obs_nodes, obs_static_edges, obs_node_mask, obs_edge_mask
150
+
151
+ def _get_obs_action_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
152
+ old_facility_mask, new_facility_mask = self._flc.get_facility_mask()
153
+ old_tabu_mask, new_tabu_mask = self._flc.get_tabu_mask(t)
154
+ self._old_facility_mask[1:self._n+1] = np.logical_and(old_facility_mask, old_tabu_mask)
155
+ self._new_facility_mask[1:self._n+1] = np.logical_and(new_facility_mask, new_tabu_mask)
156
+ obs_old_facility_mask = self._old_facility_mask
157
+ obs_new_facility_mask = self._new_facility_mask
158
+ if not np.any(obs_old_facility_mask) or not np.any(obs_new_facility_mask):
159
+ raise ValueError('The action mask is empty.')
160
+ return obs_old_facility_mask, obs_new_facility_mask
161
+
162
+ @staticmethod
163
+ def _pad_mask(mask: np.ndarray, max_num: int, name: Text) -> np.ndarray:
164
+ pad = (0, max_num - mask.size)
165
+ if pad[1] < 0:
166
+ raise ValueError(f'The number of {name} exceeds the maximum limit.')
167
+ return np.pad(mask, pad, mode='constant', constant_values=False)
168
+
169
+ def _pad_edge(self, edge: np.ndarray) -> np.ndarray:
170
+ virtual_edge = np.stack([np.zeros(self._n), np.arange(1, self._n + 1)], axis=-1).astype(np.int32)
171
+ edge = np.concatenate([virtual_edge, edge + 1], axis=0)
172
+ pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
173
+ if pad[0][1] < 0:
174
+ raise ValueError('The number of edges exceeds the maximum limit.')
175
+ return np.pad(edge, pad, mode='constant', constant_values=self._node_range - 1)
176
+
177
+ def _pad_edge_wo_virtual(self, edge: np.ndarray) -> np.ndarray:
178
+ pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
179
+ if pad[0][1] < 0:
180
+ print(self._edge_range, edge.shape[0])
181
+ raise ValueError('The number of edges exceeds the maximum limit.')
182
+
183
+ return np.pad(edge + 1, pad, mode='constant', constant_values=self._node_range - 1)
184
+
facility_location/env/pmp.py ADDED
@@ -0,0 +1,502 @@
1
+ import io
2
+ import warnings
3
+ from typing import Tuple, Dict, Optional, List, Text
4
+
5
+ import gym
6
+ import math
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ import pickle, os
10
+
11
+ from numpy import ndarray
12
+
13
+ from facility_location.utils.config import Config
14
+ from facility_location.env.facility_location_client import FacilityLocationClient
15
+ from facility_location.env.obs_extractor import ObsExtractor
16
+ from stable_baselines3 import PPO
17
+ from stable_baselines3.common.vec_env import DummyVecEnv
18
+ from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
19
+ from facility_location.utils.policy import get_policy_kwargs
20
+
21
+
22
+ class PMPEnv(gym.Env):
23
+ EPSILON = 1e-6
24
+
25
+ def __init__(self,
26
+ cfg: Config):
27
+ self.cfg = cfg
28
+ self._train_region = cfg.env_specs['region']
29
+ self._eval_region = cfg.eval_specs['region']
30
+ self._min_n = cfg.env_specs['min_n']
31
+ self._max_n = cfg.env_specs['max_n']
32
+ self._min_p_ratio = cfg.env_specs['min_p_ratio']
33
+ self._max_p_ratio = cfg.env_specs['max_p_ratio']
34
+ self._max_steps_scale = cfg.env_specs['max_steps_scale']
35
+ self._tabu_stable_steps_scale = cfg.env_specs['tabu_stable_steps_scale']
36
+ self._popstar = cfg.env_specs['popstar']
37
+
38
+ self._seed(cfg.seed)
39
+
40
+ self._done = False
41
+
42
+ self._set_node_edge_range()
43
+
44
+ self._flc = FacilityLocationClient(cfg, self._np_random)
45
+ self._obs_extractor = ObsExtractor(cfg, self._flc, self._node_range, self._edge_range)
46
+
47
+ self._declare_spaces()
48
+
49
+ def _declare_spaces(self) -> None:
50
+ self.observation_space = gym.spaces.Dict({
51
+ 'node_features': gym.spaces.Box(low=0, high=1, shape=(self._node_range, self.get_node_feature_dim())),
52
+ 'static_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64),
53
+ 'dynamic_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64),
54
+ 'node_mask': gym.spaces.Box(low=0, high=1, shape=(self._node_range,), dtype=bool),
55
+ 'static_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=bool),
56
+ 'dynamic_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=bool),
57
+ })
58
+ if not self._popstar:
59
+ self.action_space = gym.spaces.Discrete(self._node_range ** 2)
60
+ else:
61
+ self.action_space = gym.spaces.Discrete(self._node_range ** 2)
62
+
63
+ def _set_node_edge_range(self) -> None:
64
+ self._node_range = self._max_n + 2
65
+ self._edge_range = int(self._max_n ** 2 * self._max_p_ratio)
66
+
67
+ def get_node_feature_dim(self) -> int:
68
+ return self._obs_extractor.get_node_dim()
69
+
70
+ def _seed(self, seed: int) -> None:
71
+ self._np_random = np.random.default_rng(seed)
72
+
73
+ def get_reward(self) -> float:
74
+ reward = self._obj_value[self._t - 1] - self._obj_value[self._t]
75
+ return reward
76
+
77
+ def _transform_action(self, action: np.ndarray) -> np.ndarray:
78
+ if self._popstar:
79
+ action = np.array(np.unravel_index(action, (self._node_range, self._node_range)))
80
+ action = action - 1
81
+ return action
82
+
83
+ def step(self, action: np.ndarray):
84
+ if self._done:
85
+ raise RuntimeError('Action taken after episode is done.')
86
+ obj_value, solution, info = self._flc.swap(action, self._t)
87
+ self._t += 1
88
+ self._done = (self._t == self._max_steps)
89
+ self._obj_value[self._t] = obj_value
90
+ self._solution[self._t] = solution
91
+ reward = self.get_reward()
92
+ if obj_value < self._best_obj_value - self.EPSILON:
93
+ self._best_obj_value = obj_value
94
+ self._best_solution = solution
95
+ self._last_best_t = self._t
96
+ elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
97
+ self._flc.reset_tabu_time()
98
+
99
+ # if self._done:
100
+ # print('done')
101
+ # for i in range(self._t):
102
+ # print(f'{i}:',np.where(self._solution[i]))
103
+
104
+ return self._get_obs(self._t), reward, self._done, False, info
105
+
106
+ def reset(self, seed = 0) -> Optional[Dict]:
107
+ if self._train_region is None:
108
+ points, demands, n, p = self._generate_new_instance()
109
+ self._flc.set_instance(points, demands, n, p, False)
110
+ else:
111
+ points, demands, n, p = self._use_real_instance()
112
+ self._flc.set_instance(points, demands, n, p, True)
113
+
114
+ return self.prepare(n, p), {}
115
+
116
+ def prepare(self, n: int, p: int) -> Dict:
117
+ initial_obj_value, initial_solution = self._flc.compute_initial_solution()
118
+ self._obs_extractor.reset()
119
+ self._done = False
120
+ self._t = 0
121
+ self._max_steps = max(int(p * self._max_steps_scale), 5)
122
+ self._obj_value = np.zeros(self._max_steps + 1)
123
+ self._obj_value[0] = initial_obj_value
124
+ self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
125
+ self._solution[0] = initial_solution
126
+ self._best_solution = initial_solution
127
+ self._best_obj_value = initial_obj_value
128
+ self._last_best_t = 0
129
+ self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
130
+ return self._get_obs(self._t)
131
+
132
+ def render(self, mode='human', dpi=300) -> Optional[np.ndarray]:
133
+ gdf, facilities = self._flc.get_gdf_facilities()
134
+ if len(facilities) > 10:
135
+ warnings.warn('Too many facilities to render. Only rendering the first 10.')
136
+ facilities = facilities[:10]
137
+
138
+ cm = plt.get_cmap('tab10')
139
+ fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=dpi)
140
+ for i, f in enumerate(facilities):
141
+ gdf.loc[gdf['assignment'] == f].plot(ax=axs[0],
142
+ zorder=2,
143
+ alpha=0.7,
144
+ edgecolor="k",
145
+ color=cm(i))
146
+ gdf.loc[[f]].plot(ax=axs[0],
147
+ marker='*',
148
+ markersize=300,
149
+ zorder=3,
150
+ alpha=0.7,
151
+ edgecolor="k",
152
+ color=cm(i))
153
+ axs[0].set_title("Facility Location", fontweight="bold")
154
+ plot_obj_value = self._obj_value[:self._t + 1]
155
+ axs[1].plot(plot_obj_value, marker='.', markersize=10, color='k')
156
+ axs[1].set_title("Objective Value", fontweight="bold")
157
+ axs[1].set_xticks(np.arange(self._max_steps + 1, step=math.ceil((self._max_steps + 1) / 10)))
158
+ fig.tight_layout()
159
+
160
+ if mode == 'human':
161
+ plt.show()
162
+
163
+ else:
164
+ io_buf = io.BytesIO()
165
+ fig.savefig(io_buf, format='raw', dpi=dpi)
166
+ io_buf.seek(0)
167
+ img_arr = np.reshape(np.frombuffer(io_buf.getvalue(), dtype=np.uint8),
168
+ newshape=(int(fig.bbox.bounds[3]), int(fig.bbox.bounds[2]), -1))
169
+ io_buf.close()
170
+ return img_arr
171
+
172
+ def close(self):
173
+ plt.close()
174
+
175
+ def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
176
+ n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
177
+ p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
178
+ p = int(max(n * p_ratio, 4))
179
+
180
+ points = self._np_random.uniform(size=(n, 2))
181
+ while np.unique(points, axis=0).shape[0] != n:
182
+ points = self._np_random.uniform(size=(n, 2))
183
+ demands = self._np_random.random(size=(n,))
184
+ return points, demands, n, p
185
+
186
+ def _use_real_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
187
+ data_path = './data/{}/pkl'.format(self.cfg.eval_specs['region'])
188
+ files = os.listdir(data_path)
189
+ files = [f for f in files if f.endswith('.pkl')]
190
+ sample_data_path = os.path.join(data_path, files[self._np_random.integers(len(files))])
191
+ with open(sample_data_path, 'rb') as f:
192
+ np_data = pickle.load(f)
193
+
194
+ n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
195
+ p = max(int(n * self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)), 4)
196
+ sample_cbgs = self._np_random.choice(list(np_data[1].keys()), n, replace=False)
197
+ points = []
198
+ demands = []
199
+ for cbg in sample_cbgs:
200
+ points.append(np_data[1][cbg]['pos'])
201
+ demands.append(np_data[1][cbg]['demand'])
202
+ points = np.array(points)
203
+ demands = np.array(demands)
204
+
205
+ return points, demands, n, p
206
+
207
+ def _get_obs(self, t: int) -> Dict:
208
+ return self._obs_extractor.get_obs(t)
209
+
210
+ def get_initial_solution(self) -> np.ndarray:
211
+ return self._solution[0]
212
+
213
+
214
+ class EvalPMPEnv(PMPEnv):
215
+ def __init__(self,
216
+ cfg: Config,
217
+ positions, demands, n, p, boost=False):
218
+ self._eval_np = (n,p)
219
+ self._eval_seed = cfg.eval_specs['seed']
220
+ self._boost = boost
221
+ print(self._boost)
222
+ self.points = positions
223
+ self.demands = demands
224
+ self._n = n
225
+ self._p = p
226
+
227
+ super().__init__(cfg)
228
+
229
+ def _set_node_edge_range(self) -> None:
230
+ n, p = self._eval_np
231
+
232
+ self._node_range = n + 2
233
+ self._edge_range = n * p
234
+
235
+ def get_eval_num_cases(self) -> int:
236
+ return self._eval_num_cases
237
+
238
+ def get_eval_np(self) -> Tuple[int, int]:
239
+ return self._eval_np
240
+
241
+ def reset_instance_id(self) -> None:
242
+ self._instance_id = 0
243
+
244
+ def step(self, action: np.ndarray):
245
+ if self._done:
246
+ raise RuntimeError('Action taken after episode is done.')
247
+ obj_value, solution, info = self._flc.swap(action, self._t)
248
+ self._t += 1
249
+ self._done = (self._t == self._max_steps)
250
+ self._obj_value[self._t] = obj_value
251
+ self._solution[self._t] = solution
252
+ reward = self.get_reward()
253
+ if obj_value < self._best_obj_value - self.EPSILON:
254
+ self._best_obj_value = obj_value
255
+ self._best_solution = solution
256
+ self._last_best_t = self._t
257
+ elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
258
+ self._flc.reset_tabu_time()
259
+ print(self._t, self._max_steps)
260
+
261
+ return self._get_obs(self._t), reward, self._done, False, info
262
+
263
+ def get_reward(self) -> float:
264
+ if self._done:
265
+ reward = -np.min(self._obj_value)
266
+ else:
267
+ reward = 0.0
268
+
269
+ return reward
270
+
271
+ def get_best_solution(self) -> np.ndarray:
272
+ return self._best_solution
273
+
274
+ def reset(self, seed = 0) -> Dict:
275
+ self._flc.set_instance(self.points, self.demands, self._n, self._p, False)
276
+ return self.prepare(self._n, self._p, self._boost), {}
277
+
278
+ def prepare(self, n: int, p: int, boost: bool) -> Dict:
279
+ initial_obj_value, initial_solution = self._flc.compute_initial_solution()
280
+ self._obs_extractor.reset()
281
+ self._done = False
282
+ self._t = 0
283
+ self._max_steps = max(int(p * self._max_steps_scale), 5)
284
+ if boost:
285
+ self._max_steps = max(int(self._max_steps_scale / 10), 5)
286
+ self._obj_value = np.zeros(self._max_steps + 1)
287
+ self._obj_value[0] = initial_obj_value
288
+ self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
289
+ self._solution[0] = initial_solution
290
+ self._best_solution = initial_solution
291
+ self._best_obj_value = initial_obj_value
292
+ self._last_best_t = 0
293
+ self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
294
+ return self._get_obs(self._t)
295
+
296
+ def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
297
+ points, demands, n, p = self._flc.get_instance()
298
+ return points, demands, n, p
299
+
300
+ def get_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
301
+ return self._flc.get_distance_and_cost_matrix()
302
+
303
+ def evaluate(self, solution: np.ndarray) -> float:
304
+ self._flc.set_solution(solution)
305
+ obj_value = self._flc.compute_obj_value()
306
+ return obj_value
307
+
308
+ class MULTIPMP(PMPEnv):
309
+ EPSILON = 1e-6
310
+ def __init__(self,
311
+ cfg,
312
+ data_npy,
313
+ boost = False):
314
+ self.cfg = cfg
315
+ self.data_npy = data_npy
316
+ self._boost = boost
317
+ self._all_points, self._all_demands, self._n, self._all_p = self._load_multi_facility_data(data_npy)
318
+ self.boost = boost
319
+ self._all_solutions = self._load_multi_facility_solutions(boost)
320
+ self._final_solutions = list(self._all_solutions)
321
+ self._num_types = len(self._all_p)
322
+ self._current_type = 0
323
+ self._all_max_steps, self._old_mask, self._new_mask = self._get_max_steps()
324
+ super().__init__(cfg)
325
+
326
+ def _set_node_edge_range(self) -> None:
327
+ self._node_range = self._n + 2
328
+ self._edge_range = self._n * max(self._all_p)
329
+
330
+ def step(self, action: np.ndarray):
331
+ if self._done:
332
+ raise RuntimeError('Action taken after episode is done.')
333
+ obj_value, solution, info = self._flc.swap(action, self._t)
334
+ self._t += 1
335
+ self._done = (self._t == self._all_max_steps[-1] and self._current_type == len(self._all_max_steps) - 1)
336
+ self._obj_value[self._t] = obj_value
337
+ self._solution[self._t] = solution
338
+ reward = self.get_reward()
339
+ if obj_value < self._best_obj_value - self.EPSILON:
340
+ self._best_obj_value = obj_value
341
+ self._best_solution = solution
342
+ self._last_best_t = self._t
343
+ elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
344
+ self._flc.reset_tabu_time()
345
+
346
+ if self._t == self._all_max_steps[self._current_type] and not self._done:
347
+ self._t = 0
348
+ self._multi_obj += obj_value
349
+ self._final_solutions[self._current_type] = solution
350
+ self._update_type()
351
+
352
+ if self._done:
353
+ pickle.dump(self._final_solutions, open('./facility_location/solutions.pkl', 'wb'))
354
+
355
+ return self._get_obs(self._t), reward, self._done, False, info
356
+
357
+ def reset(self, seed = 0) -> Optional[Dict]:
358
+ self._current_type = 0
359
+ points = self._all_points
360
+ demands = self._all_demands[:,0]
361
+ n = self._n
362
+ p = self._all_p[0]
363
+ solution = self._all_solutions[0]
364
+ self._multi_obj = 0
365
+
366
+ self._flc.set_instance(points, demands, n, p, True)
367
+
368
+ return self.prepare(n, p, solution), {}
369
+
370
+ def _update_type(self):
371
+ if self._current_type >= self._num_types:
372
+ raise RuntimeError('Action taken after episode is done.')
373
+ self._current_type += 1
374
+ if self._current_type < self._num_types - 1:
375
+ points = self._all_points
376
+ demands = self._all_demands[:,self._current_type]
377
+ n = self._n
378
+ p = self._all_p[self._current_type]
379
+ solution = self._all_solutions[self._current_type]
380
+ self._flc.set_instance(points, demands, n, p, True)
381
+ self.prepare(n, p, solution)
382
+
383
+ def prepare(self, n: int, p: int, solution: list) -> Dict:
384
+ initial_solution = solution
385
+ initial_obj_value = self._flc.compute_obj_value_from_solution(initial_solution)
386
+ self._obs_extractor.reset()
387
+ self._done = False
388
+ self._t = 0
389
+ self._max_steps = self._all_max_steps[self._current_type]
390
+ self._flc.init_facility_mask(self._old_mask[self._current_type], self._new_mask[self._current_type])
391
+ self._obj_value = np.zeros(self._max_steps + 1)
392
+ self._obj_value[0] = initial_obj_value
393
+ self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
394
+ self._solution[0] = initial_solution
395
+ self._best_solution = initial_solution
396
+ self._best_obj_value = initial_obj_value
397
+ self._last_best_t = 0
398
+ self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
399
+ return self._get_obs(self._t)
400
+
401
+ def _get_max_steps(self) -> Tuple[list, list, list]:
402
+ tmp_all_solutions = list(self._all_solutions)
403
+ count_true = [sum(s) for s in zip(*tmp_all_solutions)]
404
+ max_steps = []
405
+ old_idx = []
406
+ new_idx = []
407
+ for t in range(self._num_types):
408
+ old = [i for i in range(len(count_true)) if count_true[i] > 1 and tmp_all_solutions[t][i]]
409
+ new = [i for i in range(len(count_true)) if count_true[i] == 0]
410
+ if len(old):
411
+ old_idx.append(old)
412
+ new_idx.append(new)
413
+ max_steps.append(len(old))
414
+ for i in old:
415
+ count_true[i] = count_true[i] - 1
416
+ return max_steps, old_idx, new_idx
417
+
418
+ def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
419
+ n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
420
+ p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
421
+ p = int(max(n * p_ratio, 4))
422
+
423
+ points = self._np_random.uniform(size=(n, 2))
424
+ while np.unique(points, axis=0).shape[0] != n:
425
+ points = self._np_random.uniform(size=(n, 2))
426
+ demands = self._np_random.random(size=(n,))
427
+ return points, demands, n, p
428
+
429
+ def _load_multi_facility_data(self, data_npy) -> Tuple[np.ndarray, np.ndarray, int, np.ndarray]:
430
+ data = data_npy.split('\n')
431
+ n = len(data)
432
+ p = int((len(data[0].split(' '))-2) / 2)
433
+
434
+ positions = []
435
+ demands = []
436
+ actual_facilities = []
437
+ ps = []
438
+ for row in data:
439
+ row = row.split(' ')
440
+ row = [x for x in row if len(x)]
441
+ positions.append([float(row[0]), float(row[1])])
442
+
443
+ demand = []
444
+ for i in range(2, 2+p):
445
+ demand.append(float(row[i]))
446
+ demands.append(demand)
447
+
448
+ actual_facility = []
449
+ for i in range(2+p, 2+2*p):
450
+ actual_facility.append(bool(int(float(row[i]))))
451
+ actual_facilities.append(actual_facility)
452
+
453
+ positions = np.array(positions)
454
+ positions = np.deg2rad(positions)
455
+ demands = np.array(demands)
456
+ actual_facilities = np.array(actual_facilities)
457
+ ps = actual_facilities.sum(axis=0)
458
+
459
+ return positions, demands, n, ps
460
+
461
+ def _load_multi_facility_solutions(self, boost) -> list:
462
+ def load_model(positions, demands, n, p, boost):
463
+ eval_env = EvalPMPEnv(self.cfg, positions, demands, n, p, boost)
464
+ eval_env = DummyVecEnv([lambda: eval_env])
465
+
466
+ policy_kwargs = get_policy_kwargs(self.cfg)
467
+ test_model = PPO(MaskedFacilityLocationActorCriticPolicy,
468
+ eval_env,
469
+ verbose=1,
470
+ policy_kwargs=policy_kwargs,
471
+ device='cuda:1')
472
+ train_model = PPO.load(self.cfg.load_model_path)
473
+ test_model.set_parameters(train_model.get_parameters())
474
+ return test_model, eval_env
475
+
476
+ def get_optimal_solution(model, eval_env):
477
+ obs = eval_env.reset()
478
+ done = False
479
+ while not done:
480
+ action, _ = model.predict(obs, deterministic=True)
481
+ obs, _, done, info = eval_env.step(action)
482
+ return eval_env.get_attr('_best_solution')[0]
483
+
484
+ multi_solutions = []
485
+ for i in range(len(self._all_p)):
486
+ positions = self._all_points
487
+ demands = self._all_demands[:,i]
488
+ n = self._n
489
+ p = self._all_p[i]
490
+ model, env = load_model(positions,demands,n,p,boost)
491
+ multi_solutions.append(get_optimal_solution(model, env))
492
+
493
+ return multi_solutions
494
+
495
+ def get_reward(self) -> float:
496
+ if self._done:
497
+ reward = np.min(self._obj_value)
498
+ else:
499
+ reward = 0.0
500
+ return reward
501
+
502
+
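MULTIPMP._load_multi_facility_data above parses data_npy as newline-separated rows of space-separated values: two coordinates in degrees, then p demand columns, then p 0/1 columns marking the currently open facilities of each type. Note that p is derived from a plain split(' ') of the first row, so single spaces between columns and no trailing newline are the safe formatting. A tiny illustrative payload (all numbers invented) with n = 3 points and p = 2 facility types:

# Hedged example of the text format MULTIPMP._load_multi_facility_data expects.
# Columns per row: lon lat demand_type0 demand_type1 open_type0 open_type1.
data_npy = "\n".join([
    "116.30 39.98 12.0 3.5 1 0",
    "116.35 39.95 8.0 7.0 0 1",
    "116.40 40.01 5.5 2.0 0 0",
])
# The parser converts positions with np.deg2rad, reads the p demand columns, parses the
# p 0/1 columns into booleans, and recovers each type's p by summing those columns.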
facility_location/multi_eval.py ADDED
@@ -0,0 +1,96 @@
1
+ import os
2
+ import pickle
3
+
4
+ import setproctitle
5
+ from absl import app, flags
6
+ import time
7
+ import random
8
+ from typing import Tuple, Union, Text
9
+
10
+ import numpy as np
11
+ import torch as th
12
+
13
+ import sys
14
+ import gymnasium
15
+ sys.modules["gym"] = gymnasium
16
+
17
+ from stable_baselines3.common.evaluation import evaluate_policy
18
+ from stable_baselines3 import PPO
19
+ from stable_baselines3.common.monitor import Monitor
20
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecEnvWrapper
21
+
22
+ from facility_location.agent.solver import PMPSolver
23
+ from facility_location.env import EvalPMPEnv, MULTIPMP
24
+ from facility_location.utils import Config
25
+ from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
26
+ from facility_location.utils.policy import get_policy_kwargs
27
+
28
+ import warnings
29
+ warnings.filterwarnings('ignore')
30
+
31
+
32
+ AGENT = Union[PMPSolver, PPO]
33
+
34
+ def get_model(cfg: Config,
35
+ env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
36
+ device: str) -> PPO:
37
+ policy_kwargs = get_policy_kwargs(cfg)
38
+ model = PPO(MaskedFacilityLocationActorCriticPolicy,
39
+ env,
40
+ verbose=1,
41
+ policy_kwargs=policy_kwargs,
42
+ device=device)
43
+ return model
44
+
45
+
46
+ def get_agent(cfg: Config,
47
+ env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
48
+ model_path: Text) -> AGENT:
49
+ if cfg.agent in ['rl-mlp', 'rl-gnn', 'rl-agnn']:
50
+ test_model = get_model(cfg, env, device='cuda:0')
51
+ trained_model = PPO.load(model_path)
52
+ test_model.set_parameters(trained_model.get_parameters())
53
+ agent = test_model
54
+ else:
55
+ raise ValueError(f'Agent {cfg.agent} not supported.')
56
+ return agent
57
+
58
+
59
+ def evaluate(agent: AGENT,
60
+ env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
61
+ num_cases: int,
62
+ return_episode_rewards: bool):
63
+ if isinstance(agent, PPO):
64
+ return evaluate_ppo(agent, env, num_cases, return_episode_rewards=return_episode_rewards)
65
+ else:
66
+ raise ValueError(f'Agent {agent} not supported.')
67
+
68
+ from stable_baselines3.common.callbacks import BaseCallback
69
+
70
+
71
+ def evaluate_ppo(agent: PPO, env: EvalPMPEnv, num_cases: int, return_episode_rewards: bool) -> Tuple[float, float]:
72
+ rewards, _ = evaluate_policy(agent, env, n_eval_episodes=num_cases, return_episode_rewards=return_episode_rewards)
73
+ return rewards
74
+
75
+
76
+ def main(data_npy, boost=False):
77
+ th.manual_seed(0)
78
+ np.random.seed(0)
79
+ random.seed(0)
80
+ model_path = './facility_location/best_model.zip'
81
+
82
+ cfg = Config('plot', 0, False, '/data2/suhongyuan/flp', 'rl-gnn', model_path=model_path)
83
+
84
+ eval_env = MULTIPMP(cfg, data_npy, boost)
85
+ eval_env = Monitor(eval_env)
86
+ eval_env = DummyVecEnv([lambda: eval_env])
87
+ agent = get_agent(cfg, eval_env, model_path)
88
+ start_time = time.time()
89
+ _ = evaluate(agent, eval_env, 1, return_episode_rewards=True)
90
+ eval_time = time.time() - start_time
91
+ print(f'\t time: {eval_time}')
92
+
93
+
94
+ if __name__ == '__main__':
95
+ app.run(main)
96
+
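main above seeds torch/numpy/random, wraps MULTIPMP in Monitor and DummyVecEnv, loads the trained PPO weights into the masked policy, and times a single evaluation episode. Calling it through app.run(main) would pass absl's leftover argv list as data_npy, so calling it directly with a string is the safer route. A hedged usage sketch; the input file name below is hypothetical:

# Hedged usage sketch; instance.txt is a hypothetical export in the row format
# documented above for MULTIPMP, not a file shipped with this commit.
from facility_location import multi_eval

with open('instance.txt') as f:
    data_npy = f.read().strip()           # strip() avoids an empty trailing row

multi_eval.main(data_npy, boost=True)     # boost=True is meant to shorten the per-type swap budget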
facility_location/solutions.pkl ADDED
Binary file (1.92 kB). View file
 
facility_location/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .config import Config
2
+
3
+ __all__ = ["Config"]
facility_location/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (209 Bytes). View file
 
facility_location/utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (210 Bytes). View file
 
facility_location/utils/__pycache__/config.cpython-310.pyc ADDED
Binary file (4.09 kB). View file
 
facility_location/utils/__pycache__/config.cpython-39.pyc ADDED
Binary file (4.61 kB). View file
 
facility_location/utils/__pycache__/policy.cpython-310.pyc ADDED
Binary file (1.47 kB). View file
 
facility_location/utils/__pycache__/policy.cpython-39.pyc ADDED
Binary file (1.47 kB). View file
 
facility_location/utils/config.py ADDED
@@ -0,0 +1,133 @@
1
+ import os
2
+ from typing import Text, Dict
3
+
4
+ from stable_baselines3.common.utils import get_latest_run_id
5
+
6
+ import yaml
7
+
8
+
9
+ class Config:
10
+
11
+ def __init__(self, cfg_id: Text, global_seed: int, tmp: bool, root_dir: Text,
12
+ agent: Text = 'rl-gnn', reset_num_timesteps: bool = True, cfg_dict: Dict = None, model_path: Text = None):
13
+ self.cfg_id = cfg_id
14
+ self.seed = global_seed
15
+ if cfg_dict is not None:
16
+ cfg = cfg_dict
17
+ else:
18
+ file_path = './facility_location/cfg/{}.yaml'.format(self.cfg_id)
19
+ class TupleSafeLoader(yaml.SafeLoader):
20
+ def construct_python_tuple(self, node):
21
+ return tuple(self.construct_sequence(node))
22
+ TupleSafeLoader.add_constructor(
23
+ u'tag:yaml.org,2002:python/tuple',
24
+ TupleSafeLoader.construct_python_tuple)
25
+ def load_yaml(file_path):
26
+ cfg = yaml.load(open(file_path, 'r'), Loader=TupleSafeLoader)
27
+ return cfg
28
+ cfg = load_yaml(file_path)
29
+ # create dirs
30
+ self.root_dir = '/tmp/flp' if tmp else root_dir
31
+ self.agent = agent
32
+ self.multi = cfg.get('multi', False)
33
+
34
+ self.tb_log_path = os.path.join(self.root_dir, 'runs')
35
+ self.tb_log_name = f'{cfg_id}-agent-{agent}-seed-{global_seed}'
36
+ latest_run_id = get_latest_run_id(self.tb_log_path, self.tb_log_name)
37
+ if not reset_num_timesteps:
38
+ # Continue training in the same directory
39
+ latest_run_id -= 1
40
+ self.cfg_dir = os.path.join(self.root_dir,
41
+ 'output', f'{cfg_id}-agent-{agent}-seed-{global_seed}_{latest_run_id + 1}')
42
+ self.ckpt_save_path = os.path.join(self.cfg_dir, 'ckpt')
43
+ self.best_model_path = os.path.join(self.cfg_dir, 'best-models')
44
+ self.latest_model_path = os.path.join(self.cfg_dir, 'latest-models')
45
+ self.load_model_path = model_path
46
+
47
+
48
+ # env
49
+ self.env_specs = cfg.get('env_specs', dict())
50
+ self.reward_specs = cfg.get('reward_specs', dict())
51
+ self.obs_specs = cfg.get('obs_specs', dict())
52
+ self.eval_specs = cfg.get('eval_specs', dict())
53
+
54
+ # agent config
55
+ self.agent_specs = cfg.get('agent_specs', dict())
56
+ self.mlp_specs = cfg.get('mlp_specs', dict())
57
+ self.gnn_specs = cfg.get('gnn_specs', dict())
58
+ self.ts_specs = cfg.get('ts_specs', dict())
59
+ self.popstar_specs = cfg.get('popstar_specs', dict())
60
+ self.ga_specs = cfg.get('ga_specs', dict())
61
+
62
+ # training config
63
+ self.gamma = cfg.get('gamma', 0.99)
64
+ self.tau = cfg.get('tau', 0.95)
65
+ self.state_encoder_specs = cfg.get('state_encoder_specs', dict())
66
+ self.policy_specs = cfg.get('policy_specs', dict())
67
+ self.value_specs = cfg.get('value_specs', dict())
68
+ self.lr = cfg.get('lr', 4e-4)
69
+ self.weightdecay = cfg.get('weightdecay', 0.0)
70
+ self.eps = cfg.get('eps', 1e-5)
71
+ self.value_pred_coef = cfg.get('value_pred_coef', 0.5)
72
+ self.entropy_coef = cfg.get('entropy_coef', 0.01)
73
+ self.clip_epsilon = cfg.get('clip_epsilon', 0.2)
74
+ self.max_num_iterations = cfg.get('max_num_iterations', 1000)
75
+ self.num_episodes_per_iteration = cfg.get('num_episodes_per_iteration', 1000)
76
+ self.max_sequence_length = cfg.get('max_sequence_length', 100)
77
+ self.num_optim_epoch = cfg.get('num_optim_epoch', 4)
78
+ self.mini_batch_size = cfg.get('mini_batch_size', 1024)
79
+ self.save_model_interval = cfg.get('save_model_interval', 10)
80
+
81
+ def log(self, logger, tb_logger):
82
+ """Log cfg to logger and tensorboard."""
83
+ logger.info(f'id: {self.cfg_id}')
84
+ logger.info(f'seed: {self.seed}')
85
+ logger.info(f'env_specs: {self.env_specs}')
86
+ logger.info(f'reward_specs: {self.reward_specs}')
87
+ logger.info(f'obs_specs: {self.obs_specs}')
88
+ logger.info(f'agent_specs: {self.agent_specs}')
89
+ logger.info(f'gamma: {self.gamma}')
90
+ logger.info(f'tau: {self.tau}')
91
+ logger.info(f'state_encoder_specs: {self.state_encoder_specs}')
92
+ logger.info(f'policy_specs: {self.policy_specs}')
93
+ logger.info(f'value_specs: {self.value_specs}')
94
+ logger.info(f'lr: {self.lr}')
95
+ logger.info(f'weightdecay: {self.weightdecay}')
96
+ logger.info(f'eps: {self.eps}')
97
+ logger.info(f'value_pred_coef: {self.value_pred_coef}')
98
+ logger.info(f'entropy_coef: {self.entropy_coef}')
99
+ logger.info(f'clip_epsilon: {self.clip_epsilon}')
100
+ logger.info(f'max_num_iterations: {self.max_num_iterations}')
101
+ logger.info(f'num_episodes_per_iteration: {self.num_episodes_per_iteration}')
102
+ logger.info(f'max_sequence_length: {self.max_sequence_length}')
103
+ logger.info(f'num_optim_epoch: {self.num_optim_epoch}')
104
+ logger.info(f'mini_batch_size: {self.mini_batch_size}')
105
+ logger.info(f'save_model_interval: {self.save_model_interval}')
106
+
107
+ if tb_logger is not None:
108
+ tb_logger.add_hparams(
109
+ hparam_dict={
110
+ 'id': self.cfg_id,
111
+ 'seed': self.seed,
112
+ 'env_specs': str(self.env_specs),
113
+ 'reward_specs': str(self.reward_specs),
114
+ 'obs_specs': str(self.obs_specs),
115
+ 'agent_specs': str(self.agent_specs),
116
+ 'gamma': self.gamma,
117
+ 'tau': self.tau,
118
+ 'state_encoder_specs': str(self.state_encoder_specs),
119
+ 'policy_specs': str(self.policy_specs),
120
+ 'value_specs': str(self.value_specs),
121
+ 'lr': self.lr,
122
+ 'weightdecay': self.weightdecay,
123
+ 'eps': self.eps,
124
+ 'value_pred_coef': self.value_pred_coef,
125
+ 'entropy_coef': self.entropy_coef,
126
+ 'clip_epsilon': self.clip_epsilon,
127
+ 'max_num_iterations': self.max_num_iterations,
128
+ 'num_episodes_per_iteration': self.num_episodes_per_iteration,
129
+ 'max_sequence_length': self.max_sequence_length,
130
+ 'num_optim_epoch': self.num_optim_epoch,
131
+ 'mini_batch_size': self.mini_batch_size,
132
+ 'save_model_interval': self.save_model_interval},
133
+ metric_dict={'hparam/placeholder': 0.0})
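Config is a thin wrapper around a YAML file at facility_location/cfg/<cfg_id>.yaml (this commit ships plot.yaml) plus training defaults and output directories. A hedged sketch of constructing it the way multi_eval.main does; the root directory and model path below are placeholders, not paths from the repository:

# Hedged sketch; root_dir and model_path are placeholders.
from facility_location.utils import Config

cfg = Config(cfg_id='plot',               # loads ./facility_location/cfg/plot.yaml
             global_seed=0,
             tmp=False,
             root_dir='/path/to/workdir',
             agent='rl-gnn',
             model_path='./facility_location/best_model.zip')

print(cfg.env_specs, cfg.eval_specs)      # sections read straight from the YAML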
facility_location/utils/policy.py ADDED
@@ -0,0 +1,57 @@
1
+ from typing import Dict
2
+
3
+ from facility_location.agent import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
4
+ from facility_location.utils import Config
5
+
6
+
7
+ def get_policy_kwargs(cfg: Config) -> Dict:
8
+ if cfg.agent == 'rl-mlp':
9
+ hidden_units = cfg.mlp_specs.get('hidden_units', (32, 32))
10
+ node_dim = hidden_units[-1]
11
+ policy_feature_dim = FacilityLocationMLPExtractor.get_policy_feature_dim(node_dim)
12
+ value_feature_dim = FacilityLocationMLPExtractor.get_value_feature_dim(node_dim)
13
+ policy_kwargs = dict(
14
+ policy_feature_dim=policy_feature_dim,
15
+ value_feature_dim=value_feature_dim,
16
+ policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
17
+ value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
18
+ features_extractor_class=FacilityLocationMLPExtractor,
19
+ features_extractor_kwargs=dict(
20
+ hidden_units=hidden_units,),
21
+ popstar=cfg.env_specs.get('popstar', False),)
22
+
23
+ elif cfg.agent == 'rl-gnn':
24
+ num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
25
+ node_dim = cfg.gnn_specs.get('node_dim', 32)
26
+ policy_feature_dim = FacilityLocationGNNExtractor.get_policy_feature_dim(node_dim)
27
+ value_feature_dim = FacilityLocationGNNExtractor.get_value_feature_dim(node_dim)
28
+ policy_kwargs = dict(
29
+ policy_feature_dim=policy_feature_dim,
30
+ value_feature_dim=value_feature_dim,
31
+ policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
32
+ value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
33
+ features_extractor_class=FacilityLocationGNNExtractor,
34
+ features_extractor_kwargs=dict(
35
+ num_gnn_layers=num_gnn_layers,
36
+ node_dim=node_dim),
37
+ popstar=cfg.env_specs.get('popstar', False),)
38
+
39
+ elif cfg.agent == 'rl-agnn':
40
+ num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
41
+ node_dim = cfg.gnn_specs.get('node_dim', 32)
42
+ policy_feature_dim = FacilityLocationAttentionGNNExtractor.get_policy_feature_dim(node_dim)
43
+ value_feature_dim = FacilityLocationAttentionGNNExtractor.get_value_feature_dim(node_dim)
44
+ policy_kwargs = dict(
45
+ policy_feature_dim=policy_feature_dim,
46
+ value_feature_dim=value_feature_dim,
47
+ policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
48
+ value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
49
+ features_extractor_class=FacilityLocationAttentionGNNExtractor,
50
+ features_extractor_kwargs=dict(
51
+ num_gnn_layers=num_gnn_layers,
52
+ node_dim=node_dim),
53
+ popstar=cfg.env_specs.get('popstar', False),)
54
+
55
+ else:
56
+ raise NotImplementedError
57
+ return policy_kwargs
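get_policy_kwargs selects the feature extractor matching cfg.agent ('rl-mlp', 'rl-gnn' or 'rl-agnn') and bundles the head sizes that MaskedFacilityLocationActorCriticPolicy expects. A hedged sketch of how the result feeds into PPO, mirroring get_model in multi_eval.py; cfg and env are assumed to already exist:

# Hedged sketch; cfg is an existing Config and env an existing (vectorized) environment.
from stable_baselines3 import PPO
from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
from facility_location.utils.policy import get_policy_kwargs

policy_kwargs = get_policy_kwargs(cfg)    # raises NotImplementedError for unknown agents
model = PPO(MaskedFacilityLocationActorCriticPolicy,
            env,
            verbose=1,
            policy_kwargs=policy_kwargs,
            device='cpu')                 # any valid torch device string works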
final_solutions.pkl ADDED
File without changes
model.pth DELETED
Binary file (24.3 kB)
 
model.py DELETED
@@ -1,24 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
-
5
-
6
- class Net(nn.Module):
7
- def __init__(self, input_size, hidden_size, num_classes):
8
- super(Net, self).__init__()
9
- self.fc1 = nn.Linear(input_size, hidden_size)
10
- self.relu = nn.ReLU()
11
- self.fc2 = nn.Linear(hidden_size, num_classes)
12
- self.softmax = nn.Softmax(dim=1)
13
-
14
- def forward(self, x):
15
- out = self.fc1(x)
16
- out = self.relu(out)
17
- out = self.fc2(out)
18
- out = self.softmax(out)
19
- return out
20
-
21
-
22
- if __name__ == '__main__':
23
- net = Net(100, 50, 10)
24
- torch.save(net.state_dict(), 'model.pth')