Commit a257639 • 苏泓源 committed
1 Parent(s): c8cf824

update

This view is limited to 50 files because it contains too many changes. See raw diff.
- app.py +92 -22
- facility_location/__init__.py +0 -0
- facility_location/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/__pycache__/multi_eval.cpython-39.pyc +0 -0
- facility_location/agent/__init__.py +4 -0
- facility_location/agent/__pycache__/__init__.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/features_extractor.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/features_extractor.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/ga.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/ga.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/heuristic.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/heuristic.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/policy.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/policy.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/solver.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/solver.cpython-39.pyc +0 -0
- facility_location/agent/features_extractor.py +225 -0
- facility_location/agent/policy.py +229 -0
- facility_location/agent/solver.py +33 -0
- facility_location/cfg/__init__.py +0 -0
- facility_location/cfg/plot.yaml +64 -0
- facility_location/env/__init__.py +3 -0
- facility_location/env/__pycache__/__init__.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/env/__pycache__/facility_location_client.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/facility_location_client.cpython-39.pyc +0 -0
- facility_location/env/__pycache__/obs_extractor.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/obs_extractor.cpython-39.pyc +0 -0
- facility_location/env/__pycache__/pmp.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/pmp.cpython-39.pyc +0 -0
- facility_location/env/facility_location_client.py +278 -0
- facility_location/env/obs_extractor.py +184 -0
- facility_location/env/pmp.py +502 -0
- facility_location/multi_eval.py +96 -0
- facility_location/solutions.pkl +0 -0
- facility_location/utils/__init__.py +3 -0
- facility_location/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- facility_location/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/utils/__pycache__/config.cpython-310.pyc +0 -0
- facility_location/utils/__pycache__/config.cpython-39.pyc +0 -0
- facility_location/utils/__pycache__/policy.cpython-310.pyc +0 -0
- facility_location/utils/__pycache__/policy.cpython-39.pyc +0 -0
- facility_location/utils/config.py +133 -0
- facility_location/utils/policy.py +57 -0
- final_solutions.pkl +0 -0
- model.pth +0 -0
- model.py +0 -24
app.py
CHANGED
@@ -4,26 +4,98 @@ import plotly.graph_objects as go
 import plotly.express as px
 from sklearn.metrics import pairwise_distances
 import torch
+from facility_location import multi_eval
+import pickle
 
-def plot_from_npy(npy_data):
-    fig = go.Figure()
-
-    fig.add_trace(go.Scatter(x=[1, 2, 3, 4], y=[10, 11, 12, 13], mode='lines', name='New York'))
-    fig.update_layout(title_text="Facility Distribution in Cities")
-    fig.update_xaxes(title_text="Time")
-    fig.update_yaxes(title_text="Facility Count")
-
-
-    actual_fig = fig  # Replace this line with your actual_fig
-    solution_fig = fig  # Replace this line with your solution_fig
-
-    return actual_fig, solution_fig
 
 def solver_plot(data_npy, boost=False):
+    multi_eval.main(data_npy, boost)
+    all_solutions = pickle.loads(open('./facility_location/solutions.pkl', 'rb').read())
+
+    data = data_npy.split('\n')
+    n = len(data)
+    p = int((len(data[0].split(' '))-2) / 2)
+
+    positions = []
+    demands = []
+    actual_facilities = []
+    for row in data:
+        row = row.split(' ')
+        row = [x for x in row if len(x)]
+
+        positions.append([float(row[0]), float(row[1])])
+
+        demand = []
+        for i in range(2, 2+p):
+            demand.append(float(row[i]))
+        demands.append(demand)
+
+        actual_facility = []
+        for i in range(2+p, 2+2*p):
+            actual_facility.append(bool(int(float(row[i]))))
+        actual_facilities.append(actual_facility)
+    positions = np.array(positions)
+    demands = np.array(demands)
+    actual_facilities = np.array(actual_facilities)
+    solution_facilities = np.array(all_solutions).T
+    # print(solution_facilities)
+    # print(actual_facilities)
+
     actual_fig = go.Figure()
     solution_fig = go.Figure()
-
-
+    for i in range(p):
+        actual_fig.add_trace(go.Scattermapbox(
+            lat=positions[actual_facilities[:, i]][:, 0],
+            lon=positions[actual_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+        solution_fig.add_trace(go.Scattermapbox(
+            lat=positions[solution_facilities[:, i]][:, 0],
+            lon=positions[solution_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+
+    actual_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[actual_facilities[:, i]][:, 0]), \
+                        lon=np.mean(positions[actual_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0),)
+
+    solution_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[solution_facilities[:, i]][:, 0]), \
+                        lon=np.mean(positions[solution_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0),)
+    # show legend
+    actual_fig.update_layout(showlegend=True)
+    solution_fig.update_layout(showlegend=True)
+
+    positions = np.deg2rad(positions)
+    dist = pairwise_distances(positions, metric='haversine') * 6371
+    actual_ac = 0
+    solution_ac = 0
+    for i in range(p):
+        ac_matrix = dist * demands[:, i][:, None]
+        actual_ac += ac_matrix[:, actual_facilities[:, i]].min(axis=-1).sum()
+        solution_ac += ac_matrix[:, solution_facilities[:, i]].min(axis=-1).sum()
+
     return actual_fig, solution_fig, actual_ac, solution_ac
 
 def demo_plot(city, facility):
@@ -104,7 +176,7 @@ def demo_plot(city, facility):
     return actual_fig, solution_fig, actual_ac, solution_ac
 
 
-def
+def solver_plot1(data_npy, boost=False):
     data = data_npy.split('\n')
     n = len(data)
     p = int((len(data[0].split(' '))-2) / 2)
@@ -115,7 +187,6 @@ def solver_plot(data_npy, boost=False):
     for row in data:
         row = row.split(' ')
         row = [x for x in row if len(x)]
-        print(row)
 
         positions.append([float(row[0]), float(row[1])])
 
@@ -132,7 +203,6 @@ def solver_plot(data_npy, boost=False):
     demands = np.array(demands)
     actual_facilities = np.array(actual_facilities)
     solution_facilities = ~actual_facilities
-    print(actual_facilities)
 
     actual_fig = go.Figure()
     solution_fig = go.Figure()
@@ -193,13 +263,13 @@
 
 def get_example():
     return [
-        ('40.71 -73.93 213
-        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.61 -73.95 189 264 1 0")
+        ('40.71 -73.93 213 1\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
+        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.60 -73.92 129 214 1 0\n40.61 -73.95 189 264 0 1\n40.63 -73.94 124 164 1 0"),
     ]
 
 
 def load_npy_file(file_obj):
-    data = np.
+    data = np.loadtxt(file_obj.name)
     string_array = '\n'.join([' '.join(map(str, row)) for row in data])
     return string_array
 
@@ -231,8 +301,8 @@ with gr.Blocks() as demo:
     gr.Examples(
         examples=get_example(),
        inputs=[data_npy],
-        fn=
-        outputs=[actual_map, solution_map],
+        fn=solver_plot1,
+        outputs=[actual_map, solution_map, actual_ac, solution_ac],
    )
    with gr.Row():
        boost = gr.Checkbox(label="Turbo Boost (accelerate solution generation with fewer SWAP steps)", value=False)
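Note on the input format used throughout this diff: solver_plot expects one point per line, "lat lon demand_1 .. demand_p facility_1 .. facility_p". A minimal sketch of how one of the example rows above decodes (the variable names here are illustrative, not part of the commit):

row = '40.71 -73.93 213 124 0 1'.split(' ')
row = [x for x in row if len(x)]
p = int((len(row) - 2) / 2)                               # 2 facility types in this row
position = [float(row[0]), float(row[1])]                 # latitude, longitude
demands = [float(x) for x in row[2:2 + p]]                # one demand per facility type
facilities = [bool(int(float(x))) for x in row[2 + p:]]   # current placement flags
print(p, position, demands, facilities)                   # 2 [40.71, -73.93] [213.0, 124.0] [False, True]
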
facility_location/__init__.py
ADDED
File without changes

facility_location/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (145 Bytes)

facility_location/__pycache__/multi_eval.cpython-39.pyc
ADDED
Binary file (3.13 kB)
facility_location/agent/__init__.py
ADDED
@@ -0,0 +1,4 @@
+from .policy import MaskedFacilityLocationActorCriticPolicy
+from .features_extractor import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+
+__all__ = ['MaskedFacilityLocationActorCriticPolicy', 'FacilityLocationMLPExtractor', 'FacilityLocationGNNExtractor', 'FacilityLocationAttentionGNNExtractor']
facility_location/agent/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (419 Bytes)

facility_location/agent/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (420 Bytes)

facility_location/agent/__pycache__/features_extractor.cpython-310.pyc
ADDED
Binary file (7.44 kB)

facility_location/agent/__pycache__/features_extractor.cpython-39.pyc
ADDED
Binary file (7.46 kB)

facility_location/agent/__pycache__/ga.cpython-310.pyc
ADDED
Binary file (3.2 kB)

facility_location/agent/__pycache__/ga.cpython-39.pyc
ADDED
Binary file (3.19 kB)

facility_location/agent/__pycache__/heuristic.cpython-310.pyc
ADDED
Binary file (3.07 kB)

facility_location/agent/__pycache__/heuristic.cpython-39.pyc
ADDED
Binary file (3.12 kB)

facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc
ADDED
Binary file (6.84 kB)

facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc
ADDED
Binary file (6.86 kB)

facility_location/agent/__pycache__/policy.cpython-310.pyc
ADDED
Binary file (6.36 kB)

facility_location/agent/__pycache__/policy.cpython-39.pyc
ADDED
Binary file (6.29 kB)

facility_location/agent/__pycache__/solver.cpython-310.pyc
ADDED
Binary file (1.5 kB)

facility_location/agent/__pycache__/solver.cpython-39.pyc
ADDED
Binary file (1.5 kB)
facility_location/agent/features_extractor.py
ADDED
@@ -0,0 +1,225 @@
+from collections import OrderedDict
+from typing import Tuple
+
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+from stable_baselines3.common.type_aliases import TensorDict
+
+import time
+
+
+def mean_features(h: th.Tensor, mask: th.Tensor):
+    float_mask = mask.float()
+    mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
+    return mean_h
+
+
+# def compute_state(observations: TensorDict, h_nodes: th.Tensor):
+#     node_mask = observations['node_mask'].bool()
+#     mean_h_nodes = mean_features(h_nodes, node_mask)
+
+#     old_facility_mask = observations['old_facility_mask'].bool()
+#     h_old_facility = mean_features(h_nodes, old_facility_mask)
+#     h_old_facility_repeat = h_old_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_old_facility = th.cat([
+#         h_nodes,
+#         h_old_facility_repeat,
+#         h_nodes - h_old_facility_repeat,
+#         h_nodes * h_old_facility_repeat], dim=-1)
+
+#     new_facility_mask = observations['new_facility_mask'].bool()
+#     h_new_facility = mean_features(h_nodes, new_facility_mask)
+#     h_new_facility_repeat = h_new_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_new_facility = th.cat([
+#         h_nodes,
+#         h_new_facility_repeat,
+#         h_nodes - h_new_facility_repeat,
+
+#     state_value = th.cat([
+#         mean_h_nodes,
+#         h_old_facility,
+#         h_new_facility], dim=-1)
+
+#     return state_policy_old_facility, state_policy_new_facility, state_value, old_facility_mask, new_facility_mask
+
+def compute_state(observations: TensorDict, h_edges: th.Tensor):
+    dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+    mean_h_edges = mean_features(h_edges, dynamic_edge_mask)
+
+    state_policy_facility_pair = h_edges
+    state_value = mean_h_edges
+
+    return state_policy_facility_pair, state_value, dynamic_edge_mask
+
+
+class FacilityLocationMLPExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        hidden_units: Tuple = (32, 32),
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        self.node_mlp = self.create_mlp(observation_space.spaces['node_features'].shape[1], hidden_units)
+
+    @staticmethod
+    def create_mlp(input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+        layers = OrderedDict()
+        for i, units in enumerate(hidden_units):
+            if i == 0:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(input_dim, units)
+            else:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+            layers[f'mlp-extractor-tanh_{i}'] = nn.Tanh()
+        return nn.Sequential(layers)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_mlp(node_features)
+        return compute_state(observations, h_nodes)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 4
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 3
+
+
+class FacilityLocationGNNExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.single_gnn_layer = self.create_gnn(1, node_dim)[0]
+
+    @staticmethod
+    def create_node_encoder(num_node_features: int, node_dim: int) -> nn.Sequential:
+        node_encoder = nn.Sequential(
+            nn.Linear(num_node_features, node_dim),
+            nn.Tanh())
+        return node_encoder
+
+    @staticmethod
+    def create_gnn(num_gnn_layers: int, node_dim: int) -> nn.ModuleList:
+        layers = nn.ModuleList()
+        for i in range(num_gnn_layers):
+            gnn_layer = nn.Sequential(
+                nn.Linear(node_dim, node_dim),
+                nn.Tanh())
+            layers.append(gnn_layer)
+        return layers
+
+    @staticmethod
+    def scatter_count(h_edges, indices, edge_mask, max_num_nodes):
+        batch_size = h_edges.shape[0]
+        num_latents = h_edges.shape[2]
+
+        h_nodes = th.zeros(batch_size, max_num_nodes, num_latents).to(h_edges.device)
+        count_edge = th.zeros_like(h_nodes)
+        count = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges.shape).float()
+
+        idx = indices.unsqueeze(-1).expand(-1, -1, num_latents)
+        h_nodes = h_nodes.scatter_add_(1, idx, h_edges)
+        count_edge = count_edge.scatter_add_(1, idx, count)
+        return h_nodes, count_edge
+
+    @staticmethod
+    def gather_to_edges(h_nodes, edge_index, edge_mask, gnn_layer):
+        h_nodes = gnn_layer(h_nodes)
+        h_edges_12 = th.gather(h_nodes, 1, edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        h_edges_21 = th.gather(h_nodes, 1, edge_index[:, :, 1].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        mask = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges_12.shape)
+        h_edges_12 = th.where(mask, h_edges_12, th.zeros_like(h_edges_12))
+        h_edges_21 = th.where(mask, h_edges_21, th.zeros_like(h_edges_21))
+        return h_edges_12, h_edges_21
+
+    @classmethod
+    def scatter_to_nodes(cls, h_edges, edge_index, edge_mask, node_mask):
+        h_edges_12, h_edges_21 = h_edges
+        max_num_nodes = node_mask.shape[1]
+        h_nodes_1, count_1 = cls.scatter_count(h_edges_21, edge_index[:, :, 0], edge_mask, max_num_nodes)
+        h_nodes_2, count_2 = cls.scatter_count(h_edges_12, edge_index[:, :, 1], edge_mask, max_num_nodes)
+
+        h_nodes_sum = h_nodes_1 + h_nodes_2
+
+        mask = th.broadcast_to(node_mask.unsqueeze(-1), h_nodes_sum.shape)
+        count = count_1 + count_2
+        count_padding = th.ones_like(count)
+        count = th.where(mask, count, count_padding)
+
+        h_nodes = h_nodes_sum / count
+        return h_nodes
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        t1 = time.time()
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        static_edge_mask = observations['static_edge_mask'].bool()
+        dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, static_edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, static_edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+        h_edges12, h_edges21 = self.gather_to_edges(h_nodes, edge_dynamic_index, dynamic_edge_mask, self.single_gnn_layer)
+        h_edges = th.cat([h_edges12, h_edges21], dim=-1)
+
+        t2 = time.time()
+        # print('cal embedding time:', t2-t1)
+
+        return compute_state(observations, h_edges)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+
+class FacilityLocationAttentionGNNExtractor(FacilityLocationGNNExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, num_gnn_layers, node_dim)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.attention = nn.MultiheadAttention(node_dim, node_dim)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        edge_mask = observations['edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+
+        h_nodes = self.attention(h_nodes, h_nodes, h_nodes)[0]
+
+        return compute_state(observations, h_nodes)
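The masked mean in mean_features above is the standard trick for pooling over padded graphs: zero out padded slots, then divide by the count of real ones. A self-contained sketch with toy tensors (not repo code):

import torch as th

h = th.randn(2, 5, 8)                       # (batch, nodes, features), padded to 5 nodes
mask = th.tensor([[1, 1, 1, 0, 0],
                  [1, 1, 1, 1, 1]]).bool()  # which node slots are real

float_mask = mask.float()
mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
print(mean_h.shape)                         # torch.Size([2, 8]): one pooled vector per graph
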
facility_location/agent/policy.py
ADDED
@@ -0,0 +1,229 @@
+from functools import partial
+from typing import Callable, Tuple, Text, Union
+from collections import OrderedDict
+
+import numpy as np
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.policies import ActorCriticPolicy
+from stable_baselines3.common.utils import get_device
+from stable_baselines3.common.type_aliases import Schedule
+
+
+def create_mlp(head: Text, input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+    layers = OrderedDict()
+    for i, units in enumerate(hidden_units):
+        if i == 0:
+            layers[f'{head}_linear_{i}'] = nn.Linear(input_dim, units)
+        else:
+            layers[f'{head}_linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+        if i != len(hidden_units) - 1:
+            layers[f'{head}_tanh_{i}'] = nn.Tanh()
+    if head.startswith('policy'):
+        layers[f'{head}_flatten'] = nn.Flatten()
+    return nn.Sequential(layers)
+
+
+class MaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        # self.old_facility_policy_net = create_mlp('policy-old-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        # self.new_facility_policy_net = create_mlp('policy-new-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        self.pair_facility_policy_net = create_mlp('policy-pair-facility',
+                                                   policy_feature_dim,
+                                                   policy_hidden_units).to(device)
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    # def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+    #     state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+    #     old_facility_logits = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+    #     old_facility_padding = th.full_like(old_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_old_facility_logits = th.where(old_facility_mask, old_facility_logits, old_facility_padding)
+
+    #     new_facility_logits = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+    #     new_facility_padding = th.full_like(new_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_new_facility_logits = th.where(new_facility_mask, new_facility_logits, new_facility_padding)
+
+    #     masked_old_new_facility_logits = th.cat([masked_old_facility_logits, masked_new_facility_logits], dim=1)
+    #     return masked_old_new_facility_logits
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_pair_facility, _, dynamic_edge_mask = features
+        pair_facility_logits = self.pair_facility_policy_net(state_policy_pair_facility)
+        pair_facility_padding = th.full_like(dynamic_edge_mask, -th.inf, dtype=th.float32)
+        masked_pair_facility_logits = th.where(dynamic_edge_mask, pair_facility_logits, pair_facility_padding)
+
+        return masked_pair_facility_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, state_value, _ = features
+        return self.value_net(state_value)
+
+
+class POPSTARMaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        self.old_facility_policy_net = create_mlp('policy-old-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.new_facility_policy_net = create_mlp('policy-new-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.old_new_facility_policy_net = create_mlp('policy-old-new-facility',
+                                                      policy_feature_dim * 4,
+                                                      policy_hidden_units).to(device)
+
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+        node_range = old_facility_mask.shape[1]
+
+        loss = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+        loss = loss.repeat_interleave(node_range, dim=1)
+
+        gain = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+        gain = gain.repeat(1, node_range)
+
+        state_policy_old_facility_expand = state_policy_old_facility.unsqueeze(2).expand(-1, -1, node_range, -1)
+        state_policy_new_facility_expand = state_policy_new_facility.unsqueeze(1).expand(-1, node_range, -1, -1)
+        state_policy_old_new_facility = th.cat(
+            [
+                state_policy_old_facility_expand,
+                state_policy_new_facility_expand,
+                state_policy_old_facility_expand - state_policy_new_facility_expand,
+                state_policy_old_facility_expand * state_policy_new_facility_expand
+            ], dim=-1
+        )
+        extra = self.old_new_facility_policy_net(state_policy_old_new_facility)  # (batch_size, node_range * node_range)
+
+        logits = gain - loss + extra
+
+        action_mask = th.logical_and(old_facility_mask.unsqueeze(2), new_facility_mask.unsqueeze(1)).flatten(start_dim=1)
+        padding = th.full_like(action_mask, -th.inf, dtype=th.float32)
+        masked_logits = th.where(action_mask, logits, padding)
+
+        return masked_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, _, state_value, _, _ = features
+        return self.value_net(state_value)
+
+
+class MaskedFacilityLocationActorCriticPolicy(ActorCriticPolicy):
+    def __init__(
+        self,
+        observation_space: spaces.Space,
+        action_space: spaces.Space,
+        lr_schedule: Callable[[float], float],
+        *args,
+        **kwargs,
+    ):
+        self.policy_feature_dim = kwargs.pop('policy_feature_dim')
+        self.value_feature_dim = kwargs.pop('value_feature_dim')
+        self.policy_hidden_units = kwargs.pop('policy_hidden_units')
+        self.value_hidden_units = kwargs.pop('value_hidden_units')
+
+        self.popstar = kwargs.pop('popstar')
+
+        super().__init__(
+            observation_space,
+            action_space,
+            lr_schedule,
+            # Pass remaining arguments to base class
+            *args,
+            **kwargs,
+        )
+
+    def _build(self, lr_schedule: Schedule) -> None:
+        self._build_mlp_extractor()
+
+        self.action_net = nn.Identity()
+        self.value_net = nn.Identity()
+
+        # Init weights: use orthogonal initialization
+        # with small initial weight for the output
+        if self.ortho_init:
+            # TODO: check for features_extractor
+            # Values from stable-baselines.
+            # features_extractor/mlp values are
+            # originally from openai/baselines (default gains/init_scales).
+            module_gains = {
+                self.features_extractor: np.sqrt(2),
+                self.mlp_extractor: np.sqrt(2),
+            }
+            # if not self.share_features_extractor:
+            #     # Note(antonin): this is to keep SB3 results
+            #     # consistent, see GH#1148
+            #     del module_gains[self.features_extractor]
+            #     module_gains[self.pi_features_extractor] = np.sqrt(2)
+            #     module_gains[self.vf_features_extractor] = np.sqrt(2)
+
+            for module, gain in module_gains.items():
+                module.apply(partial(self.init_weights, gain=gain))
+
+        # Setup optimizer with initial learning rate
+        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)
+
+    def _build_mlp_extractor(self) -> None:
+        if not self.popstar:
+            self.mlp_extractor = MaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
+        else:
+            self.mlp_extractor = POPSTARMaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
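forward_actor above masks invalid swap pairs by overwriting their logits with -inf before the categorical distribution is built, so masked actions get zero probability. A toy illustration of that pattern (standalone, not repo code):

import torch as th

logits = th.randn(1, 6)                                  # scores for 6 candidate swap pairs
valid = th.tensor([[True, True, False, True, False, False]])

masked = th.where(valid, logits, th.full_like(logits, -th.inf))
print(masked.softmax(dim=-1))                            # invalid pairs get probability 0
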
facility_location/agent/solver.py
ADDED
@@ -0,0 +1,33 @@
+from typing import Text
+
+import numpy as np
+import pulp
+from spopt.locate import PMedian
+
+from facility_location.env import EvalPMPEnv
+
+
+class PMPSolver:
+    def __init__(self, solver: Text, env: EvalPMPEnv):
+        if solver == 'GUROBI':
+            self._solver = pulp.GUROBI(msg=False)
+        elif solver == 'GUROBI_CMD':
+            self._solver = pulp.GUROBI_CMD(msg=False)
+        elif solver == 'PULP_CBC_CMD':
+            self._solver = pulp.PULP_CBC_CMD(msg=False)
+        elif solver == 'GLPK_CMD':
+            self._solver = pulp.GLPK_CMD(msg=False)
+        elif solver == 'MOSEK':
+            self._solver = pulp.MOSEK(msg=False)
+        else:
+            raise ValueError(f'Solver {solver} not supported.')
+
+        self.env = env
+
+    def solve(self):
+        _, demands, _, p = self.env.get_instance()
+        distance_matrix, _ = self.env.get_distance_and_cost()
+        pmedian_from_cost_matrix = PMedian.from_cost_matrix(distance_matrix, demands, p_facilities=p)
+        pmedian_from_cost_matrix = pmedian_from_cost_matrix.solve(self._solver)
+        solution = np.array([len(temp) > 0 for temp in pmedian_from_cost_matrix.fac2cli], dtype=bool)
+        return solution
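PMPSolver delegates the exact p-median solve to spopt and reads the chosen sites off fac2cli. A toy end-to-end call in the same style, assuming spopt and PuLP's bundled CBC are installed (the 3x3 matrix and weights are made up for illustration):

import numpy as np
import pulp
from spopt.locate import PMedian

cost = np.array([[0.0, 2.0, 5.0],
                 [2.0, 0.0, 3.0],
                 [5.0, 3.0, 0.0]])       # demand-point x candidate-site distances
weights = np.array([10.0, 1.0, 4.0])     # demand at each point

model = PMedian.from_cost_matrix(cost, weights, p_facilities=1)
model = model.solve(pulp.PULP_CBC_CMD(msg=False))
chosen = np.array([len(clients) > 0 for clients in model.fac2cli], dtype=bool)
print(chosen)                            # boolean mask over candidate sites, as in solve()
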
facility_location/cfg/__init__.py
ADDED
File without changes
facility_location/cfg/plot.yaml
ADDED
@@ -0,0 +1,64 @@
+
+env_specs:
+  region:
+    min_n: 20
+    max_n: 50
+    min_p_ratio: 0.1
+    max_p_ratio: 0.4
+    max_steps_scale: 0.5
+    tabu_time: 3
+    tabu_stable_steps_scale: 0.2
+    popstar: false
+
+# evaluation
+eval_specs:
+  region:
+    seed: 12345
+    max_nodes: 2488
+    max_edges: 5000
+    val_num_cases: 100
+    test_num_cases: 1
+    val_np: !!python/tuple [50,5]
+    test_np:
+      - !!python/tuple [2214,36]
+      - !!python/tuple [2214,189]
+      - !!python/tuple [2214,425]
+# agent
+agent_specs:
+  policy_feature_dim: 32
+  value_feature_dim: 32
+  policy_hidden_units: !!python/tuple [32, 32, 1]
+  value_hidden_units: !!python/tuple [32, 32, 1]
+
+# mlp
+mlp_specs:
+  hidden_units: !!python/tuple [32, 32]
+
+gnn_specs:
+  num_gnn_layers: 2
+  node_dim: 32
+
+
+# ts
+ts_specs:
+  max_steps_scale: 2
+  stable_iterations_scale: 0.2
+
+
+# popstar
+popstar_specs:
+  graspit: 32
+  elite: 10
+
+
+# ga
+ga_specs:
+  num_generations: 100
+  num_parents_mating: 50
+  sol_per_pop: 100
+  parent_selection_type: sss
+  crossover_probability: 0.8
+  mutation_probability: 0.1
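The !!python/tuple tags above are PyYAML-specific, so this file cannot be read with yaml.safe_load. A minimal loading sketch (the path is this repo's; UnsafeLoader is one loader that accepts the tag, and should only be used on trusted files like this one):

import yaml

with open('facility_location/cfg/plot.yaml') as f:
    cfg = yaml.load(f, Loader=yaml.UnsafeLoader)  # safe_load rejects !!python/tuple

print(cfg['agent_specs']['policy_hidden_units'])  # (32, 32, 1) as a Python tuple
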
facility_location/env/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .pmp import PMPEnv, EvalPMPEnv, MULTIPMP
+
+__all__ = ['PMPEnv', 'EvalPMPEnv', 'MULTIPMP']
facility_location/env/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (241 Bytes)

facility_location/env/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (242 Bytes)

facility_location/env/__pycache__/facility_location_client.cpython-310.pyc
ADDED
Binary file (10.2 kB)

facility_location/env/__pycache__/facility_location_client.cpython-39.pyc
ADDED
Binary file (10.2 kB)

facility_location/env/__pycache__/obs_extractor.cpython-310.pyc
ADDED
Binary file (6.73 kB)

facility_location/env/__pycache__/obs_extractor.cpython-39.pyc
ADDED
Binary file (6.75 kB)

facility_location/env/__pycache__/pmp.cpython-310.pyc
ADDED
Binary file (19.7 kB)

facility_location/env/__pycache__/pmp.cpython-39.pyc
ADDED
Binary file (17.8 kB)
facility_location/env/facility_location_client.py
ADDED
@@ -0,0 +1,278 @@
+import warnings
+from typing import Tuple, Dict
+
+import networkx as nx
+import numpy as np
+from geopandas import GeoDataFrame
+from shapely.geometry import MultiPoint
+from libpysal.weights.contiguity import Voronoi as Voronoi_weights
+from sklearn.neighbors import kneighbors_graph
+from sklearn.metrics import pairwise_distances
+
+from facility_location.utils.config import Config
+import time
+
+class FacilityLocationClient:
+    def __init__(self, cfg: Config, rng: np.random.Generator):
+        self.cfg = cfg
+        self.rng = rng
+        self._cfg_tabu_time = cfg.env_specs['tabu_time']
+        self._t = 0
+
+    def set_instance(self, points: np.ndarray, demands: np.ndarray, n: int, p: int, real: bool) -> None:
+        self._points = points
+        self._demands = demands
+        points_geom = MultiPoint(points)
+        self._gdf = GeoDataFrame({
+            'geometry': points_geom.geoms,
+            'demand': demands,
+        })
+        self._n = n
+        self._p = p
+        self._old_facility_mask = np.zeros(self._n, dtype=bool)
+        self._new_facility_mask = np.zeros(self._n, dtype=bool)
+        self._construct_static_graph()
+
+        if real:
+            self._distance_matrix = pairwise_distances(points, metric='haversine')
+        else:
+            self._distance_matrix = pairwise_distances(points, metric='euclidean')
+        self._cost_matrix = self._distance_matrix * self._demands[:, None]
+        self._gain = np.zeros(self._n)
+        self._loss = np.zeros(self._n)
+        self._add_time = np.full(self._n, -np.inf)
+        self._drop_time = np.full(self._n, -np.inf)
+        self.reset_tabu_time()
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        return self._points, self._demands, self._n, self._p
+
+    def get_distance_and_cost_matrix(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._distance_matrix, self._cost_matrix
+
+    def get_avg_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        avg_distance = self._distance_matrix.sum(axis=-1)/(self._n - 1)
+        avg_cost = self._cost_matrix.sum(axis=-1)/(self._n - 1)
+        return avg_distance, avg_cost
+
+    def _construct_static_graph(self) -> None:
+        self._connection_matrix = kneighbors_graph(self._points, n_neighbors=3, mode="connectivity").toarray()
+        self._static_graph = nx.from_numpy_matrix(self._connection_matrix)
+        self._static_edges = np.array(self._static_graph.edges(), dtype=np.int64)
+
+    def _construct_dynamic_graph(self) -> None:
+        t1 = time.time()
+        try:
+            solution_distace_min = np.partition(self._distance_matrix[:, self._solution][self._solution, :], 3, axis=-1)[:,2]
+        except:
+            raise ValueError('stop')
+        solution_distance_matrix = np.zeros((self._n, self._n))
+        solution_distance_matrix[:, self._solution] = solution_distace_min
+        solution_knearest_matrix = np.logical_and(self._distance_matrix < solution_distance_matrix, self._distance_matrix > 0)
+        old_tabu_mask, new_tabu_mask = self.get_tabu_mask(self._t)
+        solution_matrix = np.logical_and(np.logical_and(self._solution, old_tabu_mask)[:, None], (np.logical_and(~self._solution, new_tabu_mask)[None, :]))
+        solution_matrix = np.logical_or(solution_matrix, solution_matrix.T)
+        gainloss_matrix = np.logical_and((self._gain[:, None] > self._loss[None, :]), self._loss[None, :] > 0)
+        graph_matrix = np.logical_and(solution_matrix, np.logical_or(gainloss_matrix, solution_knearest_matrix))
+
+        if not np.any(graph_matrix):
+            if np.any(solution_matrix):
+                graph_matrix = solution_matrix
+                if not np.any(graph_matrix):
+                    raise ValueError('Invalid graph_matrix')
+            else:
+                graph_matrix = self._solution[:, None] ^ self._solution[None, :]
+        self._dynamic_graph = nx.from_numpy_matrix(graph_matrix)
+        self._dynamic_edges = np.array(self._dynamic_graph.edges(), dtype=np.int64)
+
+        t2 = time.time()
+        # print('dynamic graph time:',t2-t1)
+
+    def get_static_adjacency_list(self) -> np.ndarray:
+        return self._static_edges
+
+    def get_dynamic_adjacency_list(self) -> np.ndarray:
+        return self._dynamic_edges
+
+    def compute_initial_solution(self) -> Tuple[float, np.ndarray]:
+        self._solution = np.zeros(self._n, dtype=bool)
+        p_0 = self._demands.argmax()
+        self._solution[p_0] = True
+        for _ in range(self._p - 1):
+            p_max_cost = self._cost_matrix[:, self._solution].min(axis=-1).argmax()
+            self._solution[p_max_cost] = True
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        self._old_facility_mask = self._solution
+        self._new_facility_mask = ~self._solution
+        return self.compute_obj_value(), self._solution
+
+    def compute_obj_value(self) -> float:
+        obj_value = self._cost_matrix[:, self._solution].min(axis=-1).sum()
+        return obj_value
+
+    def compute_obj_value_from_solution(self, solution) -> float:
+        self._solution = solution
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        obj_value = self.compute_obj_value()
+        return obj_value
+
+    # def swap(self, old_facility: int, new_facility: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+    #     if old_facility >= self._n or not self._solution[old_facility]:
+    #         warn_msg = f'Old facility {old_facility} is not a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         old_facility = self.rng.choice(np.arange(self._n)[self._solution])
+    #     if new_facility >= self._n or self._solution[new_facility]:
+    #         warn_msg = f'New facility {new_facility} is already a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         new_facility = self.rng.choice(np.arange(self._n)[~self._solution])
+    #     self._solution[old_facility] = False
+    #     self._solution[new_facility] = True
+    #     self._drop_time[old_facility] = t
+    #     self._add_time[new_facility] = t
+    #     self._t = t
+    #     return self.compute_obj_value(), self._solution, {}
+
+    def swap(self, facility_pair_index: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+        facility_pair = self._dynamic_edges[facility_pair_index]
+        facility1 = facility_pair[0]
+        facility2 = facility_pair[1]
+
+        if (not self._solution[facility1]) and (self._solution[facility2]):
+            new_facility = facility1
+            old_facility = facility2
+        elif (not self._solution[facility2]) and (self._solution[facility1]):
+            new_facility = facility2
+            old_facility = facility1
+        else:
+            raise ValueError('stop')
+
+        self._solution[old_facility] = False
+        self._solution[new_facility] = True
+        self._old_facility_mask[new_facility] = True
+        self._new_facility_mask[old_facility] = True
+        self._drop_time[old_facility] = t
+        self._add_time[new_facility] = t
+        self._t = t
+        self._update_env(new_facility, old_facility)
+        # print('st:',self._t)
+        return self.compute_obj_value(), self._solution, {}
+
+    def get_tabu_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_tabu_mask = self._add_time < t - self._drop_tabu_time
+        new_tabu_mask = self._drop_time < t - self._add_tabu_time
+        return old_tabu_mask, new_tabu_mask
+
+    def reset_tabu_time(self) -> None:
+        self._t = 0
+        if self._cfg_tabu_time <= 0:
+            self._add_tabu_time = 0
+            self._drop_tabu_time = 0
+        else:
+            self._add_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+            self._drop_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+
+    def get_current_solution(self) -> np.ndarray:
+        return self._solution
+
+    def set_solution(self, solution: np.ndarray) -> None:
+        self._solution = solution
+
+    def get_current_distance(self) -> np.ndarray:
+        dis2poi = self._distance_matrix[:, self._solution]
+        if self._p > 2:
+            dis = np.partition(dis2poi, 2, axis=-1)[:,:2]
+        else:
+            dis = dis2poi.min(axis=-1)
+            dis = np.stack([dis, dis], axis=-1)
+        return dis
+
+    def get_current_cost(self) -> np.ndarray:
+        cost2poi = self._cost_matrix[:, self._solution]
+        if self._p > 2:
+            cost = np.partition(cost2poi, 2, axis=-1)[:,:2]
+        else:
+            cost = cost2poi.min(axis=-1)
+            cost = np.stack([cost, cost], axis=-1)
+        return cost
+
+    def get_gain_and_loss(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._gain, self._loss
+
+    def get_gdf_facilities(self) -> Tuple[GeoDataFrame, np.ndarray]:
+        solution = self._solution
+        facilities = np.arange(self._n)[solution]
+        gdf = self._gdf.copy()
+        gdf['facility'] = False
+        gdf.loc[facilities, 'facility'] = True
+        node2facility = np.arange(self._n)[solution][self._cost_matrix[:, solution].argmin(axis=-1)]
+        gdf['assignment'] = node2facility
+        return gdf, facilities
+
+    def _init_env(self):
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+
+    def _update_env(self, insert_facility, remove_facility):
+        self._update_gain_and_loss(insert_facility, remove_facility)
+        self._construct_dynamic_graph()
+
+    def _init_gain_and_loss(self):
+        t1 = time.time()
+
+        for i in range(self._n):
+            _fake_solution = list(self._solution)
+            if self._solution[i]:
+                _fake_solution[i] = False
+                self._loss[i] = self._cost_matrix[:, _fake_solution].min(axis=-1).sum() - self._cost_matrix[:, self._solution].min(axis=-1).sum()
+                self._gain[i] = 0
+            else:
+                _fake_solution[i] = True
+                self._gain[i] = self._cost_matrix[:, self._solution].min(axis=-1).sum() - self._cost_matrix[:, _fake_solution].min(axis=-1).sum()
+                self._loss[i] = 0
+
+        self.argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:,:2]
+        t2 = time.time()
+        # print('init gainloss time:',t2-t1)
+
+    def _update_gain_and_loss(self, insert_facility, remove_facility):
+        t1 = time.time()
+
+        _pre_solution = list(self._solution)
+        _pre_solution[insert_facility] = False
+        _pre_solution[remove_facility] = True
+        pre_closest_demands2solution = self._cost_matrix[:, _pre_solution][np.arange(self._n)[:, None], self.argpartition]
+        argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:,:2]
+        closest_demands2solution = self._cost_matrix[:, self._solution][np.arange(self._n)[:, None], argpartition]
+
+        pre_solution_idx = np.where(_pre_solution)[0]
+        solution_idx = np.where(self._solution)[0]
+        for i in range(self._n):
+            if remove_facility in self.argpartition[i] or insert_facility in argpartition[i]:
+                self._loss[solution_idx[argpartition[i][0]]] += closest_demands2solution[i][1] - closest_demands2solution[i][0]
+                self._loss[pre_solution_idx[self.argpartition[i][0]]] -= pre_closest_demands2solution[i][1] - pre_closest_demands2solution[i][0]
+                # if self.argpartition[i][0] != argpartition[i][0]:
+                #     for j in range(self._n):
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][argpartition[i][0]]:
+                #             self._gain[j] += max(0, closest_demands2solution[i][0] - self._cost_matrix[i, j])
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][self.argpartition[i][0]]:
+                #             self._gain[j] -= max(0, pre_closest_demands2solution[i][0] - self._cost_matrix[i, j])
+
+        self._loss[remove_facility] = 0
+        self._gain[insert_facility] = 0
+
+        self.argpartition = list(argpartition)
+        # print(self._gain, self._loss)
+        t2 = time.time()
+        # print('update gainloss time:',t2-t1)
+
+    def init_facility_mask(self, old_facility, new_facility):
+        self._old_facility_mask[old_facility] = True
+        self._new_facility_mask[new_facility] = True
+
+    def get_facility_mask(self):
+        return self._old_facility_mask, self._new_facility_mask
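set_instance builds real-world distance matrices with sklearn's haversine metric, which expects (lat, lon) in radians and returns unit-sphere distances; that is why app.py applies np.deg2rad and scales by ~6371 km. A two-point check of this convention (coordinates taken from the examples earlier in the diff):

import numpy as np
from sklearn.metrics import pairwise_distances

latlon = np.deg2rad(np.array([[40.71, -73.93],
                              [40.57, -73.96]]))
dist_km = pairwise_distances(latlon, metric='haversine') * 6371
print(dist_km[0, 1])  # roughly 15.8 km
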
facility_location/env/obs_extractor.py
ADDED
@@ -0,0 +1,184 @@
+from typing import Dict, Tuple, Text
+
+import numpy as np
+
+from facility_location.env.facility_location_client import FacilityLocationClient
+from facility_location.utils.config import Config
+
+
+class ObsExtractor:
+    def __init__(self, cfg: Config, flc: FacilityLocationClient, node_range: int, edge_range: int):
+        self.cfg = cfg
+        self._flc = flc
+        self._node_range = node_range
+        self._edge_range = edge_range
+
+        self._construct_virtual_node_feature()
+        self._construct_node_features()
+        self._construct_action_mask()
+
+    def _construct_virtual_node_feature(self) -> None:
+        virtual_node_facility = 0
+        virtual_node_distance_min = 0
+        virtual_node_distance_sub_min = 0
+        virtual_node_cost_min = 0
+        virtual_node_cost_sub_min = 0
+        virtual_gain = 0
+        virtual_loss = 0
+
+        virtual_node_x = 0.5
+        virtual_node_y = 0.5
+        virtual_node_demand = 1
+        virtual_node_avg_distance = 0
+        virtual_node_avg_cost = 0
+        self._virtual_dynamic_node_feature = np.array([
+            virtual_node_facility,
+            virtual_node_distance_min,
+            virtual_node_distance_sub_min,
+            virtual_node_cost_min,
+            virtual_node_cost_sub_min,
+            virtual_gain,
+            virtual_loss,
+        ], dtype=np.float32)
+        self._virtual_static_node_feature = np.array([
+            virtual_node_x,
+            virtual_node_y,
+            virtual_node_demand,
+            virtual_node_avg_distance,
+            virtual_node_avg_cost,
+        ], dtype=np.float32)
+        self._virtual_node_feature = np.concatenate([
+            self._virtual_dynamic_node_feature,
+            self._virtual_static_node_feature,
+        ], axis=-1)
+
+    def _construct_node_features(self) -> None:
+        self._node_features = np.zeros((self._node_range, self._virtual_node_feature.size), dtype=np.float32)
+
+    def _construct_action_mask(self) -> None:
+        self._old_facility_mask = np.full(self._node_range, False)
+        self._new_facility_mask = np.full(self._node_range, False)
+
+    def get_node_dim(self) -> int:
+        return self._virtual_node_feature.size
+
+    def reset(self) -> None:
+        self._compute_static_obs()
+        self._reset_node_features()
+        self._reset_action_mask()
+
+    def _compute_static_obs(self) -> None:
+        xy, demands, n, _ = self._flc.get_instance()
+        if n + 2 > self._node_range:
+            print(n, self._node_range)
+            # raise ValueError('The number of nodes exceeds the maximum limit.')
+        self._n = n
+        avg_distance, avg_cost = self._flc.get_avg_distance_and_cost()
+        avg_distance = avg_distance / np.max(avg_distance)
+        avg_cost = avg_cost / np.max(avg_cost)
+        self._static_node_features = np.stack([
+            xy[:, 0],
+            xy[:, 1],
+            demands,
+            avg_distance,
+            avg_cost,
+        ], axis=-1).astype(np.float32)
+        static_adjacency_list = self._flc.get_static_adjacency_list()
+
+        obs_node_mask = np.full(1 + n, True)
+        self._obs_node_mask = self._pad_mask(obs_node_mask, self._node_range, 'nodes')
+
+        obs_static_edge_mask = np.full(n + static_adjacency_list.shape[0], True)
+        self._obs_static_edge_mask = self._pad_mask(obs_static_edge_mask, self._edge_range, 'edges')
+
+        self._static_adjacency_list = self._pad_edge(static_adjacency_list)
+
+    def _reset_node_features(self) -> None:
+        self._node_features[:, :] = 0
+        self._node_features[0] = self._virtual_node_feature
+        self._node_features[1:self._n+1, len(self._virtual_dynamic_node_feature):] = self._static_node_features
+
+    def _reset_action_mask(self) -> None:
+        self._old_facility_mask[:] = False
+        self._new_facility_mask[:] = False
+
+    def get_obs(self, t: int) -> Dict:
+        obs_nodes, obs_static_edges, obs_dynamic_edges, \
+            obs_node_mask, obs_static_edge_mask, obs_dynamic_edges_mask = self._get_obs_graph()
+        obs = {
+            'node_features': obs_nodes,
+            'static_adjacency_list': obs_static_edges,
+            'dynamic_adjacency_list': obs_dynamic_edges,
+            'node_mask': obs_node_mask,
+            'static_edge_mask': obs_static_edge_mask,
+            'dynamic_edge_mask': obs_dynamic_edges_mask,
+        }
+
+        return obs
+
+    def _get_obs_graph(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        facility = self._flc.get_current_solution().astype(np.float32)
+        distance = self._flc.get_current_distance().astype(np.float32)
+        distance = distance / np.max(distance)
+        cost = self._flc.get_current_cost().astype(np.float32)
+        cost = cost / np.max(cost)
+        gain, loss = self._flc.get_gain_and_loss()
+        gain = gain / np.max(gain)
+        loss = loss / np.max(loss)
+        dynamic_node_features = np.stack([
+            facility,
+            distance[:, 0],
+            distance[:, 1],
+            cost[:, 0],
+            cost[:, 1],
+            gain,
+            loss,
+        ], axis=-1)
+        self._node_features[1:self._n+1, :len(self._virtual_dynamic_node_feature)] = dynamic_node_features
+        obs_nodes = self._node_features
+        obs_static_edges = self._static_adjacency_list
+        obs_dynamic_edges = self._flc.get_dynamic_adjacency_list()
+        # print(obs_dynamic_edges.shape)
+        obs_dynamic_edge_mask = np.full(obs_dynamic_edges.shape[0], True)
+        obs_node_mask = self._obs_node_mask
+        obs_static_edge_mask = self._obs_static_edge_mask
+        obs_dynamic_edges = self._pad_edge_wo_virtual(obs_dynamic_edges)
+        obs_dynamic_edge_mask = self._pad_mask(obs_dynamic_edge_mask, self._edge_range, 'edges')
+
+        return obs_nodes, obs_static_edges, obs_dynamic_edges, obs_node_mask, obs_static_edge_mask, obs_dynamic_edge_mask
+        # return obs_nodes, obs_static_edges, obs_node_mask, obs_edge_mask
+
+    def _get_obs_action_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_facility_mask, new_facility_mask = self._flc.get_facility_mask()
+        old_tabu_mask, new_tabu_mask = self._flc.get_tabu_mask(t)
+        self._old_facility_mask[1:self._n+1] = np.logical_and(old_facility_mask, old_tabu_mask)
+        self._new_facility_mask[1:self._n+1] = np.logical_and(new_facility_mask, new_tabu_mask)
+        obs_old_facility_mask = self._old_facility_mask
+        obs_new_facility_mask = self._new_facility_mask
+        if not np.any(obs_old_facility_mask) or not np.any(obs_new_facility_mask):
+            raise ValueError('The action mask is empty.')
+        return obs_old_facility_mask, obs_new_facility_mask
+
+    @staticmethod
+    def _pad_mask(mask: np.ndarray, max_num: int, name: Text) -> np.ndarray:
+        pad = (0, max_num - mask.size)
+        if pad[1] < 0:
+            raise ValueError(f'The number of {name} exceeds the maximum limit.')
+        return np.pad(mask, pad, mode='constant', constant_values=False)
+
+    def _pad_edge(self, edge: np.ndarray) -> np.ndarray:
+        virtual_edge = np.stack([np.zeros(self._n), np.arange(1, self._n + 1)], axis=-1).astype(np.int32)
+        edge = np.concatenate([virtual_edge, edge + 1], axis=0)
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            raise ValueError('The number of edges exceeds the maximum limit.')
+        return np.pad(edge, pad, mode='constant', constant_values=self._node_range - 1)
+
+    def _pad_edge_wo_virtual(self, edge: np.ndarray) -> np.ndarray:
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            print(self._edge_range, edge.shape[0])
+            raise ValueError('The number of edges exceeds the maximum limit.')
+
+        return np.pad(edge + 1, pad, mode='constant', constant_values=self._node_range - 1)
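
Reviewer note: ObsExtractor pads every instance to fixed node_range/edge_range shapes so that differently sized instances share one observation space: index 0 is a virtual node linked to every real node, real indices shift by 1, padding rows point at the dummy index node_range - 1, and boolean masks mark which entries are real. A self-contained sketch of that pad-and-mask pattern with illustrative shapes:

import numpy as np

NODE_RANGE, EDGE_RANGE = 10, 16
edges = np.array([[0, 1], [1, 2], [2, 0]])   # 3 real edges over 3 real nodes
n = 3

# Shift real indices by 1 so index 0 is free for the virtual node, then link
# the virtual node to every real node, as _pad_edge does.
virtual = np.stack([np.zeros(n, dtype=int), np.arange(1, n + 1)], axis=-1)
padded = np.concatenate([virtual, edges + 1], axis=0)
# Padding rows point at the dummy index NODE_RANGE - 1 and get masked out.
padded = np.pad(padded, ((0, EDGE_RANGE - padded.shape[0]), (0, 0)),
                mode='constant', constant_values=NODE_RANGE - 1)

real = len(virtual) + len(edges)             # only these entries carry information
mask = np.pad(np.full(real, True), (0, EDGE_RANGE - real),
              mode='constant', constant_values=False)
assert padded.shape == (EDGE_RANGE, 2) and mask.sum() == real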
facility_location/env/pmp.py
ADDED
@@ -0,0 +1,502 @@
+import io
+import warnings
+from typing import Tuple, Dict, Optional, List, Text
+
+import gym
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import pickle, os
+
+from numpy import ndarray
+
+from facility_location.utils.config import Config
+from facility_location.env.facility_location_client import FacilityLocationClient
+from facility_location.env.obs_extractor import ObsExtractor
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
+from facility_location.utils.policy import get_policy_kwargs
+
+
+class PMPEnv(gym.Env):
+    EPSILON = 1e-6
+
+    def __init__(self, cfg: Config):
+        self.cfg = cfg
+        self._train_region = cfg.env_specs['region']
+        self._eval_region = cfg.eval_specs['region']
+        self._min_n = cfg.env_specs['min_n']
+        self._max_n = cfg.env_specs['max_n']
+        self._min_p_ratio = cfg.env_specs['min_p_ratio']
+        self._max_p_ratio = cfg.env_specs['max_p_ratio']
+        self._max_steps_scale = cfg.env_specs['max_steps_scale']
+        self._tabu_stable_steps_scale = cfg.env_specs['tabu_stable_steps_scale']
+        self._popstar = cfg.env_specs['popstar']
+
+        self._seed(cfg.seed)
+
+        self._done = False
+
+        self._set_node_edge_range()
+
+        self._flc = FacilityLocationClient(cfg, self._np_random)
+        self._obs_extractor = ObsExtractor(cfg, self._flc, self._node_range, self._edge_range)
+
+        self._declare_spaces()
+
+    def _declare_spaces(self) -> None:
+        self.observation_space = gym.spaces.Dict({
+            'node_features': gym.spaces.Box(low=0, high=1, shape=(self._node_range, self.get_node_feature_dim())),
+            'static_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64),
+            'dynamic_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64),
+            'node_mask': gym.spaces.Box(low=0, high=1, shape=(self._node_range,), dtype=bool),
+            'static_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=bool),
+            'dynamic_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=bool),
+        })
+        # Both branches currently declare the same flat (old, new) swap-action space.
+        if not self._popstar:
+            self.action_space = gym.spaces.Discrete(self._node_range ** 2)
+        else:
+            self.action_space = gym.spaces.Discrete(self._node_range ** 2)
+
+    def _set_node_edge_range(self) -> None:
+        self._node_range = self._max_n + 2
+        self._edge_range = int(self._max_n ** 2 * self._max_p_ratio)
+
+    def get_node_feature_dim(self) -> int:
+        return self._obs_extractor.get_node_dim()
+
+    def _seed(self, seed: int) -> None:
+        self._np_random = np.random.default_rng(seed)
+
+    def get_reward(self) -> float:
+        reward = self._obj_value[self._t - 1] - self._obj_value[self._t]
+        return reward
+
+    def _transform_action(self, action: np.ndarray) -> np.ndarray:
+        if self._popstar:
+            action = np.array(np.unravel_index(action, (self._node_range, self._node_range)))
+        action = action - 1
+        return action
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        # if self._done:
+        #     print('done')
+        #     for i in range(self._t):
+        #         print(f'{i}:', np.where(self._solution[i]))
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Tuple[Dict, Dict]:
+        if self._train_region is None:
+            points, demands, n, p = self._generate_new_instance()
+            self._flc.set_instance(points, demands, n, p, False)
+        else:
+            points, demands, n, p = self._use_real_instance()
+            self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p), {}
+
+    def prepare(self, n: int, p: int) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def render(self, mode='human', dpi=300) -> Optional[np.ndarray]:
+        gdf, facilities = self._flc.get_gdf_facilities()
+        if len(facilities) > 10:
+            warnings.warn('Too many facilities to render. Only rendering the first 10.')
+            facilities = facilities[:10]
+
+        cm = plt.get_cmap('tab10')
+        fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=dpi)
+        for i, f in enumerate(facilities):
+            gdf.loc[gdf['assignment'] == f].plot(ax=axs[0], zorder=2, alpha=0.7,
+                                                 edgecolor="k", color=cm(i))
+            gdf.loc[[f]].plot(ax=axs[0], marker='*', markersize=300, zorder=3,
+                              alpha=0.7, edgecolor="k", color=cm(i))
+        axs[0].set_title("Facility Location", fontweight="bold")
+        plot_obj_value = self._obj_value[:self._t + 1]
+        axs[1].plot(plot_obj_value, marker='.', markersize=10, color='k')
+        axs[1].set_title("Objective Value", fontweight="bold")
+        axs[1].set_xticks(np.arange(self._max_steps + 1, step=math.ceil((self._max_steps + 1) / 10)))
+        fig.tight_layout()
+
+        if mode == 'human':
+            plt.show()
+        else:
+            io_buf = io.BytesIO()
+            fig.savefig(io_buf, format='raw', dpi=dpi)
+            io_buf.seek(0)
+            img_arr = np.reshape(np.frombuffer(io_buf.getvalue(), dtype=np.uint8),
+                                 newshape=(int(fig.bbox.bounds[3]), int(fig.bbox.bounds[2]), -1))
+            io_buf.close()
+            return img_arr
+
+    def close(self):
+        plt.close()
+
+    def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
+        p = int(max(n * p_ratio, 4))
+
+        points = self._np_random.uniform(size=(n, 2))
+        while np.unique(points, axis=0).shape[0] != n:
+            points = self._np_random.uniform(size=(n, 2))
+        demands = self._np_random.random(size=(n,))
+        return points, demands, n, p
+
+    def _use_real_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        data_path = './data/{}/pkl'.format(self.cfg.eval_specs['region'])
+        files = os.listdir(data_path)
+        files = [f for f in files if f.endswith('.pkl')]
+        sample_data_path = os.path.join(data_path, files[self._np_random.integers(len(files))])
+        with open(sample_data_path, 'rb') as f:
+            np_data = pickle.load(f)
+
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p = max(int(n * self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)), 4)
+        sample_cbgs = self._np_random.choice(list(np_data[1].keys()), n, replace=False)
+        points = []
+        demands = []
+        for cbg in sample_cbgs:
+            points.append(np_data[1][cbg]['pos'])
+            demands.append(np_data[1][cbg]['demand'])
+        points = np.array(points)
+        demands = np.array(demands)
+
+        return points, demands, n, p
+
+    def _get_obs(self, t: int) -> Dict:
+        return self._obs_extractor.get_obs(t)
+
+    def get_initial_solution(self) -> np.ndarray:
+        return self._solution[0]
+
+
+class EvalPMPEnv(PMPEnv):
+    def __init__(self, cfg: Config, positions, demands, n, p, boost=False):
+        self._eval_np = (n, p)
+        self._eval_seed = cfg.eval_specs['seed']
+        self._boost = boost
+        print(self._boost)
+        self.points = positions
+        self.demands = demands
+        self._n = n
+        self._p = p
+
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        n, p = self._eval_np
+
+        self._node_range = n + 2
+        self._edge_range = n * p
+
+    def get_eval_num_cases(self) -> int:
+        # Note: _eval_num_cases is never assigned in this class.
+        return self._eval_num_cases
+
+    def get_eval_np(self) -> Tuple[int, int]:
+        return self._eval_np
+
+    def reset_instance_id(self) -> None:
+        self._instance_id = 0
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+        print(self._t, self._max_steps)
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = -np.min(self._obj_value)
+        else:
+            reward = 0.0
+
+        return reward
+
+    def get_best_solution(self) -> np.ndarray:
+        return self._best_solution
+
+    def reset(self, seed=0) -> Tuple[Dict, Dict]:
+        self._flc.set_instance(self.points, self.demands, self._n, self._p, False)
+        return self.prepare(self._n, self._p, self._boost), {}
+
+    def prepare(self, n: int, p: int, boost: bool) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        if boost:
+            self._max_steps = max(int(self._max_steps_scale / 10), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        points, demands, n, p = self._flc.get_instance()
+        return points, demands, n, p
+
+    def get_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._flc.get_distance_and_cost_matrix()
+
+    def evaluate(self, solution: np.ndarray) -> float:
+        self._flc.set_solution(solution)
+        obj_value = self._flc.compute_obj_value()
+        return obj_value
+
+
+class MULTIPMP(PMPEnv):
+    EPSILON = 1e-6
+
+    def __init__(self, cfg, data_npy, boost=False):
+        self.cfg = cfg
+        self.data_npy = data_npy
+        self._boost = boost
+        self._all_points, self._all_demands, self._n, self._all_p = self._load_multi_facility_data(data_npy)
+        self.boost = boost
+        self._all_solutions = self._load_multi_facility_solutions(boost)
+        self._final_solutions = list(self._all_solutions)
+        self._num_types = len(self._all_p)
+        self._current_type = 0
+        self._all_max_steps, self._old_mask, self._new_mask = self._get_max_steps()
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        self._node_range = self._n + 2
+        self._edge_range = self._n * max(self._all_p)
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._all_max_steps[-1] and self._current_type == len(self._all_max_steps) - 1)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        if self._t == self._all_max_steps[self._current_type] and not self._done:
+            self._t = 0
+            self._multi_obj += obj_value
+            self._final_solutions[self._current_type] = solution
+            self._update_type()
+
+        if self._done:
+            with open('./facility_location/solutions.pkl', 'wb') as f:
+                pickle.dump(self._final_solutions, f)
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Tuple[Dict, Dict]:
+        self._current_type = 0
+        points = self._all_points
+        demands = self._all_demands[:, 0]
+        n = self._n
+        p = self._all_p[0]
+        solution = self._all_solutions[0]
+        self._multi_obj = 0
+
+        self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p, solution), {}
+
+    def _update_type(self):
+        if self._current_type >= self._num_types:
+            raise RuntimeError('Action taken after episode is done.')
+        self._current_type += 1
+        if self._current_type < self._num_types - 1:
+            points = self._all_points
+            demands = self._all_demands[:, self._current_type]
+            n = self._n
+            p = self._all_p[self._current_type]
+            solution = self._all_solutions[self._current_type]
+            self._flc.set_instance(points, demands, n, p, True)
+            self.prepare(n, p, solution)
+
+    def prepare(self, n: int, p: int, solution: list) -> Dict:
+        initial_solution = solution
+        initial_obj_value = self._flc.compute_obj_value_from_solution(initial_solution)
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = self._all_max_steps[self._current_type]
+        self._flc.init_facility_mask(self._old_mask[self._current_type], self._new_mask[self._current_type])
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def _get_max_steps(self) -> Tuple[list, list, list]:
+        tmp_all_solutions = list(self._all_solutions)
+        count_true = [sum(s) for s in zip(*tmp_all_solutions)]
+        max_steps = []
+        old_idx = []
+        new_idx = []
+        for t in range(self._num_types):
+            old = [i for i in range(len(count_true)) if count_true[i] > 1 and tmp_all_solutions[t][i]]
+            new = [i for i in range(len(count_true)) if count_true[i] == 0]
+            if len(old):
+                old_idx.append(old)
+                new_idx.append(new)
+                max_steps.append(len(old))
+                for i in old:
+                    count_true[i] = count_true[i] - 1
+        return max_steps, old_idx, new_idx
+
+    def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        # Identical to the parent implementation.
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
+        p = int(max(n * p_ratio, 4))
+
+        points = self._np_random.uniform(size=(n, 2))
+        while np.unique(points, axis=0).shape[0] != n:
+            points = self._np_random.uniform(size=(n, 2))
+        demands = self._np_random.random(size=(n,))
+        return points, demands, n, p
+
+    def _load_multi_facility_data(self, data_npy) -> Tuple[np.ndarray, np.ndarray, int, np.ndarray]:
+        data = data_npy.split('\n')
+        n = len(data)
+        p = int((len(data[0].split(' ')) - 2) / 2)
+
+        positions = []
+        demands = []
+        actual_facilities = []
+        ps = []
+        for row in data:
+            row = row.split(' ')
+            row = [x for x in row if len(x)]
+            positions.append([float(row[0]), float(row[1])])
+
+            demand = []
+            for i in range(2, 2 + p):
+                demand.append(float(row[i]))
+            demands.append(demand)
+
+            actual_facility = []
+            for i in range(2 + p, 2 + 2 * p):
+                actual_facility.append(bool(int(float(row[i]))))
+            actual_facilities.append(actual_facility)
+
+        positions = np.array(positions)
+        positions = np.deg2rad(positions)
+        demands = np.array(demands)
+        actual_facilities = np.array(actual_facilities)
+        ps = actual_facilities.sum(axis=0)
+
+        return positions, demands, n, ps
+
+    def _load_multi_facility_solutions(self, boost) -> list:
+        def load_model(positions, demands, n, p, boost):
+            eval_env = EvalPMPEnv(self.cfg, positions, demands, n, p, boost)
+            eval_env = DummyVecEnv([lambda: eval_env])
+
+            policy_kwargs = get_policy_kwargs(self.cfg)
+            test_model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                             eval_env,
+                             verbose=1,
+                             policy_kwargs=policy_kwargs,
+                             device='cuda:1')  # hard-coded GPU
+            train_model = PPO.load(self.cfg.load_model_path)
+            test_model.set_parameters(train_model.get_parameters())
+            return test_model, eval_env
+
+        def get_optimal_solution(model, eval_env):
+            obs = eval_env.reset()
+            done = False
+            while not done:
+                action, _ = model.predict(obs, deterministic=True)
+                obs, _, done, info = eval_env.step(action)
+            return eval_env.get_attr('_best_solution')[0]
+
+        multi_solutions = []
+        for i in range(len(self._all_p)):
+            positions = self._all_points
+            demands = self._all_demands[:, i]
+            n = self._n
+            p = self._all_p[i]
+            model, env = load_model(positions, demands, n, p, boost)
+            multi_solutions.append(get_optimal_solution(model, env))
+
+        return multi_solutions
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = np.min(self._obj_value)
+        else:
+            reward = 0.0
+        return reward
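
Reviewer note: all three environments share one episode shape: reset seeds an initial solution, each step applies one (close, open) swap through the client, and the tabu clock resets after a stretch of non-improving steps. The reward schemes differ, though; a small worked check with a hypothetical objective trace:

import numpy as np

# PMPEnv.get_reward pays obj[t-1] - obj[t] each step, so an episode's return
# telescopes to the total improvement over the initial solution:
obj = np.array([12.0, 11.2, 11.5, 10.4])    # hypothetical objective trace
step_rewards = obj[:-1] - obj[1:]           # [0.8, -0.3, 1.1]
assert np.isclose(step_rewards.sum(), obj[0] - obj[-1])
# EvalPMPEnv instead pays -min(obj) once at episode end, scoring the best
# solution seen anywhere in the episode rather than the final one.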
facility_location/multi_eval.py
ADDED
@@ -0,0 +1,96 @@
+import os
+import pickle
+
+import setproctitle
+from absl import app, flags
+import time
+import random
+from typing import Tuple, Union, Text
+
+import numpy as np
+import torch as th
+
+import sys
+import gymnasium
+sys.modules["gym"] = gymnasium
+
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, VecEnvWrapper
+
+from facility_location.agent.solver import PMPSolver
+from facility_location.env import EvalPMPEnv, MULTIPMP
+from facility_location.utils import Config
+from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
+from facility_location.utils.policy import get_policy_kwargs
+
+import warnings
+warnings.filterwarnings('ignore')
+
+
+AGENT = Union[PMPSolver, PPO]
+
+def get_model(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              device: str) -> PPO:
+    policy_kwargs = get_policy_kwargs(cfg)
+    model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                env,
+                verbose=1,
+                policy_kwargs=policy_kwargs,
+                device=device)
+    return model
+
+
+def get_agent(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              model_path: Text) -> AGENT:
+    if cfg.agent in ['rl-mlp', 'rl-gnn', 'rl-agnn']:
+        test_model = get_model(cfg, env, device='cuda:0')
+        trained_model = PPO.load(model_path)
+        test_model.set_parameters(trained_model.get_parameters())
+        agent = test_model
+    else:
+        raise ValueError(f'Agent {cfg.agent} not supported.')
+    return agent
+
+
+def evaluate(agent: AGENT,
+             env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+             num_cases: int,
+             return_episode_rewards: bool):
+    if isinstance(agent, PPO):
+        return evaluate_ppo(agent, env, num_cases, return_episode_rewards=return_episode_rewards)
+    else:
+        raise ValueError(f'Agent {agent} not supported.')
+
+from stable_baselines3.common.callbacks import BaseCallback
+
+
+def evaluate_ppo(agent: PPO, env: EvalPMPEnv, num_cases: int, return_episode_rewards: bool) -> Tuple[float, float]:
+    rewards, _ = evaluate_policy(agent, env, n_eval_episodes=num_cases, return_episode_rewards=return_episode_rewards)
+    return rewards
+
+
+def main(data_npy, boost=False):
+    th.manual_seed(0)
+    np.random.seed(0)
+    random.seed(0)
+    model_path = './facility_location/best_model.zip'
+
+    cfg = Config('plot', 0, False, '/data2/suhongyuan/flp', 'rl-gnn', model_path=model_path)
+
+    eval_env = MULTIPMP(cfg, data_npy, boost)
+    eval_env = Monitor(eval_env)
+    eval_env = DummyVecEnv([lambda: eval_env])
+    agent = get_agent(cfg, eval_env, model_path)
+    start_time = time.time()
+    _ = evaluate(agent, eval_env, 1, return_episode_rewards=True)
+    eval_time = time.time() - start_time
+    print(f'\t time: {eval_time}')
+
+
+if __name__ == '__main__':
+    # Note: app.run passes the parsed argv list as main's first argument, so
+    # running this module directly hands argv (not instance text) to data_npy;
+    # main is intended to be called directly with the instance string.
+    app.run(main)
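
Reviewer note: _load_multi_facility_data fixes the expected data_npy layout: one whitespace-separated row per location, with two coordinates in degrees, then one demand column per facility type, then one 0/1 built-facility flag per type. A hypothetical three-location, two-type instance in that layout:

# Hypothetical instance: 3 locations, 2 facility types.
# Columns: lon lat demand_t0 demand_t1 built_t0 built_t1
data_npy = '\n'.join([
    '116.40 39.90 120.0 80.0 1 0',
    '116.42 39.92  60.0 40.0 0 1',
    '116.44 39.88  90.0 30.0 0 0',
])
# main(data_npy) then optimizes both facility types jointly; boost=True
# appears to shrink the per-type swap budget for a faster, rougher pass.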
facility_location/solutions.pkl
ADDED
Binary file (1.92 kB)
facility_location/utils/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .config import Config
+
+__all__ = ["Config"]
facility_location/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (209 Bytes)
facility_location/utils/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (210 Bytes)
facility_location/utils/__pycache__/config.cpython-310.pyc
ADDED
Binary file (4.09 kB)
facility_location/utils/__pycache__/config.cpython-39.pyc
ADDED
Binary file (4.61 kB)
facility_location/utils/__pycache__/policy.cpython-310.pyc
ADDED
Binary file (1.47 kB)
facility_location/utils/__pycache__/policy.cpython-39.pyc
ADDED
Binary file (1.47 kB)
facility_location/utils/config.py
ADDED
@@ -0,0 +1,133 @@
+import os
+from typing import Text, Dict
+
+from stable_baselines3.common.utils import get_latest_run_id
+
+import yaml
+
+
+class Config:
+
+    def __init__(self, cfg_id: Text, global_seed: int, tmp: bool, root_dir: Text,
+                 agent: Text = 'rl-gnn', reset_num_timesteps: bool = True, cfg_dict: Dict = None, model_path: Text = None):
+        self.cfg_id = cfg_id
+        self.seed = global_seed
+        if cfg_dict is not None:
+            cfg = cfg_dict
+        else:
+            file_path = './facility_location/cfg/{}.yaml'.format(self.cfg_id)
+
+            class TupleSafeLoader(yaml.SafeLoader):
+                def construct_python_tuple(self, node):
+                    return tuple(self.construct_sequence(node))
+
+            TupleSafeLoader.add_constructor(
+                u'tag:yaml.org,2002:python/tuple',
+                TupleSafeLoader.construct_python_tuple)
+
+            def load_yaml(file_path):
+                cfg = yaml.load(open(file_path, 'r'), Loader=TupleSafeLoader)
+                return cfg
+
+            cfg = load_yaml(file_path)
+        # create dirs
+        self.root_dir = '/tmp/flp' if tmp else root_dir
+        self.agent = agent
+        self.multi = cfg.get('multi', False)
+
+        self.tb_log_path = os.path.join(self.root_dir, 'runs')
+        self.tb_log_name = f'{cfg_id}-agent-{agent}-seed-{global_seed}'
+        latest_run_id = get_latest_run_id(self.tb_log_path, self.tb_log_name)
+        if not reset_num_timesteps:
+            # Continue training in the same directory
+            latest_run_id -= 1
+        self.cfg_dir = os.path.join(self.root_dir,
+                                    'output', f'{cfg_id}-agent-{agent}-seed-{global_seed}_{latest_run_id + 1}')
+        self.ckpt_save_path = os.path.join(self.cfg_dir, 'ckpt')
+        self.best_model_path = os.path.join(self.cfg_dir, 'best-models')
+        self.latest_model_path = os.path.join(self.cfg_dir, 'latest-models')
+        self.load_model_path = model_path
+
+        # env
+        self.env_specs = cfg.get('env_specs', dict())
+        self.reward_specs = cfg.get('reward_specs', dict())
+        self.obs_specs = cfg.get('obs_specs', dict())
+        self.eval_specs = cfg.get('eval_specs', dict())
+
+        # agent config
+        self.agent_specs = cfg.get('agent_specs', dict())
+        self.mlp_specs = cfg.get('mlp_specs', dict())
+        self.gnn_specs = cfg.get('gnn_specs', dict())
+        self.ts_specs = cfg.get('ts_specs', dict())
+        self.popstar_specs = cfg.get('popstar_specs', dict())
+        self.ga_specs = cfg.get('ga_specs', dict())
+
+        # training config
+        self.gamma = cfg.get('gamma', 0.99)
+        self.tau = cfg.get('tau', 0.95)
+        self.state_encoder_specs = cfg.get('state_encoder_specs', dict())
+        self.policy_specs = cfg.get('policy_specs', dict())
+        self.value_specs = cfg.get('value_specs', dict())
+        self.lr = cfg.get('lr', 4e-4)
+        self.weightdecay = cfg.get('weightdecay', 0.0)
+        self.eps = cfg.get('eps', 1e-5)
+        self.value_pred_coef = cfg.get('value_pred_coef', 0.5)
+        self.entropy_coef = cfg.get('entropy_coef', 0.01)
+        self.clip_epsilon = cfg.get('clip_epsilon', 0.2)
+        self.max_num_iterations = cfg.get('max_num_iterations', 1000)
+        self.num_episodes_per_iteration = cfg.get('num_episodes_per_iteration', 1000)
+        self.max_sequence_length = cfg.get('max_sequence_length', 100)
+        self.num_optim_epoch = cfg.get('num_optim_epoch', 4)
+        self.mini_batch_size = cfg.get('mini_batch_size', 1024)
+        self.save_model_interval = cfg.get('save_model_interval', 10)
+
+    def log(self, logger, tb_logger):
+        """Log cfg to logger and tensorboard."""
+        logger.info(f'id: {self.cfg_id}')
+        logger.info(f'seed: {self.seed}')
+        logger.info(f'env_specs: {self.env_specs}')
+        logger.info(f'reward_specs: {self.reward_specs}')
+        logger.info(f'obs_specs: {self.obs_specs}')
+        logger.info(f'agent_specs: {self.agent_specs}')
+        logger.info(f'gamma: {self.gamma}')
+        logger.info(f'tau: {self.tau}')
+        logger.info(f'state_encoder_specs: {self.state_encoder_specs}')
+        logger.info(f'policy_specs: {self.policy_specs}')
+        logger.info(f'value_specs: {self.value_specs}')
+        logger.info(f'lr: {self.lr}')
+        logger.info(f'weightdecay: {self.weightdecay}')
+        logger.info(f'eps: {self.eps}')
+        logger.info(f'value_pred_coef: {self.value_pred_coef}')
+        logger.info(f'entropy_coef: {self.entropy_coef}')
+        logger.info(f'clip_epsilon: {self.clip_epsilon}')
+        logger.info(f'max_num_iterations: {self.max_num_iterations}')
+        logger.info(f'num_episodes_per_iteration: {self.num_episodes_per_iteration}')
+        logger.info(f'max_sequence_length: {self.max_sequence_length}')
+        logger.info(f'num_optim_epoch: {self.num_optim_epoch}')
+        logger.info(f'mini_batch_size: {self.mini_batch_size}')
+        logger.info(f'save_model_interval: {self.save_model_interval}')
+
+        if tb_logger is not None:
+            tb_logger.add_hparams(
+                hparam_dict={
+                    'id': self.cfg_id,
+                    'seed': self.seed,
+                    'env_specs': str(self.env_specs),
+                    'reward_specs': str(self.reward_specs),
+                    'obs_specs': str(self.obs_specs),
+                    'agent_specs': str(self.agent_specs),
+                    'gamma': self.gamma,
+                    'tau': self.tau,
+                    'state_encoder_specs': str(self.state_encoder_specs),
+                    'policy_specs': str(self.policy_specs),
+                    'value_specs': str(self.value_specs),
+                    'lr': self.lr,
+                    'weightdecay': self.weightdecay,
+                    'eps': self.eps,
+                    'value_pred_coef': self.value_pred_coef,
+                    'entropy_coef': self.entropy_coef,
+                    'clip_epsilon': self.clip_epsilon,
+                    'max_num_iterations': self.max_num_iterations,
+                    'num_episodes_per_iteration': self.num_episodes_per_iteration,
+                    'max_sequence_length': self.max_sequence_length,
+                    'num_optim_epoch': self.num_optim_epoch,
+                    'mini_batch_size': self.mini_batch_size,
+                    'save_model_interval': self.save_model_interval},
+                metric_dict={'hparam/placeholder': 0.0})
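
Reviewer note: the custom TupleSafeLoader exists so that !!python/tuple entries in the YAML files still load under SafeLoader, and passing cfg_dict skips the YAML file entirely, which is convenient for tests. A minimal sketch with placeholder spec values:

from facility_location.utils import Config

# Placeholder spec values; any key left out falls back to the defaults above.
cfg = Config(
    cfg_id='plot', global_seed=0, tmp=True, root_dir='/unused', agent='rl-gnn',
    cfg_dict={
        'env_specs': {'region': None, 'min_n': 50, 'max_n': 100,
                      'min_p_ratio': 0.1, 'max_p_ratio': 0.2,
                      'max_steps_scale': 2.0, 'tabu_stable_steps_scale': 0.2,
                      'popstar': False},
        'eval_specs': {'region': None, 'seed': 0},
    },
)
assert cfg.gamma == 0.99 and cfg.load_model_path is None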
facility_location/utils/policy.py
ADDED
@@ -0,0 +1,57 @@
+from typing import Dict
+
+from facility_location.agent import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+from facility_location.utils import Config
+
+
+def get_policy_kwargs(cfg: Config) -> Dict:
+    if cfg.agent == 'rl-mlp':
+        hidden_units = cfg.mlp_specs.get('hidden_units', (32, 32))
+        node_dim = hidden_units[-1]
+        policy_feature_dim = FacilityLocationMLPExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationMLPExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationMLPExtractor,
+            features_extractor_kwargs=dict(
+                hidden_units=hidden_units,),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-gnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-agnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationAttentionGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationAttentionGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationAttentionGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    else:
+        raise NotImplementedError
+    return policy_kwargs
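
Reviewer note: the three branches differ only in which features extractor they wire in, and the resulting dict feeds straight into PPO's policy_kwargs (see multi_eval.get_model). A sketch of the rl-gnn path with illustrative spec values:

from facility_location.agent import FacilityLocationGNNExtractor
from facility_location.utils import Config
from facility_location.utils.policy import get_policy_kwargs

cfg = Config('plot', 0, True, '/unused', 'rl-gnn',
             cfg_dict={'gnn_specs': {'num_gnn_layers': 3, 'node_dim': 64}})
kwargs = get_policy_kwargs(cfg)
assert kwargs['features_extractor_class'] is FacilityLocationGNNExtractor
assert kwargs['features_extractor_kwargs'] == dict(num_gnn_layers=3, node_dim=64)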
final_solutions.pkl
ADDED
File without changes
model.pth
DELETED
Binary file (24.3 kB)
model.py
DELETED
@@ -1,24 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Net(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-        self.softmax = nn.Softmax(dim=1)
-
-    def forward(self, x):
-        out = self.fc1(x)
-        out = self.relu(out)
-        out = self.fc2(out)
-        out = self.softmax(out)
-        return out
-
-
-if __name__ == '__main__':
-    net = Net(100, 50, 10)
-    torch.save(net.state_dict(), 'model.pth')