Commit a257639 • 苏泓源 committed
1 Parent(s): c8cf824

update

This view is limited to 50 files because it contains too many changes. See raw diff.
- app.py +92 -22
- facility_location/__init__.py +0 -0
- facility_location/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/__pycache__/multi_eval.cpython-39.pyc +0 -0
- facility_location/agent/__init__.py +4 -0
- facility_location/agent/__pycache__/__init__.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/features_extractor.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/features_extractor.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/ga.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/ga.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/heuristic.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/heuristic.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/policy.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/policy.cpython-39.pyc +0 -0
- facility_location/agent/__pycache__/solver.cpython-310.pyc +0 -0
- facility_location/agent/__pycache__/solver.cpython-39.pyc +0 -0
- facility_location/agent/features_extractor.py +225 -0
- facility_location/agent/policy.py +229 -0
- facility_location/agent/solver.py +33 -0
- facility_location/cfg/__init__.py +0 -0
- facility_location/cfg/plot.yaml +64 -0
- facility_location/env/__init__.py +3 -0
- facility_location/env/__pycache__/__init__.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/env/__pycache__/facility_location_client.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/facility_location_client.cpython-39.pyc +0 -0
- facility_location/env/__pycache__/obs_extractor.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/obs_extractor.cpython-39.pyc +0 -0
- facility_location/env/__pycache__/pmp.cpython-310.pyc +0 -0
- facility_location/env/__pycache__/pmp.cpython-39.pyc +0 -0
- facility_location/env/facility_location_client.py +278 -0
- facility_location/env/obs_extractor.py +184 -0
- facility_location/env/pmp.py +502 -0
- facility_location/multi_eval.py +96 -0
- facility_location/solutions.pkl +0 -0
- facility_location/utils/__init__.py +3 -0
- facility_location/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- facility_location/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- facility_location/utils/__pycache__/config.cpython-310.pyc +0 -0
- facility_location/utils/__pycache__/config.cpython-39.pyc +0 -0
- facility_location/utils/__pycache__/policy.cpython-310.pyc +0 -0
- facility_location/utils/__pycache__/policy.cpython-39.pyc +0 -0
- facility_location/utils/config.py +133 -0
- facility_location/utils/policy.py +57 -0
- final_solutions.pkl +0 -0
- model.pth +0 -0
- model.py +0 -24
app.py
CHANGED
@@ -4,26 +4,98 @@ import plotly.graph_objects as go
 import plotly.express as px
 from sklearn.metrics import pairwise_distances
 import torch
+from facility_location import multi_eval
+import pickle
 
-def plot_from_npy(npy_data):
-    fig = go.Figure()
-
-    fig.add_trace(go.Scatter(x=[1, 2, 3, 4], y=[10, 11, 12, 13], mode='lines', name='New York'))
-    fig.update_layout(title_text="Facility Distribution in Cities")
-    fig.update_xaxes(title_text="Time")
-    fig.update_yaxes(title_text="Facility Count")
-
-
-    actual_fig = fig  # Replace this line with your actual_fig
-    solution_fig = fig  # Replace this line with your solution_fig
-
-    return actual_fig, solution_fig
 
 def solver_plot(data_npy, boost=False):
+    multi_eval.main(data_npy, boost)
+    all_solutions = pickle.loads(open('./facility_location/solutions.pkl', 'rb').read())
+
+    data = data_npy.split('\n')
+    n = len(data)
+    p = int((len(data[0].split(' '))-2) / 2)
+
+    positions = []
+    demands = []
+    actual_facilities = []
+    for row in data:
+        row = row.split(' ')
+        row = [x for x in row if len(x)]
+
+        positions.append([float(row[0]), float(row[1])])
+
+        demand = []
+        for i in range(2, 2+p):
+            demand.append(float(row[i]))
+        demands.append(demand)
+
+        actual_facility = []
+        for i in range(2+p, 2+2*p):
+            actual_facility.append(bool(int(float(row[i]))))
+        actual_facilities.append(actual_facility)
+    positions = np.array(positions)
+    demands = np.array(demands)
+    actual_facilities = np.array(actual_facilities)
+    solution_facilities = np.array(all_solutions).T
+    # print(solution_facilities)
+    # print(actual_facilities)
+
     actual_fig = go.Figure()
     solution_fig = go.Figure()
-
-
+    for i in range(p):
+        actual_fig.add_trace(go.Scattermapbox(
+            lat=positions[actual_facilities[:, i]][:, 0],
+            lon=positions[actual_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+        solution_fig.add_trace(go.Scattermapbox(
+            lat=positions[solution_facilities[:, i]][:, 0],
+            lon=positions[solution_facilities[:, i]][:, 1],
+            mode='markers',
+            marker=go.scattermapbox.Marker(
+                size=10,
+                color=px.colors.qualitative.Plotly[i]
+            ),
+            name=f'Facility {i+1}'
+        ))
+
+    actual_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[actual_facilities[:, i]][:, 0]), \
+                        lon=np.mean(positions[actual_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0),)
+
+    solution_fig.update_layout(
+        mapbox=dict(
+            style='carto-positron',
+            center=dict(lat=np.mean(positions[solution_facilities[:, i]][:, 0]), \
+                        lon=np.mean(positions[solution_facilities[:, i]][:, 1])),
+            zoom=11.0
+        ),
+        margin=dict(l=0, r=0, b=0, t=0),)
+    # show legend
+    actual_fig.update_layout(showlegend=True)
+    solution_fig.update_layout(showlegend=True)
+
+    positions = np.deg2rad(positions)
+    dist = pairwise_distances(positions, metric='haversine') * 6371
+    actual_ac = 0
+    solution_ac = 0
+    for i in range(p):
+        ac_matrix = dist * demands[:, i][:, None]
+        actual_ac += ac_matrix[:, actual_facilities[:, i]].min(axis=-1).sum()
+        solution_ac += ac_matrix[:, solution_facilities[:, i]].min(axis=-1).sum()
+
     return actual_fig, solution_fig, actual_ac, solution_ac
 
 def demo_plot(city, facility):
@@ -104,7 +176,7 @@ def demo_plot(city, facility):
     return actual_fig, solution_fig, actual_ac, solution_ac
 
 
-def
+def solver_plot1(data_npy, boost=False):
     data = data_npy.split('\n')
     n = len(data)
     p = int((len(data[0].split(' '))-2) / 2)
@@ -115,7 +187,6 @@ def solver_plot(data_npy, boost=False):
     for row in data:
         row = row.split(' ')
         row = [x for x in row if len(x)]
-        print(row)
 
         positions.append([float(row[0]), float(row[1])])
 
@@ -132,7 +203,6 @@ def solver_plot(data_npy, boost=False):
     demands = np.array(demands)
     actual_facilities = np.array(actual_facilities)
     solution_facilities = ~actual_facilities
-    print(actual_facilities)
 
     actual_fig = go.Figure()
     solution_fig = go.Figure()
@@ -193,13 +263,13 @@
 
 def get_example():
     return [
-        ('40.71 -73.93 213
-        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.61 -73.95 189 264 1 0")
+        ('40.71 -73.93 213 1\n40.72 -73.99 15 1\n40.65 -73.88 365 1\n40.57 -73.96 629 0\n40.70 -73.97 106 0\n40.61 -73.95 189 1'),
+        ("40.71 -73.93 213 124 0 1\n40.72 -73.99 15 43 1 0\n40.65 -73.88 365 214 1 0\n40.57 -73.96 629 431 0 1\n40.70 -73.97 106 241 0 1\n40.60 -73.92 129 214 1 0\n40.61 -73.95 189 264 0 1\n40.63 -73.94 124 164 1 0"),
     ]
 
 
 def load_npy_file(file_obj):
-    data = np.
+    data = np.loadtxt(file_obj.name)
     string_array = '\n'.join([' '.join(map(str, row)) for row in data])
     return string_array
 
@@ -231,8 +301,8 @@ with gr.Blocks() as demo:
     gr.Examples(
         examples=get_example(),
        inputs=[data_npy],
-        fn=
-        outputs=[actual_map, solution_map],
+        fn=solver_plot1,
+        outputs=[actual_map, solution_map, actual_ac, solution_ac],
    )
    with gr.Row():
        boost = gr.Checkbox(label="Turbo Boost (accelerate solution generation with fewer SWAP steps)", value=False)
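Note on the input format used throughout this diff: solver_plot expects one point per line, "lat lon demand_1 .. demand_p facility_1 .. facility_p". A minimal sketch of how one of the example rows above decodes (the variable names here are illustrative, not part of the commit):

row = '40.71 -73.93 213 124 0 1'.split(' ')
row = [x for x in row if len(x)]
p = int((len(row) - 2) / 2)                               # 2 facility types in this row
position = [float(row[0]), float(row[1])]                 # latitude, longitude
demands = [float(x) for x in row[2:2 + p]]                # one demand per facility type
facilities = [bool(int(float(x))) for x in row[2 + p:]]   # current placement flags
print(p, position, demands, facilities)                   # 2 [40.71, -73.93] [213.0, 124.0] [False, True]
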
facility_location/__init__.py
ADDED
File without changes

facility_location/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (145 Bytes)

facility_location/__pycache__/multi_eval.cpython-39.pyc
ADDED
Binary file (3.13 kB)
facility_location/agent/__init__.py
ADDED
@@ -0,0 +1,4 @@
+from .policy import MaskedFacilityLocationActorCriticPolicy
+from .features_extractor import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+
+__all__ = ['MaskedFacilityLocationActorCriticPolicy', 'FacilityLocationMLPExtractor', 'FacilityLocationGNNExtractor', 'FacilityLocationAttentionGNNExtractor']
facility_location/agent/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (419 Bytes)

facility_location/agent/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (420 Bytes)

facility_location/agent/__pycache__/features_extractor.cpython-310.pyc
ADDED
Binary file (7.44 kB)

facility_location/agent/__pycache__/features_extractor.cpython-39.pyc
ADDED
Binary file (7.46 kB)

facility_location/agent/__pycache__/ga.cpython-310.pyc
ADDED
Binary file (3.2 kB)

facility_location/agent/__pycache__/ga.cpython-39.pyc
ADDED
Binary file (3.19 kB)

facility_location/agent/__pycache__/heuristic.cpython-310.pyc
ADDED
Binary file (3.07 kB)

facility_location/agent/__pycache__/heuristic.cpython-39.pyc
ADDED
Binary file (3.12 kB)

facility_location/agent/__pycache__/metaheuristic.cpython-310.pyc
ADDED
Binary file (6.84 kB)

facility_location/agent/__pycache__/metaheuristic.cpython-39.pyc
ADDED
Binary file (6.86 kB)

facility_location/agent/__pycache__/policy.cpython-310.pyc
ADDED
Binary file (6.36 kB)

facility_location/agent/__pycache__/policy.cpython-39.pyc
ADDED
Binary file (6.29 kB)

facility_location/agent/__pycache__/solver.cpython-310.pyc
ADDED
Binary file (1.5 kB)

facility_location/agent/__pycache__/solver.cpython-39.pyc
ADDED
Binary file (1.5 kB)
facility_location/agent/features_extractor.py
ADDED
@@ -0,0 +1,225 @@
+from collections import OrderedDict
+from typing import Tuple
+
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
+from stable_baselines3.common.type_aliases import TensorDict
+
+import time
+
+
+def mean_features(h: th.Tensor, mask: th.Tensor):
+    float_mask = mask.float()
+    mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
+    return mean_h
+
+
+# def compute_state(observations: TensorDict, h_nodes: th.Tensor):
+#     node_mask = observations['node_mask'].bool()
+#     mean_h_nodes = mean_features(h_nodes, node_mask)
+
+#     old_facility_mask = observations['old_facility_mask'].bool()
+#     h_old_facility = mean_features(h_nodes, old_facility_mask)
+#     h_old_facility_repeat = h_old_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_old_facility = th.cat([
+#         h_nodes,
+#         h_old_facility_repeat,
+#         h_nodes - h_old_facility_repeat,
+#         h_nodes * h_old_facility_repeat], dim=-1)
+
+#     new_facility_mask = observations['new_facility_mask'].bool()
+#     h_new_facility = mean_features(h_nodes, new_facility_mask)
+#     h_new_facility_repeat = h_new_facility.unsqueeze(1).expand(-1, h_nodes.shape[1], -1)
+#     state_policy_new_facility = th.cat([
+#         h_nodes,
+#         h_new_facility_repeat,
+#         h_nodes - h_new_facility_repeat,
+
+#     state_value = th.cat([
+#         mean_h_nodes,
+#         h_old_facility,
+#         h_new_facility], dim=-1)
+
+#     return state_policy_old_facility, state_policy_new_facility, state_value, old_facility_mask, new_facility_mask
+
+def compute_state(observations: TensorDict, h_edges: th.Tensor):
+    dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+    mean_h_edges = mean_features(h_edges, dynamic_edge_mask)
+
+    state_policy_facility_pair = h_edges
+    state_value = mean_h_edges
+
+    return state_policy_facility_pair, state_value, dynamic_edge_mask
+
+
+class FacilityLocationMLPExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        hidden_units: Tuple = (32, 32),
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        self.node_mlp = self.create_mlp(observation_space.spaces['node_features'].shape[1], hidden_units)
+
+    @staticmethod
+    def create_mlp(input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+        layers = OrderedDict()
+        for i, units in enumerate(hidden_units):
+            if i == 0:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(input_dim, units)
+            else:
+                layers[f'mlp-extractor-linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+            layers[f'mlp-extractor-tanh_{i}'] = nn.Tanh()
+        return nn.Sequential(layers)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_mlp(node_features)
+        return compute_state(observations, h_nodes)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 4
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 3
+
+
+class FacilityLocationGNNExtractor(BaseFeaturesExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, features_dim=1)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.single_gnn_layer = self.create_gnn(1, node_dim)[0]
+
+    @staticmethod
+    def create_node_encoder(num_node_features: int, node_dim: int) -> nn.Sequential:
+        node_encoder = nn.Sequential(
+            nn.Linear(num_node_features, node_dim),
+            nn.Tanh())
+        return node_encoder
+
+    @staticmethod
+    def create_gnn(num_gnn_layers: int, node_dim: int) -> nn.ModuleList:
+        layers = nn.ModuleList()
+        for i in range(num_gnn_layers):
+            gnn_layer = nn.Sequential(
+                nn.Linear(node_dim, node_dim),
+                nn.Tanh())
+            layers.append(gnn_layer)
+        return layers
+
+    @staticmethod
+    def scatter_count(h_edges, indices, edge_mask, max_num_nodes):
+        batch_size = h_edges.shape[0]
+        num_latents = h_edges.shape[2]
+
+        h_nodes = th.zeros(batch_size, max_num_nodes, num_latents).to(h_edges.device)
+        count_edge = th.zeros_like(h_nodes)
+        count = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges.shape).float()
+
+        idx = indices.unsqueeze(-1).expand(-1, -1, num_latents)
+        h_nodes = h_nodes.scatter_add_(1, idx, h_edges)
+        count_edge = count_edge.scatter_add_(1, idx, count)
+        return h_nodes, count_edge
+
+    @staticmethod
+    def gather_to_edges(h_nodes, edge_index, edge_mask, gnn_layer):
+        h_nodes = gnn_layer(h_nodes)
+        h_edges_12 = th.gather(h_nodes, 1, edge_index[:, :, 0].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        h_edges_21 = th.gather(h_nodes, 1, edge_index[:, :, 1].unsqueeze(-1).expand(-1, -1, h_nodes.size(-1)))
+        mask = th.broadcast_to(edge_mask.unsqueeze(-1), h_edges_12.shape)
+        h_edges_12 = th.where(mask, h_edges_12, th.zeros_like(h_edges_12))
+        h_edges_21 = th.where(mask, h_edges_21, th.zeros_like(h_edges_21))
+        return h_edges_12, h_edges_21
+
+    @classmethod
+    def scatter_to_nodes(cls, h_edges, edge_index, edge_mask, node_mask):
+        h_edges_12, h_edges_21 = h_edges
+        max_num_nodes = node_mask.shape[1]
+        h_nodes_1, count_1 = cls.scatter_count(h_edges_21, edge_index[:, :, 0], edge_mask, max_num_nodes)
+        h_nodes_2, count_2 = cls.scatter_count(h_edges_12, edge_index[:, :, 1], edge_mask, max_num_nodes)
+
+        h_nodes_sum = h_nodes_1 + h_nodes_2
+
+        mask = th.broadcast_to(node_mask.unsqueeze(-1), h_nodes_sum.shape)
+        count = count_1 + count_2
+        count_padding = th.ones_like(count)
+        count = th.where(mask, count, count_padding)
+
+        h_nodes = h_nodes_sum / count
+        return h_nodes
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        t1 = time.time()
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        static_edge_mask = observations['static_edge_mask'].bool()
+        dynamic_edge_mask = observations['dynamic_edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, static_edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, static_edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+        h_edges12, h_edges21 = self.gather_to_edges(h_nodes, edge_dynamic_index, dynamic_edge_mask, self.single_gnn_layer)
+        h_edges = th.cat([h_edges12, h_edges21], dim=-1)
+
+        t2 = time.time()
+        # print('cal embedding time:', t2-t1)
+
+        return compute_state(observations, h_edges)
+
+    @staticmethod
+    def get_policy_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+    @staticmethod
+    def get_value_feature_dim(node_dim: int) -> int:
+        return node_dim * 2
+
+
+class FacilityLocationAttentionGNNExtractor(FacilityLocationGNNExtractor):
+    def __init__(
+        self,
+        observation_space: spaces.Dict,
+        num_gnn_layers: int = 2,
+        node_dim: int = 32,
+    ) -> None:
+        super().__init__(observation_space, num_gnn_layers, node_dim)
+
+        num_node_features = observation_space.spaces['node_features'].shape[1]
+        self.node_encoder = self.create_node_encoder(num_node_features, node_dim)
+        self.gnn_layers = self.create_gnn(num_gnn_layers, node_dim)
+        self.attention = nn.MultiheadAttention(node_dim, node_dim)
+
+    def forward(self, observations: TensorDict) -> Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]:
+        node_features = observations['node_features']
+        h_nodes = self.node_encoder(node_features)
+
+        edge_static_index = observations['static_adjacency_list'].long()
+        edge_dynamic_index = observations['dynamic_adjacency_list'].long()
+        node_mask = observations['node_mask'].bool()
+        edge_mask = observations['edge_mask'].bool()
+        for gnn_layer in self.gnn_layers:
+            h_edges = self.gather_to_edges(h_nodes, edge_static_index, edge_mask, gnn_layer)
+            h_nodes_new = self.scatter_to_nodes(h_edges, edge_static_index, edge_mask, node_mask)
+            h_nodes = h_nodes + h_nodes_new
+
+        h_nodes = self.attention(h_nodes, h_nodes, h_nodes)[0]
+
+        return compute_state(observations, h_nodes)
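The masked mean in mean_features above is the standard trick for pooling over padded graphs: zero out padded slots, then divide by the count of real ones. A self-contained sketch with toy tensors (not repo code):

import torch as th

h = th.randn(2, 5, 8)                       # (batch, nodes, features), padded to 5 nodes
mask = th.tensor([[1, 1, 1, 0, 0],
                  [1, 1, 1, 1, 1]]).bool()  # which node slots are real

float_mask = mask.float()
mean_h = (h * float_mask.unsqueeze(-1)).sum(dim=1) / float_mask.sum(dim=1, keepdim=True)
print(mean_h.shape)                         # torch.Size([2, 8]): one pooled vector per graph
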
facility_location/agent/policy.py
ADDED
@@ -0,0 +1,229 @@
+from functools import partial
+from typing import Callable, Tuple, Text, Union
+from collections import OrderedDict
+
+import numpy as np
+from gym import spaces
+import torch as th
+from torch import nn
+
+from stable_baselines3.common.policies import ActorCriticPolicy
+from stable_baselines3.common.utils import get_device
+from stable_baselines3.common.type_aliases import Schedule
+
+
+def create_mlp(head: Text, input_dim: int, hidden_units: Tuple) -> nn.Sequential:
+    layers = OrderedDict()
+    for i, units in enumerate(hidden_units):
+        if i == 0:
+            layers[f'{head}_linear_{i}'] = nn.Linear(input_dim, units)
+        else:
+            layers[f'{head}_linear_{i}'] = nn.Linear(hidden_units[i - 1], units)
+        if i != len(hidden_units) - 1:
+            layers[f'{head}_tanh_{i}'] = nn.Tanh()
+    if head.startswith('policy'):
+        layers[f'{head}_flatten'] = nn.Flatten()
+    return nn.Sequential(layers)
+
+
+class MaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        # self.old_facility_policy_net = create_mlp('policy-old-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        # self.new_facility_policy_net = create_mlp('policy-new-facility',
+        #                                           policy_feature_dim,
+        #                                           policy_hidden_units).to(device)
+        self.pair_facility_policy_net = create_mlp('policy-pair-facility',
+                                                   policy_feature_dim,
+                                                   policy_hidden_units).to(device)
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    # def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+    #     state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+    #     old_facility_logits = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+    #     old_facility_padding = th.full_like(old_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_old_facility_logits = th.where(old_facility_mask, old_facility_logits, old_facility_padding)
+
+    #     new_facility_logits = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+    #     new_facility_padding = th.full_like(new_facility_mask, -th.inf, dtype=th.float32)
+    #     masked_new_facility_logits = th.where(new_facility_mask, new_facility_logits, new_facility_padding)
+
+    #     masked_old_new_facility_logits = th.cat([masked_old_facility_logits, masked_new_facility_logits], dim=1)
+    #     return masked_old_new_facility_logits
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_pair_facility, _, dynamic_edge_mask = features
+        pair_facility_logits = self.pair_facility_policy_net(state_policy_pair_facility)
+        pair_facility_padding = th.full_like(dynamic_edge_mask, -th.inf, dtype=th.float32)
+        masked_pair_facility_logits = th.where(dynamic_edge_mask, pair_facility_logits, pair_facility_padding)
+
+        return masked_pair_facility_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, state_value, _ = features
+        return self.value_net(state_value)
+
+
+class POPSTARMaskedFacilityLocationNetwork(nn.Module):
+
+    def __init__(
+        self,
+        policy_feature_dim: int,
+        value_feature_dim: int,
+        policy_hidden_units: Tuple = (32, 32, 1),
+        value_hidden_units: Tuple = (32, 32, 1),
+        device: Union[th.device, Text] = "auto",
+    ):
+        super().__init__()
+        device = get_device(device)
+
+        # Policy network
+        self.old_facility_policy_net = create_mlp('policy-old-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.new_facility_policy_net = create_mlp('policy-new-facility',
+                                                  policy_feature_dim,
+                                                  policy_hidden_units).to(device)
+        self.old_new_facility_policy_net = create_mlp('policy-old-new-facility',
+                                                      policy_feature_dim * 4,
+                                                      policy_hidden_units).to(device)
+
+        # Value network
+        self.value_net = create_mlp('value',
+                                    value_feature_dim,
+                                    value_hidden_units).to(device)
+
+    def forward(self,
+                features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> Tuple[th.Tensor, th.Tensor]:
+        return self.forward_actor(features), self.forward_critic(features)
+
+    def forward_actor(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        state_policy_old_facility, state_policy_new_facility, _, old_facility_mask, new_facility_mask = features
+
+        node_range = old_facility_mask.shape[1]
+
+        loss = self.old_facility_policy_net(state_policy_old_facility)  # (batch_size, node_range)
+        loss = loss.repeat_interleave(node_range, dim=1)
+
+        gain = self.new_facility_policy_net(state_policy_new_facility)  # (batch_size, node_range)
+        gain = gain.repeat(1, node_range)
+
+        state_policy_old_facility_expand = state_policy_old_facility.unsqueeze(2).expand(-1, -1, node_range, -1)
+        state_policy_new_facility_expand = state_policy_new_facility.unsqueeze(1).expand(-1, node_range, -1, -1)
+        state_policy_old_new_facility = th.cat(
+            [
+                state_policy_old_facility_expand,
+                state_policy_new_facility_expand,
+                state_policy_old_facility_expand - state_policy_new_facility_expand,
+                state_policy_old_facility_expand * state_policy_new_facility_expand
+            ], dim=-1
+        )
+        extra = self.old_new_facility_policy_net(state_policy_old_new_facility)  # (batch_size, node_range * node_range)
+
+        logits = gain - loss + extra
+
+        action_mask = th.logical_and(old_facility_mask.unsqueeze(2), new_facility_mask.unsqueeze(1)).flatten(start_dim=1)
+        padding = th.full_like(action_mask, -th.inf, dtype=th.float32)
+        masked_logits = th.where(action_mask, logits, padding)
+
+        return masked_logits
+
+    def forward_critic(self, features: Tuple[th.Tensor, th.Tensor, th.Tensor, th.Tensor, th.Tensor]) -> th.Tensor:
+        _, _, state_value, _, _ = features
+        return self.value_net(state_value)
+
+
+class MaskedFacilityLocationActorCriticPolicy(ActorCriticPolicy):
+    def __init__(
+        self,
+        observation_space: spaces.Space,
+        action_space: spaces.Space,
+        lr_schedule: Callable[[float], float],
+        *args,
+        **kwargs,
+    ):
+        self.policy_feature_dim = kwargs.pop('policy_feature_dim')
+        self.value_feature_dim = kwargs.pop('value_feature_dim')
+        self.policy_hidden_units = kwargs.pop('policy_hidden_units')
+        self.value_hidden_units = kwargs.pop('value_hidden_units')
+
+        self.popstar = kwargs.pop('popstar')
+
+        super().__init__(
+            observation_space,
+            action_space,
+            lr_schedule,
+            # Pass remaining arguments to base class
+            *args,
+            **kwargs,
+        )
+
+    def _build(self, lr_schedule: Schedule) -> None:
+        self._build_mlp_extractor()
+
+        self.action_net = nn.Identity()
+        self.value_net = nn.Identity()
+
+        # Init weights: use orthogonal initialization
+        # with small initial weight for the output
+        if self.ortho_init:
+            # TODO: check for features_extractor
+            # Values from stable-baselines.
+            # features_extractor/mlp values are
+            # originally from openai/baselines (default gains/init_scales).
+            module_gains = {
+                self.features_extractor: np.sqrt(2),
+                self.mlp_extractor: np.sqrt(2),
+            }
+            # if not self.share_features_extractor:
+            #     # Note(antonin): this is to keep SB3 results
+            #     # consistent, see GH#1148
+            #     del module_gains[self.features_extractor]
+            #     module_gains[self.pi_features_extractor] = np.sqrt(2)
+            #     module_gains[self.vf_features_extractor] = np.sqrt(2)
+
+            for module, gain in module_gains.items():
+                module.apply(partial(self.init_weights, gain=gain))
+
+        # Setup optimizer with initial learning rate
+        self.optimizer = self.optimizer_class(self.parameters(), lr=lr_schedule(1), **self.optimizer_kwargs)
+
+    def _build_mlp_extractor(self) -> None:
+        if not self.popstar:
+            self.mlp_extractor = MaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
+        else:
+            self.mlp_extractor = POPSTARMaskedFacilityLocationNetwork(
+                self.policy_feature_dim,
+                self.value_feature_dim,
+                self.policy_hidden_units,
+                self.value_hidden_units,
+                self.device,
+            )
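forward_actor above masks invalid swap pairs by overwriting their logits with -inf before the categorical distribution is built, so masked actions get zero probability. A toy illustration of that pattern (standalone, not repo code):

import torch as th

logits = th.randn(1, 6)                                  # scores for 6 candidate swap pairs
valid = th.tensor([[True, True, False, True, False, False]])

masked = th.where(valid, logits, th.full_like(logits, -th.inf))
print(masked.softmax(dim=-1))                            # invalid pairs get probability 0
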
facility_location/agent/solver.py
ADDED
@@ -0,0 +1,33 @@
+from typing import Text
+
+import numpy as np
+import pulp
+from spopt.locate import PMedian
+
+from facility_location.env import EvalPMPEnv
+
+
+class PMPSolver:
+    def __init__(self, solver: Text, env: EvalPMPEnv):
+        if solver == 'GUROBI':
+            self._solver = pulp.GUROBI(msg=False)
+        elif solver == 'GUROBI_CMD':
+            self._solver = pulp.GUROBI_CMD(msg=False)
+        elif solver == 'PULP_CBC_CMD':
+            self._solver = pulp.PULP_CBC_CMD(msg=False)
+        elif solver == 'GLPK_CMD':
+            self._solver = pulp.GLPK_CMD(msg=False)
+        elif solver == 'MOSEK':
+            self._solver = pulp.MOSEK(msg=False)
+        else:
+            raise ValueError(f'Solver {solver} not supported.')
+
+        self.env = env
+
+    def solve(self):
+        _, demands, _, p = self.env.get_instance()
+        distance_matrix, _ = self.env.get_distance_and_cost()
+        pmedian_from_cost_matrix = PMedian.from_cost_matrix(distance_matrix, demands, p_facilities=p)
+        pmedian_from_cost_matrix = pmedian_from_cost_matrix.solve(self._solver)
+        solution = np.array([len(temp) > 0 for temp in pmedian_from_cost_matrix.fac2cli], dtype=bool)
+        return solution
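PMPSolver delegates the exact p-median solve to spopt and reads the chosen sites off fac2cli. A toy end-to-end call in the same style, assuming spopt and PuLP's bundled CBC are installed (the 3x3 matrix and weights are made up for illustration):

import numpy as np
import pulp
from spopt.locate import PMedian

cost = np.array([[0.0, 2.0, 5.0],
                 [2.0, 0.0, 3.0],
                 [5.0, 3.0, 0.0]])       # demand-point x candidate-site distances
weights = np.array([10.0, 1.0, 4.0])     # demand at each point

model = PMedian.from_cost_matrix(cost, weights, p_facilities=1)
model = model.solve(pulp.PULP_CBC_CMD(msg=False))
chosen = np.array([len(clients) > 0 for clients in model.fac2cli], dtype=bool)
print(chosen)                            # boolean mask over candidate sites, as in solve()
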
facility_location/cfg/__init__.py
ADDED
File without changes
facility_location/cfg/plot.yaml
ADDED
@@ -0,0 +1,64 @@
+
+env_specs:
+  region:
+    min_n: 20
+    max_n: 50
+    min_p_ratio: 0.1
+    max_p_ratio: 0.4
+    max_steps_scale: 0.5
+    tabu_time: 3
+    tabu_stable_steps_scale: 0.2
+    popstar: false
+
+# evaluation
+eval_specs:
+  region:
+    seed: 12345
+    max_nodes: 2488
+    max_edges: 5000
+    val_num_cases: 100
+    test_num_cases: 1
+    val_np: !!python/tuple [50,5]
+    test_np:
+      - !!python/tuple [2214,36]
+      - !!python/tuple [2214,189]
+      - !!python/tuple [2214,425]
+# agent
+agent_specs:
+  policy_feature_dim: 32
+  value_feature_dim: 32
+  policy_hidden_units: !!python/tuple [32, 32, 1]
+  value_hidden_units: !!python/tuple [32, 32, 1]
+
+# mlp
+mlp_specs:
+  hidden_units: !!python/tuple [32, 32]
+
+gnn_specs:
+  num_gnn_layers: 2
+  node_dim: 32
+
+
+# ts
+ts_specs:
+  max_steps_scale: 2
+  stable_iterations_scale: 0.2
+
+
+# popstar
+popstar_specs:
+  graspit: 32
+  elite: 10
+
+
+# ga
+ga_specs:
+  num_generations: 100
+  num_parents_mating: 50
+  sol_per_pop: 100
+  parent_selection_type: sss
+  crossover_probability: 0.8
+  mutation_probability: 0.1
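The !!python/tuple tags above are PyYAML-specific, so this file cannot be read with yaml.safe_load. A minimal loading sketch (the path is this repo's; UnsafeLoader is one loader that accepts the tag, and should only be used on trusted files like this one):

import yaml

with open('facility_location/cfg/plot.yaml') as f:
    cfg = yaml.load(f, Loader=yaml.UnsafeLoader)  # safe_load rejects !!python/tuple

print(cfg['agent_specs']['policy_hidden_units'])  # (32, 32, 1) as a Python tuple
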
facility_location/env/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .pmp import PMPEnv, EvalPMPEnv, MULTIPMP
+
+__all__ = ['PMPEnv', 'EvalPMPEnv', 'MULTIPMP']
facility_location/env/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (241 Bytes)

facility_location/env/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (242 Bytes)

facility_location/env/__pycache__/facility_location_client.cpython-310.pyc
ADDED
Binary file (10.2 kB)

facility_location/env/__pycache__/facility_location_client.cpython-39.pyc
ADDED
Binary file (10.2 kB)

facility_location/env/__pycache__/obs_extractor.cpython-310.pyc
ADDED
Binary file (6.73 kB)

facility_location/env/__pycache__/obs_extractor.cpython-39.pyc
ADDED
Binary file (6.75 kB)

facility_location/env/__pycache__/pmp.cpython-310.pyc
ADDED
Binary file (19.7 kB)

facility_location/env/__pycache__/pmp.cpython-39.pyc
ADDED
Binary file (17.8 kB)
facility_location/env/facility_location_client.py
ADDED
@@ -0,0 +1,278 @@
+import warnings
+from typing import Tuple, Dict
+
+import networkx as nx
+import numpy as np
+from geopandas import GeoDataFrame
+from shapely.geometry import MultiPoint
+from libpysal.weights.contiguity import Voronoi as Voronoi_weights
+from sklearn.neighbors import kneighbors_graph
+from sklearn.metrics import pairwise_distances
+
+from facility_location.utils.config import Config
+import time
+
+class FacilityLocationClient:
+    def __init__(self, cfg: Config, rng: np.random.Generator):
+        self.cfg = cfg
+        self.rng = rng
+        self._cfg_tabu_time = cfg.env_specs['tabu_time']
+        self._t = 0
+
+    def set_instance(self, points: np.ndarray, demands: np.ndarray, n: int, p: int, real: bool) -> None:
+        self._points = points
+        self._demands = demands
+        points_geom = MultiPoint(points)
+        self._gdf = GeoDataFrame({
+            'geometry': points_geom.geoms,
+            'demand': demands,
+        })
+        self._n = n
+        self._p = p
+        self._old_facility_mask = np.zeros(self._n, dtype=bool)
+        self._new_facility_mask = np.zeros(self._n, dtype=bool)
+        self._construct_static_graph()
+
+        if real:
+            self._distance_matrix = pairwise_distances(points, metric='haversine')
+        else:
+            self._distance_matrix = pairwise_distances(points, metric='euclidean')
+        self._cost_matrix = self._distance_matrix * self._demands[:, None]
+        self._gain = np.zeros(self._n)
+        self._loss = np.zeros(self._n)
+        self._add_time = np.full(self._n, -np.inf)
+        self._drop_time = np.full(self._n, -np.inf)
+        self.reset_tabu_time()
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        return self._points, self._demands, self._n, self._p
+
+    def get_distance_and_cost_matrix(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._distance_matrix, self._cost_matrix
+
+    def get_avg_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        avg_distance = self._distance_matrix.sum(axis=-1)/(self._n - 1)
+        avg_cost = self._cost_matrix.sum(axis=-1)/(self._n - 1)
+        return avg_distance, avg_cost
+
+    def _construct_static_graph(self) -> None:
+        self._connection_matrix = kneighbors_graph(self._points, n_neighbors=3, mode="connectivity").toarray()
+        self._static_graph = nx.from_numpy_matrix(self._connection_matrix)
+        self._static_edges = np.array(self._static_graph.edges(), dtype=np.int64)
+
+    def _construct_dynamic_graph(self) -> None:
+        t1 = time.time()
+        try:
+            solution_distace_min = np.partition(self._distance_matrix[:, self._solution][self._solution, :], 3, axis=-1)[:,2]
+        except:
+            raise ValueError('stop')
+        solution_distance_matrix = np.zeros((self._n, self._n))
+        solution_distance_matrix[:, self._solution] = solution_distace_min
+        solution_knearest_matrix = np.logical_and(self._distance_matrix < solution_distance_matrix, self._distance_matrix > 0)
+        old_tabu_mask, new_tabu_mask = self.get_tabu_mask(self._t)
+        solution_matrix = np.logical_and(np.logical_and(self._solution, old_tabu_mask)[:, None], (np.logical_and(~self._solution, new_tabu_mask)[None, :]))
+        solution_matrix = np.logical_or(solution_matrix, solution_matrix.T)
+        gainloss_matrix = np.logical_and((self._gain[:, None] > self._loss[None, :]), self._loss[None, :] > 0)
+        graph_matrix = np.logical_and(solution_matrix, np.logical_or(gainloss_matrix, solution_knearest_matrix))
+
+        if not np.any(graph_matrix):
+            if np.any(solution_matrix):
+                graph_matrix = solution_matrix
+                if not np.any(graph_matrix):
+                    raise ValueError('Invalid graph_matrix')
+            else:
+                graph_matrix = self._solution[:, None] ^ self._solution[None, :]
+        self._dynamic_graph = nx.from_numpy_matrix(graph_matrix)
+        self._dynamic_edges = np.array(self._dynamic_graph.edges(), dtype=np.int64)
+
+        t2 = time.time()
+        # print('dynamic graph time:',t2-t1)
+
+    def get_static_adjacency_list(self) -> np.ndarray:
+        return self._static_edges
+
+    def get_dynamic_adjacency_list(self) -> np.ndarray:
+        return self._dynamic_edges
+
+    def compute_initial_solution(self) -> Tuple[float, np.ndarray]:
+        self._solution = np.zeros(self._n, dtype=bool)
+        p_0 = self._demands.argmax()
+        self._solution[p_0] = True
+        for _ in range(self._p - 1):
+            p_max_cost = self._cost_matrix[:, self._solution].min(axis=-1).argmax()
+            self._solution[p_max_cost] = True
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        self._old_facility_mask = self._solution
+        self._new_facility_mask = ~self._solution
+        return self.compute_obj_value(), self._solution
+
+    def compute_obj_value(self) -> float:
+        obj_value = self._cost_matrix[:, self._solution].min(axis=-1).sum()
+        return obj_value
+
+    def compute_obj_value_from_solution(self, solution) -> float:
+        self._solution = solution
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+        obj_value = self.compute_obj_value()
+        return obj_value
+
+    # def swap(self, old_facility: int, new_facility: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+    #     if old_facility >= self._n or not self._solution[old_facility]:
+    #         warn_msg = f'Old facility {old_facility} is not a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         old_facility = self.rng.choice(np.arange(self._n)[self._solution])
+    #     if new_facility >= self._n or self._solution[new_facility]:
+    #         warn_msg = f'New facility {new_facility} is already a facility of the current solution {self._solution}.'
+    #         warnings.warn(warn_msg)
+    #         new_facility = self.rng.choice(np.arange(self._n)[~self._solution])
+    #     self._solution[old_facility] = False
+    #     self._solution[new_facility] = True
+    #     self._drop_time[old_facility] = t
+    #     self._add_time[new_facility] = t
+    #     self._t = t
+    #     return self.compute_obj_value(), self._solution, {}
+
+    def swap(self, facility_pair_index: int, t: int) -> Tuple[float, np.ndarray, Dict]:
+        facility_pair = self._dynamic_edges[facility_pair_index]
+        facility1 = facility_pair[0]
+        facility2 = facility_pair[1]
+
+        if (not self._solution[facility1]) and (self._solution[facility2]):
+            new_facility = facility1
+            old_facility = facility2
+        elif (not self._solution[facility2]) and (self._solution[facility1]):
+            new_facility = facility2
+            old_facility = facility1
+        else:
+            raise ValueError('stop')
+
+        self._solution[old_facility] = False
+        self._solution[new_facility] = True
+        self._old_facility_mask[new_facility] = True
+        self._new_facility_mask[old_facility] = True
+        self._drop_time[old_facility] = t
+        self._add_time[new_facility] = t
+        self._t = t
+        self._update_env(new_facility, old_facility)
+        # print('st:',self._t)
+        return self.compute_obj_value(), self._solution, {}
+
+    def get_tabu_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_tabu_mask = self._add_time < t - self._drop_tabu_time
+        new_tabu_mask = self._drop_time < t - self._add_tabu_time
+        return old_tabu_mask, new_tabu_mask
+
+    def reset_tabu_time(self) -> None:
+        self._t = 0
+        if self._cfg_tabu_time <= 0:
+            self._add_tabu_time = 0
+            self._drop_tabu_time = 0
+        else:
+            self._add_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+            self._drop_tabu_time = self.rng.integers(0.1 * self._p, 0.5 * self._p)
+
+    def get_current_solution(self) -> np.ndarray:
+        return self._solution
+
+    def set_solution(self, solution: np.ndarray) -> None:
+        self._solution = solution
+
+    def get_current_distance(self) -> np.ndarray:
+        dis2poi = self._distance_matrix[:, self._solution]
+        if self._p > 2:
+            dis = np.partition(dis2poi, 2, axis=-1)[:,:2]
+        else:
+            dis = dis2poi.min(axis=-1)
+            dis = np.stack([dis, dis], axis=-1)
+        return dis
+
+    def get_current_cost(self) -> np.ndarray:
+        cost2poi = self._cost_matrix[:, self._solution]
+        if self._p > 2:
+            cost = np.partition(cost2poi, 2, axis=-1)[:,:2]
+        else:
+            cost = cost2poi.min(axis=-1)
+            cost = np.stack([cost, cost], axis=-1)
+        return cost
+
+    def get_gain_and_loss(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._gain, self._loss
+
+    def get_gdf_facilities(self) -> Tuple[GeoDataFrame, np.ndarray]:
+        solution = self._solution
+        facilities = np.arange(self._n)[solution]
+        gdf = self._gdf.copy()
+        gdf['facility'] = False
+        gdf.loc[facilities, 'facility'] = True
+        node2facility = np.arange(self._n)[solution][self._cost_matrix[:, solution].argmin(axis=-1)]
+        gdf['assignment'] = node2facility
+        return gdf, facilities
+
+    def _init_env(self):
+        self._init_gain_and_loss()
+        self._construct_dynamic_graph()
+
+    def _update_env(self, insert_facility, remove_facility):
+        self._update_gain_and_loss(insert_facility, remove_facility)
+        self._construct_dynamic_graph()
+
+    def _init_gain_and_loss(self):
+        t1 = time.time()
+
+        for i in range(self._n):
+            _fake_solution = list(self._solution)
+            if self._solution[i]:
+                _fake_solution[i] = False
+                self._loss[i] = self._cost_matrix[:, _fake_solution].min(axis=-1).sum() - self._cost_matrix[:, self._solution].min(axis=-1).sum()
+                self._gain[i] = 0
+            else:
+                _fake_solution[i] = True
+                self._gain[i] = self._cost_matrix[:, self._solution].min(axis=-1).sum() - self._cost_matrix[:, _fake_solution].min(axis=-1).sum()
+                self._loss[i] = 0
+
+        self.argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:,:2]
+        t2 = time.time()
+        # print('init gainloss time:',t2-t1)
+
+    def _update_gain_and_loss(self, insert_facility, remove_facility):
+        t1 = time.time()
+
+        _pre_solution = list(self._solution)
+        _pre_solution[insert_facility] = False
+        _pre_solution[remove_facility] = True
+        pre_closest_demands2solution = self._cost_matrix[:, _pre_solution][np.arange(self._n)[:, None], self.argpartition]
+        argpartition = np.argpartition(self._distance_matrix[:, self._solution], 2, axis=-1)[:,:2]
+        closest_demands2solution = self._cost_matrix[:, self._solution][np.arange(self._n)[:, None], argpartition]
+
+        pre_solution_idx = np.where(_pre_solution)[0]
+        solution_idx = np.where(self._solution)[0]
+        for i in range(self._n):
+            if remove_facility in self.argpartition[i] or insert_facility in argpartition[i]:
+                self._loss[solution_idx[argpartition[i][0]]] += closest_demands2solution[i][1] - closest_demands2solution[i][0]
+                self._loss[pre_solution_idx[self.argpartition[i][0]]] -= pre_closest_demands2solution[i][1] - pre_closest_demands2solution[i][0]
+                # if self.argpartition[i][0] != argpartition[i][0]:
+                #     for j in range(self._n):
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][argpartition[i][0]]:
+                #             self._gain[j] += max(0, closest_demands2solution[i][0] - self._cost_matrix[i, j])
+                #         if self._distance_matrix[i, j] < self._distance_matrix[i, self._solution][self.argpartition[i][0]]:
+                #             self._gain[j] -= max(0, pre_closest_demands2solution[i][0] - self._cost_matrix[i, j])
+
+        self._loss[remove_facility] = 0
+        self._gain[insert_facility] = 0
+
+        self.argpartition = list(argpartition)
+        # print(self._gain, self._loss)
+        t2 = time.time()
+        # print('update gainloss time:',t2-t1)
+
+    def init_facility_mask(self, old_facility, new_facility):
+        self._old_facility_mask[old_facility] = True
+        self._new_facility_mask[new_facility] = True
+
+    def get_facility_mask(self):
+        return self._old_facility_mask, self._new_facility_mask
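set_instance builds real-world distance matrices with sklearn's haversine metric, which expects (lat, lon) in radians and returns unit-sphere distances; that is why app.py applies np.deg2rad and scales by ~6371 km. A two-point check of this convention (coordinates taken from the examples earlier in the diff):

import numpy as np
from sklearn.metrics import pairwise_distances

latlon = np.deg2rad(np.array([[40.71, -73.93],
                              [40.57, -73.96]]))
dist_km = pairwise_distances(latlon, metric='haversine') * 6371
print(dist_km[0, 1])  # roughly 15.8 km
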
facility_location/env/obs_extractor.py
ADDED
@@ -0,0 +1,184 @@
+from typing import Dict, Tuple, Text
+
+import numpy as np
+
+from facility_location.env.facility_location_client import FacilityLocationClient
+from facility_location.utils.config import Config
+
+
+class ObsExtractor:
+    def __init__(self, cfg: Config, flc: FacilityLocationClient, node_range: int, edge_range: int):
+        self.cfg = cfg
+        self._flc = flc
+        self._node_range = node_range
+        self._edge_range = edge_range
+
+        self._construct_virtual_node_feature()
+        self._construct_node_features()
+        self._construct_action_mask()
+
+    def _construct_virtual_node_feature(self) -> None:
+        virtual_node_facility = 0
+        virtual_node_distance_min = 0
+        virtual_node_distance_sub_min = 0
+        virtual_node_cost_min = 0
+        virtual_node_cost_sub_min = 0
+        virtual_gain = 0
+        virtual_loss = 0
+
+        virtual_node_x = 0.5
+        virtual_node_y = 0.5
+        virtual_node_demand = 1
+        virtual_node_avg_distance = 0
+        virtual_node_avg_cost = 0
+        self._virtual_dynamic_node_feature = np.array([
+            virtual_node_facility,
+            virtual_node_distance_min,
+            virtual_node_distance_sub_min,
+            virtual_node_cost_min,
+            virtual_node_cost_sub_min,
+            virtual_gain,
+            virtual_loss,
+        ], dtype=np.float32)
+        self._virtual_static_node_feature = np.array([
+            virtual_node_x,
+            virtual_node_y,
+            virtual_node_demand,
+            virtual_node_avg_distance,
+            virtual_node_avg_cost,
+        ], dtype=np.float32)
+        self._virtual_node_feature = np.concatenate([
+            self._virtual_dynamic_node_feature,
+            self._virtual_static_node_feature,
+        ], axis=-1)
+
+    def _construct_node_features(self) -> None:
+        self._node_features = np.zeros((self._node_range, self._virtual_node_feature.size), dtype=np.float32)
+
+    def _construct_action_mask(self) -> None:
+        self._old_facility_mask = np.full(self._node_range, False)
+        self._new_facility_mask = np.full(self._node_range, False)
+
+    def get_node_dim(self) -> int:
+        return self._virtual_node_feature.size
+
+    def reset(self) -> None:
+        self._compute_static_obs()
+        self._reset_node_features()
+        self._reset_action_mask()
+
+    def _compute_static_obs(self) -> None:
+        xy, demands, n, _ = self._flc.get_instance()
+        if n + 2 > self._node_range:
+            print(n, self._node_range)
+            # raise ValueError('The number of nodes exceeds the maximum limit.')
+        self._n = n
+        avg_distance, avg_cost = self._flc.get_avg_distance_and_cost()
+        avg_distance = avg_distance / np.max(avg_distance)
+        avg_cost = avg_cost / np.max(avg_cost)
+        self._static_node_features = np.stack([
+            xy[:, 0],
+            xy[:, 1],
+            demands,
+            avg_distance,
+            avg_cost,
+        ], axis=-1).astype(np.float32)
+        static_adjacency_list = self._flc.get_static_adjacency_list()
+
+        obs_node_mask = np.full(1 + n, True)
+        self._obs_node_mask = self._pad_mask(obs_node_mask, self._node_range, 'nodes')
+
+        obs_static_edge_mask = np.full(n + static_adjacency_list.shape[0], True)
+        self._obs_static_edge_mask = self._pad_mask(obs_static_edge_mask, self._edge_range, 'edges')
+
+        self._static_adjacency_list = self._pad_edge(static_adjacency_list)
+
+    def _reset_node_features(self) -> None:
+        self._node_features[:, :] = 0
+        self._node_features[0] = self._virtual_node_feature
+        self._node_features[1:self._n+1, len(self._virtual_dynamic_node_feature):] = self._static_node_features
+
+    def _reset_action_mask(self) -> None:
+        self._old_facility_mask[:] = False
+        self._new_facility_mask[:] = False
+
+    def get_obs(self, t: int) -> Dict:
+        obs_nodes, obs_static_edges, obs_dynamic_edges, \
+            obs_node_mask, obs_static_edge_mask, obs_dynamic_edges_mask = self._get_obs_graph()
+        obs = {
+            'node_features': obs_nodes,
+            'static_adjacency_list': obs_static_edges,
+            'dynamic_adjacency_list': obs_dynamic_edges,
+            'node_mask': obs_node_mask,
+            'static_edge_mask': obs_static_edge_mask,
+            'dynamic_edge_mask': obs_dynamic_edges_mask,
+        }
+
+        return obs
+
+    def _get_obs_graph(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        facility = self._flc.get_current_solution().astype(np.float32)
+        distance = self._flc.get_current_distance().astype(np.float32)
+        distance = distance / np.max(distance)
+        cost = self._flc.get_current_cost().astype(np.float32)
+        cost = cost / np.max(cost)
+        gain, loss = self._flc.get_gain_and_loss()
+        gain = gain / np.max(gain)
+        loss = loss / np.max(loss)
+        dynamic_node_features = np.stack([
+            facility,
+            distance[:, 0],
+            distance[:, 1],
+            cost[:, 0],
+            cost[:, 1],
+            gain,
+            loss,
+        ], axis=-1)
+        self._node_features[1:self._n+1, :len(self._virtual_dynamic_node_feature)] = dynamic_node_features
+        obs_nodes = self._node_features
+        obs_static_edges = self._static_adjacency_list
+        obs_dynamic_edges = self._flc.get_dynamic_adjacency_list()
+        # print(obs_dynamic_edges.shape)
+        obs_dynamic_edge_mask = np.full(obs_dynamic_edges.shape[0], True)
+        obs_node_mask = self._obs_node_mask
+        obs_static_edge_mask = self._obs_static_edge_mask
+        obs_dynamic_edges = self._pad_edge_wo_virtual(obs_dynamic_edges)
+        obs_dynamic_edge_mask = self._pad_mask(obs_dynamic_edge_mask, self._edge_range, 'edges')
+
+        return obs_nodes, obs_static_edges, obs_dynamic_edges, obs_node_mask, obs_static_edge_mask, obs_dynamic_edge_mask
+        # return obs_nodes, obs_static_edges, obs_node_mask, obs_edge_mask
+
+    def _get_obs_action_mask(self, t: int) -> Tuple[np.ndarray, np.ndarray]:
+        old_facility_mask, new_facility_mask = self._flc.get_facility_mask()
+        old_tabu_mask, new_tabu_mask = self._flc.get_tabu_mask(t)
+        self._old_facility_mask[1:self._n+1] = np.logical_and(old_facility_mask, old_tabu_mask)
+        self._new_facility_mask[1:self._n+1] = np.logical_and(new_facility_mask, new_tabu_mask)
+        obs_old_facility_mask = self._old_facility_mask
+        obs_new_facility_mask = self._new_facility_mask
+        if not np.any(obs_old_facility_mask) or not np.any(obs_new_facility_mask):
+            raise ValueError('The action mask is empty.')
+        return obs_old_facility_mask, obs_new_facility_mask
+
+    @staticmethod
+    def _pad_mask(mask: np.ndarray, max_num: int, name: Text) -> np.ndarray:
+        pad = (0, max_num - mask.size)
+        if pad[1] < 0:
+            raise ValueError(f'The number of {name} exceeds the maximum limit.')
+        return np.pad(mask, pad, mode='constant', constant_values=False)
+
+    def _pad_edge(self, edge: np.ndarray) -> np.ndarray:
+        virtual_edge = np.stack([np.zeros(self._n), np.arange(1, self._n + 1)], axis=-1).astype(np.int32)
+        edge = np.concatenate([virtual_edge, edge + 1], axis=0)
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            raise ValueError('The number of edges exceeds the maximum limit.')
+        return np.pad(edge, pad, mode='constant', constant_values=self._node_range - 1)
+
+    def _pad_edge_wo_virtual(self, edge: np.ndarray) -> np.ndarray:
+        pad = ((0, self._edge_range - edge.shape[0]), (0, 0))
+        if pad[0][1] < 0:
+            print(self._edge_range, edge.shape[0])
+            raise ValueError('The number of edges exceeds the maximum limit.')
+
+        return np.pad(edge + 1, pad, mode='constant', constant_values=self._node_range - 1)
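
Reviewer note: ObsExtractor pads every instance to fixed node_range/edge_range shapes so that differently sized instances share one observation space: index 0 is a virtual node linked to every real node, real indices shift by 1, padding rows point at the dummy index node_range - 1, and boolean masks mark which entries are real. A self-contained sketch of that pad-and-mask pattern with illustrative shapes:

import numpy as np

NODE_RANGE, EDGE_RANGE = 10, 16
edges = np.array([[0, 1], [1, 2], [2, 0]])   # 3 real edges over 3 real nodes
n = 3

# Shift real indices by 1 so index 0 is free for the virtual node, then link
# the virtual node to every real node, as _pad_edge does.
virtual = np.stack([np.zeros(n, dtype=int), np.arange(1, n + 1)], axis=-1)
padded = np.concatenate([virtual, edges + 1], axis=0)
# Padding rows point at the dummy index NODE_RANGE - 1 and get masked out.
padded = np.pad(padded, ((0, EDGE_RANGE - padded.shape[0]), (0, 0)),
                mode='constant', constant_values=NODE_RANGE - 1)

real = len(virtual) + len(edges)             # only these entries carry information
mask = np.pad(np.full(real, True), (0, EDGE_RANGE - real),
              mode='constant', constant_values=False)
assert padded.shape == (EDGE_RANGE, 2) and mask.sum() == real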
facility_location/env/pmp.py
ADDED
@@ -0,0 +1,502 @@
+import io
+import warnings
+from typing import Tuple, Dict, Optional, List, Text
+
+import gym
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import pickle, os
+
+from numpy import ndarray
+
+from facility_location.utils.config import Config
+from facility_location.env.facility_location_client import FacilityLocationClient
+from facility_location.env.obs_extractor import ObsExtractor
+from stable_baselines3 import PPO
+from stable_baselines3.common.vec_env import DummyVecEnv
+from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
+from facility_location.utils.policy import get_policy_kwargs
+
+
+class PMPEnv(gym.Env):
+    EPSILON = 1e-6
+
+    def __init__(self, cfg: Config):
+        self.cfg = cfg
+        self._train_region = cfg.env_specs['region']
+        self._eval_region = cfg.eval_specs['region']
+        self._min_n = cfg.env_specs['min_n']
+        self._max_n = cfg.env_specs['max_n']
+        self._min_p_ratio = cfg.env_specs['min_p_ratio']
+        self._max_p_ratio = cfg.env_specs['max_p_ratio']
+        self._max_steps_scale = cfg.env_specs['max_steps_scale']
+        self._tabu_stable_steps_scale = cfg.env_specs['tabu_stable_steps_scale']
+        self._popstar = cfg.env_specs['popstar']
+
+        self._seed(cfg.seed)
+
+        self._done = False
+
+        self._set_node_edge_range()
+
+        self._flc = FacilityLocationClient(cfg, self._np_random)
+        self._obs_extractor = ObsExtractor(cfg, self._flc, self._node_range, self._edge_range)
+
+        self._declare_spaces()
+
+    def _declare_spaces(self) -> None:
+        self.observation_space = gym.spaces.Dict({
+            'node_features': gym.spaces.Box(low=0, high=1, shape=(self._node_range, self.get_node_feature_dim())),
+            'static_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64),
+            'dynamic_adjacency_list': gym.spaces.Box(low=0, high=self._node_range, shape=(self._edge_range, 2), dtype=np.int64),
+            'node_mask': gym.spaces.Box(low=0, high=1, shape=(self._node_range,), dtype=bool),
+            'static_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=bool),
+            'dynamic_edge_mask': gym.spaces.Box(low=0, high=1, shape=(self._edge_range,), dtype=bool),
+        })
+        # Both branches currently declare the same flat (old, new) swap-action space.
+        if not self._popstar:
+            self.action_space = gym.spaces.Discrete(self._node_range ** 2)
+        else:
+            self.action_space = gym.spaces.Discrete(self._node_range ** 2)
+
+    def _set_node_edge_range(self) -> None:
+        self._node_range = self._max_n + 2
+        self._edge_range = int(self._max_n ** 2 * self._max_p_ratio)
+
+    def get_node_feature_dim(self) -> int:
+        return self._obs_extractor.get_node_dim()
+
+    def _seed(self, seed: int) -> None:
+        self._np_random = np.random.default_rng(seed)
+
+    def get_reward(self) -> float:
+        reward = self._obj_value[self._t - 1] - self._obj_value[self._t]
+        return reward
+
+    def _transform_action(self, action: np.ndarray) -> np.ndarray:
+        if self._popstar:
+            action = np.array(np.unravel_index(action, (self._node_range, self._node_range)))
+        action = action - 1
+        return action
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        # if self._done:
+        #     print('done')
+        #     for i in range(self._t):
+        #         print(f'{i}:', np.where(self._solution[i]))
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Tuple[Dict, Dict]:
+        if self._train_region is None:
+            points, demands, n, p = self._generate_new_instance()
+            self._flc.set_instance(points, demands, n, p, False)
+        else:
+            points, demands, n, p = self._use_real_instance()
+            self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p), {}
+
+    def prepare(self, n: int, p: int) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def render(self, mode='human', dpi=300) -> Optional[np.ndarray]:
+        gdf, facilities = self._flc.get_gdf_facilities()
+        if len(facilities) > 10:
+            warnings.warn('Too many facilities to render. Only rendering the first 10.')
+            facilities = facilities[:10]
+
+        cm = plt.get_cmap('tab10')
+        fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=dpi)
+        for i, f in enumerate(facilities):
+            gdf.loc[gdf['assignment'] == f].plot(ax=axs[0], zorder=2, alpha=0.7,
+                                                 edgecolor="k", color=cm(i))
+            gdf.loc[[f]].plot(ax=axs[0], marker='*', markersize=300, zorder=3,
+                              alpha=0.7, edgecolor="k", color=cm(i))
+        axs[0].set_title("Facility Location", fontweight="bold")
+        plot_obj_value = self._obj_value[:self._t + 1]
+        axs[1].plot(plot_obj_value, marker='.', markersize=10, color='k')
+        axs[1].set_title("Objective Value", fontweight="bold")
+        axs[1].set_xticks(np.arange(self._max_steps + 1, step=math.ceil((self._max_steps + 1) / 10)))
+        fig.tight_layout()
+
+        if mode == 'human':
+            plt.show()
+        else:
+            io_buf = io.BytesIO()
+            fig.savefig(io_buf, format='raw', dpi=dpi)
+            io_buf.seek(0)
+            img_arr = np.reshape(np.frombuffer(io_buf.getvalue(), dtype=np.uint8),
+                                 newshape=(int(fig.bbox.bounds[3]), int(fig.bbox.bounds[2]), -1))
+            io_buf.close()
+            return img_arr
+
+    def close(self):
+        plt.close()
+
+    def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
+        p = int(max(n * p_ratio, 4))
+
+        points = self._np_random.uniform(size=(n, 2))
+        while np.unique(points, axis=0).shape[0] != n:
+            points = self._np_random.uniform(size=(n, 2))
+        demands = self._np_random.random(size=(n,))
+        return points, demands, n, p
+
+    def _use_real_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        data_path = './data/{}/pkl'.format(self.cfg.eval_specs['region'])
+        files = os.listdir(data_path)
+        files = [f for f in files if f.endswith('.pkl')]
+        sample_data_path = os.path.join(data_path, files[self._np_random.integers(len(files))])
+        with open(sample_data_path, 'rb') as f:
+            np_data = pickle.load(f)
+
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p = max(int(n * self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)), 4)
+        sample_cbgs = self._np_random.choice(list(np_data[1].keys()), n, replace=False)
+        points = []
+        demands = []
+        for cbg in sample_cbgs:
+            points.append(np_data[1][cbg]['pos'])
+            demands.append(np_data[1][cbg]['demand'])
+        points = np.array(points)
+        demands = np.array(demands)
+
+        return points, demands, n, p
+
+    def _get_obs(self, t: int) -> Dict:
+        return self._obs_extractor.get_obs(t)
+
+    def get_initial_solution(self) -> np.ndarray:
+        return self._solution[0]
+
+
+class EvalPMPEnv(PMPEnv):
+    def __init__(self, cfg: Config, positions, demands, n, p, boost=False):
+        self._eval_np = (n, p)
+        self._eval_seed = cfg.eval_specs['seed']
+        self._boost = boost
+        print(self._boost)
+        self.points = positions
+        self.demands = demands
+        self._n = n
+        self._p = p
+
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        n, p = self._eval_np
+
+        self._node_range = n + 2
+        self._edge_range = n * p
+
+    def get_eval_num_cases(self) -> int:
+        # Note: _eval_num_cases is never assigned in this class.
+        return self._eval_num_cases
+
+    def get_eval_np(self) -> Tuple[int, int]:
+        return self._eval_np
+
+    def reset_instance_id(self) -> None:
+        self._instance_id = 0
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._max_steps)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+        print(self._t, self._max_steps)
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = -np.min(self._obj_value)
+        else:
+            reward = 0.0
+
+        return reward
+
+    def get_best_solution(self) -> np.ndarray:
+        return self._best_solution
+
+    def reset(self, seed=0) -> Tuple[Dict, Dict]:
+        self._flc.set_instance(self.points, self.demands, self._n, self._p, False)
+        return self.prepare(self._n, self._p, self._boost), {}
+
+    def prepare(self, n: int, p: int, boost: bool) -> Dict:
+        initial_obj_value, initial_solution = self._flc.compute_initial_solution()
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = max(int(p * self._max_steps_scale), 5)
+        if boost:
+            self._max_steps = max(int(self._max_steps_scale / 10), 5)
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def get_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        points, demands, n, p = self._flc.get_instance()
+        return points, demands, n, p
+
+    def get_distance_and_cost(self) -> Tuple[np.ndarray, np.ndarray]:
+        return self._flc.get_distance_and_cost_matrix()
+
+    def evaluate(self, solution: np.ndarray) -> float:
+        self._flc.set_solution(solution)
+        obj_value = self._flc.compute_obj_value()
+        return obj_value
+
+
+class MULTIPMP(PMPEnv):
+    EPSILON = 1e-6
+
+    def __init__(self, cfg, data_npy, boost=False):
+        self.cfg = cfg
+        self.data_npy = data_npy
+        self._boost = boost
+        self._all_points, self._all_demands, self._n, self._all_p = self._load_multi_facility_data(data_npy)
+        self.boost = boost
+        self._all_solutions = self._load_multi_facility_solutions(boost)
+        self._final_solutions = list(self._all_solutions)
+        self._num_types = len(self._all_p)
+        self._current_type = 0
+        self._all_max_steps, self._old_mask, self._new_mask = self._get_max_steps()
+        super().__init__(cfg)
+
+    def _set_node_edge_range(self) -> None:
+        self._node_range = self._n + 2
+        self._edge_range = self._n * max(self._all_p)
+
+    def step(self, action: np.ndarray):
+        if self._done:
+            raise RuntimeError('Action taken after episode is done.')
+        obj_value, solution, info = self._flc.swap(action, self._t)
+        self._t += 1
+        self._done = (self._t == self._all_max_steps[-1] and self._current_type == len(self._all_max_steps) - 1)
+        self._obj_value[self._t] = obj_value
+        self._solution[self._t] = solution
+        reward = self.get_reward()
+        if obj_value < self._best_obj_value - self.EPSILON:
+            self._best_obj_value = obj_value
+            self._best_solution = solution
+            self._last_best_t = self._t
+        elif (self._t - self._last_best_t) % self._tabu_stable_steps == 0:
+            self._flc.reset_tabu_time()
+
+        if self._t == self._all_max_steps[self._current_type] and not self._done:
+            self._t = 0
+            self._multi_obj += obj_value
+            self._final_solutions[self._current_type] = solution
+            self._update_type()
+
+        if self._done:
+            with open('./facility_location/solutions.pkl', 'wb') as f:
+                pickle.dump(self._final_solutions, f)
+
+        return self._get_obs(self._t), reward, self._done, False, info
+
+    def reset(self, seed=0) -> Tuple[Dict, Dict]:
+        self._current_type = 0
+        points = self._all_points
+        demands = self._all_demands[:, 0]
+        n = self._n
+        p = self._all_p[0]
+        solution = self._all_solutions[0]
+        self._multi_obj = 0
+
+        self._flc.set_instance(points, demands, n, p, True)
+
+        return self.prepare(n, p, solution), {}
+
+    def _update_type(self):
+        if self._current_type >= self._num_types:
+            raise RuntimeError('Action taken after episode is done.')
+        self._current_type += 1
+        if self._current_type < self._num_types - 1:
+            points = self._all_points
+            demands = self._all_demands[:, self._current_type]
+            n = self._n
+            p = self._all_p[self._current_type]
+            solution = self._all_solutions[self._current_type]
+            self._flc.set_instance(points, demands, n, p, True)
+            self.prepare(n, p, solution)
+
+    def prepare(self, n: int, p: int, solution: list) -> Dict:
+        initial_solution = solution
+        initial_obj_value = self._flc.compute_obj_value_from_solution(initial_solution)
+        self._obs_extractor.reset()
+        self._done = False
+        self._t = 0
+        self._max_steps = self._all_max_steps[self._current_type]
+        self._flc.init_facility_mask(self._old_mask[self._current_type], self._new_mask[self._current_type])
+        self._obj_value = np.zeros(self._max_steps + 1)
+        self._obj_value[0] = initial_obj_value
+        self._solution = np.zeros((self._max_steps + 1, n), dtype=bool)
+        self._solution[0] = initial_solution
+        self._best_solution = initial_solution
+        self._best_obj_value = initial_obj_value
+        self._last_best_t = 0
+        self._tabu_stable_steps = max(1, round(self._max_steps * self._tabu_stable_steps_scale))
+        return self._get_obs(self._t)
+
+    def _get_max_steps(self) -> Tuple[list, list, list]:
+        tmp_all_solutions = list(self._all_solutions)
+        count_true = [sum(s) for s in zip(*tmp_all_solutions)]
+        max_steps = []
+        old_idx = []
+        new_idx = []
+        for t in range(self._num_types):
+            old = [i for i in range(len(count_true)) if count_true[i] > 1 and tmp_all_solutions[t][i]]
+            new = [i for i in range(len(count_true)) if count_true[i] == 0]
+            if len(old):
+                old_idx.append(old)
+                new_idx.append(new)
+                max_steps.append(len(old))
+                for i in old:
+                    count_true[i] = count_true[i] - 1
+        return max_steps, old_idx, new_idx
+
+    def _generate_new_instance(self) -> Tuple[np.ndarray, np.ndarray, int, int]:
+        # Identical to the parent implementation.
+        n = self._np_random.integers(self._min_n, self._max_n, endpoint=True)
+        p_ratio = self._np_random.uniform(self._min_p_ratio, self._max_p_ratio)
+        p = int(max(n * p_ratio, 4))
+
+        points = self._np_random.uniform(size=(n, 2))
+        while np.unique(points, axis=0).shape[0] != n:
+            points = self._np_random.uniform(size=(n, 2))
+        demands = self._np_random.random(size=(n,))
+        return points, demands, n, p
+
+    def _load_multi_facility_data(self, data_npy) -> Tuple[np.ndarray, np.ndarray, int, np.ndarray]:
+        data = data_npy.split('\n')
+        n = len(data)
+        p = int((len(data[0].split(' ')) - 2) / 2)
+
+        positions = []
+        demands = []
+        actual_facilities = []
+        ps = []
+        for row in data:
+            row = row.split(' ')
+            row = [x for x in row if len(x)]
+            positions.append([float(row[0]), float(row[1])])
+
+            demand = []
+            for i in range(2, 2 + p):
+                demand.append(float(row[i]))
+            demands.append(demand)
+
+            actual_facility = []
+            for i in range(2 + p, 2 + 2 * p):
+                actual_facility.append(bool(int(float(row[i]))))
+            actual_facilities.append(actual_facility)
+
+        positions = np.array(positions)
+        positions = np.deg2rad(positions)
+        demands = np.array(demands)
+        actual_facilities = np.array(actual_facilities)
+        ps = actual_facilities.sum(axis=0)
+
+        return positions, demands, n, ps
+
+    def _load_multi_facility_solutions(self, boost) -> list:
+        def load_model(positions, demands, n, p, boost):
+            eval_env = EvalPMPEnv(self.cfg, positions, demands, n, p, boost)
+            eval_env = DummyVecEnv([lambda: eval_env])
+
+            policy_kwargs = get_policy_kwargs(self.cfg)
+            test_model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                             eval_env,
+                             verbose=1,
+                             policy_kwargs=policy_kwargs,
+                             device='cuda:1')  # hard-coded GPU
+            train_model = PPO.load(self.cfg.load_model_path)
+            test_model.set_parameters(train_model.get_parameters())
+            return test_model, eval_env
+
+        def get_optimal_solution(model, eval_env):
+            obs = eval_env.reset()
+            done = False
+            while not done:
+                action, _ = model.predict(obs, deterministic=True)
+                obs, _, done, info = eval_env.step(action)
+            return eval_env.get_attr('_best_solution')[0]
+
+        multi_solutions = []
+        for i in range(len(self._all_p)):
+            positions = self._all_points
+            demands = self._all_demands[:, i]
+            n = self._n
+            p = self._all_p[i]
+            model, env = load_model(positions, demands, n, p, boost)
+            multi_solutions.append(get_optimal_solution(model, env))
+
+        return multi_solutions
+
+    def get_reward(self) -> float:
+        if self._done:
+            reward = np.min(self._obj_value)
+        else:
+            reward = 0.0
+        return reward
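
Reviewer note: all three environments share one episode shape: reset seeds an initial solution, each step applies one (close, open) swap through the client, and the tabu clock resets after a stretch of non-improving steps. The reward schemes differ, though; a small worked check with a hypothetical objective trace:

import numpy as np

# PMPEnv.get_reward pays obj[t-1] - obj[t] each step, so an episode's return
# telescopes to the total improvement over the initial solution:
obj = np.array([12.0, 11.2, 11.5, 10.4])    # hypothetical objective trace
step_rewards = obj[:-1] - obj[1:]           # [0.8, -0.3, 1.1]
assert np.isclose(step_rewards.sum(), obj[0] - obj[-1])
# EvalPMPEnv instead pays -min(obj) once at episode end, scoring the best
# solution seen anywhere in the episode rather than the final one.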
facility_location/multi_eval.py
ADDED
@@ -0,0 +1,96 @@
+import os
+import pickle
+
+import setproctitle
+from absl import app, flags
+import time
+import random
+from typing import Tuple, Union, Text
+
+import numpy as np
+import torch as th
+
+import sys
+import gymnasium
+sys.modules["gym"] = gymnasium
+
+from stable_baselines3.common.evaluation import evaluate_policy
+from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv, VecEnvWrapper
+
+from facility_location.agent.solver import PMPSolver
+from facility_location.env import EvalPMPEnv, MULTIPMP
+from facility_location.utils import Config
+from facility_location.agent import MaskedFacilityLocationActorCriticPolicy
+from facility_location.utils.policy import get_policy_kwargs
+
+import warnings
+warnings.filterwarnings('ignore')
+
+
+AGENT = Union[PMPSolver, PPO]
+
+def get_model(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              device: str) -> PPO:
+    policy_kwargs = get_policy_kwargs(cfg)
+    model = PPO(MaskedFacilityLocationActorCriticPolicy,
+                env,
+                verbose=1,
+                policy_kwargs=policy_kwargs,
+                device=device)
+    return model
+
+
+def get_agent(cfg: Config,
+              env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+              model_path: Text) -> AGENT:
+    if cfg.agent in ['rl-mlp', 'rl-gnn', 'rl-agnn']:
+        test_model = get_model(cfg, env, device='cuda:0')
+        trained_model = PPO.load(model_path)
+        test_model.set_parameters(trained_model.get_parameters())
+        agent = test_model
+    else:
+        raise ValueError(f'Agent {cfg.agent} not supported.')
+    return agent
+
+
+def evaluate(agent: AGENT,
+             env: Union[VecEnvWrapper, DummyVecEnv, EvalPMPEnv],
+             num_cases: int,
+             return_episode_rewards: bool):
+    if isinstance(agent, PPO):
+        return evaluate_ppo(agent, env, num_cases, return_episode_rewards=return_episode_rewards)
+    else:
+        raise ValueError(f'Agent {agent} not supported.')
+
+from stable_baselines3.common.callbacks import BaseCallback
+
+
+def evaluate_ppo(agent: PPO, env: EvalPMPEnv, num_cases: int, return_episode_rewards: bool) -> Tuple[float, float]:
+    rewards, _ = evaluate_policy(agent, env, n_eval_episodes=num_cases, return_episode_rewards=return_episode_rewards)
+    return rewards
+
+
+def main(data_npy, boost=False):
+    th.manual_seed(0)
+    np.random.seed(0)
+    random.seed(0)
+    model_path = './facility_location/best_model.zip'
+
+    cfg = Config('plot', 0, False, '/data2/suhongyuan/flp', 'rl-gnn', model_path=model_path)
+
+    eval_env = MULTIPMP(cfg, data_npy, boost)
+    eval_env = Monitor(eval_env)
+    eval_env = DummyVecEnv([lambda: eval_env])
+    agent = get_agent(cfg, eval_env, model_path)
+    start_time = time.time()
+    _ = evaluate(agent, eval_env, 1, return_episode_rewards=True)
+    eval_time = time.time() - start_time
+    print(f'\t time: {eval_time}')
+
+
+if __name__ == '__main__':
+    # Note: app.run passes the parsed argv list as main's first argument, so
+    # running this module directly hands argv (not instance text) to data_npy;
+    # main is intended to be called directly with the instance string.
+    app.run(main)
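
Reviewer note: _load_multi_facility_data fixes the expected data_npy layout: one whitespace-separated row per location, with two coordinates in degrees, then one demand column per facility type, then one 0/1 built-facility flag per type. A hypothetical three-location, two-type instance in that layout:

# Hypothetical instance: 3 locations, 2 facility types.
# Columns: lon lat demand_t0 demand_t1 built_t0 built_t1
data_npy = '\n'.join([
    '116.40 39.90 120.0 80.0 1 0',
    '116.42 39.92  60.0 40.0 0 1',
    '116.44 39.88  90.0 30.0 0 0',
])
# main(data_npy) then optimizes both facility types jointly; boost=True
# appears to shrink the per-type swap budget for a faster, rougher pass.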
facility_location/solutions.pkl
ADDED
Binary file (1.92 kB)
facility_location/utils/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .config import Config
+
+__all__ = ["Config"]
facility_location/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (209 Bytes)
facility_location/utils/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (210 Bytes)
facility_location/utils/__pycache__/config.cpython-310.pyc
ADDED
Binary file (4.09 kB)
facility_location/utils/__pycache__/config.cpython-39.pyc
ADDED
Binary file (4.61 kB)
facility_location/utils/__pycache__/policy.cpython-310.pyc
ADDED
Binary file (1.47 kB)
facility_location/utils/__pycache__/policy.cpython-39.pyc
ADDED
Binary file (1.47 kB)
facility_location/utils/config.py
ADDED
@@ -0,0 +1,133 @@
+import os
+from typing import Text, Dict
+
+from stable_baselines3.common.utils import get_latest_run_id
+
+import yaml
+
+
+class Config:
+
+    def __init__(self, cfg_id: Text, global_seed: int, tmp: bool, root_dir: Text,
+                 agent: Text = 'rl-gnn', reset_num_timesteps: bool = True, cfg_dict: Dict = None, model_path: Text = None):
+        self.cfg_id = cfg_id
+        self.seed = global_seed
+        if cfg_dict is not None:
+            cfg = cfg_dict
+        else:
+            file_path = './facility_location/cfg/{}.yaml'.format(self.cfg_id)
+
+            class TupleSafeLoader(yaml.SafeLoader):
+                def construct_python_tuple(self, node):
+                    return tuple(self.construct_sequence(node))
+
+            TupleSafeLoader.add_constructor(
+                u'tag:yaml.org,2002:python/tuple',
+                TupleSafeLoader.construct_python_tuple)
+
+            def load_yaml(file_path):
+                cfg = yaml.load(open(file_path, 'r'), Loader=TupleSafeLoader)
+                return cfg
+
+            cfg = load_yaml(file_path)
+        # create dirs
+        self.root_dir = '/tmp/flp' if tmp else root_dir
+        self.agent = agent
+        self.multi = cfg.get('multi', False)
+
+        self.tb_log_path = os.path.join(self.root_dir, 'runs')
+        self.tb_log_name = f'{cfg_id}-agent-{agent}-seed-{global_seed}'
+        latest_run_id = get_latest_run_id(self.tb_log_path, self.tb_log_name)
+        if not reset_num_timesteps:
+            # Continue training in the same directory
+            latest_run_id -= 1
+        self.cfg_dir = os.path.join(self.root_dir,
+                                    'output', f'{cfg_id}-agent-{agent}-seed-{global_seed}_{latest_run_id + 1}')
+        self.ckpt_save_path = os.path.join(self.cfg_dir, 'ckpt')
+        self.best_model_path = os.path.join(self.cfg_dir, 'best-models')
+        self.latest_model_path = os.path.join(self.cfg_dir, 'latest-models')
+        self.load_model_path = model_path
+
+        # env
+        self.env_specs = cfg.get('env_specs', dict())
+        self.reward_specs = cfg.get('reward_specs', dict())
+        self.obs_specs = cfg.get('obs_specs', dict())
+        self.eval_specs = cfg.get('eval_specs', dict())
+
+        # agent config
+        self.agent_specs = cfg.get('agent_specs', dict())
+        self.mlp_specs = cfg.get('mlp_specs', dict())
+        self.gnn_specs = cfg.get('gnn_specs', dict())
+        self.ts_specs = cfg.get('ts_specs', dict())
+        self.popstar_specs = cfg.get('popstar_specs', dict())
+        self.ga_specs = cfg.get('ga_specs', dict())
+
+        # training config
+        self.gamma = cfg.get('gamma', 0.99)
+        self.tau = cfg.get('tau', 0.95)
+        self.state_encoder_specs = cfg.get('state_encoder_specs', dict())
+        self.policy_specs = cfg.get('policy_specs', dict())
+        self.value_specs = cfg.get('value_specs', dict())
+        self.lr = cfg.get('lr', 4e-4)
+        self.weightdecay = cfg.get('weightdecay', 0.0)
+        self.eps = cfg.get('eps', 1e-5)
+        self.value_pred_coef = cfg.get('value_pred_coef', 0.5)
+        self.entropy_coef = cfg.get('entropy_coef', 0.01)
+        self.clip_epsilon = cfg.get('clip_epsilon', 0.2)
+        self.max_num_iterations = cfg.get('max_num_iterations', 1000)
+        self.num_episodes_per_iteration = cfg.get('num_episodes_per_iteration', 1000)
+        self.max_sequence_length = cfg.get('max_sequence_length', 100)
+        self.num_optim_epoch = cfg.get('num_optim_epoch', 4)
+        self.mini_batch_size = cfg.get('mini_batch_size', 1024)
+        self.save_model_interval = cfg.get('save_model_interval', 10)
+
+    def log(self, logger, tb_logger):
+        """Log cfg to logger and tensorboard."""
+        logger.info(f'id: {self.cfg_id}')
+        logger.info(f'seed: {self.seed}')
+        logger.info(f'env_specs: {self.env_specs}')
+        logger.info(f'reward_specs: {self.reward_specs}')
+        logger.info(f'obs_specs: {self.obs_specs}')
+        logger.info(f'agent_specs: {self.agent_specs}')
+        logger.info(f'gamma: {self.gamma}')
+        logger.info(f'tau: {self.tau}')
+        logger.info(f'state_encoder_specs: {self.state_encoder_specs}')
+        logger.info(f'policy_specs: {self.policy_specs}')
+        logger.info(f'value_specs: {self.value_specs}')
+        logger.info(f'lr: {self.lr}')
+        logger.info(f'weightdecay: {self.weightdecay}')
+        logger.info(f'eps: {self.eps}')
+        logger.info(f'value_pred_coef: {self.value_pred_coef}')
+        logger.info(f'entropy_coef: {self.entropy_coef}')
+        logger.info(f'clip_epsilon: {self.clip_epsilon}')
+        logger.info(f'max_num_iterations: {self.max_num_iterations}')
+        logger.info(f'num_episodes_per_iteration: {self.num_episodes_per_iteration}')
+        logger.info(f'max_sequence_length: {self.max_sequence_length}')
+        logger.info(f'num_optim_epoch: {self.num_optim_epoch}')
+        logger.info(f'mini_batch_size: {self.mini_batch_size}')
+        logger.info(f'save_model_interval: {self.save_model_interval}')
+
+        if tb_logger is not None:
+            tb_logger.add_hparams(
+                hparam_dict={
+                    'id': self.cfg_id,
+                    'seed': self.seed,
+                    'env_specs': str(self.env_specs),
+                    'reward_specs': str(self.reward_specs),
+                    'obs_specs': str(self.obs_specs),
+                    'agent_specs': str(self.agent_specs),
+                    'gamma': self.gamma,
+                    'tau': self.tau,
+                    'state_encoder_specs': str(self.state_encoder_specs),
+                    'policy_specs': str(self.policy_specs),
+                    'value_specs': str(self.value_specs),
+                    'lr': self.lr,
+                    'weightdecay': self.weightdecay,
+                    'eps': self.eps,
+                    'value_pred_coef': self.value_pred_coef,
+                    'entropy_coef': self.entropy_coef,
+                    'clip_epsilon': self.clip_epsilon,
+                    'max_num_iterations': self.max_num_iterations,
+                    'num_episodes_per_iteration': self.num_episodes_per_iteration,
+                    'max_sequence_length': self.max_sequence_length,
+                    'num_optim_epoch': self.num_optim_epoch,
+                    'mini_batch_size': self.mini_batch_size,
+                    'save_model_interval': self.save_model_interval},
+                metric_dict={'hparam/placeholder': 0.0})
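
Reviewer note: the custom TupleSafeLoader exists so that !!python/tuple entries in the YAML files still load under SafeLoader, and passing cfg_dict skips the YAML file entirely, which is convenient for tests. A minimal sketch with placeholder spec values:

from facility_location.utils import Config

# Placeholder spec values; any key left out falls back to the defaults above.
cfg = Config(
    cfg_id='plot', global_seed=0, tmp=True, root_dir='/unused', agent='rl-gnn',
    cfg_dict={
        'env_specs': {'region': None, 'min_n': 50, 'max_n': 100,
                      'min_p_ratio': 0.1, 'max_p_ratio': 0.2,
                      'max_steps_scale': 2.0, 'tabu_stable_steps_scale': 0.2,
                      'popstar': False},
        'eval_specs': {'region': None, 'seed': 0},
    },
)
assert cfg.gamma == 0.99 and cfg.load_model_path is None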
facility_location/utils/policy.py
ADDED
@@ -0,0 +1,57 @@
+from typing import Dict
+
+from facility_location.agent import FacilityLocationMLPExtractor, FacilityLocationGNNExtractor, FacilityLocationAttentionGNNExtractor
+from facility_location.utils import Config
+
+
+def get_policy_kwargs(cfg: Config) -> Dict:
+    if cfg.agent == 'rl-mlp':
+        hidden_units = cfg.mlp_specs.get('hidden_units', (32, 32))
+        node_dim = hidden_units[-1]
+        policy_feature_dim = FacilityLocationMLPExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationMLPExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationMLPExtractor,
+            features_extractor_kwargs=dict(
+                hidden_units=hidden_units,),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-gnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    elif cfg.agent == 'rl-agnn':
+        num_gnn_layers = cfg.gnn_specs.get('num_gnn_layers', 2)
+        node_dim = cfg.gnn_specs.get('node_dim', 32)
+        policy_feature_dim = FacilityLocationAttentionGNNExtractor.get_policy_feature_dim(node_dim)
+        value_feature_dim = FacilityLocationAttentionGNNExtractor.get_value_feature_dim(node_dim)
+        policy_kwargs = dict(
+            policy_feature_dim=policy_feature_dim,
+            value_feature_dim=value_feature_dim,
+            policy_hidden_units=cfg.agent_specs.get('policy_hidden_units', (32, 32, 1)),
+            value_hidden_units=cfg.agent_specs.get('value_hidden_units', (32, 32, 1)),
+            features_extractor_class=FacilityLocationAttentionGNNExtractor,
+            features_extractor_kwargs=dict(
+                num_gnn_layers=num_gnn_layers,
+                node_dim=node_dim),
+            popstar=cfg.env_specs.get('popstar', False),)
+
+    else:
+        raise NotImplementedError
+    return policy_kwargs
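
Reviewer note: the three branches differ only in which features extractor they wire in, and the resulting dict feeds straight into PPO's policy_kwargs (see multi_eval.get_model). A sketch of the rl-gnn path with illustrative spec values:

from facility_location.agent import FacilityLocationGNNExtractor
from facility_location.utils import Config
from facility_location.utils.policy import get_policy_kwargs

cfg = Config('plot', 0, True, '/unused', 'rl-gnn',
             cfg_dict={'gnn_specs': {'num_gnn_layers': 3, 'node_dim': 64}})
kwargs = get_policy_kwargs(cfg)
assert kwargs['features_extractor_class'] is FacilityLocationGNNExtractor
assert kwargs['features_extractor_kwargs'] == dict(num_gnn_layers=3, node_dim=64)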
final_solutions.pkl
ADDED
File without changes
model.pth
DELETED
Binary file (24.3 kB)
model.py
DELETED
@@ -1,24 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Net(nn.Module):
-    def __init__(self, input_size, hidden_size, num_classes):
-        super(Net, self).__init__()
-        self.fc1 = nn.Linear(input_size, hidden_size)
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Linear(hidden_size, num_classes)
-        self.softmax = nn.Softmax(dim=1)
-
-    def forward(self, x):
-        out = self.fc1(x)
-        out = self.relu(out)
-        out = self.fc2(out)
-        out = self.softmax(out)
-        return out
-
-
-if __name__ == '__main__':
-    net = Net(100, 50, 10)
-    torch.save(net.state_dict(), 'model.pth')