Dzy6
/

multisource-spatial-point-prediction

Model card Files Files and versions Community

Dzy6 committed on Jul 2

Commit

c7995e9

•

0 Parent(s):

init

Browse files

Files changed (9) hide show

README.md +3 -0
data/north/column +7 -0
data/south/column +7 -0
dataset.py +183 -0
model.py +386 -0
requirements.txt +300 -0
run.sh +4 -0
train.py +421 -0
utils/utils.py +67 -0

README.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # KDD24 Self-consistent Deep Geometric Learning for Heterogeneous Multi-source Spatial Point Data Prediction
2	+
3	+ The data is available on [Dropbox](https://www.dropbox.com/sh/fi5bsxqeuz46h6l/AABSkN6cav7omgvgATX1cs6ga?dl=0).

data/north/column ADDED Viewed

	@@ -0,0 +1,7 @@

+['DM8HA' 'Date.Local' 'relative_humidity_max' 'air_temperature_min'
+ 'precipitation' 'air_temperature_max' 'wind_direction' 'solar_radiation'
+ 'relative_humidity_min' 'wind_speed' 'elevation' 'CO' 'NH3' 'NOX' 'SO2'
+ 'VOC' 'PM25-PRI' 'PM10-PRI' 'population_density_county' 'open_water'
+ 'developed' 'bareRock_sand_clay' 'd_forest' 'e_forest' 'm_forest' 'shrub'
+ 'grassland' 'pasture' 'crops' 'w_wetlands' 'eh_wetlands' 'cmaq'
+ 'Latitude' 'Longitude']

data/south/column ADDED Viewed

	@@ -0,0 +1,7 @@

+['DM8HA' 'Date.Local' 'relative_humidity_max' 'air_temperature_min'
+ 'precipitation' 'air_temperature_max' 'wind_direction' 'solar_radiation'
+ 'relative_humidity_min' 'wind_speed' 'elevation' 'CO' 'NH3' 'NOX' 'SO2'
+ 'VOC' 'PM25-PRI' 'PM10-PRI' 'population_density_county' 'open_water'
+ 'developed' 'bareRock_sand_clay' 'd_forest' 'e_forest' 'm_forest' 'shrub'
+ 'grassland' 'pasture' 'crops' 'w_wetlands' 'eh_wetlands' 'cmaq'
+ 'Latitude' 'Longitude']

dataset.py ADDED Viewed

	@@ -0,0 +1,183 @@

+from __future__ import print_function
+import torch.utils.data as data
+import os
+import os.path
+import torch
+import numpy as np
+import pandas as pd
+import sys
+from torch_geometric.nn import knn_graph
+from torch_geometric.data import Data
+from torch_geometric.loader import DataLoader
+from torch_geometric.utils import add_self_loops
+from torch_geometric.data.collate import collate
+from torch_geometric.data.separate import separate
+import pickle
+import time
+from torch_geometric.data.data import BaseData
+from torch_geometric.data.storage import BaseStorage
+from typing import Any
+def mycollate(data_list):
+    r"""Collates a Python list of :obj:`torch_geometric.data.Data` objects
+    to the internal storage format of
+    :class:`~torch_geometric.data.InMemoryDataset`."""
+    if len(data_list) == 1:
+        return data_list[0], None
+    data, slices, _ = collate(
+        data_list[0].__class__,
+        data_list=data_list,
+        increment=False,
+        add_batch=False,
+    )
+    return data, slices
+def myseparate(cls, batch: BaseData, idx: int, slice_dict: Any) -> BaseData:
+    data = cls().stores_as(batch)
+    # We iterate over each storage object and recursively separate all its attributes:
+    for batch_store, data_store in zip(batch.stores, data.stores):
+        attrs = set(batch_store.keys())
+        for attr in attrs:
+            slices = slice_dict[attr]
+            data_store[attr] = _separate(attr, batch_store[attr], idx, slices,
+                                         batch, batch_store)
+    return data
+def _separate(
+    key: str,
+    value: Any,
+    idx: int,
+    slices: Any,
+    batch: BaseData,
+    store: BaseStorage,
+) :
+        # Narrow a `torch.Tensor` based on `slices`.
+        key = str(key)
+        cat_dim = batch.__cat_dim__(key, value, store)
+        start, end = int(slices[idx]), int(slices[idx + 1])
+        value = value.narrow(cat_dim or 0, start, end - start)
+        return value
+def load_point(datasetname="south",k=5,small=[False,50,100]):
+    """
+    load point and build graph pairs
+    """
+    print("loading")
+    time1=time.time()
+    if small[0]:
+        print("small south dataset k=5")
+        datasetname="south"
+        k=5
+        filename=os.path.join("data",datasetname,datasetname+f'_{k}.pt')
+        [data_graphs1,slices_graphs1,data_graphs2,slices_graphs2]=torch.load(filename)
+        flattened_list_graphs1 = [myseparate(cls=data_graphs1.__class__, batch=data_graphs1,idx=i,slice_dict=slices_graphs1) for i in range(small[1]*2)]
+        flattened_list_graphs2 = [myseparate(cls=data_graphs2.__class__, batch=data_graphs2,idx=i,slice_dict=slices_graphs2) for i in range(small[2]*2)]
+        unflattened_list_graphs1= [flattened_list_graphs1[n:n+2] for n in range(0, len(flattened_list_graphs1), 2)]
+        unflattened_list_graphs2= [flattened_list_graphs2[n:n+2] for n in range(0, len(flattened_list_graphs2), 2)]
+        print(f"Load data used {time.time()-time1:.1f} seconds")
+        return unflattened_list_graphs1,unflattened_list_graphs2
+    return process(datasetname,k)
+def process(datasetname="south",k=5):
+    time1=time.time()
+    """
+    build graph pairs
+    """
+    point_path= os.path.join("data",datasetname,datasetname+".pkl")
+    with open(point_path, 'rb') as f:
+        data = pickle.load(f)
+    graphs1=[]
+    graphs2=[]
+    for day in data:
+        day_d1=day[0]
+        day_d2=day[1]
+        assert(len(day_d1)<len(day_d2))
+        pos1=day_d1[:,-2:]
+        edge_index1=knn_graph(pos1,k=k)
+        pos2=day_d2[:,-2:]
+        edge_index2=knn_graph(pos2,k=k)
+        """
+        iterately mask point in day_d1, the high fidelity data, to build high fidelity graphs, which share the same structure
+        """
+        for i in range(day_d1.shape[0]):
+            day_d1_copy=day_d1.clone().detach()
+            target=day_d1[i,0]
+            day_d1_copy[i,0]=0
+            target_index=torch.tensor(i,dtype=torch.long)
+            is_source = torch.ones(day_d1.shape[0] ,dtype=torch.bool)
+            is_source[i]=False
+            graph1=Data(x=day_d1_copy,pos=pos1,edge_index=edge_index1,target=target[None],target_index=target_index[None],is_source=is_source,datasource=torch.tensor(0,dtype=torch.long)[None])
+            """
+            build pairing low fidelity graphs, which add the masked point in day_d1, so structure is changing
+            """
+            day_plus2=torch.cat([day_d1_copy[i][None,:],day_d2])
+            pos_plus2=day_plus2[:,-2:]
+            edge_index_plus2=knn_graph(pos_plus2,k=k)
+            is_source = torch.ones(day_d2.shape[0]+1 ,dtype=torch.bool)
+            is_source[0]=False
+            graph2=Data(x=day_plus2,pos=pos_plus2,edge_index=edge_index_plus2,target=target[None],target_index=torch.tensor(0,dtype=torch.long)[None],is_source=is_source,datasource=torch.tensor(0,dtype=torch.long)[None])
+            graphs1.append([graph1,graph2])
+        """
+        iterately mask point in day_d2, the low fidelity data, to build low fidelity graphs, which share the same structure
+        """
+        for i in range(day_d2.shape[0]):
+            day_d2_copy=day_d2.clone().detach()
+            target=day_d2[i,0]
+            day_d2_copy[i,0]=0
+            target_index=torch.tensor(i,dtype=torch.long)
+            is_source = torch.ones(day_d2.shape[0] ,dtype=torch.bool)
+            is_source[i]=False
+            graph2=Data(x=day_d2_copy,pos=pos2,edge_index=edge_index2,target=target[None],target_index=target_index[None],is_source=is_source,datasource=torch.tensor(1,dtype=torch.long)[None])
+            """
+            build pairing high fidelity graphs, which add the masked point in day_d2, so structure is changing
+            """
+            day_plus1=torch.cat([day_d2_copy[i][None,:],day_d1])
+            pos_plus1=day_plus1[:,-2:]
+            edge_index_plus1=knn_graph(pos_plus1,k=k)
+            is_source = torch.ones(day_d1.shape[0]+1 ,dtype=torch.bool)
+            is_source[0]=False
+            graph1=Data(x=day_plus1,pos=pos_plus1,edge_index=edge_index_plus1,target=target[None],target_index=torch.tensor(0,dtype=torch.long)[None],is_source=is_source,datasource=torch.tensor(1,dtype=torch.long)[None])
+            graphs2.append([graph1,graph2])
+    np.random.shuffle(graphs1)
+    np.random.shuffle(graphs2)
+    return [graphs1,graphs2]
+class MergeNeighborDataset(torch.utils.data.Dataset):
+    """ Customized dataset for each domain"""
+    def __init__(self,X):
+        self.X = X                           # set data
+    def __len__(self):
+        return len(self.X)                   # return length
+    def __getitem__(self, idx):
+        return self.X[idx]
+def kneighbor_point(datasetname="south",k=1,daily=False):
+    """
+    build k neighbor pairing
+    """
+    ranking_path= os.path.join("data",datasetname,datasetname+"_ranking.pkl")
+    with open(ranking_path, 'rb') as f:
+        rankings = pickle.load(f)
+    point_path= os.path.join("data",datasetname,datasetname+".pkl")
+    with open(point_path, 'rb') as f:
+        days = pickle.load(f)
+    samples=[]
+    for i in range(len(days)):
+        day_d1=days[i][0]
+        day_d2=days[i][1]
+        ranking=rankings[i]
+        """
+        iterately get point in day_d1, the high fidelity data, to build samples
+        """
+        sample1 = []
+        for j in range(day_d1.shape[0]):
+            point1=day_d1[j]
+            point1_neighbors=day_d2[ranking[j,:k]]
+            point1_neighbor=torch.mean(point1_neighbors,axis=0)
+            sample1.append([point1,point1_neighbor])
+        if daily:
+            samples.append(sample1)
+        else:
+            samples.extend(sample1)
+    if not daily:
+        return [samples]
+    return samples
+if __name__ == '__main__':
+    # Running this module as a script does nothing: the bare expression
+    # below is only a placeholder body.
+    1

model.py ADDED Viewed

	@@ -0,0 +1,386 @@

+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.utils.data
+import numpy as np
+import torch.nn.functional as F
+from torch.nn import Parameter
+from torch_geometric.nn.dense.linear import Linear
+from torch_geometric.nn.conv import MessagePassing
+from torch_geometric.utils import softmax
+# from dataset import
+from torch_geometric.nn.inits import glorot, zeros
+from torch_scatter import scatter
+from utils.utils import triplets,get_angle,GaussianSmearing
+from torch.nn import ModuleList
+from math import pi as PI
+import math
+"""
+The theory based Grid cell spatial relation encoder,
+See https://openreview.net/forum?id=Syx0Mh05YQ
+Learning Grid Cells as Vector Representation of Self-Position Coupled with Matrix Representation of Self-Motion
+"""
+def _cal_freq_list(freq_init, frequency_num, max_radius, min_radius):
+    if freq_init == "random":
+        # the frequence we use for each block, alpha in paper
+        # freq_list shape: (frequency_num)
+        freq_list = np.random.random(size=[frequency_num]) * max_radius
+    elif freq_init == "geometric":
+        # freq_list = []
+        # for cur_freq in range(frequency_num):
+        #     base = 1.0/(np.power(max_radius, cur_freq*1.0/(frequency_num-1)))
+        #     freq_list.append(base)
+        # freq_list = np.asarray(freq_list)
+        log_timescale_increment = (math.log(float(max_radius) / float(min_radius)) /
+          (frequency_num*1.0 - 1))
+        timescales = min_radius * np.exp(
+            np.arange(frequency_num).astype(float) * log_timescale_increment)
+        freq_list = 1.0/timescales
+    return freq_list
+class TheoryGridCellSpatialRelationEncoder(nn.Module):
+    """
+    Given a list of (deltaX,deltaY), encode them using the position encoding function
+    """
+    def __init__(self, spa_embed_dim, coord_dim = 2, frequency_num = 16,
+        max_radius = 10000,  min_radius = 1000, freq_init = "geometric", ffn = None):
+        """
+        Args:
+            spa_embed_dim: the output spatial relation embedding dimention
+            coord_dim: the dimention of space, 2D, 3D, or other
+            frequency_num: the number of different sinusoidal with different frequencies/wavelengths
+            max_radius: the largest context radius this model can handle
+        """
+        super(TheoryGridCellSpatialRelationEncoder, self).__init__()
+        self.frequency_num = frequency_num
+        self.coord_dim = coord_dim
+        self.max_radius = max_radius
+        self.min_radius = min_radius
+        self.spa_embed_dim = spa_embed_dim
+        self.freq_init = freq_init
+        # the frequence we use for each block, alpha in paper
+        self.cal_freq_list()
+        self.cal_freq_mat()
+        # there unit vectors which is 120 degree apart from each other
+        self.unit_vec1 = np.asarray([1.0, 0.0])                        # 0
+        self.unit_vec2 = np.asarray([-1.0/2.0, math.sqrt(3)/2.0])      # 120 degree
+        self.unit_vec3 = np.asarray([-1.0/2.0, -math.sqrt(3)/2.0])     # 240 degree
+        self.input_embed_dim = self.cal_input_dim()
+        self.ffn = ffn
+    def cal_freq_list(self):
+        self.freq_list = _cal_freq_list(self.freq_init, self.frequency_num, self.max_radius, self.min_radius)
+    def cal_freq_mat(self):
+        # freq_mat shape: (frequency_num, 1)
+        freq_mat = np.expand_dims(self.freq_list, axis = 1)
+        # self.freq_mat shape: (frequency_num, 6)
+        self.freq_mat = np.repeat(freq_mat, 6, axis = 1)
+    def cal_input_dim(self):
+        # compute the dimention of the encoded spatial relation embedding
+        return int(6 * self.frequency_num)
+    def make_input_embeds(self, coords):
+        if type(coords) == np.ndarray:
+            assert self.coord_dim == np.shape(coords)[2]
+            coords = list(coords)
+        elif type(coords) == list:
+            assert self.coord_dim == len(coords[0][0])
+        elif type(coords)  == torch.Tensor:
+            assert self.coord_dim == (coords.shape)[2]
+            coords=coords.detach().cpu().numpy()
+        else:
+            raise Exception("Unknown coords data type for GridCellSpatialRelationEncoder")
+        # (batch_size, num_context_pt, coord_dim)
+        coords_mat = np.asarray(coords).astype(float)
+        batch_size = coords_mat.shape[0]
+        num_context_pt = coords_mat.shape[1]
+        # compute the dot product between [deltaX, deltaY] and each unit_vec
+        # (batch_size, num_context_pt, 1)
+        angle_mat1 = np.expand_dims(np.matmul(coords_mat, self.unit_vec1), axis = -1)
+        # (batch_size, num_context_pt, 1)
+        angle_mat2 = np.expand_dims(np.matmul(coords_mat, self.unit_vec2), axis = -1)
+        # (batch_size, num_context_pt, 1)
+        angle_mat3 = np.expand_dims(np.matmul(coords_mat, self.unit_vec3), axis = -1)
+        # (batch_size, num_context_pt, 6)
+        angle_mat = np.concatenate([angle_mat1, angle_mat1, angle_mat2, angle_mat2, angle_mat3, angle_mat3], axis = -1)
+        # (batch_size, num_context_pt, 1, 6)
+        angle_mat = np.expand_dims(angle_mat, axis = -2)
+        # (batch_size, num_context_pt, frequency_num, 6)
+        angle_mat = np.repeat(angle_mat, self.frequency_num, axis = -2)
+        # (batch_size, num_context_pt, frequency_num, 6)
+        angle_mat = angle_mat * self.freq_mat
+        # (batch_size, num_context_pt, frequency_num*6)
+        spr_embeds = np.reshape(angle_mat, (batch_size, num_context_pt, -1))
+        # make sinuniod function
+        # sin for 2i, cos for 2i+1
+        # spr_embeds: (batch_size, num_context_pt, frequency_num*6=input_embed_dim)
+        spr_embeds[:, :, 0::2] = np.sin(spr_embeds[:, :, 0::2])  # dim 2i
+        spr_embeds[:, :, 1::2] = np.cos(spr_embeds[:, :, 1::2])  # dim 2i+1
+        return spr_embeds
+    def forward(self, coords):
+        """
+        Given a list of coords (deltaX, deltaY), give their spatial relation embedding
+        Args:
+            coords: a python list with shape (batch_size, num_context_pt, coord_dim)
+        Return:
+            sprenc: Tensor shape (batch_size, num_context_pt, spa_embed_dim)
+        """
+        spr_embeds = self.make_input_embeds(coords)
+        # spr_embeds: (batch_size, num_context_pt, input_embed_dim)
+        spr_embeds = torch.FloatTensor(spr_embeds)
+        if self.ffn is not None:
+            return self.ffn(spr_embeds)
+        else:
+            return spr_embeds
+# Module-level encoder instance created at import time
+# (spa_embed_dim=8, every other argument left at its default).
+theoryencoder=TheoryGridCellSpatialRelationEncoder(spa_embed_dim=8)
+class GFusion(nn.Module):
+    def __init__(self,  h_channel=16,input_featuresize=32,localdepth=2,num_interactions=3,finaldepth=3,num_of_datasources=2,share=True,batchnorm="False"):
+        super(GFusion,self).__init__()
+        self.training=True
+        self.h_channel = h_channel
+        self.input_featuresize=input_featuresize
+        self.localdepth = localdepth
+        self.num_interactions=num_interactions
+        self.finaldepth=finaldepth
+        self.batchnorm = batchnorm
+        self.activation=nn.ReLU()
+        num_gaussians=(1,12)
+        self.theta_expansion = GaussianSmearing(-PI, PI, num_gaussians[1])
+        self.mlps_list = ModuleList()
+        if int(share[0])==1:
+            mlp_geo = ModuleList()
+            for i in range(self.localdepth):
+                if i == 0:
+                    mlp_geo.append(Linear(sum(num_gaussians), h_channel))
+                else:
+                    mlp_geo.append(Linear(h_channel, h_channel))
+                if self.batchnorm == "True":
+                    mlp_geo.append(nn.BatchNorm1d(h_channel))
+                mlp_geo.append(self.activation)
+            for i in range(num_of_datasources):
+                self.mlps_list.append(mlp_geo)
+        else:
+            for i in range(num_of_datasources):
+                mlp_geo = ModuleList()
+                for i in range(self.localdepth):
+                    if i == 0:
+                        mlp_geo.append(Linear(sum(num_gaussians), h_channel))
+                    else:
+                        mlp_geo.append(Linear(h_channel, h_channel))
+                    if self.batchnorm == "True":
+                        mlp_geo.append(nn.BatchNorm1d(h_channel))
+                    mlp_geo.append(self.activation)
+                self.mlps_list.append(mlp_geo)
+        self.mlps_list_backup = ModuleList()
+        for i in range(num_of_datasources):
+            mlp_geo = ModuleList()
+            for i in range(self.localdepth):
+                if i == 0:
+                    mlp_geo.append(Linear(4, h_channel)) # for FN version
+                else:
+                    mlp_geo.append(Linear(h_channel, h_channel))
+                if self.batchnorm == "True":
+                    mlp_geo.append(nn.BatchNorm1d(h_channel))
+                mlp_geo.append(self.activation)
+            self.mlps_list_backup.append(mlp_geo)
+        self.translinear=Linear(input_featuresize+1, self.h_channel)
+        self.interactions_list = ModuleList()
+        if int(share[1])==1:
+            interactions= ModuleList()
+            for i in range(self.num_interactions):
+                block = SPNN(
+                    in_ch=self.input_featuresize,
+                    hidden_channels=self.h_channel,
+                    activation=self.activation,
+                    finaldepth=self.finaldepth,
+                    batchnorm=self.batchnorm,
+                    num_input_geofeature=self.h_channel
+                )
+                interactions.append(block)
+            for i in range(num_of_datasources):
+                self.interactions_list.append(interactions)
+        else:
+            for i in range(num_of_datasources):
+                interactions= ModuleList()
+                for i in range(self.num_interactions):
+                    block = SPNN(
+                        in_ch=self.input_featuresize,
+                        hidden_channels=self.h_channel,
+                        activation=self.activation,
+                        finaldepth=self.finaldepth,
+                        batchnorm=self.batchnorm,
+                        num_input_geofeature=self.h_channel
+                    )
+                    interactions.append(block)
+                self.interactions_list.append(interactions)
+        self.finalMLP_list = ModuleList()
+        if int(share[2])==1:
+            finalMLP=ModuleList()
+            for i in range(self.finaldepth + 1):
+                finalMLP.append(Linear(self.h_channel, self.h_channel))
+                if self.batchnorm == "True":
+                    finalMLP.append(nn.BatchNorm1d(self.h_channel))
+                finalMLP.append(self.activation)
+            finalMLP.append(Linear(self.h_channel, 1))
+            for i in range(num_of_datasources):
+                self.finalMLP_list.append(finalMLP)
+        else:
+            for i in range(num_of_datasources):
+                finalMLP=ModuleList()
+                for i in range(self.finaldepth + 1):
+                    finalMLP.append(Linear(self.h_channel, self.h_channel))
+                    if self.batchnorm == "True":
+                        finalMLP.append(nn.BatchNorm1d(self.h_channel))
+                    finalMLP.append(self.activation)
+                finalMLP.append(Linear(self.h_channel, 1))
+                self.finalMLP_list.append(finalMLP)
+        self.reset_parameters()
+    def reset_parameters(self):
+        for i in range(len(self.mlps_list)):
+            for lin in self.mlps_list[i]:
+                if isinstance(lin, Linear):
+                    torch.nn.init.xavier_uniform_(lin.weight)
+                    lin.bias.data.fill_(0)
+        for i in range(len(self.interactions_list)):
+            for block in self.interactions_list[i]:
+                block.reset_parameters()
+        for finalMLP in self.finalMLP_list:
+            for lin in finalMLP:
+                if isinstance(lin, Linear):
+                    torch.nn.init.xavier_uniform_(lin.weight)
+                    lin.bias.data.fill_(0)
+    def single_forward(self, coords,edge_index,edge_index_2rd, edx_2nd,batch,input_feature,is_source,edge_rep,datasource_idx):
+        distances={}
+        thetas={}
+        if edge_rep:
+            i, j, k = edge_index_2rd
+            distances[1]=(coords[edge_index[0]] - coords[edge_index[1]]).norm(p=2, dim=1)
+            theta_ijk = get_angle(coords[j] - coords[i], coords[k] - coords[j])
+            v1 = torch.cross(F.pad(coords[j] - coords[i],(0,1)), F.pad(coords[k] - coords[j],(0,1)), dim=1)[...,2]
+            flag = torch.sign((v1))
+            flag[flag==0]=-1
+            thetas[1] = scatter(theta_ijk*flag ,edx_2nd,dim=0,dim_size=edge_index.shape[1],reduce='min')
+            thetas[1]=self.theta_expansion(thetas[1])
+            geo_encoding_1st=distances[1][:,None]
+            geo_encoding_1st[geo_encoding_1st==0]=1E-10
+            geo_encoding_1st=torch.pow(geo_encoding_1st,-1)
+            geo_encoding_2nd = thetas[1]
+            geo_encoding=torch.cat([geo_encoding_1st,geo_encoding_2nd],dim=-1)
+        else:
+            # coords=theoryencoder(coords[None,:])
+            # coords=coords[0].to("cuda")
+            coords_j = coords[edge_index[0]]
+            coords_i = coords[edge_index[1]]
+            geo_encoding=torch.cat([coords_j,coords_i],dim=-1)
+        if edge_rep:
+            for lin in self.mlps_list[datasource_idx]:
+                geo_encoding=lin(geo_encoding)
+        else:
+            for lin in self.mlps_list_backup[datasource_idx]:
+                geo_encoding=lin(geo_encoding)
+            geo_encoding=torch.zeros_like(geo_encoding,device=geo_encoding.device,dtype=geo_encoding.dtype)
+        node_feature=self.translinear(input_feature[:,:-2])
+        for interaction in self.interactions_list[datasource_idx]:
+            node_feature =  interaction(node_feature,geo_encoding,edge_index,is_source)
+        return node_feature
+    def forward(self, coords,edge_index,edge_index_2rd, edx_2nd,batch,input_feature,is_source,edge_rep):
+        outputs=[]
+        for i in range(len(coords)):
+            output=self.single_forward(coords[i],edge_index[i],edge_index_2rd[i], edx_2nd[i],batch[i],input_feature[i],is_source[i],edge_rep,i)
+            for lin in self.finalMLP_list[i]:
+                output=lin(output)
+            outputs.append(output)
+        return outputs
+class SPNN(torch.nn.Module):
+    def __init__(
+        self,
+        in_ch,
+        hidden_channels,
+        activation=torch.nn.ReLU(),
+        finaldepth=3,
+        batchnorm="False",
+        num_input_geofeature=13
+    ):
+        super(SPNN, self).__init__()
+        self.activation = activation
+        self.finaldepth = finaldepth
+        self.batchnorm = batchnorm
+        self.num_input_geofeature=num_input_geofeature
+        self.att = Parameter(torch.Tensor(1, hidden_channels),requires_grad=True)
+        self.WMLP = ModuleList()
+        for i in range(self.finaldepth + 1):
+            if i == 0:
+                self.WMLP.append(Linear(hidden_channels*2+num_input_geofeature, hidden_channels))
+            else:
+                self.WMLP.append(Linear(hidden_channels, hidden_channels))
+            if self.batchnorm == "True":
+                self.WMLP.append(nn.BatchNorm1d(hidden_channels))
+            self.WMLP.append(self.activation)
+        self.reset_parameters()
+    def reset_parameters(self):
+        for lin in self.WMLP:
+            if isinstance(lin, Linear):
+                torch.nn.init.xavier_uniform_(lin.weight)
+                lin.bias.data.fill_(0)
+        glorot(self.att)
+    def forward(self, node_feature,geo_encoding,edge_index,is_source):
+        j, i = edge_index
+        input_feature=node_feature.clone()
+        if node_feature is None:
+            concatenated_vector = geo_encoding
+        else:
+            node_attr_0st = node_feature[i]
+            node_attr_1st = node_feature[j]
+            concatenated_vector = torch.cat(
+                [
+                    node_attr_0st,
+                    node_attr_1st,
+                    geo_encoding,
+                ],
+                dim=-1,
+            )
+        x_i = concatenated_vector
+        for lin in self.WMLP:
+            x_i=lin(x_i)
+        input_feature_j=input_feature[edge_index[0]]
+        x_i = F.leaky_relu(x_i)
+        alpha = F.leaky_relu(x_i * self.att).sum(dim=-1)
+        alpha = softmax(alpha, edge_index[1])
+        message=input_feature_j * alpha.unsqueeze(-1)
+        out_feature = scatter(message, edge_index[1], dim=0, reduce='add')
+        out_feature=input_feature+out_feature
+        return out_feature

requirements.txt ADDED Viewed

	@@ -0,0 +1,300 @@

+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+_libgcc_mutex=0.1=conda_forge
+_openmp_mutex=4.5=2_gnu
+_py-xgboost-mutex=2.0=cpu_0
+absl-py=1.3.0=py310h06a4308_0
+aiohttp=3.8.1=py310h7f8727e_1
+aiosignal=1.2.0=pyhd3eb1b0_0
+anyio=3.6.2=pyhd8ed1ab_0
+argon2-cffi=21.3.0=pyhd8ed1ab_0
+argon2-cffi-bindings=21.2.0=py310h5764c6d_3
+asttokens=2.0.5=pyhd3eb1b0_0
+async-timeout=4.0.2=py310h06a4308_0
+attrs=21.4.0=pyhd3eb1b0_0
+autograd=1.5=pyhd8ed1ab_0
+autopep8=1.6.0=pyhd3eb1b0_1
+backcall=0.2.0=pyhd3eb1b0_0
+beautifulsoup4=4.11.2=pyha770c72_0
+blas=1.0=mkl
+bleach=6.0.0=pyhd8ed1ab_0
+blinker=1.4=py310h06a4308_0
+blosc=1.21.1=h83bc5f7_3
+boost-cpp=1.74.0=h75c5d50_8
+bottleneck=1.3.5=py310ha9d4c09_0
+branca=0.5.0=pyhd8ed1ab_0
+brotli=1.0.9=h5eee18b_7
+brotli-bin=1.0.9=h5eee18b_7
+brotlipy=0.7.0=py310h7f8727e_1002
+bzip2=1.0.8=h7b6447c_0
+c-ares=1.18.1=h7f8727e_0
+ca-certificates=2022.12.7=ha878542_0
+cachetools=4.2.2=pyhd3eb1b0_0
+cairo=1.16.0=h19f5f5c_2
+certifi=2022.12.7=pyhd8ed1ab_0
+cffi=1.15.1=py310h74dc2b5_0
+cfitsio=4.1.0=hd9d235c_0
+charset-normalizer=2.0.4=pyhd3eb1b0_0
+click=8.0.4=py310h06a4308_0
+click-plugins=1.1.1=pyhd3eb1b0_0
+cligj=0.7.2=pyhd3eb1b0_0
+cryptography=38.0.1=py310h9ce1e76_0
+cudatoolkit=11.6.0=hecad31d_10
+curl=7.84.0=h5eee18b_0
+cycler=0.11.0=pyhd3eb1b0_0
+dataclasses=0.8=pyh6d0b6a4_7
+debugpy=1.5.1=py310h295c915_0
+decorator=5.1.1=pyhd3eb1b0_0
+defusedxml=0.7.1=pyhd8ed1ab_0
+entrypoints=0.4=py310h06a4308_0
+executing=0.8.3=pyhd3eb1b0_0
+expat=2.4.9=h6a678d5_0
+ffmpeg=4.2.2=h20bf706_0
+fftw=3.3.9=h27cfd23_1
+fiona=1.8.21=py310h60a68a4_2
+flit-core=3.8.0=pyhd8ed1ab_0
+folium=0.12.1.post1=pyhd8ed1ab_1
+font-ttf-dejavu-sans-mono=2.37=hd3eb1b0_0
+font-ttf-inconsolata=2.001=hcb22688_0
+font-ttf-source-code-pro=2.030=hd3eb1b0_0
+font-ttf-ubuntu=0.83=h8b1ccd4_0
+fontconfig=2.14.0=h8e229c2_0
+fonts-anaconda=1=h8fa9717_0
+fonts-conda-ecosystem=1=hd3eb1b0_0
+fonttools=4.25.0=pyhd3eb1b0_0
+freetype=2.11.0=h70c0345_0
+freexl=1.0.6=h27cfd23_0
+frozenlist=1.2.0=py310h7f8727e_1
+future=0.18.3=pyhd8ed1ab_0
+gdal=3.5.1=py310hce6f0df_1
+geographiclib=1.52=pyhd8ed1ab_0
+geopandas=0.11.1=pyhd8ed1ab_0
+geopandas-base=0.11.1=pyha770c72_0
+geopy=2.2.0=pyhd8ed1ab_0
+geos=3.11.0=h27087fc_0
+geotiff=1.7.1=h4fc65e6_3
+gettext=0.21.0=hf68c758_0
+giflib=5.2.1=h7b6447c_0
+glib=2.72.1=h6239696_0
+glib-tools=2.72.1=h6239696_0
+gmp=6.2.1=h295c915_3
+gnutls=3.6.15=he1e5248_0
+google-auth=2.6.0=pyhd3eb1b0_0
+google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
+gpy=1.10.0=py310hde88566_3
+grpcio=1.42.0=py310hce63b2e_0
+hdf4=4.2.15=h9772cbc_4
+hdf5=1.12.1=nompi_h2386368_104
+icu=70.1=h27087fc_0
+idna=3.4=py310h06a4308_0
+importlib-metadata=4.11.3=py310h06a4308_0
+importlib_resources=5.12.0=pyhd8ed1ab_0
+intel-openmp=2021.4.0=h06a4308_3561
+ipykernel=6.15.2=py310h06a4308_0
+ipython=8.4.0=py310h06a4308_0
+ipython_genutils=0.2.0=py_1
+jedi=0.18.1=py310h06a4308_1
+jinja2=3.1.2=py310h06a4308_0
+joblib=1.1.1=py310h06a4308_0
+jpeg=9e=h7f8727e_0
+json-c=0.16=h5eee18b_0
+jsonschema=4.17.3=pyhd8ed1ab_0
+jupyter_client=7.3.4=py310h06a4308_0
+jupyter_core=4.10.0=py310h06a4308_0
+jupyter_server=1.23.4=py310h06a4308_0
+jupyterlab_pygments=0.2.2=pyhd8ed1ab_0
+kealib=1.4.15=hfe1a663_0
+keyutils=1.6.1=h166bdaf_0
+kiwisolver=1.4.2=py310h295c915_0
+krb5=1.19.3=h3790be6_0
+lame=3.100=h7b6447c_0
+lcms2=2.12=h3be6417_0
+ld_impl_linux-64=2.38=h1181459_1
+lerc=3.0=h295c915_0
+libbrotlicommon=1.0.9=h5eee18b_7
+libbrotlidec=1.0.9=h5eee18b_7
+libbrotlienc=1.0.9=h5eee18b_7
+libcurl=7.84.0=h91b91d3_0
+libdap4=3.20.6=hd7c4107_2
+libdeflate=1.8=h7f8727e_5
+libedit=3.1.20210910=h7f8727e_0
+libev=4.33=h7f8727e_1
+libffi=3.4.2=h7f98852_5
+libgcc-ng=12.1.0=h8d9b700_16
+libgdal=3.5.1=h32640fd_1
+libgfortran-ng=11.2.0=h00389a5_1
+libgfortran5=11.2.0=h1234567_1
+libglib=2.72.1=h2d90d5f_0
+libgomp=12.1.0=h8d9b700_16
+libiconv=1.16=h7f8727e_2
+libidn2=2.3.2=h7f8727e_0
+libkml=1.3.0=h238a007_1014
+libnetcdf=4.8.1=nompi_h329d8a1_102
+libnghttp2=1.46.0=hce63b2e_0
+libnsl=2.0.0=h5eee18b_0
+libopus=1.3.1=h7b6447c_0
+libpng=1.6.37=hbc83047_0
+libpq=14.5=hd77ab85_0
+libprotobuf=3.20.1=h4ff587b_0
+libpysal=4.1.1=py_0
+librttopo=1.1.0=hf730bdb_11
+libsodium=1.0.18=h7b6447c_0
+libspatialindex=1.9.3=h2531618_0
+libspatialite=5.0.1=h38b5f51_18
+libsqlite=3.39.3=h753d276_0
+libssh2=1.10.0=h8f2d780_0
+libstdcxx-ng=12.1.0=ha89aaad_16
+libtasn1=4.16.0=h27cfd23_0
+libtiff=4.4.0=hecacb30_0
+libunistring=0.9.10=h27cfd23_0
+libuuid=2.32.1=h7f98852_1000
+libvpx=1.7.0=h439df22_0
+libwebp=1.2.4=h11a3e52_0
+libwebp-base=1.2.4=h5eee18b_0
+libxcb=1.15=h7f8727e_0
+libxgboost=1.7.1=cpu_ha3b9936_0
+libxml2=2.9.14=h22db469_4
+libzip=1.8.0=h5cef20c_0
+libzlib=1.2.12=h166bdaf_2
+littleutils=0.2.2=py_0
+llvm-openmp=8.0.1=hc9558a2_0
+lz4-c=1.9.3=h295c915_1
+mapclassify=2.4.3=pyhd3eb1b0_0
+markdown=3.3.4=py310h06a4308_0
+markupsafe=2.1.1=py310h7f8727e_0
+matplotlib-base=3.5.2=py310hf590b9c_0
+matplotlib-inline=0.1.6=py310h06a4308_0
+mgwr=2.1.2=py_0
+mistune=2.0.5=pyhd8ed1ab_0
+mkl=2021.4.0=h06a4308_640
+mkl-service=2.4.0=py310h7f8727e_0
+mkl_fft=1.3.1=py310hd6ae3a3_0
+mkl_random=1.2.2=py310h00e6091_0
+multidict=6.0.2=py310h5eee18b_0
+munch=2.5.0=pyhd3eb1b0_0
+munkres=1.1.4=py_0
+nbclassic=0.5.3=pyhb4ecaf3_3
+nbclient=0.5.13=pyhd8ed1ab_0
+nbconvert=7.2.9=pyhd8ed1ab_0
+nbconvert-core=7.2.9=pyhd8ed1ab_0
+nbconvert-pandoc=7.2.9=pyhd8ed1ab_0
+nbformat=5.7.3=pyhd8ed1ab_0
+ncurses=6.3=h5eee18b_3
+nest-asyncio=1.5.5=py310h06a4308_0
+nettle=3.7.3=hbbd107a_1
+networkx=2.8.4=py310h06a4308_0
+notebook=6.5.3=pyha770c72_0
+notebook-shim=0.2.2=pyhd8ed1ab_0
+nspr=4.33=h295c915_0
+nss=3.78=h2350873_0
+numexpr=2.8.3=py310hcea2de6_0
+numpy=1.23.1=py310h1794996_0
+numpy-base=1.23.1=py310hcba007f_0
+oauthlib=3.2.1=py310h06a4308_0
+ogb=1.3.5=pyhd8ed1ab_0
+openai=0.28.0=pypi_0
+openh264=2.1.1=h4ff587b_0
+openjpeg=2.4.0=h3ad879b_0
+openssl=1.1.1t=h0b41bf4_0
+outdated=0.2.2=pyhd8ed1ab_0
+packaging=21.3=pyhd3eb1b0_0
+pandas=1.4.3=py310h6a678d5_0
+pandoc=2.19.2=ha770c72_0
+pandocfilters=1.5.0=pyhd8ed1ab_0
+paramz=0.9.5=py_0
+parso=0.8.3=pyhd3eb1b0_0
+pcre=8.45=h295c915_0
+pexpect=4.8.0=pyhd3eb1b0_3
+pickleshare=0.7.5=pyhd3eb1b0_1003
+pillow=9.2.0=py310hace64e9_1
+pip=22.1.2=py310h06a4308_0
+pixman=0.40.0=h7f8727e_1
+pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0
+poppler=22.04.0=h1434ded_1
+poppler-data=0.4.11=h06a4308_0
+postgresql=14.5=hfdbbde3_0
+proj=9.0.1=h93bde94_1
+prometheus_client=0.16.0=pyhd8ed1ab_0
+prompt-toolkit=3.0.20=pyhd3eb1b0_0
+protobuf=3.20.1=py310h295c915_0
+psutil=5.9.0=py310h5eee18b_0
+ptyprocess=0.7.0=pyhd3eb1b0_2
+pure_eval=0.2.2=pyhd3eb1b0_0
+py-xgboost=1.7.1=cpu_py310hd1aba9c_0
+pyasn1=0.4.8=pyhd3eb1b0_0
+pyasn1-modules=0.2.8=py_0
+pycodestyle=2.8.0=pyhd3eb1b0_0
+pycparser=2.21=pyhd3eb1b0_0
+pygments=2.11.2=pyhd3eb1b0_0
+pyjwt=2.4.0=py310h06a4308_0
+pyopenssl=22.0.0=pyhd3eb1b0_0
+pyparsing=3.0.9=py310h06a4308_0
+pyproj=3.4.0=py310hf94497c_0
+pyrsistent=0.19.3=py310h1fa729e_0
+pysocks=1.7.1=py310h06a4308_0
+python=3.10.6=h582c2e5_0_cpython
+python-dateutil=2.8.2=pyhd3eb1b0_0
+python-fastjsonschema=2.16.3=pyhd8ed1ab_0
+python_abi=3.10=2_cp310
+pytorch=1.12.1=py3.10_cuda11.6_cudnn8.3.2_0
+pytorch-mutex=1.0=cuda
+pytz=2022.1=py310h06a4308_0
+pyzmq=23.2.0=py310h6a678d5_0
+readline=8.1.2=h7f8727e_1
+requests=2.28.1=py310h06a4308_0
+requests-oauthlib=1.3.0=py_0
+rsa=4.7.2=pyhd3eb1b0_1
+rtree=0.9.7=py310h06a4308_1
+scikit-learn=1.1.3=py310h6a678d5_0
+scipy=1.9.3=py310hd5efca6_0
+send2trash=1.8.0=pyhd8ed1ab_0
+setuptools=63.4.1=py310h06a4308_0
+shapely=1.8.4=py310h5e49deb_0
+six=1.16.0=pyhd3eb1b0_1
+snappy=1.1.9=h295c915_0
+sniffio=1.3.0=pyhd8ed1ab_0
+soupsieve=2.3.2.post1=pyhd8ed1ab_0
+spglm=1.0.8=py_0
+spreg=1.3.0=pyhd8ed1ab_0
+sqlite=3.39.2=h5082296_0
+stack_data=0.2.0=pyhd3eb1b0_0
+tensorboard=2.10.1=pyhd8ed1ab_0
+tensorboard-data-server=0.6.0=py310hca6d32c_0
+tensorboard-plugin-wit=1.8.1=py310h06a4308_0
+terminado=0.17.1=pyh41d4057_0
+threadpoolctl=2.2.0=pyh0d69192_0
+tiledb=2.9.5=h1e4a385_0
+tinycss2=1.2.1=pyhd8ed1ab_0
+tk=8.6.12=h1ccaba5_0
+toml=0.10.2=pyhd3eb1b0_0
+torch-cluster=1.6.0=pypi_0
+torch-geometric=2.1.0.post1=pypi_0
+torch-scatter=2.0.9=pypi_0
+torch-sparse=0.6.15=pypi_0
+torch-spline-conv=1.2.1=pypi_0
+torch-tb-profiler=0.4.0=pypi_0
+torchaudio=0.12.1=py310_cu116
+torchvision=0.13.1=py310_cu116
+tornado=6.2=py310h5eee18b_0
+tqdm=4.64.0=py310h06a4308_0
+traitlets=5.1.1=pyhd3eb1b0_0
+typing_extensions=4.3.0=py310h06a4308_0
+tzcode=2022c=h166bdaf_0
+tzdata=2022a=hda174b7_0
+urllib3=1.26.12=py310h06a4308_0
+wcwidth=0.2.5=pyhd3eb1b0_0
+webencodings=0.5.1=py_1
+websocket-client=1.5.1=pyhd8ed1ab_0
+werkzeug=2.0.3=pyhd3eb1b0_0
+wheel=0.37.1=pyhd3eb1b0_0
+x264=1!157.20191217=h7b6447c_0
+xerces-c=3.2.3=h55805fa_5
+xgboost=1.7.1=cpu_py310hd1aba9c_0
+xyzservices=2022.9.0=py310h06a4308_0
+xz=5.2.6=h166bdaf_0
+yarl=1.8.1=py310h5eee18b_0
+zeromq=4.3.4=h2531618_0
+zipp=3.8.0=py310h06a4308_0
+zlib=1.2.12=h7f8727e_2
+zstd=1.5.2=ha4553b6_0

run.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+# dataset=north
+# dataset=south
+dataset=flu
+python3 ./train.py --dataset $dataset  --manualSeed True --man_seed 5770

train.py ADDED Viewed

	@@ -0,0 +1,421 @@

+import argparse
+import os
+import random
+import torch
+import pandas as pd
+import numpy as np
+import time
+import torch.optim as optim
+from matplotlib import cm
+import matplotlib.pyplot as plt
+import json
+from model import GFusion
+import torch.nn.functional as F
+from torch_geometric.data import Data
+from torch_geometric.loader import DataLoader
+from torch_geometric.utils import add_self_loops
+from torch.nn.functional import softmax
+from torch_geometric.nn import knn_graph
+import copy
+torch.autograd.set_detect_anomaly(True)
+from sklearn.metrics import explained_variance_score,mean_squared_error,mean_absolute_error,r2_score,precision_score,recall_score,f1_score,roc_auc_score,roc_curve, auc
+from sklearn.feature_selection import r_regression
+import pickle
+from utils.utils import triplets,unique,pos2key
+from torch.utils.tensorboard import SummaryWriter
+from datetime import datetime
+import dataset
+def count_parameters(model):
+    return sum(p.numel() for p in model.parameters() if p.requires_grad)
+blue = lambda x: '\033[94m' + x + '\033[0m'
+red = lambda x: '\033[31m' + x + '\033[0m'
+green = lambda x: '\033[32m' + x + '\033[0m'
+yellow = lambda x: '\033[33m' + x + '\033[0m'
+greenline = lambda x: '\033[42m' + x + '\033[0m'
+yellowline = lambda x: '\033[43m' + x + '\033[0m'
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--log', type=str, default="True")
+    parser.add_argument('--loadmodel', type=str, default="False")
+    parser.add_argument('--split_dataset', type=str, default="False")
+    parser.add_argument('--model', type=str, default="GFusion")
+    # ablation
+    parser.add_argument('--edge_rep', type=str, default="True")
+    parser.add_argument('--single_high', type=str, default="False")
+    parser.add_argument('--fidelity_train', type=str, default="True")
+    parser.add_argument('--fidelity_low_weight', type=float, default=-1.0)
+    parser.add_argument('--share', type=str, default="101")
+    parser.add_argument('--dataset', type=str, default='flu')
+    parser.add_argument('--manualSeed', type=str, default="False")
+    parser.add_argument('--man_seed', type=int, default=12345)
+    parser.add_argument('--test_per_round', type=int, default=10)
+    parser.add_argument('--patience', type=int, default=30)  #scheduler
+    parser.add_argument('--nepoch', type=int, default=201)
+    parser.add_argument('--lr', type=float, default=1e-3)
+    parser.add_argument('--activation', type=str, default='relu')#'lrelu'
+    parser.add_argument('--batchSize', type=int, default=512)
+    parser.add_argument('--num_neighbors', type=int, default=3)
+    parser.add_argument('--regression_loss', type=str, default='l2')
+    parser.add_argument('--h_ch', type=int, default=16)
+    parser.add_argument('--localdepth', type=int, default=1) # mlp(distance) mlp(theta) >=1
+    parser.add_argument('--num_interactions', type=int, default=1) #>=1
+    parser.add_argument('--finaldepth', type=int, default=3) # mlp(concat node_attr and geo_encoding)
+    args = parser.parse_args()
+    args.log=True if args.log=="True" else False
+    args.loadmodel=True if args.loadmodel=="True" else False
+    args.split_dataset=True if args.split_dataset=="True" else False
+    args.edge_rep=True if args.edge_rep=="True" else False
+    args.single_high=True if args.single_high=="True" else False
+    args.fidelity_train=True if args.fidelity_train=="True" and args.single_high is False and args.fidelity_low_weight==-1.0 else False
+    args.manualSeed=True if args.manualSeed=="True" else False
+    args.save_dir=os.path.join('./save/',args.dataset)
+    return args
+def main(args,train_Loader,val_Loader,test_Loader):
+    if flag:
+        return
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    measure_Pearsonr=r_regression
+    criterion_l1 = torch.nn.L1Loss() #reduction='sum'
+    criterion_l2 = torch.nn.MSELoss()
+    criterion=criterion_l1 if args.regression_loss=='l1' else criterion_l2
+    if args.model in ['GFusion']:
+        def myL1(pred,true,weight=None,reduction='mean'):
+            loss=(abs(pred-true))
+            num=len(pred)
+            if weight is not None:
+                loss=[weight[i]*loss[i] for i in range(num)]
+            loss=sum(loss)
+            if reduction=='mean':
+                loss=loss/num
+            return loss
+        def myL2(pred,true,weight=None,reduction='mean'):
+            loss=((pred-true)**2)
+            num=len(pred)
+            if weight is not None:
+                loss=[weight[i]*loss[i] for i in range(num)]
+            loss=sum(loss)
+            if reduction=='mean':
+                loss=loss/num
+            return loss
+        criterion=myL1 if args.regression_loss=='l1' else myL2
+        num_of_fidelities=len(train_graphs[0])
+        def reweight_fidelity():
+            if args.single_high:
+                weighted_fidelity_weight[0]=1
+                weighted_fidelity_weight[1]=0
+            elif args.fidelity_low_weight!=-1.0:
+                weighted_fidelity_weight[0]=1
+                weighted_fidelity_weight[1]=args.fidelity_low_weight
+            else:
+                exped_f=[torch.exp(fidelity_weight[i]) for i in range(num_of_fidelities)]
+                fsum=sum(exped_f)
+                for i in range(num_of_fidelities):
+                    weighted_fidelity_weight[i]=exped_f[i]/fsum
+        fidelity_weight,weighted_fidelity_weight=[],[]
+        if args.dataset in ['south',"north","flu"]:
+            for i in range(num_of_fidelities):
+                fidelity_weight+=[torch.tensor(1.0/num_of_fidelities,dtype=torch.float32).requires_grad_()]
+                weighted_fidelity_weight+=[0]
+        elif args.dataset in ["syn"]:
+            fidelity_weight=[torch.tensor(1,dtype=torch.float32).requires_grad_(),torch.tensor(0.0,dtype=torch.float32).requires_grad_()]
+            for i in range(num_of_fidelities):
+                # fidelity_weight+=[torch.tensor(1.0/num_of_fidelities,dtype=torch.float32).requires_grad_()]
+                weighted_fidelity_weight+=[0]
+        reweight_fidelity()
+        if args.dataset in ['south',"north"]:
+            x_in=30
+        elif args.dataset in ['flu']:
+            x_in=0
+        elif args.dataset=='syn':
+            x_in=1
+        else:
+            raise Exception('Dataset not recognized.')
+    if args.model=="GFusion":
+        GFusion_model=GFusion(h_channel=args.h_ch,input_featuresize=x_in,\
+                            localdepth=args.localdepth,num_interactions=args.num_interactions,finaldepth=args.finaldepth,share=args.share)
+        GFusion_model.to(device)
+        optimizer = torch.optim.Adam( list(GFusion_model.parameters()), lr=args.lr)
+        if args.fidelity_train:
+            optimizer2 = torch.optim.Adam(fidelity_weight, lr=optimizer.param_groups[0]['lr']*10)
+            scheduler2 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer2, factor=0.1, patience=args.patience, min_lr=1e-8)
+    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=args.patience, min_lr=1e-8)
+    def train(GFusion_model):
+        epochloss=0
+        y_hat, y_true,y_hat_logit = [], [], []
+        optimizer.zero_grad()
+        if args.fidelity_train: optimizer2.zero_grad()
+        if args.model=="GFusion":
+            GFusion_model.train()
+            for i,data in enumerate(train_Loader):
+                if num_of_fidelities==2:
+                    x1, pos1,edge_index1, batch1,target_index1,target1,is_source1 = data[0].x, data[0].pos,data[0].edge_index, data[0].batch,data[0].target_index,data[0].target,data[0].is_source
+                    x2, pos2,edge_index2, batch2,target_index2,target2,is_source2 = data[1].x, data[1].pos,data[1].edge_index, data[1].batch,data[1].target_index,data[1].target,data[1].is_source
+                    if args.dataset=='syn':
+                        x1[:,1]=x1[:,1]+x1[:,2]
+                        x1=x1[:,[0,1,3,4]]
+                        x2[:,1]=x2[:,1]+x2[:,2]
+                        x2=x2[:,[0,1,3,4]]
+                    x1,pos1,target1,x2,pos2,target2=x1.to(torch.float32),pos1.to(torch.float32),target1.to(torch.float32),x2.to(torch.float32),pos2.to(torch.float32),target2.to(torch.float32)
+                    x2[x2[:,0]>6666,0]=6666
+                    # edge_index,_=add_self_loops(edge_index,num_nodes=x.size(0))
+                    datasource=data[0].datasource
+                    Y = target1
+                    assert(torch.equal(target1,target2))
+                    Y[Y>6666]=6666
+                    x1, pos1,edge_index1, batch1, target_index1,is_source1 = x1.to(device),pos1.to(device), edge_index1.to(device), batch1.to(device),target_index1.to(device),is_source1.to(device)
+                    x2, pos2,edge_index2, batch2, target_index2,is_source2 = x2.to(device),pos2.to(device),edge_index2.to(device), batch2.to(device),target_index2.to(device),is_source2.to(device)
+                    """
+                    triplets are not the same for graphs when training
+                    """
+                    num_nodes1=x1.shape[0]
+                    num_nodes2=x2.shape[0]
+                    edge_index_2rd_1, _, _, edx_2nd_1 = triplets(edge_index1, num_nodes1)
+                    edge_index_2rd_2, _, _, edx_2nd_2 = triplets(edge_index2, num_nodes2)
+                    pm25_1,pm25_2=GFusion_model([pos1,pos2],[edge_index1,edge_index2],[edge_index_2rd_1,edge_index_2rd_2],\
+                                            [edx_2nd_1,edx_2nd_2],[batch1,batch2],[x1,x2],[is_source1,is_source2],args.edge_rep)
+                    pm25_1,pm25_2=pm25_1[target_index1],pm25_2[target_index2]
+                    if args.dataset=='syn':
+                        pred=((pm25_1*weighted_fidelity_weight[0]+pm25_2*weighted_fidelity_weight[1]).cpu())
+                    else:
+                        pred=F.relu((pm25_1*weighted_fidelity_weight[0]+pm25_2*weighted_fidelity_weight[1]).cpu())
+                    loss_weight= [weighted_fidelity_weight[i] for i in datasource]
+                    loss1 = criterion(pred.reshape(-1, 1), Y.reshape(-1, 1),loss_weight)
+                """
+                record predictions
+                """
+                y_hat += list(pred.detach().numpy().reshape(-1))
+                y_true += list(Y.detach().numpy().reshape(-1))
+                loss=loss1
+                loss.backward()
+                epochloss+=loss
+                optimizer.step()
+                optimizer.zero_grad()
+                if args.fidelity_train:
+                    optimizer2.step()
+                    optimizer2.zero_grad()
+                reweight_fidelity()
+        return epochloss.item()/len(train_Loader),y_hat, y_true
+    def test(loader,GFusion_model,fidelity_weight):
+        if not args.single_high:
+            weighted_fidelity_weight=[i.detach() for i in fidelity_weight]
+            exped_f=[torch.exp(fidelity_weight[i]) for i in range(num_of_fidelities)]
+            fsum=sum(exped_f)
+            for i in range(num_of_fidelities):
+                weighted_fidelity_weight[i]=exped_f[i]/fsum
+        else:
+            weighted_fidelity_weight=[1,0]
+        y_hat, y_true,y_hat_logit = [], [], []
+        loss_total, pred_num = 0, 0
+        GFusion_model.eval()
+        for i,data in enumerate(loader):
+            if num_of_fidelities==2:
+                x1, pos1,edge_index1, batch1,target_index1,target1,is_source1 = data[0].x, data[0].pos,data[0].edge_index, data[0].batch,data[0].target_index,data[0].target,data[0].is_source
+                x2, pos2,edge_index2, batch2,target_index2,target2,is_source2 = data[1].x, data[1].pos,data[1].edge_index, data[1].batch,data[1].target_index,data[1].target,data[1].is_source
+                if args.dataset=='syn':
+                    x1[:,1]=x1[:,1]+x1[:,2]
+                    x1=x1[:,[0,1,3,4]]
+                    x2[:,1]=x2[:,1]+x2[:,2]
+                    x2=x2[:,[0,1,3,4]]
+                x1,pos1,target1,x2,pos2,target2=x1.to(torch.float32),pos1.to(torch.float32),target1.to(torch.float32),x2.to(torch.float32),pos2.to(torch.float32),target2.to(torch.float32)
+                x2[x2[:,0]>6666,0]=6666
+                # edge_index,_=add_self_loops(edge_index,num_nodes=x.size(0))
+                datasource=data[0].datasource
+                Y = target1
+                assert(torch.equal(target1,target2))
+                Y[Y>6666]=6666
+                x1, pos1,edge_index1, batch1, target_index1,is_source1 = x1.to(device),pos1.to(device), edge_index1.to(device), batch1.to(device),target_index1.to(device),is_source1.to(device)
+                x2, pos2,edge_index2, batch2, target_index2,is_source2 = x2.to(device),pos2.to(device),edge_index2.to(device), batch2.to(device),target_index2.to(device),is_source2.to(device)
+                num_nodes1=x1.shape[0]
+                num_nodes2=x2.shape[0]
+                edge_index_2rd_1, num_2nd_neighbors_1, edx_1st_1, edx_2nd_1 = triplets(edge_index1, num_nodes1)
+                edge_index_2rd_2, num_2nd_neighbors_2, edx_1st_2, edx_2nd_2 = triplets(edge_index2, num_nodes2)
+                pm25_1,pm25_2=GFusion_model([pos1,pos2],[edge_index1,edge_index2],[edge_index_2rd_1,edge_index_2rd_2],\
+                                        [edx_2nd_1,edx_2nd_2],[batch1,batch2],[x1,x2],[is_source1,is_source2],args.edge_rep)
+                pm25_1,pm25_2=pm25_1[target_index1],pm25_2[target_index2]
+                with torch.no_grad():
+                    if args.dataset=='syn':
+                        pred=((pm25_1*weighted_fidelity_weight[0]+pm25_2*weighted_fidelity_weight[1]).cpu())
+                    else:
+                        pred=F.relu((pm25_1*weighted_fidelity_weight[0]+pm25_2*weighted_fidelity_weight[1]).cpu())
+                    assert(all(datasource==0))
+                    loss1 = criterion(pred.reshape(-1, 1), Y.reshape(-1, 1))*weighted_fidelity_weight[0]
+            """
+            record predictions
+            """
+            y_hat += list(pred.detach().numpy().reshape(-1))
+            y_true += list(Y.detach().numpy().reshape(-1))
+            pred_num += len(Y.reshape(-1, 1))
+            loss=loss1
+            loss_total += loss.detach() * len(Y.reshape(-1, 1))
+        return loss_total/pred_num, y_hat, y_true
+    if args.loadmodel:
+        try:
+            suffix='Oct31-11:50:30'
+            GFusion_model.load_state_dict(torch.load(os.path.join("save",args.dataset,'model','best_GFusion_model_'+suffix+'.pth')),strict=True)
+            best_GFusion_model = copy.deepcopy(GFusion_model)
+        except OSError:
+            pass
+    else:
+        best_val_trigger = 1e3
+        old_lr=1e3
+        suffix="{}{}-{}:{}:{}".format(datetime.now().strftime("%h"),
+                                        datetime.now().strftime("%d"),
+                                        datetime.now().strftime("%H"),
+                                        datetime.now().strftime("%M"),
+                                        datetime.now().strftime("%S"))
+        if args.log:
+            writer = SummaryWriter(os.path.join(tensorboard_dir,suffix))
+        for epoch in range(args.nepoch):
+            if args.model in ['GFusion']: train_loss,y_hat, y_true=train(GFusion_model)
+            if args.log:
+                writer.add_scalar('loss/Train', train_loss, epoch)
+            if args.dataset in ['south',"north",'syn','flu']:
+                train_mae=mean_absolute_error(y_true, y_hat)
+                train_rmse = np.sqrt(mean_squared_error(y_true, y_hat))
+                if args.log:
+                    writer.add_scalar('mae/Train', train_mae, epoch)
+                    writer.add_scalar('rmse/Train', train_rmse, epoch)
+                print(( f"epoch[{epoch:d}] train_loss : {train_loss:.3f} train_mae : {train_mae:.3f} train_rmse : {train_rmse:.3f}" ))
+                if args.model in ['GFusion']:
+                    if args.fidelity_train==True:
+                        print(f"fidelity weight: {fidelity_weight[0]:.3f}, {fidelity_weight[1]:.3f}")
+                    print(f"weighted_fidelity_weight: {weighted_fidelity_weight[0]:.3f}, {weighted_fidelity_weight[1]:.3f}")
+            if epoch % args.test_per_round == 0:
+                if args.model in ['GFusion']:
+                    val_loss, yhat_val, ytrue_val = test(val_Loader,GFusion_model,fidelity_weight)
+                    test_loss, yhat_test, ytrue_test = test(test_Loader,GFusion_model,fidelity_weight)
+                if args.log:
+                    writer.add_scalar('loss/val', val_loss, epoch)
+                    writer.add_scalar('loss/test', test_loss, epoch)
+                if args.dataset in ['south',"north",'syn','flu']:
+                    val_mae=mean_absolute_error(ytrue_val, yhat_val)
+                    val_rmse = np.sqrt(mean_squared_error(ytrue_val, yhat_val))
+                    if args.log:
+                        writer.add_scalar('mae/val', val_mae, epoch)
+                        writer.add_scalar('rmse/val', val_rmse, epoch)
+                    print(blue( f"epoch[{epoch:d}] val_mae : {val_mae:.3f} val_rmse : {val_rmse:.3f}" ))
+                    test_mae = mean_absolute_error(ytrue_test, yhat_test)
+                    test_rmse = np.sqrt(mean_squared_error(ytrue_test, yhat_test))
+                    test_var=explained_variance_score(ytrue_test,yhat_test)
+                    test_coefOfDetermination=r2_score(ytrue_test,yhat_test)
+                    test_Pearsonr=measure_Pearsonr(np.array(yhat_test).reshape(-1, 1),np.array(ytrue_test).reshape(-1))[0]
+                    if args.log:
+                        writer.add_scalar('mae/test', test_mae, epoch)
+                        writer.add_scalar('rmse/test', test_rmse, epoch)
+                    print(blue( f"epoch[{epoch:d}] test_mae: {test_mae:.3f} test_rmse: {test_rmse:.3f} test_Pearsonr: {test_Pearsonr:.3f} test_coefOfDetermination: {test_coefOfDetermination:.3f}" ))
+                    if args.model in ['GFusion']:
+                        if args.fidelity_train==True:
+                            print(f"fidelity weight: {fidelity_weight[0]:.3f}, {fidelity_weight[1]:.3f}")
+                        print(f"weighted_fidelity_weight: {weighted_fidelity_weight[0]:.3f}, {weighted_fidelity_weight[1]:.3f}")
+                    val_trigger=val_mae
+                if val_trigger < best_val_trigger:
+                    best_val_trigger = val_trigger
+                    best_GFusion_model = copy.deepcopy(GFusion_model)
+                    best_fidelity=copy.deepcopy(fidelity_weight)
+                    best_info=[epoch,val_trigger]
+            """
+            update lr when epoch≥30
+            """
+            if epoch >= 30:
+                lr = scheduler.optimizer.param_groups[0]['lr']
+                if old_lr!=lr:
+                    print(red('lr'), epoch, (lr), sep=', ')
+                    old_lr=lr
+                scheduler.step(val_trigger)
+                if args.fidelity_train:
+                    scheduler2.step(val_trigger)
+    val_loss, yhat_val, ytrue_val = test(val_Loader,best_GFusion_model,best_fidelity)
+    test_loss, yhat_test, ytrue_test = test(test_Loader,best_GFusion_model,best_fidelity)
+    if args.dataset in ['south',"north",'syn','flu']:
+        val_mae = mean_absolute_error(ytrue_val, yhat_val)
+        val_rmse=np.sqrt(mean_squared_error(ytrue_val,yhat_val))
+        val_var=explained_variance_score(ytrue_val,yhat_val)
+        print(blue( f"best_val  val_mae: {val_mae:.3f} val_rmse: {val_rmse:.3f} val_var: {val_var:.3f}" ))
+        test_mae=mean_absolute_error(ytrue_test,yhat_test)
+        test_rmse=np.sqrt(mean_squared_error(ytrue_test,yhat_test))
+        test_var=explained_variance_score(ytrue_test,yhat_test)
+        test_coefOfDetermination=r2_score(ytrue_test,yhat_test)
+        test_Pearsonr=measure_Pearsonr(np.array(yhat_test).reshape(-1, 1),np.array(ytrue_test).reshape(-1))[0]
+        print(blue( f"best_test test_mae: {test_mae:.3f} test_rmse: {test_rmse:.3f} test_var: {test_var:.3f}" ))
+    if not args.loadmodel:
+        """
+        save training info and best result
+        """
+        result_file=os.path.join(info_dir, suffix)
+        with open(result_file, 'w') as f:
+            print(args.num_neighbors,args.nepoch,sep=' ',file=f)
+            print(f"fidelity weight: {best_fidelity[0]:.3f}, {best_fidelity[1]:.3f}",file=f)
+            print("Random Seed: ", Seed,file=f)
+            if args.dataset in ['south',"north",'syn','flu']:
+                print(f"MAE  val : {val_mae:.3f}, Test : {test_mae:.3f}", file=f)
+                print(f"rmse val : {val_rmse:.3f}, Test : {test_rmse:.3f}", file=f)
+                print(f"var  val : {val_var:.3f}, Test : {test_var:.3f}", file=f)
+                print(f"test_coefOfDetermination: {test_coefOfDetermination:.3f}, test_Pearsonr : {test_Pearsonr:.3f}", file=f)
+            print(f"Best info: {best_info}", file=f)
+            for i in [[a,getattr(args, a)] for a in args.__dict__]:
+                print(i,sep='\n',file=f)
+        with open(os.path.join(model_dir,'best_f_weight'+"_"+suffix+".pkl"), 'wb') as handle:
+            pickle.dump(fidelity_weight, handle)
+        torch.save(best_GFusion_model.state_dict(), os.path.join(model_dir,'best_GFusion_model'+"_"+suffix+'.pth') )
+    print("done")
+if __name__ == '__main__':
+    args = get_args()
+    if not os.path.exists(args.save_dir):
+        os.makedirs(args.save_dir,exist_ok=True)
+    tensorboard_dir=os.path.join(args.save_dir,'log')
+    if not os.path.exists(tensorboard_dir):
+        os.makedirs(tensorboard_dir,exist_ok=True)
+    model_dir=os.path.join(args.save_dir,'model')
+    if not os.path.exists(model_dir):
+        os.makedirs(model_dir,exist_ok=True)
+    info_dir=os.path.join(args.save_dir,'info')
+    if not os.path.exists(info_dir):
+        os.makedirs(info_dir,exist_ok=True)
+    Seed = args.man_seed if args.manualSeed else random.randint(1, 10000)
+    print("Random Seed: ", Seed)
+    random.seed(Seed)
+    torch.manual_seed(Seed)
+    np.random.seed(Seed)
+    flag=0
+    if args.dataset in ['south',"north",'syn',"flu"]:
+        graphs1,graphs2=dataset.load_point(args.dataset,args.num_neighbors,[False,200,500])
+        np.random.shuffle(graphs1)
+        val_test_split = int(np.around( 2 / 10 * len(graphs1) ))
+        train_val_split = int(len(graphs1)-2*val_test_split)
+        if args.single_high:
+            train_graphs = graphs1[:train_val_split]
+        else:
+            train_graphs = graphs1[:train_val_split]+graphs2
+        val_graphs = graphs1[train_val_split:train_val_split+val_test_split]
+        test_graphs = graphs1[train_val_split+val_test_split:]
+        np.random.shuffle(train_graphs)
+        train_Loader=DataLoader(train_graphs, batch_size=args.batchSize)
+        val_Loader=DataLoader(val_graphs, batch_size=args.batchSize)
+        test_Loader=DataLoader(test_graphs, batch_size=args.batchSize)
+        print(f"train_pair_num: {len(train_graphs)}, val_pair_num: {len(val_graphs)}, test_pair_num: {len(test_graphs)}")
+    else:
+        raise Exception('Dataset not recognized.')
+    main(args,train_Loader,val_Loader,test_Loader)

utils/utils.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import numpy as np
+import networkx as nx
+from networkx.utils import UnionFind
+from typing import Optional
+import torch
+from torch import Tensor
+from torch_sparse import SparseTensor
+from scipy.sparse import csr_matrix
+from math import pi as PI
+import torch.nn.functional as F
+def unique(sequence):
+    seen = set()
+    return [x for x in sequence if not (x in seen or seen.add(x))]
+def pos2key(pos):
+    pos=pos.reshape(-1)
+    key="{:08.4f}".format(pos[0])+'_'+"{:08.4f}".format(pos[1])
+    return key
+def get_angle(v1: Tensor, v2: Tensor):
+    if v1.shape[1]==2:
+        v1=F.pad(v1, (0, 1))
+    if v2.shape[1]==2:
+        v2= F.pad(v2, (0, 1))
+    return torch.atan2(
+        torch.cross(v1, v2, dim=1).norm(p=2, dim=1), (v1 * v2).sum(dim=1))
+class GaussianSmearing(torch.nn.Module):
+    def __init__(self, start=-PI, stop=PI, num_gaussians=12):
+        super(GaussianSmearing, self).__init__()
+        offset = torch.linspace(start, stop, num_gaussians)
+        self.coeff = -0.5 / (offset[1] - offset[0]).item() ** 2
+        self.register_buffer("offset", offset)
+    def forward(self, dist):
+        dist = dist.view(-1, 1) - self.offset.view(1, -1)
+        return torch.exp(self.coeff * torch.pow(dist, 2))
+def triplets(edge_index, num_nodes):
+    row, col = edge_index
+    value = torch.arange(row.size(0), device=row.device)
+    adj_t = SparseTensor(row=row, col=col, value=value,
+                         sparse_sizes=(num_nodes, num_nodes))
+    adj_t_row = adj_t[col]
+    num_triplets = adj_t_row.set_value(None).sum(dim=1).to(torch.long)
+    idx_i = row.repeat_interleave(num_triplets)
+    idx_j = col.repeat_interleave(num_triplets)
+    edx_1st = value.repeat_interleave(num_triplets)
+    idx_k = adj_t_row.storage.col()
+    edx_2nd = adj_t_row.storage.value()
+    mask1 = (idx_i == idx_k) & (idx_j != idx_i)
+    mask2 = (idx_i == idx_j) & (idx_j != idx_k)
+    mask3 = (idx_j == idx_k) & (idx_i != idx_k)
+    mask = ~(mask1 | mask2 | mask3)
+    idx_i, idx_j, idx_k, edx_1st, edx_2nd = idx_i[mask], idx_j[mask], idx_k[mask], edx_1st[mask], edx_2nd[mask]
+    num_triplets_real = torch.cumsum(num_triplets, dim=0) - torch.cumsum(~mask, dim=0)[torch.cumsum(num_triplets, dim=0)-1]
+    return torch.stack([idx_i, idx_j, idx_k]), num_triplets_real.to(torch.long), edx_1st, edx_2nd
+if __name__ == '__main__':
+    1