|
import torch.utils.data as data |
|
import os |
|
import os.path |
|
import torch |
|
import numpy as np |
|
import pandas as pd |
|
import sys |
|
import pickle |
|
import time |
|
import torchvision.datasets as datasets |
|
import torchvision.transforms as transforms |
|
from PIL import Image |
|
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union |
|
from torchvision.datasets import VisionDataset |
|
from torch.utils.data import Dataset |
|
from datetime import date, timedelta,datetime |
|
import random |
|
import pickle as pkl |
|
import string |
|
|
|
# Characters that may appear in a label.
valid_chars = 'EFHILOTUYZ'

# Every ordered two-character combination of `valid_chars`, in sorted order,
# together with label -> index and index -> label lookup tables.
alphabetic_labels = sorted(
    first + second for first in valid_chars for second in valid_chars
)
label_mapping = dict(zip(alphabetic_labels, range(len(alphabetic_labels))))
reverse_label_mapping = {index: label for label, index in label_mapping.items()}

# Same lookup tables for single-character labels.
single_alphabetic_labels = sorted(valid_chars)
single_label_mapping = dict(
    zip(single_alphabetic_labels, range(len(single_alphabetic_labels)))
)
single_reverse_label_mapping = {
    index: label for label, index in single_label_mapping.items()
}
|
|
|
def get_mnist_dataset(data_dir='data/multi_mnist.pkl', Seed=0, test_ratio=0.2):
    """Load a pickled dataset, shift its labels by -10, shuffle and split it.

    Args:
        data_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        Seed: Value used to seed the global ``random``, ``torch`` and
            ``numpy`` generators before shuffling.
        test_ratio: Fraction of the entries placed in the test split. The
            validation split receives the same number of entries and the
            training split receives the remainder.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of slices of the shuffled
        dataset.

    Raises:
        ValueError: If ``test_ratio`` is negative or so large that the
            validation and test splits together exceed the dataset size.
    """
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    total = len(dataset)
    val_test_split = int(np.around(test_ratio * total))
    train_val_split = total - 2 * val_test_split
    # A negative boundary would make the slices below overlap or come back
    # empty without any error, leaking samples between splits.
    if val_test_split < 0 or train_val_split < 0:
        raise ValueError(
            'test_ratio=%r is not usable for a dataset of %d entries: '
            'validation and test splits must fit inside the dataset'
            % (test_ratio, total)
        )

    # Labels are stored with an offset of 10 in the pickle; shift them in place.
    for entry in dataset:
        entry.y -= 10

    np.random.shuffle(dataset)
    train_ds = dataset[:train_val_split]
    val_ds = dataset[train_val_split:train_val_split + val_test_split]
    test_ds = dataset[train_val_split + val_test_split:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
|
|
|
def get_building_dataset(data_dir='data/building_with_index.pkl', Seed=0, test_ratio=0.2):
    """Load a pickled dataset, map its labels to indices, shuffle and split it.

    Each entry's ``y`` is replaced by ``label_mapping[entry.y]``.

    Args:
        data_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        Seed: Value used to seed the global ``random``, ``torch`` and
            ``numpy`` generators before shuffling.
        test_ratio: Fraction of the entries placed in the test split. The
            validation split receives the same number of entries and the
            training split receives the remainder.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of slices of the shuffled
        dataset.

    Raises:
        ValueError: If ``test_ratio`` is negative or so large that the
            validation and test splits together exceed the dataset size.
        KeyError: If an entry's ``y`` is not a key of ``label_mapping``.
    """
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    total = len(dataset)
    val_test_split = int(np.around(test_ratio * total))
    train_val_split = total - 2 * val_test_split
    # A negative boundary would make the slices below overlap or come back
    # empty without any error, leaking samples between splits.
    if val_test_split < 0 or train_val_split < 0:
        raise ValueError(
            'test_ratio=%r is not usable for a dataset of %d entries: '
            'validation and test splits must fit inside the dataset'
            % (test_ratio, total)
        )

    # Replace each label with its index in the label table (in place).
    for entry in dataset:
        entry.y = label_mapping[entry.y]

    np.random.shuffle(dataset)
    train_ds = dataset[:train_val_split]
    val_ds = dataset[train_val_split:train_val_split + val_test_split]
    test_ds = dataset[train_val_split + val_test_split:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
|
|
|
def get_mbuilding_dataset(data_dir='data/mp_building.pkl', Seed=0, test_ratio=0.2):
    """Load a pickled dataset, map its labels to indices, shuffle and split it.

    Each entry's ``y`` is replaced by ``label_mapping[entry.y]``.

    Args:
        data_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        Seed: Value used to seed the global ``random``, ``torch`` and
            ``numpy`` generators before shuffling.
        test_ratio: Fraction of the entries placed in the test split. The
            validation split receives the same number of entries and the
            training split receives the remainder.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of slices of the shuffled
        dataset.

    Raises:
        ValueError: If ``test_ratio`` is negative or so large that the
            validation and test splits together exceed the dataset size.
        KeyError: If an entry's ``y`` is not a key of ``label_mapping``.
    """
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    total = len(dataset)
    val_test_split = int(np.around(test_ratio * total))
    train_val_split = total - 2 * val_test_split
    # A negative boundary would make the slices below overlap or come back
    # empty without any error, leaking samples between splits.
    if val_test_split < 0 or train_val_split < 0:
        raise ValueError(
            'test_ratio=%r is not usable for a dataset of %d entries: '
            'validation and test splits must fit inside the dataset'
            % (test_ratio, total)
        )

    # Replace each label with its index in the label table (in place).
    for entry in dataset:
        entry.y = label_mapping[entry.y]

    np.random.shuffle(dataset)
    train_ds = dataset[:train_val_split]
    val_ds = dataset[train_val_split:train_val_split + val_test_split]
    test_ds = dataset[train_val_split + val_test_split:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
|
|
|
def get_sbuilding_dataset(data_dir='data/single_building.pkl', Seed=0, test_ratio=0.2):
    """Load a pickled dataset, map its labels to indices, shuffle and split it.

    Each entry's ``y`` is replaced by ``single_label_mapping[entry.y]``.

    Args:
        data_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        Seed: Value used to seed the global ``random``, ``torch`` and
            ``numpy`` generators before shuffling.
        test_ratio: Fraction of the entries placed in the test split. The
            validation split receives the same number of entries and the
            training split receives the remainder.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of slices of the shuffled
        dataset.

    Raises:
        ValueError: If ``test_ratio`` is negative or so large that the
            validation and test splits together exceed the dataset size.
        KeyError: If an entry's ``y`` is not a key of
            ``single_label_mapping``.
    """
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    total = len(dataset)
    val_test_split = int(np.around(test_ratio * total))
    train_val_split = total - 2 * val_test_split
    # A negative boundary would make the slices below overlap or come back
    # empty without any error, leaking samples between splits.
    if val_test_split < 0 or train_val_split < 0:
        raise ValueError(
            'test_ratio=%r is not usable for a dataset of %d entries: '
            'validation and test splits must fit inside the dataset'
            % (test_ratio, total)
        )

    # Replace each label with its index in the single-character table.
    for entry in dataset:
        entry.y = single_label_mapping[entry.y]

    np.random.shuffle(dataset)
    train_ds = dataset[:train_val_split]
    val_ds = dataset[train_val_split:train_val_split + val_test_split]
    test_ds = dataset[train_val_split + val_test_split:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
|
|
|
def get_smnist_dataset(data_dir='data/single_mnist.pkl', Seed=0, test_ratio=0.2):
    """Load a pickled dataset, shuffle it and split it into train/val/test.

    Unlike the other loaders in this file, the entries' labels are left
    untouched.

    Args:
        data_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        Seed: Value used to seed the global ``random``, ``torch`` and
            ``numpy`` generators before shuffling.
        test_ratio: Fraction of the entries placed in the test split. The
            validation split receives the same number of entries and the
            training split receives the remainder.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of slices of the shuffled
        dataset.

    Raises:
        ValueError: If ``test_ratio`` is negative or so large that the
            validation and test splits together exceed the dataset size.
    """
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    total = len(dataset)
    val_test_split = int(np.around(test_ratio * total))
    train_val_split = total - 2 * val_test_split
    # A negative boundary would make the slices below overlap or come back
    # empty without any error, leaking samples between splits.
    if val_test_split < 0 or train_val_split < 0:
        raise ValueError(
            'test_ratio=%r is not usable for a dataset of %d entries: '
            'validation and test splits must fit inside the dataset'
            % (test_ratio, total)
        )

    np.random.shuffle(dataset)
    train_ds = dataset[:train_val_split]
    val_ds = dataset[train_val_split:train_val_split + val_test_split]
    test_ds = dataset[train_val_split + val_test_split:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
|
|
|
def get_dbp_dataset(data_dir='data/triple_building.pkl', Seed=0, test_ratio=0.2):
    """Load a pickled dataset, binarize its labels, shuffle and split it.

    Each entry's ``y`` becomes ``1`` when it is ``>= 1`` and ``0`` otherwise.

    Args:
        data_dir: Path of the pickle file to load (a file path, despite the
            parameter name).
        Seed: Value used to seed the global ``random``, ``torch`` and
            ``numpy`` generators before shuffling.
        test_ratio: Fraction of the entries placed in the test split. The
            validation split receives the same number of entries and the
            training split receives the remainder.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple of slices of the shuffled
        dataset.

    Raises:
        ValueError: If ``test_ratio`` is negative or so large that the
            validation and test splits together exceed the dataset size.
    """
    random.seed(Seed)
    torch.manual_seed(Seed)
    np.random.seed(Seed)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(data_dir, 'rb') as f:
        dataset = pkl.load(f)

    total = len(dataset)
    val_test_split = int(np.around(test_ratio * total))
    train_val_split = total - 2 * val_test_split
    # A negative boundary would make the slices below overlap or come back
    # empty without any error, leaking samples between splits.
    if val_test_split < 0 or train_val_split < 0:
        raise ValueError(
            'test_ratio=%r is not usable for a dataset of %d entries: '
            'validation and test splits must fit inside the dataset'
            % (test_ratio, total)
        )

    # Collapse the labels to a binary target (in place).
    for entry in dataset:
        entry.y = 1 if entry.y >= 1 else 0

    np.random.shuffle(dataset)
    train_ds = dataset[:train_val_split]
    val_ds = dataset[train_val_split:train_val_split + val_test_split]
    test_ds = dataset[train_val_split + val_test_split:]

    print(data_dir)
    print('Train: ' + str(len(train_ds)))
    print('Val : ' + str(len(val_ds)))
    print('Test : ' + str(len(test_ds)))

    return train_ds, val_ds, test_ds
|
|
|
def affine_transform_to_range(ds, target_range=(-1, 1)):
    """Rescale the first two columns of every item's ``pos`` into a range.

    For each item, columns 0 and 1 of ``item.pos`` are independently scaled
    and shifted so that their minimum lands on ``target_range[0]`` and their
    maximum on ``target_range[1]``. The tensors are modified in place.

    An axis whose values are all equal has no extent to stretch; its values
    are set to the middle of ``target_range`` instead of dividing by zero.
    Items whose ``pos`` holds no points are left untouched.

    Args:
        ds: Iterable of objects exposing a 2-D tensor attribute ``pos`` with
            at least two columns.
        target_range: ``(low, high)`` bounds of the output interval.

    Returns:
        The same ``ds`` object that was passed in.
    """
    low, high = target_range[0], target_range[1]
    span = high - low

    for item in ds:
        for axis in (0, 1):
            coords = item.pos[:, axis]
            if coords.numel() == 0:
                # torch.min/max raise on empty input; nothing to rescale.
                continue

            axis_min = torch.min(coords)
            extent = torch.max(coords) - axis_min
            if extent == 0:
                # Degenerate axis: dividing by the extent would give inf/NaN.
                item.pos[:, axis] = low + span / 2
                continue

            scale = span / extent
            translate = low - axis_min * scale
            item.pos[:, axis] = coords * scale + translate

    return ds
|
|
|
class CustomDataset(Dataset):
    """Dataset wrapper around an in-memory, indexable collection of samples.

    The explicit ``len()`` / ``get()`` accessors are kept for existing
    callers. ``__len__`` and ``__getitem__`` delegate to them so that the
    built-in ``len(ds)``, ``ds[i]`` and ``torch.utils.data.DataLoader`` work
    with instances of this class.
    """

    def __init__(self, data_list):
        """Store ``data_list`` as the backing collection of samples."""
        super(CustomDataset, self).__init__()
        self.data_list = data_list

    def len(self):
        """Return the number of stored samples."""
        return len(self.data_list)

    def get(self, idx):
        """Return the sample stored at position ``idx``."""
        return self.data_list[idx]

    def __len__(self):
        """Return the number of stored samples (``len(ds)`` support)."""
        return self.len()

    def __getitem__(self, idx):
        """Return the sample at ``idx`` (``ds[idx]`` support)."""
        return self.get(idx)
|
|
|
if __name__ == '__main__':
    # Manual smoke run: load the default dataset (which prints the split
    # sizes) and finish with an empty line.
    train_split, val_split, test_split = get_mnist_dataset()
    print("")