# ic_gan/data_utils/datasets_common.py
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# All contributions made by NAVER Corp.:
# Copyright (c) 2020-present NAVER Corp.
#
# MIT license
import sys
import os
import os.path
sys.path.insert(1, os.path.join(sys.path[0], ".."))
from data_utils import utils as data_utils
from PIL import Image
import numpy as np
from tqdm import tqdm
import random
import sklearn.metrics
import torch.utils.data as data
try:
    import faiss
    USE_FAISS = 1
except ImportError:
    print("Faiss library not found!")
    USE_FAISS = 0
import h5py as h5
import torch
IMG_EXTENSIONS = [".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm"]
def is_image_file(filename):
"""Checks if a file is an image.
Args:
filename (string): path to a file
Returns:
bool: True if the filename ends with a known image extension
"""
filename_lower = filename.lower()
return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS)
def find_classes(dir):
    """Lists the class subfolders of ``dir`` and maps each class name to an index."""
    classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
    classes.sort()
    class_to_idx = {classes[i]: i for i in range(len(classes))}
    return classes, class_to_idx
def make_dataset(dir, class_to_idx):
    """Walks ``dir`` and returns a list of (image path, class index) tuples."""
    images = []
    dir = os.path.expanduser(dir)
for target in tqdm(sorted(os.listdir(dir))):
d = os.path.join(dir, target)
if not os.path.isdir(d):
continue
for root, _, fnames in sorted(os.walk(d)):
for fname in sorted(fnames):
if is_image_file(fname):
path = os.path.join(root, fname)
item = (path, class_to_idx[target])
images.append(item)
return images
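# For example (hypothetical paths), make_dataset("/data/train", {"cat": 0, "dogball": 1})
# would yield entries such as [("/data/train/cat/123.png", 0),
# ("/data/train/dogball/xxx.png", 1), ...].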
def pil_loader(path):
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, "rb") as f:
img = Image.open(f)
return img.convert("RGB")
def accimage_loader(path):
import accimage
try:
return accimage.Image(path)
except IOError:
# Potentially a decoding problem, fall back to PIL.Image
return pil_loader(path)
def default_loader(path):
from torchvision import get_image_backend
if get_image_backend() == "accimage":
return accimage_loader(path)
else:
return pil_loader(path)
class ImageFolder(data.Dataset):
"""A generic data loader where the images are arranged in this way: ::
root/dogball/xxx.png
root/dogball/xxy.png
root/dogball/xxz.png
root/cat/123.png
root/cat/nsdf3.png
root/cat/asd932_.png
Parameters
----------
root: string. Root directory path.
    transform: callable, optional. A function/transform that takes in a PIL image
        and returns a transformed version. E.g., ``transforms.RandomCrop``.
    target_transform: callable, optional. A function/transform that takes in the
        target and transforms it.
    loader: callable, optional. A function to load an image given its path.
Attributes
----------
classes: list. List of the class names.
class_to_idx: dict. Dict with items (class_name, class_index).
imgs: list. List of (image path, class_index) tuples
"""
def __init__(
self,
root,
transform=None,
target_transform=None,
loader=default_loader,
load_in_mem=False,
index_filename="imagenet_imgs.npz",
longtail=False,
subsampled=False,
split="train",
**kwargs
):
classes, class_to_idx = find_classes(root)
# Load pre-computed image directory walk
        # Loading a pre-saved index is currently disabled: the directory walk below
        # is always performed and its result re-saved.
        if False:  # os.path.exists(os.path.join(index_filename)):
            print("Loading pre-saved Index file %s..." % index_filename)
imgs = np.load(os.path.join(index_filename))["imgs"]
# If first time, walk the folder directory and save the
# results to a pre-computed file.
else:
print("Generating Index file %s..." % index_filename)
if not longtail:
imgs = make_dataset(root, class_to_idx)
if subsampled:
# Same number of samples as in ImageNet-LT
imgs = random.sample(imgs, 115846)
else:
imgs = []
print("Using long-tail version of the dataset with split ", split, "!")
with open(
"BigGAN_PyTorch/imagenet_lt/ImageNet_LT_" + split + ".txt"
) as f:
for line in f:
imgs.append(
(
os.path.join(
root, "/".join(line.split()[0].split("/")[1:])
),
int(line.split()[1]),
)
)
np.savez_compressed(os.path.join(index_filename), **{"imgs": imgs})
        if len(imgs) == 0:
            raise RuntimeError(
                "Found 0 images in subfolders of: " + root + "\n"
                "Supported image extensions are: " + ",".join(IMG_EXTENSIONS)
            )
self.root = root
self.imgs = imgs
self.classes = classes
self.class_to_idx = class_to_idx
self.transform = transform
self.target_transform = target_transform
self.loader = loader
self.load_in_mem = load_in_mem
if self.load_in_mem:
print("Loading all images into memory...")
self.data, self.labels = [], []
for index in tqdm(range(len(self.imgs))):
path, target = imgs[index][0], imgs[index][1]
self.data.append(self.transform(self.loader(path)))
self.labels.append(target)
def __getitem__(self, index):
"""
Parameters
----------
index: int. Index
Returns
-------
tuple: (image, target) where target is class_index of the target class.
"""
if self.load_in_mem:
img = self.data[index]
target = self.labels[index]
else:
path, target = self.imgs[index]
img = self.loader(str(path))
if self.transform is not None:
img = self.transform(img)
if self.target_transform is not None:
target = self.target_transform(target)
return img, int(target), index
def __len__(self):
return len(self.imgs)
def __repr__(self):
fmt_str = "Dataset " + self.__class__.__name__ + "\n"
fmt_str += " Number of datapoints: {}\n".format(self.__len__())
fmt_str += " Root Location: {}\n".format(self.root)
tmp = " Transforms (if any): "
fmt_str += "{0}{1}\n".format(
tmp, self.transform.__repr__().replace("\n", "\n" + " " * len(tmp))
)
tmp = " Target Transforms (if any): "
fmt_str += "{0}{1}".format(
tmp, self.target_transform.__repr__().replace("\n", "\n" + " " * len(tmp))
)
return fmt_str
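# Usage sketch (hypothetical root path; assumes an ImageNet-style folder layout
# as documented in the class docstring):
#
#     from torchvision import transforms
#     dataset = ImageFolder(
#         root="/datasets/imagenet/train",
#         transform=transforms.Compose(
#             [transforms.CenterCrop(224), transforms.ToTensor()]
#         ),
#     )
#     img, target, index = dataset[0]  # __getitem__ also returns the index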
class ILSVRC_HDF5_feats(data.Dataset):
""" ILSVRC_HDF5_feats: A dataset to support I/O from an HDF5.
Parameters
----------
root :str
Path to the hdf5 file containing images and labels.
root_feats: str, optional
Path to the hdf5 file containing the instance features.
root_nns: str, optional
Path to the hdf5 file containing the list of nearest neighbors for each instance.
    transform: callable, optional
        A function/transform that takes in a PIL image and returns a transformed
        version. E.g., ``transforms.RandomCrop``.
    target_transform: callable, optional
        A function/transform that takes in the target and transforms it.
    load_labels: bool, optional
        If True, return the label for each example.
    load_features: bool, optional
        If True, return instance features and their neighbors (needed for IC-GAN).
load_in_mem_images: bool, optional
Load all images in memory.
load_in_mem_labels: bool, optional
Load all labels in memory.
load_in_mem_feats: bool, optional
Load all instance features in memory.
k_nn: int, optional
Size of the neighborhood obtained with the k-NN algorithm.
which_nn_balance: str, optional
Whether to sample an instance or a neighbor class first. By default,
``instance_balance`` is used.
Using ``nnclass_balance`` allows class balancing to be applied.
kmeans_file: str, optional
Path to a file where only the dataset indexes selected with k-means are stored.
It reduces the amount of available data to train or test the model.
n_subsampled_data: int, optional
If other than -1, that number of data points are randomly selected from the dataset.
It reduces the amount of available data to train or test the model.
    filter_hd: int, optional
        Only used for the COCO-Stuff dataset. If -1, the entire COCO-Stuff evaluation
        set is used. If 0, only images with seen class combinations are used.
        If 1, only images with unseen class combinations are used.
label_dim: int, optional
Dimensionality of label embeddings. Useful for the StyleGAN2 backbone code.
    feature_dim: int, optional
        Dimensionality of instance feature embeddings. Useful for the StyleGAN2
        backbone code.
    feature_augmentation: bool, optional
        Use the instance features of the flipped ground-truth image instances as
        conditioning, with a 50% probability.
    gpu_knn: bool, optional
        Accelerate k-NN faiss computation with GPUs.
    apply_norm: bool, optional
        Normalize images to the range [-1, 1].
label_onehot: bool, optional
Return labels as a one hot encoding. Useful for StyleGAN2 backbone code.
Attributes
    ----------
root: str
Path to the hdf5 file containing images and labels.
root_feats: str
Path to the hdf5 file containing the instance features.
root_nns: str
Path to the hdf5 file containing the list of nearest neighbors for each
instance.
    transform: callable
        A function/transform that takes in a PIL image and returns a transformed
        version. E.g., ``transforms.RandomCrop``.
    target_transform: callable
        A function/transform that takes in the target and transforms it.
    load_labels: bool
        If True, return the label for each example.
    load_features: bool
        If True, return instance features and their neighbors (needed for IC-GAN).
load_in_mem_images: bool
Load all images in memory.
load_in_mem_labels: bool
Load all labels in memory.
load_in_mem_feats: bool
Load all instance features in memory.
feature_augmentation: bool
Use the instance features of the flipped ground-truth image instances as conditioning,
with a 50% probability.
which_nn_balance: str
Whether to sample an instance or a neighbor class first. By default,
``instance_balance`` is used. Using ``nnclass_balance`` allows class balancing to be
applied.
    apply_norm: bool
        Normalize images to the range [-1, 1].
label_onehot: bool
Return labels as a one hot encoding. Useful for StyleGAN2 backbone code.
    num_imgs: int
Number of data points in the dataset.
data: NumPy array
Image data, with the shape (num_imgs, w, h, 3), where w: width and h: height.
labels: NumPy array
Label data, with the shape (num_imgs, 1).
feats: NumPy array
Instance features data, with the shape (num_imgs, 2048).
sample_nns: list
List with length ``num_imgs``, that contains a list of the ``k_nn`` neighbor indexes
for each instance.
    sample_nn_radius: NumPy array
        Array of size (num_imgs) that stores the distance between each instance and its
        farthest (k-th) neighbor.
possible_sampling_idxs: list
List of all effective possible data samples. By default, it is a range(0, num_imgs).
kmeans_samples: list
List of indexes for samples selected with k-means algorithm.
    kth_values: NumPy array
        Distances between each instance and its k-th nearest neighbor.
"""
def __init__(
self,
root,
root_feats=None,
root_nns=None,
transform=None,
target_transform=None,
load_labels=True,
load_features=True,
load_in_mem_images=False,
load_in_mem_labels=False,
load_in_mem_feats=False,
k_nn=4,
which_nn_balance="instance_balance",
kmeans_file=None,
n_subsampled_data=-1,
filter_hd=-1,
label_dim=0,
feature_dim=2048,
feature_augmentation=False,
gpu_knn=True,
apply_norm=True,
label_onehot=False,
**kwargs
):
self.root = root
self.root_feats = root_feats
self.root_nns = root_nns
self.load_labels = load_labels
self.load_features = load_features
self._label_dim = label_dim
self._feature_dim = feature_dim
self.label_onehot = label_onehot
self.feature_augmentation = feature_augmentation
# Set the transform here
self.transform = transform
self.target_transform = target_transform
        # Normalization of images to the range [-1, 1], as used in BigGAN
self.apply_norm = apply_norm
# load the entire dataset into memory?
self.load_in_mem_images = load_in_mem_images
self.load_in_mem_labels = load_in_mem_labels
self.load_in_mem_feats = load_in_mem_feats
        self.which_nn_balance = which_nn_balance
        if self.which_nn_balance not in ("instance_balance", "nnclass_balance"):
            raise ValueError(
                "which_nn_balance must be either 'instance_balance' or 'nnclass_balance'."
            )
        # Read the dataset size without keeping the file handle open.
        with h5.File(root, "r") as f:
            self.num_imgs = len(f["labels"])
self.labels, self.feats = None, None
self.kth_values = None
# If loading into memory, do so now
print(
"Load in mem? Images: %r, Labels: %r, Features: %r."
% (self.load_in_mem_images, self.load_in_mem_labels, self.load_in_mem_feats)
)
if self.load_in_mem_images:
print("Loading images from %s into memory..." % root)
with h5.File(root, "r") as f:
self.data = f["imgs"][:]
if load_labels and self.load_in_mem_labels:
print("Loading labels from %s into memory..." % root)
with h5.File(root, "r") as f:
self.labels = f["labels"][:]
if load_features and self.load_in_mem_feats:
print("Loading features from %s into memory..." % root_feats)
with h5.File(root_feats, "r") as f:
self.feats = f["feats"][:]
# Normalizing features
print("Normalizing features by their norm")
self.feats /= np.linalg.norm(self.feats, axis=1, keepdims=True)
self.feats = torch.from_numpy(self.feats)
self.feats.share_memory_()
if load_features:
if root_nns is None and self.load_in_mem_feats:
# We compute NNs only if we are loading features and there is no root_nns file.
self._obtain_nns(k_nn, gpu=gpu_knn, faiss_lib=USE_FAISS)
            elif root_nns is not None:
                # Load the pre-computed NN indexes and radii from file.
                print("Loading %s into memory..." % root_nns)
                with h5.File(root_nns, "r") as f:
                    self.sample_nns = f["sample_nns"][:]
                    self.sample_nn_radius = f["sample_nns_radius"][:]
            else:
                raise ValueError(
                    "If no file with pre-computed neighborhoods is provided, "
                    "the features need to be loaded in memory to extract them. "
                    "Set load_in_mem_feats=True."
                )
# Reducing the number of available samples according to different criteria
self.possible_sampling_idxs = range(self.num_imgs)
self.kmeans_samples = None
if kmeans_file is not None:
print("Loading file with just a few centroids (kmeans)... ", kmeans_file)
self.kmeans_samples = np.load(kmeans_file, allow_pickle=True).item()[
"center_examples"
][:, 0]
self.possible_sampling_idxs = self.kmeans_samples
elif n_subsampled_data > -1:
self.possible_sampling_idxs = np.random.choice(
np.array(self.possible_sampling_idxs),
int(n_subsampled_data),
replace=False,
)
        elif filter_hd > -1:
            # For COCO-Stuff, the evaluation set can be divided into seen class
            # combinations (filter_hd=0) or unseen class combinations (filter_hd=1).
            allowed_idxs = data_utils.filter_by_hd(filter_hd)
            self.possible_sampling_idxs = allowed_idxs
# Change the size of the dataset if only a subset of the data is used
self.possible_sampling_idxs = np.array(self.possible_sampling_idxs)
self.num_imgs = len(self.possible_sampling_idxs)
        print(
            "Number of possible conditioning instances: %d"
            % len(self.possible_sampling_idxs)
        )
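    # A minimal construction sketch (hypothetical paths). Without a `root_nns`
    # file, `load_in_mem_feats=True` is required so that neighborhoods can be
    # computed on the fly by `_obtain_nns`:
    #
    #     dataset = ILSVRC_HDF5_feats(
    #         root="data/ILSVRC128.hdf5",
    #         root_feats="data/ILSVRC128_feats.hdf5",
    #         load_in_mem_feats=True,
    #         k_nn=50,
    #     )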
def __getitem__(self, index):
"""
Parameters
----------
index: int
Returns
-------
If the dataset loads both features and labels, return 4 elements: neighbor image,
neighbor class label, instance features and instance radius
If the dataset loads only features (no labels), return 4 elements: neighbor image,
instance features, instance radius
If the dataset loads ony labels (no features), return 2 elements: neighbor image and
neighbor class label.
If the dataset does not load features nor labels, return only an image.
"""
        # This only changes the index if possible_sampling_idxs contains a subset of the
        # data (k-means/random sampling or evaluation sets in COCO-Stuff).
index = self.possible_sampling_idxs[index]
img = self._get_image(index)
target = self.get_label(index)
if self.load_features:
img_nn, label_nn, feats, radii = self._get_instance_features_and_nn(index)
img = img_nn
target = label_nn
else:
feats, radii = None, None
# Apply transform
img = torch.from_numpy(img)
if self.apply_norm:
img = ((img.float() / 255) - 0.5) * 2
if self.transform is not None:
img = self.transform(img)
if self.target_transform is not None:
target = self.target_transform(target)
if not self.label_onehot:
target = int(target)
if self.load_features and self.load_labels:
return img, target, feats, radii
elif self.load_features:
return img, feats, radii
elif self.load_labels:
return img, target
else:
return img
def sample_conditioning_instance_balance(self, batch_size, weights=None):
"""
It samples a batch size of conditionings.
First, by first sampling an instance, and then one of the neighbor's class.
Parameters
----------
batch_size: int
Number of conditioning to sample.
weights: NumPy array, optional
Array of size len(self.possible_sampling_idxs), indicating the weight for each instance,
used for sampling.
Returns
-------
labels_gen: torch.LongTensor
Tensor of shape (batch_size, label_dim). Batch of neighbor labels.
instance_gen: torch.LongTensor
Tensor of shape (batch_size, label_dim). Batch of instance features.
"""
# Control instance (center of k-NN) balancing with weights
# Sampling from p(h)
        if weights is None:
            # np.random.randint is faster than np.random.choice; use it when there
            # are no sampling weights.
sel_idxs = np.random.randint(0, len(self.possible_sampling_idxs), size=batch_size)
sel_idxs = self.possible_sampling_idxs[sel_idxs]
else:
sel_idxs = np.random.choice(
self.possible_sampling_idxs, batch_size, replace=True, p=weights
)
# Features from center example
instance_gen = self.get_instance_features(sel_idxs)
# Get labels from neighbor
labels_gen = []
for idx_ in sel_idxs:
# Sampling neighbor from p(x_nn, y_nn| h)
chosen_idx = np.random.choice(self.sample_nns[idx_])
# Labels from neighbors
if self.load_labels:
labels_gen.append(self.get_label(chosen_idx)[np.newaxis, ...])
if self.load_labels:
labels_gen = np.concatenate(labels_gen, 0)
labels_gen = torch.LongTensor(labels_gen)
else:
labels_gen = None
instance_gen = torch.FloatTensor(instance_gen)
return labels_gen, instance_gen
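    # Sampling sketch (assuming a `dataset` built with load_features=True and
    # load_labels=True): draw 32 conditionings from p(h), then one neighbor
    # label per instance from p(y_nn | h):
    #
    #     labels_gen, instance_gen = dataset.sample_conditioning_instance_balance(32)
    #     # labels_gen: LongTensor of shape (32,)
    #     # instance_gen: FloatTensor of shape (32, feature_dim)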
def sample_conditioning_nnclass_balance(
self, batch_size, weights=None, num_classes=1000
):
"""
It samples a batch size of conditionings.
First, by sampling a class, then an image from this class, and finally an instance feature
that would have this image as a neighbor in feature space.
Parameters
----------
batch_size: int
Number of conditioning to sample.
weights: NumPy array, optional
Array of size num_classes, indicating the weight for each instance, used for sampling.
num_classes: int, optional
Number of classes in the dataset
Returns
-------
labels_gen: torch.LongTensor
Tensor of shape (batch_size, label_dim). Batch of neighbor labels.
instance_gen: torch.LongTensor
Tensor of shape (batch_size, label_dim). Batch of instance features.
"""
if weights is not None:
weights = np.array(weights) / sum(weights)
# Sampling from p(y)
chosen_class = np.random.choice(
range(num_classes), batch_size, replace=True, p=weights
)
nn_idxs = []
for lab_ in chosen_class:
# Sampling from p(x_nn|y)
chosen_xnn = np.random.choice((self.labels == lab_).nonzero()[0])
# Sampling from p(h| x_nn,y)
nn_idxs.append(np.random.choice(self.sample_nns[chosen_xnn]))
instance_gen = self.get_instance_features(nn_idxs)
instance_gen = torch.FloatTensor(instance_gen)
labels_gen = torch.LongTensor(chosen_class)
return labels_gen, instance_gen
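    # Class-balanced sampling sketch (hypothetical weights; assumes labels were
    # loaded in memory, i.e. load_in_mem_labels=True, since this method indexes
    # self.labels directly). Upweighting rare classes is the long-tail use case
    # for this sampling order:
    #
    #     class_counts = np.bincount(np.asarray(dataset.labels), minlength=1000)
    #     weights = 1.0 / np.maximum(class_counts, 1)
    #     labels_gen, instance_gen = dataset.sample_conditioning_nnclass_balance(
    #         32, weights=weights, num_classes=1000
    #     )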
    def get_label(self, index):
        """Obtain a label as an int or as a one-hot vector."""
        if not self.load_labels:
            # Return a dummy label when labels are not loaded.
            if self.label_onehot:
                return np.zeros(self.label_dim, dtype=np.float32)
            return 0
        if self.load_in_mem_labels:
            target = self.labels[index]
        else:
            with h5.File(self.root, "r") as f:
                target = f["labels"][index]
        if self.label_onehot:
            onehot_vec = np.zeros(self.label_dim, dtype=np.float32)
            onehot_vec[target] = 1
            target = onehot_vec
        return target
def get_instance_features(self, index):
"""Obtain an instance feature vector."""
if not self.load_features:
return np.zeros(self.feature_dim, dtype=np.float32).copy()
        if self.load_in_mem_feats:
            # In-memory features were already normalized in __init__.
            feat = self.feats[index].clone().float()
else:
with h5.File(self.root_feats, "r") as f:
if isinstance(index, (int, np.int64)):
hflip = np.random.randint(2) == 1
if self.feature_augmentation and hflip:
feat = f["feats_hflip"][index].astype("float")
else:
feat = f["feats"][index].astype("float")
feat /= np.linalg.norm(feat, keepdims=True)
else:
feat = []
for sl_idx in index:
hflip = np.random.randint(2) == 1
if self.feature_augmentation and hflip:
feat.append(
f["feats_hflip"][sl_idx].astype("float")[
np.newaxis, ...
]
)
else:
feat.append(
f["feats"][sl_idx].astype("float")[np.newaxis, ...]
)
feat = np.concatenate(feat)
feat /= np.linalg.norm(feat, axis=1, keepdims=True)
return feat
@property
def resolution(self):
with h5.File(self.root, "r") as f:
sze = list(f["imgs"][0].shape)
return sze[1]
@property
def label_dim(self):
return self._label_dim
@property
def feature_dim(self):
return self._feature_dim
def _obtain_nns(self, k_nn=20, faiss_lib=True, feat_sz=2048, gpu=True):
"""
It obtains the neighborhoods for all instances using the k-NN algorithm.
Parameters
----------
k_nn: int, optional
Number of neighbors (k).
        faiss_lib: bool, optional
            If True, use the faiss library implementation of k-NN. Otherwise, use the
            slower sklearn implementation.
feat_sz: int, optional
Feature dimensionality.
gpu: bool, optional
If True, leverage GPU resources to speed up computation with the faiss library.
"""
# K_nn computation takes into account the input sample as the first NN,
# so we add an extra NN to later remove the input sample.
k_nn += 1
self.sample_nns = [[] for _ in range(self.num_imgs)]
self.sample_nn_radius = np.zeros(self.num_imgs, dtype=float)
if faiss_lib:
cpu_index = faiss.IndexFlatL2(feat_sz)
if gpu:
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index) # build the index
index = gpu_index
else:
index = cpu_index
index.add(self.feats.float().numpy().astype("float32"))
kth_values, kth_values_arg = index.search(
self.feats.numpy().astype("float32"), k_nn
)
self.kth_values = np.sqrt(kth_values)
knn_radii = np.sqrt(kth_values[:, -1])
else:
dists = sklearn.metrics.pairwise_distances(
self.feats, self.feats, metric="euclidean", n_jobs=-1
)
print("Computed distances.")
knn_radii, kth_values_arg = self._get_kth_value_accurate(dists, k_nn)
for i_sample in range(self.num_imgs):
knns = kth_values_arg[i_sample]
# Discarding the input sample, also seen as the 0-NN.
knns = np.delete(knns, np.where(knns == i_sample)[0], 0)
self.sample_nns[i_sample] = knns.tolist()
self.sample_nn_radius[i_sample] = knn_radii[i_sample]
print("Computed NNs.")
    @staticmethod
    def _get_kth_value_accurate(distances, k, axis=-1):
        """Finds the k nearest neighbors.

        Parameters
        ----------
        distances: NumPy array
            Matrix of size (M, M) of unordered distances.
        k: int
            Neighborhood size.
        axis: int, optional
            Axis along which the k smallest distances are selected.

        Returns
        -------
        kth_values: NumPy array
            Distances to the k-th nearest neighbor along the designated axis.
        indices: NumPy array
            Positions in the input matrix of all neighbors up to the k-th one.
        """
        indices = np.argpartition(distances, k - 1, axis=axis)[..., :k]
        k_smallests = np.take_along_axis(distances, indices, axis=axis)
        kth_values = k_smallests.max(axis=axis)
        return kth_values, indices
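    # Toy illustration (hypothetical values): with
    #     distances = np.array([[0., 2., 1.], [2., 0., 3.], [1., 3., 0.]])
    # and k=2, np.argpartition places the 2 smallest entries of each row first
    # (in no guaranteed order), e.g. indices ~ [[0, 2], [1, 0], [2, 0]], so
    # kth_values = [1., 2., 1.]: each row's distance to its 2nd-smallest entry.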
def _get_image(self, index):
"""Obtain an image array."""
if self.load_in_mem_images:
img = self.data[index]
else:
with h5.File(self.root, "r") as f:
img = f["imgs"][index]
return img
    def _get_instance_features_and_nn(self, index):
        """Builds a quadruplet of (neighbor image, neighbor label, conditioning
        instance features, radius).

        Returns
        -------
        img_nn: NumPy array
            Neighbor image.
        label_nn: NumPy array
            Neighbor label.
        feats: NumPy array
            Conditioning instance features.
        radii: float
            Distance between the conditioning instance and its farthest (k-th) neighbor.
        """
# Standard sampling: Obtain a feature vector for the input index,
# and image/class label for a neighbor.
if self.which_nn_balance == "instance_balance":
idx_h = index
# If we are only using a selected number of instances (kmeans), re-choose the index
if self.kmeans_samples is not None:
index = np.random.choice(self.kmeans_samples)
idx_nn = np.random.choice(self.sample_nns[index])
# Reverse sampling, used when we want to perform class balancing (long-tail setup).
# In class-conditional IC-GAN, the classes are taken from the neighbors.
# The reverse sampling allows us to control the class balancing by using extra weights
# in the DataLoader.
elif self.which_nn_balance == "nnclass_balance":
idx_h = np.random.choice(self.sample_nns[index])
idx_nn = index
        # idx_h selects the instance feature vector; idx_nn the neighbor image/label.
        radii = self.sample_nn_radius[idx_h]
img_nn = self._get_image(idx_nn)
label_nn = self.get_label(idx_nn)
feats = self.get_instance_features(idx_h)
return img_nn, label_nn, feats, radii
def __len__(self):
return self.num_imgs
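# Integration sketch (hypothetical paths; assumes HDF5 files with "imgs"/"labels"
# datasets in `root` and a "feats" dataset in `root_feats`, as produced by the
# repo's data preparation scripts). The dataset plugs into a standard PyTorch
# DataLoader; with load_features=True and load_labels=True, each batch unpacks
# as (imgs, targets, feats, radii):
#
#     dataset = ILSVRC_HDF5_feats(
#         root="data/ILSVRC64.hdf5",
#         root_feats="data/ILSVRC64_feats.hdf5",
#         load_in_mem_feats=True,
#         k_nn=50,
#     )
#     loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)
#     imgs, targets, feats, radii = next(iter(loader))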