File size: 12,898 Bytes

a00ee36

# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# MIT License
""" Convert dataset to HDF5
    This script preprocesses a dataset and saves it (images and labels) to 
    an HDF5 file for improved I/O. """
import os
import sys
from argparse import ArgumentParser
from tqdm import tqdm, trange
import h5py as h5

import numpy as np
import torch

import utils
import torchvision.transforms.functional as tv_f


def prepare_parser():
    usage = "Parser for ImageNet HDF5 scripts."
    parser = ArgumentParser(description=usage)
    parser.add_argument(
        "--resolution",
        type=int,
        default=128,
        help="Which Dataset resolution to train on, out of 64, 128, 256 (default: %(default)s)",
    )
    parser.add_argument(
        "--split",
        type=str,
        default="train",
        help="Which Dataset to convert: train, val (default: %(default)s)",
    )
    parser.add_argument(
        "--data_root",
        type=str,
        default="data",
        help="Default location where data is stored (default: %(default)s)",
    )
    parser.add_argument(
        "--out_path",
        type=str,
        default="data",
        help="Default location where data in hdf5 format will be stored (default: %(default)s)",
    )
    parser.add_argument(
        "--pretrained_model_path",
        type=str,
        default="",
        help="Location where the pretrained model (to extract features) can be found (default: %(default)s)",
    )
    parser.add_argument(
        "--save_features_only",
        action="store_true",
        default=False,
        help="Only save features in hdf5 file.",
    )
    parser.add_argument(
        "--save_images_only",
        action="store_true",
        default=False,
        help="Only save images and their labels in hdf5 file.",
    )
    parser.add_argument(
        "--feature_augmentation",
        action="store_true",
        default=False,
        help="Additioally store instance features with horizontally flipped input images.",
    )
    parser.add_argument(
        "--feature_extractor",
        type=str,
        default="classification",
        choices=["classification", "selfsupervised"],
        help="Choice of feature extractor",
    )
    parser.add_argument(
        "--backbone_feature_extractor",
        type=str,
        default="resnet50",
        choices=["resnet50"],
        help="Choice of feature extractor backbone",
    )
    parser.add_argument(
        "--which_dataset", type=str, default="imagenet", help="Dataset choice."
    )
    parser.add_argument(
        "--instance_json",
        type=str,
        default="",
        help="Path to JSON containing instance segmentations for COCO_Stuff",
    )
    parser.add_argument(
        "--stuff_json",
        type=str,
        default="",
        help="Path to JSON containing instance segmentations for COCO_Stuff",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="Default overall batchsize (default: %(default)s)",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=16,
        help="Number of dataloader workers (default: %(default)s)",
    )
    parser.add_argument(
        "--chunk_size",
        type=int,
        default=500,
        help="Default overall batchsize (default: %(default)s)",
    )
    parser.add_argument(
        "--compression",
        action="store_true",
        default=False,
        help="Use LZF compression? (default: %(default)s)",
    )
    return parser


def run(config):
    # Load pretrained feature extractor
    net = utils.load_pretrained_feature_extractor(
        config["pretrained_model_path"],
        config["feature_extractor"],
        config["backbone_feature_extractor"],
    )
    net.eval()

    # Update compression entry
    config["compression"] = (
        "lzf" if config["compression"] else None
    )  # No compression; can also use 'lzf'

    # Get dataset
    kwargs = {
        "num_workers": config["num_workers"],
        "pin_memory": False,
        "drop_last": False,
    }
    test_part = False
    if config["split"] == "test":
        config["split"] = "val"
        test_part = True
    if config["which_dataset"] in ["imagenet", "imagenet_lt"]:
        data_path = os.path.join(config["data_root"], config["split"])
    else:
        data_path = config["data_root"]
    dataset = utils.get_dataset_images(
        config["resolution"],
        data_path=data_path,
        longtail=config["which_dataset"] == "imagenet_lt",
        split=config["split"],
        test_part=test_part,
        which_dataset=config["which_dataset"],
        instance_json=config["instance_json"],
        stuff_json=config["stuff_json"],
    )
    train_loader = utils.get_dataloader(
        dataset, config["batch_size"], shuffle=False, **kwargs
    )

    # HDF5 supports chunking and compression. You may want to experiment
    # with different chunk sizes to see how it runs on your machines.
    # Chunk Size/compression     Read speed @ 256x256   Read speed @ 128x128  Filesize @ 128x128    Time to write @128x128
    # 1 / None                   20/s
    # 500 / None                 ramps up to 77/s       102/s                 61GB                  23min
    # 500 / LZF                                         8/s                   56GB                  23min
    # 1000 / None                78/s
    # 5000 / None                81/s
    # auto:(125,1,16,32) / None                         11/s                  61GB

    # Use imagenet statistics to preprocess images for the feature extractor (instance features)
    norm_mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).cuda()
    norm_std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).cuda()

    if config["which_dataset"] in ["imagenet", "imagenet_lt"]:
        dataset_name_prefix = "ILSVRC"
    elif config["which_dataset"] == "coco":
        dataset_name_prefix = "COCO"
    else:
        dataset_name_prefix = config["which_dataset"]

    if not config["save_features_only"]:
        h5file_name = config["out_path"] + "/%s%i%s%s%s_xy.hdf5" % (
            dataset_name_prefix,
            config["resolution"],
            "" if config["which_dataset"] != "imagenet_lt" else "longtail",
            "_val" if config["split"] == "val" else "",
            "_test" if test_part else "",
        )
        print("Filenames are ", h5file_name)

    if not config["save_images_only"]:
        h5file_name_feats = config["out_path"] + "/%s%i%s%s%s_feats_%s_%s.hdf5" % (
            dataset_name_prefix,
            config["resolution"],
            "" if config["which_dataset"] != "imagenet_lt" else "longtail",
            "_val" if config["split"] == "val" else "",
            "_test" if test_part else "",
            config["feature_extractor"],
            config["backbone_feature_extractor"],
        )
        print("Filenames are ", h5file_name_feats)

    print(
        "Starting to load dataset into an HDF5 file with chunk size %i and compression %s..."
        % (config["chunk_size"], config["compression"])
    )

    # Save original COCO image indexes in order for the evaluation set
    if config["which_dataset"] == "coco" and test_part:
        all_image_ids = []
    # Loop over loader
    for i, (x, y, image_id) in enumerate(tqdm(train_loader)):
        if config["which_dataset"] == "coco" and test_part:
            all_image_ids.append(image_id)
        if not config["save_images_only"]:
            with torch.no_grad():
                x_tf = x.cuda()
                x_tf = x_tf * 0.5 + 0.5
                x_tf = (x_tf - norm_mean) / norm_std
                x_tf = torch.nn.functional.upsample(x_tf, 224, mode="bicubic")

                x_feat, _ = net(x_tf)
                x_feat = x_feat.cpu().numpy()
                if config["feature_augmentation"]:
                    x_tf_hflip = tv_f.hflip(x_tf)
                    x_feat_hflip, _ = net(x_tf_hflip)
                    x_feat_hflip = x_feat_hflip.cpu().numpy()
                else:
                    x_feat_hflip = None
        else:
            x_feat, x_feat_hflip = None, None
        # Stick X into the range [0, 255] since it's coming from the train loader
        x = (255 * ((x + 1) / 2.0)).byte().numpy()
        # Numpyify y
        y = y.numpy()
        # If we're on the first batch, prepare the hdf5
        if i == 0:
            # Save images and labels in hdf5 file
            if not config["save_features_only"]:
                with h5.File(h5file_name, "w") as f:
                    print("Producing dataset of len %d" % len(train_loader.dataset))
                    imgs_dset = f.create_dataset(
                        "imgs",
                        x.shape,
                        dtype="uint8",
                        maxshape=(
                            len(train_loader.dataset),
                            3,
                            config["resolution"],
                            config["resolution"],
                        ),
                        chunks=(
                            config["chunk_size"],
                            3,
                            config["resolution"],
                            config["resolution"],
                        ),
                        compression=config["compression"],
                    )
                    print("Image chunks chosen as " + str(imgs_dset.chunks))
                    imgs_dset[...] = x
                    labels_dset = f.create_dataset(
                        "labels",
                        y.shape,
                        dtype="int64",
                        maxshape=(len(train_loader.dataset),),
                        chunks=(config["chunk_size"],),
                        compression=config["compression"],
                    )
                    print("Label chunks chosen as " + str(labels_dset.chunks))
                    labels_dset[...] = y

            # Save features in hdf5 file
            if not config["save_images_only"]:
                with h5.File(h5file_name_feats, "w") as f:
                    features_dset = f.create_dataset(
                        "feats",
                        x_feat.shape,
                        dtype="float",
                        maxshape=(len(train_loader.dataset), x_feat.shape[1]),
                        chunks=(config["chunk_size"], x_feat.shape[1]),
                        compression=config["compression"],
                    )
                    features_dset[...] = x_feat
                    if config["feature_augmentation"]:
                        features_dset_hflips = f.create_dataset(
                            "feats_hflip",
                            x_feat.shape,
                            dtype="float",
                            maxshape=(len(train_loader.dataset), x_feat.shape[1]),
                            chunks=(config["chunk_size"], x_feat.shape[1]),
                            compression=config["compression"],
                        )
                        features_dset_hflips[...] = x_feat_hflip

        # Else append to the hdf5
        else:
            if not config["save_features_only"]:
                with h5.File(h5file_name, "a") as f:
                    f["imgs"].resize(f["imgs"].shape[0] + x.shape[0], axis=0)
                    f["imgs"][-x.shape[0] :] = x
                    f["labels"].resize(f["labels"].shape[0] + y.shape[0], axis=0)
                    f["labels"][-y.shape[0] :] = y

            if not config["save_images_only"]:
                with h5.File(h5file_name_feats, "a") as f:
                    f["feats"].resize(f["feats"].shape[0] + x_feat.shape[0], axis=0)
                    f["feats"][-x_feat.shape[0] :] = x_feat
                if config["feature_augmentation"]:
                    with h5.File(h5file_name_feats, "a") as f:
                        f["feats_hflip"].resize(
                            f["feats_hflip"].shape[0] + x_feat_hflip.shape[0], axis=0
                        )
                        f["feats_hflip"][-x_feat_hflip.shape[0] :] = x_feat_hflip

    if config["which_dataset"] == "coco" and test_part:
        print(
            "Saved COCO index images for evaluation set (in order of appearance in the hdf5 file)"
        )
        np.save(
            os.path.join("coco_stuff_val_indexes", "cocostuff_val2_all_idxs"),
            np.concatenate(all_image_ids),
        )


def main():
    # parse command line and run
    parser = prepare_parser()
    config = vars(parser.parse_args())
    print(config)
    run(config)


if __name__ == "__main__":
    main()