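# NOTE: The original text here ("Spaces:" / "Runtime error" / "Runtime error")
# appears to be status-banner residue captured from the hosting web page; it is
# not part of the script.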
import argparse
import copy
import json
import os
import shutil
import sys

from tqdm import tqdm

# The project root must be on sys.path before the imaginaire imports below.
sys.path.append('.')
from imaginaire.utils.lmdb import create_metadata, \
    construct_file_path, check_and_add, build_lmdb  # noqa: E402
from imaginaire.config import Config  # noqa: E402
def parse_args(argv=None):
    r"""Parse user input arguments.

    Args:
        argv (list of str, optional): Argument strings to parse. When left
            as ``None`` argparse reads ``sys.argv[1:]``, so existing callers
            that pass nothing behave exactly as before.

    Returns:
        (argparse.Namespace): Parsed arguments with the attributes
        ``data_root``, ``config``, ``output_root``, ``input_list``,
        ``metadata_factor``, ``overwrite``, ``paired``, ``large`` and
        ``remove_missing``.
    """
    parser = argparse.ArgumentParser(description='Folder -> LMDB conversion')
    # Required locations.
    parser.add_argument('--data_root', type=str, required=True,
                        help='Input data location.')
    parser.add_argument('--config', type=str, required=True,
                        help='Config with label info.')
    parser.add_argument('--output_root', type=str, required=True,
                        help='Output LMDB location')
    # Optional tuning knobs.
    parser.add_argument('--input_list', type=str, default='',
                        help='list of images that will be used.')
    parser.add_argument('--metadata_factor', type=float, default=0.75,
                        help='Factor of filesize to allocate for metadata?')
    # Boolean switches, all off unless given on the command line.
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite output file if exists')
    parser.add_argument('--paired', default=False, action='store_true',
                        help='Is the input data paired?')
    parser.add_argument('--large', default=False, action='store_true',
                        help='Is the dataset large?')
    parser.add_argument('--remove_missing', default=False, action='store_true',
                        help='Remove missing files from paired datasets?')
    return parser.parse_args(argv)
def main():
    r"""Build lmdb for training/testing.

    Usage:
    python scripts/build_lmdb.py \
      --config configs/data_image.yaml \
      --data_root /mnt/bigdata01/datasets/test_image \
      --output_root /mnt/bigdata01/datasets/test_image/lmdb_0/ \
      --overwrite

    For every data type listed in ``cfg.data.data_types`` this collects the
    files under ``--data_root``, reserves an LMDB map size derived from the
    summed file sizes, and writes one LMDB under ``--output_root``. It then
    writes ``all_filenames.json`` and ``metadata.json`` next to the LMDBs.

    Returns:
        ``None`` when the LMDBs were written or when the output location
        already exists and ``--overwrite`` was not given. The
        ``(all_filenames, extensions)`` tuple is only returned when
        ``args.output_root`` is empty (branch kept for compatibility).
    """
    args = parse_args()
    cfg = Config(args.config)

    # Check if output file already exists.
    if os.path.exists(args.output_root):
        if args.overwrite:
            print('Deleting existing output LMDB.')
            shutil.rmtree(args.output_root)
        else:
            print('Output root LMDB already exists. Use --overwrite. ' +
                  'Exiting...')
            return

    all_filenames, extensions = \
        create_metadata(data_root=args.data_root,
                        cfg=cfg,
                        paired=args.paired,
                        input_list=args.input_list)
    required_data_types = cfg.data.data_types

    # Build LMDB.
    os.makedirs(args.output_root)
    for data_type in required_data_types:
        data_size = 0
        print('Data type:', data_type)
        filepaths, keys = [], []
        print('>> Building file list.')

        # Get appropriate list of files. Paired data shares one listing
        # across data types; unpaired data has one listing per data type.
        if args.paired:
            filenames = all_filenames
        else:
            filenames = all_filenames[data_type]

        for sequence in tqdm(filenames):
            # Iterate over a snapshot: missing entries may be removed from
            # the live list inside the loop.
            for filename in list(filenames[sequence]):
                filepath = construct_file_path(
                    args.data_root, data_type, sequence, filename,
                    extensions[data_type])
                key = '%s/%s' % (sequence, filename)
                filesize = check_and_add(filepath, key, filepaths, keys,
                                         remove_missing=args.remove_missing)

                # -1 is the "file missing" sentinel; it must not be counted
                # as a size, otherwise every missing file shrinks the
                # reserved space by one byte.
                if filesize == -1:
                    # Remove file from list, if missing.
                    if args.paired and args.remove_missing:
                        print('Removing %s from list' % (filename))
                        filenames[sequence].remove(filename)
                    continue
                data_size += filesize

        # Remove empty sequences.
        if args.paired and args.remove_missing:
            # Snapshot the keys because entries are popped while scanning.
            for sequence in list(all_filenames):
                if not all_filenames[sequence]:
                    all_filenames.pop(sequence)

        # Allocate size: file bytes plus metadata head-room, with a floor of
        # 1e9 bytes. Kept as an int so the size handed to build_lmdb has the
        # same type whether or not the floor applies.
        data_size = max(int((1 + args.metadata_factor) * data_size),
                        10 ** 9)
        print('Reserved size: %s, %dGB' % (data_type, data_size // 10 ** 9))

        # Write LMDB to file.
        output_filepath = os.path.join(args.output_root, data_type)
        build_lmdb(filepaths, keys, output_filepath, data_size, args.large)

    # Output list of all filenames.
    if args.output_root:
        filenames_path = os.path.join(args.output_root, 'all_filenames.json')
        with open(filenames_path, 'w') as fout:
            json.dump(all_filenames, fout, indent=4)

        # Output metadata.
        metadata_path = os.path.join(args.output_root, 'metadata.json')
        with open(metadata_path, 'w') as fout:
            json.dump(extensions, fout, indent=4)
    else:
        return all_filenames, extensions
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not when imported.
    main()