""" Copyright (c) 2022, salesforce.com, inc. All rights reserved. SPDX-License-Identifier: BSD-3-Clause For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause """ import os from pathlib import Path from omegaconf import OmegaConf from lavis.common.utils import ( cleanup_dir, download_and_extract_archive, get_abs_path, get_cache_path, ) DATA_URL = { "train": "http://images.cocodataset.org/zips/train2014.zip", # md5: 0da8c0bd3d6becc4dcb32757491aca88 "val": "http://images.cocodataset.org/zips/val2014.zip", # md5: a3d79f5ed8d289b7a7554ce06a5782b3 "test": "http://images.cocodataset.org/zips/test2014.zip", # md5: 04127eef689ceac55e3a572c2c92f264 "test2015": "http://images.cocodataset.org/zips/test2015.zip", # md5: 04127eef689ceac55e3a572c2c92f264 } def download_datasets(root, url): download_and_extract_archive(url=url, download_root=root, extract_root=storage_dir) if __name__ == "__main__": config_path = get_abs_path("configs/datasets/coco/defaults_cap.yaml") storage_dir = OmegaConf.load( config_path ).datasets.coco_caption.build_info.images.storage download_dir = Path(get_cache_path(storage_dir)).parent / "download" storage_dir = Path(get_cache_path(storage_dir)) if storage_dir.exists(): print(f"Dataset already exists at {storage_dir}. Aborting.") exit(0) try: for k, v in DATA_URL.items(): print("Downloading {} to {}".format(v, k)) download_datasets(download_dir, v) except Exception as e: # remove download dir if failed cleanup_dir(download_dir) print("Failed to download or extracting datasets. Aborting.") cleanup_dir(download_dir)