wenkai's picture
Upload 560 files
a43ef32 verified
raw
history blame
No virus
1.84 kB
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""
import os
from pathlib import Path
from omegaconf import OmegaConf
from lavis.common.utils import (
cleanup_dir,
download_and_extract_archive,
get_abs_path,
get_cache_path,
)
# Download URL of each COCO image archive, keyed by split name. Every archive
# lives under the same "zips" directory, so the table below only lists the
# archive stem and the URL is assembled from a single template.
DATA_URL = {
    split: f"http://images.cocodataset.org/zips/{archive}.zip"
    for split, archive in (
        ("train", "train2014"),  # md5: 0da8c0bd3d6becc4dcb32757491aca88
        ("val", "val2014"),  # md5: a3d79f5ed8d289b7a7554ce06a5782b3
        ("test", "test2014"),  # md5: 04127eef689ceac55e3a572c2c92f264
        # NOTE(review): the md5 recorded here is identical to the test2014
        # entry above -- presumably a copy-paste; verify before relying on it.
        ("test2015", "test2015"),  # md5: 04127eef689ceac55e3a572c2c92f264
    )
}
def download_datasets(root, url, extract_root=None):
    """Download the archive at ``url`` into ``root`` and extract it.

    Args:
        root: Directory the archive is downloaded into (forwarded as
            ``download_root``).
        url: URL of the archive to fetch.
        extract_root: Directory the archive is extracted into. When omitted,
            the module-level ``storage_dir`` is used; that global is only
            assigned when this file is executed as a script.

    Raises:
        NameError: If ``extract_root`` is omitted and ``storage_dir`` has not
            been defined (e.g. the module was imported rather than run).
    """
    if extract_root is None:
        # Keep the historical behaviour of reading the script-level global,
        # but fail with an actionable message instead of a bare NameError.
        try:
            extract_root = storage_dir
        except NameError:
            raise NameError(
                "storage_dir is not defined; pass extract_root explicitly "
                "when calling download_datasets outside of the script."
            ) from None

    download_and_extract_archive(
        url=url, download_root=root, extract_root=extract_root
    )
if __name__ == "__main__":
    # Resolve the image storage location from the COCO caption dataset config.
    config_path = get_abs_path("configs/datasets/coco/defaults_cap.yaml")
    dataset_config = OmegaConf.load(config_path)

    # NOTE: storage_dir must stay a module-level name -- download_datasets
    # reads it as the extraction target.
    storage_dir = Path(
        get_cache_path(
            dataset_config.datasets.coco_caption.build_info.images.storage
        )
    )
    # Archives are staged in a sibling "download" directory.
    download_dir = storage_dir.parent / "download"

    if storage_dir.exists():
        print(f"Dataset already exists at {storage_dir}. Aborting.")
        # SystemExit instead of exit(): the latter is injected by the `site`
        # module and is not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(0)

    try:
        for split, url in DATA_URL.items():
            print("Downloading {} to {}".format(url, split))
            download_datasets(download_dir, url)
    except Exception as e:
        print("Failed to download or extracting datasets. Aborting.")
        print(f"Reason: {e!r}")
        # storage_dir did not exist when we started, so anything in it now is
        # a partial extraction from this run. Remove it; otherwise the next
        # run would hit the "already exists" check above and never retry.
        if storage_dir.exists():
            cleanup_dir(storage_dir)
        # Report the failure to the caller through a non-zero exit status.
        raise SystemExit(1) from e
    finally:
        # The staged archives are no longer needed, whether or not the
        # download succeeded; clean them up exactly once.
        cleanup_dir(download_dir)