|
""" |
|
Unpacks tar files from Imagenet-1k dataset while keeping the original directory structure. |
|
|
|
The script only unpacks the files from training subset. |
|
""" |
|
import tarfile |
|
from pathlib import Path |
|
from joblib import delayed, Parallel |
|
from realfake.utils import inject_args, Args |
|
|
|
|
|
class ImagenetArgs(Args):
    """Input and output locations used by the unpacking script."""

    # Dataset root; archives are read from its "train" subdirectory.
    imagenet_dir: Path

    # Destination root; every archive gets its own subdirectory under it.
    unpacked_dir: Path
|
|
|
|
|
@inject_args
def main(args: ImagenetArgs) -> None:
    """Unpack every ``*.tar`` archive of the training subset in parallel.

    Archives are looked up directly inside ``<imagenet_dir>/train``; each one
    is handed to ``unpack_tar`` together with ``args.unpacked_dir``.

    Args:
        args: Script arguments; supplied through the ``inject_args`` decorator
            (presumably parsed from the command line -- confirm in
            ``realfake.utils``).

    Raises:
        FileNotFoundError: If ``<imagenet_dir>/train`` is not an existing
            directory.
    """
    train_dir = args.imagenet_dir / "train"

    # Explicit check instead of ``assert``: assertions are removed when Python
    # runs with -O, which would let a wrong path pass silently (the glob below
    # simply yields nothing for a missing or non-directory path).
    if not train_dir.is_dir():
        raise FileNotFoundError(f"Directory {train_dir} does not exist")

    archives = train_dir.glob("*.tar")

    # n_jobs=-1 asks joblib to use all available CPUs; the high verbosity
    # level makes it report progress for the submitted tasks.
    Parallel(n_jobs=-1, verbose=100)(
        delayed(unpack_tar)(tar_file, args.unpacked_dir) for tar_file in archives
    )
|
|
|
|
|
def unpack_tar(tar_file: Path, output_dir: Path) -> None:
    """Extract a single tar archive into its own subdirectory.

    The archive's contents are written to ``output_dir/<archive stem>``, so
    ``n01440764.tar`` ends up in ``output_dir/n01440764``.

    Args:
        tar_file: Path to the tar archive to extract.
        output_dir: Root directory under which the per-archive subdirectory
            is created.

    Raises:
        tarfile.TarError: If the archive cannot be read or, when extraction
            filters are available, if a member would be written outside of
            the destination directory.
        OSError: If the archive cannot be opened or files cannot be written.
    """
    output_subdir = output_dir / tar_file.stem

    with tarfile.open(tar_file) as tar:
        # Use the "data" extraction filter where the interpreter provides it:
        # it rejects members that escape the destination (absolute paths,
        # "..", links pointing outside) and avoids the DeprecationWarning that
        # Python 3.12/3.13 emit for an unfiltered extractall().
        if hasattr(tarfile, "data_filter"):
            tar.extractall(output_subdir, filter="data")
        else:
            # Older interpreters without extraction filters: keep the
            # original unfiltered behaviour.
            tar.extractall(output_subdir)
|
|
|
|
|
if __name__ == "__main__": |
|
main() |
|
|