""" Unpacks tar files from Imagenet-1k dataset while keeping the original directory structure. The script only unpacks the files from training subset. """ import tarfile from pathlib import Path from joblib import delayed, Parallel from realfake.utils import inject_args, Args class ImagenetArgs(Args): imagenet_dir: Path unpacked_dir: Path @inject_args def main(args: ImagenetArgs) -> None: train_dir = args.imagenet_dir/"train" assert train_dir.exists(), f"Directory {train_dir} does not exist" archives = train_dir.glob("*.tar") Parallel(n_jobs=-1, verbose=100)(delayed(unpack_tar)(tar_file, args.unpacked_dir) for tar_file in archives) def unpack_tar(tar_file: Path, output_dir: Path) -> None: output_subdir = output_dir/tar_file.stem with tarfile.open(tar_file) as tar: tar.extractall(output_subdir) if __name__ == "__main__": main()