#!/usr/bin/env bash
#
# Download and extract the ImageNet (ILSVRC2012) dataset.
#
#   ILSVRC2012_img_train.tar (about 138 GB)
#   ILSVRC2012_img_val.tar   (about 6.3 GB)
#
# Usage: run with bash from the directory that should contain ./imagenet.
# Both archives are downloaded into the current directory (an interrupted
# download is resumed on the next run). imagenetval.sh, the script that sorts
# the validation images into class directories, must already be in the
# current directory.
#
# Adapted from:
#   https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
#   https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4
#
# Resulting layout:
#
#   imagenet/train/
#   ├── n01440764
#   │   ├── n01440764_10026.JPEG
#   │   ├── n01440764_10027.JPEG
#   │   ├── ......
#   ├── ......
#   imagenet/val/
#   ├── n01440764
#   │   ├── ILSVRC2012_val_00000293.JPEG
#   │   ├── ILSVRC2012_val_00002138.JPEG
#   │   ├── ......
#   ├── ......
#

# Abort on the first failing command. Without this, a failed mkdir/mv/cd would
# let the extract-and-delete loop below run in the wrong directory and remove
# unrelated .tar files.
set -euo pipefail

readonly base_url='https://image-net.org/data/ILSVRC/2012'
readonly train_tar='ILSVRC2012_img_train.tar'
readonly val_tar='ILSVRC2012_img_val.tar'
readonly val_script='imagenetval.sh'

# Print a message to stderr and stop.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remember where we started so later steps do not depend on relative "cd ..".
start_dir=$PWD
readonly start_dir

# Fail fast: the validation step needs this script, so check for it before
# spending hours on the download and the training-set extraction.
[[ -f "${start_dir}/${val_script}" ]] \
  || die "missing ${val_script} in ${start_dir}"

#
# Download the archives.
#
# --continue resumes a partial file instead of starting over (and instead of
# saving a second copy with a ".1" suffix).
# NOTE(review): --no-check-certificate disables TLS certificate verification.
# It is kept from the original script; drop it if the host's certificate
# validates in your environment.
wget --continue --no-check-certificate "${base_url}/${train_tar}"
wget --continue --no-check-certificate "${base_url}/${val_tar}"

#
# Make imagenet directory
#
mkdir -p imagenet

#
# Extract the training data:
#
# Create train directory; move .tar file; change directory
mkdir -p imagenet/train
mv -- "${train_tar}" imagenet/train/
cd imagenet/train
# Extract training set; remove compressed file
tar -xvf "${train_tar}"
rm -f -- "${train_tar}"

#
# At this stage imagenet/train contains one .tar file per category
# (1000 in total).
#
# For each .tar file:
#   1. create directory with same name as .tar file
#   2. extract contents of .tar file into that directory
#   3. remove .tar file
#
# The list is taken from a glob of the top level only, so the loop never has
# to walk into the class directories it is filling.
shopt -s nullglob
class_archives=(./*.tar)
shopt -u nullglob
(( ${#class_archives[@]} > 0 )) \
  || die "no per-class .tar files found in ${PWD}"

for archive in "${class_archives[@]}"; do
  class_dir=${archive%.tar}
  mkdir -p -- "${class_dir}"
  tar -xvf "${archive}" -C "${class_dir}"
  rm -f -- "${archive}"
done

#
# This results in a training directory like so:
#
#   imagenet/train/
#   ├── n01440764
#   │   ├── n01440764_10026.JPEG
#   │   ├── n01440764_10027.JPEG
#   │   ├── ......
#   ├── ......
#
# Change back to original directory
cd -- "${start_dir}"

#
# Extract the validation data and move images to subfolders:
#
# Create validation directory; move .tar file; change directory;
# extract validation .tar; remove compressed file
mkdir -p imagenet/val
mv -- "${val_tar}" imagenet/val/
cd imagenet/val
tar -xvf "${val_tar}"
rm -f -- "${val_tar}"
# Run the validation sorting script from inside imagenet/val; it creates all
# class directories and moves images into the corresponding directories.
# Upstream equivalent:
# wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
cp -- "${start_dir}/${val_script}" ./
bash "${val_script}"

#
# This results in a validation directory like so:
#
#   imagenet/val/
#   ├── n01440764
#   │   ├── ILSVRC2012_val_00000293.JPEG
#   │   ├── ILSVRC2012_val_00002138.JPEG
#   │   ├── ......
#   ├── ......
#
#
# Check total files after extract
cd -- "${start_dir}/imagenet"
find train/ -name "*.JPEG" | wc -l
# expected: 1281167
find val/ -name "*.JPEG" | wc -l
# expected: 50000