# https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4
# Download the ILSVRC2012 training and validation archives into the dataset
# directory. Set DATASET_DIR to override the destination; it defaults to the
# path this script has always used.
# Abort if the directory is unreachable so the archives are never downloaded
# into whatever directory the shell happens to be in.
cd "${DATASET_DIR:-/mnt/store/wbandar1/datasets}" || exit 1

# -c resumes a partially downloaded archive instead of starting over (or
# saving a second copy as *.tar.1) when the script is re-run.
# NOTE(review): --no-check-certificate disables TLS certificate verification,
# so the server's identity is not authenticated. Kept as in the original;
# drop the flag if the host's certificate validates in your environment.
wget -c https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar --no-check-certificate || exit 1
wget -c https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar --no-check-certificate || exit 1
#
# Script to extract the ImageNet dataset:
#   ILSVRC2012_img_train.tar (about 138 GB)
#   ILSVRC2012_img_val.tar (about 6.3 GB)
# Make sure ILSVRC2012_img_train.tar and ILSVRC2012_img_val.tar are in your current directory.
#
# https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
#
# Resulting directory layout:
#
#  train/
#  ├── n01440764
#  │   ├── n01440764_10026.JPEG
#  │   ├── n01440764_10027.JPEG
#  │   ├── ......
#  ├── ......
#  val/
#  ├── n01440764
#  │   ├── ILSVRC2012_val_00000293.JPEG
#  │   ├── ILSVRC2012_val_00002138.JPEG
#  │   ├── ......
#  ├── ......
#
#
# Extract the training data:
#
# Unpack the outer training archive inside train/, then expand every inner
# *.tar into a directory named after the archive (minus the .tar suffix).
# The work runs in a subshell: the caller's working directory is untouched,
# and a failing step can never cause later commands to run in the wrong place.
(
  mkdir -p train || exit 1
  mv ILSVRC2012_img_train.tar train/ || exit 1
  cd train || exit 1
  tar -xvf ILSVRC2012_img_train.tar || exit 1
  rm -f ILSVRC2012_img_train.tar

  find . -name '*.tar' |
    while IFS= read -r archive; do
      # IFS= and -r keep leading/trailing whitespace and backslashes in the
      # path intact.
      class_dir=${archive%.tar}
      if mkdir -p "$class_dir" && tar -xvf "$archive" -C "$class_dir"; then
        # Delete the inner archive only after it was extracted successfully.
        rm -f "$archive"
      else
        printf 'error: failed to extract %s (archive kept)\n' "$archive" >&2
        exit 1
      fi
    done
) || exit 1
#
# Extract the validation data and move images to subfolders:
#
# Unpack the validation archive inside val/. Each step is checked so that the
# helper script below is never run from an unexpected directory.
mkdir -p val || exit 1
mv ILSVRC2012_img_val.tar val/ || exit 1
cd val || exit 1
tar -xvf ILSVRC2012_img_val.tar || exit 1

# Fetch valprep.sh and run it from inside val/ to sort the images into
# subfolders. It is saved to a file first: with `wget ... | bash`, a failed
# download would hand bash empty input and the step would silently "succeed".
# NOTE(review): this executes a script taken from the master branch of a
# third-party repository; consider pinning the URL to a reviewed commit.
if wget -qO valprep.sh https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh; then
  bash valprep.sh
  valprep_status=$?
  rm -f valprep.sh
  [ "$valprep_status" -eq 0 ] || exit "$valprep_status"
else
  # wget -O creates the output file even when the download fails.
  rm -f valprep.sh
  printf 'error: could not download valprep.sh\n' >&2
  exit 1
fi
#
# Check the total number of files after extraction:
#
# $ find train/ -name "*.JPEG" | wc -l
# 1281167
# $ find val/ -name "*.JPEG" | wc -l
# 50000
#