# Directory (relative to the current working directory) that receives the
# downloaded archives and their extracted contents.
DATASET_DIR="data/diabetic-retinopathy-dataset"

# Abort early if the target directory cannot be created; every later download
# writes into it, so continuing would only produce a cascade of failures.
if ! mkdir -p -- "$DATASET_DIR"; then
  echo "Error: cannot create directory $DATASET_DIR" >&2
  exit 1
fi

# Start time of the script, in seconds since the epoch. Used at the end of the
# script to report the total run time.
start_time=$(date +%s)

# Names of the competition files to download.
# Note: the train and test archives are split into multiple parts, so every
# part must be downloaded to obtain the complete archive.
# Reference: check the Data Explorer on Kaggle for the list of files:
# https://www.kaggle.com/c/diabetic-retinopathy-detection/data
files=(
  "test.zip.001"
  "test.zip.002"
  "test.zip.003"
  "test.zip.004"
  "test.zip.005"
  "test.zip.006"
  "test.zip.007"
  "sampleSubmission.csv.zip"
  "sample.zip"
  "train.zip.001"
  "train.zip.002"
  "train.zip.003"
  "train.zip.004"
  "train.zip.005"
  "trainLabels.csv.zip"
)
#######################################
# Download a single competition file with the Kaggle CLI and extract it.
# Globals:   DATASET_DIR (read) - directory used for download and extraction
# Arguments: $1 - name of the file as listed in the competition data
# Outputs:   tool output on stdout; error messages on stderr
# Returns:   0 on success; non-zero if the argument is missing, the download
#            fails, the expected archive is absent, or extraction fails
#######################################
download_file() {
  local name=${1:-}
  if [[ -z "$name" ]]; then
    echo "Error: download_file requires a file name." >&2
    return 1
  fi

  if ! kaggle competitions download -c diabetic-retinopathy-detection \
    -f "$name" -p "$DATASET_DIR"; then
    echo "Error: download of $name failed." >&2
    return 1
  fi

  # The script expects the downloaded archive to carry a .zip suffix: names
  # that do not already end in .zip (e.g. the split parts) get it appended.
  local zip_file="$DATASET_DIR/$name"
  if [[ "$name" != *.zip ]]; then
    zip_file="$zip_file.zip"
  fi

  if [[ ! -f "$zip_file" ]]; then
    echo "Error: $zip_file does not exist." >&2
    return 1
  fi

  # -o overwrites existing files. The archive is removed only after a
  # successful extraction so a failed unzip never discards the download.
  if ! unzip -o "$zip_file" -d "$DATASET_DIR"; then
    echo "Error: failed to extract $zip_file; archive kept." >&2
    return 1
  fi
  rm -f -- "$zip_file"
}
#######################################
# Download every named file concurrently, one background job per file.
# Arguments: $@ - names of the files to pass to download_file
# Outputs:   one error line on stderr for each file whose job failed
# Returns:   0 if every job succeeded, 1 if at least one failed
#######################################
download_all_files() {
  local -a pids=() names=()
  local name i failures=0

  for name in "$@"; do
    download_file "$name" &
    pids+=("$!")
    names+=("$name")
  done

  # A bare `wait` always returns 0 and would hide failed jobs; waiting on each
  # PID individually surfaces every job's exit status.
  for i in "${!pids[@]}"; do
    if ! wait "${pids[i]}"; then
      echo "Error: failed to download or extract ${names[i]}" >&2
      failures=$((failures + 1))
    fi
  done

  ((failures == 0))
}

# Loop through the array of file names and download each file in parallel,
# then block until all background jobs have finished.
download_all_files "${files[@]}"
download_status=$?

# End time of the script
end_time=$(date +%s)
# Total time taken, in whole minutes (integer division truncates)
total_time=$(((end_time - start_time) / 60))
echo "Total time taken: ${total_time} minutes"

# Propagate download failures to the caller instead of always exiting 0.
if ((download_status != 0)); then
  exit 1
fi