Spaces:

bhimrazy
/

diabetic-retinopathy-detection

Running

App Files Files Community

diabetic-retinopathy-detection / scripts /download-dr-dataset.sh

bhimrazy

Adds shell scripts to download and merge dr dataset

31d1d47 8 months ago

raw

history blame contribute delete

1.75 kB

	#!/bin/bash

	# Directory that receives the downloaded and extracted dataset files
	# (relative to the directory the script is run from).
	DATASET_DIR="data/diabetic-retinopathy-dataset"

	# Stop right away when the directory cannot be created; otherwise every
	# download started later would be pointed at a path that does not exist.
	if ! mkdir -p -- "$DATASET_DIR"; then
	  echo "Error: could not create dataset directory $DATASET_DIR" >&2
	  exit 1
	fi

	# Start time of the script, in seconds since the Unix epoch
	start_time=$(date +%s)

	# Names of the competition files to download, in the order they are started.
	# The test and train archives are split into numbered parts
	# (test.zip.001-007 and train.zip.001-005); all parts of an archive must be
	# downloaded to get the complete file.
	# Reference: Check Data Explorer on Kaggle for the list of files
	# https://www.kaggle.com/c/diabetic-retinopathy-detection/data
	files=(
	  test.zip.00{1..7}
	  "sampleSubmission.csv.zip"
	  "sample.zip"
	  train.zip.00{1..5}
	  "trainLabels.csv.zip"
	)

	#######################################
	# Download one competition file with the kaggle CLI, extract it into
	# DATASET_DIR and delete the downloaded archive once extraction succeeded.
	# Globals:
	#   DATASET_DIR (read) - directory used for the download and the extraction
	# Arguments:
	#   $1 - file name as listed for the competition, e.g. "train.zip.001"
	# Outputs:
	#   Error messages on stderr; kaggle and unzip output is passed through.
	# Returns:
	#   0 on success
	#   1 when the expected archive is not present after the download
	#   2 when no file name was given
	#   otherwise the exit status of the kaggle, unzip or rm command that failed
	#######################################
	download_file() {
	  local name="${1:-}"
	  if [[ -z "$name" ]]; then
	    echo "Error: download_file requires a file name." >&2
	    return 2
	  fi

	  # Give up when the download itself fails, so that an archive left over
	  # from an earlier run is never extracted as if it were fresh.
	  local rc=0
	  kaggle competitions download -c diabetic-retinopathy-detection -f "$name" -p "$DATASET_DIR" || rc=$?
	  if (( rc != 0 )); then
	    echo "Error: download of $name failed (kaggle exit status $rc)." >&2
	    return "$rc"
	  fi

	  # The archive is expected at "<name>.zip" unless the requested name
	  # already ends in .zip.
	  local zip_file="$DATASET_DIR/$name"
	  if [[ "$name" != *.zip ]]; then
	    zip_file="$zip_file.zip"
	  fi

	  # Check if zip file exists
	  if [[ ! -f "$zip_file" ]]; then
	    echo "Error: $zip_file does not exist." >&2
	    return 1
	  fi

	  # -o overwrites files that already exist in the target directory.
	  # On failure the archive is kept so the data is not lost.
	  unzip -o "$zip_file" -d "$DATASET_DIR" || rc=$?
	  if (( rc != 0 )); then
	    echo "Error: failed to extract $zip_file (unzip exit status $rc); archive kept." >&2
	    return "$rc"
	  fi

	  # Extraction succeeded: the archive is no longer needed.
	  rm -f -- "$zip_file"
	}

	# Start one background download per file. The PID of every job is recorded
	# next to its file name, because a bare `wait` returns 0 no matter how the
	# jobs ended and would hide failed downloads.
	pids=()
	names=()
	for file in "${files[@]}"; do
	  download_file "$file" &
	  pids+=("$!")
	  names+=("$file")
	done

	# Wait for each background job in turn and collect the files that failed
	failed=()
	for i in "${!pids[@]}"; do
	  if ! wait "${pids[$i]}"; then
	    failed+=("${names[$i]}")
	  fi
	done

	# End time of the script
	end_time=$(date +%s)

	# Calculate total time taken in minutes (integer division, rounds down)
	total_time=$(( (end_time - start_time) / 60 ))

	# Print total time taken
	echo "Total time taken: ${total_time} minutes"

	# Report failed downloads and make the script's exit status reflect them
	if (( ${#failed[@]} > 0 )); then
	  echo "Error: ${#failed[@]} of ${#pids[@]} downloads failed: ${failed[*]}" >&2
	  exit 1
	fi