{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data ingestion: kidney disease classification\n",
    "\n",
    "Prototype of the data-ingestion stage: load the project configuration, download the dataset archive from Google Drive, and extract it.\n",
    "\n",
    "**Note:** the archive is roughly 1.6 GB, so a fresh download takes a few minutes. The download cell below reuses an archive that is already on disk."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import zipfile\n",
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "\n",
    "import gdown"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Working directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The config file is addressed as `config/config.yaml`, relative to the repository\n",
    "# root, while this notebook sits one level below it. Step up only when the config\n",
    "# is not visible from the current directory, so re-running this cell never climbs\n",
    "# a second time.\n",
    "if not Path(\"config/config.yaml\").exists():\n",
    "    os.chdir(\"..\")\n",
    "\n",
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Project imports stay after the working-directory cell, as in the original cell order.\n",
    "# NOTE(review): the package may resolve paths relative to the cwd at import time - confirm.\n",
    "from kidney_classification import logger\n",
    "from kidney_classification.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH\n",
    "from kidney_classification.utils.common import create_directories, read_yaml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass(frozen=True)\n",
    "class DataIngestionConfig:\n",
    "    \"\"\"Immutable settings for the data-ingestion stage.\"\"\"\n",
    "\n",
    "    root_dir: Path  # directory created for this stage\n",
    "    source_URL: str  # link the dataset archive is downloaded from\n",
    "    local_data_file: Path  # where the downloaded archive is written\n",
    "    unzip_dir: Path  # directory the archive is extracted into"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConfigurationManager:\n",
    "    \"\"\"Read the YAML config and params files and build per-stage config objects.\"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        config_filepath=CONFIG_FILE_PATH,\n",
    "        params_filepath=PARAMS_FILE_PATH,\n",
    "    ):\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        create_directories([self.config.artifacts_root])\n",
    "\n",
    "    def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
    "        \"\"\"Create the stage directory and return the data-ingestion settings.\"\"\"\n",
    "        config = self.config.data_ingestion\n",
    "\n",
    "        create_directories([config.root_dir])\n",
    "\n",
    "        # Wrap the raw config values so the fields match their Path annotations.\n",
    "        return DataIngestionConfig(\n",
    "            root_dir=Path(config.root_dir),\n",
    "            source_URL=config.source_URL,\n",
    "            local_data_file=Path(config.local_data_file),\n",
    "            unzip_dir=Path(config.unzip_dir),\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ingestion component"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DataIngestion:\n",
    "    \"\"\"Download the dataset archive from Google Drive and extract it.\"\"\"\n",
    "\n",
    "    # Direct-download endpoint; the Drive file id is appended to it.\n",
    "    _DRIVE_DOWNLOAD_PREFIX = \"https://drive.google.com/uc?export=download&id=\"\n",
    "\n",
    "    def __init__(self, config: DataIngestionConfig):\n",
    "        self.config = config\n",
    "\n",
    "    @staticmethod\n",
    "    def _extract_file_id(url: str) -> str:\n",
    "        \"\"\"Return the file id from a Drive link (`.../d/<id>/...` or `...?id=<id>`).\"\"\"\n",
    "        match = re.search(r\"/d/([\\w-]+)\", url) or re.search(r\"[?&]id=([\\w-]+)\", url)\n",
    "        if match is None:\n",
    "            raise ValueError(f\"Could not find a Google Drive file id in URL: {url}\")\n",
    "        return match.group(1)\n",
    "\n",
    "    def download_file(self, skip_existing: bool = False) -> str:\n",
    "        \"\"\"Fetch the dataset archive from `config.source_URL`.\n",
    "\n",
    "        Args:\n",
    "            skip_existing: When True and the archive is already on disk, reuse it\n",
    "                instead of downloading again. Defaults to False (always download).\n",
    "\n",
    "        Returns:\n",
    "            The path of the local archive, as a string.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: If no Drive file id can be found in the source URL.\n",
    "            RuntimeError: If gdown returns no output path for the download.\n",
    "        \"\"\"\n",
    "        dataset_url = self.config.source_URL\n",
    "        zip_download_path = Path(self.config.local_data_file)\n",
    "\n",
    "        if skip_existing and zip_download_path.is_file():\n",
    "            logger.info(f\"Archive {zip_download_path} already exists; skipping download\")\n",
    "            return str(zip_download_path)\n",
    "\n",
    "        # Create the archive's own parent directory instead of a hard-coded path.\n",
    "        zip_download_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "        logger.info(\n",
    "            f\"Downloading data from {dataset_url} into file {zip_download_path}\")\n",
    "\n",
    "        file_id = self._extract_file_id(dataset_url)\n",
    "        # The output location is handed to gdown as a plain string path.\n",
    "        downloaded = gdown.download(\n",
    "            self._DRIVE_DOWNLOAD_PREFIX + file_id, str(zip_download_path))\n",
    "        if downloaded is None:\n",
    "            # Do not report success when gdown gave back no output path.\n",
    "            raise RuntimeError(f\"Download failed for {dataset_url}\")\n",
    "\n",
    "        logger.info(\n",
    "            f\"Downloaded data from {dataset_url} into file {zip_download_path}\")\n",
    "        return str(zip_download_path)\n",
    "\n",
    "    def extract_zip_file(self) -> None:\n",
    "        \"\"\"Extract the downloaded archive into `config.unzip_dir`, creating it if needed.\"\"\"\n",
    "        unzip_path = self.config.unzip_dir\n",
    "        os.makedirs(unzip_path, exist_ok=True)\n",
    "        with zipfile.ZipFile(self.config.local_data_file, \"r\") as zip_ref:\n",
    "            zip_ref.extractall(unzip_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run the stage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "config_manager = ConfigurationManager()\n",
    "data_ingestion_config = config_manager.get_data_ingestion_config()\n",
    "data_ingestion = DataIngestion(config=data_ingestion_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse an archive that is already on disk so Restart & Run All stays cheap.\n",
    "data_ingestion.download_file(skip_existing=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_ingestion.extract_zip_file()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}