Spaces:

hf-nikhil
/

ipp

Sleeping

ipp

File size: 6,438 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\Users\\\\nikhil\\\\OneDrive\\\\Desktop\\\\ML Projects\\\\ipp\\\\research'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step up from research/ to the project root (the notebook is run from research/).\n",
    "# Guarded so that re-running this cell does not climb another directory level.\n",
    "if os.path.basename(os.getcwd()) == \"research\":\n",
    "    os.chdir(\"../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\Users\\\\nikhil\\\\OneDrive\\\\Desktop\\\\ML Projects\\\\ipp'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "\n",
    "@dataclass(frozen=True)\n",
    "class DataIngestionConfig:\n",
    "    \"\"\"Immutable bundle of the settings consumed by the data-ingestion stage.\"\"\"\n",
    "\n",
    "    # Directory that is created for this stage before anything is written.\n",
    "    root_dir: Path\n",
    "    # Remote location the raw data file is downloaded from.\n",
    "    source_URL: str\n",
    "    # Where the downloaded file is stored and later read back as CSV.\n",
    "    local_data_file: Path\n",
    "    # Destination of the training split.\n",
    "    train_data_file: Path\n",
    "    # Destination of the test split.\n",
    "    test_data_file: Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from insurancePP.constants import *\n",
    "from insurancePP.utils.common import read_yaml, create_directories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConfigurationManager:\n",
    "    \"\"\"Load the project's YAML settings and build per-stage config objects.\n",
    "\n",
    "    Instantiating the manager reads both YAML files and creates the\n",
    "    artifacts root directory named in the main config.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):\n",
    "        \"\"\"Read the YAML files and create the artifacts root directory.\n",
    "\n",
    "        Args:\n",
    "            config_filepath: Location of the main configuration YAML.\n",
    "            params_filepath: Location of the parameters YAML.\n",
    "        \"\"\"\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        create_directories([self.config.artifacts_root])\n",
    "\n",
    "    def get_data_ingestion_config(self) -> DataIngestionConfig:\n",
    "        \"\"\"Build the data-ingestion settings from the ``data_ingestion`` section.\n",
    "\n",
    "        Creates the stage's root directory as a side effect.\n",
    "\n",
    "        Returns:\n",
    "            A DataIngestionConfig whose path fields are ``Path`` objects, as its\n",
    "            annotations declare; ``source_URL`` is passed through unchanged.\n",
    "        \"\"\"\n",
    "        section = self.config.data_ingestion\n",
    "        create_directories([section.root_dir])\n",
    "\n",
    "        # The YAML values arrive as plain strings; coerce them so the\n",
    "        # dataclass really holds the Path objects it is annotated with.\n",
    "        return DataIngestionConfig(\n",
    "            root_dir=Path(section.root_dir),\n",
    "            source_URL=section.source_URL,\n",
    "            local_data_file=Path(section.local_data_file),\n",
    "            train_data_file=Path(section.train_data_file),\n",
    "            test_data_file=Path(section.test_data_file),\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import urllib.request as request\n",
    "import zipfile\n",
    "import pandas as pd\n",
    "from insurancePP.logging import logger\n",
    "from insurancePP.utils.common import get_size\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DataIngestion:\n",
    "    \"\"\"Download the raw dataset and split it into train and test CSV files.\"\"\"\n",
    "\n",
    "    def __init__(self, config: DataIngestionConfig):\n",
    "        \"\"\"Keep the ingestion settings for later use.\n",
    "\n",
    "        Args:\n",
    "            config: Source URL plus the local, train and test file locations.\n",
    "        \"\"\"\n",
    "        self.config = config\n",
    "\n",
    "    def download_file(self):\n",
    "        \"\"\"Fetch the source file unless a local copy already exists.\n",
    "\n",
    "        An existing file at ``local_data_file`` is reused as-is; it is neither\n",
    "        re-downloaded nor validated.\n",
    "        \"\"\"\n",
    "        if not os.path.exists(self.config.local_data_file):\n",
    "            filename, _ = request.urlretrieve(\n",
    "                url=self.config.source_URL,\n",
    "                filename=self.config.local_data_file,\n",
    "            )\n",
    "            # urlretrieve returns the local path it wrote to, so the source\n",
    "            # URL has to be logged explicitly to make the message meaningful.\n",
    "            logger.info(f'Downloaded {self.config.source_URL} to {filename}')\n",
    "        else:\n",
    "            logger.info(f'File {self.config.local_data_file} already exists')\n",
    "\n",
    "    def initiate_data_ingestion(self, test_size=0.2, random_state=50):\n",
    "        \"\"\"Read the local CSV, split it, and write the two splits to disk.\n",
    "\n",
    "        Args:\n",
    "            test_size: Forwarded to ``train_test_split``; defaults to 0.2.\n",
    "            random_state: Forwarded to ``train_test_split`` so the split is\n",
    "                repeatable; defaults to 50.\n",
    "\n",
    "        Raises:\n",
    "            Any exception from reading, splitting or writing propagates unchanged.\n",
    "        \"\"\"\n",
    "        logger.info(\"Entered inside the data ingestion component\")\n",
    "\n",
    "        data = pd.read_csv(self.config.local_data_file)\n",
    "        logger.info(\"train test split initiated\")\n",
    "\n",
    "        train, test = train_test_split(data, test_size=test_size, random_state=random_state)\n",
    "        # index=False keeps the shuffled row index out of the files; otherwise\n",
    "        # it is written as an extra unnamed first column.\n",
    "        train.to_csv(self.config.train_data_file, index=False)\n",
    "        test.to_csv(self.config.test_data_file, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-02-25 12:08:14,157 : INFO : common : yaml file: config\\config.yaml loaded successfully]\n",
      "[2024-02-25 12:08:14,161 : INFO : common : yaml file: params.yaml loaded successfully]\n",
      "[2024-02-25 12:08:14,164 : INFO : common : directory artifacts created]\n",
      "[2024-02-25 12:08:14,166 : INFO : common : directory artifacts/data_ingestion created]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-02-25 12:08:15,563 : INFO : 489743891 : Downloaded artifacts/data_ingestion/data.csv to artifacts/data_ingestion/data.csv]\n",
      "[2024-02-25 12:08:15,563 : INFO : 489743891 : Entered inside the data ingestion component]\n",
      "[2024-02-25 12:08:15,592 : INFO : 489743891 : train test split initiated]\n"
     ]
    }
   ],
   "source": [
    "# Run the ingestion stage end to end. No try/except wrapper: a failure\n",
    "# should surface with its original traceback.\n",
    "config_manager = ConfigurationManager()\n",
    "data_ingestion_config = config_manager.get_data_ingestion_config()\n",
    "data_ingestion = DataIngestion(data_ingestion_config)\n",
    "data_ingestion.download_file()\n",
    "data_ingestion.initiate_data_ingestion()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "0.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}