Spaces:

hf-nikhil
/

ipp

Sleeping

ipp

File size: 10,355 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\Users\\\\nikhil\\\\OneDrive\\\\Desktop\\\\ML Projects\\\\ipp\\\\research'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step up to the project root only while the kernel is still inside research/,\n",
    "# so re-running this cell does not keep climbing the directory tree.\n",
    "if os.path.basename(os.getcwd()) == \"research\":\n",
    "    os.chdir(\"../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'c:\\\\Users\\\\nikhil\\\\OneDrive\\\\Desktop\\\\ML Projects\\\\ipp'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%pwd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataclasses import dataclass\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dataclass(frozen=True)\n",
    "class ModelTrainingConfig:\n",
    "    \"\"\"Immutable bundle of the paths used by the model-training stage.\"\"\"\n",
    "\n",
    "    # directory that ConfigurationManager creates for this stage\n",
    "    root_dir: Path\n",
    "    # file the selected model is written to with save_object\n",
    "    model_path: Path\n",
    "    # array file read with numpy.load for training\n",
    "    prepared_train_data: Path\n",
    "    # array file read with numpy.load for testing\n",
    "    prepared_test_data: Path\n",
    "    # not read in this notebook; NOTE(review): presumably the saved preprocessor - confirm\n",
    "    preprocessor_obj_file_path: Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from insurancePP.constants import *\n",
    "from insurancePP.utils.common import read_yaml, create_directories, save_object, load_object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ConfigurationManager:\n",
    "    \"\"\"Read the project YAML files and build the configuration of a pipeline stage.\"\"\"\n",
    "\n",
    "    def __init__(self, config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH):\n",
    "        \"\"\"Load both YAML files and create the artifacts root directory.\n",
    "\n",
    "        Args:\n",
    "            config_filepath: YAML file read into ``self.config``.\n",
    "            params_filepath: YAML file read into ``self.params``.\n",
    "        \"\"\"\n",
    "        self.config = read_yaml(config_filepath)\n",
    "        self.params = read_yaml(params_filepath)\n",
    "\n",
    "        create_directories([self.config.artifacts_root])\n",
    "\n",
    "    def get_model_training_configuration(self) -> ModelTrainingConfig:\n",
    "        \"\"\"Create the model-trainer directory and return that stage's configuration.\n",
    "\n",
    "        Returns:\n",
    "            A ModelTrainingConfig built from the ``model_trainer`` section of the\n",
    "            config file. Every entry is converted to ``Path`` so the values match\n",
    "            the annotations declared on the dataclass.\n",
    "        \"\"\"\n",
    "        trainer_section = self.config.model_trainer\n",
    "        create_directories([trainer_section.root_dir])\n",
    "\n",
    "        return ModelTrainingConfig(\n",
    "            root_dir = Path(trainer_section.root_dir),\n",
    "            model_path = Path(trainer_section.model_path),\n",
    "            prepared_train_data = Path(trainer_section.prepared_train_data),\n",
    "            prepared_test_data = Path(trainer_section.prepared_test_data),\n",
    "            preprocessor_obj_file_path = Path(trainer_section.preprocessor_obj_file_path),\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from numpy import load\n",
    "\n",
    "from insurancePP.logging import logger\n",
    "\n",
    "from insurancePP.constants import *\n",
    "from insurancePP.utils.common import read_yaml, create_directories, save_object, load_object, evaluate_models\n",
    "\n",
    "from dataclasses import dataclass\n",
    "from catboost import CatBoostRegressor\n",
    "from sklearn.ensemble import (\n",
    "    AdaBoostRegressor,\n",
    "    GradientBoostingRegressor,\n",
    "    RandomForestRegressor,\n",
    ")\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import r2_score\n",
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from sklearn.tree import DecisionTreeRegressor\n",
    "from xgboost import XGBRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ModelTraining:\n",
    "    \"\"\"Tune the candidate regressors, persist the best one and score it on the test split.\"\"\"\n",
    "\n",
    "    def __init__(self, config : ModelTrainingConfig):\n",
    "        \"\"\"Keep the stage configuration.\n",
    "\n",
    "        Args:\n",
    "            config: Provides ``prepared_train_data``, ``prepared_test_data`` and\n",
    "                ``model_path``, the fields read by ``initiate_model_trainer``.\n",
    "        \"\"\"\n",
    "        self.config = config\n",
    "\n",
    "    def initiate_model_trainer(self) -> float:\n",
    "        \"\"\"Evaluate every candidate model, save the best one and return its test R2.\n",
    "\n",
    "        The last column of each loaded array is treated as the target and the\n",
    "        remaining columns as features.\n",
    "\n",
    "        Returns:\n",
    "            R2 score of the selected model on the test split.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: If ``evaluate_models`` returns an empty report.\n",
    "        \"\"\"\n",
    "        logger.info(\"loading the training and testing data\")\n",
    "        train_arr = load(self.config.prepared_train_data)\n",
    "        test_arr = load(self.config.prepared_test_data)\n",
    "\n",
    "        logger.info(\"spliting the training and testing data\")\n",
    "        X_train, y_train = train_arr[:, :-1], train_arr[:, -1]\n",
    "        X_test, y_test = test_arr[:, :-1], test_arr[:, -1]\n",
    "\n",
    "        models = {\n",
    "            \"Random Forest\": RandomForestRegressor(),\n",
    "            \"Decision Tree\": DecisionTreeRegressor(),\n",
    "            \"Gradient Boosting\": GradientBoostingRegressor(),\n",
    "            \"Linear Regression\": LinearRegression(),\n",
    "            \"XGBRegressor\": XGBRegressor(),\n",
    "            \"CatBoosting Regressor\": CatBoostRegressor(verbose=False),\n",
    "            \"AdaBoost Regressor\": AdaBoostRegressor(),\n",
    "        }\n",
    "\n",
    "        # Search grid per model; keys must match the names used in `models`.\n",
    "        params = {\n",
    "            \"Decision Tree\": {\n",
    "                'criterion':['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],\n",
    "            },\n",
    "            \"Random Forest\":{\n",
    "                'n_estimators': [8,16,32,64,128,256]\n",
    "            },\n",
    "            \"Gradient Boosting\":{\n",
    "                # 'learning_rate':[.1,.01,.05,.001],\n",
    "                'n_estimators': [8,16,32,64,128,256]\n",
    "            },\n",
    "            \"Linear Regression\":{},\n",
    "            \"XGBRegressor\":{\n",
    "                'learning_rate':[.1,.01,.05,.001],\n",
    "                'n_estimators': [8,16,32,64,128,256]\n",
    "            },\n",
    "            \"CatBoosting Regressor\":{\n",
    "                'depth': [6,8,10],\n",
    "                'learning_rate': [0.01, 0.05, 0.1],\n",
    "                'iterations': [30, 50, 100]\n",
    "            },\n",
    "            \"AdaBoost Regressor\":{\n",
    "                'learning_rate':[.1,.01,0.5,.001],\n",
    "                'n_estimators': [8,16,32,64,128,256]\n",
    "            }\n",
    "        }\n",
    "\n",
    "        model_report : dict = evaluate_models(X_train, y_train, X_test, y_test, models, params)\n",
    "        if not model_report:\n",
    "            raise ValueError(\"evaluate_models returned an empty report; no model to select\")\n",
    "\n",
    "        performance_df = pd.DataFrame(\n",
    "            list(model_report.items()), columns=['Model Name', 'R2_Score']\n",
    "        ).sort_values(by=[\"R2_Score\"],ascending=False)\n",
    "        logger.info(f'Model Report: \\n {performance_df}')\n",
    "\n",
    "        # On ties max() keeps the first name in insertion order, the same entry\n",
    "        # the previous values().index(best_score) lookup selected.\n",
    "        best_model_name = max(model_report, key=model_report.get)\n",
    "        # NOTE(review): assumes evaluate_models leaves the estimators in `models`\n",
    "        # fitted in place - confirm in insurancePP.utils.common.\n",
    "        best_model = models[best_model_name]\n",
    "\n",
    "        save_object(\n",
    "            file_path = Path(self.config.model_path),\n",
    "            obj = best_model\n",
    "        )\n",
    "\n",
    "        predict = best_model.predict(X_test)\n",
    "        r2 = r2_score(y_test, predict)\n",
    "        logger.info(f\"Best model: {best_model_name} | test R2: {r2}\")\n",
    "        return r2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-02-25 21:29:34,497 : INFO : common : yaml file: config\\config.yaml loaded successfully]\n",
      "[2024-02-25 21:29:34,501 : INFO : common : yaml file: params.yaml loaded successfully]\n",
      "[2024-02-25 21:29:34,504 : INFO : common : directory artifacts created]\n",
      "[2024-02-25 21:29:34,506 : INFO : common : directory artifacts/model_trainer created]\n",
      "[2024-02-25 21:29:34,508 : INFO : 177425094 : loading the training and testing data]\n",
      "[2024-02-25 21:29:34,513 : INFO : 177425094 : spliting the training and testing data]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2024-02-25 21:30:42,646 : INFO : 177425094 : Model Report: \n",
      "               Model Name  R2_Score\n",
      "5  CatBoosting Regressor  0.884958\n",
      "2      Gradient Boosting  0.880510\n",
      "6     AdaBoost Regressor  0.869562\n",
      "4           XGBRegressor  0.868819\n",
      "0          Random Forest  0.864245\n",
      "3      Linear Regression  0.778228\n",
      "1          Decision Tree  0.756036]\n",
      "[2024-02-25 21:30:42,663 : INFO : common : Binary Object is stored]\n"
     ]
    }
   ],
   "source": [
    "# Build the stage configuration, then train, select and persist the best model.\n",
    "# Errors propagate unchanged; the former try/except only re-raised them.\n",
    "config = ConfigurationManager()\n",
    "data_training_config = config.get_model_training_configuration()\n",
    "model_trainer = ModelTraining(config = data_training_config)\n",
    "model_trainer.initiate_model_trainer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "0.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}