{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "### **Importing the libraries**" ], "metadata": { "id": "rDFn6pi7D7Jk" } }, { "cell_type": "code", "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt" ], "metadata": { "id": "nGZJJWqfEKoy" }, "execution_count": 51, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Importing the dataset" ], "metadata": { "id": "18RFDOyuEhDc" } }, { "cell_type": "code", "source": [ "dataset=pd.read_csv('Bengaluru_House_Data.csv')\n", "dataset.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "o9F-OnzNEk4P", "outputId": "bfb5445e-c3f1-4883-f251-2ff56d1454a8" }, "execution_count": 52, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(13320, 9)" ] }, "metadata": {}, "execution_count": 52 } ] }, { "cell_type": "code", "source": [ "dataset=dataset.drop(['area_type','society','balcony','availability'],axis='columns')\n", "dataset.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "zqiJVjzDFHv0", "outputId": "f0b346b6-85bd-4c5d-888b-b8ffc84f9446" }, "execution_count": 53, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price\n", "0 Electronic City Phase II 2 BHK 1056 2.0 39.07\n", "1 Chikka Tirupathi 4 Bedroom 2600 5.0 120.00\n", "2 Uttarahalli 3 BHK 1440 2.0 62.00\n", "3 Lingadheeranahalli 3 BHK 1521 3.0 95.00\n", "4 Kothanur 2 BHK 1200 2.0 51.00" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathprice
0Electronic City Phase II2 BHK10562.039.07
1Chikka Tirupathi4 Bedroom26005.0120.00
2Uttarahalli3 BHK14402.062.00
3Lingadheeranahalli3 BHK15213.095.00
4Kothanur2 BHK12002.051.00
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset", "summary": "{\n \"name\": \"dataset\",\n \"rows\": 13320,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1305,\n \"samples\": [\n \"Ashok Nagar\",\n \"Maruthi Nagar\",\n \"CQAL Layout\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 31,\n \"samples\": [\n \"8 BHK\",\n \"5 Bedroom\",\n \"19 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2117,\n \"samples\": [\n \"5270\",\n \"832\",\n \"1145\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3414580781243859,\n \"min\": 1.0,\n \"max\": 40.0,\n \"num_unique_values\": 19,\n \"samples\": [\n 2.0,\n 1.0,\n 14.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 148.97167376967403,\n \"min\": 8.0,\n \"max\": 3600.0,\n \"num_unique_values\": 1994,\n \"samples\": [\n 93.25,\n 33.645,\n 62.55\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 53 } ] }, { "cell_type": "markdown", "source": [ "# Data Preprocessing" ], "metadata": { "id": "31NmI8KSFz5C" } }, { "cell_type": "code", "source": [ "dataset.isnull().sum()\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Ax5GQuqrF3IR", "outputId": "2a553a9a-8412-4137-bcb5-3f2d9437aeff" }, "execution_count": 54, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "location 1\n", "size 16\n", "total_sqft 0\n", "bath 73\n", "price 0\n", "dtype: int64" ] }, "metadata": {}, "execution_count": 54 } ] }, { "cell_type": "code", "source": [ "dataset=dataset.dropna()\n", "dataset.isnull().sum()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "j3HEv734GKiu", "outputId": "285bef86-a0f2-4e78-dd1c-1c0323761dcd" }, "execution_count": 55, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "location 0\n", "size 0\n", "total_sqft 0\n", "bath 0\n", "price 0\n", "dtype: int64" ] }, "metadata": {}, "execution_count": 55 } ] }, { "cell_type": "code", "source": [ "dataset.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ed3sO0ZLGSOh", "outputId": "7927dd35-3d24-4e0a-91af-c6b3aad35786" }, "execution_count": 56, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(13246, 5)" ] }, "metadata": {}, "execution_count": 56 } ] }, { "cell_type": "code", "source": [ "dataset['size'].unique()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4VO931jsGggb", "outputId": "e6c8d109-2f1e-424a-9029-c74f4aa11630" }, "execution_count": 57, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['2 BHK', '4 Bedroom', '3 BHK', '4 BHK', '6 Bedroom', '3 Bedroom',\n", " '1 BHK', '1 RK', '1 Bedroom', '8 Bedroom', '2 Bedroom',\n", " '7 Bedroom', '5 BHK', '7 BHK', '6 BHK', '5 Bedroom', '11 BHK',\n", " '9 BHK', '9 Bedroom', '27 BHK', '10 Bedroom', '11 Bedroom',\n", " '10 BHK', '19 BHK', '16 BHK', '43 Bedroom', '14 BHK', '8 BHK',\n", " '12 Bedroom', '13 BHK', '18 Bedroom'], dtype=object)" ] }, "metadata": {}, "execution_count": 57 } ] }, { "cell_type": "code", "source": [ "dataset['bhk'] = dataset['size'].apply(lambda x: int(x.split(' ')[0]))" ], "metadata": { "id": "lFY-MR7hHCfi" }, "execution_count": 58, "outputs": [] }, { "cell_type": "code", "source": [ "dataset.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "a3eSMJ1tHd5k", "outputId": "39ebc2a3-369d-4f63-cdb8-34655391b9fe" }, "execution_count": 59, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price bhk\n", "0 Electronic City Phase II 2 BHK 1056 2.0 39.07 2\n", "1 Chikka Tirupathi 4 Bedroom 2600 5.0 120.00 4\n", "2 Uttarahalli 3 BHK 1440 2.0 62.00 3\n", "3 Lingadheeranahalli 3 BHK 1521 3.0 95.00 3\n", "4 Kothanur 2 BHK 1200 2.0 51.00 2" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathpricebhk
0Electronic City Phase II2 BHK10562.039.072
1Chikka Tirupathi4 Bedroom26005.0120.004
2Uttarahalli3 BHK14402.062.003
3Lingadheeranahalli3 BHK15213.095.003
4Kothanur2 BHK12002.051.002
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset", "summary": "{\n \"name\": \"dataset\",\n \"rows\": 13246,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1304,\n \"samples\": [\n \"Gollarahatti\",\n \"Maruthi Nagar\",\n \"Venugopal Reddy Layout\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 31,\n \"samples\": [\n \"8 BHK\",\n \"5 Bedroom\",\n \"19 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2067,\n \"samples\": [\n \"525\",\n \"1093\",\n \"502\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3415060582158718,\n \"min\": 1.0,\n \"max\": 40.0,\n \"num_unique_values\": 19,\n \"samples\": [\n 2.0,\n 1.0,\n 14.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 149.0765868547877,\n \"min\": 8.0,\n \"max\": 3600.0,\n \"num_unique_values\": 1955,\n \"samples\": [\n 61.36,\n 48.4,\n 64.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 1,\n \"max\": 43,\n \"num_unique_values\": 19,\n \"samples\": [\n 2,\n 8,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 59 } ] }, { "cell_type": "code", "source": [ "dataset.total_sqft.unique()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lcRCFcHJH07j", "outputId": "3c63a664-210a-442c-aa63-c987a0a523b6" }, "execution_count": 60, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array(['1056', '2600', '1440', ..., '1133 - 1384', '774', '4689'],\n", " dtype=object)" ] }, "metadata": {}, "execution_count": 60 } ] }, { "cell_type": "code", "source": [ "def is_float(x):\n", " try:\n", " float(x)\n", " except:\n", " return False\n", " return True" ], "metadata": { "id": "FCvXdV_uIFe_" }, "execution_count": 61, "outputs": [] }, { "cell_type": "code", "source": [ "dataset[~dataset['total_sqft'].apply(is_float)].head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "0lgm-z1BIVYF", "outputId": "d91a1ee7-654a-42ef-f21c-f2e01c155d8d" }, "execution_count": 62, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price bhk\n", "30 Yelahanka 4 BHK 2100 - 2850 4.0 186.000 4\n", "122 Hebbal 4 BHK 3067 - 8156 4.0 477.000 4\n", "137 8th Phase JP Nagar 2 BHK 1042 - 1105 2.0 54.005 2\n", "165 Sarjapur 2 BHK 1145 - 1340 2.0 43.490 2\n", "188 KR Puram 2 BHK 1015 - 1540 2.0 56.800 2" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathpricebhk
30Yelahanka4 BHK2100 - 28504.0186.0004
122Hebbal4 BHK3067 - 81564.0477.0004
1378th Phase JP Nagar2 BHK1042 - 11052.054.0052
165Sarjapur2 BHK1145 - 13402.043.4902
188KR Puram2 BHK1015 - 15402.056.8002
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"dataset[~dataset['total_sqft']\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Hebbal\",\n \"KR Puram\",\n \"8th Phase JP Nagar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"2 BHK\",\n \"4 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"3067 - 8156\",\n \"1015 - 1540\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.0954451150103321,\n \"min\": 2.0,\n \"max\": 4.0,\n \"num_unique_values\": 2,\n \"samples\": [\n 2.0,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 184.77346719699773,\n \"min\": 43.49,\n \"max\": 477.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 477.0,\n 56.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2,\n \"max\": 4,\n \"num_unique_values\": 2,\n \"samples\": [\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 62 } ] }, { "cell_type": "code", "source": [ "def convert_sqft_to_num(x):\n", " tokens = x.split('-')\n", " if len(tokens) == 2:\n", " return (float(tokens[0])+float(tokens[1]))/2\n", " try:\n", " return float(x)\n", " except:\n", " return None" ], "metadata": { "id": "K9J4A5UaIyIH" }, "execution_count": 63, "outputs": [] }, { "cell_type": "code", "source": [ "dataset=dataset.copy()\n", "dataset['total_sqft'] = dataset['total_sqft'].apply(convert_sqft_to_num)\n", "dataset.head(3)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "emKsB1EuJRP1", "outputId": "7c85b774-903a-44b7-cabc-03b85d5b52e1" }, "execution_count": 64, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price bhk\n", "0 Electronic City Phase II 2 BHK 1056.0 2.0 39.07 2\n", "1 Chikka Tirupathi 4 Bedroom 2600.0 5.0 120.00 4\n", "2 Uttarahalli 3 BHK 1440.0 2.0 62.00 3" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathpricebhk
0Electronic City Phase II2 BHK1056.02.039.072
1Chikka Tirupathi4 Bedroom2600.05.0120.004
2Uttarahalli3 BHK1440.02.062.003
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset", "summary": "{\n \"name\": \"dataset\",\n \"rows\": 13246,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1304,\n \"samples\": [\n \"Gollarahatti\",\n \"Maruthi Nagar\",\n \"Venugopal Reddy Layout\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 31,\n \"samples\": [\n \"8 BHK\",\n \"5 Bedroom\",\n \"19 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1237.3234454015146,\n \"min\": 1.0,\n \"max\": 52272.0,\n \"num_unique_values\": 1972,\n \"samples\": [\n 1041.0,\n 3042.0,\n 616.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3415060582158718,\n \"min\": 1.0,\n \"max\": 40.0,\n \"num_unique_values\": 19,\n \"samples\": [\n 2.0,\n 1.0,\n 14.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 149.0765868547877,\n \"min\": 8.0,\n \"max\": 3600.0,\n \"num_unique_values\": 1955,\n \"samples\": [\n 61.36,\n 48.4,\n 64.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 1,\n \"max\": 43,\n \"num_unique_values\": 19,\n \"samples\": [\n 2,\n 8,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 64 } ] }, { "cell_type": "code", "source": [ "dataset.loc[30]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7crzfpJrJ0zN", "outputId": "34a285bf-a933-4b1f-9c38-5e654606e5cc" }, "execution_count": 65, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "location Yelahanka\n", "size 4 BHK\n", "total_sqft 2475.0\n", "bath 4.0\n", "price 186.0\n", "bhk 4\n", "Name: 30, dtype: object" ] }, "metadata": {}, "execution_count": 65 } ] }, { "cell_type": "code", "source": [ "dataset['price_per_sqft'] = dataset['price']*100000/dataset['total_sqft']\n", "dataset.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "M1B2dcgsLuXu", "outputId": "e2d0271b-4026-4dd8-9719-6fad76c3c931" }, "execution_count": 66, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price bhk \\\n", "0 Electronic City Phase II 2 BHK 1056.0 2.0 39.07 2 \n", "1 Chikka Tirupathi 4 Bedroom 2600.0 5.0 120.00 4 \n", "2 Uttarahalli 3 BHK 1440.0 2.0 62.00 3 \n", "3 Lingadheeranahalli 3 BHK 1521.0 3.0 95.00 3 \n", "4 Kothanur 2 BHK 1200.0 2.0 51.00 2 \n", "\n", " price_per_sqft \n", "0 3699.810606 \n", "1 4615.384615 \n", "2 4305.555556 \n", "3 6245.890861 \n", "4 4250.000000 " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathpricebhkprice_per_sqft
0Electronic City Phase II2 BHK1056.02.039.0723699.810606
1Chikka Tirupathi4 Bedroom2600.05.0120.0044615.384615
2Uttarahalli3 BHK1440.02.062.0034305.555556
3Lingadheeranahalli3 BHK1521.03.095.0036245.890861
4Kothanur2 BHK1200.02.051.0024250.000000
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset", "summary": "{\n \"name\": \"dataset\",\n \"rows\": 13246,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1304,\n \"samples\": [\n \"Gollarahatti\",\n \"Maruthi Nagar\",\n \"Venugopal Reddy Layout\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 31,\n \"samples\": [\n \"8 BHK\",\n \"5 Bedroom\",\n \"19 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1237.3234454015146,\n \"min\": 1.0,\n \"max\": 52272.0,\n \"num_unique_values\": 1972,\n \"samples\": [\n 1041.0,\n 3042.0,\n 616.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.3415060582158718,\n \"min\": 1.0,\n \"max\": 40.0,\n \"num_unique_values\": 19,\n \"samples\": [\n 2.0,\n 1.0,\n 14.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 149.0765868547877,\n \"min\": 8.0,\n \"max\": 3600.0,\n \"num_unique_values\": 1955,\n \"samples\": [\n 61.36,\n 48.4,\n 64.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 1,\n \"max\": 43,\n \"num_unique_values\": 19,\n \"samples\": [\n 2,\n 8,\n 10\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price_per_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 106727.155209311,\n \"min\": 267.82981328435875,\n \"max\": 12000000.0,\n \"num_unique_values\": 7537,\n \"samples\": [\n 6133.1338818249815,\n 3913.0434782608695,\n 3181.818181818182\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 66 } ] }, { "cell_type": "markdown", "source": [ "## **Feature** **Scaling**" ], "metadata": { "id": "UWkg-SFcLU5H" } }, { "cell_type": "code", "source": [ "dataset.location = dataset.location.apply(lambda x: x.strip())\n", "\n", "location_stats = dataset.groupby('location')['location'].agg('count').sort_values(ascending=False)\n", "location_stats\n", "\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yngAoNWqMpXQ", "outputId": "2263338b-2840-4cc9-b876-f1e243384ea7" }, "execution_count": 67, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "location\n", "Whitefield 535\n", "Sarjapur Road 392\n", "Electronic City 304\n", "Kanakpura Road 266\n", "Thanisandra 236\n", " ... \n", "1 Giri Nagar 1\n", "Kanakapura Road, 1\n", "Kanakapura main Road 1\n", "Karnataka Shabarimala 1\n", "whitefiled 1\n", "Name: location, Length: 1293, dtype: int64" ] }, "metadata": {}, "execution_count": 67 } ] }, { "cell_type": "code", "source": [ "location_stats_less_than_10 = location_stats[location_stats<=10]\n", "location_stats_less_than_10\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xVb_zMTGNSSo", "outputId": "aef5008a-f627-44ff-afb7-63bd041ec0ef" }, "execution_count": 68, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "location\n", "Basapura 10\n", "1st Block Koramangala 10\n", "Gunjur Palya 10\n", "Kalkere 10\n", "Sector 1 HSR Layout 10\n", " ..\n", "1 Giri Nagar 1\n", "Kanakapura Road, 1\n", "Kanakapura main Road 1\n", "Karnataka Shabarimala 1\n", "whitefiled 1\n", "Name: location, Length: 1052, dtype: int64" ] }, "metadata": {}, "execution_count": 68 } ] }, { "cell_type": "code", "source": [ "dataset.location = dataset.location.apply(lambda x: 'other' if x in location_stats_less_than_10 else x)\n", "len(dataset.location.unique())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IEjwlyu7NtQJ", "outputId": "3c38fb70-a6e0-4563-ec1c-e7e4438b48df" }, "execution_count": 69, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "242" ] }, "metadata": {}, "execution_count": 69 } ] }, { "cell_type": "markdown", "source": [ "## **Outlier** **Removal**" ], "metadata": { "id": "Q9cAmEn4RIn3" } }, { "cell_type": "code", "source": [ "dataset=dataset[~(dataset.total_sqft/dataset.bhk<300)]\n", "dataset.shape\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5b8IXQsGROeS", "outputId": "2fd47e4f-6ae9-4704-c7fd-93eb655d678d" }, "execution_count": 70, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(12502, 7)" ] }, "metadata": {}, "execution_count": 70 } ] }, { "cell_type": "code", "source": [ "dataset.price_per_sqft.describe()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KS5XOdy9R737", "outputId": "fb0cdeae-29bd-40fd-fc03-e296937ed7da" }, "execution_count": 71, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "count 12456.000000\n", "mean 6308.502826\n", "std 4168.127339\n", "min 267.829813\n", "25% 4210.526316\n", "50% 5294.117647\n", "75% 6916.666667\n", "max 176470.588235\n", "Name: price_per_sqft, dtype: float64" ] }, "metadata": {}, "execution_count": 71 } ] }, { "cell_type": "code", "source": [ "def remove_pps_outliers(df):\n", " df_out = pd.DataFrame()\n", " for key, subdf in df.groupby('location'):\n", " m=np.mean(subdf.price_per_sqft)\n", " st=np.std(subdf.price_per_sqft)\n", " reduced_df = subdf[(subdf.price_per_sqft>(m-st)) & (subdf.price_per_sqft<=(m+st))]\n", " df_out = pd.concat([df_out,reduced_df] ,ignore_index=True)\n", " return df_out\n", "\n", "\n", "dataset = remove_pps_outliers(dataset)\n", "dataset.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Rwr9dXvHSGG8", "outputId": "d7a12dc3-1252-42f9-f2de-190d5eb70153" }, "execution_count": 72, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(10241, 7)" ] }, "metadata": {}, "execution_count": 72 } ] }, { "cell_type": "code", "source": [ "def plot_scatter_chart(df,location):\n", " bhk2 = df[(df.location==location) & (df.bhk==2)]\n", " bhk3 = df[(df.location==location) & (df.bhk==3)]\n", " plt.rcParams['figure.figsize'] =(15,10)\n", " plt.scatter(bhk2.total_sqft, bhk2.price,color='blue',label=' 2 BHK',s=50)\n", " plt.scatter(bhk3.total_sqft, bhk3.price,marker='+',color='green',label=' 3 BHK',s=50)\n", " plt.xlabel(\"Total Square Foot Area\")\n", " plt.ylabel(\"Price Per Square Feet\")\n", " plt.title(location)\n", " plt.legend()\n", "\n", "plot_scatter_chart(dataset,'Hebbal')\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 872 }, "id": "qoZDeVi6TfH_", "outputId": "12d4806c-1859-4993-ed22-c841162b2eea" }, "execution_count": 73, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "def remove_bhk_outliers(df):\n", " exclude_indices = np.array([])\n", " for location, location_df in df.groupby('location'):\n", " bhk_stats={}\n", " for bhk , bhk_df in location_df.groupby('bhk'):\n", " bhk_stats[bhk] ={\n", " 'mean': np.mean(bhk_df.price_per_sqft),\n", " 'std':np.std(bhk_df.price_per_sqft),\n", " 'count': bhk_df.shape[0]\n", " }\n", " for bhk , bhk_df in location_df.groupby('bhk'):\n", " stats = bhk_stats.get(bhk-1)\n", " if stats and stats['count']>5:\n", " exclude_indices = np.append(exclude_indices, bhk_df[bhk_df.price_per_sqft<(stats['mean'])].index.values)\n", " return df.drop(exclude_indices,axis='index')\n", "\n", "\n", "dataset = remove_bhk_outliers(dataset)\n", "dataset.shape\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TwvlwWcXVc0e", "outputId": "5f785fd8-5e73-4fb3-a4bc-aa46bd55b732" }, "execution_count": 74, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(7329, 7)" ] }, "metadata": {}, "execution_count": 74 } ] }, { "cell_type": "code", "source": [ "plot_scatter_chart(dataset,'Hebbal')\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 872 }, "id": "ExexZnoeXeCv", "outputId": "0a7f1a1a-4ac2-43d7-c6c2-ac040ce193a6" }, "execution_count": 75, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "dataset.bath.unique()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wyLqg5X7XnJv", "outputId": "3a591eb4-8124-44b2-ea06-54e54a12d393" }, "execution_count": 76, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([ 4., 3., 2., 5., 8., 1., 6., 7., 9., 12., 16., 13.])" ] }, "metadata": {}, "execution_count": 76 } ] }, { "cell_type": "code", "source": [ "dataset[dataset.bath>10]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "1VS_EWxBYLHv", "outputId": "d801f8ac-38ab-4ec0-d917-84db681d38e8" }, "execution_count": 77, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price bhk price_per_sqft\n", "5277 Neeladri Nagar 10 BHK 4000.0 12.0 160.0 10 4000.000000\n", "8486 other 10 BHK 12000.0 12.0 525.0 10 4375.000000\n", "8575 other 16 BHK 10000.0 16.0 550.0 16 5500.000000\n", "9308 other 11 BHK 6000.0 12.0 150.0 11 2500.000000\n", "9639 other 13 BHK 5425.0 13.0 275.0 13 5069.124424" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathpricebhkprice_per_sqft
5277Neeladri Nagar10 BHK4000.012.0160.0104000.000000
8486other10 BHK12000.012.0525.0104375.000000
8575other16 BHK10000.016.0550.0165500.000000
9308other11 BHK6000.012.0150.0112500.000000
9639other13 BHK5425.013.0275.0135069.124424
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"dataset[dataset\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"other\",\n \"Neeladri Nagar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"16 BHK\",\n \"13 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3365.430878802891,\n \"min\": 4000.0,\n \"max\": 12000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 12000.0,\n 5425.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.7320508075688772,\n \"min\": 12.0,\n \"max\": 16.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 12.0,\n 16.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 194.1198083658646,\n \"min\": 150.0,\n \"max\": 550.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 525.0,\n 275.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 10,\n \"max\": 16,\n \"num_unique_values\": 4,\n \"samples\": [\n 16,\n 13\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price_per_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1158.2910864218384,\n \"min\": 2500.0,\n \"max\": 5500.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 4375.0,\n 5069.124423963133\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 77 } ] }, { "cell_type": "code", "source": [ "dataset[dataset.bath>dataset.bhk+2]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 175 }, "id": "PH2OpQmEYOEH", "outputId": "5d0e858f-64ca-41cb-9582-1a1b5152714b" }, "execution_count": 78, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location size total_sqft bath price bhk price_per_sqft\n", "1626 Chikkabanavar 4 Bedroom 2460.0 7.0 80.0 4 3252.032520\n", "5238 Nagasandra 4 Bedroom 7000.0 8.0 450.0 4 6428.571429\n", "6711 Thanisandra 3 BHK 1806.0 6.0 116.0 3 6423.034330\n", "8411 other 6 BHK 11338.0 9.0 1000.0 6 8819.897689" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationsizetotal_sqftbathpricebhkprice_per_sqft
1626Chikkabanavar4 Bedroom2460.07.080.043252.032520
5238Nagasandra4 Bedroom7000.08.0450.046428.571429
6711Thanisandra3 BHK1806.06.0116.036423.034330
8411other6 BHK11338.09.01000.068819.897689
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"dataset[dataset\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Nagasandra\",\n \"other\",\n \"Chikkabanavar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"size\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"4 Bedroom\",\n \"3 BHK\",\n \"6 BHK\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4439.529854988401,\n \"min\": 1806.0,\n \"max\": 11338.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 7000.0,\n 11338.0,\n 2460.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.2909944487358056,\n \"min\": 6.0,\n \"max\": 9.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 8.0,\n 9.0,\n 7.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 426.234286122863,\n \"min\": 80.0,\n \"max\": 1000.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 450.0,\n 1000.0,\n 80.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 3,\n \"max\": 6,\n \"num_unique_values\": 3,\n \"samples\": [\n 4,\n 3,\n 6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price_per_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2284.1884109019225,\n \"min\": 3252.032520325203,\n \"max\": 8819.897689186806,\n \"num_unique_values\": 4,\n \"samples\": [\n 6428.571428571428,\n 8819.897689186806,\n 3252.032520325203\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 78 } ] }, { "cell_type": "code", "source": [ "dataset = dataset[dataset.bath\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationtotal_sqftbathpricebhk
01st Block Jayanagar2850.04.0428.04
11st Block Jayanagar1630.03.0194.03
21st Block Jayanagar1875.02.0235.03
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", " \n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset", "summary": "{\n \"name\": \"dataset\",\n \"rows\": 7251,\n \"fields\": [\n {\n \"column\": \"location\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 242,\n \"samples\": [\n \"BTM 2nd Stage\",\n \"6th Phase JP Nagar\",\n \"Kudlu\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_sqft\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 815.0184231766327,\n \"min\": 300.0,\n \"max\": 30000.0,\n \"num_unique_values\": 1501,\n \"samples\": [\n 661.0,\n 6000.0,\n 1435.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bath\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9385187010307673,\n \"min\": 1.0,\n \"max\": 16.0,\n \"num_unique_values\": 12,\n \"samples\": [\n 12.0,\n 16.0,\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 87.67472475827064,\n \"min\": 10.0,\n \"max\": 2200.0,\n \"num_unique_values\": 1238,\n \"samples\": [\n 137.0,\n 26.85,\n 332.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bhk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 16,\n \"num_unique_values\": 13,\n \"samples\": [\n 10,\n 11,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 80 } ] }, { "cell_type": "markdown", "source": [ "## **Model** **Building**" ], "metadata": { "id": "-Hvda5b-ZPpk" } }, { "cell_type": "code", "source": [ "dummies = pd.get_dummies(dataset.location)\n", "dummies.head(3)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 245 }, "id": "g44JZw6JZXXL", "outputId": "fa11a343-fdd9-47be-ac44-929eb4d01bae" }, "execution_count": 81, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " 1st Block Jayanagar 1st Phase JP Nagar 2nd Phase Judicial Layout \\\n", "0 True False False \n", "1 True False False \n", "2 True False False \n", "\n", " 2nd Stage Nagarbhavi 5th Block Hbr Layout 5th Phase JP Nagar \\\n", "0 False False False \n", "1 False False False \n", "2 False False False \n", "\n", " 6th Phase JP Nagar 7th Phase JP Nagar 8th Phase JP Nagar \\\n", "0 False False False \n", "1 False False False \n", "2 False False False \n", "\n", " 9th Phase JP Nagar ... Vishveshwarya Layout Vishwapriya Layout \\\n", "0 False ... False False \n", "1 False ... False False \n", "2 False ... False False \n", "\n", " Vittasandra Whitefield Yelachenahalli Yelahanka Yelahanka New Town \\\n", "0 False False False False False \n", "1 False False False False False \n", "2 False False False False False \n", "\n", " Yelenahalli Yeshwanthpur other \n", "0 False False False \n", "1 False False False \n", "2 False False False \n", "\n", "[3 rows x 242 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
1st Block Jayanagar1st Phase JP Nagar2nd Phase Judicial Layout2nd Stage Nagarbhavi5th Block Hbr Layout5th Phase JP Nagar6th Phase JP Nagar7th Phase JP Nagar8th Phase JP Nagar9th Phase JP Nagar...Vishveshwarya LayoutVishwapriya LayoutVittasandraWhitefieldYelachenahalliYelahankaYelahanka New TownYelenahalliYeshwanthpurother
0TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
1TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", "

3 rows × 242 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dummies" } }, "metadata": {}, "execution_count": 81 } ] }, { "cell_type": "code", "source": [ "dataset = pd.concat([dataset,dummies.drop('other',axis='columns')],axis='columns')\n", "dataset.head(3)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 297 }, "id": "CW6tYp03Zn5W", "outputId": "09f1530e-efc4-455b-ebed-9657b0a524e6" }, "execution_count": 82, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " location total_sqft bath price bhk 1st Block Jayanagar \\\n", "0 1st Block Jayanagar 2850.0 4.0 428.0 4 True \n", "1 1st Block Jayanagar 1630.0 3.0 194.0 3 True \n", "2 1st Block Jayanagar 1875.0 2.0 235.0 3 True \n", "\n", " 1st Phase JP Nagar 2nd Phase Judicial Layout 2nd Stage Nagarbhavi \\\n", "0 False False False \n", "1 False False False \n", "2 False False False \n", "\n", " 5th Block Hbr Layout ... Vijayanagar Vishveshwarya Layout \\\n", "0 False ... False False \n", "1 False ... False False \n", "2 False ... False False \n", "\n", " Vishwapriya Layout Vittasandra Whitefield Yelachenahalli Yelahanka \\\n", "0 False False False False False \n", "1 False False False False False \n", "2 False False False False False \n", "\n", " Yelahanka New Town Yelenahalli Yeshwanthpur \n", "0 False False False \n", "1 False False False \n", "2 False False False \n", "\n", "[3 rows x 246 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
locationtotal_sqftbathpricebhk1st Block Jayanagar1st Phase JP Nagar2nd Phase Judicial Layout2nd Stage Nagarbhavi5th Block Hbr Layout...VijayanagarVishveshwarya LayoutVishwapriya LayoutVittasandraWhitefieldYelachenahalliYelahankaYelahanka New TownYelenahalliYeshwanthpur
01st Block Jayanagar2850.04.0428.04TrueFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
11st Block Jayanagar1630.03.0194.03TrueFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
21st Block Jayanagar1875.02.0235.03TrueFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", "

3 rows × 246 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset" } }, "metadata": {}, "execution_count": 82 } ] }, { "cell_type": "code", "source": [ "dataset = dataset.drop('location',axis='columns')\n", "dataset.head(2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 214 }, "id": "1Fl8QNUeaI2A", "outputId": "8ff5df88-3d1d-4895-fd02-f8f5bbdfe950" }, "execution_count": 83, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " total_sqft bath price bhk 1st Block Jayanagar 1st Phase JP Nagar \\\n", "0 2850.0 4.0 428.0 4 True False \n", "1 1630.0 3.0 194.0 3 True False \n", "\n", " 2nd Phase Judicial Layout 2nd Stage Nagarbhavi 5th Block Hbr Layout \\\n", "0 False False False \n", "1 False False False \n", "\n", " 5th Phase JP Nagar ... Vijayanagar Vishveshwarya Layout \\\n", "0 False ... False False \n", "1 False ... False False \n", "\n", " Vishwapriya Layout Vittasandra Whitefield Yelachenahalli Yelahanka \\\n", "0 False False False False False \n", "1 False False False False False \n", "\n", " Yelahanka New Town Yelenahalli Yeshwanthpur \n", "0 False False False \n", "1 False False False \n", "\n", "[2 rows x 245 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_sqftbathpricebhk1st Block Jayanagar1st Phase JP Nagar2nd Phase Judicial Layout2nd Stage Nagarbhavi5th Block Hbr Layout5th Phase JP Nagar...VijayanagarVishveshwarya LayoutVishwapriya LayoutVittasandraWhitefieldYelachenahalliYelahankaYelahanka New TownYelenahalliYeshwanthpur
02850.04.0428.04TrueFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
11630.03.0194.03TrueFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", "

2 rows × 245 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "dataset" } }, "metadata": {}, "execution_count": 83 } ] }, { "cell_type": "code", "source": [ "dataset.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-icPo_qDaSv7", "outputId": "30d4bf62-ccd8-461c-9b21-26e16cffef6d" }, "execution_count": 84, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(7251, 245)" ] }, "metadata": {}, "execution_count": 84 } ] }, { "cell_type": "code", "source": [ "X=dataset.drop('price',axis='columns')\n", "X.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 308 }, "id": "ZfjUY-u3aYNW", "outputId": "04affa01-b2c2-4806-dfa9-bde5d620c83f" }, "execution_count": 85, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " total_sqft bath bhk 1st Block Jayanagar 1st Phase JP Nagar \\\n", "0 2850.0 4.0 4 True False \n", "1 1630.0 3.0 3 True False \n", "2 1875.0 2.0 3 True False \n", "3 1200.0 2.0 3 True False \n", "4 1235.0 2.0 2 True False \n", "\n", " 2nd Phase Judicial Layout 2nd Stage Nagarbhavi 5th Block Hbr Layout \\\n", "0 False False False \n", "1 False False False \n", "2 False False False \n", "3 False False False \n", "4 False False False \n", "\n", " 5th Phase JP Nagar 6th Phase JP Nagar ... Vijayanagar \\\n", "0 False False ... False \n", "1 False False ... False \n", "2 False False ... False \n", "3 False False ... False \n", "4 False False ... False \n", "\n", " Vishveshwarya Layout Vishwapriya Layout Vittasandra Whitefield \\\n", "0 False False False False \n", "1 False False False False \n", "2 False False False False \n", "3 False False False False \n", "4 False False False False \n", "\n", " Yelachenahalli Yelahanka Yelahanka New Town Yelenahalli Yeshwanthpur \n", "0 False False False False False \n", "1 False False False False False \n", "2 False False False False False \n", "3 False False False False False \n", "4 False False False False False \n", "\n", "[5 rows x 244 columns]" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
total_sqftbathbhk1st Block Jayanagar1st Phase JP Nagar2nd Phase Judicial Layout2nd Stage Nagarbhavi5th Block Hbr Layout5th Phase JP Nagar6th Phase JP Nagar...VijayanagarVishveshwarya LayoutVishwapriya LayoutVittasandraWhitefieldYelachenahalliYelahankaYelahanka New TownYelenahalliYeshwanthpur
02850.04.04TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
11630.03.03TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
21875.02.03TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
31200.02.03TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
41235.02.02TrueFalseFalseFalseFalseFalseFalse...FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", "

5 rows × 244 columns

\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "X" } }, "metadata": {}, "execution_count": 85 } ] }, { "cell_type": "code", "source": [ "y= dataset.price\n", "y.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kvFZ6tNpatU3", "outputId": "179167ac-2063-4a62-8850-2856b21063f9" }, "execution_count": 86, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 428.0\n", "1 194.0\n", "2 235.0\n", "3 130.0\n", "4 148.0\n", "Name: price, dtype: float64" ] }, "metadata": {}, "execution_count": 86 } ] }, { "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split\n", "X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2,random_state=10)" ], "metadata": { "id": "8Oazppuha2P6" }, "execution_count": 87, "outputs": [] }, { "cell_type": "code", "source": [ "from sklearn.linear_model import LinearRegression\n", "lr_clf =LinearRegression()\n", "lr_clf.fit(X_train,y_train)\n", "lr_clf.score(X_test,y_test)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7fYoIYfxbIpk", "outputId": "5223ac53-6c7b-47aa-b9df-595630266952" }, "execution_count": 88, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.8452277697874376" ] }, "metadata": {}, "execution_count": 88 } ] }, { "cell_type": "code", "source": [ "from sklearn.model_selection import ShuffleSplit\n", "from sklearn.model_selection import cross_val_score\n", "\n", "cv = ShuffleSplit(n_splits=5, test_size=0.2,random_state=0)\n", "\n", "cross_val_score(LinearRegression(),X,y,cv=cv)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yWFIEnivbd2s", "outputId": "7ad72eac-952a-469e-ddd4-abc0af0cede2" }, "execution_count": 90, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([0.82430186, 0.77166234, 0.85089567, 0.80837764, 0.83653286])" ] }, "metadata": {}, "execution_count": 90 } ] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "from sklearn.model_selection import GridSearchCV, ShuffleSplit\n", "from sklearn.linear_model import LinearRegression, Lasso\n", "from sklearn.tree import DecisionTreeRegressor\n", "\n", "def find_best_model_using_gridsearchcv(X, y):\n", " algos = {\n", " 'linear_regression' : {\n", " 'model': LinearRegression(),\n", " 'params': {\n", " 'fit_intercept': [True, False],\n", " 'positive': [True, False]\n", " }\n", " },\n", " 'lasso': {\n", " 'model': Lasso(),\n", " 'params': {\n", " 'alpha': [1, 2],\n", " 'selection': ['random', 'cyclic']\n", " }\n", " },\n", " 'decision_tree': {\n", " 'model': DecisionTreeRegressor(),\n", " 'params': {\n", " 'criterion': ['squared_error', 'friedman_mse'],\n", " 'splitter': ['best', 'random']\n", " }\n", " }\n", " }\n", " scores = []\n", " cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)\n", " for algo_name, config in algos.items():\n", " gs = GridSearchCV(config['model'], config['params'], cv=cv, return_train_score=False)\n", " gs.fit(X, y)\n", " scores.append({\n", " 'model': algo_name,\n", " 'best_score': gs.best_score_,\n", " 'best_params': gs.best_params_\n", " })\n", "\n", " return pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])\n", "\n", "\n", "find_best_model_using_gridsearchcv(X,y)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "id": "Aj_4nJe6cVms", "outputId": "70b4b163-455b-4be3-d8bf-64feb157edcc" }, "execution_count": 97, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " model best_score \\\n", "0 linear_regression 0.819001 \n", "1 lasso 0.687435 \n", "2 decision_tree 0.727427 \n", "\n", " best_params \n", "0 {'fit_intercept': False, 'positive': False} \n", "1 {'alpha': 1, 'selection': 'random'} \n", "2 {'criterion': 'squared_error', 'splitter': 'be... " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
modelbest_scorebest_params
0linear_regression0.819001{'fit_intercept': False, 'positive': False}
1lasso0.687435{'alpha': 1, 'selection': 'random'}
2decision_tree0.727427{'criterion': 'squared_error', 'splitter': 'be...
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", " \n", "\n", "\n", "\n", " \n", "
\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"find_best_model_using_gridsearchcv(X,y)\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"model\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"linear_regression\",\n \"lasso\",\n \"decision_tree\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"best_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.06744755911762175,\n \"min\": 0.6874345229925464,\n \"max\": 0.8190012023138117,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.8190012023138117,\n 0.6874345229925464,\n 0.7274268494498024\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"best_params\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 97 } ] }, { "cell_type": "code", "source": [ "def predict_price(location,sqft,bath,bhk):\n", " loc_index = np.where(X.columns==location)[0][0]\n", "\n", " x = np.zeros(len(X.columns))\n", " x[0] = sqft\n", " x[1] = bath\n", " x[2] = bhk\n", " if loc_index >= 0:\n", " x[loc_index] = 1\n", "\n", " return lr_clf.predict([x])[0]" ], "metadata": { "id": "OPbolFxwfCIa" }, "execution_count": 98, "outputs": [] }, { "cell_type": "code", "source": [ "\n", "\n", "predict_price('1st Phase JP Nagar',1000, 2, 2)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7ar6aML7fzLp", "outputId": "ac7a79cf-795d-44b1-e70a-aa92aa369144" }, "execution_count": 99, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", " warnings.warn(\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "83.49904677206221" ] }, "metadata": {}, "execution_count": 99 } ] }, { "cell_type": "markdown", "source": [ "## Export the tested model to a pickle file" ], "metadata": { "id": "R3htTqBXgLt1" } }, { "cell_type": "code", "source": [ "import pickle\n", "with open('banglore_home_prices_model.pickle','wb') as f:\n", " pickle.dump(lr_clf,f)" ], "metadata": { "id": "1VUSmWkLgO1b" }, "execution_count": 100, "outputs": [] }, { "cell_type": "code", "source": [ "import json\n", "columns = {\n", " 'data_columns' : [col.lower() for col in X.columns]\n", "}\n", "with open(\"columns.json\",\"w\") as f:\n", " f.write(json.dumps(columns))" ], "metadata": { "id": "WrTG7RTcgTYY" }, "execution_count": 101, "outputs": [] } ] }