abps committed on
Commit
e917ff9
·
1 Parent(s): 814efa9

Upload 11 files

Browse files

Uploaded necessary files

.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ Client/house.jpg filter=lfs diff=lfs merge=lfs -text
Client/house.jpg ADDED

Git LFS Details

  • SHA256: 554bf4423826df49c3b38891f263e167e740d28f46c3410ede74d5232ad3317f
  • Pointer size: 132 Bytes
  • Size of remote file: 2.17 MB
Client/index.html ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
8
+ <link rel="stylesheet" href="style.css">
9
+ <title>Home Price Predictor</title>
10
+ </head>
11
+
12
+ <body>
13
+ <div class="container">
14
+ <h1 id="form_title">House price</h1>
15
+ <div id="left">
16
+ <hr>
17
+ </div>
18
+ <form id="get_price">
19
+ <select id="locations" name="location" placeholder="location">
20
+ <option value="" disabled selected>
21
+ --Please Select Location--
22
+ </option>
23
+ </select>
24
+ <input type="number" name="bhk" placeholder="No of Bedrooms (BHK)" required>
25
+ <input type="number" name="bath" placeholder="No of Bathrooms" required>
26
+ <input type="number" name="total_sqft" placeholder="Total area in Square ft." required>
27
+ <div class="res">
28
+ <button type="submit">Predict Price</button>
29
+ <textarea name="result" id="result" cols="10" rows="5" readonly></textarea>
30
+ </div>
31
+ </form>
32
+ </div>
33
+
34
+ <script src="https://code.jquery.com/jquery-3.6.4.min.js"
35
+ integrity="sha256-oP6HI9z1XaZNBrJURtCoUT5SUnxFr8s3BzRl+cbzUq8=" crossorigin="anonymous"></script>
36
+ <script>
37
+ function showLocations(res) {
38
+ var dropdown = document.getElementById('loc')
39
+ for (let i = 0; i < len(res); i++) {
40
+ var newNode = `<option value = ${res[i]} > ${res[i]} </option>`;
41
+ dropdown.appendChild(newNode);
42
+ }
43
+ }
44
+
45
+ let url_locations = '/api/loc'
46
+ $.get(url_locations, (data, status) => {
47
+ var dropdown = document.getElementById("locations");
48
+ for (i in data) {
49
+ let node = `<option value = ${data[i]} > ${data[i]} </option>`;
50
+ dropdown.insertAdjacentHTML("beforeend", node);
51
+ }
52
+
53
+ })
54
+
55
+ document.forms['get_price'].addEventListener('submit', (event) => {
56
+ event.preventDefault();
57
+ fetch('/api/get_price', {
58
+ method: 'POST',
59
+ body: new URLSearchParams(new FormData(event.target))
60
+ }).then((response) => {
61
+ if (!response.ok) {
62
+ throw new Error(`HTTP error! Status: ${response.status}`);
63
+ }
64
+ return response.text();
65
+ }).then((body) => {
66
+ let price = parseInt(parseFloat(body) * 100000);
67
+ document.getElementById("result").innerText = `Rs. ${price}`;
68
+ }).catch((error) => {
69
+ // TODO handle error
70
+ });
71
+ });
72
+ </script>
73
+ </body>
74
+
75
+ </html>
Client/style.css ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ * {
2
+ margin: 0;
3
+ font-size: 1.3rem;
4
+ box-sizing: border-box;
5
+ }
6
+
7
+ body {
8
+ text-align: center;
9
+ overflow: hidden;
10
+ background-image: url('house.jpg');
11
+ background-size: auto;
12
+ height: 100%;
13
+ background-size: 100%;
14
+ background-repeat: no-repeat;
15
+ }
16
+
17
+ form {
18
+ text-align: left;
19
+ }
20
+
21
+
22
+ .container {
23
+ background-color: rgba(242, 244, 239, 0.45);
24
+ width: 24rem;
25
+ height: 28rem;
26
+ border-radius: 5px;
27
+ box-shadow: 2px 3px 20px rgb(201, 199, 199);
28
+ padding: 3rem 2.5rem;
29
+ margin: auto;
30
+ position: absolute;
31
+ top: 7%;
32
+ left: 8%;
33
+ z-index: 2;
34
+ }
35
+
36
+ #form_title {
37
+ font-size: 2rem;
38
+ text-align: left;
39
+ color: rgb(81, 79, 79);
40
+ }
41
+
42
+ hr {
43
+ border: none;
44
+ border-bottom: 10px solid white;
45
+ height: 10px;
46
+ outline: none;
47
+ width: 100%;
48
+ text-align: left;
49
+ margin-top: 2%;
50
+ }
51
+
52
+ #left {
53
+ width: 100%;
54
+ padding: 0;
55
+ padding-right: 65%;
56
+ }
57
+
58
+ input,
59
+ select,
60
+ option {
61
+ display: block;
62
+ margin: 5% 0;
63
+ padding: 10px 15px;
64
+ border-radius: 8px;
65
+ width: 100%;
66
+ box-sizing: border-box;
67
+ text-align: left;
68
+ outline: none;
69
+ border: none;
70
+ background-color: #f8f7f6ca;
71
+ color: rgba(101, 98, 98, 0.791);
72
+ }
73
+
74
+ .res {
75
+ display: flex;
76
+ justify-content: space-between;
77
+ height: 80px;
78
+ }
79
+
80
+ button {
81
+ display: block;
82
+ font-size: 1.3rem;
83
+ font-weight: bold;
84
+ padding: 10px 20px;
85
+ border-radius: 25px;
86
+ text-align: left;
87
+ color: #f8f7f6ca;
88
+ display: inline-block;
89
+ height: fit-content;
90
+ border: none;
91
+ background-color: rgba(101, 98, 98, 0.791);
92
+ }
93
+
94
+ textarea {
95
+ display: inline-block;
96
+ outline: none;
97
+ padding: 5px;
98
+ color: rgba(101, 98, 98, 0.791);
99
+ background-color: #f8f7f6ca;
100
+ }
101
+
102
+ button:hover {
103
+ background-color: #515050e4;
104
+ color: #f8f7f6d5;
105
+ transition: 0.3s cubic-bezier(1, 0.25, 0.21, 1.32);
106
+ }
107
+
108
+ button:active {
109
+ transform: translateY(4px);
110
+ transition: 0.6s cubic-bezier(1, 0.25, 0.21, 1.32);
111
+ }
112
+
113
+ @media screen and (max-width: 1075px) {
114
+ body {
115
+ background-size: 170%;
116
+ }
117
+
118
+ .container {
119
+ height: 28rem;
120
+ padding: 2.7rem 2.2rem;
121
+ width: 24rem;
122
+ }
123
+ }
124
+
125
+ @media screen and (max-width: 700px) {
126
+ body {
127
+ background-size: 300%;
128
+ }
129
+
130
+ .container {
131
+ height: 27rem;
132
+ width: 20rem;
133
+ padding: 2.5rem 2rem;
134
+ top: 5%;
135
+ left: 5%;
136
+ }
137
+ }
138
+
139
+ @media screen and (max-width: 450px) {
140
+ body {
141
+ background-size: 300%;
142
+ }
143
+
144
+ .container {
145
+ height: 70%;
146
+ width: 90%;
147
+ padding: 1rem 0.8rem;
148
+ top: 15%;
149
+ left: 3%;
150
+ }
151
+ }
assets/RealStatePrice.ipynb ADDED
@@ -0,0 +1,2246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "97e13347-621c-4656-a175-8ebcf9d842f0",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import numpy as np\n",
11
+ "import pandas as pd\n",
12
+ "import math\n",
13
+ "pd.options.display.max_rows = 4000"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "id": "aac70b32-b23f-4394-ba64-bc678a9429e7",
20
+ "metadata": {},
21
+ "outputs": [
22
+ {
23
+ "data": {
24
+ "text/html": [
25
+ "<div>\n",
26
+ "<style scoped>\n",
27
+ " .dataframe tbody tr th:only-of-type {\n",
28
+ " vertical-align: middle;\n",
29
+ " }\n",
30
+ "\n",
31
+ " .dataframe tbody tr th {\n",
32
+ " vertical-align: top;\n",
33
+ " }\n",
34
+ "\n",
35
+ " .dataframe thead th {\n",
36
+ " text-align: right;\n",
37
+ " }\n",
38
+ "</style>\n",
39
+ "<table border=\"1\" class=\"dataframe\">\n",
40
+ " <thead>\n",
41
+ " <tr style=\"text-align: right;\">\n",
42
+ " <th></th>\n",
43
+ " <th>area_type</th>\n",
44
+ " <th>availability</th>\n",
45
+ " <th>location</th>\n",
46
+ " <th>size</th>\n",
47
+ " <th>society</th>\n",
48
+ " <th>total_sqft</th>\n",
49
+ " <th>bath</th>\n",
50
+ " <th>balcony</th>\n",
51
+ " <th>price</th>\n",
52
+ " </tr>\n",
53
+ " </thead>\n",
54
+ " <tbody>\n",
55
+ " <tr>\n",
56
+ " <th>0</th>\n",
57
+ " <td>Super built-up Area</td>\n",
58
+ " <td>19-Dec</td>\n",
59
+ " <td>Electronic City Phase II</td>\n",
60
+ " <td>2 BHK</td>\n",
61
+ " <td>Coomee</td>\n",
62
+ " <td>1056</td>\n",
63
+ " <td>2.0</td>\n",
64
+ " <td>1.0</td>\n",
65
+ " <td>39.07</td>\n",
66
+ " </tr>\n",
67
+ " <tr>\n",
68
+ " <th>1</th>\n",
69
+ " <td>Plot Area</td>\n",
70
+ " <td>Ready To Move</td>\n",
71
+ " <td>Chikka Tirupathi</td>\n",
72
+ " <td>4 Bedroom</td>\n",
73
+ " <td>Theanmp</td>\n",
74
+ " <td>2600</td>\n",
75
+ " <td>5.0</td>\n",
76
+ " <td>3.0</td>\n",
77
+ " <td>120.00</td>\n",
78
+ " </tr>\n",
79
+ " <tr>\n",
80
+ " <th>2</th>\n",
81
+ " <td>Built-up Area</td>\n",
82
+ " <td>Ready To Move</td>\n",
83
+ " <td>Uttarahalli</td>\n",
84
+ " <td>3 BHK</td>\n",
85
+ " <td>NaN</td>\n",
86
+ " <td>1440</td>\n",
87
+ " <td>2.0</td>\n",
88
+ " <td>3.0</td>\n",
89
+ " <td>62.00</td>\n",
90
+ " </tr>\n",
91
+ " <tr>\n",
92
+ " <th>3</th>\n",
93
+ " <td>Super built-up Area</td>\n",
94
+ " <td>Ready To Move</td>\n",
95
+ " <td>Lingadheeranahalli</td>\n",
96
+ " <td>3 BHK</td>\n",
97
+ " <td>Soiewre</td>\n",
98
+ " <td>1521</td>\n",
99
+ " <td>3.0</td>\n",
100
+ " <td>1.0</td>\n",
101
+ " <td>95.00</td>\n",
102
+ " </tr>\n",
103
+ " <tr>\n",
104
+ " <th>4</th>\n",
105
+ " <td>Super built-up Area</td>\n",
106
+ " <td>Ready To Move</td>\n",
107
+ " <td>Kothanur</td>\n",
108
+ " <td>2 BHK</td>\n",
109
+ " <td>NaN</td>\n",
110
+ " <td>1200</td>\n",
111
+ " <td>2.0</td>\n",
112
+ " <td>1.0</td>\n",
113
+ " <td>51.00</td>\n",
114
+ " </tr>\n",
115
+ " </tbody>\n",
116
+ "</table>\n",
117
+ "</div>"
118
+ ],
119
+ "text/plain": [
120
+ " area_type availability location size \\\n",
121
+ "0 Super built-up Area 19-Dec Electronic City Phase II 2 BHK \n",
122
+ "1 Plot Area Ready To Move Chikka Tirupathi 4 Bedroom \n",
123
+ "2 Built-up Area Ready To Move Uttarahalli 3 BHK \n",
124
+ "3 Super built-up Area Ready To Move Lingadheeranahalli 3 BHK \n",
125
+ "4 Super built-up Area Ready To Move Kothanur 2 BHK \n",
126
+ "\n",
127
+ " society total_sqft bath balcony price \n",
128
+ "0 Coomee 1056 2.0 1.0 39.07 \n",
129
+ "1 Theanmp 2600 5.0 3.0 120.00 \n",
130
+ "2 NaN 1440 2.0 3.0 62.00 \n",
131
+ "3 Soiewre 1521 3.0 1.0 95.00 \n",
132
+ "4 NaN 1200 2.0 1.0 51.00 "
133
+ ]
134
+ },
135
+ "execution_count": 2,
136
+ "metadata": {},
137
+ "output_type": "execute_result"
138
+ }
139
+ ],
140
+ "source": [
141
+ "df = pd.read_csv(\"datasets/Bengaluru_House_Data.csv\")\n",
142
+ "df.head()"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 3,
148
+ "id": "bdd90a24-68a5-4d9d-87aa-05331c3e0c05",
149
+ "metadata": {},
150
+ "outputs": [
151
+ {
152
+ "data": {
153
+ "text/html": [
154
+ "<div>\n",
155
+ "<style scoped>\n",
156
+ " .dataframe tbody tr th:only-of-type {\n",
157
+ " vertical-align: middle;\n",
158
+ " }\n",
159
+ "\n",
160
+ " .dataframe tbody tr th {\n",
161
+ " vertical-align: top;\n",
162
+ " }\n",
163
+ "\n",
164
+ " .dataframe thead th {\n",
165
+ " text-align: right;\n",
166
+ " }\n",
167
+ "</style>\n",
168
+ "<table border=\"1\" class=\"dataframe\">\n",
169
+ " <thead>\n",
170
+ " <tr style=\"text-align: right;\">\n",
171
+ " <th></th>\n",
172
+ " <th>availability</th>\n",
173
+ " <th>location</th>\n",
174
+ " <th>size</th>\n",
175
+ " <th>society</th>\n",
176
+ " <th>total_sqft</th>\n",
177
+ " <th>bath</th>\n",
178
+ " <th>balcony</th>\n",
179
+ " <th>price</th>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>area_type</th>\n",
183
+ " <th></th>\n",
184
+ " <th></th>\n",
185
+ " <th></th>\n",
186
+ " <th></th>\n",
187
+ " <th></th>\n",
188
+ " <th></th>\n",
189
+ " <th></th>\n",
190
+ " <th></th>\n",
191
+ " </tr>\n",
192
+ " </thead>\n",
193
+ " <tbody>\n",
194
+ " <tr>\n",
195
+ " <th>Built-up Area</th>\n",
196
+ " <td>2418</td>\n",
197
+ " <td>2418</td>\n",
198
+ " <td>2418</td>\n",
199
+ " <td>1215</td>\n",
200
+ " <td>2418</td>\n",
201
+ " <td>2410</td>\n",
202
+ " <td>2310</td>\n",
203
+ " <td>2418</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>Carpet Area</th>\n",
207
+ " <td>87</td>\n",
208
+ " <td>87</td>\n",
209
+ " <td>87</td>\n",
210
+ " <td>54</td>\n",
211
+ " <td>87</td>\n",
212
+ " <td>87</td>\n",
213
+ " <td>82</td>\n",
214
+ " <td>87</td>\n",
215
+ " </tr>\n",
216
+ " <tr>\n",
217
+ " <th>Plot Area</th>\n",
218
+ " <td>2025</td>\n",
219
+ " <td>2025</td>\n",
220
+ " <td>2009</td>\n",
221
+ " <td>311</td>\n",
222
+ " <td>2025</td>\n",
223
+ " <td>2009</td>\n",
224
+ " <td>1837</td>\n",
225
+ " <td>2025</td>\n",
226
+ " </tr>\n",
227
+ " <tr>\n",
228
+ " <th>Super built-up Area</th>\n",
229
+ " <td>8790</td>\n",
230
+ " <td>8789</td>\n",
231
+ " <td>8790</td>\n",
232
+ " <td>6238</td>\n",
233
+ " <td>8790</td>\n",
234
+ " <td>8741</td>\n",
235
+ " <td>8482</td>\n",
236
+ " <td>8790</td>\n",
237
+ " </tr>\n",
238
+ " </tbody>\n",
239
+ "</table>\n",
240
+ "</div>"
241
+ ],
242
+ "text/plain": [
243
+ " availability location size society total_sqft bath \\\n",
244
+ "area_type \n",
245
+ "Built-up Area 2418 2418 2418 1215 2418 2410 \n",
246
+ "Carpet Area 87 87 87 54 87 87 \n",
247
+ "Plot Area 2025 2025 2009 311 2025 2009 \n",
248
+ "Super built-up Area 8790 8789 8790 6238 8790 8741 \n",
249
+ "\n",
250
+ " balcony price \n",
251
+ "area_type \n",
252
+ "Built-up Area 2310 2418 \n",
253
+ "Carpet Area 82 87 \n",
254
+ "Plot Area 1837 2025 \n",
255
+ "Super built-up Area 8482 8790 "
256
+ ]
257
+ },
258
+ "execution_count": 3,
259
+ "metadata": {},
260
+ "output_type": "execute_result"
261
+ }
262
+ ],
263
+ "source": [
264
+ "df.groupby(\"area_type\").agg('count')"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": 4,
270
+ "id": "08bba0ab-e89d-43c1-9594-d3dd660be63d",
271
+ "metadata": {},
272
+ "outputs": [],
273
+ "source": [
274
+ "df.drop(['area_type','availability','society','balcony'],axis=1,inplace=True)"
275
+ ]
276
+ },
277
+ {
278
+ "cell_type": "code",
279
+ "execution_count": 5,
280
+ "id": "ca2ade22-d6ee-402a-9e4f-1aef26b2f89e",
281
+ "metadata": {},
282
+ "outputs": [
283
+ {
284
+ "data": {
285
+ "text/plain": [
286
+ "(13320, 5)"
287
+ ]
288
+ },
289
+ "execution_count": 5,
290
+ "metadata": {},
291
+ "output_type": "execute_result"
292
+ }
293
+ ],
294
+ "source": [
295
+ "df.shape"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 6,
301
+ "id": "aa521583-3810-439e-aa8f-7693ba9fdbab",
302
+ "metadata": {},
303
+ "outputs": [
304
+ {
305
+ "data": {
306
+ "text/plain": [
307
+ "location 1\n",
308
+ "size 16\n",
309
+ "total_sqft 0\n",
310
+ "bath 73\n",
311
+ "price 0\n",
312
+ "dtype: int64"
313
+ ]
314
+ },
315
+ "execution_count": 6,
316
+ "metadata": {},
317
+ "output_type": "execute_result"
318
+ }
319
+ ],
320
+ "source": [
321
+ "df.isnull().sum()"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": 7,
327
+ "id": "7a17c0de-5ab0-44c6-8e5f-a8b05797b383",
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": [
331
+ "df.dropna(inplace=True)"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": 8,
337
+ "id": "bc44288e-49f6-4972-9542-7b049368caa7",
338
+ "metadata": {},
339
+ "outputs": [
340
+ {
341
+ "data": {
342
+ "text/plain": [
343
+ "location False\n",
344
+ "size False\n",
345
+ "total_sqft False\n",
346
+ "bath False\n",
347
+ "price False\n",
348
+ "dtype: bool"
349
+ ]
350
+ },
351
+ "execution_count": 8,
352
+ "metadata": {},
353
+ "output_type": "execute_result"
354
+ }
355
+ ],
356
+ "source": [
357
+ "df.isnull().any()"
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": 9,
363
+ "id": "96bc00f7-97be-495e-9833-fdef0cc9edda",
364
+ "metadata": {},
365
+ "outputs": [
366
+ {
367
+ "data": {
368
+ "text/html": [
369
+ "<div>\n",
370
+ "<style scoped>\n",
371
+ " .dataframe tbody tr th:only-of-type {\n",
372
+ " vertical-align: middle;\n",
373
+ " }\n",
374
+ "\n",
375
+ " .dataframe tbody tr th {\n",
376
+ " vertical-align: top;\n",
377
+ " }\n",
378
+ "\n",
379
+ " .dataframe thead th {\n",
380
+ " text-align: right;\n",
381
+ " }\n",
382
+ "</style>\n",
383
+ "<table border=\"1\" class=\"dataframe\">\n",
384
+ " <thead>\n",
385
+ " <tr style=\"text-align: right;\">\n",
386
+ " <th></th>\n",
387
+ " <th>location</th>\n",
388
+ " <th>size</th>\n",
389
+ " <th>total_sqft</th>\n",
390
+ " <th>bath</th>\n",
391
+ " <th>price</th>\n",
392
+ " </tr>\n",
393
+ " </thead>\n",
394
+ " <tbody>\n",
395
+ " <tr>\n",
396
+ " <th>0</th>\n",
397
+ " <td>Electronic City Phase II</td>\n",
398
+ " <td>2 BHK</td>\n",
399
+ " <td>1056</td>\n",
400
+ " <td>2.0</td>\n",
401
+ " <td>39.07</td>\n",
402
+ " </tr>\n",
403
+ " <tr>\n",
404
+ " <th>1</th>\n",
405
+ " <td>Chikka Tirupathi</td>\n",
406
+ " <td>4 Bedroom</td>\n",
407
+ " <td>2600</td>\n",
408
+ " <td>5.0</td>\n",
409
+ " <td>120.00</td>\n",
410
+ " </tr>\n",
411
+ " <tr>\n",
412
+ " <th>2</th>\n",
413
+ " <td>Uttarahalli</td>\n",
414
+ " <td>3 BHK</td>\n",
415
+ " <td>1440</td>\n",
416
+ " <td>2.0</td>\n",
417
+ " <td>62.00</td>\n",
418
+ " </tr>\n",
419
+ " <tr>\n",
420
+ " <th>3</th>\n",
421
+ " <td>Lingadheeranahalli</td>\n",
422
+ " <td>3 BHK</td>\n",
423
+ " <td>1521</td>\n",
424
+ " <td>3.0</td>\n",
425
+ " <td>95.00</td>\n",
426
+ " </tr>\n",
427
+ " <tr>\n",
428
+ " <th>4</th>\n",
429
+ " <td>Kothanur</td>\n",
430
+ " <td>2 BHK</td>\n",
431
+ " <td>1200</td>\n",
432
+ " <td>2.0</td>\n",
433
+ " <td>51.00</td>\n",
434
+ " </tr>\n",
435
+ " </tbody>\n",
436
+ "</table>\n",
437
+ "</div>"
438
+ ],
439
+ "text/plain": [
440
+ " location size total_sqft bath price\n",
441
+ "0 Electronic City Phase II 2 BHK 1056 2.0 39.07\n",
442
+ "1 Chikka Tirupathi 4 Bedroom 2600 5.0 120.00\n",
443
+ "2 Uttarahalli 3 BHK 1440 2.0 62.00\n",
444
+ "3 Lingadheeranahalli 3 BHK 1521 3.0 95.00\n",
445
+ "4 Kothanur 2 BHK 1200 2.0 51.00"
446
+ ]
447
+ },
448
+ "execution_count": 9,
449
+ "metadata": {},
450
+ "output_type": "execute_result"
451
+ }
452
+ ],
453
+ "source": [
454
+ "df.head()"
455
+ ]
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": 10,
460
+ "id": "e69ba835-f0dc-4531-b45a-91fba2a62f26",
461
+ "metadata": {},
462
+ "outputs": [
463
+ {
464
+ "data": {
465
+ "text/plain": [
466
+ "array(['2 BHK', '4 Bedroom', '3 BHK', '4 BHK', '6 Bedroom', '3 Bedroom',\n",
467
+ " '1 BHK', '1 RK', '1 Bedroom', '8 Bedroom', '2 Bedroom',\n",
468
+ " '7 Bedroom', '5 BHK', '7 BHK', '6 BHK', '5 Bedroom', '11 BHK',\n",
469
+ " '9 BHK', '9 Bedroom', '27 BHK', '10 Bedroom', '11 Bedroom',\n",
470
+ " '10 BHK', '19 BHK', '16 BHK', '43 Bedroom', '14 BHK', '8 BHK',\n",
471
+ " '12 Bedroom', '13 BHK', '18 Bedroom'], dtype=object)"
472
+ ]
473
+ },
474
+ "execution_count": 10,
475
+ "metadata": {},
476
+ "output_type": "execute_result"
477
+ }
478
+ ],
479
+ "source": [
480
+ "df['size'].unique()"
481
+ ]
482
+ },
483
+ {
484
+ "cell_type": "code",
485
+ "execution_count": 11,
486
+ "id": "8c31f9f0-27ac-4aca-b167-792c048ab7d2",
487
+ "metadata": {},
488
+ "outputs": [],
489
+ "source": [
490
+ "def extractNum(s):\n",
491
+ " num=0\n",
492
+ " i = 0\n",
493
+ " while s[i]>='0' and s[i]<='9':\n",
494
+ " num = num*10+ int(s[i])\n",
495
+ " i+=1\n",
496
+ " return num"
497
+ ]
498
+ },
499
+ {
500
+ "cell_type": "code",
501
+ "execution_count": 12,
502
+ "id": "1e83fd89-ed93-4aa2-9d7c-2ebd90f132be",
503
+ "metadata": {},
504
+ "outputs": [
505
+ {
506
+ "name": "stdout",
507
+ "output_type": "stream",
508
+ "text": [
509
+ "2\n"
510
+ ]
511
+ }
512
+ ],
513
+ "source": [
514
+ "print(extractNum(\"2 bhk\"))"
515
+ ]
516
+ },
517
+ {
518
+ "cell_type": "code",
519
+ "execution_count": 13,
520
+ "id": "9e70e370-1883-4c3e-8651-92bdc7e5c603",
521
+ "metadata": {},
522
+ "outputs": [],
523
+ "source": [
524
+ "df['size']=df['size'].apply(extractNum)"
525
+ ]
526
+ },
527
+ {
528
+ "cell_type": "code",
529
+ "execution_count": 14,
530
+ "id": "477c0c01-c8c2-4833-a317-ff9fa2bdabc5",
531
+ "metadata": {},
532
+ "outputs": [
533
+ {
534
+ "data": {
535
+ "text/plain": [
536
+ "array([ 2, 4, 3, 6, 1, 8, 7, 5, 11, 9, 27, 10, 19, 16, 43, 14, 12,\n",
537
+ " 13, 18], dtype=int64)"
538
+ ]
539
+ },
540
+ "execution_count": 14,
541
+ "metadata": {},
542
+ "output_type": "execute_result"
543
+ }
544
+ ],
545
+ "source": [
546
+ "df['size'].unique()"
547
+ ]
548
+ },
549
+ {
550
+ "cell_type": "code",
551
+ "execution_count": 15,
552
+ "id": "3105a155-468d-4c85-a6a8-6dd3b27fb987",
553
+ "metadata": {},
554
+ "outputs": [],
555
+ "source": [
556
+ "df.to_csv('ygug.csv')"
557
+ ]
558
+ },
559
+ {
560
+ "cell_type": "code",
561
+ "execution_count": 16,
562
+ "id": "402344c6-aaed-4de4-9b13-ad5e77bc21bb",
563
+ "metadata": {},
564
+ "outputs": [
565
+ {
566
+ "data": {
567
+ "text/plain": [
568
+ "array(['1056', '2600', '1440', ..., '1133 - 1384', '774', '4689'],\n",
569
+ " dtype=object)"
570
+ ]
571
+ },
572
+ "execution_count": 16,
573
+ "metadata": {},
574
+ "output_type": "execute_result"
575
+ }
576
+ ],
577
+ "source": [
578
+ "df.total_sqft.unique()"
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "execution_count": 17,
584
+ "id": "ec23ac37-68ec-4ade-a189-c800fa06835c",
585
+ "metadata": {},
586
+ "outputs": [],
587
+ "source": [
588
+ "def rangeToMean(x):\n",
589
+ " try:\n",
590
+ " float(x)\n",
591
+ " except:\n",
592
+ " nums = x.split('-')\n",
593
+ " try:\n",
594
+ " float((float(nums[0])+float(nums[1]))/2)\n",
595
+ " except:\n",
596
+ " return None\n",
597
+ " return float((float(nums[0])+float(nums[1]))/2)\n",
598
+ " return float(x)"
599
+ ]
600
+ },
601
+ {
602
+ "cell_type": "code",
603
+ "execution_count": 18,
604
+ "id": "a1de141c-81b3-4a1b-b664-025d2fdffc82",
605
+ "metadata": {},
606
+ "outputs": [],
607
+ "source": [
608
+ "df['total_sqft'] = df['total_sqft'].apply(rangeToMean)"
609
+ ]
610
+ },
611
+ {
612
+ "cell_type": "code",
613
+ "execution_count": 19,
614
+ "id": "3be0c853-0947-4b15-be72-187a708fc54c",
615
+ "metadata": {},
616
+ "outputs": [],
617
+ "source": [
618
+ "df.dropna(inplace=True)"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "code",
623
+ "execution_count": 20,
624
+ "id": "7d44c637-c9cd-4139-a7d2-57d227da687c",
625
+ "metadata": {},
626
+ "outputs": [
627
+ {
628
+ "data": {
629
+ "text/plain": [
630
+ "location False\n",
631
+ "size False\n",
632
+ "total_sqft False\n",
633
+ "bath False\n",
634
+ "price False\n",
635
+ "dtype: bool"
636
+ ]
637
+ },
638
+ "execution_count": 20,
639
+ "metadata": {},
640
+ "output_type": "execute_result"
641
+ }
642
+ ],
643
+ "source": [
644
+ "df.isna().any()"
645
+ ]
646
+ },
647
+ {
648
+ "cell_type": "code",
649
+ "execution_count": 21,
650
+ "id": "b03ac292-fea6-438c-82b0-7b9a29a0ea80",
651
+ "metadata": {},
652
+ "outputs": [
653
+ {
654
+ "data": {
655
+ "text/html": [
656
+ "<div>\n",
657
+ "<style scoped>\n",
658
+ " .dataframe tbody tr th:only-of-type {\n",
659
+ " vertical-align: middle;\n",
660
+ " }\n",
661
+ "\n",
662
+ " .dataframe tbody tr th {\n",
663
+ " vertical-align: top;\n",
664
+ " }\n",
665
+ "\n",
666
+ " .dataframe thead th {\n",
667
+ " text-align: right;\n",
668
+ " }\n",
669
+ "</style>\n",
670
+ "<table border=\"1\" class=\"dataframe\">\n",
671
+ " <thead>\n",
672
+ " <tr style=\"text-align: right;\">\n",
673
+ " <th></th>\n",
674
+ " <th>location</th>\n",
675
+ " <th>size</th>\n",
676
+ " <th>total_sqft</th>\n",
677
+ " <th>bath</th>\n",
678
+ " <th>price</th>\n",
679
+ " </tr>\n",
680
+ " </thead>\n",
681
+ " <tbody>\n",
682
+ " <tr>\n",
683
+ " <th>0</th>\n",
684
+ " <td>Electronic City Phase II</td>\n",
685
+ " <td>2</td>\n",
686
+ " <td>1056.0</td>\n",
687
+ " <td>2.0</td>\n",
688
+ " <td>39.07</td>\n",
689
+ " </tr>\n",
690
+ " <tr>\n",
691
+ " <th>1</th>\n",
692
+ " <td>Chikka Tirupathi</td>\n",
693
+ " <td>4</td>\n",
694
+ " <td>2600.0</td>\n",
695
+ " <td>5.0</td>\n",
696
+ " <td>120.00</td>\n",
697
+ " </tr>\n",
698
+ " <tr>\n",
699
+ " <th>2</th>\n",
700
+ " <td>Uttarahalli</td>\n",
701
+ " <td>3</td>\n",
702
+ " <td>1440.0</td>\n",
703
+ " <td>2.0</td>\n",
704
+ " <td>62.00</td>\n",
705
+ " </tr>\n",
706
+ " <tr>\n",
707
+ " <th>3</th>\n",
708
+ " <td>Lingadheeranahalli</td>\n",
709
+ " <td>3</td>\n",
710
+ " <td>1521.0</td>\n",
711
+ " <td>3.0</td>\n",
712
+ " <td>95.00</td>\n",
713
+ " </tr>\n",
714
+ " <tr>\n",
715
+ " <th>4</th>\n",
716
+ " <td>Kothanur</td>\n",
717
+ " <td>2</td>\n",
718
+ " <td>1200.0</td>\n",
719
+ " <td>2.0</td>\n",
720
+ " <td>51.00</td>\n",
721
+ " </tr>\n",
722
+ " </tbody>\n",
723
+ "</table>\n",
724
+ "</div>"
725
+ ],
726
+ "text/plain": [
727
+ " location size total_sqft bath price\n",
728
+ "0 Electronic City Phase II 2 1056.0 2.0 39.07\n",
729
+ "1 Chikka Tirupathi 4 2600.0 5.0 120.00\n",
730
+ "2 Uttarahalli 3 1440.0 2.0 62.00\n",
731
+ "3 Lingadheeranahalli 3 1521.0 3.0 95.00\n",
732
+ "4 Kothanur 2 1200.0 2.0 51.00"
733
+ ]
734
+ },
735
+ "execution_count": 21,
736
+ "metadata": {},
737
+ "output_type": "execute_result"
738
+ }
739
+ ],
740
+ "source": [
741
+ "df.head()"
742
+ ]
743
+ },
744
+ {
745
+ "cell_type": "code",
746
+ "execution_count": 22,
747
+ "id": "315a28ae-b560-4252-b3dc-3d926c0bcec2",
748
+ "metadata": {},
749
+ "outputs": [
750
+ {
751
+ "data": {
752
+ "text/html": [
753
+ "<div>\n",
754
+ "<style scoped>\n",
755
+ " .dataframe tbody tr th:only-of-type {\n",
756
+ " vertical-align: middle;\n",
757
+ " }\n",
758
+ "\n",
759
+ " .dataframe tbody tr th {\n",
760
+ " vertical-align: top;\n",
761
+ " }\n",
762
+ "\n",
763
+ " .dataframe thead th {\n",
764
+ " text-align: right;\n",
765
+ " }\n",
766
+ "</style>\n",
767
+ "<table border=\"1\" class=\"dataframe\">\n",
768
+ " <thead>\n",
769
+ " <tr style=\"text-align: right;\">\n",
770
+ " <th></th>\n",
771
+ " <th>location</th>\n",
772
+ " <th>size</th>\n",
773
+ " <th>total_sqft</th>\n",
774
+ " <th>bath</th>\n",
775
+ " <th>price</th>\n",
776
+ " <th>price_per_sqft</th>\n",
777
+ " </tr>\n",
778
+ " </thead>\n",
779
+ " <tbody>\n",
780
+ " <tr>\n",
781
+ " <th>0</th>\n",
782
+ " <td>Electronic City Phase II</td>\n",
783
+ " <td>2</td>\n",
784
+ " <td>1056.0</td>\n",
785
+ " <td>2.0</td>\n",
786
+ " <td>39.07</td>\n",
787
+ " <td>3699.810606</td>\n",
788
+ " </tr>\n",
789
+ " <tr>\n",
790
+ " <th>1</th>\n",
791
+ " <td>Chikka Tirupathi</td>\n",
792
+ " <td>4</td>\n",
793
+ " <td>2600.0</td>\n",
794
+ " <td>5.0</td>\n",
795
+ " <td>120.00</td>\n",
796
+ " <td>4615.384615</td>\n",
797
+ " </tr>\n",
798
+ " <tr>\n",
799
+ " <th>2</th>\n",
800
+ " <td>Uttarahalli</td>\n",
801
+ " <td>3</td>\n",
802
+ " <td>1440.0</td>\n",
803
+ " <td>2.0</td>\n",
804
+ " <td>62.00</td>\n",
805
+ " <td>4305.555556</td>\n",
806
+ " </tr>\n",
807
+ " <tr>\n",
808
+ " <th>3</th>\n",
809
+ " <td>Lingadheeranahalli</td>\n",
810
+ " <td>3</td>\n",
811
+ " <td>1521.0</td>\n",
812
+ " <td>3.0</td>\n",
813
+ " <td>95.00</td>\n",
814
+ " <td>6245.890861</td>\n",
815
+ " </tr>\n",
816
+ " <tr>\n",
817
+ " <th>4</th>\n",
818
+ " <td>Kothanur</td>\n",
819
+ " <td>2</td>\n",
820
+ " <td>1200.0</td>\n",
821
+ " <td>2.0</td>\n",
822
+ " <td>51.00</td>\n",
823
+ " <td>4250.000000</td>\n",
824
+ " </tr>\n",
825
+ " </tbody>\n",
826
+ "</table>\n",
827
+ "</div>"
828
+ ],
829
+ "text/plain": [
830
+ " location size total_sqft bath price price_per_sqft\n",
831
+ "0 Electronic City Phase II 2 1056.0 2.0 39.07 3699.810606\n",
832
+ "1 Chikka Tirupathi 4 2600.0 5.0 120.00 4615.384615\n",
833
+ "2 Uttarahalli 3 1440.0 2.0 62.00 4305.555556\n",
834
+ "3 Lingadheeranahalli 3 1521.0 3.0 95.00 6245.890861\n",
835
+ "4 Kothanur 2 1200.0 2.0 51.00 4250.000000"
836
+ ]
837
+ },
838
+ "execution_count": 22,
839
+ "metadata": {},
840
+ "output_type": "execute_result"
841
+ }
842
+ ],
843
+ "source": [
844
+ "df['price_per_sqft']=df['price']*100000/df['total_sqft']\n",
845
+ "df.head()"
846
+ ]
847
+ },
848
+ {
849
+ "cell_type": "code",
850
+ "execution_count": null,
851
+ "id": "6f5e4a78-29b9-4173-8c93-64a299b2bfff",
852
+ "metadata": {},
853
+ "outputs": [],
854
+ "source": []
855
+ },
856
+ {
857
+ "cell_type": "markdown",
858
+ "id": "2a629008-8a90-4270-8a64-eecf0c871221",
859
+ "metadata": {},
860
+ "source": [
861
+ "## Outlier Detection and Removal"
862
+ ]
863
+ },
864
+ {
865
+ "cell_type": "code",
866
+ "execution_count": 23,
867
+ "id": "ac9d1ebb-749c-4729-9dd5-f8cd12c074f5",
868
+ "metadata": {},
869
+ "outputs": [],
870
+ "source": [
871
+ "df = df[~(df['total_sqft']/df['size']<300)]"
872
+ ]
873
+ },
874
+ {
875
+ "cell_type": "code",
876
+ "execution_count": 24,
877
+ "id": "9a089a6a-0e86-4b15-9239-f4954326b8e0",
878
+ "metadata": {},
879
+ "outputs": [
880
+ {
881
+ "data": {
882
+ "text/plain": [
883
+ "count 12456.000000\n",
884
+ "mean 6308.502826\n",
885
+ "std 4168.127339\n",
886
+ "min 267.829813\n",
887
+ "25% 4210.526316\n",
888
+ "50% 5294.117647\n",
889
+ "75% 6916.666667\n",
890
+ "max 176470.588235\n",
891
+ "Name: price_per_sqft, dtype: float64"
892
+ ]
893
+ },
894
+ "execution_count": 24,
895
+ "metadata": {},
896
+ "output_type": "execute_result"
897
+ }
898
+ ],
899
+ "source": [
900
+ "df['price_per_sqft'].describe()"
901
+ ]
902
+ },
903
+ {
904
+ "cell_type": "code",
905
+ "execution_count": 25,
906
+ "id": "46907e8a-87ca-4cbb-bbc4-e37dae7a4a91",
907
+ "metadata": {},
908
+ "outputs": [],
909
+ "source": [
910
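+ "# removing price_per_sqft outliers location by location: keep rows within one std of the mean (NOTE: m and std below are taken over the whole df, not the location group; locations with a single row are dropped)\n",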
911
+ "df.location=df.location.apply(lambda x: x.strip())\n",
912
+ "l= df.groupby('location')\n",
913
+ "new_df = pd.DataFrame()\n",
914
+ "for key, group in l:\n",
915
+ " if(len(group)>1):\n",
916
+ " m = np.mean(df['price_per_sqft'])\n",
917
+ " std = np.std(df['price_per_sqft'])\n",
918
+ " red_df = group[((group['price_per_sqft']>=(m-std)) & (group['price_per_sqft']<=(m+std)))]\n",
919
+ " new_df = pd.concat([new_df, red_df])\n",
920
+ "df = new_df"
921
+ ]
922
+ },
923
+ {
924
+ "cell_type": "code",
925
+ "execution_count": 26,
926
+ "id": "ad168cf9-2709-4331-8322-c7cb2cc42cc3",
927
+ "metadata": {},
928
+ "outputs": [
929
+ {
930
+ "data": {
931
+ "text/plain": [
932
+ "(10959, 6)"
933
+ ]
934
+ },
935
+ "execution_count": 26,
936
+ "metadata": {},
937
+ "output_type": "execute_result"
938
+ }
939
+ ],
940
+ "source": [
941
+ "df.shape"
942
+ ]
943
+ },
944
+ {
945
+ "cell_type": "code",
946
+ "execution_count": 27,
947
+ "id": "2493a43a-c464-41ac-b183-21a21e635acc",
948
+ "metadata": {},
949
+ "outputs": [
950
+ {
951
+ "data": {
952
+ "text/plain": [
953
+ "array([ 3, 1, 4, 2, 5, 6, 7, 8, 9, 16, 10], dtype=int64)"
954
+ ]
955
+ },
956
+ "execution_count": 27,
957
+ "metadata": {},
958
+ "output_type": "execute_result"
959
+ }
960
+ ],
961
+ "source": [
962
+ "df['size'].unique()"
963
+ ]
964
+ },
965
+ {
966
+ "cell_type": "code",
967
+ "execution_count": 28,
968
+ "id": "0bb99211-d1da-4556-8b5f-9703b58a3d03",
969
+ "metadata": {},
970
+ "outputs": [
971
+ {
972
+ "data": {
973
+ "image/png": "\n",
974
+ "text/plain": [
975
+ "<Figure size 432x288 with 1 Axes>"
976
+ ]
977
+ },
978
+ "metadata": {
979
+ "needs_background": "light"
980
+ },
981
+ "output_type": "display_data"
982
+ },
983
+ {
984
+ "data": {
985
+ "text/plain": [
986
+ "<Figure size 1080x360 with 0 Axes>"
987
+ ]
988
+ },
989
+ "metadata": {},
990
+ "output_type": "display_data"
991
+ }
992
+ ],
993
+ "source": [
994
+ "import matplotlib.pyplot as plt\n",
995
+ "plt.scatter(df[df['size']==2]['total_sqft'],df[df['size']==2]['price'],color='red',marker='+')\n",
996
+ "plt.scatter(df[df['size']==3]['total_sqft'],df[df['size']==3]['price'],color='green',marker='.')\n",
997
+ "plt.xlabel(\"total_sqft\")\n",
998
+ "plt.ylabel(\"Price\")\n",
999
+ "plt.figure(figsize=(15,5))\n",
1000
+ "plt.show()"
1001
+ ]
1002
+ },
1003
+ {
1004
+ "cell_type": "markdown",
1005
+ "id": "cca9fddd-d8fe-4de2-baf3-8cf3d0b1e7fd",
1006
+ "metadata": {},
1007
+ "source": [
1008
+ "## Visualisation with datashader"
1009
+ ]
1010
+ },
1011
+ {
1012
+ "cell_type": "code",
1013
+ "execution_count": 29,
1014
+ "id": "824fabbb-6e9b-45cc-98b0-e17d7df3993e",
1015
+ "metadata": {},
1016
+ "outputs": [
1017
+ {
1018
+ "name": "stderr",
1019
+ "output_type": "stream",
1020
+ "text": [
1021
+ "C:\\Users\\Abhay\\anaconda3\\lib\\site-packages\\dask\\dataframe\\utils.py:369: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
1022
+ " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
1023
+ "C:\\Users\\Abhay\\anaconda3\\lib\\site-packages\\dask\\dataframe\\utils.py:369: FutureWarning: pandas.Float64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
1024
+ " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n",
1025
+ "C:\\Users\\Abhay\\anaconda3\\lib\\site-packages\\dask\\dataframe\\utils.py:369: FutureWarning: pandas.UInt64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
1026
+ " _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)\n"
1027
+ ]
1028
+ },
1029
+ {
1030
+ "data": {
1031
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAADnCAYAAADl9EEgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAADeUlEQVR4nO3cMc5MYRSA4bnCBsQK7OBP9CqFHYhVWMKd3gpswRJYgkStVgmVjr+4GqIZV3Xc1+R5ypnkfNO8OckUZ9m27QT03Dn6BwCXiROixAlR4oQocULU3b0vz+ezv3Lhp3VdR+Zu27Zc+tzmhChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTojavb4H/DZ1fe9PbE6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEqKu6vjdxHe1fX1yDX2xOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVGHHPh6M3Q0yzEuronNCVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBB1yPW9h8vM3PcD1/du/rNLgS4QXg+bE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFq9/re1/M68uizoZt/r25n5k5wJY+/sTkhSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTonbv4L18PvPo29czc7/MjIVD2JwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUbvX95YX90YeffzhdmTug3cjY+EQNidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEqN0DX6dH30ce/fZpGZl7s64jc+EINidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcECVOiBInRIkTosQJUeKEKHFC1P71vc/3Rx59+nFk7OnJzFg4hM0JUeKEKHFClDghSpwQJU6IEidEiROixAlR4oQocUKUOCFKnBAlTogSJ0SJE6LECVHihChxQpQ4IUqcELVs23b0bwAusDkhSpwQJU6IEidEiROixAlRPwAfPC9vgQglngAAAABJRU5ErkJggg==\n",
1032
+ "text/plain": [
1033
+ "<Figure size 432x288 with 1 Axes>"
1034
+ ]
1035
+ },
1036
+ "metadata": {
1037
+ "needs_background": "light"
1038
+ },
1039
+ "output_type": "display_data"
1040
+ }
1041
+ ],
1042
+ "source": [
1043
+ "import datashader as ds\n",
1044
+ "import colorcet as cc\n",
1045
+ "bhk_2 = df[df['size']==2]\n",
1046
+ "x_r = [bhk_2['total_sqft'].min(),bhk_2['total_sqft'].max()]\n",
1047
+ "y_r = [bhk_2['price'].min(),bhk_2['price'].max()]\n",
1048
+ "cvs = ds.Canvas(plot_width=10, plot_height=10,x_range=x_r,y_range=y_r) # auto range or provide the `bounds` argument\n",
1049
+ "agg = cvs.points(bhk_2, 'total_sqft', 'price') # this is the histogram\n",
1050
+ "img = ds.tf.set_background(ds.tf.shade(agg, how=\"log\", cmap=cc.fire), \"grey\").to_pil() # create a rasterized image\n",
1051
+ "plt.imshow(img)\n",
1052
+ "plt.axis('off')\n",
1053
+ "plt.show()"
1054
+ ]
1055
+ },
1056
+ {
1057
+ "cell_type": "code",
1058
+ "execution_count": 30,
1059
+ "id": "3258accc-e592-4db1-9354-bdb81c1b0081",
1060
+ "metadata": {},
1061
+ "outputs": [],
1062
+ "source": [
1063
+ "def bhk_outlier(d,location):\n",
1064
+ " plt.title(location)\n",
1065
+ " plt.xlabel(\"total Sqft Area\")\n",
1066
+ " plt.ylabel(\"Price\")\n",
1067
+ " d_loc = d[d['location']==location]\n",
1068
+ " bhk_2 = d_loc[d_loc['size']==2]\n",
1069
+ " bhk_3 = d_loc[d_loc['size']==3]\n",
1070
+ " plt.scatter(bhk_2['total_sqft'],bhk_2['price'],marker='+',label='2 bhk',color='green')\n",
1071
+ " plt.scatter(bhk_3['total_sqft'],bhk_3['price'],marker='.',label='3_bhk',color='blue')\n",
1072
+ " plt.legend()\n",
1073
+ " plt.show()"
1074
+ ]
1075
+ },
1076
+ {
1077
+ "cell_type": "code",
1078
+ "execution_count": 31,
1079
+ "id": "dea56f46-e3b4-448b-bbc4-7f9cfaf4a10f",
1080
+ "metadata": {},
1081
+ "outputs": [],
1082
+ "source": [
1083
+ "# unique_locations = df.location.unique()\n",
1084
+ "# for location in unique_locations:\n",
1085
+ "# bhk_outlier(df,location)\n",
1086
+ "# print('\\n\\n\\n')"
1087
+ ]
1088
+ },
1089
+ {
1090
+ "cell_type": "code",
1091
+ "execution_count": 32,
1092
+ "id": "f96c2b1e-7efe-4d5a-8692-289d76e7aeab",
1093
+ "metadata": {},
1094
+ "outputs": [
1095
+ {
1096
+ "data": {
1097
+ "text/html": [
1098
+ "<div>\n",
1099
+ "<style scoped>\n",
1100
+ " .dataframe tbody tr th:only-of-type {\n",
1101
+ " vertical-align: middle;\n",
1102
+ " }\n",
1103
+ "\n",
1104
+ " .dataframe tbody tr th {\n",
1105
+ " vertical-align: top;\n",
1106
+ " }\n",
1107
+ "\n",
1108
+ " .dataframe thead th {\n",
1109
+ " text-align: right;\n",
1110
+ " }\n",
1111
+ "</style>\n",
1112
+ "<table border=\"1\" class=\"dataframe\">\n",
1113
+ " <thead>\n",
1114
+ " <tr style=\"text-align: right;\">\n",
1115
+ " <th></th>\n",
1116
+ " <th>location</th>\n",
1117
+ " <th>size</th>\n",
1118
+ " <th>total_sqft</th>\n",
1119
+ " <th>bath</th>\n",
1120
+ " <th>price</th>\n",
1121
+ " <th>price_per_sqft</th>\n",
1122
+ " </tr>\n",
1123
+ " </thead>\n",
1124
+ " <tbody>\n",
1125
+ " <tr>\n",
1126
+ " <th>5610</th>\n",
1127
+ " <td>1st Block BEL Layout</td>\n",
1128
+ " <td>3</td>\n",
1129
+ " <td>1540.0</td>\n",
1130
+ " <td>3.0</td>\n",
1131
+ " <td>85.0</td>\n",
1132
+ " <td>5519.480519</td>\n",
1133
+ " </tr>\n",
1134
+ " <tr>\n",
1135
+ " <th>8470</th>\n",
1136
+ " <td>1st Block HBR Layout</td>\n",
1137
+ " <td>1</td>\n",
1138
+ " <td>600.0</td>\n",
1139
+ " <td>1.0</td>\n",
1140
+ " <td>45.0</td>\n",
1141
+ " <td>7500.000000</td>\n",
1142
+ " </tr>\n",
1143
+ " <tr>\n",
1144
+ " <th>12564</th>\n",
1145
+ " <td>1st Block HBR Layout</td>\n",
1146
+ " <td>4</td>\n",
1147
+ " <td>3150.0</td>\n",
1148
+ " <td>4.0</td>\n",
1149
+ " <td>150.0</td>\n",
1150
+ " <td>4761.904762</td>\n",
1151
+ " </tr>\n",
1152
+ " <tr>\n",
1153
+ " <th>2308</th>\n",
1154
+ " <td>1st Block HRBR Layout</td>\n",
1155
+ " <td>3</td>\n",
1156
+ " <td>2300.0</td>\n",
1157
+ " <td>3.0</td>\n",
1158
+ " <td>80.0</td>\n",
1159
+ " <td>3478.260870</td>\n",
1160
+ " </tr>\n",
1161
+ " <tr>\n",
1162
+ " <th>7715</th>\n",
1163
+ " <td>1st Block HRBR Layout</td>\n",
1164
+ " <td>2</td>\n",
1165
+ " <td>1250.0</td>\n",
1166
+ " <td>2.0</td>\n",
1167
+ " <td>67.0</td>\n",
1168
+ " <td>5360.000000</td>\n",
1169
+ " </tr>\n",
1170
+ " </tbody>\n",
1171
+ "</table>\n",
1172
+ "</div>"
1173
+ ],
1174
+ "text/plain": [
1175
+ " location size total_sqft bath price price_per_sqft\n",
1176
+ "5610 1st Block BEL Layout 3 1540.0 3.0 85.0 5519.480519\n",
1177
+ "8470 1st Block HBR Layout 1 600.0 1.0 45.0 7500.000000\n",
1178
+ "12564 1st Block HBR Layout 4 3150.0 4.0 150.0 4761.904762\n",
1179
+ "2308 1st Block HRBR Layout 3 2300.0 3.0 80.0 3478.260870\n",
1180
+ "7715 1st Block HRBR Layout 2 1250.0 2.0 67.0 5360.000000"
1181
+ ]
1182
+ },
1183
+ "execution_count": 32,
1184
+ "metadata": {},
1185
+ "output_type": "execute_result"
1186
+ }
1187
+ ],
1188
+ "source": [
1189
+ "df.head()"
1190
+ ]
1191
+ },
1192
+ {
1193
+ "cell_type": "code",
1194
+ "execution_count": 33,
1195
+ "id": "394d151c-c806-4d0a-be81-f91f8e5f5ead",
1196
+ "metadata": {},
1197
+ "outputs": [],
1198
+ "source": [
1199
+ "def bhk_outlier_remover():\n",
1200
+ " exclude_indices = np.array([])\n",
1201
+ " for loc,loc_df in df.groupby('location'):\n",
1202
+ " # creating stats {mean,count,std} for each bhk\n",
1203
+ " stats = {}\n",
1204
+ " bhks = loc_df.groupby('size')\n",
1205
+ " for bhk,bhk_group in bhks:\n",
1206
+ " stats[bhk] = {\n",
1207
+ " 'mean':np.mean(bhk_group['price_per_sqft']) ,\n",
1208
+ " 'std': np.std(bhk_group['price_per_sqft']),\n",
1209
+ " 'count':bhk_group.shape[0]\n",
1210
+ " }\n",
1211
+ "# print(stats)\n",
1212
+ " # adding indexes corresponding to rows where price_per_sqft of bhk_n is less than the mean price_per_sqft of bhk_n-1 into exclude_indices\n",
1213
+ " for bhk,bhk_group in bhks:\n",
1214
+ " stats_prev = stats.get(bhk-1)\n",
1215
+ " if stats_prev and stats_prev['count']>5:\n",
1216
+ " index_to_del = bhk_group[bhk_group['price_per_sqft'] < (stats_prev['mean'])].index.values\n",
1217
+ " exclude_indices = np.append(exclude_indices,index_to_del)\n",
1218
+ " return df.drop(exclude_indices)"
1219
+ ]
1220
+ },
1221
+ {
1222
+ "cell_type": "code",
1223
+ "execution_count": 34,
1224
+ "id": "c2bf8e0f-6e97-4a4e-8a71-1334a0f88efd",
1225
+ "metadata": {},
1226
+ "outputs": [],
1227
+ "source": [
1228
+ "df2 = bhk_outlier_remover()"
1229
+ ]
1230
+ },
1231
+ {
1232
+ "cell_type": "code",
1233
+ "execution_count": 35,
1234
+ "id": "63c7ebf2-a16b-4be3-bd58-b87890b5354b",
1235
+ "metadata": {},
1236
+ "outputs": [
1237
+ {
1238
+ "data": {
1239
+ "text/plain": [
1240
+ "(8764, 6)"
1241
+ ]
1242
+ },
1243
+ "execution_count": 35,
1244
+ "metadata": {},
1245
+ "output_type": "execute_result"
1246
+ }
1247
+ ],
1248
+ "source": [
1249
+ "df2.shape"
1250
+ ]
1251
+ },
1252
+ {
1253
+ "cell_type": "code",
1254
+ "execution_count": 36,
1255
+ "id": "9d0156ce-912a-406f-83f7-7288e9ab83a4",
1256
+ "metadata": {},
1257
+ "outputs": [
1258
+ {
1259
+ "data": {
1260
+ "text/plain": [
1261
+ "(10959, 6)"
1262
+ ]
1263
+ },
1264
+ "execution_count": 36,
1265
+ "metadata": {},
1266
+ "output_type": "execute_result"
1267
+ }
1268
+ ],
1269
+ "source": [
1270
+ "df.shape"
1271
+ ]
1272
+ },
1273
+ {
1274
+ "cell_type": "code",
1275
+ "execution_count": 37,
1276
+ "id": "95ebb8af-8b2f-45c8-a82b-887a57ec6a08",
1277
+ "metadata": {
1278
+ "tags": []
1279
+ },
1280
+ "outputs": [],
1281
+ "source": [
1282
+ "# df.location =df.location.apply(lambda x: x.strip())\n",
1283
+ "# location_stats = df.groupby('location')['location'].agg('count')\n",
1284
+ "# location_stats"
1285
+ ]
1286
+ },
1287
+ {
1288
+ "cell_type": "code",
1289
+ "execution_count": 38,
1290
+ "id": "18a3b81b-487c-4fc9-b010-aae5a63f0e4c",
1291
+ "metadata": {},
1292
+ "outputs": [
1293
+ {
1294
+ "data": {
1295
+ "text/plain": [
1296
+ "752"
1297
+ ]
1298
+ },
1299
+ "execution_count": 38,
1300
+ "metadata": {},
1301
+ "output_type": "execute_result"
1302
+ }
1303
+ ],
1304
+ "source": [
1305
+ "len(df2.location.unique())"
1306
+ ]
1307
+ },
1308
+ {
1309
+ "cell_type": "markdown",
1310
+ "id": "61531f21-5d53-48d5-a2ed-b73c127d77f4",
1311
+ "metadata": {},
1312
+ "source": [
1313
+ "### Model Training"
1314
+ ]
1315
+ },
1316
+ {
1317
+ "cell_type": "code",
1318
+ "execution_count": 39,
1319
+ "id": "eb533a5c-70dc-4735-821d-4f35fc65a0bd",
1320
+ "metadata": {},
1321
+ "outputs": [],
1322
+ "source": [
1323
+ "df2.drop('price_per_sqft',axis=1,inplace=True)"
1324
+ ]
1325
+ },
1326
+ {
1327
+ "cell_type": "code",
1328
+ "execution_count": 40,
1329
+ "id": "b272fca5-4feb-4bd0-9caa-dec1cc875626",
1330
+ "metadata": {},
1331
+ "outputs": [
1332
+ {
1333
+ "data": {
1334
+ "text/html": [
1335
+ "<div>\n",
1336
+ "<style scoped>\n",
1337
+ " .dataframe tbody tr th:only-of-type {\n",
1338
+ " vertical-align: middle;\n",
1339
+ " }\n",
1340
+ "\n",
1341
+ " .dataframe tbody tr th {\n",
1342
+ " vertical-align: top;\n",
1343
+ " }\n",
1344
+ "\n",
1345
+ " .dataframe thead th {\n",
1346
+ " text-align: right;\n",
1347
+ " }\n",
1348
+ "</style>\n",
1349
+ "<table border=\"1\" class=\"dataframe\">\n",
1350
+ " <thead>\n",
1351
+ " <tr style=\"text-align: right;\">\n",
1352
+ " <th></th>\n",
1353
+ " <th>location</th>\n",
1354
+ " <th>size</th>\n",
1355
+ " <th>total_sqft</th>\n",
1356
+ " <th>bath</th>\n",
1357
+ " <th>price</th>\n",
1358
+ " </tr>\n",
1359
+ " </thead>\n",
1360
+ " <tbody>\n",
1361
+ " <tr>\n",
1362
+ " <th>5610</th>\n",
1363
+ " <td>1st Block BEL Layout</td>\n",
1364
+ " <td>3</td>\n",
1365
+ " <td>1540.0</td>\n",
1366
+ " <td>3.0</td>\n",
1367
+ " <td>85.0</td>\n",
1368
+ " </tr>\n",
1369
+ " <tr>\n",
1370
+ " <th>8470</th>\n",
1371
+ " <td>1st Block HBR Layout</td>\n",
1372
+ " <td>1</td>\n",
1373
+ " <td>600.0</td>\n",
1374
+ " <td>1.0</td>\n",
1375
+ " <td>45.0</td>\n",
1376
+ " </tr>\n",
1377
+ " <tr>\n",
1378
+ " <th>12564</th>\n",
1379
+ " <td>1st Block HBR Layout</td>\n",
1380
+ " <td>4</td>\n",
1381
+ " <td>3150.0</td>\n",
1382
+ " <td>4.0</td>\n",
1383
+ " <td>150.0</td>\n",
1384
+ " </tr>\n",
1385
+ " <tr>\n",
1386
+ " <th>2308</th>\n",
1387
+ " <td>1st Block HRBR Layout</td>\n",
1388
+ " <td>3</td>\n",
1389
+ " <td>2300.0</td>\n",
1390
+ " <td>3.0</td>\n",
1391
+ " <td>80.0</td>\n",
1392
+ " </tr>\n",
1393
+ " <tr>\n",
1394
+ " <th>7715</th>\n",
1395
+ " <td>1st Block HRBR Layout</td>\n",
1396
+ " <td>2</td>\n",
1397
+ " <td>1250.0</td>\n",
1398
+ " <td>2.0</td>\n",
1399
+ " <td>67.0</td>\n",
1400
+ " </tr>\n",
1401
+ " </tbody>\n",
1402
+ "</table>\n",
1403
+ "</div>"
1404
+ ],
1405
+ "text/plain": [
1406
+ " location size total_sqft bath price\n",
1407
+ "5610 1st Block BEL Layout 3 1540.0 3.0 85.0\n",
1408
+ "8470 1st Block HBR Layout 1 600.0 1.0 45.0\n",
1409
+ "12564 1st Block HBR Layout 4 3150.0 4.0 150.0\n",
1410
+ "2308 1st Block HRBR Layout 3 2300.0 3.0 80.0\n",
1411
+ "7715 1st Block HRBR Layout 2 1250.0 2.0 67.0"
1412
+ ]
1413
+ },
1414
+ "execution_count": 40,
1415
+ "metadata": {},
1416
+ "output_type": "execute_result"
1417
+ }
1418
+ ],
1419
+ "source": [
1420
+ "df2.head()"
1421
+ ]
1422
+ },
1423
+ {
1424
+ "cell_type": "code",
1425
+ "execution_count": 41,
1426
+ "id": "2a2ec5e7-d63a-463c-a447-5f0b6ef818b7",
1427
+ "metadata": {},
1428
+ "outputs": [],
1429
+ "source": [
1430
+ "from sklearn.preprocessing import OneHotEncoder"
1431
+ ]
1432
+ },
1433
+ {
1434
+ "cell_type": "code",
1435
+ "execution_count": 42,
1436
+ "id": "03a87d19-bd9a-46bf-a1fb-80d58250e54e",
1437
+ "metadata": {},
1438
+ "outputs": [],
1439
+ "source": [
1440
+ "ohe = OneHotEncoder()"
1441
+ ]
1442
+ },
1443
+ {
1444
+ "cell_type": "code",
1445
+ "execution_count": 43,
1446
+ "id": "c391eb23-71b2-4dd0-9bbd-7d067ddbe92e",
1447
+ "metadata": {},
1448
+ "outputs": [
1449
+ {
1450
+ "data": {
1451
+ "text/plain": [
1452
+ "OneHotEncoder()"
1453
+ ]
1454
+ },
1455
+ "execution_count": 43,
1456
+ "metadata": {},
1457
+ "output_type": "execute_result"
1458
+ }
1459
+ ],
1460
+ "source": [
1461
+ "ohe.fit(df2[['location']])"
1462
+ ]
1463
+ },
1464
+ {
1465
+ "cell_type": "code",
1466
+ "execution_count": 44,
1467
+ "id": "1f2d6a85-6d02-43be-a513-f1b9fa5b6258",
1468
+ "metadata": {},
1469
+ "outputs": [
1470
+ {
1471
+ "data": {
1472
+ "text/plain": [
1473
+ "(8764, 752)"
1474
+ ]
1475
+ },
1476
+ "execution_count": 44,
1477
+ "metadata": {},
1478
+ "output_type": "execute_result"
1479
+ }
1480
+ ],
1481
+ "source": [
1482
+ "location_encoding = ohe.transform(df2[['location']]).toarray()\n",
1483
+ "location_encoding.shape"
1484
+ ]
1485
+ },
1486
+ {
1487
+ "cell_type": "code",
1488
+ "execution_count": 45,
1489
+ "id": "04a57321-7031-465e-a507-8610abdb3b82",
1490
+ "metadata": {},
1491
+ "outputs": [
1492
+ {
1493
+ "data": {
1494
+ "text/plain": [
1495
+ "[array(['1st Block BEL Layout', '1st Block HBR Layout',\n",
1496
+ " '1st Block HRBR Layout', '1st Block Jayanagar',\n",
1497
+ " '1st Block Koramangala', '1st Phase JP Nagar',\n",
1498
+ " '1st Stage Indira Nagar', '2nd Block Bel Layout',\n",
1499
+ " '2nd Block Hrbr Layout', '2nd Block Jayanagar',\n",
1500
+ " '2nd Phase JP Nagar', '2nd Phase Judicial Layout',\n",
1501
+ " '2nd Stage Arekere Mico Layout', '2nd Stage Nagarbhavi',\n",
1502
+ " '3rd Block Banashankari', '3rd Block Hrbr Layout',\n",
1503
+ " '3rd Block Jayanagar', '3rd Block Koramangala',\n",
1504
+ " '3rd Phase JP Nagar', '4th Block Jayanagar',\n",
1505
+ " '4th Block Koramangala', '4th Phase JP Nagar',\n",
1506
+ " '4th T block Jayanagar', '5th Block Hbr Layout',\n",
1507
+ " '5th Phase JP Nagar', '5th Stage BEML Layout',\n",
1508
+ " '6th Phase JP Nagar', '6th block Koramangala',\n",
1509
+ " '7th Block Jayanagar', '7th Phase JP Nagar', '8th Block Jayanagar',\n",
1510
+ " '8th Phase JP Nagar', '8th block Koramangala',\n",
1511
+ " '9th Phase JP Nagar', 'A Narayanapura', 'AECS Layout',\n",
1512
+ " 'AGS Layout', 'AMS Layout', 'Abbaiah Reddy Layout', 'Abbigere',\n",
1513
+ " 'Adityanagar', 'Agrahara Dasarahalli', 'Aishwarya Crystal Layout',\n",
1514
+ " 'Akshaya Nagar', 'Akshaya Vana', 'Akshayanagara East',\n",
1515
+ " 'Akshayanagara West', 'Akshya Nagar', 'Alfa Garden Layout', 'Alur',\n",
1516
+ " 'Amam Enclave Layout', 'Amarjyothi Colony', 'Ambalipura',\n",
1517
+ " 'Ambedkar Colony', 'Ambedkar Nagar', 'Amblipura', 'Amruthahalli',\n",
1518
+ " 'Amruthnagar', 'Anand Nagar', 'Anand nagar', 'Anandapura',\n",
1519
+ " 'Anantapura', 'Ananth Nagar', 'Anekal', 'Anjanapura',\n",
1520
+ " 'Anjappa Layout', 'Ankappa Layout', 'Annaiah Reddy Layout',\n",
1521
+ " 'Annapurneshwari Nagar', 'Anugrah Layout', 'Anwar Layout',\n",
1522
+ " 'Ardendale', 'Arehalli', 'Arekere', 'Ashirvad Colony',\n",
1523
+ " 'Ashok Nagar', 'Ashwath Nagar', 'Ashwathnagar', 'Ashwini layout',\n",
1524
+ " 'Atmananda Colony', 'Attibele', 'Attur Layout', 'Austin Town',\n",
1525
+ " 'Avalahalli', 'Ayappa Nagar', 'B Channasandra', 'B Narayanapura',\n",
1526
+ " 'BCC Layout', 'BCMC Layout', 'BEL Road', 'BEML Layout',\n",
1527
+ " 'BHEL Layout', 'BSM Extension', 'BTM 1st Stage', 'BTM 2nd Stage',\n",
1528
+ " 'BTM 4th Stage', 'BTM Layout', 'Baba Nagar', 'Babusapalaya',\n",
1529
+ " 'Badavala Nagar', 'Bagalakunte', 'Bagalur', 'Bagalur Main Road',\n",
1530
+ " 'Balagere', 'Balaji Gardens Layout', 'Banagiri Nagar',\n",
1531
+ " 'Banashankari', 'Banashankari Stage II', 'Banashankari Stage III',\n",
1532
+ " 'Banashankari Stage V', 'Banashankari Stage VI', 'Banaswadi',\n",
1533
+ " 'Banjara Layout', 'Bank Of Baroda Colony', 'Bannerghatta',\n",
1534
+ " 'Bannerghatta Road', 'Basapura', 'Basava Nagar', 'Basavanagara',\n",
1535
+ " 'Basavanapura', 'Basavangudi', 'Basavanna Nagar',\n",
1536
+ " 'Basaveshwara Nagar', 'Basaveshwara Nagar Yelahanka',\n",
1537
+ " 'Battarahalli', 'Begur', 'Begur Road', 'Belathur', 'Belatur',\n",
1538
+ " 'Bellandur', 'Bellari Road', 'Bendiganahalli', 'Benson Town',\n",
1539
+ " 'Bethel Nagar', 'Bettahalsoor', 'Bhagyalakshmi Avenue',\n",
1540
+ " 'Bharathi Nagar', 'Bhoganhalli', 'Bhoopsandra',\n",
1541
+ " 'Bhuvaneshwari Nagar', 'Bhuvaneswari Nagar', 'Bidadi',\n",
1542
+ " 'Bidrahalli', 'Bikasipura', 'Bileshivale', 'Billekahalli',\n",
1543
+ " 'Binny Pete', 'Bisuvanahalli', 'Bommanahalli', 'Bommasandra',\n",
1544
+ " 'Bommasandra Industrial Area', 'Bommenahalli', 'Brindavan Layout',\n",
1545
+ " 'Brindavan Nagar', 'Brooke Bond First Cross', 'Brookefield',\n",
1546
+ " 'Budigere', 'Byadarahalli', 'Byagadadhenahalli', 'Byatarayanapura',\n",
1547
+ " 'Byrasandra', 'Byrathi Village', 'CQAL Layout', 'CV Raman Nagar',\n",
1548
+ " 'Cambridge Layout', 'Canara Bank Colony', 'Canara Bank Layout',\n",
1549
+ " 'Carmelaram', 'Celebrity Paradise Layout', 'Challaghatta',\n",
1550
+ " 'Chamrajpet', 'Chamundi Nagar', 'Chandapura', 'Chandra Layout',\n",
1551
+ " 'Channasandra', 'Channasandra Layout', 'Chelekare',\n",
1552
+ " 'Chennammana Kere', 'Chennammanakere Achukattu',\n",
1553
+ " 'Chennappa Layout', 'Chikka Banaswadi', 'Chikka Tirupathi',\n",
1554
+ " 'Chikkabanavar', 'Chikkadunnasandra', 'Chikkakannalli',\n",
1555
+ " 'Chikkalasandra', 'Chikkasandra', 'Chikkathoguru',\n",
1556
+ " 'Chinnapanahalli', 'Chokkanahalli', 'Cholanayakanahalli',\n",
1557
+ " 'Choodasandra', 'Classic Paradise Layout', 'Cleveland Town',\n",
1558
+ " 'Coconut Grove Layout', 'Coffee Board Layout', 'Cooke Town',\n",
1559
+ " 'Cottonpet', 'Cox Town', 'Crimson Layout',\n",
1560
+ " 'D Group Employees Layout', 'Daadys Gaarden Layout',\n",
1561
+ " 'Dairy Circle', 'Dasanapura', 'Dasarahalli', 'Dena Bank Colony',\n",
1562
+ " 'Devanahalli', 'Devanahalli Int. Airport', 'Devarabeesana Halli',\n",
1563
+ " 'Devarachikkanahalli', 'Devasthanagalu', 'Devi Nagar', 'Dinnur',\n",
1564
+ " 'Divya Unnathi Layout', 'Doctors Layout', 'Dodda Banaswadi',\n",
1565
+ " 'Dodda Kempaiah Layout', 'Dodda Nekkundi',\n",
1566
+ " 'Dodda Nekkundi Extension', 'Doddaballapur', 'Doddabanahalli',\n",
1567
+ " 'Doddabidrakallu', 'Doddabommasandra', 'Doddagubbi',\n",
1568
+ " 'Doddakallasandra', 'Doddakammanahalli', 'Doddakannelli',\n",
1569
+ " 'Doddanakundi Industrial Area 2', 'Doddanekundi', 'Doddathoguru',\n",
1570
+ " 'Dodsworth Layout', 'Dollar Scheme Colony', 'Dollars Colony',\n",
1571
+ " 'Dollars Layout', 'Domlur', 'Domlur Layout', 'Dommasandra',\n",
1572
+ " 'Doopanahalli', 'Dooravani Nagar', 'Dr Shivarama Karantha Nagar',\n",
1573
+ " 'Dwarka Nagar', 'ECC Road, Whitefield,', 'EPIP Zone', 'Ejipura',\n",
1574
+ " 'Electronic City', 'Electronic City Phase II',\n",
1575
+ " 'Electronic city Phase 1,', 'Electronics City Phase 1',\n",
1576
+ " 'Esther Enclave Layout', 'Ferrar Nagar', 'Frazer Town',\n",
1577
+ " 'Friends Colony', 'GD Layout', 'GM Palaya', 'Ganesha Block',\n",
1578
+ " 'Ganga Nagar', 'Garebhavipalya', 'Garudachar Palya', 'Gattahalli',\n",
1579
+ " 'Gaurava Nagar', 'Geddalahalli', 'Giri Nagar', 'Gkvk Layout',\n",
1580
+ " 'Glass Factory Layout', 'Gnana Bharathi', 'Gokula Extension',\n",
1581
+ " 'Gollahalli', 'Gollarapalya Hosahalli', 'Gopalapura',\n",
1582
+ " 'Gopalkrishna Nagar', 'Gottigere', 'Govindapura',\n",
1583
+ " 'Govindaraja Nagar Ward', 'Govindpura', 'Gowdanapalya',\n",
1584
+ " 'Green Domain Layout', 'Green Garden Layout', 'Green Glen Layout',\n",
1585
+ " 'Green View Layout', 'Green Woods Layout', 'Gubbalala',\n",
1586
+ " 'Guddadahalli', 'Gulimangala', 'Gunjur', 'Gunjur Palya',\n",
1587
+ " 'HAL 2nd Stage', 'HAL 3rd Stage', 'HBR Layout', 'HMT Layout',\n",
1588
+ " 'HOSUR MAIN ROAD', 'HRBR Layout', 'HSR Layout', 'Hadosiddapura',\n",
1589
+ " 'Hagadur', 'Hallehalli', 'Hanumanth Nagar', 'Hanumantha Nagar',\n",
1590
+ " 'Haralur Road', 'Harappanahalli', 'Harlur', 'Harsha Layout',\n",
1591
+ " 'Hebbal', 'Hebbal Kempapura', 'Hegde Nagar', 'Hegganahalli',\n",
1592
+ " 'Hennur', 'Hennur Bande', 'Hennur Gardens', 'Hennur Road',\n",
1593
+ " 'Herohalli', 'Hessarghatta', 'Himagiri Meadows', 'Hiremath Layout',\n",
1594
+ " 'Hongasandra', 'Hoodi', 'Hoodi Circle,', 'Hoodi Layout',\n",
1595
+ " 'Horamavu Agara', 'Horamavu Banaswadi', 'Hormavu', 'Hosa Road',\n",
1596
+ " 'Hosahalli Extension', 'Hosakerehalli', 'Hosakerehalli Layout',\n",
1597
+ " 'Hosapalya', 'Hoskote', 'Hosur Road', 'Hoysalanagar', 'Hulimavu',\n",
1598
+ " 'Huskur', 'ISRO Layout', 'ITI Layout', 'ITPL', 'Iblur Village',\n",
1599
+ " 'Immadihalli', 'Indira Nagar', 'Ittamadu', 'J C Nagar',\n",
1600
+ " 'JCR Layout', 'JP Nagar', 'JP Nagar 7th Phase,',\n",
1601
+ " 'JP Nagar 8th Phase,', 'Jai Bheema Nagar', 'Jakkasandra Extension',\n",
1602
+ " 'Jakkur', 'Jakkur Plantation', 'Jakkuru Layout', 'Jalahalli',\n",
1603
+ " 'Jalahalli East', 'Jalahalli West', 'Janatha Colony',\n",
1604
+ " 'Jaya Mahal layout', 'Jayamahal', 'Jayanagar', 'Jayanti Nagar',\n",
1605
+ " 'Jeevan bima nagar', 'Jigani', 'Jinkethimmanahalli',\n",
1606
+ " 'Jnana Ganga Nagar', 'Jnanabharathi Layout', 'Judicial Layout',\n",
1607
+ " 'Judicial Layout, Kanakapura Road,', 'Jyothi Nagar', 'KEB Colony',\n",
1608
+ " 'KR Garden', 'KR Layout', 'KR Puram', 'KSRTC Layout',\n",
1609
+ " 'KUDLU MAIN ROAD', 'Kachanayakanahalli', 'Kacharakanahalli',\n",
1610
+ " 'Kada Agrahara', 'Kadabagere', 'Kadubeesanahalli', 'Kadugodi',\n",
1611
+ " 'Kadugondanahalli', 'Kaggadasapura', 'Kaggalipura',\n",
1612
+ " 'Kaikondrahalli', 'Kalena Agrahara', 'Kalkere', 'Kallumantapa',\n",
1613
+ " 'Kalyan nagar', 'Kamakshipalya', 'Kamakya Layout', 'Kamala Nagar',\n",
1614
+ " 'Kambipura', 'Kammagondahalli', 'Kammanahalli', 'Kammasandra',\n",
1615
+ " 'Kanaka Nagar', 'Kanakapura', 'Kanakpura Road', 'Kannamangala',\n",
1616
+ " 'Kariyammana Agrahara', 'Karuna Nagar', 'Kasavanhalli',\n",
1617
+ " 'Kashi Nagar', 'Kasturi Nagar', 'Kathriguppe', 'Kattigenahalli',\n",
1618
+ " 'Kaval Byrasandra', 'Kaverappa Layout', 'Kaveri Nagar',\n",
1619
+ " 'Kempapura', 'Kempegowda Nagar', 'Kenchenahalli', 'Kenchenhalli',\n",
1620
+ " 'Kengeri', 'Kengeri Hobli', 'Kengeri Satellite Town',\n",
1621
+ " 'Kereguddadahalli', 'Keshava Nagar', 'Kirloskar Layout',\n",
1622
+ " 'Kithaganur', 'Kodathi', 'Kodbisanhalli', 'Kodichikkanahalli',\n",
1623
+ " 'Kodigehaali', 'Kodigehalli', 'Kodihalli', 'Kodipalya', 'Kogilu',\n",
1624
+ " 'Konanakunte', 'Konanakunte Cross', 'Konena Agrahara',\n",
1625
+ " 'Koramangala', 'Koramangala Industrial Layout', 'Kothannur',\n",
1626
+ " 'Kothanur', 'Kothnoor Dinne', 'Krishna Reddy Layout', 'Kudlu',\n",
1627
+ " 'Kudlu Gate', 'Kullappa Colony', 'Kumara Park', 'Kumarapalli',\n",
1628
+ " 'Kumaraswami Layout', 'Kumbena Agrahara', 'Kundalahalli',\n",
1629
+ " 'Kundalahalli Colony', 'Kurubarahalli', 'Kuvempu Nagar',\n",
1630
+ " 'Kyalasanahalli', 'LB Shastri Nagar', 'Laggere', 'Lake City',\n",
1631
+ " 'Lakshmiamma Garden', 'Lakshminarayana Pura', 'Lakshmipura',\n",
1632
+ " 'Lal Bahadur Shastri Nagar', 'Langford Town', 'Lavakusha Nagar',\n",
1633
+ " 'Laxmi Sagar Layout', 'Lingadheeranahalli', 'Lingarajapuram',\n",
1634
+ " 'Lottegolla Halli', 'MCECHS layout', 'MLA Layout', 'MS Pallya',\n",
1635
+ " 'Madiwala', 'Magadi Road', 'Mahadevpura', 'Mahaganapathy Nagar',\n",
1636
+ " 'Mahalakshmi Layout', 'Mahalakshmi Puram', 'Maithri Layout',\n",
1637
+ " 'Makali', 'Mallasandra', 'Mallathahalli', 'Malleshpalya',\n",
1638
+ " 'Malleshwaram', 'Manayata Tech Park', 'Mangammanapalya',\n",
1639
+ " 'Manjunatha Layout', 'Manorayana Palya', 'Maragondanahalli',\n",
1640
+ " 'Marasandra', 'Marathahalli', 'Marenahalli', 'Margondanahalli',\n",
1641
+ " 'Mariyannapalya', 'Marsur', 'Maruthi Nagar', 'Maruthi Sevanagar',\n",
1642
+ " 'Mathikere', 'Mathikere Extension', 'Medahalli', 'Meenakunte',\n",
1643
+ " 'Mico Layout', 'Moodalapalya', 'Motappa Layout',\n",
1644
+ " 'Muneshwara Nagar', 'Munivenkatppa Layout', 'Munnekollal',\n",
1645
+ " 'Murugeshpalya', 'Muthurayya Swamy Layout', 'Mylasandra',\n",
1646
+ " 'Mysore Highway', 'Mysore Road', 'NGR Layout', 'NRI Layout',\n",
1647
+ " 'NS Palya', 'Nagadevanahalli', 'Naganathapura',\n",
1648
+ " 'Nagappa Reddy Layout', 'Nagaraja Garden', 'Nagarbhavi',\n",
1649
+ " 'Nagasandra', 'Nagashetty Halli', 'Nagavara', 'Nagavarapalya',\n",
1650
+ " 'Nagawara Junction', 'Nagondanahalli', 'Naidu Layout',\n",
1651
+ " 'Nallurhalli', 'Nandi Durga Road', 'Nandi Hills', 'Nandini Layout',\n",
1652
+ " 'Nanjappa Garden', 'Nanjappa Layout', 'Narayana Nagar 1st Block',\n",
1653
+ " 'Narayanapura', 'Nayandanahalli', 'Near International Airport',\n",
1654
+ " 'Neeladri Nagar', 'Neelamangala', 'Nehru Nagar', 'Nelamangala',\n",
1655
+ " 'New Gurappana Palya', 'New Thippasandra', 'Ngef Layout',\n",
1656
+ " 'Nobo Nagar', 'Nyanappana Halli', 'OLd Gurappanapalya',\n",
1657
+ " 'OMBR Layout', 'Off Sarjapur Road,', 'Old Airport Road',\n",
1658
+ " 'Old Madras Road', 'Omarbagh Layout', 'Omkar Nagar',\n",
1659
+ " 'Outer Ring Road East', 'P&T Layout', 'Padmanabhanagar',\n",
1660
+ " 'Pai Layout', 'Palace Road', 'Pampa Extension', 'Panathur',\n",
1661
+ " 'Panduranga Nagar', 'Parappana Agrahara', 'Patelappa Layout',\n",
1662
+ " 'Pattanagere', 'Pattandur Agrahara', 'Peenya',\n",
1663
+ " 'Phase 1 Kammasandra', 'Poorna Pragna Layout',\n",
1664
+ " 'Poornapragna Housing Society Layout', 'Pragathi Nagar',\n",
1665
+ " 'Prashanth Nagar', 'Prithvi Layout', 'Pulkeshi Nagar',\n",
1666
+ " 'Puttanahalli', 'R.T. Nagar', 'RMV', 'RMV 2nd Stage',\n",
1667
+ " 'RMV Extension', 'RMV Extension Stage 2', 'RPC layout',\n",
1668
+ " 'RR Layout', 'RWF West Colony', 'Rachenahalli',\n",
1669
+ " 'Raghavendra Layout', 'Raghavendra Nagar', 'Raghuvanahalli',\n",
1670
+ " 'Raja Rajeshwari Nagar', 'Raja Rajeshwari Nagar 5th Stage',\n",
1671
+ " 'Rajaji Nagar', 'Rajankunte', 'Rajarajeshwari Nagara',\n",
1672
+ " 'Rajarajeshwari nagar', 'Rajarajeshwarinagar', 'Rajasree Layout',\n",
1673
+ " 'Rajiv Gandhi Nagar', 'Rajiv Nagar', 'Ramagondanahalli',\n",
1674
+ " 'Ramakrishnappa Layout', 'Ramamurthy Nagar',\n",
1675
+ " 'Ramamurthy Nagar Extension', 'Ramanashree Enclave',\n",
1676
+ " 'Ramanjaneyanagar', 'Ramesh Nagar', 'Rayasandra',\n",
1677
+ " 'Reliaable Tranquil Layout', 'Remco Bhel Layout', 'Richards Town',\n",
1678
+ " 'Richmond Town', 'Roopena Agrahara', 'Rustam Bagh Layout',\n",
1679
+ " 'SRINIVASAPURA', 'Sadanand Nagar', 'Sadaramangala',\n",
1680
+ " 'Sahakara Nagar', 'Sai Gardens', 'Samethanahalli',\n",
1681
+ " 'Sampangi Rama Nagar', 'Sampigehalli', 'Sanjay nagar',\n",
1682
+ " 'Sanjeevini Nagar', 'Sanne Amanikere', 'Sarakki Nagar', 'Sarjapur',\n",
1683
+ " 'Sarjapur Road', 'Sarjapur Road,', 'Sarjapura - Attibele Road',\n",
1684
+ " 'Sarvabhouma Nagar', 'Sathya Layout', 'Sathya Sai Layout',\n",
1685
+ " 'Sector 1 HSR Layout', 'Sector 2 HSR Layout',\n",
1686
+ " 'Sector 6 HSR Layout', 'Sector 7 HSR Layout', 'Seegehalli',\n",
1687
+ " 'Seetharampalya', 'Seshadripuram', 'Shampura', 'Shankarapuram',\n",
1688
+ " 'Shanthi Layout', 'Shanti Nagar', 'Shantiniketan Layout',\n",
1689
+ " 'Shettigere', 'Shetty Halli', 'Shikaripalya', 'Shingapura',\n",
1690
+ " 'Shirdi Sai Layout', 'Shivaji Nagar', 'Shree Ananth Nagar Layout',\n",
1691
+ " 'Siddapura', 'Sidedahalli', 'Silk Board', 'Silver Springs Layout',\n",
1692
+ " 'Singanayakanahalli', 'Singapura Village', 'Singasandra',\n",
1693
+ " 'Singena Agrahara', 'Sneha Colony', 'Somasundara Palya', 'Sompura',\n",
1694
+ " 'Sonnenahalli', 'Soundarya Layout', 'Sri Balaji Krupa Layout',\n",
1695
+ " 'Sri Sai Layout', 'Sri Venkateshpura Layout', 'Srinagar',\n",
1696
+ " 'Srinivasa Nagar', 'Srirampura', 'Srirampuram', \"St. John's Road\",\n",
1697
+ " 'Stage-4 Bommanahalli', 'Subash Nagar', 'Subramanyapura',\n",
1698
+ " 'Suddaguntepalya', 'Sultan Palaya', 'Sunder Ram Shetty Nagar',\n",
1699
+ " 'Sunkadakatte', 'Surabhi Layout', 'Suraksha Nagar',\n",
1700
+ " 'Syndicate Bank Colony', 'T Dasarahalli', 'T.C PALYA', 'TC Palaya',\n",
1701
+ " 'Tala Cauvery Layout', 'Talaghattapura', 'Tasker Town',\n",
1702
+ " 'Tata Nagar', 'Tavarekere', 'Teachers Colony', 'Tejaswini Nagar',\n",
1703
+ " 'Telecom Layout', 'Thanisandra', 'Thanisandra Main Road,',\n",
1704
+ " 'Thigalarapalya', 'Thippasandra', 'Thirumenahalli', 'Thomas Town',\n",
1705
+ " 'Thubarahalli', 'Thyagaraja Nagar', 'Tigalarpalya', 'Tindlu',\n",
1706
+ " 'Tirumanahalli', 'Tumkur Road', 'Tunganagara', 'Udaya Nagar',\n",
1707
+ " 'Udayapur Village', 'Ullal Uppanagar', 'Ulsoor',\n",
1708
+ " 'Upadhyaya Layout', 'Upkar Layout', 'Uttarahalli', 'VGP Layout',\n",
1709
+ " 'VHBCS Layout', 'Vadarpalya', 'Vaderahalli', 'Vaishnavi Layout',\n",
1710
+ " 'Vajarahalli', 'Varanasi', 'Varsova Layout', 'Varthur',\n",
1711
+ " 'Varthur Road', 'Varthur Road,', 'Vasantha Vallabha Nagar',\n",
1712
+ " 'Vasanthapura', 'Veer Sandra', 'Veerannapalya', 'Veersandra',\n",
1713
+ " 'Venkatadri Layout', 'Venkatapura', 'Venugopal Reddy Layout',\n",
1714
+ " 'Vibuthipura', 'Victoria Layout', 'Vidyaranyapura',\n",
1715
+ " 'Vignana Nagar', 'Vijaya Bank Layout', 'Vijayanagar', 'Vijinapura',\n",
1716
+ " 'Vimanapura', 'Vinayak Nagar', 'Vinayaka Nagar', 'Virat Nagar',\n",
1717
+ " 'Virupakshapura', 'Vishveshwarya Layout',\n",
1718
+ " 'Vishwanatha Nagenahalli', 'Vishwapriya Layout',\n",
1719
+ " 'Vishwapriya Nagar', 'Vittal Nagar', 'Vittasandra', 'Vivek Nagar',\n",
1720
+ " 'Volagerekallahalli', 'Weavers Colony', 'Whitefield',\n",
1721
+ " 'Whitefield,', 'Wilson Garden', 'Yarandahalli', 'Yelachenahalli',\n",
1722
+ " 'Yelahanka', 'Yelahanka New Town', 'Yelenahalli', 'Yemlur',\n",
1723
+ " 'Yeshwanthpur', 'Yeshwanthpur Industrial Suburb', 'cooketown',\n",
1724
+ " 'manyata park', 'tc.palya'], dtype=object)]"
1725
+ ]
1726
+ },
1727
+ "execution_count": 45,
1728
+ "metadata": {},
1729
+ "output_type": "execute_result"
1730
+ }
1731
+ ],
1732
+ "source": [
1733
+ "ohe.categories_"
1734
+ ]
1735
+ },
1736
+ {
1737
+ "cell_type": "code",
1738
+ "execution_count": 46,
1739
+ "id": "4db98a15-351d-4aaa-b296-0177bb60cd94",
1740
+ "metadata": {},
1741
+ "outputs": [
1742
+ {
1743
+ "data": {
1744
+ "text/plain": [
1745
+ "array([[0.00e+00, 0.00e+00, 0.00e+00, ..., 3.00e+00, 1.54e+03, 3.00e+00],\n",
1746
+ " [1.00e+00, 0.00e+00, 0.00e+00, ..., 1.00e+00, 6.00e+02, 1.00e+00],\n",
1747
+ " [1.00e+00, 0.00e+00, 0.00e+00, ..., 4.00e+00, 3.15e+03, 4.00e+00],\n",
1748
+ " ...,\n",
1749
+ " [0.00e+00, 0.00e+00, 0.00e+00, ..., 2.00e+00, 8.80e+02, 2.00e+00],\n",
1750
+ " [0.00e+00, 0.00e+00, 0.00e+00, ..., 2.00e+00, 1.00e+03, 2.00e+00],\n",
1751
+ " [0.00e+00, 0.00e+00, 0.00e+00, ..., 3.00e+00, 1.40e+03, 2.00e+00]])"
1752
+ ]
1753
+ },
1754
+ "execution_count": 46,
1755
+ "metadata": {},
1756
+ "output_type": "execute_result"
1757
+ }
1758
+ ],
1759
+ "source": [
1760
+ "x = np.append(location_encoding[:,1:],np.array(df2.drop(['location','price'],axis=1)),axis=1)\n",
1761
+ "x"
1762
+ ]
1763
+ },
1764
+ {
1765
+ "cell_type": "code",
1766
+ "execution_count": null,
1767
+ "id": "1648f0c5-19ba-474f-8e4b-72fa066a6972",
1768
+ "metadata": {},
1769
+ "outputs": [],
1770
+ "source": []
1771
+ },
1772
+ {
1773
+ "cell_type": "code",
1774
+ "execution_count": 47,
1775
+ "id": "44a3f1b9-c16e-4829-893c-598a042819e0",
1776
+ "metadata": {},
1777
+ "outputs": [
1778
+ {
1779
+ "data": {
1780
+ "text/plain": [
1781
+ "(8764,)"
1782
+ ]
1783
+ },
1784
+ "execution_count": 47,
1785
+ "metadata": {},
1786
+ "output_type": "execute_result"
1787
+ }
1788
+ ],
1789
+ "source": [
1790
+ "y = df2['price']\n",
1791
+ "y.shape"
1792
+ ]
1793
+ },
1794
+ {
1795
+ "cell_type": "code",
1796
+ "execution_count": 48,
1797
+ "id": "91524bc6-2a66-4543-a4d3-75b19c9c5a70",
1798
+ "metadata": {},
1799
+ "outputs": [
1800
+ {
1801
+ "data": {
1802
+ "text/plain": [
1803
+ "(8764, 754)"
1804
+ ]
1805
+ },
1806
+ "execution_count": 48,
1807
+ "metadata": {},
1808
+ "output_type": "execute_result"
1809
+ }
1810
+ ],
1811
+ "source": [
1812
+ "x.shape"
1813
+ ]
1814
+ },
1815
+ {
1816
+ "cell_type": "code",
1817
+ "execution_count": 49,
1818
+ "id": "5f82b539-6aa7-4546-967a-40558898c55b",
1819
+ "metadata": {},
1820
+ "outputs": [],
1821
+ "source": [
1822
+ "from sklearn.model_selection import train_test_split\n",
1823
+ "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)"
1824
+ ]
1825
+ },
1826
+ {
1827
+ "cell_type": "code",
1828
+ "execution_count": 50,
1829
+ "id": "4a3e86aa-e91a-43f0-865a-889d0b4f3ae1",
1830
+ "metadata": {},
1831
+ "outputs": [],
1832
+ "source": [
1833
+ "from sklearn.linear_model import LinearRegression\n",
1834
+ "lr = LinearRegression()"
1835
+ ]
1836
+ },
1837
+ {
1838
+ "cell_type": "code",
1839
+ "execution_count": 51,
1840
+ "id": "f313918d-6d8b-4868-8f3b-b793de473bdb",
1841
+ "metadata": {},
1842
+ "outputs": [
1843
+ {
1844
+ "data": {
1845
+ "text/plain": [
1846
+ "-1667207110599217.2"
1847
+ ]
1848
+ },
1849
+ "execution_count": 51,
1850
+ "metadata": {},
1851
+ "output_type": "execute_result"
1852
+ }
1853
+ ],
1854
+ "source": [
1855
+ "lr.fit(x_train,y_train)\n",
1856
+ "lr.score(x_test,y_test)"
1857
+ ]
1858
+ },
1859
+ {
1860
+ "cell_type": "code",
1861
+ "execution_count": 52,
1862
+ "id": "636ea141-22a8-4338-89ea-da3f47b4c298",
1863
+ "metadata": {},
1864
+ "outputs": [],
1865
+ "source": [
1866
+ "from sklearn.model_selection import GridSearchCV,cross_val_score,ShuffleSplit\n",
1867
+ "from sklearn.tree import DecisionTreeRegressor\n",
1868
+ "from sklearn.linear_model import Lasso"
1869
+ ]
1870
+ },
1871
+ {
1872
+ "cell_type": "code",
1873
+ "execution_count": 53,
1874
+ "id": "a4117694-4aa7-4773-ab21-34b4f3d1516b",
1875
+ "metadata": {},
1876
+ "outputs": [],
1877
+ "source": [
1878
+ "choices = {\n",
1879
+ " 'lr':{\n",
1880
+ " 'model':LinearRegression(),\n",
1881
+ " 'params':{\n",
1882
+ " 'normalize':[True,False]\n",
1883
+ " }\n",
1884
+ " },\n",
1885
+ " 'lasso': {\n",
1886
+ " 'model':Lasso(),\n",
1887
+ " 'params':{\n",
1888
+ " 'alpha':[1,2],\n",
1889
+ " 'selection' : ['cyclic', 'random']\n",
1890
+ " }\n",
1891
+ " },\n",
1892
+ " 'tree': {\n",
1893
+ " 'model':DecisionTreeRegressor(),\n",
1894
+ " 'params':{\n",
1895
+ " 'criterion' : [\"mse\", \"friedman_mse\"],\n",
1896
+ " 'splitter' : [\"best\", \"random\"]\n",
1897
+ " }\n",
1898
+ " }\n",
1899
+ "}"
1900
+ ]
1901
+ },
1902
+ {
1903
+ "cell_type": "code",
1904
+ "execution_count": 54,
1905
+ "id": "41880a33-cc7b-48e1-b650-cbdb32ed9243",
1906
+ "metadata": {},
1907
+ "outputs": [
1908
+ {
1909
+ "data": {
1910
+ "text/html": [
1911
+ "<div>\n",
1912
+ "<style scoped>\n",
1913
+ " .dataframe tbody tr th:only-of-type {\n",
1914
+ " vertical-align: middle;\n",
1915
+ " }\n",
1916
+ "\n",
1917
+ " .dataframe tbody tr th {\n",
1918
+ " vertical-align: top;\n",
1919
+ " }\n",
1920
+ "\n",
1921
+ " .dataframe thead th {\n",
1922
+ " text-align: right;\n",
1923
+ " }\n",
1924
+ "</style>\n",
1925
+ "<table border=\"1\" class=\"dataframe\">\n",
1926
+ " <thead>\n",
1927
+ " <tr style=\"text-align: right;\">\n",
1928
+ " <th></th>\n",
1929
+ " <th>model</th>\n",
1930
+ " <th>best_score</th>\n",
1931
+ " <th>best_params</th>\n",
1932
+ " </tr>\n",
1933
+ " </thead>\n",
1934
+ " <tbody>\n",
1935
+ " <tr>\n",
1936
+ " <th>0</th>\n",
1937
+ " <td>lr</td>\n",
1938
+ " <td>-7.365020e+14</td>\n",
1939
+ " <td>{'normalize': False}</td>\n",
1940
+ " </tr>\n",
1941
+ " <tr>\n",
1942
+ " <th>1</th>\n",
1943
+ " <td>lasso</td>\n",
1944
+ " <td>8.020881e-01</td>\n",
1945
+ " <td>{'alpha': 1, 'selection': 'random'}</td>\n",
1946
+ " </tr>\n",
1947
+ " <tr>\n",
1948
+ " <th>2</th>\n",
1949
+ " <td>tree</td>\n",
1950
+ " <td>7.988632e-01</td>\n",
1951
+ " <td>{'criterion': 'mse', 'splitter': 'random'}</td>\n",
1952
+ " </tr>\n",
1953
+ " </tbody>\n",
1954
+ "</table>\n",
1955
+ "</div>"
1956
+ ],
1957
+ "text/plain": [
1958
+ " model best_score best_params\n",
1959
+ "0 lr -7.365020e+14 {'normalize': False}\n",
1960
+ "1 lasso 8.020881e-01 {'alpha': 1, 'selection': 'random'}\n",
1961
+ "2 tree 7.988632e-01 {'criterion': 'mse', 'splitter': 'random'}"
1962
+ ]
1963
+ },
1964
+ "execution_count": 54,
1965
+ "metadata": {},
1966
+ "output_type": "execute_result"
1967
+ }
1968
+ ],
1969
+ "source": [
1970
+ "def best_model_param_pair(choices):\n",
1971
+ " cv = ShuffleSplit(n_splits=5,test_size=0.2,random_state=0)\n",
1972
+ " data = []\n",
1973
+ " for mn,mp in choices.items():\n",
1974
+ " gs = GridSearchCV(mp['model'],mp['params'],cv=cv)\n",
1975
+ " gs.fit(x,y)\n",
1976
+ " data.append({\n",
1977
+ " 'model':mn,\n",
1978
+ " 'best_score': gs.best_score_,\n",
1979
+ " 'best_params':gs.best_params_\n",
1980
+ " })\n",
1981
+ " return pd.DataFrame(data)\n",
1982
+ "daaa= best_model_param_pair(choices)\n",
1983
+ "daaa"
1984
+ ]
1985
+ },
1986
+ {
1987
+ "cell_type": "code",
1988
+ "execution_count": 55,
1989
+ "id": "cc5af773-698a-4e68-b974-5190b4dac88b",
1990
+ "metadata": {},
1991
+ "outputs": [],
1992
+ "source": [
1993
+ "def predict(location,bhk,tsqft,bath):\n",
1994
+ " x=ohe.transform([[location]]).toarray()\n",
1995
+ " x=np.append(x[:,1:],np.array([bhk,tsqft,bath]))\n",
1996
+ " print(lr.predict(x.reshape(1,-1)))"
1997
+ ]
1998
+ },
1999
+ {
2000
+ "cell_type": "code",
2001
+ "execution_count": 56,
2002
+ "id": "fa5a025a-35df-4bed-853d-1aeabbae4583",
2003
+ "metadata": {},
2004
+ "outputs": [
2005
+ {
2006
+ "name": "stdout",
2007
+ "output_type": "stream",
2008
+ "text": [
2009
+ "[94.38034082]\n"
2010
+ ]
2011
+ }
2012
+ ],
2013
+ "source": [
2014
+ "predict('Devarabeesana Halli',2,1100.0,2.0)"
2015
+ ]
2016
+ },
2017
+ {
2018
+ "cell_type": "code",
2019
+ "execution_count": 57,
2020
+ "id": "6e3dca9e-dd82-4add-80ed-d4ef7b3a2f07",
2021
+ "metadata": {},
2022
+ "outputs": [
2023
+ {
2024
+ "name": "stderr",
2025
+ "output_type": "stream",
2026
+ "text": [
2027
+ "<ipython-input-57-5d7303250545>:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
2028
+ " df2[df2.location=='Devarabeesana Halli'][df2.total_sqft==1100]\n"
2029
+ ]
2030
+ },
2031
+ {
2032
+ "data": {
2033
+ "text/html": [
2034
+ "<div>\n",
2035
+ "<style scoped>\n",
2036
+ " .dataframe tbody tr th:only-of-type {\n",
2037
+ " vertical-align: middle;\n",
2038
+ " }\n",
2039
+ "\n",
2040
+ " .dataframe tbody tr th {\n",
2041
+ " vertical-align: top;\n",
2042
+ " }\n",
2043
+ "\n",
2044
+ " .dataframe thead th {\n",
2045
+ " text-align: right;\n",
2046
+ " }\n",
2047
+ "</style>\n",
2048
+ "<table border=\"1\" class=\"dataframe\">\n",
2049
+ " <thead>\n",
2050
+ " <tr style=\"text-align: right;\">\n",
2051
+ " <th></th>\n",
2052
+ " <th>location</th>\n",
2053
+ " <th>size</th>\n",
2054
+ " <th>total_sqft</th>\n",
2055
+ " <th>bath</th>\n",
2056
+ " <th>price</th>\n",
2057
+ " </tr>\n",
2058
+ " </thead>\n",
2059
+ " <tbody>\n",
2060
+ " <tr>\n",
2061
+ " <th>2764</th>\n",
2062
+ " <td>Devarabeesana Halli</td>\n",
2063
+ " <td>2</td>\n",
2064
+ " <td>1100.0</td>\n",
2065
+ " <td>2.0</td>\n",
2066
+ " <td>70.0</td>\n",
2067
+ " </tr>\n",
2068
+ " </tbody>\n",
2069
+ "</table>\n",
2070
+ "</div>"
2071
+ ],
2072
+ "text/plain": [
2073
+ " location size total_sqft bath price\n",
2074
+ "2764 Devarabeesana Halli 2 1100.0 2.0 70.0"
2075
+ ]
2076
+ },
2077
+ "execution_count": 57,
2078
+ "metadata": {},
2079
+ "output_type": "execute_result"
2080
+ }
2081
+ ],
2082
+ "source": [
2083
+ "df2[df2.location=='Devarabeesana Halli'][df2.total_sqft==1100]"
2084
+ ]
2085
+ },
2086
+ {
2087
+ "cell_type": "code",
2088
+ "execution_count": 58,
2089
+ "id": "3214fdf2-6268-4c8f-9534-e9d106fec8f2",
2090
+ "metadata": {},
2091
+ "outputs": [
2092
+ {
2093
+ "name": "stdout",
2094
+ "output_type": "stream",
2095
+ "text": [
2096
+ "[84.99999962]\n"
2097
+ ]
2098
+ }
2099
+ ],
2100
+ "source": [
2101
+ "predict('1st Block BEL Layout',3,1540.0,3.0)"
2102
+ ]
2103
+ },
2104
+ {
2105
+ "cell_type": "code",
2106
+ "execution_count": 59,
2107
+ "id": "c5fd9afa-fd72-41c9-8610-f8712d55af35",
2108
+ "metadata": {},
2109
+ "outputs": [],
2110
+ "source": [
2111
+ "import pickle\n",
2112
+ "with open('banglore_price_prediction_model.pickle','wb') as f:\n",
2113
+ " pickle.dump(lr,f)"
2114
+ ]
2115
+ },
2116
+ {
2117
+ "cell_type": "code",
2118
+ "execution_count": 60,
2119
+ "id": "40dc20e3-6872-4eec-80b3-dab5c140e445",
2120
+ "metadata": {},
2121
+ "outputs": [],
2122
+ "source": [
2123
+ "with open('location_encoder.pickle','wb') as l:\n",
2124
+ " pickle.dump(ohe,l)"
2125
+ ]
2126
+ },
2127
+ {
2128
+ "cell_type": "code",
2129
+ "execution_count": 61,
2130
+ "id": "b2cf33b0-97d1-4a53-9972-9d26b4b6c3b7",
2131
+ "metadata": {},
2132
+ "outputs": [
2133
+ {
2134
+ "data": {
2135
+ "text/plain": [
2136
+ "'1st Block BEL Layout'"
2137
+ ]
2138
+ },
2139
+ "execution_count": 61,
2140
+ "metadata": {},
2141
+ "output_type": "execute_result"
2142
+ }
2143
+ ],
2144
+ "source": [
2145
+ "with open('location_encoder.pickle','rb') as lc:\n",
2146
+ " le= pickle.load(lc)\n",
2147
+ "loc = le.categories_[0]\n",
2148
+ "loc[0]"
2149
+ ]
2150
+ },
2151
+ {
2152
+ "cell_type": "code",
2153
+ "execution_count": 65,
2154
+ "id": "79ba7e3d-9b96-4d22-a2f9-c79f46ba20e3",
2155
+ "metadata": {},
2156
+ "outputs": [
2157
+ {
2158
+ "data": {
2159
+ "text/plain": [
2160
+ "array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2161
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2162
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2163
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2164
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2165
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2166
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2167
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2168
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2169
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2170
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2171
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2172
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2173
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2174
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2175
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2176
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2177
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2178
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2179
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2180
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2181
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2182
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2183
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2184
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2185
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2186
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2187
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2188
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2189
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2190
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2191
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2192
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2193
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2194
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2195
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2196
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2197
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2198
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2199
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2200
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2201
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2202
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2203
+ " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
2204
+ " 0., 0., 0.])"
2205
+ ]
2206
+ },
2207
+ "execution_count": 65,
2208
+ "metadata": {},
2209
+ "output_type": "execute_result"
2210
+ }
2211
+ ],
2212
+ "source": [
2213
+ "le.transform([['1st Block BEL Layout']]).toarray()[0][1:]"
2214
+ ]
2215
+ },
2216
+ {
2217
+ "cell_type": "code",
2218
+ "execution_count": null,
2219
+ "id": "ad3fb7ef-6346-414e-9ddd-24d4743830a0",
2220
+ "metadata": {},
2221
+ "outputs": [],
2222
+ "source": []
2223
+ }
2224
+ ],
2225
+ "metadata": {
2226
+ "kernelspec": {
2227
+ "display_name": "Python 3",
2228
+ "language": "python",
2229
+ "name": "python3"
2230
+ },
2231
+ "language_info": {
2232
+ "codemirror_mode": {
2233
+ "name": "ipython",
2234
+ "version": 3
2235
+ },
2236
+ "file_extension": ".py",
2237
+ "mimetype": "text/x-python",
2238
+ "name": "python",
2239
+ "nbconvert_exporter": "python",
2240
+ "pygments_lexer": "ipython3",
2241
+ "version": "3.8.8"
2242
+ }
2243
+ },
2244
+ "nbformat": 4,
2245
+ "nbformat_minor": 5
2246
+ }
assets/banglore_price_prediction_model.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdcc5725563728fae38ba901ff4850b8c3df42614eebafa92d2781dbc36756fd
3
+ size 12533
assets/datasets/.ipynb_checkpoints/Bengaluru_House_Data-checkpoint.csv ADDED
The diff for this file is too large to render. See raw diff
 
assets/datasets/Bengaluru_House_Data.csv ADDED
The diff for this file is too large to render. See raw diff
 
assets/location_encoder.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:973bb7f926fd04dee19a265920bddc0289137611e91ed26bdc874bf546171eb4
3
+ size 13121
assets/test.js ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ function l(a, b, c) {
3
+
4
+ return Function.prototype.call.apply(Array.prototype.slice, arguments)
5
+ }
6
+ function m(a, b, c) {
7
+
8
+ var e = l(arguments, 2);
9
+ return function () {
10
+
11
+ return b.apply(a, e)
12
+ }
13
+ }
14
+ function n(a, b) {
15
+
16
+ var c = new p(b);
17
+ for (c.h = [a]; c.h.length;) {
18
+
19
+ var e = c, d = c.h.shift(); e.i(d);
20
+ for (d = d.firstChild; d; d = d.nextSibling)1 == d.nodeType && e.h.push(d)
21
+ }
22
+ }
23
+ function p(a) {
24
+
25
+ this.i = a
26
+ }
27
+ function q(a) {
28
+
29
+ a.style.display = ""
30
+ }
31
+ function r(a) {
32
+
33
+ a.style.display = "none"
34
+ };
35
+ var t = /\s*;\s*/;
36
+ function u(a, b) {
37
+
38
+ this.l.apply(this, arguments)
39
+ } u.prototype.l = function (a, b) {
40
+ this.a || (this.a = {
41
+
42
+ }); if (b) {
43
+ var c = this.a, e = b.a; for (d in e) c[d] = e[d] } else {
44
+ var d = this.a; e = v; for (c in e) d[c] = e[c] } this.a.$this = a; this.a.$context = this; this.f = "undefined" != typeof a && null != a ? a : ""; b || (this.a.$top = this.f) }; var v = {
45
+ $default: null }, w = []; function x(a) {
46
+ for (var b in a.a) delete a.a[b]; a.f = null; w.push(a) } function y(a, b, c) {
47
+ try {
48
+ return b.call(c, a.a, a.f) } catch (e) {
49
+ return v.$default } }
50
+ u.prototype.clone = function (a, b, c) {
51
+ if (0 < w.length) {
52
+ var e = w.pop(); u.call(e, a, this); a = e } else a = new u(a, this); a.a.$index = b; a.a.$count = c; return a }; var z; window.trustedTypes && (z = trustedTypes.createPolicy("jstemplate", {
53
+ createScript: function (a) {
54
+ return a } })); var A = {
55
+
56
+ }; function B(a) {
57
+ if (!A[a]) try {
58
+ var b = "(function(a_, b_) { with (a_) with (b_) return " + a + " }) ", c = window.trustedTypes ? z.createScript(b) : b; A[a] = window.eval(c) }
59
+ catch (e) {
60
+
61
+ } return A[a]
62
+ }
63
+
64
+ function E(a) {
65
+ var b = []; a = a.split(t); for (var c = 0, e = a.length; c < e; ++c) {
66
+ var d = a[c].indexOf(":"); if (!(0 > d)) {
67
+ var g = a[c].substr(0, d).replace(/^\s+/, "").replace(/\s+$/, ""); d = B(a[c].substr(d + 1)); b.push(g, d) } } return b }; function F() {
68
+
69
+ } var G = 0, H = {
70
+ 0: {
71
+
72
+ } }, I = {
73
+
74
+ }, J = {
75
+
76
+ }, K = []; function L(a) {
77
+ a.__jstcache || n(a, function (b) {
78
+ M(b) }) } var N = [["jsselect", B], ["jsdisplay", B], ["jsvalues", E], ["jsvars", E], ["jseval", function (a) {
79
+ var b = []; a = a.split(t); for (var c = 0, e = a.length; c < e; ++c)if (a[c]) {
80
+ var d = B(a[c]); b.push(d) } return b }], ["transclude", function (a) {
81
+ return a }], ["jscontent", B], ["jsskip", B]];
82
+ function M(a) {
83
+
84
+ if (a.__jstcache) return a.__jstcache; var b = a.getAttribute("jstcache"); if (null != b) return a.__jstcache = H[b]; b = K.length = 0; for (var c = N.length; b < c; ++b) {
85
+ var e = N[b][0], d = a.getAttribute(e); J[e] = d; null != d && K.push(e + "=" + d) } if (0 == K.length) return a.setAttribute("jstcache", "0"), a.__jstcache = H[0]; var g = K.join("&"); if (b = I[g]) return a.setAttribute("jstcache", b), a.__jstcache = H[b]; var h = {
86
+
87
+ }; b = 0; for (c = N.length; b < c; ++b) {
88
+ d = N[b]; e = d[0]; var f = d[1]; d = J[e]; null != d && (h[e] = f(d)) } b = "" + ++G; a.setAttribute("jstcache",
89
+ b); H[b] = h; I[g] = b; return a.__jstcache = h
90
+ } function P(a, b) {
91
+ a.j.push(b); a.o.push(0) } function Q(a) {
92
+ return a.c.length ? a.c.pop() : [] }
93
+ F.prototype.g = function (a, b) {
94
+
95
+ var c = R(b), e = c.transclude; if (e) (c = S(e)) ? (b.parentNode.replaceChild(c, b), e = Q(this), e.push(this.g, a, c), P(this, e)) : b.parentNode.removeChild(b); else if (c = c.jsselect) {
96
+
97
+ c = y(a, c, b); var d = b.getAttribute("jsinstance"); var g = !1; d && ("*" == d.charAt(0) ? (d = parseInt(d.substr(1), 10), g = !0) : d = parseInt(d, 10)); var h = null != c && "object" == typeof c && "number" == typeof c.length; e = h ? c.length : 1; var f = h && 0 == e; if (h) if (f) d ? b.parentNode.removeChild(b) : (b.setAttribute("jsinstance", "*0"), r(b)); else if (q(b),
98
+ null === d || "" === d || g && d < e - 1) {
99
+ g = Q(this); d = d || 0; for (h = e - 1; d < h; ++d) {
100
+ var k = b.cloneNode(!0); b.parentNode.insertBefore(k, b); T(k, c, d); f = a.clone(c[d], d, e); g.push(this.b, f, k, x, f, null) } T(b, c, d); f = a.clone(c[d], d, e); g.push(this.b, f, b, x, f, null); P(this, g) } else d < e ? (g = c[d], T(b, c, d), f = a.clone(g, d, e), g = Q(this), g.push(this.b, f, b, x, f, null), P(this, g)) : b.parentNode.removeChild(b); else null == c ? r(b) : (q(b), f = a.clone(c, 0, 1), g = Q(this), g.push(this.b, f, b, x, f, null), P(this, g))
101
+ } else this.b(a, b)
102
+ };
103
+ F.prototype.b = function (a, b) {
104
+
105
+ var c = R(b), e = c.jsdisplay; if (e) {
106
+ if (!y(a, e, b)) {
107
+ r(b); return } q(b) } if (e = c.jsvars) for (var d = 0, g = e.length; d < g; d += 2) {
108
+ var h = e[d], f = y(a, e[d + 1], b); a.a[h] = f } if (e = c.jsvalues) for (d = 0, g = e.length; d < g; d += 2)if (f = e[d], h = y(a, e[d + 1], b), "$" == f.charAt(0)) a.a[f] = h; else if ("." == f.charAt(0)) {
109
+ f = f.substr(1).split("."); for (var k = b, O = f.length, C = 0, U = O - 1; C < U; ++C) {
110
+ var D = f[C]; k[D] || (k[D] = {
111
+
112
+ }); k = k[D] } k[f[O - 1]] = h } else f && ("boolean" == typeof h ? h ? b.setAttribute(f, f) : b.removeAttribute(f) : b.setAttribute(f, "" + h));
113
+ if (e = c.jseval) for (d = 0, g = e.length; d < g; ++d)y(a, e[d], b); e = c.jsskip; if (!e || !y(a, e, b)) if (c = c.jscontent) {
114
+ if (c = "" + y(a, c, b), b.innerHTML != c) {
115
+ for (; b.firstChild;)e = b.firstChild, e.parentNode.removeChild(e); b.appendChild(this.m.createTextNode(c)) } } else {
116
+ c = Q(this); for (e = b.firstChild; e; e = e.nextSibling)1 == e.nodeType && c.push(this.g, a, e); c.length && P(this, c) }
117
+ }; function R(a) {
118
+ if (a.__jstcache) return a.__jstcache; var b = a.getAttribute("jstcache"); return b ? a.__jstcache = H[b] : M(a) }
119
+ function S(a, b) {
120
+ var c = document; if (b) {
121
+ var e = c.getElementById(a); if (!e) {
122
+ e = b(); var d = c.getElementById("jsts"); d || (d = c.createElement("div"), d.id = "jsts", r(d), d.style.position = "absolute", c.body.appendChild(d)); var g = c.createElement("div"); d.appendChild(g); g.innerHTML = e; e = c.getElementById(a) } c = e } else c = c.getElementById(a); return c ? (L(c), c = c.cloneNode(!0), c.removeAttribute("id"), c) : null } function T(a, b, c) {
123
+ c == b.length - 1 ? a.setAttribute("jsinstance", "*" + c) : a.setAttribute("jsinstance", "" + c) }; window.jstGetTemplate = S; window.JsEvalContext = u; window.jstProcess = function (a, b) {
124
+ var c = new F; L(b); c.m = b ? 9 == b.nodeType ? b : b.ownerDocument || document : document; var e = m(c, c.g, a, b), d = c.j = [], g = c.o = []; c.c = []; e(); for (var h, f, k; d.length;)h = d[d.length - 1], e = g[g.length - 1], e >= h.length ? (e = c, f = d.pop(), f.length = 0, e.c.push(f), g.pop()) : (f = h[e++], k = h[e++], h = h[e++], g[g.length - 1] = e, f.call(c, k, h)) };
server.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import util
2
+ import json
3
+ from flask import Flask, request, jsonify
4
+ app = Flask(__name__)
5
+
6
+ white =['http://127.0.0.1:5500']
7
@app.after_request
def add_cors_headers(response):
    """Attach CORS headers to the response when the caller is whitelisted.

    The caller's origin is read from the ``Origin`` request header and, as a
    fallback, derived from the ``Referer`` header.  CORS headers are added
    only when one of those origins appears in the module-level ``white``
    list; every other response is returned untouched.

    Args:
        response: The outgoing Flask response.

    Returns:
        The same response object, with CORS headers added for allowed origins.
    """
    # Imported locally because the module's import block does not provide it.
    from urllib.parse import urlsplit

    candidates = [request.headers.get('Origin')]
    if request.referrer:
        # Reduce the referrer URL to "scheme://host[:port]" so that any page
        # served by a whitelisted origin matches, not only its root URL.
        parts = urlsplit(request.referrer)
        candidates.append(f'{parts.scheme}://{parts.netloc}')

    # A request may carry neither header (e.g. curl); then nothing matches
    # and the response is passed through without CORS headers.
    allowed_origin = next((c for c in candidates if c in white), None)
    if allowed_origin is not None:
        response.headers.add('Access-Control-Allow-Origin', allowed_origin)
        response.headers.add('Access-Control-Allow-Credentials', 'true')
        for header_name in ('Content-Type', 'Cache-Control',
                            'X-Requested-With', 'Authorization'):
            response.headers.add('Access-Control-Allow-Headers', header_name)
        response.headers.add('Access-Control-Allow-Methods',
                             'GET, POST, OPTIONS, PUT, DELETE')
    return response
19
+
20
@app.route('/h')
def hello():
    """Answer GET /h with a fixed greeting string."""
    greeting = "hi"
    return greeting
23
+
24
@app.route('/loc')
def get_location():
    """Serve the location list held by ``util`` as a JSON array."""
    # Make sure util's module-level assets are populated before reading them.
    util.load_assests()
    locations = util.__location_list.tolist()
    return jsonify(locations)
29
+
30
@app.route('/get_price', methods=['POST'])
def get_price():
    """Estimate a price from the posted form fields.

    Reads ``total_sqft``, ``location``, ``bhk`` and ``bath`` from the form
    body, converts the numeric fields, and asks ``util`` for an estimate.

    Returns:
        The estimate rendered with ``str()``, or a plain-text message with
        status 400 when a numeric field cannot be parsed.
    """
    form = request.form
    try:
        total_sqft = float(form['total_sqft'])
        location = form['location']
        bhk = int(form['bhk'])
        bath = int(form['bath'])
    except ValueError:
        # float()/int() reject values such as "" or "2.5" (for the int
        # fields); report that as a client error instead of a server error.
        return 'total_sqft, bhk and bath must be numeric', 400

    util.load_assests()
    price = util.get_estimated_price(location=location, bhk=bhk, bath=bath,
                                     tsqft=total_sqft)
    return str(price)
39
+
40
+ if __name__ == "__main__":
41
+ print("starting")
42
+ app.run()
43
+
44
+
util.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+
4
# Module-level cache populated by load_assests().
__model = None
__location_encoder = None
__location_list = None


def load_assests(force=False):
    """Load the pickled model and location encoder into module globals.

    The files are read from ``assets/`` relative to the current working
    directory.  Once both objects are loaded, later calls return immediately
    unless ``force`` is true, so callers may invoke this on every request
    without re-reading the files.

    Args:
        force: When true, re-read both files even if they are already loaded.

    Raises:
        OSError: If either asset file cannot be opened.
    """
    global __model
    global __location_encoder
    global __location_list

    if not force and __model is not None and __location_encoder is not None:
        return

    # NOTE(security): pickle.load can execute code embedded in the file, so
    # these assets must only ever come from a trusted source.
    with open('assets/banglore_price_prediction_model.pickle', 'rb') as f:
        model = pickle.load(f)
    with open('assets/location_encoder.pickle', 'rb') as ld:
        encoder = pickle.load(ld)

    # Publish only after both loads succeeded, so a failure on the second
    # file cannot leave the globals half-updated.
    __model = model
    __location_encoder = encoder
    __location_list = encoder.categories_[0]
19
+
20
def get_estimated_price(location,bhk,tsqft,bath):
    """Predict a price for one property using the loaded model.

    The location is one-hot encoded with the loaded encoder, the first
    encoded column is dropped, and ``bhk``, ``tsqft`` and ``bath`` are
    appended (in that order) to form the single feature row handed to the
    model.

    Args:
        location: Location name to encode.
        bhk: Number of bedrooms.
        tsqft: Total area in square feet.
        bath: Number of bathrooms.

    Returns:
        The first element of the model's prediction for that row.

    Raises:
        RuntimeError: If the assets have not been loaded yet.
    """
    if __model is None or __location_encoder is None or __location_list is None:
        raise RuntimeError('assets are not loaded; call load_assests() first')

    try:
        x = __location_encoder.transform([[location]]).toarray()[0]
    except ValueError:
        # NOTE(review): presumably the encoder raises ValueError for a
        # location it was not fitted on -- confirm against how it was built.
        # The all-zero fallback becomes, once the first column is dropped
        # below, the same vector as the first category in the list.
        x = np.zeros(len(__location_list))

    x = np.append(x[1:], np.array([bhk, tsqft, bath]))
    return __model.predict(x.reshape(1, -1))[0]
28
+
29
+ # load_assests()
30
+ # get_estimated_price('Devarabeesana Halli', 2, 1100.0, 2.0)