Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -6,32 +6,12 @@ matplotlib.rcParams["figure.figsize"] = (20, 10)
|
|
6 |
|
7 |
path = 'bengaluru_house_prices.csv'
|
8 |
df = pd.read_csv(path)
|
9 |
-
df.head()
|
10 |
-
|
11 |
-
df.shape
|
12 |
-
|
13 |
-
df.groupby('area_type')['area_type'].agg('count')
|
14 |
|
15 |
df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
|
16 |
-
df.head()
|
17 |
-
|
18 |
-
df.isnull().sum()
|
19 |
|
20 |
df=df.dropna()
|
21 |
-
df.head()
|
22 |
-
|
23 |
-
df.shape
|
24 |
-
df.isnull().sum()
|
25 |
-
|
26 |
-
df['size'].unique()
|
27 |
-
|
28 |
df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
|
29 |
|
30 |
-
df.head()
|
31 |
-
|
32 |
-
df['BHK'].unique()
|
33 |
-
|
34 |
-
df['total_sqft'].unique()
|
35 |
|
36 |
def isfloat(x):
|
37 |
token = x.split('-')
|
@@ -42,43 +22,29 @@ def isfloat(x):
|
|
42 |
except:
|
43 |
return None
|
44 |
|
45 |
-
isfloat('2100 - 2600')
|
46 |
|
47 |
df['total_sqft'] = df['total_sqft'].apply(isfloat)
|
48 |
-
df.head(31)
|
49 |
|
50 |
df=df.drop(['size'], axis = 'columns')
|
51 |
|
52 |
-
df.head(31)
|
53 |
-
|
54 |
-
df.dtypes
|
55 |
|
56 |
df['price_per_sqft'] = df['price']*100000/df['total_sqft']
|
57 |
-
df.head()
|
58 |
|
59 |
-
len(df.location.unique())
|
60 |
|
61 |
df.location = df.location.apply(lambda x: x.strip())
|
62 |
loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
|
63 |
-
|
64 |
|
65 |
len(loc_stats[loc_stats <= 10])
|
66 |
|
67 |
loc_stats_ten = loc_stats[loc_stats<=10]
|
68 |
-
loc_stats_ten
|
69 |
-
|
70 |
-
df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)
|
71 |
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
df.head(10)
|
76 |
|
77 |
-
df[df.total_sqft/df.BHK < 300].head()
|
78 |
|
79 |
df = df[~(df.total_sqft/df.BHK < 300)]
|
80 |
|
81 |
-
df.price_per_sqft.describe()
|
82 |
|
83 |
def rem_out(df):
|
84 |
df_out = pd.DataFrame()
|
@@ -90,22 +56,7 @@ def rem_out(df):
|
|
90 |
return df_out
|
91 |
|
92 |
df = rem_out(df);
|
93 |
-
df.shape
|
94 |
-
df.head()
|
95 |
-
|
96 |
-
def plot_scatter(df, location):
|
97 |
-
bhk2 = df[(df.location==location) & (df.BHK==2)]
|
98 |
-
bhk3 = df[(df.location==location) & (df.BHK==3)]
|
99 |
-
matplotlib.rcParams['figure.figsize'] = (15, 10)
|
100 |
-
plt.scatter(bhk2.total_sqft, bhk2.price, color = 'red', label = '2 BHK', s=50)
|
101 |
-
plt.scatter(bhk3.total_sqft, bhk3.price, color = 'blue', label = '3 BHK', s=50)
|
102 |
-
plt.xlabel('Total sq feet area')
|
103 |
-
plt.ylabel('price per sq feet area')
|
104 |
-
plt.legend()
|
105 |
|
106 |
-
plot_scatter(df, "Hebbal")
|
107 |
-
|
108 |
-
df.head()
|
109 |
|
110 |
def remove_outlier(df):
|
111 |
exclude = np.array([])
|
@@ -126,45 +77,20 @@ def remove_outlier(df):
|
|
126 |
return df.drop(exclude, axis='index')
|
127 |
|
128 |
df = remove_outlier(df)
|
129 |
-
df.shape
|
130 |
-
|
131 |
-
plot_scatter(df, "Hebbal")
|
132 |
-
|
133 |
-
matplotlib.rcParams["figure.figsize"] = (20,10)
|
134 |
-
plt.hist(df.price_per_sqft, rwidth=0.8)
|
135 |
-
plt.xlabel("price per sq feet")
|
136 |
-
plt.ylabel("count")
|
137 |
-
|
138 |
-
df.bath.unique()
|
139 |
-
|
140 |
-
plt.hist(df.bath, rwidth = 0.5)
|
141 |
-
plt.xlabel('no. of bathrooms')
|
142 |
-
plt.ylabel('count')
|
143 |
-
|
144 |
-
df[df.bath > df.BHK+2]
|
145 |
|
146 |
df = df[df.bath < df.BHK+2]
|
147 |
-
df.shape
|
148 |
|
149 |
df = df.drop(['price_per_sqft'], axis = 'columns')
|
150 |
-
df.head(10)
|
151 |
|
152 |
dummies = pd.get_dummies(df.location)
|
153 |
-
dummies.head()
|
154 |
|
155 |
df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
|
156 |
-
df.head()
|
157 |
|
158 |
df = df.drop('location', axis = 'columns')
|
159 |
-
df.head()
|
160 |
-
|
161 |
-
df.shape
|
162 |
|
163 |
x = df.drop('price', axis = 'columns')
|
164 |
-
x.head()
|
165 |
|
166 |
y = df.price
|
167 |
-
y.head()
|
168 |
|
169 |
from sklearn.model_selection import train_test_split
|
170 |
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)
|
@@ -239,19 +165,6 @@ def predict_price_func(location, sqft, bath, bhk):
|
|
239 |
|
240 |
return lr_clf.predict([xdash])[0]
|
241 |
|
242 |
-
df.head()
|
243 |
-
|
244 |
-
print(x.columns)
|
245 |
-
|
246 |
-
predict_price_func('1st Phase JP Nagar', 1200, 2, 2)
|
247 |
-
|
248 |
-
predict_price_func('Indira Nagar', 1200, 3, 3)
|
249 |
-
|
250 |
-
predict_price_func('Indira Nagar', 1200, 1, 3)
|
251 |
-
|
252 |
-
predict_price_func('Indira Nagar', 1200, 3, 4)
|
253 |
-
|
254 |
-
|
255 |
import gradio as gr
|
256 |
|
257 |
from gradio.components import Textbox, Number
|
|
|
6 |
|
7 |
path = 'bengaluru_house_prices.csv'
|
8 |
df = pd.read_csv(path)
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
|
|
|
|
|
|
|
11 |
|
12 |
df=df.dropna()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
|
14 |
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def isfloat(x):
|
17 |
token = x.split('-')
|
|
|
22 |
except:
|
23 |
return None
|
24 |
|
|
|
25 |
|
26 |
df['total_sqft'] = df['total_sqft'].apply(isfloat)
|
|
|
27 |
|
28 |
df=df.drop(['size'], axis = 'columns')
|
29 |
|
|
|
|
|
|
|
30 |
|
31 |
df['price_per_sqft'] = df['price']*100000/df['total_sqft']
|
|
|
32 |
|
|
|
33 |
|
34 |
df.location = df.location.apply(lambda x: x.strip())
|
35 |
loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
|
36 |
+
|
37 |
|
38 |
len(loc_stats[loc_stats <= 10])
|
39 |
|
40 |
loc_stats_ten = loc_stats[loc_stats<=10]
|
|
|
|
|
|
|
41 |
|
42 |
|
43 |
+
df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)
|
|
|
|
|
44 |
|
|
|
45 |
|
46 |
df = df[~(df.total_sqft/df.BHK < 300)]
|
47 |
|
|
|
48 |
|
49 |
def rem_out(df):
|
50 |
df_out = pd.DataFrame()
|
|
|
56 |
return df_out
|
57 |
|
58 |
df = rem_out(df);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
|
|
|
|
|
|
60 |
|
61 |
def remove_outlier(df):
|
62 |
exclude = np.array([])
|
|
|
77 |
return df.drop(exclude, axis='index')
|
78 |
|
79 |
df = remove_outlier(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
df = df[df.bath < df.BHK+2]
|
|
|
82 |
|
83 |
df = df.drop(['price_per_sqft'], axis = 'columns')
|
|
|
84 |
|
85 |
dummies = pd.get_dummies(df.location)
|
|
|
86 |
|
87 |
df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
|
|
|
88 |
|
89 |
df = df.drop('location', axis = 'columns')
|
|
|
|
|
|
|
90 |
|
91 |
x = df.drop('price', axis = 'columns')
|
|
|
92 |
|
93 |
y = df.price
|
|
|
94 |
|
95 |
from sklearn.model_selection import train_test_split
|
96 |
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)
|
|
|
165 |
|
166 |
return lr_clf.predict([xdash])[0]
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
import gradio as gr
|
169 |
|
170 |
from gradio.components import Textbox, Number
|