Aashiue committed on
Commit
acae2bb
1 Parent(s): c2261d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -89
app.py CHANGED
@@ -6,32 +6,12 @@ matplotlib.rcParams["figure.figsize"] = (20, 10)
6
 
7
  path = 'bengaluru_house_prices.csv'
8
  df = pd.read_csv(path)
9
- df.head()
10
-
11
- df.shape
12
-
13
- df.groupby('area_type')['area_type'].agg('count')
14
 
15
  df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
16
- df.head()
17
-
18
- df.isnull().sum()
19
 
20
  df=df.dropna()
21
- df.head()
22
-
23
- df.shape
24
- df.isnull().sum()
25
-
26
- df['size'].unique()
27
-
28
  df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
29
 
30
- df.head()
31
-
32
- df['BHK'].unique()
33
-
34
- df['total_sqft'].unique()
35
 
36
  def isfloat(x):
37
  token = x.split('-')
@@ -42,43 +22,29 @@ def isfloat(x):
42
  except:
43
  return None
44
 
45
- isfloat('2100 - 2600')
46
 
47
  df['total_sqft'] = df['total_sqft'].apply(isfloat)
48
- df.head(31)
49
 
50
  df=df.drop(['size'], axis = 'columns')
51
 
52
- df.head(31)
53
-
54
- df.dtypes
55
 
56
  df['price_per_sqft'] = df['price']*100000/df['total_sqft']
57
- df.head()
58
 
59
- len(df.location.unique())
60
 
61
  df.location = df.location.apply(lambda x: x.strip())
62
  loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
63
- loc_stats
64
 
65
  len(loc_stats[loc_stats <= 10])
66
 
67
  loc_stats_ten = loc_stats[loc_stats<=10]
68
- loc_stats_ten
69
-
70
- df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)
71
 
72
 
73
- len(df.location.unique());
74
-
75
- df.head(10)
76
 
77
- df[df.total_sqft/df.BHK < 300].head()
78
 
79
  df = df[~(df.total_sqft/df.BHK < 300)]
80
 
81
- df.price_per_sqft.describe()
82
 
83
  def rem_out(df):
84
  df_out = pd.DataFrame()
@@ -90,22 +56,7 @@ def rem_out(df):
90
  return df_out
91
 
92
  df = rem_out(df);
93
- df.shape
94
- df.head()
95
-
96
- def plot_scatter(df, location):
97
- bhk2 = df[(df.location==location) & (df.BHK==2)]
98
- bhk3 = df[(df.location==location) & (df.BHK==3)]
99
- matplotlib.rcParams['figure.figsize'] = (15, 10)
100
- plt.scatter(bhk2.total_sqft, bhk2.price, color = 'red', label = '2 BHK', s=50)
101
- plt.scatter(bhk3.total_sqft, bhk3.price, color = 'blue', label = '3 BHK', s=50)
102
- plt.xlabel('Total sq feet area')
103
- plt.ylabel('price per sq feet area')
104
- plt.legend()
105
 
106
- plot_scatter(df, "Hebbal")
107
-
108
- df.head()
109
 
110
  def remove_outlier(df):
111
  exclude = np.array([])
@@ -126,45 +77,20 @@ def remove_outlier(df):
126
  return df.drop(exclude, axis='index')
127
 
128
  df = remove_outlier(df)
129
- df.shape
130
-
131
- plot_scatter(df, "Hebbal")
132
-
133
- matplotlib.rcParams["figure.figsize"] = (20,10)
134
- plt.hist(df.price_per_sqft, rwidth=0.8)
135
- plt.xlabel("price per sq feet")
136
- plt.ylabel("count")
137
-
138
- df.bath.unique()
139
-
140
- plt.hist(df.bath, rwidth = 0.5)
141
- plt.xlabel('no. of bathrooms')
142
- plt.ylabel('count')
143
-
144
- df[df.bath > df.BHK+2]
145
 
146
  df = df[df.bath < df.BHK+2]
147
- df.shape
148
 
149
  df = df.drop(['price_per_sqft'], axis = 'columns')
150
- df.head(10)
151
 
152
  dummies = pd.get_dummies(df.location)
153
- dummies.head()
154
 
155
  df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
156
- df.head()
157
 
158
  df = df.drop('location', axis = 'columns')
159
- df.head()
160
-
161
- df.shape
162
 
163
  x = df.drop('price', axis = 'columns')
164
- x.head()
165
 
166
  y = df.price
167
- y.head()
168
 
169
  from sklearn.model_selection import train_test_split
170
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)
@@ -239,19 +165,6 @@ def predict_price_func(location, sqft, bath, bhk):
239
 
240
  return lr_clf.predict([xdash])[0]
241
 
242
- df.head()
243
-
244
- print(x.columns)
245
-
246
- predict_price_func('1st Phase JP Nagar', 1200, 2, 2)
247
-
248
- predict_price_func('Indira Nagar', 1200, 3, 3)
249
-
250
- predict_price_func('Indira Nagar', 1200, 1, 3)
251
-
252
- predict_price_func('Indira Nagar', 1200, 3, 4)
253
-
254
-
255
  import gradio as gr
256
 
257
  from gradio.components import Textbox, Number
 
6
 
7
  path = 'bengaluru_house_prices.csv'
8
  df = pd.read_csv(path)
 
 
 
 
 
9
 
10
  df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
 
 
 
11
 
12
  df=df.dropna()
 
 
 
 
 
 
 
13
  df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
14
 
 
 
 
 
 
15
 
16
  def isfloat(x):
17
  token = x.split('-')
 
22
  except:
23
  return None
24
 
 
25
 
26
  df['total_sqft'] = df['total_sqft'].apply(isfloat)
 
27
 
28
  df=df.drop(['size'], axis = 'columns')
29
 
 
 
 
30
 
31
  df['price_per_sqft'] = df['price']*100000/df['total_sqft']
 
32
 
 
33
 
34
  df.location = df.location.apply(lambda x: x.strip())
35
  loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
36
+
37
 
38
  len(loc_stats[loc_stats <= 10])
39
 
40
  loc_stats_ten = loc_stats[loc_stats<=10]
 
 
 
41
 
42
 
43
+ df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)
 
 
44
 
 
45
 
46
  df = df[~(df.total_sqft/df.BHK < 300)]
47
 
 
48
 
49
  def rem_out(df):
50
  df_out = pd.DataFrame()
 
56
  return df_out
57
 
58
  df = rem_out(df);
 
 
 
 
 
 
 
 
 
 
 
 
59
 
 
 
 
60
 
61
  def remove_outlier(df):
62
  exclude = np.array([])
 
77
  return df.drop(exclude, axis='index')
78
 
79
  df = remove_outlier(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  df = df[df.bath < df.BHK+2]
 
82
 
83
  df = df.drop(['price_per_sqft'], axis = 'columns')
 
84
 
85
  dummies = pd.get_dummies(df.location)
 
86
 
87
  df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
 
88
 
89
  df = df.drop('location', axis = 'columns')
 
 
 
90
 
91
  x = df.drop('price', axis = 'columns')
 
92
 
93
  y = df.price
 
94
 
95
  from sklearn.model_selection import train_test_split
96
  X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)
 
165
 
166
  return lr_clf.predict([xdash])[0]
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  import gradio as gr
169
 
170
  from gradio.components import Textbox, Number