Spaces:

Aashiue
/

Real_Estate_Price_Prediction

Runtime error

App Files Files Community

Aashiue committed on Sep 13, 2023

Commit

acae2bb

•

1 Parent(s): c2261d4

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -89

app.py CHANGED Viewed

@@ -6,32 +6,12 @@ matplotlib.rcParams["figure.figsize"] = (20, 10)
 path = 'bengaluru_house_prices.csv'
 df = pd.read_csv(path)
-df.head()
-df.shape
-df.groupby('area_type')['area_type'].agg('count')
 df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
-df.head()
-df.isnull().sum()
 df=df.dropna()
-df.head()
-df.shape
-df.isnull().sum()
-df['size'].unique()
 df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
-df.head()
-df['BHK'].unique()
-df['total_sqft'].unique()
 def isfloat(x):
   token = x.split('-')
@@ -42,43 +22,29 @@ def isfloat(x):
   except:
     return None
-isfloat('2100 - 2600')
 df['total_sqft'] = df['total_sqft'].apply(isfloat)
-df.head(31)
 df=df.drop(['size'], axis = 'columns')
-df.head(31)
-df.dtypes
 df['price_per_sqft'] = df['price']*100000/df['total_sqft']
-df.head()
-len(df.location.unique())
 df.location = df.location.apply(lambda x: x.strip())
 loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
-loc_stats
 len(loc_stats[loc_stats <= 10])
 loc_stats_ten = loc_stats[loc_stats<=10]
-loc_stats_ten
-df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)
-len(df.location.unique());
-df.head(10)
-df[df.total_sqft/df.BHK < 300].head()
 df = df[~(df.total_sqft/df.BHK < 300)]
-df.price_per_sqft.describe()
 def rem_out(df):
   df_out = pd.DataFrame()
@@ -90,22 +56,7 @@ def rem_out(df):
   return df_out
 df = rem_out(df);
-df.shape
-df.head()
-def plot_scatter(df, location):
-  bhk2 = df[(df.location==location) & (df.BHK==2)]
-  bhk3 = df[(df.location==location) & (df.BHK==3)]
-  matplotlib.rcParams['figure.figsize'] = (15, 10)
-  plt.scatter(bhk2.total_sqft, bhk2.price, color = 'red', label = '2 BHK', s=50)
-  plt.scatter(bhk3.total_sqft, bhk3.price, color = 'blue', label = '3 BHK', s=50)
-  plt.xlabel('Total sq feet area')
-  plt.ylabel('price per sq feet area')
-  plt.legend()
-plot_scatter(df, "Hebbal")
-df.head()
 def remove_outlier(df):
   exclude = np.array([])
@@ -126,45 +77,20 @@ def remove_outlier(df):
   return df.drop(exclude, axis='index')
 df = remove_outlier(df)
-df.shape
-plot_scatter(df, "Hebbal")
-matplotlib.rcParams["figure.figsize"] = (20,10)
-plt.hist(df.price_per_sqft, rwidth=0.8)
-plt.xlabel("price per sq feet")
-plt.ylabel("count")
-df.bath.unique()
-plt.hist(df.bath, rwidth = 0.5)
-plt.xlabel('no. of bathrooms')
-plt.ylabel('count')
-df[df.bath > df.BHK+2]
 df = df[df.bath < df.BHK+2]
-df.shape
 df = df.drop(['price_per_sqft'], axis = 'columns')
-df.head(10)
 dummies = pd.get_dummies(df.location)
-dummies.head()
 df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
-df.head()
 df = df.drop('location', axis = 'columns')
-df.head()
-df.shape
 x = df.drop('price', axis = 'columns')
-x.head()
 y = df.price
-y.head()
 from sklearn.model_selection import train_test_split
 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)
@@ -239,19 +165,6 @@ def predict_price_func(location, sqft, bath, bhk):
   return lr_clf.predict([xdash])[0]
-df.head()
-print(x.columns)
-predict_price_func('1st Phase JP Nagar', 1200, 2, 2)
-predict_price_func('Indira Nagar', 1200, 3, 3)
-predict_price_func('Indira Nagar', 1200, 1, 3)
-predict_price_func('Indira Nagar', 1200, 3, 4)
 import gradio as gr
 from gradio.components import Textbox, Number

 path = 'bengaluru_house_prices.csv'
 df = pd.read_csv(path)
 df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
 df=df.dropna()
 df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
 def isfloat(x):
   token = x.split('-')
   except:
     return None
 df['total_sqft'] = df['total_sqft'].apply(isfloat)
 df=df.drop(['size'], axis = 'columns')
 df['price_per_sqft'] = df['price']*100000/df['total_sqft']
 df.location = df.location.apply(lambda x: x.strip())
 loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
 len(loc_stats[loc_stats <= 10])
 loc_stats_ten = loc_stats[loc_stats<=10]
+df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)
 df = df[~(df.total_sqft/df.BHK < 300)]
 def rem_out(df):
   df_out = pd.DataFrame()
   return df_out
 df = rem_out(df);
 def remove_outlier(df):
   exclude = np.array([])
   return df.drop(exclude, axis='index')
 df = remove_outlier(df)
 df = df[df.bath < df.BHK+2]
 df = df.drop(['price_per_sqft'], axis = 'columns')
 dummies = pd.get_dummies(df.location)
 df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
 df = df.drop('location', axis = 'columns')
 x = df.drop('price', axis = 'columns')
 y = df.price
 from sklearn.model_selection import train_test_split
 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)
   return lr_clf.predict([xdash])[0]
 import gradio as gr
 from gradio.components import Textbox, Number