Spaces:

Aashiue
/

Real_Estate_Price_Prediction

Runtime error

App Files Files Community

Aashiue committed on Sep 13, 2023

Commit

56ead55

•

1 Parent(s): a934882

Create app.py

Browse files

Files changed (1) hide show

app.py +272 -0

app.py ADDED Viewed

	@@ -0,0 +1,272 @@

+import pandas as pd
+import numpy as np
+from matplotlib import pyplot as plt
+# `%matplotlib inline` is IPython magic syntax and a SyntaxError when this file
+# is executed as a plain Python script (app.py). Invoke the magic through the
+# IPython API instead, and only when an interactive shell actually exists.
+try:
+  get_ipython().run_line_magic('matplotlib', 'inline')
+except NameError:
+  pass  # not running under IPython; nothing to configure
+import matplotlib
+matplotlib.rcParams["figure.figsize"] = (20, 10)
+# NOTE(review): absolute '/content/...' path -- presumably a Colab upload
+# location; confirm the CSV exists at this path wherever app.py is run.
+path = '/content/bengaluru_house_prices.csv'
+df = pd.read_csv(path)
+df.head()
+df.shape
+df.groupby('area_type')['area_type'].agg('count')
+# These four columns are dropped here and never used again in this script.
+df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
+df.head()
+df.isnull().sum()
+# Discard every row that still has a missing value in the remaining columns.
+df=df.dropna()
+df.head()
+df.shape
+df.isnull().sum()
+df['size'].unique()
+# BHK is the integer before the first space of the 'size' text.
+df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))
+df.head()
+df['BHK'].unique()
+df['total_sqft'].unique()
+def isfloat(x):
+  """Convert a total_sqft entry to a float.
+
+  Args:
+    x: A number, a numeric string such as '1200', or a range string such as
+      '2100 - 2600'.
+
+  Returns:
+    float(x) when x parses directly; the midpoint of the two bounds when x is
+    a 'low - high' range; None when neither interpretation parses.
+  """
+  # Try the plain number first so that '-5' or '1e-5' are not mistaken for a
+  # range just because they contain a '-'.
+  try:
+    return float(x)
+  except (TypeError, ValueError):
+    pass
+  # The range parse sits inside the try as well: a bound such as '12Sq' must
+  # produce None rather than an uncaught ValueError.
+  try:
+    low, high = str(x).split('-')
+    return (float(low) + float(high)) / 2
+  except (TypeError, ValueError):
+    return None
+isfloat('2100 - 2600')  # ad-hoc check of the range branch; the result is discarded when run as a script
+df['total_sqft'] = df['total_sqft'].apply(isfloat)  # replace each total_sqft entry with whatever isfloat returns for it
+df.head(31)
+df=df.drop(['size'], axis = 'columns')  # the BHK column was already derived from 'size' earlier in the script
+df.head(31)
+df.dtypes
+df['price_per_sqft'] = df['price']*100000/df['total_sqft']  # price is scaled by 100000 before dividing by area -- presumably price is quoted in lakhs; confirm against the dataset
+df.head()
+len(df.location.unique())
+df.location = df.location.apply(lambda x: x.strip())  # trim surrounding whitespace so identical names group together
+loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)  # row count per location, largest first
+loc_stats
+len(loc_stats[loc_stats <= 10])
+loc_stats_ten = loc_stats[loc_stats<=10]  # locations with at most 10 rows
+loc_stats_ten
+df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)  # `in` on a Series tests its index labels (location names here), so these locations are relabelled 'other'
+len(df.location.unique());
+df.head(10)
+df[df.total_sqft/df.BHK < 300].head()
+df = df[~(df.total_sqft/df.BHK < 300)]  # drop rows with under 300 sqft per BHK; the negated mask keeps rows for which the comparison is False
+df.price_per_sqft.describe()
+def rem_out(df):
+  """Drop price_per_sqft outliers, one location at a time.
+
+  For every 'location' group, only rows whose price_per_sqft lies in the
+  half-open band (mean - std, mean + std] of that group are kept, with mean
+  and std taken from np.mean and np.std.
+
+  Args:
+    df: DataFrame with 'location' and 'price_per_sqft' columns.
+
+  Returns:
+    A new DataFrame of the kept rows with a fresh 0..n-1 index; an empty
+    DataFrame when df has no groups.
+  """
+  kept = []
+  for _, subdf in df.groupby('location'):
+    mu = np.mean(subdf.price_per_sqft)
+    std = np.std(subdf.price_per_sqft)
+    in_band = (subdf.price_per_sqft > (mu - std)) & (subdf.price_per_sqft <= (mu + std))
+    kept.append(subdf[in_band])
+  # pd.concat raises on an empty list, so mirror the empty-frame result the
+  # incremental version produced when there was nothing to concatenate.
+  if not kept:
+    return pd.DataFrame()
+  # One concat at the end instead of re-copying the accumulated frame on
+  # every iteration.
+  return pd.concat(kept, ignore_index = True)
+df = rem_out(df);  # keep only rows within one np.std of their location's mean price_per_sqft; the result has a fresh index
+df.shape
+df.head()
+def plot_scatter(df, location):
+  """Scatter total_sqft against price for the 2 and 3 BHK rows of one location.
+
+  Draws on the current matplotlib figure via pyplot: 2 BHK rows in red and
+  3 BHK rows in blue. Also sets the global rcParams figure size to (15, 10).
+
+  Args:
+    df: DataFrame with 'location', 'BHK', 'total_sqft' and 'price' columns.
+    location: Value of the 'location' column to plot; also used as the title.
+  """
+  bhk2 = df[(df.location==location) & (df.BHK==2)]
+  bhk3 = df[(df.location==location) & (df.BHK==3)]
+  matplotlib.rcParams['figure.figsize'] = (15, 10)
+  plt.scatter(bhk2.total_sqft, bhk2.price, color = 'red', label = '2 BHK', s=50)
+  plt.scatter(bhk3.total_sqft, bhk3.price, color = 'blue', label = '3 BHK', s=50)
+  plt.xlabel('Total sq feet area')
+  # The y values are the 'price' column, not price_per_sqft, so the axis
+  # must not be labelled as a per-square-foot quantity.
+  plt.ylabel('price')
+  plt.title(location)
+  plt.legend()
+plot_scatter(df, "Hebbal")  # 2 vs 3 BHK scatter for Hebbal, before remove_outlier is applied
+df.head()
+def remove_outlier(df):
+  """Drop rows priced below the mean of the next-smaller BHK in their location.
+
+  Within each 'location', a row with N BHK is removed when its
+  price_per_sqft is below the mean price_per_sqft of that location's
+  (N - 1) BHK rows, provided the (N - 1) group has more than 5 rows.
+
+  Args:
+    df: DataFrame with 'location', 'BHK' and 'price_per_sqft' columns.
+
+  Returns:
+    A copy of df without the excluded rows; the index labels are preserved.
+  """
+  # Index labels are gathered in a plain list: appending to a float ndarray
+  # coerces every label to float and re-copies the array on each append.
+  exclude = []
+  for _, location_df in df.groupby('location'):
+    # Group once per location and reuse the groups for both the reference
+    # statistics and the filtering pass.
+    by_bhk = dict(tuple(location_df.groupby('BHK')))
+    for bhk, bhk_df in by_bhk.items():
+      smaller = by_bhk.get(bhk - 1)
+      if smaller is not None and smaller.shape[0] > 5:
+        reference_mean = np.mean(smaller.price_per_sqft)
+        exclude.extend(bhk_df[bhk_df.price_per_sqft < reference_mean].index)
+  return df.drop(exclude, axis='index')
+df = remove_outlier(df)  # drop rows priced per sqft below the mean of the next-smaller BHK group in the same location
+df.shape
+plot_scatter(df, "Hebbal")  # same Hebbal plot, now on the filtered data
+matplotlib.rcParams["figure.figsize"] = (20,10)  # plot_scatter set the figure size to (15, 10); set it back to (20, 10)
+plt.hist(df.price_per_sqft, rwidth=0.8)
+plt.xlabel("price per sq feet")
+plt.ylabel("count")
+df.bath.unique()
+plt.hist(df.bath, rwidth = 0.5)
+plt.xlabel('no. of bathrooms')
+plt.ylabel('count')
+df[df.bath > df.BHK+2]  # inspect rows with more than BHK+2 bathrooms
+df = df[df.bath < df.BHK+2]  # NOTE(review): this also drops bath == BHK+2, which the inspection on the previous line does not show -- confirm the intended cutoff
+df.shape
+df = df.drop(['price_per_sqft'], axis = 'columns')  # removed before the feature matrix is built, so it is not a model input
+df.head(10)
+dummies = pd.get_dummies(df.location)  # one indicator column per distinct location value
+dummies.head()
+df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')  # 'other' gets no indicator column, so an all-zero location vector stands for it
+df.head()
+df = df.drop('location', axis = 'columns')  # the text column is replaced by the indicator columns added above
+df.head()
+df.shape
+x = df.drop('price', axis = 'columns')  # feature matrix: every remaining column except the target
+x.head()
+y = df.price  # regression target
+y.head()
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)  # hold out 20% of the rows; fixed seed
+from sklearn.linear_model import LinearRegression
+lr_clf = LinearRegression()
+lr_clf.fit(X_train, y_train)
+lr_clf.score(X_test, y_test)  # score on the held-out rows; the value is discarded when run as a script
+from sklearn.model_selection import ShuffleSplit
+from sklearn.model_selection import cross_val_score
+cv = ShuffleSplit(n_splits = 5, test_size = 0.2, random_state = 10)  # five shuffled splits, each holding out 20%
+cross_val_score(LinearRegression(), x, y, cv = cv)
+from sklearn.model_selection import GridSearchCV
+from sklearn.linear_model import Lasso
+from sklearn.tree import DecisionTreeRegressor
+def find_best_model(x, y):
+  """Grid-search three regressors on (x, y) and tabulate each one's best result.
+
+  The candidates are LinearRegression, Lasso and DecisionTreeRegressor, each
+  searched with GridSearchCV over its own parameter grid using five shuffled
+  splits that hold out 20% of the data (random_state=10).
+
+  Args:
+    x: Feature matrix handed to GridSearchCV.fit.
+    y: Target values handed to GridSearchCV.fit.
+
+  Returns:
+    DataFrame with one row per candidate and the columns 'model',
+    'best_score' and 'best_params'.
+  """
+  candidates = [
+      ('linear_reg', LinearRegression(), {
+          'fit_intercept': [True, False],
+          'copy_X': [True, False],
+          'n_jobs': [None, -1],
+          'positive': [True, False]
+      }),
+      ('lasso', Lasso(), {
+          'alpha' : [1,2],
+          'selection' : ['random', 'cyclic']
+      }),
+      ('dec_tree', DecisionTreeRegressor(), {
+          'criterion': ['friedman_mse', 'squared_error', 'poisson', 'absolute_error'],
+          'splitter': ['best', 'random'],
+      }),
+  ]
+  splitter = ShuffleSplit(n_splits = 5, test_size = 0.2, random_state = 10)
+
+  def best_of(name, estimator, grid):
+    # Run one exhaustive search and summarise its winning configuration.
+    search = GridSearchCV(estimator, grid, cv = splitter, return_train_score = False)
+    search.fit(x, y)
+    return {
+        'model' : name,
+        'best_score' : search.best_score_,
+        'best_params' : search.best_params_
+    }
+
+  rows = [best_of(*candidate) for candidate in candidates]
+  return pd.DataFrame(rows, columns = ['model', 'best_score', 'best_params'])
+find_best_model(x,y)  # the returned comparison table is discarded when run as a script
+def predict_price_func(location, sqft, bath, bhk):
+  """Predict a price for one listing with the fitted module-level ``lr_clf``.
+
+  Builds a single feature row as long as ``x.columns``: positions 0, 1 and 2
+  receive ``sqft``, ``bath`` and ``bhk``, and the column whose name equals
+  ``location`` is set to 1.
+  NOTE(review): assumes the first three columns of ``x`` are total_sqft,
+  bath and BHK in that order -- confirm if the feature frame changes.
+
+  Args:
+    location: Location name; surrounding whitespace is ignored. A name with
+      no matching column leaves every location flag at 0, which is also how
+      the 'other' category (dropped from the indicator columns) is encoded.
+    sqft: Value written to feature position 0.
+    bath: Value written to feature position 1.
+    bhk: Value written to feature position 2.
+
+  Returns:
+    The first element of ``lr_clf.predict`` for the constructed row.
+  """
+  n_numeric = 3  # leading non-location positions filled in below
+  xdash = np.zeros(len(x.columns))
+  xdash[0] = sqft
+  xdash[1] = bath
+  xdash[2] = bhk
+  name = location.strip() if isinstance(location, str) else location
+  # Search only past the numeric positions so that a name such as 'bath'
+  # cannot overwrite a numeric feature.
+  hits = np.where(x.columns[n_numeric:] == name)[0]
+  # Indexing [0][0] unconditionally raised IndexError for any name without a
+  # column; an empty match now simply leaves the location flags at zero.
+  if hits.size:
+    xdash[n_numeric + hits[0]] = 1
+  return lr_clf.predict([xdash])[0]
+df.head()
+print(x.columns)
+predict_price_func('1st Phase JP Nagar', 1200, 2, 2)
+predict_price_func('Indira Nagar', 1200, 3, 3)
+predict_price_func('Indira Nagar', 1200, 1, 3)
+predict_price_func('Indira Nagar', 1200, 3, 4)
+# `!pip install gradio` is IPython shell-escape syntax and a SyntaxError in a
+# plain app.py. Import gradio normally and fall back to installing it through
+# pip only when the import fails.
+try:
+  import gradio as gr
+except ImportError:
+  import subprocess
+  import sys
+  subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio'])
+  import gradio as gr
+from gradio.components import Textbox, Number
+# The component classes imported above replace the deprecated `gr.inputs.*`
+# namespace. Inputs are listed in the order of predict_price_func's
+# parameters: location, sqft, bath, bhk.
+interface = gr.Interface(
+    fn=predict_price_func,
+    inputs=[
+        Textbox(label="Location"),
+        Number(label="Total area (sq ft)"),
+        Number(label="Bathrooms"),
+        Number(label="Bedrooms (BHK)")
+    ],
+    outputs="text",
+    theme="huggingface"
+)
+interface.launch()