Spaces:

Aashiue
/

Real_Estate_Price_Prediction

Runtime error

File size: 4,878 Bytes

56ead55
 
 
 
 
 
c2261d4
56ead55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acae2bb
56ead55
 
 
 
 
 
acae2bb
56ead55

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib
# Default size for any matplotlib figure created by this script.
matplotlib.rcParams["figure.figsize"] = (20, 10)

# Load the raw listings from the CSV that sits next to this script.
path = 'bengaluru_house_prices.csv'
df = pd.read_csv(path)

# Discard the columns the pipeline never reads, then every row that still
# has a missing value.
df = df.drop(columns=['area_type', 'society', 'balcony', 'availability'])
df = df.dropna()

# The leading space-separated token of 'size' is parsed as an integer and
# stored in the new 'BHK' column.
df['BHK'] = df['size'].map(lambda size: int(size.split(' ')[0]))


def isfloat(x):
  """Convert a ``total_sqft`` entry to a float.

  Accepted forms:
    * anything ``float()`` understands (e.g. ``"1200"``, ``1200``) is
      returned as that float;
    * a string of the form ``"low-high"`` with exactly one ``-`` (e.g.
      ``"1000 - 1200"``) is returned as the midpoint of the two bounds.

  Returns ``None`` for everything else (e.g. ``"34.46Sq. Meter"``,
  ``"abc-def"``, ``None``) instead of raising.
  """
  # Plain numbers first: this also covers strings such as "-5" or "1e-3",
  # which contain a '-' but are not ranges.
  try:
    return float(x)
  except (TypeError, ValueError):
    pass

  # Only strings can describe a range.
  if not isinstance(x, str):
    return None

  bounds = x.split('-')
  if len(bounds) != 2:
    return None
  try:
    low, high = float(bounds[0]), float(bounds[1])
  except ValueError:
    return None
  return (low + high) / 2


# Turn the raw total_sqft entries into floats; entries isfloat cannot parse
# come back as None and therefore show up as NaN in the column.
df['total_sqft'] = df['total_sqft'].apply(isfloat)

# 'size' is not needed any more now that 'BHK' has been derived from it.
df = df.drop(columns=['size'])

# Presumably price is quoted in lakhs, hence the 100000 factor -- TODO confirm
# against the dataset description.
df['price_per_sqft'] = df['price'] * 100000 / df['total_sqft']

# Normalise the location names, then count the listings per location
# (most frequent first).
df.location = df.location.apply(lambda x: x.strip())
loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending=False)

# Locations with at most 10 listings are folded into a single 'other' bucket.
loc_stats_ten = loc_stats[loc_stats <= 10]
df.location = df.location.where(~df.location.isin(loc_stats_ten.index), 'other')

# Drop listings with less than 300 sqft per bedroom. Rows whose area could not
# be parsed (NaN) are dropped explicitly here: a NaN comparison is always
# False, so the negated "< 300" test on its own would let them through.
df = df[df.total_sqft.notna() & ~(df.total_sqft / df.BHK < 300)]


def rem_out(df):
  """Keep, per location, only rows whose price_per_sqft is near the mean.

  For every ``location`` group the mean and standard deviation of
  ``price_per_sqft`` are computed with ``np.mean`` / ``np.std``; a row is
  kept when its value lies in ``(mean - std, mean + std]``.

  Note that the lower bound is exclusive, so a group whose values are all
  identical (including a single-row group, where std is 0) keeps no rows.

  Args:
    df: DataFrame with ``location`` and ``price_per_sqft`` columns.

  Returns:
    A new DataFrame with the kept rows of all groups, re-indexed from 0.
    An empty DataFrame when ``df`` has no groups.
  """
  kept = []
  for _, subdf in df.groupby('location'):
    pps = subdf.price_per_sqft
    mu = np.mean(pps)
    std = np.std(pps)
    kept.append(subdf[(pps > (mu - std)) & (pps <= (mu + std))])

  # pd.concat refuses an empty list; keep returning an empty frame instead.
  if not kept:
    return pd.DataFrame()

  # One concat at the end instead of one per group: avoids re-copying the
  # accumulated frame on every iteration and avoids concatenating with an
  # empty seed DataFrame.
  return pd.concat(kept, ignore_index=True)

# Apply the per-location price-per-sqft filter to the working frame.
df = rem_out(df)


def remove_outlier(df):
  """Drop rows priced below the mean of the next-smaller BHK group.

  Within each ``location``, rows are grouped by ``BHK``. For a group with
  ``BHK == n``, if the same location has a group with ``BHK == n - 1`` that
  contains more than 5 rows, every row of the ``n`` group whose
  ``price_per_sqft`` is below the mean ``price_per_sqft`` of the ``n - 1``
  group is removed.

  Args:
    df: DataFrame with ``location``, ``BHK`` and ``price_per_sqft`` columns.

  Returns:
    A copy of ``df`` without the removed rows; the original index labels of
    the remaining rows are preserved.
  """
  # Index labels are collected in a plain list so they keep their exact
  # type (appending to a float ndarray would coerce them to float64).
  exclude = []
  for _, location_df in df.groupby('location'):
    bhk_groups = list(location_df.groupby('BHK'))
    bhk_stat = {
        bhk: {
            'mean': np.mean(bhk_df.price_per_sqft),
            'count': bhk_df.shape[0],
        }
        for bhk, bhk_df in bhk_groups
    }
    for bhk, bhk_df in bhk_groups:
      stat = bhk_stat.get(bhk - 1)
      # Only compare against a smaller-BHK group that has enough rows.
      if stat and stat['count'] > 5:
        cheaper = bhk_df[bhk_df.price_per_sqft < stat['mean']]
        exclude.extend(cheaper.index.tolist())
  return df.drop(exclude, axis='index')

# Remove rows priced below the mean of the next-smaller BHK group.
df = remove_outlier(df)

# Keep only listings with fewer than BHK + 2 bathrooms.
df = df[df.bath < df.BHK + 2]

# price_per_sqft was only needed for the filters above.
df = df.drop(columns=['price_per_sqft'])

# One-hot encode the location; the 'other' column is left out of the
# encoded set before it is attached to the frame.
dummies = pd.get_dummies(df.location)
df = pd.concat([df, dummies.drop(columns='other')], axis=1)
df = df.drop(columns='location')

# Feature matrix and target.
x = df.drop(columns='price')
y = df['price']

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, random_state=10, test_size=0.2
)

# Baseline linear model; this is the estimator predict_price_func uses.
lr_clf = LinearRegression()
lr_clf.fit(X_train, y_train)
# Score on the held-out rows (the returned value is not stored).
lr_clf.score(X_test, y_test)

# 5 shuffled 80/20 splits for cross-validating a fresh linear model
# (the returned scores are not stored).
cv = ShuffleSplit(n_splits=5, random_state=10, test_size=0.2)
cross_val_score(LinearRegression(), x, y, cv=cv)

from sklearn.model_selection import GridSearchCV

from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor

def find_best_model(x, y):
  """Grid-search three regressors on (x, y) and tabulate their best results.

  The candidates are LinearRegression, Lasso and DecisionTreeRegressor, each
  with its own parameter grid. Every search uses the same ShuffleSplit
  (5 splits, 20% test size, random_state 10).

  Args:
    x: feature matrix passed to ``GridSearchCV.fit``.
    y: target values passed to ``GridSearchCV.fit``.

  Returns:
    A DataFrame with one row per candidate and the columns
    ``model``, ``best_score`` and ``best_params``.
  """
  splitter = ShuffleSplit(n_splits=5, test_size=0.2, random_state=10)

  # (name, estimator, parameter grid) for every candidate, in report order.
  candidates = [
      (
          'linear_reg',
          LinearRegression(),
          {
              'fit_intercept': [True, False],
              'copy_X': [True, False],
              'n_jobs': [None, -1],
              'positive': [True, False],
          },
      ),
      (
          'lasso',
          Lasso(),
          {
              'alpha': [1, 2],
              'selection': ['random', 'cyclic'],
          },
      ),
      (
          'dec_tree',
          DecisionTreeRegressor(),
          {
              'criterion': ['friedman_mse', 'squared_error', 'poisson', 'absolute_error'],
              'splitter': ['best', 'random'],
          },
      ),
  ]

  rows = []
  for label, estimator, grid in candidates:
    search = GridSearchCV(estimator, grid, cv=splitter, return_train_score=False)
    search.fit(x, y)
    rows.append({
        'model': label,
        'best_score': search.best_score_,
        'best_params': search.best_params_,
    })
  return pd.DataFrame(rows, columns=['model', 'best_score', 'best_params'])

# Run the model comparison on the full data set; the returned table is not
# stored or printed here.
find_best_model(x, y)

def predict_price_func(location, sqft, bath, bhk):
  """Predict the price of a listing with the fitted linear model.

  Builds a single feature row matching the columns of the module-level
  feature frame ``x`` and passes it to the module-level ``lr_clf``.

  Args:
    location: location name. Surrounding whitespace is ignored. A name that
      has no column in ``x`` (for example one that was folded into the
      'other' bucket) is encoded with all location columns set to 0 instead
      of raising.
    sqft: value for feature column 0.
    bath: value for feature column 1.
    bhk: value for feature column 2.

  Returns:
    The first (only) element of ``lr_clf.predict`` for the built row.
  """
  # Columns 0-2 hold the numeric features (presumably total_sqft, bath and
  # BHK, in that order -- confirm against x.columns); the rest are the
  # one-hot location columns.
  n_numeric = 3

  xdash = np.zeros(len(x.columns))
  xdash[0] = sqft
  xdash[1] = bath
  xdash[2] = bhk

  # Location names were stripped when the features were built, so strip the
  # input the same way before looking it up.
  if isinstance(location, str):
    location = location.strip()

  matches = np.where(x.columns == location)[0]
  # No match -> leave every location column at 0. A match inside the numeric
  # columns is ignored so a location string can never overwrite a feature.
  if matches.size > 0 and matches[0] >= n_numeric:
    xdash[matches[0]] = 1

  return lr_clf.predict([xdash])[0]

import gradio as gr

from gradio.components import Textbox, Number

# Build the web UI. The inputs are listed in the same order as the parameters
# of predict_price_func(location, sqft, bath, bhk), since the component values
# are handed to ``fn`` in list order.
#
# The components come from gradio.components (imported above) rather than the
# legacy ``gr.inputs`` namespace, which is no longer available in current
# Gradio releases. The former theme="huggingface" argument is dropped because
# it is not one of the current built-in themes; the default theme is used.
interface = gr.Interface(
    fn=predict_price_func,
    inputs=[
        Textbox(label="Location"),
        Number(label="Total area (sqft)"),
        Number(label="Bathrooms"),
        Number(label="Bedrooms (BHK)"),
    ],
    outputs="text",
)

interface.launch()