Spaces:

Aashiue
/

Real_Estate_Price_Prediction

Runtime error

App Files Files Community

Real_Estate_Price_Prediction / app.py

Aashiue

Create app.py

56ead55 over 1 year ago

raw

history blame

6.48 kB

	import pandas as pd
	import numpy as np
	from matplotlib import pyplot as plt
	import matplotlib

	# `%matplotlib inline` is IPython magic and a SyntaxError in a plain .py file.
	# Request inline plotting only when an IPython/Jupyter shell is present;
	# when run as a script, get_ipython is undefined and the step is skipped.
	try:
	    get_ipython().run_line_magic("matplotlib", "inline")
	except NameError:
	    pass

	# Default figure size for the plots drawn below.
	matplotlib.rcParams["figure.figsize"] = (20, 10)

	# Load the raw dataset from a hard-coded absolute path.
	# NOTE(review): '/content/...' looks like a Colab location — confirm the file
	# exists at this path wherever the script is deployed.
	path = '/content/bengaluru_house_prices.csv'
	df = pd.read_csv(path)
	# Bare expressions such as df.head() only display a value inside a notebook;
	# run as a script they are evaluated and discarded.
	df.head()

	df.shape

	# Row count per area_type (inspection only; the result is not stored).
	df.groupby('area_type')['area_type'].agg('count')

	# Drop columns that are never referenced again further down.
	df = df.drop(['area_type','society','balcony','availability'], axis = 'columns')
	df.head()

	# Count missing values per column, then discard every row that has any.
	df.isnull().sum()

	df=df.dropna()
	df.head()

	df.shape
	df.isnull().sum()

	df['size'].unique()

	# BHK = the text before the first space of `size`, converted to int.
	df['BHK'] = df['size'].apply(lambda x: int(x.split(' ')[0]))

	df.head()

	df['BHK'].unique()

	df['total_sqft'].unique()

	def isfloat(x):
	    """Convert a ``total_sqft`` entry to a float, or ``None`` if unparseable.

	    Accepts a plain number (``'1200'``) or a range written as two numbers
	    separated by a hyphen (``'2100 - 2600'``), which is reduced to the
	    midpoint of its two ends.

	    Args:
	        x: The raw cell value; normally a string, but numeric values are
	            accepted and returned as ``float``.

	    Returns:
	        The parsed float, or ``None`` when ``x`` is neither a number nor a
	        two-number range (e.g. ``'34.46Sq. Meter'``).
	    """
	    # A plain number (or an already-numeric value) needs no further parsing.
	    try:
	        return float(x)
	    except (TypeError, ValueError):
	        pass

	    # Only strings can still be a "low - high" range.
	    if not isinstance(x, str):
	        return None

	    bounds = x.split('-')
	    if len(bounds) != 2:
	        return None
	    try:
	        low, high = float(bounds[0]), float(bounds[1])
	    except ValueError:
	        # Two hyphen-separated pieces that are not both numbers.
	        return None
	    return (low + high) / 2

	# Quick check of the range branch of isfloat on a sample value.
	isfloat('2100 - 2600')

	# Replace the raw total_sqft text with the value returned by isfloat.
	df['total_sqft'] = df['total_sqft'].apply(isfloat)
	df.head(31)

	# `size` is no longer needed once BHK has been derived from it.
	df=df.drop(['size'], axis = 'columns')

	df.head(31)

	df.dtypes

	# price is multiplied by 100000 before dividing by the area.
	# NOTE(review): presumably price is quoted in lakhs — confirm against the dataset.
	df['price_per_sqft'] = df['price']*100000/df['total_sqft']
	df.head()

	len(df.location.unique())

	# Strip surrounding whitespace from location names, then count rows per
	# location, most frequent first.
	df.location = df.location.apply(lambda x: x.strip())
	loc_stats = df.groupby('location')['location'].agg('count').sort_values(ascending = False)
	loc_stats

	len(loc_stats[loc_stats <= 10])

	# Locations that occur in 10 rows or fewer.
	loc_stats_ten = loc_stats[loc_stats<=10]
	loc_stats_ten

	# Relabel those rare locations as 'other'. `x in loc_stats_ten` tests the
	# Series index, which holds the location names produced by the groupby.
	df.location = df.location.apply(lambda x: 'other' if x in loc_stats_ten else x)


	len(df.location.unique());

	df.head(10)

	# Inspect, then remove, rows with less than 300 sq ft per bedroom.
	df[df.total_sqft/df.BHK < 300].head()

	df = df[~(df.total_sqft/df.BHK < 300)]

	df.price_per_sqft.describe()

	def rem_out(df):
	    """Remove per-location ``price_per_sqft`` outliers.

	    For each ``location`` group, only rows whose ``price_per_sqft`` lies in
	    ``(mean - std, mean + std]`` of that group are kept, where mean and std
	    come from ``np.mean`` / ``np.std`` on the group's column.

	    Args:
	        df: Frame with at least ``location`` and ``price_per_sqft`` columns.

	    Returns:
	        A new DataFrame of the kept rows, ordered by location group, with a
	        fresh 0..n-1 index. An empty DataFrame if ``df`` has no groups.
	    """
	    kept = []
	    for _, group in df.groupby('location'):
	        pps = group.price_per_sqft
	        mu = np.mean(pps)
	        std = np.std(pps)
	        kept.append(group[(pps > (mu - std)) & (pps <= (mu + std))])

	    # pd.concat rejects an empty list, so handle the no-group case explicitly.
	    if not kept:
	        return pd.DataFrame()

	    # Concatenate once; concatenating inside the loop re-copies every row
	    # gathered so far on each iteration.
	    return pd.concat(kept, ignore_index=True)

	# Replace df with the per-location filtered frame returned by rem_out.
	df = rem_out(df);
	df.shape
	df.head()

	def plot_scatter(df, location):
	    """Scatter ``price`` against ``total_sqft`` for 2 and 3 BHK rows of one location.

	    Args:
	        df: Frame with ``location``, ``BHK``, ``total_sqft`` and ``price`` columns.
	        location: Location name to select rows by.

	    Side effects:
	        Sets the global matplotlib ``figure.figsize`` to (15, 10) and draws
	        onto the current pyplot figure.
	    """
	    # Filter by location once, then split by bedroom count.
	    in_location = df[df.location == location]
	    bhk2 = in_location[in_location.BHK == 2]
	    bhk3 = in_location[in_location.BHK == 3]

	    matplotlib.rcParams['figure.figsize'] = (15, 10)
	    plt.scatter(bhk2.total_sqft, bhk2.price, color='red', label='2 BHK', s=50)
	    plt.scatter(bhk3.total_sqft, bhk3.price, color='blue', label='3 BHK', s=50)
	    plt.xlabel('Total sq feet area')
	    # The y values are `price`, not price per square foot, so label them as such.
	    plt.ylabel('Price')
	    plt.legend()

	# Plot one location ("Hebbal") before remove_outlier is applied further down.
	plot_scatter(df, "Hebbal")

	df.head()

	def remove_outlier(df):
	    """Drop rows priced below the mean of the next-smaller BHK in the same location.

	    Within each ``location``, a row with ``BHK == n`` is removed when its
	    ``price_per_sqft`` is below the mean ``price_per_sqft`` of the ``n - 1``
	    BHK rows of that location, provided that reference group has more than
	    5 rows.

	    Args:
	        df: Frame with ``location``, ``BHK`` and ``price_per_sqft`` columns.

	    Returns:
	        A copy of ``df`` without the excluded rows; the original index labels
	        of the remaining rows are preserved.
	    """
	    # Collect labels in a list: repeated np.append re-copies the array every
	    # time and, starting from a float array, coerces integer labels to float.
	    to_drop = []
	    for _, location_df in df.groupby('location'):
	        by_bhk = list(location_df.groupby('BHK'))

	        # Mean price_per_sqft and row count for every BHK value in this location.
	        bhk_stat = {
	            bhk: {
	                'mean': np.mean(bhk_df.price_per_sqft),
	                'count': bhk_df.shape[0],
	            }
	            for bhk, bhk_df in by_bhk
	        }

	        for bhk, bhk_df in by_bhk:
	            stat = bhk_stat.get(bhk - 1)
	            if stat and stat['count'] > 5:
	                below_reference = bhk_df.price_per_sqft < stat['mean']
	                to_drop.extend(bhk_df[below_reference].index)

	    return df.drop(to_drop, axis='index')

	# Apply the BHK-based outlier filter and re-plot the same location.
	df = remove_outlier(df)
	df.shape

	plot_scatter(df, "Hebbal")

	# Histogram of price_per_sqft.
	matplotlib.rcParams["figure.figsize"] = (20,10)
	plt.hist(df.price_per_sqft, rwidth=0.8)
	plt.xlabel("price per sq feet")
	plt.ylabel("count")

	df.bath.unique()

	# Histogram of bathroom counts.
	plt.hist(df.bath, rwidth = 0.5)
	plt.xlabel('no. of bathrooms')
	plt.ylabel('count')

	# Inspect rows with more than BHK+2 bathrooms.
	df[df.bath > df.BHK+2]

	# Keep rows with fewer than BHK+2 bathrooms.
	# NOTE(review): the inspection above uses `>`, this filter uses `<`, so rows
	# with bath == BHK+2 are dropped as well — confirm that is intended.
	df = df[df.bath < df.BHK+2]
	df.shape

	# price_per_sqft was only needed for the outlier filtering above.
	df = df.drop(['price_per_sqft'], axis = 'columns')
	df.head(10)

	# One-hot encode location.
	dummies = pd.get_dummies(df.location)
	dummies.head()

	# Append every dummy column except 'other'; a row whose location is 'other'
	# therefore has no location column set.
	df = pd.concat([df, dummies.drop('other', axis = 'columns')], axis = 'columns')
	df.head()

	df = df.drop('location', axis = 'columns')
	df.head()

	df.shape

	# Feature matrix: every remaining column except the target.
	x = df.drop('price', axis = 'columns')
	x.head()

	# Target vector.
	y = df.price
	y.head()

	# Hold out 20% of the rows for testing; random_state fixes the split.
	from sklearn.model_selection import train_test_split
	X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 10)

	# Fit a linear regression on the training split and score it on the held-out
	# split. lr_clf is the model later used by predict_price_func.
	from sklearn.linear_model import LinearRegression
	lr_clf = LinearRegression()
	lr_clf.fit(X_train, y_train)
	lr_clf.score(X_test, y_test)

	from sklearn.model_selection import ShuffleSplit
	from sklearn.model_selection import cross_val_score

	# Five shuffled 80/20 splits for cross-validation.
	cv = ShuffleSplit(n_splits = 5, test_size = 0.2, random_state = 10)

	# Cross-validated scores of a fresh LinearRegression on the full data
	# (the result is not stored).
	cross_val_score(LinearRegression(), x, y, cv = cv)

	from sklearn.model_selection import GridSearchCV

	from sklearn.linear_model import Lasso
	from sklearn.tree import DecisionTreeRegressor

	def find_best_model(x, y):
	    """Grid-search three regressors and tabulate each one's best result.

	    Runs ``GridSearchCV`` over a linear regression, a lasso and a decision
	    tree, each with its own parameter grid, using five shuffled 80/20 splits.

	    Args:
	        x: Feature matrix passed to ``GridSearchCV.fit``.
	        y: Target values passed to ``GridSearchCV.fit``.

	    Returns:
	        A DataFrame with one row per model and the columns ``model``,
	        ``best_score`` and ``best_params``.
	    """
	    linear_grid = {
	        'fit_intercept': [True, False],
	        'copy_X': [True, False],
	        'n_jobs': [None, -1],
	        'positive': [True, False],
	    }
	    lasso_grid = {
	        'alpha': [1, 2],
	        'selection': ['random', 'cyclic'],
	    }
	    tree_grid = {
	        'criterion': ['friedman_mse', 'squared_error', 'poisson', 'absolute_error'],
	        'splitter': ['best', 'random'],
	    }

	    # (label, estimator, grid) triples, evaluated in this order.
	    candidates = [
	        ('linear_reg', LinearRegression(), linear_grid),
	        ('lasso', Lasso(), lasso_grid),
	        ('dec_tree', DecisionTreeRegressor(), tree_grid),
	    ]

	    splitter = ShuffleSplit(n_splits=5, test_size=0.2, random_state=10)
	    rows = []
	    for label, estimator, grid in candidates:
	        search = GridSearchCV(estimator, grid, cv=splitter, return_train_score=False)
	        search.fit(x, y)
	        rows.append({
	            'model': label,
	            'best_score': search.best_score_,
	            'best_params': search.best_params_,
	        })

	    return pd.DataFrame(rows, columns=['model', 'best_score', 'best_params'])

	# Compare the candidate models on the full feature matrix and target
	# (the returned table is not stored).
	find_best_model(x,y)

	def predict_price_func(location, sqft, bath, bhk):
	    """Predict a price with the fitted ``lr_clf`` model.

	    Builds one feature row laid out like ``x.columns``: slots 0-2 hold
	    ``sqft``, ``bath`` and ``bhk``, and the column named after ``location``
	    (if any) is set to 1.

	    Args:
	        location: Location name; surrounding whitespace is ignored.
	        sqft: Total area, written to feature slot 0.
	        bath: Number of bathrooms, written to feature slot 1.
	        bhk: Number of bedrooms, written to feature slot 2.

	    Returns:
	        The single value predicted by ``lr_clf`` for that row.
	    """
	    columns = x.columns
	    features = np.zeros(len(columns))
	    features[0] = sqft
	    features[1] = bath
	    features[2] = bhk

	    name = location.strip() if isinstance(location, str) else location
	    matches = np.where(columns == name)[0]
	    # A location with no column of its own (unknown, or 'other', whose dummy
	    # column was dropped before training) keeps all location flags at zero
	    # instead of raising IndexError. The `>= 3` guard stops a name that
	    # matches a numeric column from overwriting sqft/bath/bhk.
	    if matches.size and matches[0] >= 3:
	        features[matches[0]] = 1

	    return lr_clf.predict([features])[0]

	df.head()

	# List the feature column names; location names accepted by
	# predict_price_func are the ones that appear here.
	print(x.columns)

	# Sample predictions; arguments are (location, sqft, bath, bhk).
	# Run as a script, the returned values are discarded.
	predict_price_func('1st Phase JP Nagar', 1200, 2, 2)

	predict_price_func('Indira Nagar', 1200, 3, 3)

	predict_price_func('Indira Nagar', 1200, 1, 3)

	predict_price_func('Indira Nagar', 1200, 3, 4)

	# gradio must be installed before this script runs (e.g. listed in
	# requirements.txt). The notebook-only `!pip install gradio` shell escape is
	# a SyntaxError in a plain .py file and has been removed.
	import gradio as gr

	from gradio.components import Textbox, Number

	# Components are passed positionally to predict_price_func, so their order
	# must match its signature: (location, sqft, bath, bhk).
	interface = gr.Interface(
	    fn=predict_price_func,
	    inputs=[
	        Textbox(label="Location"),
	        Number(label="Total area (sq ft)"),
	        Number(label="Bathrooms"),
	        Number(label="Bedrooms (BHK)"),
	    ],
	    outputs="text",
	    theme="huggingface"
	)

	interface.launch()