SagarBapodara's picture
Rename app.py.py to app.py
d80af21
raw
history blame contribute delete
No virus
5.88 kB
#!/usr/bin/env python
# coding: utf-8
#Importing the dependencies
import pandas as pd
import numpy as np
import streamlit as st
# Loading the Dataset
RtData = pd.read_csv('RestaurantRatingData.csv', encoding='latin')
# Selecting the restaurants located in India
RtData = RtData[(RtData.Currency == "Indian Rupees(Rs.)")]
# Removing the data where Average cost is 0
RtData = RtData.loc[(RtData['Average Cost for two'] > 0)]
# Deleting those columns which are not useful in predictive analysis because these variables are qualitative
UselessColumns = ['Restaurant ID', 'Restaurant Name','City','Address',
'Locality', 'Locality Verbose','Cuisines']
RtData = RtData.drop(UselessColumns,axis=1)
RtData.head(5)
RtData.rename(columns={'Has Table booking': 'Has_Table_booking', 'Has Online delivery' : 'Has_Online_delivery', 'Average Cost for two':'Average_Cost_for_two', 'Price range':'Price_range'}, inplace=True)
# Finding nearest values to 4000 mark
RtData['Votes'][RtData['Votes']<4000].sort_values(ascending=False)
# Above result shows the nearest logical value is 3986, hence, replacing any value above 4000 with it.
# Replacing outliers with nearest possibe value
RtData['Votes'][RtData['Votes']>4000] =3986
# Above result shows the nearest logical value is 8000, hence, replacing any value above 50000 with it.
## Replacing outliers with nearest possibe value
RtData['Average_Cost_for_two'][RtData['Average_Cost_for_two']>50000] = 8000
#Final Selected Predictors
SelectedColumns=['Votes','Average_Cost_for_two','Has_Table_booking',
'Has_Online_delivery','Price_range']
# Selecting final columns
DataForML=RtData[SelectedColumns]
# Converting the binary nominal variable sex to numeric
DataForML['Has_Table_booking'].replace({'Yes':1, 'No':0}, inplace=True)
DataForML['Has_Online_delivery'].replace({'Yes':1, 'No':0}, inplace=True)
# Treating all the nominal variables at once using dummy variables
DataForML_Numeric=pd.get_dummies(DataForML)
# Adding Target Variable to the data
DataForML_Numeric['Rating']=RtData['Rating']
# Printing sample rows
DataForML_Numeric.head()
# Separate Target Variable and Predictor Variables
TargetVariable='Rating'
Predictors=['Votes', 'Average_Cost_for_two', 'Has_Table_booking',
'Has_Online_delivery', 'Price_range']
X=DataForML_Numeric[Predictors].values
y=DataForML_Numeric[TargetVariable].values
# Split the data into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=428)
# XGBOOST Model
# Xtreme Gradient Boosting (XGBoost)
from xgboost import XGBRegressor
RegModel=XGBRegressor(max_depth=2, learning_rate=0.1, verbosity = 0, silent=True, n_estimators=1000, objective='reg:linear', booster='gbtree')
# Printing all the parameters of XGBoost
print(RegModel)
# Creating the model on Training Data
XGB=RegModel.fit(X_train,y_train)
prediction=XGB.predict(X_test)
@st.cache()
# Defining the function which will make the prediction using the data which the user inputs
def prediction(Votes, Average_Cost_for_two, Has_Table_booking, Has_Online_delivery, Price_range):
pred = None
if Has_Table_booking == "No":
Has_Table_booking = 0
else:
Has_Table_booking = 1
if Has_Online_delivery == "No":
Has_Online_delivery = 0
else:
Has_Online_delivery = 1
# Making predictions
pred_inputs = XGB.predict(pd.DataFrame([[Votes, Average_Cost_for_two, Has_Table_booking, Has_Online_delivery, Price_range]]))
if pred_inputs[0] <= 2:
pred = 'It is a Low Rated Restaurant.'
elif ((pred_inputs[0] >= 3) and (pred_inputs[0] <= 4)):
pred = 'It is a Decent Rated Restaurant'
elif pred_inputs[0] >= 4:
pred = 'It is a High Rated Restaurant'
return pred
def main():
# front end elements of the web page
html_temp = """
<div style ="background-color:orange;padding:13px">
<h1 style ="color:black;text-align:center;"> Restaurant Rating
Prediction App</h1>
<h8 style ="color:black;text-align:center;"> The data from an online food app,
which needs assistance in predicting the future success or failure of a business (restaurant),
has been used in this case study. Such that they can choose whether to delete the restaurant
from their app or keep it. They have provided information from of 8643 eateries from different
states of India that are currently accessible on their app. It contains details about the
restaurants, including the overall rating. Below I have developed a machine learning model
that can predict a restaurant's rating based on its attributes.</h8>
</div>
"""
# Display dataset when check box is ON
if st.checkbox('View dataset in table data format'):
st.dataframe(RtData)
# display the front end aspect
st.markdown(html_temp, unsafe_allow_html = True)
# following lines create boxes in which user can enter data required to make prediction
Votes = st.number_input("No. of Votes (Range between 0 to 2500)")
Average_Cost_for_two= st.number_input("Cost of 2 person between 50 to 8000 (Indian Rupees(Rs.))")
Price_range = st.number_input("Price Range between 1(Inexpensive) to 4(Most Expensive)")
Has_Table_booking= st.selectbox(' Has Table Booking',("Yes","No"))
Has_Online_delivery= st.selectbox(' Has Online Delivery',("Yes","No"))
result =""
# when 'Predict' is clicked, make the prediction and store it
if st.button("Predict"):
result = prediction(Votes, Average_Cost_for_two, Has_Table_booking, Has_Online_delivery, Price_range)
st.success('Final Decision: {}'.format(result))
if __name__=='__main__':
main()