Spaces:
Runtime error
Runtime error
SagarBapodara
committed on
Commit
•
3719961
1
Parent(s):
d4d9bfe
Added File
Browse files
app.py.py
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
#Importing the dependencies
|
5 |
+
import pandas as pd
|
6 |
+
import numpy as np
|
7 |
+
import streamlit as st
|
8 |
+
|
9 |
+
|
10 |
+
# Loading the Dataset
RtData = pd.read_csv('RestaurantRatingData.csv', encoding='latin')
# Selecting the restaurants located in India
RtData = RtData[RtData.Currency == "Indian Rupees(Rs.)"]
# Removing the data where Average cost is 0
RtData = RtData.loc[RtData['Average Cost for two'] > 0]


# Deleting those columns which are not useful in predictive analysis because these variables are qualitative
UselessColumns = ['Restaurant ID', 'Restaurant Name', 'City', 'Address',
                  'Locality', 'Locality Verbose', 'Cuisines']
RtData = RtData.drop(UselessColumns, axis=1)

# Replace spaces in the column names with underscores so they match the
# predictor names used further down.
RtData = RtData.rename(columns={
    'Has Table booking': 'Has_Table_booking',
    'Has Online delivery': 'Has_Online_delivery',
    'Average Cost for two': 'Average_Cost_for_two',
    'Price range': 'Price_range',
})

# Replacing outliers with the nearest possible value.
# Per the original exploration, 3986 is the nearest logical vote count below
# the 4000 mark, so anything above 4000 is capped to it.
# .loc with a boolean mask is used instead of chained indexing
# (RtData['Votes'][mask] = ...), which assigns to a temporary object and is
# not guaranteed to modify RtData.
RtData.loc[RtData['Votes'] > 4000, 'Votes'] = 3986

# Per the original exploration, 8000 is the nearest logical cost, so any
# value above 50000 is replaced with it.
RtData.loc[RtData['Average_Cost_for_two'] > 50000, 'Average_Cost_for_two'] = 8000

# Final Selected Predictors
SelectedColumns = ['Votes', 'Average_Cost_for_two', 'Has_Table_booking',
                   'Has_Online_delivery', 'Price_range']

# Selecting final columns. An explicit copy makes DataForML independent of
# RtData, so the column assignments below are applied to DataForML itself.
DataForML = RtData[SelectedColumns].copy()

# Converting the binary Yes/No columns to numeric 1/0.
# Assigning the mapped column back (rather than replace(..., inplace=True) on
# a selected column) guarantees the conversion actually lands in DataForML.
# NOTE(review): assumes these columns only contain 'Yes' or 'No'; any other
# value becomes NaN with map() - confirm against the CSV.
YesNoToInt = {'Yes': 1, 'No': 0}
DataForML['Has_Table_booking'] = DataForML['Has_Table_booking'].map(YesNoToInt)
DataForML['Has_Online_delivery'] = DataForML['Has_Online_delivery'].map(YesNoToInt)

# Treating all the nominal variables at once using dummy variables
DataForML_Numeric = pd.get_dummies(DataForML)

# Adding Target Variable to the data
DataForML_Numeric['Rating'] = RtData['Rating']

# Separate Target Variable and Predictor Variables
TargetVariable = 'Rating'
Predictors = ['Votes', 'Average_Cost_for_two', 'Has_Table_booking',
              'Has_Online_delivery', 'Price_range']

X = DataForML_Numeric[Predictors].values
y = DataForML_Numeric[TargetVariable].values

# Split the data into training and testing set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=428)

# XGBOOST Model
# Xtreme Gradient Boosting (XGBoost)
from xgboost import XGBRegressor
# 'reg:squarederror' is the current name of the squared-error objective that
# used to be called 'reg:linear'. The obsolete `silent` flag is dropped;
# verbosity=0 already requests silent training.
RegModel = XGBRegressor(max_depth=2, learning_rate=0.1, verbosity=0,
                        n_estimators=1000, objective='reg:squarederror',
                        booster='gbtree')

# Printing all the parameters of XGBoost
print(RegModel)

# Creating the model on Training Data
XGB = RegModel.fit(X_train, y_train)
# Hold-out predictions. Stored under their own name so they no longer share
# the name `prediction` with the function defined later in this file.
TestPredictions = XGB.predict(X_test)
|
80 |
+
|
81 |
+
# NOTE(review): st.cache is reported as deprecated in newer Streamlit releases
# (st.cache_data is the suggested replacement) - confirm the deployed version
# before switching; the decorator is left unchanged here.
@st.cache()
def prediction(Votes, Average_Cost_for_two, Has_Table_booking, Has_Online_delivery, Price_range):
    """Classify a restaurant as Low / Decent / High rated from user inputs.

    Args:
        Votes: number of votes entered by the user.
        Average_Cost_for_two: average cost for two people entered by the user.
        Has_Table_booking: "No" is encoded as 0; any other value as 1.
        Has_Online_delivery: "No" is encoded as 0; any other value as 1.
        Price_range: price range entered by the user.

    Returns:
        A human-readable sentence describing the predicted rating band.
    """
    # Encode the two Yes/No choices as 1/0 before passing them to the model.
    Has_Table_booking = 0 if Has_Table_booking == "No" else 1
    Has_Online_delivery = 0 if Has_Online_delivery == "No" else 1

    # Making predictions: one row, features in the order
    # Votes, Average_Cost_for_two, Has_Table_booking, Has_Online_delivery, Price_range.
    pred_inputs = XGB.predict(pd.DataFrame([[Votes, Average_Cost_for_two, Has_Table_booking,
                                             Has_Online_delivery, Price_range]]))
    score = pred_inputs[0]

    # Contiguous bands. The previous checks (<= 2, 3..4, >= 4) matched nothing
    # for a score strictly between 2 and 3, so the function returned None.
    # Every score that was classified before keeps its label; the former gap
    # is treated as low rated.
    if score < 3:
        return 'It is a Low Rated Restaurant.'
    if score <= 4:
        return 'It is a Decent Rated Restaurant'
    return 'It is a High Rated Restaurant'
|
112 |
+
|
113 |
+
|
114 |
+
|
115 |
+
|
116 |
+
def main():
    """Render the Streamlit page and show a rating prediction on demand.

    Displays an optional table of the dataset, the page header, the input
    widgets for the five model features and, when the "Predict" button is
    pressed, the sentence returned by ``prediction``.
    """
    # front end elements of the web page
    # NOTE(review): <h8> is not a standard HTML heading element; the markup is
    # kept as-is so the rendered page does not change - confirm whether <p>
    # was intended.
    html_temp = """
<div style ="background-color:orange;padding:13px">
<h1 style ="color:black;text-align:center;"> Restaurant Rating
Prediction App</h1>
<h8 style ="color:black;text-align:center;"> The data from an online food app,
which needs assistance in predicting the future success or failure of a business (restaurant),
has been used in this case study. Such that they can choose whether to delete the restaurant
from their app or keep it. They have provided information from of 8643 eateries from different
states of India that are currently accessible on their app. It contains details about the
restaurants, including the overall rating. Below I have developed a machine learning model
that can predict a restaurant's rating based on its attributes.</h8>
</div>
"""

    # Display dataset when check box is ON
    if st.checkbox('View dataset in table data format'):
        st.dataframe(RtData)

    # display the front end aspect
    st.markdown(html_temp, unsafe_allow_html = True)

    # following lines create boxes in which user can enter data required to make prediction.
    # Each numeric input is bounded to the range its own label advertises and
    # uses integer steps, so the widgets can no longer submit negative,
    # zero-cost or fractional price-range values.
    Votes = st.number_input("No. of Votes (Range between 0 to 2500)",
                            min_value=0, max_value=2500, step=1)
    Average_Cost_for_two = st.number_input("Cost of 2 person between 50 to 8000 (Indian Rupees(Rs.))",
                                           min_value=50, max_value=8000, step=1)
    Price_range = st.number_input("Price Range between 1(Inexpensive) to 4(Most Expensive)",
                                  min_value=1, max_value=4, step=1)
    Has_Table_booking = st.selectbox(' Has Table Booking', ("Yes", "No"))
    Has_Online_delivery = st.selectbox(' Has Online Delivery', ("Yes", "No"))

    # when 'Predict' is clicked, make the prediction and show it
    if st.button("Predict"):
        result = prediction(Votes, Average_Cost_for_two, Has_Table_booking,
                            Has_Online_delivery, Price_range)
        st.success('Final Decision: {}'.format(result))
|
152 |
+
|
153 |
+
|
154 |
+
# Run the page only when this file is the entry script, not when it is imported.
if __name__ == "__main__":
    main()
|
156 |
+
|