add app files
- ML_model.py +41 -0
- app.py +54 -0
- final_model.sav +0 -0
- model_methods.py +21 -0
- requirements.txt +4 -0
- streamlit_data.csv +0 -0
- streamlit_imp_data.csv +0 -0
ML_model.py
ADDED
@@ -0,0 +1,41 @@
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.impute import KNNImputer
import pickle

def ml_model():
    url = 'https://raw.githubusercontent.com/yxmauw/General_Assembly_Pub/main/project_2/cloud_app/streamlit_data.csv'
    df = pd.read_csv(url, header=0)  # load data
    X = df.drop('SalePrice', axis=1)
    y = df['SalePrice']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    enet_ratio = [.5, .8, .9, .95]
    alpha_l = [1., 10., 100., 500., 1000.]

    pipe_enet = Pipeline([
        ('ss', StandardScaler()),
        ('enet', ElasticNet())
    ])

    pipe_enet_params = {'enet__alpha': alpha_l,
                        'enet__l1_ratio': enet_ratio
                        }
    cv_ct = 5
    score = 'neg_mean_absolute_error'

    pipe_enet_gs = GridSearchCV(pipe_enet,
                                pipe_enet_params,
                                cv=cv_ct,
                                scoring=score,
                                verbose=1
                                )

    pipe_enet_gs.fit(X_train, y_train)

    pickle.dump(pipe_enet_gs, open('final_model.sav', 'wb'))
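For context, a minimal sketch (not part of this commit) of how ml_model() could be run to regenerate final_model.sav and inspect the selected hyperparameters. It assumes the training CSV URL above is reachable and that ML_model.py is importable from the working directory:

import pickle
from ML_model import ml_model

ml_model()  # fits the grid search and writes final_model.sav to the working directory

with open('final_model.sav', 'rb') as f:
    gs = pickle.load(f)
print(gs.best_params_)   # chosen enet__alpha and enet__l1_ratio
print(-gs.best_score_)   # cross-validated mean absolute error (sign flipped back)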
app.py
ADDED
@@ -0,0 +1,54 @@
# https://www.analyticsvidhya.com/blog/2021/07/streamlit-quickly-turn-your-ml-models-into-web-apps/
import streamlit as st
import pandas as pd
import numpy as np
from model_methods import predict

# configuration of the page
st.set_page_config(
    layout="centered",
    page_icon="🏠",
    page_title="Are you planning to sell your house?",
    initial_sidebar_state='auto',
)

st.title("🏠 Ames Housing Sale Price recommendation tool")
st.markdown('''
The algorithm driving this app is built on
historical housing sale price data to generate
a recommended Sale Price! Please enter your house details
to get a Sale Price suggestion 🙂
''')
###########################################################
st.info('Only Enter Numeric Values in the Following Fields')

gr_liv_area = st.text_input('Enter house ground living area in square feet. Accepts values 334 to 3395 inclusive', '')
overall_qual = np.nan
total_bsmt_sf = st.text_input('Enter house total basement area in square feet. Accepts values 0 to 3206 inclusive', '')
garage_area = st.text_input('Enter house garage area in square feet. Accepts values 0 to 1356 inclusive', '')
year_built = st.text_input('Enter the year your house was built. Accepts values 1872 to 2010 inclusive', '')
mas_vnr_area = st.text_input('Enter house masonry veneer area in square feet. Accepts values 0 to 1129 inclusive', '')

def predict_price():
    data = list(map(float, [gr_liv_area,
                            (float(gr_liv_area))**2,
                            (float(gr_liv_area))**3,
                            overall_qual,
                            total_bsmt_sf,
                            garage_area,
                            year_built,
                            mas_vnr_area]))
    result = np.format_float_positional((predict(data)[0]), unique=False, precision=0)
    st.info(f'# Our SalePrice suggestion is ${result}')
    st.write('with an estimated uncertainty of ± \$11K')

if st.button('Recommend Saleprice'):
    if gr_liv_area and overall_qual and total_bsmt_sf and garage_area and year_built and mas_vnr_area:
        with st.sidebar:
            try:
                predict_price()
            except:
                st.warning('''Oops, looks like you missed a spot.
                Please complete all fields to get a quote estimate
                for property Sale Price 🙂.
                \n\n Thank you. 🙏''')
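The button handler above feeds predict() an 8-value feature vector in a fixed order: ground living area plus its square and cube, a NaN placeholder for Overall Qual (imputed downstream), then the basement, garage, year-built and masonry veneer inputs. As a rough sketch with made-up example values, the same call could be exercised outside Streamlit like this:

import numpy as np
from model_methods import predict

gr_liv_area = 1500.0
features = [gr_liv_area,
            gr_liv_area ** 2,
            gr_liv_area ** 3,
            np.nan,    # Overall Qual placeholder, filled in by KNNImputer
            1000.0,    # total basement area (sq ft), illustrative value
            500.0,     # garage area (sq ft), illustrative value
            1995.0,    # year built, illustrative value
            100.0]     # masonry veneer area (sq ft), illustrative value
print(predict(features)[0])  # recommended SalePrice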
final_model.sav
ADDED
Binary file (5.76 kB).
model_methods.py
ADDED
@@ -0,0 +1,21 @@
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.impute import KNNImputer
import pickle

def predict(new_data):
    # impute missing `Overall Qual` values
    url = 'https://raw.githubusercontent.com/yxmauw/General_Assembly_Pub/main/project_2/cloud_app/streamlit_imp_data.csv'
    imp_data = pd.read_csv(url, header=0)
    imp = KNNImputer()
    imp.fit(imp_data)
    shaped_data = np.reshape(new_data, (1, -1))
    input_data = imp.transform(shaped_data)
    # load model
    with open('project_2/cloud_app/final_model.sav', 'rb') as f:
        model = pickle.load(f)
    pred = model.predict([input_data][0])
    return pred
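To illustrate the imputation step inside predict(): a KNNImputer is fitted on the reference CSV and fills the NaN Overall Qual slot of the single incoming row before it reaches the pickled pipeline. A toy example with invented reference rows (two columns standing in for area and quality, not the real data):

import numpy as np
from sklearn.impute import KNNImputer

ref = np.array([[1200., 6.],
                [1800., 7.],
                [2500., 9.]])   # invented (area, quality) reference rows
imp = KNNImputer(n_neighbors=2)
imp.fit(ref)
row = np.reshape([2000., np.nan], (1, -1))
print(imp.transform(row))       # NaN replaced using the nearest reference rows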
requirements.txt
ADDED
@@ -0,0 +1,4 @@
streamlit==1.11.1
scikit-learn==1.0.2
pandas==1.4.2
numpy==1.21.5
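A matching local environment could be set up with pip install -r requirements.txt using these pinned versions.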
streamlit_data.csv
ADDED
The diff for this file is too large to render.
streamlit_imp_data.csv
ADDED
The diff for this file is too large to render.