samueltatsu committed on
Commit
f760835
1 Parent(s): 7a22cc9

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +14 -0
  2. data_eda.csv +0 -0
  3. eda.py +61 -0
  4. model.pkl +3 -0
  5. model.py +54 -0
  6. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Libraries
2
+ import streamlit as st
3
+
4
+ # Import finished streamlit pages
5
+ import eda
6
+ import model
7
+
8
+ # Navigation Button
9
# Sidebar navigation: the selectbox returns the label of the chosen page.
navi = st.sidebar.selectbox('Choose page: ', ('Predictor', 'EDA'))

# Anything other than 'Predictor' falls through to the EDA page.
if navi != 'Predictor':
    eda.run()
else:
    model.run()
data_eda.csv ADDED
The diff for this file is too large to render. See raw diff
 
eda.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Essential Library
2
+ import streamlit as st
3
+ import pandas as pd
4
+
5
+ # Library for Visualization
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+
9
+ # Function to run EDA
10
def _show_value_counts(data, column, figsize=(10, 5)):
    """Render a bar chart of the value frequencies of ``column`` in Streamlit.

    Args:
        data: DataFrame holding the column to summarise.
        column: Name of the column whose distinct values are counted.
        figsize: ``(width, height)`` passed to ``plt.figure``.
    """
    # Count once and reuse the result for x, y and hue.
    counts = data[column].value_counts()
    canvas = plt.figure(figsize=figsize)
    sns.barplot(x=counts.index, y=counts, hue=counts.index)
    st.pyplot(canvas)
    # pyplot keeps every figure it creates registered until it is closed;
    # Streamlit re-executes the script on each interaction, so without this
    # the figures would pile up in memory.
    plt.close(canvas)


def run():
    """Render the Exploratory Data Analysis page of the app.

    Reads ``data_eda.csv`` from the working directory, shows its first 20
    rows and draws distribution charts for the ``Response``,
    ``Holding_Policy_Duration``, ``Holding_Policy_Type`` and
    ``Reco_Policy_Cat`` columns.
    """
    # Page header
    st.title('Insurance Lead Prediction Model')
    st.subheader('Exploratory Data Analysis Section')
    st.markdown('---')

    # Banner image
    st.image('https://www.startinsland.de/site/assets/files/4129/tk-logo_koop_official_health_partner_pos.800x0.png')

    # Preview of the dataset
    st.markdown('# Dataframe Insurance Lead')
    data = pd.read_csv('data_eda.csv')
    st.dataframe(data.head(20))
    st.markdown('---')

    st.markdown('## EDA')

    # Convert Rate Balance
    st.markdown('### Convert Rate Balance')
    _show_value_counts(data, 'Response')
    st.markdown('Data is still slightly imbalanced (biased towards clients who will not likely convert)')

    # Holding Policy Duration Distribution
    st.markdown('### Holding Policy Duration Distribution')
    canvas = plt.figure(figsize=(10, 5))
    sns.histplot(data['Holding_Policy_Duration'], kde=True, bins=15)
    st.pyplot(canvas)
    plt.close(canvas)  # release the figure once Streamlit has rendered it

    # Holding Policy Type Distribution
    st.markdown('### Holding Policy Type Distribution')
    _show_value_counts(data, 'Holding_Policy_Type')

    # Recommended Policy Category Distribution (wider canvas: more categories)
    st.markdown('### Recommended Policy Category Distribution')
    _show_value_counts(data, 'Reco_Policy_Cat', figsize=(15, 5))


if __name__ == '__main__':
    run()
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5901c17ef92b073b79655e7bc96cab27876d787492a28271179e25f46d16c02c
3
+ size 34246
model.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import Essential Library
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import pickle
5
+
6
+ # Load Model
7
# Deserialize the trained model stored next to this script.
# NOTE: unpickling can execute arbitrary code, so 'model.pkl' must always be
# a trusted, project-owned file.
with open('model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# NOTE(review): neither list is referenced anywhere else in this module, and
# the names do not match the form fields built in run() -- presumably left
# over from another project; confirm before removing.
list_cat_cols = [
    'education_level',
    'pay_sep05',
    'pay_aug05',
    'pay_jul05',
    'pay_jun05',
    'pay_may05',
    'pay_apr05',
]
list_num_cols = [
    'limit_balance',
    'pay_amt_sep05',
    'pay_amt_aug05',
    'pay_amt_jul05',
    'pay_amt_jun05',
    'pay_amt_may05',
    'pay_amt_apr05',
]
12
+
13
+ # Function to run model predictor
14
def run():
    """Render the prediction page of the app.

    Collects ``Holding_Policy_Duration``, ``Holding_Policy_Type`` and
    ``Reco_Policy_Cat`` through a Streamlit form, echoes them back as a
    one-row DataFrame and, once the form is submitted, writes whether the
    module-level ``model`` predicts that the lead converts.
    """
    # Page header. The title used to read 'Credit Card Default Prediction
    # Model', which contradicted the inputs and the result messages below.
    st.title('Insurance Lead Prediction Model')
    st.subheader('Model Predict Section')
    st.markdown('---')

    # Banner image
    st.image('https://www.startinsland.de/site/assets/files/4129/tk-logo_koop_official_health_partner_pos.800x0.png')

    # Input form; values are only sent to the script when "Check" is pressed.
    st.markdown('## Input Data')
    with st.form('my_form'):
        Holding_Policy_Duration = st.slider('Holding Policy Duration', min_value=1, max_value=14, value=2, step=1)
        Holding_Policy_Type = st.selectbox('Holding Policy Type', (1, 2, 3, 4))
        Reco_Policy_Cat = st.slider('Recommended Policy Category', min_value=1, max_value=22, value=6, step=1)

        submitted = st.form_submit_button("Check")

    # One-row DataFrame whose column names are the feature names fed to the model.
    data = {
        'Holding_Policy_Duration': Holding_Policy_Duration,
        'Holding_Policy_Type': Holding_Policy_Type,
        'Reco_Policy_Cat': Reco_Policy_Cat,
    }
    df = pd.DataFrame([data])

    # Show the values that will be used for inference.
    st.dataframe(df)

    if submitted:
        # predict() is expected to return one label per input row; df has a
        # single row, so compare that one label rather than the whole array.
        prediction = model.predict(df)
        if prediction[0] == 1:
            st.write('Lead will likely become actual customer')
        else:
            st.write('Lead will not likely become actual customer')


if __name__ == '__main__':
    run()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas
2
+ seaborn
3
+ matplotlib
4
+ pickleshare
5
+ scikit-learn==1.3.0