Spaces:
Sleeping
Sleeping
Afitriawan
commited on
Commit
•
0ceba31
1
Parent(s):
adc6325
'update'
Browse files- Car_Insurance_Claim.csv +0 -0
- Car_Insurance_Claim_Cleaned.csv +0 -0
- README.md +0 -12
- __pycache__/eda.cpython-312.pyc +0 -0
- __pycache__/prediction.cpython-312.pyc +0 -0
- age.png +0 -0
- app.py +17 -19
- distclaim.png +0 -0
- eda.py +252 -28
- expdrive.png +0 -0
- list_cat_cols.txt → feature_cat.txt +1 -1
- feature_num.txt +1 -0
- income.png +0 -0
- list_num_cols.txt +0 -1
- encoder.pkl → model.pkl +2 -2
- model.py +0 -93
- prediction.py +95 -0
- ranfo_pipe.pkl +0 -3
- requirements.txt +11 -1
- scaler.pkl +0 -3
- victyp.png +0 -0
Car_Insurance_Claim.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
Car_Insurance_Claim_Cleaned.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
README.md
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Milestone
|
3 |
-
emoji: 🚨
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: indigo
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.32.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__pycache__/eda.cpython-312.pyc
ADDED
Binary file (20 kB). View file
|
|
__pycache__/prediction.cpython-312.pyc
ADDED
Binary file (4.84 kB). View file
|
|
age.png
DELETED
Binary file (18 kB)
|
|
app.py
CHANGED
@@ -1,30 +1,28 @@
|
|
1 |
import streamlit as st
|
2 |
-
import eda
|
3 |
-
import model
|
4 |
|
|
|
|
|
|
|
5 |
|
6 |
-
|
|
|
|
|
7 |
|
8 |
-
if page ==
|
9 |
-
st.header(
|
10 |
-
st.write('')
|
11 |
-
st.write('Milestone 2')
|
12 |
-
st.write('Nama : Akbar Fitriawan')
|
13 |
-
st.write('Batch : HCK-14')
|
14 |
-
st.write('Tujuan Milestone : Classification atau Regression')
|
15 |
st.write('')
|
|
|
|
|
|
|
|
|
|
|
16 |
st.caption('Silahkan pilih menu lain di Select Box pada sebelah kiri layar anda untuk memulai!')
|
17 |
-
st.write('')
|
18 |
-
st.write('')
|
19 |
-
with st.expander("Latar Belakang"):
|
20 |
-
st.caption('lorem ipsum')
|
21 |
|
22 |
with st.expander("Problem Statement"):
|
23 |
-
st.caption('
|
24 |
-
|
25 |
with st.expander("Kesimpulan"):
|
26 |
-
st.caption('
|
27 |
-
elif page == '
|
28 |
eda.run()
|
29 |
else:
|
30 |
-
|
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
|
3 |
+
# page
|
4 |
+
import eda
|
5 |
+
import prediction
|
6 |
|
7 |
+
|
8 |
+
page = st.sidebar.radio(label="Navigation", options=['Home Page', 'Exploratory Data Analysis', 'Prediction'])
|
9 |
+
st.sidebar.divider()
|
10 |
|
11 |
+
if page == "Home Page":
|
12 |
+
st.header("Welcome Page")
|
|
|
|
|
|
|
|
|
|
|
13 |
st.write('')
|
14 |
+
st.write('Introduction')
|
15 |
+
st.write("Name\t: Akbar Fitriawan")
|
16 |
+
st.write("Batch\t: hacktiv8-15")
|
17 |
+
st.write('Tujuan Milestone : ')
|
18 |
+
st.write('Building a Classification Model for Car Insurance Claims')
|
19 |
st.caption('Silahkan pilih menu lain di Select Box pada sebelah kiri layar anda untuk memulai!')
|
|
|
|
|
|
|
|
|
20 |
|
21 |
with st.expander("Problem Statement"):
|
22 |
+
st.caption('Understanding Customer Behavior in Car Insurance Claims')
|
|
|
23 |
with st.expander("Kesimpulan"):
|
24 |
+
st.caption('With these strategies, we can manage risks more effectively, set premiums more accurately, and increase profits and customer satisfaction.')
|
25 |
+
elif page == 'Exploratory Data Analysis':
|
26 |
eda.run()
|
27 |
else:
|
28 |
+
prediction.run()
|
distclaim.png
DELETED
Binary file (18.6 kB)
|
|
eda.py
CHANGED
@@ -1,49 +1,273 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import numpy as np
|
4 |
import matplotlib.pyplot as plt
|
5 |
-
|
6 |
-
|
7 |
|
8 |
-
#membuat function untuk nantinya dipanggil di app.py
|
9 |
def run():
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
13 |
|
14 |
-
|
|
|
15 |
st.table(df.head(5))
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
#
|
19 |
-
st.
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
#menampilkan penjelasan
|
24 |
with st.expander('Explanation'):
|
25 |
-
|
26 |
-
|
27 |
-
st.caption(f'Dari plot di atas {claim_percentage:.1f}% orang yang mengajukan claim dan {no_claim_percentage:.1f}% yang tidak mengajukan claim, target prediksi imbalnced atau tidak seimbang sehingga harus penyasuaian parameter atau metode lain untuk mengatasinya.')
|
28 |
|
|
|
29 |
|
30 |
-
#
|
31 |
-
st.
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
36 |
with st.expander('Explanation'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
|
|
|
|
|
39 |
|
|
|
|
|
|
|
40 |
|
41 |
-
#
|
42 |
-
st.
|
43 |
-
|
44 |
-
st.
|
|
|
45 |
|
46 |
-
#menampilkan penjelasan
|
47 |
with st.expander('Explanation'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
import matplotlib.pyplot as plt
|
4 |
+
import plotly.express as px
|
5 |
+
|
6 |
|
|
|
7 |
def run():
|
8 |
+
# Set page configuration
|
9 |
+
|
10 |
+
# load dataset
|
11 |
+
df = pd.read_csv('Car_Insurance_Claim_Cleaned.csv')
|
12 |
|
13 |
+
st.title('Welcome to Exploratory Data Analysis :chart_with_upwards_trend:')
|
14 |
+
st.subheader("Looking Dataframe")
|
15 |
st.table(df.head(5))
|
16 |
|
17 |
+
st.subheader('Summary Statistic')
|
18 |
+
st.markdown('Summary data')
|
19 |
+
st.write(df.describe())
|
20 |
+
with st.expander('Explanation'):
|
21 |
+
st.write('Check description. It can be seen that the average number of times a speeding violation has been committed is 1, meaning that the individual has "had one traffic violation for speeding", "PAST_ACCIDENT" (ever filed a claim) on average 1 time, "VEHICLE_OWNERSHIP" is not his own , and a mean Outcome "0" ("No claim").')
|
22 |
+
|
23 |
+
st.markdown("Summary Top Frequency")
|
24 |
+
st.write(df.describe(include=['object']))
|
25 |
+
with st.expander('Explanation'):
|
26 |
+
st.write('The top frequency from the dataset is women aged "26-39 years" (middle age), have driving experience of less than "10 years", with income "upper_class", cars before 2015, sedans and city cars mostly in "New York"')
|
27 |
+
st.divider()
|
28 |
+
|
29 |
+
# Visualisasi by Outcome
|
30 |
+
st.subheader('History Outcome Car insurance')
|
31 |
+
pie_data = df['OUTCOME'].value_counts()
|
32 |
+
fig = px.pie(names=pie_data.index, values=pie_data.values, title='Outcome Claim or Not Claim')
|
33 |
+
st.plotly_chart(fig)
|
34 |
+
|
35 |
+
with st.expander('Explanation'):
|
36 |
+
st.write('From the visualization results, it can be seen that in the Outcome column, which is our target, there are more non-claim data compared to claims. The data is quite unbalanced so balancing must be done')
|
37 |
+
st.divider()
|
38 |
+
|
39 |
+
st.subheader('Analyze Status Personality')
|
40 |
+
|
41 |
+
fig_gender_outcome = px.histogram(df, x="GENDER", color="OUTCOME", barmode="group", title="Gender by Outcome")
|
42 |
+
st.plotly_chart(fig_gender_outcome, use_container_width=True)
|
43 |
+
|
44 |
+
|
45 |
+
fig_married_outcome = px.histogram(df, x="MARRIED", color="OUTCOME", barmode="group", title="Married by Outcome")
|
46 |
+
st.plotly_chart(fig_married_outcome, use_container_width=True)
|
47 |
+
|
48 |
+
|
49 |
+
fig_children_outcome = px.histogram(df, x="CHILDREN", color="OUTCOME", barmode="group", title="Children by Outcome")
|
50 |
+
st.plotly_chart(fig_children_outcome, use_container_width=True)
|
51 |
+
|
52 |
+
|
53 |
+
fig_city_outcome = px.histogram(df, x="CITY", color="OUTCOME", barmode="group", title="City by Outcome")
|
54 |
+
st.plotly_chart(fig_city_outcome, use_container_width=True)
|
55 |
+
|
56 |
+
with st.expander('Explanation'):
|
57 |
+
st.write('- Based on visualization of personal status where Outcome 1 (Claim insurance) is male, single, there is no difference "having children or not", and based on City New York is higher, followed by Orlando, San Diego, Baltimore')
|
58 |
+
st.write('- Based on visualization of personal status where Outcome 0 (No insurance claim) tends to be female, married, has children, and the city of New York, Orlando, San Diego.')
|
59 |
+
st.write('- Based on the City ranking, New York is in first place, Orlando is second, San Diego is third, last is Baltimore')
|
60 |
+
st.write('- Based on Gender by Age there is no significant difference in frequency in the dataset.')
|
61 |
+
st.write('Insights:')
|
62 |
+
st.write('- We must improve service in the San Diego and Baltimore areas, by carrying out promotional campaigns to attract customers')
|
63 |
+
|
64 |
+
|
65 |
+
# histogram
|
66 |
+
fig = px.histogram(df, x="AGE", color="OUTCOME", barmode="group",title="Age by Outcome", category_orders={"AGE": ['young', 'middle age', 'old', 'very old']})
|
67 |
+
# pie chart
|
68 |
+
fig_pie = px.pie(df, names="AGE", title="Frequency Age", category_orders={"AGE": ['young', 'middle age', 'old', 'very old']})
|
69 |
+
fig_pie.update_traces(textinfo='percent+label')
|
70 |
+
# Split the app into two columns
|
71 |
+
col1, col2 = st.columns(2)
|
72 |
+
with col1:
|
73 |
+
st.plotly_chart(fig, use_container_width=True)
|
74 |
+
with col2:
|
75 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
76 |
+
|
77 |
+
with st.expander('Explanation'):
|
78 |
+
st.write("- In the age range 16-25 years (young) there is a tendency to submit insurance claims (1), while in the age range 40-64 years (old) there is a tendency not (0)." )
|
79 |
+
st.write("- High frequency of not submitting insurance claims in the age range 40-64 years. (old)")
|
80 |
+
st.write("- In summary, as people get older, there is a tendency not to file insurance claims.")
|
81 |
+
st.divider()
|
82 |
+
|
83 |
+
fig = px.histogram(df, x='RACE', color='OUTCOME', barmode='group', title="RACE by Outcome")
|
84 |
+
fig_pie = px.pie(df, names="RACE", title="Frequency RACE")
|
85 |
+
col1,col2 = st.columns(2)
|
86 |
+
with col1:
|
87 |
+
st.plotly_chart(fig, use_container_width=True)
|
88 |
+
with col2:
|
89 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
90 |
+
|
91 |
+
with st.expander('Explanation'):
|
92 |
+
st.write('- It can be seen from the majority and minority races that Outcome 0 ("No claim insurance") is superior to Outcome 1 ("Claim insurance")')
|
93 |
+
st.write('- majority 90.1% and minority 9.9%')
|
94 |
+
st.write('in fact there is no correlation between the majority and the minority (if we imagine that it is very likely that the minority is more at risk or has a higher risk of Outcome 1 "Claim Insurance" namely racism)')
|
95 |
+
st.divider()
|
96 |
|
97 |
+
# Visualisasi Drive exp
|
98 |
+
st.subheader('Driving Experience')
|
99 |
+
# histogram
|
100 |
+
fig = px.histogram(df, x='DRIVING_EXPERIENCE', color='OUTCOME', barmode='group', title="Driving Experience by Outcome", category_orders={"DRIVING_EXPERIENCE": ['newbie', 'amateure', 'advanced', 'expert']})
|
101 |
+
fig_pie = px.pie(df, names="DRIVING_EXPERIENCE", title="Frequency Driving Experience")
|
102 |
+
col1,col2 = st.columns(2)
|
103 |
+
with col1:
|
104 |
+
st.plotly_chart(fig, use_container_width=True)
|
105 |
+
with col2:
|
106 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
107 |
|
|
|
108 |
with st.expander('Explanation'):
|
109 |
+
st.write('- In fact, the less experienced you are in driving, the higher the claim rate insurance (Outcome 1 "Claim loan").')
|
110 |
+
st.write('- In the dataset frequency Drive experience is high at 0-9 years, namely 35%, followed by 10-19 years is 33.0%, 20-29 years is 21.2%, 30+ years is 10.5%. by the way iam rename values ("Newbie", "Amateure", "Advanced", "Expert")')
|
|
|
111 |
|
112 |
+
st.divider()
|
113 |
|
114 |
+
# Visualisasi Education
|
115 |
+
st.subheader('Looking Education')
|
116 |
+
# histogram
|
117 |
+
fig = px.histogram(df, x='EDUCATION', color='OUTCOME', barmode='group', title="Driving Experience by Outcome", category_orders={"EDUCATION": ['high school', 'university', 'none']})
|
118 |
+
fig_pie = px.pie(df, names="EDUCATION", title="Frequency Education")
|
119 |
|
120 |
+
col1,col2 = st.columns(2)
|
121 |
+
with col1:
|
122 |
+
st.plotly_chart(fig, use_container_width=True)
|
123 |
+
with col2:
|
124 |
+
st.plotly_chart(fig_pie, use_container_width=True)
|
125 |
+
|
126 |
with st.expander('Explanation'):
|
127 |
+
st.write('- The visualization shows that Outcome 1 (claim insurance) has a high frequency in "high school" while Outcome 0 (No claim insurance) in "university"')
|
128 |
+
st.write('- And the amount of data in the dataset is 45% in high school, 39% in university and "19.1%" is none (work, etc.)')
|
129 |
+
st.write('Relates to Age where high school education levels (aged 16-25 years) tend to submit claims (Outcome 1 "Claim insurance"), as well as high frequency in this category. and I assume that high school is in the 16-25 year age range, university is in the 26-39 year age range, and none is in the 40-65+ age range')
|
130 |
+
st.divider()
|
131 |
+
|
132 |
+
# Visualisasi Income
|
133 |
+
st.subheader("Looking Income")
|
134 |
+
# Create a subplot with Plotly Express
|
135 |
+
fig_income_outcome = px.histogram(df, x="INCOME", color="OUTCOME", barmode="group", category_orders={"INCOME": ['poverty', 'working class', 'middle class', 'upper class']})
|
136 |
+
fig_income_outcome.update_layout(title="Income by Outcome", xaxis_title="Income", yaxis_title="Count")
|
137 |
+
|
138 |
+
# Create a pie chart with Plotly Express
|
139 |
+
dist_income = df["INCOME"].value_counts()
|
140 |
+
fig_income_frequency = px.pie(names=dist_income.index, values=dist_income.values, title="Frequency by Income",
|
141 |
+
labels={"label": "Income", "value": "Frequency"})
|
142 |
|
143 |
+
# Create a grouped bar chart for Income by Age
|
144 |
+
fig_income_age = px.bar(df, x="INCOME", color="AGE", category_orders={"INCOME": ['poverty', 'working class', 'middle class', 'upper class'], "AGE": ['young', 'middle age', 'old', 'very old']})
|
145 |
+
fig_income_age.update_layout(title="Income by Age", xaxis_title="Income", yaxis_title="Count")
|
146 |
|
147 |
+
# Create a box plot for Income by Past Accidents
|
148 |
+
fig_income_accidents = px.box(df, x="PAST_ACCIDENTS", y="INCOME")
|
149 |
+
fig_income_accidents.update_layout(title="Income by Past Accidents", xaxis_title="Past Accidents", yaxis_title="Income")
|
150 |
|
151 |
+
# Streamlit app
|
152 |
+
st.plotly_chart(fig_income_outcome, use_container_width=True)
|
153 |
+
st.plotly_chart(fig_income_frequency, use_container_width=True)
|
154 |
+
st.plotly_chart(fig_income_age, use_container_width=True)
|
155 |
+
st.plotly_chart(fig_income_accidents, use_container_width=True)
|
156 |
|
|
|
157 |
with st.expander('Explanation'):
|
158 |
+
st.write("- Based on the Income by Outcome visualization, those who tend to submit claims (outcome 1) are from the 'poverty' income group.")
|
159 |
+
st.write("- From the results of the frequency by income dataset, the upper class dominates, followed by the middle class, poverty, and working class.")
|
160 |
+
st.write("- From the Income by Age output, it can be seen that different income groups are dominated by different age groups.")
|
161 |
+
st.write("- From the Income by Past Accidents output, certain income groups have different patterns in past loan claims.")
|
162 |
+
|
163 |
+
# Insights
|
164 |
+
st.write("Insights:")
|
165 |
+
st.write("- Based on the Outcome (claim insurance / no claim insurance) that Income 'Poverty' has the potential to submit a claim, which is dominated by teenagers. Therefore, focusing on reducing this potential by segmenting customers and targeting ages over 25 years and by looking at their income (upper class, middle class, and working class).")
|
166 |
+
|
167 |
+
st.divider()
|
168 |
+
# visualisasi vehicle
|
169 |
+
st.subheader('Looking Vehicle Distribution')
|
170 |
+
fig_vehicle_year = px.histogram(df, x="VEHICLE_YEAR", color="OUTCOME", barmode="group", title="Vehicle Years by Outcome")
|
171 |
+
fig_vehicle_type = px.histogram(df, x="VEHICLE_TYPE", color="OUTCOME", barmode="group", title="Vehicle Type by Outcome")
|
172 |
+
fig_vehicle_ownership_outcome = px.histogram(df, x="VEHICLE_OWNERSHIP", color="OUTCOME", barmode="group", title="Vehicle by Ownership")
|
173 |
+
fig_vehicle_ownership_pastAccidents = px.box(df,x='VEHICLE_OWNERSHIP', y='PAST_ACCIDENTS')
|
174 |
+
st.plotly_chart(fig_vehicle_year, use_container_width=True)
|
175 |
+
st.plotly_chart(fig_vehicle_type, use_container_width=True)
|
176 |
+
st.plotly_chart(fig_vehicle_ownership_outcome, use_container_width=True)
|
177 |
+
st.plotly_chart(fig_vehicle_ownership_pastAccidents, use_container_width=True)
|
178 |
+
st.divider()
|
179 |
+
|
180 |
+
with st.expander('Conclusions'):
|
181 |
+
|
182 |
+
|
183 |
+
|
184 |
+
st.write("## Data Imbalance")
|
185 |
+
st.write("""
|
186 |
+
Data imbalance was found in the Outcome column, which requires balancing actions to improve the accuracy of the prediction model.
|
187 |
+
""")
|
188 |
+
|
189 |
+
st.write("## Personality Analysis")
|
190 |
+
st.write("Based on the results that I analyzed, there was a pattern of differences in insurance claims between customers based on gender, marital status, having children, etc. I categorize them based on outcomes as follows:")
|
191 |
+
|
192 |
+
st.write("### In case of outcome 1 (Claim insurance)")
|
193 |
+
st.write("""
|
194 |
+
- Tends to be male
|
195 |
+
- In the age range 16-25 years (young)
|
196 |
+
- Singles
|
197 |
+
- There is no significant difference "to have children or not"
|
198 |
+
- Based on City, New York is higher, followed by Orlando, San Diego, and finally Baltimore
|
199 |
+
- Based on education, high school is higher for submitting insurance claims
|
200 |
+
- Income tends to be Poverty (unstable)
|
201 |
+
- Driving experience, the less experienced you are, the higher you submit a claim
|
202 |
+
""")
|
203 |
+
|
204 |
+
st.write("### In case of outcome 0 (No insurance claim)")
|
205 |
+
st.write("""
|
206 |
+
- Tends to be female
|
207 |
+
- In the age range 40-64 (old)
|
208 |
+
- Married
|
209 |
+
- Have children
|
210 |
+
- Based on City High New York, Orlando, and San Diego
|
211 |
+
- Based on Education, University tends to be high, and high school
|
212 |
+
- Income tends to be upper class and middle class
|
213 |
+
- Drive experience: the more experienced you are in driving, the higher the risk of filing an insurance claim
|
214 |
+
- High credit score
|
215 |
+
- Even though there is no correlation, the outcome is high in the majority
|
216 |
+
""")
|
217 |
+
|
218 |
+
st.write("""
|
219 |
+
Based on analysis of insurance claim patterns, personal factors such as age, education and marital status influence the frequency of claims. Young drivers (16-25 years) and those with low education (high school) tend to have a higher frequency of claims. On the other hand, drivers who are older (40-64 years), have a higher education (university), are married, and have children tend to file claims less frequently.
|
220 |
+
""")
|
221 |
+
|
222 |
+
st.write("## Vehicle Analysis")
|
223 |
+
st.write("As follows are the findings from the vehicle:")
|
224 |
+
|
225 |
+
st.write("### In the case of outcome 1 (Claim Insurance)")
|
226 |
+
st.write("""
|
227 |
+
- Tend to be cars before 2015
|
228 |
+
- Sedan cars tend to be taller than sports cars
|
229 |
+
- Submission does not belong to the car itself
|
230 |
+
- Annual mileage tends to be high
|
231 |
+
""")
|
232 |
+
|
233 |
+
st.write("### In case of outcome 0 (No insurance claim)")
|
234 |
+
st.write("""
|
235 |
+
- Car types tend to be sedans
|
236 |
+
- Tend to be cars after 2015
|
237 |
+
- Private property
|
238 |
+
- Annual mileage tends to be low
|
239 |
+
""")
|
240 |
+
|
241 |
+
st.write("""
|
242 |
+
The type and age of the vehicle has a big influence on insurance claim patterns. Older cars or certain types of cars, such as sedans, tend to have a higher frequency of claims.
|
243 |
+
""")
|
244 |
+
|
245 |
+
st.write("## Analysis of Accidents")
|
246 |
+
st.write("From the findings, there is an influence on insurance claims based on violations or damage received as follows:")
|
247 |
+
|
248 |
+
st.write("### In the case of outcome 1 (Claim Insurance)")
|
249 |
+
st.write("""
|
250 |
+
- Rarely commit speed violations
|
251 |
+
- DUIS ("Driving Under the Influence Surcharge") rarely commits such offenses
|
252 |
+
- Past accidents tend to have a history of fewer past accidents
|
253 |
+
- Ages 16-25 (young) rarely commit violations
|
254 |
+
""")
|
255 |
+
|
256 |
+
st.write("### In case of outcome 0 (No Claim Insurance)")
|
257 |
+
st.write("""
|
258 |
+
- Have committed at least 1 speed violation or more
|
259 |
+
- DUIS ("Driving Under the Influence Surcharge") often commits this violation
|
260 |
+
- Most past accidents have a history of certain accidents
|
261 |
+
- In the age range of 40+ years there is a higher rate of committing violations
|
262 |
+
""")
|
263 |
+
|
264 |
+
st.write("""
|
265 |
+
Traffic violations and past accident history have a big influence on insurance claims. Customers with a lower history of violations and accidents tend to file claims less frequently.
|
266 |
+
""")
|
267 |
+
|
268 |
|
269 |
+
|
270 |
+
|
271 |
+
if __name__ == "__main__":
|
272 |
+
|
273 |
+
run()
|
expdrive.png
DELETED
Binary file (21.1 kB)
|
|
list_cat_cols.txt → feature_cat.txt
RENAMED
@@ -1 +1 @@
|
|
1 |
-
["AGE", "GENDER", "DRIVING_EXPERIENCE", "EDUCATION", "INCOME", "VEHICLE_YEAR"]
|
|
|
1 |
+
["AGE", "GENDER", "DRIVING_EXPERIENCE", "EDUCATION", "INCOME", "VEHICLE_OWNERSHIP", "VEHICLE_YEAR", "MARRIED", "CHILDREN", "CITY"]
|
feature_num.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["CREDIT_SCORE", "ANNUAL_MILEAGE", "SPEEDING_VIOLATIONS", "DUIS", "PAST_ACCIDENTS"]
|
income.png
DELETED
Binary file (23.4 kB)
|
|
list_num_cols.txt
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
["CREDIT_SCORE", "VEHICLE_OWNERSHIP", "MARRIED", "CHILDREN", "POSTAL_CODE", "ANNUAL_MILEAGE", "SPEEDING_VIOLATIONS", "DUIS", "PAST_ACCIDENTS"]
|
|
|
|
encoder.pkl → model.pkl
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afdff55560c48cdd5414579d5b8eb01430e6b975ba0c3e393bc11803d595ed4b
|
3 |
+
size 97331082
|
model.py
DELETED
@@ -1,93 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
import pickle
|
5 |
-
|
6 |
-
|
7 |
-
# Load the trained model
|
8 |
-
with open("ranfo_pipe.pkl", "rb") as model_file:
|
9 |
-
model = pickle.load(model_file)
|
10 |
-
|
11 |
-
def run():
|
12 |
-
|
13 |
-
# Streamlit UI
|
14 |
-
st.title("Insurance Claims over Cars")
|
15 |
-
|
16 |
-
# Introduction
|
17 |
-
st.subheader("π Prediction Insurance Claim or NO ")
|
18 |
-
|
19 |
-
st.markdown('## π Input Data')
|
20 |
-
with st.form('my_form'):
|
21 |
-
|
22 |
-
# Age
|
23 |
-
age_choice = st.selectbox("Age", ['Young', 'Middle Age', 'Old', 'Very Old'])
|
24 |
-
# Gender
|
25 |
-
gender_choice = st.selectbox("Gender", ['male', 'female'])
|
26 |
-
# Race
|
27 |
-
race_choice = st.selectbox("Race", ['Majority', 'Minority'])
|
28 |
-
# Driving Experience
|
29 |
-
driving_experience_choice = st.selectbox("Driving Experience", ['Newbie', 'Amateur', 'Advanced', 'Expert'])
|
30 |
-
# Education
|
31 |
-
education_choice = st.selectbox("Education", ['high school', 'none', 'university'])
|
32 |
-
# Income
|
33 |
-
income_choice = st.selectbox("Income", ['upper class', 'poverty', 'working class', 'middle class'])
|
34 |
-
# Credit Score Range
|
35 |
-
credit_score_range = st.number_input("Credit Score Range", min_value=0.0, max_value=999999.0)
|
36 |
-
# Vehicle Ownership
|
37 |
-
vehicle_ownership_choice = st.selectbox("Vehicle Ownership (True/False)", [0.0, 1.0])
|
38 |
-
# Vehicle Year
|
39 |
-
vehicle_year_choice = st.selectbox("Vehicle Year", ['before 2015', 'after 2015'])
|
40 |
-
# Married
|
41 |
-
married_choice = st.selectbox("Married (True/False)", [0.0, 1.0])
|
42 |
-
# Children
|
43 |
-
children_choice = st.selectbox("Children (True/False)", [0.0, 1.0])
|
44 |
-
# Postal Code
|
45 |
-
postal_code_choice = st.selectbox("Postal Code", [10238, 32765, 92101, 21217])
|
46 |
-
# Annual Mileage Range
|
47 |
-
annual_mileage_range = st.number_input("Annual Mileage Range", min_value=0, max_value=999999)
|
48 |
-
# Vehicle Type
|
49 |
-
vehicle_type_choice = st.selectbox("Vehicle Type", ['sedan', 'sport car'])
|
50 |
-
# Speeding Violations Range
|
51 |
-
speeding_violations_range = st.number_input("Speeding Violations Range", min_value=0, max_value=50)
|
52 |
-
# DUIs Range
|
53 |
-
duis_range = st.number_input("DUIs Range", min_value=0, max_value=50)
|
54 |
-
# Past Accidents Range
|
55 |
-
past_accidents_range = st.number_input("Past Accidents Range", min_value=0, max_value=50)
|
56 |
-
|
57 |
-
submitted = st.form_submit_button('π Let\'s Check!')
|
58 |
-
|
59 |
-
# Create DataFrame from user input
|
60 |
-
data = {
|
61 |
-
"AGE":age_choice,
|
62 |
-
"GENDER":gender_choice,
|
63 |
-
"RACE":race_choice,
|
64 |
-
"DRIVING_EXPERIENCE":driving_experience_choice,
|
65 |
-
"EDUCATION":education_choice,
|
66 |
-
"INCOME":income_choice,
|
67 |
-
"CREDIT_SCORE":credit_score_range,
|
68 |
-
"VEHICLE_OWNERSHIP":vehicle_ownership_choice,
|
69 |
-
"VEHICLE_YEAR":vehicle_year_choice,
|
70 |
-
"MARRIED":married_choice,
|
71 |
-
"CHILDREN":children_choice,
|
72 |
-
"POSTAL_CODE":postal_code_choice,
|
73 |
-
"ANNUAL_MILEAGE":annual_mileage_range,
|
74 |
-
"VEHICLE":vehicle_type_choice,
|
75 |
-
"SPEEDING_VIOLATIONS":speeding_violations_range,
|
76 |
-
"DUIS":duis_range,
|
77 |
-
"PAST_ACCIDENTS":past_accidents_range,
|
78 |
-
}
|
79 |
-
|
80 |
-
df = pd.DataFrame([data])
|
81 |
-
st.dataframe(df)
|
82 |
-
|
83 |
-
# Make prediction
|
84 |
-
if submitted:
|
85 |
-
prediction = model.predict(df)
|
86 |
-
# Display prediction result
|
87 |
-
if prediction[0] == 0:
|
88 |
-
st.write('π’ Claims loan')
|
89 |
-
else:
|
90 |
-
st.write('π΄ No Loan')
|
91 |
-
|
92 |
-
if __name__=='__main__':
|
93 |
-
run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prediction.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Load Model
|
7 |
+
with open('model.pkl', 'rb') as model_file:
|
8 |
+
model = pickle.load(model_file)
|
9 |
+
|
10 |
+
|
11 |
+
def run():
|
12 |
+
st.title('Car Insurance Prediction :car::clipboard:')
|
13 |
+
st.write('Welcome to the car insurance claim or not claim prediction application. predict whether customers will claim insurance or not in the future based on existing information')
|
14 |
+
st.divider()
|
15 |
+
|
16 |
+
st.markdown('## π Input Data')
|
17 |
+
with st.form('my_form'):
|
18 |
+
# input age
|
19 |
+
age = st.selectbox('Age π§π»', ['young', 'middle age', 'old','very old']),
|
20 |
+
# input gender
|
21 |
+
gender = st.selectbox('Gender π»', ['male', 'female']),
|
22 |
+
# input race
|
23 |
+
race = st.selectbox('RACE π«±πΌβπ«²π»', ['majority', 'minority']),
|
24 |
+
# input drive
|
25 |
+
drive_exp = st.selectbox('Driving Experience π£οΈπ', ['newbie', 'amateure', 'advanced', 'expert']),
|
26 |
+
# input education
|
27 |
+
education = st.selectbox('Education π', ['high school','none', 'university']),
|
28 |
+
#input income
|
29 |
+
income = st.selectbox('Income π°', ['poverty', 'working class', 'middle class','upper class']),
|
30 |
+
# input credit score
|
31 |
+
credit_score = st.number_input('Credit score :credit_card:', 0.0, 1.0, step=0.1)
|
32 |
+
# input vehicle ownership
|
33 |
+
vehicle_ownership = st.radio('Vehicle Ownership π', ('No', 'Yes')),
|
34 |
+
vehicle_ownership_value = 0 if vehicle_ownership == 'No' else 1
|
35 |
+
|
36 |
+
# input vehicle year
|
37 |
+
vehicle_year = st.selectbox('Vehicle Year 📅', ['before 2015', 'after 2015']),
|
38 |
+
# input married
|
39 |
+
married = st.radio('Married :man_and_woman_holding_hands::ring:', ('No', 'Yes')),
|
40 |
+
married_value = 0 if married == 'No' else 1
|
41 |
+
|
42 |
+
# input children
|
43 |
+
children = st.radio('Children πΆ', ('No', 'Yes')),
|
44 |
+
children_value = 0 if children == 'No' else 1
|
45 |
+
|
46 |
+
# input annual mileage
|
47 |
+
annual_mileage = st.slider('Annual Mileage β²', 1000, 25000, step=100),
|
48 |
+
# input vehicle type
|
49 |
+
vehicle_type = st.selectbox('Vehicle Type π', ['sedan', 'sport car']),
|
50 |
+
# input speed violations
|
51 |
+
speeding_violations = st.slider('Speeding Violations β‘', 0,15, step=1),
|
52 |
+
# input DUIS
|
53 |
+
duis = st.slider('DUIS π₯΄π', 0,15, step=1),
|
54 |
+
# input past accidents
|
55 |
+
past_accidents = st.slider('Past Accidents π₯', 0,15, step=1),
|
56 |
+
# Input city
|
57 |
+
city = st.selectbox('City π’', ['baltimore','new york','orlando','san diego'])
|
58 |
+
|
59 |
+
submitted = st.form_submit_button('Let\'s π Check ')
|
60 |
+
|
61 |
+
input_data = pd.DataFrame({
|
62 |
+
'AGE': age,
|
63 |
+
'GENDER':gender,
|
64 |
+
'RACE': race,
|
65 |
+
'DRIVING_EXPERIENCE':drive_exp,
|
66 |
+
'EDUCATION': education,
|
67 |
+
'INCOME': income,
|
68 |
+
'CREDIT_SCORE':credit_score,
|
69 |
+
'VEHICLE_OWNERSHIP':vehicle_ownership,
|
70 |
+
'VEHICLE_YEAR':vehicle_year,
|
71 |
+
'MARRIED':married,
|
72 |
+
'CHILDREN': children,
|
73 |
+
'ANNUAL_MILEAGE':annual_mileage,
|
74 |
+
'VEHICLE_TYPE':vehicle_type,
|
75 |
+
'SPEEDING_VIOLATIONS': speeding_violations,
|
76 |
+
'DUIS': duis,
|
77 |
+
'PAST_ACCIDENTS':past_accidents,
|
78 |
+
'CITY': city
|
79 |
+
})
|
80 |
+
|
81 |
+
st.markdown('Syntetic Dataframe')
|
82 |
+
st.dataframe(input_data)
|
83 |
+
|
84 |
+
st.markdown('Prediction π')
|
85 |
+
if submitted:
|
86 |
+
prediction = model.predict(input_data)
|
87 |
+
|
88 |
+
if prediction[0] == 0:
|
89 |
+
st.write('β Customer has not filed a claim')
|
90 |
+
else:
|
91 |
+
st.write('✅ Customer has filed a claim')
|
92 |
+
|
93 |
+
|
94 |
+
if __name__=="__main__":
|
95 |
+
run()
|
ranfo_pipe.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:1966d5788619dfada661219e166418e2f09e4f90102f9f33a170bf18435529b4
|
3 |
-
size 17624931
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,7 +1,17 @@
|
|
|
|
1 |
pandas==1.5.3
|
2 |
numpy==1.25.2
|
3 |
seaborn==0.12.2
|
4 |
matplotlib==3.7.2
|
5 |
scikit-learn==1.3.1
|
6 |
imbalanced-learn==0.11.0
|
7 |
-
feature-engine==1.6.1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<<<<<<< HEAD
|
2 |
pandas==1.5.3
|
3 |
numpy==1.25.2
|
4 |
seaborn==0.12.2
|
5 |
matplotlib==3.7.2
|
6 |
scikit-learn==1.3.1
|
7 |
imbalanced-learn==0.11.0
|
8 |
+
feature-engine==1.6.1
|
9 |
+
=======
|
10 |
+
streamlit
|
11 |
+
pandas
|
12 |
+
numpy
|
13 |
+
matplotlib
|
14 |
+
plotly
|
15 |
+
scikit-learn
|
16 |
+
imbalanced-learn
|
17 |
+
>>>>>>> 748f6ec (update)
|
scaler.pkl
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:54bb7f9c8142a2ac4a64dc1ec3b7770f852cc38fab927c77598b42863fbfe6a7
|
3 |
-
size 891
|
|
|
|
|
|
|
|
victyp.png
DELETED
Binary file (19.3 kB)
|
|