Afitriawan committed on
Commit
0ceba31
•
1 Parent(s): adc6325
Car_Insurance_Claim.csv DELETED
The diff for this file is too large to render. See raw diff
 
Car_Insurance_Claim_Cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
README.md DELETED
@@ -1,12 +0,0 @@
1
- ---
2
- title: Milestone
3
- emoji: 🐨
4
- colorFrom: gray
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.32.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
__pycache__/eda.cpython-312.pyc ADDED
Binary file (20 kB). View file
 
__pycache__/prediction.cpython-312.pyc ADDED
Binary file (4.84 kB). View file
 
age.png DELETED
Binary file (18 kB)
 
app.py CHANGED
@@ -1,30 +1,28 @@
1
  import streamlit as st
2
- import eda
3
- import model
4
 
 
 
 
5
 
6
- page = st.sidebar.selectbox(label='Select Page:', options=['Home Page', 'Exploration Data Analysis', 'Model Prediksi'])
 
 
7
 
8
- if page == 'Home Page':
9
- st.header('Welcome Page')
10
- st.write('')
11
- st.write('Milestone 2')
12
- st.write('Nama : Akbar Fitriawan')
13
- st.write('Batch : HCK-14')
14
- st.write('Tujuan Milestone : Classification atau Regression')
15
  st.write('')
 
 
 
 
 
16
  st.caption('Silahkan pilih menu lain di Select Box pada sebelah kiri layar anda untuk memulai!')
17
- st.write('')
18
- st.write('')
19
- with st.expander("Latar Belakang"):
20
- st.caption('lorem ipsum')
21
 
22
  with st.expander("Problem Statement"):
23
- st.caption('lorem ipsum')
24
-
25
  with st.expander("Kesimpulan"):
26
- st.caption('lorem ipsum')
27
- elif page == 'Exploration Data Analysis':
28
  eda.run()
29
  else:
30
- model.run()
 
1
  import streamlit as st
 
 
2
 
3
+ # page
4
+ import eda
5
+ import prediction
6
 
7
+
8
def _render_home():
    """Draw the landing page: header, intro lines, hint caption, expanders."""
    st.header("Welcome Page")

    # Intro lines are written one per call, in display order.
    intro_lines = (
        '',
        'Introduction',
        "Name\t: Akbar Fitriawan",
        "Batch\t: hacktiv8-15",
        'Tujuan Milestone : ',
        'Building a Classification Model for Car Insurance Claims',
    )
    for line in intro_lines:
        st.write(line)

    st.caption('Silahkan pilih menu lain di Select Box pada sebelah kiri layar anda untuk memulai!')

    # Each (title, text) pair becomes one collapsible section.
    sections = (
        ("Problem Statement", 'Understanding Customer Behavior in Car Insurance Claims'),
        ("Kesimpulan", 'With these strategies, we can manage risks more effectively, set premiums more accurately, and increase profits and customer satisfaction.'),
    )
    for title, text in sections:
        with st.expander(title):
            st.caption(text)


# Sidebar navigation; the selected label decides which page renders below.
page = st.sidebar.radio(
    label="Navigation",
    options=['Home Page', 'Exploratory Data Analysis', 'Prediction'],
)
st.sidebar.divider()

if page == "Home Page":
    _render_home()
elif page == 'Exploratory Data Analysis':
    eda.run()
else:
    prediction.run()
distclaim.png DELETED
Binary file (18.6 kB)
 
eda.py CHANGED
@@ -1,49 +1,273 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import numpy as np
4
  import matplotlib.pyplot as plt
5
- # from phik.report import plot_correlation_matrix
6
- from PIL import Image
7
 
8
- #membuat function untuk nantinya dipanggil di app.py
9
  def run():
10
- st.title('Welcome to Explaration Data Analysis')
11
- # Memanggil data csv
12
- df= pd.read_csv(r'Car_Insurance_Claim.csv')
 
13
 
14
- # menampilakn 5 data teratas
 
15
  st.table(df.head(5))
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- #menampilakn phik matrix
19
- st.title('Distribusi Claims or NO')
20
- image = Image.open('distclaim.png')
21
- st.image(image, caption='figure 1')
 
 
 
 
 
 
22
 
23
- #menampilkan penjelasan
24
  with st.expander('Explanation'):
25
- claim_percentage = 31.3
26
- no_claim_percentage = 68.7
27
- st.caption(f'Dari plot di atas {claim_percentage:.1f}% orang yang mengajukan claim dan {no_claim_percentage:.1f}% yang tidak mengajukan claim, target prediksi imbalnced atau tidak seimbang sehingga harus penyasuaian parameter atau metode lain untuk mengatasinya.')
28
 
 
29
 
30
- #menampilakn phik matrix
31
- st.title('Visualisasi Age Vs Outcome')
32
- image = Image.open('age.png')
33
- st.image(image, caption='figure 1')
 
34
 
35
- #menampilkan penjelasan
 
 
 
 
 
36
  with st.expander('Explanation'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- st.caption('lorem ipsum')
 
 
39
 
 
 
 
40
 
41
- #menampilakn phik matrix
42
- st.title('Visualisasi Drive experience Vs Outcome')
43
- image = Image.open('expdrive.png')
44
- st.image(image, caption='figure 1')
 
45
 
46
- #menampilkan penjelasan
47
  with st.expander('Explanation'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- st.caption('lorem upsum')
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  import matplotlib.pyplot as plt
4
+ import plotly.express as px
5
+
6
 
 
7
  def run():
8
+ # Set page configuration
9
+
10
+ # load dataset
11
+ df = pd.read_csv('Car_Insurance_Claim_Cleaned.csv')
12
 
13
+ st.title('Welcome to Exploratory Data Analysis :chart_with_upwards_trend:')
14
+ st.subheader("Looking Dataframe")
15
  st.table(df.head(5))
16
 
17
+ st.subheader('Summary Statistic')
18
+ st.markdown('Summary data')
19
+ st.write(df.describe())
20
+ with st.expander('Explanation'):
21
+ st.write('Check description. It can be seen that the average number of times a speeding violation has been committed is 1, meaning that the individual has "had one traffic violation for speeding", "PAST_ACCIDENT" (ever filed a claim) on average 1 time, "VEHICLE_OWNERSHIP" is not his own , and a mean Outcome "0" ("No claim").')
22
+
23
+ st.markdown("Summary Top Frequency")
24
+ st.write(df.describe(include=['object']))
25
+ with st.expander('Explanation'):
26
+ st.write('The top frequency from the dataset is women aged "26-39 years" (middle age), have driving experience of less than "10 years", with income "upper_class", cars before 2015, sedans and city cars mostly in "New York"')
27
+ st.divider()
28
+
29
+ # Visualisasi by Outcome
30
+ st.subheader('History Outcome Car insurance')
31
+ pie_data = df['OUTCOME'].value_counts()
32
+ fig = px.pie(names=pie_data.index, values=pie_data.values, title='Outcome Claim or Not Claim')
33
+ st.plotly_chart(fig)
34
+
35
+ with st.expander('Explanation'):
36
+ st.write('From the visualization results, it can be seen that in the Outcome column, which is our target, there are more non-claim data compared to claims. The data is quite unbalanced so balancing must be done')
37
+ st.divider()
38
+
39
+ st.subheader('Analyze Status Personality')
40
+
41
+ fig_gender_outcome = px.histogram(df, x="GENDER", color="OUTCOME", barmode="group", title="Gender by Outcome")
42
+ st.plotly_chart(fig_gender_outcome, use_container_width=True)
43
+
44
+
45
+ fig_married_outcome = px.histogram(df, x="MARRIED", color="OUTCOME", barmode="group", title="Married by Outcome")
46
+ st.plotly_chart(fig_married_outcome, use_container_width=True)
47
+
48
+
49
+ fig_children_outcome = px.histogram(df, x="CHILDREN", color="OUTCOME", barmode="group", title="Children by Outcome")
50
+ st.plotly_chart(fig_children_outcome, use_container_width=True)
51
+
52
+
53
+ fig_city_outcome = px.histogram(df, x="CITY", color="OUTCOME", barmode="group", title="City by Outcome")
54
+ st.plotly_chart(fig_city_outcome, use_container_width=True)
55
+
56
+ with st.expander('Explanation'):
57
+ st.write('- Based on visualization of personal status where Outcome 1 (Claim insurance) is male, single, there is no difference "having children or not", and based on City New York is higher, followed by Orlando, San Diego, Baltimore')
58
+ st.write('- Based on visualization of personal status where Outcome 0 (No insurance claim) tends to be female, married, has children, and the city of New York, Orlando, San Diego.')
59
+ st.write('- Based on the City ranking, New York is in first place, Orlando is second, San Diego is third, last is Baltimore')
60
+ st.write('- Based on Gender by Age there is no significant difference in frequency in the dataset.')
61
+ st.write('Insights:')
62
+ st.write('- We must improve service in the San Diego and Baltimore areas, by carrying out promotional campaigns to attract customers')
63
+
64
+
65
+ # histogram
66
+ fig = px.histogram(df, x="AGE", color="OUTCOME", barmode="group",title="Age by Outcome", category_orders={"AGE": ['young', 'middle age', 'old', 'very old']})
67
+ # pie chart
68
+ fig_pie = px.pie(df, names="AGE", title="Frequency Age", category_orders={"AGE": ['young', 'middle age', 'old', 'very old']})
69
+ fig_pie.update_traces(textinfo='percent+label')
70
+ # Split the app into two columns
71
+ col1, col2 = st.columns(2)
72
+ with col1:
73
+ st.plotly_chart(fig, use_container_width=True)
74
+ with col2:
75
+ st.plotly_chart(fig_pie, use_container_width=True)
76
+
77
+ with st.expander('Explanation'):
78
+ st.write("- In the age range 16-25 years (young) there is a tendency to submit insurance claims (1), while in the age range 40-64 years (old) there is a tendency not (0)." )
79
+ st.write("- High frequency of not submitting insurance claims in the age range 40-64 years. (old)")
80
+ st.write("- In summary, as people get older, there is a tendency not to file insurance claims.")
81
+ st.divider()
82
+
83
+ fig = px.histogram(df, x='RACE', color='OUTCOME', barmode='group', title="RACE by Outcome")
84
+ fig_pie = px.pie(df, names="RACE", title="Frequency RACE")
85
+ col1,col2 = st.columns(2)
86
+ with col1:
87
+ st.plotly_chart(fig, use_container_width=True)
88
+ with col2:
89
+ st.plotly_chart(fig_pie, use_container_width=True)
90
+
91
+ with st.expander('Explanation'):
92
+ st.write('- It can be seen from the majority and minority races that Outcome 0 ("No claim insurance") is superior to Outcome 1 ("Claim insurance")')
93
+ st.write('- majority 90.1% and minority 9.9%')
94
+ st.write('in fact there is no correlation between the majority and the minority (if we imagine that it is very likely that the minority is more at risk or has a higher risk of Outcome 1 "Claim Insurance" namely racism)')
95
+ st.divider()
96
 
97
+ # Visualisasi Drive exp
98
+ st.subheader('Driving Experience')
99
+ # histogram
100
+ fig = px.histogram(df, x='DRIVING_EXPERIENCE', color='OUTCOME', barmode='group', title="Driving Experience by Outcome", category_orders={"DRIVING_EXPERIENCE": ['newbie', 'amateure', 'advanced', 'expert']})
101
+ fig_pie = px.pie(df, names="DRIVING_EXPERIENCE", title="Frequency Driving Experience")
102
+ col1,col2 = st.columns(2)
103
+ with col1:
104
+ st.plotly_chart(fig, use_container_width=True)
105
+ with col2:
106
+ st.plotly_chart(fig_pie, use_container_width=True)
107
 
 
108
  with st.expander('Explanation'):
109
+ st.write('- In fact, the less experienced you are in driving, the higher the claim rate insurance (Outcome 1 "Claim loan").')
110
+ st.write('- In the dataset frequency Drive experience is high at 0-9 years, namely 35%, followed by 10-19 years is 33.0%, 20-29 years is 21.2%, 30+ years is 10.5%. by the way iam rename values ("Newbie", "Amateure", "Advanced", "Expert")')
 
111
 
112
+ st.divider()
113
 
114
+ # Visualisasi Education
115
+ st.subheader('Looking Education')
116
+ # histogram
117
+ fig = px.histogram(df, x='EDUCATION', color='OUTCOME', barmode='group', title="Driving Experience by Outcome", category_orders={"EDUCATION": ['high school', 'university', 'none']})
118
+ fig_pie = px.pie(df, names="EDUCATION", title="Frequency Education")
119
 
120
+ col1,col2 = st.columns(2)
121
+ with col1:
122
+ st.plotly_chart(fig, use_container_width=True)
123
+ with col2:
124
+ st.plotly_chart(fig_pie, use_container_width=True)
125
+
126
  with st.expander('Explanation'):
127
+ st.write('- The visualization shows that Outcome 1 (claim insurance) has a high frequency in "high school" while Outcome 0 (No claim insurance) in "university"')
128
+ st.write('- And the amount of data in the dataset is 45% in high school, 39% in university and "19.1%" is none (work, etc.)')
129
+ st.write('Relates to Age where high school education levels (aged 16-25 years) tend to submit claims (Outcome 1 "Claim insurance"), as well as high frequency in this category. and I assume that high school is in the 16-25 year age range, university is in the 26-39 year age range, and none is in the 40-65+ age range')
130
+ st.divider()
131
+
132
+ # Visualisasi Income
133
+ st.subheader("Looking Income")
134
+ # Create a subplot with Plotly Express
135
+ fig_income_outcome = px.histogram(df, x="INCOME", color="OUTCOME", barmode="group", category_orders={"INCOME": ['poverty', 'working class', 'middle class', 'upper class']})
136
+ fig_income_outcome.update_layout(title="Income by Outcome", xaxis_title="Income", yaxis_title="Count")
137
+
138
+ # Create a pie chart with Plotly Express
139
+ dist_income = df["INCOME"].value_counts()
140
+ fig_income_frequency = px.pie(names=dist_income.index, values=dist_income.values, title="Frequency by Income",
141
+ labels={"label": "Income", "value": "Frequency"})
142
 
143
+ # Create a grouped bar chart for Income by Age
144
+ fig_income_age = px.bar(df, x="INCOME", color="AGE", category_orders={"INCOME": ['poverty', 'working class', 'middle class', 'upper class'], "AGE": ['young', 'middle age', 'old', 'very old']})
145
+ fig_income_age.update_layout(title="Income by Age", xaxis_title="Income", yaxis_title="Count")
146
 
147
+ # Create a box plot for Income by Past Accidents
148
+ fig_income_accidents = px.box(df, x="PAST_ACCIDENTS", y="INCOME")
149
+ fig_income_accidents.update_layout(title="Income by Past Accidents", xaxis_title="Past Accidents", yaxis_title="Income")
150
 
151
+ # Streamlit app
152
+ st.plotly_chart(fig_income_outcome, use_container_width=True)
153
+ st.plotly_chart(fig_income_frequency, use_container_width=True)
154
+ st.plotly_chart(fig_income_age, use_container_width=True)
155
+ st.plotly_chart(fig_income_accidents, use_container_width=True)
156
 
 
157
  with st.expander('Explanation'):
158
+ st.write("- Based on the Income by Outcome visualization, those who tend to submit claims (outcome 1) are from the 'poverty' income group.")
159
+ st.write("- From the results of the frequency by income dataset, the upper class dominates, followed by the middle class, poverty, and working class.")
160
+ st.write("- From the Income by Age output, it can be seen that different income groups are dominated by different age groups.")
161
+ st.write("- From the Income by Past Accidents output, certain income groups have different patterns in past loan claims.")
162
+
163
+ # Insights
164
+ st.write("Insights:")
165
+ st.write("- Based on the Outcome (claim insurance / no claim insurance) that Income 'Poverty' has the potential to submit a claim, which is dominated by teenagers. Therefore, focusing on reducing this potential by segmenting customers and targeting ages over 25 years and by looking at their income (upper class, middle class, and working class).")
166
+
167
+ st.divider()
168
+ # visualisasi vehicle
169
+ st.subheader('Looking Vehicle Distribution')
170
+ fig_vehicle_year = px.histogram(df, x="VEHICLE_YEAR", color="OUTCOME", barmode="group", title="Vehicle Years by Outcome")
171
+ fig_vehicle_type = px.histogram(df, x="VEHICLE_TYPE", color="OUTCOME", barmode="group", title="Vehicle Type by Outcome")
172
+ fig_vehicle_ownership_outcome = px.histogram(df, x="VEHICLE_OWNERSHIP", color="OUTCOME", barmode="group", title="Vehicle by Ownership")
173
+ fig_vehicle_ownership_pastAccidents = px.box(df,x='VEHICLE_OWNERSHIP', y='PAST_ACCIDENTS')
174
+ st.plotly_chart(fig_vehicle_year, use_container_width=True)
175
+ st.plotly_chart(fig_vehicle_type, use_container_width=True)
176
+ st.plotly_chart(fig_vehicle_ownership_outcome, use_container_width=True)
177
+ st.plotly_chart(fig_vehicle_ownership_pastAccidents, use_container_width=True)
178
+ st.divider()
179
+
180
+ with st.expander('Conclusions'):
181
+
182
+
183
+
184
+ st.write("## Data Imbalance")
185
+ st.write("""
186
+ Data imbalance was found in the Outcome column, which requires balancing actions to improve the accuracy of the prediction model.
187
+ """)
188
+
189
+ st.write("## Personality Analysis")
190
+ st.write("Based on the results that I analyzed, there was a pattern of differences in insurance claims between customers based on gender, marital status, having children, etc. I categorize them based on outcomes as follows:")
191
+
192
+ st.write("### In case of outcome 1 (Claim insurance)")
193
+ st.write("""
194
+ - Tends to be male
195
+ - In the age range 16-25 years (young)
196
+ - Singles
197
+ - There is no significant difference "to have children or not"
198
+ - Based on City, New York is higher, followed by Orlando, San Diego, and finally Baltimore
199
+ - Based on education, high school is higher for submitting insurance claims
200
+ - Income tends to be Poverty (unstable)
201
+ - Driving experience, the less experienced you are, the higher you submit a claim
202
+ """)
203
+
204
+ st.write("### In case of outcome 0 (No insurance claim)")
205
+ st.write("""
206
+ - Tends to be female
207
+ - In the age range 40-64 (old)
208
+ - Married
209
+ - Have children
210
+ - Based on City High New York, Orlando, and San Diego
211
+ - Based on Education, University tends to be high, and high school
212
+ - Income tends to be upper class and middle class
213
+ - Drive experience: the more experienced you are in driving, the higher the risk of filing an insurance claim
214
+ - High credit score
215
+ - Even though there is no correlation, the outcome is high in the majority
216
+ """)
217
+
218
+ st.write("""
219
+ Based on analysis of insurance claim patterns, personal factors such as age, education and marital status influence the frequency of claims. Young drivers (16-25 years) and those with low education (high school) tend to have a higher frequency of claims. On the other hand, drivers who are older (40-64 years), have a higher education (university), are married, and have children tend to file claims less frequently.
220
+ """)
221
+
222
+ st.write("## Vehicle Analysis")
223
+ st.write("As follows are the findings from the vehicle:")
224
+
225
+ st.write("### In the case of outcome 1 (Claim Insurance)")
226
+ st.write("""
227
+ - Tend to be cars before 2015
228
+ - Sedan cars tend to be taller than sports cars
229
+ - Submission does not belong to the car itself
230
+ - Annual mileage tends to be high
231
+ """)
232
+
233
+ st.write("### In case of outcome 0 (No insurance claim)")
234
+ st.write("""
235
+ - Car types tend to be sedans
236
+ - Tend to be cars after 2015
237
+ - Private property
238
+ - Annual mileage tends to be low
239
+ """)
240
+
241
+ st.write("""
242
+ The type and age of the vehicle has a big influence on insurance claim patterns. Older cars or certain types of cars, such as sedans, tend to have a higher frequency of claims.
243
+ """)
244
+
245
+ st.write("## Analysis of Accidents")
246
+ st.write("From the findings, there is an influence on insurance claims based on violations or damage received as follows:")
247
+
248
+ st.write("### In the case of outcome 1 (Claim Insurance)")
249
+ st.write("""
250
+ - Rarely commit speed violations
251
+ - DUIS ("Driving Under the Influence Surcharge") rarely commits such offenses
252
+ - Past accidents tend to have a history of fewer past accidents
253
+ - Ages 16-25 (young) rarely commit violations
254
+ """)
255
+
256
+ st.write("### In case of outcome 0 (No Claim Insurance)")
257
+ st.write("""
258
+ - Have committed at least 1 speed violation or more
259
+ - DUIS ("Driving Under the Influence Surcharge") often commits this violation
260
+ - Most past accidents have a history of certain accidents
261
+ - In the age range of 40+ years there is a higher rate of committing violations
262
+ """)
263
+
264
+ st.write("""
265
+ Traffic violations and past accident history have a big influence on insurance claims. Customers with a lower history of violations and accidents tend to file claims less frequently.
266
+ """)
267
+
268
 
269
+
270
+
271
+ if __name__ == "__main__":
272
+
273
+ run()
expdrive.png DELETED
Binary file (21.1 kB)
 
list_cat_cols.txt → feature_cat.txt RENAMED
@@ -1 +1 @@
1
- ["AGE", "GENDER", "DRIVING_EXPERIENCE", "EDUCATION", "INCOME", "VEHICLE_YEAR"]
 
1
+ ["AGE", "GENDER", "DRIVING_EXPERIENCE", "EDUCATION", "INCOME", "VEHICLE_OWNERSHIP", "VEHICLE_YEAR", "MARRIED", "CHILDREN", "CITY"]
feature_num.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["CREDIT_SCORE", "ANNUAL_MILEAGE", "SPEEDING_VIOLATIONS", "DUIS", "PAST_ACCIDENTS"]
income.png DELETED
Binary file (23.4 kB)
 
list_num_cols.txt DELETED
@@ -1 +0,0 @@
1
- ["CREDIT_SCORE", "VEHICLE_OWNERSHIP", "MARRIED", "CHILDREN", "POSTAL_CODE", "ANNUAL_MILEAGE", "SPEEDING_VIOLATIONS", "DUIS", "PAST_ACCIDENTS"]
 
 
encoder.pkl → model.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c804194d8a6a772d3d0d0682a3c3867d1c060291c5b554757908ac27d6cd01e1
3
- size 956
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdff55560c48cdd5414579d5b8eb01430e6b975ba0c3e393bc11803d595ed4b
3
+ size 97331082
model.py DELETED
@@ -1,93 +0,0 @@
1
- import streamlit as st
2
- import numpy as np
3
- import pandas as pd
4
- import pickle
5
-
6
-
7
- # Load the trained model
8
- with open("ranfo_pipe.pkl", "rb") as model_file:
9
- model = pickle.load(model_file)
10
-
11
- def run():
12
-
13
- # Streamlit UI
14
- st.title("Insurance Claims over Cars")
15
-
16
- # Introduction
17
- st.subheader("πŸ“Š Prediction Insurance Claim or NO ")
18
-
19
- st.markdown('## πŸ“ Input Data')
20
- with st.form('my_form'):
21
-
22
- # Age
23
- age_choice = st.selectbox("Age", ['Young', 'Middle Age', 'Old', 'Very Old'])
24
- # Gender
25
- gender_choice = st.selectbox("Gender", ['male', 'female'])
26
- # Race
27
- race_choice = st.selectbox("Race", ['Majority', 'Minority'])
28
- # Driving Experience
29
- driving_experience_choice = st.selectbox("Driving Experience", ['Newbie', 'Amateur', 'Advanced', 'Expert'])
30
- # Education
31
- education_choice = st.selectbox("Education", ['high school', 'none', 'university'])
32
- # Income
33
- income_choice = st.selectbox("Income", ['upper class', 'poverty', 'working class', 'middle class'])
34
- # Credit Score Range
35
- credit_score_range = st.number_input("Credit Score Range", min_value=0.0, max_value=999999.0)
36
- # Vehicle Ownership
37
- vehicle_ownership_choice = st.selectbox("Vehicle Ownership (True/False)", [0.0, 1.0])
38
- # Vehicle Year
39
- vehicle_year_choice = st.selectbox("Vehicle Year", ['before 2015', 'after 2015'])
40
- # Married
41
- married_choice = st.selectbox("Married (True/False)", [0.0, 1.0])
42
- # Children
43
- children_choice = st.selectbox("Children (True/False)", [0.0, 1.0])
44
- # Postal Code
45
- postal_code_choice = st.selectbox("Postal Code", [10238, 32765, 92101, 21217])
46
- # Annual Mileage Range
47
- annual_mileage_range = st.number_input("Annual Mileage Range", min_value=0, max_value=999999)
48
- # Vehicle Type
49
- vehicle_type_choice = st.selectbox("Vehicle Type", ['sedan', 'sport car'])
50
- # Speeding Violations Range
51
- speeding_violations_range = st.number_input("Speeding Violations Range", min_value=0, max_value=50)
52
- # DUIs Range
53
- duis_range = st.number_input("DUIs Range", min_value=0, max_value=50)
54
- # Past Accidents Range
55
- past_accidents_range = st.number_input("Past Accidents Range", min_value=0, max_value=50)
56
-
57
- submitted = st.form_submit_button('πŸ” Let\'s Check!')
58
-
59
- # Create DataFrame from user input
60
- data = {
61
- "AGE":age_choice,
62
- "GENDER":gender_choice,
63
- "RACE":race_choice,
64
- "DRIVING_EXPERIENCE":driving_experience_choice,
65
- "EDUCATION":education_choice,
66
- "INCOME":income_choice,
67
- "CREDIT_SCORE":credit_score_range,
68
- "VEHICLE_OWNERSHIP":vehicle_ownership_choice,
69
- "VEHICLE_YEAR":vehicle_year_choice,
70
- "MARRIED":married_choice,
71
- "CHILDREN":children_choice,
72
- "POSTAL_CODE":postal_code_choice,
73
- "ANNUAL_MILEAGE":annual_mileage_range,
74
- "VEHICLE":vehicle_type_choice,
75
- "SPEEDING_VIOLATIONS":speeding_violations_range,
76
- "DUIS":duis_range,
77
- "PAST_ACCIDENTS":past_accidents_range,
78
- }
79
-
80
- df = pd.DataFrame([data])
81
- st.dataframe(df)
82
-
83
- # Make prediction
84
- if submitted:
85
- prediction = model.predict(df)
86
- # Display prediction result
87
- if prediction[0] == 0:
88
- st.write('🟒 Claims loan')
89
- else:
90
- st.write('πŸ”΄ No Loan')
91
-
92
- if __name__=='__main__':
93
- run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prediction.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+
6
+ # Load Model
7
+ with open('model.pkl', 'rb') as model_file:
8
+ model = pickle.load(model_file)
9
+
10
+
11
def run():
    """Render the prediction page: collect customer data and predict a claim.

    Builds a Streamlit form with one widget per model feature, shows the
    resulting single-row DataFrame, and, once the form is submitted, passes
    that row to the module-level ``model`` and reports the predicted outcome.

    Fixes over the previous version:
    * Widget results are no longer followed by a trailing comma. The commas
      turned every value into a 1-tuple, so ``vehicle_ownership == 'No'``
      (and the married/children checks) was always False and the derived
      ``*_value`` flags were always 1.
    * The derived 0/1 flags are now actually placed in the DataFrame.
    * The frame is built from a one-row list of dicts, which works with plain
      scalar values (a dict of scalars alone raises in ``pd.DataFrame``).
    """
    st.title('Car Insurance Prediction :car::clipboard:')
    st.write('Welcome to the car insurance claim or not claim prediction application. predict whether customers will claim insurance or not in the future based on existing information')
    st.divider()

    st.markdown('## 📝 Input Data')
    with st.form('my_form'):
        # Categorical customer attributes.
        age = st.selectbox('Age 🧒🏻', ['young', 'middle age', 'old', 'very old'])
        gender = st.selectbox('Gender 🚻', ['male', 'female'])
        race = st.selectbox('RACE 🫱🏼‍🫲🏻', ['majority', 'minority'])
        drive_exp = st.selectbox('Driving Experience 🛣️🚘', ['newbie', 'amateure', 'advanced', 'expert'])
        education = st.selectbox('Education 🎓', ['high school', 'none', 'university'])
        income = st.selectbox('Income 💰', ['poverty', 'working class', 'middle class', 'upper class'])

        credit_score = st.number_input('Credit score :credit_card:', 0.0, 1.0, step=0.1)

        # Yes/No radios are converted to 0/1 flags.
        # NOTE(review): assumes the pickled pipeline was trained on 0/1 values
        # for these three columns, as the *_value conversions imply - confirm
        # against the training notebook.
        vehicle_ownership = st.radio('Vehicle Ownership 🔑', ('No', 'Yes'))
        vehicle_ownership_value = 0 if vehicle_ownership == 'No' else 1

        vehicle_year = st.selectbox('Vehicle Year 📅', ['before 2015', 'after 2015'])

        married = st.radio('Married :man_and_woman_holding_hands::ring:', ('No', 'Yes'))
        married_value = 0 if married == 'No' else 1

        children = st.radio('Children 👶', ('No', 'Yes'))
        children_value = 0 if children == 'No' else 1

        # Numeric driving-history inputs.
        annual_mileage = st.slider('Annual Mileage ⏲', 1000, 25000, step=100)
        vehicle_type = st.selectbox('Vehicle Type 🚙', ['sedan', 'sport car'])
        speeding_violations = st.slider('Speeding Violations ⚡', 0, 15, step=1)
        duis = st.slider('DUIS 🥴💊', 0, 15, step=1)
        past_accidents = st.slider('Past Accidents 💥', 0, 15, step=1)
        city = st.selectbox('City 🏢', ['baltimore', 'new york', 'orlando', 'san diego'])

        submitted = st.form_submit_button('Let\'s 🔍 Check ')

    # One-row frame; keys are the column names handed to model.predict().
    input_data = pd.DataFrame([{
        'AGE': age,
        'GENDER': gender,
        'RACE': race,
        'DRIVING_EXPERIENCE': drive_exp,
        'EDUCATION': education,
        'INCOME': income,
        'CREDIT_SCORE': credit_score,
        'VEHICLE_OWNERSHIP': vehicle_ownership_value,
        'VEHICLE_YEAR': vehicle_year,
        'MARRIED': married_value,
        'CHILDREN': children_value,
        'ANNUAL_MILEAGE': annual_mileage,
        'VEHICLE_TYPE': vehicle_type,
        'SPEEDING_VIOLATIONS': speeding_violations,
        'DUIS': duis,
        'PAST_ACCIDENTS': past_accidents,
        'CITY': city,
    }])

    st.markdown('Syntetic Dataframe')
    st.dataframe(input_data)

    st.markdown('Prediction 👇')
    if submitted:
        prediction = model.predict(input_data)

        # Class 0 is reported as "no claim"; any other label as "claim".
        if prediction[0] == 0:
            st.write('❌ Customer has not filed a claim')
        else:
            st.write('✅ Customer has filed a claim')
92
+
93
+
94
+ if __name__=="__main__":
95
+ run()
ranfo_pipe.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1966d5788619dfada661219e166418e2f09e4f90102f9f33a170bf18435529b4
3
- size 17624931
 
 
 
 
requirements.txt CHANGED
@@ -1,7 +1,17 @@
 
1
  pandas==1.5.3
2
  numpy==1.25.2
3
  seaborn==0.12.2
4
  matplotlib==3.7.2
5
  scikit-learn==1.3.1
6
  imbalanced-learn==0.11.0
7
- feature-engine==1.6.1
 
 
 
 
 
 
 
 
 
 
1
# Resolved merge conflict: pinned versions kept, streamlit and plotly added.
pandas==1.5.3
numpy==1.25.2
seaborn==0.12.2
matplotlib==3.7.2
scikit-learn==1.3.1
imbalanced-learn==0.11.0
feature-engine==1.6.1
streamlit
plotly
scaler.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:54bb7f9c8142a2ac4a64dc1ec3b7770f852cc38fab927c77598b42863fbfe6a7
3
- size 891
 
 
 
 
victyp.png DELETED
Binary file (19.3 kB)