kothariyashhh committed on
Commit
c115711
1 Parent(s): 6324fab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -181
app.py CHANGED
@@ -1,181 +1,110 @@
1
- # import streamlit as st
2
- # import pandas as pd
3
- # import numpy as np
4
- # import joblib
5
- # from sklearn.preprocessing import LabelEncoder
6
-
7
- # class FraudDetectionApp:
8
- # def __init__(self):
9
- # self.model = joblib.load('model/only_model.joblib')
10
- # self.categorical_columns = ['incident_severity', 'insured_hobbies', 'insured_education_level', 'incident_city']
11
- # self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
12
- # self.fit_encoders()
13
-
14
- # def fit_encoders(self):
15
- # # Example unique values for fitting the encoders
16
- # example_data = {
17
- # 'incident_severity': ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'],
18
- # 'insured_hobbies': ['sleeping', 'reading', 'board-games', 'bungie-jumping', 'base-jumping', 'golf', 'camping', 'dancing', 'skydiving', 'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking', 'polo', 'basketball', 'video-games', 'cross-fit', 'exercise'],
19
- # 'insured_education_level': ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'],
20
- # 'incident_city': ['Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale', 'Northbend', 'Northbrook']
21
- # }
22
- # for col in self.categorical_columns:
23
- # self.encoders[col].fit(example_data[col])
24
-
25
- # def preprocess_single_data(self, data):
26
- # if not isinstance(data, pd.DataFrame):
27
- # data = pd.DataFrame(data, index=[0])
28
- # for col in self.categorical_columns:
29
- # if col in data.columns:
30
- # data[col] = self.encoders[col].transform(data[col])
31
- # return data
32
-
33
- # def predict_single_fraud(self, data):
34
- # data_processed = self.preprocess_single_data(data)
35
- # prediction = self.model.predict(data_processed)[0]
36
- # return prediction
37
-
38
- # def run(self):
39
- # st.title('Fraud Detection Prediction')
40
-
41
- # # Input fields
42
- # incident_severity = st.selectbox('Incident Severity', ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'])
43
- # insured_hobbies = st.selectbox('Insured Hobbies', ['sleeping', 'reading', 'board-games', 'bungie-jumping',
44
- # 'base-jumping', 'golf', 'camping', 'dancing', 'skydiving',
45
- # 'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking',
46
- # 'polo', 'basketball', 'video-games', 'cross-fit', 'exercise'])
47
- # total_claim_amount = st.number_input('Total Claim Amount')
48
- # months_as_customer = st.number_input('Months as Customer')
49
- # policy_annual_premium = st.number_input('Policy Annual Premium')
50
- # incident_date = st.number_input('Incident Date', min_value=1, max_value=31, step=1)
51
- # capital_loss = st.number_input('Capital Loss')
52
- # capital_gains = st.number_input('Capital Gains')
53
- # insured_education_level = st.selectbox('Insured Education Level', ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'])
54
- # incident_city = st.selectbox('Incident City', ['Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale', 'Northbend', 'Northbrook'])
55
-
56
- # # Collecting user input
57
- # new_data_point = {
58
- # 'incident_severity': incident_severity,
59
- # 'insured_hobbies': insured_hobbies,
60
- # 'total_claim_amount': total_claim_amount,
61
- # 'months_as_customer': months_as_customer,
62
- # 'policy_annual_premium': policy_annual_premium,
63
- # 'incident_date': incident_date,
64
- # 'capital-loss': capital_loss,
65
- # 'capital-gains': capital_gains,
66
- # 'insured_education_level': insured_education_level,
67
- # 'incident_city': incident_city,
68
- # }
69
-
70
- # # Prediction button
71
- # if st.button('Predict'):
72
- # prediction = self.predict_single_fraud(new_data_point)
73
- # if prediction == 0:
74
- # st.write('The applied application is not fraud.')
75
- # else:
76
- # st.write('The applied application is fraud.')
77
-
78
- # if __name__ == '__main__':
79
- # app = FraudDetectionApp()
80
- # app.run()
81
- import streamlit as st
82
- import pandas as pd
83
- import numpy as np
84
- import joblib
85
- from sklearn.preprocessing import LabelEncoder
86
-
87
- class FraudDetectionApp:
88
- def __init__(self):
89
- self.model = joblib.load('model/only_model.joblib')
90
- self.categorical_columns = ['incident_severity', 'insured_hobbies', 'insured_education_level', 'incident_city']
91
- self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
92
- self.fit_encoders()
93
-
94
- def fit_encoders(self):
95
- # Example unique values for fitting the encoders
96
- example_data = {
97
- 'incident_severity': ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'],
98
- 'insured_hobbies': ['sleeping', 'reading', 'board-games', 'bungie-jumping', 'base-jumping', 'golf', 'camping', 'dancing', 'skydiving', 'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking', 'polo', 'basketball', 'video-games', 'cross-fit', 'exercise'],
99
- 'insured_education_level': ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'],
100
- 'incident_city': ['Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale', 'Northbend', 'Northbrook']
101
- }
102
- for col in self.categorical_columns:
103
- self.encoders[col].fit(example_data[col])
104
-
105
- def preprocess_single_data(self, data):
106
- if not isinstance(data, pd.DataFrame):
107
- data = pd.DataFrame(data, index=[0])
108
- for col in self.categorical_columns:
109
- if col in data.columns:
110
- data[col] = self.encoders[col].transform(data[col])
111
- return data
112
-
113
- def predict_single_fraud(self, data):
114
- data_processed = self.preprocess_single_data(data)
115
- prediction = self.model.predict(data_processed)[0]
116
- return prediction
117
-
118
- def run(self):
119
- st.title('Insurance Fraud Prediction')
120
-
121
- # Input fields
122
- incident_severity = st.selectbox('Incident Severity', ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'])
123
- insured_hobbies = st.selectbox('Insured Hobbies', ['sleeping', 'reading', 'board-games', 'bungie-jumping', 'base-jumping', 'golf', 'camping', 'dancing', 'skydiving', 'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking', 'polo', 'basketball', 'video-games', 'cross-fit', 'exercise'])
124
- total_claim_amount = st.number_input('Total Claim Amount')
125
- months_as_customer = st.number_input('Months as Customer')
126
- policy_annual_premium = st.number_input('Policy Annual Premium')
127
- incident_date = st.number_input('Incident Date', min_value=1, max_value=31, step=1)
128
- capital_loss = st.number_input('Capital Loss')
129
- capital_gains = st.number_input('Capital Gains')
130
- insured_education_level = st.selectbox('Insured Education Level', ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'])
131
- incident_city = st.selectbox('Incident City', ['Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale', 'Northbend', 'Northbrook'])
132
-
133
- # Collecting user input
134
- new_data_point = {
135
- 'incident_severity': incident_severity,
136
- 'insured_hobbies': insured_hobbies,
137
- 'total_claim_amount': total_claim_amount,
138
- 'months_as_customer': months_as_customer,
139
- 'policy_annual_premium': policy_annual_premium,
140
- 'incident_date': incident_date,
141
- 'capital-loss': capital_loss,
142
- 'capital-gains': capital_gains,
143
- 'insured_education_level': insured_education_level,
144
- 'incident_city': incident_city,
145
- }
146
-
147
- # Prediction button
148
- if st.button('Predict'):
149
- prediction = self.predict_single_fraud(new_data_point)
150
- if prediction == 0:
151
- st.write('The applied application is not fraud.')
152
- else:
153
- st.write('The applied application is fraud.')
154
-
155
- # Generate sample data
156
- if st.button('Generate Sample Data'):
157
- sample_non_fraud = self.generate_sample_data(fraud=False)
158
- sample_fraud = self.generate_sample_data(fraud=True)
159
- st.write("Non-Fraud Sample Data:")
160
- st.write(sample_non_fraud)
161
- st.write("Fraud Sample Data:")
162
- st.write(sample_fraud)
163
-
164
- def generate_sample_data(self, fraud=False):
165
- sample_data = {
166
- 'incident_severity': ['Major Damage' if fraud else 'Minor Damage'],
167
- 'insured_hobbies': ['skydiving' if fraud else 'reading'],
168
- 'total_claim_amount': [50000 if fraud else 1000],
169
- 'months_as_customer': [1 if fraud else 60],
170
- 'policy_annual_premium': [10000 if fraud else 200],
171
- 'incident_date': [15],
172
- 'capital-loss': [1000 if fraud else 0],
173
- 'capital-gains': [5000 if fraud else 0],
174
- 'insured_education_level': ['PhD' if fraud else 'College'],
175
- 'incident_city': ['Riverwood' if fraud else 'Northbrook']
176
- }
177
- return pd.DataFrame(sample_data)
178
-
179
- if __name__ == '__main__':
180
- app = FraudDetectionApp()
181
- app.run()
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ from sklearn.preprocessing import LabelEncoder
6
+
7
+ class FraudDetectionApp:
8
+ def __init__(self):
9
+ self.model = joblib.load('model/only_model.joblib')
10
+
11
+ # Assuming the model has an attribute 'feature_names_in_' which stores the feature names used during training
12
+ self.feature_names = self.model.feature_names_in_ if hasattr(self.model, 'feature_names_in_') else [
13
+ 'incident_severity', 'insured_hobbies', 'total_claim_amount', 'months_as_customer', 'policy_annual_premium',
14
+ 'incident_date', 'capital-loss', 'capital-gains', 'insured_education_level', 'incident_city'
15
+ ]
16
+
17
+ self.categorical_columns = ['incident_severity', 'insured_hobbies', 'insured_education_level', 'incident_city']
18
+ self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
19
+ self.fit_encoders()
20
+
21
+ def fit_encoders(self):
22
+ # Example unique values for fitting the encoders
23
+ example_data = {
24
+ 'incident_severity': ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'],
25
+ 'insured_hobbies': ['sleeping', 'reading', 'board-games', 'bungie-jumping', 'base-jumping', 'golf', 'camping', 'dancing', 'skydiving', 'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking', 'polo', 'basketball', 'video-games', 'cross-fit', 'exercise'],
26
+ 'insured_education_level': ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'],
27
+ 'incident_city': ['Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale', 'Northbend', 'Northbrook']
28
+ }
29
+ for col in self.categorical_columns:
30
+ self.encoders[col].fit(example_data[col])
31
+
32
+ def preprocess_single_data(self, data):
33
+ if not isinstance(data, pd.DataFrame):
34
+ data = pd.DataFrame(data, index=[0])
35
+ for col in self.categorical_columns:
36
+ if col in data.columns:
37
+ data[col] = self.encoders[col].transform(data[col])
38
+ # Ensure the column order matches the training data
39
+ data = data[self.feature_names]
40
+ return data
41
+
42
+ def predict_single_fraud(self, data):
43
+ data_processed = self.preprocess_single_data(data)
44
+ prediction = self.model.predict(data_processed)[0]
45
+ return prediction
46
+
47
+ def run(self):
48
+ st.title('Insurance Fraud Prediction')
49
+
50
+ # Input fields
51
+ incident_severity = st.selectbox('Incident Severity', ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'])
52
+ insured_hobbies = st.selectbox('Insured Hobbies', ['sleeping', 'reading', 'board-games', 'bungie-jumping', 'base-jumping', 'golf', 'camping', 'dancing', 'skydiving', 'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking', 'polo', 'basketball', 'video-games', 'cross-fit', 'exercise'])
53
+ total_claim_amount = st.number_input('Total Claim Amount')
54
+ months_as_customer = st.number_input('Months as Customer')
55
+ policy_annual_premium = st.number_input('Policy Annual Premium')
56
+ incident_date = st.number_input('Incident Date', min_value=1, max_value=31, step=1)
57
+ capital_loss = st.number_input('Capital Loss')
58
+ capital_gains = st.number_input('Capital Gains')
59
+ insured_education_level = st.selectbox('Insured Education Level', ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'])
60
+ incident_city = st.selectbox('Incident City', ['Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale', 'Northbend', 'Northbrook'])
61
+
62
+ # Collecting user input
63
+ new_data_point = {
64
+ 'incident_severity': incident_severity,
65
+ 'insured_hobbies': insured_hobbies,
66
+ 'total_claim_amount': total_claim_amount,
67
+ 'months_as_customer': months_as_customer,
68
+ 'policy_annual_premium': policy_annual_premium,
69
+ 'incident_date': incident_date,
70
+ 'capital-loss': capital_loss,
71
+ 'capital-gains': capital_gains,
72
+ 'insured_education_level': insured_education_level,
73
+ 'incident_city': incident_city,
74
+ }
75
+
76
+ # Prediction button
77
+ if st.button('Predict'):
78
+ prediction = self.predict_single_fraud(new_data_point)
79
+ if prediction == 0:
80
+ st.write('The applied application is not fraud.')
81
+ else:
82
+ st.write('The applied application is fraud.')
83
+
84
+ # Generate sample data
85
+ if st.button('Generate Sample Data'):
86
+ sample_non_fraud = self.generate_sample_data(fraud=False)
87
+ sample_fraud = self.generate_sample_data(fraud=True)
88
+ st.write("Non-Fraud Sample Data:")
89
+ st.write(sample_non_fraud)
90
+ st.write("Fraud Sample Data:")
91
+ st.write(sample_fraud)
92
+
93
+ def generate_sample_data(self, fraud=False):
94
+ sample_data = {
95
+ 'incident_severity': ['Major Damage' if fraud else 'Minor Damage'],
96
+ 'insured_hobbies': ['skydiving' if fraud else 'reading'],
97
+ 'total_claim_amount': [50000 if fraud else 1000],
98
+ 'months_as_customer': [1 if fraud else 60],
99
+ 'policy_annual_premium': [10000 if fraud else 200],
100
+ 'incident_date': [15],
101
+ 'capital-loss': [1000 if fraud else 0],
102
+ 'capital-gains': [5000 if fraud else 0],
103
+ 'insured_education_level': ['PhD' if fraud else 'College'],
104
+ 'incident_city': ['Riverwood' if fraud else 'Northbrook']
105
+ }
106
+ return pd.DataFrame(sample_data)
107
+
108
+ if __name__ == '__main__':
109
+ app = FraudDetectionApp()
110
+ app.run()