|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import joblib |
|
from sklearn.preprocessing import LabelEncoder |
|
|
|
class FraudDetectionApp:
    """Streamlit page that scores a single insurance claim with a saved model.

    The model is loaded with joblib, the four categorical inputs are
    label-encoded, and the model's prediction is rendered as a
    fraud / not-fraud message.
    """

    # Path the model is read from when the constructor is given no argument.
    DEFAULT_MODEL_PATH = 'model/only_model.joblib'

    # Column order used when the loaded model has no ``feature_names_in_``.
    DEFAULT_FEATURE_NAMES = (
        'incident_severity', 'insured_hobbies', 'total_claim_amount',
        'months_as_customer', 'policy_annual_premium', 'incident_date',
        'capital-loss', 'capital-gains', 'insured_education_level',
        'incident_city',
    )

    # Single source of truth for the categorical inputs: the same lists feed
    # both the select boxes and the label encoders, so the two cannot drift
    # apart. Key order defines ``categorical_columns``; value order defines
    # the order (and default choice) of each select box.
    CATEGORY_VALUES = {
        'incident_severity': [
            'Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage',
        ],
        'insured_hobbies': [
            'sleeping', 'reading', 'board-games', 'bungie-jumping',
            'base-jumping', 'golf', 'camping', 'dancing', 'skydiving',
            'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking',
            'polo', 'basketball', 'video-games', 'cross-fit', 'exercise',
        ],
        'insured_education_level': [
            'MD', 'PhD', 'Associate', 'Masters', 'High School', 'College',
            'JD',
        ],
        'incident_city': [
            'Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale',
            'Northbend', 'Northbrook',
        ],
    }

    def __init__(self, model_path=DEFAULT_MODEL_PATH):
        """Load the model and prepare the categorical encoders.

        Args:
            model_path: Path of the joblib file to load. Defaults to
                ``'model/only_model.joblib'``.
        """
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code -- only point this at model files from a trusted source.
        self.model = joblib.load(model_path)

        # Prefer the column order recorded on the model itself; otherwise use
        # the built-in default order (a fresh list per instance).
        self.feature_names = getattr(
            self.model, 'feature_names_in_', list(self.DEFAULT_FEATURE_NAMES)
        )

        self.categorical_columns = list(self.CATEGORY_VALUES)
        self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
        self.fit_encoders()

    def fit_encoders(self):
        """Fit every categorical encoder on the values offered in the UI.

        NOTE(review): the encoders are fitted here rather than loaded from
        the training run, so the integer codes match the model only if
        training label-encoded exactly these value sets -- TODO confirm.
        """
        for col in self.categorical_columns:
            self.encoders[col].fit(self.CATEGORY_VALUES[col])

    def preprocess_single_data(self, data):
        """Encode categorical columns and select the model's feature columns.

        Args:
            data: Either a DataFrame, or a mapping of column name to scalar
                describing one row.

        Returns:
            A new DataFrame restricted to ``self.feature_names``, in that
            order, with the categorical columns replaced by encoder codes.
            A DataFrame passed in by the caller is left unmodified.

        Raises:
            KeyError: If a column listed in ``self.feature_names`` is missing.
        """
        if isinstance(data, pd.DataFrame):
            # Work on a copy: encoding in place would overwrite the caller's
            # string labels, so a second call on the same frame would fail.
            frame = data.copy()
        else:
            frame = pd.DataFrame(data, index=[0])

        for col in self.categorical_columns:
            if col in frame.columns:
                frame[col] = self.encoders[col].transform(frame[col])

        return frame[self.feature_names]

    def predict_single_fraud(self, data):
        """Return the model's prediction for the first row of ``data``.

        Args:
            data: Anything accepted by ``preprocess_single_data``.

        Returns:
            The first element of ``self.model.predict(...)``.
        """
        features = self.preprocess_single_data(data)
        return self.model.predict(features)[0]

    def run(self):
        """Render the input form and react to the two buttons."""
        st.title('Insurance Fraud Prediction')

        choices = self.CATEGORY_VALUES

        # Dict values are evaluated top to bottom, so the widgets appear on
        # the page in exactly this order.
        new_data_point = {
            'incident_severity': st.selectbox(
                'Incident Severity', choices['incident_severity']),
            'insured_hobbies': st.selectbox(
                'Insured Hobbies', choices['insured_hobbies']),
            'total_claim_amount': st.number_input('Total Claim Amount'),
            'months_as_customer': st.number_input('Months as Customer'),
            'policy_annual_premium': st.number_input('Policy Annual Premium'),
            # Restricted to 1..31 -- presumably a day of the month; verify
            # against how the training data encoded this column.
            'incident_date': st.number_input(
                'Incident Date', min_value=1, max_value=31, step=1),
            'capital-loss': st.number_input('Capital Loss'),
            'capital-gains': st.number_input('Capital Gains'),
            'insured_education_level': st.selectbox(
                'Insured Education Level', choices['insured_education_level']),
            'incident_city': st.selectbox(
                'Incident City', choices['incident_city']),
        }

        if st.button('Predict'):
            prediction = self.predict_single_fraud(new_data_point)
            # NOTE(review): assumes the model emits 0 for "not fraud"; any
            # other value is reported as fraud -- TODO confirm label encoding.
            if prediction == 0:
                st.write('The applied application is not fraud.')
            else:
                st.write('The applied application is fraud.')

        if st.button('Generate Sample Data'):
            sample_non_fraud = self.generate_sample_data(fraud=False)
            sample_fraud = self.generate_sample_data(fraud=True)
            st.write("Non-Fraud Sample Data:")
            st.write(sample_non_fraud)
            st.write("Fraud Sample Data:")
            st.write(sample_fraud)

    def generate_sample_data(self, fraud=False):
        """Return a one-row DataFrame of fixed example inputs.

        The values are hard-coded presets, not model output.

        Args:
            fraud: When truthy, return the "fraud" preset; otherwise the
                "non-fraud" preset.

        Returns:
            A DataFrame with a single row and the ten input columns.
        """
        sample_data = {
            'incident_severity': ['Major Damage' if fraud else 'Minor Damage'],
            'insured_hobbies': ['skydiving' if fraud else 'reading'],
            'total_claim_amount': [50000 if fraud else 1000],
            'months_as_customer': [1 if fraud else 60],
            'policy_annual_premium': [10000 if fraud else 200],
            'incident_date': [15],
            'capital-loss': [1000 if fraud else 0],
            'capital-gains': [5000 if fraud else 0],
            'insured_education_level': ['PhD' if fraud else 'College'],
            'incident_city': ['Riverwood' if fraud else 'Northbrook'],
        }
        return pd.DataFrame(sample_data)
|
|
|
if __name__ == '__main__':
    # Script entry point: build the app (which loads the model and fits the
    # encoders in its constructor) and render the page.
    FraudDetectionApp().run()
|
|