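# NOTE(review): the lines below appear to be web file-viewer residue captured
# together with the source; they are kept as comments so the file parses.
# kothariyashhh's picture
# Update app.py
# c115711 verified
# raw
# history blame contribute delete
# No virus
# 5.67 kB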
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
class FraudDetectionApp:
    """Streamlit page that feeds one insurance claim to a pre-trained model.

    On construction the model is loaded with joblib and one ``LabelEncoder``
    per categorical column is fitted on the fixed value lists in
    ``CATEGORY_OPTIONS``. ``run`` renders the input widgets, and on request
    encodes the entered values and shows the model's prediction.
    """

    # Location of the serialized model, relative to the working directory.
    MODEL_PATH = 'model/only_model.joblib'

    # Feature order used when the model does not expose ``feature_names_in_``.
    DEFAULT_FEATURE_NAMES = [
        'incident_severity', 'insured_hobbies', 'total_claim_amount',
        'months_as_customer', 'policy_annual_premium', 'incident_date',
        'capital-loss', 'capital-gains', 'insured_education_level',
        'incident_city',
    ]

    # Single source of truth for the categorical values: the encoders are
    # fitted on these lists and the select boxes offer exactly these choices,
    # so the UI can never produce a label the encoders have not seen.
    CATEGORY_OPTIONS = {
        'incident_severity': [
            'Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage',
        ],
        'insured_hobbies': [
            'sleeping', 'reading', 'board-games', 'bungie-jumping',
            'base-jumping', 'golf', 'camping', 'dancing', 'skydiving',
            'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking',
            'polo', 'basketball', 'video-games', 'cross-fit', 'exercise',
        ],
        'insured_education_level': [
            'MD', 'PhD', 'Associate', 'Masters', 'High School', 'College',
            'JD',
        ],
        'incident_city': [
            'Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale',
            'Northbend', 'Northbrook',
        ],
    }

    def __init__(self):
        """Load the model, resolve the feature order and fit the encoders."""
        # NOTE(review): joblib.load unpickles the file, so the model file
        # must come from a trusted source.
        self.model = joblib.load(self.MODEL_PATH)

        # Prefer the feature names recorded on the model, when it has them;
        # otherwise fall back to the hard-coded order. Stored as a plain list
        # in both cases so the attribute has one consistent type.
        trained_names = getattr(self.model, 'feature_names_in_', None)
        if trained_names is not None:
            self.feature_names = list(trained_names)
        else:
            self.feature_names = list(self.DEFAULT_FEATURE_NAMES)

        self.categorical_columns = list(self.CATEGORY_OPTIONS)
        self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
        self.fit_encoders()

    def fit_encoders(self):
        """Fit each categorical column's encoder on its known value list."""
        # NOTE(review): the integer codes produced here only agree with the
        # model if training encoded the same value sets the same way - this
        # cannot be confirmed from this file; verify against the training code.
        for col in self.categorical_columns:
            self.encoders[col].fit(self.CATEGORY_OPTIONS[col])

    def preprocess_single_data(self, data):
        """Encode categorical columns and order columns as the model expects.

        Args:
            data: A ``pandas.DataFrame``, or anything ``pandas.DataFrame``
                accepts together with ``index=[0]`` (e.g. a dict of scalars
                describing one claim).

        Returns:
            A new DataFrame holding exactly ``self.feature_names``, in that
            order, with the categorical columns replaced by encoder output.
            A DataFrame passed in by the caller is left unmodified.

        Raises:
            KeyError: If a column listed in ``self.feature_names`` is absent.
        """
        if isinstance(data, pd.DataFrame):
            # Work on a copy: encoding must not overwrite the caller's frame.
            frame = data.copy()
        else:
            frame = pd.DataFrame(data, index=[0])

        for col in self.categorical_columns:
            if col in frame.columns:
                frame[col] = self.encoders[col].transform(frame[col])

        # Fail with an explicit message instead of pandas' generic KeyError.
        missing = [name for name in self.feature_names if name not in frame.columns]
        if missing:
            raise KeyError('Missing feature column(s): {}'.format(missing))

        # Ensure the column order matches the order the model expects.
        return frame[list(self.feature_names)]

    def predict_single_fraud(self, data):
        """Return the model's prediction for a single data point.

        Args:
            data: One claim, in any form ``preprocess_single_data`` accepts.

        Returns:
            The first element of ``self.model.predict`` on the processed row.
        """
        data_processed = self.preprocess_single_data(data)
        return self.model.predict(data_processed)[0]

    def run(self):
        """Render the page: input widgets, prediction and sample-data buttons."""
        st.title('Insurance Fraud Prediction')
        options = self.CATEGORY_OPTIONS

        # Input fields (widget order defines the on-page layout).
        incident_severity = st.selectbox('Incident Severity', options['incident_severity'])
        insured_hobbies = st.selectbox('Insured Hobbies', options['insured_hobbies'])
        total_claim_amount = st.number_input('Total Claim Amount')
        months_as_customer = st.number_input('Months as Customer')
        policy_annual_premium = st.number_input('Policy Annual Premium')
        # Restricted to an integer in 1..31.
        # NOTE(review): presumably a day of month, as used in training - confirm.
        incident_date = st.number_input('Incident Date', min_value=1, max_value=31, step=1)
        capital_loss = st.number_input('Capital Loss')
        capital_gains = st.number_input('Capital Gains')
        insured_education_level = st.selectbox('Insured Education Level', options['insured_education_level'])
        incident_city = st.selectbox('Incident City', options['incident_city'])

        # Collect the user input under the feature names the model uses.
        new_data_point = {
            'incident_severity': incident_severity,
            'insured_hobbies': insured_hobbies,
            'total_claim_amount': total_claim_amount,
            'months_as_customer': months_as_customer,
            'policy_annual_premium': policy_annual_premium,
            'incident_date': incident_date,
            'capital-loss': capital_loss,
            'capital-gains': capital_gains,
            'insured_education_level': insured_education_level,
            'incident_city': incident_city,
        }

        # Prediction button
        if st.button('Predict'):
            prediction = self.predict_single_fraud(new_data_point)
            # NOTE(review): assumes the model labels "not fraud" as 0; any
            # other label (including a string label) is reported as fraud.
            if prediction == 0:
                st.write('The applied application is not fraud.')
            else:
                st.write('The applied application is fraud.')

        # Generate sample data
        if st.button('Generate Sample Data'):
            sample_non_fraud = self.generate_sample_data(fraud=False)
            sample_fraud = self.generate_sample_data(fraud=True)
            st.write("Non-Fraud Sample Data:")
            st.write(sample_non_fraud)
            st.write("Fraud Sample Data:")
            st.write(sample_fraud)

    def generate_sample_data(self, fraud=False):
        """Build a one-row DataFrame of hard-coded example values.

        The values are fixed illustrations chosen by ``fraud``; they are not
        produced or checked by the model.

        Args:
            fraud: Selects the "fraud" example row when true, otherwise the
                "non-fraud" example row.

        Returns:
            A single-row ``pandas.DataFrame`` with the ten feature columns.
        """
        sample_data = {
            'incident_severity': ['Major Damage' if fraud else 'Minor Damage'],
            'insured_hobbies': ['skydiving' if fraud else 'reading'],
            'total_claim_amount': [50000 if fraud else 1000],
            'months_as_customer': [1 if fraud else 60],
            'policy_annual_premium': [10000 if fraud else 200],
            'incident_date': [15],
            'capital-loss': [1000 if fraud else 0],
            'capital-gains': [5000 if fraud else 0],
            'insured_education_level': ['PhD' if fraud else 'College'],
            'incident_city': ['Riverwood' if fraud else 'Northbrook'],
        }
        return pd.DataFrame(sample_data)
if __name__ == '__main__':
    # Build the app (loads the model and fits the encoders), then render the page.
    FraudDetectionApp().run()