import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder


class FraudDetectionApp:
    """Streamlit front end that feeds one insurance claim to a saved model.

    The app loads a joblib-serialized model, label-encodes the categorical
    inputs with encoders fitted on the fixed vocabularies in
    ``CATEGORY_VALUES``, and reports the model's prediction for the claim
    entered in the form.
    """

    # Default location of the serialized model, relative to the working dir.
    MODEL_PATH = 'model/only_model.joblib'

    # Column order used when the loaded model does not expose
    # ``feature_names_in_``.
    DEFAULT_FEATURE_NAMES = [
        'incident_severity',
        'insured_hobbies',
        'total_claim_amount',
        'months_as_customer',
        'policy_annual_premium',
        'incident_date',
        'capital-loss',
        'capital-gains',
        'insured_education_level',
        'incident_city',
    ]

    # Single source of truth for the categorical vocabularies: the same lists
    # fit the encoders and populate the select boxes, so the UI can never
    # offer a value the encoders have not seen. Key order defines
    # ``categorical_columns``; list order defines the select-box order.
    CATEGORY_VALUES = {
        'incident_severity': [
            'Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage',
        ],
        'insured_hobbies': [
            'sleeping', 'reading', 'board-games', 'bungie-jumping',
            'base-jumping', 'golf', 'camping', 'dancing', 'skydiving',
            'movies', 'hiking', 'yachting', 'paintball', 'chess', 'kayaking',
            'polo', 'basketball', 'video-games', 'cross-fit', 'exercise',
        ],
        'insured_education_level': [
            'MD', 'PhD', 'Associate', 'Masters', 'High School', 'College',
            'JD',
        ],
        'incident_city': [
            'Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale',
            'Northbend', 'Northbrook',
        ],
    }

    def __init__(self, model_path=None):
        """Load the model and fit the categorical encoders.

        Args:
            model_path: Optional path of the joblib file to load. When
                omitted, ``MODEL_PATH`` is used.
        """
        # NOTE(review): Streamlit re-runs the script on every interaction, so
        # the model is re-loaded each time; consider caching the load.
        self.model = joblib.load(
            self.MODEL_PATH if model_path is None else model_path
        )
        # Assuming the model has an attribute 'feature_names_in_' which stores
        # the feature names used during training; otherwise fall back to the
        # hard-coded order.
        self.feature_names = getattr(
            self.model, 'feature_names_in_', list(self.DEFAULT_FEATURE_NAMES)
        )
        self.categorical_columns = list(self.CATEGORY_VALUES)
        self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
        self.fit_encoders()

    def fit_encoders(self):
        """Fit one ``LabelEncoder`` per categorical column.

        The encoders are fitted on the example vocabularies in
        ``CATEGORY_VALUES``, not on the training data.
        """
        # NOTE(review): the resulting integer codes only match training if the
        # model was trained with encoders fitted on these same label sets --
        # confirm against the training pipeline.
        for col in self.categorical_columns:
            self.encoders[col].fit(self.CATEGORY_VALUES[col])

    def preprocess_single_data(self, data):
        """Return an encoded, correctly ordered feature frame for ``data``.

        Args:
            data: Either a ``pd.DataFrame`` or a mapping that
                ``pd.DataFrame(data, index=[0])`` accepts.

        Returns:
            A new DataFrame whose categorical columns are label-encoded and
            whose columns follow ``self.feature_names``. The input object is
            left unmodified.

        Raises:
            ValueError: If a categorical column holds a value its encoder was
                not fitted on.
            KeyError: If a column listed in ``self.feature_names`` is absent.
        """
        if isinstance(data, pd.DataFrame):
            # Work on a copy so the caller's frame keeps its original strings.
            frame = data.copy()
        else:
            frame = pd.DataFrame(data, index=[0])

        for col in self.categorical_columns:
            if col not in frame.columns:
                continue
            try:
                frame[col] = self.encoders[col].transform(frame[col])
            except ValueError as err:
                raise ValueError(
                    f"Column '{col}' contains a value the encoder was not "
                    f"fitted on: {err}"
                ) from err

        missing = [name for name in self.feature_names if name not in frame.columns]
        if missing:
            raise KeyError(f"Missing feature columns: {missing}")

        # Ensure the column order matches the training data.
        return frame[self.feature_names]

    def predict_single_fraud(self, data):
        """Preprocess ``data`` and return the model's prediction for row 0."""
        data_processed = self.preprocess_single_data(data)
        return self.model.predict(data_processed)[0]

    def run(self):
        """Render the form, and handle the two buttons on the page."""
        st.title('Insurance Fraud Prediction')

        # Input fields
        incident_severity = st.selectbox(
            'Incident Severity', self.CATEGORY_VALUES['incident_severity']
        )
        insured_hobbies = st.selectbox(
            'Insured Hobbies', self.CATEGORY_VALUES['insured_hobbies']
        )
        total_claim_amount = st.number_input('Total Claim Amount')
        months_as_customer = st.number_input('Months as Customer')
        policy_annual_premium = st.number_input('Policy Annual Premium')
        incident_date = st.number_input(
            'Incident Date', min_value=1, max_value=31, step=1
        )
        capital_loss = st.number_input('Capital Loss')
        capital_gains = st.number_input('Capital Gains')
        insured_education_level = st.selectbox(
            'Insured Education Level',
            self.CATEGORY_VALUES['insured_education_level'],
        )
        incident_city = st.selectbox(
            'Incident City', self.CATEGORY_VALUES['incident_city']
        )

        # Collecting user input
        new_data_point = {
            'incident_severity': incident_severity,
            'insured_hobbies': insured_hobbies,
            'total_claim_amount': total_claim_amount,
            'months_as_customer': months_as_customer,
            'policy_annual_premium': policy_annual_premium,
            'incident_date': incident_date,
            'capital-loss': capital_loss,
            'capital-gains': capital_gains,
            'insured_education_level': insured_education_level,
            'incident_city': incident_city,
        }

        # Prediction button
        if st.button('Predict'):
            try:
                prediction = self.predict_single_fraud(new_data_point)
            except (KeyError, ValueError) as err:
                # Show the problem in the page instead of a raw traceback.
                st.error(f'Prediction failed: {err}')
            else:
                if prediction == 0:
                    st.write('The applied application is not fraud.')
                else:
                    st.write('The applied application is fraud.')

        # Generate sample data
        if st.button('Generate Sample Data'):
            sample_non_fraud = self.generate_sample_data(fraud=False)
            sample_fraud = self.generate_sample_data(fraud=True)
            st.write("Non-Fraud Sample Data:")
            st.write(sample_non_fraud)
            st.write("Fraud Sample Data:")
            st.write(sample_fraud)

    def generate_sample_data(self, fraud=False):
        """Return a one-row DataFrame of hard-coded illustrative inputs.

        Args:
            fraud: Selects which of the two hard-coded rows is returned.

        Returns:
            A ``pd.DataFrame`` with one row and the ten input columns. The
            values are fixed examples; they are not produced by the model.
        """
        sample_data = {
            'incident_severity': ['Major Damage' if fraud else 'Minor Damage'],
            'insured_hobbies': ['skydiving' if fraud else 'reading'],
            'total_claim_amount': [50000 if fraud else 1000],
            'months_as_customer': [1 if fraud else 60],
            'policy_annual_premium': [10000 if fraud else 200],
            'incident_date': [15],
            'capital-loss': [1000 if fraud else 0],
            'capital-gains': [5000 if fraud else 0],
            'insured_education_level': ['PhD' if fraud else 'College'],
            'incident_city': ['Riverwood' if fraud else 'Northbrook'],
        }
        return pd.DataFrame(sample_data)


if __name__ == '__main__':
    app = FraudDetectionApp()
    app.run()