File size: 5,668 Bytes
c115711
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder

class FraudDetectionApp:
    """Streamlit front end for a pre-trained insurance-fraud classifier.

    The app loads a joblib-serialised model, label-encodes the categorical
    inputs, and shows the model's prediction for a single record entered
    through Streamlit widgets.

    Attributes:
        model: The estimator returned by ``joblib.load``.
        feature_names: Column names, in the order passed to the model.
        categorical_columns: Names of the columns that are label-encoded.
        encoders: One fitted ``LabelEncoder`` per categorical column.
    """

    # Single source of truth for the categorical features: the same lists
    # populate the UI select boxes and fit the label encoders, so the two can
    # never drift apart.
    # NOTE(review): the encoders are fit on these lists, not on the training
    # data; the resulting codes match the model only if training encoded the
    # same value set the same way -- TODO confirm against the training code.
    CATEGORY_OPTIONS = {
        'incident_severity': ['Minor Damage', 'Major Damage', 'Total Loss', 'Trivial Damage'],
        'insured_hobbies': [
            'sleeping', 'reading', 'board-games', 'bungie-jumping', 'base-jumping',
            'golf', 'camping', 'dancing', 'skydiving', 'movies', 'hiking',
            'yachting', 'paintball', 'chess', 'kayaking', 'polo', 'basketball',
            'video-games', 'cross-fit', 'exercise',
        ],
        'insured_education_level': ['MD', 'PhD', 'Associate', 'Masters', 'High School', 'College', 'JD'],
        'incident_city': [
            'Columbus', 'Riverwood', 'Arlington', 'Springfield', 'Hillsdale',
            'Northbend', 'Northbrook',
        ],
    }

    # Fallback feature order used when the model does not expose
    # ``feature_names_in_``.
    DEFAULT_FEATURE_NAMES = (
        'incident_severity', 'insured_hobbies', 'total_claim_amount',
        'months_as_customer', 'policy_annual_premium', 'incident_date',
        'capital-loss', 'capital-gains', 'insured_education_level',
        'incident_city',
    )

    def __init__(self, model_path='model/only_model.joblib'):
        """Load the model and fit the categorical encoders.

        Args:
            model_path: Location of the joblib-serialised model. Defaults to
                the path the app has always used.
        """
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code -- only point this at model files from a trusted source.
        self.model = joblib.load(model_path)

        # Prefer the feature order recorded on the model itself; fall back to
        # the hard-coded order when the attribute is absent.
        model_features = getattr(self.model, 'feature_names_in_', None)
        if model_features is None:
            model_features = list(self.DEFAULT_FEATURE_NAMES)
        self.feature_names = model_features

        self.categorical_columns = list(self.CATEGORY_OPTIONS)
        self.encoders = {col: LabelEncoder() for col in self.categorical_columns}
        self.fit_encoders()

    def fit_encoders(self):
        """Fit each categorical encoder on its list of known values."""
        for col in self.categorical_columns:
            self.encoders[col].fit(self.CATEGORY_OPTIONS[col])

    def preprocess_single_data(self, data) -> pd.DataFrame:
        """Encode categorical columns and order columns for the model.

        Args:
            data: Either a DataFrame or a mapping of column name to value
                describing one record.

        Returns:
            A new DataFrame holding exactly ``self.feature_names``, in that
            order, with categorical columns replaced by their encoded values.
            The caller's input is never modified.

        Raises:
            KeyError: If any column in ``self.feature_names`` is missing.
        """
        if isinstance(data, pd.DataFrame):
            # Work on a copy so encoding does not overwrite the caller's frame.
            data = data.copy()
        else:
            data = pd.DataFrame(data, index=[0])

        for col in self.categorical_columns:
            if col in data.columns:
                data[col] = self.encoders[col].transform(data[col])

        missing = [name for name in self.feature_names if name not in data.columns]
        if missing:
            raise KeyError(f'Missing feature columns: {missing}')

        # Ensure the column order matches the training data.
        return data[list(self.feature_names)]

    def predict_single_fraud(self, data):
        """Return the model's prediction for the first row of ``data``.

        Args:
            data: A record in any form accepted by
                :meth:`preprocess_single_data`.
        """
        data_processed = self.preprocess_single_data(data)
        return self.model.predict(data_processed)[0]

    def run(self):
        """Render the Streamlit page: input widgets plus the two buttons."""
        st.title('Insurance Fraud Prediction')
        options = self.CATEGORY_OPTIONS

        # Input fields
        incident_severity = st.selectbox('Incident Severity', options['incident_severity'])
        insured_hobbies = st.selectbox('Insured Hobbies', options['insured_hobbies'])
        total_claim_amount = st.number_input('Total Claim Amount')
        months_as_customer = st.number_input('Months as Customer')
        policy_annual_premium = st.number_input('Policy Annual Premium')
        incident_date = st.number_input('Incident Date', min_value=1, max_value=31, step=1)
        capital_loss = st.number_input('Capital Loss')
        capital_gains = st.number_input('Capital Gains')
        insured_education_level = st.selectbox(
            'Insured Education Level', options['insured_education_level']
        )
        incident_city = st.selectbox('Incident City', options['incident_city'])

        # Collecting user input (keys must match the model's feature names)
        new_data_point = {
            'incident_severity': incident_severity,
            'insured_hobbies': insured_hobbies,
            'total_claim_amount': total_claim_amount,
            'months_as_customer': months_as_customer,
            'policy_annual_premium': policy_annual_premium,
            'incident_date': incident_date,
            'capital-loss': capital_loss,
            'capital-gains': capital_gains,
            'insured_education_level': insured_education_level,
            'incident_city': incident_city,
        }

        # Prediction button
        if st.button('Predict'):
            prediction = self.predict_single_fraud(new_data_point)
            if prediction == 0:
                st.write('The applied application is not fraud.')
            else:
                st.write('The applied application is fraud.')

        # Generate sample data
        if st.button('Generate Sample Data'):
            sample_non_fraud = self.generate_sample_data(fraud=False)
            sample_fraud = self.generate_sample_data(fraud=True)
            st.write("Non-Fraud Sample Data:")
            st.write(sample_non_fraud)
            st.write("Fraud Sample Data:")
            st.write(sample_fraud)

    def generate_sample_data(self, fraud: bool = False) -> pd.DataFrame:
        """Build a one-row, hard-coded example record.

        Args:
            fraud: Selects which of the two fixed example records to return.
                The values are illustrative constants; they are not produced
                by the model.

        Returns:
            A single-row DataFrame with the raw (unencoded) feature values.
        """
        sample_data = {
            'incident_severity': ['Major Damage' if fraud else 'Minor Damage'],
            'insured_hobbies': ['skydiving' if fraud else 'reading'],
            'total_claim_amount': [50000 if fraud else 1000],
            'months_as_customer': [1 if fraud else 60],
            'policy_annual_premium': [10000 if fraud else 200],
            'incident_date': [15],
            'capital-loss': [1000 if fraud else 0],
            'capital-gains': [5000 if fraud else 0],
            'insured_education_level': ['PhD' if fraud else 'College'],
            'incident_city': ['Riverwood' if fraud else 'Northbrook'],
        }
        return pd.DataFrame(sample_data)

# Script entry point: construct the app and render the page in one step.
if __name__ == "__main__":
    FraudDetectionApp().run()