File size: 4,128 Bytes
ccb41e6
 
 
f5ccd41
0c8cb7b
ccb41e6
0c8cb7b
ccb41e6
 
 
0c8cb7b
f5ccd41
 
ccb41e6
0c8cb7b
 
 
 
 
90891d5
0c8cb7b
c522b4e
0a4c637
c522b4e
 
 
 
 
 
 
0a4c637
0c8cb7b
 
 
 
 
 
 
ccb41e6
9ce6f17
 
 
 
 
 
 
 
 
 
 
 
ccb41e6
721f815
 
 
f970793
9ce6f17
 
 
 
ccb41e6
9ce6f17
c522b4e
ccb41e6
9ce6f17
 
 
 
fa2b818
c522b4e
9ce6f17
7c02588
9ce6f17
edbfd27
b21965b
 
9ce6f17
 
ccb41e6
 
 
 
 
 
 
edbfd27
0c8cb7b
 
 
ccb41e6
 
0c8cb7b
ccb41e6
 
9ce6f17
bde8c05
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import pandas as pd
import gradio as gr
import json
import pickle
from sklearn.decomposition import PCA

# Load the country lookup table that predict() maps onto the `country` feature.
# The encoding is given explicitly so parsing does not depend on the
# platform's default locale encoding.
with open('fraud_mean_by_country.json', 'r', encoding='utf-8') as json_file:
    fraud_mean_by_country_dict = json.load(json_file)

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Both .pkl files below must come from a trusted source (e.g. this repository).

# Load the decision tree model
with open("decision_tree_model.pkl", "rb") as file:
    clf = pickle.load(file)

# Load PCA model
with open("pca_model.pkl", "rb") as file:
    pca = pickle.load(file)

# Dataset whose distinct column values populate the dropdown menus
df = pd.read_csv('synthetic_financial_data.csv')

# Bucket an age into the categorical age-group label used as a model feature
def group(x):
    """Return the age-group label for the age *x*.

    Any age below 33 maps to '18-32' (no lower bound is enforced), ages from
    33 up to but not including 60 map to '33-60', and ages of 60 or more map
    to '60+'.

    Raises:
        ValueError: if *x* satisfies none of the comparisons, which happens
            for NaN. Previously this surfaced as an UnboundLocalError.
    """
    if x < 33:
        return '18-32'
    if x < 60:
        return '33-60'
    if x >= 60:
        return '60+'
    # Reached only when every comparison above is False (e.g. float('nan')).
    raise ValueError(f"Cannot assign an age group to {x!r}")

# Dropdown options: every distinct value of a column becomes one entry
def _choice_pairs(column):
    """Return a list of (value, value) tuples for the distinct values of df[column]."""
    return [(option, option) for option in df[column].unique()]


card_type_choices = _choice_pairs('card_type')
location_choices = _choice_pairs('location')
purchase_category_choices = _choice_pairs('purchase_category')
country_choices = _choice_pairs('country')

# Function to predict
def predict(transaction_id, customer_id, merchant_id, amount, transaction_time, card_type, location, purchase_category, customer_age, transaction_description, country):
    """Classify one transaction and return a sentence describing the verdict.

    Only ``amount``, ``country``, ``customer_age``, ``card_type`` and
    ``purchase_category`` are turned into model features. ``transaction_id``,
    ``customer_id`` and ``transaction_time`` are echoed in the returned
    message; ``merchant_id``, ``location`` and ``transaction_description``
    are accepted but not used.

    Returns:
        A message containing "Fraud ❌" or "Not Fraud ✅". If any step fails,
        the text of the exception is returned instead of being raised, so the
        UI always receives a string.
    """
    try:
        data = pd.DataFrame({
            'amount': [amount],
            'country': [country],
            'customer_age': [customer_age],
            'card_type': [card_type],
            'purchase_category': [purchase_category],
        })
        data = pd.get_dummies(data, columns=['card_type', 'purchase_category'])

        # Replace the country name by its value from the lookup table. A
        # country missing from the table becomes NaN, which would otherwise
        # fail later with an opaque integer-cast error.
        data['country'] = data['country'].map(fraud_mean_by_country_dict)
        if data['country'].isna().any():
            raise ValueError(f"No fraud statistics available for country {country!r}")

        # Set the age-group indicators directly. Running
        # get_dummies(..., drop_first=True) on a single row drops the only
        # level present, so both indicators were always back-filled with 0
        # and the customer's age never influenced the prediction. '18-32' is
        # the baseline level (both indicators 0).
        age_group = group(customer_age)
        data['Age_group_33-60'] = int(age_group == '33-60')
        data['Age_group_60+'] = int(age_group == '60+')

        # A single row only produces the dummy column for its own category;
        # add the remaining expected columns with a default value of 0.
        expected_dummies = [
            'card_type_MasterCard', 'card_type_Discover', 'card_type_Visa',
            'purchase_category_Online Shopping', 'purchase_category_Groceries',
            'purchase_category_Restaurant', 'purchase_category_Retail',
            'purchase_category_Travel', 'purchase_category_Gas Station',
        ]
        for column in expected_dummies:
            if column not in data.columns:
                data[column] = 0

        # NOTE(review): this casts every column to int, which also truncates
        # `amount` and the mapped `country` value. Kept as-is to match the
        # existing behavior; confirm it mirrors the training pipeline.
        data = data.astype(int)

        # Perform PCA on the same feature subset, in the same order, as before
        pca_features = [
            'purchase_category_Groceries', 'purchase_category_Retail',
            'purchase_category_Travel', 'Age_group_33-60', 'Age_group_60+',
        ]
        components = pca.transform(data[pca_features])
        data['pca1'] = components[:, 0]
        data['pca2'] = components[:, 1]

        # Make predictions; selecting the columns also fixes their order
        features = data[['amount', 'country', 'card_type_MasterCard', 'purchase_category_Online Shopping', 'purchase_category_Restaurant', 'pca1', 'pca2']]
        prediction = clf.predict(features)
        result = "Fraud ❌" if prediction[0] == 1 else "Not Fraud ✅"
        # Custom output for specific IDs
        return f'Customer with ID {customer_id} and Transaction ID {transaction_id}, which happened at {transaction_time}, is {result}'
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app
        return str(e)

# Input components, in the same order as the parameters of predict()
inputs = [
    gr.Textbox(label="Transaction ID"),
    gr.Textbox(label="Customer ID"),
    gr.Textbox(label="Merchant ID"),
    gr.Number(label="Amount"),
    gr.Textbox(label="Transaction Date (YYYY-MM-DD)"),
    gr.Dropdown(label="Card Type", choices=card_type_choices),
    gr.Dropdown(label="Location", choices=location_choices),
    gr.Dropdown(label="Purchase Category", choices=purchase_category_choices),
    gr.Number(label="Customer Age"),
    gr.Textbox(label="Transaction Description"),
    gr.Dropdown(label="Country", choices=country_choices),
]

# Build the Gradio interface around predict() and start serving it
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs="text",
    title="Fraud Detection Model",
    description="Enter details to predict fraud.",
)
demo.launch()