File size: 3,247 Bytes
ccb41e6
 
 
 
 
 
 
 
f5ccd41
ccb41e6
 
 
 
 
f5ccd41
 
ccb41e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
import gradio as gr
from sklearn.decomposition import PCA
import json
import pickle

# Load the country -> score lookup that `predict` maps onto the `country`
# feature (presumably the mean fraud rate per country -- confirm against the
# training notebook). JSON is UTF-8 by specification, so the encoding is pinned
# instead of relying on the platform default.
with open('fraud_mean_by_country.json', 'r', encoding='utf-8') as json_file:
    fraud_mean_by_country_dict = json.load(json_file)

# Load the fitted classifier used by `predict`.
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# ship a model file that comes from a trusted source.
with open("decision_tree_model.pkl", "rb") as file:
    clf = pickle.load(file)

# Indicator columns fed to the PCA projection, in the order the pipeline uses.
_PCA_INPUT_COLUMNS = [
    'purchase_category_Groceries',
    'purchase_category_Retail',
    'purchase_category_Travel',
    'Age_group_33-60',
    'Age_group_60+',
]


# Predict using the decision tree classifier
def predict(transaction_id, customer_id, merchant_id, amount, transaction_time, card_type, location, purchase_category, customer_age, transaction_description, country):
    """Classify a single transaction as "Fraudulent" or "Not Fraudulent".

    The feature row handed to the classifier always has exactly the columns
    ``amount``, ``country``, ``purchase_category_Restaurant``, ``pca1`` and
    ``pca2``, whatever values were selected in the UI.

    Args:
        amount: Transaction amount; truncated to an integer before prediction.
        purchase_category: Selected purchase category; drives the
            ``purchase_category_*`` indicator columns.
        customer_age: Customer age; passed to ``group`` to obtain the age
            bucket label behind the ``Age_group_*`` indicators.
        country: Key looked up in ``fraud_mean_by_country_dict``.
        transaction_id, customer_id, merchant_id, transaction_time, card_type,
        location, transaction_description: Accepted so the signature matches
            the UI inputs; they do not influence the prediction (the card-type
            and raw age columns were always dropped before predicting).

    Returns:
        "Fraudulent" when the classifier predicts class 1, otherwise
        "Not Fraudulent".

    Raises:
        ValueError: If the amount or age is missing, or the country has no
            entry in ``fraud_mean_by_country_dict``.
    """
    # NOTE(review): `group`, `pca`, `clf` and `fraud_mean_by_country_dict` are
    # module-level names; `group` and `pca` are not defined in the visible part
    # of this file -- confirm they are loaded/defined before the app starts.
    if amount is None or customer_age is None:
        raise ValueError("Amount and Customer Age are required.")

    country_score = fraud_mean_by_country_dict.get(country)
    if country_score is None or pd.isna(country_score):
        raise ValueError(f"No fraud statistics available for country: {country!r}")

    # Build the indicator names the same way pd.get_dummies would
    # ("<prefix>_<value>"), but set them explicitly: get_dummies with
    # drop_first=True on a one-row frame discards the only level, which left
    # both age indicators permanently at 0.
    category_column = f"purchase_category_{purchase_category}"
    age_column = f"Age_group_{group(customer_age)}"
    active_columns = {category_column, age_column}

    pca_inputs = pd.DataFrame(
        [[int(name in active_columns) for name in _PCA_INPUT_COLUMNS]],
        columns=_PCA_INPUT_COLUMNS,
    )
    # Requires a fitted PCA with at least two components.
    components = pca.transform(pca_inputs)

    # NOTE(review): the integer truncation of `amount` and the country score
    # is carried over unchanged from the previous implementation; it discards
    # any fractional part -- confirm this matches how the model was trained.
    features = pd.DataFrame({
        'amount': [int(amount)],
        'country': [int(country_score)],
        'purchase_category_Restaurant': [int(category_column == 'purchase_category_Restaurant')],
        'pca1': components[:, 0],
        'pca2': components[:, 1],
    })

    # Make predictions
    prediction = clf.predict(features)
    return "Fraudulent" if prediction[0] == 1 else "Not Fraudulent"


# NOTE(review): `df` is not defined in the visible part of this file; it must
# be a DataFrame with the columns referenced below -- confirm where it is
# loaded.
def _distinct_choices(column):
    """Return (label, value) pairs for each distinct value of ``df[column]``."""
    distinct = df[column].unique()
    return list(zip(distinct, distinct))


# Dropdown options for the categorical inputs; label and value are identical.
card_type_choices = _distinct_choices('card_type')
location_choices = _distinct_choices('location')
purchase_category_choices = _distinct_choices('purchase_category')
country_choices = _distinct_choices('country')

# Input widgets, listed in the same order as the parameters of `predict`.
inputs = [
    *(
        gr.Textbox(label=caption)
        for caption in ("Transaction ID", "Customer ID", "Merchant ID")
    ),
    gr.Number(label="Amount"),
    gr.Textbox(label="Transaction Time"),
    gr.Dropdown(label="Card Type", choices=card_type_choices),
    gr.Dropdown(label="Location", choices=location_choices),
    gr.Dropdown(label="Purchase Category", choices=purchase_category_choices),
    gr.Number(label="Customer Age"),
    gr.Textbox(label="Transaction Description"),
    gr.Dropdown(label="Country", choices=country_choices),
]

# Wire the widgets to `predict` and start the app; the result is shown as text.
demo = gr.Interface(fn=predict, inputs=inputs, outputs="text")
demo.launch()