|
import pandas as pd |
|
import gradio as gr |
|
import json |
|
import pickle |
|
from sklearn.decomposition import PCA |
|
|
|
|
|
# Lookup table applied to the 'country' column in predict() via Series.map.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default locale encoding.
with open('fraud_mean_by_country.json', 'r', encoding='utf-8') as json_file:
    fraud_mean_by_country_dict = json.load(json_file)

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only ship model artifacts that come from a trusted source.

# Classifier used by predict() to produce the final label.
with open("decision_tree_model.pkl", "rb") as model_file:
    clf = pickle.load(model_file)

# Fitted PCA transformer used by predict() to derive the 'pca1'/'pca2' features.
with open("pca_model.pkl", "rb") as pca_file:
    pca = pickle.load(pca_file)

# Reference dataset; its distinct column values populate the UI dropdowns.
df = pd.read_csv('synthetic_financial_data.csv')
|
|
|
|
|
def group(x):
    """Map a customer age to its age-bracket label.

    The labels are reused verbatim as suffixes of the ``Age_group_*`` feature
    columns, so their spelling must not change.

    Args:
        x: Customer age as a number.

    Returns:
        ``'18-32'`` for ages below 33, ``'33-60'`` for ages from 33 up to (but
        not including) 60, and ``'60+'`` for ages of 60 and above.

    Raises:
        ValueError: If ``x`` is ``None`` or NaN.
    """
    # NaN compares False against every bound, so without this guard no branch
    # would match and the function would fail with an UnboundLocalError.
    if x is None or x != x:
        raise ValueError(f"customer age must be a number, got {x!r}")
    if x < 33:
        return '18-32'
    if x < 60:
        return '33-60'
    return '60+'
|
|
|
|
|
def _dropdown_choices(column):
    """Return ``(label, value)`` pairs for each distinct value of ``df[column]``.

    Label and value are the same object, in the order ``unique()`` yields them.
    """
    distinct = df[column].unique()
    return list(zip(distinct, distinct))


# Choice lists for the dropdown widgets, one per categorical column.
card_type_choices = _dropdown_choices('card_type')
location_choices = _dropdown_choices('location')
purchase_category_choices = _dropdown_choices('purchase_category')
country_choices = _dropdown_choices('country')
|
|
|
|
|
def predict(transaction_id, customer_id, merchant_id, amount, transaction_time,
            card_type, location, purchase_category, customer_age,
            transaction_description, country):
    """Classify one transaction with the loaded model and describe the result.

    Only ``amount``, ``country``, ``customer_age``, ``card_type`` and
    ``purchase_category`` are turned into model features.

    Args:
        transaction_id: Echoed in the returned message only.
        customer_id: Echoed in the returned message only.
        merchant_id: Accepted for the UI; not used.
        amount: Transaction amount.
        transaction_time: Echoed in the returned message only.
        card_type: Card type; one-hot encoded.
        location: Accepted for the UI; not used.
        purchase_category: Purchase category; one-hot encoded.
        customer_age: Customer age; converted to an age bracket via ``group``.
        transaction_description: Accepted for the UI; not used.
        country: Country name; replaced by its value in
            ``fraud_mean_by_country_dict``.

    Returns:
        A sentence stating whether the transaction is predicted as fraud, or
        the text of the exception if anything fails while building the
        features or running the model.
    """
    try:
        data = pd.DataFrame({
            'amount': [amount],
            'country': [country],
            'customer_age': [customer_age],
            'card_type': [card_type],
            'purchase_category': [purchase_category],
        })
        data = pd.get_dummies(data, columns=['card_type', 'purchase_category'])

        data['country'] = data['country'].map(fraud_mean_by_country_dict)
        if data['country'].isna().any():
            # Fail with a readable message instead of an opaque integer-cast
            # error further down.
            raise ValueError(f"No fraud statistics available for country {country!r}")

        # Set the age-bracket indicators explicitly. Using
        # get_dummies(drop_first=True) on a single-row frame drops the only
        # level present, so both indicators would always end up 0.
        age_group = data['customer_age'].apply(group)
        for label in ('33-60', '60+'):
            data[f'Age_group_{label}'] = (age_group == label).astype(int)

        columns_to_add = ['customer_age', 'card_type_MasterCard', 'purchase_category_Online Shopping', 'card_type_Discover', 'card_type_Visa', 'purchase_category_Groceries', 'purchase_category_Restaurant', 'purchase_category_Retail', 'purchase_category_Travel', 'Age_group_33-60', 'Age_group_60+']

        # One-hot columns for categories other than the selected one do not
        # exist yet; create them as zeros.
        for column in columns_to_add:
            if column not in data.columns:
                data[column] = 0

        # NOTE(review): this casts every column to int, which also truncates
        # 'amount' and the mapped 'country' value. Kept as-is because the
        # training pipeline is not visible here; confirm it did the same.
        data = data.astype(int)

        pca_inputs = ['purchase_category_Groceries', 'purchase_category_Retail', 'purchase_category_Travel', 'Age_group_33-60', 'Age_group_60+']
        X_pca = pca.transform(data[pca_inputs])
        data['pca1'] = X_pca[:, 0]
        data['pca2'] = X_pca[:, 1]

        data = data.drop(columns=['customer_age', 'card_type_Discover', 'card_type_Visa'] + pca_inputs)

        # The column order above depends on which dummies the input happened
        # to create, and an unlisted category leaves an extra column behind.
        # When the model recorded its training feature names, align to them.
        expected = getattr(clf, 'feature_names_in_', None)
        if expected is not None:
            missing = [name for name in expected if name not in data.columns]
            if missing:
                raise ValueError(f"Model expects features that were not built: {missing}")
            data = data[list(expected)]

        prediction = clf.predict(data)

        # The status glyphs were mis-encoded in the source (the literal was
        # split across two lines); restored as a cross mark / check mark.
        # NOTE(review): confirm these match the originally intended symbols.
        result = "Fraud ❌" if prediction[0] == 1 else "Not Fraud ✅"

        return f'Customer with ID {customer_id} and Transaction ID {transaction_id}, which happened at {transaction_time}, is {result}'
    except Exception as e:
        # UI boundary: show the failure text to the user instead of crashing.
        return str(e)
|
|
|
|
|
# Input widgets, listed in the same order as predict()'s parameters because
# the interface passes their values to the function positionally.
inputs = [
    gr.Textbox(label="Transaction ID"),
    gr.Textbox(label="Customer ID"),
    gr.Textbox(label="Merchant ID"),
    gr.Number(label="Amount"),
    gr.Textbox(label="Transaction Date (YYYY-MM-DD)"),
    gr.Dropdown(label="Card Type", choices=card_type_choices),
    gr.Dropdown(label="Location", choices=location_choices),
    gr.Dropdown(label="Purchase Category", choices=purchase_category_choices),
    gr.Number(label="Customer Age"),
    gr.Textbox(label="Transaction Description"),
    gr.Dropdown(label="Country", choices=country_choices),
]

# Build the web UI around predict() and start serving it.
demo = gr.Interface(fn=predict, inputs=inputs, outputs="text")
demo.launch()