# Spaces:
# Sleeping
# Sleeping
import pickle

import gradio as gr
import pandas as pd
# Define preprocessing function
def preprocess_input(data):
    """Turn one raw toll transaction into the feature row the model expects.

    Args:
        data: Mapping of raw field name to a scalar value for a single
            transaction. 'Transaction_Amount', 'Amount_paid', 'Vehicle_Speed',
            'Vehicle_Type', 'Lane_Type', 'TollBoothID' and 'Vehicle_Dimensions'
            are read; any other keys (IDs, timestamp, location, plate number)
            are ignored.

    Returns:
        A single-row DataFrame containing exactly the feature columns listed
        in ``feature_columns`` below, in that order, with integer one-hot
        indicators.
    """
    # Feature columns, in the order used when selecting the model input.
    feature_columns = [
        'Transaction_Amount', 'Amount_paid', 'Vehicle_Speed', 'Percent_paid',
        'Vehicle_Type_Car', 'Vehicle_Type_Motorcycle', 'Vehicle_Type_SUV',
        'Vehicle_Type_Sedan', 'Vehicle_Type_Truck', 'Vehicle_Type_Van',
        'Lane_Type_Regular', 'TollBoothID_B-102', 'TollBoothID_C-103',
        'TollBoothID_D-104', 'TollBoothID_D-105', 'TollBoothID_D-106',
        'Vehicle_Dimensions_Medium', 'Vehicle_Dimensions_Small',
        'Vehicle_Speed_group_21-41', 'Vehicle_Speed_group_41-93',
        'Vehicle_Speed_group_93-103', 'Vehicle_Speed_group_<= 21',
        'Transaction_Amount_group_330 <', 'Transaction_Amount_group_60-180',
        'Transaction_Amount_group_<= 60',
    ]
    categorical_columns = [
        'Vehicle_Type', 'Lane_Type', 'TollBoothID', 'Vehicle_Dimensions',
        'Vehicle_Speed_group', 'Transaction_Amount_group',
    ]

    def speed_group(speed):
        """Return the speed bucket label for one speed value."""
        if speed <= 21:
            return '<= 21'
        elif speed <= 41:
            return '21-41'
        elif speed <= 93:
            return '41-93'
        elif speed <= 103:
            return '93-103'
        else:
            return '103 <'

    def amount_group(amount):
        """Return the amount bucket label for one transaction amount."""
        if amount <= 60:
            return '<= 60'
        elif amount <= 180:
            return '60-180'
        elif amount <= 330:
            return '180-330'
        else:
            return '330 <'

    df = pd.DataFrame(data, index=[0])

    # Share of the charged amount that was actually paid; 0 when nothing was
    # charged, which also avoids dividing by zero.
    df['Percent_paid'] = df.apply(
        lambda row: row['Amount_paid'] / row['Transaction_Amount']
        if row['Transaction_Amount'] != 0 else 0,
        axis=1,
    )

    df['Vehicle_Speed_group'] = df['Vehicle_Speed'].apply(speed_group)
    df['Transaction_Amount_group'] = df['Transaction_Amount'].apply(amount_group)

    # Encode WITHOUT drop_first: this frame has a single row, so every
    # categorical column has exactly one level and drop_first=True would drop
    # it, leaving all indicators at 0 regardless of the input.
    df = pd.get_dummies(df, columns=categorical_columns, dtype=int)

    # Align to the feature list: indicators for categories that did not occur
    # are added as 0, and every column outside the list (identifier fields,
    # indicators for categories absent from the list) is discarded.
    df = df.reindex(columns=feature_columns, fill_value=0)

    # Coerce any remaining bool/object feature column to int.
    for col in df.select_dtypes(include=['bool', 'object']).columns:
        try:
            df[col] = df[col].astype(int)
        except ValueError:
            df[col] = df[col].astype(float).round().astype(int)

    return df
# Deserialize the trained decision-tree classifier once, at import time.
# NOTE: pickle executes code from the file it loads; only ship a
# 'dt_model.pkl' that comes from a trusted source.
with open('dt_model.pkl', mode='rb') as model_file:
    dt_model = pickle.load(model_file)
# Define function to detect fraud
def predict_salary(transaction_id, timestamp, vehicle_type, fastag_id, tollbooth_id, lane_type, vehicle_dimensions, transaction_amount, amount_paid, geographical_location, vehicle_speed, vehicle_plate_number):
    """Classify one toll transaction as fraud or not and describe the result.

    Despite its name, this function predicts fraud; the name is kept because
    the Gradio interface refers to it.

    Args:
        transaction_id: Identifier echoed back in the result message.
        timestamp, fastag_id, geographical_location, vehicle_plate_number:
            Passed to preprocess_input but not among the model's features.
        vehicle_type, tollbooth_id, lane_type, vehicle_dimensions:
            Categorical inputs that are one-hot encoded.
        transaction_amount, amount_paid, vehicle_speed: Numeric inputs.

    Returns:
        A sentence naming the transaction and stating "Fraud" or "Not Fraud".
    """
    # Preprocess input data
    features = preprocess_input({
        "Transaction_ID": transaction_id,
        "Timestamp": timestamp,
        "Vehicle_Type": vehicle_type,
        "FastagID": fastag_id,
        "TollBoothID": tollbooth_id,
        "Lane_Type": lane_type,
        "Vehicle_Dimensions": vehicle_dimensions,
        "Transaction_Amount": transaction_amount,
        "Amount_paid": amount_paid,
        "Geographical_Location": geographical_location,
        "Vehicle_Speed": vehicle_speed,
        "Vehicle_Plate_Number": vehicle_plate_number,
    })

    # Select the model's feature columns explicitly so their order is fixed.
    feature_order = [
        'Transaction_Amount', 'Amount_paid', 'Vehicle_Speed', 'Percent_paid',
        'Vehicle_Type_Car', 'Vehicle_Type_Motorcycle', 'Vehicle_Type_SUV',
        'Vehicle_Type_Sedan', 'Vehicle_Type_Truck', 'Vehicle_Type_Van',
        'Lane_Type_Regular', 'TollBoothID_B-102', 'TollBoothID_C-103',
        'TollBoothID_D-104', 'TollBoothID_D-105', 'TollBoothID_D-106',
        'Vehicle_Dimensions_Medium', 'Vehicle_Dimensions_Small',
        'Vehicle_Speed_group_21-41', 'Vehicle_Speed_group_41-93',
        'Vehicle_Speed_group_93-103', 'Vehicle_Speed_group_<= 21',
        'Transaction_Amount_group_330 <', 'Transaction_Amount_group_60-180',
        'Transaction_Amount_group_<= 60',
    ]
    model_input = features[feature_order]

    # Keep the numeric class in its own variable. Previously it was replaced
    # by a label string before being compared with 1, so the message said
    # "Not Fraud" for every transaction.
    is_fraud = dt_model.predict(model_input)[0] == 1
    verdict = "Fraud" if is_fraud else "Not Fraud"

    return f'Transaction ID {transaction_id}, the predicted fraud based on your details is {verdict}'
# Define Gradio interface
# The input widgets are listed in the same order as predict_salary's
# parameters; Gradio passes their values positionally.
interface = gr.Interface(
    fn=predict_salary,
    inputs=[
        gr.Textbox(label="Transaction ID"),
        gr.Textbox(label="Timestamp"),
        # NOTE(review): 'Bus ' has a trailing space — presumably it mirrors
        # the spelling in the training data; confirm before normalising.
        gr.Dropdown(['Bus ', 'Car', 'Motorcycle', 'Truck', 'Van', 'Sedan', 'SUV'], label="Vehicle_Type"),
        gr.Textbox(label="Fastag ID"),
        gr.Dropdown(['A-101', 'B-102', 'D-104', 'C-103', 'D-105', 'D-106'], label="TollBoothID"),
        gr.Dropdown(['Express', 'Regular'], label="Lane_Type"),
        gr.Dropdown(['Large', 'Small', 'Medium'], label="Vehicle_Dimensions"),
        gr.Number(label="Transaction Amount"),
        gr.Number(label="Amount Paid"),
        gr.Textbox(label="Geographical Location"),
        gr.Number(label="Vehicle Speed"),
        gr.Textbox(label="Vehicle Plate Number"),
    ],
    # The output is a fraud verdict; the previous label and description
    # spoke of a salary prediction.
    outputs=gr.Textbox(label="Fraud Prediction"),
    title="Fraud Detection Model",
    description="Enter transaction details to predict whether the transaction is fraudulent.",
)

# Launch the Gradio interface
interface.launch()