riyadahmadov's picture
Create app.py
232daa3 verified
raw
history blame
No virus
5.42 kB
import pandas as pd
import pickle
import gradio as gr
# Define preprocessing function
def preprocess_input(data):
    """Turn one raw toll-transaction record into a single-row feature frame.

    Parameters
    ----------
    data : dict
        Mapping of raw column name to a scalar value. ``Transaction_Amount``,
        ``Amount_paid``, ``Vehicle_Speed``, ``Vehicle_Type``, ``Lane_Type``,
        ``TollBoothID`` and ``Vehicle_Dimensions`` are required. The
        identifier fields (``Transaction_ID``, ``Timestamp``, ``FastagID``,
        ``Geographical_Location``, ``Vehicle_Plate_Number``) are dropped when
        present, and ``Fraud_indicator`` is encoded when present.

    Returns
    -------
    pandas.DataFrame
        One row. The 25 columns listed in ``feature_columns`` come first, in
        that order, followed by any remaining columns (for example
        ``Fraud_indicator``).
    """
    # Columns this function guarantees on output. One level of every
    # categorical has no indicator column here (e.g. 'Bus ', 'Express',
    # 'A-101'); such a value is encoded as all zeros for its group.
    feature_columns = [
        'Transaction_Amount', 'Amount_paid', 'Vehicle_Speed',
        'Percent_paid', 'Vehicle_Type_Car', 'Vehicle_Type_Motorcycle',
        'Vehicle_Type_SUV', 'Vehicle_Type_Sedan', 'Vehicle_Type_Truck',
        'Vehicle_Type_Van', 'Lane_Type_Regular', 'TollBoothID_B-102',
        'TollBoothID_C-103', 'TollBoothID_D-104', 'TollBoothID_D-105',
        'TollBoothID_D-106', 'Vehicle_Dimensions_Medium',
        'Vehicle_Dimensions_Small', 'Vehicle_Speed_group_21-41',
        'Vehicle_Speed_group_41-93', 'Vehicle_Speed_group_93-103',
        'Vehicle_Speed_group_<= 21', 'Transaction_Amount_group_330 <',
        'Transaction_Amount_group_60-180', 'Transaction_Amount_group_<= 60',
    ]
    categorical_columns = [
        'Vehicle_Type', 'Lane_Type', 'TollBoothID', 'Vehicle_Dimensions',
        'Vehicle_Speed_group', 'Transaction_Amount_group',
    ]
    identifier_columns = [
        'Transaction_ID', 'Timestamp', 'FastagID',
        'Geographical_Location', 'Vehicle_Plate_Number',
    ]
    fraud_codes = {'Fraud': 1, 'Not Fraud': 0}

    def vehicle_speed_group(speed):
        """Return the label of the speed bucket that contains ``speed``."""
        if speed <= 21:
            return '<= 21'
        if speed <= 41:
            return '21-41'
        if speed <= 93:
            return '41-93'
        if speed <= 103:
            return '93-103'
        return '103 <'

    def transaction_amount_group(amount):
        """Return the label of the amount bucket that contains ``amount``."""
        if amount <= 60:
            return '<= 60'
        if amount <= 180:
            return '60-180'
        if amount <= 330:
            return '180-330'
        return '330 <'

    df = pd.DataFrame(data, index=[0])

    # Share of the transaction amount that was actually paid; 0 when the
    # amount itself is 0 so we never divide by zero.
    df['Percent_paid'] = df.apply(
        lambda row: row['Amount_paid'] / row['Transaction_Amount']
        if row['Transaction_Amount'] != 0 else 0,
        axis=1,
    )

    # Encode the label as 1/0; any other value is passed through unchanged.
    # A per-value lookup is used so the result does not depend on
    # DataFrame.replace's dtype downcasting.
    if 'Fraud_indicator' in df.columns:
        df['Fraud_indicator'] = df['Fraud_indicator'].map(
            lambda label: fraud_codes.get(label, label)
        )

    # Bucket the two continuous columns before one-hot encoding them.
    df['Vehicle_Speed_group'] = df['Vehicle_Speed'].apply(vehicle_speed_group)
    df['Transaction_Amount_group'] = df['Transaction_Amount'].apply(
        transaction_amount_group
    )

    # One-hot encode. drop_first=True must NOT be used here: with a single
    # row every categorical has exactly one level, so drop_first would remove
    # the only indicator and every categorical feature would end up as 0.
    # Instead, encode everything and discard the indicators that are not part
    # of feature_columns (the baseline level or a category not listed there).
    raw_columns = set(df.columns)
    df = pd.get_dummies(df, columns=categorical_columns)
    unexpected_dummies = [
        name for name in df.columns
        if name not in raw_columns and name not in feature_columns
    ]
    df = df.drop(columns=unexpected_dummies)

    # Identifier-like fields are not features.
    df = df.drop(columns=identifier_columns, errors='ignore')

    # Convert bool/object columns to int; values such as '1.0' that cannot be
    # parsed as int directly go through float first.
    for col in df.select_dtypes(include=['bool', 'object']):
        try:
            df[col] = df[col].astype(int)
        except ValueError:
            df[col] = df[col].astype(float).round().astype(int)

    # Indicators for categories this record does not have default to 0.
    for column in feature_columns:
        if column not in df.columns:
            df[column] = 0

    # Put the feature columns in a fixed order, then whatever else remains.
    remaining = [name for name in df.columns if name not in feature_columns]
    return df[feature_columns + remaining]
# Deserialize the trained model from 'dt_model.pkl' once, at import time.
# NOTE(review): the previous comment called this a Random Forest Regression
# model, while the file/variable name suggests a decision tree -- confirm.
with open('dt_model.pkl', 'rb') as model_file:
    dt_model = pickle.load(model_file)
# Define function to predict salary
def predict_salary(transaction_id, timestamp, vehicle_type, fastag_id, tollbooth_id, lane_type, vehicle_dimensions, transaction_amount, amount_paid, geographical_location, vehicle_speed, vehicle_plate_number, fraud_indicator='Not Fraud'):
    """Preprocess one transaction, run the loaded model on it and format the result.

    Parameters
    ----------
    transaction_id, timestamp, fastag_id, geographical_location, vehicle_plate_number
        Identifier-like fields; handed to ``preprocess_input``, which drops
        them. ``transaction_id`` is also echoed in the returned message.
    vehicle_type, tollbooth_id, lane_type, vehicle_dimensions
        Categorical fields that ``preprocess_input`` one-hot encodes.
    transaction_amount, amount_paid, vehicle_speed
        Numeric fields.
    fraud_indicator : str, optional
        ``'Fraud'`` or ``'Not Fraud'``. Defaults to ``'Not Fraud'`` so the
        function can be called with only the twelve values the UI collects.
        It is only used as a model input if the model itself lists it as a
        feature.

    Returns
    -------
    str
        Message containing the transaction id and the first prediction.

    Raises
    ------
    KeyError
        If the model expects a feature that preprocessing did not produce.
    """
    # Preprocess input data
    input_data = preprocess_input({
        "Transaction_ID": transaction_id,
        "Timestamp": timestamp,
        "Vehicle_Type": vehicle_type,
        "FastagID": fastag_id,
        "TollBoothID": tollbooth_id,
        "Lane_Type": lane_type,
        "Vehicle_Dimensions": vehicle_dimensions,
        "Transaction_Amount": transaction_amount,
        "Amount_paid": amount_paid,
        "Geographical_Location": geographical_location,
        "Vehicle_Speed": vehicle_speed,
        "Vehicle_Plate_Number": vehicle_plate_number,
        "Fraud_indicator": fraud_indicator
    })

    # The previous hard-coded selection named columns that preprocessing never
    # creates (e.g. 'Vehicle_Type_Bus', 'Lane_Type_Fastag',
    # 'Geographical_Location_Urban'), so every call raised KeyError.
    # Prefer the feature names recorded on the fitted model; scikit-learn
    # estimators fitted on a DataFrame expose them as `feature_names_in_`.
    feature_names = getattr(dt_model, 'feature_names_in_', None)
    if feature_names is None:
        # Fallback: every preprocessed column except the label.
        # NOTE(review): assumes Fraud_indicator is the target rather than a
        # feature -- confirm against the training code.
        feature_names = [
            name for name in input_data.columns if name != 'Fraud_indicator'
        ]
    feature_names = list(feature_names)

    missing = [name for name in feature_names if name not in input_data.columns]
    if missing:
        raise KeyError(
            f"preprocess_input did not produce model feature(s): {missing}"
        )
    input_data = input_data[feature_names]

    # Predict using the trained model
    # NOTE(review): the "salary" wording and dollar formatting below look like
    # leftovers from a different app; left unchanged because the model's
    # output type is not visible here -- confirm and reword.
    salary_prediction = dt_model.predict(input_data)
    result = f'Transaction ID {transaction_id}, the predicted salary based on your details is ${salary_prediction[0]:,.2f}'
    return result
# Build the Gradio UI.
# Input widgets are listed in the positional order in which their values are
# passed to predict_salary.
# NOTE(review): no component here supplies predict_salary's final
# `fraud_indicator` parameter.
input_components = [
    gr.Textbox(label="Transaction ID"),
    gr.Textbox(label="Timestamp"),
    # 'Bus ' carries a trailing space; kept verbatim (presumably it mirrors
    # the category spelling in the training data -- confirm).
    gr.Dropdown(['Bus ','Car','Motorcycle','Truck','Van','Sedan','SUV'], label="Vehicle_Type"),
    gr.Textbox(label="Fastag ID"),
    gr.Dropdown(['A-101','B-102','D-104','C-103','D-105','D-106'], label="TollBoothID"),
    gr.Dropdown(['Express','Regular'], label="Lane_Type"),
    gr.Dropdown(['Large','Small','Medium'], label="Vehicle_Dimensions"),
    gr.Number(label="Transaction Amount"),
    gr.Number(label="Amount Paid"),
    gr.Textbox(label="Geographical Location"),
    gr.Number(label="Vehicle Speed"),
    gr.Textbox(label="Vehicle Plate Number"),
]

# Single text box that shows the message returned by predict_salary.
prediction_output = gr.Textbox(label="Predicted Salary")

interface = gr.Interface(
    fn=predict_salary,
    inputs=input_components,
    outputs=prediction_output,
    title="Salary Prediction Model",
    description="Enter details to predict salary.",
)

# Start serving the UI.
interface.launch()