import io
import tempfile

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Pre-trained classifier used by detect_anomalies(); loaded once, at import
# time. The path is relative, so it is resolved against the current working
# directory of the process.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only ship model files that come from a trusted source.
model = joblib.load("anomaly_detector_rf_model.pkl")

# Columns handed to the model, in this order. detect_anomalies() reindexes the
# uploaded frame to exactly this list, filling any missing column with 0.
# NOTE(review): presumably this must match the column order the model was
# trained with -- confirm against the training code before editing.
feature_cols = [
    "amount",
    "hour",
    "day_of_week",
    "is_weekend",
    "merchant_avg_amount",
    "amount_zscore",
    "log_amount",
    "type_atm_withdrawal",
    "type_credit",
    "type_debit",
    "merchant_encoded",
]

def detect_anomalies(df):
    """Score every transaction in *df* and pull out the anomalous ones.

    Args:
        df: Transactions frame. Columns listed in ``feature_cols`` are fed to
            the model; any that are absent are treated as 0. The frame itself
            is not modified.

    Returns:
        A ``(full_df, anomalies)`` tuple. ``full_df`` is a copy of *df* with
        the display columns guaranteed to exist plus an ``is_anomalous``
        column holding the model's prediction. ``anomalies`` contains only the
        rows where ``is_anomalous == 1``, restricted to the columns
        ``transaction_id``, ``merchant``, ``location``, ``amount`` and
        ``is_anomalous``.
    """
    original_df = df.copy()

    # Make sure the columns shown in the result table exist, so the final
    # column selection cannot fail on a CSV that lacks them.
    for col in ["transaction_id", "merchant", "location", "amount"]:
        if col not in original_df.columns:
            original_df[col] = "N/A" if col != "amount" else 0.0

    # Build the model input from the caller's frame (not the padded copy):
    # exactly the expected feature columns, missing ones filled with 0.
    model_input = df.reindex(columns=feature_cols, fill_value=0)

    if len(model_input) > 0:
        preds = model.predict(model_input)
    else:
        # Zero rows (e.g. a header-only CSV): skip the model call, which is
        # not meaningful for an empty sample set, and report "no anomalies".
        preds = pd.Series(dtype="int64")
    original_df["is_anomalous"] = preds

    anomalies = original_df[original_df["is_anomalous"] == 1]
    return original_df, anomalies[
        ["transaction_id", "merchant", "location", "amount", "is_anomalous"]
    ]

def plot_charts(df):
    """Draw a 2x2 grid of overview charts for a transactions frame.

    Panels: amount histogram (top-left), amount box plot (top-right),
    transaction count per ``day_of_week`` (bottom-left) and the five
    merchants with the largest summed amount (bottom-right). A panel whose
    required column is missing shows a short text notice instead of a chart.

    Args:
        df: Transactions frame; the columns ``amount``, ``day_of_week`` and
            ``merchant`` are each optional.

    Returns:
        The matplotlib ``Figure`` holding the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    has_amount = "amount" in df.columns

    if has_amount:
        sns.histplot(df["amount"], bins=30, kde=True, ax=axes[0, 0])
        axes[0, 0].set_title("Amount Distribution")
        sns.boxplot(x=df["amount"], ax=axes[0, 1])
        axes[0, 1].set_title("Amount Box Plot")
    else:
        axes[0, 0].text(0.5, 0.5, "No 'amount' column", ha="center")
        axes[0, 1].text(0.5, 0.5, "No 'amount' column", ha="center")

    if "day_of_week" in df.columns:
        sns.countplot(x=df["day_of_week"], ax=axes[1, 0])
        axes[1, 0].set_title("Transactions by Day of Week")
    else:
        axes[1, 0].text(0.5, 0.5, "No 'day_of_week' column", ha="center")

    if "merchant" not in df.columns:
        axes[1, 1].text(0.5, 0.5, "No 'merchant' column", ha="center")
    elif not has_amount:
        # The ranking sums 'amount' per merchant, so it needs both columns;
        # without this check the groupby below would raise KeyError.
        axes[1, 1].text(0.5, 0.5, "No 'amount' column", ha="center")
    else:
        top_merchants = (
            df.groupby("merchant")["amount"].sum().nlargest(5).reset_index()
        )
        sns.barplot(data=top_merchants, x="merchant", y="amount", ax=axes[1, 1])
        axes[1, 1].set_title("Top 5 Merchants by Amount")

    # Lay out this figure explicitly rather than "the current figure".
    fig.tight_layout()
    # pyplot keeps a reference to every figure it creates until it is closed;
    # drop it from that registry so figures do not accumulate across calls.
    # The Figure object itself stays usable for the caller.
    plt.close(fig)
    return fig

def app_interface(csv_file):
    """Run the full pipeline for one uploaded CSV and build the UI outputs.

    Args:
        csv_file: Value delivered by the upload component -- a path string or
            an object exposing the path as ``.name`` -- or ``None`` when
            nothing has been uploaded.

    Returns:
        A 4-tuple ``(summary, anomalies, fig, download_path)``: a Markdown
        summary string, the anomalies frame, the charts figure, and the path
        of a CSV file containing the anomalies. When no file was uploaded the
        summary is a prompt to upload one and the other three items are
        ``None``.
    """
    if csv_file is None:
        # Button clicked without an upload: show a hint instead of crashing
        # inside pd.read_csv.
        return "⚠️ Please upload a CSV file first.", None, None, None

    # Accept both a plain path and a file-like wrapper carrying the path.
    csv_path = getattr(csv_file, "name", csv_file)
    df = pd.read_csv(csv_path)
    full_df, anomalies = detect_anomalies(df)

    total = len(full_df)
    anom_count = len(anomalies)
    percent = (anom_count / total) * 100 if total > 0 else 0

    # Blank lines between the items: Markdown folds a single newline into a
    # space, which would put all three figures on one line.
    # NOTE: the icons were restored from mis-decoded UTF-8; the chart icon
    # could not be recovered exactly and is a best guess.
    summary = (
        f"🔢 **Total Transactions**: {total}\n\n"
        f"⚠️ **Anomalies Detected**: {anom_count}\n\n"
        f"📊 **Anomaly Percentage**: {percent:.2f}%"
    )

    # The file output component serves files by path, so the CSV is written
    # to disk rather than handed over as an in-memory buffer. delete=False
    # keeps the file alive after the handle is closed so it can be served.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="anomalies_",
        suffix=".csv",
        newline="",
        encoding="utf-8",
        delete=False,
    ) as tmp:
        anomalies.to_csv(tmp, index=False)
    download_path = tmp.name

    fig = plot_charts(full_df)

    return summary, anomalies, fig, download_path

# UI layout and event wiring.
# NOTE: the emoji in the labels below were restored from mis-decoded UTF-8.
# The shield, siren, red-circle and down-arrow icons are exact; the folder,
# clipboard and chart icons lost their distinguishing bytes and are best
# guesses.
with gr.Blocks(theme=gr.themes.Soft()) as interface:
    gr.Markdown("# 🛡️ Financial Abuse & Anomaly Detection App")
    gr.Markdown("Upload your **transaction CSV** to detect anomalies and view insights.")

    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV File", file_types=[".csv"])
        detect_button = gr.Button("🚨 Run Detection", variant="primary")

    with gr.Row():
        summary_box = gr.Markdown("")

    with gr.Tab("📋 Anomalies Detected"):
        result_table = gr.Dataframe(label="🔴 Anomalies")
        download_btn = gr.File(label="⬇️ Download Detected Anomalies")

    with gr.Tab("📈 Transaction Charts"):
        chart_output = gr.Plot()

    # Output order must match the tuple returned by app_interface:
    # (summary, anomalies table, charts figure, download file).
    detect_button.click(
        fn=app_interface,
        inputs=file_input,
        outputs=[summary_box, result_table, chart_output, download_btn],
    )

# Starts the server as soon as this module is executed or imported.
# NOTE(review): share=True also requests a public share link; since the app
# handles transaction data, confirm that public exposure is intended.
interface.launch(share=True)
|