# Vansh-ika00 - "Update app.py" (commit 506cefd, verified)
import gradio as gr
import re
from urllib.parse import urlparse
import csv
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import os
# Identifiers of the pretrained checkpoints handed to `from_pretrained`.
URL_MODEL_ID = "najla45/phishing_detection_fine_tuned_bert"
EMAIL_MODEL_ID = "cybersectony/phishing-email-detection-distilbert_v2.4.1"

# URL model: tokenizer + sequence classifier wrapped in a text-classification
# pipeline, so callers can pass a raw string and get a label/score dict back.
url_tokenizer = AutoTokenizer.from_pretrained(URL_MODEL_ID)
url_model = AutoModelForSequenceClassification.from_pretrained(URL_MODEL_ID)
url_classifier = pipeline(
    "text-classification",
    model=url_model,
    tokenizer=url_tokenizer,
)

# Email model: kept as a bare tokenizer/model pair because the email path
# reads the full probability vector itself rather than using a pipeline.
email_tokenizer = AutoTokenizer.from_pretrained(EMAIL_MODEL_ID)
email_model = AutoModelForSequenceClassification.from_pretrained(EMAIL_MODEL_ID)
# Logic for checking the state of URL
def is_phishing_url(url):
    """Return a heuristic phishing score for *url* (higher is more suspicious).

    Points are added for each rule that fires:

    * +2  host is a raw IPv4 address (``http(s)://d.d.d.d``)
    * +1  the network location contains a hyphen
    * +3  the URL does not use the ``https`` scheme
    * +2  the URL contains one of the suspicious keywords
    * +1  the URL is longer than 75 characters
    * +2  the URL contains an ``@`` character

    Args:
        url: The URL text to score. ``None`` is treated as an empty string and
            surrounding whitespace is ignored.

    Returns:
        The integer sum of the points of all rules that matched.
    """
    suspicious_keywords = ('secure', 'account', 'update', 'free', 'login', 'verify', 'banking')

    # Pasted input frequently carries stray spaces/newlines; without stripping,
    # a perfectly good "https://..." URL would fail the scheme check below.
    url = (url or "").strip()
    # URL schemes are case-insensitive, so all scheme/keyword checks run on a
    # lower-cased copy ("HTTPS://..." must not be scored as non-HTTPS).
    lowered = url.lower()

    try:
        domain = urlparse(url).netloc
    except ValueError:
        # urlparse rejects some malformed network locations (e.g. an unclosed
        # "[" in the host); fall back to "no domain" instead of crashing.
        domain = ""

    score = 0
    if re.match(r'https?://\d{1,3}(\.\d{1,3}){3}', lowered):
        score += 2
    if '-' in domain:
        score += 1
    if not lowered.startswith("https://"):
        score += 3
    if any(keyword in lowered for keyword in suspicious_keywords):
        score += 2
    if len(url) > 75:
        score += 1
    if '@' in url:
        score += 2
    return score
# Logic checking for phishing email
def predict_email(email_text):
    """Run the email model on *email_text* and report its most likely label.

    The text is tokenized (truncated to 512 tokens), passed through
    ``email_model`` without gradient tracking, and the logits of the first
    (only) sequence are soft-maxed into probabilities.

    Returns:
        A ``(max_label, max_score, labels)`` tuple where ``labels`` maps each
        label name to its probability and ``max_label`` / ``max_score`` are
        the entry with the highest probability.
    """
    # NOTE(review): the names are assigned by output position; confirm the
    # order against the model's documentation.
    label_names = (
        "legitimate_email",
        "phishing_url",
        "legitimate_url",
        "phishing_url_alt",
    )

    encoded = email_tokenizer(email_text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = email_model(**encoded).logits
    probabilities = torch.nn.functional.softmax(logits, dim=-1)[0].tolist()

    # Index explicitly so a model with fewer outputs still raises IndexError.
    labels = {name: probabilities[position] for position, name in enumerate(label_names)}

    best_label = max(labels, key=labels.get)
    return best_label, labels[best_label], labels
# Logging all data to CSV file
LOG_FILE = os.path.join(os.path.dirname(__file__), "phishing_log.csv")


def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
    """Append one detection result to ``LOG_FILE`` (best effort).

    A header row is written first when the log file does not exist yet or is
    empty. Failures are reported on stdout and never propagated, so a logging
    problem cannot break a detection request.

    Args:
        url: The analysed input text (URL or message body).
        rule_score: Rule-based score, or a placeholder such as ``"N/A"``.
        bert_label: Label produced by the model.
        bert_score: Model confidence; written with two decimal places.
        final_decision: The final verdict string.
    """
    # NOTE(review): `url` is user-supplied text and the log is offered for
    # download; values starting with "=", "+", "-" or "@" may be interpreted
    # as formulas by spreadsheet software.
    try:
        # An existing but empty file still needs its header row.
        needs_header = not os.path.isfile(LOG_FILE) or os.path.getsize(LOG_FILE) == 0
        # Explicit UTF-8: the logged text is arbitrary user input, and the
        # platform default encoding may be unable to represent it.
        with open(LOG_FILE, "a", newline='', encoding="utf-8") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(["Input", "Rule Score", "BERT Label", "Confidence", "Final Decision"])
            writer.writerow([url, rule_score, bert_label, f"{bert_score:.2f}", final_decision])
    except Exception as e:
        # Deliberately broad: logging is best effort at this boundary.
        print(f"Error writing to CSV: {e}")
# Combining URL and email checking logic
def combined_phishing_detector(url, input_type, log=True):
    """Classify *url* as ``"Phishing"`` or ``"Safe"`` according to *input_type*.

    * ``"URL"``: flagged only when the rule score is at least 3 AND the URL
      model predicts the phishing label with a score above 0.75.
    * ``"Email/Message"``: flagged when the email model's top label starts
      with ``"phishing"`` and its probability exceeds 0.7. No rule score is
      computed (``"N/A"``).
    * anything else: nothing is evaluated and the result is ``"Safe"`` with
      label ``"unknown"`` and score ``0.0``.

    Args:
        url: The text to analyse (a URL or a message body).
        input_type: ``"URL"`` or ``"Email/Message"``.
        log: When true, the result is appended to the CSV log.

    Returns:
        ``(url, rule_score, bert_label, bert_score, final_decision)``.
    """
    if input_type == "URL":
        rule_score = is_phishing_url(url)
        # Truncate like the email path does: the input box accepts arbitrarily
        # long text, and over-long input would otherwise exceed the model's
        # maximum sequence length and raise.
        bert_result = url_classifier(url, truncation=True, max_length=512)[0]
        label_map = {"LABEL_0": "safe", "LABEL_1": "phishing"}
        bert_label = label_map.get(bert_result["label"].upper(), "unknown")
        bert_score = bert_result["score"]
        # Both signals must agree before a URL is flagged.
        is_phishing = rule_score >= 3 and bert_label == "phishing" and bert_score > 0.75
    elif input_type == "Email/Message":
        bert_label, bert_score, _ = predict_email(url)
        rule_score = "N/A"
        is_phishing = bert_label.startswith("phishing") and bert_score > 0.7
    else:
        # Unknown / unselected input type: nothing is evaluated.
        rule_score = "N/A"
        bert_label = "unknown"
        bert_score = 0.0
        is_phishing = False

    final_decision = "Phishing" if is_phishing else "Safe"

    if log:
        log_to_csv(url, rule_score, bert_label, bert_score, final_decision)
    return url, rule_score, bert_label, bert_score, final_decision
def run_detector(text, input_type):
    """Gradio callback: analyse *text* and build the result shown in the UI.

    Runs the combined detector (with CSV logging enabled) and formats its
    output as a short multi-line report.

    Args:
        text: The URL or message entered by the user.
        input_type: The selected input type, forwarded to the detector.

    Returns:
        ``(message, log_path)`` where ``message`` is the formatted report and
        ``log_path`` is ``LOG_FILE`` if that file exists, otherwise ``None``.
    """
    _, rule_score, bert_label, bert_score, final_decision = combined_phishing_detector(
        text, input_type, log=True
    )

    # Add emoji based on result
    verdict = final_decision.lower()
    if verdict == "phishing":
        emoji = "🚨"  # warning
    elif verdict == "safe":
        emoji = "✅"  # check mark
    else:
        emoji = "❓"

    # The emoji below are real Unicode characters; they previously appeared as
    # mis-decoded UTF-8 byte sequences in the output.
    # NOTE(review): the "Confidence" icon could not be recovered exactly; a
    # magnifying glass is assumed.
    message = (
        f"{emoji} Result: {final_decision}\n"
        f"📊 Rule Score: {rule_score}\n"
        f"🤖 BERT Label: {bert_label}\n"
        f"🔍 Confidence: {bert_score:.2f}"
    )

    # Logging is best effort, so the file may not exist; only offer it for
    # download when it is actually there.
    log_path = LOG_FILE if os.path.isfile(LOG_FILE) else None
    return message, log_path
# ---------- GUI ----------
# Layout: styled title, a row with the text input and the input-type selector,
# then the result box, the log download and the button that runs the detector.
# NOTE(review): the nesting under `gr.Row()` is reconstructed (the two inputs
# are assumed to share the row) - confirm against the intended layout.
# The title emoji is written as a real Unicode character; it previously
# rendered as a mis-decoded byte sequence. NOTE(review): the exact original
# glyph could not be recovered; a magnifying glass is assumed.
with gr.Blocks() as demo:
    # Custom font + CSS + title (no background image)
    gr.HTML("""
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
.gradio-container {
background: radial-gradient(circle at top, #1e293b, #020617);
background-attachment: fixed;
font-family: "Poppins", sans-serif;
color: white;
}
.gradio-container * {
font-family: "Poppins", sans-serif !important;
}
label, .gr-textbox, .gr-button, .gr-file {
color: white !important;
}
</style>
<h1 style='text-align:center; color:white;'>🔍 Phishing URL & Email Detector (BERT + Rules) 🔍</h1>
""")
    with gr.Row():
        input_text = gr.Textbox(label="Enter URL or Email", lines=5, placeholder="Paste URL or email content here...")
        input_type = gr.Radio(["URL", "Email/Message"], label="Input Type")
    result_output = gr.Textbox(label="Detection Result", lines=6, interactive=False)
    log_file_output = gr.File(label="Download Log File")
    detect_button = gr.Button("Detect")
    # The callback returns (report text, log file path) in this order.
    detect_button.click(
        fn=run_detector,
        inputs=[input_text, input_type],
        outputs=[result_output, log_file_output]
    )
demo.launch(share=True)