|
|
import gradio as gr |
|
|
import re |
|
|
from urllib.parse import urlparse |
|
|
import csv |
|
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline |
|
|
import torch |
|
|
import os |
|
|
|
|
|
|
|
|
# Hugging Face checkpoint used to classify raw URL strings.
_URL_MODEL_ID = "najla45/phishing_detection_fine_tuned_bert"

# Tokenizer and model are loaded once, at import time.
url_tokenizer = AutoTokenizer.from_pretrained(_URL_MODEL_ID)
url_model = AutoModelForSequenceClassification.from_pretrained(_URL_MODEL_ID)

# Text-classification pipeline wrapping the URL model and its tokenizer.
url_classifier = pipeline(
    "text-classification",
    model=url_model,
    tokenizer=url_tokenizer,
)
|
|
|
|
|
|
|
|
# Hugging Face checkpoint used to classify e-mail / message bodies.
_EMAIL_MODEL_ID = "cybersectony/phishing-email-detection-distilbert_v2.4.1"

# Tokenizer and model are loaded once, at import time, and called directly
# (no pipeline wrapper) by predict_email.
email_tokenizer = AutoTokenizer.from_pretrained(_EMAIL_MODEL_ID)
email_model = AutoModelForSequenceClassification.from_pretrained(_EMAIL_MODEL_ID)
|
|
|
|
|
|
|
|
def is_phishing_url(url: str) -> int:
    """Score a URL against a small set of phishing heuristics.

    Despite the name, this returns a numeric score, not a boolean; the
    caller decides which score counts as phishing.

    Points are added for each heuristic that fires:

    * +2  the URL starts with ``http(s)://`` followed by a dotted-quad host
    * +1  the network location contains a hyphen
    * +3  the URL does not use the ``https`` scheme
    * +2  the URL contains any of the suspicious keywords
    * +1  the URL is longer than 75 characters
    * +2  the URL contains ``@``

    Scheme and keyword checks are case-insensitive, and surrounding
    whitespace is ignored, so ``" HTTPS://Example.com"`` is treated the same
    as ``"https://example.com"``.

    Args:
        url: The URL to inspect. ``None`` is treated as an empty string.

    Returns:
        The accumulated score, between 0 and 11.
    """
    suspicious_keywords = (
        'secure', 'account', 'update', 'free', 'login', 'verify', 'banking',
    )

    # Pasted input often carries stray whitespace or a trailing newline.
    url = (url or "").strip()
    lowered = url.lower()
    domain = urlparse(url).netloc

    score = 0
    # Raw IPv4 address used in place of a host name.
    if re.match(r'https?://\d{1,3}(\.\d{1,3}){3}', lowered):
        score += 2
    if '-' in domain:
        score += 1
    # URL schemes are case-insensitive, so compare on the lowered form.
    if not lowered.startswith("https://"):
        score += 3
    if any(keyword in lowered for keyword in suspicious_keywords):
        score += 2
    if len(url) > 75:
        score += 1
    if '@' in url:
        score += 2

    return score
|
|
|
|
|
|
|
|
def predict_email(email_text):
    """Classify an e-mail / message body with the email model.

    The text is tokenized (truncated to 512 tokens), run through
    ``email_model`` without gradient tracking, and the logits are turned
    into probabilities with a softmax.

    Args:
        email_text: The message text to classify.

    Returns:
        A ``(label, score, labels)`` tuple: the highest-probability label
        name, its probability, and a dict mapping every label name to its
        probability.
    """
    # NOTE(review): assumes the checkpoint emits four logits in exactly this
    # order — confirm against the model card.
    label_names = (
        "legitimate_email",
        "phishing_url",
        "legitimate_url",
        "phishing_url_alt",
    )

    inputs = email_tokenizer(
        email_text, return_tensors="pt", truncation=True, max_length=512
    )
    with torch.no_grad():
        outputs = email_model(**inputs)
    # [0] selects the single example in the batch.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

    labels = dict(zip(label_names, probs))
    max_label = max(labels, key=labels.get)
    return max_label, labels[max_label], labels
|
|
|
|
|
|
|
|
# CSV log of every detection, stored next to this script. The path is made
# absolute so it does not depend on the current working directory.
LOG_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "phishing_log.csv")
|
|
|
|
|
def log_to_csv(url, rule_score, bert_label, bert_score, final_decision):
    """Append one detection result to the CSV log at ``LOG_FILE``.

    A header row is written first when the log does not exist yet or is
    empty. Logging is best-effort: any failure is printed and swallowed so
    that a logging problem never breaks a detection request.

    Args:
        url: The analysed input (URL or message text).
        rule_score: Heuristic score, or a placeholder such as ``"N/A"``.
        bert_label: Label predicted by the model.
        bert_score: Model confidence; written with two decimals.
        final_decision: The combined verdict.
    """
    try:
        needs_header = (
            not os.path.isfile(LOG_FILE) or os.path.getsize(LOG_FILE) == 0
        )
        # Explicit UTF-8 so non-ASCII input is logged the same way on every
        # platform instead of depending on the locale's default encoding.
        with open(LOG_FILE, "a", newline='', encoding="utf-8") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(["Input", "Rule Score", "BERT Label", "Confidence", "Final Decision"])
            writer.writerow([url, rule_score, bert_label, f"{bert_score:.2f}", final_decision])
    except Exception as e:  # deliberate best-effort: report and carry on
        print(f"Error writing to CSV: {e}")
|
|
|
|
|
|
|
|
def combined_phishing_detector(url, input_type, log=True):
    """Run the detector that matches *input_type* and return its verdict.

    * ``"URL"``: the input is scored by ``is_phishing_url`` and classified
      by ``url_classifier``. It is reported as phishing only when the rule
      score is at least 3 AND the model says "phishing" with a score
      above 0.75.
    * ``"Email/Message"``: the input is classified by ``predict_email`` and
      reported as phishing when the top label starts with "phishing" and
      its score is above 0.7. No rule score applies.
    * anything else: nothing is run and the input is reported as safe with
      an "unknown" label.

    Args:
        url: The text to analyse (a URL or a message body).
        input_type: ``"URL"`` or ``"Email/Message"``.
        log: When true, the result is appended to the CSV log.

    Returns:
        ``(url, rule_score, bert_label, bert_score, final_decision)`` where
        ``rule_score`` is an int or ``"N/A"`` and ``final_decision`` is
        ``"Phishing"`` or ``"Safe"``.
    """
    if input_type == "URL":
        rule_score = is_phishing_url(url)
        rule_flagged = rule_score >= 3

        # Truncate as the email path does, so an over-long input cannot
        # overflow the model's maximum sequence length.
        # NOTE(review): 512 mirrors the email path — confirm it matches the
        # URL checkpoint's limit.
        bert_result = url_classifier(url, truncation=True, max_length=512)[0]
        label_map = {"LABEL_0": "safe", "LABEL_1": "phishing"}
        bert_label = label_map.get(bert_result["label"].upper(), "unknown")
        bert_score = bert_result["score"]

        # Both detectors must agree before a URL is reported as phishing.
        is_phishing = (
            rule_flagged and bert_label == "phishing" and bert_score > 0.75
        )
    elif input_type == "Email/Message":
        bert_label, bert_score, _ = predict_email(url)
        rule_score = "N/A"
        is_phishing = bert_label.startswith("phishing") and bert_score > 0.7
    else:
        # Unrecognised (or unselected) input type: run nothing.
        rule_score = "N/A"
        bert_label = "unknown"
        bert_score = 0.0
        is_phishing = False

    final_decision = "Phishing" if is_phishing else "Safe"

    if log:
        log_to_csv(url, rule_score, bert_label, bert_score, final_decision)

    return url, rule_score, bert_label, bert_score, final_decision
|
|
|
|
|
def run_detector(text, input_type):
    """Gradio callback: analyse *text* and format the result for the UI.

    Args:
        text: The URL or message body entered by the user.
        input_type: The selected input type, passed through to
            ``combined_phishing_detector``.

    Returns:
        ``(message, log_path)``: a multi-line summary of the verdict, and the
        path of the CSV log, or ``None`` when the log file does not exist
        (for example because writing it failed).
    """
    _, rule_score, bert_label, bert_score, final_decision = combined_phishing_detector(
        text, input_type, log=True
    )

    # Emoji are written as escapes so the file stays pure ASCII and cannot be
    # corrupted by an encoding round-trip.
    icons = {
        "phishing": "\U0001F6A8",  # police light
        "safe": "\u2705",          # check mark
    }
    # NOTE(review): the fallback, "Rule Score" and "Confidence" icons could
    # not be recovered from the garbled originals — question mark, magnifying
    # glass and bar chart are stand-ins; confirm.
    emoji = icons.get(final_decision.lower(), "\u2753")

    message = (
        f"{emoji} Result: {final_decision}\n"
        f"\U0001F50D Rule Score: {rule_score}\n"
        f"\U0001F916 BERT Label: {bert_label}\n"
        f"\U0001F4CA Confidence: {bert_score:.2f}"
    )

    # Only hand the log to the file component when it actually exists.
    log_path = LOG_FILE if os.path.isfile(LOG_FILE) else None
    return message, log_path
|
|
|
|
|
|
|
|
# Gradio UI: a text box plus an input-type selector feed run_detector, whose
# verdict text and CSV log are shown below.
# NOTE(review): the original indentation was lost; the nesting below (inputs
# inside the Row, everything else directly under Blocks) is reconstructed
# from the blank-line grouping — confirm.
with gr.Blocks() as demo:

    # Page-wide styling and the title banner.
    # NOTE(review): the header emoji could not be recovered from the garbled
    # original; a lock (&#128272;) is used as a stand-in — confirm.
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600;700&display=swap" rel="stylesheet">
    <style>
        .gradio-container {
            background: radial-gradient(circle at top, #1e293b, #020617);
            background-attachment: fixed;
            font-family: "Poppins", sans-serif;
            color: white;
        }
        .gradio-container * {
            font-family: "Poppins", sans-serif !important;
        }
        label, .gr-textbox, .gr-button, .gr-file {
            color: white !important;
        }
    </style>
    <h1 style='text-align:center; color:white;'>&#128272; Phishing URL & Email Detector (BERT + Rules) &#128272;</h1>
    """)

    with gr.Row():
        input_text = gr.Textbox(label="Enter URL or Email", lines=5, placeholder="Paste URL or email content here...")
        input_type = gr.Radio(["URL", "Email/Message"], label="Input Type")

    result_output = gr.Textbox(label="Detection Result", lines=6, interactive=False)
    log_file_output = gr.File(label="Download Log File")

    detect_button = gr.Button("Detect")

    detect_button.click(
        fn=run_detector,
        inputs=[input_text, input_type],
        outputs=[result_output, log_file_output],
    )

# NOTE(review): share=True requests a public link, and run_detector returns
# the same log file to every caller, so any visitor can download all earlier
# inputs — confirm this is intended.
demo.launch(share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|