Spaces:

elecie
/

PhishGuard

Runtime error

App Files Files Community

elecie committed on Sep 4

Commit

466b21e

1 Parent(s): 05c0375

Lazy load

Browse files

Files changed (1) hide show

app.py +34 -24

app.py CHANGED Viewed

@@ -1,17 +1,11 @@
-import gradio as gr
-from transformers import pipeline
 import re
 import tldextract
 from rapidfuzz import fuzz
-# Load lightweight zero-shot model
-classifier = pipeline("zero-shot-classification", model="joeddav/distilbert-base-uncased-go-emotions")
-# Define categories
 LABELS = ["urgent", "fear", "authority", "financial scam", "safe"]
-# Regex backup cues
 CUES = {
     "urgency": [r"\burgent\b", r"\bimmediately\b", r"\bverify now\b", r"\blimited time\b"],
     "fear": [r"\bsuspended\b", r"\block(ed)?\b", r"\blegal action\b", r"\bunauthorized\b"],
@@ -23,9 +17,23 @@ TRUSTED_DOMAINS = ["google.com", "paypal.com", "microsoft.com", "amazon.com", "f
 SUSPICIOUS_TLDS = ["xyz", "top", "tk", "gq", "cf", "ml"]
 URL_PATTERN = re.compile(r"(https?://[^\s]+|www\.[^\s]+|\b[a-zA-Z0-9-]+\.[a-z]{2,}\b)")
 def regex_analysis(text):
-    findings, score = [], 0
     for category, patterns in CUES.items():
         for pat in patterns:
             if re.search(pat, text, re.IGNORECASE):
@@ -33,9 +41,9 @@ def regex_analysis(text):
                 score += 20
     return score, findings
 def huggingface_analysis(text):
-    result = classifier(text, LABELS)
     label_scores = list(zip(result["labels"], result["scores"]))
     label_scores.sort(key=lambda x: x[1], reverse=True)
@@ -45,9 +53,9 @@ def huggingface_analysis(text):
     return hf_score, findings
 def url_analysis(url):
-    findings, score = [], 0
     ext = tldextract.extract(url)
     domain = f"{ext.domain}.{ext.suffix}"
@@ -72,13 +80,11 @@ def url_analysis(url):
     return score, findings
 def extract_url_from_text(text):
     """Return the first substring of text matched by URL_PATTERN, or None if nothing matches."""
     # search() stops at the first hit, so any later URLs in the text are ignored.
     match = URL_PATTERN.search(text)
     return match.group(0) if match else None
-# Main analysis function (Gradio will call this)
 def analyze(text):
     regex_score, regex_findings = regex_analysis(text)
     hf_score, hf_findings = huggingface_analysis(text)
@@ -105,18 +111,22 @@ def analyze(text):
     return {
         "Score": total_score,
         "Risk Level": risk_level,
-        "Reasons": reasons,
         "Extracted URL": url if url else "None detected"
     }
-# Gradio UI
 iface = gr.Interface(
     fn=analyze,
-    inputs=gr.Textbox(lines=4, placeholder="Paste email/text here..."),
-    outputs="json",  # single JSON output; presumably renders the dict that analyze() returns — verify against Gradio docs
-    title="Phishing Detection",
-    description="Analyzes text and URLs for phishing risks."
 )
 if __name__ == "__main__":

 import re
 import tldextract
 from rapidfuzz import fuzz
+import gradio as gr
+# --- Labels & Regex ---
 LABELS = ["urgent", "fear", "authority", "financial scam", "safe"]
 CUES = {
     "urgency": [r"\burgent\b", r"\bimmediately\b", r"\bverify now\b", r"\blimited time\b"],
     "fear": [r"\bsuspended\b", r"\block(ed)?\b", r"\blegal action\b", r"\bunauthorized\b"],
 SUSPICIOUS_TLDS = ["xyz", "top", "tk", "gq", "cf", "ml"]
 URL_PATTERN = re.compile(r"(https?://[^\s]+|www\.[^\s]+|\b[a-zA-Z0-9-]+\.[a-z]{2,}\b)")
+# --- Lazy-load Hugging Face model ---
+classifier = None  # Module-level cache for the pipeline; stays None until get_classifier() first runs.
+def get_classifier():  # Return the shared zero-shot-classification pipeline, creating it on the first call.
+    global classifier
+    if classifier is None:
+        from transformers import pipeline  # Imported inside the branch so transformers is only loaded on first use.
+        classifier = pipeline(
+            "zero-shot-classification",
+            model="valhalla/distilbart-mnli-12-1",
+            device=-1  # CPU
+        )
+    return classifier
+# --- Analysis functions ---
 def regex_analysis(text):
+    findings = []
+    score = 0
     for category, patterns in CUES.items():
         for pat in patterns:
             if re.search(pat, text, re.IGNORECASE):
                 score += 20
     return score, findings
 def huggingface_analysis(text):
+    clf = get_classifier()
+    result = clf(text, LABELS)
     label_scores = list(zip(result["labels"], result["scores"]))
     label_scores.sort(key=lambda x: x[1], reverse=True)
     return hf_score, findings
 def url_analysis(url):
+    findings = []
+    score = 0
     ext = tldextract.extract(url)
     domain = f"{ext.domain}.{ext.suffix}"
     return score, findings
 def extract_url_from_text(text):
     """Return the first substring of text matched by URL_PATTERN, or None if nothing matches."""
     # search() stops at the first hit, so any later URLs in the text are ignored.
     match = URL_PATTERN.search(text)
     return match.group(0) if match else None
+# --- Main analyze function for Gradio ---
 def analyze(text):
     regex_score, regex_findings = regex_analysis(text)
     hf_score, hf_findings = huggingface_analysis(text)
     return {
         "Score": total_score,
         "Risk Level": risk_level,
+        "Reasons": "\n".join(reasons),
         "Extracted URL": url if url else "None detected"
     }
+# --- Gradio Interface ---
 iface = gr.Interface(
     # NOTE(review): analyze() returns a single dict (keys "Score", "Risk Level",
     # "Reasons", "Extracted URL"), while four separate output components are
     # listed below. Gradio normally expects one return value per output
     # component, in order — confirm this mapping works, or return a tuple.
     fn=analyze,
+    inputs=gr.Textbox(lines=5, placeholder="Paste text here..."),
     # One Textbox per key of the dict that analyze() returns, in the same order.
+    outputs=[
+        gr.Textbox(label="Score"),
+        gr.Textbox(label="Risk Level"),
+        gr.Textbox(label="Reasons"),
+        gr.Textbox(label="Extracted URL")
+    ],
+    title="Phishing / Scam Detector",
+    description="Analyzes text for urgency, fear, authority, and financial scam cues, plus suspicious URLs."
 )
 if __name__ == "__main__":