elecie committed on
Commit
4737fbd
·
1 Parent(s): 45d0251
Files changed (2) hide show
  1. app.py +88 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+ from transformers import pipeline
4
+
5
# Hugging Face zero-shot classifier. The pipeline object is created once at
# import time and reused by every call to hf_analysis().
MODEL = "valhalla/distilbart-mnli-12-1"
classifier = pipeline(task="zero-shot-classification", model=MODEL)
8
+
9
# Candidate labels handed to the zero-shot classifier.
LABELS = ["urgent", "fear", "authority", "financial scam", "safe"]

# Regex cues, grouped by the social-engineering tactic they hint at.
# Optional suffixes use NON-capturing groups: with a capturing group,
# re.findall() returns only the group ("ed", "istrator" or even "") instead of
# the word that actually matched, which garbles the reported finding.
CUES = {
    "urgency": [r"\burgent\b", r"\bverify now\b", r"\blast chance\b"],
    "fear": [r"\bsuspended\b", r"\block(?:ed)?\b"],
    "authority": [r"\bCEO\b", r"\badmin(?:istrator)?\b"],
    "financial": [r"\bprize\b", r"\blottery\b", r"\bmoney\b"],
}

# Boilerplate phrases (kept lower-case, because they are compared against the
# lower-cased message text) that lower the risk score when present.
SAFE_PHRASES = [
    "mandatory email service announcement",
    "privacy policy",
    "unsubscribe from these emails",
]

# URI schemes are case-insensitive, so "HTTP://..." must be recognised as well.
URL_PATTERN_GLOBAL = re.compile(r"(https?://[^\s]+)", re.IGNORECASE)
26
+
27
def regex_analysis(text: str, cues=None):
    """Score *text* against the keyword cue patterns.

    Args:
        text: Message body to scan. ``None`` or an empty string yields no
            findings.
        cues: Optional mapping of category name to a list of regex pattern
            strings. Defaults to the module-level ``CUES`` table.

    Returns:
        A ``(score, findings)`` tuple. ``score`` grows by 20 for every
        pattern that matches at least once (it is not capped here), and
        ``findings`` holds one ``"<category> cue → <matched text>"`` entry
        per matching pattern, using the first match in *text*.
    """
    if cues is None:
        cues = CUES
    score, findings = 0, []
    if not text:
        return score, findings
    for cat, pats in cues.items():
        for p in pats:
            # re.search() + group(0) always reports the full matched text.
            # re.findall() would return only the capture group for patterns
            # such as r"\block(ed)?\b", i.e. "ed" or an empty string.
            m = re.search(p, text, re.I)
            if m:
                findings.append(f"{cat} cue → {m.group(0)}")
                score += 20
    return score, findings
36
+
37
def hf_analysis(text: str):
    """Score *text* with the zero-shot classifier.

    Args:
        text: Message body to classify against ``LABELS``.

    Returns:
        A ``(score, findings)`` tuple. Only the two highest-scoring labels
        are considered; each one that is not ``"safe"`` and scores above 0.3
        adds ``int(score * 30)`` points and a ``"HuggingFace: <label>
        (<score>)"`` finding. Blank or missing text returns ``(0, [])``
        without calling the model.
    """
    # Blank input carries no signal, and the zero-shot pipeline rejects an
    # empty sequence, so short-circuit instead of surfacing an error in the UI.
    if not text or not text.strip():
        return 0, []

    res = classifier(text, LABELS)
    # Rank label/score pairs from most to least likely.
    ranked = sorted(
        zip(res["labels"], res["scores"]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    findings, score = [], 0
    for lbl, sc in ranked[:2]:
        if lbl != "safe" and sc > 0.3:
            findings.append(f"HuggingFace: {lbl} ({sc:.2f})")
            score += int(sc * 30)
    return score, findings
47
+
48
def extract_urls(text: str, pattern=None):
    """Return the distinct URLs found in *text*, in order of first appearance.

    Args:
        text: Message body to scan. ``None`` or an empty string yields ``[]``.
        pattern: Optional compiled regex used to find URLs; it should contain
            at most one capturing group so that ``findall`` yields strings.
            Defaults to the module-level ``URL_PATTERN_GLOBAL``.

    Returns:
        A list of unique URL strings with trailing sentence punctuation
        removed.
    """
    if not text:
        return []
    if pattern is None:
        pattern = URL_PATTERN_GLOBAL
    # Punctuation glued to the end of a link ("... http://x.example.") belongs
    # to the surrounding sentence or markup, not to the URL.
    cleaned = (raw.rstrip(".,;:!?)]}>'\"") for raw in pattern.findall(text))
    # dict.fromkeys() de-duplicates while keeping first-seen order; a set
    # would make the output order vary between runs.
    return list(dict.fromkeys(url for url in cleaned if url))
50
+
51
def analyze_text(text: str):
    """Build the plain-text phishing report for *text*.

    Combines the regex cue score and the zero-shot classifier score (capped
    at 100), subtracts 15 points (never below 0) for every entry of
    ``SAFE_PHRASES`` found in the lower-cased text, and maps the result to a
    risk level: ``High`` from 70, ``Medium`` from 35, otherwise ``Low``.

    Args:
        text: Message body pasted by the user; ``None`` is treated as empty.

    Returns:
        A multi-line string with the score, the risk level, one bullet per
        reason (or ``- None``) and the URLs found in the text.
    """
    text = text or ""
    regex_score, regex_findings = regex_analysis(text)
    # Skip the classifier for blank input: there is nothing to classify and
    # the report should still render rather than fail.
    if text.strip():
        hf_score, hf_findings = hf_analysis(text)
    else:
        hf_score, hf_findings = 0, []
    urls = extract_urls(text)

    score = min(100, regex_score + hf_score)
    reasons = regex_findings + hf_findings

    lowered = text.lower()  # lower-case once instead of once per phrase
    for phrase in SAFE_PHRASES:
        if phrase in lowered:
            reasons.append(f"Safe phrase: {phrase}")
            score = max(0, score - 15)

    if score >= 70:
        risk = "High"
    elif score >= 35:
        risk = "Medium"
    else:
        risk = "Low"

    # Every reason gets its own bullet (previously only the first one did).
    reason_lines = "\n".join(f"- {reason}" for reason in reasons) if reasons else "- None"
    url_text = ", ".join(urls) if urls else "None"

    # Leading and trailing newlines match the layout of the original report.
    return (
        "\n"
        f"📊 Score: {score}/100\n"
        f"⚠️ Risk: {risk}\n"
        "🔎 Reasons:\n"
        f"{reason_lines}\n"
        f"🌐 URLs: {url_text}\n"
    )
77
+
78
# Gradio UI: a multi-line textbox whose content is passed to analyze_text();
# the returned report is shown as plain text.
demo = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(lines=12, placeholder="Paste suspicious email or message here..."),
    outputs="text",
    title="PhishGuard 🛡️",
    description="Detects phishing risks using regex cues + Hugging Face zero-shot classification.",
)

# Launch the app only when this file is executed as a script, so that merely
# importing the module does not start the UI.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch