Spaces:
Sleeping
Sleeping
File size: 3,238 Bytes
75f5bd8 0e4c3b7 882ef51 75f5bd8 882ef51 0e4c3b7 882ef51 0e4c3b7 75f5bd8 882ef51 67a74ee 8a8ba08 0e4c3b7 882ef51 0e4c3b7 67a74ee e41451e 882ef51 e41451e 0e4c3b7 e41451e 0e4c3b7 8a8ba08 0e4c3b7 67a74ee 0e4c3b7 67a74ee 0e4c3b7 67a74ee 0e4c3b7 882ef51 8a8ba08 0e4c3b7 882ef51 8a8ba08 75f5bd8 8a8ba08 75f5bd8 0e4c3b7 8a8ba08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
from parse_email import parse_email
from header_analyzer import analyze_headers
from body_analyzer import analyze_body
from url_analyzer import analyze_urls
# Verdict labels use escape sequences so the source stays ASCII-only and the
# emoji survive a file that is read or copied with the wrong encoding.
_VERDICT_MALICIOUS = "\U0001F6A8 Malicious"
_VERDICT_SUSPICIOUS = "\u26A0\uFE0F Suspicious"
# NOTE(review): the original glyph for this label was not recoverable;
# U+1F4E9 (envelope with arrow) is an assumption -- confirm against the UI.
_VERDICT_SPAM = "\U0001F4E9 Spam"
_VERDICT_SAFE = "\u2705 Safe"

# (minimum score, label), checked from the highest threshold down.
_VERDICT_THRESHOLDS = (
    (70, _VERDICT_MALICIOUS),
    (50, _VERDICT_SUSPICIOUS),
    (30, _VERDICT_SPAM),
)

_PAYMENT_KEYWORDS = ("invoice", "payment", "wire transfer", "bank details")
_CREDENTIAL_KEYWORDS = ("password", "verify", "account", "login", "credentials")

# (substrings looked for in a lower-cased finding, tag added on a match).
_TAG_RULES = (
    (("domain",), "Suspicious Sender Domain"),
    (("phishing", "malicious url"), "Phishing / Malicious URL"),
    (("urgent", "suspicious phrase"), "Urgent Language"),
    (("spam", "marketing"), "Spam Tone"),
)


def _verdict_for(score):
    """Return the verdict label for an already-capped total score."""
    for minimum, label in _VERDICT_THRESHOLDS:
        if score >= minimum:
            return label
    return _VERDICT_SAFE


def _attack_type_for(verdict, body, header_findings, body_findings):
    """Return the attack-type label; the first matching rule wins."""
    # Checked first so a message judged safe is never labelled as an attack
    # merely because its text contains a word such as "account" or "payment".
    if verdict == _VERDICT_SAFE:
        return "Benign / Normal Email"

    # A missing body is treated as empty text instead of raising.
    body_lower = (body or "").lower()
    if any(word in body_lower for word in _PAYMENT_KEYWORDS):
        return "Invoice/Payment Fraud (BEC)"
    if any(word in body_lower for word in _CREDENTIAL_KEYWORDS):
        return "Credential Harvesting (Phishing)"
    if any("reply-to mismatch" in f.lower() for f in header_findings):
        return "Business Email Compromise (BEC)"
    if any("spam" in f.lower() for f in body_findings):
        return "Spam / Marketing"
    return "General Phishing"


def _collect_tags(*finding_groups):
    """Return the set of summary tags triggered by any finding text."""
    tags = set()
    for group in finding_groups:
        for finding in group:
            text = finding.lower()
            for needles, tag in _TAG_RULES:
                if any(needle in text for needle in needles):
                    tags.add(tag)
    return tags


def analyze(file_path):
    """Analyze one email and return a summary plus the detailed findings.

    The email is parsed with ``parse_email`` and its headers, body and URLs
    are passed to ``analyze_headers``, ``analyze_body`` and ``analyze_urls``.
    The total score is ``header_score + body_score + 1.5 * url_score``,
    capped at 100.

    Args:
        file_path: Passed unchanged to ``parse_email`` (presumably the path
            of an ``.eml`` file -- confirm against ``parse_email``).

    Returns:
        A ``(summary, details)`` tuple of dicts. ``summary`` has the keys
        "Final Verdict", "Attack Type", "Attack Score" and "Main Tags";
        ``details`` has "Header Findings", "Body Findings", "URL Findings"
        and "Highlighted Body".
    """
    headers, body, urls = parse_email(file_path)

    header_findings, header_score = analyze_headers(headers)
    # The body analyzer also returns its own verdict, which is not used here.
    body_findings, body_score, highlighted_body, _body_verdict = analyze_body(body)
    url_findings, url_score = analyze_urls(urls)

    # URL evidence is weighted 1.5x; the total is capped at 100.
    total_score = min(header_score + body_score + (url_score * 1.5), 100)

    verdict = _verdict_for(total_score)
    attack_type = _attack_type_for(verdict, body, header_findings, body_findings)
    tags = _collect_tags(header_findings, body_findings, url_findings)

    summary = {
        "Final Verdict": verdict,
        "Attack Type": attack_type,
        "Attack Score": total_score,
        # Sorted so the tag string is stable from run to run.
        "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
    }
    details = {
        "Header Findings": header_findings,
        "Body Findings": body_findings,
        "URL Findings": url_findings,
        "Highlighted Body": highlighted_body,
    }
    return summary, details
# --- Local testing ---
if __name__ == "__main__":
    # Smoke run against the hard-coded sample message.
    report_summary, report_details = analyze("sample.eml")

    print("==== SUMMARY ====")
    for label in report_summary:
        print(f"{label}: {report_summary[label]}")

    print("\n==== DETAILS ====")
    for heading, content in report_details.items():
        print(f"\n-- {heading} --")
        # Lists are printed one entry per line; anything else is printed whole.
        entries = content if isinstance(content, list) else [content]
        for entry in entries:
            print(entry)
|