File size: 3,238 Bytes
75f5bd8
 
 
 
 
0e4c3b7
882ef51
75f5bd8
 
882ef51
0e4c3b7
882ef51
 
 
 
 
0e4c3b7
75f5bd8
882ef51
67a74ee
8a8ba08
0e4c3b7
882ef51
0e4c3b7
 
 
 
 
 
 
 
 
67a74ee
e41451e
882ef51
e41451e
 
 
 
 
 
 
0e4c3b7
e41451e
 
0e4c3b7
8a8ba08
0e4c3b7
 
67a74ee
 
0e4c3b7
67a74ee
 
 
0e4c3b7
67a74ee
0e4c3b7
 
882ef51
8a8ba08
 
 
 
 
 
0e4c3b7
882ef51
8a8ba08
 
 
 
 
 
75f5bd8
8a8ba08
 
 
75f5bd8
0e4c3b7
8a8ba08
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from parse_email import parse_email
from header_analyzer import analyze_headers
from body_analyzer import analyze_body
from url_analyzer import analyze_urls

# User-facing verdict labels. The Spam/Safe emoji were previously stored as
# mis-decoded UTF-8 ("πŸ“©", "βœ…") and rendered as garbage.
_VERDICT_MALICIOUS = "🚨 Malicious"
_VERDICT_SUSPICIOUS = "⚠️ Suspicious"
_VERDICT_SPAM = "📩 Spam"
_VERDICT_SAFE = "✅ Safe"

# Body keywords checked (as substrings of the lower-cased body) per attack type.
_PAYMENT_KEYWORDS = ("invoice", "payment", "wire transfer", "bank details")
_CREDENTIAL_KEYWORDS = ("password", "verify", "account", "login", "credentials")


def _verdict_for_score(total_score):
    """Map the capped total score onto a verdict label (70 / 50 / 30 cut-offs)."""
    if total_score >= 70:
        return _VERDICT_MALICIOUS
    if total_score >= 50:
        return _VERDICT_SUSPICIOUS
    if total_score >= 30:
        return _VERDICT_SPAM
    return _VERDICT_SAFE


def _classify_attack(body, header_findings, body_findings, verdict):
    """Pick the attack-type label from the verdict, body keywords and findings."""
    # A Safe verdict wins outright: ordinary mail routinely mentions words such
    # as "payment" or "account", and labelling it as an attack would contradict
    # the verdict shown next to it.
    if verdict == _VERDICT_SAFE:
        return "Benign / Normal Email"

    # NOTE(review): guards against a parser that yields None for a body-less
    # message -- confirm what parse_email returns in that case.
    body_lower = (body or "").lower()

    if any(word in body_lower for word in _PAYMENT_KEYWORDS):
        return "Invoice/Payment Fraud (BEC)"
    if any(word in body_lower for word in _CREDENTIAL_KEYWORDS):
        return "Credential Harvesting (Phishing)"
    if any("reply-to mismatch" in f.lower() for f in header_findings):
        return "Business Email Compromise (BEC)"
    if any("spam" in f.lower() for f in body_findings):
        return "Spam / Marketing"
    return "General Phishing"


def _collect_tags(findings):
    """Return the set of summary tags triggered by substrings in the findings."""
    tags = set()
    for finding in findings:
        f_lower = finding.lower()
        if "domain" in f_lower:
            tags.add("Suspicious Sender Domain")
        if "phishing" in f_lower or "malicious url" in f_lower:
            tags.add("Phishing / Malicious URL")
        if "urgent" in f_lower or "suspicious phrase" in f_lower:
            tags.add("Urgent Language")
        if "spam" in f_lower or "marketing" in f_lower:
            tags.add("Spam Tone")
    return tags


def analyze(file_path):
    """Run the header, body and URL analyzers on one email and summarize them.

    Args:
        file_path: Passed unchanged to ``parse_email``; presumably the path of
            an ``.eml`` file (the local test below uses ``"sample.eml"``).

    Returns:
        A ``(summary, details)`` tuple of dicts.

        ``summary`` has the keys ``"Final Verdict"``, ``"Attack Type"``,
        ``"Attack Score"`` (capped at 100) and ``"Main Tags"`` (a sorted,
        comma-separated string, or ``"No special tags"``).

        ``details`` has the keys ``"Header Findings"``, ``"Body Findings"``,
        ``"URL Findings"`` and ``"Highlighted Body"``, holding the analyzers'
        outputs as returned.
    """
    # --- Parse the email ---
    headers, body, urls = parse_email(file_path)

    # --- Run the individual analyzers ---
    header_findings, header_score = analyze_headers(headers)
    # The body analyzer's own verdict is not used; the final verdict is derived
    # from the combined score below.
    body_findings, body_score, highlighted_body, _body_verdict = analyze_body(body)
    url_findings, url_score = analyze_urls(urls)

    # --- Calculate total score ---
    # URL findings are weighted 1.5x; the combined score is capped at 100.
    total_score = min(header_score + body_score + (url_score * 1.5), 100)

    # --- Verdict, attack type and tags ---
    verdict = _verdict_for_score(total_score)
    attack_type = _classify_attack(body, header_findings, body_findings, verdict)
    tags = _collect_tags(header_findings + body_findings + url_findings)

    # --- Summary report ---
    summary = {
        "Final Verdict": verdict,
        "Attack Type": attack_type,
        "Attack Score": total_score,
        "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
    }

    # --- Detailed findings ---
    details = {
        "Header Findings": header_findings,
        "Body Findings": body_findings,
        "URL Findings": url_findings,
        "Highlighted Body": highlighted_body,
    }

    return summary, details

# --- Local testing ---
# Usage: python <this file> [path/to/message.eml]
if __name__ == "__main__":
    import sys

    # Analyze the path given as the first command-line argument; with no
    # argument, keep the original default of "sample.eml".
    file_path = sys.argv[1] if len(sys.argv) > 1 else "sample.eml"
    summary, details = analyze(file_path)

    print("==== SUMMARY ====")
    for k, v in summary.items():
        print(f"{k}: {v}")

    print("\n==== DETAILS ====")
    for section, findings in details.items():
        print(f"\n-- {section} --")
        # List-valued sections are printed one finding per line; anything else
        # (e.g. the highlighted body) is printed as a single value.
        if isinstance(findings, list):
            for f in findings:
                print(f)
        else:
            print(findings)