soupstick committed on
Commit
3595d5c
·
0 Parent(s):

Initial commit: HF Spaces Fraud Detector scaffold

Browse files
Files changed (8) hide show
  1. .env.example +9 -0
  2. .gitignore +9 -0
  3. README.md +21 -0
  4. alerts.py +68 -0
  5. app.py +166 -0
  6. database.py +77 -0
  7. export_utils.py +48 -0
  8. requirements.txt +14 -0
.env.example ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ HF_TOKEN=hf_xxx
2
+ SENDGRID_API_KEY=
3
+ EMAIL_HOST=smtp.gmail.com
4
+ EMAIL_PORT=587
5
+ EMAIL_USER=
6
+ EMAIL_PASS=
7
+ SLACK_WEBHOOK=
8
+ ADMIN_EMAIL=you@example.com
9
+ QWEN_MODEL_ID=
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ env/
4
+ .env
5
+ /data/*
6
+ *.db
7
+ *.sqlite
8
+ *.pdf
9
+ *.csv
README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Fraud Detector Agent — HF Spaces (Gradio) scaffold
2
+
3
+ This repo is a deployable Hugging Face Space using Gradio + LangChain-style orchestration for transaction fraud detection.
4
+
5
+ FEATURES
6
+ - Multi-model inference (Hugging Face Inference API)
7
+ - Velocity (burst) detection
8
+ - Chroma vector store + sentence-transformers embeddings for RAG
9
+ - Per-user SQLite storage (register/login)
10
+ - CSV & PDF export of flagged results
11
+ - Email & Slack alert hooks
12
+
13
+ Secrets needed in Space Settings:
14
+ - HF_TOKEN
15
+ - SENDGRID_API_KEY (or EMAIL_USER/EMAIL_PASS)
16
+ - SLACK_WEBHOOK (optional)
17
+ - ADMIN_EMAIL (optional)
18
+
19
+ Deploy:
20
+ - Create a new Space on Hugging Face and choose the `Gradio` SDK (Python).
21
+ - Push repository files, set secrets, wait for build to complete.
alerts.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import smtplib
3
+ from email.message import EmailMessage
4
+ import requests
5
+
6
# Alert configuration, read once from the environment at import time.
# EMAIL_HOST / EMAIL_PORT use `os.getenv(...) or default` so that a variable
# which is present but blank (e.g. copied from a template with an empty
# value) falls back to the default instead of crashing on int("").
SENDGRID_API_KEY = os.getenv("SENDGRID_API_KEY", "")
EMAIL_USER = os.getenv("EMAIL_USER", "")
EMAIL_PASS = os.getenv("EMAIL_PASS", "")
EMAIL_HOST = os.getenv("EMAIL_HOST") or "smtp.gmail.com"
EMAIL_PORT = int(os.getenv("EMAIL_PORT") or 587)
SLACK_WEBHOOK = os.getenv("SLACK_WEBHOOK", "")
ADMIN_EMAIL = os.getenv("ADMIN_EMAIL", "")
13
+
14
class EmailNotifier:
    """Send alert e-mails through SendGrid, falling back to plain SMTP.

    All settings are taken from the module-level constants
    (SENDGRID_API_KEY, EMAIL_USER, EMAIL_PASS, EMAIL_HOST, EMAIL_PORT).
    """

    # Seconds to wait on the SMTP socket; without a timeout a stalled server
    # would block the calling request indefinitely.
    SMTP_TIMEOUT = 30
    # Sender address used when EMAIL_USER is not configured.
    DEFAULT_SENDER = "noreply@example.com"

    def __init__(self):
        pass

    def send_alert(self, recipient: str, subject: str, body: str, attachment: str = None) -> bool:
        """Deliver one alert message, optionally with a file attached.

        SendGrid is tried first when SENDGRID_API_KEY is set; if it is not
        set, or the SendGrid attempt raises, the message is sent over SMTP.

        Args:
            recipient: Destination e-mail address.
            subject: Message subject.
            body: Plain-text message body.
            attachment: Optional path of a file to attach; ignored when the
                path is empty or does not exist.

        Returns:
            True if either transport accepted the message, False otherwise.
            Errors are printed, never raised.
        """
        if SENDGRID_API_KEY:
            try:
                self._send_via_sendgrid(recipient, subject, body, attachment)
                return True
            except Exception as e:
                # Best effort: report and fall through to the SMTP path.
                print("SendGrid error:", e)
        try:
            self._send_via_smtp(recipient, subject, body, attachment)
            return True
        except Exception as e:
            print("SMTP send error:", e)
            return False

    @staticmethod
    def _load_attachment(attachment):
        """Return (filename, bytes, mime_type) for *attachment*, or None if absent."""
        import mimetypes

        if not attachment or not os.path.exists(attachment):
            return None
        with open(attachment, "rb") as f:
            data = f.read()
        guessed, _ = mimetypes.guess_type(attachment)
        # PDF was the only type ever sent, so it stays the fallback when the
        # extension is unknown.
        return os.path.basename(attachment), data, guessed or "application/pdf"

    def _send_via_sendgrid(self, recipient, subject, body, attachment):
        """Send the message with the SendGrid API client; raises on failure."""
        import base64
        import sendgrid
        from sendgrid.helpers.mail import Mail, Attachment, FileContent, FileName, FileType, Disposition

        client = sendgrid.SendGridAPIClient(SENDGRID_API_KEY)
        message = Mail(
            from_email=EMAIL_USER or self.DEFAULT_SENDER,
            to_emails=recipient,
            subject=subject,
            plain_text_content=body,
        )
        loaded = self._load_attachment(attachment)
        if loaded is not None:
            filename, data, mime_type = loaded
            encoded = base64.b64encode(data).decode()
            message.attachment = Attachment(
                FileContent(encoded),
                FileName(filename),
                FileType(mime_type),
                Disposition("attachment"),
            )
        client.send(message)

    def _send_via_smtp(self, recipient, subject, body, attachment):
        """Send the message over SMTP with STARTTLS; raises on failure."""
        msg = EmailMessage()
        msg["Subject"] = subject
        msg["From"] = EMAIL_USER or self.DEFAULT_SENDER
        msg["To"] = recipient
        msg.set_content(body)
        loaded = self._load_attachment(attachment)
        if loaded is not None:
            filename, data, mime_type = loaded
            maintype, _, subtype = mime_type.partition("/")
            msg.add_attachment(data, maintype=maintype, subtype=subtype, filename=filename)
        with smtplib.SMTP(EMAIL_HOST, EMAIL_PORT, timeout=self.SMTP_TIMEOUT) as server:
            server.starttls()
            # Login only when both credentials are configured.
            if EMAIL_USER and EMAIL_PASS:
                server.login(EMAIL_USER, EMAIL_PASS)
            server.send_message(msg)
55
+
56
class SlackNotifier:
    """Post plain-text messages to a Slack incoming-webhook URL."""

    def __init__(self, webhook_url: str):
        # An empty/None URL turns every send() into a no-op returning False.
        self.webhook = webhook_url

    def send(self, text: str):
        """Post *text* to the webhook.

        Returns:
            True when the POST completed with a non-error HTTP status;
            False when no webhook is configured, the request failed, or the
            server answered with a 4xx/5xx status. Errors are printed, not
            raised.
        """
        if not self.webhook:
            return False
        try:
            response = requests.post(self.webhook, json={"text": text}, timeout=10)
            # Without this check an HTTP error (bad/revoked webhook, rate
            # limit) would still be reported as a successful send.
            response.raise_for_status()
            return True
        except Exception as e:
            print("Slack send error:", e)
            return False
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tempfile
4
+ from datetime import datetime
5
+ import pandas as pd
6
+ import gradio as gr
7
+
8
+ # local modules
9
+ from database import DatabaseManager
10
+ from models import ModelRouter
11
+ from velocity import detect_burst
12
+ from export_utils import generate_csv_report, generate_pdf_report
13
+ from alerts import EmailNotifier, SlackNotifier
14
+ from rag import RAGStore
15
+
16
# config from env
HF_TOKEN = os.getenv("HF_TOKEN", "")
ADMIN_EMAIL = os.getenv("ADMIN_EMAIL", "")
SLACK_WEBHOOK = os.getenv("SLACK_WEBHOOK", "")

# initialize components (persistent storage under /data by default).
# DATA_DIR can be overridden through the environment so the app can also run
# where /data is not writable (local development, no persistent volume).
DATA_DIR = os.getenv("DATA_DIR") or "/data"
os.makedirs(DATA_DIR, exist_ok=True)
DB_PATH = os.path.join(DATA_DIR, "fraud_detector.db")
VECTOR_DIR = os.path.join(DATA_DIR, "chroma_collection")

db = DatabaseManager(DB_PATH)
rag = RAGStore(collection_dir=VECTOR_DIR)
model_router = ModelRouter(hf_token=HF_TOKEN)
emailer = EmailNotifier()
# Slack alerts are optional: no webhook configured means no notifier.
slacknot = SlackNotifier(SLACK_WEBHOOK) if SLACK_WEBHOOK else None

# simple in-memory session (for demo only): maps session token -> user id.
# Contents are lost whenever the process restarts.
SESSIONS = {}
35
+
36
def register_user(username: str, password: str, email: str):
    """Create an account through the shared database manager.

    Returns the status string shown in the UI: "Registered" when the
    database reports success, otherwise the failure message.
    """
    created = db.create_user(username=username, password=password, email=email)
    if created:
        return "Registered"
    return "Registration failed (username taken)."
39
+
40
def login_user(username: str, password: str):
    """Authenticate a user and open an in-memory session.

    Args:
        username: Account name entered in the UI.
        password: Plain-text password entered in the UI.

    Returns:
        A ``(token, message)`` pair. On success ``token`` is a new random
        session token registered in ``SESSIONS``; on failure it is the empty
        string and ``message`` is "Invalid credentials".
    """
    import secrets

    user_id = db.authenticate_user(username, password)
    if not user_id:
        return "", "Invalid credentials"
    # The token is the only thing protecting a session, so it must be
    # unguessable; a user id plus timestamp could be forged by anyone.
    token = secrets.token_urlsafe(32)
    SESSIONS[token] = user_id
    return token, f"Logged in as {username}"
48
+
49
def _flag_amount_anomalies(df, z_threshold=2.5, amount_limit=1000, min_rows=5):
    """Flag rows whose amount is a z-score outlier or exceeds a fixed limit.

    The check only runs when the frame has more than *min_rows* rows.
    Amounts that cannot be parsed as numbers are treated as missing and are
    never flagged (instead of crashing the arithmetic).

    Returns:
        A list of dicts with keys "transaction", "z_score", "risk_factor"
        and "risk_score".
    """
    if len(df) <= min_rows:
        return []
    amounts = pd.to_numeric(df["amount"], errors="coerce")
    mean = amounts.mean()
    std = amounts.std()
    # Guard against division by zero when all amounts are identical.
    scale = std if std > 0 else 1
    flags = []
    for (_, row), amount in zip(df.iterrows(), amounts):
        z = abs((amount - mean) / scale)
        # NaN compares False on both sides, so unparseable amounts are skipped.
        if z > z_threshold or amount > amount_limit:
            flags.append({
                "transaction": row.to_dict(),
                "z_score": float(z),
                "risk_factor": "amount_anomaly",
                "risk_score": float(min(z / 3.0, 1.0)),
            })
    return flags


def process_file(session_token: str, file, rag_query: str = ""):
    """Analyze an uploaded transactions CSV for the logged-in user.

    Steps: validate the session and the CSV, store the rows, index them in
    the RAG store, run velocity and amount checks, ask the model router for
    a written analysis, and, when anything was flagged, write CSV/PDF
    reports and send the configured alerts.

    Args:
        session_token: Token previously returned by ``login_user``.
        file: Uploaded file from the UI; either an object exposing ``.name``
            or a plain path string.
        rag_query: Optional query run against the RAG store.

    Returns:
        A 4-tuple ``(analysis_text, flagged, report_paths, status)``. On
        validation errors the first element carries the error message and
        the remaining elements are empty.
    """
    from datetime import timezone

    if session_token not in SESSIONS:
        return "Please log in", {}, {}, ""
    user_id = SESSIONS[session_token]

    if file is None:
        return "Error reading CSV: no file uploaded", {}, {}, ""
    upload_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(upload_path)
    except Exception as e:
        return f"Error reading CSV: {e}", {}, {}, ""

    # Basic validation: report exactly which required columns are absent.
    required_cols = {"transaction_id", "timestamp", "amount", "description", "merchant"}
    missing_cols = required_cols - set(df.columns)
    if missing_cols:
        return f"CSV missing required columns: {sorted(missing_cols)}", {}, {}, ""

    # store transactions
    db.store_transactions(user_id, df)

    # add to RAG (texts + metadata)
    texts = df.apply(
        lambda x: f"txn_id:{x['transaction_id']} amount:{x['amount']} merchant:{x['merchant']} desc:{x.get('description','')}",
        axis=1,
    ).tolist()
    metadatas = df.to_dict(orient="records")
    rag.add(texts=texts, metadatas=metadatas)

    # velocity detection
    velocity_flags = detect_burst(df, window_minutes=10, threshold=5)

    # simple amount anomaly detection (z-score based)
    amount_flags = _flag_amount_anomalies(df)

    all_flagged = velocity_flags + amount_flags

    # RAG lookup if requested
    rag_results = []
    if rag_query:
        rag_results = rag.query(rag_query, k=5)

    # Prepare prompt context
    context = {
        "num_transactions": len(df),
        "velocity_flags": len(velocity_flags),
        "amount_flags": len(amount_flags),
        "rag_snippets": rag_results[:3]
    }
    prompt = f"""
You are a financial fraud analyst. Analyze the transactions and provide a concise report and remediation steps.
Context: {json.dumps(context, default=str)}
"""

    # pick model via router
    llm_output = model_router.run(prompt, task="analysis")

    # generate reports if flagged
    report_paths = {}
    if all_flagged:
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted string is the same.
        ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
        csv_path = os.path.join(DATA_DIR, f"fraud_report_{user_id}_{ts}.csv")
        pdf_path = os.path.join(DATA_DIR, f"fraud_report_{user_id}_{ts}.pdf")
        generate_csv_report(all_flagged, csv_path)
        generate_pdf_report(all_flagged, pdf_path)
        report_paths = {"csv": csv_path, "pdf": pdf_path}

        # send alerts
        admin = ADMIN_EMAIL
        if admin:
            emailer.send_alert(recipient=admin,
                               subject=f"Fraud Alert for user {user_id}",
                               body=f"Detected {len(all_flagged)} suspicious txns. See attached.",
                               attachment=pdf_path)
        if slacknot:
            slacknot.send(f"Fraud Alert: user {user_id} has {len(all_flagged)} flagged transactions.")

    return llm_output, all_flagged, report_paths, f"Found {len(all_flagged)} suspicious transactions."
132
+
133
# Gradio UI: an "Auth" tab for register/login and an "Analyze" tab that runs
# process_file on an uploaded CSV.
with gr.Blocks(css=".output_json {height:300px;}") as demo:
    gr.Markdown("# 🔍 Fraud Detector Analyst — HF Spaces (Gradio)")

    with gr.Tab("Auth"):
        with gr.Row():
            reg_user = gr.Textbox(label="Username")
            reg_pass = gr.Textbox(label="Password", type="password")
            reg_email = gr.Textbox(label="Email")
            reg_btn = gr.Button("Register")
        reg_msg = gr.Textbox(label="Message", interactive=False)

        with gr.Row():
            login_user_tb = gr.Textbox(label="Username")
            login_pass_tb = gr.Textbox(label="Password", type="password")
            login_btn = gr.Button("Login")
        token_out = gr.Textbox(label="Session Token", interactive=False)
        login_msg = gr.Textbox(label="Message", interactive=False)

    with gr.Tab("Analyze"):
        session_token_tb = gr.Textbox(label="Session Token (from login)")
        csv_file = gr.File(label="Upload transactions CSV (.csv)")
        rag_query_tb = gr.Textbox(label="RAG Query (optional)")
        analyze_btn = gr.Button("Analyze")
        analysis_out = gr.Textbox(label="LLM Analysis", lines=12, interactive=False)
        flagged_out = gr.JSON(label="Flagged Transactions")
        reports_out = gr.JSON(label="Reports (paths)")
        # Declared here (not inline in the click wiring) so the status box is
        # laid out inside the Analyze tab next to the other outputs.
        status_out = gr.Textbox(label="status")

    reg_btn.click(fn=register_user, inputs=[reg_user, reg_pass, reg_email], outputs=[reg_msg])
    login_btn.click(fn=login_user, inputs=[login_user_tb, login_pass_tb], outputs=[token_out, login_msg])
    analyze_btn.click(
        fn=process_file,
        inputs=[session_token_tb, csv_file, rag_query_tb],
        outputs=[analysis_out, flagged_out, reports_out, status_out],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
database.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+ import hashlib
3
+ import pandas as pd
4
+ from typing import Optional
5
+
6
class DatabaseManager:
    """SQLite-backed storage for user accounts and uploaded transactions.

    For a file path, every operation opens its own short-lived connection.
    For the default ":memory:" path a single connection is kept for the
    lifetime of the manager, because each new connection to ":memory:" would
    otherwise see a brand-new, empty database.
    """

    # Stored password format: "pbkdf2_sha256$<iterations>$<salt hex>$<digest hex>".
    _PBKDF2_PREFIX = "pbkdf2_sha256"
    _PBKDF2_ITERATIONS = 600_000
    # Columns of the `transactions` table that uploaded data may populate;
    # anything else in an uploaded frame is dropped before insertion.
    _TRANSACTION_COLUMNS = (
        "transaction_id",
        "timestamp",
        "amount",
        "description",
        "merchant",
        "category",
        "is_flagged",
        "risk_score",
    )
    # Long-lived connection, used only when db_path is ":memory:".
    _shared_conn = None

    def __init__(self, db_path=":memory:"):
        self.db_path = db_path
        self.init_db()

    def _open(self):
        """Return a connection: the shared one for ":memory:", else a new one."""
        if self.db_path == ":memory:":
            if self._shared_conn is None:
                self._shared_conn = sqlite3.connect(self.db_path, check_same_thread=False)
            return self._shared_conn
        return sqlite3.connect(self.db_path, check_same_thread=False)

    def _release(self, conn):
        """Close *conn* unless it is the shared in-memory connection."""
        if conn is not self._shared_conn:
            conn.close()

    def _hash_password(self, password: str, salt: bytes = None, iterations: int = None) -> str:
        """Return the salted PBKDF2-HMAC-SHA256 record for *password*."""
        import secrets

        salt = secrets.token_bytes(16) if salt is None else salt
        iterations = self._PBKDF2_ITERATIONS if iterations is None else iterations
        digest = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, iterations)
        return f"{self._PBKDF2_PREFIX}${iterations}${salt.hex()}${digest.hex()}"

    def _verify_password(self, password: str, stored) -> bool:
        """Check *password* against a stored record (new or legacy format)."""
        import hmac

        if not stored:
            return False
        if stored.startswith(self._PBKDF2_PREFIX + "$"):
            try:
                _, iterations, salt_hex, _ = stored.split("$")
                expected = self._hash_password(password, bytes.fromhex(salt_hex), int(iterations))
            except ValueError:
                # Malformed record: treat as a failed login, not a crash.
                return False
        else:
            # Rows written before salting was introduced hold a bare
            # SHA-256 hex digest; keep accepting them so old accounts work.
            expected = hashlib.sha256(password.encode()).hexdigest()
        return hmac.compare_digest(expected.encode(), stored.encode())

    def init_db(self):
        """Create the `users` and `transactions` tables if they do not exist."""
        conn = self._open()
        try:
            with conn:  # commit on success, roll back on error
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS users (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        username TEXT UNIQUE,
                        password_hash TEXT,
                        email TEXT,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """)
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS transactions (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        user_id INTEGER,
                        transaction_id TEXT,
                        timestamp TEXT,
                        amount REAL,
                        description TEXT,
                        merchant TEXT,
                        category TEXT,
                        is_flagged INTEGER DEFAULT 0,
                        risk_score REAL DEFAULT 0.0,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """)
        finally:
            self._release(conn)

    def create_user(self, username: str, password: str, email: str = None) -> bool:
        """Insert a new user.

        Returns:
            True on success, False when the insert violates a constraint
            (the username column is UNIQUE).
        """
        pwd_hash = self._hash_password(password)
        conn = self._open()
        try:
            with conn:
                conn.execute(
                    "INSERT INTO users (username, password_hash, email) VALUES (?, ?, ?)",
                    (username, pwd_hash, email),
                )
            return True
        except sqlite3.IntegrityError:
            return False
        finally:
            # Runs on every path, so the connection is no longer leaked when
            # the insert fails.
            self._release(conn)

    def authenticate_user(self, username: str, password: str) -> Optional[int]:
        """Return the user's id when the credentials match, else None."""
        conn = self._open()
        try:
            row = conn.execute(
                "SELECT id, password_hash FROM users WHERE username = ?",
                (username,),
            ).fetchone()
        finally:
            self._release(conn)
        if row is None or not self._verify_password(password, row[1]):
            return None
        return row[0]

    def store_transactions(self, user_id: int, df: pd.DataFrame):
        """Append the rows of *df* to `transactions`, tagged with *user_id*.

        Only columns that exist in the table are written; extra columns in
        the frame are ignored rather than making the insert fail. The
        caller's frame is not modified.
        """
        known = [col for col in self._TRANSACTION_COLUMNS if col in df.columns]
        frame = df[known].assign(user_id=user_id)
        conn = self._open()
        try:
            frame.to_sql('transactions', conn, if_exists='append', index=False)
            conn.commit()
        finally:
            self._release(conn)

    def get_transactions(self, user_id: int, days: int = 30):
        """Return all stored transactions of *user_id*, newest timestamp first.

        NOTE(review): `days` is accepted but not applied to the query; the
        intended cutoff column (timestamp vs. created_at) should be confirmed
        before adding a filter.
        """
        conn = self._open()
        try:
            cur = conn.execute(
                "SELECT * FROM transactions WHERE user_id = ? ORDER BY timestamp DESC",
                (user_id,),
            )
            rows = cur.fetchall()
            cols = [desc[0] for desc in cur.description]
        finally:
            self._release(conn)
        return pd.DataFrame(rows, columns=cols)
export_utils.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from reportlab.lib.pagesizes import letter
3
+ from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
4
+ from reportlab.lib import colors
5
+ from reportlab.lib.styles import getSampleStyleSheet
6
+
7
def generate_csv_report(flagged_list: list, path: str):
    """Write the flagged items to a CSV file at *path* and return *path*.

    Each item contributes one row: four fields taken from its nested
    "transaction" dict followed by the item's own risk fields. Missing keys
    become empty cells.
    """
    txn_fields = ("transaction_id", "timestamp", "amount", "description")
    flag_fields = ("risk_factor", "risk_score")

    records = []
    for flag in flagged_list:
        txn = flag.get("transaction", {})
        record = {name: txn.get(name) for name in txn_fields}
        record.update((name, flag.get(name)) for name in flag_fields)
        records.append(record)

    pd.DataFrame(records).to_csv(path, index=False)
    return path
22
+
23
def _format_risk_score(value) -> str:
    """Format a risk score with two decimals; missing/invalid values show as 0.00."""
    try:
        return f"{float(value):.2f}"
    except (TypeError, ValueError):
        return "0.00"


def generate_pdf_report(flagged_list: list, path: str):
    """Render the flagged items as a one-table PDF at *path* and return *path*.

    The table has a header row (repeated on each page) followed by one row
    per flagged item. Items whose risk score is absent, None or not numeric
    are printed with a score of 0.00 instead of aborting the whole report.
    """
    doc = SimpleDocTemplate(path, pagesize=letter)
    styles = getSampleStyleSheet()
    elements = []
    elements.append(Paragraph("Fraud Detection Report", styles["Title"]))
    elements.append(Spacer(1, 12))
    data = [["Transaction ID", "Timestamp", "Amount", "Risk Factor", "Risk Score"]]
    for item in flagged_list:
        txn = item.get("transaction", {})
        data.append([
            str(txn.get("transaction_id", "")),
            str(txn.get("timestamp", "")),
            f"{txn.get('amount', '')}",
            # A None risk factor is shown as an empty cell.
            str(item.get("risk_factor") or ""),
            _format_risk_score(item.get("risk_score", 0)),
        ])
    table = Table(data, repeatRows=1)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.black),
        # Right-align the Amount column for all data rows.
        ('ALIGN', (2, 1), (2, -1), 'RIGHT'),
    ]))
    elements.append(table)
    doc.build(elements)
    return path
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=4.5
2
+ langchain>=0.0.230
3
+ huggingface-hub>=0.15.1
4
+ sentence-transformers>=2.2.2
5
+ chromadb>=0.3.24
6
+ pandas
7
+ reportlab
8
+ fpdf
9
+ sqlalchemy
10
+ python-dotenv
11
+ slack-sdk
12
+ sendgrid
13
+ requests
14
+ tqdm