Upload 3 files
Browse files- README.md +34 -6
- app.py +171 -0
- requirements.txt +11 -0
README.md
CHANGED
|
@@ -1,12 +1,40 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: ETH Fraud Detection GraphSAGE
|
| 3 |
+
emoji: 🕵️♀️
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
tags:
|
| 12 |
+
- blockchain
|
| 13 |
+
- ethereum
|
| 14 |
+
- graph-neural-networks
|
| 15 |
+
- fraud-detection
|
| 16 |
+
- security
|
| 17 |
---
|
| 18 |
|
| 19 |
+
# Ethereum Fraud Detection System
|
| 20 |
+
|
| 21 |
+
This Space demonstrates a **Graph Neural Network (GraphSAGE)** model designed to detect illicit activities on the Ethereum blockchain.
|
| 22 |
+
|
| 23 |
+
## 🧠 Model Overview
|
| 24 |
+
The model is inductive: it learns to aggregate information from a node's local neighborhood (incoming and outgoing transactions) to generate embeddings and predict the likelihood of fraud.
|
| 25 |
+
|
| 26 |
+
* **Architecture:** GraphSAGE (Graph Sample and Aggregate).
|
| 27 |
+
* **Input Features:** Transaction volume, degree (in/out), time-based features, and graph properties (PageRank, etc.).
|
| 28 |
+
* **Output:** A probability score (0-1) indicating the likelihood of the address being involved in criminal activity (Phishing, Hack, Scam).
|
| 29 |
+
|
| 30 |
+
## 📊 How to use
|
| 31 |
+
1. Enter an Ethereum address (must be present in the analyzed snapshot dataset).
|
| 32 |
+
2. The system looks up the risk score that the model pre-computed for that address.
|
| 33 |
+
3. It visualizes the **Ego Graph** (1-hop neighborhood) to show who this wallet interacts with.
|
| 34 |
+
|
| 35 |
+
## 📂 Repository & Data
|
| 36 |
+
* **Model & Artifacts:** [uyen1109/eth-fraud-gnn-uyenuyen-v3](https://huggingface.co/uyen1109/eth-fraud-gnn-uyenuyen-v3)
|
| 37 |
+
* **Notebook Analysis:** Based on `btc2-3.ipynb`.
|
| 38 |
+
|
| 39 |
+
## ⚠️ Disclaimer
|
| 40 |
+
This is a research project. The risk scores are probabilistic estimates based on historical patterns and should not be treated as financial or legal advice.
|
app.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import networkx as nx
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import os
|
| 6 |
+
from huggingface_hub import hf_hub_download
|
| 7 |
+
|
| 8 |
+
# --- 1. SETUP & DATA LOADING ---
|
| 9 |
+
REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"  # Hub model repo that stores the scores and edge list
TOKEN = os.getenv("HF_TOKEN")  # Access token, set in the Space settings

# Columns kept from the edge file. Shared by the successful load and the
# empty fallback so df_edges always has the same schema.
_EDGE_COLUMNS = ["src", "dst", "edge_type", "weight"]

print("Loading data from Hugging Face Hub...")

# Node scores: the model's pre-computed output, indexed by lower-case address.
try:
    scores_path = hf_hub_download(
        repo_id=REPO_ID,
        filename="scores/node_scores_with_labels.csv",
        repo_type="model",
        token=TOKEN,
    )
    df_scores = pd.read_csv(scores_path)
    # Normalise addresses to lower case so lookups are case-insensitive.
    if "address" in df_scores.columns:
        df_scores["address"] = df_scores["address"].astype(str).str.lower()
        df_scores.set_index("address", inplace=True)
    else:
        # Without this column the index stays numeric and no address lookup
        # can ever succeed; say so instead of failing silently later.
        print("Warning: scores file has no 'address' column; address lookups will not match.")
    print(f"Loaded {len(df_scores)} node scores.")
except Exception as e:
    # Best effort: the app still starts, it just reports every address as
    # not found.
    print(f"Error loading scores: {e}")
    df_scores = pd.DataFrame()

# Edge list: used to draw the local graph.
# NOTE: this file can be very large (3.6M rows in the notebook). It is loaded
# whole; only the rows touching the queried address are selected when drawing.
try:
    edges_path = hf_hub_download(
        repo_id=REPO_ID,
        filename="graph/edges_all.csv",
        repo_type="model",
        token=TOKEN,
    )
    # Read only the needed columns to reduce memory use.
    df_edges = pd.read_csv(edges_path, usecols=_EDGE_COLUMNS)
    df_edges["src"] = df_edges["src"].astype(str).str.lower()
    df_edges["dst"] = df_edges["dst"].astype(str).str.lower()
    print(f"Loaded {len(df_edges)} edges.")
except Exception as e:
    # Best effort: fall back to an empty table with the same columns so the
    # drawing code can still filter it.
    print(f"Error loading edges: {e}")
    df_edges = pd.DataFrame(columns=_EDGE_COLUMNS)
|
| 40 |
+
|
| 41 |
+
# --- 2. HELPER FUNCTIONS ---
|
| 42 |
+
|
| 43 |
+
def get_node_risk(address):
    """Look up the pre-computed risk score and known label for an address.

    The address is stripped and lower-cased, then looked up in the index of
    the module-level ``df_scores`` table.

    Args:
        address: Ethereum address as entered by the user.

    Returns:
        ``(score, label_str)`` when the address is found. ``score`` comes from
        the ``prob_criminal`` column, or from ``susp`` when ``prob_criminal``
        is absent or NaN; it is 0.0 when neither column exists. ``label_str``
        is "Criminal (1)", "Benign (0)" or "Unknown".
        ``(None, message)`` when the address is not in the table.
    """
    address = str(address).strip().lower()

    if address not in df_scores.index:
        return None, "Address not found in the analyzed snapshot."

    row = df_scores.loc[address]
    # A repeated address makes .loc return a DataFrame rather than a Series;
    # take the first matching row so the scalar conversions below still work.
    if isinstance(row, pd.DataFrame):
        row = row.iloc[0]

    # Prefer the classifier probability; fall back to the anomaly score when
    # the probability is missing or NaN.
    score = 0.0
    for column in ("prob_criminal", "susp"):
        if column in row.index:
            score = float(row[column])
            if not pd.isna(score):
                break

    # Ground-truth label, when the dataset has one for this address.
    label_str = "Unknown"
    if "label" in row.index and not pd.isna(row["label"]):
        label_value = int(row["label"])
        label_str = "Criminal (1)" if label_value == 1 else "Benign (0)"

    return score, label_str
|
| 66 |
+
|
| 67 |
+
def draw_ego_graph(address):
    """Draw the 1-hop (ego) transaction graph around an address.

    Args:
        address: Ethereum address; stripped and lower-cased before it is
            matched against the ``src``/``dst`` columns of the module-level
            ``df_edges`` table.

    Returns:
        A Matplotlib ``Figure``. When no edge touches the address, the figure
        contains only the message "No transaction history in graph".
    """
    # Figures are built directly instead of through pyplot: pyplot keeps every
    # figure it creates in a global registry until it is closed, so a figure
    # per request would accumulate, and plt.gcf() depends on a process-wide
    # "current figure".
    from matplotlib.figure import Figure

    address = address.strip().lower()

    # Edges where the address is the sender or the receiver, capped at 50 so
    # the plot stays readable and quick to lay out.
    touches_address = (df_edges["src"] == address) | (df_edges["dst"] == address)
    subset = df_edges[touches_address].head(50)

    if subset.empty:
        fig = Figure(figsize=(6, 6))
        ax = fig.subplots()
        ax.text(0.5, 0.5, "No transaction history in graph", ha='center')
        ax.axis('off')
        return fig

    # Directed graph of the selected edges.
    G = nx.from_pandas_edgelist(
        subset,
        source="src",
        target="dst",
        edge_attr="edge_type",
        create_using=nx.DiGraph(),
    )

    # Fixed seed so the same address always produces the same layout.
    pos = nx.spring_layout(G, seed=42, k=0.5)

    fig = Figure(figsize=(8, 8))
    ax = fig.subplots()

    # The target node is red and larger; its neighbours are blue and smaller.
    node_colors = ["red" if node == address else "skyblue" for node in G.nodes()]
    node_sizes = [300 if node == address else 100 for node in G.nodes()]

    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes, ax=ax)
    nx.draw_networkx_edges(G, pos, alpha=0.5, arrowstyle='->', arrowsize=10, ax=ax)

    # Neighbours get a truncated address; the queried node is marked "TARGET"
    # to keep the plot uncluttered.
    labels = {node: (node[:6] + "..." if node != address else "TARGET") for node in G.nodes()}
    nx.draw_networkx_labels(G, pos, labels=labels, font_size=8, ax=ax)

    ax.set_title(f"Local Transaction Graph (Ego Network)\nTarget: {address[:10]}...")
    ax.axis('off')

    return fig
|
| 111 |
+
|
| 112 |
+
# --- 3. GRADIO MAIN FUNCTION ---
|
| 113 |
+
|
| 114 |
+
def analyze_wallet(address):
    """Score an address and draw its local graph (Gradio click handler).

    Args:
        address: Raw textbox content; may be empty, ``None`` or padded with
            whitespace.

    Returns:
        A ``(details_markdown, risk_level, figure)`` tuple. ``figure`` is
        ``None`` when no address was entered or the address is not in the
        dataset.
    """
    # Treat whitespace-only input the same as an empty textbox.
    address = (address or "").strip()
    if not address:
        return "Please enter an Ethereum address.", "N/A", None

    score, label_status = get_node_risk(address)

    if score is None:
        return f"Address {address} not found in the dataset.", "Unknown", None

    # Map the score to a risk band. A NaN score fails every comparison, so it
    # is reported as unknown rather than falling through to "LOW".
    if pd.isna(score):
        risk_level = "Unknown"
    elif score > 0.8:
        risk_level = "CRITICAL"
    elif score > 0.5:
        risk_level = "HIGH"
    elif score > 0.2:
        risk_level = "MEDIUM"
    else:
        risk_level = "LOW"

    result_text = (
        f"### Risk Score: {score:.4f}\n"
        f"**Risk Level:** {risk_level}\n"
        f"**Known Label:** {label_status}\n\n"
        f"*Note: This score is generated by a GraphSAGE model analyzing the transaction patterns.*"
    )

    plot = draw_ego_graph(address)

    return result_text, risk_level, plot
|
| 139 |
+
|
| 140 |
+
# --- 4. UI CONSTRUCTION ---
|
| 141 |
+
|
| 142 |
+
# Build the Gradio interface. Components appear in the order they are created
# inside each Row/Column context, so statement order here is the layout.
with gr.Blocks(title="ETH Fraud Detection (GraphSAGE)") as demo:
    gr.Markdown("# 🕵️♀️ Ethereum Fraud Detection with Graph Neural Networks")
    gr.Markdown(
        """
        **Model Architecture:** GraphSAGE (Inductive Learning on Graphs).
        **Data Source:** Analyzed Ethereum transaction snapshot (Internal/Normal/Token transfers).
        **Author:** Uyên Vũ (uyen1109).
        """
    )

    with gr.Row():
        # First column: address input, trigger button and sample addresses.
        with gr.Column():
            inp_addr = gr.Textbox(label="Ethereum Address (0x...)", placeholder="0x4c5380afabcf9b5...", lines=1)
            btn_analyze = gr.Button("Analyze Risk", variant="primary")

            gr.Markdown("### Sample Addresses (from dataset)")
            # Take the first few addresses from the scores table, if it loaded.
            sample_addrs = df_scores.head(3).index.tolist() if not df_scores.empty else []
            gr.Examples(examples=sample_addrs, inputs=inp_addr)

        # Second column: risk band, Markdown details and the ego-graph plot.
        with gr.Column():
            lbl_risk = gr.Label(label="Risk Assessment")
            out_details = gr.Markdown(label="Details")
            out_plot = gr.Plot(label="Local Graph Structure")

    # analyze_wallet returns (details, risk level, figure), matching the
    # order of the outputs list.
    btn_analyze.click(fn=analyze_wallet, inputs=inp_addr, outputs=[out_details, lbl_risk, out_plot])

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core libraries
|
| 2 |
+
torch
|
| 3 |
+
pandas
|
| 4 |
+
numpy
|
| 5 |
+
networkx
|
| 6 |
+
matplotlib
|
| 7 |
+
scikit-learn
|
| 8 |
+
|
| 9 |
+
# UI and Hugging Face integration
|
| 10 |
+
gradio
|
| 11 |
+
huggingface_hub
|