Spaces:

uyen1109
/

DemoGraph

Sleeping

App Files Files Community

uyen1109 commited on 9 days ago

Commit

2d320dd

verified ·

1 Parent(s): afca78d

Upload 3 files

Browse files

Files changed (3) hide show

README.md +34 -6
app.py +171 -0
requirements.txt +11 -0

README.md CHANGED Viewed

@@ -1,12 +1,40 @@
 ---
-title: DemoGraph
-emoji: 😻
-colorFrom: green
-colorTo: gray
 sdk: gradio
-sdk_version: 5.49.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: ETH Fraud Detection GraphSAGE
+emoji: 🕵️‍♀️
+colorFrom: indigo
+colorTo: red
 sdk: gradio
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
+license: apache-2.0
+tags:
+- blockchain
+- ethereum
+- graph-neural-networks
+- fraud-detection
+- security
 ---
+# Ethereum Fraud Detection System
+This Space demonstrates a **Graph Neural Network (GraphSAGE)** model designed to detect illicit activities on the Ethereum blockchain.
+## 🧠 Model Overview
+The model is inductive: it learns to aggregate information from each node's local neighborhood (incoming and outgoing transactions) to generate an embedding and predict the likelihood of fraud.
+* **Architecture:** GraphSAGE (Graph Sample and Aggregate).
+* **Input Features:** Transaction volume, degree (in/out), time-based features, and graph properties (PageRank, etc.).
+* **Output:** A probability score (0-1) indicating the likelihood of the address being involved in criminal activity (Phishing, Hack, Scam).
+## 📊 How to use
+1.  Enter an Ethereum address (must be present in the analyzed snapshot dataset).
+2.  The system looks up the pre-calculated risk score from the model inference.
+3.  It visualizes the **Ego Graph** (1-hop neighborhood) to show who this wallet interacts with.
+## 📂 Repository & Data
+* **Model & Artifacts:** [uyen1109/eth-fraud-gnn-uyenuyen-v3](https://huggingface.co/uyen1109/eth-fraud-gnn-uyenuyen-v3)
+* **Notebook Analysis:** Based on `btc2-3.ipynb`.
+## ⚠️ Disclaimer
+This is a research project. The risk scores are probabilistic estimates based on historical patterns and must not be treated as financial or legal advice.

app.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import gradio as gr
+import pandas as pd
+import networkx as nx
+import matplotlib.pyplot as plt
+import os
+from huggingface_hub import hf_hub_download
+# --- 1. SETUP & DATA LOADING ---
+# Everything in this section runs once at import time and populates the
+# module-level frames `df_scores` and `df_edges` that the helpers below read.
+REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"  # Hub repo that hosts the score and edge files
+TOKEN = os.getenv("HF_TOKEN") # Token set in the Space's Settings (None when unset)
+print("Loading data from Hugging Face Hub...")
+# Download the scores file - the model's output
+try:
+    scores_path = hf_hub_download(repo_id=REPO_ID, filename="scores/node_scores_with_labels.csv", repo_type="model", token=TOKEN)
+    df_scores = pd.read_csv(scores_path)
+    # Normalize addresses to lowercase for lookup, then index by address.
+    # NOTE(review): if the CSV has no "address" column the frame keeps its
+    # default index, so address lookups will not match anything.
+    if "address" in df_scores.columns:
+        df_scores["address"] = df_scores["address"].astype(str).str.lower()
+        df_scores.set_index("address", inplace=True)
+    print(f"Loaded {len(df_scores)} node scores.")
+except Exception as e:
+    # Best effort: log the failure and fall back to an empty frame.
+    print(f"Error loading scores: {e}")
+    df_scores = pd.DataFrame()
+# Download the edges file - used to draw the graph
+# NOTE: this file can be very large (3.6M rows in the notebook).
+# To keep the demo fast, it is only loaded here and filtered when drawing.
+try:
+    edges_path = hf_hub_download(repo_id=REPO_ID, filename="graph/edges_all.csv", repo_type="model", token=TOKEN)
+    # Memory-friendly read: only load the needed columns
+    df_edges = pd.read_csv(edges_path, usecols=["src", "dst", "edge_type", "weight"])
+    # Lowercase both endpoints so they compare equal to the normalized lookups.
+    df_edges["src"] = df_edges["src"].astype(str).str.lower()
+    df_edges["dst"] = df_edges["dst"].astype(str).str.lower()
+    print(f"Loaded {len(df_edges)} edges.")
+except Exception as e:
+    # Best effort: log the failure and fall back to an empty edge list.
+    # NOTE(review): the fallback frame has no "weight" column, unlike the
+    # frame built on the success path.
+    print(f"Error loading edges: {e}")
+    df_edges = pd.DataFrame(columns=["src", "dst", "edge_type"])
+# --- 2. HELPER FUNCTIONS ---
+def get_node_risk(address):
+    """Tra cứu điểm rủi ro từ dữ liệu đã pre-calculate"""
+    address = address.strip().lower()
+    if address not in df_scores.index:
+        return None, "Address not found in the analyzed snapshot."
+    row = df_scores.loc[address]
+    # Lấy xác suất (prob_criminal) hoặc anomaly score
+    score = 0.0
+    if "prob_criminal" in row:
+        score = float(row["prob_criminal"])
+    elif "susp" in row:
+        score = float(row["susp"])
+    # Nhãn thực tế (nếu có)
+    label_str = "Unknown"
+    if "label" in row and not pd.isna(row["label"]):
+        l = int(row["label"])
+        label_str = "Criminal (1)" if l == 1 else "Benign (0)"
+    return score, label_str
+def draw_ego_graph(address):
+    """Vẽ đồ thị cục bộ (1-hop) quanh địa chỉ"""
+    address = address.strip().lower()
+    # Lọc các cạnh liên quan đến address (src hoặc dst là address)
+    # Chỉ lấy tối đa 50 cạnh để vẽ cho đẹp và nhanh
+    subset = df_edges[(df_edges["src"] == address) | (df_edges["dst"] == address)].head(50)
+    if subset.empty:
+        fig, ax = plt.subplots(figsize=(6, 6))
+        ax.text(0.5, 0.5, "No transaction history in graph", ha='center')
+        ax.axis('off')
+        return fig
+    # Tạo đồ thị NetworkX
+    G = nx.from_pandas_edgelist(subset, source="src", target="dst", edge_attr="edge_type", create_using=nx.DiGraph())
+    # Layout
+    pos = nx.spring_layout(G, seed=42, k=0.5)
+    plt.figure(figsize=(8, 8))
+    # Vẽ nút
+    node_colors = []
+    node_sizes = []
+    for node in G.nodes():
+        if node == address:
+            node_colors.append("red") # Nút mục tiêu màu đỏ
+            node_sizes.append(300)
+        else:
+            node_colors.append("skyblue") # Hàng xóm màu xanh
+            node_sizes.append(100)
+    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes)
+    nx.draw_networkx_edges(G, pos, alpha=0.5, arrowstyle='->', arrowsize=10)
+    # Chỉ hiện label cho nút chính để đỡ rối
+    labels = {node: (node[:6] + "..." if node != address else "TARGET") for node in G.nodes()}
+    nx.draw_networkx_labels(G, pos, labels=labels, font_size=8)
+    plt.title(f"Local Transaction Graph (Ego Network)\nTarget: {address[:10]}...")
+    plt.axis('off')
+    return plt.gcf()
+# --- 3. GRADIO MAIN FUNCTION ---
+def analyze_wallet(address):
+    if not address:
+        return "Please enter an Ethereum address.", "N/A", None
+    score, label_status = get_node_risk(address)
+    if score is None:
+        return f"Address {address} not found in the dataset.", "Unknown", None
+    # Logic đánh giá rủi ro
+    risk_level = "LOW"
+    if score > 0.8: risk_level = "CRITICAL"
+    elif score > 0.5: risk_level = "HIGH"
+    elif score > 0.2: risk_level = "MEDIUM"
+    result_text = (
+        f"### Risk Score: {score:.4f}\n"
+        f"**Risk Level:** {risk_level}\n"
+        f"**Known Label:** {label_status}\n\n"
+        f"*Note: This score is generated by a GraphSAGE model analyzing the transaction patterns.*"
+    )
+    plot = draw_ego_graph(address)
+    return result_text, risk_level, plot
+# --- 4. UI CONSTRUCTION ---
+# Two-column layout: address input and examples on the left, results on the
+# right. Components are created in display order inside the context managers.
+with gr.Blocks(title="ETH Fraud Detection (GraphSAGE)") as demo:
+    gr.Markdown("# 🕵️‍♀️ Ethereum Fraud Detection with Graph Neural Networks")
+    gr.Markdown(
+        """
+        **Model Architecture:** GraphSAGE (Inductive Learning on Graphs).
+        **Data Source:** Analyzed Ethereum transaction snapshot (Internal/Normal/Token transfers).
+        **Author:** Uyên Vũ (uyen1109).
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            inp_addr = gr.Textbox(label="Ethereum Address (0x...)", placeholder="0x4c5380afabcf9b5...", lines=1)
+            btn_analyze = gr.Button("Analyze Risk", variant="primary")
+            gr.Markdown("### Sample Addresses (from dataset)")
+            # Use the first three indexed addresses as examples, if any were loaded
+            sample_addrs = df_scores.head(3).index.tolist() if not df_scores.empty else []
+            gr.Examples(examples=sample_addrs, inputs=inp_addr)
+        with gr.Column():
+            lbl_risk = gr.Label(label="Risk Assessment")
+            out_details = gr.Markdown(label="Details")
+            out_plot = gr.Plot(label="Local Graph Structure")
+    # analyze_wallet returns (details text, risk level, plot); the outputs
+    # list follows that same order.
+    btn_analyze.click(fn=analyze_wallet, inputs=inp_addr, outputs=[out_details, lbl_risk, out_plot])
+# Launch the app only when this file is run as a script
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+# Core libraries
+torch
+pandas
+numpy
+networkx
+matplotlib
+scikit-learn
+# UI and Hugging Face integration
+gradio
+huggingface_hub