Spaces:
Sleeping
Sleeping
File size: 6,587 Bytes
94a508d 5f0a407 70ebd4a 66f8fc1 1defe4d 20b2b87 b4c7402 fc43ad5 52b18ab 5f0a407 e98177c 3c8598e a2767e5 80ffc07 a2767e5 3c8598e a2767e5 567f66d a2767e5 3c8598e 812a13e a2767e5 3c8598e a2767e5 3c8598e a2767e5 e98177c 3c8598e 20b2b87 b4c7402 e98177c b4c7402 e98177c b4c7402 5a649d5 d65ea20 5a649d5 d65ea20 029945b 5a649d5 e98177c ce7ca57 6cdaa1a ce7ca57 3c8598e 1defe4d ce7ca57 b639ecc d65ea20 ce7ca57 ff724df 3c8598e f6ba557 3c8598e f6ba557 3c8598e f6ba557 812a13e 94a508d 3c8598e 42886c0 d65ea20 f6ba557 3c8598e f6ba557 d65ea20 523d28e b034509 d65ea20 ff724df 6cdaa1a 3c8598e d65ea20 523d28e d65ea20 ff724df 6cdaa1a 3c8598e d65ea20 66f8fc1 329f9c0 e98177c 41e9ae6 94a508d 1d30af4 94a508d 2901d44 20f12de 812a13e 3c8598e 41e9ae6 029945b a71a75a 523d28e 5a649d5 ff724df a74a996 ff724df 029945b 329f9c0 66f8fc1 22568e3 720784d e98177c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
import json
import os
import shlex
import subprocess
import tempfile

import editdistance
import frontmatter
import gradio as gr
from hexdump2 import hexdump

from dist import levenshtein_with_wildcard, print_match_summary
# Content of README.md as returned by frontmatter.load(...).content; passed to
# gr.Interface as the description text in run().
description = frontmatter.load("README.md").content
def trim(str, n):
    """Return *str* with its first *n* lines removed.

    The text is split with ``str.splitlines()`` and the remaining lines are
    re-joined with ``"\\n"``, so original line terminators (and a trailing
    newline) are not preserved.
    """
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str):
    """Drop the first seven lines of *str* and return the rest.

    Applied to objdump's stdout by the callers in this file; the seven lines
    are presumably the banner objdump prints before the first instruction.
    """
    leading_lines_to_skip = 7
    return trim(str, leading_lines_to_skip)
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine-code bytes with ``objdump``.

    The bytes are written to a temporary ``.bin`` file, which is handed to
    ``objdump -D -b binary -m <architecture> -M <options>``.  The temporary
    file is removed automatically when the ``with`` block exits, so repeated
    calls do not leave files behind.

    Args:
        byte_data: The raw bytes to disassemble.
        architecture: Value passed to objdump's ``-m`` option.
        options: Value passed to objdump's ``-M`` option.

    Returns:
        objdump's stdout with its leading lines removed by ``trim_objdump``.
        objdump's exit status is not checked.
    """
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # objdump opens the file by name, so the buffered bytes must reach
        # the file before the subprocess starts.
        temp_bin_file.flush()
        completed = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        )
    return trim_objdump(completed.stdout)
def compile(compiler, flags, source):
    """Compile source code and collect its bytes, disassembly and relocations.

    Every intermediate file (source, object file, raw section dump) is created
    inside a private temporary directory that is removed before the function
    returns, whether it succeeds, fails to compile, or raises.

    NOTE(security): ``compiler`` and ``flags`` are handed to
    ``subprocess.run`` as given (as an argument list, without a shell), so the
    caller decides which program is executed.

    Args:
        compiler: Name or path of the compiler executable.
        flags: Extra compiler flags as a single string; split with
            ``shlex.split``.
        source: Source text; written UTF-8 encoded to a ``.c`` file.

    Returns:
        A 4-tuple ``(relocs, compiled_bytes, compile_output, disassembly)``:

        * ``relocs`` - list of relocation dicts reported by
          ``llvm-readobj-19``, excluding those whose symbol name is ``.text``.
        * ``compiled_bytes`` - contents of the ``.text`` / ``.text.*``
          sections as extracted by ``objcopy -O binary``.
        * ``compile_output`` - the compiler's stdout followed by its stderr.
        * ``disassembly`` - ``objdump -dr`` output with its leading lines
          removed by ``trim_objdump``.

        When the compiler exits with a non-zero status the result is
        ``(None, None, compile_output, None)``.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        c_path = os.path.join(work_dir, "source.c")
        o_path = os.path.join(work_dir, "source.o")
        raw_path = os.path.join(work_dir, "source.raw")

        with open(c_path, "wb") as c_file:
            c_file.write(source.encode())

        # Compile the C file to an object file
        result = subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", o_path],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr
        if result.returncode != 0:
            return None, None, compile_output, None

        # Dump the code sections as raw bytes
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                o_path,
                raw_path,
            ]
        )
        try:
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            # objcopy produced no output file; treat that as "no code bytes".
            compiled_bytes = b""

        # Disassemble the object file
        disassembly = subprocess.run(
            ["objdump", "-dr", o_path], capture_output=True, text=True
        ).stdout
        disassembly = trim_objdump(disassembly)

        # Relocs
        readobj_stdout = subprocess.run(
            [
                "llvm-readobj-19",
                "--elf-output-style=JSON",
                "--relocations",
                o_path,
            ],
            capture_output=True,
            text=True,
        ).stdout
        reloc_sections = json.loads(readobj_stdout)[0]["Relocations"]
        json_relocs = [
            entry["Relocation"]
            for section in reloc_sections
            for entry in section["Relocs"]
        ]
        # Filter out .text
        json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]

        return json_relocs, compiled_bytes, compile_output, disassembly
# Width in bytes of the field patched by each x86-64 relocation type, following
# the field column (word8/word16/word32/word64) of the System V AMD64 psABI
# relocation table.
_RELOC_FIELD_SIZES = {
    "R_X86_64_8": 1,
    "R_X86_64_PC8": 1,
    "R_X86_64_16": 2,
    "R_X86_64_PC16": 2,
    "R_X86_64_32": 4,
    "R_X86_64_32S": 4,
    "R_X86_64_PC32": 4,
    "R_X86_64_PLT32": 4,
    "R_X86_64_GOT32": 4,
    "R_X86_64_GOTPCREL": 4,
    "R_X86_64_GOTPCRELX": 4,
    "R_X86_64_REX_GOTPCRELX": 4,
    "R_X86_64_GOTPC32": 4,
    "R_X86_64_GOTTPOFF": 4,
    "R_X86_64_TPOFF32": 4,
    "R_X86_64_TLSGD": 4,
    "R_X86_64_TLSLD": 4,
    "R_X86_64_DTPOFF32": 4,
    "R_X86_64_GOTPC32_TLSDESC": 4,
    "R_X86_64_SIZE32": 4,
    "R_X86_64_64": 8,
    "R_X86_64_PC64": 8,
    "R_X86_64_GOTOFF64": 8,
    "R_X86_64_GOT64": 8,
    "R_X86_64_GOTPCREL64": 8,
    "R_X86_64_GOTPC64": 8,
    "R_X86_64_GOTPLT64": 8,
    "R_X86_64_PLTOFF64": 8,
    "R_X86_64_DTPOFF64": 8,
    "R_X86_64_TPOFF64": 8,
    "R_X86_64_SIZE64": 8,
}


def _reloc_type2size(s):
    """Return the number of bytes covered by relocation type *s*.

    Args:
        s: Relocation type name, e.g. ``"R_X86_64_PC32"``.

    Returns:
        The width in bytes of the relocated field.

    Raises:
        ValueError: If *s* is not a known relocation type.  An explicit
            exception is used instead of ``assert`` so the check still runs
            when Python is started with ``-O``.
    """
    try:
        return _RELOC_FIELD_SIZES[s]
    except KeyError:
        raise ValueError(f"Unknown reloc {s}") from None
def _compute_relocs_byte_range(json_relocs):
    """Return a flat list of every byte offset covered by a relocation.

    Each entry of *json_relocs* is read for ``["Offset"]`` and
    ``["Type"]["Name"]``; it contributes the offsets
    ``Offset .. Offset + size - 1``, where ``size`` comes from
    ``_reloc_type2size``.  Offsets appear in input order and are not
    de-duplicated.
    """
    covered_offsets = []
    for reloc in json_relocs:
        start = reloc["Offset"]
        width = _reloc_type2size(reloc["Type"]["Name"])
        covered_offsets.extend(range(start, start + width))
    return covered_offsets
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile *source* and compare the result with the target bytes.

    This is the function wired into the Gradio interface built by ``run``.

    Args:
        target_bytes: Target function bytes as a hex string (parsed with
            ``bytes.fromhex``).
        source: Source code handed to ``compile``.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flags handed to ``compile``.
        disasm_arch: Architecture handed to ``disassemble_bytes``.
        disasm_options: Disassembler options handed to ``disassemble_bytes``.

    Returns:
        A 9-tuple: compiled hexdump (or ``"Compilation failed"``), target
        hexdump, plain edit distance (``-1`` on failure), relocation-aware
        edit distance and its operation list joined by newlines (both ``None``
        on failure), compiler output, compiled disassembly, compiled
        relocations, and target disassembly.
    """
    target_bytes = bytes.fromhex(target_bytes)
    compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(
        compiler, flags, source
    )
    target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)

    # Guard clause: nothing to compare when compilation produced no bytes.
    if compiled_bytes is None:
        return (
            "Compilation failed",
            hexdump(target_bytes, result="return"),
            -1,
            None,
            None,
            compile_output,
            compiled_disassembly,
            compiled_relocs,
            target_disassembly,
        )

    # Bytes covered by relocations are passed on as wildcard offsets.
    wildcard_offsets = _compute_relocs_byte_range(compiled_relocs)
    reloc_edit_distance, reloc_operations = print_match_summary(
        target_bytes,
        compiled_bytes,
        wildcard_offsets_seq2=wildcard_offsets,
    )
    print(f"reloc_edit_distance: {reloc_edit_distance}")
    print(f"reloc operations: {reloc_operations}")

    compiled_dump = hexdump(compiled_bytes, result="return")
    target_dump = hexdump(target_bytes, result="return")
    plain_edit_distance = editdistance.eval(compiled_bytes, target_bytes)
    edit_description = "\n".join(reloc_operations)
    return (
        compiled_dump,
        target_dump,
        plain_edit_distance,
        reloc_edit_distance,
        edit_description,
        compile_output,
        compiled_disassembly,
        compiled_relocs,
        target_disassembly,
    )
def run():
    """Build the Gradio interface around ``predict`` and launch it.

    The input and output components are listed in the same order as
    ``predict``'s parameters and return tuple.  The server is started on
    0.0.0.0:7860 with error display enabled.
    """
    input_components = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]
    output_components = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Number(label="Edit distance (ignoring relocs; lower is better)"),
        gr.Textbox(label="Edit description (ignoring relocs)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]
    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_components,
        outputs=output_components,
    )
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Build and launch the interface as soon as this module is executed.  There is
# no ``__main__`` guard, so importing the module starts the server as well.
run()
|