"""Gradio demo: disassemble an uploaded binary and classify one function.

The binary is analysed with the external ``bat-ana`` / ``bat-dis`` command
line tools, the resulting disassembly is split per function, and the text of
the selected function is sent to a hosted model that predicts whether it is a
C++ method or a regular function.
"""

import os
import re
import subprocess
import tempfile

import gradio as gr

model = gr.load("ejschwartz/oo-method-test-model-bylibrary", src="models")

# Prefix of the bat-dis comment line that introduces a new function.
_FUNCTION_HEADER = b";;; function 0x"


def _store_function(func_dis, addr, lines):
    """Record the disassembly *lines* for *addr* in *func_dis*.

    Does nothing when *addr* is ``None`` (no function header seen yet).  If
    the address is already present, the earlier entry is kept and a warning
    is printed.
    """
    if addr is None:
        return
    if addr in func_dis:
        print("Warning: Ignoring multiple functions at the same address")
    else:
        func_dis[addr] = b"\n".join(lines)


def _parse_disassembly(output):
    """Split raw ``bat-dis`` output (bytes) into ``{address: disassembly}``.

    Runs of spaces are collapsed to a single space.  A line starting with
    ``;;; function 0x`` begins a new function whose address is the third
    whitespace-separated token, parsed as hexadecimal.  Lines containing
    ``;;`` are dropped from the per-function text; anything before the first
    function header is discarded.
    """
    output = re.sub(b" +", b" ", output)

    func_dis = {}
    current_addr = None
    current_lines = []
    for line in output.splitlines():
        if line.startswith(_FUNCTION_HEADER):
            _store_function(func_dis, current_addr, current_lines)
            current_addr = int(line.split()[2], 16)
            current_lines = []

        if b";;" not in line:
            current_lines.append(line)

    # Flush the final function, which has no following header to trigger it.
    _store_function(func_dis, current_addr, current_lines)
    return func_dis


def get_all_dis(bname, addrs=None):
    """Disassemble the binary at path *bname* and return per-function text.

    Args:
        bname: Path of the binary to analyse.
        addrs: Optional iterable of function addresses; each one is passed to
            ``bat-ana`` as ``--function-at <addr>``.

    Returns:
        A dict mapping function address (int) to its disassembly (bytes).

    Raises:
        subprocess.CalledProcessError: If ``bat-ana`` or ``bat-dis`` exits
            with a non-zero status.
    """
    ana_cmd = ["bat-ana"]
    if addrs is not None:
        for addr in addrs:
            ana_cmd += ["--function-at", str(addr)]

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact; the context manager guarantees the intermediate
    # analysis file is cleaned up once both tools have run.
    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(bname) + "_", suffix=".bat_ana"
    ) as anafile:
        ana_cmd += ["--no-post-analysis", "-o", anafile.name, bname]
        subprocess.check_output(ana_cmd, stderr=subprocess.DEVNULL)

        output = subprocess.check_output(
            [
                "bat-dis",
                "--no-insn-address",
                "--no-bb-cfg-arrows",
                "--color=off",
                anafile.name,
            ],
            stderr=subprocess.DEVNULL,
        )

    return _parse_disassembly(output)


def get_funs(f):
    """Return the function addresses found in file object *f*, one per line.

    *f* must expose the path of the binary as ``f.name``.  Addresses are
    formatted as ``0x``-prefixed hexadecimal.
    """
    funs = get_all_dis(f.name)
    return "\n".join(f"{addr:#x}" for addr in funs)


with gr.Blocks() as demo:

    # Holds the {address: disassembly} dict of the currently loaded binary.
    all_dis_state = gr.State()

    gr.Markdown(
        """
# Function/Method Detector

First, upload a binary. Then, select a function from the dropdown. The function's disassembly, and the model's
prediction of whether the function represents a C++ method or a regular function will be displayed below.

This model was only trained on 32-bit MSVC++ binaries. You can provide
other types of binaries, but the result will probably be gibberish.
"""
    )

    file_widget = gr.File(label="Binary file")

    # Sorted so the examples are listed in a stable order across runs.
    example_widget = gr.Examples(
        examples=sorted(
            entry.path
            for entry in os.scandir(
                os.path.join(os.path.dirname(__file__), "examples")
            )
        ),
        inputs=file_widget,
    )

    # Hidden until a binary has been uploaded.
    with gr.Column(visible=False) as col:

        fun_dropdown = gr.Dropdown(
            label="Select a function", choices=["Woohoo!"], interactive=True
        )

        with gr.Row(visible=True) as result:
            disassembly = gr.Textbox(label="Disassembly", lines=20)
            clazz = gr.Label()

    def file_change_fn(file):
        """Disassemble the uploaded *file* and populate the function dropdown.

        Returns a dict of component updates.  When *file* is ``None`` the
        results column is hidden and the stored disassembly is cleared.
        """
        if file is None:
            return {col: gr.update(visible=False), all_dis_state: None}

        fun_data = get_all_dis(file.name)
        addrs = [f"{addr:#x}" for addr in fun_data]
        return {
            col: gr.update(visible=True),
            # A binary may yield no functions; leave the selection empty then
            # instead of indexing into an empty list.
            fun_dropdown: gr.Dropdown.update(
                choices=addrs, value=addrs[0] if addrs else None
            ),
            all_dis_state: fun_data,
        }

    def function_change_fn(selected_fun, fun_data):
        """Show the disassembly and model prediction for *selected_fun*.

        Args:
            selected_fun: Hexadecimal address string from the dropdown.
            fun_data: The ``{address: disassembly}`` dict held in the state.

        Returns:
            A dict of updates for the disassembly textbox and the label.
            Both are cleared when there is no usable selection.
        """
        raw = None
        if selected_fun and fun_data:
            try:
                raw = fun_data.get(int(selected_fun, 16))
            except ValueError:
                # Not a hexadecimal address (e.g. the placeholder choice).
                raw = None

        if raw is None:
            return {
                disassembly: gr.Textbox.update(value=""),
                clazz: gr.Label.update(value=None),
            }

        # The disassembly may contain bytes that are not valid UTF-8;
        # substitute them rather than failing the whole request.
        disassembly_str = raw.decode("utf-8", errors="replace")
        load_results = model.fn(disassembly_str)
        top_k = {
            e["label"]: e["confidence"] for e in load_results["confidences"]
        }
        return {
            disassembly: gr.Textbox.update(value=disassembly_str),
            clazz: gr.Label.update(top_k),
        }

    file_widget.change(
        file_change_fn, file_widget, [col, fun_dropdown, all_dis_state]
    )

    fun_dropdown.change(
        function_change_fn,
        [fun_dropdown, all_dis_state],
        [disassembly, clazz],
    )

demo.launch(server_name="0.0.0.0", server_port=7860, share=True)