Spaces:
Sleeping
Sleeping
ejschwartz
committed on
Commit
•
8464e89
1
Parent(s):
dee983b
Use Ghidra to enumerate functions
Browse files
- Dockerfile +2 -2
- main.py +44 -20
- scripts/dump_functions.py +5 -1
Dockerfile
CHANGED
@@ -28,8 +28,8 @@ RUN unzip ghidrathon/Ghidrathon-v4.0.0.zip -d /ghidra/Ghidra/Extensions
|
|
28 |
|
29 |
WORKDIR /
|
30 |
|
31 |
-
|
32 |
-
ADD ./DIRTY /DIRTY
|
33 |
|
34 |
RUN --mount=type=cache,target=/root/.cache pip install --no-cache-dir --upgrade -r /DIRTY/requirements.txt
|
35 |
|
|
|
28 |
|
29 |
WORKDIR /
|
30 |
|
31 |
+
RUN git clone -b main https://github.com/edmcman/DIRTY
|
32 |
+
#ADD ./DIRTY /DIRTY
|
33 |
|
34 |
RUN --mount=type=cache,target=/root/.cache pip install --no-cache-dir --upgrade -r /DIRTY/requirements.txt
|
35 |
|
main.py
CHANGED
@@ -4,22 +4,21 @@ import subprocess
|
|
4 |
import tempfile
|
5 |
import os
|
6 |
import sys
|
|
|
7 |
|
8 |
-
def
|
9 |
|
10 |
-
with tempfile.TemporaryDirectory() as TEMP_DIR
|
11 |
-
shutil.copy2(file.name, TEMP_DIR)
|
12 |
-
subprocess.run(f"ls -lR {TEMP_DIR}", shell=True)
|
13 |
|
14 |
-
|
15 |
-
print("Running DIRTY-Ghidra...", file=sys.stderr)
|
16 |
-
subprocess.run(f"python /DIRTY/dataset-gen-ghidra/generate.py --verbose --ghidra /ghidra/support/analyzeHeadless -t 1 -b {TEMP_DIR} -o {OUTPUT_DIR}", shell=True)
|
17 |
-
subprocess.run(f"ls -lR {OUTPUT_DIR}", shell=True)
|
18 |
|
|
|
|
|
|
|
19 |
|
20 |
with gr.Blocks() as demo:
|
21 |
|
22 |
-
|
23 |
|
24 |
gr.Markdown(
|
25 |
"""
|
@@ -30,30 +29,55 @@ with gr.Blocks() as demo:
|
|
30 |
|
31 |
file_widget = gr.File(label="Executable file")
|
32 |
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
if file is None:
|
37 |
return {
|
38 |
-
|
39 |
-
|
40 |
}
|
41 |
else:
|
42 |
|
43 |
#fun_data = {42: 2, 43: 3}
|
44 |
-
new_binary(file)
|
45 |
-
progress(0, desc="
|
46 |
-
|
|
|
|
|
47 |
|
48 |
-
|
|
|
|
|
49 |
|
50 |
return {
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
}
|
55 |
|
56 |
-
file_widget.change(file_change_fn, file_widget)
|
57 |
|
58 |
# spaces only shows stderr..
|
59 |
os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
|
|
|
4 |
import tempfile
|
5 |
import os
|
6 |
import sys
|
7 |
+
import json
|
8 |
|
9 |
+
def get_functions(file):
|
10 |
|
11 |
+
with tempfile.TemporaryDirectory() as TEMP_DIR:
|
|
|
|
|
12 |
|
13 |
+
subprocess.run(f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {file} -postscript /home/user/app/scripts/dump_functions.py {TEMP_DIR}/funcs.json", shell=True)
|
|
|
|
|
|
|
14 |
|
15 |
+
json_funcs = json.load(open(f"{TEMP_DIR}/funcs.json"))
|
16 |
+
|
17 |
+
return json_funcs
|
18 |
|
19 |
with gr.Blocks() as demo:
|
20 |
|
21 |
+
all_dis_state = gr.State()
|
22 |
|
23 |
gr.Markdown(
|
24 |
"""
|
|
|
29 |
|
30 |
file_widget = gr.File(label="Executable file")
|
31 |
|
32 |
+
with gr.Column(visible=False) as col:
|
33 |
+
#output = gr.Textbox("Output")
|
34 |
+
|
35 |
+
gr.Markdown("""
|
36 |
+
Great, you selected an executable! Now pick the function you would like to analyze.
|
37 |
+
""")
|
38 |
+
|
39 |
+
fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)
|
40 |
|
41 |
+
gr.Markdown("""
|
42 |
+
Below you can find the selected function's disassembly, and the model's
|
43 |
+
prediction of whether the function is an object-oriented method or a
|
44 |
+
regular function.
|
45 |
+
""")
|
46 |
+
|
47 |
+
with gr.Row(visible=True) as result:
|
48 |
+
disassembly = gr.Textbox(label="Disassembly", lines=20)
|
49 |
+
with gr.Column():
|
50 |
+
clazz = gr.Label()
|
51 |
+
#interpret_button = gr.Button("Interpret (very slow)")
|
52 |
+
#interpretation = gr.components.Interpretation(disassembly)
|
53 |
+
|
54 |
+
def file_change_fn(file, progress=gr.Progress()):
|
55 |
|
56 |
if file is None:
|
57 |
return {
|
58 |
+
col: gr.update(visible=False),
|
59 |
+
all_dis_state: None
|
60 |
}
|
61 |
else:
|
62 |
|
63 |
#fun_data = {42: 2, 43: 3}
|
64 |
+
#new_binary(file)
|
65 |
+
progress(0, desc="Analyzing binary...")
|
66 |
+
try:
|
67 |
+
fun_data = get_functions(file.name)
|
68 |
+
#print(fun_data)
|
69 |
|
70 |
+
addrs = [(f"{name} ({hex(int(addr))})", int(addr)) for addr, name in fun_data.items()]
|
71 |
+
except:
|
72 |
+
raise gr.Error("Unable to obtain functions")
|
73 |
|
74 |
return {
|
75 |
+
col: gr.Column(visible=True),
|
76 |
+
fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
|
77 |
+
all_dis_state: fun_data
|
78 |
}
|
79 |
|
80 |
+
file_widget.change(file_change_fn, file_widget, outputs=[col, fun_dropdown, all_dis_state])
|
81 |
|
82 |
# spaces only shows stderr..
|
83 |
os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
|
scripts/dump_functions.py
CHANGED
@@ -8,8 +8,12 @@ def dump_functions_to_json():
|
|
8 |
functions_dict = {}
|
9 |
|
10 |
for func in functions:
|
|
|
|
|
|
|
|
|
11 |
func_name = func.getName()
|
12 |
-
func_address = func.getEntryPoint().
|
13 |
|
14 |
# Add function name and address to the dictionary
|
15 |
functions_dict[func_address] = func_name
|
|
|
8 |
functions_dict = {}
|
9 |
|
10 |
for func in functions:
|
11 |
+
|
12 |
+
if func.isExternal() or func.isThunk():
|
13 |
+
continue
|
14 |
+
|
15 |
func_name = func.getName()
|
16 |
+
func_address = func.getEntryPoint().getOffset()
|
17 |
|
18 |
# Add function name and address to the dictionary
|
19 |
functions_dict[func_address] = func_name
|