ejschwartz committed on
Commit
8464e89
1 Parent(s): dee983b

Use Ghidra to enumerate functions

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -2
  2. main.py +44 -20
  3. scripts/dump_functions.py +5 -1
Dockerfile CHANGED
@@ -28,8 +28,8 @@ RUN unzip ghidrathon/Ghidrathon-v4.0.0.zip -d /ghidra/Ghidra/Extensions
28
 
29
  WORKDIR /
30
 
31
- #RUN git clone -b main https://github.com/edmcman/DIRTY
32
- ADD ./DIRTY /DIRTY
33
 
34
  RUN --mount=type=cache,target=/root/.cache pip install --no-cache-dir --upgrade -r /DIRTY/requirements.txt
35
 
 
28
 
29
  WORKDIR /
30
 
31
+ RUN git clone -b main https://github.com/edmcman/DIRTY
32
+ #ADD ./DIRTY /DIRTY
33
 
34
  RUN --mount=type=cache,target=/root/.cache pip install --no-cache-dir --upgrade -r /DIRTY/requirements.txt
35
 
main.py CHANGED
@@ -4,22 +4,21 @@ import subprocess
4
  import tempfile
5
  import os
6
  import sys
 
7
 
8
- def new_binary(file):
9
 
10
- with tempfile.TemporaryDirectory() as TEMP_DIR, tempfile.TemporaryDirectory() as OUTPUT_DIR:
11
- shutil.copy2(file.name, TEMP_DIR)
12
- subprocess.run(f"ls -lR {TEMP_DIR}", shell=True)
13
 
14
- # python3 generate.py --ghidra PATH_TO_GHIDRA_ANALYZEHEADLESS -t NUM_THREADS -n [NUM_FILES|None] -b BINARIES_DIR -o OUTPUT_DIR
15
- print("Running DIRTY-Ghidra...", file=sys.stderr)
16
- subprocess.run(f"python /DIRTY/dataset-gen-ghidra/generate.py --verbose --ghidra /ghidra/support/analyzeHeadless -t 1 -b {TEMP_DIR} -o {OUTPUT_DIR}", shell=True)
17
- subprocess.run(f"ls -lR {OUTPUT_DIR}", shell=True)
18
 
 
 
 
19
 
20
  with gr.Blocks() as demo:
21
 
22
- #all_dis_state = gr.State()
23
 
24
  gr.Markdown(
25
  """
@@ -30,30 +29,55 @@ with gr.Blocks() as demo:
30
 
31
  file_widget = gr.File(label="Executable file")
32
 
33
- def file_change_fn(file, progress=gr.Progress()):
 
 
 
 
 
 
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  if file is None:
37
  return {
38
- #col: gr.update(visible=False),
39
- #all_dis_state: None
40
  }
41
  else:
42
 
43
  #fun_data = {42: 2, 43: 3}
44
- new_binary(file)
45
- progress(0, desc="Decompiling binary...")
46
- #fun_data = get_all_dis(file.name)
 
 
47
 
48
- #addrs = ["%#x" % addr for addr in fun_data.keys()]
 
 
49
 
50
  return {
51
- #col: gr.update(visible=True),
52
- #fun_dropdown: gr.Dropdown.update(choices=addrs, value=addrs[0]),
53
- #all_dis_state: fun_data
54
  }
55
 
56
- file_widget.change(file_change_fn, file_widget)
57
 
58
  # spaces only shows stderr..
59
  os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
 
4
  import tempfile
5
  import os
6
  import sys
7
+ import json
8
 
9
+ def get_functions(file):
10
 
11
+ with tempfile.TemporaryDirectory() as TEMP_DIR:
 
 
12
 
13
+ subprocess.run(f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {file} -postscript /home/user/app/scripts/dump_functions.py {TEMP_DIR}/funcs.json", shell=True)
 
 
 
14
 
15
+ json_funcs = json.load(open(f"{TEMP_DIR}/funcs.json"))
16
+
17
+ return json_funcs
18
 
19
  with gr.Blocks() as demo:
20
 
21
+ all_dis_state = gr.State()
22
 
23
  gr.Markdown(
24
  """
 
29
 
30
  file_widget = gr.File(label="Executable file")
31
 
32
+ with gr.Column(visible=False) as col:
33
+ #output = gr.Textbox("Output")
34
+
35
+ gr.Markdown("""
36
+ Great, you selected an executable! Now pick the function you would like to analyze.
37
+ """)
38
+
39
+ fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)
40
 
41
+ gr.Markdown("""
42
+ Below you can find the selected function's disassembly, and the model's
43
+ prediction of whether the function is an object-oriented method or a
44
+ regular function.
45
+ """)
46
+
47
+ with gr.Row(visible=True) as result:
48
+ disassembly = gr.Textbox(label="Disassembly", lines=20)
49
+ with gr.Column():
50
+ clazz = gr.Label()
51
+ #interpret_button = gr.Button("Interpret (very slow)")
52
+ #interpretation = gr.components.Interpretation(disassembly)
53
+
54
+ def file_change_fn(file, progress=gr.Progress()):
55
 
56
  if file is None:
57
  return {
58
+ col: gr.update(visible=False),
59
+ all_dis_state: None
60
  }
61
  else:
62
 
63
  #fun_data = {42: 2, 43: 3}
64
+ #new_binary(file)
65
+ progress(0, desc="Analyzing binary...")
66
+ try:
67
+ fun_data = get_functions(file.name)
68
+ #print(fun_data)
69
 
70
+ addrs = [(f"{name} ({hex(int(addr))})", int(addr)) for addr, name in fun_data.items()]
71
+ except:
72
+ raise gr.Error("Unable to obtain functions")
73
 
74
  return {
75
+ col: gr.Column(visible=True),
76
+ fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
77
+ all_dis_state: fun_data
78
  }
79
 
80
+ file_widget.change(file_change_fn, file_widget, outputs=[col, fun_dropdown, all_dis_state])
81
 
82
  # spaces only shows stderr..
83
  os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
scripts/dump_functions.py CHANGED
@@ -8,8 +8,12 @@ def dump_functions_to_json():
8
  functions_dict = {}
9
 
10
  for func in functions:
 
 
 
 
11
  func_name = func.getName()
12
- func_address = func.getEntryPoint().toString()
13
 
14
  # Add function name and address to the dictionary
15
  functions_dict[func_address] = func_name
 
8
  functions_dict = {}
9
 
10
  for func in functions:
11
+
12
+ if func.isExternal() or func.isThunk():
13
+ continue
14
+
15
  func_name = func.getName()
16
+ func_address = func.getEntryPoint().getOffset()
17
 
18
  # Add function name and address to the dictionary
19
  functions_dict[func_address] = func_name