moddux committed on
Commit
aa1662c
·
verified ·
1 Parent(s): 116a9b9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +217 -1
src/streamlit_app.py CHANGED
@@ -1,4 +1,220 @@
1
- -import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  -import importlib.util
3
  -import os
4
  -import json
 
1
+ import streamlit as st
2
+ import importlib.util
3
+ import os
4
+ import json
5
+ import tempfile
6
+ from pathlib import Path
7
+ import inspect
8
+ import traceback
9
+
10
+ # --- EXISTING PIPELINE IMPORTS (keep these as in original app) ---
11
+ from modules.ingestion.ingest_data import run as ingest_run
12
+ from modules.preprocessing.preprocess_data import run as preprocess_run
13
+ from modules.ml_analysis.ml_analysis import run as ml_run
14
+ from modules.correlation.correlate_ioc import run as correlate_run
15
+ from modules.export.export_results import run as export_run
16
+
17
# Page-level configuration and heading for the dashboard.
st.set_page_config(
    page_title="Modular OSINT Pipeline",
    layout="wide",
)
st.title("🚀 Modular OSINT Pipeline Dashboard")
19
+
20
+ # --- PIPELINE WORKFLOW ---
21
def write_temp(data: dict) -> str:
    """Serialize *data* as JSON into a new temporary ``.json`` file.

    The file is created with ``delete=False``, so it is still on disk after
    this function returns; removing it is left to the caller.

    Args:
        data: JSON-serializable payload to write.

    Returns:
        The filesystem path of the temporary file.

    Raises:
        TypeError, ValueError: if *data* cannot be serialized by
            ``json.dumps``. No temporary file is created in that case.
    """
    # Serialize before creating the file so a serialization failure does not
    # leave an empty temp file behind.
    payload = json.dumps(data).encode()
    # The context manager closes the handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".json") as handle:
        handle.write(payload)
    return handle.name
26
+
27
# NOTE(review): nesting below is reconstructed; the guarded
# st.session_state["input"] lookup implies every stage lives under the
# `if "input" in st.session_state` branch - confirm against the real file.


def _first_stored(*keys):
    """Return the value of the first of *keys* present in session state, else None."""
    for key in keys:
        if key in st.session_state:
            return st.session_state[key]
    return None


def _pipeline_stage(label, state_key, runner, *source_keys):
    """Render one pipeline stage: its "Run" button and, once available, its output.

    Args:
        label: Human-readable stage name used in the button and heading.
        state_key: Session-state key under which the stage output is stored.
        runner: Stage entry point; called with the path of a temp JSON file
            and expected to return an object exposing ``.json()``.
        *source_keys: Session-state keys to take the stage input from, in
            order of preference.
    """
    if st.button(f"Run {label}"):
        payload = _first_stored(*source_keys)
        if payload is None:
            # Without this guard the stage would be handed a file containing
            # the JSON literal "null".
            st.warning(f"No input available for {label}. Run an earlier stage first.")
        else:
            # NOTE(review): files from write_temp() are never removed; confirm
            # whether runners still need the path after returning before
            # adding cleanup here.
            result = runner(write_temp(payload))
            st.session_state[state_key] = json.loads(result.json())
    if state_key in st.session_state:
        st.markdown(f"#### {label} Output")
        st.json(st.session_state[state_key])


uploaded = st.file_uploader("Upload initial OSINTModuleInput JSON", type=["json"])
if uploaded:
    try:
        st.session_state["input"] = json.load(uploaded)
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report the bad upload instead of aborting the script.
        st.error(f"Uploaded file is not valid JSON: {exc}")

if "input" in st.session_state:
    st.markdown("### 🔍 Initial Input")
    st.json(st.session_state["input"])

    col1, col2 = st.columns(2)

    with col1:
        _pipeline_stage("Ingestion", "ingest", ingest_run, "input")
        _pipeline_stage("Preprocessing", "preprocess", preprocess_run, "ingest", "input")

    with col2:
        _pipeline_stage("ML Analysis", "ml", ml_run, "preprocess", "ingest")
        _pipeline_stage("Correlation", "correlate", correlate_run, "ml", "preprocess")

    _pipeline_stage("Export", "export", export_run, "correlate", "ml")
    if "export" in st.session_state:
        st.download_button(
            label="Download Exported Results",
            data=json.dumps(st.session_state["export"], indent=2),
            file_name="osint_export.json",
            mime="application/json",
        )
91
+
92
+ # --- MULTI-DIRECTORY MODULE LAUNCHER SECTION ---
93
st.sidebar.header("Standalone & Subdirectory Modules")

# Root folder scanned for module scripts, and the optional registry file
# holding their descriptions. Both are relative to the working directory.
MODULES_DIR = Path("Modules")
MODULE_REGISTRY = MODULES_DIR.joinpath("module_registry.json")
97
+
98
def discover_py_modules(directory):
    """Recursively list ``.py`` scripts under *directory*.

    ``__init__.py`` files are skipped.

    Args:
        directory: Root folder to scan (``str`` or ``Path``).

    Returns:
        A sorted list of ``Path`` objects relative to *directory*; empty when
        the directory does not exist or holds no scripts.
    """
    found = [
        Path(root).relative_to(directory) / name
        for root, _dirs, files in os.walk(directory)
        for name in files
        if name.endswith(".py") and name != "__init__.py"
    ]
    # os.walk yields entries in filesystem order; sort so callers get the
    # same ordering on every run and platform.
    return sorted(found)
107
+
108
def normalize_registry_key(path: Path) -> str:
    """Turn a module path into its registry key.

    The final suffix is dropped, path separators become dots and the result
    is lower-cased.
    """
    suffixless = path.with_suffix("")
    dotted = ".".join(suffixless.as_posix().split("/"))
    return dotted.lower()
110
+
111
def load_module_description(module_path):
    """Return a short description for a module script.

    Lookup order:

    1. The ``"description"`` field of the module's entry in
       ``MODULE_REGISTRY``, keyed by ``normalize_registry_key(module_path)``.
       A registry entry without that field yields ``""``.
    2. The module-level docstring of ``MODULES_DIR / module_path``, with all
       whitespace runs collapsed to single spaces.

    Args:
        module_path: Path of the script relative to ``MODULES_DIR``.

    Returns:
        The description, or ``""`` when none is available (no registry entry
        and no file, no docstring, or a source file that cannot be parsed).
    """
    import ast  # local import: only this function needs it

    module_key = normalize_registry_key(module_path)
    if MODULE_REGISTRY.exists():
        with open(MODULE_REGISTRY, encoding="utf-8") as regfile:
            registry = json.load(regfile)
        if module_key in registry:
            return registry[module_key].get("description", "")

    full_path = MODULES_DIR / module_path
    if not full_path.exists():
        return ""
    try:
        # Parse (never execute) the source. Passing bytes lets the parser
        # honour the file's own encoding declaration. This also handles
        # one-line docstrings and text on the opening/closing quote lines,
        # which a line-by-line scan for the delimiter gets wrong.
        tree = ast.parse(full_path.read_bytes())
    except (SyntaxError, ValueError):
        return ""
    doc = ast.get_docstring(tree)
    return " ".join(doc.split()) if doc else ""
133
+
134
def get_module_params(module_path):
    """Load the module's parameter sidecar file, if it has one.

    The sidecar sits next to the script and has the script's name with its
    final suffix replaced by ``.osintmodule.json``.

    Args:
        module_path: Path of the script relative to ``MODULES_DIR``.

    Returns:
        ``(params, path)`` where *params* is the decoded JSON content and
        *path* is the sidecar path as a string, or ``(None, None)`` when no
        sidecar exists.

    Raises:
        json.JSONDecodeError: if the sidecar exists but is not valid JSON.
    """
    param_path = (MODULES_DIR / module_path).with_suffix(".osintmodule.json")
    if not param_path.exists():
        return None, None
    # Read as UTF-8 explicitly rather than relying on the locale default,
    # which mis-decodes non-ASCII JSON on some platforms.
    with open(param_path, "r", encoding="utf-8") as f:
        return json.load(f), str(param_path)
141
+
142
def run_module(module_path, params=None):
    """Import a discovered script from file and call its ``main()``.

    The script is loaded from ``MODULES_DIR / module_path`` under a synthetic
    module name. If it defines ``main``, that function is called with
    *params* when params were supplied and ``main`` declares at least one
    parameter, and with no arguments otherwise. Outcomes are reported through
    Streamlit: an error if the file is missing, a warning if there is no
    ``main``, and the exception with its traceback if loading or running
    fails.

    Args:
        module_path: Path of the script relative to ``MODULES_DIR``.
        params: Optional parameters object handed to ``main``.

    Returns:
        None. Whatever ``main`` returns is discarded.
    """
    full_path = MODULES_DIR / module_path
    if not full_path.exists():
        st.error(f"Module file not found: {module_path}")
        return
    mod_name = "mod_" + str(module_path).replace("/", "_").replace("\\", "_").replace(".py", "")
    try:
        spec = importlib.util.spec_from_file_location(mod_name, str(full_path))
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        if not hasattr(mod, "main"):
            st.warning(f"{module_path} does not have a main() function.")
            return
        accepts_args = len(inspect.signature(mod.main).parameters) > 0
        # Test for None rather than truthiness so an empty but present
        # parameters object (e.g. {}) is still passed through.
        if params is not None and accepts_args:
            mod.main(params)
        else:
            mod.main()
    except Exception as e:
        # UI boundary: a failing module must not take the dashboard down.
        st.error(f"Error running {module_path}: {e}")
        # st.exception takes the exception object itself and renders its
        # traceback; a pre-formatted string is not what it expects.
        st.exception(e)
167
+
168
def save_params_json(params, param_path):
    """Write *params* to *param_path* as JSON indented by two spaces.

    Args:
        params: JSON-serializable parameters object.
        param_path: Destination file path; overwritten if it exists.

    Raises:
        TypeError, ValueError: if *params* cannot be serialized. The
            destination file is left untouched in that case.
    """
    # Serialize before opening: opening with "w" truncates the file, so a
    # serialization error after that point would destroy the saved params.
    serialized = json.dumps(params, indent=2)
    with open(param_path, "w") as f:
        f.write(serialized)
171
+
172
# One sidebar expander per discovered module script: description, optional
# parameter editor, a Run button and download links for matching result files.
# NOTE(review): nesting is reconstructed (Run button inside the expander,
# result listing inside the Run branch) - confirm against the real file.
py_modules = discover_py_modules(MODULES_DIR)
for rel_path in py_modules:
    mod_label = str(rel_path)
    mod_name = rel_path.stem
    desc = load_module_description(rel_path)
    with st.sidebar.expander(mod_label, expanded=False):
        if desc:
            st.info(desc)

        params, param_path = get_module_params(rel_path)
        param_input = None
        if params is not None:
            st.markdown("**Edit module parameters:**")
            param_str = st.text_area(
                "Parameters (JSON)",
                value=json.dumps(params, indent=2),
                key=f"params_{mod_label}",
                height=200,
            )
            try:
                param_input = json.loads(param_str)
            except ValueError as e:
                # json.JSONDecodeError is a ValueError subclass.
                st.error(f"Invalid JSON: {e}")
            else:
                st.success("Valid JSON")

        if st.button(f"Run {mod_label}", key=f"run_{mod_label}"):
            st.write(f"## Running: {mod_label}")
            if desc:
                st.info(desc)
            # Persist edited parameters before running. When the edited text
            # is not valid JSON, the parameters loaded from disk are used.
            if param_input is not None and param_path:
                save_params_json(param_input, param_path)
                params = param_input
            run_module(rel_path, params)

            # A module directly under "Data" makes both candidates the same
            # path; dict.fromkeys drops the duplicate while keeping order.
            results_folders = list(
                dict.fromkeys(
                    [
                        MODULES_DIR / rel_path.parent / "Results",
                        MODULES_DIR / "Data" / "Results",
                    ]
                )
            )
            for folder_index, results_dir in enumerate(results_folders):
                if not results_dir.exists():
                    continue
                for result_file in sorted(results_dir.glob(f"{mod_name}*.*")):
                    with open(result_file, "rb") as fo:
                        st.download_button(
                            label=f"Download result: {result_file.name}",
                            data=fo,
                            file_name=result_file.name,
                            # Explicit key: same-named files in the two
                            # folders would otherwise produce identical
                            # widgets and collide.
                            key=f"download_{mod_label}_{folder_index}_{result_file.name}",
                        )
217
+
218
  -import importlib.util
219
  -import os
220
  -import json