S-Dreamer committed on
Commit
5c62c2b
·
verified ·
1 Parent(s): 7fb215c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +876 -0
app.py ADDED
@@ -0,0 +1,876 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Passive OSINT Control Panel
3
+ Drift-aware, passive-first OSINT enrichment interface for Hugging Face Spaces.
4
+
5
+ Design constraints:
6
+ - Passive by default.
7
+ - No scanning, brute forcing, exploitation, or credential testing.
8
+ - All inputs are validated, sanitised, normalised, and hashed before audit logging.
9
+ - Modules that touch a user-provided target require explicit authorization.
10
+ - Correction verbs are limited to: ADAPT, CONSTRAIN, REVERT, OBSERVE.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import csv
16
+ import hashlib
17
+ import hmac
18
+ import html
19
+ import ipaddress
20
+ import json
21
+ import os
22
+ import re
23
+ import socket
24
+ import time
25
+ import uuid
26
+ from dataclasses import asdict, dataclass
27
+ from datetime import datetime, timezone
28
+ from pathlib import Path
29
+ from typing import Any, Literal
30
+ from urllib.parse import quote_plus, urlparse
31
+
32
+ import gradio as gr
33
+
34
+
35
+ # =============================================================================
36
+ # Runtime configuration
37
+ # =============================================================================
38
+
39
# Human-readable identity of the app; APP_VERSION is also hashed into the manifest.
APP_NAME = "Passive OSINT Control Panel"
APP_VERSION = "0.1.0"

# All run artefacts live next to this file, under runs/reports and runs/audit.
BASE_DIR = Path(__file__).resolve().parent
RUNS_DIR = BASE_DIR.joinpath("runs")
REPORTS_DIR = RUNS_DIR.joinpath("reports")
AUDIT_DIR = RUNS_DIR.joinpath("audit")

# Create the output directories at import time so writers never hit a missing path.
for directory in (RUNS_DIR, REPORTS_DIR, AUDIT_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# Upper bound on accepted indicator length, and the intended budget for network lookups.
MAX_INPUT_LENGTH = 256
NETWORK_TIMEOUT_SECONDS = 4.0

# Closed vocabularies used in annotations throughout the module.
CorrectionVerb = Literal["ADAPT", "CONSTRAIN", "REVERT", "OBSERVE"]
RiskTier = Literal["T1", "T2", "T3", "T4"]
IndicatorType = Literal["domain", "username", "email", "ip", "url", "unknown"]
56
+
57
+
58
+ # =============================================================================
59
+ # Source registry
60
+ # =============================================================================
61
+
62
# Registry of third-party lookup pages, keyed by indicator type.
# Each entry's "url" may contain the literal placeholder "{query}", which
# build_links() replaces with the URL-encoded indicator.
OSINT_LINKS: dict[str, list[dict[str, str]]] = {
    "domain": [
        dict(
            name="crt.sh",
            url="https://crt.sh/?q={query}",
            description="Certificate Transparency search",
        ),
        dict(
            name="SecurityTrails",
            url="https://securitytrails.com/domain/{query}/dns",
            description="DNS and historical domain intelligence",
        ),
        dict(
            name="URLScan",
            url="https://urlscan.io/search/#{query}",
            description="Public URL scan search",
        ),
        dict(
            name="VirusTotal",
            url="https://www.virustotal.com/gui/domain/{query}",
            description="Public domain reputation lookup",
        ),
        dict(
            name="Wayback Machine",
            url="https://web.archive.org/web/*/{query}",
            description="Archived pages",
        ),
    ],
    "username": [
        dict(
            name="WhatsMyName",
            url="https://whatsmyname.app/?q={query}",
            description="Username presence search",
        ),
        dict(
            name="Namechk",
            url="https://namechk.com/{query}",
            description="Username availability and footprinting",
        ),
        dict(
            name="GitHub",
            url="https://github.com/{query}",
            description="GitHub profile lookup",
        ),
        dict(
            name="Reddit",
            url="https://www.reddit.com/user/{query}",
            description="Reddit profile lookup",
        ),
    ],
    "email": [
        # No placeholder here: the breach check is done manually on the site.
        dict(
            name="Have I Been Pwned",
            url="https://haveibeenpwned.com/",
            description="Manual breach exposure check",
        ),
        dict(
            name="EmailRep",
            url="https://emailrep.io/query/{query}",
            description="Email reputation lookup",
        ),
    ],
    "ip": [
        dict(
            name="AbuseIPDB",
            url="https://www.abuseipdb.com/check/{query}",
            description="IP abuse reputation",
        ),
        dict(
            name="VirusTotal",
            url="https://www.virustotal.com/gui/ip-address/{query}",
            description="Public IP reputation lookup",
        ),
        dict(
            name="Shodan",
            url="https://www.shodan.io/host/{query}",
            description="Public internet exposure data",
        ),
    ],
    "url": [
        dict(
            name="URLScan",
            url="https://urlscan.io/search/#{query}",
            description="Public URL scan search",
        ),
        dict(
            name="VirusTotal",
            url="https://www.virustotal.com/gui/search/{query}",
            description="Public URL reputation lookup",
        ),
        dict(
            name="Wayback Machine",
            url="https://web.archive.org/web/*/{query}",
            description="Archived page history",
        ),
    ],
}
159
+
160
+
161
+ # =============================================================================
162
+ # Data models
163
+ # =============================================================================
164
+
165
@dataclass
class Manifest:
    """Declared identity, assumptions and invariants of this artifact."""

    # Stable identifier of the artifact the manifest describes.
    artifact_id: str
    # Application version the manifest was built for.
    version: str
    # Statements the app assumes to be true while running.
    assumptions: list[str]
    # Rules the app promises to uphold.
    invariants: list[str]
    # Risk tier label (one of the RiskTier literals).
    tier: RiskTier
    # SHA-256 hex digest of the other fields, as computed by make_manifest().
    manifest_hash: str
173
+
174
+
175
@dataclass
class TelemetryEvent:
    """One audit record per enrichment run; serialised to JSON by write_audit()."""

    # Identifier of the run; also used as the audit file name.
    run_id: str
    # ISO 8601 timestamp of when the event was recorded.
    timestamp: str
    # Manifest identity the run executed under.
    artifact_id: str
    manifest_hash: str
    # Classified type and keyed hash of the indicator (never the raw value).
    indicator_type: IndicatorType
    indicator_hash: str
    # Whether the operator ticked the authorization confirmation.
    authorized_target: bool
    # Module bookkeeping: what was asked for, what ran, what the gate refused.
    modules_requested: list[str]
    modules_executed: list[str]
    modules_blocked: list[str]
    # Drift scores per dimension and the correction verb derived from them.
    drift_vector: dict[str, float]
    correction_verb: CorrectionVerb
    # Wall-clock duration of the run in milliseconds.
    duration_ms: int
    # Human-readable problems collected during the run.
    errors: list[str]
191
+
192
+
193
@dataclass
class EnrichmentResult:
    """Everything needed to render a run in the UI and in the Markdown report."""

    # Identifier of the run; also used as the report file name.
    run_id: str
    # Classified indicator type.
    indicator_type: IndicatorType
    # Normalised indicator text; run_enrichment() stores "[redacted]" here.
    normalized_indicator: str
    # Keyed hash of the normalised indicator.
    indicator_hash: str
    # Pre-rendered Markdown bullet list of external lookup links.
    links_markdown: str
    # Output of the executed modules, keyed by module result name.
    passive_results: dict[str, Any]
    # Drift scores per dimension and the correction verb derived from them.
    drift_vector: dict[str, float]
    correction_verb: CorrectionVerb
    # Paths of the written report and audit files (report_path is filled in
    # after the report has been written).
    report_path: str
    audit_path: str
    # Human-readable problems collected during the run.
    errors: list[str]
206
+
207
+
208
+ # =============================================================================
209
+ # Manifest and hashing
210
+ # =============================================================================
211
+
212
def now_utc() -> str:
    """Return the current time in UTC as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
214
+
215
+
216
def get_hash_salt() -> str:
    """
    Return the secret used to key indicator hashes.

    The value of the OSINT_HASH_SALT environment variable wins when it is set
    and non-empty. Otherwise a fixed development salt is returned, but only if
    ALLOW_DEV_SALT is set to "true" (case-insensitive).

    Raises:
        RuntimeError: neither a salt nor the dev opt-in is configured.
    """
    configured = os.environ.get("OSINT_HASH_SALT")
    if configured:
        return configured

    dev_opt_in = os.environ.get("ALLOW_DEV_SALT", "").lower()
    if dev_opt_in == "true":
        return "dev-only-change-me"

    message = (
        "Missing OSINT_HASH_SALT. Add it as a Hugging Face Space Secret. "
        "For local testing only, set ALLOW_DEV_SALT=true."
    )
    raise RuntimeError(message)
232
+
233
+
234
def hmac_sha256(value: str) -> str:
    """
    Return the hex HMAC-SHA256 of *value*, keyed with the configured salt.

    The value is stripped and lower-cased first, so inputs that differ only in
    case or surrounding whitespace hash to the same digest.
    """
    key = get_hash_salt().encode("utf-8")
    message = value.strip().lower().encode("utf-8")
    return hmac.new(key, message, hashlib.sha256).hexdigest()
241
+
242
+
243
def make_manifest() -> Manifest:
    """
    Build the manifest for this app and fingerprint it.

    The hash is the SHA-256 of the manifest fields serialised as JSON with
    sorted keys, so it changes whenever the version, assumptions, invariants
    or tier change.
    """
    body = {
        "artifact_id": "passive_osint_control_panel",
        "version": APP_VERSION,
        "assumptions": [
            "OSINT activity is passive unless authorization is explicitly provided.",
            "Raw indicators are not written to audit logs.",
            "External target interaction requires authorized_target=True.",
            "All mutation decisions route through Correction verbs.",
        ],
        "invariants": [
            "Reject unsupported or malformed inputs.",
            "Escape HTML and remove control characters.",
            "Hash indicators before audit persistence.",
            "Block authorized-only modules without explicit confirmation.",
        ],
        "tier": "T2",
    }
    canonical = json.dumps(body, sort_keys=True)
    digest = hashlib.sha256(canonical.encode()).hexdigest()
    # The body keys match the Manifest field names one-to-one.
    return Manifest(manifest_hash=digest, **body)


# Built once at import time and shared by reports, audit events and the UI.
MANIFEST = make_manifest()
275
+
276
+
277
+ # =============================================================================
278
+ # Validation and sanitisation
279
+ # =============================================================================
280
+
281
# ASCII control characters (C0 range plus DEL); stripped from all input.
CONTROL_CHARS = re.compile(r"[\x00-\x1f\x7f]")

# Hostname syntax: total length 1-253, one or more dot-separated labels of
# 1-63 characters followed by an alphabetic TLD. Every label must start AND
# end with a letter or digit; hyphens are only allowed in the interior.
# (The earlier pattern only rejected a hyphen at the very start of the name,
# so "foo-.com" and "a.-b.com" were accepted.)
DOMAIN_RE = re.compile(
    r"^(?=.{1,253}$)"
    r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+"
    r"[a-zA-Z]{2,63}$"
)

# 2-64 characters from letters, digits, underscore, dot and hyphen.
USERNAME_RE = re.compile(r"^[a-zA-Z0-9_.-]{2,64}$")

# Deliberately loose shape check: local@domain.tld with no whitespace or extra "@".
EMAIL_RE = re.compile(r"^[^@\s]{1,64}@[^@\s]{1,255}\.[^@\s]{2,63}$")
287
+
288
+
289
def sanitize_text(value: str) -> str:
    """
    Clean a raw user-supplied indicator and return it HTML-escaped.

    The value is converted to ``str``, stripped of surrounding whitespace and
    of ASCII control characters, checked for emptiness and length, and finally
    HTML-escaped (quotes included).

    The length limit is applied to the cleaned text *before* escaping, so that
    characters such as ``&`` or ``"`` (which expand to multi-character
    entities) do not cause inputs shorter than MAX_INPUT_LENGTH to be rejected
    with a misleading "exceeds N characters" message.

    Raises:
        ValueError: the value is None, empty after cleaning, or too long.
    """
    if value is None:
        raise ValueError("Input is required.")

    cleaned = CONTROL_CHARS.sub("", str(value).strip())

    if not cleaned:
        raise ValueError("Input is empty.")

    # Measure what the user typed, not the entity-expanded form.
    if len(cleaned) > MAX_INPUT_LENGTH:
        raise ValueError(f"Input exceeds {MAX_INPUT_LENGTH} characters.")

    return html.escape(cleaned, quote=True)
304
+
305
+
306
def classify_and_normalize(raw_value: str, forced_type: str = "Auto") -> tuple[IndicatorType, str]:
    """
    Sanitise *raw_value*, work out what kind of indicator it is, and normalise it.

    With ``forced_type`` other than "Auto" the value is validated as exactly
    that type (case-insensitive name). In Auto mode the types are tried in the
    order URL, IP, email, domain, username and the first match wins.

    Returns:
        A ``(indicator_type, normalized_value)`` pair.

    Raises:
        ValueError: the input fails sanitisation or matches no supported type.
    """
    # sanitize_text() returns HTML-escaped text; undo the escaping so the
    # validators see the literal characters.
    candidate = html.unescape(sanitize_text(raw_value)).strip()

    if forced_type != "Auto":
        return validate_as_type(candidate, forced_type.lower())

    # URL detection stays inline so that a parse failure raised by urlparse
    # surfaces to the caller unchanged.
    parts = urlparse(candidate)
    if parts.scheme in {"http", "https"} and parts.netloc:
        return "url", f"{parts.scheme.lower()}://{parts.netloc.lower()}{parts.path or ''}"

    # Remaining types, most specific first; a failed validation just means
    # "try the next one".
    for kind in ("ip", "email", "domain", "username"):
        try:
            return validate_as_type(candidate, kind)
        except ValueError:
            continue

    raise ValueError(
        "Unsupported or malformed indicator. Supported types: domain, username, email, IP, URL."
    )
343
+
344
+
345
def validate_as_type(candidate: str, wanted: str) -> tuple[IndicatorType, str]:
    """
    Validate *candidate* as the single indicator type named by *wanted*.

    Normalisation per type: domains and emails are lower-cased (domains also
    lose trailing dots), IPs are rendered in canonical form, URLs keep only
    scheme, lower-cased netloc and path; usernames are returned unchanged.

    Returns:
        A ``(indicator_type, normalized_value)`` pair.

    Raises:
        ValueError: the candidate is not valid for the type, or the type name
            is not one of domain/username/email/ip/url.
    """
    if wanted == "domain":
        hostname = candidate.lower().rstrip(".")
        if not DOMAIN_RE.fullmatch(hostname):
            raise ValueError("Invalid domain.")
        return "domain", hostname

    if wanted == "username":
        if not USERNAME_RE.fullmatch(candidate):
            raise ValueError("Invalid username.")
        return "username", candidate

    if wanted == "email":
        address = candidate.lower()
        if not EMAIL_RE.fullmatch(address):
            raise ValueError("Invalid email.")
        return "email", address

    if wanted == "ip":
        try:
            parsed_ip = ipaddress.ip_address(candidate)
        except ValueError as exc:
            raise ValueError("Invalid IP address.") from exc
        return "ip", str(parsed_ip)

    if wanted == "url":
        parts = urlparse(candidate)
        if not (parts.scheme in {"http", "https"} and parts.netloc):
            raise ValueError("Invalid URL. Only http:// and https:// are supported.")
        # Query string and fragment are intentionally dropped from the normal form.
        return "url", f"{parts.scheme.lower()}://{parts.netloc.lower()}{parts.path or ''}"

    raise ValueError("Unknown indicator type.")
376
+
377
+
378
+ # =============================================================================
379
+ # Passive modules
380
+ # =============================================================================
381
+
382
# Modules that would touch the user-supplied target; run_enrichment() refuses
# them unless the operator confirms authorization.
AUTHORIZED_ONLY_MODULES = {"HTTP Headers", "Robots.txt"}

# Every module offered in the UI: the always-available ones plus the gated ones.
PASSIVE_MODULES = {
    "Resource Links",
    "DNS Records",
    "Local URL Parse",
} | AUTHORIZED_ONLY_MODULES
394
+
395
+
396
def build_links(indicator_type: IndicatorType, normalized: str) -> str:
    """
    Render the registered lookup links for an indicator as a Markdown bullet list.

    The indicator is URL-encoded with ``quote_plus`` and substituted for the
    literal ``{query}`` placeholder in each registered URL. If no sources are
    registered for the type, a short italic notice is returned instead.
    """
    sources = OSINT_LINKS.get(indicator_type, [])
    if not sources:
        return "_No source links registered for this indicator type._"

    encoded = quote_plus(normalized)
    resolved = ((entry, entry["url"].replace("{query}", encoded)) for entry in sources)
    return "\n".join(
        f"- [{entry['name']}]({target}) — {entry['description']}"
        for entry, target in resolved
    )
407
+
408
+
409
def resolve_dns(domain: str) -> dict[str, Any]:
    """
    Resolve *domain* to its IPv4 and IPv6 addresses via the system resolver.

    This is low-impact but still a network lookup. Only the "A" and "AAAA"
    lists are ever populated; "MX" and "NS" are present in the result for
    shape stability and always stay empty here.

    The process-wide ``socket.setdefaulttimeout()`` is deliberately NOT
    touched: it changes the default for every socket created afterwards
    anywhere in the process, and it does not bound ``getaddrinfo()`` anyway,
    whose duration is governed by the operating system's resolver settings.

    Returns:
        A dict with keys "A", "AAAA", "MX", "NS" (lists of strings, sorted and
        de-duplicated for A/AAAA) and, if an unexpected failure occurred, an
        "error" key holding the exception text.
    """
    result: dict[str, Any] = {"A": [], "AAAA": [], "MX": [], "NS": []}

    for family, key in ((socket.AF_INET, "A"), (socket.AF_INET6, "AAAA")):
        try:
            records = socket.getaddrinfo(domain, None, family, socket.SOCK_STREAM)
        except socket.gaierror:
            # No address of this family (or the name does not resolve): leave empty.
            continue
        except Exception as exc:
            # Best-effort module: report the problem instead of failing the run.
            result["error"] = str(exc)
            break
        # sockaddr is the fifth tuple element; its first item is the address.
        result[key] = sorted({record[4][0] for record in records})

    return result
427
+
428
+
429
def parse_url_locally(url: str) -> dict[str, Any]:
    """
    Break a URL into its parts without any network access.

    Query and fragment contents are not returned; only whether each is present.
    """
    parts = urlparse(url)
    has_query = bool(parts.query)
    has_fragment = bool(parts.fragment)
    return dict(
        scheme=parts.scheme,
        hostname=parts.hostname,
        path=parts.path,
        query_present=has_query,
        fragment_present=has_fragment,
    )
438
+
439
+
440
def fetch_http_headers(url_or_domain: str) -> dict[str, Any]:
    """
    Placeholder for the authorization-gated HTTP header module.

    No request is made and the argument is not used: the function only reports
    that the feature is not implemented. A real implementation (for example a
    bounded HEAD request via httpx, added to requirements.txt) should only be
    introduced after an authorization and rate-limit review.
    """
    explanation = (
        "HTTP header fetching is gated and intentionally stubbed in the base app. "
        "Implement with httpx only after authorization/rate-limit review."
    )
    return {"status": "not_implemented", "reason": explanation}
451
+
452
+
453
def fetch_robots(url_or_domain: str) -> dict[str, Any]:
    """
    Placeholder for the authorization-gated robots.txt module.

    No request is made and the argument is not used; the result only states
    that the feature is not implemented.
    """
    explanation = "robots.txt retrieval is gated and intentionally stubbed in the base app."
    return {"status": "not_implemented", "reason": explanation}
458
+
459
+
460
+ # =============================================================================
461
+ # Drift detection and correction
462
+ # =============================================================================
463
+
464
def detect_drift(
    indicator_type: IndicatorType,
    normalized: str,
    modules_requested: list[str],
    modules_blocked: list[str],
    errors: list[str],
    authorized_target: bool,
) -> dict[str, float]:
    """
    Score a single run along six drift dimensions using fixed heuristics.

    - adversarial: 0.7 if the lower-cased indicator matches any suspicious pattern
      (path traversal, shell metacharacters, script tags, loopback/metadata hosts).
    - policy: 0.4 if any module was blocked; at least 0.6 if an authorized-only
      module was requested without authorization.
    - operational: 0.2 per collected error, capped at 1.0.
    - statistical: 0.3 for an "unknown" indicator type.
    - behavioral / structural: always 0.0 here.

    No baseline is persisted or compared; every call is scored in isolation.
    """
    suspicious_patterns = (
        r"\.\./",
        r"%2e%2e",
        r";",
        r"\|",
        r"\$\(",
        r"`",
        r"<script",
        r"127\.0\.0\.1",
        r"localhost",
        r"169\.254\.169\.254",
    )
    haystack = normalized.lower()
    looks_hostile = any(re.search(pattern, haystack) for pattern in suspicious_patterns)

    policy_score = 0.4 if modules_blocked else 0.0
    # The constant is only consulted when the target is not authorized.
    if not authorized_target and any(
        name in AUTHORIZED_ONLY_MODULES for name in modules_requested
    ):
        policy_score = max(policy_score, 0.6)

    return {
        "statistical": 0.3 if indicator_type == "unknown" else 0.0,
        "behavioral": 0.0,
        "structural": 0.0,
        "adversarial": 0.7 if looks_hostile else 0.0,
        "operational": min(0.2 * len(errors), 1.0) if errors else 0.0,
        "policy": policy_score,
    }
513
+
514
+
515
def choose_correction(drift: dict[str, float]) -> CorrectionVerb:
    """
    Map a drift vector to a correction verb, most severe rule first.

    REVERT for policy >= 0.6, or structural/behavioral >= 0.5; CONSTRAIN for
    adversarial >= 0.3; ADAPT for statistical >= 0.5 with zero adversarial
    score; OBSERVE otherwise. Missing dimensions count as 0.0.
    """
    policy = drift.get("policy", 0.0)
    structural = drift.get("structural", 0.0)
    behavioral = drift.get("behavioral", 0.0)
    adversarial = drift.get("adversarial", 0.0)
    statistical = drift.get("statistical", 0.0)

    if policy >= 0.6 or structural >= 0.5 or behavioral >= 0.5:
        return "REVERT"
    if adversarial >= 0.3:
        return "CONSTRAIN"
    # Adapting is only allowed when there is no adversarial signal at all.
    if statistical >= 0.5 and adversarial == 0:
        return "ADAPT"
    return "OBSERVE"
532
+
533
+
534
+ # =============================================================================
535
+ # Reporting and audit
536
+ # =============================================================================
537
+
538
def write_audit(event: TelemetryEvent) -> Path:
    """Serialise *event* as pretty-printed JSON to AUDIT_DIR/<run_id>.json and return the path."""
    destination = AUDIT_DIR / f"{event.run_id}.json"
    payload = json.dumps(asdict(event), indent=2, sort_keys=True)
    destination.write_text(payload, encoding="utf-8")
    return destination
542
+
543
+
544
def write_report(result: EnrichmentResult, manifest: Manifest) -> Path:
    """
    Write the Markdown report for a run to REPORTS_DIR/<run_id>.md.

    The report contains the run summary, the manifest identity, the source
    links, the module results and drift vector as JSON, and the error list.
    The timestamp is taken at write time.

    Returns:
        The path of the written report.
    """
    destination = REPORTS_DIR / f"{result.run_id}.md"

    passive_json = json.dumps(result.passive_results, indent=2, sort_keys=True)
    drift_json = json.dumps(result.drift_vector, indent=2, sort_keys=True)
    written_at = now_utc()

    if result.errors:
        error_lines = "\n".join(f"- {error}" for error in result.errors)
    else:
        error_lines = "- None"

    # Template lines start at column 0 on purpose: they are written verbatim.
    document = f"""# Passive OSINT Report

## Run

- Run ID: `{result.run_id}`
- Timestamp: `{written_at}`
- Indicator Type: `{result.indicator_type}`
- Indicator Hash: `{result.indicator_hash}`
- Correction Verb: `{result.correction_verb}`

## Manifest

- Artifact: `{manifest.artifact_id}`
- Version: `{manifest.version}`
- Manifest Hash: `{manifest.manifest_hash}`
- Tier: `{manifest.tier}`

## Source Links

{result.links_markdown}

## Passive Results

```json
{passive_json}
```

## Drift Vector

```json
{drift_json}
```

## Errors

{error_lines}
"""

    destination.write_text(document, encoding="utf-8")
    return destination
590
+
591
+
592
def format_result_markdown(result: EnrichmentResult) -> str:
    """
    Render a run result as the Markdown shown in the UI output panel.

    Includes the run summary, source links, module results and drift vector as
    JSON, the audit/report file paths, and the error list ("- None" if empty).
    """
    passive_json = json.dumps(result.passive_results, indent=2, sort_keys=True)
    drift_json = json.dumps(result.drift_vector, indent=2, sort_keys=True)

    if result.errors:
        error_lines = "\n".join(f"- {error}" for error in result.errors)
    else:
        error_lines = "- None"

    # Template lines start at column 0 on purpose: they are emitted verbatim.
    return f"""
## Result

**Run ID:** `{result.run_id}`
**Type:** `{result.indicator_type}`
**Indicator Hash:** `{result.indicator_hash}`
**Correction:** `{result.correction_verb}`

### Source Links

{result.links_markdown}

### Passive Results

```json
{passive_json}
```

### Drift Vector

```json
{drift_json}
```

### Logs

- Audit: `{result.audit_path}`
- Report: `{result.report_path}`

### Errors

{error_lines}
"""
629
+
630
+
631
+ # =============================================================================
632
+ # Main orchestration
633
+ # =============================================================================
634
+
635
def run_enrichment(
    raw_indicator: str,
    forced_type: str,
    selected_modules: list[str],
    authorized_target: bool,
) -> tuple[str, str | None, str | None]:
    """
    Run one enrichment pass for the UI and persist its audit record and report.

    Steps: classify and hash the indicator, apply the authorization gate to the
    requested modules, execute the remaining modules in a fixed order, score
    drift, choose a correction verb, then write the audit JSON and the
    Markdown report.

    Returns:
        ``(markdown, report_path, audit_path)``. If the indicator is rejected
        (or hashing fails), the Markdown carries the reason and both paths are
        ``None``; nothing is written in that case.
    """
    started = time.perf_counter()
    stamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
    run_id = f"run_{stamp}_{uuid.uuid4().hex[:8]}"

    errors: list[str] = []
    modules_executed: list[str] = []
    passive_results: dict[str, Any] = {}

    try:
        indicator_type, normalized = classify_and_normalize(raw_indicator, forced_type)
        indicator_hash = hmac_sha256(normalized)
    except Exception as exc:
        return f"## Input rejected\n\n{exc!s}", None, None

    # An empty selection falls back to link generation only.
    selected_modules = selected_modules or ["Resource Links"]

    # Authorization gate: gated modules are refused unless the box is ticked.
    modules_blocked: list[str] = [
        name
        for name in selected_modules
        if name in AUTHORIZED_ONLY_MODULES and not authorized_target
    ]
    runnable = [name for name in selected_modules if name not in modules_blocked]

    if "Resource Links" in runnable:
        links_markdown = build_links(indicator_type, normalized)
        modules_executed.append("Resource Links")
    else:
        links_markdown = "_Resource link generation not selected._"

    # (module name, required indicator type or None, result key, handler,
    #  message when the indicator type does not fit). Order is significant:
    #  it fixes the order of modules_executed, passive_results and errors.
    module_table = (
        (
            "DNS Records",
            "domain",
            "dns",
            resolve_dns,
            "DNS Records module requires a domain indicator.",
        ),
        (
            "Local URL Parse",
            "url",
            "url_parse",
            parse_url_locally,
            "Local URL Parse module requires a URL indicator.",
        ),
        ("HTTP Headers", None, "http_headers", fetch_http_headers, None),
        ("Robots.txt", None, "robots", fetch_robots, None),
    )
    for name, required_type, result_key, handler, mismatch_message in module_table:
        if name not in runnable:
            continue
        if required_type is not None and indicator_type != required_type:
            errors.append(mismatch_message)
            continue
        passive_results[result_key] = handler(normalized)
        modules_executed.append(name)

    if modules_blocked:
        blocked_list = ", ".join(modules_blocked)
        errors.append(
            "Blocked authorized-only module(s): "
            + blocked_list
            + ". Confirm target authorization to enable them."
        )

    drift = detect_drift(
        indicator_type=indicator_type,
        normalized=normalized,
        modules_requested=selected_modules,
        modules_blocked=modules_blocked,
        errors=errors,
        authorized_target=authorized_target,
    )
    correction = choose_correction(drift)

    elapsed_ms = int((time.perf_counter() - started) * 1000)

    # The audit record carries only the hash, never the indicator itself.
    audit_path = write_audit(
        TelemetryEvent(
            run_id=run_id,
            timestamp=now_utc(),
            artifact_id=MANIFEST.artifact_id,
            manifest_hash=MANIFEST.manifest_hash,
            indicator_type=indicator_type,
            indicator_hash=indicator_hash,
            authorized_target=authorized_target,
            modules_requested=selected_modules,
            modules_executed=modules_executed,
            modules_blocked=modules_blocked,
            drift_vector=drift,
            correction_verb=correction,
            duration_ms=elapsed_ms,
            errors=errors,
        )
    )

    result = EnrichmentResult(
        run_id=run_id,
        indicator_type=indicator_type,
        normalized_indicator="[redacted]",
        indicator_hash=indicator_hash,
        links_markdown=links_markdown,
        passive_results=passive_results,
        drift_vector=drift,
        correction_verb=correction,
        report_path="",
        audit_path=str(audit_path),
        errors=errors,
    )
    # The report path is only known once the report has been written.
    report_path = write_report(result, MANIFEST)
    result.report_path = str(report_path)

    return format_result_markdown(result), str(report_path), str(audit_path)
747
+
748
+
749
def export_audit_index() -> str | None:
    """
    Write a CSV index of all readable audit records and return its path.

    Every ``*.json`` file in AUDIT_DIR is read (in sorted name order) and a
    fixed set of summary columns is copied into RUNS_DIR/audit_index.csv.
    Files that cannot be read, are not valid JSON, or do not contain a JSON
    object are skipped.

    Returns:
        The CSV path as a string, or ``None`` when there is nothing to export
        (no audit files at all, or none of them readable). Previously the
        second case crashed with ``IndexError`` because the header was taken
        from the first collected row.
    """
    # Column order of the exported CSV; independent of the data so the header
    # can be written even reasoning about zero rows.
    columns = (
        "timestamp",
        "run_id",
        "artifact_id",
        "indicator_type",
        "indicator_hash",
        "authorized_target",
        "correction_verb",
        "duration_ms",
    )

    rows = []
    for path in sorted(AUDIT_DIR.glob("*.json")):
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable or malformed file (JSON and decoding errors are
            # ValueError subclasses): skip it, export is best-effort.
            continue
        if not isinstance(data, dict):
            continue
        rows.append({column: data.get(column) for column in columns})

    if not rows:
        return None

    csv_path = RUNS_DIR / "audit_index.csv"
    with csv_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=list(columns))
        writer.writeheader()
        writer.writerows(rows)

    return str(csv_path)
781
+
782
+
783
def show_manifest() -> str:
    """Return the module manifest as a fenced JSON code block for the UI."""
    manifest_json = json.dumps(asdict(MANIFEST), indent=2, sort_keys=True)
    return "```json\n" + manifest_json + "\n```"
787
+
788
+
789
+ # =============================================================================
790
+ # Gradio UI
791
+ # =============================================================================
792
+
793
# Introductory text shown under the page title.
DESCRIPTION = """
This Space is a passive, drift-aware OSINT control panel.

It validates, sanitises, normalises, and hashes indicators before audit persistence.
Authorized-only modules are blocked unless explicitly confirmed.
Correction decisions are limited to ADAPT, CONSTRAIN, REVERT, or OBSERVE.
"""

# The whole UI is declared at import time; `demo` is the Blocks app that the
# entry point at the bottom launches.
with gr.Blocks(title=APP_NAME) as demo:
    gr.Markdown(f"# {APP_NAME}")
    gr.Markdown(DESCRIPTION)

    # Main tab: indicator input, module selection, authorization gate, results.
    with gr.Tab("Control Panel"):
        with gr.Row():
            raw_indicator = gr.Textbox(
                label="Indicator",
                placeholder="example.com, username, user@example.com, 8.8.8.8, https://example.com/path",
                max_lines=1,
            )
            # "Auto" lets classify_and_normalize() detect the type; any other
            # choice is lower-cased and validated as exactly that type.
            forced_type = gr.Dropdown(
                ["Auto", "Domain", "Username", "Email", "IP", "URL"],
                value="Auto",
                label="Indicator Type",
            )

        # Sorted so the checkbox order is stable (PASSIVE_MODULES is a set).
        selected_modules = gr.CheckboxGroup(
            choices=sorted(PASSIVE_MODULES),
            value=["Resource Links"],
            label="Modules",
        )

        # Unticked by default: authorized-only modules are blocked until the
        # operator confirms.
        authorized_target = gr.Checkbox(
            label="I confirm this target is authorized for conditional interaction",
            value=False,
        )

        run_button = gr.Button("Run Passive Enrichment", variant="primary")

        output = gr.Markdown(label="Output")
        report_file = gr.File(label="Markdown Report")
        audit_file = gr.File(label="Audit JSON")

        # run_enrichment returns (markdown, report_path, audit_path), matching
        # the three outputs in order.
        run_button.click(
            fn=run_enrichment,
            inputs=[raw_indicator, forced_type, selected_modules, authorized_target],
            outputs=[output, report_file, audit_file],
        )

    # Read-only view of the manifest computed at import time.
    with gr.Tab("Manifest"):
        gr.Markdown(
            "The manifest declares the assumptions and invariants this artifact was built under."
        )
        manifest_output = gr.Markdown(value=show_manifest())

    # On-demand CSV summary of the audit records written so far.
    with gr.Tab("Audit Export"):
        gr.Markdown("Export a CSV index of audit records. Raw indicators are not included.")
        export_button = gr.Button("Export Audit Index")
        audit_index_file = gr.File(label="Audit Index CSV")
        export_button.click(fn=export_audit_index, inputs=[], outputs=[audit_index_file])

    # Static policy text.
    with gr.Tab("Policy"):
        gr.Markdown(
            """
## Operating Policy

- Passive by default.
- No scanning, brute forcing, exploitation, credential testing, or directory fuzzing.
- No raw indicators in audit logs.
- Authorized-only modules require explicit confirmation.
- Correction is the only state mutation authority.
- Correction cannot rewrite its own policy.

## Correction Verbs

- **ADAPT**: assumptions may be safely updated.
- **CONSTRAIN**: reduce capability under uncertainty or adversarial pressure.
- **REVERT**: restore prior known-good state.
- **OBSERVE**: log only; no mutation.
"""
        )


# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()