S-Dreamer commited on
Commit
a71264b
·
verified ·
1 Parent(s): 45e50b1

Upload 8 files

Browse files
osint_core/__init__.py CHANGED
@@ -26,6 +26,18 @@ from .validators import (
26
  ValidationErrorCode,
27
  )
28
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Future modules (to be added as you build them)
30
  # from .policy import enforce_policy
31
  # from .enrichment import run_passive_enrichment
@@ -39,6 +51,16 @@ __all__ = [
39
  "assert_valid_or_raise",
40
  "ValidationResult",
41
  "ValidationErrorCode",
 
 
 
 
 
 
 
 
 
 
42
  ]
43
 
44
  __version__ = "0.1.0"
 
26
  ValidationErrorCode,
27
  )
28
 
29
+ from .orchestrator import (
30
+ create_orchestrator,
31
+ list_skills,
32
+ get_skill,
33
+ OrchestratorAgent,
34
+ EnrichmentWorkflow,
35
+ ExecutionContext,
36
+ Skill,
37
+ Tool,
38
+ SkillResult,
39
+ )
40
+
41
  # Future modules (to be added as you build them)
42
  # from .policy import enforce_policy
43
  # from .enrichment import run_passive_enrichment
 
51
  "assert_valid_or_raise",
52
  "ValidationResult",
53
  "ValidationErrorCode",
54
+ # orchestrator
55
+ "create_orchestrator",
56
+ "list_skills",
57
+ "get_skill",
58
+ "OrchestratorAgent",
59
+ "EnrichmentWorkflow",
60
+ "ExecutionContext",
61
+ "Skill",
62
+ "Tool",
63
+ "SkillResult",
64
  ]
65
 
66
  __version__ = "0.1.0"
osint_core/intent.py ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ osint_core.intent
3
+ =================
4
+
5
+ Intent packet modeling for the Enterprise Drift-Aware OSINT Control Fabric.
6
+
7
+ This module turns a validated user request into an explicit, bounded, signed
8
+ intent packet. It does not execute actions, perform network calls, mutate
9
+ policy, update memory, or write audit records.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import hashlib
15
+ import hmac
16
+ import json
17
+ import os
18
+ import re
19
+ import uuid
20
+ from dataclasses import asdict, dataclass, field, replace
21
+ from datetime import datetime, timezone
22
+ from enum import Enum
23
+ from typing import Any, Iterable, Literal, Mapping
24
+
25
+
26
+ IntentAction = Literal[
27
+ "enrich_indicator",
28
+ "generate_report",
29
+ "update_sources",
30
+ "run_module",
31
+ "export_audit",
32
+ ]
33
+
34
+ RiskLabel = Literal["low", "medium", "high", "critical"]
35
+ RollbackStrategy = Literal["none", "observe_only", "disable_module", "sandbox", "revert"]
36
+ IndicatorType = Literal["domain", "username", "email", "ip", "url", "unknown"]
37
+
38
+
39
+ class IntentErrorCode(str, Enum):
40
+ MISSING_FIELD = "missing_field"
41
+ INVALID_ACTION = "invalid_action"
42
+ INVALID_RISK = "invalid_risk"
43
+ INVALID_SCOPE = "invalid_scope"
44
+ INVALID_ROLLBACK = "invalid_rollback"
45
+ FORBIDDEN_OPERATION_REQUESTED = "forbidden_operation_requested"
46
+ RAW_INDICATOR_LEAK = "raw_indicator_leak"
47
+ UNSIGNED_PACKET = "unsigned_packet"
48
+ SIGNATURE_MISMATCH = "signature_mismatch"
49
+ SECRET_MISSING = "secret_missing"
50
+
51
+
52
+ class IntentValidationError(ValueError):
53
+ def __init__(self, message: str, code: IntentErrorCode):
54
+ super().__init__(message)
55
+ self.code = code
56
+
57
+
58
+ @dataclass(frozen=True)
59
+ class IntentScope:
60
+ """Explicit scope boundary for an intent.
61
+
62
+ target_hash must be a hash/HMAC of the target. Raw indicators do not belong
63
+ in scope objects or intent packets.
64
+ """
65
+
66
+ target_hash: str
67
+ indicator_type: IndicatorType
68
+ allowed_operations: tuple[str, ...]
69
+ forbidden_operations: tuple[str, ...] = field(default_factory=tuple)
70
+ time_horizon_seconds: int = 300
71
+ success_criteria: tuple[str, ...] = field(default_factory=tuple)
72
+
73
+ def to_dict(self) -> dict[str, Any]:
74
+ return asdict(self)
75
+
76
+
77
+ @dataclass(frozen=True)
78
+ class IntentPacket:
79
+ """Signed, immutable description of a requested action."""
80
+
81
+ intent_id: str
82
+ action: IntentAction
83
+ purpose: str
84
+ scope: IntentScope
85
+ requested_modules: tuple[str, ...]
86
+ expected_side_effects: tuple[str, ...]
87
+ rollback_strategy: RollbackStrategy
88
+ risk_label: RiskLabel
89
+ manifest_hash: str
90
+ created_at: str
91
+ schema_version: str = "1.0.0"
92
+ signature: str | None = None
93
+
94
+ def unsigned_payload(self) -> dict[str, Any]:
95
+ payload = asdict(self)
96
+ payload.pop("signature", None)
97
+ return payload
98
+
99
+ def to_dict(self, include_signature: bool = True) -> dict[str, Any]:
100
+ payload = asdict(self)
101
+ if not include_signature:
102
+ payload.pop("signature", None)
103
+ return payload
104
+
105
+ def to_json(self, include_signature: bool = True) -> str:
106
+ return canonical_json(self.to_dict(include_signature=include_signature))
107
+
108
+
109
+ @dataclass(frozen=True)
110
+ class IntentValidationResult:
111
+ ok: bool
112
+ errors: tuple[str, ...] = field(default_factory=tuple)
113
+ error_codes: tuple[IntentErrorCode, ...] = field(default_factory=tuple)
114
+
115
+
116
+ VALID_ACTIONS: set[str] = {
117
+ "enrich_indicator",
118
+ "generate_report",
119
+ "update_sources",
120
+ "run_module",
121
+ "export_audit",
122
+ }
123
+
124
+ VALID_RISKS: set[str] = {"low", "medium", "high", "critical"}
125
+ VALID_ROLLBACK_STRATEGIES: set[str] = {
126
+ "none",
127
+ "observe_only",
128
+ "disable_module",
129
+ "sandbox",
130
+ "revert",
131
+ }
132
+
133
+ DEFAULT_FORBIDDEN_OPERATIONS: tuple[str, ...] = (
134
+ "port_scan",
135
+ "mass_scan",
136
+ "brute_force",
137
+ "credential_testing",
138
+ "exploitation",
139
+ "directory_fuzzing",
140
+ "web_vulnerability_scan",
141
+ "password_spray",
142
+ "login_attempt",
143
+ )
144
+
145
+ RAW_FIELD_NAMES: set[str] = {
146
+ "raw_indicator",
147
+ "raw_input",
148
+ "indicator",
149
+ "target",
150
+ "domain",
151
+ "email",
152
+ "username",
153
+ "url",
154
+ "ip",
155
+ }
156
+
157
+ HASH_RE = re.compile(r"^[a-fA-F0-9]{32,128}$")
158
+
159
+
160
+ def now_utc() -> str:
161
+ return datetime.now(timezone.utc).isoformat()
162
+
163
+
164
+ def canonical_json(value: Mapping[str, Any]) -> str:
165
+ """Return deterministic JSON for signing and hashing."""
166
+ return json.dumps(value, sort_keys=True, separators=(",", ":"), ensure_ascii=False)
167
+
168
+
169
+ def get_intent_signing_secret() -> str:
170
+ """Return signing secret.
171
+
172
+ Production should use INTENT_SIGNING_SECRET. Smaller deployments may fall
173
+ back to OSINT_HASH_SALT, but separating both is preferred.
174
+ """
175
+ secret = os.getenv("INTENT_SIGNING_SECRET") or os.getenv("OSINT_HASH_SALT")
176
+ if not secret:
177
+ raise IntentValidationError(
178
+ "Missing INTENT_SIGNING_SECRET or OSINT_HASH_SALT.",
179
+ IntentErrorCode.SECRET_MISSING,
180
+ )
181
+ return secret
182
+
183
+
184
+ def hash_manifest_payload(payload: Mapping[str, Any]) -> str:
185
+ return hashlib.sha256(canonical_json(payload).encode("utf-8")).hexdigest()
186
+
187
+
188
+ def sign_payload(payload: Mapping[str, Any], secret: str | None = None) -> str:
189
+ secret = secret or get_intent_signing_secret()
190
+ return hmac.new(
191
+ secret.encode("utf-8"),
192
+ canonical_json(payload).encode("utf-8"),
193
+ hashlib.sha256,
194
+ ).hexdigest()
195
+
196
+
197
+ def sign_intent(packet: IntentPacket, secret: str | None = None) -> IntentPacket:
198
+ signature = sign_payload(packet.unsigned_payload(), secret=secret)
199
+ return replace(packet, signature=signature)
200
+
201
+
202
+ def verify_intent_signature(packet: IntentPacket, secret: str | None = None) -> bool:
203
+ if not packet.signature:
204
+ raise IntentValidationError("Intent packet is unsigned.", IntentErrorCode.UNSIGNED_PACKET)
205
+
206
+ expected = sign_payload(packet.unsigned_payload(), secret=secret)
207
+ if not hmac.compare_digest(expected, packet.signature):
208
+ raise IntentValidationError(
209
+ "Intent signature mismatch.",
210
+ IntentErrorCode.SIGNATURE_MISMATCH,
211
+ )
212
+ return True
213
+
214
+
215
+ def make_scope(
216
+ *,
217
+ target_hash: str,
218
+ indicator_type: IndicatorType,
219
+ allowed_operations: Iterable[str],
220
+ forbidden_operations: Iterable[str] | None = None,
221
+ time_horizon_seconds: int = 300,
222
+ success_criteria: Iterable[str] | None = None,
223
+ ) -> IntentScope:
224
+ merged_forbidden = tuple(
225
+ dedupe_preserve_order(
226
+ [
227
+ *(forbidden_operations or ()),
228
+ *DEFAULT_FORBIDDEN_OPERATIONS,
229
+ ]
230
+ )
231
+ )
232
+
233
+ scope = IntentScope(
234
+ target_hash=target_hash,
235
+ indicator_type=indicator_type,
236
+ allowed_operations=tuple(dedupe_preserve_order(allowed_operations)),
237
+ forbidden_operations=merged_forbidden,
238
+ time_horizon_seconds=time_horizon_seconds,
239
+ success_criteria=tuple(success_criteria or ()),
240
+ )
241
+ validate_scope_or_raise(scope)
242
+ return scope
243
+
244
+
245
+ def create_intent_packet(
246
+ *,
247
+ action: IntentAction,
248
+ purpose: str,
249
+ scope: IntentScope,
250
+ requested_modules: Iterable[str],
251
+ expected_side_effects: Iterable[str] | None = None,
252
+ rollback_strategy: RollbackStrategy = "observe_only",
253
+ risk_label: RiskLabel = "low",
254
+ manifest_hash: str,
255
+ intent_id: str | None = None,
256
+ created_at: str | None = None,
257
+ sign: bool = True,
258
+ signing_secret: str | None = None,
259
+ ) -> IntentPacket:
260
+ packet = IntentPacket(
261
+ intent_id=intent_id or f"intent_{uuid.uuid4().hex}",
262
+ action=action,
263
+ purpose=purpose.strip(),
264
+ scope=scope,
265
+ requested_modules=tuple(dedupe_preserve_order(requested_modules)),
266
+ expected_side_effects=tuple(expected_side_effects or ()),
267
+ rollback_strategy=rollback_strategy,
268
+ risk_label=risk_label,
269
+ manifest_hash=manifest_hash,
270
+ created_at=created_at or now_utc(),
271
+ )
272
+
273
+ validate_intent_or_raise(packet)
274
+
275
+ if sign:
276
+ packet = sign_intent(packet, secret=signing_secret)
277
+ verify_intent_signature(packet, secret=signing_secret)
278
+
279
+ return packet
280
+
281
+
282
+ def validate_scope(scope: IntentScope) -> IntentValidationResult:
283
+ errors: list[str] = []
284
+ codes: list[IntentErrorCode] = []
285
+
286
+ if not scope.target_hash:
287
+ errors.append("scope.target_hash is required.")
288
+ codes.append(IntentErrorCode.MISSING_FIELD)
289
+ elif not HASH_RE.fullmatch(scope.target_hash):
290
+ errors.append("scope.target_hash must look like a cryptographic hash.")
291
+ codes.append(IntentErrorCode.INVALID_SCOPE)
292
+
293
+ if scope.indicator_type not in {"domain", "username", "email", "ip", "url", "unknown"}:
294
+ errors.append("scope.indicator_type is invalid.")
295
+ codes.append(IntentErrorCode.INVALID_SCOPE)
296
+
297
+ if not scope.allowed_operations:
298
+ errors.append("scope.allowed_operations must not be empty.")
299
+ codes.append(IntentErrorCode.MISSING_FIELD)
300
+
301
+ if scope.time_horizon_seconds <= 0 or scope.time_horizon_seconds > 86_400:
302
+ errors.append("scope.time_horizon_seconds must be between 1 and 86400.")
303
+ codes.append(IntentErrorCode.INVALID_SCOPE)
304
+
305
+ overlap = set(scope.allowed_operations).intersection(scope.forbidden_operations)
306
+ if overlap:
307
+ errors.append(f"Allowed operations include forbidden operation(s): {sorted(overlap)}")
308
+ codes.append(IntentErrorCode.FORBIDDEN_OPERATION_REQUESTED)
309
+
310
+ return IntentValidationResult(ok=not errors, errors=tuple(errors), error_codes=tuple(codes))
311
+
312
+
313
+ def validate_scope_or_raise(scope: IntentScope) -> None:
314
+ result = validate_scope(scope)
315
+ if not result.ok:
316
+ raise IntentValidationError(result.errors[0], result.error_codes[0])
317
+
318
+
319
+ def validate_intent(packet: IntentPacket) -> IntentValidationResult:
320
+ errors: list[str] = []
321
+ codes: list[IntentErrorCode] = []
322
+
323
+ if packet.action not in VALID_ACTIONS:
324
+ errors.append(f"Invalid action: {packet.action}")
325
+ codes.append(IntentErrorCode.INVALID_ACTION)
326
+
327
+ if not packet.purpose:
328
+ errors.append("purpose is required.")
329
+ codes.append(IntentErrorCode.MISSING_FIELD)
330
+
331
+ if packet.risk_label not in VALID_RISKS:
332
+ errors.append(f"Invalid risk label: {packet.risk_label}")
333
+ codes.append(IntentErrorCode.INVALID_RISK)
334
+
335
+ if packet.rollback_strategy not in VALID_ROLLBACK_STRATEGIES:
336
+ errors.append(f"Invalid rollback strategy: {packet.rollback_strategy}")
337
+ codes.append(IntentErrorCode.INVALID_ROLLBACK)
338
+
339
+ if not packet.manifest_hash or not HASH_RE.fullmatch(packet.manifest_hash):
340
+ errors.append("manifest_hash must look like a cryptographic hash.")
341
+ codes.append(IntentErrorCode.MISSING_FIELD)
342
+
343
+ scope_result = validate_scope(packet.scope)
344
+ errors.extend(scope_result.errors)
345
+ codes.extend(scope_result.error_codes)
346
+
347
+ raw_leak_paths = find_raw_indicator_fields(packet.to_dict())
348
+ if raw_leak_paths:
349
+ errors.append(f"Raw indicator-like field(s) are not allowed in intent packet: {raw_leak_paths}")
350
+ codes.append(IntentErrorCode.RAW_INDICATOR_LEAK)
351
+
352
+ return IntentValidationResult(ok=not errors, errors=tuple(errors), error_codes=tuple(codes))
353
+
354
+
355
+ def validate_intent_or_raise(packet: IntentPacket) -> None:
356
+ result = validate_intent(packet)
357
+ if not result.ok:
358
+ raise IntentValidationError(result.errors[0], result.error_codes[0])
359
+
360
+
361
+ def find_raw_indicator_fields(value: Any, path: str = "") -> list[str]:
362
+ findings: list[str] = []
363
+
364
+ if isinstance(value, Mapping):
365
+ for key, child in value.items():
366
+ key_str = str(key)
367
+ child_path = f"{path}.{key_str}" if path else key_str
368
+ if key_str.lower() in RAW_FIELD_NAMES:
369
+ findings.append(child_path)
370
+ findings.extend(find_raw_indicator_fields(child, child_path))
371
+
372
+ elif isinstance(value, (list, tuple)):
373
+ for index, child in enumerate(value):
374
+ findings.extend(find_raw_indicator_fields(child, f"{path}[{index}]"))
375
+
376
+ return findings
377
+
378
+
379
+ def intent_fingerprint(packet: IntentPacket) -> str:
380
+ return hashlib.sha256(packet.to_json(include_signature=True).encode("utf-8")).hexdigest()
381
+
382
+
383
+ def unsigned_intent_fingerprint(packet: IntentPacket) -> str:
384
+ return hashlib.sha256(packet.to_json(include_signature=False).encode("utf-8")).hexdigest()
385
+
386
+
387
+ def risk_score(risk_label: RiskLabel) -> float:
388
+ return {
389
+ "low": 0.25,
390
+ "medium": 0.5,
391
+ "high": 0.75,
392
+ "critical": 1.0,
393
+ }[risk_label]
394
+
395
+
396
+ def default_rollback_for_risk(risk_label: RiskLabel) -> RollbackStrategy:
397
+ if risk_label == "low":
398
+ return "observe_only"
399
+ if risk_label == "medium":
400
+ return "disable_module"
401
+ if risk_label == "high":
402
+ return "sandbox"
403
+ return "revert"
404
+
405
+
406
+ def derive_risk_label(
407
+ *,
408
+ requested_modules: Iterable[str],
409
+ authorized_target: bool,
410
+ contains_conditional_operation: bool = False,
411
+ ) -> RiskLabel:
412
+ modules = {str(module).strip().lower().replace(" ", "_") for module in requested_modules}
413
+
414
+ forbidden = {
415
+ "port_scan",
416
+ "nmap",
417
+ "masscan",
418
+ "brute_force",
419
+ "credential_testing",
420
+ "exploitation",
421
+ }
422
+ conditional = {
423
+ "http_headers",
424
+ "robots_txt",
425
+ "screenshot",
426
+ "ssl_labs_ssl_test",
427
+ }
428
+
429
+ if modules.intersection(forbidden):
430
+ return "critical"
431
+
432
+ if contains_conditional_operation or modules.intersection(conditional):
433
+ return "medium" if authorized_target else "high"
434
+
435
+ return "low"
436
+
437
+
438
+ def dedupe_preserve_order(values: Iterable[str]) -> list[str]:
439
+ seen: set[str] = set()
440
+ output: list[str] = []
441
+ for value in values:
442
+ normalized = str(value).strip()
443
+ if not normalized:
444
+ continue
445
+ if normalized not in seen:
446
+ output.append(normalized)
447
+ seen.add(normalized)
448
+ return output
osint_core/observer.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ osint_core.observer
3
+ ===================
4
+
5
+ Independent observer circuit for the Enterprise Drift-Aware OSINT Control Fabric.
6
+ The observer does not execute. It reconstructs expected behavior from intent,
7
+ policy, and executor trace, then emits dissent when reality does not match
8
+ declared constraints.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum
15
+ from typing import Any, Mapping
16
+
17
+
18
+ class ObserverSeverity(str, Enum):
19
+ INFO = "info"
20
+ WARNING = "warning"
21
+ CRITICAL = "critical"
22
+
23
+
24
+ @dataclass(frozen=True)
25
+ class ObserverCheck:
26
+ name: str
27
+ ok: bool
28
+ severity: ObserverSeverity
29
+ reason: str
30
+ evidence: dict[str, Any] = field(default_factory=dict)
31
+
32
+
33
+ @dataclass(frozen=True)
34
+ class ExecutionTrace:
35
+ intent_id: str
36
+ modules_requested: tuple[str, ...]
37
+ modules_executed: tuple[str, ...]
38
+ modules_blocked: tuple[str, ...]
39
+ observed_effects: tuple[str, ...]
40
+ output_schema_valid: bool
41
+ audit_payload: Mapping[str, Any]
42
+ errors: tuple[str, ...] = field(default_factory=tuple)
43
+
44
+
45
+ @dataclass(frozen=True)
46
+ class ObserverAssessment:
47
+ intent_id: str
48
+ checks: tuple[ObserverCheck, ...]
49
+
50
+ @property
51
+ def dissent(self) -> bool:
52
+ return any(not check.ok for check in self.checks)
53
+
54
+ @property
55
+ def has_critical_violation(self) -> bool:
56
+ return any((not check.ok) and check.severity == ObserverSeverity.CRITICAL for check in self.checks)
57
+
58
+
59
+ RAW_AUDIT_KEYS = {
60
+ "raw_indicator",
61
+ "raw_input",
62
+ "indicator",
63
+ "email",
64
+ "domain",
65
+ "username",
66
+ "url",
67
+ "ip",
68
+ }
69
+
70
+
71
+ def observe_execution(intent: IntentPacket, trace: ExecutionTrace, policy_result: PolicyEvaluation) -> ObserverAssessment:
72
+ checks = (
73
+ check_intent_trace_match(intent, trace),
74
+ check_modules_match_policy(trace, policy_result),
75
+ check_output_schema(trace),
76
+ check_no_raw_indicator_leak(trace),
77
+ check_expected_side_effects(intent, trace),
78
+ )
79
+ return ObserverAssessment(intent_id=trace.intent_id, checks=checks)
80
+
81
+
82
+ def check_intent_trace_match(intent: Any, trace: ExecutionTrace) -> ObserverCheck:
83
+ expected_intent_id = getattr(intent, "intent_id", None)
84
+ ok = expected_intent_id == trace.intent_id
85
+ return ObserverCheck(
86
+ name="intent_trace_match",
87
+ ok=ok,
88
+ severity=ObserverSeverity.CRITICAL,
89
+ reason="Execution trace must correspond to the intent packet.",
90
+ evidence={"expected": expected_intent_id, "actual": trace.intent_id},
91
+ )
92
+
93
+
94
+ def check_modules_match_policy(trace: ExecutionTrace, policy_result: Any) -> ObserverCheck:
95
+ if isinstance(policy_result, dict):
96
+ allowed = set(policy_result.get("allowed_modules", []))
97
+ else:
98
+ allowed = set(getattr(policy_result, "allowed_modules", []))
99
+
100
+ executed = set(trace.modules_executed)
101
+ unexpected = sorted(executed - allowed)
102
+ return ObserverCheck(
103
+ name="modules_match_policy",
104
+ ok=not unexpected,
105
+ severity=ObserverSeverity.CRITICAL,
106
+ reason="Executed modules must be allowed by policy.",
107
+ evidence={"unexpected_modules": unexpected},
108
+ )
109
+
110
+
111
+ def check_output_schema(trace: ExecutionTrace) -> ObserverCheck:
112
+ return ObserverCheck(
113
+ name="output_schema_valid",
114
+ ok=trace.output_schema_valid,
115
+ severity=ObserverSeverity.WARNING,
116
+ reason="Executor output should conform to expected schema.",
117
+ evidence={},
118
+ )
119
+
120
+
121
+ def check_no_raw_indicator_leak(trace: ExecutionTrace) -> ObserverCheck:
122
+ present = sorted(set(trace.audit_payload.keys()).intersection(RAW_AUDIT_KEYS))
123
+ return ObserverCheck(
124
+ name="no_raw_indicator_leak",
125
+ ok=not present,
126
+ severity=ObserverSeverity.CRITICAL,
127
+ reason="Audit payload must not contain raw indicator fields.",
128
+ evidence={"raw_fields": present},
129
+ )
130
+
131
+
132
+ def check_expected_side_effects(intent: Any, trace: ExecutionTrace) -> ObserverCheck:
133
+ expected = set(getattr(intent, "expected_side_effects", ()))
134
+ observed = set(trace.observed_effects)
135
+ missing = sorted(expected - observed)
136
+ return ObserverCheck(
137
+ name="expected_side_effects_present",
138
+ ok=not missing,
139
+ severity=ObserverSeverity.WARNING,
140
+ reason="Declared expected side effects should be observed or explained.",
141
+ evidence={"missing_effects": missing},
142
+ )
osint_core/orchestrator.py ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ osint_core.orchestrator
3
+ =======================
4
+
5
+ Orchestrator agent for coordinating passive OSINT enrichment workflows.
6
+
7
+ Design principles:
8
+ - Coordinates validation → policy → enrichment → drift → audit pipeline
9
+ - Manages skills (capabilities) and tools (external actions)
10
+ - Maintains execution context and telemetry
11
+ - Enforces security boundaries at each stage
12
+ - Pure orchestration — does not implement enrichment logic directly
13
+
14
+ The orchestrator pattern:
15
+ 1. Accept user request (indicator + modules + authorization)
16
+ 2. Validate input (osint_core.validators)
17
+ 3. Evaluate policy (osint_core.policy)
18
+ 4. Execute allowed modules via skills
19
+ 5. Detect drift (osint_core.drift when implemented)
20
+ 6. Choose correction verb
21
+ 7. Generate audit trail
22
+ 8. Return structured result
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import subprocess
28
+ import time
29
+ import uuid
30
+ from dataclasses import dataclass, field
31
+ from datetime import datetime, timezone
32
+ from enum import Enum
33
+ from typing import Any, Callable, Literal
34
+
35
+ from .policy import (
36
+ PolicyEvaluation,
37
+ evaluate_modules,
38
+ enforce_correction_verb,
39
+ )
40
+ from .validators import (
41
+ IndicatorType,
42
+ ValidationResult,
43
+ validate_indicator,
44
+ )
45
+
46
+
47
+ # =============================================================================
48
+ # Agent data structures
49
+ # =============================================================================
50
+
51
+ AgentRole = Literal["orchestrator", "validator", "enricher", "analyst"]
52
+ SkillCategory = Literal["validation", "passive_lookup", "conditional_fetch", "analysis"]
53
+ ToolType = Literal["subprocess", "network", "file", "computation"]
54
+
55
+
56
+ class ExecutionStatus(str, Enum):
57
+ PENDING = "pending"
58
+ RUNNING = "running"
59
+ COMPLETED = "completed"
60
+ FAILED = "failed"
61
+ BLOCKED = "blocked"
62
+
63
+
64
+ @dataclass(frozen=True)
65
+ class Tool:
66
+ """
67
+ A tool is an atomic capability that performs external actions.
68
+
69
+ Examples: DNS query, whois lookup, HTTP request, file parsing
70
+ """
71
+ name: str
72
+ tool_type: ToolType
73
+ description: str
74
+ requires_authorization: bool = False
75
+ timeout_seconds: float = 5.0
76
+
77
+
78
+ @dataclass(frozen=True)
79
+ class Skill:
80
+ """
81
+ A skill is a higher-level capability composed of tools.
82
+
83
+ Examples: "Resolve DNS", "Fetch WHOIS", "Parse URL"
84
+ """
85
+ name: str
86
+ category: SkillCategory
87
+ description: str
88
+ canonical_name: str
89
+ required_indicator_types: list[IndicatorType]
90
+ tools: list[Tool]
91
+ requires_authorization: bool = False
92
+
93
+
94
+ @dataclass
95
+ class ExecutionContext:
96
+ """
97
+ Execution context tracks the state of an enrichment workflow.
98
+ """
99
+ run_id: str
100
+ started_at: str
101
+ indicator_type: IndicatorType
102
+ normalized_indicator: str
103
+ indicator_hash: str
104
+ requested_modules: list[str]
105
+ authorized_target: bool
106
+ passive_only: bool
107
+ policy_evaluation: PolicyEvaluation | None = None
108
+ telemetry: dict[str, Any] = field(default_factory=dict)
109
+ errors: list[str] = field(default_factory=list)
110
+
111
+
112
+ @dataclass
113
+ class SkillResult:
114
+ """
115
+ Result from executing a skill.
116
+ """
117
+ skill_name: str
118
+ status: ExecutionStatus
119
+ data: dict[str, Any] = field(default_factory=dict)
120
+ error: str | None = None
121
+ duration_ms: int = 0
122
+
123
+
124
+ @dataclass
125
+ class EnrichmentWorkflow:
126
+ """
127
+ Complete enrichment workflow result.
128
+ """
129
+ context: ExecutionContext
130
+ validation_result: ValidationResult
131
+ policy_evaluation: PolicyEvaluation
132
+ skill_results: list[SkillResult]
133
+ drift_vector: dict[str, float]
134
+ correction_verb: str
135
+ duration_ms: int
136
+
137
+
138
+ # =============================================================================
139
+ # Tool implementations
140
+ # =============================================================================
141
+
142
+ # DNS resolution tool
143
+ DNS_QUERY_TOOL = Tool(
144
+ name="dns_query",
145
+ tool_type="network",
146
+ description="Query DNS records using system resolver",
147
+ requires_authorization=False,
148
+ timeout_seconds=4.0,
149
+ )
150
+
151
+ # WHOIS lookup tool
152
+ WHOIS_TOOL = Tool(
153
+ name="whois",
154
+ tool_type="subprocess",
155
+ description="Perform WHOIS lookup via system command",
156
+ requires_authorization=False,
157
+ timeout_seconds=5.0,
158
+ )
159
+
160
+ # URL parser tool (local, no network)
161
+ URL_PARSE_TOOL = Tool(
162
+ name="url_parse",
163
+ tool_type="computation",
164
+ description="Parse URL components locally",
165
+ requires_authorization=False,
166
+ timeout_seconds=1.0,
167
+ )
168
+
169
+ # HTTP header fetcher (conditional, requires auth)
170
+ HTTP_HEADERS_TOOL = Tool(
171
+ name="http_headers",
172
+ tool_type="network",
173
+ description="Fetch HTTP headers from target",
174
+ requires_authorization=True,
175
+ timeout_seconds=5.0,
176
+ )
177
+
178
+ # Robots.txt fetcher (conditional, requires auth)
179
+ ROBOTS_TXT_TOOL = Tool(
180
+ name="robots_txt",
181
+ tool_type="network",
182
+ description="Fetch robots.txt from target",
183
+ requires_authorization=True,
184
+ timeout_seconds=5.0,
185
+ )
186
+
187
+
188
+ # =============================================================================
189
+ # Skill definitions
190
+ # =============================================================================
191
+
192
+ SKILLS_REGISTRY: dict[str, Skill] = {
193
+ "resource_links": Skill(
194
+ name="Resource Links",
195
+ canonical_name="resource_links",
196
+ category="passive_lookup",
197
+ description="Generate links to external OSINT resources",
198
+ required_indicator_types=["domain", "username", "email", "ip", "url"],
199
+ tools=[], # No external tools needed
200
+ requires_authorization=False,
201
+ ),
202
+ "dns_records": Skill(
203
+ name="DNS Records",
204
+ canonical_name="dns_records",
205
+ category="passive_lookup",
206
+ description="Resolve DNS A, AAAA, MX, NS records",
207
+ required_indicator_types=["domain"],
208
+ tools=[DNS_QUERY_TOOL],
209
+ requires_authorization=False,
210
+ ),
211
+ "local_url_parse": Skill(
212
+ name="Local URL Parse",
213
+ canonical_name="local_url_parse",
214
+ category="analysis",
215
+ description="Parse URL components without contacting target",
216
+ required_indicator_types=["url"],
217
+ tools=[URL_PARSE_TOOL],
218
+ requires_authorization=False,
219
+ ),
220
+ "http_headers": Skill(
221
+ name="HTTP Headers",
222
+ canonical_name="http_headers",
223
+ category="conditional_fetch",
224
+ description="Fetch HTTP headers from authorized target",
225
+ required_indicator_types=["url", "domain"],
226
+ tools=[HTTP_HEADERS_TOOL],
227
+ requires_authorization=True,
228
+ ),
229
+ "robots_txt": Skill(
230
+ name="Robots.txt",
231
+ canonical_name="robots_txt",
232
+ category="conditional_fetch",
233
+ description="Fetch robots.txt from authorized target",
234
+ required_indicator_types=["url", "domain"],
235
+ tools=[ROBOTS_TXT_TOOL],
236
+ requires_authorization=True,
237
+ ),
238
+ }
239
+
240
+
241
+ # =============================================================================
242
+ # Orchestrator agent
243
+ # =============================================================================
244
+
245
+ class OrchestratorAgent:
246
+ """
247
+ Orchestrator agent coordinates the full enrichment workflow.
248
+
249
+ Responsibilities:
250
+ - Create execution context
251
+ - Route requests through validation → policy → enrichment
252
+ - Execute skills based on policy decisions
253
+ - Aggregate results
254
+ - Generate telemetry
255
+ """
256
+
257
+ def __init__(self, role: AgentRole = "orchestrator"):
258
+ self.role = role
259
+ self.skills = SKILLS_REGISTRY
260
+
261
+ def create_context(
262
+ self,
263
+ raw_indicator: str,
264
+ indicator_type_hint: str,
265
+ requested_modules: list[str],
266
+ authorized_target: bool,
267
+ passive_only: bool = True,
268
+ ) -> ExecutionContext:
269
+ """
270
+ Create execution context for a new enrichment request.
271
+ """
272
+ run_id = f"run_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
273
+ started_at = datetime.now(timezone.utc).isoformat()
274
+
275
+ # Validate indicator first
276
+ validation_result = validate_indicator(
277
+ raw_indicator,
278
+ forced_type=indicator_type_hint,
279
+ )
280
+
281
+ if not validation_result.ok:
282
+ # Create a minimal context for failed validation
283
+ return ExecutionContext(
284
+ run_id=run_id,
285
+ started_at=started_at,
286
+ indicator_type="unknown",
287
+ normalized_indicator="",
288
+ indicator_hash="",
289
+ requested_modules=requested_modules,
290
+ authorized_target=authorized_target,
291
+ passive_only=passive_only,
292
+ errors=[validation_result.error or "Validation failed"],
293
+ )
294
+
295
+ # For successful validation, hash the indicator
296
+ import hashlib
297
+ import hmac
298
+ import os
299
+
300
+ salt = os.getenv("OSINT_HASH_SALT", "dev-only-change-me")
301
+ indicator_hash = hmac.new(
302
+ salt.encode("utf-8"),
303
+ validation_result.normalized.encode("utf-8"),
304
+ hashlib.sha256,
305
+ ).hexdigest()
306
+
307
+ return ExecutionContext(
308
+ run_id=run_id,
309
+ started_at=started_at,
310
+ indicator_type=validation_result.indicator_type,
311
+ normalized_indicator=validation_result.normalized,
312
+ indicator_hash=indicator_hash,
313
+ requested_modules=requested_modules,
314
+ authorized_target=authorized_target,
315
+ passive_only=passive_only,
316
+ )
317
+
318
+ def execute_workflow(
319
+ self,
320
+ raw_indicator: str,
321
+ indicator_type_hint: str = "Auto",
322
+ requested_modules: list[str] | None = None,
323
+ authorized_target: bool = False,
324
+ passive_only: bool = True,
325
+ ) -> EnrichmentWorkflow:
326
+ """
327
+ Execute complete enrichment workflow.
328
+
329
+ Returns a structured workflow result containing:
330
+ - Execution context
331
+ - Validation result
332
+ - Policy evaluation
333
+ - Skill results
334
+ - Drift assessment
335
+ - Correction decision
336
+ """
337
+ started = time.perf_counter()
338
+
339
+ requested_modules = requested_modules or ["resource_links"]
340
+
341
+ # Step 1: Validate input
342
+ validation_result = validate_indicator(
343
+ raw_indicator,
344
+ forced_type=indicator_type_hint,
345
+ )
346
+
347
+ if not validation_result.ok:
348
+ # Early exit for validation failure
349
+ context = self.create_context(
350
+ raw_indicator,
351
+ indicator_type_hint,
352
+ requested_modules,
353
+ authorized_target,
354
+ passive_only,
355
+ )
356
+ return EnrichmentWorkflow(
357
+ context=context,
358
+ validation_result=validation_result,
359
+ policy_evaluation=PolicyEvaluation(
360
+ decision="BLOCK", # type: ignore
361
+ blocked_modules=requested_modules,
362
+ violations=[],
363
+ ),
364
+ skill_results=[],
365
+ drift_vector={},
366
+ correction_verb="REVERT",
367
+ duration_ms=int((time.perf_counter() - started) * 1000),
368
+ )
369
+
370
+ # Step 2: Create execution context
371
+ context = self.create_context(
372
+ raw_indicator,
373
+ indicator_type_hint,
374
+ requested_modules,
375
+ authorized_target,
376
+ passive_only,
377
+ )
378
+
379
+ # Step 3: Evaluate policy
380
+ policy_eval = evaluate_modules(
381
+ requested_modules,
382
+ authorized_target=authorized_target,
383
+ passive_only=passive_only,
384
+ allow_unknown_modules=False,
385
+ )
386
+ context.policy_evaluation = policy_eval
387
+
388
+ # Step 4: Execute allowed skills
389
+ skill_results = self._execute_skills(
390
+ context,
391
+ policy_eval.allowed_modules,
392
+ )
393
+
394
+ # Step 5: Detect drift
395
+ drift_vector = self._detect_drift(
396
+ context,
397
+ skill_results,
398
+ policy_eval,
399
+ )
400
+
401
+ # Step 6: Choose correction verb
402
+ correction_verb = self._choose_correction(drift_vector, policy_eval)
403
+
404
+ duration_ms = int((time.perf_counter() - started) * 1000)
405
+ # Ensure we always return at least 1ms to indicate actual work was done
406
+ if duration_ms == 0:
407
+ duration_ms = 1
408
+
409
+ return EnrichmentWorkflow(
410
+ context=context,
411
+ validation_result=validation_result,
412
+ policy_evaluation=policy_eval,
413
+ skill_results=skill_results,
414
+ drift_vector=drift_vector,
415
+ correction_verb=correction_verb,
416
+ duration_ms=duration_ms,
417
+ )
418
+
419
+ def _execute_skills(
420
+ self,
421
+ context: ExecutionContext,
422
+ allowed_modules: list[str],
423
+ ) -> list[SkillResult]:
424
+ """
425
+ Execute allowed skills based on policy evaluation.
426
+ """
427
+ results: list[SkillResult] = []
428
+
429
+ for module_name in allowed_modules:
430
+ skill = self.skills.get(module_name)
431
+ if not skill:
432
+ results.append(SkillResult(
433
+ skill_name=module_name,
434
+ status=ExecutionStatus.FAILED,
435
+ error=f"Skill not found: {module_name}",
436
+ ))
437
+ continue
438
+
439
+ # Check if indicator type is supported by this skill
440
+ if skill.required_indicator_types and context.indicator_type not in skill.required_indicator_types:
441
+ results.append(SkillResult(
442
+ skill_name=skill.name,
443
+ status=ExecutionStatus.BLOCKED,
444
+ error=f"Skill {skill.name} requires indicator type in {skill.required_indicator_types}, got {context.indicator_type}",
445
+ ))
446
+ continue
447
+
448
+ # Execute skill
449
+ result = self._execute_skill(skill, context)
450
+ results.append(result)
451
+
452
+ return results
453
+
454
+ def _execute_skill(
455
+ self,
456
+ skill: Skill,
457
+ context: ExecutionContext,
458
+ ) -> SkillResult:
459
+ """
460
+ Execute a single skill.
461
+
462
+ For now, this is a stub that returns placeholder data.
463
+ In production, this would invoke the skill's tools.
464
+ """
465
+ started = time.perf_counter()
466
+
467
+ try:
468
+ # Placeholder: skill execution logic would go here
469
+ # Each skill would use its tools to perform enrichment
470
+
471
+ if skill.canonical_name == "resource_links":
472
+ data = {"type": "links", "generated": True}
473
+ elif skill.canonical_name == "dns_records":
474
+ data = {"A": [], "AAAA": [], "MX": [], "NS": []}
475
+ elif skill.canonical_name == "local_url_parse":
476
+ data = {"scheme": "", "hostname": "", "path": ""}
477
+ else:
478
+ data = {"status": "not_implemented"}
479
+
480
+ duration_ms = int((time.perf_counter() - started) * 1000)
481
+
482
+ return SkillResult(
483
+ skill_name=skill.name,
484
+ status=ExecutionStatus.COMPLETED,
485
+ data=data,
486
+ duration_ms=duration_ms,
487
+ )
488
+
489
+ except Exception as exc:
490
+ duration_ms = int((time.perf_counter() - started) * 1000)
491
+ return SkillResult(
492
+ skill_name=skill.name,
493
+ status=ExecutionStatus.FAILED,
494
+ error=str(exc),
495
+ duration_ms=duration_ms,
496
+ )
497
+
498
+ def _detect_drift(
499
+ self,
500
+ context: ExecutionContext,
501
+ skill_results: list[SkillResult],
502
+ policy_eval: PolicyEvaluation,
503
+ ) -> dict[str, float]:
504
+ """
505
+ Detect drift from execution telemetry.
506
+
507
+ This is a simplified version. Full drift detection
508
+ would use osint_core.drift when implemented.
509
+ """
510
+ drift = {
511
+ "statistical": 0.0,
512
+ "behavioral": 0.0,
513
+ "structural": 0.0,
514
+ "adversarial": 0.0,
515
+ "operational": 0.0,
516
+ "policy": 0.0,
517
+ }
518
+
519
+ # Policy drift: blocked modules indicate policy boundary hit
520
+ if policy_eval.blocked_modules:
521
+ drift["policy"] = 0.4
522
+
523
+ # Operational drift: failed skills
524
+ failed_count = sum(1 for r in skill_results if r.status == ExecutionStatus.FAILED)
525
+ if failed_count > 0:
526
+ drift["operational"] = min(0.2 * failed_count, 1.0)
527
+
528
+ # Adversarial drift: check for suspicious patterns (stub)
529
+ # Full implementation would analyze normalized_indicator
530
+
531
+ return drift
532
+
533
+ def _choose_correction(
534
+ self,
535
+ drift_vector: dict[str, float],
536
+ policy_eval: PolicyEvaluation,
537
+ ) -> str:
538
+ """
539
+ Choose correction verb based on drift vector.
540
+
541
+ Priority: policy > structural > behavioral > adversarial > operational > statistical
542
+ """
543
+ if drift_vector.get("policy", 0.0) >= 0.4:
544
+ return "CONSTRAIN"
545
+
546
+ if drift_vector.get("structural", 0.0) >= 0.5:
547
+ return "REVERT"
548
+
549
+ if drift_vector.get("behavioral", 0.0) >= 0.5:
550
+ return "REVERT"
551
+
552
+ if drift_vector.get("adversarial", 0.0) >= 0.3:
553
+ return "CONSTRAIN"
554
+
555
+ if drift_vector.get("operational", 0.0) >= 0.4:
556
+ return "CONSTRAIN"
557
+
558
+ if drift_vector.get("statistical", 0.0) >= 0.5 and drift_vector.get("adversarial", 0.0) == 0:
559
+ return "ADAPT"
560
+
561
+ return "OBSERVE"
562
+
563
+
564
+ # =============================================================================
565
+ # Public API
566
+ # =============================================================================
567
+
568
+ def create_orchestrator() -> OrchestratorAgent:
569
+ """
570
+ Factory function to create an orchestrator agent.
571
+ """
572
+ return OrchestratorAgent(role="orchestrator")
573
+
574
+
575
+ def list_skills() -> dict[str, Skill]:
576
+ """
577
+ Return the skills registry.
578
+ """
579
+ return SKILLS_REGISTRY.copy()
580
+
581
+
582
+ def get_skill(skill_name: str) -> Skill | None:
583
+ """
584
+ Get a skill by canonical name.
585
+ """
586
+ return SKILLS_REGISTRY.get(skill_name)
osint_core/scheduler.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ osint_core.scheduler
3
+ ====================
4
+
5
+ Latency-conscious scheduler for the Enterprise Drift-Aware OSINT Control Fabric.
6
+
7
+ The scheduler allocates time, trust, and authority. It does not execute actions.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+ from enum import Enum
14
+ from typing import Literal
15
+
16
+
17
+ RiskLabel = Literal["low", "medium", "high", "critical"]
18
+ TrustState = Literal["normal", "degraded", "suspicious", "contested", "unsafe"]
19
+ RouteName = Literal["FAST", "DELIBERATIVE", "CONTAINMENT", "FAIL_CLOSED"]
20
+
21
+
22
+ class ScheduleReason(str, Enum):
23
+ FAST_PATH_AVAILABLE = "fast_path_available"
24
+ DELIBERATIVE_PATH_AVAILABLE = "deliberative_path_available"
25
+ DEADLINE_TOO_TIGHT = "deadline_too_tight"
26
+ TRUST_STATE_DEGRADED = "trust_state_degraded"
27
+ SHORTCUT_DEBT_TOO_HIGH = "shortcut_debt_too_high"
28
+ INVARIANT_VIOLATION = "invariant_violation"
29
+ NO_SAFE_ACTION_FITS = "no_safe_action_fits"
30
+ MISSING_ROLLBACK = "missing_rollback"
31
+ LOW_CONFIDENCE = "low_confidence"
32
+
33
+
34
+ @dataclass(frozen=True)
35
+ class DecisionPacket:
36
+ intent_id: str
37
+ action: str
38
+ risk_label: RiskLabel
39
+ confidence: float
40
+ reversibility: float
41
+ deadline_ms: int
42
+ verification_cost_ms: int
43
+ execution_cost_ms: int
44
+ rollback_cost_ms: int
45
+ expected_utility_decay: float
46
+ required_checks: tuple[str, ...]
47
+ rollback_plan: str
48
+ uncertainty_notes: tuple[str, ...] = field(default_factory=tuple)
49
+
50
+
51
+ @dataclass(frozen=True)
52
+ class ShortcutDebt:
53
+ reduced_checks: int = 0
54
+ cached_policy_uses: int = 0
55
+ skipped_deep_analysis: int = 0
56
+ emergency_overrides: int = 0
57
+
58
+ @property
59
+ def score(self) -> float:
60
+ return min(
61
+ 1.0,
62
+ (self.reduced_checks * 0.10)
63
+ + (self.cached_policy_uses * 0.05)
64
+ + (self.skipped_deep_analysis * 0.15)
65
+ + (self.emergency_overrides * 0.40),
66
+ )
67
+
68
+
69
+ @dataclass(frozen=True)
70
+ class SystemState:
71
+ trust_state: TrustState = "normal"
72
+ shortcut_debt: ShortcutDebt = field(default_factory=ShortcutDebt)
73
+ shortcut_debt_limit: float = 0.70
74
+ queue_depth: int = 0
75
+ hardware_state: TrustState = "normal"
76
+
77
+
78
+ @dataclass(frozen=True)
79
+ class ScheduleDecision:
80
+ route: RouteName
81
+ reason: ScheduleReason
82
+ allowed: bool
83
+ authority_scale: float
84
+ required_checks: tuple[str, ...]
85
+ skipped_checks: tuple[str, ...]
86
+ notes: tuple[str, ...] = field(default_factory=tuple)
87
+
88
+
89
+ INVARIANT_CHECKS: tuple[str, ...] = (
90
+ "hash_salt_present",
91
+ "intent_signature_verified",
92
+ "scope_validated",
93
+ "policy_evaluated",
94
+ "forbidden_modules_blocked",
95
+ "raw_indicators_excluded",
96
+ "conditional_modules_authorized",
97
+ )
98
+
99
+ ADAPTIVE_CHECKS: tuple[str, ...] = (
100
+ "deep_log_correlation",
101
+ "long_horizon_analysis",
102
+ "full_counterfactual_simulation",
103
+ "secondary_model_review",
104
+ )
105
+
106
+
107
+ def total_required_time_ms(packet: DecisionPacket) -> int:
108
+ return packet.verification_cost_ms + packet.execution_cost_ms + packet.rollback_cost_ms
109
+
110
+
111
+ def fits_deadline(packet: DecisionPacket) -> bool:
112
+ return total_required_time_ms(packet) <= packet.deadline_ms
113
+
114
+
115
+ def has_required_rollback(packet: DecisionPacket) -> bool:
116
+ if packet.risk_label in {"high", "critical"}:
117
+ return bool(packet.rollback_plan and packet.rollback_cost_ms > 0)
118
+ return True
119
+
120
+
121
+ def invariant_violations(packet: DecisionPacket) -> tuple[str, ...]:
122
+ required = set(packet.required_checks)
123
+ return tuple(check for check in INVARIANT_CHECKS if check not in required)
124
+
125
+
126
+ def risk_weight(risk_label: RiskLabel) -> float:
127
+ RISK_WEIGHTS = {"low": 0.25, "medium": 0.50, "high": 0.75, "critical": 1.00}
128
+
129
+ def risk_weight(risk_label: RiskLabel) -> float:
130
+ return RISK_WEIGHTS.get(risk_label, 1.00)
131
+
132
+
133
+ def safe_utility(packet: DecisionPacket) -> float:
134
+ time_ratio = min(1.0, total_required_time_ms(packet) / max(packet.deadline_ms, 1))
135
+ return max(
136
+ 0.0,
137
+ (packet.confidence * 0.40)
138
+ + (packet.reversibility * 0.30)
139
+ + ((1.0 - risk_weight(packet.risk_label)) * 0.20)
140
+ + ((1.0 - time_ratio) * 0.10)
141
+ - (packet.expected_utility_decay * 0.10),
142
+ )
143
+
144
+
145
+ def schedule_decision(packet: DecisionPacket, state: SystemState | None = None) -> ScheduleDecision:
146
+ state = state or SystemState()
147
+
148
+ missing_invariants = invariant_violations(packet)
149
+ if missing_invariants:
150
+ return ScheduleDecision(
151
+ route="FAIL_CLOSED",
152
+ reason=ScheduleReason.INVARIANT_VIOLATION,
153
+ allowed=False,
154
+ authority_scale=0.0,
155
+ required_checks=tuple(packet.required_checks),
156
+ skipped_checks=missing_invariants,
157
+ notes=("Invariant checks cannot be skipped under deadline pressure.",),
158
+ )
159
+
160
+ if not has_required_rollback(packet):
161
+ return ScheduleDecision(
162
+ route="FAIL_CLOSED",
163
+ reason=ScheduleReason.MISSING_ROLLBACK,
164
+ allowed=False,
165
+ authority_scale=0.0,
166
+ required_checks=tuple(packet.required_checks),
167
+ skipped_checks=(),
168
+ notes=("High-impact action requires rollback or containment plan.",),
169
+ )
170
+
171
+ if state.shortcut_debt.score >= state.shortcut_debt_limit:
172
+ return containment_decision(packet, ScheduleReason.SHORTCUT_DEBT_TOO_HIGH, "Shortcut debt exceeded configured limit.")
173
+
174
+ if state.trust_state in {"contested", "unsafe"} or state.hardware_state in {"contested", "unsafe"}:
175
+ return containment_decision(packet, ScheduleReason.TRUST_STATE_DEGRADED, "Trust or hardware state is contested/unsafe.")
176
+
177
+ if packet.confidence < 0.30 and packet.risk_label in {"high", "critical"}:
178
+ return containment_decision(packet, ScheduleReason.LOW_CONFIDENCE, "Confidence too low for high-impact decision.")
179
+
180
+ if fits_deadline(packet):
181
+ if packet.risk_label in {"low", "medium"} and packet.reversibility >= 0.50:
182
+ return ScheduleDecision(
183
+ route="FAST",
184
+ reason=ScheduleReason.FAST_PATH_AVAILABLE,
185
+ allowed=True,
186
+ authority_scale=1.0,
187
+ required_checks=tuple(packet.required_checks),
188
+ skipped_checks=(),
189
+ notes=("Low/medium risk action fits available decision window.",),
190
+ )
191
+
192
+ return ScheduleDecision(
193
+ route="DELIBERATIVE",
194
+ reason=ScheduleReason.DELIBERATIVE_PATH_AVAILABLE,
195
+ allowed=True,
196
+ authority_scale=0.75,
197
+ required_checks=tuple(packet.required_checks),
198
+ skipped_checks=(),
199
+ notes=("High-impact or lower-reversibility action fits full verification window.",),
200
+ )
201
+
202
+ if packet.reversibility >= 0.75:
203
+ return containment_decision(packet, ScheduleReason.DEADLINE_TOO_TIGHT, "Full verification/execution/rollback does not fit deadline.")
204
+
205
+ return ScheduleDecision(
206
+ route="FAIL_CLOSED",
207
+ reason=ScheduleReason.NO_SAFE_ACTION_FITS,
208
+ allowed=False,
209
+ authority_scale=0.0,
210
+ required_checks=tuple(packet.required_checks),
211
+ skipped_checks=(),
212
+ notes=("No safe action fits inside the useful decision window.",),
213
+ )
214
+
215
+
216
+ def containment_decision(packet: DecisionPacket, reason: ScheduleReason, note: str) -> ScheduleDecision:
217
+ skipped = tuple(check for check in ADAPTIVE_CHECKS if check in packet.required_checks)
218
+ effective = tuple(check for check in packet.required_checks if check not in skipped)
219
+ return ScheduleDecision(
220
+ route="CONTAINMENT",
221
+ reason=reason,
222
+ allowed=True,
223
+ authority_scale=0.25,
224
+ required_checks=effective,
225
+ skipped_checks=skipped,
226
+ notes=(note, "Authority reduced; prefer reversible, bounded action."),
227
+ )