Spaces:

S-Dreamer
/

PassiveOSINTControlPanel

Running

App Files Files Community

S-Dreamer committed 11 days ago

Commit

a71264b

verified ·

1 Parent(s): 45e50b1

Upload 8 files

Browse files

Files changed (5) hide show

osint_core/__init__.py +22 -0
osint_core/intent.py +448 -0
osint_core/observer.py +142 -0
osint_core/orchestrator.py +586 -0
osint_core/scheduler.py +227 -0

osint_core/__init__.py CHANGED Viewed

@@ -26,6 +26,18 @@ from .validators import (
     ValidationErrorCode,
 )
 # Future modules (to be added as you build them)
 # from .policy import enforce_policy
 # from .enrichment import run_passive_enrichment
@@ -39,6 +51,16 @@ __all__ = [
     "assert_valid_or_raise",
     "ValidationResult",
     "ValidationErrorCode",
 ]
 __version__ = "0.1.0"

     ValidationErrorCode,
 )
+from .orchestrator import (
+    create_orchestrator,
+    list_skills,
+    get_skill,
+    OrchestratorAgent,
+    EnrichmentWorkflow,
+    ExecutionContext,
+    Skill,
+    Tool,
+    SkillResult,
+)
 # Future modules (to be added as you build them)
 # from .policy import enforce_policy
 # from .enrichment import run_passive_enrichment
     "assert_valid_or_raise",
     "ValidationResult",
     "ValidationErrorCode",
+    # orchestrator
+    "create_orchestrator",
+    "list_skills",
+    "get_skill",
+    "OrchestratorAgent",
+    "EnrichmentWorkflow",
+    "ExecutionContext",
+    "Skill",
+    "Tool",
+    "SkillResult",
 ]
 __version__ = "0.1.0"

osint_core/intent.py ADDED Viewed

	@@ -0,0 +1,448 @@

+"""
+osint_core.intent
+=================
+Intent packet modeling for the Enterprise Drift-Aware OSINT Control Fabric.
+This module turns a validated user request into an explicit, bounded, signed
+intent packet. It does not execute actions, perform network calls, mutate
+policy, update memory, or write audit records.
+"""
+from __future__ import annotations
+import hashlib
+import hmac
+import json
+import os
+import re
+import uuid
+from dataclasses import asdict, dataclass, field, replace
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Iterable, Literal, Mapping
# Static (type-checker) vocabularies for intent packets. The VALID_* sets
# defined later in this module hold the same values for runtime validation.

# Actions an intent packet may request.
IntentAction = Literal[
    "enrich_indicator",
    "generate_report",
    "update_sources",
    "run_module",
    "export_audit",
]
# Risk classification attached to a packet.
RiskLabel = Literal["low", "medium", "high", "critical"]
# Rollback strategy declared by a packet.
RollbackStrategy = Literal["none", "observe_only", "disable_module", "sandbox", "revert"]
# Kind of indicator a scope refers to; validate_scope accepts exactly these values.
IndicatorType = Literal["domain", "username", "email", "ip", "url", "unknown"]
class IntentErrorCode(str, Enum):
    """Machine-readable codes attached to intent validation failures.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """

    # A required field is empty (validate_intent also uses this code for a
    # malformed manifest_hash).
    MISSING_FIELD = "missing_field"
    INVALID_ACTION = "invalid_action"
    INVALID_RISK = "invalid_risk"
    INVALID_SCOPE = "invalid_scope"
    INVALID_ROLLBACK = "invalid_rollback"
    # An allowed operation also appears in the scope's forbidden list.
    FORBIDDEN_OPERATION_REQUESTED = "forbidden_operation_requested"
    # A key named like a raw indicator was found in the packet payload.
    RAW_INDICATOR_LEAK = "raw_indicator_leak"
    UNSIGNED_PACKET = "unsigned_packet"
    SIGNATURE_MISMATCH = "signature_mismatch"
    # No signing secret is available in the environment.
    SECRET_MISSING = "secret_missing"
class IntentValidationError(ValueError):
    """``ValueError`` raised for invalid intents; exposes the failure ``code``."""

    def __init__(self, message: str, code: IntentErrorCode):
        # The human-readable message goes to ValueError; the code rides along
        # as an attribute so callers can branch on it.
        super().__init__(message)
        self.code = code
@dataclass(frozen=True)
class IntentScope:
    """Explicit scope boundary for an intent.

    target_hash must be a hash/HMAC of the target. Raw indicators do not belong
    in scope objects or intent packets.

    Instances are immutable. ``make_scope`` is the constructor that also
    merges the default forbidden operations and validates the result.
    """

    # Hex digest identifying the target (checked against HASH_RE by validate_scope).
    target_hash: str
    indicator_type: IndicatorType
    # Operations the intent may perform; validate_scope rejects an empty tuple.
    allowed_operations: tuple[str, ...]
    # Operations that must never run; must not overlap allowed_operations.
    forbidden_operations: tuple[str, ...] = field(default_factory=tuple)
    # Validity window in seconds; validate_scope requires 1..86400.
    time_horizon_seconds: int = 300
    success_criteria: tuple[str, ...] = field(default_factory=tuple)

    def to_dict(self) -> dict[str, Any]:
        """Return the scope as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)
@dataclass(frozen=True)
class IntentPacket:
    """Immutable record of one requested action, optionally carrying a signature."""

    intent_id: str
    action: IntentAction
    purpose: str
    scope: IntentScope
    requested_modules: tuple[str, ...]
    expected_side_effects: tuple[str, ...]
    rollback_strategy: RollbackStrategy
    risk_label: RiskLabel
    manifest_hash: str
    created_at: str
    schema_version: str = "1.0.0"
    signature: str | None = None

    def unsigned_payload(self) -> dict[str, Any]:
        """Return every field except ``signature``; this is the payload that gets signed."""
        return self.to_dict(include_signature=False)

    def to_dict(self, include_signature: bool = True) -> dict[str, Any]:
        """Return the packet as a dict, dropping ``signature`` when asked to."""
        as_mapping = asdict(self)
        if include_signature:
            return as_mapping
        as_mapping.pop("signature", None)
        return as_mapping

    def to_json(self, include_signature: bool = True) -> str:
        """Serialize the packet with the module's canonical JSON encoder."""
        body = self.to_dict(include_signature=include_signature)
        return canonical_json(body)
@dataclass(frozen=True)
class IntentValidationResult:
    """Outcome of a validation pass over a scope or an intent packet."""

    # True when no problems were found.
    ok: bool
    # Human-readable messages, in the order the checks ran.
    errors: tuple[str, ...] = field(default_factory=tuple)
    # Codes appended alongside each message by the validators in this module.
    error_codes: tuple[IntentErrorCode, ...] = field(default_factory=tuple)
# Runtime membership sets used by validate_intent.
VALID_ACTIONS: set[str] = {
    "enrich_indicator",
    "generate_report",
    "update_sources",
    "run_module",
    "export_audit",
}
VALID_RISKS: set[str] = {"low", "medium", "high", "critical"}
VALID_ROLLBACK_STRATEGIES: set[str] = {
    "none",
    "observe_only",
    "disable_module",
    "sandbox",
    "revert",
}
# Operations make_scope always appends to a scope's forbidden list.
DEFAULT_FORBIDDEN_OPERATIONS: tuple[str, ...] = (
    "port_scan",
    "mass_scan",
    "brute_force",
    "credential_testing",
    "exploitation",
    "directory_fuzzing",
    "web_vulnerability_scan",
    "password_spray",
    "login_attempt",
)
# Key names treated as raw indicators; find_raw_indicator_fields compares
# lowercased keys against this set.
RAW_FIELD_NAMES: set[str] = {
    "raw_indicator",
    "raw_input",
    "indicator",
    "target",
    "domain",
    "email",
    "username",
    "url",
    "ip",
}
# 32 to 128 hexadecimal characters, either case.
HASH_RE = re.compile(r"^[a-fA-F0-9]{32,128}$")
def now_utc() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
def canonical_json(value: Mapping[str, Any]) -> str:
    """Serialize *value* to compact JSON with sorted keys.

    The output is deterministic for a given mapping, which is what the
    signing and hashing helpers rely on. Non-ASCII text is emitted as-is.
    """
    encoder_options = {
        "sort_keys": True,
        "separators": (",", ":"),
        "ensure_ascii": False,
    }
    return json.dumps(value, **encoder_options)
def get_intent_signing_secret() -> str:
    """Look up the HMAC signing secret from the environment.

    INTENT_SIGNING_SECRET is tried first, then OSINT_HASH_SALT. An unset or
    empty variable is skipped.

    Raises:
        IntentValidationError: with code SECRET_MISSING when neither variable
            holds a non-empty value.
    """
    for variable in ("INTENT_SIGNING_SECRET", "OSINT_HASH_SALT"):
        candidate = os.getenv(variable)
        if candidate:
            return candidate
    raise IntentValidationError(
        "Missing INTENT_SIGNING_SECRET or OSINT_HASH_SALT.",
        IntentErrorCode.SECRET_MISSING,
    )
def hash_manifest_payload(payload: Mapping[str, Any]) -> str:
    """Return the SHA-256 hex digest of *payload*'s canonical JSON form."""
    encoded = canonical_json(payload).encode("utf-8")
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
def sign_payload(payload: Mapping[str, Any], secret: str | None = None) -> str:
    """Return the HMAC-SHA256 hex digest of *payload*'s canonical JSON.

    When *secret* is None or empty, the secret comes from
    ``get_intent_signing_secret()`` (which raises if none is configured).
    """
    key = secret if secret else get_intent_signing_secret()
    key_bytes = key.encode("utf-8")
    message = canonical_json(payload).encode("utf-8")
    return hmac.new(key_bytes, message, hashlib.sha256).hexdigest()
def sign_intent(packet: IntentPacket, secret: str | None = None) -> IntentPacket:
    """Return a copy of *packet* whose ``signature`` covers its unsigned payload."""
    payload = packet.unsigned_payload()
    return replace(packet, signature=sign_payload(payload, secret=secret))
def verify_intent_signature(packet: IntentPacket, secret: str | None = None) -> bool:
    """Check that *packet*'s signature matches its unsigned payload.

    Args:
        packet: The packet to verify.
        secret: Signing secret; when omitted, ``sign_payload`` resolves it
            from the environment.

    Returns:
        True when the signature matches. The function never returns False.

    Raises:
        IntentValidationError: UNSIGNED_PACKET when the packet has no
            signature, SIGNATURE_MISMATCH when the signature differs.
    """
    if not packet.signature:
        raise IntentValidationError("Intent packet is unsigned.", IntentErrorCode.UNSIGNED_PACKET)
    expected = sign_payload(packet.unsigned_payload(), secret=secret)
    # Compare as bytes: hmac.compare_digest raises TypeError for str arguments
    # containing non-ASCII characters, which would let a tampered signature
    # surface as the wrong exception type. backslashreplace keeps encoding
    # from failing on lone surrogates; such input can never equal a hex digest.
    supplied = str(packet.signature).encode("utf-8", "backslashreplace")
    if not hmac.compare_digest(expected.encode("utf-8"), supplied):
        raise IntentValidationError(
            "Intent signature mismatch.",
            IntentErrorCode.SIGNATURE_MISMATCH,
        )
    return True
def make_scope(
    *,
    target_hash: str,
    indicator_type: IndicatorType,
    allowed_operations: Iterable[str],
    forbidden_operations: Iterable[str] | None = None,
    time_horizon_seconds: int = 300,
    success_criteria: Iterable[str] | None = None,
) -> IntentScope:
    """Build a validated ``IntentScope``.

    Caller-supplied forbidden operations come first, followed by
    DEFAULT_FORBIDDEN_OPERATIONS; both operation lists are stripped and
    de-duplicated while keeping first-seen order.

    Raises:
        IntentValidationError: when the resulting scope fails validation.
    """
    forbidden_pool = list(forbidden_operations or ())
    forbidden_pool.extend(DEFAULT_FORBIDDEN_OPERATIONS)
    forbidden = tuple(dedupe_preserve_order(forbidden_pool))
    allowed = tuple(dedupe_preserve_order(allowed_operations))
    criteria = tuple(success_criteria or ())

    built = IntentScope(
        target_hash=target_hash,
        indicator_type=indicator_type,
        allowed_operations=allowed,
        forbidden_operations=forbidden,
        time_horizon_seconds=time_horizon_seconds,
        success_criteria=criteria,
    )
    validate_scope_or_raise(built)
    return built
def create_intent_packet(
    *,
    action: IntentAction,
    purpose: str,
    scope: IntentScope,
    requested_modules: Iterable[str],
    expected_side_effects: Iterable[str] | None = None,
    rollback_strategy: RollbackStrategy = "observe_only",
    risk_label: RiskLabel = "low",
    manifest_hash: str,
    intent_id: str | None = None,
    created_at: str | None = None,
    sign: bool = True,
    signing_secret: str | None = None,
) -> IntentPacket:
    """Assemble, validate and (by default) sign an intent packet.

    A random ``intent_<hex>`` id and the current UTC timestamp are used when
    *intent_id* / *created_at* are not supplied. With ``sign=True`` the packet
    is signed and the signature is verified before returning.

    Raises:
        IntentValidationError: on validation failure, a missing secret, or a
            signature mismatch.
    """
    packet_fields = {
        "intent_id": intent_id or f"intent_{uuid.uuid4().hex}",
        "action": action,
        "purpose": purpose.strip(),
        "scope": scope,
        "requested_modules": tuple(dedupe_preserve_order(requested_modules)),
        "expected_side_effects": tuple(expected_side_effects or ()),
        "rollback_strategy": rollback_strategy,
        "risk_label": risk_label,
        "manifest_hash": manifest_hash,
        "created_at": created_at or now_utc(),
    }
    draft = IntentPacket(**packet_fields)
    validate_intent_or_raise(draft)
    if not sign:
        return draft

    signed = sign_intent(draft, secret=signing_secret)
    # Round-trip check: the freshly produced signature must verify.
    verify_intent_signature(signed, secret=signing_secret)
    return signed
def validate_scope(scope: IntentScope) -> IntentValidationResult:
    """Run every scope check and collect all failures, without raising."""
    messages: list[str] = []
    failure_codes: list[IntentErrorCode] = []

    def flag(message: str, code: IntentErrorCode) -> None:
        messages.append(message)
        failure_codes.append(code)

    known_indicator_types = {"domain", "username", "email", "ip", "url", "unknown"}

    if not scope.target_hash:
        flag("scope.target_hash is required.", IntentErrorCode.MISSING_FIELD)
    elif not HASH_RE.fullmatch(scope.target_hash):
        flag("scope.target_hash must look like a cryptographic hash.", IntentErrorCode.INVALID_SCOPE)

    if scope.indicator_type not in known_indicator_types:
        flag("scope.indicator_type is invalid.", IntentErrorCode.INVALID_SCOPE)

    if not scope.allowed_operations:
        flag("scope.allowed_operations must not be empty.", IntentErrorCode.MISSING_FIELD)

    horizon = scope.time_horizon_seconds
    if horizon <= 0 or horizon > 86_400:
        flag("scope.time_horizon_seconds must be between 1 and 86400.", IntentErrorCode.INVALID_SCOPE)

    # An operation may not be both allowed and forbidden.
    overlap = set(scope.allowed_operations).intersection(scope.forbidden_operations)
    if overlap:
        flag(
            f"Allowed operations include forbidden operation(s): {sorted(overlap)}",
            IntentErrorCode.FORBIDDEN_OPERATION_REQUESTED,
        )

    return IntentValidationResult(
        ok=not messages,
        errors=tuple(messages),
        error_codes=tuple(failure_codes),
    )
def validate_scope_or_raise(scope: IntentScope) -> None:
    """Validate *scope*; raise ``IntentValidationError`` for the first failure found."""
    outcome = validate_scope(scope)
    if outcome.ok:
        return
    first_message, first_code = outcome.errors[0], outcome.error_codes[0]
    raise IntentValidationError(first_message, first_code)
def validate_intent(packet: IntentPacket) -> IntentValidationResult:
    """Run every packet-level check, then the scope checks, without raising.

    Failures are reported in check order: action, purpose, risk label,
    rollback strategy, manifest hash, scope, raw-indicator key scan.
    """
    messages: list[str] = []
    failure_codes: list[IntentErrorCode] = []

    def flag(message: str, code: IntentErrorCode) -> None:
        messages.append(message)
        failure_codes.append(code)

    if packet.action not in VALID_ACTIONS:
        flag(f"Invalid action: {packet.action}", IntentErrorCode.INVALID_ACTION)
    if not packet.purpose:
        flag("purpose is required.", IntentErrorCode.MISSING_FIELD)
    if packet.risk_label not in VALID_RISKS:
        flag(f"Invalid risk label: {packet.risk_label}", IntentErrorCode.INVALID_RISK)
    if packet.rollback_strategy not in VALID_ROLLBACK_STRATEGIES:
        flag(f"Invalid rollback strategy: {packet.rollback_strategy}", IntentErrorCode.INVALID_ROLLBACK)

    # Both an absent and a malformed manifest hash are reported as MISSING_FIELD.
    manifest_ok = bool(packet.manifest_hash) and HASH_RE.fullmatch(packet.manifest_hash)
    if not manifest_ok:
        flag("manifest_hash must look like a cryptographic hash.", IntentErrorCode.MISSING_FIELD)

    nested = validate_scope(packet.scope)
    messages.extend(nested.errors)
    failure_codes.extend(nested.error_codes)

    leaked_paths = find_raw_indicator_fields(packet.to_dict())
    if leaked_paths:
        flag(
            f"Raw indicator-like field(s) are not allowed in intent packet: {leaked_paths}",
            IntentErrorCode.RAW_INDICATOR_LEAK,
        )

    return IntentValidationResult(
        ok=not messages,
        errors=tuple(messages),
        error_codes=tuple(failure_codes),
    )
def validate_intent_or_raise(packet: IntentPacket) -> None:
    """Validate *packet*; raise ``IntentValidationError`` for the first failure found."""
    outcome = validate_intent(packet)
    if outcome.ok:
        return
    first_message, first_code = outcome.errors[0], outcome.error_codes[0]
    raise IntentValidationError(first_message, first_code)
def find_raw_indicator_fields(value: Any, path: str = "") -> list[str]:
    """Return paths of keys whose lowercased name is in RAW_FIELD_NAMES.

    Mappings, lists and tuples are walked depth-first. Mapping keys are joined
    with ``.`` and sequence positions are written as ``[index]``. Any other
    value yields no findings.
    """
    if isinstance(value, Mapping):
        hits: list[str] = []
        for key, child in value.items():
            label = str(key)
            here = label if not path else f"{path}.{label}"
            if label.lower() in RAW_FIELD_NAMES:
                hits.append(here)
            hits += find_raw_indicator_fields(child, here)
        return hits

    if isinstance(value, (list, tuple)):
        return [
            hit
            for position, child in enumerate(value)
            for hit in find_raw_indicator_fields(child, f"{path}[{position}]")
        ]

    return []
def intent_fingerprint(packet: IntentPacket) -> str:
    """Return the SHA-256 hex digest of the packet's JSON, signature included."""
    serialized = packet.to_json(include_signature=True)
    hasher = hashlib.sha256()
    hasher.update(serialized.encode("utf-8"))
    return hasher.hexdigest()
def unsigned_intent_fingerprint(packet: IntentPacket) -> str:
    """Return the SHA-256 hex digest of the packet's JSON, signature excluded."""
    serialized = packet.to_json(include_signature=False)
    hasher = hashlib.sha256()
    hasher.update(serialized.encode("utf-8"))
    return hasher.hexdigest()
def risk_score(risk_label: RiskLabel) -> float:
    """Map a risk label to its numeric score.

    Raises:
        KeyError: if *risk_label* is not one of the four known labels.
    """
    score_by_label = dict(low=0.25, medium=0.5, high=0.75, critical=1.0)
    return score_by_label[risk_label]
def default_rollback_for_risk(risk_label: RiskLabel) -> RollbackStrategy:
    """Pick the default rollback strategy for a risk label.

    Any label other than low, medium or high falls through to ``"revert"``.
    """
    ladder = (
        ("low", "observe_only"),
        ("medium", "disable_module"),
        ("high", "sandbox"),
    )
    for label, strategy in ladder:
        if risk_label == label:
            return strategy
    return "revert"
def derive_risk_label(
    *,
    requested_modules: Iterable[str],
    authorized_target: bool,
    contains_conditional_operation: bool = False,
) -> RiskLabel:
    """Derive a risk label from the requested module names.

    Module names are compared after stripping, lowercasing and replacing
    spaces and hyphens with underscores.

    Args:
        requested_modules: Module names the caller wants to run.
        authorized_target: Whether the caller is authorized for the target.
        contains_conditional_operation: Force the conditional branch even when
            no conditional module name is present.

    Returns:
        "critical" if any forbidden module is requested; otherwise "medium"
        (authorized) or "high" (unauthorized) when a conditional operation is
        involved; otherwise "low".
    """
    modules = {
        str(module).strip().lower().replace(" ", "_").replace("-", "_")
        for module in requested_modules
    }
    forbidden = {
        # Tool names that imply active scanning.
        "nmap",
        "masscan",
        # Every entry of DEFAULT_FORBIDDEN_OPERATIONS, spelled out so that an
        # operation the scope layer forbids is never rated below "critical".
        "port_scan",
        "mass_scan",
        "brute_force",
        "credential_testing",
        "exploitation",
        "directory_fuzzing",
        "web_vulnerability_scan",
        "password_spray",
        "login_attempt",
    }
    conditional = {
        "http_headers",
        "robots_txt",
        "screenshot",
        "ssl_labs_ssl_test",
    }
    if modules & forbidden:
        return "critical"
    if contains_conditional_operation or modules & conditional:
        return "medium" if authorized_target else "high"
    return "low"
def dedupe_preserve_order(values: Iterable[str]) -> list[str]:
    """Return stripped, non-empty string forms of *values* without duplicates.

    Each item is converted with ``str()`` and stripped; blanks are dropped and
    the first occurrence of each remaining value keeps its position.
    """
    stripped = (str(item).strip() for item in values)
    # dict keys are unique and keep insertion order, giving an ordered de-dup.
    return list(dict.fromkeys(text for text in stripped if text))

osint_core/observer.py ADDED Viewed

	@@ -0,0 +1,142 @@

+"""
+osint_core.observer
+===================
+Independent observer circuit for the Enterprise Drift-Aware OSINT Control Fabric.
+The observer does not execute. It reconstructs expected behavior from intent,
+policy, and executor trace, then emits dissent when reality does not match
+declared constraints.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Mapping
class ObserverSeverity(str, Enum):
    """Severity attached to an observer check; members are also ``str`` values."""

    INFO = "info"
    # Used by the output-schema and expected-side-effects checks.
    WARNING = "warning"
    # Used by the intent/trace, module-policy and raw-indicator checks; a
    # failed CRITICAL check sets ObserverAssessment.has_critical_violation.
    CRITICAL = "critical"
@dataclass(frozen=True)
class ObserverCheck:
    """Result of a single observer check."""

    # Stable identifier of the check (e.g. "intent_trace_match").
    name: str
    # True when the check passed.
    ok: bool
    # How serious a failure of this check is.
    severity: ObserverSeverity
    # Statement of the rule being checked; set whether or not the check passed.
    reason: str
    # Supporting details for the verdict.
    # NOTE(review): a dict field makes instances unhashable even though the
    # dataclass is frozen - confirm nothing relies on hashing checks.
    evidence: dict[str, Any] = field(default_factory=dict)
@dataclass(frozen=True)
class ExecutionTrace:
    """What the executor reports about one run, as consumed by the observer checks."""

    # Compared with the intent packet's intent_id by check_intent_trace_match.
    intent_id: str
    modules_requested: tuple[str, ...]
    # Compared with the policy's allowed modules by check_modules_match_policy.
    modules_executed: tuple[str, ...]
    modules_blocked: tuple[str, ...]
    # Compared with the intent's expected_side_effects.
    observed_effects: tuple[str, ...]
    # Reported by the executor; surfaced as-is by check_output_schema.
    output_schema_valid: bool
    # Keys are scanned for raw-indicator names by check_no_raw_indicator_leak.
    audit_payload: Mapping[str, Any]
    errors: tuple[str, ...] = field(default_factory=tuple)
@dataclass(frozen=True)
class ObserverAssessment:
    """All observer checks for one intent, plus aggregate verdicts."""

    intent_id: str
    checks: tuple[ObserverCheck, ...]

    @property
    def dissent(self) -> bool:
        """True when at least one check failed."""
        return not all(check.ok for check in self.checks)

    @property
    def has_critical_violation(self) -> bool:
        """True when at least one failed check has CRITICAL severity."""
        for check in self.checks:
            if check.ok:
                continue
            if check.severity == ObserverSeverity.CRITICAL:
                return True
        return False
# Audit-payload key names that count as raw indicator data; consulted by
# check_no_raw_indicator_leak.
# NOTE(review): unlike RAW_FIELD_NAMES in osint_core.intent, this set has no
# "target" entry - confirm whether that difference is intentional.
RAW_AUDIT_KEYS = {
    "raw_indicator",
    "raw_input",
    "indicator",
    "email",
    "domain",
    "username",
    "url",
    "ip",
}
def observe_execution(intent: IntentPacket, trace: ExecutionTrace, policy_result: PolicyEvaluation) -> ObserverAssessment:
    """Run all observer checks against *trace* and bundle them into an assessment.

    Check order is fixed: intent/trace match, module policy, output schema,
    raw-indicator leak, expected side effects. The assessment is keyed by the
    trace's ``intent_id``.
    """
    # NOTE(review): IntentPacket and PolicyEvaluation are annotation-only names
    # here; this module's visible imports do not bring them into scope.
    findings = [
        check_intent_trace_match(intent, trace),
        check_modules_match_policy(trace, policy_result),
        check_output_schema(trace),
        check_no_raw_indicator_leak(trace),
        check_expected_side_effects(intent, trace),
    ]
    return ObserverAssessment(intent_id=trace.intent_id, checks=tuple(findings))
def check_intent_trace_match(intent: Any, trace: ExecutionTrace) -> ObserverCheck:
    """Check that the trace's ``intent_id`` equals the intent's.

    An intent without an ``intent_id`` attribute is treated as having id None.
    """
    declared_id = getattr(intent, "intent_id", None)
    traced_id = trace.intent_id
    return ObserverCheck(
        name="intent_trace_match",
        ok=(declared_id == traced_id),
        severity=ObserverSeverity.CRITICAL,
        reason="Execution trace must correspond to the intent packet.",
        evidence={"expected": declared_id, "actual": traced_id},
    )
def check_modules_match_policy(trace: ExecutionTrace, policy_result: Any) -> ObserverCheck:
    """Check that every executed module is in the policy's allowed set.

    *policy_result* may be a dict with an ``allowed_modules`` key or an object
    with an ``allowed_modules`` attribute; a missing entry means nothing is
    allowed.
    """
    if isinstance(policy_result, dict):
        permitted_source = policy_result.get("allowed_modules", [])
    else:
        permitted_source = getattr(policy_result, "allowed_modules", [])
    permitted = set(permitted_source)

    ran = set(trace.modules_executed)
    offenders = sorted(ran.difference(permitted))
    return ObserverCheck(
        name="modules_match_policy",
        ok=not offenders,
        severity=ObserverSeverity.CRITICAL,
        reason="Executed modules must be allowed by policy.",
        evidence={"unexpected_modules": offenders},
    )
def check_output_schema(trace: ExecutionTrace) -> ObserverCheck:
    """Report the executor's own ``output_schema_valid`` flag as a WARNING-level check."""
    schema_ok = trace.output_schema_valid
    return ObserverCheck(
        name="output_schema_valid",
        ok=schema_ok,
        severity=ObserverSeverity.WARNING,
        reason="Executor output should conform to expected schema.",
        evidence=dict(),
    )
def _collect_raw_audit_keys(value: Any, path: str = "") -> list[str]:
    """Return paths of keys in *value* whose lowercased name is in RAW_AUDIT_KEYS."""
    hits: list[str] = []
    if isinstance(value, Mapping):
        for key, child in value.items():
            key_text = str(key)
            child_path = f"{path}.{key_text}" if path else key_text
            if key_text.lower() in RAW_AUDIT_KEYS:
                hits.append(child_path)
            hits.extend(_collect_raw_audit_keys(child, child_path))
    elif isinstance(value, (list, tuple)):
        for index, child in enumerate(value):
            hits.extend(_collect_raw_audit_keys(child, f"{path}[{index}]"))
    return hits


def check_no_raw_indicator_leak(trace: ExecutionTrace) -> ObserverCheck:
    """Check that the audit payload carries no raw-indicator keys at any depth.

    The payload is walked recursively through mappings, lists and tuples, and
    key names are compared case-insensitively, so a nested or differently
    cased key such as ``{"meta": {"Email": ...}}`` is reported too.

    Returns:
        A CRITICAL-severity check. ``evidence["raw_fields"]`` is the sorted
        list of offending key paths; a top-level key's path is the key itself.
    """
    present = sorted(set(_collect_raw_audit_keys(trace.audit_payload)))
    return ObserverCheck(
        name="no_raw_indicator_leak",
        ok=not present,
        severity=ObserverSeverity.CRITICAL,
        reason="Audit payload must not contain raw indicator fields.",
        evidence={"raw_fields": present},
    )
def check_expected_side_effects(intent: Any, trace: ExecutionTrace) -> ObserverCheck:
    """Check that every side effect the intent declared was observed.

    Only missing effects are reported; observed effects the intent did not
    declare do not fail this check.
    """
    declared = set(getattr(intent, "expected_side_effects", ()))
    seen = set(trace.observed_effects)
    absent = sorted(effect for effect in declared if effect not in seen)
    return ObserverCheck(
        name="expected_side_effects_present",
        ok=not absent,
        severity=ObserverSeverity.WARNING,
        reason="Declared expected side effects should be observed or explained.",
        evidence={"missing_effects": absent},
    )

osint_core/orchestrator.py ADDED Viewed

	@@ -0,0 +1,586 @@

+"""
+osint_core.orchestrator
+=======================
+Orchestrator agent for coordinating passive OSINT enrichment workflows.
+Design principles:
+- Coordinates validation → policy → enrichment → drift → audit pipeline
+- Manages skills (capabilities) and tools (external actions)
+- Maintains execution context and telemetry
+- Enforces security boundaries at each stage
+- Pure orchestration — does not implement enrichment logic directly
+The orchestrator pattern:
+1. Accept user request (indicator + modules + authorization)
+2. Validate input (osint_core.validators)
+3. Evaluate policy (osint_core.policy)
+4. Execute allowed modules via skills
+5. Detect drift (osint_core.drift when implemented)
+6. Choose correction verb
+7. Generate audit trail
+8. Return structured result
+"""
+from __future__ import annotations
+import subprocess
+import time
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Any, Callable, Literal
+from .policy import (
+    PolicyEvaluation,
+    evaluate_modules,
+    enforce_correction_verb,
+)
+from .validators import (
+    IndicatorType,
+    ValidationResult,
+    validate_indicator,
+)
+# =============================================================================
+# Agent data structures
+# =============================================================================
# Role an agent instance plays; OrchestratorAgent defaults to "orchestrator".
AgentRole = Literal["orchestrator", "validator", "enricher", "analyst"]
# Grouping label for a skill; the conditional_fetch skills in SKILLS_REGISTRY
# are the ones marked requires_authorization=True.
SkillCategory = Literal["validation", "passive_lookup", "conditional_fetch", "analysis"]
# Kind of action a tool performs.
ToolType = Literal["subprocess", "network", "file", "computation"]
class ExecutionStatus(str, Enum):
    """Status of a skill execution; members are also ``str`` values."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    # Reported by _execute_skills when a module has no registered skill.
    FAILED = "failed"
    # Reported by _execute_skills when the indicator type is unsupported.
    BLOCKED = "blocked"
@dataclass(frozen=True)
class Tool:
    """
    A tool is an atomic capability that performs external actions.
    Examples: DNS query, whois lookup, HTTP request, file parsing

    Instances are immutable descriptors; this class holds metadata only and
    defines no execution logic.
    """

    # Identifier of the tool.
    name: str
    tool_type: ToolType
    description: str
    # Whether the tool is declared as needing target authorization.
    requires_authorization: bool = False
    # Declared time budget in seconds (enforcement is not visible in this class).
    timeout_seconds: float = 5.0
@dataclass(frozen=True)
class Skill:
    """
    A skill is a higher-level capability composed of tools.
    Examples: "Resolve DNS", "Fetch WHOIS", "Parse URL"
    """

    # Display name (e.g. "DNS Records").
    name: str
    category: SkillCategory
    description: str
    # Machine name; SKILLS_REGISTRY uses it as the dictionary key.
    canonical_name: str
    # Indicator types this skill accepts; checked by _execute_skills.
    # NOTE(review): list fields make instances unhashable and mutable in place
    # despite frozen=True - tuples would avoid both; confirm with callers.
    required_indicator_types: list[IndicatorType]
    tools: list[Tool]
    requires_authorization: bool = False
@dataclass
class ExecutionContext:
    """
    Execution context tracks the state of an enrichment workflow.

    Mutable: execute_workflow assigns ``policy_evaluation`` after creation.
    """

    # "run_<UTC timestamp>_<8 hex chars>", generated by create_context.
    run_id: str
    # ISO-8601 UTC timestamp of context creation.
    started_at: str
    # "unknown" when validation failed.
    indicator_type: IndicatorType
    # Normalized raw indicator; empty string when validation failed.
    normalized_indicator: str
    # HMAC-SHA256 hex digest of the normalized indicator; empty on failure.
    indicator_hash: str
    requested_modules: list[str]
    authorized_target: bool
    passive_only: bool
    policy_evaluation: PolicyEvaluation | None = None
    telemetry: dict[str, Any] = field(default_factory=dict)
    # Holds the validation error message when validation failed.
    errors: list[str] = field(default_factory=list)
@dataclass
class SkillResult:
    """
    Result from executing a skill.
    """

    # Skill display name, or the requested module name when no skill was found.
    skill_name: str
    status: ExecutionStatus
    # Skill output payload; empty by default.
    data: dict[str, Any] = field(default_factory=dict)
    # Explanation when the status is FAILED or BLOCKED.
    error: str | None = None
    duration_ms: int = 0
@dataclass
class EnrichmentWorkflow:
    """
    Complete enrichment workflow result.

    Returned by OrchestratorAgent.execute_workflow on both the success path
    and the early validation-failure path.
    """

    context: ExecutionContext
    validation_result: ValidationResult
    policy_evaluation: PolicyEvaluation
    # Empty when validation failed.
    skill_results: list[SkillResult]
    # Empty dict when validation failed.
    drift_vector: dict[str, float]
    # "REVERT" when validation failed; otherwise chosen by _choose_correction.
    correction_verb: str
    # Wall-clock duration in milliseconds (success path reports at least 1).
    duration_ms: int
+# =============================================================================
+# Tool implementations
+# =============================================================================
# Tool descriptors. These are metadata only; no executor is attached here.

# DNS resolution tool
DNS_QUERY_TOOL = Tool(
    name="dns_query",
    tool_type="network",
    description="Query DNS records using system resolver",
    requires_authorization=False,
    timeout_seconds=4.0,
)
# WHOIS lookup tool
# NOTE(review): no skill in the visible SKILLS_REGISTRY references this tool.
WHOIS_TOOL = Tool(
    name="whois",
    tool_type="subprocess",
    description="Perform WHOIS lookup via system command",
    requires_authorization=False,
    timeout_seconds=5.0,
)
# URL parser tool (local, no network)
URL_PARSE_TOOL = Tool(
    name="url_parse",
    tool_type="computation",
    description="Parse URL components locally",
    requires_authorization=False,
    timeout_seconds=1.0,
)
# HTTP header fetcher (conditional, requires auth)
HTTP_HEADERS_TOOL = Tool(
    name="http_headers",
    tool_type="network",
    description="Fetch HTTP headers from target",
    requires_authorization=True,
    timeout_seconds=5.0,
)
# Robots.txt fetcher (conditional, requires auth)
ROBOTS_TXT_TOOL = Tool(
    name="robots_txt",
    tool_type="network",
    description="Fetch robots.txt from target",
    requires_authorization=True,
    timeout_seconds=5.0,
)
+# =============================================================================
+# Skill definitions
+# =============================================================================
# Registry of available skills, keyed by canonical module name.
# OrchestratorAgent looks skills up here by the module names that policy
# evaluation allowed. Each key equals the skill's canonical_name.
SKILLS_REGISTRY: dict[str, Skill] = {
    # Passive: builds links only, uses no tools.
    "resource_links": Skill(
        name="Resource Links",
        canonical_name="resource_links",
        category="passive_lookup",
        description="Generate links to external OSINT resources",
        required_indicator_types=["domain", "username", "email", "ip", "url"],
        tools=[],  # No external tools needed
        requires_authorization=False,
    ),
    # Passive: domain indicators only.
    "dns_records": Skill(
        name="DNS Records",
        canonical_name="dns_records",
        category="passive_lookup",
        description="Resolve DNS A, AAAA, MX, NS records",
        required_indicator_types=["domain"],
        tools=[DNS_QUERY_TOOL],
        requires_authorization=False,
    ),
    # Local analysis: URL indicators only.
    "local_url_parse": Skill(
        name="Local URL Parse",
        canonical_name="local_url_parse",
        category="analysis",
        description="Parse URL components without contacting target",
        required_indicator_types=["url"],
        tools=[URL_PARSE_TOOL],
        requires_authorization=False,
    ),
    # Conditional: contacts the target, so authorization is required.
    "http_headers": Skill(
        name="HTTP Headers",
        canonical_name="http_headers",
        category="conditional_fetch",
        description="Fetch HTTP headers from authorized target",
        required_indicator_types=["url", "domain"],
        tools=[HTTP_HEADERS_TOOL],
        requires_authorization=True,
    ),
    # Conditional: contacts the target, so authorization is required.
    "robots_txt": Skill(
        name="Robots.txt",
        canonical_name="robots_txt",
        category="conditional_fetch",
        description="Fetch robots.txt from authorized target",
        required_indicator_types=["url", "domain"],
        tools=[ROBOTS_TXT_TOOL],
        requires_authorization=True,
    ),
}
+# =============================================================================
+# Orchestrator agent
+# =============================================================================
+class OrchestratorAgent:
+    """
+    Orchestrator agent coordinates the full enrichment workflow.
+    Responsibilities:
+    - Create execution context
+    - Route requests through validation → policy → enrichment
+    - Execute skills based on policy decisions
+    - Aggregate results
+    - Generate telemetry
+    """
+    def __init__(self, role: AgentRole = "orchestrator"):
+        self.role = role
+        self.skills = SKILLS_REGISTRY
+    def create_context(
+        self,
+        raw_indicator: str,
+        indicator_type_hint: str,
+        requested_modules: list[str],
+        authorized_target: bool,
+        passive_only: bool = True,
+    ) -> ExecutionContext:
+        """
+        Create execution context for a new enrichment request.
+        """
+        run_id = f"run_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+        started_at = datetime.now(timezone.utc).isoformat()
+        # Validate indicator first
+        validation_result = validate_indicator(
+            raw_indicator,
+            forced_type=indicator_type_hint,
+        )
+        if not validation_result.ok:
+            # Create a minimal context for failed validation
+            return ExecutionContext(
+                run_id=run_id,
+                started_at=started_at,
+                indicator_type="unknown",
+                normalized_indicator="",
+                indicator_hash="",
+                requested_modules=requested_modules,
+                authorized_target=authorized_target,
+                passive_only=passive_only,
+                errors=[validation_result.error or "Validation failed"],
+            )
+        # For successful validation, hash the indicator
+        import hashlib
+        import hmac
+        import os
+        salt = os.getenv("OSINT_HASH_SALT", "dev-only-change-me")
+        indicator_hash = hmac.new(
+            salt.encode("utf-8"),
+            validation_result.normalized.encode("utf-8"),
+            hashlib.sha256,
+        ).hexdigest()
+        return ExecutionContext(
+            run_id=run_id,
+            started_at=started_at,
+            indicator_type=validation_result.indicator_type,
+            normalized_indicator=validation_result.normalized,
+            indicator_hash=indicator_hash,
+            requested_modules=requested_modules,
+            authorized_target=authorized_target,
+            passive_only=passive_only,
+        )
+    def execute_workflow(
+        self,
+        raw_indicator: str,
+        indicator_type_hint: str = "Auto",
+        requested_modules: list[str] | None = None,
+        authorized_target: bool = False,
+        passive_only: bool = True,
+    ) -> EnrichmentWorkflow:
+        """
+        Execute complete enrichment workflow.
+        Returns a structured workflow result containing:
+        - Execution context
+        - Validation result
+        - Policy evaluation
+        - Skill results
+        - Drift assessment
+        - Correction decision
+        """
+        started = time.perf_counter()
+        requested_modules = requested_modules or ["resource_links"]
+        # Step 1: Validate input
+        validation_result = validate_indicator(
+            raw_indicator,
+            forced_type=indicator_type_hint,
+        )
+        if not validation_result.ok:
+            # Early exit for validation failure
+            context = self.create_context(
+                raw_indicator,
+                indicator_type_hint,
+                requested_modules,
+                authorized_target,
+                passive_only,
+            )
+            return EnrichmentWorkflow(
+                context=context,
+                validation_result=validation_result,
+                policy_evaluation=PolicyEvaluation(
+                    decision="BLOCK",  # type: ignore
+                    blocked_modules=requested_modules,
+                    violations=[],
+                ),
+                skill_results=[],
+                drift_vector={},
+                correction_verb="REVERT",
+                duration_ms=int((time.perf_counter() - started) * 1000),
+            )
+        # Step 2: Create execution context
+        context = self.create_context(
+            raw_indicator,
+            indicator_type_hint,
+            requested_modules,
+            authorized_target,
+            passive_only,
+        )
+        # Step 3: Evaluate policy
+        policy_eval = evaluate_modules(
+            requested_modules,
+            authorized_target=authorized_target,
+            passive_only=passive_only,
+            allow_unknown_modules=False,
+        )
+        context.policy_evaluation = policy_eval
+        # Step 4: Execute allowed skills
+        skill_results = self._execute_skills(
+            context,
+            policy_eval.allowed_modules,
+        )
+        # Step 5: Detect drift
+        drift_vector = self._detect_drift(
+            context,
+            skill_results,
+            policy_eval,
+        )
+        # Step 6: Choose correction verb
+        correction_verb = self._choose_correction(drift_vector, policy_eval)
+        duration_ms = int((time.perf_counter() - started) * 1000)
+        # Ensure we always return at least 1ms to indicate actual work was done
+        if duration_ms == 0:
+            duration_ms = 1
+        return EnrichmentWorkflow(
+            context=context,
+            validation_result=validation_result,
+            policy_evaluation=policy_eval,
+            skill_results=skill_results,
+            drift_vector=drift_vector,
+            correction_verb=correction_verb,
+            duration_ms=duration_ms,
+        )
+    def _execute_skills(
+        self,
+        context: ExecutionContext,
+        allowed_modules: list[str],
+    ) -> list[SkillResult]:
+        """
+        Execute allowed skills based on policy evaluation.
+        """
+        results: list[SkillResult] = []
+        for module_name in allowed_modules:
+            skill = self.skills.get(module_name)
+            if not skill:
+                results.append(SkillResult(
+                    skill_name=module_name,
+                    status=ExecutionStatus.FAILED,
+                    error=f"Skill not found: {module_name}",
+                ))
+                continue
+            # Check if indicator type is supported by this skill
+            if skill.required_indicator_types and context.indicator_type not in skill.required_indicator_types:
+                results.append(SkillResult(
+                    skill_name=skill.name,
+                    status=ExecutionStatus.BLOCKED,
+                    error=f"Skill {skill.name} requires indicator type in {skill.required_indicator_types}, got {context.indicator_type}",
+                ))
+                continue
+            # Execute skill
+            result = self._execute_skill(skill, context)
+            results.append(result)
+        return results
+    def _execute_skill(
+        self,
+        skill: Skill,
+        context: ExecutionContext,
+    ) -> SkillResult:
+        """
+        Execute a single skill.
+        For now, this is a stub that returns placeholder data.
+        In production, this would invoke the skill's tools.
+        """
+        started = time.perf_counter()
+        try:
+            # Placeholder: skill execution logic would go here
+            # Each skill would use its tools to perform enrichment
+            if skill.canonical_name == "resource_links":
+                data = {"type": "links", "generated": True}
+            elif skill.canonical_name == "dns_records":
+                data = {"A": [], "AAAA": [], "MX": [], "NS": []}
+            elif skill.canonical_name == "local_url_parse":
+                data = {"scheme": "", "hostname": "", "path": ""}
+            else:
+                data = {"status": "not_implemented"}
+            duration_ms = int((time.perf_counter() - started) * 1000)
+            return SkillResult(
+                skill_name=skill.name,
+                status=ExecutionStatus.COMPLETED,
+                data=data,
+                duration_ms=duration_ms,
+            )
+        except Exception as exc:
+            duration_ms = int((time.perf_counter() - started) * 1000)
+            return SkillResult(
+                skill_name=skill.name,
+                status=ExecutionStatus.FAILED,
+                error=str(exc),
+                duration_ms=duration_ms,
+            )
+    def _detect_drift(
+        self,
+        context: ExecutionContext,
+        skill_results: list[SkillResult],
+        policy_eval: PolicyEvaluation,
+    ) -> dict[str, float]:
+        """
+        Detect drift from execution telemetry.
+        This is a simplified version. Full drift detection
+        would use osint_core.drift when implemented.
+        """
+        drift = {
+            "statistical": 0.0,
+            "behavioral": 0.0,
+            "structural": 0.0,
+            "adversarial": 0.0,
+            "operational": 0.0,
+            "policy": 0.0,
+        }
+        # Policy drift: blocked modules indicate policy boundary hit
+        if policy_eval.blocked_modules:
+            drift["policy"] = 0.4
+        # Operational drift: failed skills
+        failed_count = sum(1 for r in skill_results if r.status == ExecutionStatus.FAILED)
+        if failed_count > 0:
+            drift["operational"] = min(0.2 * failed_count, 1.0)
+        # Adversarial drift: check for suspicious patterns (stub)
+        # Full implementation would analyze normalized_indicator
+        return drift
+    def _choose_correction(
+        self,
+        drift_vector: dict[str, float],
+        policy_eval: PolicyEvaluation,
+    ) -> str:
+        """
+        Choose correction verb based on drift vector.
+        Priority: policy > structural > behavioral > adversarial > operational > statistical
+        """
+        if drift_vector.get("policy", 0.0) >= 0.4:
+            return "CONSTRAIN"
+        if drift_vector.get("structural", 0.0) >= 0.5:
+            return "REVERT"
+        if drift_vector.get("behavioral", 0.0) >= 0.5:
+            return "REVERT"
+        if drift_vector.get("adversarial", 0.0) >= 0.3:
+            return "CONSTRAIN"
+        if drift_vector.get("operational", 0.0) >= 0.4:
+            return "CONSTRAIN"
+        if drift_vector.get("statistical", 0.0) >= 0.5 and drift_vector.get("adversarial", 0.0) == 0:
+            return "ADAPT"
+        return "OBSERVE"
+# =============================================================================
+# Public API
+# =============================================================================
+def create_orchestrator() -> OrchestratorAgent:
+    """
+    Factory function to create an orchestrator agent.
+    """
+    return OrchestratorAgent(role="orchestrator")
+def list_skills() -> dict[str, Skill]:
+    """
+    Return the skills registry.
+    """
+    return SKILLS_REGISTRY.copy()
+def get_skill(skill_name: str) -> Skill | None:
+    """
+    Get a skill by canonical name.
+    """
+    return SKILLS_REGISTRY.get(skill_name)

osint_core/scheduler.py ADDED Viewed

	@@ -0,0 +1,227 @@

+"""
+osint_core.scheduler
+====================
+Latency-conscious scheduler for the Enterprise Drift-Aware OSINT Control Fabric.
+The scheduler allocates time, trust, and authority. It does not execute actions.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Literal
+# Risk tier carried by a DecisionPacket; "high" and "critical" trigger the
+# rollback requirement and the low-confidence containment gate.
+RiskLabel = Literal["low", "medium", "high", "critical"]
+# Trust level used for both the system and its hardware; schedule_decision
+# routes "contested" and "unsafe" to containment.
+TrustState = Literal["normal", "degraded", "suspicious", "contested", "unsafe"]
+# The four routes a ScheduleDecision can take.
+RouteName = Literal["FAST", "DELIBERATIVE", "CONTAINMENT", "FAIL_CLOSED"]
+class ScheduleReason(str, Enum):
+    """
+    Machine-readable reason attached to every ScheduleDecision.
+    Inherits from ``str`` so each member compares equal to its string value.
+    """
+    # Reasons attached to allowed FAST / DELIBERATIVE routes.
+    FAST_PATH_AVAILABLE = "fast_path_available"
+    DELIBERATIVE_PATH_AVAILABLE = "deliberative_path_available"
+    # Reasons passed to containment_decision by schedule_decision.
+    DEADLINE_TOO_TIGHT = "deadline_too_tight"
+    TRUST_STATE_DEGRADED = "trust_state_degraded"
+    SHORTCUT_DEBT_TOO_HIGH = "shortcut_debt_too_high"
+    # Reasons that accompany a FAIL_CLOSED route.
+    INVARIANT_VIOLATION = "invariant_violation"
+    NO_SAFE_ACTION_FITS = "no_safe_action_fits"
+    MISSING_ROLLBACK = "missing_rollback"
+    # Containment reason for low confidence on high/critical risk.
+    LOW_CONFIDENCE = "low_confidence"
+@dataclass(frozen=True)
+class DecisionPacket:
+    """
+    Immutable description of one proposed action, as seen by the scheduler.
+    The scheduler only reads this packet to choose a route; it does not
+    execute the action.
+    """
+    # Identifiers; not read by the scheduling functions in this module.
+    intent_id: str
+    action: str
+    # "high"/"critical" require a rollback plan and adequate confidence.
+    risk_label: RiskLabel
+    # Below 0.30 on a high/critical packet forces containment.
+    confidence: float
+    # >= 0.50 is needed for the FAST route; >= 0.75 allows containment when
+    # the deadline cannot be met.
+    reversibility: float
+    # Time budget compared against the sum of the three *_cost_ms fields.
+    deadline_ms: int
+    verification_cost_ms: int
+    execution_cost_ms: int
+    # Must be > 0 for high/critical packets (see has_required_rollback).
+    rollback_cost_ms: int
+    # Subtracted (weight 0.10) in safe_utility.
+    expected_utility_decay: float
+    # Must contain every entry of INVARIANT_CHECKS or the packet fails closed.
+    required_checks: tuple[str, ...]
+    # Must be non-empty for high/critical packets.
+    rollback_plan: str
+    # Free-form notes; not read by the scheduling functions in this module.
+    uncertainty_notes: tuple[str, ...] = field(default_factory=tuple)
+@dataclass(frozen=True)
+class ShortcutDebt:
+    reduced_checks: int = 0
+    cached_policy_uses: int = 0
+    skipped_deep_analysis: int = 0
+    emergency_overrides: int = 0
+    @property
+    def score(self) -> float:
+        return min(
+            1.0,
+            (self.reduced_checks * 0.10)
+            + (self.cached_policy_uses * 0.05)
+            + (self.skipped_deep_analysis * 0.15)
+            + (self.emergency_overrides * 0.40),
+        )
+@dataclass(frozen=True)
+class SystemState:
+    """
+    Immutable snapshot of system conditions consulted by schedule_decision.
+    """
+    # "contested" or "unsafe" routes the packet to containment.
+    trust_state: TrustState = "normal"
+    # Accumulated shortcuts; its score is compared against shortcut_debt_limit.
+    shortcut_debt: ShortcutDebt = field(default_factory=ShortcutDebt)
+    # A debt score at or above this limit routes the packet to containment.
+    shortcut_debt_limit: float = 0.70
+    # Not read by the scheduling functions in this module.
+    queue_depth: int = 0
+    # Treated like trust_state: "contested" or "unsafe" forces containment.
+    hardware_state: TrustState = "normal"
+@dataclass(frozen=True)
+class ScheduleDecision:
+    """
+    Immutable outcome of scheduling one DecisionPacket.
+    """
+    route: RouteName
+    reason: ScheduleReason
+    # False only on the FAIL_CLOSED routes produced by schedule_decision.
+    allowed: bool
+    # 1.0 for FAST, 0.75 for DELIBERATIVE, 0.25 for CONTAINMENT, 0.0 for FAIL_CLOSED.
+    authority_scale: float
+    # Checks that still apply on the chosen route.
+    required_checks: tuple[str, ...]
+    # Missing invariants (on an invariant violation) or the adaptive checks
+    # dropped by containment; empty otherwise.
+    skipped_checks: tuple[str, ...]
+    # Human-readable explanation of the decision.
+    notes: tuple[str, ...] = field(default_factory=tuple)
+# Checks every packet must list in required_checks; if any is missing,
+# schedule_decision fails closed regardless of deadline pressure.
+INVARIANT_CHECKS: tuple[str, ...] = (
+    "hash_salt_present",
+    "intent_signature_verified",
+    "scope_validated",
+    "policy_evaluated",
+    "forbidden_modules_blocked",
+    "raw_indicators_excluded",
+    "conditional_modules_authorized",
+)
+# Optional checks that containment_decision drops from a packet's
+# required_checks and reports as skipped.
+ADAPTIVE_CHECKS: tuple[str, ...] = (
+    "deep_log_correlation",
+    "long_horizon_analysis",
+    "full_counterfactual_simulation",
+    "secondary_model_review",
+)
+def total_required_time_ms(packet: DecisionPacket) -> int:
+    return packet.verification_cost_ms + packet.execution_cost_ms + packet.rollback_cost_ms
+def fits_deadline(packet: DecisionPacket) -> bool:
+    return total_required_time_ms(packet) <= packet.deadline_ms
+def has_required_rollback(packet: DecisionPacket) -> bool:
+    if packet.risk_label in {"high", "critical"}:
+        return bool(packet.rollback_plan and packet.rollback_cost_ms > 0)
+    return True
+def invariant_violations(packet: DecisionPacket) -> tuple[str, ...]:
+    required = set(packet.required_checks)
+    return tuple(check for check in INVARIANT_CHECKS if check not in required)
+def risk_weight(risk_label: RiskLabel) -> float:
+RISK_WEIGHTS = {"low": 0.25, "medium": 0.50, "high": 0.75, "critical": 1.00}
+def risk_weight(risk_label: RiskLabel) -> float:
+    return RISK_WEIGHTS.get(risk_label, 1.00)
+def safe_utility(packet: DecisionPacket) -> float:
+    time_ratio = min(1.0, total_required_time_ms(packet) / max(packet.deadline_ms, 1))
+    return max(
+        0.0,
+        (packet.confidence * 0.40)
+        + (packet.reversibility * 0.30)
+        + ((1.0 - risk_weight(packet.risk_label)) * 0.20)
+        + ((1.0 - time_ratio) * 0.10)
+        - (packet.expected_utility_decay * 0.10),
+    )
+def schedule_decision(packet: DecisionPacket, state: SystemState | None = None) -> ScheduleDecision:
+    state = state or SystemState()
+    missing_invariants = invariant_violations(packet)
+    if missing_invariants:
+        return ScheduleDecision(
+            route="FAIL_CLOSED",
+            reason=ScheduleReason.INVARIANT_VIOLATION,
+            allowed=False,
+            authority_scale=0.0,
+            required_checks=tuple(packet.required_checks),
+            skipped_checks=missing_invariants,
+            notes=("Invariant checks cannot be skipped under deadline pressure.",),
+        )
+    if not has_required_rollback(packet):
+        return ScheduleDecision(
+            route="FAIL_CLOSED",
+            reason=ScheduleReason.MISSING_ROLLBACK,
+            allowed=False,
+            authority_scale=0.0,
+            required_checks=tuple(packet.required_checks),
+            skipped_checks=(),
+            notes=("High-impact action requires rollback or containment plan.",),
+        )
+    if state.shortcut_debt.score >= state.shortcut_debt_limit:
+        return containment_decision(packet, ScheduleReason.SHORTCUT_DEBT_TOO_HIGH, "Shortcut debt exceeded configured limit.")
+    if state.trust_state in {"contested", "unsafe"} or state.hardware_state in {"contested", "unsafe"}:
+        return containment_decision(packet, ScheduleReason.TRUST_STATE_DEGRADED, "Trust or hardware state is contested/unsafe.")
+    if packet.confidence < 0.30 and packet.risk_label in {"high", "critical"}:
+        return containment_decision(packet, ScheduleReason.LOW_CONFIDENCE, "Confidence too low for high-impact decision.")
+    if fits_deadline(packet):
+        if packet.risk_label in {"low", "medium"} and packet.reversibility >= 0.50:
+            return ScheduleDecision(
+                route="FAST",
+                reason=ScheduleReason.FAST_PATH_AVAILABLE,
+                allowed=True,
+                authority_scale=1.0,
+                required_checks=tuple(packet.required_checks),
+                skipped_checks=(),
+                notes=("Low/medium risk action fits available decision window.",),
+            )
+        return ScheduleDecision(
+            route="DELIBERATIVE",
+            reason=ScheduleReason.DELIBERATIVE_PATH_AVAILABLE,
+            allowed=True,
+            authority_scale=0.75,
+            required_checks=tuple(packet.required_checks),
+            skipped_checks=(),
+            notes=("High-impact or lower-reversibility action fits full verification window.",),
+        )
+    if packet.reversibility >= 0.75:
+        return containment_decision(packet, ScheduleReason.DEADLINE_TOO_TIGHT, "Full verification/execution/rollback does not fit deadline.")
+    return ScheduleDecision(
+        route="FAIL_CLOSED",
+        reason=ScheduleReason.NO_SAFE_ACTION_FITS,
+        allowed=False,
+        authority_scale=0.0,
+        required_checks=tuple(packet.required_checks),
+        skipped_checks=(),
+        notes=("No safe action fits inside the useful decision window.",),
+    )
+def containment_decision(packet: DecisionPacket, reason: ScheduleReason, note: str) -> ScheduleDecision:
+    skipped = tuple(check for check in ADAPTIVE_CHECKS if check in packet.required_checks)
+    effective = tuple(check for check in packet.required_checks if check not in skipped)
+    return ScheduleDecision(
+        route="CONTAINMENT",
+        reason=reason,
+        allowed=True,
+        authority_scale=0.25,
+        required_checks=effective,
+        skipped_checks=skipped,
+        notes=(note, "Authority reduced; prefer reversible, bounded action."),
+    )