Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import json
|
| 3 |
import logging
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
from datetime import datetime
|
|
@@ -10,32 +11,64 @@ import time
|
|
| 10 |
import os
|
| 11 |
import sqlite3
|
| 12 |
import contextlib
|
|
|
|
|
|
|
|
|
|
| 13 |
from scipy.stats import beta
|
| 14 |
import plotly.graph_objects as go
|
|
|
|
| 15 |
|
| 16 |
# ----------------------------------------------------------------------
|
| 17 |
-
# Configuration from environment variables
|
| 18 |
# ----------------------------------------------------------------------
|
| 19 |
LOW_THRESHOLD = float(os.getenv("ARF_LOW_THRESHOLD", "0.2"))
|
| 20 |
HIGH_THRESHOLD = float(os.getenv("ARF_HIGH_THRESHOLD", "0.8"))
|
| 21 |
ALPHA_PRIOR = float(os.getenv("ARF_ALPHA_PRIOR", "1.0"))
|
| 22 |
BETA_PRIOR = float(os.getenv("ARF_BETA_PRIOR", "1.0"))
|
| 23 |
DB_PATH = os.getenv("ARF_DB_PATH", "/data/arf_decisions.db")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
# ----------------------------------------------------------------------
|
| 26 |
-
# Logging
|
| 27 |
# ----------------------------------------------------------------------
|
| 28 |
-
|
| 29 |
-
level=logging.INFO,
|
| 30 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 31 |
-
)
|
| 32 |
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# ----------------------------------------------------------------------
|
| 35 |
-
# SQLite persistence
|
| 36 |
# ----------------------------------------------------------------------
|
| 37 |
def init_db():
|
| 38 |
-
"""Create the decisions table
|
|
|
|
|
|
|
|
|
|
| 39 |
with contextlib.closing(sqlite3.connect(DB_PATH)) as conn:
|
| 40 |
cursor = conn.cursor()
|
| 41 |
cursor.execute('''
|
|
@@ -47,7 +80,9 @@ def init_db():
|
|
| 47 |
)
|
| 48 |
''')
|
| 49 |
conn.commit()
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def save_decision_to_db(decision: dict, risk: float):
|
| 53 |
"""Insert a decision into the database."""
|
|
@@ -89,12 +124,21 @@ def vacuum_db():
|
|
| 89 |
except Exception as e:
|
| 90 |
logger.error(f"Vacuum failed: {e}")
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# ----------------------------------------------------------------------
|
| 93 |
# Thread‑safe history (in‑memory + DB backup)
|
| 94 |
# ----------------------------------------------------------------------
|
| 95 |
decision_history = []
|
| 96 |
risk_history = []
|
| 97 |
history_lock = threading.Lock()
|
|
|
|
| 98 |
|
| 99 |
def update_dashboard_data(decision: dict, risk: float):
|
| 100 |
"""Thread‑safe update of both in‑memory history and database."""
|
|
@@ -107,6 +151,9 @@ def update_dashboard_data(decision: dict, risk: float):
|
|
| 107 |
if len(risk_history) > 100:
|
| 108 |
risk_history.pop(0)
|
| 109 |
save_decision_to_db(decision, risk)
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def refresh_history_from_db():
|
| 112 |
"""Load recent history from database (called at startup)."""
|
|
@@ -118,9 +165,10 @@ def refresh_history_from_db():
|
|
| 118 |
for ts, dec, risk in decisions:
|
| 119 |
decision_history.append((ts, dec, risk))
|
| 120 |
risk_history.append((ts, risk))
|
|
|
|
| 121 |
|
| 122 |
# ----------------------------------------------------------------------
|
| 123 |
-
# Memory monitoring (daemon thread)
|
| 124 |
# ----------------------------------------------------------------------
|
| 125 |
def get_memory_usage():
|
| 126 |
"""Return current process memory usage in MB (RSS)."""
|
|
@@ -145,7 +193,7 @@ def get_memory_usage():
|
|
| 145 |
|
| 146 |
def memory_monitor_loop():
|
| 147 |
"""Periodically log memory usage. Runs in a daemon thread."""
|
| 148 |
-
while
|
| 149 |
try:
|
| 150 |
mem_mb = get_memory_usage()
|
| 151 |
if mem_mb is not None:
|
|
@@ -177,7 +225,7 @@ class BayesianRiskEngine:
|
|
| 177 |
return lo, hi
|
| 178 |
|
| 179 |
# ----------------------------------------------------------------------
|
| 180 |
-
# Policy Engine
|
| 181 |
# ----------------------------------------------------------------------
|
| 182 |
class PolicyEngine:
|
| 183 |
def __init__(self, thresholds: Dict[str, float] = None):
|
|
@@ -194,10 +242,17 @@ class PolicyEngine:
|
|
| 194 |
return "escalate", f"Risk in escalation zone ({self.thresholds['low']}-{self.thresholds['high']})"
|
| 195 |
|
| 196 |
# ----------------------------------------------------------------------
|
| 197 |
-
# Infrastructure analysis (synchronous, with
|
| 198 |
# ----------------------------------------------------------------------
|
| 199 |
def handle_infra_with_governance(fault_type: str, context_window: int, session_state: dict):
|
|
|
|
| 200 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
fault_map = {
|
| 202 |
"none": (1, 99),
|
| 203 |
"switch_down": (20, 80),
|
|
@@ -237,6 +292,8 @@ def handle_infra_with_governance(fault_type: str, context_window: int, session_s
|
|
| 237 |
"control_plane_decision": control_decision
|
| 238 |
}
|
| 239 |
}
|
|
|
|
|
|
|
| 240 |
return output, session_state
|
| 241 |
except Exception as e:
|
| 242 |
logger.exception("Error in handle_infra_with_governance")
|
|
@@ -285,6 +342,7 @@ def run_hmc_mcmc(samples: int, warmup: int):
|
|
| 285 |
# Input validation
|
| 286 |
samples = max(500, min(10000, int(samples)))
|
| 287 |
warmup = max(100, min(2000, int(warmup)))
|
|
|
|
| 288 |
|
| 289 |
# Generate data: 10 observations with mean 0.5, std 0.2
|
| 290 |
np.random.seed(42) # for reproducibility
|
|
@@ -327,8 +385,34 @@ def run_hmc_mcmc(samples: int, warmup: int):
|
|
| 327 |
return {"error": str(e)}, go.Figure(), go.Figure()
|
| 328 |
|
| 329 |
# ----------------------------------------------------------------------
|
| 330 |
-
# Dashboard plots (thread‑safe)
|
| 331 |
# ----------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
def generate_risk_gauge():
|
| 333 |
with history_lock:
|
| 334 |
if not risk_history:
|
|
@@ -349,6 +433,7 @@ def generate_risk_gauge():
|
|
| 349 |
}))
|
| 350 |
return fig
|
| 351 |
|
|
|
|
| 352 |
def generate_decision_pie():
|
| 353 |
with history_lock:
|
| 354 |
if not decision_history:
|
|
@@ -359,6 +444,7 @@ def generate_decision_pie():
|
|
| 359 |
fig.update_layout(title="Policy Decisions")
|
| 360 |
return fig
|
| 361 |
|
|
|
|
| 362 |
def generate_action_timeline():
|
| 363 |
with history_lock:
|
| 364 |
if not decision_history:
|
|
@@ -394,18 +480,38 @@ def refresh_dashboard():
|
|
| 394 |
# ----------------------------------------------------------------------
|
| 395 |
oss_caps = {
|
| 396 |
"edition": "OSS (Demo)",
|
| 397 |
-
"version":
|
| 398 |
"license": "Apache 2.0",
|
| 399 |
"execution": {"modes": ["advisory"], "max_incidents": 100},
|
| 400 |
"memory": {"type": "in-memory", "faiss_index_type": "flat", "max_incident_nodes": 100},
|
| 401 |
-
"enterprise_features": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
}
|
| 403 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 404 |
# ----------------------------------------------------------------------
|
| 405 |
# Startup
|
| 406 |
# ----------------------------------------------------------------------
|
| 407 |
-
# Ensure data directory exists
|
| 408 |
-
os.makedirs(os.path.dirname(DB_PATH) if os.path.dirname(DB_PATH) else ".", exist_ok=True)
|
| 409 |
init_db()
|
| 410 |
refresh_history_from_db()
|
| 411 |
|
|
@@ -415,18 +521,19 @@ mem_thread.start()
|
|
| 415 |
|
| 416 |
# Start periodic vacuum (once a day)
|
| 417 |
def vacuum_scheduler():
|
| 418 |
-
while
|
| 419 |
time.sleep(86400) # 24 hours
|
| 420 |
-
|
|
|
|
| 421 |
vacuum_thread = threading.Thread(target=vacuum_scheduler, daemon=True)
|
| 422 |
vacuum_thread.start()
|
| 423 |
|
| 424 |
# ----------------------------------------------------------------------
|
| 425 |
# Gradio UI
|
| 426 |
# ----------------------------------------------------------------------
|
| 427 |
-
with gr.Blocks(title="ARF
|
| 428 |
gr.Markdown(f"""
|
| 429 |
-
# 🧠 ARF
|
| 430 |
**Mathematically rigorous risk estimation using conjugate priors and MCMC**
|
| 431 |
This demo showcases:
|
| 432 |
- **Bayesian conjugate prior (Beta-Binomial)** – online risk update from observed failures/successes.
|
|
@@ -503,23 +610,54 @@ with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
|
|
| 503 |
]
|
| 504 |
gr.JSON(label="Active Policies", value=policies_json)
|
| 505 |
|
|
|
|
| 506 |
with gr.TabItem("Enterprise / OSS"):
|
| 507 |
gr.Markdown(f"""
|
| 508 |
-
#
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
""")
|
| 524 |
|
| 525 |
# Wire events
|
|
@@ -535,6 +673,9 @@ with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
|
|
| 535 |
outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
|
| 536 |
)
|
| 537 |
|
|
|
|
|
|
|
|
|
|
| 538 |
if __name__ == "__main__":
|
| 539 |
demo.queue()
|
| 540 |
demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import json
|
| 3 |
import logging
|
| 4 |
+
import logging.handlers
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
from datetime import datetime
|
|
|
|
| 11 |
import os
|
| 12 |
import sqlite3
|
| 13 |
import contextlib
|
| 14 |
+
import signal
|
| 15 |
+
import sys
|
| 16 |
+
import functools
|
| 17 |
from scipy.stats import beta
|
| 18 |
import plotly.graph_objects as go
|
| 19 |
+
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
|
| 20 |
|
| 21 |
# ----------------------------------------------------------------------
|
| 22 |
+
# Configuration from environment variables with validation
|
| 23 |
# ----------------------------------------------------------------------
|
| 24 |
def _env_float(name: str, default: float) -> float:
    """Return the float stored in environment variable *name*.

    Falls back to *default* (with a warning) when the variable is unset or
    cannot be parsed, so a typo in the deployment config does not crash the
    app at import time.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    try:
        return float(raw)
    except ValueError:
        logging.warning(f"Invalid value {raw!r} for {name}. Using default {default}.")
        return default


LOW_THRESHOLD = _env_float("ARF_LOW_THRESHOLD", 0.2)
HIGH_THRESHOLD = _env_float("ARF_HIGH_THRESHOLD", 0.8)
ALPHA_PRIOR = _env_float("ARF_ALPHA_PRIOR", 1.0)
BETA_PRIOR = _env_float("ARF_BETA_PRIOR", 1.0)
DB_PATH = os.getenv("ARF_DB_PATH", "/data/arf_decisions.db")
LOG_LEVEL = os.getenv("ARF_LOG_LEVEL", "INFO").upper()
VERSION = "4.2.0+oss-enhanced"

# Validate thresholds: both must lie in [0, 1] with low strictly below high.
# NaN fails every comparison, so it is rejected here as well.
if not (0 <= LOW_THRESHOLD < HIGH_THRESHOLD <= 1):
    logging.warning(f"Invalid thresholds: low={LOW_THRESHOLD}, high={HIGH_THRESHOLD}. Using defaults.")
    LOW_THRESHOLD = 0.2
    HIGH_THRESHOLD = 0.8

# Validate priors: Beta parameters must be positive and finite. The chained
# comparison (rather than `<= 0`) also rejects NaN and infinity.
_INF = float("inf")
if not (0 < ALPHA_PRIOR < _INF and 0 < BETA_PRIOR < _INF):
    logging.warning(f"Invalid priors: alpha={ALPHA_PRIOR}, beta={BETA_PRIOR}. Using defaults.")
    ALPHA_PRIOR = 1.0
    BETA_PRIOR = 1.0
|
| 43 |
|
| 44 |
# ----------------------------------------------------------------------
|
| 45 |
+
# Logging setup (file rotation + console)
|
| 46 |
# ----------------------------------------------------------------------
|
| 47 |
+
logger = logging.getLogger(__name__)

# getattr() can return a non-level attribute (e.g. ARF_LOG_LEVEL=HANDLERS
# resolves to the logging.handlers module), so accept integers only.
_level = getattr(logging, LOG_LEVEL, None)
logger.setLevel(_level if isinstance(_level, int) else logging.INFO)

_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Console handler (for Docker logs). Attached first so that a failure to set
# up file logging below is still reported somewhere.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(logging.Formatter(_LOG_FORMAT))
logger.addHandler(console_handler)

# File handler with rotation. This is best effort: if the log directory
# cannot be created or written (e.g. a non-root container), fall back to a
# no-op handler instead of crashing the application at import time.
try:
    os.makedirs("/var/log/arf", exist_ok=True)
    file_handler = logging.handlers.RotatingFileHandler(
        "/var/log/arf/app.log", maxBytes=10_485_760, backupCount=5
    )
except OSError as exc:
    file_handler = logging.NullHandler()
    logger.warning("File logging disabled (%s); logging to console only.", exc)
file_handler.setFormatter(logging.Formatter(_LOG_FORMAT))
logger.addHandler(file_handler)
|
| 63 |
|
| 64 |
# ----------------------------------------------------------------------
|
| 65 |
+
# SQLite persistence with secure permissions
|
| 66 |
# ----------------------------------------------------------------------
|
| 67 |
def init_db():
|
| 68 |
+
"""Create the decisions table with secure file permissions."""
|
| 69 |
+
db_dir = os.path.dirname(DB_PATH)
|
| 70 |
+
if db_dir and not os.path.exists(db_dir):
|
| 71 |
+
os.makedirs(db_dir, exist_ok=True)
|
| 72 |
with contextlib.closing(sqlite3.connect(DB_PATH)) as conn:
|
| 73 |
cursor = conn.cursor()
|
| 74 |
cursor.execute('''
|
|
|
|
| 80 |
)
|
| 81 |
''')
|
| 82 |
conn.commit()
|
| 83 |
+
# Restrict permissions (owner read/write only)
|
| 84 |
+
os.chmod(DB_PATH, 0o600)
|
| 85 |
+
logger.info(f"Database initialized at {DB_PATH} with secure permissions")
|
| 86 |
|
| 87 |
def save_decision_to_db(decision: dict, risk: float):
|
| 88 |
"""Insert a decision into the database."""
|
|
|
|
| 124 |
except Exception as e:
|
| 125 |
logger.error(f"Vacuum failed: {e}")
|
| 126 |
|
| 127 |
+
# ----------------------------------------------------------------------
|
| 128 |
+
# Prometheus metrics
|
| 129 |
+
# ----------------------------------------------------------------------
|
| 130 |
+
# Count of decisions, partitioned by the "action" label.
decisions_total = Counter(
    name='arf_decisions_total',
    documentation='Total decisions made',
    labelnames=['action'],
)
# Most recently recorded risk score.
risk_gauge = Gauge(
    name='arf_current_risk',
    documentation='Current risk score',
)
# Distribution of observed evaluation durations, in seconds.
decision_latency = Histogram(
    name='arf_decision_latency_seconds',
    documentation='Time to evaluate intent',
)
# Count of MCMC runs.
mcmc_runs = Counter(
    name='arf_mcmc_runs_total',
    documentation='Total MCMC runs',
)
|
| 134 |
+
|
| 135 |
# ----------------------------------------------------------------------
|
| 136 |
# Thread‑safe history (in‑memory + DB backup)
|
| 137 |
# ----------------------------------------------------------------------
|
| 138 |
decision_history = []
|
| 139 |
risk_history = []
|
| 140 |
history_lock = threading.Lock()
|
| 141 |
+
shutdown_event = threading.Event()
|
| 142 |
|
| 143 |
def update_dashboard_data(decision: dict, risk: float):
|
| 144 |
"""Thread‑safe update of both in‑memory history and database."""
|
|
|
|
| 151 |
if len(risk_history) > 100:
|
| 152 |
risk_history.pop(0)
|
| 153 |
save_decision_to_db(decision, risk)
|
| 154 |
+
# Update Prometheus metrics
|
| 155 |
+
decisions_total.labels(action=decision.get("risk_level", "unknown")).inc()
|
| 156 |
+
risk_gauge.set(risk)
|
| 157 |
|
| 158 |
def refresh_history_from_db():
|
| 159 |
"""Load recent history from database (called at startup)."""
|
|
|
|
| 165 |
for ts, dec, risk in decisions:
|
| 166 |
decision_history.append((ts, dec, risk))
|
| 167 |
risk_history.append((ts, risk))
|
| 168 |
+
risk_gauge.set(risk) # update gauge with latest risk
|
| 169 |
|
| 170 |
# ----------------------------------------------------------------------
|
| 171 |
+
# Memory monitoring (daemon thread with graceful stop)
|
| 172 |
# ----------------------------------------------------------------------
|
| 173 |
def get_memory_usage():
|
| 174 |
"""Return current process memory usage in MB (RSS)."""
|
|
|
|
| 193 |
|
| 194 |
def memory_monitor_loop():
|
| 195 |
"""Periodically log memory usage. Runs in a daemon thread."""
|
| 196 |
+
while not shutdown_event.is_set():
|
| 197 |
try:
|
| 198 |
mem_mb = get_memory_usage()
|
| 199 |
if mem_mb is not None:
|
|
|
|
| 225 |
return lo, hi
|
| 226 |
|
| 227 |
# ----------------------------------------------------------------------
|
| 228 |
+
# Policy Engine
|
| 229 |
# ----------------------------------------------------------------------
|
| 230 |
class PolicyEngine:
|
| 231 |
def __init__(self, thresholds: Dict[str, float] = None):
|
|
|
|
| 242 |
return "escalate", f"Risk in escalation zone ({self.thresholds['low']}-{self.thresholds['high']})"
|
| 243 |
|
| 244 |
# ----------------------------------------------------------------------
|
| 245 |
+
# Infrastructure analysis (synchronous, with validation)
|
| 246 |
# ----------------------------------------------------------------------
|
| 247 |
def handle_infra_with_governance(fault_type: str, context_window: int, session_state: dict):
|
| 248 |
+
start_time = time.time()
|
| 249 |
try:
|
| 250 |
+
# Input validation
|
| 251 |
+
fault_type = fault_type.strip()
|
| 252 |
+
if fault_type not in ["none", "switch_down", "server_overload", "cascade"]:
|
| 253 |
+
fault_type = "none"
|
| 254 |
+
context_window = max(0, min(1000, int(context_window))) # clamp
|
| 255 |
+
|
| 256 |
fault_map = {
|
| 257 |
"none": (1, 99),
|
| 258 |
"switch_down": (20, 80),
|
|
|
|
| 292 |
"control_plane_decision": control_decision
|
| 293 |
}
|
| 294 |
}
|
| 295 |
+
# Record latency metric
|
| 296 |
+
decision_latency.observe(time.time() - start_time)
|
| 297 |
return output, session_state
|
| 298 |
except Exception as e:
|
| 299 |
logger.exception("Error in handle_infra_with_governance")
|
|
|
|
| 342 |
# Input validation
|
| 343 |
samples = max(500, min(10000, int(samples)))
|
| 344 |
warmup = max(100, min(2000, int(warmup)))
|
| 345 |
+
mcmc_runs.inc() # record metric
|
| 346 |
|
| 347 |
# Generate data: 10 observations with mean 0.5, std 0.2
|
| 348 |
np.random.seed(42) # for reproducibility
|
|
|
|
| 385 |
return {"error": str(e)}, go.Figure(), go.Figure()
|
| 386 |
|
| 387 |
# ----------------------------------------------------------------------
|
| 388 |
+
# Dashboard plots (thread‑safe with caching)
|
| 389 |
# ----------------------------------------------------------------------
|
| 390 |
+
# Simple TTL cache decorator
|
| 391 |
+
class TTLCache:
    """Decorator that caches a function's results for a short time.

    Results are keyed by the decorated function and its call arguments and
    are reused for ``ttl_seconds`` after the call that produced them started.
    One instance may decorate several functions; entries never collide
    because the function object itself is part of the key.

    Calls whose arguments are unhashable bypass the cache instead of raising.
    """

    def __init__(self, ttl_seconds=5):
        self.ttl = ttl_seconds
        self.cache = {}
        self.lock = threading.Lock()

    def _evict_expired(self, now):
        """Drop every entry older than the TTL. Caller must hold ``self.lock``."""
        expired = [k for k, (_, stamp) in self.cache.items() if now - stamp >= self.ttl]
        for k in expired:
            del self.cache[k]

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                key = (func, args, frozenset(kwargs.items()))
                hash(key)
            except TypeError:
                # Unhashable arguments cannot form a cache key; compute directly.
                return func(*args, **kwargs)

            # Monotonic clock: TTL must not be affected by wall-clock changes.
            now = time.monotonic()
            with self.lock:
                entry = self.cache.get(key)
                if entry is not None and now - entry[1] < self.ttl:
                    return entry[0]

            # Compute outside the lock so a slow call does not block other keys.
            result = func(*args, **kwargs)
            with self.lock:
                # Keep the cache bounded to entries that can still be served.
                self._evict_expired(now)
                self.cache[key] = (result, now)
            return result
        return wrapper
|
| 412 |
+
|
| 413 |
+
dashboard_cache = TTLCache(ttl_seconds=2) # cache for 2 seconds
|
| 414 |
+
|
| 415 |
+
@dashboard_cache
|
| 416 |
def generate_risk_gauge():
|
| 417 |
with history_lock:
|
| 418 |
if not risk_history:
|
|
|
|
| 433 |
}))
|
| 434 |
return fig
|
| 435 |
|
| 436 |
+
@dashboard_cache
|
| 437 |
def generate_decision_pie():
|
| 438 |
with history_lock:
|
| 439 |
if not decision_history:
|
|
|
|
| 444 |
fig.update_layout(title="Policy Decisions")
|
| 445 |
return fig
|
| 446 |
|
| 447 |
+
@dashboard_cache
|
| 448 |
def generate_action_timeline():
|
| 449 |
with history_lock:
|
| 450 |
if not decision_history:
|
|
|
|
| 480 |
# ----------------------------------------------------------------------
|
| 481 |
oss_caps = {
|
| 482 |
"edition": "OSS (Demo)",
|
| 483 |
+
"version": VERSION,
|
| 484 |
"license": "Apache 2.0",
|
| 485 |
"execution": {"modes": ["advisory"], "max_incidents": 100},
|
| 486 |
"memory": {"type": "in-memory", "faiss_index_type": "flat", "max_incident_nodes": 100},
|
| 487 |
+
"enterprise_features": [
|
| 488 |
+
"Real-time HMC (using PyMC)",
|
| 489 |
+
"Hyperpriors",
|
| 490 |
+
"Decision Engine",
|
| 491 |
+
"Full audit trails & compliance reporting",
|
| 492 |
+
"Blast radius limits & automatic rollback",
|
| 493 |
+
"Multi-cloud & hybrid deployment support"
|
| 494 |
+
]
|
| 495 |
}
|
| 496 |
|
| 497 |
+
# ----------------------------------------------------------------------
|
| 498 |
+
# Graceful shutdown
|
| 499 |
+
# ----------------------------------------------------------------------
|
| 500 |
+
def shutdown_handler(signum, frame):
    """Signal handler: flag shutdown, pause briefly, then exit with status 0.

    Args:
        signum: Number of the signal being handled (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    logger.info("Received shutdown signal, cleaning up...")
    # Loops that poll shutdown_event stop once the flag is set.
    shutdown_event.set()
    # Fixed grace period for those loops to notice the flag.
    time.sleep(2)
    logger.info("Shutdown complete")
    raise SystemExit(0)
|
| 507 |
+
|
| 508 |
+
signal.signal(signal.SIGTERM, shutdown_handler)
|
| 509 |
+
signal.signal(signal.SIGINT, shutdown_handler)
|
| 510 |
+
|
| 511 |
# ----------------------------------------------------------------------
|
| 512 |
# Startup
|
| 513 |
# ----------------------------------------------------------------------
|
| 514 |
+
# Ensure data directory exists and DB has secure permissions
|
|
|
|
| 515 |
init_db()
|
| 516 |
refresh_history_from_db()
|
| 517 |
|
|
|
|
| 521 |
|
| 522 |
# Start periodic vacuum (once a day)
|
| 523 |
def vacuum_scheduler():
    """Run ``vacuum_db()`` once every 24 hours until shutdown is requested.

    Waits on ``shutdown_event`` rather than sleeping, so the loop ends as soon
    as shutdown is signalled instead of up to a day later.
    """
    # Event.wait() returns True when the event is set and False on timeout,
    # so a False result means a full 24 hours elapsed without a shutdown.
    while not shutdown_event.wait(86400):  # 24 hours
        vacuum_db()
|
| 528 |
vacuum_thread = threading.Thread(target=vacuum_scheduler, daemon=True)
|
| 529 |
vacuum_thread.start()
|
| 530 |
|
| 531 |
# ----------------------------------------------------------------------
|
| 532 |
# Gradio UI
|
| 533 |
# ----------------------------------------------------------------------
|
| 534 |
+
with gr.Blocks(title=f"ARF v{VERSION} – Bayesian Risk Scoring Demo") as demo:
|
| 535 |
gr.Markdown(f"""
|
| 536 |
+
# 🧠 ARF v{VERSION} – Bayesian Risk Scoring for AI Reliability (Demo)
|
| 537 |
**Mathematically rigorous risk estimation using conjugate priors and MCMC**
|
| 538 |
This demo showcases:
|
| 539 |
- **Bayesian conjugate prior (Beta-Binomial)** – online risk update from observed failures/successes.
|
|
|
|
| 610 |
]
|
| 611 |
gr.JSON(label="Active Policies", value=policies_json)
|
| 612 |
|
| 613 |
+
# Sales-driven Enterprise / OSS tab
|
| 614 |
with gr.TabItem("Enterprise / OSS"):
|
| 615 |
gr.Markdown(f"""
|
| 616 |
+
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 2rem; border-radius: 12px; margin-bottom: 2rem; text-align: center; color: white;">
|
| 617 |
+
<h1 style="margin: 0; font-size: 2.5rem;">🚀 ARF {oss_caps['edition'].upper()} Edition</h1>
|
| 618 |
+
<p style="font-size: 1.2rem; opacity: 0.9;">Version {oss_caps['version']} · Apache 2.0 License</p>
|
| 619 |
+
</div>
|
| 620 |
+
|
| 621 |
+
<div style="display: flex; gap: 1.5rem; margin-bottom: 2rem;">
|
| 622 |
+
<div style="flex: 1; background: #f8f9fa; padding: 1.5rem; border-radius: 8px;">
|
| 623 |
+
<h3>📦 OSS Capabilities (Demo)</h3>
|
| 624 |
+
<ul>
|
| 625 |
+
<li>✅ Bayesian conjugate prior – Beta-Binomial risk scoring</li>
|
| 626 |
+
<li>✅ Policy thresholds – configurable approve/escalate/deny</li>
|
| 627 |
+
<li>✅ MCMC sampling – Metropolis-Hastings (simulates HMC concepts)</li>
|
| 628 |
+
<li>✅ In-memory storage – no persistence</li>
|
| 629 |
+
<li>✅ Full open-source transparency</li>
|
| 630 |
+
</ul>
|
| 631 |
+
</div>
|
| 632 |
+
<div style="flex: 1; background: #f8f9fa; padding: 1.5rem; border-radius: 8px;">
|
| 633 |
+
<h3>🏢 Enterprise Features</h3>
|
| 634 |
+
<ul>
|
| 635 |
+
<li>🔒 Real-time HMC (using PyMC) – Bayesian deep learning for risk</li>
|
| 636 |
+
<li>🔒 Hyperpriors – hierarchical models for better generalization</li>
|
| 637 |
+
<li>🔒 Decision Engine with full audit trails</li>
|
| 638 |
+
<li>🔒 Blast radius limits & automatic rollback</li>
|
| 639 |
+
<li>🔒 Multi-cloud & hybrid deployment support</li>
|
| 640 |
+
<li>🔒 Compliance reporting (SOC2, ISO 27001)</li>
|
| 641 |
+
<li>🔒 24/7 enterprise support & SLAs</li>
|
| 642 |
+
</ul>
|
| 643 |
+
</div>
|
| 644 |
+
</div>
|
| 645 |
+
|
| 646 |
+
<div style="background: #e9ecef; padding: 1.5rem; border-radius: 8px; text-align: center;">
|
| 647 |
+
<h3 style="margin-top: 0;">✨ Why Upgrade to Enterprise?</h3>
|
| 648 |
+
<p>ARF Enterprise delivers the same mathematically rigorous foundation but with <strong>production‑grade reliability</strong> and <strong>governance controls</strong> that meet the strictest compliance requirements.</p>
|
| 649 |
+
<ul style="display: inline-block; text-align: left; margin: 1rem auto;">
|
| 650 |
+
<li>📊 **Persistent storage** – every decision logged and queryable</li>
|
| 651 |
+
<li>⚙️ **Advanced risk fusion** – combine conjugate, hyperprior, and HMC estimates</li>
|
| 652 |
+
<li>🛡️ **Semantic memory** – FAISS vector search for context‑aware policies</li>
|
| 653 |
+
<li>📈 **Real‑time dashboards** with Grafana & Prometheus integration</li>
|
| 654 |
+
</ul>
|
| 655 |
+
</div>
|
| 656 |
+
|
| 657 |
+
<div style="text-align: center; margin-top: 2rem;">
|
| 658 |
+
<a href="https://calendly.com/petter2025us/30min" target="_blank" style="background: #764ba2; color: white; padding: 12px 24px; text-decoration: none; border-radius: 8px; font-weight: bold; margin-right: 1rem;">📅 Book a Demo</a>
|
| 659 |
+
<a href="mailto:petter2025us@outlook.com" style="background: #667eea; color: white; padding: 12px 24px; text-decoration: none; border-radius: 8px; font-weight: bold;">📧 Contact Sales</a>
|
| 660 |
+
</div>
|
| 661 |
""")
|
| 662 |
|
| 663 |
# Wire events
|
|
|
|
| 673 |
outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
|
| 674 |
)
|
| 675 |
|
| 676 |
+
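# Add Prometheus metrics endpoint.
# NOTE(review): FastAPI does not unpack Flask-style (body, status, headers) tuples, so this handler likely needs to return a Response carrying CONTENT_TYPE_LATEST instead. Also confirm that gr.Blocks exposes `fastapi_app` before launch() is called.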
|
| 677 |
+
demo.fastapi_app.add_api_route("/metrics", lambda: (generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST}), methods=["GET"])
|
| 678 |
+
|
| 679 |
if __name__ == "__main__":
|
| 680 |
demo.queue()
|
| 681 |
demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)
|