petter2025 commited on
Commit
1737ba1
·
verified ·
1 Parent(s): 3c5135b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +279 -146
app.py CHANGED
@@ -1,20 +1,126 @@
1
  import gradio as gr
2
- import asyncio
3
  import json
4
  import logging
5
- import traceback
6
- import os
7
  import numpy as np
8
  import pandas as pd
9
  from datetime import datetime
10
- from typing import Dict, Any, List, Optional
11
  import threading
12
- import urllib.request
13
  import time
 
 
 
14
  from scipy.stats import beta
 
 
 
 
 
 
 
 
 
 
15
 
16
  # ----------------------------------------------------------------------
17
- # Memory monitoring (no external dependencies)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # ----------------------------------------------------------------------
19
  def get_memory_usage():
20
  """Return current process memory usage in MB (RSS)."""
@@ -37,30 +143,24 @@ def get_memory_usage():
37
  pass
38
  return None
39
 
40
- def log_memory_usage():
41
- mem_mb = get_memory_usage()
42
- if mem_mb is not None:
43
- logging.info(f"Process memory: {mem_mb:.1f} MB")
44
- else:
45
- logging.info("Process memory: unknown")
46
- threading.Timer(60, log_memory_usage).start()
47
-
48
- # ----------------------------------------------------------------------
49
- # Plotly
50
- # ----------------------------------------------------------------------
51
- import plotly.graph_objects as go
52
-
53
- # ----------------------------------------------------------------------
54
- # Logging
55
- # ----------------------------------------------------------------------
56
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
57
- logger = logging.getLogger(__name__)
58
 
59
  # ----------------------------------------------------------------------
60
  # Bayesian Risk Engine (Beta‑Binomial)
61
  # ----------------------------------------------------------------------
62
  class BayesianRiskEngine:
63
- def __init__(self, alpha=1.0, beta=1.0):
64
  self.alpha = alpha
65
  self.beta = beta
66
 
@@ -77,10 +177,12 @@ class BayesianRiskEngine:
77
  return lo, hi
78
 
79
  # ----------------------------------------------------------------------
80
- # Policy Engine
81
  # ----------------------------------------------------------------------
82
  class PolicyEngine:
83
- def __init__(self, thresholds={"low": 0.2, "high": 0.8}):
 
 
84
  self.thresholds = thresholds
85
 
86
  def evaluate(self, risk):
@@ -92,18 +194,53 @@ class PolicyEngine:
92
  return "escalate", f"Risk in escalation zone ({self.thresholds['low']}-{self.thresholds['high']})"
93
 
94
  # ----------------------------------------------------------------------
95
- # History
96
  # ----------------------------------------------------------------------
97
- decision_history = []
98
- risk_history = []
99
-
100
- def update_dashboard_data(decision, risk):
101
- decision_history.append((datetime.utcnow().isoformat(), decision, risk))
102
- risk_history.append((datetime.utcnow().isoformat(), risk))
103
- if len(decision_history) > 100:
104
- decision_history.pop(0)
105
- if len(risk_history) > 100:
106
- risk_history.pop(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  def autonomous_control_decision(risk, risk_engine, policy_engine):
109
  action, reason = policy_engine.evaluate(risk)
@@ -118,53 +255,7 @@ def autonomous_control_decision(risk, risk_engine, policy_engine):
118
  return decision
119
 
120
  # ----------------------------------------------------------------------
121
- # Infrastructure analysis
122
- # ----------------------------------------------------------------------
123
- async def handle_infra_with_governance(fault_type, context_window, session_state):
124
- fault_map = {
125
- "none": (1, 99),
126
- "switch_down": (20, 80),
127
- "server_overload": (35, 65),
128
- "cascade": (60, 40)
129
- }
130
- failures, successes = fault_map.get(fault_type, (1, 99))
131
- severity = "low" if failures < 10 else "medium" if failures < 40 else "high"
132
-
133
- risk_engine = BayesianRiskEngine(alpha=1, beta=1)
134
- risk_engine.update(failures, successes)
135
- risk = risk_engine.risk()
136
- ci_low, ci_high = risk_engine.risk_interval(0.95)
137
-
138
- policy_engine = PolicyEngine(thresholds={"low": 0.2, "high": 0.8})
139
- action, reason = policy_engine.evaluate(risk)
140
- control_decision = autonomous_control_decision(risk, risk_engine, policy_engine)
141
-
142
- analysis_result = {
143
- "risk": risk,
144
- "risk_ci": [ci_low, ci_high],
145
- "decision": action,
146
- "justification": reason,
147
- "healing_actions": ["restart"] if action == "deny" else ["monitor"],
148
- "posterior_parameters": {
149
- "alpha": risk_engine.alpha,
150
- "beta": risk_engine.beta
151
- }
152
- }
153
- output = {
154
- **analysis_result,
155
- "governance": {
156
- "policy_evaluation": {
157
- "action": action,
158
- "reason": reason,
159
- "thresholds": policy_engine.thresholds
160
- },
161
- "control_plane_decision": control_decision
162
- }
163
- }
164
- return output, session_state
165
-
166
- # ----------------------------------------------------------------------
167
- # MCMC (Metropolis‑Hastings)
168
  # ----------------------------------------------------------------------
169
  class MHMCMC:
170
  def __init__(self, log_target, proposal_sd=0.1):
@@ -189,47 +280,60 @@ class MHMCMC:
189
  acceptance_rate = accepted / (n_samples + burn_in)
190
  return samples, acceptance_rate
191
 
192
- def run_hmc_mcmc(samples, warmup):
193
- # Generate data: 10 observations with mean 0.5, std 0.2
194
- data = np.random.normal(0.5, 0.2, 10)
195
- def log_prior(mu):
196
- return -0.5 * (mu ** 2) # prior N(0,1)
197
- def log_likelihood(mu):
198
- return -0.5 * np.sum(((data - mu) / 0.2) ** 2)
199
- def log_posterior(mu):
200
- return log_prior(mu) + log_likelihood(mu)
201
-
202
- sampler = MHMCMC(log_posterior, proposal_sd=0.05)
203
- mu_samples, acceptance = sampler.sample(samples, initial_state=[0.0], burn_in=warmup)
204
- mu_samples = mu_samples.flatten()
205
-
206
- mean = np.mean(mu_samples)
207
- median = np.median(mu_samples)
208
- credible_interval = np.percentile(mu_samples, [2.5, 97.5])
209
-
210
- fig_trace = go.Figure()
211
- fig_trace.add_trace(go.Scatter(y=mu_samples, mode='lines', name='μ', line=dict(width=1)))
212
- fig_trace.update_layout(title="Trace of μ (Metropolis-Hastings)", xaxis_title="Iteration", yaxis_title="μ")
213
-
214
- fig_hist = go.Figure()
215
- fig_hist.add_trace(go.Histogram(x=mu_samples, nbinsx=50, name='Posterior'))
216
- fig_hist.update_layout(title="Posterior Distribution of μ", xaxis_title="μ", yaxis_title="Density")
217
-
218
- summary = {
219
- "mean": mean,
220
- "median": median,
221
- "credible_interval_95": f"[{credible_interval[0]:.3f}, {credible_interval[1]:.3f}]",
222
- "acceptance_rate": f"{acceptance:.2%}"
223
- }
224
- return summary, fig_trace, fig_hist
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  # ----------------------------------------------------------------------
227
- # Dashboard plots
228
  # ----------------------------------------------------------------------
229
  def generate_risk_gauge():
230
- if not risk_history:
231
- return go.Figure()
232
- latest_risk = risk_history[-1][1]
 
233
  fig = go.Figure(go.Indicator(
234
  mode="gauge+number",
235
  value=latest_risk,
@@ -238,27 +342,29 @@ def generate_risk_gauge():
238
  'axis': {'range': [0, 1]},
239
  'bar': {'color': "darkblue"},
240
  'steps': [
241
- {'range': [0, 0.2], 'color': "lightgreen"},
242
- {'range': [0.2, 0.8], 'color': "yellow"},
243
- {'range': [0.8, 1], 'color': "red"}
244
  ]
245
  }))
246
  return fig
247
 
248
  def generate_decision_pie():
249
- if not decision_history:
250
- return go.Figure()
251
- approved = sum(1 for _, d, _ in decision_history if d.get("approved", False))
252
- blocked = len(decision_history) - approved
 
253
  fig = go.Figure(data=[go.Pie(labels=["Approved", "Blocked"], values=[approved, blocked])])
254
  fig.update_layout(title="Policy Decisions")
255
  return fig
256
 
257
  def generate_action_timeline():
258
- if not decision_history:
259
- return go.Figure()
260
- times = [d["timestamp"] for _, d, _ in decision_history]
261
- approvals = [1 if d.get("approved", False) else 0 for _, d, _ in decision_history]
 
262
  fig = go.Figure()
263
  fig.add_trace(go.Scatter(x=times, y=approvals, mode='markers+lines', name='Approvals'))
264
  fig.update_layout(title="Autonomous Actions Timeline", xaxis_title="Time", yaxis_title="Approved (1) / Blocked (0)")
@@ -295,24 +401,43 @@ oss_caps = {
295
  }
296
 
297
  # ----------------------------------------------------------------------
298
- # Start memory monitoring
299
  # ----------------------------------------------------------------------
300
- log_memory_usage()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
  # ----------------------------------------------------------------------
303
  # Gradio UI
304
  # ----------------------------------------------------------------------
305
  with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
306
- gr.Markdown("""
307
  # 🧠 ARF v4 – Bayesian Risk Scoring for AI Reliability (Demo)
308
  **Mathematically rigorous risk estimation using conjugate priors and MCMC**
309
-
310
  This demo showcases:
311
  - **Bayesian conjugate prior (Beta-Binomial)** – online risk update from observed failures/successes.
312
- - **Policy thresholds** – approve (<0.2), escalate (0.20.8), deny (>0.8).
313
  - **Metropolis-Hastings MCMC** – sampling from a posterior distribution (simulating HMC concepts).
314
  - **Autonomous control decisions** – based on the current risk estimate.
315
-
316
  All components are implemented with only `numpy`, `scipy`, and standard libraries.
317
  """)
318
 
@@ -326,8 +451,8 @@ with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
326
  "version": oss_caps["version"],
327
  "governance_mode": "advisory",
328
  "policies_loaded": 2,
329
- "risk_threshold_low": 0.2,
330
- "risk_threshold_high": 0.8
331
  })
332
  with gr.Column():
333
  control_stats = gr.JSON(label="Control Statistics", value={
@@ -377,9 +502,9 @@ with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
377
  with gr.TabItem("Policy Management"):
378
  gr.Markdown("### 📋 Execution Policies")
379
  policies_json = [
380
- {"name": "Low Risk Policy", "conditions": ["risk < 0.2"], "action": "approve", "priority": 1},
381
- {"name": "Medium Risk Policy", "conditions": ["0.2 ≤ risk ≤ 0.8"], "action": "escalate", "priority": 2},
382
- {"name": "High Risk Policy", "conditions": ["risk > 0.8"], "action": "deny", "priority": 3}
383
  ]
384
  gr.JSON(label="Active Policies", value=policies_json)
385
 
@@ -404,7 +529,7 @@ with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
404
 
405
  # Wire events
406
  infra_btn.click(
407
- fn=lambda f, w, s: handle_infra_with_governance(f, w, s),
408
  inputs=[infra_fault, gr.State(50), infra_state],
409
  outputs=[infra_output, infra_state]
410
  )
@@ -415,5 +540,13 @@ with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
415
  outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
416
  )
417
 
 
 
 
 
 
 
 
418
  if __name__ == "__main__":
419
- demo.launch(theme="soft")
 
 
1
  import gradio as gr
 
2
  import json
3
  import logging
 
 
4
  import numpy as np
5
  import pandas as pd
6
  from datetime import datetime
7
+ from typing import Dict, Any, List, Optional, Tuple
8
  import threading
 
9
  import time
10
+ import os
11
+ import sqlite3
12
+ import contextlib
13
  from scipy.stats import beta
14
+ import plotly.graph_objects as go
15
+
16
+ # ----------------------------------------------------------------------
17
+ # Configuration from environment variables
18
+ # ----------------------------------------------------------------------
19
+ LOW_THRESHOLD = float(os.getenv("ARF_LOW_THRESHOLD", "0.2"))
20
+ HIGH_THRESHOLD = float(os.getenv("ARF_HIGH_THRESHOLD", "0.8"))
21
+ ALPHA_PRIOR = float(os.getenv("ARF_ALPHA_PRIOR", "1.0"))
22
+ BETA_PRIOR = float(os.getenv("ARF_BETA_PRIOR", "1.0"))
23
+ DB_PATH = os.getenv("ARF_DB_PATH", "/data/arf_decisions.db")
24
 
25
  # ----------------------------------------------------------------------
26
+ # Logging
27
+ # ----------------------------------------------------------------------
28
+ logging.basicConfig(
29
+ level=logging.INFO,
30
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31
+ )
32
+ logger = logging.getLogger(__name__)
33
+
34
+ # ----------------------------------------------------------------------
35
+ # SQLite persistence
36
+ # ----------------------------------------------------------------------
37
def init_db(db_path: str = None):
    """Create the ``decisions`` table if it does not already exist.

    Args:
        db_path: Path to the SQLite database file. Defaults to the
            module-level ``DB_PATH`` (configurable via ``ARF_DB_PATH``).
    """
    if db_path is None:
        db_path = DB_PATH
    with contextlib.closing(sqlite3.connect(db_path)) as conn:
        # AUTOINCREMENT keeps ids monotonically increasing even after deletes.
        conn.execute('''
            CREATE TABLE IF NOT EXISTS decisions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT NOT NULL,
                decision_json TEXT NOT NULL,
                risk REAL NOT NULL
            )
        ''')
        conn.commit()
    logging.getLogger(__name__).info(f"Database initialized at {db_path}")
51
+
52
def save_decision_to_db(decision: dict, risk: float, db_path: str = None):
    """Insert one decision record into the database (best effort).

    Args:
        decision: Decision payload; must carry a ``"timestamp"`` key, which
            is stored alongside the full JSON-serialized dict.
        risk: Posterior risk estimate associated with the decision.
        db_path: SQLite file path; defaults to the module-level ``DB_PATH``.

    Failures are logged rather than raised, so a broken disk or schema
    never takes down the request path.
    """
    try:
        # Resolve the default inside the try so a misconfigured path is
        # swallowed the same way other persistence errors are.
        if db_path is None:
            db_path = DB_PATH
        with contextlib.closing(sqlite3.connect(db_path)) as conn:
            conn.execute(
                "INSERT INTO decisions (timestamp, decision_json, risk) VALUES (?, ?, ?)",
                (decision["timestamp"], json.dumps(decision), risk),
            )
            conn.commit()
    except Exception as e:
        logging.getLogger(__name__).error(f"Failed to save decision to DB: {e}")
64
+
65
def load_recent_decisions(limit: int = 100, db_path: str = None) -> List[Tuple[str, dict, float]]:
    """Return up to ``limit`` most recent decisions, oldest first.

    Args:
        limit: Maximum number of rows to fetch.
        db_path: SQLite file path; defaults to the module-level ``DB_PATH``.

    Returns:
        List of ``(timestamp, decision_dict, risk)`` tuples in chronological
        order. Returns an empty list (and logs) on any database error.
    """
    decisions: List[Tuple[str, dict, float]] = []
    try:
        if db_path is None:
            db_path = DB_PATH
        with contextlib.closing(sqlite3.connect(db_path)) as conn:
            rows = conn.execute(
                "SELECT timestamp, decision_json, risk FROM decisions ORDER BY timestamp DESC LIMIT ?",
                (limit,),
            ).fetchall()
        decisions = [(ts, json.loads(blob), risk) for ts, blob, risk in rows]
        decisions.reverse()  # DESC query -> flip to oldest-first
    except Exception as e:
        logging.getLogger(__name__).error(f"Failed to load decisions from DB: {e}")
    return decisions
82
+
83
def vacuum_db(db_path: str = None):
    """Run ``VACUUM`` to reclaim free pages (periodic maintenance).

    Args:
        db_path: SQLite file path; defaults to the module-level ``DB_PATH``.

    Errors are logged, not raised, since this is background maintenance.
    """
    try:
        if db_path is None:
            db_path = DB_PATH
        with contextlib.closing(sqlite3.connect(db_path)) as conn:
            conn.execute("VACUUM")
        logging.getLogger(__name__).info("Database vacuumed")
    except Exception as e:
        logging.getLogger(__name__).error(f"Vacuum failed: {e}")
91
+
92
+ # ----------------------------------------------------------------------
93
+ # Thread‑safe history (in‑memory + DB backup)
94
+ # ----------------------------------------------------------------------
95
+ decision_history = []
96
+ risk_history = []
97
+ history_lock = threading.Lock()
98
+
99
def update_dashboard_data(decision: dict, risk: float):
    """Record a decision in the bounded in-memory history and persist it.

    Args:
        decision: Decision payload; presumably carries a "timestamp" key
            consumed by save_decision_to_db — TODO confirm all producers set it.
        risk: Posterior risk estimate associated with the decision.
    """
    # One timestamp for both histories: the original called utcnow() twice,
    # which could stamp decision_history and risk_history with different values.
    ts = datetime.utcnow().isoformat()
    with history_lock:
        decision_history.append((ts, decision, risk))
        risk_history.append((ts, risk))
        # Keep only the 100 most recent entries in memory.
        if len(decision_history) > 100:
            decision_history.pop(0)
        if len(risk_history) > 100:
            risk_history.pop(0)
    # Persist outside the lock so SQLite I/O never blocks dashboard readers.
    save_decision_to_db(decision, risk)
110
+
111
def refresh_history_from_db():
    """Repopulate the in-memory histories from the last 100 persisted decisions.

    Intended to run once at startup, before any request handlers execute.
    """
    recent = load_recent_decisions(100)
    with history_lock:
        # In-place mutation keeps every existing reference to the lists valid.
        del decision_history[:]
        del risk_history[:]
        decision_history.extend((ts, dec, risk) for ts, dec, risk in recent)
        risk_history.extend((ts, risk) for ts, _, risk in recent)
121
+
122
+ # ----------------------------------------------------------------------
123
+ # Memory monitoring (daemon thread)
124
  # ----------------------------------------------------------------------
125
  def get_memory_usage():
126
  """Return current process memory usage in MB (RSS)."""
 
143
  pass
144
  return None
145
 
146
def memory_monitor_loop():
    """Log process memory usage once a minute; meant for a daemon thread."""
    while True:
        try:
            usage_mb = get_memory_usage()
            if usage_mb is None:
                logger.info("Process memory: unknown")
            else:
                logger.info(f"Process memory: {usage_mb:.1f} MB")
        except Exception as e:
            # Monitoring must never kill its own thread.
            logger.error(f"Memory logging error: {e}")
        time.sleep(60)
 
 
 
 
 
 
158
 
159
  # ----------------------------------------------------------------------
160
  # Bayesian Risk Engine (Beta‑Binomial)
161
  # ----------------------------------------------------------------------
162
  class BayesianRiskEngine:
163
+ def __init__(self, alpha=ALPHA_PRIOR, beta=BETA_PRIOR):
164
  self.alpha = alpha
165
  self.beta = beta
166
 
 
177
  return lo, hi
178
 
179
  # ----------------------------------------------------------------------
180
+ # Policy Engine (now configurable)
181
  # ----------------------------------------------------------------------
182
  class PolicyEngine:
183
+ def __init__(self, thresholds: Dict[str, float] = None):
184
+ if thresholds is None:
185
+ thresholds = {"low": LOW_THRESHOLD, "high": HIGH_THRESHOLD}
186
  self.thresholds = thresholds
187
 
188
  def evaluate(self, risk):
 
194
  return "escalate", f"Risk in escalation zone ({self.thresholds['low']}-{self.thresholds['high']})"
195
 
196
  # ----------------------------------------------------------------------
197
+ # Infrastructure analysis (synchronous, with error handling)
198
  # ----------------------------------------------------------------------
199
def handle_infra_with_governance(fault_type: str, context_window: int, session_state: dict):
    """Run one governed infrastructure risk analysis for a fault scenario.

    Args:
        fault_type: One of "none", "switch_down", "server_overload", "cascade";
            anything else falls back to the benign (1, 99) observation counts.
        context_window: Currently unused; kept for interface compatibility.
        session_state: Opaque Gradio session state, returned unchanged.

    Returns:
        Tuple of (result dict, session_state). On any internal failure the
        result dict is ``{"error": ...}`` and the exception is logged.
    """
    # Synthetic (failures, successes) observation counts per fault scenario.
    scenario_counts = {
        "none": (1, 99),
        "switch_down": (20, 80),
        "server_overload": (35, 65),
        "cascade": (60, 40),
    }
    try:
        n_fail, n_ok = scenario_counts.get(fault_type, (1, 99))

        engine = BayesianRiskEngine()
        engine.update(n_fail, n_ok)
        risk = engine.risk()
        lo, hi = engine.risk_interval(0.95)

        policy = PolicyEngine()
        action, reason = policy.evaluate(risk)
        control = autonomous_control_decision(risk, engine, policy)

        payload = {
            "risk": risk,
            "risk_ci": [lo, hi],
            "decision": action,
            "justification": reason,
            "healing_actions": ["restart"] if action == "deny" else ["monitor"],
            "posterior_parameters": {
                "alpha": engine.alpha,
                "beta": engine.beta,
            },
            "governance": {
                "policy_evaluation": {
                    "action": action,
                    "reason": reason,
                    "thresholds": policy.thresholds,
                },
                "control_plane_decision": control,
            },
        }
        return payload, session_state
    except Exception as e:
        # Boundary handler for the UI: log with traceback, return an error blob.
        logger.exception("Error in handle_infra_with_governance")
        return {"error": str(e)}, session_state
244
 
245
  def autonomous_control_decision(risk, risk_engine, policy_engine):
246
  action, reason = policy_engine.evaluate(risk)
 
255
  return decision
256
 
257
  # ----------------------------------------------------------------------
258
+ # MCMC (Metropolis‑Hastings) with input validation and timeout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  # ----------------------------------------------------------------------
260
  class MHMCMC:
261
  def __init__(self, log_target, proposal_sd=0.1):
 
280
  acceptance_rate = accepted / (n_samples + burn_in)
281
  return samples, acceptance_rate
282
 
283
def run_hmc_mcmc(samples: int, warmup: int):
    """Sample a 1-D Gaussian-mean posterior with Metropolis-Hastings.

    Args:
        samples: Requested number of post-warmup draws (clamped to 500..10000).
        warmup: Requested burn-in length (clamped to 100..2000).

    Returns:
        ``(summary_dict, trace_figure, histogram_figure)``; on failure the
        summary is ``{"error": ...}`` with two empty figures.
    """
    try:
        # Clamp user-supplied sizes to sane bounds.
        n_draws = max(500, min(10000, int(samples)))
        n_burn = max(100, min(2000, int(warmup)))

        # Fixed global seed keeps the whole run (data + chain) reproducible.
        np.random.seed(42)
        observations = np.random.normal(0.5, 0.2, 10)

        def log_posterior(mu):
            # N(0,1) prior plus Gaussian likelihood with known sigma = 0.2.
            prior_term = -0.5 * (mu ** 2)
            like_term = -0.5 * np.sum(((observations - mu) / 0.2) ** 2)
            return prior_term + like_term

        chain, acceptance = MHMCMC(log_posterior, proposal_sd=0.05).sample(
            n_draws, initial_state=[0.0], burn_in=n_burn
        )
        draws = chain.flatten()

        ci = np.percentile(draws, [2.5, 97.5])

        trace_fig = go.Figure()
        trace_fig.add_trace(go.Scatter(y=draws, mode='lines', name='μ', line=dict(width=1)))
        trace_fig.update_layout(title="Trace of μ (Metropolis-Hastings)", xaxis_title="Iteration", yaxis_title="μ")

        hist_fig = go.Figure()
        hist_fig.add_trace(go.Histogram(x=draws, nbinsx=50, name='Posterior'))
        hist_fig.update_layout(title="Posterior Distribution of μ", xaxis_title="μ", yaxis_title="Density")

        stats = {
            "mean": np.mean(draws),
            "median": np.median(draws),
            "credible_interval_95": f"[{ci[0]:.3f}, {ci[1]:.3f}]",
            "acceptance_rate": f"{acceptance:.2%}",
        }
        return stats, trace_fig, hist_fig
    except Exception as e:
        logger.exception("MCMC computation failed")
        return {"error": str(e)}, go.Figure(), go.Figure()
328
 
329
  # ----------------------------------------------------------------------
330
+ # Dashboard plots (thread‑safe)
331
  # ----------------------------------------------------------------------
332
  def generate_risk_gauge():
333
+ with history_lock:
334
+ if not risk_history:
335
+ return go.Figure()
336
+ latest_risk = risk_history[-1][1]
337
  fig = go.Figure(go.Indicator(
338
  mode="gauge+number",
339
  value=latest_risk,
 
342
  'axis': {'range': [0, 1]},
343
  'bar': {'color': "darkblue"},
344
  'steps': [
345
+ {'range': [0, LOW_THRESHOLD], 'color': "lightgreen"},
346
+ {'range': [LOW_THRESHOLD, HIGH_THRESHOLD], 'color': "yellow"},
347
+ {'range': [HIGH_THRESHOLD, 1], 'color': "red"}
348
  ]
349
  }))
350
  return fig
351
 
352
def generate_decision_pie():
    """Pie chart of approved vs blocked decisions; empty figure if no history."""
    with history_lock:
        if not decision_history:
            return go.Figure()
        # Snapshot counts under the lock; build the figure after releasing it.
        n_approved = sum(1 for _, dec, _ in decision_history if dec.get("approved", False))
        n_blocked = len(decision_history) - n_approved
    fig = go.Figure(data=[go.Pie(labels=["Approved", "Blocked"], values=[n_approved, n_blocked])])
    fig.update_layout(title="Policy Decisions")
    return fig
361
 
362
def generate_action_timeline():
    """Timeline of approvals (1) vs blocks (0); empty figure if no history."""
    with history_lock:
        if not decision_history:
            return go.Figure()
        # NOTE(review): assumes every decision dict carries a "timestamp"
        # key — confirm against the decision producer.
        xs = [dec["timestamp"] for _, dec, _ in decision_history]
        ys = [1 if dec.get("approved", False) else 0 for _, dec, _ in decision_history]
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=xs, y=ys, mode='markers+lines', name='Approvals'))
    fig.update_layout(title="Autonomous Actions Timeline", xaxis_title="Time", yaxis_title="Approved (1) / Blocked (0)")
    return fig
 
401
  }
402
 
403
  # ----------------------------------------------------------------------
404
+ # Health endpoint (custom route)
405
  # ----------------------------------------------------------------------
406
async def health_endpoint():
    """Liveness probe: report healthy along with the current UTC time."""
    payload = {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
    return payload
408
+
409
+ # ----------------------------------------------------------------------
410
+ # Startup
411
+ # ----------------------------------------------------------------------
412
# Ensure the directory that will hold the SQLite file exists.
# (dirname is "" for a bare filename, hence the fallback to ".")
os.makedirs(os.path.dirname(DB_PATH) or ".", exist_ok=True)
init_db()
refresh_history_from_db()

# Memory logging runs in a daemon thread so it dies with the main process.
mem_thread = threading.Thread(target=memory_monitor_loop, daemon=True)
mem_thread.start()

def vacuum_scheduler():
    """Sleep 24 h between VACUUM passes; loops forever in a daemon thread."""
    while True:
        time.sleep(86400)  # 24 hours
        vacuum_db()

vacuum_thread = threading.Thread(target=vacuum_scheduler, daemon=True)
vacuum_thread.start()
428
 
429
  # ----------------------------------------------------------------------
430
  # Gradio UI
431
  # ----------------------------------------------------------------------
432
  with gr.Blocks(title="ARF v4 – Bayesian Risk Scoring Demo") as demo:
433
+ gr.Markdown(f"""
434
  # 🧠 ARF v4 – Bayesian Risk Scoring for AI Reliability (Demo)
435
  **Mathematically rigorous risk estimation using conjugate priors and MCMC**
 
436
  This demo showcases:
437
  - **Bayesian conjugate prior (Beta-Binomial)** – online risk update from observed failures/successes.
438
+ - **Policy thresholds** – approve (<{LOW_THRESHOLD}), escalate ({LOW_THRESHOLD}{HIGH_THRESHOLD}), deny (>{HIGH_THRESHOLD}).
439
  - **Metropolis-Hastings MCMC** – sampling from a posterior distribution (simulating HMC concepts).
440
  - **Autonomous control decisions** – based on the current risk estimate.
 
441
  All components are implemented with only `numpy`, `scipy`, and standard libraries.
442
  """)
443
 
 
451
  "version": oss_caps["version"],
452
  "governance_mode": "advisory",
453
  "policies_loaded": 2,
454
+ "risk_threshold_low": LOW_THRESHOLD,
455
+ "risk_threshold_high": HIGH_THRESHOLD
456
  })
457
  with gr.Column():
458
  control_stats = gr.JSON(label="Control Statistics", value={
 
502
  with gr.TabItem("Policy Management"):
503
  gr.Markdown("### 📋 Execution Policies")
504
  policies_json = [
505
+ {"name": "Low Risk Policy", "conditions": [f"risk < {LOW_THRESHOLD}"], "action": "approve", "priority": 1},
506
+ {"name": "Medium Risk Policy", "conditions": [f"{LOW_THRESHOLD} ≤ risk ≤ {HIGH_THRESHOLD}"], "action": "escalate", "priority": 2},
507
+ {"name": "High Risk Policy", "conditions": [f"risk > {HIGH_THRESHOLD}"], "action": "deny", "priority": 3}
508
  ]
509
  gr.JSON(label="Active Policies", value=policies_json)
510
 
 
529
 
530
  # Wire events
531
  infra_btn.click(
532
+ fn=handle_infra_with_governance,
533
  inputs=[infra_fault, gr.State(50), infra_state],
534
  outputs=[infra_output, infra_state]
535
  )
 
540
  outputs=[hmc_summary, hmc_trace_plot, hmc_pair_plot]
541
  )
542
 
543
+ # Add health endpoint
544
+ demo.add_http_route(
545
+ path="/health",
546
+ method="GET",
547
+ endpoint=health_endpoint
548
+ )
549
+
550
if __name__ == "__main__":
    # Enable the request queue so the slow MCMC handler doesn't block others.
    demo.queue()
    # NOTE(review): `theme=` is normally a gr.Blocks(...) constructor kwarg,
    # not a launch() kwarg — verify against the pinned Gradio version.
    demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)