petter2025 committed on
Commit
bc93cf1
·
verified ·
1 Parent(s): d4cd230

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -383
app.py DELETED
@@ -1,383 +0,0 @@
1
- # app.py – ARF v4 API with Gradio frontend (FastAPI mounted under /api)
2
- import logging
3
- import uuid
4
- from datetime import datetime, timezone
5
- from typing import Dict, Optional, List
6
-
7
- from fastapi import FastAPI, HTTPException
8
- from fastapi.middleware.cors import CORSMiddleware
9
- from fastapi.openapi.docs import get_swagger_ui_html, get_redoc_html
10
- from fastapi.responses import RedirectResponse
11
- from pydantic import BaseModel
12
- import gradio as gr
13
-
14
- # ARF v4 imports
15
- from agentic_reliability_framework.core.governance.risk_engine import RiskEngine
16
- from agentic_reliability_framework.runtime.memory import create_faiss_index, RAGGraphMemory
17
- from agentic_reliability_framework.runtime.memory.constants import MemoryConstants
18
-
19
- # Additional imports for policy and cost
20
- from agentic_reliability_framework.core.governance.policy_engine import PolicyEngine
21
- from agentic_reliability_framework.core.governance.cost_estimator import CostEstimator
22
- from agentic_reliability_framework.core.governance.intents import (
23
- DeployConfigurationIntent,
24
- Environment,
25
- )
26
- from agentic_reliability_framework.core.governance.healing_intent import (
27
- HealingIntent,
28
- RecommendedAction,
29
- IntentStatus,
30
- IntentSource,
31
- )
32
-
# ========================= LOGGING =========================
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ========================= FASTAPI APP =========================
fastapi_app = FastAPI(title="ARF v4 API")

# CORS: only the hosted frontend origin is allowed; any method and any
# header is accepted from that origin.
fastapi_app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://arf-frontend-sandy.vercel.app"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ========================= ARF COMPONENTS =========================
# Risk engine plus a FAISS-backed RAG graph memory sized by the framework's
# configured vector dimension.
risk_engine = RiskEngine()
faiss_index = create_faiss_index(dim=MemoryConstants.VECTOR_DIM)
memory = RAGGraphMemory(faiss_index)

# Governance helpers used by /v1/incidents/evaluate. Both are built with
# their default constructors; no policies are loaded explicitly here.
policy_engine = PolicyEngine()
cost_estimator = CostEstimator()

# In-memory, per-process decision log for demo purposes (read by /v1/history,
# updated by /v1/feedback and the Gradio buttons). It is never trimmed and is
# lost on restart.
decision_history: List[Dict] = []
58
-
59
- # ========================= PYDANTIC MODELS =========================
class EvaluateRequest(BaseModel):
    """Request body accepted by ``POST /v1/incidents/evaluate``."""

    # Becomes the ``service_name`` of the intent built for the evaluation.
    service_name: str
    # Copied verbatim into the intent's provenance record.
    event_type: str
    # Copied verbatim into the intent's provenance record.
    severity: str
    # Forwarded as the intent's ``configuration``; optional, defaults to empty.
    metrics: Dict[str, float] = {}
65
-
class EvaluateResponse(BaseModel):
    """Risk-evaluation response schema.

    NOTE(review): no endpoint visible in this file declares this model as its
    ``response_model`` -- confirm whether it is still needed.
    """

    # Field meanings below are inferred from the names only -- TODO confirm.
    risk_score: float
    base_risk: float
    # Optional; omitted/None when no memory-derived risk is available.
    memory_risk: Optional[float] = None
    weight: float
    # Untyped list; defaults to empty.
    similar_events: list = []
    confidence: float
73
-
74
- # ========================= HELPER: Demo Intent =========================
class _DemoIntent:
    """Fixed stand-in intent handed to the risk engine by the demo paths.

    It carries only three class-level string attributes and has no behaviour
    of its own.
    """

    service_name = "demo"
    environment = "dev"
    deployment_target = "dev"
79
-
80
- # ========================= API ENDPOINTS =========================
@fastapi_app.get("/")
async def root():
    """Return a short welcome message that points at the API docs."""
    welcome = {"message": "ARF v4 API. See /docs for documentation."}
    return welcome
85
-
@fastapi_app.get("/health")
async def health():
    """Return a static liveness payload with the service version."""
    payload = {"status": "ok", "version": "4.2.0"}
    return payload
89
-
@fastapi_app.get("/v1/get_risk")
async def get_risk():
    """Score the fixed demo intent and record the resulting decision.

    The decision is "deny" above 0.8, "escalate" above 0.2 and "approve"
    otherwise. Each call appends one entry to the in-memory decision history
    so that it can later be matched by /v1/feedback.
    """
    risk_value, explanation, contributions = risk_engine.calculate_risk(
        intent=_DemoIntent(),
        cost_estimate=None,
        policy_violations=[],
    )

    if risk_value > 0.8:
        decision = "deny"
    elif risk_value > 0.2:
        decision = "escalate"
    else:
        decision = "approve"

    decision_id = str(uuid.uuid4())
    # Taken once so the stored history entry and the response carry the very
    # same timestamp (previously each computed its own and they could differ).
    timestamp = datetime.now(timezone.utc).isoformat()
    score = float(risk_value)

    decision_history.append({
        "decision_id": decision_id,
        "timestamp": timestamp,
        "risk_score": score,
        "outcome": None,  # filled in when feedback is given
    })

    return {
        "system_risk": score,
        "status": "critical" if risk_value > 0.8 else "normal",
        "explanation": explanation,
        "contributions": contributions,
        "decision_id": decision_id,
        "decision": decision,
        "timestamp": timestamp,
    }
122
-
@fastapi_app.get("/v1/history")
async def get_history():
    """Return up to the ten most recent decisions, oldest first."""
    recent = decision_history[-10:]
    return recent
127
-
@fastapi_app.post("/v1/incidents/evaluate")
async def evaluate_incident(request: EvaluateRequest):
    """
    Evaluate an incident by converting it into an infrastructure intent and
    running it through the policy engine, cost estimator and risk engine.

    Returns the dictionary form of an advisory-only HealingIntent carrying the
    risk assessment and recommended action. Similar-incident lookup is not
    wired up yet, so ``similar_incidents`` is always empty.

    Raises:
        HTTPException: status 500 with the error text when any step fails.
    """
    try:
        # Map the incident onto a DeployConfigurationIntent (example mapping):
        # single-instance scope against the dev environment.
        intent = DeployConfigurationIntent(
            service_name=request.service_name,
            change_scope="single_instance",
            deployment_target=Environment.DEV,
            configuration=request.metrics,
            requester="system",
            provenance={
                "source": "incident_evaluation",
                "event_type": request.event_type,
                "severity": request.severity,
            },
        )

        # 1. Evaluate policies (a falsy result is normalised to an empty list).
        policy_violations = policy_engine.evaluate_policies(intent) or []

        # 2. Estimate cost.
        cost_projection = cost_estimator.estimate_monthly_cost(intent)

        # 3. Compute the risk score.
        risk_score, explanation, contributions = risk_engine.calculate_risk(
            intent=intent,
            cost_estimate=cost_projection,
            policy_violations=policy_violations,
        )

        # 4. Similar incidents: placeholder until the incident can be embedded
        #    and looked up in the memory module.
        similar_incidents = []

        # 5. Recommended action from the risk score.
        # NOTE(review): a score of exactly 0.2 escalates here, whereas
        # /v1/get_risk approves it -- confirm which boundary is intended.
        if risk_score < 0.2:
            action = RecommendedAction.APPROVE
        elif risk_score > 0.8:
            action = RecommendedAction.DENY
        else:
            action = RecommendedAction.ESCALATE

        # 6. Weighted per-component risk factors. Each row is
        #    (weight key, component value key, output factor name); a
        #    component is reported only when its weight is positive, and the
        #    overall score stands in when the component value is missing.
        weights = contributions.get("weights", {})
        risk_factors = {}
        for weight_key, value_key, factor_name in (
            ("conjugate", "conjugate_mean", "conjugate"),
            ("hyper", "hyper_mean", "hyperprior"),
            ("hmc", "hmc_prediction", "hmc"),
        ):
            weight = weights.get(weight_key, 0.0)
            if weight > 0:
                component_risk = contributions.get(value_key, risk_score)
                risk_factors[factor_name] = weight * component_risk

        # Fallback so the response always carries at least one factor.
        if not risk_factors:
            risk_factors["conjugate"] = risk_score

        # 7. Build the HealingIntent (advisory only, execution disabled).
        healing_intent = HealingIntent(
            action=action.value,
            component=intent.service_name,
            parameters={},
            justification=explanation,
            confidence=0.9,  # placeholder value
            incident_id="",  # not used in this context
            detected_at=datetime.now(timezone.utc).timestamp(),
            risk_score=risk_score,
            risk_factors=risk_factors,
            cost_projection=cost_projection,
            recommended_action=action,
            similar_incidents=similar_incidents,
            policy_violations=policy_violations,
            status=IntentStatus.OSS_ADVISORY_ONLY,
            source=IntentSource.INFRASTRUCTURE_ANALYSIS,
            requires_enterprise=True,
            execution_allowed=False,
        )

        response_dict = healing_intent.to_dict(include_oss_context=True)

        # Fill in fields the frontend expects when to_dict() did not emit them.
        if "epistemic_uncertainty" not in response_dict:
            response_dict["epistemic_uncertainty"] = 0.05  # default
        if "confidence_interval" not in response_dict:
            # Simple +/- 0.05 band, clamped to [0, 1].
            response_dict["confidence_interval"] = [
                max(0.0, risk_score - 0.05),
                min(1.0, risk_score + 0.05),
            ]
        if "risk_contributions" not in response_dict:
            # NOTE(review): "hyper_mean" is read above like the other raw
            # component values but is not excluded here -- confirm intent.
            excluded = ("weights", "conjugate_mean", "hmc_prediction")
            response_dict["risk_contributions"] = [
                {"factor": name, "contribution": value}
                for name, value in contributions.items()
                if name not in excluded
            ]

        return response_dict

    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as e:
        logger.exception("Error in evaluate_incident")
        raise HTTPException(status_code=500, detail=str(e)) from e
239
-
@fastapi_app.post("/v1/feedback")
async def record_outcome(decision_id: str, success: bool):
    """Record the outcome (success/failure) of a previously issued decision.

    Returns an ``{"error": ...}`` payload when the id is unknown. Feeding the
    outcome to the risk engine is best effort: a failure there is logged and
    the outcome is still stored.
    """
    entry = next(
        (item for item in decision_history if item["decision_id"] == decision_id),
        None,
    )
    if entry is None:
        return {"error": "decision not found"}

    entry["outcome"] = "success" if success else "failure"

    # The engine is always updated against the demo intent.
    intent = _DemoIntent()
    try:
        risk_engine.update_outcome(intent, success)
    except Exception:
        logger.exception("Outcome update failed")

    return {"status": "ok", "decision_id": decision_id, "outcome": entry["outcome"]}
254
-
255
- # ========================= NEW MEMORY STATS ENDPOINT =========================
@fastapi_app.get("/v1/memory/stats")
async def get_memory_stats():
    """Return the memory graph statistics, or an error payload if memory is falsy."""
    if not memory:
        return {"error": "Memory not initialized"}
    return memory.get_graph_stats()
262
-
263
- # ========================= GRADIO UI =========================
def get_risk_snapshot():
    """Score the demo intent for the Gradio UI and log the decision.

    Returns a dict with the risk, status, explanation, weighted risk factors
    and decision metadata. Any failure is logged and reported as
    ``{"error": <message>}`` instead of being raised.
    """
    try:
        risk_value, explanation, contributions = risk_engine.calculate_risk(
            intent=_DemoIntent(),
            cost_estimate=None,
            policy_violations=[],
        )

        if risk_value > 0.8:
            decision = "deny"
        elif risk_value > 0.2:
            decision = "escalate"
        else:
            decision = "approve"

        decision_id = str(uuid.uuid4())
        # Taken once so the history entry and the returned snapshot agree
        # (previously each computed its own timestamp).
        timestamp = datetime.now(timezone.utc).isoformat()
        score = float(risk_value)

        decision_history.append({
            "decision_id": decision_id,
            "timestamp": timestamp,
            "risk_score": score,
            "outcome": None,
        })

        # Weighted per-component factors. Each row is (weight key, component
        # value key, output factor name); a component is shown only when its
        # weight is positive, and the overall risk stands in when the
        # component value is missing.
        weights = contributions.get("weights", {})
        risk_factors = {}
        for weight_key, value_key, factor_name in (
            ("conjugate", "conjugate_mean", "conjugate"),
            ("hyper", "hyper_mean", "hyperprior"),
            ("hmc", "hmc_prediction", "hmc"),
        ):
            weight = weights.get(weight_key, 0.0)
            if weight > 0:
                component_risk = contributions.get(value_key, risk_value)
                risk_factors[factor_name] = weight * component_risk
        if not risk_factors:
            risk_factors["conjugate"] = risk_value

        return {
            "risk": score,
            "status": "critical" if risk_value > 0.8 else "normal",
            "explanation": explanation,
            "risk_factors": risk_factors,
            "decision_id": decision_id,
            "decision": decision,
            "timestamp": timestamp,
        }
    except Exception as e:
        logger.exception("Failed to compute risk snapshot")
        return {"error": str(e)}
312
-
def get_health_snapshot():
    """Return the static health payload for the UI, stamped with the current UTC time."""
    snapshot = {"status": "ok", "version": "4.2.0", "service": "ARF OSS API"}
    snapshot["timestamp"] = datetime.now(timezone.utc).isoformat()
    return snapshot
315
-
def get_memory_snapshot():
    """Return memory graph statistics for the UI, or an "empty" marker.

    The statistics are only fetched when the memory reports that it holds
    historical data.
    """
    if not memory.has_historical_data():
        return {
            "status": "empty",
            "memory_stats": "No historical memory yet.",
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
    return {
        "status": "ok",
        "memory_stats": memory.get_graph_stats(),
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
320
-
def record_outcome_ui(success: bool):
    """Attach an outcome to the most recent decision (Gradio button handler).

    Returns an error payload when no decision exists yet. Updating the risk
    engine is best effort: a failure there is logged and the stored outcome
    is still returned.
    """
    if not decision_history:
        return {"error": "no decisions yet"}

    latest = decision_history[-1]
    latest["outcome"] = "success" if success else "failure"

    # The engine is always updated against the demo intent.
    intent = _DemoIntent()
    try:
        risk_engine.update_outcome(intent, success)
    except Exception:
        logger.exception("Outcome update failed")

    return {
        "decision_id": latest["decision_id"],
        "outcome": latest["outcome"],
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
332
-
# Gradio dashboard: four JSON panels fed by two rows of buttons. Components
# and event handlers are declared in a fixed order; keep that order when
# editing.
with gr.Blocks(title="ARF v4.2.0 Demo", theme=gr.themes.Soft()) as demo:
    # --- Header ---
    gr.Markdown("# Agentic Reliability Framework v4.2.0")
    gr.Markdown("### Probabilistic Infrastructure Governance – [📚 API Docs](/api/docs) | [📦 GitHub](https://github.com/arf-foundation/agentic-reliability-framework) | [📅 Book a Call](https://calendly.com/petter2025us/30min)")
    gr.Markdown("---")

    # --- Output panels ---
    with gr.Row():
        health_output = gr.JSON(label="Health")
        risk_output = gr.JSON(label="Current Risk")
    with gr.Row():
        memory_output = gr.JSON(label="Memory Stats")
    with gr.Row():
        decision_output = gr.JSON(label="Recent Decisions")

    # --- Evaluate the demo intent and report how the action turned out ---
    with gr.Row():
        refresh_btn = gr.Button("Evaluate Intent")
        success_btn = gr.Button("Action Succeeded")
        fail_btn = gr.Button("Action Failed")
    refresh_btn.click(fn=get_risk_snapshot, outputs=risk_output)
    # Outcome buttons always apply to the most recent decision.
    success_btn.click(fn=lambda: record_outcome_ui(True), outputs=decision_output)
    fail_btn.click(fn=lambda: record_outcome_ui(False), outputs=decision_output)

    # --- Read-only refresh buttons ---
    with gr.Row():
        health_btn = gr.Button("Refresh Health")
        memory_btn = gr.Button("Refresh Memory")
        history_btn = gr.Button("Show Decision History")
    health_btn.click(fn=get_health_snapshot, outputs=health_output)
    memory_btn.click(fn=get_memory_snapshot, outputs=memory_output)
    # Shows the same ten-entry window that /v1/history returns.
    history_btn.click(fn=lambda: decision_history[-10:], outputs=decision_output)
358
-
# ========================= Mount Gradio and Add Documentation Routes =========================
# The /api/docs redirect must be registered BEFORE Gradio is mounted at /api:
# routes are matched in registration order and the mount claims every path
# under /api, so a handler added afterwards would never be reached.
@fastapi_app.get("/api/docs", include_in_schema=False)
async def redirect_docs():
    """Redirect the docs link used in the Gradio header to the Swagger UI."""
    return RedirectResponse(url="/docs")


# Serve the Gradio UI as a sub-application under /api.
app = gr.mount_gradio_app(fastapi_app, demo, path="/api")


# NOTE(review): fastapi_app is created with FastAPI's default documentation
# settings, so the built-in /docs, /redoc and /openapi.json routes are
# presumably registered first and take precedence over the three handlers
# below -- confirm whether these are still needed.
@app.get("/docs", include_in_schema=False)
async def swagger_ui():
    """Serve the Swagger UI page for the API schema."""
    return get_swagger_ui_html(
        openapi_url="/openapi.json",
        title="ARF API Docs",
    )


@app.get("/redoc", include_in_schema=False)
async def redoc_ui():
    """Serve the ReDoc page for the API schema."""
    return get_redoc_html(
        openapi_url="/openapi.json",
        title="ARF API ReDoc",
    )


@app.get("/openapi.json", include_in_schema=False)
async def openapi():
    """Return the OpenAPI schema generated for the FastAPI application."""
    return fastapi_app.openapi()