| """ | |
| Production REST API server for LangGraph Multi-Agent MCTS Framework. | |
| Provides: | |
| - OpenAPI/Swagger documentation | |
| - Authentication via API keys | |
| - Rate limiting | |
| - Health and readiness endpoints | |
| - Request validation with Pydantic | |
| - Prometheus metrics exposure | |
| """ | |

import asyncio
import time
from contextlib import asynccontextmanager
from datetime import datetime
from typing import Any

from fastapi import Depends, FastAPI, Header, HTTPException, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, ConfigDict, Field

# Import framework components
try:
    from src.adapters.llm import create_client  # noqa: F401
    from src.api.auth import (
        APIKeyAuthenticator,
        ClientInfo,
        RateLimitConfig,
        get_authenticator,
        set_authenticator,
    )
    from src.api.exceptions import (
        AuthenticationError,
        AuthorizationError,  # noqa: F401
        FrameworkError,
        RateLimitError,
        ValidationError,  # noqa: F401
    )
    from src.models.validation import MCTSConfig, QueryInput  # noqa: F401

    IMPORTS_AVAILABLE = True
except ImportError as e:
    IMPORTS_AVAILABLE = False
    import_error = str(e)

# Prometheus metrics (optional)
try:
    from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest

    PROMETHEUS_AVAILABLE = True

    # Define metrics
    REQUEST_COUNT = Counter("mcts_requests_total", "Total number of requests", ["method", "endpoint", "status"])
    REQUEST_LATENCY = Histogram("mcts_request_duration_seconds", "Request latency in seconds", ["method", "endpoint"])
    ACTIVE_REQUESTS = Gauge("mcts_active_requests", "Number of active requests")
    ERROR_COUNT = Counter("mcts_errors_total", "Total number of errors", ["error_type"])
except ImportError:
    PROMETHEUS_AVAILABLE = False


# Request/Response Models
class QueryRequest(BaseModel):
    """Request model for query processing."""

    query: str = Field(
        ...,
        min_length=1,
        max_length=10000,
        description="User query to process",
        json_schema_extra={"example": "Recommend defensive positions for night attack scenario"},
    )
    use_mcts: bool = Field(default=True, description="Enable MCTS tactical simulation")
    use_rag: bool = Field(default=True, description="Enable RAG context retrieval")
    mcts_iterations: int | None = Field(default=None, ge=1, le=10000, description="Override default MCTS iterations")
    thread_id: str | None = Field(
        default=None,
        max_length=100,
        pattern=r"^[a-zA-Z0-9_-]+$",
        description="Conversation thread ID for state persistence",
    )

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "Recommend defensive positions for night attack",
                "use_mcts": True,
                "use_rag": True,
                "mcts_iterations": 200,
                "thread_id": "session_123",
            }
        }
    )


class QueryResponse(BaseModel):
    """Response model for query results."""

    response: str = Field(..., description="Final synthesized response")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Overall confidence score")
    agents_used: list[str] = Field(..., description="List of agents that contributed")
    mcts_stats: dict[str, Any] | None = Field(default=None, description="MCTS simulation statistics")
    processing_time_ms: float = Field(..., description="Total processing time in milliseconds")
    metadata: dict[str, Any] = Field(default_factory=dict, description="Additional metadata")


class HealthResponse(BaseModel):
    """Health check response."""

    status: str = Field(..., description="Service status")
    timestamp: str = Field(..., description="Current timestamp")
    version: str = Field(default="1.0.0", description="API version")
    uptime_seconds: float = Field(..., description="Service uptime")


class ReadinessResponse(BaseModel):
    """Readiness check response."""

    ready: bool = Field(..., description="Whether service is ready")
    checks: dict[str, bool] = Field(..., description="Individual check results")


class ErrorResponse(BaseModel):
    """Error response model."""

    error: bool = Field(default=True)
    error_code: str = Field(..., description="Machine-readable error code")
    message: str = Field(..., description="Human-readable error message")
    timestamp: str = Field(..., description="Error timestamp")


# Application startup
start_time = time.time()
framework_instance = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager."""
    global framework_instance

    # Startup
    print("Starting MCTS Framework API server...")

    # Initialize authenticator with demo key (replace in production)
    authenticator = APIKeyAuthenticator(
        valid_keys=["demo-api-key-replace-in-production"],
        rate_limit_config=RateLimitConfig(
            requests_per_minute=60,
            requests_per_hour=1000,
            requests_per_day=10000,
        ),
    )
    set_authenticator(authenticator)
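
    # NOTE: the demo key above is a placeholder. A production deployment would
    # load keys from a secret store or an environment variable instead, for
    # example (hypothetical variable name):
    #
    #   valid_keys = os.environ.get("MCTS_API_KEYS", "").split(",")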

    # Initialize framework (lazy loading)
    # framework_instance = create_framework()

    print("API server started successfully")

    yield

    # Shutdown
    print("Shutting down API server...")


# Create FastAPI app
app = FastAPI(
    title="LangGraph Multi-Agent MCTS API",
    description="""
## Multi-Agent Reasoning API with MCTS Tactical Simulation

This API provides access to a multi-agent reasoning framework that combines:

- **HRM Agent**: Hierarchical decomposition of complex queries
- **TRM Agent**: Iterative refinement for response quality
- **MCTS Engine**: Monte Carlo Tree Search for tactical simulation
- **RAG Integration**: Context retrieval from vector stores

### Features

- Secure API key authentication
- Rate limiting per client
- Real-time metrics (Prometheus)
- Distributed tracing (OpenTelemetry)
- Production-grade error handling

### Quick Start

1. Obtain an API key
2. Include the `X-API-Key` header in requests
3. Send queries to the `/query` endpoint (see the example below)
4. Monitor health via the `/health` endpoint
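
### Example Request

An illustrative request against a local deployment (adjust the host, port, and key;
`requests` is used here only as an example HTTP client):

```python
import requests

resp = requests.post(
    "http://localhost:8000/query",
    headers={"X-API-Key": "demo-api-key-replace-in-production"},
    json={"query": "Recommend defensive positions for night attack", "use_mcts": True},
)
print(resp.json()["response"])
```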
| """, | |
| version="1.0.0", | |
| docs_url="/docs", | |
| redoc_url="/redoc", | |
| openapi_tags=[ | |
| {"name": "query", "description": "Query processing operations"}, | |
| {"name": "health", "description": "Health and readiness checks"}, | |
| {"name": "metrics", "description": "Observability endpoints"}, | |
| ], | |
| lifespan=lifespan, | |
| ) | |

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Middleware for metrics
@app.middleware("http")
async def metrics_middleware(request: Request, call_next):
    """Track request metrics."""
    if PROMETHEUS_AVAILABLE:
        ACTIVE_REQUESTS.inc()
    start = time.perf_counter()
    try:
        response = await call_next(request)
        status = response.status_code
    except Exception:
        status = 500
        raise
    finally:
        if PROMETHEUS_AVAILABLE:
            ACTIVE_REQUESTS.dec()
            elapsed = time.perf_counter() - start
            REQUEST_COUNT.labels(method=request.method, endpoint=request.url.path, status=str(status)).inc()
            REQUEST_LATENCY.labels(method=request.method, endpoint=request.url.path).observe(elapsed)
    return response


# Authentication dependency
async def verify_api_key(x_api_key: str = Header(..., description="API key for authentication")):
    """Verify API key and return client info."""
    if not IMPORTS_AVAILABLE:
        raise HTTPException(status_code=500, detail="Authentication module not available")
    try:
        authenticator = get_authenticator()
        client_info = authenticator.require_auth(x_api_key)
        return client_info
    except AuthenticationError as e:
        if PROMETHEUS_AVAILABLE:
            ERROR_COUNT.labels(error_type="authentication").inc()
        raise HTTPException(status_code=401, detail=e.user_message)
    except RateLimitError as e:
        if PROMETHEUS_AVAILABLE:
            ERROR_COUNT.labels(error_type="rate_limit").inc()
        raise HTTPException(
            status_code=429, detail=e.user_message, headers={"Retry-After": str(e.retry_after_seconds or 60)}
        )


# Exception handlers
@app.exception_handler(FrameworkError)
async def framework_error_handler(request: Request, exc: FrameworkError):
    """Handle framework-specific errors."""
    if PROMETHEUS_AVAILABLE:
        ERROR_COUNT.labels(error_type=exc.error_code).inc()
    return JSONResponse(status_code=500, content=exc.to_user_response())


@app.exception_handler(ValidationError)
async def validation_error_handler(request: Request, exc: ValidationError):
    """Handle validation errors."""
    if PROMETHEUS_AVAILABLE:
        ERROR_COUNT.labels(error_type="validation").inc()
    return JSONResponse(status_code=400, content=exc.to_user_response())


# Endpoints
@app.get("/health", response_model=HealthResponse, tags=["health"])
async def health_check():
    """
    Health check endpoint.

    Returns basic service health status. Use this for load balancer health checks.
    """
    return HealthResponse(
        status="healthy",
        timestamp=datetime.utcnow().isoformat(),
        version="1.0.0",
        uptime_seconds=time.time() - start_time,
    )


@app.get("/ready", response_model=ReadinessResponse, tags=["health"])  # route path assumed
async def readiness_check():
    """
    Readiness check endpoint.

    Verifies that all dependencies are available. Use this for Kubernetes readiness probes.
    """
    checks = {
        "imports_available": IMPORTS_AVAILABLE,
        "authenticator_configured": True,
        "llm_client_available": True,  # Would check the actual client
        "prometheus_available": PROMETHEUS_AVAILABLE,
    }

    # Check whether all critical services are available
    all_ready = all(
        [
            checks["imports_available"],
            checks["authenticator_configured"],
        ]
    )

    if not all_ready:
        raise HTTPException(status_code=503, detail="Service not ready")

    return ReadinessResponse(ready=all_ready, checks=checks)


@app.get("/metrics", tags=["metrics"])  # conventional Prometheus scrape path
async def prometheus_metrics():
    """
    Prometheus metrics endpoint.

    Returns metrics in Prometheus text format for scraping.
    """
    if not PROMETHEUS_AVAILABLE:
        raise HTTPException(status_code=501, detail="Prometheus metrics not available")
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)


@app.post("/query", response_model=QueryResponse, tags=["query"])
async def process_query(request: QueryRequest, client_info: ClientInfo = Depends(verify_api_key)):
    """
    Process a query using the multi-agent MCTS framework.

    This endpoint:
    1. Validates the input query
    2. Optionally retrieves context via RAG
    3. Processes the query through the HRM and TRM agents
    4. Optionally runs MCTS simulation
    5. Synthesizes a final response

    **Authentication**: Requires a valid API key in the X-API-Key header.

    **Rate Limiting**: Subject to per-client rate limits.
    """
    start = time.perf_counter()

    # Validate input using the validation models
    if IMPORTS_AVAILABLE:
        try:
            QueryInput(
                query=request.query,
                use_rag=request.use_rag,
                use_mcts=request.use_mcts,
                thread_id=request.thread_id,
            )
        except Exception as e:
            if PROMETHEUS_AVAILABLE:
                ERROR_COUNT.labels(error_type="validation").inc()
            raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")

    # Process the query (mock implementation for demo).
    # In production, this would call the actual framework.
    await asyncio.sleep(0.1)  # Simulate processing

    processing_time = (time.perf_counter() - start) * 1000

    # Mock response
    return QueryResponse(
        response=f"Processed query: {request.query[:100]}...",
        confidence=0.85,
        agents_used=["hrm", "trm"] + (["mcts"] if request.use_mcts else []),
        mcts_stats=(
            {
                "iterations": request.mcts_iterations or 100,
                "best_action": "recommended_action",
                "root_visits": request.mcts_iterations or 100,
            }
            if request.use_mcts
            else None
        ),
        processing_time_ms=processing_time,
        metadata={
            "client_id": client_info.client_id,
            "thread_id": request.thread_id,
            "rag_enabled": request.use_rag,
        },
    )


@app.get("/stats")  # route path assumed
async def get_stats(client_info: ClientInfo = Depends(verify_api_key)):
    """
    Get usage statistics for the authenticated client.

    Returns request counts and rate limit information.
    """
    authenticator = get_authenticator()
    stats = authenticator.get_client_stats(client_info.client_id)
    return {
        "client_id": client_info.client_id,
        "roles": list(client_info.roles),
        **stats,
        "rate_limits": {
            "per_minute": authenticator.rate_limit_config.requests_per_minute,
            "per_hour": authenticator.rate_limit_config.requests_per_hour,
            "per_day": authenticator.rate_limit_config.requests_per_day,
        },
    }


# Entry point
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "src.api.rest_server:app",
        host="0.0.0.0",
        port=8000,
        reload=False,
        workers=4,
        log_level="info",
        access_log=True,
    )