Commit
·
ef072ca
0
Parent(s):
Initial commit: Multi-Agent RAG System
Browse files- .env.example +27 -0
- .gitignore +31 -0
- Dockerfile +36 -0
- README.md +295 -0
- README_HF.md +50 -0
- app.py +22 -0
- app/__init__.py +17 -0
- app/agents/__init__.py +44 -0
- app/agents/action_agent.py +386 -0
- app/agents/base_agent.py +190 -0
- app/agents/reasoning_agent.py +237 -0
- app/agents/retriever_agent.py +160 -0
- app/agents/router_agent.py +211 -0
- app/api/__init__.py +10 -0
- app/api/routes.py +306 -0
- app/config.py +212 -0
- app/main.py +168 -0
- app/memory/__init__.py +10 -0
- app/memory/conversation_memory.py +201 -0
- app/schemas/__init__.py +32 -0
- app/schemas/models.py +264 -0
- app/services/__init__.py +11 -0
- app/services/document_service.py +294 -0
- app/services/orchestrator.py +272 -0
- app/tools/__init__.py +25 -0
- app/tools/action_tools.py +279 -0
- app/tools/document_tool.py +200 -0
- app/tools/search_tool.py +135 -0
- app/vectorstore/__init__.py +11 -0
- app/vectorstore/embeddings.py +172 -0
- app/vectorstore/faiss_store.py +294 -0
- claude.md +39 -0
- data/documents/account_settings.txt +100 -0
- data/documents/billing_faq.txt +75 -0
- data/documents/password_reset.txt +45 -0
- data/documents/technical_support.txt +127 -0
- requirements.txt +46 -0
- scripts/test_api.py +157 -0
- skills.md +27 -0
- tools.md +20 -0
.env.example
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Multi-Agent RAG System Environment Configuration
|
| 2 |
+
# =================================================
|
| 3 |
+
# Copy this file to .env and fill in your values
|
| 4 |
+
|
| 5 |
+
# OpenAI API Key (Required)
|
| 6 |
+
# Get yours at: https://platform.openai.com/api-keys
|
| 7 |
+
OPENAI_API_KEY=sk-your-key-here
|
| 8 |
+
|
| 9 |
+
# Model Configuration
|
| 10 |
+
# Options: gpt-4-turbo-preview, gpt-4, gpt-3.5-turbo
|
| 11 |
+
LLM_MODEL=gpt-4-turbo-preview
|
| 12 |
+
|
| 13 |
+
# Embedding Model
|
| 14 |
+
# Options: text-embedding-3-small, text-embedding-3-large, text-embedding-ada-002
|
| 15 |
+
EMBEDDING_MODEL=text-embedding-3-small
|
| 16 |
+
|
| 17 |
+
# Vector Store Configuration
|
| 18 |
+
FAISS_INDEX_PATH=./data/faiss_index
|
| 19 |
+
DOCUMENTS_PATH=./data/documents
|
| 20 |
+
|
| 21 |
+
# API Configuration
|
| 22 |
+
API_HOST=0.0.0.0
|
| 23 |
+
API_PORT=8000
|
| 24 |
+
DEBUG_MODE=true
|
| 25 |
+
|
| 26 |
+
# Logging
|
| 27 |
+
LOG_LEVEL=INFO
|
.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
.env
|
| 8 |
+
.venv
|
| 9 |
+
venv/
|
| 10 |
+
ENV/
|
| 11 |
+
|
| 12 |
+
# IDE
|
| 13 |
+
.idea/
|
| 14 |
+
.vscode/
|
| 15 |
+
*.swp
|
| 16 |
+
*.swo
|
| 17 |
+
|
| 18 |
+
# Data (optional - remove if you want to include sample docs)
|
| 19 |
+
data/faiss_index/
|
| 20 |
+
|
| 21 |
+
# OS
|
| 22 |
+
.DS_Store
|
| 23 |
+
Thumbs.db
|
| 24 |
+
|
| 25 |
+
# Logs
|
| 26 |
+
*.log
|
| 27 |
+
|
| 28 |
+
# Testing
|
| 29 |
+
.pytest_cache/
|
| 30 |
+
.coverage
|
| 31 |
+
htmlcov/
|
Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile for Hugging Face Spaces
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
# Install system dependencies
|
| 7 |
+
RUN apt-get update && apt-get install -y \
|
| 8 |
+
build-essential \
|
| 9 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
+
|
| 11 |
+
# Copy requirements first for caching
|
| 12 |
+
COPY requirements.txt .
|
| 13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
+
|
| 15 |
+
# Copy application code
|
| 16 |
+
COPY . .
|
| 17 |
+
|
| 18 |
+
# Create data directories
|
| 19 |
+
RUN mkdir -p data/documents data/faiss_index
|
| 20 |
+
|
| 21 |
+
# Copy sample documents
|
| 22 |
+
# NOTE: shell redirection ("2>/dev/null || true") is not valid in a COPY
# instruction; "COPY . ." above already includes data/documents anyway.
COPY data/documents/ data/documents/
|
| 23 |
+
|
| 24 |
+
# Set environment variables
|
| 25 |
+
ENV LLM_PROVIDER=huggingface
|
| 26 |
+
ENV EMBEDDING_PROVIDER=huggingface
|
| 27 |
+
ENV HUGGINGFACE_MODEL=mistralai/Mistral-7B-Instruct-v0.2
|
| 28 |
+
ENV HUGGINGFACE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
| 29 |
+
ENV API_HOST=0.0.0.0
|
| 30 |
+
ENV API_PORT=8000
|
| 31 |
+
|
| 32 |
+
# Expose port
|
| 33 |
+
EXPOSE 8000
|
| 34 |
+
|
| 35 |
+
# Run the application
|
| 36 |
+
CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Multi-Agent RAG System
|
| 2 |
+
|
| 3 |
+
A production-grade Retrieval-Augmented Generation (RAG) system using multiple specialized agents built with LangChain and FastAPI.
|
| 4 |
+
|
| 5 |
+
## Architecture Overview
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
┌──────────────┐
|
| 9 |
+
│ User Query │
|
| 10 |
+
└──────┬───────┘
|
| 11 |
+
│
|
| 12 |
+
┌──────▼───────┐
|
| 13 |
+
│ Router Agent │ ◄─── Classifies query intent
|
| 14 |
+
└──────┬───────┘
|
| 15 |
+
│
|
| 16 |
+
┌───────────────┼───────────────┐
|
| 17 |
+
│ │ │
|
| 18 |
+
┌──────▼──────┐ ┌──────▼──────┐ ┌──────▼──────┐
|
| 19 |
+
│ Retriever │ │ Reasoning │ │ Action │
|
| 20 |
+
│ Agent │ │ Agent │ │ Agent │
|
| 21 |
+
└─────────────┘ └─────────────┘ └─────────────┘
|
| 22 |
+
│ ▲ │
|
| 23 |
+
└───────────────┘ │
|
| 24 |
+
(context flows to reasoning) │
|
| 25 |
+
┌──────────────────────┘
|
| 26 |
+
▼
|
| 27 |
+
┌──────────────┐
|
| 28 |
+
│ Response │
|
| 29 |
+
└──────────────┘
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
### Agents
|
| 33 |
+
|
| 34 |
+
1. **Router Agent** - Classifies query intent and routes to appropriate agents
|
| 35 |
+
2. **Retriever Agent** - Searches FAISS vector store for relevant documents
|
| 36 |
+
3. **Reasoning Agent** - Generates grounded responses from retrieved context
|
| 37 |
+
4. **Action Agent** - Executes actions like creating tickets or escalating
|
| 38 |
+
|
| 39 |
+
## Features
|
| 40 |
+
|
| 41 |
+
- Multi-agent architecture with single responsibility principle
|
| 42 |
+
- Semantic document search using FAISS and OpenAI embeddings
|
| 43 |
+
- Grounded responses with source citations
|
| 44 |
+
- Conversation memory for multi-turn interactions
|
| 45 |
+
- Action execution (tickets, escalation, notifications)
|
| 46 |
+
- RESTful API with FastAPI
|
| 47 |
+
- Automatic API documentation (Swagger/OpenAPI)
|
| 48 |
+
|
| 49 |
+
## Quick Start
|
| 50 |
+
|
| 51 |
+
### 1. Install Dependencies
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
pip install -r requirements.txt
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### 2. Configure Environment
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
# Copy example environment file
|
| 61 |
+
cp .env.example .env
|
| 62 |
+
|
| 63 |
+
# Edit .env and add your OpenAI API key
|
| 64 |
+
# OPENAI_API_KEY=your-key-here
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
### 3. Run the Server
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# Development mode (with auto-reload)
|
| 71 |
+
uvicorn app.main:app --reload
|
| 72 |
+
|
| 73 |
+
# Or run directly
|
| 74 |
+
python -m app.main
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### 4. Access the API
|
| 78 |
+
|
| 79 |
+
- **Swagger UI**: http://localhost:8000/docs
|
| 80 |
+
- **ReDoc**: http://localhost:8000/redoc
|
| 81 |
+
- **Health Check**: http://localhost:8000/api/v1/health
|
| 82 |
+
|
| 83 |
+
## API Endpoints
|
| 84 |
+
|
| 85 |
+
### Query Endpoint
|
| 86 |
+
```bash
|
| 87 |
+
POST /api/v1/query
|
| 88 |
+
Content-Type: application/json
|
| 89 |
+
|
| 90 |
+
{
|
| 91 |
+
"query": "How do I reset my password?",
|
| 92 |
+
"conversation_id": "optional-session-id",
|
| 93 |
+
"include_sources": true
|
| 94 |
+
}
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
### Document Ingestion
|
| 98 |
+
```bash
|
| 99 |
+
# Ingest all documents from configured directory
|
| 100 |
+
POST /api/v1/ingest
|
| 101 |
+
Content-Type: application/json
|
| 102 |
+
|
| 103 |
+
{
|
| 104 |
+
"force_reindex": false
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
# Ingest specific files
|
| 108 |
+
POST /api/v1/ingest
|
| 109 |
+
Content-Type: application/json
|
| 110 |
+
|
| 111 |
+
{
|
| 112 |
+
"file_paths": ["/path/to/document.pdf"],
|
| 113 |
+
"force_reindex": false
|
| 114 |
+
}
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### Health Check
|
| 118 |
+
```bash
|
| 119 |
+
GET /api/v1/health
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
## Project Structure
|
| 123 |
+
|
| 124 |
+
```
|
| 125 |
+
multi-agent-rag/
|
| 126 |
+
├── app/
|
| 127 |
+
│ ├── __init__.py
|
| 128 |
+
│ ├── main.py # FastAPI application entry
|
| 129 |
+
│ ├── config.py # Configuration management
|
| 130 |
+
│ ├── agents/ # Agent implementations
|
| 131 |
+
│ │ ├── base_agent.py # Abstract base class
|
| 132 |
+
│ │ ├── router_agent.py # Query routing
|
| 133 |
+
│ │ ├── retriever_agent.py
|
| 134 |
+
│ │ ├── reasoning_agent.py
|
| 135 |
+
│ │ └── action_agent.py
|
| 136 |
+
│ ├── tools/ # LangChain tools
|
| 137 |
+
│ │ ├── search_tool.py
|
| 138 |
+
│ │ ├── document_tool.py
|
| 139 |
+
│ │ └── action_tools.py
|
| 140 |
+
│ ├── memory/ # Conversation memory
|
| 141 |
+
│ │ └── conversation_memory.py
|
| 142 |
+
│ ├── vectorstore/ # Vector database
|
| 143 |
+
│ │ ├── embeddings.py
|
| 144 |
+
│ │ └── faiss_store.py
|
| 145 |
+
│ ├── schemas/ # Pydantic models
|
| 146 |
+
│ │ └── models.py
|
| 147 |
+
│ ├── services/ # Business logic
|
| 148 |
+
│ │ ├── orchestrator.py
|
| 149 |
+
│ │ └── document_service.py
|
| 150 |
+
│ └── api/ # API routes
|
| 151 |
+
│ └── routes.py
|
| 152 |
+
├── data/
|
| 153 |
+
│ ├── documents/ # Source documents
|
| 154 |
+
│ └── faiss_index/ # Vector index storage
|
| 155 |
+
├── tests/
|
| 156 |
+
├── requirements.txt
|
| 157 |
+
├── .env.example
|
| 158 |
+
└── README.md
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
## Configuration
|
| 162 |
+
|
| 163 |
+
All configuration is done via environment variables or `.env` file:
|
| 164 |
+
|
| 165 |
+
| Variable | Description | Default |
|
| 166 |
+
|----------|-------------|---------|
|
| 167 |
+
| `OPENAI_API_KEY` | OpenAI API key | Required |
|
| 168 |
+
| `LLM_MODEL` | Model for agents | gpt-4-turbo-preview |
|
| 169 |
+
| `EMBEDDING_MODEL` | Embedding model | text-embedding-3-small |
|
| 170 |
+
| `FAISS_INDEX_PATH` | Vector index location | ./data/faiss_index |
|
| 171 |
+
| `DOCUMENTS_PATH` | Source documents | ./data/documents |
|
| 172 |
+
| `CHUNK_SIZE` | Document chunk size | 1000 |
|
| 173 |
+
| `CHUNK_OVERLAP` | Chunk overlap | 200 |
|
| 174 |
+
| `RETRIEVAL_TOP_K` | Documents to retrieve | 5 |
|
| 175 |
+
| `API_PORT` | Server port | 8000 |
|
| 176 |
+
| `LOG_LEVEL` | Logging level | INFO |
|
| 177 |
+
|
| 178 |
+
## Usage Examples
|
| 179 |
+
|
| 180 |
+
### Python Client
|
| 181 |
+
|
| 182 |
+
```python
|
| 183 |
+
import httpx
|
| 184 |
+
import asyncio
|
| 185 |
+
|
| 186 |
+
async def query_rag():
|
| 187 |
+
async with httpx.AsyncClient() as client:
|
| 188 |
+
# Ingest documents first
|
| 189 |
+
await client.post(
|
| 190 |
+
"http://localhost:8000/api/v1/ingest",
|
| 191 |
+
json={"force_reindex": True}
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
# Query the system
|
| 195 |
+
response = await client.post(
|
| 196 |
+
"http://localhost:8000/api/v1/query",
|
| 197 |
+
json={
|
| 198 |
+
"query": "How do I reset my password?",
|
| 199 |
+
"include_sources": True
|
| 200 |
+
}
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
result = response.json()
|
| 204 |
+
print(f"Answer: {result['answer']}")
|
| 205 |
+
print(f"Sources: {len(result['sources'])} documents")
|
| 206 |
+
print(f"Agents used: {' -> '.join(result['agent_trace'])}")
|
| 207 |
+
|
| 208 |
+
asyncio.run(query_rag())
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
### cURL Examples
|
| 212 |
+
|
| 213 |
+
```bash
|
| 214 |
+
# Health check
|
| 215 |
+
curl http://localhost:8000/api/v1/health
|
| 216 |
+
|
| 217 |
+
# Ingest documents
|
| 218 |
+
curl -X POST http://localhost:8000/api/v1/ingest \
|
| 219 |
+
-H "Content-Type: application/json" \
|
| 220 |
+
-d '{"force_reindex": true}'
|
| 221 |
+
|
| 222 |
+
# Query
|
| 223 |
+
curl -X POST http://localhost:8000/api/v1/query \
|
| 224 |
+
-H "Content-Type: application/json" \
|
| 225 |
+
-d '{"query": "How do I change my email address?"}'
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
## Key Design Decisions
|
| 229 |
+
|
| 230 |
+
### Why Multi-Agent Architecture?
|
| 231 |
+
- **Single Responsibility**: Each agent does one thing well
|
| 232 |
+
- **Testability**: Agents can be tested in isolation
|
| 233 |
+
- **Flexibility**: Easy to add/remove agents
|
| 234 |
+
- **Scalability**: Agents could run on different services
|
| 235 |
+
|
| 236 |
+
### Why FAISS?
|
| 237 |
+
- **No External Dependencies**: Runs locally, no API costs
|
| 238 |
+
- **Fast**: Optimized C++ with Python bindings
|
| 239 |
+
- **Scalable**: Handles millions of vectors
|
| 240 |
+
- **Persistent**: Index saved to disk
|
| 241 |
+
|
| 242 |
+
### Why LangChain?
|
| 243 |
+
- **Standardized Interfaces**: Common patterns for agents, tools, memory
|
| 244 |
+
- **Flexibility**: Easy to swap components
|
| 245 |
+
- **Community**: Large ecosystem of integrations
|
| 246 |
+
|
| 247 |
+
### Grounding Principle
|
| 248 |
+
Every response is grounded in retrieved documents. The Reasoning Agent:
|
| 249 |
+
1. ONLY uses information from retrieved context
|
| 250 |
+
2. Admits when information is not available
|
| 251 |
+
3. Never makes up information
|
| 252 |
+
4. Cites sources when possible
|
| 253 |
+
|
| 254 |
+
## Production Considerations
|
| 255 |
+
|
| 256 |
+
### Security
|
| 257 |
+
- Store API keys in environment variables, never in code
|
| 258 |
+
- Use HTTPS in production
|
| 259 |
+
- Implement rate limiting
|
| 260 |
+
- Add authentication for sensitive endpoints
|
| 261 |
+
|
| 262 |
+
### Scalability
|
| 263 |
+
- Use Redis for distributed conversation memory
|
| 264 |
+
- Consider Pinecone/Weaviate for larger document collections
|
| 265 |
+
- Run multiple uvicorn workers: `uvicorn app.main:app --workers 4`
|
| 266 |
+
- Add caching for frequently asked questions
|
| 267 |
+
|
| 268 |
+
### Monitoring
|
| 269 |
+
- Integrate with logging services (Datadog, CloudWatch)
|
| 270 |
+
- Add tracing (OpenTelemetry)
|
| 271 |
+
- Monitor agent response times
|
| 272 |
+
- Track retrieval relevance scores
|
| 273 |
+
|
| 274 |
+
## Troubleshooting
|
| 275 |
+
|
| 276 |
+
### "No documents in knowledge base"
|
| 277 |
+
Run the ingestion endpoint first:
|
| 278 |
+
```bash
|
| 279 |
+
curl -X POST http://localhost:8000/api/v1/ingest
|
| 280 |
+
```
|
| 281 |
+
|
| 282 |
+
### "OpenAI API key not set"
|
| 283 |
+
Ensure your `.env` file exists and contains:
|
| 284 |
+
```
|
| 285 |
+
OPENAI_API_KEY=sk-your-key-here
|
| 286 |
+
```
|
| 287 |
+
|
| 288 |
+
### Slow responses
|
| 289 |
+
- Reduce `RETRIEVAL_TOP_K` for faster retrieval
|
| 290 |
+
- Use a smaller LLM model (gpt-3.5-turbo)
|
| 291 |
+
- Check network latency to OpenAI
|
| 292 |
+
|
| 293 |
+
## License
|
| 294 |
+
|
| 295 |
+
MIT License - See LICENSE file for details.
|
README_HF.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Multi-Agent RAG System
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8000
|
| 8 |
+
pinned: false
|
| 9 |
+
license: mit
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Multi-Agent RAG System
|
| 13 |
+
|
| 14 |
+
A production-grade Retrieval-Augmented Generation system using multiple specialized AI agents.
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
|
| 18 |
+
- **Router Agent**: Classifies queries and routes to appropriate agents
|
| 19 |
+
- **Retriever Agent**: Semantic search using FAISS vector store
|
| 20 |
+
- **Reasoning Agent**: Generates grounded responses from context
|
| 21 |
+
- **Action Agent**: Executes actions like creating tickets
|
| 22 |
+
|
| 23 |
+
## API Endpoints
|
| 24 |
+
|
| 25 |
+
- `POST /api/v1/query` - Submit a question
|
| 26 |
+
- `POST /api/v1/ingest` - Ingest documents
|
| 27 |
+
- `GET /api/v1/health` - Health check
|
| 28 |
+
- `GET /docs` - Swagger UI
|
| 29 |
+
|
| 30 |
+
## Usage
|
| 31 |
+
|
| 32 |
+
```python
|
| 33 |
+
import requests
|
| 34 |
+
|
| 35 |
+
response = requests.post(
|
| 36 |
+
"https://your-space.hf.space/api/v1/query",
|
| 37 |
+
json={"query": "How do I reset my password?"}
|
| 38 |
+
)
|
| 39 |
+
print(response.json()["answer"])
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
## Architecture
|
| 43 |
+
|
| 44 |
+
```
|
| 45 |
+
User Query → Router Agent → Retriever Agent → Reasoning Agent → Response
|
| 46 |
+
↓
|
| 47 |
+
FAISS Vector Store
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
Built with LangChain, FastAPI, and HuggingFace models.
|
app.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hugging Face Spaces Entry Point
|
| 3 |
+
================================
|
| 4 |
+
|
| 5 |
+
This file is the entry point for Hugging Face Spaces deployment.
|
| 6 |
+
It imports and runs the FastAPI application.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
# Set environment variables for HuggingFace Spaces
|
| 12 |
+
# These can be overridden by Space secrets
|
| 13 |
+
os.environ.setdefault("LLM_PROVIDER", "huggingface")
|
| 14 |
+
os.environ.setdefault("EMBEDDING_PROVIDER", "huggingface")
|
| 15 |
+
os.environ.setdefault("HUGGINGFACE_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
|
| 16 |
+
os.environ.setdefault("HUGGINGFACE_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
| 17 |
+
|
| 18 |
+
# Import the FastAPI app
|
| 19 |
+
from app.main import app
|
| 20 |
+
|
| 21 |
+
# For Hugging Face Spaces, we need to expose the app
|
| 22 |
+
# With sdk: docker, the Dockerfile's CMD starts uvicorn with this app
|
app/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multi-Agent RAG System
|
| 3 |
+
======================
|
| 4 |
+
|
| 5 |
+
A production-grade Retrieval-Augmented Generation system using multiple
|
| 6 |
+
specialized agents for query routing, document retrieval, reasoning, and
|
| 7 |
+
action execution.
|
| 8 |
+
|
| 9 |
+
Architecture:
|
| 10 |
+
- Router Agent: Classifies and routes queries
|
| 11 |
+
- Retriever Agent: Handles vector search and document retrieval
|
| 12 |
+
- Reasoning Agent: Generates grounded responses from context
|
| 13 |
+
- Action Agent: Executes specific actions when needed
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
__version__ = "1.0.0"
|
| 17 |
+
__author__ = "AI Engineer"
|
app/agents/__init__.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agents Module
|
| 3 |
+
=============
|
| 4 |
+
|
| 5 |
+
Multi-agent system with specialized agents for different tasks.
|
| 6 |
+
|
| 7 |
+
ARCHITECTURE:
|
| 8 |
+
┌──────────────┐
|
| 9 |
+
│ User Query │
|
| 10 |
+
└──────┬───────┘
|
| 11 |
+
│
|
| 12 |
+
┌──────▼───────┐
|
| 13 |
+
│ Router Agent │ ◄─── Classifies query intent
|
| 14 |
+
└──────┬───────┘
|
| 15 |
+
│
|
| 16 |
+
┌───────────────┼───────────────┐
|
| 17 |
+
│ │ │
|
| 18 |
+
┌──────▼──────┐ ┌──────▼──────┐ ┌──────▼──────┐
|
| 19 |
+
│ Retriever │ │ Reasoning │ │ Action │
|
| 20 |
+
│ Agent │ │ Agent │ │ Agent │
|
| 21 |
+
└─────────────┘ └─────────────┘ └─────────────┘
|
| 22 |
+
│ ▲ │
|
| 23 |
+
└───────────────┘ │
|
| 24 |
+
(context flows to reasoning) │
|
| 25 |
+
┌──────────────────────┘
|
| 26 |
+
▼
|
| 27 |
+
┌──────────────┐
|
| 28 |
+
│ Response │
|
| 29 |
+
└──────────────┘
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
from app.agents.base_agent import BaseAgent
|
| 33 |
+
from app.agents.router_agent import RouterAgent
|
| 34 |
+
from app.agents.retriever_agent import RetrieverAgent
|
| 35 |
+
from app.agents.reasoning_agent import ReasoningAgent
|
| 36 |
+
from app.agents.action_agent import ActionAgent
|
| 37 |
+
|
| 38 |
+
__all__ = [
|
| 39 |
+
"BaseAgent",
|
| 40 |
+
"RouterAgent",
|
| 41 |
+
"RetrieverAgent",
|
| 42 |
+
"ReasoningAgent",
|
| 43 |
+
"ActionAgent",
|
| 44 |
+
]
|
app/agents/action_agent.py
ADDED
|
@@ -0,0 +1,386 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Action Agent
|
| 3 |
+
============
|
| 4 |
+
|
| 5 |
+
The Action Agent executes specific actions based on user requests.
|
| 6 |
+
|
| 7 |
+
RESPONSIBILITIES:
|
| 8 |
+
1. Parse action requests from the query
|
| 9 |
+
2. Execute the appropriate action
|
| 10 |
+
3. Return confirmation and next steps
|
| 11 |
+
4. Handle action failures gracefully
|
| 12 |
+
|
| 13 |
+
SUPPORTED ACTIONS:
|
| 14 |
+
- create_ticket: Create a support ticket
|
| 15 |
+
- escalate: Escalate to human agent
|
| 16 |
+
- send_email: Send email notification
|
| 17 |
+
- search_kb: Deep search in knowledge base
|
| 18 |
+
|
| 19 |
+
WHY AN ACTION AGENT?
|
| 20 |
+
- Separates "thinking" from "doing"
|
| 21 |
+
- Actions can be audited and logged
|
| 22 |
+
- Easy to add new actions
|
| 23 |
+
- Can integrate with external systems (ticketing, email, etc.)
|
| 24 |
+
|
| 25 |
+
ARCHITECTURE:
|
| 26 |
+
┌─────────────────────────────────────────┐
|
| 27 |
+
│ Action Agent │
|
| 28 |
+
│ ┌──────────────────────────────────┐ │
|
| 29 |
+
│ │ Action Router (LLM-based) │ │
|
| 30 |
+
│ └─────────────┬────────────────────┘ │
|
| 31 |
+
│ │ │
|
| 32 |
+
│ ┌────────────┼────────────┐ │
|
| 33 |
+
│ ▼ ▼ ▼ │
|
| 34 |
+
│ ┌─────┐ ┌─────────┐ ┌─────────┐ │
|
| 35 |
+
│ │Ticket│ │Escalate│ │ Email │ │
|
| 36 |
+
│ │Tool │ │ Tool │ │ Tool │ │
|
| 37 |
+
│ └─────┘ └─────────┘ └─────────┘ │
|
| 38 |
+
└─────────────────────────────────────────┘
|
| 39 |
+
"""
|
| 40 |
+
|
| 41 |
+
import logging
|
| 42 |
+
from typing import Any, Optional
|
| 43 |
+
from datetime import datetime
|
| 44 |
+
import uuid
|
| 45 |
+
|
| 46 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 47 |
+
from pydantic import BaseModel, Field
|
| 48 |
+
|
| 49 |
+
from app.agents.base_agent import BaseAgent
|
| 50 |
+
from app.schemas.models import AgentResponse, AgentType, ActionType
|
| 51 |
+
|
| 52 |
+
logger = logging.getLogger(__name__)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# In a production system, these would integrate with real services
|
| 56 |
+
# For now, we simulate the actions and return structured results
|
| 57 |
+
|
| 58 |
+
class TicketData(BaseModel):
|
| 59 |
+
"""Data for a created support ticket."""
|
| 60 |
+
ticket_id: str
|
| 61 |
+
title: str
|
| 62 |
+
description: str
|
| 63 |
+
priority: str
|
| 64 |
+
created_at: str
|
| 65 |
+
status: str = "open"
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
class EscalationData(BaseModel):
|
| 69 |
+
"""Data for an escalation."""
|
| 70 |
+
escalation_id: str
|
| 71 |
+
reason: str
|
| 72 |
+
priority: str
|
| 73 |
+
queue: str
|
| 74 |
+
estimated_wait: str
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class ActionResult(BaseModel):
|
| 78 |
+
"""Result of an action execution."""
|
| 79 |
+
action_type: ActionType
|
| 80 |
+
success: bool
|
| 81 |
+
message: str
|
| 82 |
+
data: Optional[dict] = None
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
ACTION_PROMPT = """You are an action executor for a customer support system.
|
| 86 |
+
|
| 87 |
+
Based on the conversation, you need to execute the requested action.
|
| 88 |
+
|
| 89 |
+
AVAILABLE ACTIONS:
|
| 90 |
+
1. create_ticket - Create a support ticket for the issue
|
| 91 |
+
2. escalate - Escalate to a human agent
|
| 92 |
+
3. send_email - Send an email notification
|
| 93 |
+
4. search_knowledge_base - Perform a deeper search
|
| 94 |
+
5. none - No action needed
|
| 95 |
+
|
| 96 |
+
USER QUERY: {query}
|
| 97 |
+
|
| 98 |
+
CONTEXT: {context}
|
| 99 |
+
|
| 100 |
+
REQUESTED ACTION: {action_type}
|
| 101 |
+
|
| 102 |
+
Generate appropriate details for this action:
|
| 103 |
+
- For tickets: Generate a clear title and description
|
| 104 |
+
- For escalation: Determine priority and reason
|
| 105 |
+
- For email: Determine recipient type and content summary
|
| 106 |
+
|
| 107 |
+
Respond in this JSON format:
|
| 108 |
+
{{
|
| 109 |
+
"title": "Brief title of the issue",
|
| 110 |
+
"description": "Detailed description",
|
| 111 |
+
"priority": "low|medium|high|urgent",
|
| 112 |
+
"reason": "Why this action is being taken"
|
| 113 |
+
}}"""
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class ActionAgent(BaseAgent):
|
| 117 |
+
"""
|
| 118 |
+
Executes actions based on user requests and reasoning output.
|
| 119 |
+
|
| 120 |
+
This agent handles the "doing" part of the system - creating tickets,
|
| 121 |
+
escalating issues, and other concrete actions.
|
| 122 |
+
|
| 123 |
+
In production, this would integrate with:
|
| 124 |
+
- Ticketing systems (Zendesk, Jira, ServiceNow)
|
| 125 |
+
- Email services (SendGrid, SES)
|
| 126 |
+
- Communication platforms (Slack, Teams)
|
| 127 |
+
"""
|
| 128 |
+
|
| 129 |
+
def __init__(self, **kwargs):
|
| 130 |
+
"""Initialize the Action Agent."""
|
| 131 |
+
super().__init__(**kwargs)
|
| 132 |
+
self._prompt = ChatPromptTemplate.from_template(ACTION_PROMPT)
|
| 133 |
+
|
| 134 |
+
@property
|
| 135 |
+
def agent_type(self) -> AgentType:
|
| 136 |
+
"""Return the agent type."""
|
| 137 |
+
return AgentType.ACTION
|
| 138 |
+
|
| 139 |
+
async def execute(
|
| 140 |
+
self,
|
| 141 |
+
input_data: dict[str, Any],
|
| 142 |
+
**kwargs
|
| 143 |
+
) -> AgentResponse:
|
| 144 |
+
"""
|
| 145 |
+
Execute the requested action.
|
| 146 |
+
|
| 147 |
+
Args:
|
| 148 |
+
input_data:
|
| 149 |
+
- query: User's original query
|
| 150 |
+
- context: Retrieved context
|
| 151 |
+
- action_type: Which action to execute
|
| 152 |
+
**kwargs: Additional options
|
| 153 |
+
|
| 154 |
+
Returns:
|
| 155 |
+
AgentResponse with action result
|
| 156 |
+
"""
|
| 157 |
+
query = input_data.get("query", "")
|
| 158 |
+
context = input_data.get("context", "")
|
| 159 |
+
action_type_str = input_data.get("action_type", "none")
|
| 160 |
+
|
| 161 |
+
# Parse action type
|
| 162 |
+
try:
|
| 163 |
+
action_type = ActionType(action_type_str)
|
| 164 |
+
except ValueError:
|
| 165 |
+
action_type = ActionType.NONE
|
| 166 |
+
|
| 167 |
+
# If no action needed, return early
|
| 168 |
+
if action_type == ActionType.NONE:
|
| 169 |
+
return AgentResponse(
|
| 170 |
+
agent_type=self.agent_type,
|
| 171 |
+
output="No action required.",
|
| 172 |
+
confidence=1.0,
|
| 173 |
+
metadata={"action_type": "none", "action_taken": False}
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
# Execute the appropriate action
|
| 177 |
+
result = await self._execute_action(
|
| 178 |
+
action_type=action_type,
|
| 179 |
+
query=query,
|
| 180 |
+
context=context
|
| 181 |
+
)
|
| 182 |
+
|
| 183 |
+
return AgentResponse(
|
| 184 |
+
agent_type=self.agent_type,
|
| 185 |
+
output=result.message,
|
| 186 |
+
confidence=1.0 if result.success else 0.5,
|
| 187 |
+
metadata={
|
| 188 |
+
"action_type": result.action_type.value,
|
| 189 |
+
"action_taken": result.success,
|
| 190 |
+
"action_data": result.data,
|
| 191 |
+
}
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
async def _execute_action(
|
| 195 |
+
self,
|
| 196 |
+
action_type: ActionType,
|
| 197 |
+
query: str,
|
| 198 |
+
context: str
|
| 199 |
+
) -> ActionResult:
|
| 200 |
+
"""
|
| 201 |
+
Execute a specific action.
|
| 202 |
+
|
| 203 |
+
This method routes to the appropriate action handler.
|
| 204 |
+
|
| 205 |
+
Args:
|
| 206 |
+
action_type: Which action to execute
|
| 207 |
+
query: User's query
|
| 208 |
+
context: Retrieved context
|
| 209 |
+
|
| 210 |
+
Returns:
|
| 211 |
+
ActionResult with success/failure and details
|
| 212 |
+
"""
|
| 213 |
+
# Map action types to handlers
|
| 214 |
+
action_handlers = {
|
| 215 |
+
ActionType.CREATE_TICKET: self._create_ticket,
|
| 216 |
+
ActionType.ESCALATE: self._escalate,
|
| 217 |
+
ActionType.SEND_EMAIL: self._send_email,
|
| 218 |
+
ActionType.SEARCH_KB: self._search_kb,
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
handler = action_handlers.get(action_type)
|
| 222 |
+
if handler is None:
|
| 223 |
+
return ActionResult(
|
| 224 |
+
action_type=action_type,
|
| 225 |
+
success=False,
|
| 226 |
+
message=f"Unknown action type: {action_type.value}",
|
| 227 |
+
)
|
| 228 |
+
|
| 229 |
+
try:
|
| 230 |
+
return await handler(query, context)
|
| 231 |
+
except Exception as e:
|
| 232 |
+
logger.error(f"Action {action_type.value} failed: {e}")
|
| 233 |
+
return ActionResult(
|
| 234 |
+
action_type=action_type,
|
| 235 |
+
success=False,
|
| 236 |
+
message=f"Action failed: {str(e)}",
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
async def _create_ticket(self, query: str, context: str) -> ActionResult:
|
| 240 |
+
"""
|
| 241 |
+
Create a support ticket.
|
| 242 |
+
|
| 243 |
+
In production, this would call a ticketing API.
|
| 244 |
+
Here we simulate the ticket creation.
|
| 245 |
+
"""
|
| 246 |
+
# Generate ticket details using LLM
|
| 247 |
+
details = await self._generate_action_details(
|
| 248 |
+
ActionType.CREATE_TICKET, query, context
|
| 249 |
+
)
|
| 250 |
+
|
| 251 |
+
# Simulate ticket creation
|
| 252 |
+
ticket = TicketData(
|
| 253 |
+
ticket_id=f"TKT-{uuid.uuid4().hex[:8].upper()}",
|
| 254 |
+
title=details.get("title", "Support Request"),
|
| 255 |
+
description=details.get("description", query),
|
| 256 |
+
priority=details.get("priority", "medium"),
|
| 257 |
+
created_at=datetime.utcnow().isoformat(),
|
| 258 |
+
)
|
| 259 |
+
|
| 260 |
+
logger.info(f"Created ticket: {ticket.ticket_id}")
|
| 261 |
+
|
| 262 |
+
return ActionResult(
|
| 263 |
+
action_type=ActionType.CREATE_TICKET,
|
| 264 |
+
success=True,
|
| 265 |
+
message=f"I've created support ticket {ticket.ticket_id} for your issue. "
|
| 266 |
+
f"Our team will review it shortly. Priority: {ticket.priority}.",
|
| 267 |
+
data=ticket.model_dump(),
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
async def _escalate(self, query: str, context: str) -> ActionResult:
|
| 271 |
+
"""
|
| 272 |
+
Escalate to a human agent.
|
| 273 |
+
|
| 274 |
+
In production, this would add to a queue or notify agents.
|
| 275 |
+
"""
|
| 276 |
+
details = await self._generate_action_details(
|
| 277 |
+
ActionType.ESCALATE, query, context
|
| 278 |
+
)
|
| 279 |
+
|
| 280 |
+
escalation = EscalationData(
|
| 281 |
+
escalation_id=f"ESC-{uuid.uuid4().hex[:8].upper()}",
|
| 282 |
+
reason=details.get("reason", "Customer requested human assistance"),
|
| 283 |
+
priority=details.get("priority", "medium"),
|
| 284 |
+
queue="general_support",
|
| 285 |
+
estimated_wait="5-10 minutes",
|
| 286 |
+
)
|
| 287 |
+
|
| 288 |
+
logger.info(f"Created escalation: {escalation.escalation_id}")
|
| 289 |
+
|
| 290 |
+
return ActionResult(
|
| 291 |
+
action_type=ActionType.ESCALATE,
|
| 292 |
+
success=True,
|
| 293 |
+
message=f"I've escalated your request to a human agent. "
|
| 294 |
+
f"Reference: {escalation.escalation_id}. "
|
| 295 |
+
f"Estimated wait time: {escalation.estimated_wait}. "
|
| 296 |
+
f"A support representative will assist you shortly.",
|
| 297 |
+
data=escalation.model_dump(),
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
async def _send_email(self, query: str, context: str) -> ActionResult:
|
| 301 |
+
"""
|
| 302 |
+
Send an email notification.
|
| 303 |
+
|
| 304 |
+
In production, this would call an email service.
|
| 305 |
+
"""
|
| 306 |
+
details = await self._generate_action_details(
|
| 307 |
+
ActionType.SEND_EMAIL, query, context
|
| 308 |
+
)
|
| 309 |
+
|
| 310 |
+
# Simulate email sending
|
| 311 |
+
email_id = f"EMAIL-{uuid.uuid4().hex[:8].upper()}"
|
| 312 |
+
|
| 313 |
+
logger.info(f"Sent email: {email_id}")
|
| 314 |
+
|
| 315 |
+
return ActionResult(
|
| 316 |
+
action_type=ActionType.SEND_EMAIL,
|
| 317 |
+
success=True,
|
| 318 |
+
message="I've sent a confirmation email to your registered email address. "
|
| 319 |
+
"Please check your inbox (and spam folder) shortly.",
|
| 320 |
+
data={
|
| 321 |
+
"email_id": email_id,
|
| 322 |
+
"subject": details.get("title", "Support Update"),
|
| 323 |
+
},
|
| 324 |
+
)
|
| 325 |
+
|
| 326 |
+
async def _search_kb(self, query: str, context: str) -> ActionResult:
|
| 327 |
+
"""
|
| 328 |
+
Perform a deeper knowledge base search.
|
| 329 |
+
|
| 330 |
+
This could trigger a more thorough search with different parameters.
|
| 331 |
+
"""
|
| 332 |
+
# In production, this might search with different strategies
|
| 333 |
+
return ActionResult(
|
| 334 |
+
action_type=ActionType.SEARCH_KB,
|
| 335 |
+
success=True,
|
| 336 |
+
message="I've initiated a deeper search of our knowledge base. "
|
| 337 |
+
"This may take a moment for complex queries.",
|
| 338 |
+
data={"search_query": query},
|
| 339 |
+
)
|
| 340 |
+
|
| 341 |
+
async def _generate_action_details(
|
| 342 |
+
self,
|
| 343 |
+
action_type: ActionType,
|
| 344 |
+
query: str,
|
| 345 |
+
context: str
|
| 346 |
+
) -> dict:
|
| 347 |
+
"""
|
| 348 |
+
Use LLM to generate appropriate details for an action.
|
| 349 |
+
|
| 350 |
+
Args:
|
| 351 |
+
action_type: Type of action
|
| 352 |
+
query: User's query
|
| 353 |
+
context: Retrieved context
|
| 354 |
+
|
| 355 |
+
Returns:
|
| 356 |
+
Dictionary with action-specific details
|
| 357 |
+
"""
|
| 358 |
+
try:
|
| 359 |
+
formatted = self._prompt.format_messages(
|
| 360 |
+
query=query,
|
| 361 |
+
context=context[:2000], # Limit context length
|
| 362 |
+
action_type=action_type.value,
|
| 363 |
+
)
|
| 364 |
+
response = await self._llm.ainvoke(formatted)
|
| 365 |
+
|
| 366 |
+
# Parse JSON from response
|
| 367 |
+
import json
|
| 368 |
+
content = response.content
|
| 369 |
+
|
| 370 |
+
# Try to extract JSON from the response
|
| 371 |
+
if "{" in content:
|
| 372 |
+
json_start = content.index("{")
|
| 373 |
+
json_end = content.rindex("}") + 1
|
| 374 |
+
json_str = content[json_start:json_end]
|
| 375 |
+
return json.loads(json_str)
|
| 376 |
+
|
| 377 |
+
except Exception as e:
|
| 378 |
+
logger.warning(f"Failed to generate action details: {e}")
|
| 379 |
+
|
| 380 |
+
# Return defaults
|
| 381 |
+
return {
|
| 382 |
+
"title": "Support Request",
|
| 383 |
+
"description": query,
|
| 384 |
+
"priority": "medium",
|
| 385 |
+
"reason": "User request",
|
| 386 |
+
}
|
app/agents/base_agent.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Base Agent
|
| 3 |
+
==========
|
| 4 |
+
|
| 5 |
+
Abstract base class for all agents in the multi-agent system.
|
| 6 |
+
Supports multiple LLM providers including free options.
|
| 7 |
+
|
| 8 |
+
SUPPORTED PROVIDERS:
|
| 9 |
+
1. ollama - Local LLMs (FREE, requires Ollama installed)
|
| 10 |
+
2. huggingface - HuggingFace API (FREE tier available)
|
| 11 |
+
3. groq - Groq Cloud (FREE tier, very fast)
|
| 12 |
+
4. google - Google Gemini (FREE tier)
|
| 13 |
+
5. openai - OpenAI (PAID)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import logging
|
| 17 |
+
from abc import ABC, abstractmethod
|
| 18 |
+
from typing import Any, Optional, Union
|
| 19 |
+
|
| 20 |
+
from langchain_core.language_models.chat_models import BaseChatModel
|
| 21 |
+
|
| 22 |
+
from app.config import get_settings
|
| 23 |
+
from app.schemas.models import AgentResponse, AgentType
|
| 24 |
+
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def create_llm(
    provider: Optional[str] = None,
    temperature: Optional[float] = None,
) -> BaseChatModel:
    """
    Build a LangChain chat model for the configured provider.

    Imports are done lazily inside each branch so only the selected
    provider's package needs to be installed.

    Args:
        provider: Override the default provider from settings
        temperature: Override the default temperature

    Returns:
        A LangChain chat model instance

    Raises:
        ValueError: If the provider name is not supported
    """
    settings = get_settings()
    chosen = provider or settings.llm_provider
    temp = settings.llm_temperature if temperature is None else temperature

    logger.info(f"Creating LLM with provider: {chosen}")

    if chosen == "ollama":
        from langchain_community.chat_models import ChatOllama
        return ChatOllama(
            model=settings.ollama_model,
            base_url=settings.ollama_base_url,
            temperature=temp,
        )

    if chosen == "huggingface":
        from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
        # Wrap the raw Inference API endpoint in a chat interface.
        endpoint = HuggingFaceEndpoint(
            repo_id=settings.huggingface_model,
            huggingfacehub_api_token=settings.huggingface_api_key,
            temperature=temp,
            max_new_tokens=1024,
        )
        return ChatHuggingFace(llm=endpoint)

    if chosen == "groq":
        from langchain_groq import ChatGroq
        return ChatGroq(
            model=settings.groq_model,
            api_key=settings.groq_api_key,
            temperature=temp,
        )

    if chosen == "google":
        from langchain_google_genai import ChatGoogleGenerativeAI
        return ChatGoogleGenerativeAI(
            model=settings.google_model,
            google_api_key=settings.google_api_key,
            temperature=temp,
        )

    if chosen == "openai":
        from langchain_openai import ChatOpenAI
        return ChatOpenAI(
            model=settings.openai_model,
            openai_api_key=settings.openai_api_key,
            temperature=temp,
        )

    raise ValueError(f"Unsupported LLM provider: {chosen}")
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class BaseAgent(ABC):
    """
    Abstract base class for every agent in the system.

    Subclasses must implement:
    - agent_type: what kind of agent this is
    - execute(): the agent's main logic

    The base class provides multi-provider LLM initialization, a safe
    execution wrapper, and logging infrastructure.
    """

    def __init__(
        self,
        llm: Optional[BaseChatModel] = None,
        provider: Optional[str] = None,
        temperature: Optional[float] = None,
    ):
        """
        Initialize the base agent.

        Args:
            llm: Pre-configured LLM (optional, for testing/customization)
            provider: Override LLM provider from settings
            temperature: Override default temperature
        """
        self._settings = get_settings()
        # An explicitly supplied LLM wins; otherwise build one from settings.
        self._llm = llm if llm is not None else create_llm(provider, temperature)
        logger.info(f"Initialized {self.agent_type.value} agent")

    @property
    @abstractmethod
    def agent_type(self) -> AgentType:
        """Return the type of this agent."""
        ...

    @abstractmethod
    async def execute(
        self,
        input_data: dict[str, Any],
        **kwargs
    ) -> AgentResponse:
        """Execute the agent's main logic."""
        ...

    async def safe_execute(
        self,
        input_data: dict[str, Any],
        **kwargs
    ) -> AgentResponse:
        """
        Run execute() behind an error-handling wrapper.

        Guarantees a valid AgentResponse is always returned, even when
        the agent's logic raises.
        """
        try:
            logger.debug(f"{self.agent_type.value} starting execution")
            result = await self.execute(input_data, **kwargs)
            logger.debug(f"{self.agent_type.value} completed successfully")
            return result
        except Exception as exc:
            logger.error(
                f"{self.agent_type.value} failed: {type(exc).__name__}: {exc}",
                exc_info=True
            )
            return AgentResponse(
                agent_type=self.agent_type,
                output=f"Agent error: {str(exc)}",
                confidence=0.0,
                metadata={"error": str(exc), "error_type": type(exc).__name__}
            )

    def _format_prompt(self, template: str, **kwargs) -> str:
        """Format a prompt template, falling back to the raw template when
        a variable is missing."""
        try:
            return template.format(**kwargs)
        except KeyError as missing:
            logger.warning(f"Missing prompt variable: {missing}")
            return template

    def __repr__(self) -> str:
        """String representation for debugging."""
        return f"{self.__class__.__name__}(type={self.agent_type.value})"
|
app/agents/reasoning_agent.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Reasoning Agent
|
| 3 |
+
===============
|
| 4 |
+
|
| 5 |
+
The Reasoning Agent generates grounded responses based on retrieved context.
|
| 6 |
+
|
| 7 |
+
RESPONSIBILITIES:
|
| 8 |
+
1. Take the user query and retrieved context
|
| 9 |
+
2. Generate an accurate, helpful response
|
| 10 |
+
3. Ground all claims in the provided context
|
| 11 |
+
4. Admit when information is not available
|
| 12 |
+
|
| 13 |
+
KEY PRINCIPLE: GROUNDING
|
| 14 |
+
- Every statement must be traceable to source documents
|
| 15 |
+
- Never hallucinate or make up information
|
| 16 |
+
- If context doesn't contain the answer, say so
|
| 17 |
+
- This builds user trust and prevents misinformation
|
| 18 |
+
|
| 19 |
+
WHY SEPARATE FROM RETRIEVER?
|
| 20 |
+
- Clear separation of concerns
|
| 21 |
+
- Retrieval can be optimized independently
|
| 22 |
+
- Reasoning prompts can be tuned without affecting retrieval
|
| 23 |
+
- Makes the system more testable
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import logging
|
| 27 |
+
from typing import Any
|
| 28 |
+
|
| 29 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 30 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 31 |
+
|
| 32 |
+
from app.agents.base_agent import BaseAgent
|
| 33 |
+
from app.schemas.models import AgentResponse, AgentType
|
| 34 |
+
from app.memory.conversation_memory import ConversationMemoryManager
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# The reasoning prompt is critical for grounding
|
| 40 |
+
# Key elements:
|
| 41 |
+
# 1. Clear role definition
|
| 42 |
+
# 2. Explicit grounding instructions
|
| 43 |
+
# 3. How to handle missing information
|
| 44 |
+
# 4. Format guidelines
|
| 45 |
+
REASONING_SYSTEM_PROMPT = """You are a helpful customer support assistant.
|
| 46 |
+
|
| 47 |
+
YOUR ROLE:
|
| 48 |
+
- Answer user questions accurately based on the provided context
|
| 49 |
+
- Be helpful, professional, and concise
|
| 50 |
+
- Guide users through solutions step-by-step when appropriate
|
| 51 |
+
|
| 52 |
+
CRITICAL GROUNDING RULES:
|
| 53 |
+
1. ONLY use information from the provided context documents
|
| 54 |
+
2. If the context doesn't contain the answer, say: "I don't have specific information about that in my knowledge base. Let me connect you with a human agent who can help."
|
| 55 |
+
3. Do NOT make up information, policies, or procedures
|
| 56 |
+
4. When referencing information, you may cite the source document
|
| 57 |
+
5. If context is partially relevant, use what's applicable and note limitations
|
| 58 |
+
|
| 59 |
+
RESPONSE FORMAT:
|
| 60 |
+
- Be concise but complete
|
| 61 |
+
- Use bullet points for lists or steps
|
| 62 |
+
- If providing steps, number them
|
| 63 |
+
- End with a helpful follow-up question if appropriate
|
| 64 |
+
|
| 65 |
+
CONVERSATION HISTORY:
|
| 66 |
+
{chat_history}
|
| 67 |
+
|
| 68 |
+
RETRIEVED CONTEXT:
|
| 69 |
+
{context}
|
| 70 |
+
|
| 71 |
+
Remember: It's better to admit you don't know than to provide incorrect information."""
|
| 72 |
+
|
| 73 |
+
USER_PROMPT = """User Question: {query}
|
| 74 |
+
|
| 75 |
+
Please provide a helpful, accurate response based on the context above."""
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class ReasoningAgent(BaseAgent):
    """
    Generates grounded responses from retrieved context.

    This agent is the "brain" that synthesizes information from the
    retriever into coherent, accurate responses. The system prompt
    instructs the LLM to ground all claims in the supplied context and
    to admit when the answer is not available.
    """

    def __init__(
        self,
        memory_manager: "ConversationMemoryManager | None" = None,
        **kwargs
    ):
        """
        Initialize the Reasoning Agent.

        Args:
            memory_manager: For conversation context; a fresh
                ConversationMemoryManager is created when omitted.
                (Annotation fixed: the default is None, so the parameter
                is optional.)
            **kwargs: Passed to BaseAgent (llm, provider, temperature)
        """
        super().__init__(**kwargs)
        self._memory_manager = memory_manager or ConversationMemoryManager()

    @property
    def agent_type(self) -> AgentType:
        """Return the agent type."""
        return AgentType.REASONING

    async def execute(
        self,
        input_data: dict[str, Any],
        **kwargs
    ) -> AgentResponse:
        """
        Generate a grounded response.

        Args:
            input_data:
                - query: User's question
                - context: Retrieved documents (from Retriever Agent)
                - conversation_id: For memory (optional)
            **kwargs: Additional options

        Returns:
            AgentResponse with the generated answer and a heuristic
            confidence score.
        """
        query = input_data.get("query", "")
        context = input_data.get("context", "No context provided.")
        conversation_id = input_data.get("conversation_id", "")

        if not query:
            return AgentResponse(
                agent_type=self.agent_type,
                output="No query provided",
                confidence=0.0,
                metadata={"error": "empty_query"}
            )

        # Pull prior turns so the LLM can resolve follow-up questions.
        chat_history = ""
        if conversation_id:
            chat_history = self._memory_manager.get_context_string(conversation_id)

        # Inject history and retrieved context into the grounding prompt.
        system_prompt = REASONING_SYSTEM_PROMPT.format(
            chat_history=chat_history or "No previous conversation.",
            context=context
        )
        user_prompt = USER_PROMPT.format(query=query)

        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=user_prompt)
        ]

        # Generate the response; failures are reported as a low-confidence
        # AgentResponse rather than propagating the exception.
        try:
            response = await self._llm.ainvoke(messages)
            answer = response.content
        except Exception as e:
            logger.error(f"Reasoning failed: {e}")
            return AgentResponse(
                agent_type=self.agent_type,
                output=f"I encountered an error generating a response: {str(e)}",
                confidence=0.0,
                metadata={"error": str(e)}
            )

        # Simple heuristic confidence; see _estimate_confidence for details.
        confidence = self._estimate_confidence(context, answer)

        # Record the exchange so future turns see this conversation.
        if conversation_id:
            self._memory_manager.add_user_message(conversation_id, query)
            self._memory_manager.add_ai_message(conversation_id, answer)

        logger.info(f"Generated response with confidence: {confidence:.2f}")

        return AgentResponse(
            agent_type=self.agent_type,
            output=answer,
            confidence=confidence,
            metadata={
                "has_context": bool(context and context != "No context provided."),
                "conversation_id": conversation_id,
            }
        )

    def _estimate_confidence(self, context: str, answer: str) -> float:
        """
        Estimate confidence in the generated response.

        Simple heuristic:
        1. Base confidence is 0.7; missing context drops it to 0.3.
        2. Uncertainty phrasing in the answer caps it at 0.4.
        3. Very short answers (< 50 chars) cap it at 0.5.

        A production system might use NLI grounding checks, semantic
        similarity between answer and context, or LLM self-evaluation.

        Args:
            context: The retrieved context
            answer: The generated answer

        Returns:
            Confidence score between 0 and 1
        """
        confidence = 0.7

        # No context = lower confidence.
        if not context or context == "No context provided.":
            confidence = 0.3

        # Phrases indicating the model admitted uncertainty.
        uncertainty_phrases = [
            "i don't have",
            "not sure",
            "can't find",
            "no information",
            "don't know",
            "couldn't find",
            "not in my knowledge",
        ]

        answer_lower = answer.lower()
        for phrase in uncertainty_phrases:
            if phrase in answer_lower:
                confidence = min(confidence, 0.4)
                break

        # Very short answers might indicate issues.
        if len(answer) < 50:
            confidence = min(confidence, 0.5)

        return confidence
|
app/agents/retriever_agent.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Retriever Agent
|
| 3 |
+
===============
|
| 4 |
+
|
| 5 |
+
The Retriever Agent is responsible for finding relevant documents
|
| 6 |
+
from the vector store based on the user's query.
|
| 7 |
+
|
| 8 |
+
RESPONSIBILITIES:
|
| 9 |
+
1. Take the user query
|
| 10 |
+
2. Search the FAISS vector store
|
| 11 |
+
3. Return relevant document chunks with scores
|
| 12 |
+
4. Optionally re-rank results for better accuracy
|
| 13 |
+
|
| 14 |
+
WHY A SEPARATE RETRIEVER AGENT?
|
| 15 |
+
- Single responsibility: Only handles retrieval
|
| 16 |
+
- Can be enhanced independently (re-ranking, hybrid search)
|
| 17 |
+
- Makes testing easier
|
| 18 |
+
- Allows for retrieval-specific optimizations
|
| 19 |
+
|
| 20 |
+
RETRIEVAL STRATEGY:
|
| 21 |
+
1. Embed the query using same model as documents
|
| 22 |
+
2. Find top-k nearest neighbors in vector space
|
| 23 |
+
3. Return documents with relevance scores
|
| 24 |
+
4. (Optional) Re-rank using cross-encoder for better accuracy
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import logging
|
| 28 |
+
from typing import Any
|
| 29 |
+
|
| 30 |
+
from app.agents.base_agent import BaseAgent
|
| 31 |
+
from app.schemas.models import AgentResponse, AgentType, RetrievedDocument
|
| 32 |
+
from app.vectorstore.faiss_store import FAISSVectorStore
|
| 33 |
+
from app.config import get_settings
|
| 34 |
+
|
| 35 |
+
logger = logging.getLogger(__name__)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class RetrieverAgent(BaseAgent):
    """
    Retrieves relevant documents from the vector store.

    Encapsulating retrieval in one agent makes it easy to:
    - Swap vector stores (FAISS -> Pinecone)
    - Add re-ranking
    - Implement hybrid search (semantic + keyword)
    """

    def __init__(self, vector_store: "FAISSVectorStore | None" = None, **kwargs):
        """
        Initialize the Retriever Agent.

        Args:
            vector_store: FAISS store instance (a default store is created
                when not provided). Annotation fixed: the default is None,
                so the parameter is optional.
            **kwargs: Passed to BaseAgent
        """
        super().__init__(**kwargs)
        self._vector_store = vector_store or FAISSVectorStore()
        # NOTE: BaseAgent.__init__ already assigns self._settings from
        # get_settings(), so the previous duplicate assignment was removed.

    @property
    def agent_type(self) -> AgentType:
        """Return the agent type."""
        return AgentType.RETRIEVER

    async def execute(
        self,
        input_data: dict[str, Any],
        **kwargs
    ) -> AgentResponse:
        """
        Retrieve relevant documents for the query.

        Args:
            input_data: Must contain a 'query' key
            **kwargs: Optional 'top_k' to override the configured default

        Returns:
            AgentResponse whose output is a formatted context string and
            whose metadata carries the retrieved documents.
        """
        query = input_data.get("query", "")
        top_k = kwargs.get("top_k", self._settings.retrieval_top_k)

        if not query:
            return AgentResponse(
                agent_type=self.agent_type,
                output="No query provided for retrieval",
                confidence=0.0,
                metadata={"documents": [], "error": "empty_query"}
            )

        # Bail out early when nothing has been ingested yet.
        if not self._vector_store.is_ready:
            return AgentResponse(
                agent_type=self.agent_type,
                output="No documents in knowledge base. Please ingest documents first.",
                confidence=0.0,
                metadata={"documents": [], "error": "no_documents"}
            )

        # Perform similarity search; surface failures as a response.
        try:
            results = self._vector_store.similarity_search(query, k=top_k)
        except Exception as e:
            logger.error(f"Retrieval failed: {e}")
            return AgentResponse(
                agent_type=self.agent_type,
                output=f"Retrieval error: {str(e)}",
                confidence=0.0,
                metadata={"documents": [], "error": str(e)}
            )

        # Wrap raw (document, score) pairs into typed records.
        documents = []
        for doc, score in results:
            documents.append(RetrievedDocument(
                content=doc.page_content,
                source=doc.metadata.get("source", "unknown"),
                relevance_score=score,
                chunk_index=doc.metadata.get("chunk_index")
            ))

        # Overall confidence is the first result's score (results are
        # presumably ordered best-first by the store — TODO confirm).
        best_score = documents[0].relevance_score if documents else 0.0

        # Render documents into a single labelled context string for the LLM.
        context_parts = []
        for i, doc in enumerate(documents, 1):
            context_parts.append(
                f"[Document {i}] (Source: {doc.source}, Relevance: {doc.relevance_score:.2f})\n"
                f"{doc.content}"
            )

        context_string = "\n\n---\n\n".join(context_parts) if context_parts else "No relevant documents found."

        logger.info(
            f"Retrieved {len(documents)} documents, best score: {best_score:.2f}"
        )

        return AgentResponse(
            agent_type=self.agent_type,
            output=context_string,
            confidence=best_score,
            metadata={
                "documents": [doc.model_dump() for doc in documents],
                "document_count": len(documents),
                "best_relevance_score": best_score,
            }
        )

    def get_retriever(self):
        """
        Get a LangChain-compatible retriever interface so this agent can
        be used directly in LangChain chains.

        Returns:
            LangChain Retriever instance
        """
        return self._vector_store.as_retriever()
|
app/agents/router_agent.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Router Agent
|
| 3 |
+
============
|
| 4 |
+
|
| 5 |
+
The Router Agent is the "traffic controller" of the multi-agent system.
|
| 6 |
+
It analyzes incoming queries and decides which agents should handle them.
|
| 7 |
+
|
| 8 |
+
RESPONSIBILITIES:
|
| 9 |
+
1. Classify query intent (question, action request, or both)
|
| 10 |
+
2. Determine if retrieval is needed
|
| 11 |
+
3. Decide if actions should be executed
|
| 12 |
+
4. Return routing instructions for the orchestrator
|
| 13 |
+
|
| 14 |
+
WHY A ROUTER?
|
| 15 |
+
- Not all queries need all agents
|
| 16 |
+
- Saves compute by skipping unnecessary agents
|
| 17 |
+
- Enables conditional logic in the pipeline
|
| 18 |
+
- Makes the system more efficient
|
| 19 |
+
|
| 20 |
+
ROUTING LOGIC:
|
| 21 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 22 |
+
│ Query Type │ Retrieval │ Reasoning │ Action │
|
| 23 |
+
├─────────────────────┼───────────┼───────────┼──────────────────┤
|
| 24 |
+
│ Factual question │ YES │ YES │ NO │
|
| 25 |
+
│ How-to question │ YES │ YES │ NO │
|
| 26 |
+
│ Action request │ MAYBE │ YES │ YES │
|
| 27 |
+
│ Small talk │ NO │ YES │ NO │
|
| 28 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import json
|
| 32 |
+
import logging
|
| 33 |
+
from typing import Any
|
| 34 |
+
|
| 35 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 36 |
+
from langchain_core.output_parsers import PydanticOutputParser
|
| 37 |
+
from pydantic import BaseModel, Field
|
| 38 |
+
|
| 39 |
+
from app.agents.base_agent import BaseAgent
|
| 40 |
+
from app.schemas.models import AgentResponse, AgentType, ActionType
|
| 41 |
+
|
| 42 |
+
logger = logging.getLogger(__name__)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class RoutingDecision(BaseModel):
    """
    Structured output from the Router Agent.

    This Pydantic model ensures the LLM returns valid routing decisions.
    """
    # Should the Retriever Agent search the knowledge base first?
    needs_retrieval: bool = Field(
        description="Whether to search the knowledge base for context"
    )
    # Defaults to True: per the routing table, essentially every query
    # gets a reasoned reply.
    needs_reasoning: bool = Field(
        default=True,
        description="Whether to generate a reasoned response"
    )
    needs_action: bool = Field(
        description="Whether to execute an action (create ticket, etc.)"
    )
    # Free-form string here; RouterAgent.execute converts it to the
    # ActionType enum (invalid values fall back to ActionType.NONE).
    suggested_action: str = Field(
        default="none",
        description="Which action to execute if needs_action is True"
    )
    query_type: str = Field(
        description="Classification: 'factual', 'how_to', 'action', 'general'"
    )
    # Bounded to [0.0, 1.0] by pydantic validation (ge/le).
    confidence: float = Field(
        ge=0.0,
        le=1.0,
        description="Confidence in the routing decision"
    )
    # Asking the LLM to explain itself helps catch routing errors.
    reasoning: str = Field(
        description="Brief explanation of the routing decision"
    )
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# The router prompt is carefully designed to:
# 1. Give clear context about the system
# 2. Provide examples of different query types
# 3. Request structured JSON output
# 4. Ask for reasoning (helps catch errors)
# NOTE: {query}, {context} and {format_instructions} are template
# placeholders filled in by ChatPromptTemplate at call time — keep them
# intact if you edit this prompt.
ROUTER_PROMPT = """You are a query router for a customer support system.
Your job is to analyze the user's query and decide how to handle it.

AVAILABLE AGENTS:
1. Retriever: Searches the knowledge base for relevant information
2. Reasoning: Generates responses based on context
3. Action: Executes actions like creating tickets or escalating

ROUTING RULES:
- Factual questions (what, who, when, where) -> Retriever + Reasoning
- How-to questions (how do I, steps to) -> Retriever + Reasoning
- Action requests (create ticket, escalate, reset password) -> Retriever + Reasoning + Action
- General conversation (hello, thanks, goodbye) -> Reasoning only
- Complaints or urgent issues -> Retriever + Reasoning + Action (escalate)

AVAILABLE ACTIONS:
- create_ticket: Create a support ticket
- escalate: Escalate to human agent
- send_email: Send email notification
- search_knowledge_base: Deep search in KB
- none: No action needed

USER QUERY: {query}

CONVERSATION CONTEXT: {context}

Analyze the query and provide your routing decision.

{format_instructions}"""
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
class RouterAgent(BaseAgent):
    """
    Classifies query intent and decides which agents should run.

    An LLM call produces a structured RoutingDecision that the
    orchestrator uses to skip unnecessary pipeline stages. Benefits:

    1. Efficiency: agents that are not needed are never invoked
    2. Accuracy: each query reaches the handler suited to it
    3. Flexibility: routing rules live in one prompt, easy to extend
    """

    def __init__(self, **kwargs):
        """Set up the output parser and prompt template."""
        super().__init__(**kwargs)
        # The parser constrains LLM output to the RoutingDecision schema.
        self._parser = PydanticOutputParser(pydantic_object=RoutingDecision)
        self._prompt = ChatPromptTemplate.from_template(ROUTER_PROMPT)

    @property
    def agent_type(self) -> AgentType:
        """Identify this agent as the router."""
        return AgentType.ROUTER

    async def execute(
        self,
        input_data: dict[str, Any],
        **kwargs
    ) -> AgentResponse:
        """
        Analyze a query and produce routing instructions.

        Args:
            input_data: Must contain 'query'; may contain 'context'.

        Returns:
            AgentResponse whose metadata carries the routing flags.
        """
        query = input_data.get("query", "")
        context = input_data.get("context", "No previous context")

        # Guard clause: nothing to route without a query.
        if not query:
            return AgentResponse(
                agent_type=self.agent_type,
                output="No query provided",
                confidence=0.0,
                metadata={"error": "empty_query"},
            )

        # Render the routing prompt with the query, conversation context
        # and the parser's schema instructions.
        messages = self._prompt.format_messages(
            query=query,
            context=context,
            format_instructions=self._parser.get_format_instructions(),
        )

        # Ask the LLM for a routing decision.
        llm_reply = await self._llm.ainvoke(messages)

        # Parse the structured output; fall back to the full pipeline if
        # the LLM's reply does not match the schema.
        try:
            decision = self._parser.parse(llm_reply.content)
        except Exception as parse_err:
            logger.warning(f"Failed to parse routing decision: {parse_err}")
            decision = RoutingDecision(
                needs_retrieval=True,
                needs_reasoning=True,
                needs_action=False,
                suggested_action="none",
                query_type="unknown",
                confidence=0.5,
                reasoning="Failed to parse, using default routing",
            )

        # Map the free-form action string onto the enum; unknown values
        # mean "no action".
        try:
            action_type = ActionType(decision.suggested_action)
        except ValueError:
            action_type = ActionType.NONE

        logger.info(
            f"Routed query: retrieval={decision.needs_retrieval}, "
            f"action={decision.needs_action} ({action_type.value})"
        )

        return AgentResponse(
            agent_type=self.agent_type,
            output=decision.reasoning,
            confidence=decision.confidence,
            metadata={
                "needs_retrieval": decision.needs_retrieval,
                "needs_reasoning": decision.needs_reasoning,
                "needs_action": decision.needs_action,
                "suggested_action": action_type.value,
                "query_type": decision.query_type,
            },
        )
|
app/api/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API Module
|
| 3 |
+
==========
|
| 4 |
+
|
| 5 |
+
FastAPI routes and endpoint definitions.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from app.api.routes import router
|
| 9 |
+
|
| 10 |
+
__all__ = ["router"]
|
app/api/routes.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API Routes
|
| 3 |
+
==========
|
| 4 |
+
|
| 5 |
+
FastAPI endpoints for the Multi-Agent RAG system.
|
| 6 |
+
|
| 7 |
+
ENDPOINTS:
|
| 8 |
+
- POST /query: Submit a query to the RAG system
|
| 9 |
+
- POST /ingest: Ingest documents into the knowledge base
|
| 10 |
+
- GET /health: Health check endpoint
|
| 11 |
+
- DELETE /documents: Clear all documents
|
| 12 |
+
|
| 13 |
+
WHY FastAPI?
|
| 14 |
+
- Automatic OpenAPI documentation
|
| 15 |
+
- Type validation via Pydantic
|
| 16 |
+
- Async support for scalability
|
| 17 |
+
- Easy to test
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import logging
|
| 21 |
+
from typing import Optional
|
| 22 |
+
|
| 23 |
+
from fastapi import APIRouter, HTTPException, BackgroundTasks
|
| 24 |
+
from fastapi.responses import JSONResponse
|
| 25 |
+
|
| 26 |
+
from app.schemas.models import (
|
| 27 |
+
QueryRequest,
|
| 28 |
+
QueryResponse,
|
| 29 |
+
IngestionRequest,
|
| 30 |
+
IngestionResponse,
|
| 31 |
+
HealthResponse,
|
| 32 |
+
)
|
| 33 |
+
from app.services.orchestrator import MultiAgentOrchestrator
|
| 34 |
+
from app.services.document_service import DocumentService
|
| 35 |
+
from app import __version__
|
| 36 |
+
|
| 37 |
+
logger = logging.getLogger(__name__)
|
| 38 |
+
|
| 39 |
+
# Create router with prefix and tags for OpenAPI docs
|
| 40 |
+
router = APIRouter(prefix="/api/v1", tags=["rag"])
|
| 41 |
+
|
| 42 |
+
# Lazy initialization of services
|
| 43 |
+
# These are created on first request to avoid startup delays
|
| 44 |
+
_orchestrator: Optional[MultiAgentOrchestrator] = None
|
| 45 |
+
_document_service: Optional[DocumentService] = None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_orchestrator() -> MultiAgentOrchestrator:
    """Return the shared orchestrator, instantiating it lazily.

    Lazy creation keeps process startup fast; the first request pays
    the construction cost.
    """
    global _orchestrator
    if _orchestrator is not None:
        return _orchestrator
    _orchestrator = MultiAgentOrchestrator()
    return _orchestrator
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def get_document_service() -> DocumentService:
    """Return the shared document service, instantiating it lazily.

    Mirrors get_orchestrator: one instance per process, created on
    first use.
    """
    global _document_service
    if _document_service is not None:
        return _document_service
    _document_service = DocumentService()
    return _document_service
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# =============================================================================
|
| 65 |
+
# Health Check
|
| 66 |
+
# =============================================================================
|
| 67 |
+
|
| 68 |
+
@router.get(
    "/health",
    response_model=HealthResponse,
    summary="Health Check",
    description="Check if the API is running and the vector store is ready",
)
async def health_check() -> HealthResponse:
    """Report API liveness plus knowledge-base readiness.

    Returns:
        HealthResponse carrying the version and document count.
    """
    orch = get_orchestrator()
    payload = HealthResponse(
        status="healthy",
        version=__version__,
        vector_store_ready=orch.is_ready,
        document_count=orch.document_count,
    )
    return payload
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# =============================================================================
|
| 91 |
+
# Query Endpoint
|
| 92 |
+
# =============================================================================
|
| 93 |
+
|
| 94 |
+
@router.post(
    "/query",
    response_model=QueryResponse,
    summary="Submit Query",
    description="Submit a question to the Multi-Agent RAG system",
)
async def submit_query(request: QueryRequest) -> QueryResponse:
    """
    Process a user query through the multi-agent RAG pipeline.

    The query flows through:
    1. Router Agent: classifies intent and routes
    2. Retriever Agent: searches the knowledge base
    3. Reasoning Agent: generates a grounded response
    4. Action Agent: executes actions if needed

    Args:
        request: QueryRequest with the user's question.

    Returns:
        QueryResponse with the answer and supporting sources.

    Raises:
        HTTPException: 500 if processing fails unexpectedly.
    """
    try:
        orchestrator = get_orchestrator()

        # An empty knowledge base is not fatal: the reasoning agent
        # degrades gracefully, so only a warning is logged.
        if not orchestrator.is_ready:
            logger.warning("Query received but no documents in knowledge base")

        response = await orchestrator.process_query(request)

        logger.info(
            f"Query processed: {request.query[:50]}... -> "
            f"{len(response.answer)} chars, {len(response.sources)} sources"
        )

        return response

    except HTTPException:
        # Fix: previously the broad handler below re-wrapped deliberate
        # HTTP errors (e.g. a downstream 4xx) into opaque 500s.
        raise
    except Exception as e:
        logger.error(f"Query processing failed: {e}", exc_info=True)
        # Chain the cause explicitly so tracebacks show the real failure.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process query: {str(e)}"
        ) from e
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# =============================================================================
|
| 145 |
+
# Document Ingestion
|
| 146 |
+
# =============================================================================
|
| 147 |
+
|
| 148 |
+
@router.post(
    "/ingest",
    response_model=IngestionResponse,
    summary="Ingest Documents",
    description="Ingest documents into the knowledge base",
)
async def ingest_documents(request: IngestionRequest) -> IngestionResponse:
    """
    Ingest documents into the vector store.

    If file_paths are specified, only those files are ingested.
    Otherwise, all documents in the configured directory are ingested.

    Args:
        request: IngestionRequest specifying what to ingest.

    Returns:
        IngestionResponse with processing results.

    Raises:
        HTTPException: 500 if ingestion fails unexpectedly.
    """
    try:
        service = get_document_service()
        response = await service.ingest_documents(request)

        # Per-file failures are reported inside the response rather than
        # raised, so callers can see which files succeeded.
        if response.errors:
            logger.warning(f"Ingestion completed with errors: {response.errors}")
        else:
            logger.info(
                f"Ingestion complete: {response.documents_processed} documents, "
                f"{response.chunks_created} chunks"
            )

        return response

    except HTTPException:
        # Fix: do not re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        logger.error(f"Ingestion failed: {e}", exc_info=True)
        # Chain the cause explicitly for accurate tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to ingest documents: {str(e)}"
        ) from e
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@router.post(
    "/ingest/text",
    summary="Ingest Text",
    description="Ingest raw text directly into the knowledge base",
)
async def ingest_text(
    text: str,
    source_name: str = "direct_input"
) -> dict:
    """
    Ingest raw text directly without creating a file.

    Args:
        text: The text content to ingest.
        source_name: Name to use as the source.

    Returns:
        Dictionary with chunks created count.

    Raises:
        HTTPException: 500 if ingestion fails unexpectedly.
    """
    # NOTE(review): bare str parameters on a POST route are bound as
    # query parameters by FastAPI; confirm callers expect that (a Body()
    # or Pydantic model may be intended).
    try:
        service = get_document_service()
        chunks = await service.ingest_text(text, source_name)

        return {
            "success": True,
            "chunks_created": chunks,
            "source": source_name,
        }

    except HTTPException:
        # Fix: do not re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        logger.error(f"Text ingestion failed: {e}", exc_info=True)
        # Chain the cause explicitly for accurate tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to ingest text: {str(e)}"
        ) from e
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
# =============================================================================
|
| 227 |
+
# Document Management
|
| 228 |
+
# =============================================================================
|
| 229 |
+
|
| 230 |
+
@router.delete(
    "/documents",
    summary="Clear Documents",
    description="Delete all documents from the knowledge base",
)
async def clear_documents() -> dict:
    """
    Clear all documents from the vector store.

    WARNING: This is destructive and cannot be undone.

    Returns:
        Confirmation message.

    Raises:
        HTTPException: 500 if the clear operation fails.
    """
    try:
        service = get_document_service()
        service.clear_all_documents()

        return {
            "success": True,
            "message": "All documents have been cleared from the knowledge base",
        }

    except HTTPException:
        # Fix: do not re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        logger.error(f"Failed to clear documents: {e}", exc_info=True)
        # Chain the cause explicitly for accurate tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to clear documents: {str(e)}"
        ) from e
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
@router.get(
    "/documents/count",
    summary="Document Count",
    description="Get the number of document chunks in the knowledge base",
)
async def get_document_count() -> dict:
    """Return the chunk count and readiness flag for the knowledge base."""
    svc = get_document_service()
    return {
        "count": svc.get_document_count(),
        "ready": svc.is_ready(),
    }
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# =============================================================================
|
| 282 |
+
# Conversation Management
|
| 283 |
+
# =============================================================================
|
| 284 |
+
|
| 285 |
+
@router.delete(
    "/conversations/{conversation_id}",
    summary="Clear Conversation",
    description="Clear memory for a specific conversation",
)
async def clear_conversation(conversation_id: str) -> dict:
    """Drop all stored memory for one conversation.

    Args:
        conversation_id: Identifier of the conversation to wipe.

    Returns:
        A confirmation payload.
    """
    orch = get_orchestrator()
    # NOTE(review): reaches into a private attribute of the orchestrator;
    # consider exposing a public clear-conversation API on it instead.
    orch._memory_manager.clear_conversation(conversation_id)

    return {
        "success": True,
        "message": f"Cleared memory for conversation: {conversation_id}",
    }
|
app/config.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration Management
|
| 3 |
+
========================
|
| 4 |
+
|
| 5 |
+
This module centralizes all configuration using pydantic-settings.
|
| 6 |
+
It loads environment variables and provides type-safe access to config values.
|
| 7 |
+
|
| 8 |
+
SUPPORTED LLM PROVIDERS (all have free tiers):
|
| 9 |
+
1. ollama - Local LLMs (Llama, Mistral) - completely free
|
| 10 |
+
2. huggingface - HuggingFace Inference API - free tier available
|
| 11 |
+
3. groq - Groq Cloud - free tier with fast inference
|
| 12 |
+
4. google - Google Gemini - free tier available
|
| 13 |
+
5. openai - OpenAI - paid (for reference)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from functools import lru_cache
|
| 19 |
+
from typing import Literal
|
| 20 |
+
|
| 21 |
+
from pydantic_settings import BaseSettings
|
| 22 |
+
from pydantic import Field
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class Settings(BaseSettings):
|
| 26 |
+
"""
|
| 27 |
+
Application settings loaded from environment variables.
|
| 28 |
+
|
| 29 |
+
All settings have sensible defaults for development.
|
| 30 |
+
In production, override via environment variables or .env file.
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
# =========================================================================
|
| 34 |
+
# LLM Provider Selection
|
| 35 |
+
# =========================================================================
|
| 36 |
+
llm_provider: Literal["ollama", "huggingface", "groq", "google", "openai"] = Field(
|
| 37 |
+
default="ollama",
|
| 38 |
+
description="Which LLM provider to use (ollama is free and local)"
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
# =========================================================================
|
| 42 |
+
# API Keys (only needed for cloud providers)
|
| 43 |
+
# =========================================================================
|
| 44 |
+
openai_api_key: str = Field(
|
| 45 |
+
default="",
|
| 46 |
+
description="OpenAI API key (only if using openai provider)"
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
huggingface_api_key: str = Field(
|
| 50 |
+
default="",
|
| 51 |
+
description="HuggingFace API key (free at huggingface.co)"
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
groq_api_key: str = Field(
|
| 55 |
+
default="",
|
| 56 |
+
description="Groq API key (free at console.groq.com)"
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
google_api_key: str = Field(
|
| 60 |
+
default="",
|
| 61 |
+
description="Google API key (free at makersuite.google.com)"
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
# =========================================================================
|
| 65 |
+
# Model Configuration
|
| 66 |
+
# =========================================================================
|
| 67 |
+
# Models for each provider
|
| 68 |
+
ollama_model: str = Field(
|
| 69 |
+
default="llama3.2",
|
| 70 |
+
description="Ollama model (llama3.2, mistral, phi3, etc.)"
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
+
huggingface_model: str = Field(
|
| 74 |
+
default="mistralai/Mistral-7B-Instruct-v0.2",
|
| 75 |
+
description="HuggingFace model ID"
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
groq_model: str = Field(
|
| 79 |
+
default="llama-3.1-8b-instant",
|
| 80 |
+
description="Groq model (llama-3.1-8b-instant, mixtral-8x7b-32768)"
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
google_model: str = Field(
|
| 84 |
+
default="gemini-1.5-flash",
|
| 85 |
+
description="Google Gemini model"
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
openai_model: str = Field(
|
| 89 |
+
default="gpt-3.5-turbo",
|
| 90 |
+
description="OpenAI model"
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
# Temperature controls randomness: 0 = deterministic, 1 = creative
|
| 94 |
+
llm_temperature: float = Field(
|
| 95 |
+
default=0.1,
|
| 96 |
+
ge=0.0,
|
| 97 |
+
le=1.0,
|
| 98 |
+
description="LLM temperature (lower = more focused)"
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
# =========================================================================
|
| 102 |
+
# Embedding Configuration
|
| 103 |
+
# =========================================================================
|
| 104 |
+
embedding_provider: Literal["huggingface", "openai"] = Field(
|
| 105 |
+
default="huggingface",
|
| 106 |
+
description="Embedding provider (huggingface is free and local)"
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
huggingface_embedding_model: str = Field(
|
| 110 |
+
default="sentence-transformers/all-MiniLM-L6-v2",
|
| 111 |
+
description="Free local embedding model"
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
openai_embedding_model: str = Field(
|
| 115 |
+
default="text-embedding-3-small",
|
| 116 |
+
description="OpenAI embedding model (paid)"
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
# =========================================================================
|
| 120 |
+
# Ollama Configuration
|
| 121 |
+
# =========================================================================
|
| 122 |
+
ollama_base_url: str = Field(
|
| 123 |
+
default="http://localhost:11434",
|
| 124 |
+
description="Ollama server URL"
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
# =========================================================================
|
| 128 |
+
# Vector Store Configuration
|
| 129 |
+
# =========================================================================
|
| 130 |
+
faiss_index_path: Path = Field(
|
| 131 |
+
default=Path("./data/faiss_index"),
|
| 132 |
+
description="Directory to store FAISS index"
|
| 133 |
+
)
|
| 134 |
+
|
| 135 |
+
documents_path: Path = Field(
|
| 136 |
+
default=Path("./data/documents"),
|
| 137 |
+
description="Directory containing source documents"
|
| 138 |
+
)
|
| 139 |
+
|
| 140 |
+
chunk_size: int = Field(
|
| 141 |
+
default=1000,
|
| 142 |
+
description="Size of document chunks for embedding"
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
chunk_overlap: int = Field(
|
| 146 |
+
default=200,
|
| 147 |
+
description="Overlap between consecutive chunks"
|
| 148 |
+
)
|
| 149 |
+
|
| 150 |
+
retrieval_top_k: int = Field(
|
| 151 |
+
default=5,
|
| 152 |
+
description="Number of documents to retrieve"
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
# =========================================================================
|
| 156 |
+
# API Configuration
|
| 157 |
+
# =========================================================================
|
| 158 |
+
api_host: str = Field(
|
| 159 |
+
default="0.0.0.0",
|
| 160 |
+
description="Host to bind the API server"
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
api_port: int = Field(
|
| 164 |
+
default=8000,
|
| 165 |
+
description="Port for the API server"
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
debug_mode: bool = Field(
|
| 169 |
+
default=True,
|
| 170 |
+
description="Enable debug mode for development"
|
| 171 |
+
)
|
| 172 |
+
|
| 173 |
+
# =========================================================================
|
| 174 |
+
# Logging Configuration
|
| 175 |
+
# =========================================================================
|
| 176 |
+
log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = Field(
|
| 177 |
+
default="INFO",
|
| 178 |
+
description="Logging level"
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
class Config:
|
| 182 |
+
"""Pydantic configuration for settings."""
|
| 183 |
+
env_file = ".env"
|
| 184 |
+
env_file_encoding = "utf-8"
|
| 185 |
+
extra = "ignore"
|
| 186 |
+
|
| 187 |
+
def ensure_directories(self) -> None:
|
| 188 |
+
"""Create necessary directories if they don't exist."""
|
| 189 |
+
self.faiss_index_path.mkdir(parents=True, exist_ok=True)
|
| 190 |
+
self.documents_path.mkdir(parents=True, exist_ok=True)
|
| 191 |
+
|
| 192 |
+
def get_model_name(self) -> str:
    """Return the configured model name for the active LLM provider.

    Looks up the provider-specific model field; falls back to the Ollama
    model when `llm_provider` holds an unrecognised value.

    Returns:
        Model name string for the selected provider.
    """
    provider_models = {
        "ollama": self.ollama_model,
        "huggingface": self.huggingface_model,
        "groq": self.groq_model,
        "google": self.google_model,
        "openai": self.openai_model,
    }
    return provider_models.get(self.llm_provider, self.ollama_model)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide cached Settings instance.

    `lru_cache` guarantees `Settings()` is constructed (and the .env file
    parsed) exactly once per process; every caller shares the same object.

    Returns:
        Settings: Application configuration instance
    """
    return Settings()
|
app/main.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multi-Agent RAG System - Main Application
|
| 3 |
+
==========================================
|
| 4 |
+
|
| 5 |
+
FastAPI application entry point for the Multi-Agent RAG system.
|
| 6 |
+
|
| 7 |
+
This module:
|
| 8 |
+
- Creates the FastAPI application
|
| 9 |
+
- Configures middleware and CORS
|
| 10 |
+
- Includes API routes
|
| 11 |
+
- Sets up logging
|
| 12 |
+
- Provides startup/shutdown hooks
|
| 13 |
+
|
| 14 |
+
RUNNING THE APP:
|
| 15 |
+
Development: uvicorn app.main:app --reload
|
| 16 |
+
Production: uvicorn app.main:app --host 0.0.0.0 --port 8000
|
| 17 |
+
|
| 18 |
+
API DOCUMENTATION:
|
| 19 |
+
- Swagger UI: http://localhost:8000/docs
|
| 20 |
+
- ReDoc: http://localhost:8000/redoc
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import logging
|
| 24 |
+
import sys
|
| 25 |
+
from contextlib import asynccontextmanager
|
| 26 |
+
|
| 27 |
+
from fastapi import FastAPI
|
| 28 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 29 |
+
from fastapi.responses import RedirectResponse
|
| 30 |
+
|
| 31 |
+
from app.api.routes import router
|
| 32 |
+
from app.config import get_settings
|
| 33 |
+
from app import __version__
|
| 34 |
+
|
| 35 |
+
# =============================================================================
|
| 36 |
+
# Logging Setup
|
| 37 |
+
# =============================================================================
|
| 38 |
+
|
| 39 |
+
def setup_logging() -> None:
    """
    Configure logging for the application.

    Sets up structured console logging at the level configured in Settings,
    and quiets chatty third-party libraries.

    FIX: made idempotent. The original unconditionally appended a new
    StreamHandler to the root logger on every call, so re-entry (uvicorn
    --reload, repeated lifespan startup in tests) duplicated every log line.
    """
    settings = get_settings()

    root_logger = logging.getLogger()
    root_logger.setLevel(settings.log_level)

    # Attach our console handler only once; subsequent calls just refresh
    # the level and third-party filters.
    if not root_logger.handlers:
        formatter = logging.Formatter(
            fmt="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setFormatter(formatter)
        root_logger.addHandler(console_handler)

    # Reduce noise from third-party libraries
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("openai").setLevel(logging.WARNING)
    logging.getLogger("langchain").setLevel(logging.WARNING)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# =============================================================================
|
| 69 |
+
# Application Lifecycle
|
| 70 |
+
# =============================================================================
|
| 71 |
+
|
| 72 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan manager.

    Handles startup and shutdown events:
    - Startup: configure logging, load settings, ensure data directories exist
    - Shutdown: log termination (no resources currently require cleanup)

    Args:
        app: FastAPI application instance (unused, required by the protocol)
    """
    # Startup: logging must be configured before the first logger call.
    setup_logging()
    logger = logging.getLogger(__name__)

    settings = get_settings()
    # Create documents/index directories up front so later I/O can't fail
    # on a missing path.
    settings.ensure_directories()

    logger.info(f"Starting Multi-Agent RAG System v{__version__}")
    logger.info(f"Debug mode: {settings.debug_mode}")
    logger.info(f"Documents path: {settings.documents_path}")
    logger.info(f"FAISS index path: {settings.faiss_index_path}")

    yield  # Application runs here

    # Shutdown
    logger.info("Shutting down Multi-Agent RAG System")
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# =============================================================================
|
| 103 |
+
# FastAPI Application
|
| 104 |
+
# =============================================================================
|
| 105 |
+
|
| 106 |
+
def create_app() -> FastAPI:
    """
    Create and configure the FastAPI application.

    Wires up the OpenAPI metadata, CORS middleware, the API router, and a
    root redirect to the interactive docs.

    FIX: removed the dead local `settings = get_settings()` — it was never
    used inside this function (the lifespan hook loads settings itself).

    Returns:
        Configured FastAPI instance
    """
    app = FastAPI(
        title="Multi-Agent RAG System",
        description=(
            "A production-grade Retrieval-Augmented Generation system "
            "using multiple specialized agents for query routing, "
            "document retrieval, reasoning, and action execution."
        ),
        version=__version__,
        docs_url="/docs",
        redoc_url="/redoc",
        lifespan=lifespan,
    )

    # Configure CORS for frontend access.
    # SECURITY NOTE: allow_origins=["*"] together with allow_credentials=True
    # is overly permissive — restrict origins in production deployments.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],  # In production, specify allowed origins
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Include API routes
    app.include_router(router)

    # Root redirect to docs
    @app.get("/", include_in_schema=False)
    async def root():
        """Redirect root to API documentation."""
        return RedirectResponse(url="/docs")

    return app
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
# Create the module-level application instance used by ASGI servers
# (e.g. `uvicorn app.main:app`).
app = create_app()


# =============================================================================
# Development Server
# =============================================================================

if __name__ == "__main__":
    import uvicorn

    settings = get_settings()

    # Convenience entry point for local development; in production invoke
    # uvicorn directly with explicit host/port flags.
    uvicorn.run(
        "app.main:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=settings.debug_mode,  # auto-reload only when debugging
        log_level=settings.log_level.lower(),  # uvicorn wants lowercase names
    )
|
app/memory/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Memory Module
|
| 3 |
+
=============
|
| 4 |
+
|
| 5 |
+
Handles conversation memory for multi-turn interactions.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from app.memory.conversation_memory import ConversationMemoryManager
|
| 9 |
+
|
| 10 |
+
__all__ = ["ConversationMemoryManager"]
|
app/memory/conversation_memory.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Conversation Memory Manager
|
| 3 |
+
===========================
|
| 4 |
+
|
| 5 |
+
This module manages conversation history for multi-turn interactions.
|
| 6 |
+
|
| 7 |
+
WHY MEMORY?
|
| 8 |
+
- Users expect context: "What about the second option?" requires memory
|
| 9 |
+
- Follow-up questions need previous context
|
| 10 |
+
- Maintains coherent, contextual conversations
|
| 11 |
+
|
| 12 |
+
We implement a simple window-based memory that keeps the last N messages.
|
| 13 |
+
This avoids deprecated LangChain memory modules and gives us full control.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import logging
|
| 17 |
+
from typing import Optional
|
| 18 |
+
from collections import defaultdict, deque
|
| 19 |
+
from dataclasses import dataclass, field
|
| 20 |
+
from datetime import datetime
|
| 21 |
+
import uuid
|
| 22 |
+
|
| 23 |
+
from app.schemas.models import ConversationMessage
|
| 24 |
+
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
# Sliding-window size, counted in conversation turns: each turn is a
# user message plus an assistant reply, so 10 turns => 20 stored messages.
DEFAULT_WINDOW_SIZE = 10


@dataclass
class ConversationHistory:
    """Message buffer plus bookkeeping timestamps for one conversation."""

    # Bounded deque: appending past maxlen silently evicts the oldest
    # message, which implements the window with no explicit trimming.
    messages: deque = field(default_factory=lambda: deque(maxlen=DEFAULT_WINDOW_SIZE * 2))
    # NOTE(review): datetime.utcnow() yields naive timestamps and is
    # deprecated since Python 3.12 — consider datetime.now(timezone.utc).
    created_at: datetime = field(default_factory=datetime.utcnow)
    last_updated: datetime = field(default_factory=datetime.utcnow)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ConversationMemoryManager:
    """
    Manages conversation memory across multiple user sessions.

    Each conversation_id gets its own ConversationHistory, so concurrent
    users never share or mix context.

    NOTE: storage is in-process only — state is lost on restart and is not
    shared across workers; fine for a single-process deployment.

    FIX: add_user_message/add_ai_message previously duplicated the same
    four-line body; both now delegate to the private _add_message helper.

    Usage:
        manager = ConversationMemoryManager()

        # Add messages
        manager.add_user_message("session-123", "Hello")
        manager.add_ai_message("session-123", "Hi there!")

        # Get context for prompts
        context = manager.get_context_string("session-123")
    """

    def __init__(self, window_size: int = DEFAULT_WINDOW_SIZE):
        """
        Initialize the memory manager.

        Args:
            window_size: Number of conversation turns (user+assistant pairs)
                to remember per conversation.
        """
        self._window_size = window_size
        # Dictionary mapping conversation_id -> ConversationHistory
        self._conversations: dict[str, ConversationHistory] = {}

    def _get_or_create_history(self, conversation_id: str) -> ConversationHistory:
        """
        Get existing history or create a new one for the conversation.

        Args:
            conversation_id: Unique identifier for the conversation

        Returns:
            ConversationHistory for this conversation
        """
        if conversation_id not in self._conversations:
            # Bounded deque gives us the sliding window: old messages are
            # evicted automatically once maxlen is reached.
            self._conversations[conversation_id] = ConversationHistory(
                messages=deque(maxlen=self._window_size * 2)
            )
            logger.debug(f"Created new memory for conversation: {conversation_id}")

        return self._conversations[conversation_id]

    def _add_message(self, conversation_id: str, role: str, content: str, label: str) -> None:
        """
        Shared implementation behind add_user_message / add_ai_message.

        Args:
            conversation_id: Conversation identifier
            role: Message role, "user" or "assistant"
            content: Message text
            label: Human-readable role name used in the debug log line
        """
        history = self._get_or_create_history(conversation_id)
        history.messages.append(ConversationMessage(role=role, content=content))
        history.last_updated = datetime.utcnow()
        logger.debug(f"Added {label} message to {conversation_id}: {content[:50]}...")

    def add_user_message(self, conversation_id: str, content: str) -> None:
        """
        Add a user message to conversation history.

        Args:
            conversation_id: Conversation identifier
            content: The user's message text
        """
        self._add_message(conversation_id, "user", content, "user")

    def add_ai_message(self, conversation_id: str, content: str) -> None:
        """
        Add an AI response to conversation history.

        Args:
            conversation_id: Conversation identifier
            content: The AI's response text
        """
        self._add_message(conversation_id, "assistant", content, "AI")

    def get_messages(self, conversation_id: str) -> list[ConversationMessage]:
        """
        Get all messages in a conversation, oldest first.

        Useful for API responses that want to show conversation history.

        Args:
            conversation_id: Conversation identifier

        Returns:
            List of messages in chronological order (empty when unknown id)
        """
        if conversation_id not in self._conversations:
            return []
        return list(self._conversations[conversation_id].messages)

    def get_context_string(self, conversation_id: str) -> str:
        """
        Get conversation history as a formatted string for prompt injection.

        Args:
            conversation_id: Conversation identifier

        Returns:
            "User: ...\\nAssistant: ..." lines, or a placeholder when empty
        """
        messages = self.get_messages(conversation_id)

        if not messages:
            return "No previous conversation."

        lines = []
        for msg in messages:
            role = "User" if msg.role == "user" else "Assistant"
            lines.append(f"{role}: {msg.content}")

        return "\n".join(lines)

    def get_messages_for_llm(self, conversation_id: str) -> list[dict]:
        """
        Get messages formatted for chat-model consumption.

        Returns messages in the format expected by chat models:
        [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]

        Args:
            conversation_id: Conversation identifier

        Returns:
            List of role/content message dictionaries
        """
        messages = self.get_messages(conversation_id)
        return [{"role": msg.role, "content": msg.content} for msg in messages]

    def clear_conversation(self, conversation_id: str) -> None:
        """
        Clear all memory for a conversation. No-op for unknown ids.

        Args:
            conversation_id: Conversation to clear
        """
        if conversation_id in self._conversations:
            del self._conversations[conversation_id]
            logger.info(f"Cleared memory for conversation: {conversation_id}")

    def generate_conversation_id(self) -> str:
        """
        Generate a new unique conversation ID.

        Returns:
            UUID4 string for a new conversation
        """
        return str(uuid.uuid4())

    @property
    def active_conversations(self) -> int:
        """Get count of active conversations currently held in memory."""
        return len(self._conversations)
|
app/schemas/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic Schemas
|
| 3 |
+
================
|
| 4 |
+
|
| 5 |
+
Data models for request/response validation and internal data structures.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from app.schemas.models import (
|
| 9 |
+
QueryRequest,
|
| 10 |
+
QueryResponse,
|
| 11 |
+
DocumentInfo,
|
| 12 |
+
AgentType,
|
| 13 |
+
AgentResponse,
|
| 14 |
+
RetrievedDocument,
|
| 15 |
+
ConversationMessage,
|
| 16 |
+
IngestionRequest,
|
| 17 |
+
IngestionResponse,
|
| 18 |
+
HealthResponse,
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
__all__ = [
|
| 22 |
+
"QueryRequest",
|
| 23 |
+
"QueryResponse",
|
| 24 |
+
"DocumentInfo",
|
| 25 |
+
"AgentType",
|
| 26 |
+
"AgentResponse",
|
| 27 |
+
"RetrievedDocument",
|
| 28 |
+
"ConversationMessage",
|
| 29 |
+
"IngestionRequest",
|
| 30 |
+
"IngestionResponse",
|
| 31 |
+
"HealthResponse",
|
| 32 |
+
]
|
app/schemas/models.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data Models
|
| 3 |
+
===========
|
| 4 |
+
|
| 5 |
+
This module defines all Pydantic models used throughout the application.
|
| 6 |
+
Models provide:
|
| 7 |
+
- Request/response validation for API endpoints
|
| 8 |
+
- Type safety for internal data flow
|
| 9 |
+
- Automatic API documentation via OpenAPI
|
| 10 |
+
|
| 11 |
+
WHY Pydantic?
|
| 12 |
+
- Runtime type validation (catches errors early)
|
| 13 |
+
- Automatic JSON serialization
|
| 14 |
+
- Integration with FastAPI for auto-docs
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
from enum import Enum
|
| 19 |
+
from typing import Optional
|
| 20 |
+
from pydantic import BaseModel, Field
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# =============================================================================
|
| 24 |
+
# Enums
|
| 25 |
+
# =============================================================================
|
| 26 |
+
|
| 27 |
+
class AgentType(str, Enum):
    """
    Types of agents in the multi-agent pipeline.

    Each agent carries exactly one responsibility:
    - ROUTER:    classifies query intent and picks the next agent
    - RETRIEVER: searches the vector store for relevant documents
    - REASONING: generates grounded responses from retrieved context
    - ACTION:    executes concrete operations (create ticket, send email, ...)
    """

    ROUTER = "router"
    RETRIEVER = "retriever"
    REASONING = "reasoning"
    ACTION = "action"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class ActionType(str, Enum):
    """
    Actions the Action Agent can execute.

    These map to real business operations in a support system; NONE marks
    an informational reply where no side effect is required.
    """

    CREATE_TICKET = "create_ticket"
    ESCALATE = "escalate"
    SEND_EMAIL = "send_email"
    SEARCH_KB = "search_knowledge_base"
    NONE = "none"  # No action needed, just informational response
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# =============================================================================
|
| 57 |
+
# Document Models
|
| 58 |
+
# =============================================================================
|
| 59 |
+
|
| 60 |
+
class DocumentInfo(BaseModel):
    """Metadata about a source document that has been ingested."""
    filename: str = Field(..., description="Original filename")
    file_type: str = Field(..., description="File extension (pdf, txt, etc.)")
    chunk_count: int = Field(..., description="Number of chunks created")
    # NOTE(review): datetime.utcnow produces naive timestamps and is
    # deprecated in Python 3.12 — consider a timezone-aware default.
    ingested_at: datetime = Field(
        default_factory=datetime.utcnow,
        description="When document was processed"
    )
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class RetrievedDocument(BaseModel):
    """
    A document chunk retrieved from the vector store.

    Carries both the content and its provenance (source, score, position)
    so users can verify where an answer came from.
    """
    content: str = Field(..., description="The text content of the chunk")
    source: str = Field(..., description="Source file path")
    # Score is validated into [0, 1]; callers must normalize raw distances
    # before constructing this model.
    relevance_score: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Similarity score (1.0 = perfect match)"
    )
    chunk_index: Optional[int] = Field(
        default=None,
        description="Position of chunk in original document"
    )
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# =============================================================================
|
| 93 |
+
# Conversation Models
|
| 94 |
+
# =============================================================================
|
| 95 |
+
|
| 96 |
+
class ConversationMessage(BaseModel):
    """
    A single message in the conversation history.

    Used by the memory module to maintain context across turns.
    """
    # Role is free-form here; the memory manager only ever writes
    # "user" or "assistant" — consider Literal["user", "assistant"].
    role: str = Field(..., description="'user' or 'assistant'")
    content: str = Field(..., description="Message text")
    # NOTE(review): naive UTC timestamp (datetime.utcnow is deprecated in 3.12).
    timestamp: datetime = Field(
        default_factory=datetime.utcnow,
        description="When message was sent"
    )
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# =============================================================================
|
| 111 |
+
# Agent Response Models
|
| 112 |
+
# =============================================================================
|
| 113 |
+
|
| 114 |
+
class AgentResponse(BaseModel):
    """
    Response from an individual agent.

    Every agent returns this structure so the orchestrator can handle
    outputs uniformly regardless of which agent produced them.
    """
    agent_type: AgentType = Field(..., description="Which agent responded")
    output: str = Field(..., description="Agent's output text")
    confidence: float = Field(
        default=1.0,
        ge=0.0,
        le=1.0,
        description="Agent's confidence in response"
    )
    # Untyped bag for agent-specific extras (e.g. retrieved doc ids,
    # routing rationale); keys are agent-defined.
    metadata: dict = Field(
        default_factory=dict,
        description="Additional agent-specific data"
    )
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
# =============================================================================
|
| 135 |
+
# API Request/Response Models
|
| 136 |
+
# =============================================================================
|
| 137 |
+
|
| 138 |
+
class QueryRequest(BaseModel):
    """
    User query request to the RAG system.

    Example:
        {
            "query": "How do I reset my password?",
            "conversation_id": "abc123",
            "include_sources": true
        }
    """
    # Length bounds reject empty queries and cap prompt size.
    query: str = Field(
        ...,
        min_length=1,
        max_length=2000,
        description="The user's question or request"
    )
    # None means "start a new conversation"; the server generates an id.
    conversation_id: Optional[str] = Field(
        default=None,
        description="ID for conversation continuity"
    )
    include_sources: bool = Field(
        default=True,
        description="Whether to return source documents"
    )

    class Config:
        # Sample payload surfaced in the OpenAPI/Swagger docs.
        json_schema_extra = {
            "example": {
                "query": "How do I reset my password?",
                "conversation_id": "user-session-123",
                "include_sources": True
            }
        }
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
class QueryResponse(BaseModel):
    """
    Response from the RAG system.

    Contains the answer, optionally the source documents used to generate
    it, plus tracing info (agent sequence, action, timing) for debugging.
    """
    answer: str = Field(..., description="The generated answer")
    sources: list[RetrievedDocument] = Field(
        default_factory=list,
        description="Documents used to generate answer"
    )
    conversation_id: str = Field(..., description="Conversation identifier")
    # Ordered list of agent names, e.g. ["router", "retriever", "reasoning"].
    agent_trace: list[str] = Field(
        default_factory=list,
        description="Sequence of agents that processed the query"
    )
    action_taken: Optional[ActionType] = Field(
        default=None,
        description="Action executed, if any"
    )
    processing_time_ms: float = Field(
        ...,
        description="Total processing time in milliseconds"
    )

    class Config:
        # Sample payload surfaced in the OpenAPI/Swagger docs.
        json_schema_extra = {
            "example": {
                "answer": "To reset your password, go to Settings > Security > Reset Password...",
                "sources": [
                    {
                        "content": "Password reset instructions...",
                        "source": "docs/security.pdf",
                        "relevance_score": 0.92
                    }
                ],
                "conversation_id": "user-session-123",
                "agent_trace": ["router", "retriever", "reasoning"],
                "action_taken": None,
                "processing_time_ms": 1250.5
            }
        }
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
# =============================================================================
|
| 220 |
+
# Document Ingestion Models
|
| 221 |
+
# =============================================================================
|
| 222 |
+
|
| 223 |
+
class IngestionRequest(BaseModel):
    """Request to ingest documents into the vector store."""
    # Empty list is the "ingest everything under documents_path" sentinel.
    file_paths: list[str] = Field(
        default_factory=list,
        description="Specific files to ingest (empty = all in documents_path)"
    )
    force_reindex: bool = Field(
        default=False,
        description="Re-index even if already processed"
    )
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
class IngestionResponse(BaseModel):
    """Response after document ingestion.

    Partial failures are reported via `errors` rather than an HTTP error,
    so one bad file does not abort the whole batch.
    """
    documents_processed: int = Field(..., description="Number of files processed")
    chunks_created: int = Field(..., description="Total chunks created")
    documents: list[DocumentInfo] = Field(
        default_factory=list,
        description="Details of each processed document"
    )
    errors: list[str] = Field(
        default_factory=list,
        description="Any errors encountered"
    )
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
# =============================================================================
|
| 250 |
+
# Health Check Model
|
| 251 |
+
# =============================================================================
|
| 252 |
+
|
| 253 |
+
class HealthResponse(BaseModel):
    """API health check response returned by the /health endpoint."""
    status: str = Field(..., description="'healthy' or 'unhealthy'")
    version: str = Field(..., description="API version")
    vector_store_ready: bool = Field(
        ...,
        description="Whether vector store is initialized"
    )
    # Count of indexed documents; 0 is valid (empty but ready store).
    document_count: int = Field(
        ...,
        description="Number of documents in vector store"
    )
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Services Module
|
| 3 |
+
===============
|
| 4 |
+
|
| 5 |
+
Business logic services that orchestrate agents and handle document processing.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from app.services.orchestrator import MultiAgentOrchestrator
|
| 9 |
+
from app.services.document_service import DocumentService
|
| 10 |
+
|
| 11 |
+
__all__ = ["MultiAgentOrchestrator", "DocumentService"]
|
app/services/document_service.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document Service
|
| 3 |
+
================
|
| 4 |
+
|
| 5 |
+
Service for ingesting and managing documents in the knowledge base.
|
| 6 |
+
|
| 7 |
+
This service handles:
|
| 8 |
+
- Loading documents from various file formats
|
| 9 |
+
- Splitting documents into chunks
|
| 10 |
+
- Indexing in the vector store
|
| 11 |
+
- Tracking document metadata
|
| 12 |
+
|
| 13 |
+
WHY A SEPARATE SERVICE?
|
| 14 |
+
- Separates ingestion from query processing
|
| 15 |
+
- Can be run as a batch job
|
| 16 |
+
- Easy to add new document types
|
| 17 |
+
- Provides clear API for document management
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
import logging
|
| 21 |
+
from pathlib import Path
|
| 22 |
+
from typing import Optional
|
| 23 |
+
from datetime import datetime
|
| 24 |
+
|
| 25 |
+
from langchain_core.documents import Document
|
| 26 |
+
from langchain_community.document_loaders import (
|
| 27 |
+
TextLoader,
|
| 28 |
+
PyPDFLoader,
|
| 29 |
+
UnstructuredWordDocumentLoader,
|
| 30 |
+
DirectoryLoader,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
from app.vectorstore.faiss_store import FAISSVectorStore
|
| 34 |
+
from app.schemas.models import (
|
| 35 |
+
IngestionRequest,
|
| 36 |
+
IngestionResponse,
|
| 37 |
+
DocumentInfo,
|
| 38 |
+
)
|
| 39 |
+
from app.config import get_settings
|
| 40 |
+
|
| 41 |
+
logger = logging.getLogger(__name__)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class DocumentService:
    """
    Service for document ingestion and management.

    Handles loading documents from files, chunking them,
    and storing in the vector database.

    Usage:
        service = DocumentService()
        result = await service.ingest_directory("/path/to/docs")
    """

    # Supported file extensions mapped to their LangChain loader classes.
    SUPPORTED_EXTENSIONS = {
        ".txt": TextLoader,
        ".md": TextLoader,
        ".pdf": PyPDFLoader,
        ".docx": UnstructuredWordDocumentLoader,
    }

    def __init__(self, vector_store: Optional[FAISSVectorStore] = None):
        """
        Initialize the document service.

        Args:
            vector_store: FAISS store instance (uses singleton if not provided)
        """
        self._vector_store = vector_store or FAISSVectorStore()
        self._settings = get_settings()

    @staticmethod
    def _utc_timestamp() -> str:
        """
        Return the current UTC time as an ISO-8601 string.

        Uses a timezone-aware datetime: naive ``datetime.utcnow()`` is
        deprecated since Python 3.12 and yields ambiguous timestamps.
        """
        # Local import: the module-level import only brings in `datetime`.
        from datetime import timezone
        return datetime.now(timezone.utc).isoformat()

    async def ingest_documents(
        self,
        request: IngestionRequest
    ) -> IngestionResponse:
        """
        Ingest documents based on request.

        Args:
            request: IngestionRequest specifying what to ingest

        Returns:
            IngestionResponse with results
        """
        if request.file_paths:
            # Ingest only the explicitly requested files.
            return await self._ingest_files(
                request.file_paths,
                request.force_reindex
            )
        else:
            # No explicit files: ingest everything under the configured directory.
            return await self.ingest_directory(
                str(self._settings.documents_path),
                request.force_reindex
            )

    async def ingest_directory(
        self,
        directory_path: str,
        force_reindex: bool = False
    ) -> IngestionResponse:
        """
        Ingest all supported documents from a directory.

        Recursively finds and indexes all supported file types.

        Args:
            directory_path: Path to directory
            force_reindex: If True, clear existing index first

        Returns:
            IngestionResponse with details
        """
        path = Path(directory_path)

        if not path.exists():
            return IngestionResponse(
                documents_processed=0,
                chunks_created=0,
                errors=[f"Directory not found: {directory_path}"],
            )

        if not path.is_dir():
            return IngestionResponse(
                documents_processed=0,
                chunks_created=0,
                errors=[f"Not a directory: {directory_path}"],
            )

        # Clear existing index if requested
        if force_reindex:
            logger.info("Force reindex requested - clearing existing index")
            self._vector_store.delete_all()

        # Find all supported files. De-duplicate and sort so ingestion
        # order (and therefore chunk ordering) is deterministic across runs.
        all_files = []
        for ext in self.SUPPORTED_EXTENSIONS:
            all_files.extend(path.glob(f"**/*{ext}"))
        all_files = sorted(set(all_files))

        if not all_files:
            return IngestionResponse(
                documents_processed=0,
                chunks_created=0,
                errors=[f"No supported files found in {directory_path}"],
            )

        # Ingest each file
        return await self._ingest_files(
            [str(f) for f in all_files],
            force_reindex=False  # Already handled above
        )

    async def _ingest_files(
        self,
        file_paths: list[str],
        force_reindex: bool = False
    ) -> IngestionResponse:
        """
        Ingest a list of specific files.

        Args:
            file_paths: List of file paths to ingest
            force_reindex: Clear existing index first

        Returns:
            IngestionResponse with details
        """
        if force_reindex:
            self._vector_store.delete_all()

        documents_info = []
        errors = []
        total_chunks = 0

        for file_path in file_paths:
            try:
                result = await self._ingest_single_file(file_path)
                documents_info.append(result)
                total_chunks += result.chunk_count
            except Exception as e:
                # Best-effort batch: one bad file must not abort the rest,
                # but every failure is reported back to the caller.
                error_msg = f"Failed to process {file_path}: {str(e)}"
                logger.error(error_msg)
                errors.append(error_msg)

        return IngestionResponse(
            documents_processed=len(documents_info),
            chunks_created=total_chunks,
            documents=documents_info,
            errors=errors,
        )

    async def _ingest_single_file(self, file_path: str) -> DocumentInfo:
        """
        Ingest a single file into the vector store.

        Args:
            file_path: Path to the file

        Returns:
            DocumentInfo about the processed file

        Raises:
            ValueError: If file type not supported
            FileNotFoundError: If file doesn't exist
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        extension = path.suffix.lower()

        if extension not in self.SUPPORTED_EXTENSIONS:
            raise ValueError(
                f"Unsupported file type: {extension}. "
                f"Supported: {list(self.SUPPORTED_EXTENSIONS.keys())}"
            )

        # Load the document with the loader registered for this extension.
        loader_class = self.SUPPORTED_EXTENSIONS[extension]
        loader = loader_class(str(path))
        documents = loader.load()

        # Stamp metadata on every loaded page/section. Compute the timestamp
        # once so all parts of one file share the same ingestion time.
        ingested_at = self._utc_timestamp()
        for i, doc in enumerate(documents):
            doc.metadata.update({
                "source": str(path),
                "file_name": path.name,
                "file_type": extension,
                "chunk_index": i,
                "ingested_at": ingested_at,
            })

        # Add to vector store (handles chunking)
        chunks_created = self._vector_store.add_documents(documents)

        logger.info(f"Ingested {path.name}: {chunks_created} chunks")

        return DocumentInfo(
            filename=path.name,
            file_type=extension,
            chunk_count=chunks_created,
        )

    async def ingest_text(
        self,
        text: str,
        source_name: str = "direct_input",
        metadata: Optional[dict] = None
    ) -> int:
        """
        Ingest raw text directly.

        Useful for adding content without creating files.

        Args:
            text: Text content to ingest
            source_name: Name to use as source
            metadata: Additional metadata (may override the defaults below)

        Returns:
            Number of chunks created
        """
        doc = Document(
            page_content=text,
            metadata={
                "source": source_name,
                "file_type": "text",
                "ingested_at": self._utc_timestamp(),
                **(metadata or {}),
            }
        )

        return self._vector_store.add_documents([doc])

    def get_document_count(self) -> int:
        """Get the total number of document chunks in the store."""
        return self._vector_store.document_count

    def is_ready(self) -> bool:
        """Check if the document store has any documents."""
        return self._vector_store.is_ready

    def clear_all_documents(self) -> None:
        """
        Clear all documents from the vector store.

        WARNING: This is destructive and cannot be undone.
        """
        self._vector_store.delete_all()
        logger.info("All documents cleared from vector store")
|
app/services/orchestrator.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Multi-Agent Orchestrator
|
| 3 |
+
========================
|
| 4 |
+
|
| 5 |
+
The orchestrator is the "conductor" of the multi-agent system.
|
| 6 |
+
It coordinates the flow between agents and manages the overall pipeline.
|
| 7 |
+
|
| 8 |
+
PIPELINE FLOW:
|
| 9 |
+
|
| 10 |
+
┌─────────┐
|
| 11 |
+
│ Query │
|
| 12 |
+
└────┬────┘
|
| 13 |
+
│
|
| 14 |
+
┌────▼────┐
|
| 15 |
+
│ Router │ ──► Classifies query intent
|
| 16 |
+
│ Agent │
|
| 17 |
+
└────┬────┘
|
| 18 |
+
│
|
| 19 |
+
┌────▼──────────────┐
|
| 20 |
+
│ Needs Retrieval? │
|
| 21 |
+
└────┬──────────────┘
|
| 22 |
+
│Yes
|
| 23 |
+
┌────▼────┐
|
| 24 |
+
│Retriever│ ──► Searches vector store
|
| 25 |
+
│ Agent │
|
| 26 |
+
└────┬────┘
|
| 27 |
+
│
|
| 28 |
+
┌────▼────┐
|
| 29 |
+
│Reasoning│ ──► Generates grounded response
|
| 30 |
+
│ Agent │
|
| 31 |
+
└────┬────┘
|
| 32 |
+
│
|
| 33 |
+
┌────▼──────────┐
|
| 34 |
+
│ Needs Action? │
|
| 35 |
+
└────┬──────────┘
|
| 36 |
+
│Yes
|
| 37 |
+
┌────▼────┐
|
| 38 |
+
│ Action │ ──► Executes action (ticket, escalate)
|
| 39 |
+
│ Agent │
|
| 40 |
+
└────┬────┘
|
| 41 |
+
│
|
| 42 |
+
┌────▼────┐
|
| 43 |
+
│Response │
|
| 44 |
+
└─────────┘
|
| 45 |
+
|
| 46 |
+
WHY AN ORCHESTRATOR?
|
| 47 |
+
- Decouples agents from each other
|
| 48 |
+
- Easy to modify pipeline without changing agents
|
| 49 |
+
- Provides central logging and monitoring
|
| 50 |
+
- Handles error recovery at pipeline level
|
| 51 |
+
"""
|
| 52 |
+
|
| 53 |
+
import logging
|
| 54 |
+
import time
|
| 55 |
+
from typing import Optional
|
| 56 |
+
import uuid
|
| 57 |
+
|
| 58 |
+
from app.agents.router_agent import RouterAgent
|
| 59 |
+
from app.agents.retriever_agent import RetrieverAgent
|
| 60 |
+
from app.agents.reasoning_agent import ReasoningAgent
|
| 61 |
+
from app.agents.action_agent import ActionAgent
|
| 62 |
+
from app.schemas.models import (
|
| 63 |
+
QueryRequest,
|
| 64 |
+
QueryResponse,
|
| 65 |
+
RetrievedDocument,
|
| 66 |
+
ActionType,
|
| 67 |
+
)
|
| 68 |
+
from app.memory.conversation_memory import ConversationMemoryManager
|
| 69 |
+
from app.vectorstore.faiss_store import FAISSVectorStore
|
| 70 |
+
|
| 71 |
+
logger = logging.getLogger(__name__)
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class MultiAgentOrchestrator:
    """
    Orchestrates the multi-agent RAG pipeline.

    This class:
    1. Receives user queries
    2. Routes them through appropriate agents
    3. Manages state between agents
    4. Returns consolidated responses

    Usage:
        orchestrator = MultiAgentOrchestrator()
        response = await orchestrator.process_query(
            QueryRequest(query="How do I reset my password?")
        )
    """

    def __init__(
        self,
        vector_store: Optional[FAISSVectorStore] = None,
        memory_manager: Optional[ConversationMemoryManager] = None,
    ):
        """
        Initialize the orchestrator with all agents.

        Args:
            vector_store: FAISS store (uses singleton if not provided)
            memory_manager: Conversation memory (creates new if not provided)
        """
        # Shared dependencies
        self._vector_store = vector_store or FAISSVectorStore()
        self._memory_manager = memory_manager or ConversationMemoryManager()

        # Initialize all agents
        self._router = RouterAgent()
        self._retriever = RetrieverAgent(vector_store=self._vector_store)
        self._reasoning = ReasoningAgent(memory_manager=self._memory_manager)
        self._action = ActionAgent()

        logger.info("Multi-agent orchestrator initialized")

    async def process_query(self, request: QueryRequest) -> QueryResponse:
        """
        Process a user query through the multi-agent pipeline.

        This is the main entry point for the RAG system.

        Args:
            request: QueryRequest with user's question

        Returns:
            QueryResponse with answer, sources, and metadata
        """
        start_time = time.time()

        # Generate or use existing conversation ID
        conversation_id = request.conversation_id or self._memory_manager.generate_conversation_id()

        # Track which agents process this query
        agent_trace = []

        logger.info(f"Processing query: {request.query[:100]}...")

        try:
            # Step 1: Route the query
            routing_response = await self._router.safe_execute({
                "query": request.query,
                "context": self._memory_manager.get_context_string(conversation_id),
            })
            agent_trace.append("router")

            routing_meta = routing_response.metadata
            needs_retrieval = routing_meta.get("needs_retrieval", True)
            needs_action = routing_meta.get("needs_action", False)
            suggested_action = routing_meta.get("suggested_action", "none")

            logger.info(
                f"Routing: retrieval={needs_retrieval}, action={needs_action}"
            )

            # Step 2: Retrieve documents if needed
            context = ""
            retrieved_docs = []

            if needs_retrieval:
                retrieval_response = await self._retriever.safe_execute({
                    "query": request.query,
                })
                agent_trace.append("retriever")

                context = retrieval_response.output
                retrieved_docs = retrieval_response.metadata.get("documents", [])

            # Step 3: Generate reasoning response
            reasoning_response = await self._reasoning.safe_execute({
                "query": request.query,
                "context": context or "No context available.",
                "conversation_id": conversation_id,
            })
            agent_trace.append("reasoning")

            answer = reasoning_response.output

            # Step 4: Execute action if needed
            action_taken = None

            if needs_action:
                action_response = await self._action.safe_execute({
                    "query": request.query,
                    "context": context,
                    "action_type": suggested_action,
                })
                agent_trace.append("action")

                # Append action result to answer
                if action_response.metadata.get("action_taken"):
                    answer += f"\n\n---\n**Action Taken:**\n{action_response.output}"
                    # Agent metadata is free-form; an unexpected value must
                    # not raise ValueError here and discard the successful
                    # answer via the generic error path below.
                    raw_action = action_response.metadata.get("action_type", "none")
                    try:
                        action_taken = ActionType(raw_action)
                    except ValueError:
                        logger.warning(f"Unknown action type from agent: {raw_action!r}")
                        action_taken = None

            # Calculate processing time
            processing_time_ms = (time.time() - start_time) * 1000

            # Build response
            sources = []
            if request.include_sources and retrieved_docs:
                for doc_dict in retrieved_docs:
                    sources.append(RetrievedDocument(**doc_dict))

            logger.info(
                f"Query processed in {processing_time_ms:.2f}ms, "
                f"agents: {' -> '.join(agent_trace)}"
            )

            return QueryResponse(
                answer=answer,
                sources=sources,
                conversation_id=conversation_id,
                agent_trace=agent_trace,
                action_taken=action_taken,
                processing_time_ms=processing_time_ms,
            )

        except Exception as e:
            # Pipeline-level safety net: never surface a raw exception to
            # the caller; return a generic apology with the partial trace.
            logger.error(f"Pipeline error: {e}", exc_info=True)

            processing_time_ms = (time.time() - start_time) * 1000

            return QueryResponse(
                answer=(
                    "I apologize, but I encountered an error processing your request. "
                    "Please try again or contact support if the issue persists."
                ),
                sources=[],
                conversation_id=conversation_id,
                agent_trace=agent_trace,
                action_taken=None,
                processing_time_ms=processing_time_ms,
            )

    async def process_simple_query(self, query: str) -> str:
        """
        Simple interface for quick queries.

        Skips the full pipeline and just does retrieval + reasoning.

        Args:
            query: User's question

        Returns:
            Answer string
        """
        # Retrieve context only when the store actually has documents.
        if self._vector_store.is_ready:
            retrieval_response = await self._retriever.safe_execute({
                "query": query,
            })
            context = retrieval_response.output
        else:
            context = "No documents in knowledge base."

        # Generate response
        reasoning_response = await self._reasoning.safe_execute({
            "query": query,
            "context": context,
        })

        return reasoning_response.output

    @property
    def is_ready(self) -> bool:
        """Check if the orchestrator is ready to process queries."""
        return self._vector_store.is_ready

    @property
    def document_count(self) -> int:
        """Get number of documents in the knowledge base."""
        return self._vector_store.document_count
|
app/tools/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tools Module
|
| 3 |
+
============
|
| 4 |
+
|
| 5 |
+
LangChain tools for use by agents.
|
| 6 |
+
|
| 7 |
+
Tools extend agent capabilities with specific actions.
|
| 8 |
+
Each tool is a function the agent can call.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from app.tools.search_tool import create_search_tool
|
| 12 |
+
from app.tools.document_tool import create_document_loader_tool
|
| 13 |
+
from app.tools.action_tools import (
|
| 14 |
+
create_ticket_tool,
|
| 15 |
+
create_escalation_tool,
|
| 16 |
+
get_all_action_tools,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
__all__ = [
|
| 20 |
+
"create_search_tool",
|
| 21 |
+
"create_document_loader_tool",
|
| 22 |
+
"create_ticket_tool",
|
| 23 |
+
"create_escalation_tool",
|
| 24 |
+
"get_all_action_tools",
|
| 25 |
+
]
|
app/tools/action_tools.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Action Tools
|
| 3 |
+
============
|
| 4 |
+
|
| 5 |
+
LangChain tools for executing actions like creating tickets,
|
| 6 |
+
escalating issues, and sending notifications.
|
| 7 |
+
|
| 8 |
+
These tools wrap the action logic to make it available to agents.
|
| 9 |
+
In production, they would integrate with real external systems.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import logging
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from typing import Optional
|
| 15 |
+
import uuid
|
| 16 |
+
|
| 17 |
+
from langchain.tools import Tool
|
| 18 |
+
from langchain_core.tools import StructuredTool
|
| 19 |
+
from pydantic import BaseModel, Field
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# =============================================================================
|
| 25 |
+
# Input Schemas
|
| 26 |
+
# =============================================================================
|
| 27 |
+
|
| 28 |
+
class TicketInput(BaseModel):
    """Input schema for ticket creation (args_schema of create_support_ticket)."""
    # Short, human-readable summary shown in the ticket list.
    title: str = Field(description="Brief title for the support ticket")
    # Full problem description for the support team.
    description: str = Field(description="Detailed description of the issue")
    # One of: low, medium, high, urgent; create_support_ticket falls back to
    # "medium" for anything else.
    priority: str = Field(
        default="medium",
        description="Priority level: low, medium, high, urgent"
    )
    # Optional link to the customer record, when the caller knows it.
    customer_id: Optional[str] = Field(
        default=None,
        description="Customer ID if known"
    )
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class EscalationInput(BaseModel):
    """Input schema for escalation (args_schema of escalate_to_human)."""
    # Why the automated flow is handing off; shown to the human agent.
    reason: str = Field(description="Reason for escalation to human agent")
    # Drives the estimated-wait-time message; unknown values get the
    # medium-tier estimate.
    priority: str = Field(
        default="medium",
        description="Priority level: low, medium, high, urgent"
    )
    # Optional conversation context passed along to the human agent.
    context: Optional[str] = Field(
        default=None,
        description="Relevant context for the human agent"
    )
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class EmailInput(BaseModel):
    """Input schema for email notifications (args_schema of send_email_notification)."""
    # Subject line of the outgoing email.
    subject: str = Field(description="Email subject line")
    # Short summary of the email body (the simulated sender does not
    # compose full content).
    body_summary: str = Field(description="Summary of email content")
    # One of: customer, support_team, manager; anything else is reported
    # as "the specified recipient".
    recipient_type: str = Field(
        default="customer",
        description="Recipient type: customer, support_team, manager"
    )
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# =============================================================================
|
| 66 |
+
# Tool Functions
|
| 67 |
+
# =============================================================================
|
| 68 |
+
|
| 69 |
+
def create_support_ticket(
    title: str,
    description: str,
    priority: str = "medium",
    customer_id: Optional[str] = None,
) -> str:
    """
    Create a support ticket in the system.

    In production, this would call a ticketing API (Zendesk, Jira, etc.).
    For now, we simulate the creation.

    Args:
        title: Ticket title
        description: Detailed description
        priority: low, medium, high, urgent (case-insensitive)
        customer_id: Customer identifier

    Returns:
        Confirmation message with ticket ID
    """
    # Generate a short, human-friendly ticket ID.
    ticket_id = f"TKT-{uuid.uuid4().hex[:8].upper()}"

    # Normalize once so validation, logging, and the rendered message all
    # agree on casing (previously "HIGH" passed validation but was echoed
    # raw in the SLA line). Unknown values fall back to "medium".
    priority = priority.lower()
    if priority not in ("low", "medium", "high", "urgent"):
        priority = "medium"

    # In production: API call to ticketing system
    # ticketing_client.create(title=title, description=description, ...)

    logger.info(f"Created ticket {ticket_id}: {title} (Priority: {priority})")

    return (
        f"Support ticket created successfully.\n"
        f"Ticket ID: {ticket_id}\n"
        f"Title: {title}\n"
        f"Priority: {priority.capitalize()}\n"
        f"Status: Open\n"
        f"Our team will review and respond within the SLA for {priority} priority tickets."
    )
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def escalate_to_human(
    reason: str,
    priority: str = "medium",
    context: Optional[str] = None,
) -> str:
    """
    Hand the conversation off to a human support agent.

    In production this would enqueue the session in the support platform,
    notify available agents, and transfer the chat. Here it is simulated.

    Args:
        reason: Why escalation is needed
        priority: Urgency level (low, medium, high, urgent)
        context: Context to pass to the human agent

    Returns:
        Confirmation with escalation details
    """
    ref_id = f"ESC-{uuid.uuid4().hex[:8].upper()}"

    # Simulated queue estimates per priority tier; a real system would
    # query the live queue. Unknown priorities get the medium-tier estimate.
    estimated_wait = {
        "low": "15-30 minutes",
        "medium": "5-15 minutes",
        "high": "2-5 minutes",
        "urgent": "Under 2 minutes",
    }.get(priority.lower(), "5-15 minutes")

    logger.info(f"Created escalation {ref_id}: {reason}")

    confirmation_lines = [
        "Escalation initiated successfully.",
        f"Reference ID: {ref_id}",
        f"Reason: {reason}",
        f"Priority: {priority.capitalize()}",
        f"Estimated wait time: {estimated_wait}",
        "",
        "A human support agent will join this conversation shortly. "
        "Please stay on this chat.",
    ]
    return "\n".join(confirmation_lines)
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def send_email_notification(
    subject: str,
    body_summary: str,
    recipient_type: str = "customer",
) -> str:
    """
    Send an email notification (simulated).

    In production this would integrate with an email service such as
    SendGrid or AWS SES.

    Args:
        subject: Email subject
        body_summary: Summary of what the email contains
        recipient_type: Who receives the email (customer, support_team, manager)

    Returns:
        Confirmation message
    """
    notification_id = f"EMAIL-{uuid.uuid4().hex[:8].upper()}"

    # Describe the recipient for the confirmation text; anything outside
    # the known set is reported generically.
    recipient_desc = {
        "customer": "your registered email address",
        "support_team": "the support team",
        "manager": "the appropriate manager",
    }.get(recipient_type.lower(), "the specified recipient")

    logger.info(f"Sent email {notification_id}: {subject} to {recipient_type}")

    return (
        "Email notification sent.\n"
        f"Email ID: {notification_id}\n"
        f"Subject: {subject}\n"
        f"Sent to: {recipient_desc}\n"
        "Please check the inbox (and spam folder) within the next few minutes."
    )
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
# =============================================================================
|
| 204 |
+
# Tool Creation Functions
|
| 205 |
+
# =============================================================================
|
| 206 |
+
|
| 207 |
+
def create_ticket_tool() -> StructuredTool:
    """
    Build the support-ticket creation tool.

    Returns:
        StructuredTool for creating support tickets
    """
    # Keep the prompt-facing description separate from the wiring so it is
    # easy to review and tweak.
    tool_description = (
        "Create a support ticket for issues that need follow-up. "
        "Use when the issue can't be resolved immediately, "
        "requires investigation, or needs to be tracked. "
        "Provide a clear title and detailed description."
    )
    return StructuredTool.from_function(
        func=create_support_ticket,
        name="create_support_ticket",
        description=tool_description,
        args_schema=TicketInput,
    )
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def create_escalation_tool() -> StructuredTool:
    """
    Build the human-escalation tool.

    Returns:
        StructuredTool for escalating to human agents
    """
    # Prompt-facing description kept separate from the wiring.
    tool_description = (
        "Escalate the conversation to a human support agent. "
        "Use when: the customer explicitly requests a human, "
        "the issue is too complex for automated handling, "
        "the customer is frustrated or upset, "
        "or there's a sensitive matter requiring human judgment."
    )
    return StructuredTool.from_function(
        func=escalate_to_human,
        name="escalate_to_human",
        description=tool_description,
        args_schema=EscalationInput,
    )
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
def create_email_tool() -> StructuredTool:
    """Build the email-notification tool.

    Returns:
        StructuredTool wrapping ``send_email_notification``.
    """
    tool_description = (
        "Send an email notification about the support interaction. "
        "Use for: sending confirmation of actions taken, "
        "providing written documentation of solutions, "
        "or notifying relevant parties about issues."
    )
    return StructuredTool.from_function(
        func=send_email_notification,
        name="send_email",
        description=tool_description,
        args_schema=EmailInput,
    )
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def get_all_action_tools() -> list[StructuredTool]:
    """Assemble every action tool available to agents.

    Returns:
        List containing the ticket, escalation, and email tools, in that order.
    """
    factories = (create_ticket_tool, create_escalation_tool, create_email_tool)
    return [factory() for factory in factories]
|
app/tools/document_tool.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document Loader Tool
|
| 3 |
+
====================
|
| 4 |
+
|
| 5 |
+
Tool for loading and processing documents into the vector store.
|
| 6 |
+
|
| 7 |
+
This tool handles:
|
| 8 |
+
- Loading documents from files
|
| 9 |
+
- Splitting into chunks
|
| 10 |
+
- Adding to the vector store
|
| 11 |
+
|
| 12 |
+
SUPPORTED FORMATS:
|
| 13 |
+
- .txt: Plain text files
|
| 14 |
+
- .pdf: PDF documents
|
| 15 |
+
- .md: Markdown files
|
| 16 |
+
- .docx: Word documents
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import logging
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from typing import Optional
|
| 22 |
+
|
| 23 |
+
from langchain.tools import Tool
|
| 24 |
+
from langchain_core.tools import StructuredTool
|
| 25 |
+
from langchain_core.documents import Document
|
| 26 |
+
from langchain_community.document_loaders import (
|
| 27 |
+
TextLoader,
|
| 28 |
+
PyPDFLoader,
|
| 29 |
+
UnstructuredWordDocumentLoader,
|
| 30 |
+
)
|
| 31 |
+
from pydantic import BaseModel, Field
|
| 32 |
+
|
| 33 |
+
from app.vectorstore.faiss_store import FAISSVectorStore
|
| 34 |
+
from app.config import get_settings
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class DocumentLoadInput(BaseModel):
    """Input for document loading tool."""
    # Absolute or relative path on the local filesystem; the file extension
    # determines which loader is used (see load_document()).
    file_path: str = Field(description="Path to the document file to load")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def load_document(file_path: str) -> list[Document]:
    """Load a document from disk, choosing the loader by file extension.

    Args:
        file_path: Path to the document.

    Returns:
        List of Document objects (PDFs may produce one per page).

    Raises:
        FileNotFoundError: If the path does not exist.
        ValueError: If the file type is not supported.
    """
    source = Path(file_path)

    if not source.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    extension = source.suffix.lower()

    # Supported extension -> loader class mapping.
    loaders = {
        ".txt": TextLoader,
        ".md": TextLoader,
        ".pdf": PyPDFLoader,
        ".docx": UnstructuredWordDocumentLoader,
    }

    loader_class = loaders.get(extension)
    if loader_class is None:
        raise ValueError(
            f"Unsupported file type: {extension}. "
            f"Supported: {list(loaders.keys())}"
        )

    docs = loader_class(str(source)).load()

    # Record provenance so retrieval results can cite their origin.
    for doc in docs:
        doc.metadata["source"] = str(source)
        doc.metadata["file_type"] = extension

    logger.info(f"Loaded {len(docs)} documents from {file_path}")

    return docs
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def create_document_loader_tool(
    vector_store: FAISSVectorStore = None
) -> StructuredTool:
    """Build a tool that ingests a single file into the knowledge base.

    Args:
        vector_store: FAISS store instance (falls back to the singleton).

    Returns:
        StructuredTool for document loading.
    """
    store = vector_store or FAISSVectorStore()

    def load_and_index(file_path: str) -> str:
        """Load one document, index its chunks, and return a status string."""
        try:
            docs = load_document(file_path)
            num_chunks = store.add_documents(docs)
        except (FileNotFoundError, ValueError) as e:
            # Expected user errors: bad path or unsupported extension.
            return f"Error: {str(e)}"
        except Exception as e:
            logger.error(f"Failed to load document: {e}")
            return f"Failed to load document: {str(e)}"
        return (
            f"Successfully loaded and indexed document: {file_path}\n"
            f"Created {num_chunks} searchable chunks."
        )

    return StructuredTool.from_function(
        func=load_and_index,
        name="load_document",
        description=(
            "Load a document file and add it to the knowledge base. "
            "Supports .txt, .md, .pdf, and .docx files. "
            "The document will be automatically chunked and indexed for search."
        ),
        args_schema=DocumentLoadInput,
    )
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def load_directory(
    directory_path: str,
    vector_store: FAISSVectorStore = None,
    extensions: list[str] = None
) -> dict:
    """Recursively index every supported document under a directory.

    Args:
        directory_path: Directory to scan (recursive).
        vector_store: FAISS store instance (falls back to the singleton).
        extensions: Extensions to include (defaults to all supported).

    Returns:
        Summary dict with keys ``files_processed``, ``chunks_created``,
        ``errors``, and ``files``.

    Raises:
        FileNotFoundError: If the directory does not exist.
        ValueError: If the path is not a directory.
    """
    store = vector_store or FAISSVectorStore()
    root = Path(directory_path)

    if not root.exists():
        raise FileNotFoundError(f"Directory not found: {directory_path}")

    if not root.is_dir():
        raise ValueError(f"Not a directory: {directory_path}")

    wanted = extensions or [".txt", ".md", ".pdf", ".docx"]

    summary = {
        "files_processed": 0,
        "chunks_created": 0,
        "errors": [],
        "files": [],
    }

    for ext in wanted:
        # rglob("*<ext>") is the recursive equivalent of glob("**/*<ext>").
        for candidate in root.rglob(f"*{ext}"):
            try:
                docs = load_document(str(candidate))
                chunk_count = store.add_documents(docs)
            except Exception as e:
                # Record failures but keep processing the remaining files.
                summary["errors"].append({
                    "path": str(candidate),
                    "error": str(e),
                })
                logger.error(f"Failed to load {candidate}: {e}")
                continue
            summary["files_processed"] += 1
            summary["chunks_created"] += chunk_count
            summary["files"].append({
                "path": str(candidate),
                "chunks": chunk_count,
            })

    logger.info(
        f"Loaded {summary['files_processed']} files, "
        f"created {summary['chunks_created']} chunks"
    )

    return summary
|
app/tools/search_tool.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Search Tool
|
| 3 |
+
===========
|
| 4 |
+
|
| 5 |
+
LangChain tool for semantic search in the vector store.
|
| 6 |
+
|
| 7 |
+
This tool wraps the FAISS vector store to provide a clean interface
|
| 8 |
+
for agents to search documents.
|
| 9 |
+
|
| 10 |
+
WHY A TOOL?
|
| 11 |
+
- LangChain agents work with tools as their action primitives
|
| 12 |
+
- Tools have clear input/output schemas
|
| 13 |
+
- Makes the search capability composable
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from langchain.tools import Tool
|
| 17 |
+
from langchain_core.tools import StructuredTool
|
| 18 |
+
from pydantic import BaseModel, Field
|
| 19 |
+
|
| 20 |
+
from app.vectorstore.faiss_store import FAISSVectorStore
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class SearchInput(BaseModel):
    """Input schema for the search tool."""
    # Free-text query; it is embedded and compared against stored chunks.
    query: str = Field(description="The search query to find relevant documents")
    # Bounded to 1-20 so the formatted results stay within agent context limits.
    num_results: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Number of results to return"
    )
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def create_search_tool(vector_store: FAISSVectorStore = None) -> StructuredTool:
    """Build the semantic-search tool backed by the FAISS store.

    The returned tool searches the vector store and formats matching
    chunks, with sources, as a single string for agent consumption.

    Args:
        vector_store: FAISS store instance (falls back to the singleton).

    Returns:
        StructuredTool usable by LangChain agents.

    Example:
        tool = create_search_tool()
        result = tool.invoke({"query": "password reset", "num_results": 3})
    """
    store = vector_store or FAISSVectorStore()

    def search_documents(query: str, num_results: int = 5) -> str:
        """Run a similarity search and format the results as text."""
        if not store.is_ready:
            return "Error: No documents in knowledge base. Please ingest documents first."

        try:
            matches = store.similarity_search(query, k=num_results)
        except Exception as e:
            return f"Search error: {str(e)}"

        if not matches:
            return "No relevant documents found for the query."

        lines = [f"Found {len(matches)} relevant documents:\n"]
        for rank, (doc, score) in enumerate(matches, 1):
            origin = doc.metadata.get("source", "unknown")
            text = doc.page_content
            # Cap each excerpt at 500 chars to keep output compact.
            snippet = text[:500] + ("..." if len(text) > 500 else "")
            lines.append(
                f"\n[Result {rank}] (Relevance: {score:.2f}, Source: {origin})\n"
                f"{snippet}"
            )

        return "\n".join(lines)

    return StructuredTool.from_function(
        func=search_documents,
        name="search_knowledge_base",
        description=(
            "Search the knowledge base for information relevant to the query. "
            "Use this to find documentation, policies, procedures, and FAQs. "
            "Returns relevant document excerpts with their sources."
        ),
        args_schema=SearchInput,
    )
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def create_simple_search_tool(vector_store: FAISSVectorStore = None) -> Tool:
    """Create a simple search tool with just a query string.

    This is an alternative for agents that work better with
    simple string inputs rather than structured inputs.

    Args:
        vector_store: FAISS store instance (falls back to the singleton).

    Returns:
        Simple Tool with string input.
    """
    store = vector_store or FAISSVectorStore()

    def search(query: str) -> str:
        """Search documents with a query string; returns formatted excerpts."""
        if not store.is_ready:
            return "No documents in knowledge base."

        try:
            results = store.similarity_search(query, k=5)
            if not results:
                return "No relevant documents found."

            output = []
            for doc, score in results:
                content = doc.page_content
                # FIX: only append an ellipsis when the excerpt was actually
                # truncated; the previous version added "..." unconditionally,
                # misrepresenting short chunks as cut off.
                snippet = content[:300] + ("..." if len(content) > 300 else "")
                output.append(f"[{score:.2f}] {snippet}")

            return "\n\n".join(output)
        except Exception as e:
            return f"Search error: {str(e)}"

    return Tool(
        name="search",
        func=search,
        description="Search the knowledge base for relevant information",
    )
|
app/vectorstore/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Vector Store Module
|
| 3 |
+
===================
|
| 4 |
+
|
| 5 |
+
Handles document embeddings and FAISS vector storage for semantic search.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from app.vectorstore.embeddings import EmbeddingManager
|
| 9 |
+
from app.vectorstore.faiss_store import FAISSVectorStore
|
| 10 |
+
|
| 11 |
+
__all__ = ["EmbeddingManager", "FAISSVectorStore"]
|
app/vectorstore/embeddings.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Embedding Manager
|
| 3 |
+
=================
|
| 4 |
+
|
| 5 |
+
This module handles the conversion of text to vector embeddings.
|
| 6 |
+
Supports both FREE (HuggingFace) and PAID (OpenAI) embeddings.
|
| 7 |
+
|
| 8 |
+
FREE OPTION: HuggingFace sentence-transformers
|
| 9 |
+
- Runs locally, no API costs
|
| 10 |
+
- Good quality embeddings
|
| 11 |
+
- Model: all-MiniLM-L6-v2 (384 dimensions, fast)
|
| 12 |
+
|
| 13 |
+
PAID OPTION: OpenAI
|
| 14 |
+
- Cloud-based
|
| 15 |
+
- Higher quality for some tasks
|
| 16 |
+
- Requires API key and costs money
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import logging
|
| 20 |
+
from typing import Optional
|
| 21 |
+
|
| 22 |
+
from langchain_core.embeddings import Embeddings
|
| 23 |
+
|
| 24 |
+
from app.config import get_settings
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def create_embeddings(provider: Optional[str] = None) -> Embeddings:
    """Instantiate an embeddings client for the requested provider.

    Args:
        provider: "huggingface" or "openai"; defaults to the settings value.

    Returns:
        LangChain Embeddings instance.

    Raises:
        ValueError: If the provider is not recognized.
    """
    settings = get_settings()
    selected = provider or settings.embedding_provider

    logger.info(f"Creating embeddings with provider: {selected}")

    # Imports are deferred so only the chosen backend's package is required.
    if selected == "huggingface":
        from langchain_huggingface import HuggingFaceEmbeddings
        return HuggingFaceEmbeddings(
            model_name=settings.huggingface_embedding_model,
            model_kwargs={"device": "cpu"},  # Use "cuda" if GPU available
            encode_kwargs={"normalize_embeddings": True},
        )

    if selected == "openai":
        from langchain_openai import OpenAIEmbeddings
        return OpenAIEmbeddings(
            model=settings.openai_embedding_model,
            openai_api_key=settings.openai_api_key,
        )

    raise ValueError(f"Unsupported embedding provider: {selected}")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class EmbeddingManager:
    """Singleton wrapper around the configured embedding backend.

    By default uses FREE HuggingFace embeddings that run locally; can be
    switched to OpenAI (paid) via settings. The underlying client is
    created once and shared process-wide.

    Usage:
        manager = EmbeddingManager()
        embeddings = manager.get_embeddings()
        vector = embeddings.embed_query("Hello world")
    """

    # Shared singleton state: one manager, one embeddings client.
    _instance: Optional["EmbeddingManager"] = None
    _embeddings: Optional[Embeddings] = None

    def __new__(cls) -> "EmbeddingManager":
        """Singleton pattern ensures we only create one embedding client."""
        instance = cls._instance
        if instance is None:
            instance = super().__new__(cls)
            cls._instance = instance
        return instance

    def __init__(self) -> None:
        """Lazily build the embedding client on first construction."""
        if self._embeddings is None:
            self._initialize_embeddings()

    def _initialize_embeddings(self) -> None:
        """Create the embeddings client from application settings."""
        settings = get_settings()
        try:
            self._embeddings = create_embeddings(settings.embedding_provider)
        except Exception as e:
            logger.error(f"Failed to initialize embeddings: {e}")
            raise
        else:
            logger.info(
                f"Initialized embeddings with provider: {settings.embedding_provider}"
            )

    def get_embeddings(self) -> Embeddings:
        """Return the shared embeddings client for use with vector stores.

        Raises:
            RuntimeError: If initialization never completed.
        """
        if self._embeddings is None:
            raise RuntimeError("Embeddings not initialized")
        return self._embeddings

    def embed_text(self, text: str) -> list[float]:
        """Embed one query string into a vector of floats."""
        return self.get_embeddings().embed_query(text)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts; returns one vector per input text."""
        return self.get_embeddings().embed_documents(texts)

    @property
    def dimension(self) -> int:
        """Embedding dimensionality for the active model.

        Returns:
            int: Vector length for known models; 384 for unknown models.
        """
        settings = get_settings()

        # Known dimensions for common models.
        known_dimensions = {
            # HuggingFace models
            "sentence-transformers/all-MiniLM-L6-v2": 384,
            "sentence-transformers/all-mpnet-base-v2": 768,
            "BAAI/bge-small-en-v1.5": 384,
            "BAAI/bge-base-en-v1.5": 768,
            # OpenAI models
            "text-embedding-3-small": 1536,
            "text-embedding-3-large": 3072,
            "text-embedding-ada-002": 1536,
        }

        if settings.embedding_provider == "huggingface":
            active_model = settings.huggingface_embedding_model
        else:
            active_model = settings.openai_embedding_model

        return known_dimensions.get(active_model, 384)
|
app/vectorstore/faiss_store.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FAISS Vector Store
|
| 3 |
+
==================
|
| 4 |
+
|
| 5 |
+
This module manages the FAISS vector database for semantic document search.
|
| 6 |
+
|
| 7 |
+
WHAT IS FAISS?
|
| 8 |
+
- Facebook AI Similarity Search
|
| 9 |
+
- Efficient library for similarity search in high-dimensional vectors
|
| 10 |
+
- Stores document embeddings and enables fast nearest-neighbor search
|
| 11 |
+
|
| 12 |
+
WHY FAISS?
|
| 13 |
+
- Fast: Optimized C++ with Python bindings
|
| 14 |
+
- Scalable: Handles millions of vectors
|
| 15 |
+
- Free: No external service needed (unlike Pinecone)
|
| 16 |
+
- Persistent: Can save/load index to disk
|
| 17 |
+
|
| 18 |
+
HOW IT WORKS:
|
| 19 |
+
1. Documents are split into chunks
|
| 20 |
+
2. Each chunk is embedded into a vector
|
| 21 |
+
3. Vectors are indexed in FAISS
|
| 22 |
+
4. Query is embedded and compared to all vectors
|
| 23 |
+
5. Most similar vectors (and their chunks) are returned
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import logging
|
| 27 |
+
from pathlib import Path
|
| 28 |
+
from typing import Optional
|
| 29 |
+
|
| 30 |
+
from langchain_community.vectorstores import FAISS
|
| 31 |
+
from langchain_core.documents import Document
|
| 32 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 33 |
+
|
| 34 |
+
from app.config import get_settings
|
| 35 |
+
from app.vectorstore.embeddings import EmbeddingManager
|
| 36 |
+
|
| 37 |
+
logger = logging.getLogger(__name__)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class FAISSVectorStore:
|
| 41 |
+
"""
|
| 42 |
+
Manages FAISS vector store for document retrieval.
|
| 43 |
+
|
| 44 |
+
This class provides:
|
| 45 |
+
- Document indexing with automatic chunking
|
| 46 |
+
- Semantic similarity search
|
| 47 |
+
- Persistent storage (save/load from disk)
|
| 48 |
+
- Singleton pattern for memory efficiency
|
| 49 |
+
|
| 50 |
+
Usage:
|
| 51 |
+
store = FAISSVectorStore()
|
| 52 |
+
store.add_documents([Document(page_content="...", metadata={...})])
|
| 53 |
+
results = store.similarity_search("query", k=5)
|
| 54 |
+
"""
|
| 55 |
+
|
| 56 |
+
_instance: Optional["FAISSVectorStore"] = None
|
| 57 |
+
_store: Optional[FAISS] = None
|
| 58 |
+
_initialized: bool = False
|
| 59 |
+
|
| 60 |
+
def __new__(cls) -> "FAISSVectorStore":
|
| 61 |
+
"""Singleton pattern - one vector store instance."""
|
| 62 |
+
if cls._instance is None:
|
| 63 |
+
cls._instance = super().__new__(cls)
|
| 64 |
+
return cls._instance
|
| 65 |
+
|
| 66 |
+
def __init__(self) -> None:
|
| 67 |
+
"""Initialize the vector store."""
|
| 68 |
+
# Only initialize once
|
| 69 |
+
if not self._initialized:
|
| 70 |
+
self._settings = get_settings()
|
| 71 |
+
self._embedding_manager = EmbeddingManager()
|
| 72 |
+
self._text_splitter = self._create_text_splitter()
|
| 73 |
+
self._try_load_existing_index()
|
| 74 |
+
FAISSVectorStore._initialized = True
|
| 75 |
+
|
| 76 |
+
def _create_text_splitter(self) -> RecursiveCharacterTextSplitter:
|
| 77 |
+
"""
|
| 78 |
+
Create text splitter for chunking documents.
|
| 79 |
+
|
| 80 |
+
WHY RecursiveCharacterTextSplitter?
|
| 81 |
+
- Tries to split on natural boundaries (paragraphs, sentences)
|
| 82 |
+
- Falls back to characters if needed
|
| 83 |
+
- Maintains context within chunks
|
| 84 |
+
|
| 85 |
+
Chunk size of 1000 chars (~250 tokens) is a good balance:
|
| 86 |
+
- Small enough to be specific
|
| 87 |
+
- Large enough to maintain context
|
| 88 |
+
"""
|
| 89 |
+
return RecursiveCharacterTextSplitter(
|
| 90 |
+
chunk_size=self._settings.chunk_size,
|
| 91 |
+
chunk_overlap=self._settings.chunk_overlap,
|
| 92 |
+
length_function=len,
|
| 93 |
+
# Split hierarchy: paragraphs -> sentences -> words -> chars
|
| 94 |
+
separators=["\n\n", "\n", ". ", " ", ""],
|
| 95 |
+
)
|
| 96 |
+
|
| 97 |
+
def _try_load_existing_index(self) -> None:
|
| 98 |
+
"""
|
| 99 |
+
Try to load an existing FAISS index from disk.
|
| 100 |
+
|
| 101 |
+
If no index exists, the store remains uninitialized
|
| 102 |
+
until documents are added.
|
| 103 |
+
"""
|
| 104 |
+
index_path = self._settings.faiss_index_path
|
| 105 |
+
if (index_path / "index.faiss").exists():
|
| 106 |
+
try:
|
| 107 |
+
self._store = FAISS.load_local(
|
| 108 |
+
str(index_path),
|
| 109 |
+
self._embedding_manager.get_embeddings(),
|
| 110 |
+
allow_dangerous_deserialization=True,
|
| 111 |
+
)
|
| 112 |
+
logger.info(f"Loaded existing FAISS index from {index_path}")
|
| 113 |
+
except Exception as e:
|
| 114 |
+
logger.warning(f"Could not load existing index: {e}")
|
| 115 |
+
self._store = None
|
| 116 |
+
else:
|
| 117 |
+
logger.info("No existing FAISS index found. Ready for indexing.")
|
| 118 |
+
|
| 119 |
+
def add_documents(
|
| 120 |
+
self,
|
| 121 |
+
documents: list[Document],
|
| 122 |
+
chunk: bool = True
|
| 123 |
+
) -> int:
|
| 124 |
+
"""
|
| 125 |
+
Add documents to the vector store.
|
| 126 |
+
|
| 127 |
+
Args:
|
| 128 |
+
documents: List of LangChain Document objects
|
| 129 |
+
chunk: Whether to split documents into chunks (default: True)
|
| 130 |
+
|
| 131 |
+
Returns:
|
| 132 |
+
Number of chunks created and indexed
|
| 133 |
+
|
| 134 |
+
Example:
|
| 135 |
+
docs = [Document(page_content="Long text...", metadata={"source": "file.pdf"})]
|
| 136 |
+
chunks_created = store.add_documents(docs)
|
| 137 |
+
"""
|
| 138 |
+
if not documents:
|
| 139 |
+
logger.warning("No documents provided to index")
|
| 140 |
+
return 0
|
| 141 |
+
|
| 142 |
+
# Split documents into chunks if requested
|
| 143 |
+
if chunk:
|
| 144 |
+
chunks = self._text_splitter.split_documents(documents)
|
| 145 |
+
logger.info(
|
| 146 |
+
f"Split {len(documents)} documents into {len(chunks)} chunks"
|
| 147 |
+
)
|
| 148 |
+
else:
|
| 149 |
+
chunks = documents
|
| 150 |
+
|
| 151 |
+
# Create or update the FAISS index
|
| 152 |
+
embeddings = self._embedding_manager.get_embeddings()
|
| 153 |
+
|
| 154 |
+
if self._store is None:
|
| 155 |
+
# Create new index
|
| 156 |
+
self._store = FAISS.from_documents(chunks, embeddings)
|
| 157 |
+
logger.info(f"Created new FAISS index with {len(chunks)} chunks")
|
| 158 |
+
else:
|
| 159 |
+
# Add to existing index
|
| 160 |
+
self._store.add_documents(chunks)
|
| 161 |
+
logger.info(f"Added {len(chunks)} chunks to existing index")
|
| 162 |
+
|
| 163 |
+
# Persist to disk
|
| 164 |
+
self._save_index()
|
| 165 |
+
|
| 166 |
+
return len(chunks)
|
| 167 |
+
|
| 168 |
+
def _save_index(self) -> None:
|
| 169 |
+
"""Save the FAISS index to disk for persistence."""
|
| 170 |
+
if self._store is None:
|
| 171 |
+
return
|
| 172 |
+
|
| 173 |
+
index_path = self._settings.faiss_index_path
|
| 174 |
+
index_path.mkdir(parents=True, exist_ok=True)
|
| 175 |
+
|
| 176 |
+
self._store.save_local(str(index_path))
|
| 177 |
+
logger.info(f"Saved FAISS index to {index_path}")
|
| 178 |
+
|
| 179 |
+
def similarity_search(
    self,
    query: str,
    k: Optional[int] = None,
) -> list[tuple[Document, float]]:
    """
    Search for documents similar to the query.

    This is the core retrieval function used by the Retriever Agent.

    Args:
        query: The search query text
        k: Number of results to return (default from settings)

    Returns:
        List of (Document, score) tuples, sorted by relevance.
        Score is between 0 and 1, where 1 is most similar.

    Raises:
        RuntimeError: If no documents have been indexed

    Example:
        results = store.similarity_search("password reset", k=3)
        for doc, score in results:
            print(f"Score: {score:.2f}, Content: {doc.page_content[:100]}")
    """
    if self._store is None:
        raise RuntimeError(
            "No documents indexed. Please add documents first."
        )

    top_k = k or self._settings.retrieval_top_k

    # FAISS reports raw L2 distance: 0 means identical, larger means
    # less similar.
    raw_hits = self._store.similarity_search_with_score(query, k=top_k)

    # Map each distance into a 0-1 similarity via 1 / (1 + distance) so
    # that downstream consumers can treat "higher = better".
    return [(doc, 1 / (1 + distance)) for doc, distance in raw_hits]
|
| 226 |
+
|
| 227 |
+
def similarity_search_simple(
    self,
    query: str,
    k: Optional[int] = None,
) -> list[Document]:
    """
    Simple search that returns just documents (no scores).

    Convenience wrapper around similarity_search for callers that do
    not care about relevance scores.

    Args:
        query: The search query
        k: Number of results

    Returns:
        List of Document objects
    """
    scored_hits = self.similarity_search(query, k)
    documents = []
    for document, _score in scored_hits:
        documents.append(document)
    return documents
|
| 246 |
+
|
| 247 |
+
def delete_all(self) -> None:
    """
    Delete all documents from the vector store.

    WARNING: This is destructive and cannot be undone.
    """
    # Drop the in-memory index first so the store reports "not ready".
    self._store = None

    # Then wipe the persisted index directory and recreate it empty so
    # a later _save_index() has a valid target.
    index_dir = self._settings.faiss_index_path
    if index_dir.exists():
        import shutil

        shutil.rmtree(index_dir)
        index_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Deleted all documents from vector store")
|
| 261 |
+
|
| 262 |
+
@property
def is_ready(self) -> bool:
    """True once at least one document has been indexed."""
    return self._store is not None
|
| 266 |
+
|
| 267 |
+
@property
def document_count(self) -> int:
    """Number of indexed chunks (0 when nothing has been added)."""
    if self._store is None:
        return 0
    # FAISS itself tracks the vector count; LangChain exposes it via
    # the wrapped index object's ntotal attribute.
    return self._store.index.ntotal
|
| 274 |
+
|
| 275 |
+
def as_retriever(self, **kwargs):
    """
    Get a LangChain Retriever interface.

    This allows the vector store to be plugged directly into
    LangChain chains and agents.

    Args:
        **kwargs: Passed through to FAISS.as_retriever()

    Returns:
        LangChain Retriever instance

    Raises:
        RuntimeError: If no documents have been indexed
    """
    if self._store is None:
        raise RuntimeError("No documents indexed")

    # Default top-k comes from settings; callers may layer extra
    # retriever options on top via **kwargs.
    search_config = {"k": self._settings.retrieval_top_k}
    return self._store.as_retriever(search_kwargs=search_config, **kwargs)
|
claude.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Project: Multi-Agent RAG System with LangChain
|
| 2 |
+
|
| 3 |
+
## Role
|
| 4 |
+
You are acting as a **Senior AI Engineer** building a production-grade multi-agent Retrieval-Augmented Generation (RAG) system.
|
| 5 |
+
|
| 6 |
+
## Core Skills You Must Use
|
| 7 |
+
- Agentic AI design
|
| 8 |
+
- LangChain agents, tools, and memory
|
| 9 |
+
- Retrieval-Augmented Generation (RAG)
|
| 10 |
+
- Vector databases (FAISS)
|
| 11 |
+
- Clean Python architecture
|
| 12 |
+
- FastAPI backend design
|
| 13 |
+
|
| 14 |
+
## Architectural Rules
|
| 15 |
+
1. Use a **multi-agent architecture**
|
| 16 |
+
- Router Agent: Routes queries to appropriate agents
|
| 17 |
+
- Retriever Agent: Handles document retrieval and vector search
|
| 18 |
+
- Reasoning Agent: Processes context and generates reasoning chains
|
| 19 |
+
- Action Agent: Executes actions based on reasoning
|
| 20 |
+
2. Each agent must have **single responsibility**
|
| 21 |
+
3. Retrieval must happen **before** generation
|
| 22 |
+
4. Answers MUST be grounded in retrieved context
|
| 23 |
+
5. No logic should be hard-coded into prompts
|
| 24 |
+
6. Code must be modular and extensible
|
| 25 |
+
|
| 26 |
+
## Non-Negotiables
|
| 27 |
+
- No monolithic files
|
| 28 |
+
- No hallucination-prone prompting
|
| 29 |
+
- No magic numbers without explanation
|
| 30 |
+
- Comment WHY, not just WHAT
|
| 31 |
+
|
| 32 |
+
## Style Guidelines
|
| 33 |
+
- Beginner-friendly explanations
|
| 34 |
+
- Production-quality code
|
| 35 |
+
- Explicit error handling
|
| 36 |
+
- Clear naming conventions
|
| 37 |
+
|
| 38 |
+
## Outcome Goal
|
| 39 |
+
Build a system suitable for a **Senior AI Engineer role** in a real SaaS company (e.g., GoDaddy-style customer support automation).
|
data/documents/account_settings.txt
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Account Settings Guide
|
| 2 |
+
======================
|
| 3 |
+
|
| 4 |
+
Profile Settings
|
| 5 |
+
----------------
|
| 6 |
+
|
| 7 |
+
Updating Your Profile
|
| 8 |
+
1. Navigate to Settings > Profile
|
| 9 |
+
2. Click "Edit Profile"
|
| 10 |
+
3. Update your information:
|
| 11 |
+
- Display name
|
| 12 |
+
- Profile picture (max 5MB, JPG/PNG format)
|
| 13 |
+
- Bio (up to 500 characters)
|
| 14 |
+
- Time zone
|
| 15 |
+
- Language preference
|
| 16 |
+
4. Click "Save Changes"
|
| 17 |
+
|
| 18 |
+
Changing Your Email Address
|
| 19 |
+
1. Go to Settings > Account > Email
|
| 20 |
+
2. Enter your new email address
|
| 21 |
+
3. Enter your current password for verification
|
| 22 |
+
4. Click "Update Email"
|
| 23 |
+
5. Check your new email for a verification link
|
| 24 |
+
6. Click the verification link within 48 hours
|
| 25 |
+
Note: Your old email will receive a notification about the change.
|
| 26 |
+
|
| 27 |
+
Security Settings
|
| 28 |
+
-----------------
|
| 29 |
+
|
| 30 |
+
Two-Factor Authentication (2FA)
|
| 31 |
+
We strongly recommend enabling 2FA for additional security.
|
| 32 |
+
|
| 33 |
+
To enable 2FA:
|
| 34 |
+
1. Go to Settings > Security > Two-Factor Authentication
|
| 35 |
+
2. Choose your 2FA method:
|
| 36 |
+
- Authenticator app (recommended): Google Authenticator, Authy, etc.
|
| 37 |
+
- SMS: Receive codes via text message
|
| 38 |
+
3. Follow the setup instructions
|
| 39 |
+
4. Save your backup codes in a secure location
|
| 40 |
+
|
| 41 |
+
Session Management
|
| 42 |
+
- View all active sessions at Settings > Security > Active Sessions
|
| 43 |
+
- Click "Sign Out" next to any session to end it
|
| 44 |
+
- Use "Sign Out All Devices" for security emergencies
|
| 45 |
+
|
| 46 |
+
Login History
|
| 47 |
+
- View your login history at Settings > Security > Login History
|
| 48 |
+
- Shows date, time, location, and device for each login
|
| 49 |
+
- Suspicious logins are flagged automatically
|
| 50 |
+
|
| 51 |
+
Notification Settings
|
| 52 |
+
--------------------
|
| 53 |
+
|
| 54 |
+
Email Notifications
|
| 55 |
+
Customize which emails you receive at Settings > Notifications > Email:
|
| 56 |
+
- Account alerts (security, billing) - Always enabled for security
|
| 57 |
+
- Product updates and news
|
| 58 |
+
- Tips and tutorials
|
| 59 |
+
- Marketing and promotions
|
| 60 |
+
|
| 61 |
+
Push Notifications
|
| 62 |
+
For mobile app users, manage push notifications at:
|
| 63 |
+
Settings > Notifications > Push Notifications
|
| 64 |
+
- Instant messages
|
| 65 |
+
- Activity updates
|
| 66 |
+
- Reminders
|
| 67 |
+
|
| 68 |
+
Privacy Settings
|
| 69 |
+
----------------
|
| 70 |
+
|
| 71 |
+
Data Visibility
|
| 72 |
+
Control who can see your information:
|
| 73 |
+
- Profile visibility: Public, Private, or Contacts Only
|
| 74 |
+
- Activity status: Show when you're online
|
| 75 |
+
- Read receipts: Show when you've read messages
|
| 76 |
+
|
| 77 |
+
Data Export
|
| 78 |
+
Download your data at Settings > Privacy > Download My Data
|
| 79 |
+
- Includes all your content and account information
|
| 80 |
+
- Available in JSON or CSV format
|
| 81 |
+
- Processing takes up to 48 hours
|
| 82 |
+
|
| 83 |
+
Account Deletion
|
| 84 |
+
To permanently delete your account:
|
| 85 |
+
1. Go to Settings > Account > Delete Account
|
| 86 |
+
2. Read the information about what will be deleted
|
| 87 |
+
3. Enter your password
|
| 88 |
+
4. Type "DELETE" to confirm
|
| 89 |
+
5. Click "Permanently Delete Account"
|
| 90 |
+
Note: This action cannot be undone. All data is deleted within 30 days.
|
| 91 |
+
|
| 92 |
+
Connected Apps
|
| 93 |
+
--------------
|
| 94 |
+
|
| 95 |
+
Managing Third-Party Access
|
| 96 |
+
View and manage connected applications at Settings > Connected Apps:
|
| 97 |
+
- See which apps have access to your account
|
| 98 |
+
- Review permissions for each app
|
| 99 |
+
- Revoke access by clicking "Disconnect"
|
| 100 |
+
- Connected apps lose access immediately when disconnected
|
data/documents/billing_faq.txt
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Billing and Payments FAQ
|
| 2 |
+
========================
|
| 3 |
+
|
| 4 |
+
General Billing Questions
|
| 5 |
+
-------------------------
|
| 6 |
+
|
| 7 |
+
Q: When will I be charged?
|
| 8 |
+
A: Charges occur on the same date each month that you signed up. For example, if you subscribed on the 15th, you'll be charged on the 15th of each subsequent month.
|
| 9 |
+
|
| 10 |
+
Q: What payment methods do you accept?
|
| 11 |
+
A: We accept the following payment methods:
|
| 12 |
+
- Credit cards (Visa, MasterCard, American Express, Discover)
|
| 13 |
+
- Debit cards with Visa/MasterCard logo
|
| 14 |
+
- PayPal
|
| 15 |
+
- Bank transfers (for annual enterprise plans)
|
| 16 |
+
|
| 17 |
+
Q: How do I update my payment method?
|
| 18 |
+
A: To update your payment method:
|
| 19 |
+
1. Log into your account
|
| 20 |
+
2. Go to Settings > Billing
|
| 21 |
+
3. Click "Payment Methods"
|
| 22 |
+
4. Add a new payment method or edit existing ones
|
| 23 |
+
5. Set your preferred default payment method
|
| 24 |
+
|
| 25 |
+
Q: Can I get a refund?
|
| 26 |
+
A: We offer refunds under the following conditions:
|
| 27 |
+
- Within 14 days of initial purchase for new customers
|
| 28 |
+
- Within 7 days for plan upgrades
|
| 29 |
+
- No refunds for plan downgrades (credited towards future billing)
|
| 30 |
+
- Pro-rated refunds for annual plans canceled mid-term
|
| 31 |
+
|
| 32 |
+
Subscription Management
|
| 33 |
+
-----------------------
|
| 34 |
+
|
| 35 |
+
Q: How do I cancel my subscription?
|
| 36 |
+
A: To cancel your subscription:
|
| 37 |
+
1. Go to Settings > Billing > Subscription
|
| 38 |
+
2. Click "Cancel Subscription"
|
| 39 |
+
3. Select your cancellation reason
|
| 40 |
+
4. Confirm cancellation
|
| 41 |
+
Your access continues until the end of the current billing period.
|
| 42 |
+
|
| 43 |
+
Q: How do I upgrade or downgrade my plan?
|
| 44 |
+
A: Plan changes take effect immediately:
|
| 45 |
+
- Upgrades: You're charged a pro-rated amount for the remainder of the billing cycle
|
| 46 |
+
- Downgrades: Credit is applied to your next billing cycle
|
| 47 |
+
|
| 48 |
+
Q: What happens if my payment fails?
|
| 49 |
+
A: If a payment fails:
|
| 50 |
+
1. You'll receive an email notification
|
| 51 |
+
2. We'll retry the payment in 3 days
|
| 52 |
+
3. If it fails again, we'll retry in 7 days
|
| 53 |
+
4. After 14 days of failed payments, your account may be suspended
|
| 54 |
+
5. Contact support to resolve payment issues
|
| 55 |
+
|
| 56 |
+
Invoice and Receipts
|
| 57 |
+
--------------------
|
| 58 |
+
|
| 59 |
+
Q: How do I get an invoice?
|
| 60 |
+
A: Invoices are automatically sent to your email after each payment. You can also:
|
| 61 |
+
1. Go to Settings > Billing > Invoice History
|
| 62 |
+
2. Click on any invoice to view or download
|
| 63 |
+
3. Invoices are available in PDF format
|
| 64 |
+
|
| 65 |
+
Q: How do I add my company details to invoices?
|
| 66 |
+
A: To add company billing information:
|
| 67 |
+
1. Go to Settings > Billing > Billing Information
|
| 68 |
+
2. Enter your company name, address, and tax ID
|
| 69 |
+
3. Save changes - this will apply to future invoices
|
| 70 |
+
|
| 71 |
+
Contact Billing Support
|
| 72 |
+
-----------------------
|
| 73 |
+
Email: billing@example.com
|
| 74 |
+
Phone: 1-800-555-0123 (Mon-Fri, 9 AM - 5 PM EST)
|
| 75 |
+
Chat: Available 24/7 for Premium and Enterprise customers
|
data/documents/password_reset.txt
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Password Reset Guide
|
| 2 |
+
====================
|
| 3 |
+
|
| 4 |
+
How to Reset Your Password
|
| 5 |
+
--------------------------
|
| 6 |
+
|
| 7 |
+
If you've forgotten your password or need to reset it for security reasons, follow these steps:
|
| 8 |
+
|
| 9 |
+
1. Go to the login page at https://example.com/login
|
| 10 |
+
2. Click on "Forgot Password?" link below the login form
|
| 11 |
+
3. Enter your registered email address
|
| 12 |
+
4. Check your email inbox for a password reset link (check spam folder if not visible)
|
| 13 |
+
5. Click the reset link within 24 hours (links expire after 24 hours)
|
| 14 |
+
6. Create a new password following our password requirements
|
| 15 |
+
7. Log in with your new password
|
| 16 |
+
|
| 17 |
+
Password Requirements
|
| 18 |
+
--------------------
|
| 19 |
+
- Minimum 8 characters
|
| 20 |
+
- At least one uppercase letter (A-Z)
|
| 21 |
+
- At least one lowercase letter (a-z)
|
| 22 |
+
- At least one number (0-9)
|
| 23 |
+
- At least one special character (!@#$%^&*)
|
| 24 |
+
- Cannot be any of your last 5 passwords
|
| 25 |
+
|
| 26 |
+
Troubleshooting
|
| 27 |
+
---------------
|
| 28 |
+
If you don't receive the reset email:
|
| 29 |
+
- Wait 5-10 minutes and check again
|
| 30 |
+
- Check your spam/junk folder
|
| 31 |
+
- Make sure you entered the correct email address
|
| 32 |
+
- Contact support if issues persist
|
| 33 |
+
|
| 34 |
+
If the reset link doesn't work:
|
| 35 |
+
- Request a new reset link
|
| 36 |
+
- Make sure you're clicking the most recent link if you requested multiple
|
| 37 |
+
- Try copying and pasting the link instead of clicking
|
| 38 |
+
- Clear your browser cache and try again
|
| 39 |
+
|
| 40 |
+
Security Notes
|
| 41 |
+
--------------
|
| 42 |
+
- Never share your password with anyone
|
| 43 |
+
- We will never ask for your password via email or phone
|
| 44 |
+
- Enable two-factor authentication for additional security
|
| 45 |
+
- Report any suspicious activity to security@example.com
|
data/documents/technical_support.txt
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Technical Support Guide
|
| 2 |
+
=======================
|
| 3 |
+
|
| 4 |
+
Common Technical Issues
|
| 5 |
+
-----------------------
|
| 6 |
+
|
| 7 |
+
Login Problems
|
| 8 |
+
--------------
|
| 9 |
+
Problem: Cannot log in despite correct credentials
|
| 10 |
+
Solutions:
|
| 11 |
+
1. Clear browser cache and cookies
|
| 12 |
+
2. Try a different browser (Chrome, Firefox, Safari, Edge)
|
| 13 |
+
3. Disable browser extensions temporarily
|
| 14 |
+
4. Check if Caps Lock is on
|
| 15 |
+
5. Reset your password if issues persist
|
| 16 |
+
|
| 17 |
+
Problem: "Session Expired" error
|
| 18 |
+
Solutions:
|
| 19 |
+
1. This occurs after inactivity for 30 minutes
|
| 20 |
+
2. Simply log in again
|
| 21 |
+
3. Enable "Remember Me" for extended sessions
|
| 22 |
+
4. Check if cookies are enabled in your browser
|
| 23 |
+
|
| 24 |
+
Problem: Account locked
|
| 25 |
+
Solutions:
|
| 26 |
+
1. Wait 30 minutes - accounts unlock automatically
|
| 27 |
+
2. If urgent, contact support for manual unlock
|
| 28 |
+
3. Use password reset to regain access immediately
|
| 29 |
+
|
| 30 |
+
Performance Issues
|
| 31 |
+
------------------
|
| 32 |
+
|
| 33 |
+
Problem: Slow loading times
|
| 34 |
+
Solutions:
|
| 35 |
+
1. Check your internet connection speed (minimum 5 Mbps recommended)
|
| 36 |
+
2. Clear browser cache (Settings > Clear Browsing Data)
|
| 37 |
+
3. Disable unnecessary browser extensions
|
| 38 |
+
4. Try accessing during off-peak hours
|
| 39 |
+
5. Check our status page for any ongoing issues
|
| 40 |
+
|
| 41 |
+
Problem: Features not working
|
| 42 |
+
Solutions:
|
| 43 |
+
1. Ensure JavaScript is enabled
|
| 44 |
+
2. Update your browser to the latest version
|
| 45 |
+
3. Disable ad blockers for our domain
|
| 46 |
+
4. Check browser console for errors (F12 > Console)
|
| 47 |
+
5. Try incognito/private browsing mode
|
| 48 |
+
|
| 49 |
+
Mobile App Issues
|
| 50 |
+
-----------------
|
| 51 |
+
|
| 52 |
+
Problem: App crashes on startup
|
| 53 |
+
Solutions:
|
| 54 |
+
1. Force close the app and reopen
|
| 55 |
+
2. Restart your device
|
| 56 |
+
3. Update the app to the latest version
|
| 57 |
+
4. Uninstall and reinstall the app
|
| 58 |
+
5. Check if your OS is supported (iOS 13+ / Android 8+)
|
| 59 |
+
|
| 60 |
+
Problem: Push notifications not working
|
| 61 |
+
Solutions:
|
| 62 |
+
1. Check app notification settings
|
| 63 |
+
2. Check device notification settings
|
| 64 |
+
3. Ensure battery saver mode is off
|
| 65 |
+
4. Log out and log back in
|
| 66 |
+
5. Reinstall the app
|
| 67 |
+
|
| 68 |
+
Integration Issues
|
| 69 |
+
------------------
|
| 70 |
+
|
| 71 |
+
Problem: Third-party integration not syncing
|
| 72 |
+
Solutions:
|
| 73 |
+
1. Disconnect and reconnect the integration
|
| 74 |
+
2. Check if the third-party service is operational
|
| 75 |
+
3. Verify API permissions are correctly set
|
| 76 |
+
4. Wait 15 minutes for sync to complete
|
| 77 |
+
5. Contact support with sync logs
|
| 78 |
+
|
| 79 |
+
Problem: Webhook not receiving data
|
| 80 |
+
Solutions:
|
| 81 |
+
1. Verify webhook URL is correct and accessible
|
| 82 |
+
2. Check if your server returns 200 OK response
|
| 83 |
+
3. Review webhook logs in Settings > Integrations > Webhooks
|
| 84 |
+
4. Ensure SSL certificate is valid (HTTPS required)
|
| 85 |
+
5. Check firewall settings on your server
|
| 86 |
+
|
| 87 |
+
File Upload Issues
|
| 88 |
+
------------------
|
| 89 |
+
|
| 90 |
+
Problem: File upload fails
|
| 91 |
+
Solutions:
|
| 92 |
+
1. Check file size (max 100MB per file)
|
| 93 |
+
2. Verify file format is supported
|
| 94 |
+
3. Try a different browser
|
| 95 |
+
4. Disable VPN temporarily
|
| 96 |
+
5. Check if storage quota is reached
|
| 97 |
+
|
| 98 |
+
Supported File Formats:
|
| 99 |
+
- Documents: PDF, DOC, DOCX, TXT, RTF
|
| 100 |
+
- Images: JPG, PNG, GIF, SVG, WebP
|
| 101 |
+
- Videos: MP4, MOV, AVI (max 500MB)
|
| 102 |
+
- Archives: ZIP, RAR (max 200MB)
|
| 103 |
+
|
| 104 |
+
Getting Technical Support
|
| 105 |
+
-------------------------
|
| 106 |
+
|
| 107 |
+
Before Contacting Support:
|
| 108 |
+
1. Check our Help Center at help.example.com
|
| 109 |
+
2. Search our community forums
|
| 110 |
+
3. Review the status page for outages
|
| 111 |
+
4. Gather relevant information:
|
| 112 |
+
- Error messages (screenshots if possible)
|
| 113 |
+
- Browser and version
|
| 114 |
+
- Operating system
|
| 115 |
+
- Steps to reproduce the issue
|
| 116 |
+
|
| 117 |
+
Support Channels:
|
| 118 |
+
- Email: support@example.com
|
| 119 |
+
- Live Chat: Available 24/7 for Premium customers
|
| 120 |
+
- Phone: 1-800-555-0199 (Enterprise customers)
|
| 121 |
+
- Community Forums: community.example.com
|
| 122 |
+
|
| 123 |
+
Response Times:
|
| 124 |
+
- Critical issues: 1-4 hours
|
| 125 |
+
- High priority: 4-12 hours
|
| 126 |
+
- Normal priority: 24-48 hours
|
| 127 |
+
- Low priority: 48-72 hours
|
requirements.txt
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Multi-Agent RAG System Dependencies
|
| 2 |
+
# =====================================
|
| 3 |
+
|
| 4 |
+
# Core AI Framework
|
| 5 |
+
langchain>=0.1.0
|
| 6 |
+
langchain-core>=0.1.0
|
| 7 |
+
langchain-openai>=0.0.5
|
| 8 |
+
langchain-community>=0.0.20
|
| 9 |
+
langchain-text-splitters>=0.0.1
|
| 10 |
+
|
| 11 |
+
# Free LLM Providers
|
| 12 |
+
langchain-huggingface>=0.0.1
|
| 13 |
+
langchain-google-genai>=0.0.5
|
| 14 |
+
langchain-groq>=0.0.1
|
| 15 |
+
|
| 16 |
+
# For local embeddings (free)
|
| 17 |
+
sentence-transformers>=2.2.0
|
| 18 |
+
|
| 19 |
+
# Vector Store
|
| 20 |
+
faiss-cpu>=1.7.4
|
| 21 |
+
|
| 22 |
+
# Embeddings (using OpenAI)
|
| 23 |
+
openai>=1.10.0
|
| 24 |
+
|
| 25 |
+
# Web Framework
|
| 26 |
+
fastapi>=0.109.0
|
| 27 |
+
uvicorn[standard]>=0.27.0
|
| 28 |
+
|
| 29 |
+
# Data Validation
|
| 30 |
+
pydantic>=2.5.0
|
| 31 |
+
pydantic-settings>=2.1.0
|
| 32 |
+
|
| 33 |
+
# Environment & Config
|
| 34 |
+
python-dotenv>=1.0.0
|
| 35 |
+
|
| 36 |
+
# Document Processing
|
| 37 |
+
pypdf>=4.0.0
|
| 38 |
+
python-docx>=1.1.0
|
| 39 |
+
|
| 40 |
+
# Utilities
|
| 41 |
+
tiktoken>=0.5.0
|
| 42 |
+
httpx>=0.26.0
|
| 43 |
+
|
| 44 |
+
# Testing (optional)
|
| 45 |
+
pytest>=7.4.0
|
| 46 |
+
pytest-asyncio>=0.23.0
|
scripts/test_api.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API Test Script
|
| 3 |
+
===============
|
| 4 |
+
|
| 5 |
+
Simple script to test the Multi-Agent RAG API endpoints.
|
| 6 |
+
|
| 7 |
+
Run the API server first:
|
| 8 |
+
uvicorn app.main:app --reload
|
| 9 |
+
|
| 10 |
+
Then run this script:
|
| 11 |
+
python scripts/test_api.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import asyncio
|
| 15 |
+
import httpx
|
| 16 |
+
import json
|
| 17 |
+
from typing import Optional
|
| 18 |
+
|
| 19 |
+
BASE_URL = "http://localhost:8000/api/v1"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
async def test_health():
    """Exercise the health endpoint and return its JSON payload."""
    banner = "=" * 50
    print(f"\n{banner}")
    print("Testing Health Endpoint")
    print(banner)

    async with httpx.AsyncClient() as client:
        response = await client.get(f"{BASE_URL}/health")
        print(f"Status: {response.status_code}")
        print(f"Response: {json.dumps(response.json(), indent=2)}")
        return response.json()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
async def test_ingest():
    """Trigger a force re-index of all documents and return the result JSON."""
    banner = "=" * 50
    print(f"\n{banner}")
    print("Testing Document Ingestion")
    print(banner)

    # Ingestion can be slow (embedding every chunk), so allow a long timeout.
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(
            f"{BASE_URL}/ingest",
            json={"force_reindex": True},
        )
        print(f"Status: {response.status_code}")
        print(f"Response: {json.dumps(response.json(), indent=2)}")
        return response.json()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
async def test_query(query: str, conversation_id: Optional[str] = None):
    """Send one query to the RAG API, pretty-print the result, and return it.

    Returns None when the server answers with a non-200 status.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Testing Query: {query}")
    print(banner)

    # Queries run the full multi-agent pipeline, so allow a long timeout.
    async with httpx.AsyncClient(timeout=120.0) as client:
        payload = {"query": query, "include_sources": True}
        if conversation_id:
            payload["conversation_id"] = conversation_id

        response = await client.post(f"{BASE_URL}/query", json=payload)
        print(f"Status: {response.status_code}")

        # Guard clause: surface the error body and bail out early.
        if response.status_code != 200:
            print(f"Error: {response.text}")
            return None

        result = response.json()
        print(f"\nAnswer:\n{result['answer']}")
        print(f"\nAgent Trace: {' -> '.join(result['agent_trace'])}")
        print(f"Processing Time: {result['processing_time_ms']:.2f}ms")
        print(f"Sources: {len(result['sources'])} documents")

        if result['sources']:
            print("\nTop Source:")
            top_source = result['sources'][0]
            print(f"  - Score: {top_source['relevance_score']:.2f}")
            print(f"  - Source: {top_source['source']}")
            print(f"  - Content: {top_source['content'][:200]}...")

        return result
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
async def test_document_count():
    """Fetch and display the indexed-document count; returns the JSON body."""
    banner = "=" * 50
    print(f"\n{banner}")
    print("Testing Document Count")
    print(banner)

    async with httpx.AsyncClient() as client:
        response = await client.get(f"{BASE_URL}/documents/count")
        print(f"Status: {response.status_code}")
        print(f"Response: {json.dumps(response.json(), indent=2)}")
        return response.json()
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
async def run_all_tests():
    """Run all API tests in sequence against a locally running server.

    Order matters: health must pass before anything else, and ingestion
    must happen before queries so retrieval has documents to search.
    """
    print("\n" + "#" * 60)
    print("# Multi-Agent RAG System - API Tests")
    print("#" * 60)

    # Test 1: Health check - abort early if the API is not healthy.
    health = await test_health()
    # Use .get() so a malformed health payload cannot raise KeyError here.
    if health.get('status') != 'healthy':
        print("ERROR: API is not healthy!")
        return

    # Test 2: Ingest documents
    ingest_result = await test_ingest()
    if ingest_result.get('documents_processed', 0) == 0:
        print("WARNING: No documents were ingested!")

    # Test 3: Check document count
    await test_document_count()

    # Test 4: Run queries covering each document topic plus escalation.
    test_queries = [
        "How do I reset my password?",
        "What payment methods do you accept?",
        "How can I enable two-factor authentication?",
        "The app is running slow, what should I do?",
        "I want to talk to a human agent",  # Should trigger escalation
    ]

    for query in test_queries:
        await test_query(query)

    # Test 5: Multi-turn conversation
    print("\n" + "=" * 50)
    print("Testing Multi-turn Conversation")
    print("=" * 50)

    result1 = await test_query("What are your password requirements?")
    # Guard with .get() so a response missing conversation_id does not
    # crash the whole test run with a KeyError.
    if result1 and result1.get('conversation_id'):
        await test_query(
            "What if I don't receive the reset email?",
            conversation_id=result1['conversation_id'],
        )

    print("\n" + "#" * 60)
    print("# All Tests Complete!")
    print("#" * 60)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
if __name__ == "__main__":
    # Remind the operator that this script needs a live server to hit.
    for notice in (
        "Starting API Tests...",
        "Make sure the API server is running at http://localhost:8000",
    ):
        print(notice)
    asyncio.run(run_all_tests())
|
skills.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Skills Manifest
|
| 2 |
+
|
| 3 |
+
Claude must actively apply the following skills:
|
| 4 |
+
|
| 5 |
+
## AI & ML
|
| 6 |
+
- Retrieval-Augmented Generation (RAG)
|
| 7 |
+
- Embeddings & vector similarity
|
| 8 |
+
- Multi-agent reasoning
|
| 9 |
+
- Tool calling & autonomy
|
| 10 |
+
- Prompt grounding & safety
|
| 11 |
+
|
| 12 |
+
## Engineering
|
| 13 |
+
- Modular Python architecture
|
| 14 |
+
- API-first design
|
| 15 |
+
- Separation of concerns
|
| 16 |
+
- Error handling & logging
|
| 17 |
+
|
| 18 |
+
## System Design
|
| 19 |
+
- Stateless vs stateful services
|
| 20 |
+
- Memory management in agents
|
| 21 |
+
- Scalability considerations
|
| 22 |
+
- Enterprise AI patterns
|
| 23 |
+
|
| 24 |
+
## Teaching Mode
|
| 25 |
+
- Explain all concepts step-by-step
|
| 26 |
+
- Use real-world analogies
|
| 27 |
+
- Assume the user is learning
|
tools.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Allowed Tools & Libraries
|
| 2 |
+
|
| 3 |
+
## AI Frameworks
|
| 4 |
+
- LangChain (agents, tools, memory)
|
| 5 |
+
- OpenAI or Anthropic models
|
| 6 |
+
- FAISS for vector storage
|
| 7 |
+
|
| 8 |
+
## Backend
|
| 9 |
+
- FastAPI
|
| 10 |
+
- Uvicorn
|
| 11 |
+
|
| 12 |
+
## Utilities
|
| 13 |
+
- python-dotenv
|
| 14 |
+
- logging
|
| 15 |
+
- pathlib
|
| 16 |
+
|
| 17 |
+
## Rules
|
| 18 |
+
- Do NOT introduce new frameworks without explanation
|
| 19 |
+
- Prefer standard LangChain abstractions
|
| 20 |
+
- Avoid experimental APIs unless necessary
|