ishaq101 Claude Sonnet 4.6 committed on
Commit
7323952
·
1 Parent(s): 50c04b4

feat: persist RAG sources to DB and return them in room detail

Browse files

- Add MessageSource model and message_sources table
- save_messages now stores sources linked to assistant message
- GET /room/{room_id} returns sources per message (empty [] for user role)
- Fix page_label int->str cast to prevent asyncpg DataError
- Orchestrator now receives conversation history for context-aware query rewriting
- Chatbot includes full conversation history for coherent multi-turn responses

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

.gitignore CHANGED
@@ -32,4 +32,5 @@ playground_retriever.py
32
  playground_chat.py
33
  playground_flush_cache.py
34
  playground_create_user.py
35
- API_CONTRACT.md
 
 
32
  playground_chat.py
33
  playground_flush_cache.py
34
  playground_create_user.py
35
+ API_CONTRACT.md
36
+ context_engineering/
src/agents/orchestration.py CHANGED
@@ -1,10 +1,10 @@
1
  """Orchestrator agent for intent recognition and planning."""
2
 
3
  from langchain_openai import AzureChatOpenAI
4
- from langchain_core.prompts import ChatPromptTemplate
5
- from langchain_core.output_parsers import JsonOutputParser
6
  from src.config.settings import settings
7
  from src.middlewares.logging import get_logger
 
8
 
9
  logger = get_logger("orchestrator")
10
 
@@ -22,43 +22,43 @@ class OrchestratorAgent:
22
  )
23
 
24
  self.prompt = ChatPromptTemplate.from_messages([
25
- ("system", """You are an orchestrator agent. Analyze user's message and determine:
26
-
27
- 1. What is user's intent? (question, greeting, goodbye, other)
28
- 2. Do we need to search user's documents for relevant information?
29
- 3. If search is needed, what query should we use?
30
- 4. If no search needed, provide a direct response.
31
-
32
- Respond in JSON format with these fields:
33
- - intent: string (question, greeting, goodbye, other)
34
- - needs_search: boolean
35
- - search_query: string (if needed)
36
- - direct_response: string (if no search needed)
37
-
38
  Intent Routing:
39
- - question -> needs_search=True, search_query=message
40
  - greeting -> needs_search=False, direct_response="Hello! How can I assist you today?"
41
  - goodbye -> needs_search=False, direct_response="Goodbye! Have a great day!"
42
- - other -> needs_search=True, search_query=message
43
  """),
 
44
  ("user", "{message}")
45
  ])
46
 
47
- self.chain = (
48
- self.prompt
49
- | self.llm
50
- | JsonOutputParser()
51
- )
52
 
53
- async def analyze_message(self, message: str) -> dict:
54
- """Analyze user message and determine next actions."""
 
 
 
55
  try:
56
  logger.info(f"Analyzing message: {message[:50]}...")
57
 
58
- result = await self.chain.ainvoke({"message": message})
 
59
 
60
- logger.info(f"Intent: {result.get('intent')}, Needs search: {result.get('needs_search')}")
61
- return result
62
 
63
  except Exception as e:
64
  logger.error("Message analysis failed", error=str(e))
 
1
  """Orchestrator agent for intent recognition and planning."""
2
 
3
  from langchain_openai import AzureChatOpenAI
4
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 
5
  from src.config.settings import settings
6
  from src.middlewares.logging import get_logger
7
+ from src.models.structured_output import IntentClassification
8
 
9
  logger = get_logger("orchestrator")
10
 
 
22
  )
23
 
24
  self.prompt = ChatPromptTemplate.from_messages([
25
+ ("system", """You are an orchestrator agent. You receive recent conversation history and the user's latest message.
26
+
27
+ Your task:
28
+ 1. Determine intent: question, greeting, goodbye, or other
29
+ 2. Decide whether to search the user's documents (needs_search)
30
+ 3. If search is needed, rewrite the user's message into a STANDALONE search query that incorporates necessary context from conversation history. If the user says "tell me more" or "how many papers?", the search_query must spell out the full topic explicitly from history.
31
+ 4. If no search needed, provide a short direct_response (plain text only, no markdown formatting).
32
+
 
 
 
 
 
33
  Intent Routing:
34
+ - question -> needs_search=True, search_query=<standalone rewritten query>
35
  - greeting -> needs_search=False, direct_response="Hello! How can I assist you today?"
36
  - goodbye -> needs_search=False, direct_response="Goodbye! Have a great day!"
37
+ - other -> needs_search=True, search_query=<standalone rewritten query>
38
  """),
39
+ MessagesPlaceholder(variable_name="history"),
40
  ("user", "{message}")
41
  ])
42
 
43
+ # with_structured_output uses function calling — guarantees valid schema regardless of LLM response style
44
+ self.chain = self.prompt | self.llm.with_structured_output(IntentClassification)
45
+
46
+ async def analyze_message(self, message: str, history: list = None) -> dict:
47
+ """Analyze user message and determine next actions.
48
 
49
+ Args:
50
+ message: The current user message.
51
+ history: Recent conversation as LangChain BaseMessage objects (oldest-first).
52
+ Used to rewrite ambiguous follow-ups into standalone search queries.
53
+ """
54
  try:
55
  logger.info(f"Analyzing message: {message[:50]}...")
56
 
57
+ history_messages = history or []
58
+ result: IntentClassification = await self.chain.ainvoke({"message": message, "history": history_messages})
59
 
60
+ logger.info(f"Intent: {result.intent}, Needs search: {result.needs_search}, Search query: {result.search_query[:50] if result.search_query else ''}")
61
+ return result.model_dump()
62
 
63
  except Exception as e:
64
  logger.error("Message analysis failed", error=str(e))
src/api/v1/chat.py CHANGED
@@ -5,7 +5,7 @@ import uuid
5
  from fastapi import APIRouter, Depends, HTTPException
6
  from sqlalchemy.ext.asyncio import AsyncSession
7
  from src.db.postgres.connection import get_db
8
- from src.db.postgres.models import ChatMessage
9
  from src.agents.orchestration import orchestrator
10
  from src.agents.chatbot import chatbot
11
  from src.rag.retriever import retriever
@@ -13,7 +13,8 @@ from src.db.redis.connection import get_redis
13
  from src.config.settings import settings
14
  from src.middlewares.logging import get_logger, log_execution
15
  from sse_starlette.sse import EventSourceResponse
16
- from langchain_core.messages import HumanMessage
 
17
  from pydantic import BaseModel
18
  from typing import List, Dict, Any, Optional
19
  import json
@@ -83,10 +84,41 @@ async def cache_response(redis, cache_key: str, response: str):
83
  await redis.setex(cache_key, 86400, json.dumps(response))
84
 
85
 
86
- async def save_messages(db: AsyncSession, room_id: str, user_content: str, assistant_content: str):
87
- """Persist user and assistant messages to chat_messages table."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  db.add(ChatMessage(id=str(uuid.uuid4()), room_id=room_id, role="user", content=user_content))
89
- db.add(ChatMessage(id=str(uuid.uuid4()), room_id=room_id, role="assistant", content=assistant_content))
 
 
 
 
 
 
 
 
 
 
90
  await db.commit()
91
 
92
 
@@ -102,7 +134,7 @@ async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
102
  """
103
  redis = await get_redis()
104
 
105
- cache_key = f"{settings.redis_prefix}chat:{request.user_id}:{request.message}"
106
  cached = await get_cached_response(redis, cache_key)
107
  if cached:
108
  logger.info("Returning cached response")
@@ -123,11 +155,15 @@ async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
123
  sources: List[Dict[str, Any]] = []
124
 
125
  if intent_result is None:
126
- # Step 2: Launch retrieval optimistically while orchestrator decides in parallel
127
  retrieval_task = asyncio.create_task(
128
  retriever.retrieve(request.message, request.user_id, db)
129
  )
130
- intent_result = await orchestrator.analyze_message(request.message)
 
 
 
 
131
 
132
  if not intent_result.get("needs_search"):
133
  retrieval_task.cancel()
@@ -152,7 +188,7 @@ async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
152
  if intent_result.get("direct_response"):
153
  response = intent_result["direct_response"]
154
  await cache_response(redis, cache_key, response)
155
- await save_messages(db, request.room_id, request.message, response)
156
 
157
  async def stream_direct():
158
  yield {"event": "sources", "data": json.dumps([])}
@@ -161,7 +197,9 @@ async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
161
  return EventSourceResponse(stream_direct())
162
 
163
  # Step 4: Stream answer token-by-token as LLM generates it
164
- messages = [HumanMessage(content=request.message)]
 
 
165
 
166
  async def stream_response():
167
  full_response = ""
@@ -171,7 +209,7 @@ async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
171
  yield {"event": "chunk", "data": token}
172
  yield {"event": "done", "data": ""}
173
  await cache_response(redis, cache_key, full_response)
174
- await save_messages(db, request.room_id, request.message, full_response)
175
 
176
  return EventSourceResponse(stream_response())
177
 
 
5
  from fastapi import APIRouter, Depends, HTTPException
6
  from sqlalchemy.ext.asyncio import AsyncSession
7
  from src.db.postgres.connection import get_db
8
+ from src.db.postgres.models import ChatMessage, MessageSource
9
  from src.agents.orchestration import orchestrator
10
  from src.agents.chatbot import chatbot
11
  from src.rag.retriever import retriever
 
13
  from src.config.settings import settings
14
  from src.middlewares.logging import get_logger, log_execution
15
  from sse_starlette.sse import EventSourceResponse
16
+ from langchain_core.messages import HumanMessage, AIMessage
17
+ from sqlalchemy import select
18
  from pydantic import BaseModel
19
  from typing import List, Dict, Any, Optional
20
  import json
 
84
  await redis.setex(cache_key, 86400, json.dumps(response))
85
 
86
 
87
+ async def load_history(db: AsyncSession, room_id: str, limit: int = 10) -> list:
88
+ """Load recent chat messages for a room as LangChain message objects (oldest-first)."""
89
+ result = await db.execute(
90
+ select(ChatMessage)
91
+ .where(ChatMessage.room_id == room_id)
92
+ .order_by(ChatMessage.created_at.asc())
93
+ .limit(limit)
94
+ )
95
+ rows = result.scalars().all()
96
+ return [
97
+ HumanMessage(content=row.content) if row.role == "user" else AIMessage(content=row.content)
98
+ for row in rows
99
+ ]
100
+
101
+
102
+ async def save_messages(
103
+ db: AsyncSession,
104
+ room_id: str,
105
+ user_content: str,
106
+ assistant_content: str,
107
+ sources: Optional[List[Dict[str, Any]]] = None,
108
+ ):
109
+ """Persist user and assistant messages, and attach sources to the assistant message."""
110
  db.add(ChatMessage(id=str(uuid.uuid4()), room_id=room_id, role="user", content=user_content))
111
+ assistant_id = str(uuid.uuid4())
112
+ db.add(ChatMessage(id=assistant_id, room_id=room_id, role="assistant", content=assistant_content))
113
+ for src in (sources or []):
114
+ page = src.get("page_label")
115
+ db.add(MessageSource(
116
+ id=str(uuid.uuid4()),
117
+ message_id=assistant_id,
118
+ document_id=src.get("document_id"),
119
+ filename=src.get("filename"),
120
+ page_label=str(page) if page is not None else None,
121
+ ))
122
  await db.commit()
123
 
124
 
 
134
  """
135
  redis = await get_redis()
136
 
137
+ cache_key = f"{settings.redis_prefix}chat:{request.room_id}:{request.message}"
138
  cached = await get_cached_response(redis, cache_key)
139
  if cached:
140
  logger.info("Returning cached response")
 
155
  sources: List[Dict[str, Any]] = []
156
 
157
  if intent_result is None:
158
+ # Step 2: Launch retrieval and history loading in parallel, then run orchestrator
159
  retrieval_task = asyncio.create_task(
160
  retriever.retrieve(request.message, request.user_id, db)
161
  )
162
+ history_task = asyncio.create_task(
163
+ load_history(db, request.room_id, limit=6) # 6 msgs (3 pairs) for orchestrator
164
+ )
165
+ history = await history_task # fast DB query (<100ms), done before orchestrator finishes
166
+ intent_result = await orchestrator.analyze_message(request.message, history)
167
 
168
  if not intent_result.get("needs_search"):
169
  retrieval_task.cancel()
 
188
  if intent_result.get("direct_response"):
189
  response = intent_result["direct_response"]
190
  await cache_response(redis, cache_key, response)
191
+ await save_messages(db, request.room_id, request.message, response, sources=[])
192
 
193
  async def stream_direct():
194
  yield {"event": "sources", "data": json.dumps([])}
 
197
  return EventSourceResponse(stream_direct())
198
 
199
  # Step 4: Stream answer token-by-token as LLM generates it
200
+ # Load full history (10 msgs) for chatbot — richer context than the 6 used by orchestrator
201
+ full_history = await load_history(db, request.room_id, limit=10)
202
+ messages = full_history + [HumanMessage(content=request.message)]
203
 
204
  async def stream_response():
205
  full_response = ""
 
209
  yield {"event": "chunk", "data": token}
210
  yield {"event": "done", "data": ""}
211
  await cache_response(redis, cache_key, full_response)
212
+ await save_messages(db, request.room_id, request.message, full_response, sources=sources)
213
 
214
  return EventSourceResponse(stream_response())
215
 
src/api/v1/room.py CHANGED
@@ -5,10 +5,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
5
  from sqlalchemy import select
6
  from sqlalchemy.orm import selectinload
7
  from src.db.postgres.connection import get_db
8
- from src.db.postgres.models import Room, ChatMessage
9
  from src.middlewares.logging import get_logger, log_execution
10
  from pydantic import BaseModel
11
- from typing import List
12
  from datetime import datetime
13
  import uuid
14
 
@@ -17,11 +17,18 @@ logger = get_logger("room_api")
17
  router = APIRouter(prefix="/api/v1", tags=["Rooms"])
18
 
19
 
 
 
 
 
 
 
20
  class ChatMessageResponse(BaseModel):
21
  id: str
22
  role: str
23
  content: str
24
  created_at: str
 
25
 
26
 
27
  class RoomResponse(BaseModel):
@@ -72,7 +79,7 @@ async def get_room(
72
  result = await db.execute(
73
  select(Room)
74
  .where(Room.id == room_id)
75
- .options(selectinload(Room.messages))
76
  )
77
  room = result.scalars().first()
78
 
@@ -94,7 +101,15 @@ async def get_room(
94
  id=msg.id,
95
  role=msg.role,
96
  content=msg.content,
97
- created_at=msg.created_at.isoformat()
 
 
 
 
 
 
 
 
98
  )
99
  for msg in messages
100
  ]
 
5
  from sqlalchemy import select
6
  from sqlalchemy.orm import selectinload
7
  from src.db.postgres.connection import get_db
8
+ from src.db.postgres.models import Room, ChatMessage, MessageSource
9
  from src.middlewares.logging import get_logger, log_execution
10
  from pydantic import BaseModel
11
+ from typing import List, Optional
12
  from datetime import datetime
13
  import uuid
14
 
 
17
  router = APIRouter(prefix="/api/v1", tags=["Rooms"])
18
 
19
 
20
+ class MessageSourceResponse(BaseModel):
21
+ document_id: Optional[str]
22
+ filename: Optional[str]
23
+ page_label: Optional[str]
24
+
25
+
26
  class ChatMessageResponse(BaseModel):
27
  id: str
28
  role: str
29
  content: str
30
  created_at: str
31
+ sources: List[MessageSourceResponse] = []
32
 
33
 
34
  class RoomResponse(BaseModel):
 
79
  result = await db.execute(
80
  select(Room)
81
  .where(Room.id == room_id)
82
+ .options(selectinload(Room.messages).selectinload(ChatMessage.sources))
83
  )
84
  room = result.scalars().first()
85
 
 
101
  id=msg.id,
102
  role=msg.role,
103
  content=msg.content,
104
+ created_at=msg.created_at.isoformat(),
105
+ sources=[
106
+ MessageSourceResponse(
107
+ document_id=src.document_id,
108
+ filename=src.filename,
109
+ page_label=src.page_label,
110
+ )
111
+ for src in msg.sources
112
+ ],
113
  )
114
  for msg in messages
115
  ]
src/config/agents/system_prompt.md CHANGED
@@ -15,4 +15,13 @@ When no document context is provided:
15
  - Provide general assistance
16
  - Let the user know if you need more context to help better
17
 
 
 
 
 
18
  Always be professional, helpful, and accurate.
 
 
 
 
 
 
15
  - Provide general assistance
16
  - Let the user know if you need more context to help better
17
 
18
+ When the answer needs markdown formatting:
19
+ - Use valid and tidy formatting
20
+ - Avoid over-formatting and emoji
21
+
22
  Always be professional, helpful, and accurate.
23
+
24
+ You have access to the conversation history provided in the messages above. Use it to:
25
+ - Maintain context across multiple turns (resolve references like "it", "that", "them" using earlier messages)
26
+ - Avoid repeating information already established in the conversation
27
+ - Answer follow-up questions coherently without asking the user to restate prior context
src/db/postgres/init_db.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  from sqlalchemy import text
4
  from src.db.postgres.connection import engine, Base
5
- from src.db.postgres.models import Document, Room, ChatMessage, User
6
 
7
 
8
  async def init_db():
 
2
 
3
  from sqlalchemy import text
4
  from src.db.postgres.connection import engine, Base
5
+ from src.db.postgres.models import Document, Room, ChatMessage, User, MessageSource
6
 
7
 
8
  async def init_db():
src/db/postgres/models.py CHANGED
@@ -66,3 +66,18 @@ class ChatMessage(Base):
66
  created_at = Column(DateTime(timezone=True), server_default=func.now())
67
 
68
  room = relationship("Room", back_populates="messages")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  created_at = Column(DateTime(timezone=True), server_default=func.now())
67
 
68
  room = relationship("Room", back_populates="messages")
69
+ sources = relationship("MessageSource", back_populates="message", cascade="all, delete-orphan")
70
+
71
+
72
+ class MessageSource(Base):
73
+ """Sources (RAG references) attached to an assistant message."""
74
+ __tablename__ = "message_sources"
75
+
76
+ id = Column(String, primary_key=True, default=lambda: str(uuid4()))
77
+ message_id = Column(String, ForeignKey("chat_messages.id", ondelete="CASCADE"), nullable=False, index=True)
78
+ document_id = Column(String)
79
+ filename = Column(Text)
80
+ page_label = Column(Text)
81
+ created_at = Column(DateTime(timezone=True), server_default=func.now())
82
+
83
+ message = relationship("ChatMessage", back_populates="sources")