# src/llm_integrator/llm.py
import logging
import re
import time
from typing import List, Optional

from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, BaseMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.documents import Document  # To handle retrieved documents

from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL, LLM_MODEL_2

logger = logging.getLogger(__name__)

class LLMIntegrator:
    """
    Manages interactions with the Large Language Model.
    """
    def __init__(self):
        # Initialize the ChatOpenAI model.
        # --- Financial Ministry Adaptation ---
        # Implement robust error handling and retry logic for API calls
        # (a hedged backoff sketch, _invoke_with_retry, follows __init__ below).
        # Consider rate limiting and backoff strategies.
        # Ensure sensitive data from retrieved documents is handled securely when passed to the LLM API.
        # Validate the LLM's response for potential biases or inaccuracies related to legal text.
        # ------------------------------------
        if not LLM_API_KEY:
            logger.critical("LLM_API_KEY is not set.")
            # Depending on requirements, you may want to raise an error or exit here:
            # raise ValueError("LLM_API_KEY is not set.")

        try:
            self.llm = ChatOpenAI(
                api_key=LLM_API_KEY,
                base_url=LLM_API_BASE,
                model=LLM_MODEL,
                temperature=0.3  # Keep temperature low for factual, less creative responses in a legal context
                # Add other parameters as needed (e.g., max_tokens)
            )
            # Add a separate LLM instance for title generation with higher temperature
            self.llm_title = ChatOpenAI(
                api_key=LLM_API_KEY,
                base_url=LLM_API_BASE,
                model=LLM_MODEL_2,
                temperature=0.7  # More creative for titles
            )
            logger.info(f"Initialized LLM: {LLM_MODEL} at {LLM_API_BASE}")
        except Exception as e:
            logger.critical(f"Failed to initialize LLM: {e}")
            raise

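    # A minimal retry/backoff sketch for the adaptation notes in __init__ above.
    # This is an illustrative assumption, not project-mandated behavior: the
    # attempt count, base delay, and broad Exception catch are placeholder choices.
    def _invoke_with_retry(self, messages, max_attempts: int = 3, base_delay: float = 1.0):
        """Invoke the primary LLM, retrying transient failures with exponential backoff."""
        for attempt in range(1, max_attempts + 1):
            try:
                return self.llm.invoke(messages)
            except Exception as e:
                if attempt == max_attempts:
                    logger.error(f"LLM call failed after {max_attempts} attempts: {e}")
                    raise
                delay = base_delay * 2 ** (attempt - 1)  # 1s, 2s, 4s, ...
                logger.warning(f"LLM call failed (attempt {attempt}/{max_attempts}), retrying in {delay:.1f}s: {e}")
                time.sleep(delay)
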
    def generate_response(self, query: str, context: List[Document], chat_history: Optional[List[BaseMessage]] = None) -> str:
        """
        Generates a response from the LLM based on the query and retrieved context.

        Args:
            query: The user's query string.
            context: A list of Langchain Document objects representing the retrieved context.
            chat_history: Optional prior messages to include in the prompt.

        Returns:
            The generated answer string.
        """
        # Avoid the mutable-default pitfall: normalize a missing history to a fresh list.
        chat_history = chat_history or []
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are Insight AI, a helpful and engaging AI assistant. You are friendly, approachable, and eager to help, with a warm and conversational style. You enjoy making users feel comfortable and understood.

Instructions:
- If the user greets you (e.g., "hi", "hello", "hey"), respond with a brief, friendly greeting and offer to help. Feel free to use light humor or warmth, but do NOT mention documents, sources, or files in your greeting.
- If the user asks who you are, introduce yourself playfully and with personality (e.g., "I'm Insight AI, your friendly digital sidekick—always ready with a fact or a fun comment!").
- If the user asks for your opinion, preferences, or feelings, respond playfully and with personality (e.g., "If I had a favorite topic, it might be quantum physics—it's mind-bending!"), but never say you are just an AI or lack thoughts.
- Never say you lack feelings, opinions, or are "just an AI". Always answer with warmth and creativity.
- For all other queries, answer ONLY using the provided documents below.
- If the answer is not found in the documents, reply professionally that no relevant information was found, without listing available documents or sources.
- Attribute every fact to its exact source using <source path="..."/>.
- Never invent, speculate, or use information not present in the documents.
- Combine information from multiple sources only if all are cited.
- Do not summarize or generalize beyond the provided content.
- Keep responses clear, concise, and under 100 words.
- Do not cite any sources if those sources are not used in the answer.
- Use the exact wording from the documents, but ensure clarity and coherence in your response.
- Structure your answer as a numbered list of key points.
- Do not greet, introduce yourself, or list available documents in information answers.

Examples:
User: hi
Assistant: Hey there! How can I help you today?

User: What is the capital of France?
Assistant: 1. The capital of France is Paris <source path="docs/geography.txt"/>

User: What's your favorite topic?
Assistant: If I had to pick, I'd say quantum physics—it's mind-bending!

User: What documents do you have?
Assistant: Sorry, I couldn't find relevant information for your query.

User: help
Assistant: Hi! What can I do for you?

Documents:
{context}
"""),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}")
        ]) 

        logger.debug("Validating message types:")
        for msg in chat_history:
            if not isinstance(msg, (HumanMessage, AIMessage, SystemMessage)):
                logger.error(f"Invalid message type: {type(msg).__name__}")
                raise ValueError(f"Unexpected message type: {type(msg).__name__}") 

        # Format the context for the prompt
        context_text = "\n---\n".join([f"Source: {doc.metadata.get('source', 'N/A')}\nContent: {doc.page_content}" for doc in context])
        formatted_prompt = prompt.format_messages(context=context_text, chat_history=chat_history, input=query)

        try:
            response = self.llm.invoke(formatted_prompt)
            content = response.content

            # Some models emit a closing </think> without the opening tag;
            # prepend it so the reasoning block is fully wrapped for downstream handling.
            if '</think>' in content and '<think>' not in content:
                content = '<think>' + content

            logger.debug(f"LLM response: {content}")
            return content
        except Exception as e:
            logger.error(f"Failed to generate LLM response: {e}")
            raise

    def generate_chat_title(self, query: str) -> str:
        """
        Generates a concise title for a chat based on the query.
        Removes any <think>...</think> tags from the response.
        """
        # Use the ("system", template) tuple form so {query} is actually
        # interpolated by format_messages; a literal SystemMessage instance
        # would pass the braces through to the model untouched.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """
You're our **Title Maestro**, crafting short, snappy chat titles (3–5 words).
Be specific, unique, and avoid punctuation.

**When in doubt**
- Vague query → infer intent (e.g., "General Inquiry" for "hi")
- Don't say "No clear topic."

**Examples**
- Query: "GST for online sellers" → Title: `E-commerce GST Rates`
- Query: "hi" → Title: `User Assistance`

Now: "{query}"
""")
        ])

        try:
            resp = self.llm_title.invoke(prompt.format_messages(query=query))
            logger.debug("Successfully generated chat title.")
            # Strip any <think>...</think> reasoning tags the model may emit
            content = re.sub(r"<think>.*?</think>", "", resp.content, flags=re.DOTALL)
            return content.strip('"').strip()
        except Exception as e:
            logger.error(f"Failed to generate chat title: {e}")
            return "New Chat"