Spaces:
Sleeping
Sleeping
Chandima Prabhath
committed on
Commit
·
213465c
1
Parent(s):
0aa7bc9
Update LLM model settings and enhance response handling in LLMIntegrator
Browse files- config/settings.py +3 -2
- src/llm_integrator/llm.py +34 -39
config/settings.py
CHANGED
|
@@ -21,10 +21,11 @@ CHUNK_OVERLAP = int(os.getenv("CHUNK_OVERLAP", "10")) # Default chunk overlap
|
|
| 21 |
# --- LLM Settings (OpenAI Compatible) ---
|
| 22 |
LLM_API_KEY = os.getenv("LLM_API_KEY")
|
| 23 |
LLM_API_BASE = os.getenv("LLM_API_BASE", "https://llm.chutes.ai/v1") # Default API base
|
| 24 |
-
LLM_MODEL = os.getenv("LLM_MODEL", "
|
|
|
|
| 25 |
|
| 26 |
# --- Retrieval Settings ---
|
| 27 |
-
TOP_K = int(os.getenv("TOP_K", "
|
| 28 |
CHROMADB_COLLECTION_NAME = os.getenv("CHROMADB_COLLECTION_NAME", "my_rulings_collection") # Unique collection name
|
| 29 |
|
| 30 |
# --- Security Settings (Placeholders - Implement according to government standards) ---
|
|
|
|
| 21 |
# --- LLM Settings (OpenAI Compatible) ---
|
| 22 |
LLM_API_KEY = os.getenv("LLM_API_KEY")
|
| 23 |
LLM_API_BASE = os.getenv("LLM_API_BASE", "https://llm.chutes.ai/v1") # Default API base
|
| 24 |
+
LLM_MODEL = os.getenv("LLM_MODEL", "microsoft/MAI-DS-R1-FP8") # Default LLM model
|
| 25 |
+
LLM_MODEL_2 = os.getenv("LLM_MODEL_2", "Qwen/Qwen3-8B")
|
| 26 |
|
| 27 |
# --- Retrieval Settings ---
|
| 28 |
+
TOP_K = int(os.getenv("TOP_K", "10")) # Default number of documents to retrieve
|
| 29 |
CHROMADB_COLLECTION_NAME = os.getenv("CHROMADB_COLLECTION_NAME", "my_rulings_collection") # Unique collection name
|
| 30 |
|
| 31 |
# --- Security Settings (Placeholders - Implement according to government standards) ---
|
src/llm_integrator/llm.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from langchain_openai import ChatOpenAI # cite: query_pipeline.py
|
| 3 |
from langchain_core.messages import HumanMessage, BaseMessage, AIMessage, SystemMessage # Often used with Chat models
|
| 4 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # For structured prompts
|
| 5 |
-
from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL # cite: query_pipeline.py
|
| 6 |
import logging
|
| 7 |
from typing import List
|
| 8 |
from langchain.schema import Document # To handle retrieved documents
|
|
@@ -38,8 +38,8 @@ class LLMIntegrator:
|
|
| 38 |
self.llm_title = ChatOpenAI(
|
| 39 |
api_key=LLM_API_KEY,
|
| 40 |
base_url=LLM_API_BASE,
|
| 41 |
-
model=
|
| 42 |
-
temperature=0.
|
| 43 |
)
|
| 44 |
logger.info(f"Initialized LLM: {LLM_MODEL} at {LLM_API_BASE}")
|
| 45 |
except Exception as e:
|
|
@@ -81,9 +81,6 @@ Examples:
|
|
| 81 |
User: hi
|
| 82 |
Assistant: Hey there! How can I help you today?
|
| 83 |
|
| 84 |
-
User: Who are you?
|
| 85 |
-
Assistant: I'm Insight AI, your friendly digital sidekick—always ready with a fact or a fun comment!
|
| 86 |
-
|
| 87 |
User: What is the capital of France?
|
| 88 |
Assistant: 1. The capital of France is Paris <source path="docs/geography.txt"/>
|
| 89 |
|
|
@@ -114,55 +111,53 @@ Documents:
|
|
| 114 |
formatted_prompt = prompt.format_messages(context=context_text, chat_history=chat_history, input=query)
|
| 115 |
|
| 116 |
try:
|
| 117 |
-
# Invoke the LLM with the formatted prompt
|
| 118 |
response = self.llm.invoke(formatted_prompt)
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
except Exception as e:
|
| 122 |
logger.error(f"Failed to generate LLM response: {e}")
|
| 123 |
-
#
|
| 124 |
-
|
| 125 |
|
| 126 |
def generate_chat_title(self, query: str) -> str:
|
| 127 |
"""
|
| 128 |
Generates a concise title for a chat based on the query.
|
| 129 |
-
|
| 130 |
-
Args:
|
| 131 |
-
query: The user's query string.
|
| 132 |
-
|
| 133 |
-
Returns:
|
| 134 |
-
A short title string.
|
| 135 |
"""
|
| 136 |
prompt = ChatPromptTemplate.from_messages([
|
| 137 |
-
(
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
Examples:
|
| 142 |
-
Query: Tax implications for foreign investments
|
| 143 |
-
Title: Foreign Investment Taxes
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
|
|
|
| 147 |
|
| 148 |
-
|
| 149 |
-
Title:
|
|
|
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
Query: help
|
| 155 |
-
Title: User Assistance
|
| 156 |
-
|
| 157 |
-
Query: {query}""")
|
| 158 |
])
|
| 159 |
|
| 160 |
try:
|
| 161 |
-
|
| 162 |
-
response = self.llm_title.invoke(prompt.format_messages(query=query))
|
| 163 |
logger.debug("Successfully generated chat title.")
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
except Exception as e:
|
| 166 |
logger.error(f"Failed to generate chat title: {e}")
|
| 167 |
-
# Provide a fallback title
|
| 168 |
return "New Chat"
|
|
|
|
| 2 |
from langchain_openai import ChatOpenAI # cite: query_pipeline.py
|
| 3 |
from langchain_core.messages import HumanMessage, BaseMessage, AIMessage, SystemMessage # Often used with Chat models
|
| 4 |
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # For structured prompts
|
| 5 |
+
from config.settings import LLM_API_KEY, LLM_API_BASE, LLM_MODEL, LLM_MODEL_2 # cite: query_pipeline.py
|
| 6 |
import logging
|
| 7 |
from typing import List
|
| 8 |
from langchain.schema import Document # To handle retrieved documents
|
|
|
|
| 38 |
self.llm_title = ChatOpenAI(
|
| 39 |
api_key=LLM_API_KEY,
|
| 40 |
base_url=LLM_API_BASE,
|
| 41 |
+
model=LLM_MODEL_2,
|
| 42 |
+
temperature=0.7 # More creative for titles
|
| 43 |
)
|
| 44 |
logger.info(f"Initialized LLM: {LLM_MODEL} at {LLM_API_BASE}")
|
| 45 |
except Exception as e:
|
|
|
|
| 81 |
User: hi
|
| 82 |
Assistant: Hey there! How can I help you today?
|
| 83 |
|
|
|
|
|
|
|
|
|
|
| 84 |
User: What is the capital of France?
|
| 85 |
Assistant: 1. The capital of France is Paris <source path="docs/geography.txt"/>
|
| 86 |
|
|
|
|
| 111 |
formatted_prompt = prompt.format_messages(context=context_text, chat_history=chat_history, input=query)
|
| 112 |
|
| 113 |
try:
|
|
|
|
| 114 |
response = self.llm.invoke(formatted_prompt)
|
| 115 |
+
content = response.content
|
| 116 |
+
|
| 117 |
+
# ---- NEW: ensure full think-tag wrapping ----
|
| 118 |
+
if '</think>' in content and '<think>' not in content:
|
| 119 |
+
content = '<think>' + content
|
| 120 |
+
# ------------------------------------------------
|
| 121 |
+
|
| 122 |
+
logger.debug(f"LLM response: {content}")
|
| 123 |
+
return content
|
| 124 |
except Exception as e:
|
| 125 |
logger.error(f"Failed to generate LLM response: {e}")
|
| 126 |
+
# raise error
|
| 127 |
+
raise e
|
| 128 |
|
| 129 |
def generate_chat_title(self, query: str) -> str:
|
| 130 |
"""
|
| 131 |
Generates a concise title for a chat based on the query.
|
| 132 |
+
Removes any <think>...</think> tags from the response.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
"""
|
| 134 |
prompt = ChatPromptTemplate.from_messages([
|
| 135 |
+
SystemMessage(
|
| 136 |
+
content="""
|
| 137 |
+
You’re our **Title Maestro**—crafting short, snappy chat titles (3–5 words).
|
| 138 |
+
Be specific, unique, and avoid punctuation.
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
+
**When in doubt**
|
| 141 |
+
- Vague query → infer intent (e.g., “General Inquiry” for “hi”)
|
| 142 |
+
- Don’t say “No clear topic.”
|
| 143 |
|
| 144 |
+
**Examples**
|
| 145 |
+
- Query: “GST for online sellers” → Title: `E-commerce GST Rates`
|
| 146 |
+
- Query: “hi” → Title: `User Assistance`
|
| 147 |
|
| 148 |
+
Now: “{query}”
|
| 149 |
+
"""
|
| 150 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
])
|
| 152 |
|
| 153 |
try:
|
| 154 |
+
resp = self.llm_title.invoke(prompt.format_messages(query=query))
|
|
|
|
| 155 |
logger.debug("Successfully generated chat title.")
|
| 156 |
+
# Remove <think>...</think> tags if present
|
| 157 |
+
import re
|
| 158 |
+
content = resp.content
|
| 159 |
+
content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
|
| 160 |
+
return content.strip('"').strip()
|
| 161 |
except Exception as e:
|
| 162 |
logger.error(f"Failed to generate chat title: {e}")
|
|
|
|
| 163 |
return "New Chat"
|