Commit 46ecbc8
Parent(s): 987f59c
updated

backend/services/codingo_chatbot.py CHANGED (+141 -112)
@@ -4,9 +4,12 @@ codingo_chatbot.py

 This module encapsulates the logic for Codingo's website chatbot. It
 loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector
-database using Chroma and SentenceTransformers, and uses
-
-retrieved context.
+database using Chroma and SentenceTransformers, and uses the shared
+Groq language model (imported from ``backend.services.interview_engine``)
+to generate answers constrained to the retrieved context. If a Groq API
+key is not configured, a lightweight dummy model will be used as a
+fallback. TinyLlama and other local models are no longer used in this
+module.
 """

 from __future__ import annotations
@@ -21,37 +24,42 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from sentence_transformers import SentenceTransformer
 import chromadb
 from chromadb.config import Settings
-from huggingface_hub import hf_hub_download

+# Import the shared Groq LLM instance from the interview engine. This ensures
+# that the chatbot uses the exact same language model as the interview API.
+from backend.services.interview_engine import groq_llm
+
+# The llama_cpp dependency is no longer used for the chatbot. We keep the
+# import guarded to avoid breaking environments where llama_cpp is not
+# installed, but it is no longer required for generating responses.
 try:
-    from llama_cpp import Llama  # type: ignore
-except Exception
-    raise
-
-
-    ) from exc
+    from llama_cpp import Llama  # type: ignore  # noqa: F401
+except Exception:
+    # We don't raise here because the Groq LLM will be used instead. If
+    # llama_cpp is unavailable, it won't affect chatbot functionality.
+    Llama = None  # type: ignore

 # Configuration
 PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
 CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
 CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")

-#
-
-
-
-
-#
-MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))
-TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))
-TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))
-REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))
+# Generation parameters for the Groq LLM. These values can be adjusted via
+# environment variables if desired. They loosely mirror the previous TinyLlama
+# settings but are applied when constructing prompts for the Groq LLM. Note
+# that Groq models internally determine sampling behaviour; these variables
+# mainly govern how much content we include in the prompt and do not directly
+# control the sampling temperature of the Groq API.
+MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))  # kept for compatibility
+TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))  # unused but retained
+TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))  # unused but retained
+REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))  # unused

 # Thread lock and globals
 _init_lock = threading.Lock()
 _embedder: SentenceTransformer | None = None
 _collection: chromadb.Collection | None = None
-_llm
+_llm = None  # This will be set to the shared Groq LLM instance


 def _load_chatbot_text() -> str:
@@ -136,135 +144,156 @@ def init_embedder_and_db() -> None:


 def init_llm() -> None:
-    """
+    """
+    Initialize the chatbot's language model. This function now assigns
+    the globally shared Groq LLM instance imported from the interview
+    engine. If the Groq API key is unavailable, the fallback dummy
+    model defined in the interview engine will be used automatically.
+    """
     global _llm
     if _llm is not None:
         return
     with _init_lock:
         if _llm is not None:
             return
-
-
-
-
-        if not os.path.exists(local_path):
-            local_path = hf_hub_download(
-                repo_id=LLAMA_REPO,
-                filename=LLAMA_FILE,
-                local_dir=LLAMA_LOCAL_DIR,
-                local_dir_use_symlinks=False,
-            )
-
-        # GPU configuration
-        try:
-            import torch
-            use_cuda = torch.cuda.is_available()
-        except Exception:
-            use_cuda = False
-
-        n_gpu_layers = int(os.getenv("LLAMA_N_GPU_LAYERS", "35" if use_cuda else "0"))
-        n_ctx = int(os.getenv("LLAMA_N_CTX", "2048"))
-        n_threads = max(1, os.cpu_count() // 2) if os.cpu_count() else 4
-
-        _llm = Llama(
-            model_path=local_path,
-            n_ctx=n_ctx,
-            n_threads=n_threads,
-            n_gpu_layers=n_gpu_layers,
-            verbose=False,  # Reduce logging
-        )
+        # Assign the shared Groq LLM instance. This may be a DummyGroq when
+        # no API key is provided. We avoid loading any local GGUF models.
+        _llm = groq_llm


 def _build_prompt(query: str, context: str) -> str:
-    """
-
+    """
+    Construct a prompt for the Groq LLM. The prompt instructs the model to
+    behave as LUNA, Codingo's friendly assistant. It emphasises using only
+    information from the provided context to answer the question and
+    encourages the model to admit when the answer is unknown. This plain
+    format works well with ChatGroq's ``invoke`` API.
+
+    Args:
+        query: The user's question.
+        context: Concatenated snippets from the knowledge base deemed
+            relevant to the query.
+
+    Returns:
+        A formatted string prompt ready for submission to the Groq LLM.
+    """
     system_prompt = (
-        "You are LUNA,
-        "
-        "
+        "You are LUNA, the friendly AI assistant for the Codingo recruitment "
+        "platform. You only answer questions using the information provided "
+        "in the context below. If the context does not contain the answer, "
+        "respond politely that you don't know. Keep your answers concise and "
+        "helpful."
     )
-
-    # Build the prompt with context integrated naturally
+
     if context:
-
-            f"
-            f"
-            f"Question: {query}
-            f"
+        return (
+            f"{system_prompt}\n\n"
+            f"Context:\n{context}\n\n"
+            f"Question: {query}\n"
+            f"Answer:"
        )
     else:
-
-
-
-            f"
+        # When no context is available, still pass an empty context so the
+        # model knows there is no supporting information.
+        return (
+            f"{system_prompt}\n\n"
+            "Context:\n\n"
+            f"Question: {query}\n"
+            f"Answer:"
        )
-
-    return prompt


 def get_response(query: str, k: int = 3, score_threshold: float = 1.5) -> str:
-    """
+    """
+    Generate a response to the user's query using the shared Groq LLM and the
+    chatbot's knowledge base. The function retrieves relevant context
+    passages from the vector store, constructs a prompt instructing the
+    model to answer as LUNA using only that context, and returns the
+    resulting answer. If no context is available, a polite fallback
+    message is returned without calling the LLM.
+
+    Args:
+        query: The user's question or statement.
+        k: Number of nearest neighbour documents to retrieve from the
+            knowledge base (default 3).
+        score_threshold: Maximum distance for a document to be considered
+            relevant (smaller means more similar).
+
+    Returns:
+        A string response appropriate for the chatbot UI.
+    """
+    # Handle empty queries gracefully
     if not query or not query.strip():
         return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
-
+
+    # Initialise embedder, vector DB and LLM if necessary
     init_embedder_and_db()
     init_llm()
-
-
-
-
+
+    # If embedder or collection or LLM didn't initialise, provide a safe fallback
+    if _embedder is None or _collection is None or _llm is None:
+        return "I'm sorry, I'm unable to process your request right now. Please try again later."
+
+    # Normalise for simple greetings
     greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
     if query.lower().strip() in greetings:
         return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
-
-    # Embed query and search
+
+    # Embed query and search for relevant documents
     query_vector = _embedder.encode([query])[0]
     results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
-
+
     docs = results.get("documents", [[]])[0] if results else []
     distances = results.get("distances", [[]])[0] if results else []
-
-    # Filter by
+
+    # Filter by distance threshold
     relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
-
+
+    # If no relevant context is found, politely admit ignorance
     if not relevant:
-        # Provide a helpful response even without specific context
         return (
-            "I don't
-            "
-            "that helps with job applications, candidate screening, and hiring. "
-            "Would you like to know more about our features?"
+            "I'm sorry, I don't know the answer to that question based on my knowledge. "
+            "Could you ask something else about Codingo or its services?"
        )
-
-    #
-    context = "
+
+    # Concatenate the most relevant passages for context (use top 2)
+    context = "\n\n".join(relevant[:2])
     prompt = _build_prompt(query, context)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    try:
+        # Invoke the Groq LLM. The ``invoke`` method may return an object
+        # with a ``content`` attribute or a plain string, depending on the
+        # backend. We handle both cases transparently.
+        response = _llm.invoke(prompt)
+    except Exception:
+        # If invocation fails, return a generic error message
+        return "I'm sorry, I encountered an error while generating a response. Please try again later."
+
+    # Extract text from the LLM response
+    if hasattr(response, 'content'):
+        text = str(response.content).strip()
+    elif isinstance(response, dict):
+        # Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
+        text = response.get('message', '') or response.get('text', '') or str(response)
+        text = text.strip()
+    else:
+        text = str(response).strip()
+
+    # Post-process the answer: remove unwanted phrases referring to the bot
     lines = text.split('\n')
     cleaned_lines = []
     for line in lines:
-
+        lower_line = line.lower()
+        if any(phrase in lower_line for phrase in [
             'the chatbot', 'this bot', 'the bot provides',
-            'in response to', 'overall,'
+            'in response to', 'overall,',
+            'as an ai language model'
         ]):
             continue
         cleaned_lines.append(line)
-
-
-
-    return
+    cleaned_text = '\n'.join(cleaned_lines).strip()
+
+    # Ensure we return some meaningful text
+    return cleaned_text or (
+        "I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
+    )
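
For a quick check of the new behaviour, a minimal usage sketch follows. It assumes ``backend`` is importable as a package and that a Groq API key is configured (without one, the DummyGroq fallback from the interview engine responds instead); the sample questions are hypothetical and not part of this commit.

# Hypothetical smoke test for the updated chatbot service.
from backend.services.codingo_chatbot import get_response

if __name__ == "__main__":
    # The first real query lazily builds the Chroma index from chatbot/chatbot.txt
    # and binds the shared Groq LLM; later calls reuse the cached globals.
    print(get_response("What does Codingo offer?"))   # hypothetical question
    print(get_response("hi"))                         # short-circuits to the canned greeting
    print(get_response(""))                           # empty input returns the default greeting

The optional ``k`` and ``score_threshold`` arguments control how many passages are retrieved and how strict the distance filter is; the defaults (3 and 1.5) match the function signature above.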