| """ | |
| codingo_chatbot.py | |
| =================== | |
| This module encapsulates the logic for Codingo's website chatbot. It | |
| loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector | |
| database using Chroma and SentenceTransformers, and uses the shared | |
| Groq language model (imported from ``backend.services.interview_engine``) | |
| to generate answers constrained to the retrieved context. If a Groq API | |
| key is not configured, a lightweight dummy model will be used as a | |
| fallback. TinyLlama and other local models are no longer used in this | |
| module. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import threading | |
| from typing import List | |
| import numpy as np | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from sentence_transformers import SentenceTransformer | |
| import chromadb | |
| from chromadb.config import Settings | |
| # Import the shared Groq LLM instance from the interview engine. This ensures | |
| # that the chatbot uses the exact same language model as the interview API. | |
| from backend.services.interview_engine import groq_llm | |
| # The llama_cpp dependency is no longer used for the chatbot. We keep the | |
| # import guarded to avoid breaking environments where llama_cpp is not | |
| # installed, but it is no longer required for generating responses. | |
| try: | |
| from llama_cpp import Llama # type: ignore # noqa: F401 | |
| except Exception: | |
| # We don't raise here because the Groq LLM will be used instead. If | |
| # llama_cpp is unavailable, it won't affect chatbot functionality. | |
| Llama = None # type: ignore | |
# Configuration
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")

# Generation parameters for the Groq LLM. These values can be adjusted via
# environment variables if desired. They loosely mirror the previous TinyLlama
# settings but are applied when constructing prompts for the Groq LLM. Note
# that Groq models internally determine sampling behaviour; these variables
# mainly govern how much content we include in the prompt and do not directly
# control the sampling temperature of the Groq API.
MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))  # kept for compatibility
TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))  # unused but retained
TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))  # unused but retained
REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))  # unused but retained

# Thread lock and globals
_init_lock = threading.Lock()
_embedder: SentenceTransformer | None = None
_collection: chromadb.Collection | None = None
_llm = None  # This will be set to the shared Groq LLM instance
def _load_chatbot_text() -> str:
    """Read the chatbot knowledge base from disk."""
    try:
        with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
            content = f.read()
        # Clean up the content to avoid meta-descriptions:
        # remove any lines that look like instructions about the chatbot.
        lines = content.split('\n')
        cleaned_lines = []
        for line in lines:
            # Skip lines that describe what the chatbot does
            if any(phrase in line.lower() for phrase in [
                'the chatbot', 'this bot', 'the bot provides',
                'chatbot provides', 'chatbot is used for',
                'official chatbot of'
            ]):
                continue
            cleaned_lines.append(line)
        return '\n'.join(cleaned_lines)
    except FileNotFoundError:
        return (
            "Codingo is an AI‑powered recruitment platform designed to "
            "streamline job applications, candidate screening and hiring. "
            "We make hiring smarter, faster and fairer through automation "
            "and intelligent recommendations."
        )
def init_embedder_and_db() -> None:
    """Initialize the SentenceTransformer embedder and Chroma vector DB."""
    global _embedder, _collection
    if _embedder is not None and _collection is not None:
        return
    with _init_lock:
        if _embedder is not None and _collection is not None:
            return
        os.makedirs(CHROMA_DB_DIR, exist_ok=True)
        text = _load_chatbot_text()
        # Split into chunks
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,  # Increased for better context
            chunk_overlap=100,
            separators=["\n\n", "\n", ". ", " "],
        )
        docs: List[str] = [doc.strip() for doc in splitter.split_text(text) if doc.strip()]
        # Initialize embedder
        embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        # Initialize Chroma
        client = chromadb.Client(
            Settings(
                persist_directory=CHROMA_DB_DIR,
                anonymized_telemetry=False,
                is_persistent=True,
            )
        )
        # Create or get collection
        collection = client.get_or_create_collection("codingo_chatbot")
        # Populate if empty
        need_populate = False
        try:
            existing = collection.get(limit=1)
            if not existing or not existing.get("documents"):
                need_populate = True
        except Exception:
            need_populate = True
        if need_populate:
            ids = [f"doc_{i}" for i in range(len(docs))]
            collection.add(documents=docs, embeddings=embeddings.tolist(), ids=ids)
        _embedder = embedder
        _collection = collection
def init_llm() -> None:
    """
    Initialize the chatbot's language model. This function now assigns
    the globally shared Groq LLM instance imported from the interview
    engine. If the Groq API key is unavailable, the fallback dummy
    model defined in the interview engine will be used automatically.
    """
    global _llm
    if _llm is not None:
        return
    with _init_lock:
        if _llm is not None:
            return
        # Assign the shared Groq LLM instance. This may be a DummyGroq when
        # no API key is provided. We avoid loading any local GGUF models.
        _llm = groq_llm
def _build_prompt(query: str, context: str) -> str:
    """
    Construct a prompt for the Groq LLM. The prompt instructs the model to
    behave as LUNA, Codingo's friendly assistant. It emphasises using only
    information from the provided context to answer the question and
    encourages the model to admit when the answer is unknown. This plain
    format works well with ChatGroq's ``invoke`` API.

    Args:
        query: The user's question.
        context: Concatenated snippets from the knowledge base deemed
            relevant to the query.

    Returns:
        A formatted string prompt ready for submission to the Groq LLM.
    """
    system_prompt = """
You are LUNA, the official AI assistant of Codingo, an AI-powered recruitment platform.
You must:
- Answer questions using ONLY the provided context.
- Be concise, clear, and professional.
- If the context does not have the answer, politely say you do not know.
- Never make up features or information not in the context.
- Always focus on Codingo’s platform, services, and functionality.
"""
    if context:
        return (
            f"{system_prompt}\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}\n"
            f"Answer:"
        )
    else:
        # When no context is available, still pass an empty context so the
        # model knows there is no supporting information.
        return (
            f"{system_prompt}\n\n"
            "Context:\n\n"
            f"Question: {query}\n"
            f"Answer:"
        )
def get_response(query: str, k: int = 3, score_threshold: float = 2.0) -> str:
    """
    Generate a response to the user's query using the shared Groq LLM and the
    chatbot's knowledge base. The function retrieves relevant context
    passages from the vector store, constructs a prompt instructing the
    model to answer as LUNA using only that context, and returns the
    resulting answer. If no sufficiently relevant passages are found, the
    full knowledge base text is used as context; if the knowledge base file
    is missing, a polite fallback message is returned without calling the LLM.

    Args:
        query: The user's question or statement.
        k: Number of nearest neighbour documents to retrieve from the
            knowledge base (default 3).
        score_threshold: Maximum distance for a document to be considered
            relevant (smaller means more similar).

    Returns:
        A string response appropriate for the chatbot UI.
    """
    # Handle empty queries gracefully
    if not query or not query.strip():
        return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
    # Initialise embedder, vector DB and LLM if necessary
    init_embedder_and_db()
    init_llm()
    # If the embedder, collection or LLM didn't initialise, provide a safe fallback
    if _embedder is None or _collection is None or _llm is None:
        return "I'm sorry, I'm unable to process your request right now. Please try again later."
    # Normalise for simple greetings
    greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
    if query.lower().strip() in greetings:
        return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
    # Embed query and search for relevant documents
    query_vector = _embedder.encode([query])[0]
    results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
    docs = results.get("documents", [[]])[0] if results else []
    distances = results.get("distances", [[]])[0] if results else []
    # Filter by distance threshold
    relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
    # If no relevant context is found, fall back to the full knowledge base;
    # if even that file is missing, politely admit ignorance.
    if not relevant:
        try:
            with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
                context = f.read()
        except FileNotFoundError:
            return (
                "I'm sorry, I don't know the answer to that question based on my knowledge. "
                "Could you ask something else about Codingo or its services?"
            )
    else:
        # Concatenate the most relevant passages for context (use the top 2)
        context = "\n\n".join(relevant[:2])
    prompt = _build_prompt(query, context)
    try:
        # Invoke the Groq LLM. The ``invoke`` method may return an object
        # with a ``content`` attribute or a plain string, depending on the
        # backend. We handle both cases transparently.
        response = _llm.invoke(prompt)
    except Exception:
        # If invocation fails, return a generic error message
        return "I'm sorry, I encountered an error while generating a response. Please try again later."
    # Extract text from the LLM response
    if hasattr(response, 'content'):
        text = str(response.content).strip()
    elif isinstance(response, dict):
        # Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
        text = response.get('message', '') or response.get('text', '') or str(response)
        text = text.strip()
    else:
        text = str(response).strip()
    # Post-process the answer: remove unwanted phrases referring to the bot
    lines = text.split('\n')
    cleaned_lines = []
    for line in lines:
        lower_line = line.lower()
        if any(phrase in lower_line for phrase in [
            'the chatbot', 'this bot', 'the bot provides',
            'in response to', 'overall,',
            'as an ai language model'
        ]):
            continue
        cleaned_lines.append(line)
    cleaned_text = '\n'.join(cleaned_lines).strip()
    # Ensure we return some meaningful text
    return cleaned_text or (
        "I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
    )