Commit 46ecbc8
Parent(s): 987f59c
updated

backend/services/codingo_chatbot.py CHANGED (+141 -112)
@@ -4,9 +4,12 @@ codingo_chatbot.py

 This module encapsulates the logic for Codingo's website chatbot. It
 loads a knowledge base from ``chatbot/chatbot.txt``, builds a vector
-database using Chroma and SentenceTransformers, and uses
-
-retrieved context.
+database using Chroma and SentenceTransformers, and uses the shared
+Groq language model (imported from ``backend.services.interview_engine``)
+to generate answers constrained to the retrieved context. If a Groq API
+key is not configured, a lightweight dummy model will be used as a
+fallback. TinyLlama and other local models are no longer used in this
+module.
 """

 from __future__ import annotations
@@ -21,37 +24,42 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from sentence_transformers import SentenceTransformer
 import chromadb
 from chromadb.config import Settings
-from huggingface_hub import hf_hub_download

+# Import the shared Groq LLM instance from the interview engine. This ensures
+# that the chatbot uses the exact same language model as the interview API.
+from backend.services.interview_engine import groq_llm
+
+# The llama_cpp dependency is no longer used for the chatbot. We keep the
+# import guarded to avoid breaking environments where llama_cpp is not
+# installed, but it is no longer required for generating responses.
 try:
-    from llama_cpp import Llama  # type: ignore
-except Exception
-    raise
-
-
-    ) from exc
+    from llama_cpp import Llama  # type: ignore  # noqa: F401
+except Exception:
+    # We don't raise here because the Groq LLM will be used instead. If
+    # llama_cpp is unavailable, it won't affect chatbot functionality.
+    Llama = None  # type: ignore

 # Configuration
 PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
 CHATBOT_TXT_PATH = os.path.join(PROJECT_ROOT, "chatbot", "chatbot.txt")
 CHROMA_DB_DIR = os.path.join("/tmp", "chatbot_chroma")

-#
-
-
-
-
-#
-MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))
-TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))
-TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))
-REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))
+# Generation parameters for the Groq LLM. These values can be adjusted via
+# environment variables if desired. They loosely mirror the previous TinyLlama
+# settings but are applied when constructing prompts for the Groq LLM. Note
+# that Groq models internally determine sampling behaviour; these variables
+# mainly govern how much content we include in the prompt and do not directly
+# control the sampling temperature of the Groq API.
+MAX_TOKENS = int(os.getenv("LLAMA_MAX_TOKENS", "512"))  # kept for compatibility
+TEMPERATURE = float(os.getenv("LLAMA_TEMPERATURE", "0.3"))  # unused but retained
+TOP_P = float(os.getenv("LLAMA_TOP_P", "0.9"))  # unused but retained
+REPEAT_PENALTY = float(os.getenv("LLAMA_REPEAT_PENALTY", "1.1"))  # unused

 # Thread lock and globals
 _init_lock = threading.Lock()
 _embedder: SentenceTransformer | None = None
 _collection: chromadb.Collection | None = None
-_llm
+_llm = None  # This will be set to the shared Groq LLM instance


 def _load_chatbot_text() -> str:
@@ -136,135 +144,156 @@ def init_embedder_and_db() -> None:


 def init_llm() -> None:
-    """
+    """
+    Initialize the chatbot's language model. This function now assigns
+    the globally shared Groq LLM instance imported from the interview
+    engine. If the Groq API key is unavailable, the fallback dummy
+    model defined in the interview engine will be used automatically.
+    """
     global _llm
     if _llm is not None:
         return
     with _init_lock:
         if _llm is not None:
             return
-
-
-
-
-        if not os.path.exists(local_path):
-            local_path = hf_hub_download(
-                repo_id=LLAMA_REPO,
-                filename=LLAMA_FILE,
-                local_dir=LLAMA_LOCAL_DIR,
-                local_dir_use_symlinks=False,
-            )
-
-        # GPU configuration
-        try:
-            import torch
-            use_cuda = torch.cuda.is_available()
-        except Exception:
-            use_cuda = False
-
-        n_gpu_layers = int(os.getenv("LLAMA_N_GPU_LAYERS", "35" if use_cuda else "0"))
-        n_ctx = int(os.getenv("LLAMA_N_CTX", "2048"))
-        n_threads = max(1, os.cpu_count() // 2) if os.cpu_count() else 4
-
-        _llm = Llama(
-            model_path=local_path,
-            n_ctx=n_ctx,
-            n_threads=n_threads,
-            n_gpu_layers=n_gpu_layers,
-            verbose=False,  # Reduce logging
-        )
+        # Assign the shared Groq LLM instance. This may be a DummyGroq when
+        # no API key is provided. We avoid loading any local GGUF models.
+        _llm = groq_llm


 def _build_prompt(query: str, context: str) -> str:
-    """
-
+    """
+    Construct a prompt for the Groq LLM. The prompt instructs the model to
+    behave as LUNA, Codingo's friendly assistant. It emphasises using only
+    information from the provided context to answer the question and
+    encourages the model to admit when the answer is unknown. This plain
+    format works well with ChatGroq's ``invoke`` API.
+
+    Args:
+        query: The user's question.
+        context: Concatenated snippets from the knowledge base deemed
+            relevant to the query.
+
+    Returns:
+        A formatted string prompt ready for submission to the Groq LLM.
+    """
     system_prompt = (
-        "You are LUNA,
-        "
-        "
+        "You are LUNA, the friendly AI assistant for the Codingo recruitment "
+        "platform. You only answer questions using the information provided "
+        "in the context below. If the context does not contain the answer, "
+        "respond politely that you don't know. Keep your answers concise and "
+        "helpful."
     )
-
-    # Build the prompt with context integrated naturally
+
     if context:
-
-            f"
-            f"
-            f"Question: {query}
-            f"
+        return (
+            f"{system_prompt}\n\n"
+            f"Context:\n{context}\n\n"
+            f"Question: {query}\n"
+            f"Answer:"
        )
     else:
-
-
-
-            f"
+        # When no context is available, still pass an empty context so the
+        # model knows there is no supporting information.
+        return (
+            f"{system_prompt}\n\n"
+            "Context:\n\n"
+            f"Question: {query}\n"
+            f"Answer:"
        )
-
-    return prompt


 def get_response(query: str, k: int = 3, score_threshold: float = 1.5) -> str:
-    """
+    """
+    Generate a response to the user's query using the shared Groq LLM and the
+    chatbot's knowledge base. The function retrieves relevant context
+    passages from the vector store, constructs a prompt instructing the
+    model to answer as LUNA using only that context, and returns the
+    resulting answer. If no context is available, a polite fallback
+    message is returned without calling the LLM.
+
+    Args:
+        query: The user's question or statement.
+        k: Number of nearest neighbour documents to retrieve from the
+            knowledge base (default 3).
+        score_threshold: Maximum distance for a document to be considered
+            relevant (smaller means more similar).
+
+    Returns:
+        A string response appropriate for the chatbot UI.
+    """
+    # Handle empty queries gracefully
     if not query or not query.strip():
         return "Hi! I'm LUNA, your Codingo assistant. How can I help you today?"
-
+
+    # Initialise embedder, vector DB and LLM if necessary
     init_embedder_and_db()
     init_llm()
-
-
-
-
+
+    # If embedder or collection or LLM didn't initialise, provide a safe fallback
+    if _embedder is None or _collection is None or _llm is None:
+        return "I'm sorry, I'm unable to process your request right now. Please try again later."
+
+    # Normalise for simple greetings
     greetings = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening']
     if query.lower().strip() in greetings:
         return "Hello! I'm LUNA, your AI assistant for Codingo. How can I help you with our recruitment platform today?"
-
-    # Embed query and search
+
+    # Embed query and search for relevant documents
     query_vector = _embedder.encode([query])[0]
     results = _collection.query(query_embeddings=[query_vector.tolist()], n_results=k)
-
+
     docs = results.get("documents", [[]])[0] if results else []
     distances = results.get("distances", [[]])[0] if results else []
-
-    # Filter by
+
+    # Filter by distance threshold
     relevant: List[str] = [d for d, s in zip(docs, distances) if s < score_threshold]
-
+
+    # If no relevant context is found, politely admit ignorance
     if not relevant:
-        # Provide a helpful response even without specific context
         return (
-            "I don't
-            "
-            "that helps with job applications, candidate screening, and hiring. "
-            "Would you like to know more about our features?"
+            "I'm sorry, I don't know the answer to that question based on my knowledge. "
+            "Could you ask something else about Codingo or its services?"
        )
-
-    #
-    context = "
+
+    # Concatenate the most relevant passages for context (use top 2)
+    context = "\n\n".join(relevant[:2])
     prompt = _build_prompt(query, context)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    try:
+        # Invoke the Groq LLM. The ``invoke`` method may return an object
+        # with a ``content`` attribute or a plain string, depending on the
+        # backend. We handle both cases transparently.
+        response = _llm.invoke(prompt)
+    except Exception:
+        # If invocation fails, return a generic error message
+        return "I'm sorry, I encountered an error while generating a response. Please try again later."
+
+    # Extract text from the LLM response
+    if hasattr(response, 'content'):
+        text = str(response.content).strip()
+    elif isinstance(response, dict):
+        # Some wrappers may return dicts (e.g. ChatCompletion). Try common keys.
+        text = response.get('message', '') or response.get('text', '') or str(response)
+        text = text.strip()
+    else:
+        text = str(response).strip()
+
+    # Post-process the answer: remove unwanted phrases referring to the bot
     lines = text.split('\n')
     cleaned_lines = []
     for line in lines:
-
+        lower_line = line.lower()
+        if any(phrase in lower_line for phrase in [
             'the chatbot', 'this bot', 'the bot provides',
-            'in response to', 'overall,'
+            'in response to', 'overall,',
+            'as an ai language model'
         ]):
             continue
         cleaned_lines.append(line)
-
-
-
-    return
+    cleaned_text = '\n'.join(cleaned_lines).strip()
+
+    # Ensure we return some meaningful text
+    return cleaned_text or (
+        "I'm sorry, I couldn't generate a proper response. Could you rephrase your question?"
+    )
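
For a quick check of the new behaviour, a minimal usage sketch follows. It assumes ``backend`` is importable as a package and that a Groq API key is configured (without one, the DummyGroq fallback from the interview engine responds instead); the sample questions are hypothetical and not part of this commit.

# Hypothetical smoke test for the updated chatbot service.
from backend.services.codingo_chatbot import get_response

if __name__ == "__main__":
    # The first real query lazily builds the Chroma index from chatbot/chatbot.txt
    # and binds the shared Groq LLM; later calls reuse the cached globals.
    print(get_response("What does Codingo offer?"))   # hypothetical question
    print(get_response("hi"))                         # short-circuits to the canned greeting
    print(get_response(""))                           # empty input returns the default greeting

The optional ``k`` and ``score_threshold`` arguments control how many passages are retrieved and how strict the distance filter is; the defaults (3 and 1.5) match the function signature above.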