Commit: made several changes in all scripts

Files changed:
- Dockerfile      +0  -16
- main.py         +8  -21
- rag_chatbot.py  +3  -16
- rank.py         +1  -54
- recommender.py  +2  -12
Dockerfile
CHANGED
@@ -1,36 +1,20 @@
-# Use an official Python runtime as a parent image
 FROM python:3.10-slim

-# Set the working directory in the container
 WORKDIR /app

-# --- THIS IS THE CRITICAL NEW SECTION ---
-# Install system dependencies required for building llama-cpp-python
-# - apt-get update: Refreshes the package list
-# - build-essential: Installs C/C++ compilers (gcc, g++) and make
-# - cmake: The build system generator used by llama-cpp-python
-# - --no-install-recommends: Reduces image size by not installing optional packages
-# - rm -rf /var/lib/apt/lists/*: Cleans up the apt cache to keep the image small
 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential \
     cmake \
     && rm -rf /var/lib/apt/lists/*
-# --- END OF NEW SECTION ---

-# Copy the requirements file into the container at /app
 COPY requirements.txt .

-# Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt

-# Copy the local 'data' directory into the container's /app/data directory
 COPY ./data ./data

-# Copy the rest of the application source code
 COPY . .

-# Expose the port the app runs on
 EXPOSE 8000

-# Command to run the application
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py
CHANGED
@@ -1,10 +1,9 @@
-# main.py
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import List
 import uvicorn
-import traceback
+import traceback
 from deep_translator import GoogleTranslator
 from rag_chatbot import RAGChatBot
 from recommender import QuestionRecommender
@@ -63,39 +62,33 @@ async def startup_event():
     """
     global bot, recommender
     try:
-        # 1. Create the embedding model - THE SINGLE SOURCE OF TRUTH
         print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}")
         embeddings = HuggingFaceEmbeddings(
             model_name=EMBEDDING_MODEL_NAME,
             model_kwargs={'device': 'cpu'}
         )
-        print("
+        print("Embedding model loaded.")

-        # 2. Initialize the RAG ChatBot (pass embeddings)
         print("Loading RAG ChatBot...")
-        bot = RAGChatBot(embeddings)
+        bot = RAGChatBot(embeddings)

-        faiss_path = os.path.join("data", "faiss.index")
+        faiss_path = os.path.join("data", "faiss.index")
         questions_path = os.path.join("data", "questions.npy")

-        print(f"Attempting to load Recommender index from: '{faiss_path}'")
+        print(f"Attempting to load Recommender index from: '{faiss_path}'")

-        # Initialize the Question Recommender with the corrected paths
         recommender = QuestionRecommender(
             faiss_index_path=faiss_path,
             questions_path=questions_path,
             embedding_model=embeddings
         )
-        print("
+        print("All models loaded successfully! API is ready.")

     except Exception as e:
-        print("
+        print("Critical Error: Failed to load models during startup:")
         traceback.print_exc()
         raise e

-
-# --- Helper Functions ---
-
 def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> str:
     if not text or source_lang == target_lang or (target_lang == "en" and source_lang == "auto"):
         return text
@@ -119,8 +112,6 @@ def get_bot_response(query_en: str) -> tuple[str, List[str]]:

     return answer_en, new_recommendations

-# --- API Endpoints ---
-
 @app.get("/")
 async def root():
     return {"message": "Sat2Farm AI Assistant API is running!"}
@@ -155,9 +146,7 @@ async def get_initial_recommendations():
 async def chat(request: ChatRequest):
     """Main endpoint to process a user's chat message."""
     if not bot or not recommender:
-        raise HTTPException(status_code=503, detail="Chat service is not available. Models are not loaded.")
-
-    # ========================== THE FIX IS HERE ==========================
+        raise HTTPException(status_code=503, detail="Chat service is not available. Models are not loaded.")
     try:
         query_en = translate_text(request.message, target_lang="en", source_lang=request.user_language)

@@ -170,11 +159,9 @@ async def chat(request: ChatRequest):
             recommendations=new_recommendations_en
         )
     except Exception as e:
-        # This will now print the FULL error traceback to your logs
         print(f"Error during /chat processing. Full traceback below:")
         traceback.print_exc()
         raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
-    # =====================================================================

 @app.post("/recommendations/action")
 async def handle_recommendation_action(request: RecommendationRequest):
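Note: the hunks above only show the signature and guard clause of translate_text; its body is unchanged by this commit. As a rough sketch of how such a helper is commonly built on deep_translator's GoogleTranslator (the try/except fallback below is an illustrative assumption, not code from this repository):

# Hedged sketch of a translate_text helper on top of deep_translator.
# Only the guard clause is taken from the diff above; the rest is illustrative.
from deep_translator import GoogleTranslator

def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> str:
    # Nothing to translate, or the source already matches the target.
    if not text or source_lang == target_lang or (target_lang == "en" and source_lang == "auto"):
        return text
    try:
        return GoogleTranslator(source=source_lang, target=target_lang).translate(text)
    except Exception:
        # On any translation failure, fall back to the original text.
        return text

# Example: normalise a Hindi message to English before it reaches the RAG chain.
# translate_text("मिट्टी की नमी क्या है?", target_lang="en", source_lang="hi")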
rag_chatbot.py
CHANGED
@@ -1,10 +1,7 @@
-# rag_chatbot.py
-
 import os
 from huggingface_hub import hf_hub_download
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import FAISS
-# <<< CHANGED: Import from the new, correct package
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.retrievers import BM25Retriever
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -16,13 +13,8 @@ from langchain.retrievers.document_compressors import FlashrankRerank
 from flashrank import Ranker
 from typing import Dict, Any, List

-# This is a one-time operation for flashrank, it's fine to keep it here.
-# FlashrankRerank.model_rebuild()
-
-# --- Configuration ---
 PDF_PATH = "data/sat2farm_doc.pdf"
-
-# EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
+
 MODEL_NAME = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 MODEL_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

@@ -37,7 +29,6 @@ class RAGChatBot:
         print(f"Downloading GGUF model: {MODEL_NAME}/{MODEL_FILE}")
         model_path = hf_hub_download(repo_id=MODEL_NAME, filename=MODEL_FILE)

-        # Use the provided embedding model to build the chain.
         self.chain = self._create_rag_chain(model_path, embeddings)
         print("\n✅ RAG ChatBot initialized successfully!")

@@ -54,12 +45,10 @@ class RAGChatBot:
         chunks = text_splitter.split_documents(documents)
         print(f"Created {len(chunks)} document chunks.")

-        # STAGE 1: RECALL (Ensemble Retriever)
         print("Initializing Stage 1 Retriever (Ensemble)...")
         bm25_retriever = BM25Retriever.from_documents(chunks)
         bm25_retriever.k = 10

-        # This line is key: it uses the centrally-managed `embeddings` object.
         vectorstore = FAISS.from_documents(chunks, embeddings)
         faiss_retriever = vectorstore.as_retriever(search_kwargs={'k': 10})

@@ -67,7 +56,6 @@ class RAGChatBot:
             retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5], search_type="rrf"
         )

-        # STAGE 2: RE-RANK (Flashrank)
         print("Initializing Stage 2 Re-ranker (Flashrank)...")
         compressor = FlashrankRerank(top_n=3)
         final_retriever = ContextualCompressionRetriever(
@@ -76,7 +64,7 @@ class RAGChatBot:

         print(f"Initializing GGUF LLM from: {model_path}")
         llm = LlamaCpp(
-            model_path=model_path, n_ctx=2048, n_gpu_layers=-1,
+            model_path=model_path, n_ctx=2048, n_gpu_layers=-1,
             temperature=0.0, top_k=1, verbose=False, max_tokens=512
         )

@@ -85,6 +73,7 @@ You are a factual question-answering assistant. Your task is to answer the user'
 Follow these rules strictly:
 1. Provide only the direct answer to the question and nothing else. DO NOT add any summary, conclusion, or other extra information.
 2. If the answer is not in the context, state that you do not have that information.
+3. The CEO of Satyukt Analytics is Dr. Satkumar Tomar, if user asks question about who is ceo, tell this answer.

 Context: {context}
 Question: {question}
@@ -108,12 +97,10 @@ Helpful Answer:"""
         print(f"Invoking RAG chain with query: '{query}'")
         result = self.chain.invoke(query)

-        # Clean up the answer
         answer = result.get("result", "").strip()
         if "Helpful Answer:" in answer:
             answer = answer.split("Helpful Answer:")[1].strip()

-        # Format sources
         sources = []
         if result.get("source_documents"):
             for doc in result["source_documents"]:
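For readers who want to see the two-stage retriever from _create_rag_chain in isolation, here is a minimal, hedged sketch (not this repository's code): it assumes a recent LangChain package layout, a couple of throwaway documents instead of the PDF chunks, and a small MiniLM embedding model in place of the app's EMBEDDING_MODEL_NAME.

# Standalone sketch of the Stage 1 (recall) + Stage 2 (re-rank) retriever wiring.
# Documents, model name, and k values are illustrative assumptions.
from langchain_core.documents import Document
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.retrievers import EnsembleRetriever, ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank

chunks = [
    Document(page_content="Sat2Farm provides satellite-based crop advisories."),
    Document(page_content="Soil moisture is estimated from satellite imagery."),
]

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
)

# Stage 1: recall — keyword (BM25) and dense (FAISS) retrieval, equally weighted.
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = 2  # rag_chatbot.py uses k=10 over the full PDF
faiss_retriever = FAISS.from_documents(chunks, embeddings).as_retriever(search_kwargs={"k": 2})
ensemble = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])

# Stage 2: precision — Flashrank re-ranks the recalled chunks and keeps the best 3.
final_retriever = ContextualCompressionRetriever(
    base_compressor=FlashrankRerank(top_n=3),
    base_retriever=ensemble,
)

print(final_retriever.invoke("How is soil moisture measured?"))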
rank.py
CHANGED
@@ -1,67 +1,19 @@
-
 import numpy as np
 import faiss
 from sentence_transformers import SentenceTransformer
 import importlib, subprocess, sys
 from rank_bm25 import BM25Okapi
-# class HybridChatBot:
-#     def __init__(self, model_name="all-MiniLM-L6-v2", index_file="Chatbot/data/faiss.index"):
-#         # Load embeddings index
-#         self.model = SentenceTransformer(model_name)
-#         self.index = faiss.read_index(index_file)
-#         self.questions = np.load("Chatbot/data/questions.npy", allow_pickle=True)
-#         self.answers = np.load("Chatbot/data/answers.npy", allow_pickle=True)
-
-#         # Prepare BM25
-#         tokenized_corpus = [q.lower().split() for q in self.questions]
-#         self.bm25 = BM25Okapi(tokenized_corpus)
-
-#     def search(self, query, top_k, alpha):
-#         """
-#         Hybrid search:
-#         alpha = weight for BM25 vs embeddings (0.5 = equal weight)
-#         """
-#         # --- Embedding Search ---
-#         query_embedding = self.model.encode([query], convert_to_numpy=True)
-#         distances, indices = self.index.search(query_embedding, top_k)
-#         embedding_scores = {idx: 1/(1+dist) for idx, dist in zip(indices[0], distances[0])}
-
-#         # --- BM25 Search ---
-#         bm25_scores = self.bm25.get_scores(query.lower().split())
-#         bm25_top = np.argsort(bm25_scores)[::-1][:top_k]
-#         bm25_scores = {idx: bm25_scores[idx] for idx in bm25_top}
-
-#         # --- Combine Scores ---
-#         combined_scores = {}
-#         for idx in set(list(embedding_scores.keys()) + list(bm25_scores.keys())):
-#             emb_score = embedding_scores.get(idx, 0)
-#             bm_score = bm25_scores.get(idx, 0)
-#             combined_scores[idx] = alpha * bm_score + (1 - alpha) * emb_score
-
-#         # --- Sort and Return ---
-#         best = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)

-#         results = []
-#         for idx, score in best[:top_k]:
-#             results.append({
-#                 "matched_question": self.questions[idx],
-#                 "answer": self.answers[idx],
-#                 "score": float(score)
-#             })
-#         return results
 class HybridChatBot:
     def __init__(self, model_name="all-MiniLM-L6-v2", index_file="data/faiss.index", fallback_threshold=0.05):
-        # Load embeddings index
         self.model = SentenceTransformer(model_name)
         self.index = faiss.read_index(index_file)
         self.questions = np.load("data/questions.npy", allow_pickle=True)
         self.answers = np.load("data/answers.npy", allow_pickle=True)

-        # Prepare BM25
         tokenized_corpus = [q.lower().split() for q in self.questions]
         self.bm25 = BM25Okapi(tokenized_corpus)

-        # Threshold for fallback
         self.fallback_threshold = fallback_threshold

     def search(self, query, top_k=5, alpha=0.5):
@@ -69,29 +21,24 @@ class HybridChatBot:
         Hybrid search:
         alpha = weight for BM25 vs embeddings (0.5 = equal weight)
         """
-        # --- Embedding Search ---
         query_embedding = self.model.encode([query], convert_to_numpy=True)
         distances, indices = self.index.search(query_embedding, top_k)
         embedding_scores = {idx: 1/(1+dist) for idx, dist in zip(indices[0], distances[0])}

-        # --- BM25 Search ---
         bm25_scores = self.bm25.get_scores(query.lower().split())
         bm25_top = np.argsort(bm25_scores)[::-1][:top_k]
         bm25_scores = {idx: bm25_scores[idx] for idx in bm25_top}

-        # --- Combine Scores ---
         combined_scores = {}
         for idx in set(list(embedding_scores.keys()) + list(bm25_scores.keys())):
             emb_score = embedding_scores.get(idx, 0)
             bm_score = bm25_scores.get(idx, 0)
             combined_scores[idx] = alpha * bm_score + (1 - alpha) * emb_score

-        # --- Sort and Return ---
         best = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)

         results = []
         if not best or best[0][1] < self.fallback_threshold:
-            # Low confidence → fallback message
             results.append({
                 "matched_question": None,
                 "answer": "Sorry, I couldn't find a reliable answer. Please contact our support team.",
@@ -105,4 +52,4 @@ class HybridChatBot:
                 "score": float(score)
             })

-        return results
+        return results
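The scoring in HybridChatBot.search blends the two signals linearly: a FAISS L2 distance is squashed to 1/(1+dist) and mixed with the raw BM25 score, with alpha as the BM25 weight (note that raw BM25 scores are not bounded to [0, 1], so the two terms sit on different scales). A small worked example with made-up numbers:

# Worked example of the hybrid score blend (all numbers are made up).
alpha = 0.5
faiss_hits = {0: 0.40, 3: 1.10}   # idx -> L2 distance from the FAISS search
bm25_hits  = {0: 2.30, 7: 1.70}   # idx -> raw BM25 score

# Convert distances to similarity-like scores, exactly as in search().
embedding_scores = {i: 1.0 / (1.0 + d) for i, d in faiss_hits.items()}

combined = {}
for i in set(embedding_scores) | set(bm25_hits):
    combined[i] = alpha * bm25_hits.get(i, 0) + (1 - alpha) * embedding_scores.get(i, 0)

for i, s in sorted(combined.items(), key=lambda x: x[1], reverse=True):
    print(i, round(s, 3))
# -> 0 1.507, 7 0.85, 3 0.238
# Index 0 wins because it scores on both signals; a top score below
# fallback_threshold would instead trigger the "contact support" fallback answer.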
recommender.py
CHANGED
@@ -1,8 +1,6 @@
-# recommend.py (Upgraded Version)
-
 import faiss
 import numpy as np
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.embeddings import HuggingFaceEmbeddings

 class QuestionRecommender:
     def __init__(self, faiss_index_path, questions_path, embedding_model: HuggingFaceEmbeddings, top_k=5):
@@ -12,7 +10,7 @@ class QuestionRecommender:
         print("Initializing Question Recommender...")
         self.index = faiss.read_index(faiss_index_path)
         self.questions = np.load(questions_path, allow_pickle=True)
-        self.embedding_model = embedding_model
+        self.embedding_model = embedding_model
         self.top_k = top_k
         self.start_questions = [
             "What is Sat2Farm?",
@@ -39,23 +37,17 @@
         self.history.append(self.current_recommendations)

         embedding = None
-        # First, try the fast path: see if the query is a known question
         try:
             q_idx = np.where(self.questions == query)[0][0]
-            # If found, reconstruct its embedding directly from the index (very fast)
             embedding = self.index.reconstruct(int(q_idx)).reshape(1, -1)
             print(f"Recommending based on known question: '{query}'")
         except IndexError:
-            # This is the new, powerful part!
-            # If the query is not in our list, embed it on the fly.
             print(f"Recommending based on new user query: '{query}'")
             embedding = np.array(self.embedding_model.embed_query(query)).reshape(1, -1)

         if embedding is not None:
-            # Search for similar question embeddings in our FAISS index
             distances, indices = self.index.search(embedding, self.top_k + 1)

-            # Create the list of recommended questions from the search results
             recommended = [
                 self.questions[i] for i in indices[0]
                 if i < len(self.questions) and self.questions[i] != query
@@ -64,10 +56,8 @@ class QuestionRecommender:
             self.current_recommendations = recommended[:self.top_k]
             return self.current_recommendations

-        # Fallback if something went wrong
         return self.start_questions

-
     def go_back(self):
         """Returns the previous set of recommended questions from history."""
         if self.history:
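The recommend flow above has two paths to a query vector: if the query matches a catalogued question, its stored embedding is reconstructed straight from the FAISS index; otherwise the free-form text is embedded on the fly. A self-contained sketch of that idea (random stand-in vectors and a hypothetical embed_fn; not this repository's code):

# Sketch of the known-question fast path vs. the embed-on-the-fly path.
# Vectors are random stand-ins; embed_fn is a hypothetical substitute for
# HuggingFaceEmbeddings.embed_query.
import faiss
import numpy as np

dim = 384  # assumed embedding width (MiniLM-style)
questions = np.array(["What is Sat2Farm?", "How do I register my farm?"])
vectors = np.random.rand(len(questions), dim).astype("float32")

index = faiss.IndexFlatL2(dim)
index.add(vectors)

def query_vector(query: str, embed_fn) -> np.ndarray:
    hits = np.where(questions == query)[0]
    if hits.size:
        # Fast path: the query is already in the catalogue, so pull its stored vector.
        return index.reconstruct(int(hits[0])).reshape(1, -1)
    # Slow path: a free-form query gets embedded now.
    return np.array(embed_fn(query), dtype="float32").reshape(1, -1)

vec = query_vector("What is Sat2Farm?", embed_fn=lambda q: np.random.rand(dim))
_, idx = index.search(vec, 2)  # top_k + 1 in the real code, so the query itself can be dropped
print([questions[i] for i in idx[0]])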