Update rag_engine.py
rag_engine.py (CHANGED, +19 -0)
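Summary of the change (derived from the hunks below): Streamlit caching decorators are added to the heavy functions (st.cache_resource for the embedding model and data files, st.cache_data with TTLs for GCS downloads, embeddings, retrieval, answers, and query processing), and explicit del/gc.collect() cleanup is added at the end of retrieve_passages, answer_with_llm, and process_query. A condensed sketch of the pattern follows the diff.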
@@ -51,6 +51,7 @@ def setup_openai_client():
         print(f"❌ OpenAI client initialization error: {str(e)}")
         return False
 
+@st.cache_resource
 def load_model():
     """Load the embedding model and store in session state"""
     try:
@@ -90,6 +91,7 @@ def load_model():
         # Return None values - don't raise exception
         return None, None
 
+@st.cache_data(ttl=3600)
 def download_file_from_gcs(bucket, gcs_path, local_path):
     """Download a file from GCS to local storage."""
     try:
@@ -106,6 +108,7 @@ def download_file_from_gcs(bucket, gcs_path, local_path):
         print(f"❌ Error downloading {gcs_path}: {str(e)}")
         return False
 
+@st.cache_resource
 def load_data_files():
     """Load FAISS index, text chunks, and metadata"""
     # Check if already loaded in session state
@@ -178,6 +181,7 @@ def average_pool(last_hidden_states, attention_mask):
 # Cache for query embeddings
 query_embedding_cache = {}
 
+@st.cache_data(ttl=1800)
 def get_embedding(text):
     """Generate embeddings for a text query"""
     # Check cache first
@@ -227,6 +231,7 @@ def get_embedding(text):
         print(f"❌ Embedding error: {str(e)}")
         return np.zeros((1, 384), dtype=np.float32)
 
+@st.cache_data(ttl=900)
 def retrieve_passages(query, faiss_index, text_chunks, metadata_dict, top_k=5, similarity_threshold=0.5):
     """Retrieve top-k most relevant passages using FAISS with metadata."""
     try:
@@ -271,12 +276,17 @@ def retrieve_passages(query, faiss_index, text_chunks, metadata_dict, top_k=5, s
             if len(retrieved_passages) == top_k:
                 break
 
+        # Clean up
+        del query_embedding, distances, indices
+        gc.collect()
+
         print(f"Retrieved {len(retrieved_passages)} passages")
         return retrieved_passages, retrieved_sources
     except Exception as e:
         print(f"❌ Error in retrieve_passages: {str(e)}")
         return [], []
 
+@st.cache_data(ttl=1800)
 def answer_with_llm(query, context=None, word_limit=100):
     """Generate an answer using OpenAI GPT model with formatted citations."""
     try:
@@ -338,6 +348,10 @@ def answer_with_llm(query, context=None, word_limit=100):
         if not answer.endswith((".", "!", "?")):
             answer += "."
 
+        # Clean up
+        del response, formatted_context, system_message, user_message
+        gc.collect()
+
         return answer
 
     except Exception as e:
@@ -356,6 +370,7 @@ def format_citations(sources):
 
     return "\n".join(formatted_citations)
 
+@st.cache_data(ttl=3600)
 def process_query(query, top_k=5, word_limit=100):
     """Process a query through the RAG pipeline with proper formatting."""
     print(f"\n🔍 Processing query: {query}")
@@ -390,4 +405,8 @@ def process_query(query, top_k=5, word_limit=100):
     else:
         llm_answer_with_rag = "⚠️ No relevant context found."
 
+    # Clean up
+    del retrieved_context, retrieved_sources
+    gc.collect()
+
     return {"query": query, "answer_with_rag": llm_answer_with_rag, "citations": sources}
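The 19 added lines combine two techniques: Streamlit's caches (st.cache_resource for objects that are expensive and not serializable, such as the model and the FAISS data files; st.cache_data with TTLs for results that can be memoized per argument) so heavy work is not repeated on every rerun, plus manual del and gc.collect() passes to keep the Space's memory footprint bounded. The sketch below shows the pattern in isolation; it is a minimal illustration, not the actual rag_engine.py implementation, and the function names and bodies are placeholders (only the decorators, TTL values, and cleanup calls mirror the diff).

import gc

import numpy as np
import streamlit as st


@st.cache_resource
def load_model():
    """Heavy, non-serializable objects (embedding model, FAISS index) are
    built once per server process and shared across sessions and reruns."""
    return object()  # placeholder for the real model-loading code


@st.cache_data(ttl=1800)
def get_embedding(text: str) -> np.ndarray:
    """Serializable results are memoized per argument and expire after
    ttl seconds (30 minutes here, matching the decorator in the commit)."""
    model = load_model()  # cheap: returns the already-cached instance
    embedding = np.zeros((1, 384), dtype=np.float32)  # placeholder result

    # Explicit cleanup, as added to retrieve_passages, answer_with_llm, and
    # process_query in the diff: drop local temporaries and force a
    # garbage-collection pass before returning.
    del model
    gc.collect()
    return embedding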