Ara Yeroyan committed
Commit 3fc1b5f · 1 Parent(s): f8a1d41

finalize gemini version

Files changed (2):

  1. app.py +60 -7
  2. src/vectorstore.py +27 -8
app.py CHANGED
```diff
@@ -33,6 +33,7 @@ from src.config.paths import (
     CONVERSATIONS_DIR,
 )
 
+
 # ===== CRITICAL: Fix OMP_NUM_THREADS FIRST, before ANY other imports =====
 # Some libraries load at import time and will fail if OMP_NUM_THREADS is invalid
 omp_threads = os.environ.get("OMP_NUM_THREADS", "")
```
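The context around this hunk sanitizes `OMP_NUM_THREADS` before any thread-sensitive library is imported, since libraries that read the variable at import time fail on an invalid value. The actual guard is outside this diff; a minimal sketch of the pattern, with the fallback value `"1"` as an assumption:

```python
import os

# A minimal sketch (not the app's actual guard, which is outside this hunk):
# sanitize OMP_NUM_THREADS before importing libraries that read it at import
# time (torch, numpy, etc.). The fallback "1" is an assumption.
omp_threads = os.environ.get("OMP_NUM_THREADS", "")
if not omp_threads.isdigit() or int(omp_threads) < 1:
    os.environ["OMP_NUM_THREADS"] = "1"

import torch  # safe: the env var is now guaranteed to be a valid positive int
```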
```diff
@@ -72,6 +73,9 @@ if IS_DEPLOYED and HF_CACHE_DIR:
     except (PermissionError, OSError):
         # If we can't create it, log but continue (might already exist from Dockerfile)
         pass
+else:
+    from dotenv import load_dotenv
+    load_dotenv()
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
```
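The new `else:` branch loads a local `.env` only when the app is not deployed, so secrets such as `GEMINI_API_KEY` come from the hosting environment in production and from a `.env` file in development. A sketch of the same pattern; how the app actually derives `IS_DEPLOYED` is outside this diff, so a `SPACE_ID`-style check stands in for it here:

```python
import os

# Sketch: IS_DEPLOYED is derived elsewhere in the app; a SPACE_ID-style env
# var is assumed here as a stand-in for "running on a hosted platform".
IS_DEPLOYED = bool(os.environ.get("SPACE_ID"))

if not IS_DEPLOYED:
    # Local development: pull GEMINI_API_KEY etc. from a .env file.
    from dotenv import load_dotenv
    load_dotenv()

api_key = os.environ.get("GEMINI_API_KEY")  # None if neither source set it
```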
```diff
@@ -191,18 +195,35 @@ def main():
     if 'chatbot_version' not in st.session_state:
         st.session_state.chatbot_version = "v1"
 
-    # Initialize chatbot based on version (reinitialize if version changes)
+    # Initialize chatbot based on version (only if not already initialized for this version)
     chatbot_version_key = f"chatbot_{st.session_state.chatbot_version}"
-    if chatbot_version_key not in st.session_state or st.session_state.get('_last_version') != st.session_state.chatbot_version:
+
+    # Check if we need to initialize: chatbot doesn't exist OR version changed
+    needs_init = (
+        chatbot_version_key not in st.session_state or
+        st.session_state.get('_last_version') != st.session_state.chatbot_version
+    )
+
+    if needs_init:
         try:
-            with st.spinner("🔄 Loading AI models and connecting to database..."):
+            # Different spinner messages for different versions
+            if st.session_state.chatbot_version == "beta":
+                spinner_msg = "🔄 Initializing Gemini File Search..."
+            else:
+                spinner_msg = "🔄 Loading AI models and connecting to database..."
+
+            with st.spinner(spinner_msg):
                 st.session_state[chatbot_version_key] = get_chatbot(st.session_state.chatbot_version)
                 st.session_state['_last_version'] = st.session_state.chatbot_version
                 st.session_state.chatbot = st.session_state[chatbot_version_key]
-            st.success("✅ AI system ready!")
+            print("✅ AI system ready!")
         except Exception as e:
             st.error(f"❌ Failed to initialize chatbot: {str(e)}")
-            st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
+            # Only show Gemini-specific error message for beta version
+            if st.session_state.chatbot_version == "beta":
+                st.error("Please check your environment variables (GEMINI_API_KEY, GEMINI_FILESTORE_NAME for beta)")
+            else:
+                st.error("Please check your configuration and ensure all required models and databases are accessible.")
             # Reset to v1 to prevent infinite loop
             st.session_state.chatbot_version = "v1"
             st.session_state['_last_version'] = "v1"
```
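The `needs_init` refactor makes initialization idempotent across Streamlit reruns: each version gets its own session-state key, and an already-built chatbot is never rebuilt. The same cache-per-key idea, stripped of Streamlit, looks roughly like this; `get_chatbot` below is a stub standing in for the app's real factory:

```python
# Sketch of the caching contract, with a plain dict in place of
# st.session_state and a stub in place of the app's get_chatbot factory.
session_state = {}

def get_chatbot(version: str) -> str:
    print(f"building chatbot for {version}")  # expensive in the real app
    return f"<chatbot:{version}>"

def ensure_chatbot(version: str) -> str:
    key = f"chatbot_{version}"
    needs_init = (
        key not in session_state or
        session_state.get("_last_version") != version
    )
    if needs_init:
        session_state[key] = get_chatbot(version)
        session_state["_last_version"] = version
    return session_state[key]

ensure_chatbot("v1")    # builds
ensure_chatbot("v1")    # cached, no rebuild
ensure_chatbot("beta")  # builds the second version alongside the first
```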
```diff
@@ -210,6 +231,7 @@ def main():
             del st.session_state['chatbot']
             st.stop()  # Stop execution to prevent infinite loop
     else:
+        # Chatbot already initialized for this version, just use it
         st.session_state.chatbot = st.session_state[chatbot_version_key]
 
     # Reset conversation history if needed (but keep chatbot cached)
```
```diff
@@ -223,7 +245,38 @@ def main():
         st.rerun()
 
 
-    st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
+    # Version selection radio button (top right)
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        st.markdown('<p class="subtitle">Ask questions about audit reports. Use the sidebar filters to narrow down your search!</p>', unsafe_allow_html=True)
+    with col2:
+        st.markdown("<br>", unsafe_allow_html=True)  # Add some spacing
+        selected_version = st.radio(
+            "**Version:**",
+            options=["v1", "beta"],
+            index=0 if st.session_state.chatbot_version == "v1" else 1,
+            horizontal=True,
+            key="version_selector",
+            help="Select v1 (default RAG system) or beta (Gemini File Search)"
+        )
+
+    # Update version if changed
+    if selected_version != st.session_state.chatbot_version:
+        # Store the old version to check if we need to switch
+        old_version = st.session_state.chatbot_version
+        st.session_state.chatbot_version = selected_version
+
+        # If chatbot for new version already exists, just switch to it
+        new_chatbot_key = f"chatbot_{selected_version}"
+        if new_chatbot_key in st.session_state:
+            # Chatbot already exists, just switch
+            st.session_state.chatbot = st.session_state[new_chatbot_key]
+            st.session_state['_last_version'] = selected_version
+        else:
+            # Need to initialize new version - will be handled by initialization logic above
+            st.session_state['_last_version'] = old_version  # Set to old to trigger init check
+
+        st.rerun()
 
     # Show version info
     if st.session_state.chatbot_version == "beta":
```
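The radio handler distinguishes two switch paths: if the target version's chatbot is already cached it swaps instantly, otherwise it deliberately leaves `_last_version` stale so the `needs_init` check above fires on the next rerun. A condensed sketch of that decision, again with a plain dict standing in for `st.session_state`:

```python
# Sketch: the two switch paths, with a dict in place of st.session_state.
def switch_version(state: dict, selected: str) -> None:
    old = state["chatbot_version"]
    if selected == old:
        return  # nothing to do
    state["chatbot_version"] = selected
    key = f"chatbot_{selected}"
    if key in state:
        # Already built: swap instantly and mark the version as current.
        state["chatbot"] = state[key]
        state["_last_version"] = selected
    else:
        # Not built yet: keep _last_version stale so needs_init
        # triggers initialization on the next rerun.
        state["_last_version"] = old

state = {"chatbot_version": "v1", "chatbot_v1": "<bot v1>",
         "chatbot": "<bot v1>", "_last_version": "v1"}
switch_version(state, "beta")
assert state["_last_version"] == "v1"  # stale on purpose: init will run
```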
```diff
@@ -289,7 +342,7 @@ def main():
         # Determine if filename filter is active
         filename_mode = len(selected_filenames) > 0
         # Sources filter
-        st.markdown('<div class="filter-section">', unsafe_allow_html=True)
+        # st.markdown('<div class="filter-section">', unsafe_allow_html=True)
         st.markdown('<div class="filter-title">📊 Sources</div>', unsafe_allow_html=True)
         selected_sources = st.multiselect(
             "Select sources:",
```
src/vectorstore.py CHANGED
```diff
@@ -1,9 +1,20 @@
 """Vector store management and operations."""
+import os
+# Disable MPS before importing torch to prevent meta tensor issues on Mac
+os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
+os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.0")
+
 from pathlib import Path
 from typing import Dict, Any, List, Optional
 
 
 import torch
+# Disable MPS backend explicitly to prevent meta tensor issues
+if hasattr(torch.backends, 'mps'):
+    # Monkey patch to disable MPS
+    original_mps_available = torch.backends.mps.is_available
+    torch.backends.mps.is_available = lambda: False
+
 from langchain_qdrant import QdrantVectorStore
 from langchain.docstore.document import Document
 from langchain_core.embeddings import Embeddings
```
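The module-level patch makes `torch.backends.mps.is_available()` report `False` before any downstream library can auto-select the MPS device. A quick sketch of the effect; the env vars are real PyTorch knobs, but treating this combination as sufficient to avoid meta tensor errors is this commit's working assumption:

```python
import os
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")

import torch

if hasattr(torch.backends, "mps"):
    torch.backends.mps.is_available = lambda: False  # monkey patch, as in the diff

# Downstream device auto-detection now falls back to CPU on a Mac:
device = "cuda" if torch.cuda.is_available() else (
    "mps" if torch.backends.mps.is_available() else "cpu"
)
print(device)  # "cpu" on a Mac after the patch (or "cuda" on a GPU box)
```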
```diff
@@ -28,19 +39,23 @@ class MatryoshkaEmbeddings(Embeddings):
 
         if truncate_dim and "matryoshka" in model_name.lower():
             # Use SentenceTransformer directly for Matryoshka models
-            # Explicitly load on CPU first to avoid meta tensor issues
+            # Fix for meta tensor issue: Explicitly force CPU
+            # MPS is already disabled at module level
+            # Explicitly pass device="cpu" to prevent MPS/CUDA detection
             self.model = SentenceTransformer(
                 model_name,
                 truncate_dim=truncate_dim,
-                device="cpu"  # Load on CPU first, prevents meta tensor error
+                device="cpu"  # Force CPU to prevent meta tensor issues
             )
             print(f"🔧 Matryoshka model configured for {truncate_dim} dimensions")
         else:
             # Use standard HuggingFaceEmbeddings
-            # Pass device="cpu" to prevent meta tensor issues
+            # Don't pass device parameter - let it load naturally on CPU
+            # This prevents the meta tensor error
             if "model_kwargs" not in kwargs:
                 kwargs["model_kwargs"] = {}
-            kwargs["model_kwargs"]["device"] = "cpu"
+            # Remove device from model_kwargs if present to prevent meta tensor issues
+            kwargs["model_kwargs"].pop("device", None)
             self.model = HuggingFaceEmbeddings(model_name=model_name, **kwargs)
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
```
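For context, `truncate_dim` is the sentence-transformers mechanism this class relies on: the model emits full-width embeddings and the library truncates them to the requested Matryoshka dimension. A hedged usage sketch; the checkpoint name below is an illustrative assumption, any Matryoshka-trained model works:

```python
from sentence_transformers import SentenceTransformer

# The model name is an assumption: substitute the repo's actual checkpoint.
model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v1.5",  # example Matryoshka-trained model
    truncate_dim=256,                  # keep only the first 256 dimensions
    device="cpu",                      # mirror the diff's CPU-forcing fix
    trust_remote_code=True,            # this particular checkpoint needs it
)
vec = model.encode("audit finding about procurement")
assert len(vec) == 256  # truncated, not the model's native width
```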
```diff
@@ -87,12 +102,14 @@ class VectorStoreManager:
         model_name = self.config["retriever"]["model"]
         normalize = self.config["retriever"]["normalize"]
 
-        # Fix for meta tensor issue: explicitly load on CPU first
-        # This prevents HuggingFaceEmbeddings from trying to move meta tensors
-        # The model will be loaded on CPU and can be moved later if needed
+        # Fix for meta tensor issue: Force CPU usage to prevent MPS/CUDA detection
+        # The error occurs when SentenceTransformer detects MPS/CUDA and tries to move meta tensors
+        # MPS is already disabled at module level, now we explicitly force CPU in model_kwargs
         model_kwargs = {
-            "device": "cpu"  # Load on CPU first to avoid meta tensor issues
+            "device": "cpu",  # Explicitly force CPU to prevent MPS/CUDA detection
+            "trust_remote_code": True,  # Some models need this
         }
+
         encode_kwargs = {
             "normalize_embeddings": normalize,
             "batch_size": 100,
```
```diff
@@ -119,6 +136,8 @@ class VectorStoreManager:
             return embeddings
 
         # Use standard HuggingFaceEmbeddings for non-Matryoshka models
+        # Don't pass device in model_kwargs - let HuggingFaceEmbeddings handle it
+        # but ensure we're not using meta device
         embeddings = HuggingFaceEmbeddings(
             model_name=model_name,
             model_kwargs=model_kwargs,
```