gabrielaltay committed on
Commit
eeef8f5
·
1 Parent(s): 2681dfc
src/legisqa_local/app.py CHANGED
@@ -36,10 +36,8 @@ def main():
36
  setup_chromadb()
37
  logger.info("✅ ChromaDB setup complete")
38
 
39
- # Initialize vectorstore (load once and cache)
40
- logger.info("🔄 Initializing vectorstore...")
41
  initialize_vectorstore()
42
- logger.info("✅ Vectorstore initialization complete")
43
 
44
  # Main content
45
  st.title(":classical_building: LegisQA :classical_building:")
 
36
  setup_chromadb()
37
  logger.info("✅ ChromaDB setup complete")
38
 
39
+ # Initialize vectorstore (load once and cache in session state)
 
40
  initialize_vectorstore()
 
41
 
42
  # Main content
43
  st.title(":classical_building: LegisQA :classical_building:")
src/legisqa_local/components/sidebar.py CHANGED
@@ -3,28 +3,29 @@
3
  import streamlit as st
4
  import os
5
  from legisqa_local.config.settings import get_chroma_config
 
6
 
7
 
8
  def render_chromadb_status():
9
  """Render ChromaDB status in sidebar"""
10
  st.subheader("🗄️ Vector Database")
11
 
12
- try:
13
- config = get_chroma_config()
14
- chromadb_path = config["persist_directory"]
15
-
16
- if os.path.exists(chromadb_path):
17
  st.success("✅ ChromaDB Ready")
18
- st.caption("📊 Using pre-existing database")
 
 
19
  st.caption(f"πŸ“ Collection: {config['collection_name']}")
20
- st.caption(f"πŸ“ Path: .../{os.path.basename(os.path.dirname(chromadb_path))}")
21
- else:
22
- st.error("❌ ChromaDB Not Found")
23
- st.caption(f"Expected path: {chromadb_path}")
24
- st.caption("Please check the database path")
25
- except Exception as e:
26
- st.error("❌ ChromaDB Configuration Error")
27
- st.caption(f"Error: {str(e)[:50]}...")
28
 
29
 
30
  def render_outreach_links():
 
3
  import streamlit as st
4
  import os
5
  from legisqa_local.config.settings import get_chroma_config
6
+ from legisqa_local.core.vectorstore import get_vectorstore
7
 
8
 
9
  def render_chromadb_status():
10
  """Render ChromaDB status in sidebar"""
11
  st.subheader("🗄️ Vector Database")
12
 
13
+ vectorstore = get_vectorstore()
14
+ if vectorstore is not None:
15
+ try:
16
+ # Test the vectorstore to get document count
17
+ count = vectorstore._collection.count()
18
  st.success("✅ ChromaDB Ready")
19
+ st.caption(f"📊 {count:,} documents loaded")
20
+
21
+ config = get_chroma_config()
22
  st.caption(f"πŸ“ Collection: {config['collection_name']}")
23
+ except Exception as e:
24
+ st.warning("⚠️ ChromaDB Loaded (verification failed)")
25
+ st.caption(f"Error: {str(e)[:50]}...")
26
+ else:
27
+ st.info("⏳ ChromaDB Loading...")
28
+ st.caption("Vectorstore is being initialized")
 
 
29
 
30
 
31
  def render_outreach_links():
src/legisqa_local/core/rag.py CHANGED
@@ -4,7 +4,7 @@ from langchain_core.prompts import ChatPromptTemplate
4
  from langchain_core.runnables import RunnableParallel, RunnablePassthrough
5
 
6
  from legisqa_local.core.llm import get_llm
7
- from legisqa_local.core.vectorstore import load_vectorstore, get_vectorstore_filter
8
  from legisqa_local.utils.formatting import format_docs
9
 
10
 
@@ -41,7 +41,15 @@ Query: {query}"""
41
 
42
  def process_query(gen_config: dict, ret_config: dict, query: str):
43
  """Process a query using RAG"""
44
- vectorstore = load_vectorstore()
 
 
 
 
 
 
 
 
45
  llm = get_llm(gen_config)
46
  vs_filter = get_vectorstore_filter(ret_config)
47
 
 
4
  from langchain_core.runnables import RunnableParallel, RunnablePassthrough
5
 
6
  from legisqa_local.core.llm import get_llm
7
+ from legisqa_local.core.vectorstore import get_vectorstore, get_vectorstore_filter
8
  from legisqa_local.utils.formatting import format_docs
9
 
10
 
 
41
 
42
  def process_query(gen_config: dict, ret_config: dict, query: str):
43
  """Process a query using RAG"""
44
+ # Check if vectorstore is loaded
45
+ vectorstore = get_vectorstore()
46
+ if vectorstore is None:
47
+ return {
48
+ "aimessage": "⏳ Vectorstore is still loading. Please wait a moment and try again.",
49
+ "docs": [],
50
+ "query": query
51
+ }
52
+
53
  llm = get_llm(gen_config)
54
  vs_filter = get_vectorstore_filter(ret_config)
55
 
src/legisqa_local/core/vectorstore.py CHANGED
@@ -11,65 +11,42 @@ logger = logging.getLogger(__name__)
11
 
12
 
13
  def load_vectorstore():
14
- """Load and return the ChromaDB vectorstore (cached in session state)"""
15
- # Check if vectorstore is already loaded in session state
16
- if hasattr(st, 'session_state') and hasattr(st.session_state, 'vectorstore'):
17
- logger.debug("Using cached vectorstore from session state")
18
- return st.session_state.vectorstore
19
-
20
  logger.info("🔄 Loading ChromaDB vectorstore...")
21
  config = get_chroma_config()
22
  emb_fn = load_embeddings()
23
 
24
- # Debug logging to identify path issues
25
- logger.info(f"πŸ” Vectorstore config:")
26
- logger.info(f" persist_directory: {config['persist_directory']}")
27
- logger.info(f" collection_name: {config['collection_name']}")
28
- logger.info(f"🌍 Environment variables:")
29
- for key, value in os.environ.items():
30
- if "CHROMA" in key:
31
- logger.info(f" {key}={value}")
32
-
33
- # Check if the directory actually exists
34
- if not os.path.exists(config["persist_directory"]):
35
- logger.error(f"❌ ChromaDB directory does not exist: {config['persist_directory']}")
36
- # Try to find the correct path
37
- if os.path.exists("/data/chromadb"):
38
- logger.info(f"🔧 Found ChromaDB at /data/chromadb, updating config")
39
- config["persist_directory"] = "/data/chromadb"
40
- os.environ["CHROMA_PERSIST_DIRECTORY"] = "/data/chromadb"
41
-
42
  vectorstore = Chroma(
43
  persist_directory=config["persist_directory"],
44
  collection_name=config["collection_name"],
45
  embedding_function=emb_fn,
46
  )
47
 
48
- # Cache in session state for future use
49
- if hasattr(st, 'session_state'):
50
- st.session_state.vectorstore = vectorstore
51
- logger.info("✅ Vectorstore loaded and cached in session state")
52
- else:
53
- logger.info("✅ Vectorstore loaded (session state not available)")
54
-
55
  return vectorstore
56
 
57
 
58
  def initialize_vectorstore():
59
- """Initialize the vectorstore at application startup"""
60
  logger.info("🚀 Initializing vectorstore at startup...")
61
- vectorstore = load_vectorstore()
62
 
63
- # Test the vectorstore with a simple query to ensure it's working
64
  try:
65
- # Get collection info for verification
 
 
 
66
  collection = vectorstore._collection
67
  count = collection.count()
68
- logger.info(f"✅ Vectorstore initialized successfully - {count} documents available")
69
- return vectorstore
70
  except Exception as e:
71
- logger.error(f"❌ Error testing vectorstore: {e}")
72
- raise
 
 
 
 
 
73
 
74
 
75
  def get_vectorstore_filter(ret_config: dict) -> dict:
 
11
 
12
 
13
  def load_vectorstore():
14
+ """Load and return the ChromaDB vectorstore"""
 
 
 
 
 
15
  logger.info("🔄 Loading ChromaDB vectorstore...")
16
  config = get_chroma_config()
17
  emb_fn = load_embeddings()
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  vectorstore = Chroma(
20
  persist_directory=config["persist_directory"],
21
  collection_name=config["collection_name"],
22
  embedding_function=emb_fn,
23
  )
24
 
25
+ logger.info("✅ Vectorstore loaded successfully")
 
 
 
 
 
 
26
  return vectorstore
27
 
28
 
29
  def initialize_vectorstore():
30
+ """Initialize the vectorstore at application startup and cache in session state"""
31
  logger.info("🚀 Initializing vectorstore at startup...")
 
32
 
 
33
  try:
34
+ vectorstore = load_vectorstore()
35
+ st.session_state.vectorstore = vectorstore
36
+
37
+ # Test the vectorstore to verify it's working
38
  collection = vectorstore._collection
39
  count = collection.count()
40
+ logger.info(f"✅ Vectorstore initialized and cached - {count} documents available")
41
+
42
  except Exception as e:
43
+ logger.error(f"❌ Error initializing vectorstore: {e}")
44
+ # Don't raise - let the app continue and show loading message to users
45
+
46
+
47
+ def get_vectorstore():
48
+ """Get vectorstore from session state, or return None if not loaded"""
49
+ return getattr(st.session_state, 'vectorstore', None)
50
 
51
 
52
  def get_vectorstore_filter(ret_config: dict) -> dict: