Spaces:
Running
Running
Update search_utils.py
Browse files- search_utils.py +5 -7
search_utils.py
CHANGED
@@ -12,21 +12,19 @@ class MetadataManager:
|
|
12 |
self.shard_map = {}
|
13 |
self.loaded_shards = {}
|
14 |
self.total_docs = 0
|
15 |
-
self._ensure_unzipped()
|
16 |
self._build_shard_map()
|
17 |
|
18 |
def _ensure_unzipped(self):
|
19 |
-
"""Handle ZIP extraction
|
20 |
if not self.shard_dir.exists():
|
21 |
zip_path = Path("metadata_shards.zip")
|
22 |
if zip_path.exists():
|
23 |
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
24 |
zip_ref.extractall(self.shard_dir)
|
25 |
-
st.toast("📦 Metadata shards extracted successfully!", icon="✅")
|
26 |
else:
|
27 |
-
st.error("❌ Missing metadata_shards.zip file!")
|
28 |
raise FileNotFoundError("Metadata ZIP file not found")
|
29 |
-
|
30 |
def _build_shard_map(self):
|
31 |
"""Create index range to shard mapping"""
|
32 |
self.total_docs = 0
|
@@ -67,9 +65,9 @@ class SemanticSearch:
|
|
67 |
self.shard_dir = Path("compressed_shards")
|
68 |
self.model = None
|
69 |
self.index_shards = []
|
70 |
-
self.metadata_mgr = MetadataManager()
|
71 |
self.shard_sizes = []
|
72 |
-
|
73 |
@st.cache_resource
|
74 |
def load_model(_self):
|
75 |
return SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
12 |
self.shard_map = {}
|
13 |
self.loaded_shards = {}
|
14 |
self.total_docs = 0
|
15 |
+
self._ensure_unzipped() # Removed Streamlit elements from here
|
16 |
self._build_shard_map()
|
17 |
|
18 |
def _ensure_unzipped(self):
|
19 |
+
"""Handle ZIP extraction without Streamlit elements"""
|
20 |
if not self.shard_dir.exists():
|
21 |
zip_path = Path("metadata_shards.zip")
|
22 |
if zip_path.exists():
|
23 |
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
|
24 |
zip_ref.extractall(self.shard_dir)
|
|
|
25 |
else:
|
|
|
26 |
raise FileNotFoundError("Metadata ZIP file not found")
|
27 |
+
|
28 |
def _build_shard_map(self):
|
29 |
"""Create index range to shard mapping"""
|
30 |
self.total_docs = 0
|
|
|
65 |
self.shard_dir = Path("compressed_shards")
|
66 |
self.model = None
|
67 |
self.index_shards = []
|
68 |
+
self.metadata_mgr = MetadataManager() # No Streamlit elements in constructor
|
69 |
self.shard_sizes = []
|
70 |
+
|
71 |
@st.cache_resource
|
72 |
def load_model(_self):
|
73 |
return SentenceTransformer('all-MiniLM-L6-v2')
|