Remove deprecated search_by_prefix
Browse files
- lightrag/kg/chroma_impl.py +0 -36
- lightrag/kg/faiss_impl.py +0 -21
- lightrag/kg/milvus_impl.py +0 -25
- lightrag/kg/mongo_impl.py +0 -29
- lightrag/kg/nano_vector_db_impl.py +0 -20
- lightrag/kg/postgres_impl.py +0 -35
- lightrag/kg/qdrant_impl.py +0 -40
- lightrag/kg/tidb_impl.py +0 -55
lightrag/kg/chroma_impl.py
CHANGED
@@ -243,42 +243,6 @@ class ChromaVectorDBStorage(BaseVectorStorage):
|
|
243 |
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
244 |
raise
|
245 |
|
246 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
247 |
-
"""Search for records with IDs starting with a specific prefix.
|
248 |
-
|
249 |
-
Args:
|
250 |
-
prefix: The prefix to search for in record IDs
|
251 |
-
|
252 |
-
Returns:
|
253 |
-
List of records with matching ID prefixes
|
254 |
-
"""
|
255 |
-
try:
|
256 |
-
# Get all records from the collection
|
257 |
-
# Since ChromaDB doesn't directly support prefix search on IDs,
|
258 |
-
# we'll get all records and filter in Python
|
259 |
-
results = self._collection.get(
|
260 |
-
include=["metadatas", "documents", "embeddings"]
|
261 |
-
)
|
262 |
-
|
263 |
-
matching_records = []
|
264 |
-
|
265 |
-
# Filter records where ID starts with the prefix
|
266 |
-
for i, record_id in enumerate(results["ids"]):
|
267 |
-
if record_id.startswith(prefix):
|
268 |
-
matching_records.append(
|
269 |
-
{
|
270 |
-
"id": record_id,
|
271 |
-
"content": results["documents"][i],
|
272 |
-
"vector": results["embeddings"][i],
|
273 |
-
**results["metadatas"][i],
|
274 |
-
}
|
275 |
-
)
|
276 |
-
|
277 |
-
logger.debug(
|
278 |
-
f"Found {len(matching_records)} records with prefix '{prefix}'"
|
279 |
-
)
|
280 |
-
return matching_records
|
281 |
-
|
282 |
except Exception as e:
|
283 |
logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
|
284 |
raise
|
|
|
243 |
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
244 |
raise
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
except Exception as e:
|
247 |
logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
|
248 |
raise
|
lightrag/kg/faiss_impl.py
CHANGED
@@ -385,27 +385,6 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
385 |
|
386 |
return True # Return success
|
387 |
|
388 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
389 |
-
"""Search for records with IDs starting with a specific prefix.
|
390 |
-
|
391 |
-
Args:
|
392 |
-
prefix: The prefix to search for in record IDs
|
393 |
-
|
394 |
-
Returns:
|
395 |
-
List of records with matching ID prefixes
|
396 |
-
"""
|
397 |
-
matching_records = []
|
398 |
-
|
399 |
-
# Search for records with IDs starting with the prefix
|
400 |
-
for faiss_id, meta in self._id_to_meta.items():
|
401 |
-
if "__id__" in meta and meta["__id__"].startswith(prefix):
|
402 |
-
# Create a copy of all metadata and add "id" field
|
403 |
-
record = {**meta, "id": meta["__id__"]}
|
404 |
-
matching_records.append(record)
|
405 |
-
|
406 |
-
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
407 |
-
return matching_records
|
408 |
-
|
409 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
410 |
"""Get vector data by its ID
|
411 |
|
|
|
385 |
|
386 |
return True # Return success
|
387 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
388 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
389 |
"""Get vector data by its ID
|
390 |
|
lightrag/kg/milvus_impl.py
CHANGED
@@ -221,31 +221,6 @@ class MilvusVectorDBStorage(BaseVectorStorage):
|
|
221 |
except Exception as e:
|
222 |
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
223 |
|
224 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
225 |
-
"""Search for records with IDs starting with a specific prefix.
|
226 |
-
|
227 |
-
Args:
|
228 |
-
prefix: The prefix to search for in record IDs
|
229 |
-
|
230 |
-
Returns:
|
231 |
-
List of records with matching ID prefixes
|
232 |
-
"""
|
233 |
-
try:
|
234 |
-
# Use Milvus query with expression to find IDs with the given prefix
|
235 |
-
expression = f'id like "{prefix}%"'
|
236 |
-
results = self._client.query(
|
237 |
-
collection_name=self.namespace,
|
238 |
-
filter=expression,
|
239 |
-
output_fields=list(self.meta_fields) + ["id"],
|
240 |
-
)
|
241 |
-
|
242 |
-
logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
|
243 |
-
return results
|
244 |
-
|
245 |
-
except Exception as e:
|
246 |
-
logger.error(f"Error searching for records with prefix '{prefix}': {e}")
|
247 |
-
return []
|
248 |
-
|
249 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
250 |
"""Get vector data by its ID
|
251 |
|
|
|
221 |
except Exception as e:
|
222 |
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
225 |
"""Get vector data by its ID
|
226 |
|
lightrag/kg/mongo_impl.py
CHANGED
@@ -1149,35 +1149,6 @@ class MongoVectorDBStorage(BaseVectorStorage):
|
|
1149 |
except PyMongoError as e:
|
1150 |
logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
|
1151 |
|
1152 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
1153 |
-
"""Search for records with IDs starting with a specific prefix.
|
1154 |
-
|
1155 |
-
Args:
|
1156 |
-
prefix: The prefix to search for in record IDs
|
1157 |
-
|
1158 |
-
Returns:
|
1159 |
-
List of records with matching ID prefixes
|
1160 |
-
"""
|
1161 |
-
try:
|
1162 |
-
# Use MongoDB regex to find documents where _id starts with the prefix
|
1163 |
-
cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}})
|
1164 |
-
matching_records = await cursor.to_list(length=None)
|
1165 |
-
|
1166 |
-
# Format results, ensuring created_at is included
|
1167 |
-
results = [
|
1168 |
-
{
|
1169 |
-
**doc,
|
1170 |
-
"id": doc["_id"],
|
1171 |
-
"created_at": doc.get("created_at"), # Include created_at field
|
1172 |
-
}
|
1173 |
-
for doc in matching_records
|
1174 |
-
]
|
1175 |
-
|
1176 |
-
logger.debug(
|
1177 |
-
f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
|
1178 |
-
)
|
1179 |
-
return results
|
1180 |
-
|
1181 |
except PyMongoError as e:
|
1182 |
logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
|
1183 |
return []
|
|
|
1149 |
except PyMongoError as e:
|
1150 |
logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
|
1151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1152 |
except PyMongoError as e:
|
1153 |
logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
|
1154 |
return []
|
lightrag/kg/nano_vector_db_impl.py
CHANGED
@@ -259,26 +259,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
259 |
|
260 |
return True # Return success
|
261 |
|
262 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
263 |
-
"""Search for records with IDs starting with a specific prefix.
|
264 |
-
|
265 |
-
Args:
|
266 |
-
prefix: The prefix to search for in record IDs
|
267 |
-
|
268 |
-
Returns:
|
269 |
-
List of records with matching ID prefixes
|
270 |
-
"""
|
271 |
-
storage = await self.client_storage
|
272 |
-
matching_records = []
|
273 |
-
|
274 |
-
# Search for records with IDs starting with the prefix
|
275 |
-
for record in storage["data"]:
|
276 |
-
if "__id__" in record and record["__id__"].startswith(prefix):
|
277 |
-
matching_records.append({**record, "id": record["__id__"]})
|
278 |
-
|
279 |
-
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
280 |
-
return matching_records
|
281 |
-
|
282 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
283 |
"""Get vector data by its ID
|
284 |
|
|
|
259 |
|
260 |
return True # Return success
|
261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
263 |
"""Get vector data by its ID
|
264 |
|
lightrag/kg/postgres_impl.py
CHANGED
@@ -800,41 +800,6 @@ class PGVectorStorage(BaseVectorStorage):
|
|
800 |
except Exception as e:
|
801 |
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
802 |
|
803 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
804 |
-
"""Search for records with IDs starting with a specific prefix.
|
805 |
-
|
806 |
-
Args:
|
807 |
-
prefix: The prefix to search for in record IDs
|
808 |
-
|
809 |
-
Returns:
|
810 |
-
List of records with matching ID prefixes
|
811 |
-
"""
|
812 |
-
table_name = namespace_to_table_name(self.namespace)
|
813 |
-
if not table_name:
|
814 |
-
logger.error(f"Unknown namespace for prefix search: {self.namespace}")
|
815 |
-
return []
|
816 |
-
|
817 |
-
search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
|
818 |
-
params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}
|
819 |
-
|
820 |
-
try:
|
821 |
-
results = await self.db.query(search_sql, params, multirows=True)
|
822 |
-
logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
|
823 |
-
|
824 |
-
# Format results to match the expected return format
|
825 |
-
formatted_results = []
|
826 |
-
for record in results:
|
827 |
-
formatted_record = dict(record)
|
828 |
-
# Ensure id field is available (for consistency with NanoVectorDB implementation)
|
829 |
-
if "id" not in formatted_record:
|
830 |
-
formatted_record["id"] = record["id"]
|
831 |
-
formatted_results.append(formatted_record)
|
832 |
-
|
833 |
-
return formatted_results
|
834 |
-
except Exception as e:
|
835 |
-
logger.error(f"Error during prefix search for '{prefix}': {e}")
|
836 |
-
return []
|
837 |
-
|
838 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
839 |
"""Get vector data by its ID
|
840 |
|
|
|
800 |
except Exception as e:
|
801 |
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
802 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
803 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
804 |
"""Get vector data by its ID
|
805 |
|
lightrag/kg/qdrant_impl.py
CHANGED
@@ -249,46 +249,6 @@ class QdrantVectorDBStorage(BaseVectorStorage):
|
|
249 |
except Exception as e:
|
250 |
logger.error(f"Error deleting relations for {entity_name}: {e}")
|
251 |
|
252 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
253 |
-
"""Search for records with IDs starting with a specific prefix.
|
254 |
-
|
255 |
-
Args:
|
256 |
-
prefix: The prefix to search for in record IDs
|
257 |
-
|
258 |
-
Returns:
|
259 |
-
List of records with matching ID prefixes
|
260 |
-
"""
|
261 |
-
try:
|
262 |
-
# Use scroll method to find records with IDs starting with the prefix
|
263 |
-
results = self._client.scroll(
|
264 |
-
collection_name=self.namespace,
|
265 |
-
scroll_filter=models.Filter(
|
266 |
-
must=[
|
267 |
-
models.FieldCondition(
|
268 |
-
key="id", match=models.MatchText(text=prefix, prefix=True)
|
269 |
-
)
|
270 |
-
]
|
271 |
-
),
|
272 |
-
with_payload=True,
|
273 |
-
with_vectors=False,
|
274 |
-
limit=1000, # Adjust as needed for your use case
|
275 |
-
)
|
276 |
-
|
277 |
-
# Extract matching points
|
278 |
-
matching_records = results[0]
|
279 |
-
|
280 |
-
# Format the results to match expected return format
|
281 |
-
formatted_results = [{**point.payload} for point in matching_records]
|
282 |
-
|
283 |
-
logger.debug(
|
284 |
-
f"Found {len(formatted_results)} records with prefix '{prefix}'"
|
285 |
-
)
|
286 |
-
return formatted_results
|
287 |
-
|
288 |
-
except Exception as e:
|
289 |
-
logger.error(f"Error searching for prefix '{prefix}': {e}")
|
290 |
-
return []
|
291 |
-
|
292 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
293 |
"""Get vector data by its ID
|
294 |
|
|
|
249 |
except Exception as e:
|
250 |
logger.error(f"Error deleting relations for {entity_name}: {e}")
|
251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
253 |
"""Get vector data by its ID
|
254 |
|
lightrag/kg/tidb_impl.py
CHANGED
@@ -642,42 +642,6 @@ class TiDBVectorDBStorage(BaseVectorStorage):
|
|
642 |
except Exception as e:
|
643 |
return {"status": "error", "message": str(e)}
|
644 |
|
645 |
-
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
646 |
-
"""Search for records with IDs starting with a specific prefix.
|
647 |
-
|
648 |
-
Args:
|
649 |
-
prefix: The prefix to search for in record IDs
|
650 |
-
|
651 |
-
Returns:
|
652 |
-
List of records with matching ID prefixes
|
653 |
-
"""
|
654 |
-
# Determine which table to query based on namespace
|
655 |
-
if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
|
656 |
-
sql_template = SQL_TEMPLATES["search_entity_by_prefix"]
|
657 |
-
elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
|
658 |
-
sql_template = SQL_TEMPLATES["search_relationship_by_prefix"]
|
659 |
-
elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
|
660 |
-
sql_template = SQL_TEMPLATES["search_chunk_by_prefix"]
|
661 |
-
else:
|
662 |
-
logger.warning(
|
663 |
-
f"Namespace {self.namespace} not supported for prefix search"
|
664 |
-
)
|
665 |
-
return []
|
666 |
-
|
667 |
-
# Add prefix pattern parameter with % for SQL LIKE
|
668 |
-
prefix_pattern = f"{prefix}%"
|
669 |
-
params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}
|
670 |
-
|
671 |
-
try:
|
672 |
-
results = await self.db.query(sql_template, params=params, multirows=True)
|
673 |
-
logger.debug(
|
674 |
-
f"Found {len(results) if results else 0} records with prefix '{prefix}'"
|
675 |
-
)
|
676 |
-
return results if results else []
|
677 |
-
except Exception as e:
|
678 |
-
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
679 |
-
return []
|
680 |
-
|
681 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
682 |
"""Get vector data by its ID
|
683 |
|
@@ -1333,25 +1297,6 @@ SQL_TEMPLATES = {
|
|
1333 |
WHERE (source_name = :source AND target_name = :target)
|
1334 |
AND workspace = :workspace
|
1335 |
""",
|
1336 |
-
# Search by prefix SQL templates
|
1337 |
-
"search_entity_by_prefix": """
|
1338 |
-
SELECT entity_id as id, name as entity_name, entity_type, description, content,
|
1339 |
-
UNIX_TIMESTAMP(createtime) as created_at
|
1340 |
-
FROM LIGHTRAG_GRAPH_NODES
|
1341 |
-
WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
|
1342 |
-
""",
|
1343 |
-
"search_relationship_by_prefix": """
|
1344 |
-
SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content,
|
1345 |
-
UNIX_TIMESTAMP(createtime) as created_at
|
1346 |
-
FROM LIGHTRAG_GRAPH_EDGES
|
1347 |
-
WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
|
1348 |
-
""",
|
1349 |
-
"search_chunk_by_prefix": """
|
1350 |
-
SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id,
|
1351 |
-
UNIX_TIMESTAMP(createtime) as created_at
|
1352 |
-
FROM LIGHTRAG_DOC_CHUNKS
|
1353 |
-
WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
|
1354 |
-
""",
|
1355 |
# Drop tables
|
1356 |
"drop_specifiy_table_workspace": "DELETE FROM {table_name} WHERE workspace = :workspace",
|
1357 |
}
|
|
|
642 |
except Exception as e:
|
643 |
return {"status": "error", "message": str(e)}
|
644 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
645 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
646 |
"""Get vector data by its ID
|
647 |
|
|
|
1297 |
WHERE (source_name = :source AND target_name = :target)
|
1298 |
AND workspace = :workspace
|
1299 |
""",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1300 |
# Drop tables
|
1301 |
"drop_specifiy_table_workspace": "DELETE FROM {table_name} WHERE workspace = :workspace",
|
1302 |
}
|