gzdaniel committed on
Commit
09a047e
·
1 Parent(s): 531b3da

Remove deprecated search_by_prefix

Browse files
lightrag/kg/chroma_impl.py CHANGED
@@ -243,42 +243,6 @@ class ChromaVectorDBStorage(BaseVectorStorage):
243
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
244
  raise
245
 
246
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
247
- """Search for records with IDs starting with a specific prefix.
248
-
249
- Args:
250
- prefix: The prefix to search for in record IDs
251
-
252
- Returns:
253
- List of records with matching ID prefixes
254
- """
255
- try:
256
- # Get all records from the collection
257
- # Since ChromaDB doesn't directly support prefix search on IDs,
258
- # we'll get all records and filter in Python
259
- results = self._collection.get(
260
- include=["metadatas", "documents", "embeddings"]
261
- )
262
-
263
- matching_records = []
264
-
265
- # Filter records where ID starts with the prefix
266
- for i, record_id in enumerate(results["ids"]):
267
- if record_id.startswith(prefix):
268
- matching_records.append(
269
- {
270
- "id": record_id,
271
- "content": results["documents"][i],
272
- "vector": results["embeddings"][i],
273
- **results["metadatas"][i],
274
- }
275
- )
276
-
277
- logger.debug(
278
- f"Found {len(matching_records)} records with prefix '{prefix}'"
279
- )
280
- return matching_records
281
-
282
  except Exception as e:
283
  logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
284
  raise
 
243
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
244
  raise
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  except Exception as e:
247
  logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
248
  raise
lightrag/kg/faiss_impl.py CHANGED
@@ -385,27 +385,6 @@ class FaissVectorDBStorage(BaseVectorStorage):
385
 
386
  return True # Return success
387
 
388
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
389
- """Search for records with IDs starting with a specific prefix.
390
-
391
- Args:
392
- prefix: The prefix to search for in record IDs
393
-
394
- Returns:
395
- List of records with matching ID prefixes
396
- """
397
- matching_records = []
398
-
399
- # Search for records with IDs starting with the prefix
400
- for faiss_id, meta in self._id_to_meta.items():
401
- if "__id__" in meta and meta["__id__"].startswith(prefix):
402
- # Create a copy of all metadata and add "id" field
403
- record = {**meta, "id": meta["__id__"]}
404
- matching_records.append(record)
405
-
406
- logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
407
- return matching_records
408
-
409
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
410
  """Get vector data by its ID
411
 
 
385
 
386
  return True # Return success
387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
389
  """Get vector data by its ID
390
 
lightrag/kg/milvus_impl.py CHANGED
@@ -221,31 +221,6 @@ class MilvusVectorDBStorage(BaseVectorStorage):
221
  except Exception as e:
222
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
223
 
224
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
225
- """Search for records with IDs starting with a specific prefix.
226
-
227
- Args:
228
- prefix: The prefix to search for in record IDs
229
-
230
- Returns:
231
- List of records with matching ID prefixes
232
- """
233
- try:
234
- # Use Milvus query with expression to find IDs with the given prefix
235
- expression = f'id like "{prefix}%"'
236
- results = self._client.query(
237
- collection_name=self.namespace,
238
- filter=expression,
239
- output_fields=list(self.meta_fields) + ["id"],
240
- )
241
-
242
- logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
243
- return results
244
-
245
- except Exception as e:
246
- logger.error(f"Error searching for records with prefix '{prefix}': {e}")
247
- return []
248
-
249
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
250
  """Get vector data by its ID
251
 
 
221
  except Exception as e:
222
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
225
  """Get vector data by its ID
226
 
lightrag/kg/mongo_impl.py CHANGED
@@ -1149,35 +1149,6 @@ class MongoVectorDBStorage(BaseVectorStorage):
1149
  except PyMongoError as e:
1150
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1151
 
1152
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
1153
- """Search for records with IDs starting with a specific prefix.
1154
-
1155
- Args:
1156
- prefix: The prefix to search for in record IDs
1157
-
1158
- Returns:
1159
- List of records with matching ID prefixes
1160
- """
1161
- try:
1162
- # Use MongoDB regex to find documents where _id starts with the prefix
1163
- cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}})
1164
- matching_records = await cursor.to_list(length=None)
1165
-
1166
- # Format results, ensuring created_at is included
1167
- results = [
1168
- {
1169
- **doc,
1170
- "id": doc["_id"],
1171
- "created_at": doc.get("created_at"), # Include created_at field
1172
- }
1173
- for doc in matching_records
1174
- ]
1175
-
1176
- logger.debug(
1177
- f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
1178
- )
1179
- return results
1180
-
1181
  except PyMongoError as e:
1182
  logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
1183
  return []
 
1149
  except PyMongoError as e:
1150
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
  except PyMongoError as e:
1153
  logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
1154
  return []
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -259,26 +259,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
259
 
260
  return True # Return success
261
 
262
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
263
- """Search for records with IDs starting with a specific prefix.
264
-
265
- Args:
266
- prefix: The prefix to search for in record IDs
267
-
268
- Returns:
269
- List of records with matching ID prefixes
270
- """
271
- storage = await self.client_storage
272
- matching_records = []
273
-
274
- # Search for records with IDs starting with the prefix
275
- for record in storage["data"]:
276
- if "__id__" in record and record["__id__"].startswith(prefix):
277
- matching_records.append({**record, "id": record["__id__"]})
278
-
279
- logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
280
- return matching_records
281
-
282
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
283
  """Get vector data by its ID
284
 
 
259
 
260
  return True # Return success
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
263
  """Get vector data by its ID
264
 
lightrag/kg/postgres_impl.py CHANGED
@@ -800,41 +800,6 @@ class PGVectorStorage(BaseVectorStorage):
800
  except Exception as e:
801
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
802
 
803
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
804
- """Search for records with IDs starting with a specific prefix.
805
-
806
- Args:
807
- prefix: The prefix to search for in record IDs
808
-
809
- Returns:
810
- List of records with matching ID prefixes
811
- """
812
- table_name = namespace_to_table_name(self.namespace)
813
- if not table_name:
814
- logger.error(f"Unknown namespace for prefix search: {self.namespace}")
815
- return []
816
-
817
- search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
818
- params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}
819
-
820
- try:
821
- results = await self.db.query(search_sql, params, multirows=True)
822
- logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
823
-
824
- # Format results to match the expected return format
825
- formatted_results = []
826
- for record in results:
827
- formatted_record = dict(record)
828
- # Ensure id field is available (for consistency with NanoVectorDB implementation)
829
- if "id" not in formatted_record:
830
- formatted_record["id"] = record["id"]
831
- formatted_results.append(formatted_record)
832
-
833
- return formatted_results
834
- except Exception as e:
835
- logger.error(f"Error during prefix search for '{prefix}': {e}")
836
- return []
837
-
838
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
839
  """Get vector data by its ID
840
 
 
800
  except Exception as e:
801
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
802
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
804
  """Get vector data by its ID
805
 
lightrag/kg/qdrant_impl.py CHANGED
@@ -249,46 +249,6 @@ class QdrantVectorDBStorage(BaseVectorStorage):
249
  except Exception as e:
250
  logger.error(f"Error deleting relations for {entity_name}: {e}")
251
 
252
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
253
- """Search for records with IDs starting with a specific prefix.
254
-
255
- Args:
256
- prefix: The prefix to search for in record IDs
257
-
258
- Returns:
259
- List of records with matching ID prefixes
260
- """
261
- try:
262
- # Use scroll method to find records with IDs starting with the prefix
263
- results = self._client.scroll(
264
- collection_name=self.namespace,
265
- scroll_filter=models.Filter(
266
- must=[
267
- models.FieldCondition(
268
- key="id", match=models.MatchText(text=prefix, prefix=True)
269
- )
270
- ]
271
- ),
272
- with_payload=True,
273
- with_vectors=False,
274
- limit=1000, # Adjust as needed for your use case
275
- )
276
-
277
- # Extract matching points
278
- matching_records = results[0]
279
-
280
- # Format the results to match expected return format
281
- formatted_results = [{**point.payload} for point in matching_records]
282
-
283
- logger.debug(
284
- f"Found {len(formatted_results)} records with prefix '{prefix}'"
285
- )
286
- return formatted_results
287
-
288
- except Exception as e:
289
- logger.error(f"Error searching for prefix '{prefix}': {e}")
290
- return []
291
-
292
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
293
  """Get vector data by its ID
294
 
 
249
  except Exception as e:
250
  logger.error(f"Error deleting relations for {entity_name}: {e}")
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
253
  """Get vector data by its ID
254
 
lightrag/kg/tidb_impl.py CHANGED
@@ -642,42 +642,6 @@ class TiDBVectorDBStorage(BaseVectorStorage):
642
  except Exception as e:
643
  return {"status": "error", "message": str(e)}
644
 
645
- async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
646
- """Search for records with IDs starting with a specific prefix.
647
-
648
- Args:
649
- prefix: The prefix to search for in record IDs
650
-
651
- Returns:
652
- List of records with matching ID prefixes
653
- """
654
- # Determine which table to query based on namespace
655
- if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
656
- sql_template = SQL_TEMPLATES["search_entity_by_prefix"]
657
- elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
658
- sql_template = SQL_TEMPLATES["search_relationship_by_prefix"]
659
- elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
660
- sql_template = SQL_TEMPLATES["search_chunk_by_prefix"]
661
- else:
662
- logger.warning(
663
- f"Namespace {self.namespace} not supported for prefix search"
664
- )
665
- return []
666
-
667
- # Add prefix pattern parameter with % for SQL LIKE
668
- prefix_pattern = f"{prefix}%"
669
- params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}
670
-
671
- try:
672
- results = await self.db.query(sql_template, params=params, multirows=True)
673
- logger.debug(
674
- f"Found {len(results) if results else 0} records with prefix '{prefix}'"
675
- )
676
- return results if results else []
677
- except Exception as e:
678
- logger.error(f"Error searching records with prefix '{prefix}': {e}")
679
- return []
680
-
681
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
682
  """Get vector data by its ID
683
 
@@ -1333,25 +1297,6 @@ SQL_TEMPLATES = {
1333
  WHERE (source_name = :source AND target_name = :target)
1334
  AND workspace = :workspace
1335
  """,
1336
- # Search by prefix SQL templates
1337
- "search_entity_by_prefix": """
1338
- SELECT entity_id as id, name as entity_name, entity_type, description, content,
1339
- UNIX_TIMESTAMP(createtime) as created_at
1340
- FROM LIGHTRAG_GRAPH_NODES
1341
- WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
1342
- """,
1343
- "search_relationship_by_prefix": """
1344
- SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content,
1345
- UNIX_TIMESTAMP(createtime) as created_at
1346
- FROM LIGHTRAG_GRAPH_EDGES
1347
- WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
1348
- """,
1349
- "search_chunk_by_prefix": """
1350
- SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id,
1351
- UNIX_TIMESTAMP(createtime) as created_at
1352
- FROM LIGHTRAG_DOC_CHUNKS
1353
- WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
1354
- """,
1355
  # Drop tables
1356
  "drop_specifiy_table_workspace": "DELETE FROM {table_name} WHERE workspace = :workspace",
1357
  }
 
642
  except Exception as e:
643
  return {"status": "error", "message": str(e)}
644
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
646
  """Get vector data by its ID
647
 
 
1297
  WHERE (source_name = :source AND target_name = :target)
1298
  AND workspace = :workspace
1299
  """,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1300
  # Drop tables
1301
  "drop_specifiy_table_workspace": "DELETE FROM {table_name} WHERE workspace = :workspace",
1302
  }