LarFii committed on
Commit
fcdd61e
·
1 Parent(s): e3bfe1a

Fix edit entity and relation bugs

Browse files
lightrag/kg/chroma_impl.py CHANGED
@@ -229,3 +229,43 @@ class ChromaVectorDBStorage(BaseVectorStorage):
229
  except Exception as e:
230
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
231
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  except Exception as e:
230
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
231
  raise
232
+
233
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    The whole collection is read through ``self._collection.get`` and
    filtered in Python, because ChromaDB doesn't directly support prefix
    search on IDs.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        One dict per matching record holding ``id``, ``content`` and
        ``vector`` plus the record's metadata entries. Metadata is unpacked
        last, so a metadata key with one of those names wins.

    Raises:
        Exception: Any error raised while reading or filtering the
            collection is logged and re-raised.
    """
    try:
        results = self._collection.get(
            include=["metadatas", "documents", "embeddings"]
        )

        matching_records = []

        for i, record_id in enumerate(results["ids"]):
            if not record_id.startswith(prefix):
                continue
            # A record stored without metadata can come back as None;
            # unpacking None would raise TypeError and abort the search.
            metadata = results["metadatas"][i] or {}
            matching_records.append(
                {
                    "id": record_id,
                    "content": results["documents"][i],
                    "vector": results["embeddings"][i],
                    **metadata,
                }
            )

        logger.debug(
            f"Found {len(matching_records)} records with prefix '{prefix}'"
        )
        return matching_records

    except Exception as e:
        logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
        raise
lightrag/kg/faiss_impl.py CHANGED
@@ -371,3 +371,24 @@ class FaissVectorDBStorage(BaseVectorStorage):
371
  return False # Return error
372
 
373
  return True # Return success
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  return False # Return error
372
 
373
  return True # Return success
374
+
375
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Scans the metadata dicts held in ``self._id_to_meta`` and keeps those
    whose ``"__id__"`` entry starts with ``prefix``. Entries without an
    ``"__id__"`` key are skipped.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        A list of shallow copies of the matching metadata dicts, each with
        an extra ``"id"`` key mirroring ``"__id__"``.
    """
    # Only the metadata values are needed; the Faiss-side keys are unused.
    matching_records = [
        {**meta, "id": meta["__id__"]}
        for meta in self._id_to_meta.values()
        if "__id__" in meta and meta["__id__"].startswith(prefix)
    ]

    logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
    return matching_records
lightrag/kg/milvus_impl.py CHANGED
@@ -206,3 +206,28 @@ class MilvusVectorDBStorage(BaseVectorStorage):
206
 
207
  except Exception as e:
208
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  except Exception as e:
208
  logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
209
+
210
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Builds an ``id like "<prefix>%"`` filter and runs it through
    ``self._client.query`` against the collection named ``self.namespace``.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        The rows returned by the query, restricted to ``self.meta_fields``
        plus ``id``. An empty list is returned when the query raises; the
        error is logged rather than propagated.
    """
    try:
        # The prefix ends up inside a double-quoted string literal, so a raw
        # backslash or double quote would terminate or corrupt the filter.
        # NOTE(review): LIKE wildcards (% and _) inside the prefix are left
        # untouched - confirm whether callers can ever pass them.
        escaped_prefix = prefix.replace("\\", "\\\\").replace('"', '\\"')
        expression = f'id like "{escaped_prefix}%"'
        results = self._client.query(
            collection_name=self.namespace,
            filter=expression,
            output_fields=list(self.meta_fields) + ["id"],
        )

        logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
        return results

    except Exception as e:
        logger.error(f"Error searching for records with prefix '{prefix}': {e}")
        return []
lightrag/kg/mongo_impl.py CHANGED
@@ -1045,6 +1045,32 @@ class MongoVectorDBStorage(BaseVectorStorage):
1045
  except PyMongoError as e:
1046
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1047
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1048
 
1049
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1050
  collection_names = await db.list_collection_names()
 
1045
  except PyMongoError as e:
1046
  logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1047
 
1048
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Runs an anchored ``$regex`` query on ``_id`` against ``self._data``.

    Args:
        prefix: The prefix to search for in record IDs. It is matched
            literally; regex metacharacters in it carry no special meaning.

    Returns:
        The matching documents, each copied with an extra ``"id"`` key equal
        to its ``"_id"``. An empty list is returned when a ``PyMongoError``
        occurs; the error is logged rather than propagated.
    """
    import re

    try:
        # Escape the prefix so characters such as "." or "(" are matched
        # literally instead of being interpreted as regex syntax.
        cursor = self._data.find({"_id": {"$regex": f"^{re.escape(prefix)}"}})
        matching_records = await cursor.to_list(length=None)

        # Expose the MongoDB primary key under the "id" name as well.
        results = [{**doc, "id": doc["_id"]} for doc in matching_records]

        logger.debug(
            f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
        )
        return results

    except PyMongoError as e:
        logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
        return []
1073
+
1074
 
1075
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
1076
  collection_names = await db.list_collection_names()
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -236,3 +236,23 @@ class NanoVectorDBStorage(BaseVectorStorage):
236
  return False # Return error
237
 
238
  return True # Return success
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  return False # Return error
237
 
238
  return True # Return success
239
+
240
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Collect every stored record whose ID begins with ``prefix``.

    Args:
        prefix: Leading characters a record's ``"__id__"`` must have.

    Returns:
        Shallow copies of the matching records from the awaited
        ``self.client_storage``'s ``"data"`` list, each carrying an added
        ``"id"`` key equal to its ``"__id__"``. Records without an
        ``"__id__"`` key are ignored.
    """
    store = await self.client_storage
    rows = store["data"]

    # Keep rows that have an ID and whose ID starts with the prefix.
    hits = [
        {**row, "id": row["__id__"]}
        for row in rows
        if "__id__" in row and row["__id__"].startswith(prefix)
    ]

    logger.debug(f"Found {len(hits)} records with prefix '{prefix}'")
    return hits
lightrag/kg/oracle_impl.py CHANGED
@@ -494,6 +494,41 @@ class OracleVectorDBStorage(BaseVectorStorage):
494
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
495
  raise
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
 
498
  @final
499
  @dataclass
 
494
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
495
  raise
496
 
497
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Queries the table that ``namespace_to_table_name`` maps
    ``self.namespace`` to, restricted to the current workspace and ordered
    by ``id``.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        The rows returned by ``self.db.query``, or an empty list when there
        are none, the namespace has no table, or the query raises (the error
        is logged rather than propagated).
    """
    try:
        table_name = namespace_to_table_name(self.namespace)
        # Same guard as the Postgres implementation: without a table name
        # the statement below could only fail at the database.
        # NOTE(review): assumes the helper returns a falsy value for an
        # unknown namespace - confirm.
        if not table_name:
            logger.error(f"Unknown namespace for prefix search: {self.namespace}")
            return []

        # Only the table name is interpolated; the values are bound.
        search_sql = f"""
            SELECT * FROM {table_name}
            WHERE workspace = :workspace
            AND id LIKE :prefix_pattern
            ORDER BY id
        """

        params = {"workspace": self.db.workspace, "prefix_pattern": f"{prefix}%"}

        results = await self.db.query(search_sql, params, multirows=True)

        logger.debug(
            f"Found {len(results) if results else 0} records with prefix '{prefix}'"
        )
        return results or []

    except Exception as e:
        logger.error(f"Error searching records with prefix '{prefix}': {e}")
        return []
531
+
532
 
533
  @final
534
  @dataclass
lightrag/kg/postgres_impl.py CHANGED
@@ -575,6 +575,41 @@ class PGVectorStorage(BaseVectorStorage):
575
  except Exception as e:
576
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
 
579
  @final
580
  @dataclass
 
575
  except Exception as e:
576
  logger.error(f"Error deleting relations for entity {entity_name}: {e}")
577
 
578
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Queries the table that ``namespace_to_table_name`` maps
    ``self.namespace`` to, restricted to the current workspace.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        Each returned row converted to a plain ``dict``. An empty list is
        returned when the namespace has no table, the query yields nothing,
        or the query raises (the error is logged rather than propagated).
    """
    table_name = namespace_to_table_name(self.namespace)
    if not table_name:
        logger.error(f"Unknown namespace for prefix search: {self.namespace}")
        return []

    # Only the table name is interpolated; the values are bound.
    # NOTE(review): presumably the db wrapper binds the dict values to
    # $1/$2 in insertion order - confirm against self.db.query.
    search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
    params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}

    try:
        # Treat a None result as "no rows" so len() below cannot fail.
        results = await self.db.query(search_sql, params, multirows=True) or []
        logger.debug(f"Found {len(results)} records with prefix '{prefix}'")

        # The query filters on the id column of a SELECT *, so every row
        # already carries "id"; a plain dict copy is all that is needed.
        return [dict(record) for record in results]
    except Exception as e:
        logger.error(f"Error during prefix search for '{prefix}': {e}")
        return []
612
+
613
 
614
  @final
615
  @dataclass
lightrag/kg/qdrant_impl.py CHANGED
@@ -233,3 +233,45 @@ class QdrantVectorDBStorage(BaseVectorStorage):
233
  logger.debug(f"No relations found for entity {entity_name}")
234
  except Exception as e:
235
  logger.error(f"Error deleting relations for {entity_name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  logger.debug(f"No relations found for entity {entity_name}")
234
  except Exception as e:
235
  logger.error(f"Error deleting relations for {entity_name}: {e}")
236
+
237
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Scrolls through the collection named ``self.namespace`` with a filter on
    the ``id`` payload field, following the page offset returned by each
    ``scroll`` call until it is ``None``, so results are not capped at a
    single page.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        One dict per matching point: its payload entries plus an ``"id"``
        key set to the point's own ID (which replaces any ``id`` stored in
        the payload). An empty list is returned when anything raises; the
        error is logged rather than propagated.
    """
    try:
        # NOTE(review): confirm that MatchText accepts a `prefix` argument
        # in the installed qdrant-client; if it does not, this raises and
        # the method always returns [] via the handler below.
        prefix_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="id", match=models.MatchText(text=prefix, prefix=True)
                )
            ]
        )

        matching_records = []
        next_offset = None
        while True:
            points, next_offset = self._client.scroll(
                collection_name=self.namespace,
                scroll_filter=prefix_filter,
                with_payload=True,
                with_vectors=False,
                limit=1000,  # page size, not a cap on the total
                offset=next_offset,
            )
            matching_records.extend(points)
            if next_offset is None:
                break

        # NOTE(review): "id" here is the Qdrant point ID, which presumably
        # differs from the payload's original record ID - verify that
        # callers passing these IDs to delete() expect that.
        formatted_results = [
            {**(point.payload or {}), "id": point.id} for point in matching_records
        ]

        logger.debug(
            f"Found {len(formatted_results)} records with prefix '{prefix}'"
        )
        return formatted_results

    except Exception as e:
        logger.error(f"Error searching for prefix '{prefix}': {e}")
        return []
lightrag/kg/tidb_impl.py CHANGED
@@ -414,6 +414,55 @@ class TiDBVectorDBStorage(BaseVectorStorage):
414
  # Ti handles persistence automatically
415
  pass
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
 
418
  @final
419
  @dataclass
@@ -968,4 +1017,20 @@ SQL_TEMPLATES = {
968
  WHERE (source_name = :source AND target_name = :target)
969
  AND workspace = :workspace
970
  """,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
971
  }
 
414
  # Ti handles persistence automatically
415
  pass
416
 
417
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
    """Search for records with IDs starting with a specific prefix.

    Picks the prefix-search statement from ``SQL_TEMPLATES`` that matches
    ``self.namespace`` (entities, relationships or chunks) and runs it for
    the current workspace.

    Args:
        prefix: The prefix to search for in record IDs.

    Returns:
        The rows returned by ``self.db.query``. An empty list is returned
        when the namespace is not one of the three supported ones (a warning
        is logged), when there are no rows, or when the query raises (the
        error is logged rather than propagated).
    """
    # Map each supported namespace onto its SQL_TEMPLATES entry instead of
    # repeating the same SQL text inline.
    template_keys = {
        NameSpace.VECTOR_STORE_ENTITIES: "search_entity_by_prefix",
        NameSpace.VECTOR_STORE_RELATIONSHIPS: "search_relationship_by_prefix",
        NameSpace.VECTOR_STORE_CHUNKS: "search_chunk_by_prefix",
    }
    template_key = template_keys.get(self.namespace)
    if template_key is None:
        logger.warning(
            f"Namespace {self.namespace} not supported for prefix search"
        )
        return []
    sql_template = SQL_TEMPLATES[template_key]

    # Add prefix pattern parameter with % for SQL LIKE
    prefix_pattern = f"{prefix}%"
    params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}

    try:
        results = await self.db.query(sql_template, params=params, multirows=True)
        logger.debug(
            f"Found {len(results) if results else 0} records with prefix '{prefix}'"
        )
        return results if results else []
    except Exception as e:
        logger.error(f"Error searching records with prefix '{prefix}': {e}")
        return []
465
+
466
 
467
  @final
468
  @dataclass
 
1017
  WHERE (source_name = :source AND target_name = :target)
1018
  AND workspace = :workspace
1019
  """,
1020
+ # Search by prefix SQL templates
1021
+ "search_entity_by_prefix": """
1022
+ SELECT entity_id as id, name as entity_name, entity_type, description, content
1023
+ FROM LIGHTRAG_GRAPH_NODES
1024
+ WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
1025
+ """,
1026
+ "search_relationship_by_prefix": """
1027
+ SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content
1028
+ FROM LIGHTRAG_GRAPH_EDGES
1029
+ WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
1030
+ """,
1031
+ "search_chunk_by_prefix": """
1032
+ SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
1033
+ FROM LIGHTRAG_DOC_CHUNKS
1034
+ WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
1035
+ """,
1036
  }
lightrag/lightrag.py CHANGED
@@ -2044,6 +2044,9 @@ class LightRAG:
2044
  # Delete old entity record from vector database
2045
  old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2046
  await self.entities_vdb.delete([old_entity_id])
 
 
 
2047
 
2048
  # Update relationship vector representations
2049
  for src, tgt, edge_data in relations_to_update:
@@ -2171,6 +2174,15 @@ class LightRAG:
2171
  f"Relation from '{source_entity}' to '{target_entity}' does not exist"
2172
  )
2173
 
 
 
 
 
 
 
 
 
 
2174
  # 2. Update relation information in the graph
2175
  new_edge_data = {**edge_data, **updated_data}
2176
  await self.chunk_entity_relation_graph.upsert_edge(
@@ -2669,12 +2681,29 @@ class LightRAG:
2669
 
2670
  # 9. Delete source entities
2671
  for entity_name in source_entities:
2672
- # Delete entity node
2673
  await self.chunk_entity_relation_graph.delete_node(entity_name)
2674
- # Delete record from vector database
 
2675
  entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2676
  await self.entities_vdb.delete([entity_id])
2677
- logger.info(f"Deleted source entity '{entity_name}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2678
 
2679
  # 10. Save changes
2680
  await self._merge_entities_done()
 
2044
  # Delete old entity record from vector database
2045
  old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2046
  await self.entities_vdb.delete([old_entity_id])
2047
+ logger.info(
2048
+ f"Deleted old entity '{entity_name}' and its vector embedding from database"
2049
+ )
2050
 
2051
  # Update relationship vector representations
2052
  for src, tgt, edge_data in relations_to_update:
 
2174
  f"Relation from '{source_entity}' to '{target_entity}' does not exist"
2175
  )
2176
 
2177
+ # Important: First delete the old relation record from the vector database
2178
+ old_relation_id = compute_mdhash_id(
2179
+ source_entity + target_entity, prefix="rel-"
2180
+ )
2181
+ await self.relationships_vdb.delete([old_relation_id])
2182
+ logger.info(
2183
+ f"Deleted old relation record from vector database for relation {source_entity} -> {target_entity}"
2184
+ )
2185
+
2186
  # 2. Update relation information in the graph
2187
  new_edge_data = {**edge_data, **updated_data}
2188
  await self.chunk_entity_relation_graph.upsert_edge(
 
2681
 
2682
  # 9. Delete source entities
2683
  for entity_name in source_entities:
2684
+ # Delete entity node from knowledge graph
2685
  await self.chunk_entity_relation_graph.delete_node(entity_name)
2686
+
2687
+ # Delete entity record from vector database
2688
  entity_id = compute_mdhash_id(entity_name, prefix="ent-")
2689
  await self.entities_vdb.delete([entity_id])
2690
+
2691
+ # Also ensure any relationships specific to this entity are deleted from vector DB
2692
+ # This is a safety check, as these should have been transformed to the target entity already
2693
+ entity_relation_prefix = compute_mdhash_id(entity_name, prefix="rel-")
2694
+ relations_with_entity = await self.relationships_vdb.search_by_prefix(
2695
+ entity_relation_prefix
2696
+ )
2697
+ if relations_with_entity:
2698
+ relation_ids = [r["id"] for r in relations_with_entity]
2699
+ await self.relationships_vdb.delete(relation_ids)
2700
+ logger.info(
2701
+ f"Deleted {len(relation_ids)} relation records for entity '{entity_name}' from vector database"
2702
+ )
2703
+
2704
+ logger.info(
2705
+ f"Deleted source entity '{entity_name}' and its vector embedding from database"
2706
+ )
2707
 
2708
  # 10. Save changes
2709
  await self._merge_entities_done()