LarFii commited on
Commit
0ecc785
·
1 Parent(s): ccc1a22

Implement the missing methods.

Browse files
lightrag/kg/age_impl.py CHANGED
@@ -8,7 +8,7 @@ from dataclasses import dataclass
8
  from typing import Any, Dict, List, NamedTuple, Optional, Union, final
9
  import numpy as np
10
  import pipmaster as pm
11
- from lightrag.types import KnowledgeGraph
12
 
13
  from tenacity import (
14
  retry,
@@ -613,20 +613,258 @@ class AGEStorage(BaseGraphStorage):
613
  await self._driver.putconn(connection)
614
 
615
  async def delete_node(self, node_id: str) -> None:
616
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
 
618
  async def embed_nodes(
619
  self, algorithm: str
620
  ) -> tuple[np.ndarray[Any, Any], list[str]]:
621
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
622
 
623
  async def get_all_labels(self) -> list[str]:
624
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
 
626
  async def get_knowledge_graph(
627
  self, node_label: str, max_depth: int = 5
628
  ) -> KnowledgeGraph:
629
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
  async def index_done_callback(self) -> None:
632
  # AGES handles persistence automatically
 
8
  from typing import Any, Dict, List, NamedTuple, Optional, Union, final
9
  import numpy as np
10
  import pipmaster as pm
11
+ from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
12
 
13
  from tenacity import (
14
  retry,
 
613
  await self._driver.putconn(connection)
614
 
615
  async def delete_node(self, node_id: str) -> None:
616
+ """Delete a node with the specified label
617
+
618
+ Args:
619
+ node_id: The label of the node to delete
620
+ """
621
+ entity_name_label = node_id.strip('"')
622
+
623
+ query = """
624
+ MATCH (n:`{label}`)
625
+ DETACH DELETE n
626
+ """
627
+ params = {"label": AGEStorage._encode_graph_label(entity_name_label)}
628
+ try:
629
+ await self._query(query, **params)
630
+ logger.debug(f"Deleted node with label '{entity_name_label}'")
631
+ except Exception as e:
632
+ logger.error(f"Error during node deletion: {str(e)}")
633
+ raise
634
+
635
+ async def remove_nodes(self, nodes: list[str]):
636
+ """Delete multiple nodes
637
+
638
+ Args:
639
+ nodes: List of node labels to be deleted
640
+ """
641
+ for node in nodes:
642
+ await self.delete_node(node)
643
+
644
+ async def remove_edges(self, edges: list[tuple[str, str]]):
645
+ """Delete multiple edges
646
+
647
+ Args:
648
+ edges: List of edges to be deleted, each edge is a (source, target) tuple
649
+ """
650
+ for source, target in edges:
651
+ entity_name_label_source = source.strip('"')
652
+ entity_name_label_target = target.strip('"')
653
+
654
+ query = """
655
+ MATCH (source:`{src_label}`)-[r]->(target:`{tgt_label}`)
656
+ DELETE r
657
+ """
658
+ params = {
659
+ "src_label": AGEStorage._encode_graph_label(entity_name_label_source),
660
+ "tgt_label": AGEStorage._encode_graph_label(entity_name_label_target)
661
+ }
662
+ try:
663
+ await self._query(query, **params)
664
+ logger.debug(f"Deleted edge from '{entity_name_label_source}' to '{entity_name_label_target}'")
665
+ except Exception as e:
666
+ logger.error(f"Error during edge deletion: {str(e)}")
667
+ raise
668
 
669
  async def embed_nodes(
670
  self, algorithm: str
671
  ) -> tuple[np.ndarray[Any, Any], list[str]]:
672
+ """Embed nodes using the specified algorithm
673
+
674
+ Args:
675
+ algorithm: Name of the embedding algorithm
676
+
677
+ Returns:
678
+ tuple: (embedding matrix, list of node identifiers)
679
+ """
680
+ if algorithm not in self._node_embed_algorithms:
681
+ raise ValueError(f"Node embedding algorithm {algorithm} not supported")
682
+ return await self._node_embed_algorithms[algorithm]()
683
 
684
  async def get_all_labels(self) -> list[str]:
685
+ """Get all node labels in the database
686
+
687
+ Returns:
688
+ ["label1", "label2", ...] # Alphabetically sorted label list
689
+ """
690
+ query = """
691
+ MATCH (n)
692
+ RETURN DISTINCT labels(n) AS node_labels
693
+ """
694
+ results = await self._query(query)
695
+
696
+ all_labels = []
697
+ for record in results:
698
+ if record and "node_labels" in record:
699
+ for label in record["node_labels"]:
700
+ if label:
701
+ # Decode label
702
+ decoded_label = AGEStorage._decode_graph_label(label)
703
+ all_labels.append(decoded_label)
704
+
705
+ # Remove duplicates and sort
706
+ return sorted(list(set(all_labels)))
707
 
708
  async def get_knowledge_graph(
709
  self, node_label: str, max_depth: int = 5
710
  ) -> KnowledgeGraph:
711
+ """
712
+ Retrieve a connected subgraph of nodes where the label includes the specified 'node_label'.
713
+ Maximum number of nodes is constrained by the environment variable 'MAX_GRAPH_NODES' (default: 1000).
714
+ When reducing the number of nodes, the prioritization criteria are as follows:
715
+ 1. Label matching nodes take precedence (nodes containing the specified label string)
716
+ 2. Followed by nodes directly connected to the matching nodes
717
+ 3. Finally, the degree of the nodes
718
+
719
+ Args:
720
+ node_label: String to match in node labels (will match any node containing this string in its label)
721
+ max_depth: Maximum depth of the graph. Defaults to 5.
722
+
723
+ Returns:
724
+ KnowledgeGraph: Complete connected subgraph for specified node
725
+ """
726
+ max_graph_nodes = int(os.getenv("MAX_GRAPH_NODES", 1000))
727
+ result = KnowledgeGraph()
728
+ seen_nodes = set()
729
+ seen_edges = set()
730
+
731
+ # Handle special case for "*" label
732
+ if node_label == "*":
733
+ # Query all nodes and sort by degree
734
+ query = """
735
+ MATCH (n)
736
+ OPTIONAL MATCH (n)-[r]-()
737
+ WITH n, count(r) AS degree
738
+ ORDER BY degree DESC
739
+ LIMIT {max_nodes}
740
+ RETURN n, degree
741
+ """
742
+ params = {"max_nodes": max_graph_nodes}
743
+ nodes_result = await self._query(query, **params)
744
+
745
+ # Add nodes to result
746
+ node_ids = []
747
+ for record in nodes_result:
748
+ if "n" in record:
749
+ node = record["n"]
750
+ node_id = str(node.get("id", ""))
751
+ if node_id not in seen_nodes:
752
+ node_properties = {k: v for k, v in node.items()}
753
+ node_label = node.get("label", "")
754
+ result.nodes.append(
755
+ KnowledgeGraphNode(
756
+ id=node_id,
757
+ labels=[node_label],
758
+ properties=node_properties
759
+ )
760
+ )
761
+ seen_nodes.add(node_id)
762
+ node_ids.append(node_id)
763
+
764
+ # Query edges between these nodes
765
+ if node_ids:
766
+ edges_query = """
767
+ MATCH (a)-[r]->(b)
768
+ WHERE a.id IN {node_ids} AND b.id IN {node_ids}
769
+ RETURN a, r, b
770
+ """
771
+ edges_params = {"node_ids": node_ids}
772
+ edges_result = await self._query(edges_query, **edges_params)
773
+
774
+ # Add edges to result
775
+ for record in edges_result:
776
+ if "r" in record and "a" in record and "b" in record:
777
+ source = record["a"].get("id", "")
778
+ target = record["b"].get("id", "")
779
+ edge_id = f"{source}-{target}"
780
+ if edge_id not in seen_edges:
781
+ edge_properties = {k: v for k, v in record["r"].items()}
782
+ result.edges.append(
783
+ KnowledgeGraphEdge(
784
+ id=edge_id,
785
+ type="DIRECTED",
786
+ source=source,
787
+ target=target,
788
+ properties=edge_properties
789
+ )
790
+ )
791
+ seen_edges.add(edge_id)
792
+ else:
793
+ # For specific label, use partial matching
794
+ entity_name_label = node_label.strip('"')
795
+ encoded_label = AGEStorage._encode_graph_label(entity_name_label)
796
+
797
+ # Find matching start nodes
798
+ start_query = """
799
+ MATCH (n:`{label}`)
800
+ RETURN n
801
+ """
802
+ start_params = {"label": encoded_label}
803
+ start_nodes = await self._query(start_query, **start_params)
804
+
805
+ if not start_nodes:
806
+ logger.warning(f"No nodes found with label '{entity_name_label}'!")
807
+ return result
808
+
809
+ # Traverse graph from each start node
810
+ for start_node_record in start_nodes:
811
+ if "n" in start_node_record:
812
+ start_node = start_node_record["n"]
813
+ start_id = str(start_node.get("id", ""))
814
+
815
+ # Use BFS to traverse graph
816
+ query = """
817
+ MATCH (start:`{label}`)
818
+ CALL {
819
+ MATCH path = (start)-[*0..{max_depth}]->(n)
820
+ RETURN nodes(path) AS path_nodes, relationships(path) AS path_rels
821
+ }
822
+ RETURN DISTINCT path_nodes, path_rels
823
+ """
824
+ params = {"label": encoded_label, "max_depth": max_depth}
825
+ results = await self._query(query, **params)
826
+
827
+ # Extract nodes and edges from results
828
+ for record in results:
829
+ if "path_nodes" in record:
830
+ # Process nodes
831
+ for node in record["path_nodes"]:
832
+ node_id = str(node.get("id", ""))
833
+ if node_id not in seen_nodes and len(seen_nodes) < max_graph_nodes:
834
+ node_properties = {k: v for k, v in node.items()}
835
+ node_label = node.get("label", "")
836
+ result.nodes.append(
837
+ KnowledgeGraphNode(
838
+ id=node_id,
839
+ labels=[node_label],
840
+ properties=node_properties
841
+ )
842
+ )
843
+ seen_nodes.add(node_id)
844
+
845
+ if "path_rels" in record:
846
+ # Process edges
847
+ for rel in record["path_rels"]:
848
+ source = str(rel.get("start_id", ""))
849
+ target = str(rel.get("end_id", ""))
850
+ edge_id = f"{source}-{target}"
851
+ if edge_id not in seen_edges:
852
+ edge_properties = {k: v for k, v in rel.items()}
853
+ result.edges.append(
854
+ KnowledgeGraphEdge(
855
+ id=edge_id,
856
+ type=rel.get("label", "DIRECTED"),
857
+ source=source,
858
+ target=target,
859
+ properties=edge_properties
860
+ )
861
+ )
862
+ seen_edges.add(edge_id)
863
+
864
+ logger.info(
865
+ f"Subgraph query successful | Node count: {len(result.nodes)} | Edge count: {len(result.edges)}"
866
+ )
867
+ return result
868
 
869
  async def index_done_callback(self) -> None:
870
  # AGES handles persistence automatically
lightrag/kg/chroma_impl.py CHANGED
@@ -193,7 +193,37 @@ class ChromaVectorDBStorage(BaseVectorStorage):
193
  pass
194
 
195
  async def delete_entity(self, entity_name: str) -> None:
196
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
197
 
198
  async def delete_entity_relation(self, entity_name: str) -> None:
199
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  pass
194
 
195
  async def delete_entity(self, entity_name: str) -> None:
196
+ """Delete an entity by its ID.
197
+
198
+ Args:
199
+ entity_name: The ID of the entity to delete
200
+ """
201
+ try:
202
+ logger.info(f"Deleting entity with ID {entity_name} from {self.namespace}")
203
+ self._collection.delete(ids=[entity_name])
204
+ except Exception as e:
205
+ logger.error(f"Error during entity deletion: {str(e)}")
206
+ raise
207
 
208
  async def delete_entity_relation(self, entity_name: str) -> None:
209
+ """Delete an entity and its relations by ID.
210
+ In vector DB context, this is equivalent to delete_entity.
211
+
212
+ Args:
213
+ entity_name: The ID of the entity to delete
214
+ """
215
+ await self.delete_entity(entity_name)
216
+
217
+ async def delete(self, ids: list[str]) -> None:
218
+ """Delete vectors with specified IDs
219
+
220
+ Args:
221
+ ids: List of vector IDs to be deleted
222
+ """
223
+ try:
224
+ logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
225
+ self._collection.delete(ids=ids)
226
+ logger.debug(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
227
+ except Exception as e:
228
+ logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
229
+ raise
lightrag/kg/gremlin_impl.py CHANGED
@@ -16,7 +16,7 @@ from tenacity import (
16
  wait_exponential,
17
  )
18
 
19
- from lightrag.types import KnowledgeGraph
20
  from lightrag.utils import logger
21
 
22
  from ..base import BaseGraphStorage
@@ -396,17 +396,286 @@ class GremlinStorage(BaseGraphStorage):
396
  print("Implemented but never called.")
397
 
398
  async def delete_node(self, node_id: str) -> None:
399
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  async def embed_nodes(
402
  self, algorithm: str
403
  ) -> tuple[np.ndarray[Any, Any], list[str]]:
404
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
 
406
  async def get_all_labels(self) -> list[str]:
407
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
  async def get_knowledge_graph(
410
  self, node_label: str, max_depth: int = 5
411
  ) -> KnowledgeGraph:
412
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  wait_exponential,
17
  )
18
 
19
+ from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
20
  from lightrag.utils import logger
21
 
22
  from ..base import BaseGraphStorage
 
396
  print("Implemented but never called.")
397
 
398
  async def delete_node(self, node_id: str) -> None:
399
+ """Delete a node with the specified entity_name
400
+
401
+ Args:
402
+ node_id: The entity_name of the node to delete
403
+ """
404
+ entity_name = GremlinStorage._fix_name(node_id)
405
+
406
+ query = f"""g
407
+ .V().has('graph', {self.graph_name})
408
+ .has('entity_name', {entity_name})
409
+ .drop()
410
+ """
411
+ try:
412
+ await self._query(query)
413
+ logger.debug(
414
+ "{%s}: Deleted node with entity_name '%s'",
415
+ inspect.currentframe().f_code.co_name,
416
+ entity_name
417
+ )
418
+ except Exception as e:
419
+ logger.error(f"Error during node deletion: {str(e)}")
420
+ raise
421
 
422
  async def embed_nodes(
423
  self, algorithm: str
424
  ) -> tuple[np.ndarray[Any, Any], list[str]]:
425
+ """
426
+ Embed nodes using the specified algorithm.
427
+ Currently, only node2vec is supported but never called.
428
+
429
+ Args:
430
+ algorithm: The name of the embedding algorithm to use
431
+
432
+ Returns:
433
+ A tuple of (embeddings, node_ids)
434
+
435
+ Raises:
436
+ NotImplementedError: If the specified algorithm is not supported
437
+ ValueError: If the algorithm is not supported
438
+ """
439
+ if algorithm not in self._node_embed_algorithms:
440
+ raise ValueError(f"Node embedding algorithm {algorithm} not supported")
441
+ return await self._node_embed_algorithms[algorithm]()
442
 
443
  async def get_all_labels(self) -> list[str]:
444
+ """
445
+ Get all node entity_names in the graph
446
+ Returns:
447
+ [entity_name1, entity_name2, ...] # Alphabetically sorted entity_name list
448
+ """
449
+ query = f"""g
450
+ .V().has('graph', {self.graph_name})
451
+ .values('entity_name')
452
+ .dedup()
453
+ .order()
454
+ """
455
+ try:
456
+ result = await self._query(query)
457
+ labels = result if result else []
458
+ logger.debug(
459
+ "{%s}: Retrieved %d labels",
460
+ inspect.currentframe().f_code.co_name,
461
+ len(labels)
462
+ )
463
+ return labels
464
+ except Exception as e:
465
+ logger.error(f"Error retrieving labels: {str(e)}")
466
+ return []
467
 
468
  async def get_knowledge_graph(
469
  self, node_label: str, max_depth: int = 5
470
  ) -> KnowledgeGraph:
471
+ """
472
+ Retrieve a connected subgraph of nodes where the entity_name includes the specified `node_label`.
473
+ Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
474
+
475
+ Args:
476
+ node_label: Entity name of the starting node
477
+ max_depth: Maximum depth of the subgraph
478
+
479
+ Returns:
480
+ KnowledgeGraph object containing nodes and edges
481
+ """
482
+ result = KnowledgeGraph()
483
+ seen_nodes = set()
484
+ seen_edges = set()
485
+
486
+ # Get maximum number of graph nodes from environment variable, default is 1000
487
+ MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
488
+
489
+ entity_name = GremlinStorage._fix_name(node_label)
490
+
491
+ # Handle special case for "*" label
492
+ if node_label == "*":
493
+ # For "*", get all nodes and their edges (limited by MAX_GRAPH_NODES)
494
+ query = f"""g
495
+ .V().has('graph', {self.graph_name})
496
+ .limit({MAX_GRAPH_NODES})
497
+ .elementMap()
498
+ """
499
+ nodes_result = await self._query(query)
500
+
501
+ # Add nodes to result
502
+ for node_data in nodes_result:
503
+ node_id = node_data.get('entity_name', str(node_data.get('id', '')))
504
+ if str(node_id) in seen_nodes:
505
+ continue
506
+
507
+ # Create node with properties
508
+ node_properties = {k: v for k, v in node_data.items() if k not in ['id', 'label']}
509
+
510
+ result.nodes.append(
511
+ KnowledgeGraphNode(
512
+ id=str(node_id),
513
+ labels=[str(node_id)],
514
+ properties=node_properties
515
+ )
516
+ )
517
+ seen_nodes.add(str(node_id))
518
+
519
+ # Get and add edges
520
+ if nodes_result:
521
+ query = f"""g
522
+ .V().has('graph', {self.graph_name})
523
+ .limit({MAX_GRAPH_NODES})
524
+ .outE()
525
+ .inV().has('graph', {self.graph_name})
526
+ .limit({MAX_GRAPH_NODES})
527
+ .path()
528
+ .by(elementMap())
529
+ .by(elementMap())
530
+ .by(elementMap())
531
+ """
532
+ edges_result = await self._query(query)
533
+
534
+ for path in edges_result:
535
+ if len(path) >= 3: # source -> edge -> target
536
+ source = path[0]
537
+ edge_data = path[1]
538
+ target = path[2]
539
+
540
+ source_id = source.get('entity_name', str(source.get('id', '')))
541
+ target_id = target.get('entity_name', str(target.get('id', '')))
542
+
543
+ edge_id = f"{source_id}-{target_id}"
544
+ if edge_id in seen_edges:
545
+ continue
546
+
547
+ # Create edge with properties
548
+ edge_properties = {k: v for k, v in edge_data.items() if k not in ['id', 'label']}
549
+
550
+ result.edges.append(
551
+ KnowledgeGraphEdge(
552
+ id=edge_id,
553
+ type="DIRECTED",
554
+ source=str(source_id),
555
+ target=str(target_id),
556
+ properties=edge_properties
557
+ )
558
+ )
559
+ seen_edges.add(edge_id)
560
+ else:
561
+ # Search for specific node and get its neighborhood
562
+ query = f"""g
563
+ .V().has('graph', {self.graph_name})
564
+ .has('entity_name', {entity_name})
565
+ .repeat(__.both().simplePath().dedup())
566
+ .times({max_depth})
567
+ .emit()
568
+ .dedup()
569
+ .limit({MAX_GRAPH_NODES})
570
+ .elementMap()
571
+ """
572
+ nodes_result = await self._query(query)
573
+
574
+ # Add nodes to result
575
+ for node_data in nodes_result:
576
+ node_id = node_data.get('entity_name', str(node_data.get('id', '')))
577
+ if str(node_id) in seen_nodes:
578
+ continue
579
+
580
+ # Create node with properties
581
+ node_properties = {k: v for k, v in node_data.items() if k not in ['id', 'label']}
582
+
583
+ result.nodes.append(
584
+ KnowledgeGraphNode(
585
+ id=str(node_id),
586
+ labels=[str(node_id)],
587
+ properties=node_properties
588
+ )
589
+ )
590
+ seen_nodes.add(str(node_id))
591
+
592
+ # Get edges between the nodes in the result
593
+ if nodes_result:
594
+ node_ids = [n.get('entity_name', str(n.get('id', ''))) for n in nodes_result]
595
+ node_ids_query = ", ".join([GremlinStorage._to_value_map(nid) for nid in node_ids])
596
+
597
+ query = f"""g
598
+ .V().has('graph', {self.graph_name})
599
+ .has('entity_name', within({node_ids_query}))
600
+ .outE()
601
+ .where(inV().has('graph', {self.graph_name})
602
+ .has('entity_name', within({node_ids_query})))
603
+ .path()
604
+ .by(elementMap())
605
+ .by(elementMap())
606
+ .by(elementMap())
607
+ """
608
+ edges_result = await self._query(query)
609
+
610
+ for path in edges_result:
611
+ if len(path) >= 3: # source -> edge -> target
612
+ source = path[0]
613
+ edge_data = path[1]
614
+ target = path[2]
615
+
616
+ source_id = source.get('entity_name', str(source.get('id', '')))
617
+ target_id = target.get('entity_name', str(target.get('id', '')))
618
+
619
+ edge_id = f"{source_id}-{target_id}"
620
+ if edge_id in seen_edges:
621
+ continue
622
+
623
+ # Create edge with properties
624
+ edge_properties = {k: v for k, v in edge_data.items() if k not in ['id', 'label']}
625
+
626
+ result.edges.append(
627
+ KnowledgeGraphEdge(
628
+ id=edge_id,
629
+ type="DIRECTED",
630
+ source=str(source_id),
631
+ target=str(target_id),
632
+ properties=edge_properties
633
+ )
634
+ )
635
+ seen_edges.add(edge_id)
636
+
637
+ logger.info(
638
+ "Subgraph query successful | Node count: %d | Edge count: %d",
639
+ len(result.nodes),
640
+ len(result.edges)
641
+ )
642
+ return result
643
+
644
+ async def remove_nodes(self, nodes: list[str]):
645
+ """Delete multiple nodes
646
+
647
+ Args:
648
+ nodes: List of node entity_names to be deleted
649
+ """
650
+ for node in nodes:
651
+ await self.delete_node(node)
652
+
653
+ async def remove_edges(self, edges: list[tuple[str, str]]):
654
+ """Delete multiple edges
655
+
656
+ Args:
657
+ edges: List of edges to be deleted, each edge is a (source, target) tuple
658
+ """
659
+ for source, target in edges:
660
+ entity_name_source = GremlinStorage._fix_name(source)
661
+ entity_name_target = GremlinStorage._fix_name(target)
662
+
663
+ query = f"""g
664
+ .V().has('graph', {self.graph_name})
665
+ .has('entity_name', {entity_name_source})
666
+ .outE()
667
+ .where(inV().has('graph', {self.graph_name})
668
+ .has('entity_name', {entity_name_target}))
669
+ .drop()
670
+ """
671
+ try:
672
+ await self._query(query)
673
+ logger.debug(
674
+ "{%s}: Deleted edge from '%s' to '%s'",
675
+ inspect.currentframe().f_code.co_name,
676
+ entity_name_source,
677
+ entity_name_target
678
+ )
679
+ except Exception as e:
680
+ logger.error(f"Error during edge deletion: {str(e)}")
681
+ raise
lightrag/kg/milvus_impl.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  from typing import Any, final
4
  from dataclasses import dataclass
5
  import numpy as np
6
- from lightrag.utils import logger
7
  from ..base import BaseVectorStorage
8
  import pipmaster as pm
9
 
@@ -124,7 +124,84 @@ class MilvusVectorDBStorage(BaseVectorStorage):
124
  pass
125
 
126
  async def delete_entity(self, entity_name: str) -> None:
127
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  async def delete_entity_relation(self, entity_name: str) -> None:
130
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from typing import Any, final
4
  from dataclasses import dataclass
5
  import numpy as np
6
+ from lightrag.utils import logger, compute_mdhash_id
7
  from ..base import BaseVectorStorage
8
  import pipmaster as pm
9
 
 
124
  pass
125
 
126
  async def delete_entity(self, entity_name: str) -> None:
127
+ """Delete an entity from the vector database
128
+
129
+ Args:
130
+ entity_name: The name of the entity to delete
131
+ """
132
+ try:
133
+ # Compute entity ID from name
134
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
135
+ logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
136
+
137
+ # Delete the entity from Milvus collection
138
+ result = self._client.delete(
139
+ collection_name=self.namespace,
140
+ pks=[entity_id]
141
+ )
142
+
143
+ if result and result.get("delete_count", 0) > 0:
144
+ logger.debug(f"Successfully deleted entity {entity_name}")
145
+ else:
146
+ logger.debug(f"Entity {entity_name} not found in storage")
147
+
148
+ except Exception as e:
149
+ logger.error(f"Error deleting entity {entity_name}: {e}")
150
 
151
  async def delete_entity_relation(self, entity_name: str) -> None:
152
+ """Delete all relations associated with an entity
153
+
154
+ Args:
155
+ entity_name: The name of the entity whose relations should be deleted
156
+ """
157
+ try:
158
+ # Search for relations where entity is either source or target
159
+ expr = f'src_id == "{entity_name}" or tgt_id == "{entity_name}"'
160
+
161
+ # Find all relations involving this entity
162
+ results = self._client.query(
163
+ collection_name=self.namespace,
164
+ filter=expr,
165
+ output_fields=["id"]
166
+ )
167
+
168
+ if not results or len(results) == 0:
169
+ logger.debug(f"No relations found for entity {entity_name}")
170
+ return
171
+
172
+ # Extract IDs of relations to delete
173
+ relation_ids = [item["id"] for item in results]
174
+ logger.debug(f"Found {len(relation_ids)} relations for entity {entity_name}")
175
+
176
+ # Delete the relations
177
+ if relation_ids:
178
+ delete_result = self._client.delete(
179
+ collection_name=self.namespace,
180
+ pks=relation_ids
181
+ )
182
+
183
+ logger.debug(f"Deleted {delete_result.get('delete_count', 0)} relations for {entity_name}")
184
+
185
+ except Exception as e:
186
+ logger.error(f"Error deleting relations for {entity_name}: {e}")
187
+
188
+ async def delete(self, ids: list[str]) -> None:
189
+ """Delete vectors with specified IDs
190
+
191
+ Args:
192
+ ids: List of vector IDs to be deleted
193
+ """
194
+ try:
195
+ # Delete vectors by IDs
196
+ result = self._client.delete(
197
+ collection_name=self.namespace,
198
+ pks=ids
199
+ )
200
+
201
+ if result and result.get("delete_count", 0) > 0:
202
+ logger.debug(f"Successfully deleted {result.get('delete_count', 0)} vectors from {self.namespace}")
203
+ else:
204
+ logger.debug(f"No vectors were deleted from {self.namespace}")
205
+
206
+ except Exception as e:
207
+ logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
lightrag/kg/mongo_impl.py CHANGED
@@ -15,7 +15,7 @@ from ..base import (
15
  DocStatusStorage,
16
  )
17
  from ..namespace import NameSpace, is_namespace
18
- from ..utils import logger
19
  from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
20
  import pipmaster as pm
21
 
@@ -333,7 +333,7 @@ class MongoGraphStorage(BaseGraphStorage):
333
  Check if there's a direct single-hop edge from source_node_id to target_node_id.
334
 
335
  We'll do a $graphLookup with maxDepth=0 from the source node—meaning
336
- Look up zero expansions.” Actually, for a direct edge check, we can do maxDepth=1
337
  and then see if the target node is in the "reachableNodes" at depth=0.
338
 
339
  But typically for a direct edge, we might just do a find_one.
@@ -795,6 +795,52 @@ class MongoGraphStorage(BaseGraphStorage):
795
  # Mongo handles persistence automatically
796
  pass
797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
 
799
  @final
800
  @dataclass
@@ -932,11 +978,66 @@ class MongoVectorDBStorage(BaseVectorStorage):
932
  # Mongo handles persistence automatically
933
  pass
934
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
935
  async def delete_entity(self, entity_name: str) -> None:
936
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
937
 
938
  async def delete_entity_relation(self, entity_name: str) -> None:
939
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
 
941
 
942
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
 
15
  DocStatusStorage,
16
  )
17
  from ..namespace import NameSpace, is_namespace
18
+ from ..utils import logger, compute_mdhash_id
19
  from ..types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
20
  import pipmaster as pm
21
 
 
333
  Check if there's a direct single-hop edge from source_node_id to target_node_id.
334
 
335
  We'll do a $graphLookup with maxDepth=0 from the source node—meaning
336
+ "Look up zero expansions." Actually, for a direct edge check, we can do maxDepth=1
337
  and then see if the target node is in the "reachableNodes" at depth=0.
338
 
339
  But typically for a direct edge, we might just do a find_one.
 
795
  # Mongo handles persistence automatically
796
  pass
797
 
798
+ async def remove_nodes(self, nodes: list[str]) -> None:
799
+ """Delete multiple nodes
800
+
801
+ Args:
802
+ nodes: List of node IDs to be deleted
803
+ """
804
+ logger.info(f"Deleting {len(nodes)} nodes")
805
+ if not nodes:
806
+ return
807
+
808
+ # 1. Remove all edges referencing these nodes (remove from edges array of other nodes)
809
+ await self.collection.update_many(
810
+ {},
811
+ {"$pull": {"edges": {"target": {"$in": nodes}}}}
812
+ )
813
+
814
+ # 2. Delete the node documents
815
+ await self.collection.delete_many({"_id": {"$in": nodes}})
816
+
817
+ logger.debug(f"Successfully deleted nodes: {nodes}")
818
+
819
+ async def remove_edges(self, edges: list[tuple[str, str]]) -> None:
820
+ """Delete multiple edges
821
+
822
+ Args:
823
+ edges: List of edges to be deleted, each edge is a (source, target) tuple
824
+ """
825
+ logger.info(f"Deleting {len(edges)} edges")
826
+ if not edges:
827
+ return
828
+
829
+ update_tasks = []
830
+ for source, target in edges:
831
+ # Remove edge pointing to target from source node's edges array
832
+ update_tasks.append(
833
+ self.collection.update_one(
834
+ {"_id": source},
835
+ {"$pull": {"edges": {"target": target}}}
836
+ )
837
+ )
838
+
839
+ if update_tasks:
840
+ await asyncio.gather(*update_tasks)
841
+
842
+ logger.debug(f"Successfully deleted edges: {edges}")
843
+
844
 
845
  @final
846
  @dataclass
 
978
  # Mongo handles persistence automatically
979
  pass
980
 
981
+ async def delete(self, ids: list[str]) -> None:
982
+ """Delete vectors with specified IDs
983
+
984
+ Args:
985
+ ids: List of vector IDs to be deleted
986
+ """
987
+ logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
988
+ if not ids:
989
+ return
990
+
991
+ try:
992
+ result = await self._data.delete_many({"_id": {"$in": ids}})
993
+ logger.debug(f"Successfully deleted {result.deleted_count} vectors from {self.namespace}")
994
+ except PyMongoError as e:
995
+ logger.error(f"Error while deleting vectors from {self.namespace}: {str(e)}")
996
+
997
  async def delete_entity(self, entity_name: str) -> None:
998
+ """Delete an entity by its name
999
+
1000
+ Args:
1001
+ entity_name: Name of the entity to delete
1002
+ """
1003
+ try:
1004
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
1005
+ logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
1006
+
1007
+ result = await self._data.delete_one({"_id": entity_id})
1008
+ if result.deleted_count > 0:
1009
+ logger.debug(f"Successfully deleted entity {entity_name}")
1010
+ else:
1011
+ logger.debug(f"Entity {entity_name} not found in storage")
1012
+ except PyMongoError as e:
1013
+ logger.error(f"Error deleting entity {entity_name}: {str(e)}")
1014
 
1015
  async def delete_entity_relation(self, entity_name: str) -> None:
1016
+ """Delete all relations associated with an entity
1017
+
1018
+ Args:
1019
+ entity_name: Name of the entity whose relations should be deleted
1020
+ """
1021
+ try:
1022
+ # Find relations where entity appears as source or target
1023
+ relations_cursor = self._data.find(
1024
+ {"$or": [{"src_id": entity_name}, {"tgt_id": entity_name}]}
1025
+ )
1026
+ relations = await relations_cursor.to_list(length=None)
1027
+
1028
+ if not relations:
1029
+ logger.debug(f"No relations found for entity {entity_name}")
1030
+ return
1031
+
1032
+ # Extract IDs of relations to delete
1033
+ relation_ids = [relation["_id"] for relation in relations]
1034
+ logger.debug(f"Found {len(relation_ids)} relations for entity {entity_name}")
1035
+
1036
+ # Delete the relations
1037
+ result = await self._data.delete_many({"_id": {"$in": relation_ids}})
1038
+ logger.debug(f"Deleted {result.deleted_count} relations for {entity_name}")
1039
+ except PyMongoError as e:
1040
+ logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
1041
 
1042
 
1043
  async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
lightrag/kg/oracle_impl.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Union, final
8
  import numpy as np
9
  import configparser
10
 
11
- from lightrag.types import KnowledgeGraph
12
 
13
  from ..base import (
14
  BaseGraphStorage,
@@ -442,11 +442,55 @@ class OracleVectorDBStorage(BaseVectorStorage):
442
  # Oracles handles persistence automatically
443
  pass
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  async def delete_entity(self, entity_name: str) -> None:
446
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
  async def delete_entity_relation(self, entity_name: str) -> None:
449
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
 
452
  @final
@@ -668,15 +712,206 @@ class OracleGraphStorage(BaseGraphStorage):
668
  return res
669
 
670
  async def delete_node(self, node_id: str) -> None:
671
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
 
673
  async def get_all_labels(self) -> list[str]:
674
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
 
676
  async def get_knowledge_graph(
677
  self, node_label: str, max_depth: int = 5
678
  ) -> KnowledgeGraph:
679
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
 
681
 
682
  N_T = {
@@ -927,4 +1162,12 @@ SQL_TEMPLATES = {
927
  select 'edge' as type, TO_CHAR(id) id FROM GRAPH_TABLE (lightrag_graph
928
  MATCH (a)-[e]->(b) WHERE e.workspace=:workspace columns(e.id))
929
  )""",
 
 
 
 
 
 
 
 
930
  }
 
8
  import numpy as np
9
  import configparser
10
 
11
+ from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
12
 
13
  from ..base import (
14
  BaseGraphStorage,
 
442
  # Oracles handles persistence automatically
443
  pass
444
 
445
+ async def delete(self, ids: list[str]) -> None:
446
+ """Delete vectors with specified IDs
447
+
448
+ Args:
449
+ ids: List of vector IDs to be deleted
450
+ """
451
+ if not ids:
452
+ return
453
+
454
+ try:
455
+ SQL = SQL_TEMPLATES["delete_vectors"].format(
456
+ ids=",".join([f"'{id}'" for id in ids])
457
+ )
458
+ params = {"workspace": self.db.workspace}
459
+ await self.db.execute(SQL, params)
460
+ logger.info(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
461
+ except Exception as e:
462
+ logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
463
+ raise
464
+
465
  async def delete_entity(self, entity_name: str) -> None:
466
+ """Delete entity by name
467
+
468
+ Args:
469
+ entity_name: Name of the entity to delete
470
+ """
471
+ try:
472
+ SQL = SQL_TEMPLATES["delete_entity"]
473
+ params = {"workspace": self.db.workspace, "entity_name": entity_name}
474
+ await self.db.execute(SQL, params)
475
+ logger.info(f"Successfully deleted entity {entity_name}")
476
+ except Exception as e:
477
+ logger.error(f"Error deleting entity {entity_name}: {e}")
478
+ raise
479
 
480
  async def delete_entity_relation(self, entity_name: str) -> None:
481
+ """Delete all relations connected to an entity
482
+
483
+ Args:
484
+ entity_name: Name of the entity whose relations should be deleted
485
+ """
486
+ try:
487
+ SQL = SQL_TEMPLATES["delete_entity_relations"]
488
+ params = {"workspace": self.db.workspace, "entity_name": entity_name}
489
+ await self.db.execute(SQL, params)
490
+ logger.info(f"Successfully deleted relations for entity {entity_name}")
491
+ except Exception as e:
492
+ logger.error(f"Error deleting relations for entity {entity_name}: {e}")
493
+ raise
494
 
495
 
496
  @final
 
712
  return res
713
 
714
  async def delete_node(self, node_id: str) -> None:
715
+ """Delete a node from the graph
716
+
717
+ Args:
718
+ node_id: ID of the node to delete
719
+ """
720
+ try:
721
+ # First delete all relations connected to this node
722
+ delete_relations_sql = SQL_TEMPLATES["delete_entity_relations"]
723
+ params_relations = {"workspace": self.db.workspace, "entity_name": node_id}
724
+ await self.db.execute(delete_relations_sql, params_relations)
725
+
726
+ # Then delete the node itself
727
+ delete_node_sql = SQL_TEMPLATES["delete_entity"]
728
+ params_node = {"workspace": self.db.workspace, "entity_name": node_id}
729
+ await self.db.execute(delete_node_sql, params_node)
730
+
731
+ logger.info(f"Successfully deleted node {node_id} and all its relationships")
732
+ except Exception as e:
733
+ logger.error(f"Error deleting node {node_id}: {e}")
734
+ raise
735
 
736
  async def get_all_labels(self) -> list[str]:
737
+ """Get all unique entity types (labels) in the graph
738
+
739
+ Returns:
740
+ List of unique entity types/labels
741
+ """
742
+ try:
743
+ SQL = """
744
+ SELECT DISTINCT entity_type
745
+ FROM LIGHTRAG_GRAPH_NODES
746
+ WHERE workspace = :workspace
747
+ ORDER BY entity_type
748
+ """
749
+ params = {"workspace": self.db.workspace}
750
+ results = await self.db.query(SQL, params, multirows=True)
751
+
752
+ if results:
753
+ labels = [row["entity_type"] for row in results]
754
+ return labels
755
+ else:
756
+ return []
757
+ except Exception as e:
758
+ logger.error(f"Error retrieving entity types: {e}")
759
+ return []
760
 
761
  async def get_knowledge_graph(
762
  self, node_label: str, max_depth: int = 5
763
  ) -> KnowledgeGraph:
764
+ """Retrieve a connected subgraph starting from nodes matching the given label
765
+
766
+ Maximum number of nodes is constrained by MAX_GRAPH_NODES environment variable.
767
+ Prioritizes nodes by:
768
+ 1. Nodes matching the specified label
769
+ 2. Nodes directly connected to matching nodes
770
+ 3. Node degree (number of connections)
771
+
772
+ Args:
773
+ node_label: Label to match for starting nodes (use "*" for all nodes)
774
+ max_depth: Maximum depth of traversal from starting nodes
775
+
776
+ Returns:
777
+ KnowledgeGraph object containing nodes and edges
778
+ """
779
+ result = KnowledgeGraph()
780
+
781
+ try:
782
+ # Define maximum number of nodes to return
783
+ max_graph_nodes = int(os.environ.get("MAX_GRAPH_NODES", 1000))
784
+
785
+ if node_label == "*":
786
+ # For "*" label, get all nodes up to the limit
787
+ nodes_sql = """
788
+ SELECT name, entity_type, description, source_chunk_id
789
+ FROM LIGHTRAG_GRAPH_NODES
790
+ WHERE workspace = :workspace
791
+ ORDER BY id
792
+ FETCH FIRST :limit ROWS ONLY
793
+ """
794
+ nodes_params = {"workspace": self.db.workspace, "limit": max_graph_nodes}
795
+ nodes = await self.db.query(nodes_sql, nodes_params, multirows=True)
796
+ else:
797
+ # For specific label, find matching nodes and related nodes
798
+ nodes_sql = """
799
+ WITH matching_nodes AS (
800
+ SELECT name
801
+ FROM LIGHTRAG_GRAPH_NODES
802
+ WHERE workspace = :workspace
803
+ AND (name LIKE '%' || :node_label || '%' OR entity_type LIKE '%' || :node_label || '%')
804
+ )
805
+ SELECT n.name, n.entity_type, n.description, n.source_chunk_id,
806
+ CASE
807
+ WHEN n.name IN (SELECT name FROM matching_nodes) THEN 2
808
+ WHEN EXISTS (
809
+ SELECT 1 FROM LIGHTRAG_GRAPH_EDGES e
810
+ WHERE workspace = :workspace
811
+ AND ((e.source_name = n.name AND e.target_name IN (SELECT name FROM matching_nodes))
812
+ OR (e.target_name = n.name AND e.source_name IN (SELECT name FROM matching_nodes)))
813
+ ) THEN 1
814
+ ELSE 0
815
+ END AS priority,
816
+ (SELECT COUNT(*) FROM LIGHTRAG_GRAPH_EDGES e
817
+ WHERE workspace = :workspace
818
+ AND (e.source_name = n.name OR e.target_name = n.name)) AS degree
819
+ FROM LIGHTRAG_GRAPH_NODES n
820
+ WHERE workspace = :workspace
821
+ ORDER BY priority DESC, degree DESC
822
+ FETCH FIRST :limit ROWS ONLY
823
+ """
824
+ nodes_params = {
825
+ "workspace": self.db.workspace,
826
+ "node_label": node_label,
827
+ "limit": max_graph_nodes
828
+ }
829
+ nodes = await self.db.query(nodes_sql, nodes_params, multirows=True)
830
+
831
+ if not nodes:
832
+ logger.warning(f"No nodes found matching '{node_label}'")
833
+ return result
834
+
835
+ # Create mapping of node IDs to be used to filter edges
836
+ node_names = [node["name"] for node in nodes]
837
+
838
+ # Add nodes to result
839
+ seen_nodes = set()
840
+ for node in nodes:
841
+ node_id = node["name"]
842
+ if node_id in seen_nodes:
843
+ continue
844
+
845
+ # Create node properties dictionary
846
+ properties = {
847
+ "entity_type": node["entity_type"],
848
+ "description": node["description"] or "",
849
+ "source_id": node["source_chunk_id"] or ""
850
+ }
851
+
852
+ # Add node to result
853
+ result.nodes.append(
854
+ KnowledgeGraphNode(
855
+ id=node_id,
856
+ labels=[node["entity_type"]],
857
+ properties=properties
858
+ )
859
+ )
860
+ seen_nodes.add(node_id)
861
+
862
+ # Get edges between these nodes
863
+ edges_sql = """
864
+ SELECT source_name, target_name, weight, keywords, description, source_chunk_id
865
+ FROM LIGHTRAG_GRAPH_EDGES
866
+ WHERE workspace = :workspace
867
+ AND source_name IN (SELECT COLUMN_VALUE FROM TABLE(CAST(:node_names AS SYS.ODCIVARCHAR2LIST)))
868
+ AND target_name IN (SELECT COLUMN_VALUE FROM TABLE(CAST(:node_names AS SYS.ODCIVARCHAR2LIST)))
869
+ ORDER BY id
870
+ """
871
+ edges_params = {
872
+ "workspace": self.db.workspace,
873
+ "node_names": node_names
874
+ }
875
+ edges = await self.db.query(edges_sql, edges_params, multirows=True)
876
+
877
+ # Add edges to result
878
+ seen_edges = set()
879
+ for edge in edges:
880
+ source = edge["source_name"]
881
+ target = edge["target_name"]
882
+ edge_id = f"{source}-{target}"
883
+
884
+ if edge_id in seen_edges:
885
+ continue
886
+
887
+ # Create edge properties dictionary
888
+ properties = {
889
+ "weight": edge["weight"] or 0.0,
890
+ "keywords": edge["keywords"] or "",
891
+ "description": edge["description"] or "",
892
+ "source_id": edge["source_chunk_id"] or ""
893
+ }
894
+
895
+ # Add edge to result
896
+ result.edges.append(
897
+ KnowledgeGraphEdge(
898
+ id=edge_id,
899
+ type="RELATED",
900
+ source=source,
901
+ target=target,
902
+ properties=properties
903
+ )
904
+ )
905
+ seen_edges.add(edge_id)
906
+
907
+ logger.info(
908
+ f"Subgraph query successful | Node count: {len(result.nodes)} | Edge count: {len(result.edges)}"
909
+ )
910
+
911
+ except Exception as e:
912
+ logger.error(f"Error retrieving knowledge graph: {e}")
913
+
914
+ return result
915
 
916
 
917
  N_T = {
 
1162
  select 'edge' as type, TO_CHAR(id) id FROM GRAPH_TABLE (lightrag_graph
1163
  MATCH (a)-[e]->(b) WHERE e.workspace=:workspace columns(e.id))
1164
  )""",
1165
+ # SQL for deletion
1166
+ "delete_vectors": "DELETE FROM LIGHTRAG_DOC_CHUNKS WHERE workspace=:workspace AND id IN ({ids})",
1167
+ "delete_entity": "DELETE FROM LIGHTRAG_GRAPH_NODES WHERE workspace=:workspace AND name=:entity_name",
1168
+ "delete_entity_relations": "DELETE FROM LIGHTRAG_GRAPH_EDGES WHERE workspace=:workspace AND (source_name=:entity_name OR target_name=:entity_name)",
1169
+ "delete_node": """DELETE FROM GRAPH_TABLE (lightrag_graph
1170
+ MATCH (a)
1171
+ WHERE a.workspace=:workspace AND a.name=:node_id
1172
+ ACTION DELETE a)""",
1173
  }
lightrag/kg/postgres_impl.py CHANGED
@@ -7,7 +7,7 @@ from typing import Any, Union, final
7
  import numpy as np
8
  import configparser
9
 
10
- from lightrag.types import KnowledgeGraph
11
 
12
  import sys
13
  from tenacity import (
@@ -512,11 +512,66 @@ class PGVectorStorage(BaseVectorStorage):
512
  # PG handles persistence automatically
513
  pass
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  async def delete_entity(self, entity_name: str) -> None:
516
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
 
518
  async def delete_entity_relation(self, entity_name: str) -> None:
519
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
 
522
  @final
@@ -1086,20 +1141,192 @@ class PGGraphStorage(BaseGraphStorage):
1086
  print("Implemented but never called.")
1087
 
1088
  async def delete_node(self, node_id: str) -> None:
1089
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
 
1091
  async def embed_nodes(
1092
  self, algorithm: str
1093
  ) -> tuple[np.ndarray[Any, Any], list[str]]:
1094
- raise NotImplementedError
 
1095
 
1096
- async def get_all_labels(self) -> list[str]:
1097
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
1098
 
1099
  async def get_knowledge_graph(
1100
  self, node_label: str, max_depth: int = 5
1101
  ) -> KnowledgeGraph:
1102
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1103
 
1104
  async def drop(self) -> None:
1105
  """Drop the storage"""
 
7
  import numpy as np
8
  import configparser
9
 
10
+ from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
11
 
12
  import sys
13
  from tenacity import (
 
512
  # PG handles persistence automatically
513
  pass
514
 
515
+ async def delete(self, ids: list[str]) -> None:
516
+ """Delete vectors with specified IDs from the storage.
517
+
518
+ Args:
519
+ ids: List of vector IDs to be deleted
520
+ """
521
+ if not ids:
522
+ return
523
+
524
+ table_name = namespace_to_table_name(self.namespace)
525
+ if not table_name:
526
+ logger.error(f"Unknown namespace for vector deletion: {self.namespace}")
527
+ return
528
+
529
+ ids_list = ",".join([f"'{id}'" for id in ids])
530
+ delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id IN ({ids_list})"
531
+
532
+ try:
533
+ await self.db.execute(delete_sql, {"workspace": self.db.workspace})
534
+ logger.debug(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
535
+ except Exception as e:
536
+ logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
537
+
538
  async def delete_entity(self, entity_name: str) -> None:
539
+ """Delete an entity by its name from the vector storage.
540
+
541
+ Args:
542
+ entity_name: The name of the entity to delete
543
+ """
544
+ try:
545
+ # Construct SQL to delete the entity
546
+ delete_sql = """DELETE FROM LIGHTRAG_VDB_ENTITY
547
+ WHERE workspace=$1 AND entity_name=$2"""
548
+
549
+ await self.db.execute(
550
+ delete_sql,
551
+ {"workspace": self.db.workspace, "entity_name": entity_name}
552
+ )
553
+ logger.debug(f"Successfully deleted entity {entity_name}")
554
+ except Exception as e:
555
+ logger.error(f"Error deleting entity {entity_name}: {e}")
556
 
557
  async def delete_entity_relation(self, entity_name: str) -> None:
558
+ """Delete all relations associated with an entity.
559
+
560
+ Args:
561
+ entity_name: The name of the entity whose relations should be deleted
562
+ """
563
+ try:
564
+ # Delete relations where the entity is either the source or target
565
+ delete_sql = """DELETE FROM LIGHTRAG_VDB_RELATION
566
+ WHERE workspace=$1 AND (source_id=$2 OR target_id=$2)"""
567
+
568
+ await self.db.execute(
569
+ delete_sql,
570
+ {"workspace": self.db.workspace, "entity_name": entity_name}
571
+ )
572
+ logger.debug(f"Successfully deleted relations for entity {entity_name}")
573
+ except Exception as e:
574
+ logger.error(f"Error deleting relations for entity {entity_name}: {e}")
575
 
576
 
577
  @final
 
1141
  print("Implemented but never called.")
1142
 
1143
  async def delete_node(self, node_id: str) -> None:
1144
+ """
1145
+ Delete a node from the graph.
1146
+
1147
+ Args:
1148
+ node_id (str): The ID of the node to delete.
1149
+ """
1150
+ label = self._encode_graph_label(node_id.strip('"'))
1151
+
1152
+ query = """SELECT * FROM cypher('%s', $$
1153
+ MATCH (n:Entity {node_id: "%s"})
1154
+ DETACH DELETE n
1155
+ $$) AS (n agtype)""" % (self.graph_name, label)
1156
+
1157
+ try:
1158
+ await self._query(query, readonly=False)
1159
+ except Exception as e:
1160
+ logger.error("Error during node deletion: {%s}", e)
1161
+ raise
1162
+
1163
+ async def remove_nodes(self, node_ids: list[str]) -> None:
1164
+ """
1165
+ Remove multiple nodes from the graph.
1166
+
1167
+ Args:
1168
+ node_ids (list[str]): A list of node IDs to remove.
1169
+ """
1170
+ encoded_node_ids = [self._encode_graph_label(node_id.strip('"')) for node_id in node_ids]
1171
+ node_id_list = ", ".join([f'"{node_id}"' for node_id in encoded_node_ids])
1172
+
1173
+ query = """SELECT * FROM cypher('%s', $$
1174
+ MATCH (n:Entity)
1175
+ WHERE n.node_id IN [%s]
1176
+ DETACH DELETE n
1177
+ $$) AS (n agtype)""" % (self.graph_name, node_id_list)
1178
+
1179
+ try:
1180
+ await self._query(query, readonly=False)
1181
+ except Exception as e:
1182
+ logger.error("Error during node removal: {%s}", e)
1183
+ raise
1184
+
1185
+ async def remove_edges(self, edges: list[tuple[str, str]]) -> None:
1186
+ """
1187
+ Remove multiple edges from the graph.
1188
+
1189
+ Args:
1190
+ edges (list[tuple[str, str]]): A list of edges to remove, where each edge is a tuple of (source_node_id, target_node_id).
1191
+ """
1192
+ encoded_edges = [(self._encode_graph_label(src.strip('"')), self._encode_graph_label(tgt.strip('"'))) for src, tgt in edges]
1193
+ edge_list = ", ".join([f'["{src}", "{tgt}"]' for src, tgt in encoded_edges])
1194
+
1195
+ query = """SELECT * FROM cypher('%s', $$
1196
+ MATCH (a:Entity)-[r]->(b:Entity)
1197
+ WHERE [a.node_id, b.node_id] IN [%s]
1198
+ DELETE r
1199
+ $$) AS (r agtype)""" % (self.graph_name, edge_list)
1200
+
1201
+ try:
1202
+ await self._query(query, readonly=False)
1203
+ except Exception as e:
1204
+ logger.error("Error during edge removal: {%s}", e)
1205
+ raise
1206
+
1207
+ async def get_all_labels(self) -> list[str]:
1208
+ """
1209
+ Get all labels (node IDs) in the graph.
1210
+
1211
+ Returns:
1212
+ list[str]: A list of all labels in the graph.
1213
+ """
1214
+ query = """SELECT * FROM cypher('%s', $$
1215
+ MATCH (n:Entity)
1216
+ RETURN DISTINCT n.node_id AS label
1217
+ $$) AS (label text)""" % self.graph_name
1218
+
1219
+ results = await self._query(query)
1220
+ labels = [self._decode_graph_label(result["label"]) for result in results]
1221
+
1222
+ return labels
1223
 
1224
  async def embed_nodes(
1225
  self, algorithm: str
1226
  ) -> tuple[np.ndarray[Any, Any], list[str]]:
1227
+ """
1228
+ Generate node embeddings using the specified algorithm.
1229
 
1230
+ Args:
1231
+ algorithm (str): The name of the embedding algorithm to use.
1232
+
1233
+ Returns:
1234
+ tuple[np.ndarray[Any, Any], list[str]]: A tuple containing the embeddings and the corresponding node IDs.
1235
+ """
1236
+ if algorithm not in self._node_embed_algorithms:
1237
+ raise ValueError(f"Unsupported embedding algorithm: {algorithm}")
1238
+
1239
+ embed_func = self._node_embed_algorithms[algorithm]
1240
+ return await embed_func()
1241
 
1242
  async def get_knowledge_graph(
1243
  self, node_label: str, max_depth: int = 5
1244
  ) -> KnowledgeGraph:
1245
+ """
1246
+ Retrieve a subgraph containing the specified node and its neighbors up to the specified depth.
1247
+
1248
+ Args:
1249
+ node_label (str): The label of the node to start from. If "*", the entire graph is returned.
1250
+ max_depth (int): The maximum depth to traverse from the starting node.
1251
+
1252
+ Returns:
1253
+ KnowledgeGraph: The retrieved subgraph.
1254
+ """
1255
+ MAX_GRAPH_NODES = 1000
1256
+
1257
+ if node_label == "*":
1258
+ query = """SELECT * FROM cypher('%s', $$
1259
+ MATCH (n:Entity)
1260
+ OPTIONAL MATCH (n)-[r]->(m:Entity)
1261
+ RETURN n, r, m
1262
+ LIMIT %d
1263
+ $$) AS (n agtype, r agtype, m agtype)""" % (self.graph_name, MAX_GRAPH_NODES)
1264
+ else:
1265
+ encoded_node_label = self._encode_graph_label(node_label.strip('"'))
1266
+ query = """SELECT * FROM cypher('%s', $$
1267
+ MATCH (n:Entity {node_id: "%s"})
1268
+ OPTIONAL MATCH p = (n)-[*..%d]-(m)
1269
+ RETURN nodes(p) AS nodes, relationships(p) AS relationships
1270
+ LIMIT %d
1271
+ $$) AS (nodes agtype[], relationships agtype[])""" % (self.graph_name, encoded_node_label, max_depth, MAX_GRAPH_NODES)
1272
+
1273
+ results = await self._query(query)
1274
+
1275
+ nodes = set()
1276
+ edges = []
1277
+
1278
+ for result in results:
1279
+ if node_label == "*":
1280
+ if result["n"]:
1281
+ node = result["n"]
1282
+ nodes.add(self._decode_graph_label(node["node_id"]))
1283
+ if result["m"]:
1284
+ node = result["m"]
1285
+ nodes.add(self._decode_graph_label(node["node_id"]))
1286
+ if result["r"]:
1287
+ edge = result["r"]
1288
+ src_id = self._decode_graph_label(edge["start_id"])
1289
+ tgt_id = self._decode_graph_label(edge["end_id"])
1290
+ edges.append((src_id, tgt_id))
1291
+ else:
1292
+ if result["nodes"]:
1293
+ for node in result["nodes"]:
1294
+ nodes.add(self._decode_graph_label(node["node_id"]))
1295
+ if result["relationships"]:
1296
+ for edge in result["relationships"]:
1297
+ src_id = self._decode_graph_label(edge["start_id"])
1298
+ tgt_id = self._decode_graph_label(edge["end_id"])
1299
+ edges.append((src_id, tgt_id))
1300
+
1301
+ kg = KnowledgeGraph(
1302
+ nodes=[KnowledgeGraphNode(id=node_id) for node_id in nodes],
1303
+ edges=[KnowledgeGraphEdge(source=src, target=tgt) for src, tgt in edges],
1304
+ )
1305
+
1306
+ return kg
1307
+
1308
+ async def get_all_labels(self) -> list[str]:
1309
+ """
1310
+ Get all node labels in the graph
1311
+ Returns:
1312
+ [label1, label2, ...] # Alphabetically sorted label list
1313
+ """
1314
+ query = """SELECT * FROM cypher('%s', $$
1315
+ MATCH (n:Entity)
1316
+ RETURN DISTINCT n.node_id AS label
1317
+ ORDER BY label
1318
+ $$) AS (label agtype)""" % (self.graph_name)
1319
+
1320
+ try:
1321
+ results = await self._query(query)
1322
+ labels = []
1323
+ for record in results:
1324
+ if record["label"]:
1325
+ labels.append(self._decode_graph_label(record["label"]))
1326
+ return labels
1327
+ except Exception as e:
1328
+ logger.error(f"Error getting all labels: {str(e)}")
1329
+ return []
1330
 
1331
  async def drop(self) -> None:
1332
  """Drop the storage"""
lightrag/kg/qdrant_impl.py CHANGED
@@ -1,6 +1,6 @@
1
  import asyncio
2
  import os
3
- from typing import Any, final
4
  from dataclasses import dataclass
5
  import numpy as np
6
  import hashlib
@@ -141,8 +141,91 @@ class QdrantVectorDBStorage(BaseVectorStorage):
141
  # Qdrant handles persistence automatically
142
  pass
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  async def delete_entity(self, entity_name: str) -> None:
145
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  async def delete_entity_relation(self, entity_name: str) -> None:
148
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import asyncio
2
  import os
3
+ from typing import Any, final, List
4
  from dataclasses import dataclass
5
  import numpy as np
6
  import hashlib
 
141
  # Qdrant handles persistence automatically
142
  pass
143
 
144
+ async def delete(self, ids: List[str]) -> None:
145
+ """Delete vectors with specified IDs
146
+
147
+ Args:
148
+ ids: List of vector IDs to be deleted
149
+ """
150
+ try:
151
+ # Convert regular ids to Qdrant compatible ids
152
+ qdrant_ids = [compute_mdhash_id_for_qdrant(id) for id in ids]
153
+ # Delete points from the collection
154
+ self._client.delete(
155
+ collection_name=self.namespace,
156
+ points_selector=models.PointIdsList(
157
+ points=qdrant_ids,
158
+ ),
159
+ wait=True
160
+ )
161
+ logger.debug(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
162
+ except Exception as e:
163
+ logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
164
+
165
  async def delete_entity(self, entity_name: str) -> None:
166
+ """Delete an entity by name
167
+
168
+ Args:
169
+ entity_name: Name of the entity to delete
170
+ """
171
+ try:
172
+ # Generate the entity ID
173
+ entity_id = compute_mdhash_id_for_qdrant(entity_name, prefix="ent-")
174
+ logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
175
+
176
+ # Delete the entity point from the collection
177
+ self._client.delete(
178
+ collection_name=self.namespace,
179
+ points_selector=models.PointIdsList(
180
+ points=[entity_id],
181
+ ),
182
+ wait=True
183
+ )
184
+ logger.debug(f"Successfully deleted entity {entity_name}")
185
+ except Exception as e:
186
+ logger.error(f"Error deleting entity {entity_name}: {e}")
187
 
188
  async def delete_entity_relation(self, entity_name: str) -> None:
189
+ """Delete all relations associated with an entity
190
+
191
+ Args:
192
+ entity_name: Name of the entity whose relations should be deleted
193
+ """
194
+ try:
195
+ # Find relations where the entity is either source or target
196
+ results = self._client.scroll(
197
+ collection_name=self.namespace,
198
+ scroll_filter=models.Filter(
199
+ should=[
200
+ models.FieldCondition(
201
+ key="src_id",
202
+ match=models.MatchValue(value=entity_name)
203
+ ),
204
+ models.FieldCondition(
205
+ key="tgt_id",
206
+ match=models.MatchValue(value=entity_name)
207
+ )
208
+ ]
209
+ ),
210
+ with_payload=True,
211
+ limit=1000 # Adjust as needed for your use case
212
+ )
213
+
214
+ # Extract points that need to be deleted
215
+ relation_points = results[0]
216
+ ids_to_delete = [point.id for point in relation_points]
217
+
218
+ if ids_to_delete:
219
+ # Delete the relations
220
+ self._client.delete(
221
+ collection_name=self.namespace,
222
+ points_selector=models.PointIdsList(
223
+ points=ids_to_delete,
224
+ ),
225
+ wait=True
226
+ )
227
+ logger.debug(f"Deleted {len(ids_to_delete)} relations for {entity_name}")
228
+ else:
229
+ logger.debug(f"No relations found for entity {entity_name}")
230
+ except Exception as e:
231
+ logger.error(f"Error deleting relations for {entity_name}: {e}")
lightrag/kg/redis_impl.py CHANGED
@@ -9,7 +9,7 @@ if not pm.is_installed("redis"):
9
 
10
  # aioredis is a depricated library, replaced with redis
11
  from redis.asyncio import Redis
12
- from lightrag.utils import logger
13
  from lightrag.base import BaseKVStorage
14
  import json
15
 
@@ -64,3 +64,79 @@ class RedisKVStorage(BaseKVStorage):
64
  async def index_done_callback(self) -> None:
65
  # Redis handles persistence automatically
66
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # aioredis is a depricated library, replaced with redis
11
  from redis.asyncio import Redis
12
+ from lightrag.utils import logger, compute_mdhash_id
13
  from lightrag.base import BaseKVStorage
14
  import json
15
 
 
64
  async def index_done_callback(self) -> None:
65
  # Redis handles persistence automatically
66
  pass
67
+
68
+ async def delete(self, ids: list[str]) -> None:
69
+ """Delete entries with specified IDs
70
+
71
+ Args:
72
+ ids: List of entry IDs to be deleted
73
+ """
74
+ if not ids:
75
+ return
76
+
77
+ pipe = self._redis.pipeline()
78
+ for id in ids:
79
+ pipe.delete(f"{self.namespace}:{id}")
80
+
81
+ results = await pipe.execute()
82
+ deleted_count = sum(results)
83
+ logger.info(f"Deleted {deleted_count} of {len(ids)} entries from {self.namespace}")
84
+
85
+ async def delete_entity(self, entity_name: str) -> None:
86
+ """Delete an entity by name
87
+
88
+ Args:
89
+ entity_name: Name of the entity to delete
90
+ """
91
+
92
+ try:
93
+ entity_id = compute_mdhash_id(entity_name, prefix="ent-")
94
+ logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
95
+
96
+ # Delete the entity
97
+ result = await self._redis.delete(f"{self.namespace}:{entity_id}")
98
+
99
+ if result:
100
+ logger.debug(f"Successfully deleted entity {entity_name}")
101
+ else:
102
+ logger.debug(f"Entity {entity_name} not found in storage")
103
+ except Exception as e:
104
+ logger.error(f"Error deleting entity {entity_name}: {e}")
105
+
106
+ async def delete_entity_relation(self, entity_name: str) -> None:
107
+ """Delete all relations associated with an entity
108
+
109
+ Args:
110
+ entity_name: Name of the entity whose relations should be deleted
111
+ """
112
+ try:
113
+ # Get all keys in this namespace
114
+ cursor = 0
115
+ relation_keys = []
116
+ pattern = f"{self.namespace}:*"
117
+
118
+ while True:
119
+ cursor, keys = await self._redis.scan(cursor, match=pattern)
120
+
121
+ # For each key, get the value and check if it's related to entity_name
122
+ for key in keys:
123
+ value = await self._redis.get(key)
124
+ if value:
125
+ data = json.loads(value)
126
+ # Check if this is a relation involving the entity
127
+ if data.get("src_id") == entity_name or data.get("tgt_id") == entity_name:
128
+ relation_keys.append(key)
129
+
130
+ # Exit loop when cursor returns to 0
131
+ if cursor == 0:
132
+ break
133
+
134
+ # Delete the relation keys
135
+ if relation_keys:
136
+ deleted = await self._redis.delete(*relation_keys)
137
+ logger.debug(f"Deleted {deleted} relations for {entity_name}")
138
+ else:
139
+ logger.debug(f"No relations found for entity {entity_name}")
140
+
141
+ except Exception as e:
142
+ logger.error(f"Error deleting relations for {entity_name}: {e}")
lightrag/kg/tidb_impl.py CHANGED
@@ -5,7 +5,7 @@ from typing import Any, Union, final
5
 
6
  import numpy as np
7
 
8
- from lightrag.types import KnowledgeGraph
9
 
10
 
11
  from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
@@ -566,15 +566,148 @@ class TiDBGraphStorage(BaseGraphStorage):
566
  pass
567
 
568
  async def delete_node(self, node_id: str) -> None:
569
- raise NotImplementedError
570
-
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  async def get_all_labels(self) -> list[str]:
572
- raise NotImplementedError
573
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
  async def get_knowledge_graph(
575
  self, node_label: str, max_depth: int = 5
576
  ) -> KnowledgeGraph:
577
- raise NotImplementedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
 
579
 
580
  N_T = {
@@ -785,4 +918,39 @@ SQL_TEMPLATES = {
785
  weight = VALUES(weight), keywords = VALUES(keywords), description = VALUES(description),
786
  source_chunk_id = VALUES(source_chunk_id)
787
  """,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
  }
 
5
 
6
  import numpy as np
7
 
8
+ from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
9
 
10
 
11
  from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 
566
  pass
567
 
568
  async def delete_node(self, node_id: str) -> None:
569
+ """Delete a node and all its related edges
570
+
571
+ Args:
572
+ node_id: The ID of the node to delete
573
+ """
574
+ # First delete all edges related to this node
575
+ await self.db.execute(SQL_TEMPLATES["delete_node_edges"],
576
+ {"name": node_id, "workspace": self.db.workspace})
577
+
578
+ # Then delete the node itself
579
+ await self.db.execute(SQL_TEMPLATES["delete_node"],
580
+ {"name": node_id, "workspace": self.db.workspace})
581
+
582
+ logger.debug(f"Node {node_id} and its related edges have been deleted from the graph")
583
+
584
  async def get_all_labels(self) -> list[str]:
585
+ """Get all entity types (labels) in the database
586
+
587
+ Returns:
588
+ List of labels sorted alphabetically
589
+ """
590
+ result = await self.db.query(
591
+ SQL_TEMPLATES["get_all_labels"],
592
+ {"workspace": self.db.workspace},
593
+ multirows=True
594
+ )
595
+
596
+ if not result:
597
+ return []
598
+
599
+ # Extract all labels
600
+ return [item["label"] for item in result]
601
+
602
  async def get_knowledge_graph(
603
  self, node_label: str, max_depth: int = 5
604
  ) -> KnowledgeGraph:
605
+ """
606
+ Get a connected subgraph of nodes matching the specified label
607
+ Maximum number of nodes is limited by MAX_GRAPH_NODES environment variable (default: 1000)
608
+
609
+ Args:
610
+ node_label: The node label to match
611
+ max_depth: Maximum depth of the subgraph
612
+
613
+ Returns:
614
+ KnowledgeGraph object containing nodes and edges
615
+ """
616
+ result = KnowledgeGraph()
617
+ MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000))
618
+
619
+ # Get matching nodes
620
+ if node_label == "*":
621
+ # Handle special case, get all nodes
622
+ node_results = await self.db.query(
623
+ SQL_TEMPLATES["get_all_nodes"],
624
+ {"workspace": self.db.workspace, "max_nodes": MAX_GRAPH_NODES},
625
+ multirows=True
626
+ )
627
+ else:
628
+ # Get nodes matching the label
629
+ label_pattern = f"%{node_label}%"
630
+ node_results = await self.db.query(
631
+ SQL_TEMPLATES["get_matching_nodes"],
632
+ {"workspace": self.db.workspace, "label_pattern": label_pattern},
633
+ multirows=True
634
+ )
635
+
636
+ if not node_results:
637
+ logger.warning(f"No nodes found matching label {node_label}")
638
+ return result
639
+
640
+ # Limit the number of returned nodes
641
+ if len(node_results) > MAX_GRAPH_NODES:
642
+ node_results = node_results[:MAX_GRAPH_NODES]
643
+
644
+ # Extract node names for edge query
645
+ node_names = [node["name"] for node in node_results]
646
+ node_names_str = ",".join([f"'{name}'" for name in node_names])
647
+
648
+ # Add nodes to result
649
+ for node in node_results:
650
+ node_properties = {k: v for k, v in node.items() if k not in ["id", "name", "entity_type"]}
651
+ result.nodes.append(
652
+ KnowledgeGraphNode(
653
+ id=node["name"],
654
+ labels=[node["entity_type"]] if node.get("entity_type") else [node["name"]],
655
+ properties=node_properties
656
+ )
657
+ )
658
+
659
+ # Get related edges
660
+ edge_results = await self.db.query(
661
+ SQL_TEMPLATES["get_related_edges"].format(node_names=node_names_str),
662
+ {"workspace": self.db.workspace},
663
+ multirows=True
664
+ )
665
+
666
+ if edge_results:
667
+ # Add edges to result
668
+ for edge in edge_results:
669
+ # Only include edges related to selected nodes
670
+ if edge["source_name"] in node_names and edge["target_name"] in node_names:
671
+ edge_id = f"{edge['source_name']}-{edge['target_name']}"
672
+ edge_properties = {k: v for k, v in edge.items()
673
+ if k not in ["id", "source_name", "target_name"]}
674
+
675
+ result.edges.append(
676
+ KnowledgeGraphEdge(
677
+ id=edge_id,
678
+ type="RELATED",
679
+ source=edge["source_name"],
680
+ target=edge["target_name"],
681
+ properties=edge_properties
682
+ )
683
+ )
684
+
685
+ logger.info(
686
+ f"Subgraph query successful | Node count: {len(result.nodes)} | Edge count: {len(result.edges)}"
687
+ )
688
+ return result
689
+
690
+ async def remove_nodes(self, nodes: list[str]):
691
+ """Delete multiple nodes
692
+
693
+ Args:
694
+ nodes: List of node IDs to delete
695
+ """
696
+ for node_id in nodes:
697
+ await self.delete_node(node_id)
698
+
699
+ async def remove_edges(self, edges: list[tuple[str, str]]):
700
+ """Delete multiple edges
701
+
702
+ Args:
703
+ edges: List of edges to delete, each edge is a (source, target) tuple
704
+ """
705
+ for source, target in edges:
706
+ await self.db.execute(SQL_TEMPLATES["remove_multiple_edges"], {
707
+ "source": source,
708
+ "target": target,
709
+ "workspace": self.db.workspace
710
+ })
711
 
712
 
713
  N_T = {
 
918
  weight = VALUES(weight), keywords = VALUES(keywords), description = VALUES(description),
919
  source_chunk_id = VALUES(source_chunk_id)
920
  """,
921
+ "delete_node": """
922
+ DELETE FROM LIGHTRAG_GRAPH_NODES
923
+ WHERE name = :name AND workspace = :workspace
924
+ """,
925
+ "delete_node_edges": """
926
+ DELETE FROM LIGHTRAG_GRAPH_EDGES
927
+ WHERE (source_name = :name OR target_name = :name) AND workspace = :workspace
928
+ """,
929
+ "get_all_labels": """
930
+ SELECT DISTINCT entity_type as label
931
+ FROM LIGHTRAG_GRAPH_NODES
932
+ WHERE workspace = :workspace
933
+ ORDER BY entity_type
934
+ """,
935
+ "get_matching_nodes": """
936
+ SELECT * FROM LIGHTRAG_GRAPH_NODES
937
+ WHERE name LIKE :label_pattern AND workspace = :workspace
938
+ ORDER BY name
939
+ """,
940
+ "get_all_nodes": """
941
+ SELECT * FROM LIGHTRAG_GRAPH_NODES
942
+ WHERE workspace = :workspace
943
+ ORDER BY name
944
+ LIMIT :max_nodes
945
+ """,
946
+ "get_related_edges": """
947
+ SELECT * FROM LIGHTRAG_GRAPH_EDGES
948
+ WHERE (source_name IN (:node_names) OR target_name IN (:node_names))
949
+ AND workspace = :workspace
950
+ """,
951
+ "remove_multiple_edges": """
952
+ DELETE FROM LIGHTRAG_GRAPH_EDGES
953
+ WHERE (source_name = :source AND target_name = :target)
954
+ AND workspace = :workspace
955
+ """
956
  }
lightrag/lightrag.py CHANGED
@@ -1399,6 +1399,54 @@ class LightRAG:
1399
  ]
1400
  )
1401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1402
  def _get_content_summary(self, content: str, max_length: int = 100) -> str:
1403
  """Get summary of document content
1404
 
 
1399
  ]
1400
  )
1401
 
1402
+ def delete_by_relation(self, source_entity: str, target_entity: str) -> None:
1403
+ """Synchronously delete a relation between two entities.
1404
+
1405
+ Args:
1406
+ source_entity: Name of the source entity
1407
+ target_entity: Name of the target entity
1408
+ """
1409
+ loop = always_get_an_event_loop()
1410
+ return loop.run_until_complete(self.adelete_by_relation(source_entity, target_entity))
1411
+
1412
+ async def adelete_by_relation(self, source_entity: str, target_entity: str) -> None:
1413
+ """Asynchronously delete a relation between two entities.
1414
+
1415
+ Args:
1416
+ source_entity: Name of the source entity
1417
+ target_entity: Name of the target entity
1418
+ """
1419
+ try:
1420
+ # Check if the relation exists
1421
+ edge_exists = await self.chunk_entity_relation_graph.has_edge(source_entity, target_entity)
1422
+ if not edge_exists:
1423
+ logger.warning(f"Relation from '{source_entity}' to '{target_entity}' does not exist")
1424
+ return
1425
+
1426
+ # Delete relation from vector database
1427
+ relation_id = compute_mdhash_id(source_entity + target_entity, prefix="rel-")
1428
+ await self.relationships_vdb.delete([relation_id])
1429
+
1430
+ # Delete relation from knowledge graph
1431
+ await self.chunk_entity_relation_graph.remove_edges([(source_entity, target_entity)])
1432
+
1433
+ logger.info(f"Successfully deleted relation from '{source_entity}' to '{target_entity}'")
1434
+ await self._delete_relation_done()
1435
+ except Exception as e:
1436
+ logger.error(f"Error while deleting relation from '{source_entity}' to '{target_entity}': {e}")
1437
+
1438
+ async def _delete_relation_done(self) -> None:
1439
+ """Callback after relation deletion is complete"""
1440
+ await asyncio.gather(
1441
+ *[
1442
+ cast(StorageNameSpace, storage_inst).index_done_callback()
1443
+ for storage_inst in [ # type: ignore
1444
+ self.relationships_vdb,
1445
+ self.chunk_entity_relation_graph,
1446
+ ]
1447
+ ]
1448
+ )
1449
+
1450
  def _get_content_summary(self, content: str, max_length: int = 100) -> str:
1451
  """Get summary of document content
1452